Diffstat (limited to 'drivers/net/ethernet/ibm')
-rw-r--r--  drivers/net/ethernet/ibm/Kconfig              |    8
-rw-r--r--  drivers/net/ethernet/ibm/ehea/ehea.h          |    1
-rw-r--r--  drivers/net/ethernet/ibm/ehea/ehea_ethtool.c  |    4
-rw-r--r--  drivers/net/ethernet/ibm/ehea/ehea_main.c     |   48
-rw-r--r--  drivers/net/ethernet/ibm/emac/core.c          |   49
-rw-r--r--  drivers/net/ethernet/ibm/emac/emac.h          |    2
-rw-r--r--  drivers/net/ethernet/ibm/emac/mal.c           |    4
-rw-r--r--  drivers/net/ethernet/ibm/ibmveth.c            |  483
-rw-r--r--  drivers/net/ethernet/ibm/ibmveth.h            |   27
-rw-r--r--  drivers/net/ethernet/ibm/ibmvnic.c            | 3195
-rw-r--r--  drivers/net/ethernet/ibm/ibmvnic.h            |  229
11 files changed, 2547 insertions, 1503 deletions
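A recurring pattern in the ehea hunks below is the migration from the old tasklet_init() callback convention (an opaque unsigned long cookie) to tasklet_setup()/from_tasklet(). As a minimal sketch of that pattern: tasklet_setup(), from_tasklet(), and struct tasklet_struct are real kernel APIs, while struct my_adapter and the function names are hypothetical stand-ins, not taken from the patch itself.

#include <linux/interrupt.h>

/* Hypothetical driver state; any struct embedding the tasklet works. */
struct my_adapter {
	struct tasklet_struct neq_tasklet;
	/* ... other driver fields ... */
};

/* New-style callback: receives the tasklet pointer, not a cast cookie. */
static void my_neq_tasklet(struct tasklet_struct *t)
{
	/* from_tasklet() is a container_of() wrapper keyed on the field name. */
	struct my_adapter *adapter = from_tasklet(adapter, t, neq_tasklet);

	/* ... process events using adapter ... */
}

static void my_adapter_init(struct my_adapter *adapter)
{
	/* Replaces: tasklet_init(&t, fn, (unsigned long)adapter); */
	tasklet_setup(&adapter->neq_tasklet, my_neq_tasklet);
}

The same shape appears in the ehea_neq_tasklet() conversion in ehea_main.c below.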
diff --git a/drivers/net/ethernet/ibm/Kconfig b/drivers/net/ethernet/ibm/Kconfig index a95d941360f8..c0c112d95b89 100644 --- a/drivers/net/ethernet/ibm/Kconfig +++ b/drivers/net/ethernet/ibm/Kconfig @@ -7,7 +7,7 @@ config NET_VENDOR_IBM bool "IBM devices" default y depends on PPC_PSERIES || PPC_DCR || (IBMEBUS && SPARSEMEM) - ---help--- + help If you have a network (Ethernet) card belonging to this class, say Y. Note that the answer to this question doesn't directly affect the @@ -20,7 +20,7 @@ if NET_VENDOR_IBM config IBMVETH tristate "IBM LAN Virtual Ethernet support" depends on PPC_PSERIES - ---help--- + help This driver supports virtual ethernet adapters on newer IBM iSeries and pSeries systems. @@ -32,7 +32,7 @@ source "drivers/net/ethernet/ibm/emac/Kconfig" config EHEA tristate "eHEA Ethernet support" depends on IBMEBUS && SPARSEMEM - ---help--- + help This driver supports the IBM pSeries eHEA ethernet adapter. To compile the driver as a module, choose M here. The module @@ -41,7 +41,7 @@ config EHEA config IBMVNIC tristate "IBM Virtual NIC support" depends on PPC_PSERIES - ---help--- + help This driver supports Virtual NIC adapters on IBM i and IBM System p systems. diff --git a/drivers/net/ethernet/ibm/ehea/ehea.h b/drivers/net/ethernet/ibm/ehea/ehea.h index b140835d4c23..208c440a602b 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea.h +++ b/drivers/net/ethernet/ibm/ehea/ehea.h @@ -19,6 +19,7 @@ #include <linux/ethtool.h> #include <linux/vmalloc.h> #include <linux/if_vlan.h> +#include <linux/platform_device.h> #include <asm/ibmebus.h> #include <asm/io.h> diff --git a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c index 6cb86032ce46..1db5b6790a41 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c @@ -159,8 +159,8 @@ static int ehea_nway_reset(struct net_device *dev) static void ehea_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); - strlcpy(info->version, DRV_VERSION, sizeof(info->version)); + strscpy(info->driver, DRV_NAME, sizeof(info->driver)); + strscpy(info->version, DRV_VERSION, sizeof(info->version)); } static u32 ehea_get_msglevel(struct net_device *dev) diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 0273fb7a9d01..b4aff59b3eb4 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -29,6 +29,8 @@ #include <asm/kexec.h> #include <linux/mutex.h> #include <linux/prefetch.h> +#include <linux/of.h> +#include <linux/of_device.h> #include <net/ip.h> @@ -109,6 +111,7 @@ static const struct of_device_id ehea_device_table[] = { }, {}, }; +MODULE_DEVICE_TABLE(of, ehea_device_table); static struct platform_driver ehea_driver = { .driver = { @@ -1212,9 +1215,9 @@ static void ehea_parse_eqe(struct ehea_adapter *adapter, u64 eqe) } } -static void ehea_neq_tasklet(unsigned long data) +static void ehea_neq_tasklet(struct tasklet_struct *t) { - struct ehea_adapter *adapter = (struct ehea_adapter *)data; + struct ehea_adapter *adapter = from_tasklet(adapter, t, neq_tasklet); struct ehea_eqe *eqe; u64 event_mask; @@ -1543,7 +1546,7 @@ static int ehea_init_port_res(struct ehea_port *port, struct ehea_port_res *pr, kfree(init_attr); - netif_napi_add(pr->port->netdev, &pr->napi, ehea_poll, 64); + netif_napi_add(pr->port->netdev, &pr->napi, ehea_poll); ret = 0; goto out; @@ -1614,7 +1617,7 @@ static void 
write_swqe2_immediate(struct sk_buff *skb, struct ehea_swqe *swqe, * For TSO packets we only copy the headers into the * immediate area. */ - immediate_len = ETH_HLEN + ip_hdrlen(skb) + tcp_hdrlen(skb); + immediate_len = skb_tcp_all_headers(skb); } if (skb_is_gso(skb) || skb_data_size >= SWQE2_MAX_IMM) { @@ -1740,7 +1743,7 @@ static int ehea_set_mac_addr(struct net_device *dev, void *sa) goto out_free; } - memcpy(dev->dev_addr, mac_addr->sa_data, dev->addr_len); + eth_hw_addr_set(dev, mac_addr->sa_data); /* Deregister old MAC in pHYP */ if (port->state == EHEA_PORT_UP) { @@ -2617,10 +2620,8 @@ static int ehea_restart_qps(struct net_device *dev) u16 dummy16 = 0; cb0 = (void *)get_zeroed_page(GFP_KERNEL); - if (!cb0) { - ret = -ENOMEM; - goto out; - } + if (!cb0) + return -ENOMEM; for (i = 0; i < (port->num_def_qps); i++) { struct ehea_port_res *pr = &port->port_res[i]; @@ -2640,6 +2641,7 @@ static int ehea_restart_qps(struct net_device *dev) cb0); if (hret != H_SUCCESS) { netdev_err(dev, "query_ehea_qp failed (1)\n"); + ret = -EFAULT; goto out; } @@ -2652,6 +2654,7 @@ static int ehea_restart_qps(struct net_device *dev) &dummy64, &dummy16, &dummy16); if (hret != H_SUCCESS) { netdev_err(dev, "modify_ehea_qp failed (1)\n"); + ret = -EFAULT; goto out; } @@ -2660,6 +2663,7 @@ static int ehea_restart_qps(struct net_device *dev) cb0); if (hret != H_SUCCESS) { netdev_err(dev, "query_ehea_qp failed (2)\n"); + ret = -EFAULT; goto out; } @@ -2866,14 +2870,14 @@ out: return ret; } -static ssize_t ehea_show_port_id(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t log_port_id_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct ehea_port *port = container_of(dev, struct ehea_port, ofdev.dev); return sprintf(buf, "%d", port->logical_port_id); } -static DEVICE_ATTR(log_port_id, 0444, ehea_show_port_id, NULL); +static DEVICE_ATTR_RO(log_port_id); static void logical_port_release(struct device *dev) { @@ -2896,6 +2900,7 @@ static struct device *ehea_register_port(struct ehea_port *port, ret = of_device_register(&port->ofdev); if (ret) { pr_err("failed to register device. 
ret=%d\n", ret); + put_device(&port->ofdev.dev); goto out; } @@ -2984,7 +2989,7 @@ static struct ehea_port *ehea_setup_single_port(struct ehea_adapter *adapter, SET_NETDEV_DEV(dev, port_dev); /* initialize net_device structure */ - memcpy(dev->dev_addr, &port->mac_addr, ETH_ALEN); + eth_hw_addr_set(dev, (u8 *)&port->mac_addr); dev->netdev_ops = &ehea_netdev_ops; ehea_set_ethtool_ops(dev); @@ -3112,7 +3117,7 @@ static struct device_node *ehea_get_eth_dn(struct ehea_adapter *adapter, return NULL; } -static ssize_t ehea_probe_port(struct device *dev, +static ssize_t probe_port_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -3167,9 +3172,9 @@ static ssize_t ehea_probe_port(struct device *dev, return (ssize_t) count; } -static ssize_t ehea_remove_port(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t remove_port_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct ehea_adapter *adapter = dev_get_drvdata(dev); struct ehea_port *port; @@ -3202,8 +3207,8 @@ static ssize_t ehea_remove_port(struct device *dev, return (ssize_t) count; } -static DEVICE_ATTR(probe_port, 0200, NULL, ehea_probe_port); -static DEVICE_ATTR(remove_port, 0200, NULL, ehea_remove_port); +static DEVICE_ATTR_WO(probe_port); +static DEVICE_ATTR_WO(remove_port); static int ehea_create_device_sysfs(struct platform_device *dev) { @@ -3247,7 +3252,7 @@ static int ehea_mem_notifier(struct notifier_block *nb, switch (action) { case MEM_CANCEL_OFFLINE: pr_info("memory offlining canceled"); - /* Fall through - re-add canceled memory block */ + fallthrough; /* re-add canceled memory block */ case MEM_ONLINE: pr_info("memory is going online"); @@ -3417,8 +3422,7 @@ static int ehea_probe_adapter(struct platform_device *dev) goto out_free_ad; } - tasklet_init(&adapter->neq_tasklet, ehea_neq_tasklet, - (unsigned long)adapter); + tasklet_setup(&adapter->neq_tasklet, ehea_neq_tasklet); ret = ehea_create_device_sysfs(dev); if (ret) diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index b7fc17756c51..9b08e41ccc29 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -38,6 +38,7 @@ #include <linux/of_irq.h> #include <linux/of_net.h> #include <linux/of_mdio.h> +#include <linux/platform_device.h> #include <linux/slab.h> #include <asm/processor.h> @@ -872,7 +873,7 @@ static void __emac_mdio_write(struct emac_instance *dev, u8 id, u8 reg, { struct emac_regs __iomem *p = dev->emacp; u32 r = 0; - int n, err = -ETIMEDOUT; + int n; mutex_lock(&dev->mdio_lock); @@ -919,7 +920,6 @@ static void __emac_mdio_write(struct emac_instance *dev, u8 id, u8 reg, goto bail; } } - err = 0; bail: if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) rgmii_put_mdio(dev->rgmii_dev, dev->rgmii_port); @@ -1013,7 +1013,7 @@ static int emac_set_mac_address(struct net_device *ndev, void *sa) mutex_lock(&dev->link_lock); - memcpy(ndev->dev_addr, addr->sa_data, ndev->addr_len); + eth_hw_addr_set(ndev, addr->sa_data); emac_rx_disable(dev); emac_tx_disable(dev); @@ -2137,8 +2137,11 @@ emac_ethtool_set_link_ksettings(struct net_device *ndev, return 0; } -static void emac_ethtool_get_ringparam(struct net_device *ndev, - struct ethtool_ringparam *rp) +static void +emac_ethtool_get_ringparam(struct net_device *ndev, + struct ethtool_ringparam *rp, + struct kernel_ethtool_ringparam *kernel_rp, + struct netlink_ext_ack *extack) { rp->rx_max_pending = rp->rx_pending = 
NUM_RX_BUFF; rp->tx_max_pending = rp->tx_pending = NUM_TX_BUFF; @@ -2281,8 +2284,8 @@ static void emac_ethtool_get_drvinfo(struct net_device *ndev, { struct emac_instance *dev = netdev_priv(ndev); - strlcpy(info->driver, "ibm_emac", sizeof(info->driver)); - strlcpy(info->version, DRV_VERSION, sizeof(info->version)); + strscpy(info->driver, "ibm_emac", sizeof(info->driver)); + strscpy(info->version, DRV_VERSION, sizeof(info->version)); snprintf(info->bus_info, sizeof(info->bus_info), "PPC 4xx EMAC-%d %pOF", dev->cell_index, dev->ofdev->dev.of_node); } @@ -2320,7 +2323,7 @@ static int emac_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) switch (cmd) { case SIOCGMIIPHY: data->phy_id = dev->phy.address; - /* Fall through */ + fallthrough; case SIOCGMIIREG: data->val_out = emac_mdio_read(ndev, dev->phy.address, data->reg_num); @@ -2391,11 +2394,11 @@ static int emac_check_deps(struct emac_instance *dev, static void emac_put_deps(struct emac_instance *dev) { - of_dev_put(dev->mal_dev); - of_dev_put(dev->zmii_dev); - of_dev_put(dev->rgmii_dev); - of_dev_put(dev->mdio_dev); - of_dev_put(dev->tah_dev); + platform_device_put(dev->mal_dev); + platform_device_put(dev->zmii_dev); + platform_device_put(dev->rgmii_dev); + platform_device_put(dev->mdio_dev); + platform_device_put(dev->tah_dev); } static int emac_of_bus_notify(struct notifier_block *nb, unsigned long action, @@ -2436,7 +2439,7 @@ static int emac_wait_deps(struct emac_instance *dev) for (i = 0; i < EMAC_DEP_COUNT; i++) { of_node_put(deps[i].node); if (err) - of_dev_put(deps[i].ofdev); + platform_device_put(deps[i].ofdev); } if (err == 0) { dev->mal_dev = deps[EMAC_DEP_MAL_IDX].ofdev; @@ -2445,7 +2448,7 @@ static int emac_wait_deps(struct emac_instance *dev) dev->tah_dev = deps[EMAC_DEP_TAH_IDX].ofdev; dev->mdio_dev = deps[EMAC_DEP_MDIO_IDX].ofdev; } - of_dev_put(deps[EMAC_DEP_PREV_IDX].ofdev); + platform_device_put(deps[EMAC_DEP_PREV_IDX].ofdev); return err; } @@ -2848,7 +2851,6 @@ static int emac_init_phy(struct emac_instance *dev) static int emac_init_config(struct emac_instance *dev) { struct device_node *np = dev->ofdev->dev.of_node; - const void *p; int err; /* Read config from device-tree */ @@ -2976,13 +2978,10 @@ static int emac_init_config(struct emac_instance *dev) } /* Read MAC-address */ - p = of_get_property(np, "local-mac-address", NULL); - if (p == NULL) { - printk(KERN_ERR "%pOF: Can't find local-mac-address property\n", - np); - return -ENXIO; - } - memcpy(dev->ndev->dev_addr, p, ETH_ALEN); + err = of_get_ethdev_address(np, dev->ndev); + if (err) + return dev_err_probe(&dev->ofdev->dev, err, + "Can't get valid [local-]mac-address from OF !\n"); /* IAHT and GAHT filter parameterization */ if (emac_has_feature(dev, EMAC_FTR_EMAC4SYNC)) { @@ -3011,7 +3010,7 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_stop = emac_close, .ndo_get_stats = emac_stats, .ndo_set_rx_mode = emac_set_multicast_list, - .ndo_do_ioctl = emac_ioctl, + .ndo_eth_ioctl = emac_ioctl, .ndo_tx_timeout = emac_tx_timeout, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = emac_set_mac_address, @@ -3023,7 +3022,7 @@ static const struct net_device_ops emac_gige_netdev_ops = { .ndo_stop = emac_close, .ndo_get_stats = emac_stats, .ndo_set_rx_mode = emac_set_multicast_list, - .ndo_do_ioctl = emac_ioctl, + .ndo_eth_ioctl = emac_ioctl, .ndo_tx_timeout = emac_tx_timeout, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = emac_set_mac_address, diff --git a/drivers/net/ethernet/ibm/emac/emac.h 
b/drivers/net/ethernet/ibm/emac/emac.h index aa9f651288d5..09d3ac374b2d 100644 --- a/drivers/net/ethernet/ibm/emac/emac.h +++ b/drivers/net/ethernet/ibm/emac/emac.h @@ -77,7 +77,7 @@ struct emac_regs { struct { u32 rsvd1; u32 revid; - u32 rsvd2[2]; + u32 rsvd2[2]; u32 iaht1; /* Reset, R */ u32 iaht2; /* Reset, R */ u32 iaht3; /* Reset, R */ diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c index 075c07303f16..ff5487bbebe3 100644 --- a/drivers/net/ethernet/ibm/emac/mal.c +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -605,8 +605,8 @@ static int mal_probe(struct platform_device *ofdev) init_dummy_netdev(&mal->dummy_dev); - netif_napi_add(&mal->dummy_dev, &mal->napi, mal_poll, - CONFIG_IBM_EMAC_POLL_WEIGHT); + netif_napi_add_weight(&mal->dummy_dev, &mal->napi, mal_poll, + CONFIG_IBM_EMAC_POLL_WEIGHT); /* Load power-on reset defaults */ mal_reset(mal); diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 84121aab7ff1..5b96cd94dcd2 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -141,6 +141,13 @@ static inline int ibmveth_rxq_csum_good(struct ibmveth_adapter *adapter) return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_CSUM_GOOD; } +static unsigned int ibmveth_real_max_tx_queues(void) +{ + unsigned int n_cpu = num_online_cpus(); + + return min(n_cpu, IBMVETH_MAX_QUEUES); +} + /* setup the initial settings for a buffer pool */ static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool, u32 pool_index, u32 pool_size, @@ -456,6 +463,38 @@ static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter) } } +static void ibmveth_free_tx_ltb(struct ibmveth_adapter *adapter, int idx) +{ + dma_unmap_single(&adapter->vdev->dev, adapter->tx_ltb_dma[idx], + adapter->tx_ltb_size, DMA_TO_DEVICE); + kfree(adapter->tx_ltb_ptr[idx]); + adapter->tx_ltb_ptr[idx] = NULL; +} + +static int ibmveth_allocate_tx_ltb(struct ibmveth_adapter *adapter, int idx) +{ + adapter->tx_ltb_ptr[idx] = kzalloc(adapter->tx_ltb_size, + GFP_KERNEL); + if (!adapter->tx_ltb_ptr[idx]) { + netdev_err(adapter->netdev, + "unable to allocate tx long term buffer\n"); + return -ENOMEM; + } + adapter->tx_ltb_dma[idx] = dma_map_single(&adapter->vdev->dev, + adapter->tx_ltb_ptr[idx], + adapter->tx_ltb_size, + DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->vdev->dev, adapter->tx_ltb_dma[idx])) { + netdev_err(adapter->netdev, + "unable to DMA map tx long term buffer\n"); + kfree(adapter->tx_ltb_ptr[idx]); + adapter->tx_ltb_ptr[idx] = NULL; + return -ENOMEM; + } + + return 0; +} + static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter, union ibmveth_buf_desc rxq_desc, u64 mac_address) { @@ -483,17 +522,6 @@ retry: return rc; } -static u64 ibmveth_encode_mac_addr(u8 *mac) -{ - int i; - u64 encoded = 0; - - for (i = 0; i < ETH_ALEN; i++) - encoded = (encoded << 8) | mac[i]; - - return encoded; -} - static int ibmveth_open(struct net_device *netdev) { struct ibmveth_adapter *adapter = netdev_priv(netdev); @@ -549,11 +577,16 @@ static int ibmveth_open(struct net_device *netdev) goto out_unmap_buffer_list; } + for (i = 0; i < netdev->real_num_tx_queues; i++) { + if (ibmveth_allocate_tx_ltb(adapter, i)) + goto out_free_tx_ltb; + } + adapter->rx_queue.index = 0; adapter->rx_queue.num_slots = rxq_entries; adapter->rx_queue.toggle = 1; - mac_address = ibmveth_encode_mac_addr(netdev->dev_addr); + mac_address = ether_addr_to_u64(netdev->dev_addr); rxq_desc.fields.flags_len = IBMVETH_BUF_VALID | 
adapter->rx_queue.queue_len; @@ -605,32 +638,16 @@ static int ibmveth_open(struct net_device *netdev) } rc = -ENOMEM; - adapter->bounce_buffer = - kmalloc(netdev->mtu + IBMVETH_BUFF_OH, GFP_KERNEL); - if (!adapter->bounce_buffer) - goto out_free_irq; - - adapter->bounce_buffer_dma = - dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer, - netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, adapter->bounce_buffer_dma)) { - netdev_err(netdev, "unable to map bounce buffer\n"); - goto out_free_bounce_buffer; - } netdev_dbg(netdev, "initial replenish cycle\n"); ibmveth_interrupt(netdev->irq, netdev); - netif_start_queue(netdev); + netif_tx_start_all_queues(netdev); netdev_dbg(netdev, "open complete\n"); return 0; -out_free_bounce_buffer: - kfree(adapter->bounce_buffer); -out_free_irq: - free_irq(netdev->irq, netdev); out_free_buffer_pools: while (--i >= 0) { if (adapter->rx_buff_pool[i].active) @@ -640,6 +657,12 @@ out_free_buffer_pools: out_unmap_filter_list: dma_unmap_single(dev, adapter->filter_list_dma, 4096, DMA_BIDIRECTIONAL); + +out_free_tx_ltb: + while (--i >= 0) { + ibmveth_free_tx_ltb(adapter, i); + } + out_unmap_buffer_list: dma_unmap_single(dev, adapter->buffer_list_dma, 4096, DMA_BIDIRECTIONAL); @@ -668,7 +691,7 @@ static int ibmveth_close(struct net_device *netdev) napi_disable(&adapter->napi); if (!adapter->pool_config) - netif_stop_queue(netdev); + netif_tx_stop_all_queues(netdev); h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); @@ -702,44 +725,49 @@ static int ibmveth_close(struct net_device *netdev) ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[i]); - dma_unmap_single(&adapter->vdev->dev, adapter->bounce_buffer_dma, - adapter->netdev->mtu + IBMVETH_BUFF_OH, - DMA_BIDIRECTIONAL); - kfree(adapter->bounce_buffer); + for (i = 0; i < netdev->real_num_tx_queues; i++) + ibmveth_free_tx_ltb(adapter, i); netdev_dbg(netdev, "close complete\n"); return 0; } -static int netdev_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) +static int ibmveth_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { - u32 supported, advertising; - - supported = (SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | - SUPPORTED_FIBRE); - advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg | - ADVERTISED_FIBRE); - cmd->base.speed = SPEED_1000; - cmd->base.duplex = DUPLEX_FULL; - cmd->base.port = PORT_FIBRE; - cmd->base.phy_address = 0; - cmd->base.autoneg = AUTONEG_ENABLE; - - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - supported); - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, - advertising); + struct ibmveth_adapter *adapter = netdev_priv(dev); + + return ethtool_virtdev_set_link_ksettings(dev, cmd, + &adapter->speed, + &adapter->duplex); +} + +static int ibmveth_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) +{ + struct ibmveth_adapter *adapter = netdev_priv(dev); + + cmd->base.speed = adapter->speed; + cmd->base.duplex = adapter->duplex; + cmd->base.port = PORT_OTHER; return 0; } +static void ibmveth_init_link_settings(struct net_device *dev) +{ + struct ibmveth_adapter *adapter = netdev_priv(dev); + + adapter->speed = SPEED_1000; + adapter->duplex = DUPLEX_FULL; +} + static void netdev_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strlcpy(info->driver, ibmveth_driver_name, sizeof(info->driver)); - strlcpy(info->version, ibmveth_driver_version, sizeof(info->version)); + 
strscpy(info->driver, ibmveth_driver_name, sizeof(info->driver)); + strscpy(info->version, ibmveth_driver_version, sizeof(info->version)); } static netdev_features_t ibmveth_fix_features(struct net_device *dev, @@ -964,13 +992,79 @@ static void ibmveth_get_ethtool_stats(struct net_device *dev, data[i] = IBMVETH_GET_STAT(adapter, ibmveth_stats[i].offset); } +static void ibmveth_get_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + channels->max_tx = ibmveth_real_max_tx_queues(); + channels->tx_count = netdev->real_num_tx_queues; + + channels->max_rx = netdev->real_num_rx_queues; + channels->rx_count = netdev->real_num_rx_queues; +} + +static int ibmveth_set_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct ibmveth_adapter *adapter = netdev_priv(netdev); + unsigned int old = netdev->real_num_tx_queues, + goal = channels->tx_count; + int rc, i; + + /* If ndo_open has not been called yet then don't allocate, just set + * desired netdev_queue's and return + */ + if (!(netdev->flags & IFF_UP)) + return netif_set_real_num_tx_queues(netdev, goal); + + /* We have IBMVETH_MAX_QUEUES netdev_queue's allocated + * but we may need to alloc/free the ltb's. + */ + netif_tx_stop_all_queues(netdev); + + /* Allocate any queue that we need */ + for (i = old; i < goal; i++) { + if (adapter->tx_ltb_ptr[i]) + continue; + + rc = ibmveth_allocate_tx_ltb(adapter, i); + if (!rc) + continue; + + /* if something goes wrong, free everything we just allocated */ + netdev_err(netdev, "Failed to allocate more tx queues, returning to %d queues\n", + old); + goal = old; + old = i; + break; + } + rc = netif_set_real_num_tx_queues(netdev, goal); + if (rc) { + netdev_err(netdev, "Failed to set real tx queues, returning to %d queues\n", + old); + goal = old; + old = i; + } + /* Free any that are no longer needed */ + for (i = old; i > goal; i--) { + if (adapter->tx_ltb_ptr[i - 1]) + ibmveth_free_tx_ltb(adapter, i - 1); + } + + netif_tx_wake_all_queues(netdev); + + return rc; +} + static const struct ethtool_ops netdev_ethtool_ops = { - .get_drvinfo = netdev_get_drvinfo, - .get_link = ethtool_op_get_link, - .get_strings = ibmveth_get_strings, - .get_sset_count = ibmveth_get_sset_count, - .get_ethtool_stats = ibmveth_get_ethtool_stats, - .get_link_ksettings = netdev_get_link_ksettings, + .get_drvinfo = netdev_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = ibmveth_get_strings, + .get_sset_count = ibmveth_get_sset_count, + .get_ethtool_stats = ibmveth_get_ethtool_stats, + .get_link_ksettings = ibmveth_get_link_ksettings, + .set_link_ksettings = ibmveth_set_link_ksettings, + .get_channels = ibmveth_get_channels, + .set_channels = ibmveth_set_channels }; static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) @@ -978,10 +1072,8 @@ static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EOPNOTSUPP; } -#define page_offset(v) ((unsigned long)(v) & ((1 << 12) - 1)) - static int ibmveth_send(struct ibmveth_adapter *adapter, - union ibmveth_buf_desc *descs, unsigned long mss) + unsigned long desc, unsigned long mss) { unsigned long correlator; unsigned int retry_count; @@ -994,12 +1086,9 @@ static int ibmveth_send(struct ibmveth_adapter *adapter, retry_count = 1024; correlator = 0; do { - ret = h_send_logical_lan(adapter->vdev->unit_address, - descs[0].desc, descs[1].desc, - descs[2].desc, descs[3].desc, - descs[4].desc, descs[5].desc, - correlator, &correlator, mss, - adapter->fw_large_send_support); + ret = 
h_send_logical_lan(adapter->vdev->unit_address, desc, + correlator, &correlator, mss, + adapter->fw_large_send_support); } while ((ret == H_BUSY) && (retry_count--)); if (ret != H_SUCCESS && ret != H_DROPPED) { @@ -1025,12 +1114,6 @@ static int ibmveth_is_packet_unsupported(struct sk_buff *skb, ret = -EOPNOTSUPP; } - if (!ether_addr_equal(ether_header->h_source, netdev->dev_addr)) { - netdev_dbg(netdev, "source packet MAC address does not match veth device's, dropping packet.\n"); - netdev->stats.tx_dropped++; - ret = -EOPNOTSUPP; - } - return ret; } @@ -1038,34 +1121,13 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct ibmveth_adapter *adapter = netdev_priv(netdev); - unsigned int desc_flags; - union ibmveth_buf_desc descs[6]; - int last, i; - int force_bounce = 0; - dma_addr_t dma_addr; + unsigned int desc_flags, total_bytes; + union ibmveth_buf_desc desc; + int i, queue_num = skb_get_queue_mapping(skb); unsigned long mss = 0; if (ibmveth_is_packet_unsupported(skb, netdev)) goto out; - - /* veth doesn't handle frag_list, so linearize the skb. - * When GRO is enabled SKB's can have frag_list. - */ - if (adapter->is_active_trunk && - skb_has_frag_list(skb) && __skb_linearize(skb)) { - netdev->stats.tx_dropped++; - goto out; - } - - /* - * veth handles a maximum of 6 segments including the header, so - * we have to linearize the skb if there are more than this. - */ - if (skb_shinfo(skb)->nr_frags > 5 && __skb_linearize(skb)) { - netdev->stats.tx_dropped++; - goto out; - } - /* veth can't checksum offload UDP */ if (skb->ip_summed == CHECKSUM_PARTIAL && ((skb->protocol == htons(ETH_P_IP) && @@ -1095,56 +1157,6 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, desc_flags |= IBMVETH_BUF_LRG_SND; } -retry_bounce: - memset(descs, 0, sizeof(descs)); - - /* - * If a linear packet is below the rx threshold then - * copy it into the static bounce buffer. This avoids the - * cost of a TCE insert and remove. 
- */ - if (force_bounce || (!skb_is_nonlinear(skb) && - (skb->len < tx_copybreak))) { - skb_copy_from_linear_data(skb, adapter->bounce_buffer, - skb->len); - - descs[0].fields.flags_len = desc_flags | skb->len; - descs[0].fields.address = adapter->bounce_buffer_dma; - - if (ibmveth_send(adapter, descs, 0)) { - adapter->tx_send_failed++; - netdev->stats.tx_dropped++; - } else { - netdev->stats.tx_packets++; - netdev->stats.tx_bytes += skb->len; - } - - goto out; - } - - /* Map the header */ - dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, - skb_headlen(skb), DMA_TO_DEVICE); - if (dma_mapping_error(&adapter->vdev->dev, dma_addr)) - goto map_failed; - - descs[0].fields.flags_len = desc_flags | skb_headlen(skb); - descs[0].fields.address = dma_addr; - - /* Map the frags */ - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - dma_addr = skb_frag_dma_map(&adapter->vdev->dev, frag, 0, - skb_frag_size(frag), DMA_TO_DEVICE); - - if (dma_mapping_error(&adapter->vdev->dev, dma_addr)) - goto map_failed_frags; - - descs[i+1].fields.flags_len = desc_flags | skb_frag_size(frag); - descs[i+1].fields.address = dma_addr; - } - if (skb->ip_summed == CHECKSUM_PARTIAL && skb_is_gso(skb)) { if (adapter->fw_large_send_support) { mss = (unsigned long)skb_shinfo(skb)->gso_size; @@ -1161,7 +1173,36 @@ retry_bounce: } } - if (ibmveth_send(adapter, descs, mss)) { + /* Copy header into mapped buffer */ + if (unlikely(skb->len > adapter->tx_ltb_size)) { + netdev_err(adapter->netdev, "tx: packet size (%u) exceeds ltb (%u)\n", + skb->len, adapter->tx_ltb_size); + netdev->stats.tx_dropped++; + goto out; + } + memcpy(adapter->tx_ltb_ptr[queue_num], skb->data, skb_headlen(skb)); + total_bytes = skb_headlen(skb); + /* Copy frags into mapped buffers */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + memcpy(adapter->tx_ltb_ptr[queue_num] + total_bytes, + skb_frag_address_safe(frag), skb_frag_size(frag)); + total_bytes += skb_frag_size(frag); + } + + if (unlikely(total_bytes != skb->len)) { + netdev_err(adapter->netdev, "tx: incorrect packet len copied into ltb (%u != %u)\n", + skb->len, total_bytes); + netdev->stats.tx_dropped++; + goto out; + } + desc.fields.flags_len = desc_flags | skb->len; + desc.fields.address = adapter->tx_ltb_dma[queue_num]; + /* finish writing to long_term_buff before VIOS accessing it */ + dma_wmb(); + + if (ibmveth_send(adapter, desc.desc, mss)) { adapter->tx_send_failed++; netdev->stats.tx_dropped++; } else { @@ -1169,41 +1210,11 @@ retry_bounce: netdev->stats.tx_bytes += skb->len; } - dma_unmap_single(&adapter->vdev->dev, - descs[0].fields.address, - descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK, - DMA_TO_DEVICE); - - for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++) - dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address, - descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK, - DMA_TO_DEVICE); - out: dev_consume_skb_any(skb); return NETDEV_TX_OK; -map_failed_frags: - last = i+1; - for (i = 1; i < last; i++) - dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address, - descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK, - DMA_TO_DEVICE); - dma_unmap_single(&adapter->vdev->dev, - descs[0].fields.address, - descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK, - DMA_TO_DEVICE); -map_failed: - if (!firmware_has_feature(FW_FEATURE_CMO)) - netdev_err(netdev, "tx: unable to map xmit buffer\n"); - adapter->tx_map_failed++; - if (skb_linearize(skb)) { - 
netdev->stats.tx_dropped++; - goto out; - } - force_bounce = 1; - goto retry_bounce; } static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt) @@ -1285,36 +1296,41 @@ static void ibmveth_rx_csum_helper(struct sk_buff *skb, iph_proto = iph6->nexthdr; } - /* In OVS environment, when a flow is not cached, specifically for a - * new TCP connection, the first packet information is passed up + /* When CSO is enabled the TCP checksum may have be set to NULL by + * the sender given that we zeroed out TCP checksum field in + * transmit path (refer ibmveth_start_xmit routine). In this case set + * up CHECKSUM_PARTIAL. If the packet is forwarded, the checksum will + * then be recalculated by the destination NIC (CSO must be enabled + * on the destination NIC). + * + * In an OVS environment, when a flow is not cached, specifically for a + * new TCP connection, the first packet information is passed up to * the user space for finding a flow. During this process, OVS computes * checksum on the first packet when CHECKSUM_PARTIAL flag is set. * - * Given that we zeroed out TCP checksum field in transmit path - * (refer ibmveth_start_xmit routine) as we set "no checksum bit", - * OVS computed checksum will be incorrect w/o TCP pseudo checksum - * in the packet. This leads to OVS dropping the packet and hence - * TCP retransmissions are seen. - * - * So, re-compute TCP pseudo header checksum. + * So, re-compute TCP pseudo header checksum when configured for + * trunk mode. */ - if (iph_proto == IPPROTO_TCP && adapter->is_active_trunk) { + if (iph_proto == IPPROTO_TCP) { struct tcphdr *tcph = (struct tcphdr *)(skb->data + iphlen); - - tcphdrlen = skb->len - iphlen; - - /* Recompute TCP pseudo header checksum */ - if (skb_proto == ETH_P_IP) - tcph->check = ~csum_tcpudp_magic(iph->saddr, + if (tcph->check == 0x0000) { + /* Recompute TCP pseudo header checksum */ + if (adapter->is_active_trunk) { + tcphdrlen = skb->len - iphlen; + if (skb_proto == ETH_P_IP) + tcph->check = + ~csum_tcpudp_magic(iph->saddr, iph->daddr, tcphdrlen, iph_proto, 0); - else if (skb_proto == ETH_P_IPV6) - tcph->check = ~csum_ipv6_magic(&iph6->saddr, + else if (skb_proto == ETH_P_IPV6) + tcph->check = + ~csum_ipv6_magic(&iph6->saddr, &iph6->daddr, tcphdrlen, iph_proto, 0); - - /* Setup SKB fields for checksum offload */ - skb_partial_csum_set(skb, iphlen, - offsetof(struct tcphdr, check)); - skb_reset_network_header(skb); + } + /* Setup SKB fields for checksum offload */ + skb_partial_csum_set(skb, iphlen, + offsetof(struct tcphdr, check)); + skb_reset_network_header(skb); + } } } @@ -1343,6 +1359,7 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) int offset = ibmveth_rxq_frame_offset(adapter); int csum_good = ibmveth_rxq_csum_good(adapter); int lrg_pkt = ibmveth_rxq_large_packet(adapter); + __sum16 iph_check = 0; skb = ibmveth_rxq_get_buffer(adapter); @@ -1379,16 +1396,26 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) skb_put(skb, length); skb->protocol = eth_type_trans(skb, netdev); - if (csum_good) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - ibmveth_rx_csum_helper(skb, adapter); + /* PHYP without PLSO support places a -1 in the ip + * checksum for large send frames. 
+ */ + if (skb->protocol == cpu_to_be16(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)skb->data; + + iph_check = iph->check; } - if (length > netdev->mtu + ETH_HLEN) { + if ((length > netdev->mtu + ETH_HLEN) || + lrg_pkt || iph_check == 0xffff) { ibmveth_rx_mss_helper(skb, mss, lrg_pkt); adapter->rx_large_packets++; } + if (csum_good) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + ibmveth_rx_csum_helper(skb, adapter); + } + napi_gro_receive(napi, skb); /* send it up */ netdev->stats.rx_packets++; @@ -1467,7 +1494,7 @@ static void ibmveth_set_multicast_list(struct net_device *netdev) netdev_for_each_mc_addr(ha, netdev) { /* add the multicast address to the filter table */ u64 mcast_addr; - mcast_addr = ibmveth_encode_mac_addr(ha->addr); + mcast_addr = ether_addr_to_u64(ha->addr); lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address, IbmVethMcastAddFilter, mcast_addr); @@ -1570,6 +1597,8 @@ static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev) ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE; ret += IOMMU_PAGE_ALIGN(netdev->mtu, tbl); + /* add size of mapped tx buffers */ + ret += IOMMU_PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE, tbl); for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { /* add the size of the active receive buffers */ @@ -1597,14 +1626,14 @@ static int ibmveth_set_mac_addr(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - mac_address = ibmveth_encode_mac_addr(addr->sa_data); + mac_address = ether_addr_to_u64(addr->sa_data); rc = h_change_logical_lan_mac(adapter->vdev->unit_address, mac_address); if (rc) { netdev_err(adapter->netdev, "h_change_logical_lan_mac failed with rc=%d\n", rc); return rc; } - ether_addr_copy(dev->dev_addr, addr->sa_data); + eth_hw_addr_set(dev, addr->sa_data); return 0; } @@ -1614,7 +1643,7 @@ static const struct net_device_ops ibmveth_netdev_ops = { .ndo_stop = ibmveth_close, .ndo_start_xmit = ibmveth_start_xmit, .ndo_set_rx_mode = ibmveth_set_multicast_list, - .ndo_do_ioctl = ibmveth_ioctl, + .ndo_eth_ioctl = ibmveth_ioctl, .ndo_change_mtu = ibmveth_change_mtu, .ndo_fix_features = ibmveth_fix_features, .ndo_set_features = ibmveth_set_features, @@ -1662,8 +1691,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) return -EINVAL; } - netdev = alloc_etherdev(sizeof(struct ibmveth_adapter)); - + netdev = alloc_etherdev_mqs(sizeof(struct ibmveth_adapter), IBMVETH_MAX_QUEUES, 1); if (!netdev) return -ENOMEM; @@ -1674,8 +1702,9 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) adapter->netdev = netdev; adapter->mcastFilterSize = be32_to_cpu(*mcastFilterSize_p); adapter->pool_config = 0; + ibmveth_init_link_settings(netdev); - netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16); + netif_napi_add_weight(netdev, &adapter->napi, ibmveth_poll, 16); netdev->irq = dev->irq; netdev->netdev_ops = &ibmveth_netdev_ops; @@ -1708,9 +1737,9 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) } netdev->min_mtu = IBMVETH_MIN_MTU; - netdev->max_mtu = ETH_MAX_MTU; + netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH; - memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN); + eth_hw_addr_set(netdev, mac_addr_p); if (firmware_has_feature(FW_FEATURE_CMO)) memcpy(pool_count, pool_count_cmo, sizeof(pool_count)); @@ -1728,6 +1757,18 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) kobject_uevent(kobj, KOBJ_ADD); } + rc = netif_set_real_num_tx_queues(netdev, min(num_online_cpus(), + IBMVETH_DEFAULT_QUEUES)); 
+ if (rc) { + netdev_dbg(netdev, "failed to set number of tx queues rc=%d\n", + rc); + free_netdev(netdev); + return rc; + } + adapter->tx_ltb_size = PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE); + for (i = 0; i < IBMVETH_MAX_QUEUES; i++) + adapter->tx_ltb_ptr[i] = NULL; + netdev_dbg(netdev, "adapter @ 0x%p\n", adapter); netdev_dbg(netdev, "registering netdev...\n"); @@ -1746,7 +1787,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) return 0; } -static int ibmveth_remove(struct vio_dev *dev) +static void ibmveth_remove(struct vio_dev *dev) { struct net_device *netdev = dev_get_drvdata(&dev->dev); struct ibmveth_adapter *adapter = netdev_priv(netdev); @@ -1759,8 +1800,6 @@ static int ibmveth_remove(struct vio_dev *dev) free_netdev(netdev); dev_set_drvdata(&dev->dev, NULL); - - return 0; } static struct attribute veth_active_attr; @@ -1789,8 +1828,7 @@ static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr, struct ibmveth_buff_pool *pool = container_of(kobj, struct ibmveth_buff_pool, kobj); - struct net_device *netdev = dev_get_drvdata( - container_of(kobj->parent, struct device, kobj)); + struct net_device *netdev = dev_get_drvdata(kobj_to_dev(kobj->parent)); struct ibmveth_adapter *adapter = netdev_priv(netdev); long value = simple_strtol(buf, NULL, 10); long rc; @@ -1894,6 +1932,7 @@ static struct attribute *veth_pool_attrs[] = { &veth_size_attr, NULL, }; +ATTRIBUTE_GROUPS(veth_pool); static const struct sysfs_ops veth_pool_ops = { .show = veth_pool_show, @@ -1903,7 +1942,7 @@ static const struct sysfs_ops veth_pool_ops = { static struct kobj_type ktype_veth_pool = { .release = NULL, .sysfs_ops = &veth_pool_ops, - .default_attrs = veth_pool_attrs, + .default_groups = veth_pool_groups, }; static int ibmveth_resume(struct device *dev) diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h index 4e9bf3421f4f..115d4c45aa77 100644 --- a/drivers/net/ethernet/ibm/ibmveth.h +++ b/drivers/net/ethernet/ibm/ibmveth.h @@ -46,23 +46,23 @@ #define h_add_logical_lan_buffer(ua, buf) \ plpar_hcall_norets(H_ADD_LOGICAL_LAN_BUFFER, ua, buf) +/* FW allows us to send 6 descriptors but we only use one so mark + * the other 5 as unused (0) + */ static inline long h_send_logical_lan(unsigned long unit_address, - unsigned long desc1, unsigned long desc2, unsigned long desc3, - unsigned long desc4, unsigned long desc5, unsigned long desc6, - unsigned long corellator_in, unsigned long *corellator_out, - unsigned long mss, unsigned long large_send_support) + unsigned long desc, unsigned long corellator_in, + unsigned long *corellator_out, unsigned long mss, + unsigned long large_send_support) { long rc; unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; if (large_send_support) rc = plpar_hcall9(H_SEND_LOGICAL_LAN, retbuf, unit_address, - desc1, desc2, desc3, desc4, desc5, desc6, - corellator_in, mss); + desc, 0, 0, 0, 0, 0, corellator_in, mss); else rc = plpar_hcall9(H_SEND_LOGICAL_LAN, retbuf, unit_address, - desc1, desc2, desc3, desc4, desc5, desc6, - corellator_in); + desc, 0, 0, 0, 0, 0, corellator_in); *corellator_out = retbuf[0]; @@ -98,6 +98,9 @@ static inline long h_illan_attributes(unsigned long unit_address, #define IBMVETH_BUFF_LIST_SIZE 4096 #define IBMVETH_FILT_LIST_SIZE 4096 #define IBMVETH_MAX_BUF_SIZE (1024 * 128) +#define IBMVETH_MAX_TX_BUF_SIZE (1024 * 64) +#define IBMVETH_MAX_QUEUES 16U +#define IBMVETH_DEFAULT_QUEUES 8U static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 }; static int pool_count[] = { 256, 
512, 256, 256, 256 }; @@ -137,6 +140,9 @@ struct ibmveth_adapter { unsigned int mcastFilterSize; void * buffer_list_addr; void * filter_list_addr; + void *tx_ltb_ptr[IBMVETH_MAX_QUEUES]; + unsigned int tx_ltb_size; + dma_addr_t tx_ltb_dma[IBMVETH_MAX_QUEUES]; dma_addr_t buffer_list_dma; dma_addr_t filter_list_dma; struct ibmveth_buff_pool rx_buff_pool[IBMVETH_NUM_BUFF_POOLS]; @@ -145,8 +151,6 @@ struct ibmveth_adapter { int rx_csum; int large_send; bool is_active_trunk; - void *bounce_buffer; - dma_addr_t bounce_buffer_dma; u64 fw_ipv6_csum_support; u64 fw_ipv4_csum_support; @@ -162,6 +166,9 @@ struct ibmveth_adapter { u64 tx_send_failed; u64 tx_large_packets; u64 rx_large_packets; + /* Ethtool settings */ + u8 duplex; + u32 speed; }; /* diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 4bd33245bad6..9282381a438f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -53,6 +53,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/irq.h> +#include <linux/irqdomain.h> #include <linux/kthread.h> #include <linux/seq_file.h> #include <linux/interrupt.h> @@ -60,6 +61,7 @@ #include <asm/hvcall.h> #include <linux/atomic.h> #include <asm/vio.h> +#include <asm/xive.h> #include <asm/iommu.h> #include <linux/uaccess.h> #include <asm/firmware.h> @@ -78,14 +80,11 @@ MODULE_LICENSE("GPL"); MODULE_VERSION(IBMVNIC_DRIVER_VERSION); static int ibmvnic_version = IBMVNIC_INITIAL_VERSION; -static int ibmvnic_remove(struct vio_dev *); static void release_sub_crqs(struct ibmvnic_adapter *, bool); static int ibmvnic_reset_crq(struct ibmvnic_adapter *); static int ibmvnic_send_crq_init(struct ibmvnic_adapter *); static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *); static int ibmvnic_send_crq(struct ibmvnic_adapter *, union ibmvnic_crq *); -static int send_subcrq(struct ibmvnic_adapter *adapter, u64 remote_handle, - union sub_crq *sub_crq); static int send_subcrq_indirect(struct ibmvnic_adapter *, u64, u64, u64); static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance); static int enable_scrq_irq(struct ibmvnic_adapter *, @@ -97,19 +96,23 @@ static int pending_scrq(struct ibmvnic_adapter *, static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *, struct ibmvnic_sub_crq_queue *); static int ibmvnic_poll(struct napi_struct *napi, int data); -static void send_map_query(struct ibmvnic_adapter *adapter); -static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8); +static void send_query_map(struct ibmvnic_adapter *adapter); +static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, u32, u8); static int send_request_unmap(struct ibmvnic_adapter *, u8); static int send_login(struct ibmvnic_adapter *adapter); -static void send_cap_queries(struct ibmvnic_adapter *adapter); +static void send_query_cap(struct ibmvnic_adapter *adapter); static int init_sub_crqs(struct ibmvnic_adapter *); static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter); -static int ibmvnic_init(struct ibmvnic_adapter *); -static int ibmvnic_reset_init(struct ibmvnic_adapter *); +static int ibmvnic_reset_init(struct ibmvnic_adapter *, bool reset); static void release_crq_queue(struct ibmvnic_adapter *); static int __ibmvnic_set_mac(struct net_device *, u8 *); static int init_crq_queue(struct ibmvnic_adapter *adapter); static int send_query_phys_parms(struct ibmvnic_adapter *adapter); +static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *tx_scrq); 
+static void free_long_term_buff(struct ibmvnic_adapter *adapter, + struct ibmvnic_long_term_buff *ltb); +static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter); struct ibmvnic_stat { char name[ETH_GSTRING_LEN]; @@ -118,7 +121,7 @@ struct ibmvnic_stat { #define IBMVNIC_STAT_OFF(stat) (offsetof(struct ibmvnic_adapter, stats) + \ offsetof(struct ibmvnic_statistics, stat)) -#define IBMVNIC_GET_STAT(a, off) (*((u64 *)(((unsigned long)(a)) + off))) +#define IBMVNIC_GET_STAT(a, off) (*((u64 *)(((unsigned long)(a)) + (off)))) static const struct ibmvnic_stat ibmvnic_stats[] = { {"rx_packets", IBMVNIC_STAT_OFF(rx_packets)}, @@ -145,6 +148,29 @@ static const struct ibmvnic_stat ibmvnic_stats[] = { {"internal_mac_rx_errors", IBMVNIC_STAT_OFF(internal_mac_rx_errors)}, }; +static int send_crq_init_complete(struct ibmvnic_adapter *adapter) +{ + union ibmvnic_crq crq; + + memset(&crq, 0, sizeof(crq)); + crq.generic.first = IBMVNIC_CRQ_INIT_CMD; + crq.generic.cmd = IBMVNIC_CRQ_INIT_COMPLETE; + + return ibmvnic_send_crq(adapter, &crq); +} + +static int send_version_xchg(struct ibmvnic_adapter *adapter) +{ + union ibmvnic_crq crq; + + memset(&crq, 0, sizeof(crq)); + crq.version_exchange.first = IBMVNIC_CRQ_CMD; + crq.version_exchange.cmd = VERSION_EXCHANGE; + crq.version_exchange.version = cpu_to_be16(ibmvnic_version); + + return ibmvnic_send_crq(adapter, &crq); +} + static long h_reg_sub_crq(unsigned long unit_address, unsigned long token, unsigned long length, unsigned long *number, unsigned long *irq) @@ -193,53 +219,108 @@ static int ibmvnic_wait_for_completion(struct ibmvnic_adapter *adapter, return -ETIMEDOUT; } +/** + * reuse_ltb() - Check if a long term buffer can be reused + * @ltb: The long term buffer to be checked + * @size: The size of the long term buffer. + * + * An LTB can be reused unless its size has changed. + * + * Return: Return true if the LTB can be reused, false otherwise. + */ +static bool reuse_ltb(struct ibmvnic_long_term_buff *ltb, int size) +{ + return (ltb->buff && ltb->size == size); +} + +/** + * alloc_long_term_buff() - Allocate a long term buffer (LTB) + * + * @adapter: ibmvnic adapter associated to the LTB + * @ltb: container object for the LTB + * @size: size of the LTB + * + * Allocate an LTB of the specified size and notify VIOS. + * + * If the given @ltb already has the correct size, reuse it. Otherwise if + * its non-NULL, free it. Then allocate a new one of the correct size. + * Notify the VIOS either way since we may now be working with a new VIOS. + * + * Allocating larger chunks of memory during resets, specially LPM or under + * low memory situations can cause resets to fail/timeout and for LPAR to + * lose connectivity. So hold onto the LTB even if we fail to communicate + * with the VIOS and reuse it on next open. Free LTB when adapter is closed. + * + * Return: 0 if we were able to allocate the LTB and notify the VIOS and + * a negative value otherwise. 
+ */ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter, struct ibmvnic_long_term_buff *ltb, int size) { struct device *dev = &adapter->vdev->dev; + u64 prev = 0; int rc; - ltb->size = size; - ltb->buff = dma_alloc_coherent(dev, ltb->size, <b->addr, - GFP_KERNEL); + if (!reuse_ltb(ltb, size)) { + dev_dbg(dev, + "LTB size changed from 0x%llx to 0x%x, reallocating\n", + ltb->size, size); + prev = ltb->size; + free_long_term_buff(adapter, ltb); + } - if (!ltb->buff) { - dev_err(dev, "Couldn't alloc long term buffer\n"); - return -ENOMEM; + if (ltb->buff) { + dev_dbg(dev, "Reusing LTB [map %d, size 0x%llx]\n", + ltb->map_id, ltb->size); + } else { + ltb->buff = dma_alloc_coherent(dev, size, <b->addr, + GFP_KERNEL); + if (!ltb->buff) { + dev_err(dev, "Couldn't alloc long term buffer\n"); + return -ENOMEM; + } + ltb->size = size; + + ltb->map_id = find_first_zero_bit(adapter->map_ids, + MAX_MAP_ID); + bitmap_set(adapter->map_ids, ltb->map_id, 1); + + dev_dbg(dev, + "Allocated new LTB [map %d, size 0x%llx was 0x%llx]\n", + ltb->map_id, ltb->size, prev); } - ltb->map_id = adapter->map_id; - adapter->map_id++; + + /* Ensure ltb is zeroed - specially when reusing it. */ + memset(ltb->buff, 0, ltb->size); mutex_lock(&adapter->fw_lock); adapter->fw_done_rc = 0; reinit_completion(&adapter->fw_done); - rc = send_request_map(adapter, ltb->addr, - ltb->size, ltb->map_id); + + rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id); if (rc) { - dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); - mutex_unlock(&adapter->fw_lock); - return rc; + dev_err(dev, "send_request_map failed, rc = %d\n", rc); + goto out; } rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000); if (rc) { - dev_err(dev, - "Long term map request aborted or timed out,rc = %d\n", + dev_err(dev, "LTB map request aborted or timed out, rc = %d\n", rc); - dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); - mutex_unlock(&adapter->fw_lock); - return rc; + goto out; } if (adapter->fw_done_rc) { - dev_err(dev, "Couldn't map long term buffer,rc = %d\n", + dev_err(dev, "Couldn't map LTB, rc = %d\n", adapter->fw_done_rc); - dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); - mutex_unlock(&adapter->fw_lock); - return -1; + rc = -EIO; + goto out; } + rc = 0; +out: + /* don't free LTB on communication error - see function header */ mutex_unlock(&adapter->fw_lock); - return 0; + return rc; } static void free_long_term_buff(struct ibmvnic_adapter *adapter, @@ -250,55 +331,230 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter, if (!ltb->buff) return; + /* VIOS automatically unmaps the long term buffer at remote + * end for the following resets: + * FAILOVER, MOBILITY, TIMEOUT. + */ if (adapter->reset_reason != VNIC_RESET_FAILOVER && - adapter->reset_reason != VNIC_RESET_MOBILITY) + adapter->reset_reason != VNIC_RESET_MOBILITY && + adapter->reset_reason != VNIC_RESET_TIMEOUT) send_request_unmap(adapter, ltb->map_id); + dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); + + ltb->buff = NULL; + /* mark this map_id free */ + bitmap_clear(adapter->map_ids, ltb->map_id, 1); + ltb->map_id = 0; } -static int reset_long_term_buff(struct ibmvnic_adapter *adapter, - struct ibmvnic_long_term_buff *ltb) +/** + * free_ltb_set - free the given set of long term buffers (LTBS) + * @adapter: The ibmvnic adapter containing this ltb set + * @ltb_set: The ltb_set to be freed + * + * Free the set of LTBs in the given set. 
+ */ + +static void free_ltb_set(struct ibmvnic_adapter *adapter, + struct ibmvnic_ltb_set *ltb_set) +{ + int i; + + for (i = 0; i < ltb_set->num_ltbs; i++) + free_long_term_buff(adapter, <b_set->ltbs[i]); + + kfree(ltb_set->ltbs); + ltb_set->ltbs = NULL; + ltb_set->num_ltbs = 0; +} + +/** + * alloc_ltb_set() - Allocate a set of long term buffers (LTBs) + * + * @adapter: ibmvnic adapter associated to the LTB + * @ltb_set: container object for the set of LTBs + * @num_buffs: Number of buffers in the LTB + * @buff_size: Size of each buffer in the LTB + * + * Allocate a set of LTBs to accommodate @num_buffs buffers of @buff_size + * each. We currently cap size each LTB to IBMVNIC_ONE_LTB_SIZE. If the + * new set of LTBs have fewer LTBs than the old set, free the excess LTBs. + * If new set needs more than in old set, allocate the remaining ones. + * Try and reuse as many LTBs as possible and avoid reallocation. + * + * Any changes to this allocation strategy must be reflected in + * map_rxpool_buff_to_ltb() and map_txpool_buff_to_ltb(). + */ +static int alloc_ltb_set(struct ibmvnic_adapter *adapter, + struct ibmvnic_ltb_set *ltb_set, int num_buffs, + int buff_size) { struct device *dev = &adapter->vdev->dev; + struct ibmvnic_ltb_set old_set; + struct ibmvnic_ltb_set new_set; + int rem_size; + int tot_size; /* size of all ltbs */ + int ltb_size; /* size of one ltb */ + int nltbs; int rc; + int n; + int i; - memset(ltb->buff, 0, ltb->size); + dev_dbg(dev, "%s() num_buffs %d, buff_size %d\n", __func__, num_buffs, + buff_size); - mutex_lock(&adapter->fw_lock); - adapter->fw_done_rc = 0; + ltb_size = rounddown(IBMVNIC_ONE_LTB_SIZE, buff_size); + tot_size = num_buffs * buff_size; - reinit_completion(&adapter->fw_done); - rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id); - if (rc) { - mutex_unlock(&adapter->fw_lock); - return rc; - } + if (ltb_size > tot_size) + ltb_size = tot_size; - rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000); - if (rc) { - dev_info(dev, - "Reset failed, long term map request timed out or aborted\n"); - mutex_unlock(&adapter->fw_lock); - return rc; + nltbs = tot_size / ltb_size; + if (tot_size % ltb_size) + nltbs++; + + old_set = *ltb_set; + + if (old_set.num_ltbs == nltbs) { + new_set = old_set; + } else { + int tmp = nltbs * sizeof(struct ibmvnic_long_term_buff); + + new_set.ltbs = kzalloc(tmp, GFP_KERNEL); + if (!new_set.ltbs) + return -ENOMEM; + + new_set.num_ltbs = nltbs; + + /* Free any excess ltbs in old set */ + for (i = new_set.num_ltbs; i < old_set.num_ltbs; i++) + free_long_term_buff(adapter, &old_set.ltbs[i]); + + /* Copy remaining ltbs to new set. All LTBs except the + * last one are of the same size. alloc_long_term_buff() + * will realloc if the size changes. + */ + n = min(old_set.num_ltbs, new_set.num_ltbs); + for (i = 0; i < n; i++) + new_set.ltbs[i] = old_set.ltbs[i]; + + /* Any additional ltbs in new set will have NULL ltbs for + * now and will be allocated in alloc_long_term_buff(). + */ + + /* We no longer need the old_set so free it. Note that we + * may have reused some ltbs from old set and freed excess + * ltbs above. So we only need to free the container now + * not the LTBs themselves. (i.e. dont free_ltb_set()!) + */ + kfree(old_set.ltbs); + old_set.ltbs = NULL; + old_set.num_ltbs = 0; + + /* Install the new set. If allocations fail below, we will + * retry later and know what size LTBs we need. 
+ */ + *ltb_set = new_set; } - if (adapter->fw_done_rc) { - dev_info(dev, - "Reset failed, attempting to free and reallocate buffer\n"); - free_long_term_buff(adapter, ltb); - mutex_unlock(&adapter->fw_lock); - return alloc_long_term_buff(adapter, ltb, ltb->size); + i = 0; + rem_size = tot_size; + while (rem_size) { + if (ltb_size > rem_size) + ltb_size = rem_size; + + rem_size -= ltb_size; + + rc = alloc_long_term_buff(adapter, &new_set.ltbs[i], ltb_size); + if (rc) + goto out; + i++; } - mutex_unlock(&adapter->fw_lock); + + WARN_ON(i != new_set.num_ltbs); + return 0; +out: + /* We may have allocated one/more LTBs before failing and we + * want to try and reuse on next reset. So don't free ltb set. + */ + return rc; +} + +/** + * map_rxpool_buf_to_ltb - Map given rxpool buffer to offset in an LTB. + * @rxpool: The receive buffer pool containing buffer + * @bufidx: Index of buffer in rxpool + * @ltbp: (Output) pointer to the long term buffer containing the buffer + * @offset: (Output) offset of buffer in the LTB from @ltbp + * + * Map the given buffer identified by [rxpool, bufidx] to an LTB in the + * pool and its corresponding offset. Assume for now that each LTB is of + * different size but could possibly be optimized based on the allocation + * strategy in alloc_ltb_set(). + */ +static void map_rxpool_buf_to_ltb(struct ibmvnic_rx_pool *rxpool, + unsigned int bufidx, + struct ibmvnic_long_term_buff **ltbp, + unsigned int *offset) +{ + struct ibmvnic_long_term_buff *ltb; + int nbufs; /* # of buffers in one ltb */ + int i; + + WARN_ON(bufidx >= rxpool->size); + + for (i = 0; i < rxpool->ltb_set.num_ltbs; i++) { + ltb = &rxpool->ltb_set.ltbs[i]; + nbufs = ltb->size / rxpool->buff_size; + if (bufidx < nbufs) + break; + bufidx -= nbufs; + } + + *ltbp = ltb; + *offset = bufidx * rxpool->buff_size; +} + +/** + * map_txpool_buf_to_ltb - Map given txpool buffer to offset in an LTB. + * @txpool: The transmit buffer pool containing buffer + * @bufidx: Index of buffer in txpool + * @ltbp: (Output) pointer to the long term buffer (LTB) containing the buffer + * @offset: (Output) offset of buffer in the LTB from @ltbp + * + * Map the given buffer identified by [txpool, bufidx] to an LTB in the + * pool and its corresponding offset. 
+ */ +static void map_txpool_buf_to_ltb(struct ibmvnic_tx_pool *txpool, + unsigned int bufidx, + struct ibmvnic_long_term_buff **ltbp, + unsigned int *offset) +{ + struct ibmvnic_long_term_buff *ltb; + int nbufs; /* # of buffers in one ltb */ + int i; + + WARN_ON_ONCE(bufidx >= txpool->num_buffers); + + for (i = 0; i < txpool->ltb_set.num_ltbs; i++) { + ltb = &txpool->ltb_set.ltbs[i]; + nbufs = ltb->size / txpool->buf_size; + if (bufidx < nbufs) + break; + bufidx -= nbufs; + } + + *ltbp = ltb; + *offset = bufidx * txpool->buf_size; } static void deactivate_rx_pools(struct ibmvnic_adapter *adapter) { int i; - for (i = 0; i < be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); - i++) + for (i = 0; i < adapter->num_active_rx_pools; i++) adapter->rx_pool[i].active = 0; } @@ -306,58 +562,77 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, struct ibmvnic_rx_pool *pool) { int count = pool->size - atomic_read(&pool->available); + u64 handle = adapter->rx_scrq[pool->index]->handle; struct device *dev = &adapter->vdev->dev; + struct ibmvnic_ind_xmit_queue *ind_bufp; + struct ibmvnic_sub_crq_queue *rx_scrq; + struct ibmvnic_long_term_buff *ltb; + union sub_crq *sub_crq; int buffers_added = 0; unsigned long lpar_rc; - union sub_crq sub_crq; struct sk_buff *skb; unsigned int offset; dma_addr_t dma_addr; unsigned char *dst; - u64 *handle_array; int shift = 0; - int index; + int bufidx; int i; if (!pool->active) return; - handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + - be32_to_cpu(adapter->login_rsp_buf-> - off_rxadd_subcrqs)); + rx_scrq = adapter->rx_scrq[pool->index]; + ind_bufp = &rx_scrq->ind_buf; + + /* netdev_skb_alloc() could have failed after we saved a few skbs + * in the indir_buf and we would not have sent them to VIOS yet. + * To account for them, start the loop at ind_bufp->index rather + * than 0. If we pushed all the skbs to VIOS, ind_bufp->index will + * be 0. + */ + for (i = ind_bufp->index; i < count; ++i) { + bufidx = pool->free_map[pool->next_free]; - for (i = 0; i < count; ++i) { - skb = alloc_skb(pool->buff_size, GFP_ATOMIC); + /* We maybe reusing the skb from earlier resets. Allocate + * only if necessary. But since the LTB may have changed + * during reset (see init_rx_pools()), update LTB below + * even if reusing skb. 
+ */ + skb = pool->rx_buff[bufidx].skb; if (!skb) { - dev_err(dev, "Couldn't replenish rx buff\n"); - adapter->replenish_no_mem++; - break; + skb = netdev_alloc_skb(adapter->netdev, + pool->buff_size); + if (!skb) { + dev_err(dev, "Couldn't replenish rx buff\n"); + adapter->replenish_no_mem++; + break; + } } - index = pool->free_map[pool->next_free]; - - if (pool->rx_buff[index].skb) - dev_err(dev, "Inconsistent free_map!\n"); + pool->free_map[pool->next_free] = IBMVNIC_INVALID_MAP; + pool->next_free = (pool->next_free + 1) % pool->size; /* Copy the skb to the long term mapped DMA buffer */ - offset = index * pool->buff_size; - dst = pool->long_term_buff.buff + offset; + map_rxpool_buf_to_ltb(pool, bufidx, <b, &offset); + dst = ltb->buff + offset; memset(dst, 0, pool->buff_size); - dma_addr = pool->long_term_buff.addr + offset; - pool->rx_buff[index].data = dst; - - pool->free_map[pool->next_free] = IBMVNIC_INVALID_MAP; - pool->rx_buff[index].dma = dma_addr; - pool->rx_buff[index].skb = skb; - pool->rx_buff[index].pool_index = pool->index; - pool->rx_buff[index].size = pool->buff_size; - - memset(&sub_crq, 0, sizeof(sub_crq)); - sub_crq.rx_add.first = IBMVNIC_CRQ_CMD; - sub_crq.rx_add.correlator = - cpu_to_be64((u64)&pool->rx_buff[index]); - sub_crq.rx_add.ioba = cpu_to_be32(dma_addr); - sub_crq.rx_add.map_id = pool->long_term_buff.map_id; + dma_addr = ltb->addr + offset; + + /* add the skb to an rx_buff in the pool */ + pool->rx_buff[bufidx].data = dst; + pool->rx_buff[bufidx].dma = dma_addr; + pool->rx_buff[bufidx].skb = skb; + pool->rx_buff[bufidx].pool_index = pool->index; + pool->rx_buff[bufidx].size = pool->buff_size; + + /* queue the rx_buff for the next send_subcrq_indirect */ + sub_crq = &ind_bufp->indir_arr[ind_bufp->index++]; + memset(sub_crq, 0, sizeof(*sub_crq)); + sub_crq->rx_add.first = IBMVNIC_CRQ_CMD; + sub_crq->rx_add.correlator = + cpu_to_be64((u64)&pool->rx_buff[bufidx]); + sub_crq->rx_add.ioba = cpu_to_be32(dma_addr); + sub_crq->rx_add.map_id = ltb->map_id; /* The length field of the sCRQ is defined to be 24 bits so the * buffer size needs to be left shifted by a byte before it is @@ -367,16 +642,21 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, #ifdef __LITTLE_ENDIAN__ shift = 8; #endif - sub_crq.rx_add.len = cpu_to_be32(pool->buff_size << shift); - - lpar_rc = send_subcrq(adapter, handle_array[pool->index], - &sub_crq); - if (lpar_rc != H_SUCCESS) - goto failure; - - buffers_added++; - adapter->replenish_add_buff_success++; - pool->next_free = (pool->next_free + 1) % pool->size; + sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift); + + /* if send_subcrq_indirect queue is full, flush to VIOS */ + if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS || + i == count - 1) { + lpar_rc = + send_subcrq_indirect(adapter, handle, + (u64)ind_bufp->indir_dma, + (u64)ind_bufp->index); + if (lpar_rc != H_SUCCESS) + goto failure; + buffers_added += ind_bufp->index; + adapter->replenish_add_buff_success += ind_bufp->index; + ind_bufp->index = 0; + } } atomic_add(buffers_added, &pool->available); return; @@ -384,13 +664,22 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, failure: if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED) dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n"); - pool->free_map[pool->next_free] = index; - pool->rx_buff[index].skb = NULL; + for (i = ind_bufp->index - 1; i >= 0; --i) { + struct ibmvnic_rx_buff *rx_buff; - dev_kfree_skb_any(skb); - adapter->replenish_add_buff_failure++; + pool->next_free = 
pool->next_free == 0 ? + pool->size - 1 : pool->next_free - 1; + sub_crq = &ind_bufp->indir_arr[i]; + rx_buff = (struct ibmvnic_rx_buff *) + be64_to_cpu(sub_crq->rx_add.correlator); + bufidx = (int)(rx_buff - pool->rx_buff); + pool->free_map[pool->next_free] = bufidx; + dev_kfree_skb_any(pool->rx_buff[bufidx].skb); + pool->rx_buff[bufidx].skb = NULL; + } + adapter->replenish_add_buff_failure += ind_bufp->index; atomic_add(buffers_added, &pool->available); - + ind_bufp->index = 0; if (lpar_rc == H_CLOSED || adapter->failover_pending) { /* Disable buffer pool replenishment and report carrier off if * queue is closed or pending failover. @@ -407,11 +696,12 @@ static void replenish_pools(struct ibmvnic_adapter *adapter) int i; adapter->replenish_task_cycles++; - for (i = 0; i < be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); - i++) { + for (i = 0; i < adapter->num_active_rx_pools; i++) { if (adapter->rx_pool[i].active) replenish_rx_pool(adapter, &adapter->rx_pool[i]); } + + netdev_dbg(adapter->netdev, "Replenished %d pools\n", i); } static void release_stats_buffers(struct ibmvnic_adapter *adapter) @@ -458,13 +748,15 @@ static int init_stats_token(struct ibmvnic_adapter *adapter) { struct device *dev = &adapter->vdev->dev; dma_addr_t stok; + int rc; stok = dma_map_single(dev, &adapter->stats, sizeof(struct ibmvnic_statistics), DMA_FROM_DEVICE); - if (dma_mapping_error(dev, stok)) { - dev_err(dev, "Couldn't map stats buffer\n"); - return -1; + rc = dma_mapping_error(dev, stok); + if (rc) { + dev_err(dev, "Couldn't map stats buffer, rc = %d\n", rc); + return rc; } adapter->stats_token = stok; @@ -472,52 +764,12 @@ static int init_stats_token(struct ibmvnic_adapter *adapter) return 0; } -static int reset_rx_pools(struct ibmvnic_adapter *adapter) -{ - struct ibmvnic_rx_pool *rx_pool; - int rx_scrqs; - int i, j, rc; - u64 *size_array; - - size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + - be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size)); - - rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); - for (i = 0; i < rx_scrqs; i++) { - rx_pool = &adapter->rx_pool[i]; - - netdev_dbg(adapter->netdev, "Re-setting rx_pool[%d]\n", i); - - if (rx_pool->buff_size != be64_to_cpu(size_array[i])) { - free_long_term_buff(adapter, &rx_pool->long_term_buff); - rx_pool->buff_size = be64_to_cpu(size_array[i]); - rc = alloc_long_term_buff(adapter, - &rx_pool->long_term_buff, - rx_pool->size * - rx_pool->buff_size); - } else { - rc = reset_long_term_buff(adapter, - &rx_pool->long_term_buff); - } - - if (rc) - return rc; - - for (j = 0; j < rx_pool->size; j++) - rx_pool->free_map[j] = j; - - memset(rx_pool->rx_buff, 0, - rx_pool->size * sizeof(struct ibmvnic_rx_buff)); - - atomic_set(&rx_pool->available, 0); - rx_pool->next_alloc = 0; - rx_pool->next_free = 0; - rx_pool->active = 1; - } - - return 0; -} - +/** + * release_rx_pools() - Release any rx pools attached to @adapter. + * @adapter: ibmvnic adapter + * + * Safe to call this multiple times - even if no pools are attached. 
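The replenish path above stages rx descriptors in an indirect buffer and hands them to the VIOS in batches, so one hcall covers many buffers; the failure path then unwinds whatever was staged. A rough sketch of that accumulate-and-flush pattern, with hypothetical names (sketch_batch, sketch_stage_desc) and the flush operation abstracted behind a callback:

#include <stddef.h>

#define SKETCH_MAX_IND_DESCS 128	/* stand-in for the real limit */

struct sketch_batch {
	unsigned long long descs[SKETCH_MAX_IND_DESCS];
	int index;			/* next free slot; 0 == empty */
};

/* flush sends the staged descriptors in one call; returns 0 on success */
typedef int (*sketch_flush_fn)(const unsigned long long *descs, int n);

/* Stage one descriptor; hand the whole batch over when the array fills
 * or the caller marks the last descriptor. On flush failure the staged
 * entries are left in place so the caller can unwind them, as the
 * driver's failure path does above.
 */
static int sketch_stage_desc(struct sketch_batch *b,
			     unsigned long long desc, int last,
			     sketch_flush_fn flush)
{
	b->descs[b->index++] = desc;

	if (b->index == SKETCH_MAX_IND_DESCS || last) {
		int rc = flush(b->descs, b->index);

		if (rc)
			return rc;
		b->index = 0;
	}
	return 0;
}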
+ */ static void release_rx_pools(struct ibmvnic_adapter *adapter) { struct ibmvnic_rx_pool *rx_pool; @@ -532,7 +784,8 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter) netdev_dbg(adapter->netdev, "Releasing rx_pool[%d]\n", i); kfree(rx_pool->free_map); - free_long_term_buff(adapter, &rx_pool->long_term_buff); + + free_ltb_set(adapter, &rx_pool->ltb_set); if (!rx_pool->rx_buff) continue; @@ -550,50 +803,112 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter) kfree(adapter->rx_pool); adapter->rx_pool = NULL; adapter->num_active_rx_pools = 0; + adapter->prev_rx_pool_size = 0; +} + +/** + * reuse_rx_pools() - Check if the existing rx pools can be reused. + * @adapter: ibmvnic adapter + * + * Check if the existing rx pools in the adapter can be reused. The + * pools can be reused if the pool parameters (number of pools, + * number of buffers in the pool and size of each buffer) have not + * changed. + * + * NOTE: This assumes that all pools have the same number of buffers + * which is the case currently. If that changes, we must fix this. + * + * Return: true if the rx pools can be reused, false otherwise. + */ +static bool reuse_rx_pools(struct ibmvnic_adapter *adapter) +{ + u64 old_num_pools, new_num_pools; + u64 old_pool_size, new_pool_size; + u64 old_buff_size, new_buff_size; + + if (!adapter->rx_pool) + return false; + + old_num_pools = adapter->num_active_rx_pools; + new_num_pools = adapter->req_rx_queues; + + old_pool_size = adapter->prev_rx_pool_size; + new_pool_size = adapter->req_rx_add_entries_per_subcrq; + + old_buff_size = adapter->prev_rx_buf_sz; + new_buff_size = adapter->cur_rx_buf_sz; + + if (old_buff_size != new_buff_size || + old_num_pools != new_num_pools || + old_pool_size != new_pool_size) + return false; + + return true; } +/** + * init_rx_pools(): Initialize the set of receiver pools in the adapter. + * @netdev: net device associated with the vnic interface + * + * Initialize the set of receiver pools in the ibmvnic adapter associated + * with the net_device @netdev. If possible, reuse the existing rx pools. + * Otherwise free any existing pools and allocate a new set of pools + * before initializing them. + * + * Return: 0 on success and negative value on error. + */ static int init_rx_pools(struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); struct device *dev = &adapter->vdev->dev; struct ibmvnic_rx_pool *rx_pool; - int rxadd_subcrqs; - u64 *size_array; - int i, j; + u64 num_pools; + u64 pool_size; /* # of buffers in one pool */ + u64 buff_size; + int i, j, rc; - rxadd_subcrqs = - be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); - size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + - be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size)); + pool_size = adapter->req_rx_add_entries_per_subcrq; + num_pools = adapter->req_rx_queues; + buff_size = adapter->cur_rx_buf_sz; - adapter->rx_pool = kcalloc(rxadd_subcrqs, + if (reuse_rx_pools(adapter)) { + dev_dbg(dev, "Reusing rx pools\n"); + goto update_ltb; + } + + /* Allocate/populate the pools. */ + release_rx_pools(adapter); + + adapter->rx_pool = kcalloc(num_pools, sizeof(struct ibmvnic_rx_pool), GFP_KERNEL); if (!adapter->rx_pool) { dev_err(dev, "Failed to allocate rx pools\n"); - return -1; + return -ENOMEM; } - adapter->num_active_rx_pools = rxadd_subcrqs; + /* Set num_active_rx_pools early. If we fail below after partial + * allocation, release_rx_pools() will know how many to look for. 
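A compact sketch of the reuse test described above, assuming the three sizing parameters are tracked in a hypothetical struct; any change in the triple forces a free-and-reallocate cycle:

#include <stdbool.h>
#include <stdint.h>

struct sketch_pool_params {
	uint64_t num_pools;	/* one pool per rx queue */
	uint64_t pool_size;	/* buffers per pool */
	uint64_t buff_size;	/* bytes per buffer */
};

/* pools survive a reset only if every sizing parameter is unchanged */
static bool sketch_can_reuse(const struct sketch_pool_params *prev,
			     const struct sketch_pool_params *next)
{
	return prev->num_pools == next->num_pools &&
	       prev->pool_size == next->pool_size &&
	       prev->buff_size == next->buff_size;
}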
+	 */
+	adapter->num_active_rx_pools = num_pools;
 
-	for (i = 0; i < rxadd_subcrqs; i++) {
+	for (i = 0; i < num_pools; i++) {
 		rx_pool = &adapter->rx_pool[i];
 
 		netdev_dbg(adapter->netdev,
 			   "Initializing rx_pool[%d], %lld buffs, %lld bytes each\n",
-			   i, adapter->req_rx_add_entries_per_subcrq,
-			   be64_to_cpu(size_array[i]));
+			   i, pool_size, buff_size);
 
-		rx_pool->size = adapter->req_rx_add_entries_per_subcrq;
+		rx_pool->size = pool_size;
 		rx_pool->index = i;
-		rx_pool->buff_size = be64_to_cpu(size_array[i]);
-		rx_pool->active = 1;
+		rx_pool->buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
 
 		rx_pool->free_map = kcalloc(rx_pool->size, sizeof(int),
 					    GFP_KERNEL);
 		if (!rx_pool->free_map) {
-			release_rx_pools(adapter);
-			return -1;
+			dev_err(dev, "Couldn't alloc free_map %d\n", i);
+			rc = -ENOMEM;
+			goto out_release;
 		}
 
 		rx_pool->rx_buff = kcalloc(rx_pool->size,
@@ -601,65 +916,60 @@ static int init_rx_pools(struct net_device *netdev)
 					   GFP_KERNEL);
 		if (!rx_pool->rx_buff) {
 			dev_err(dev, "Couldn't alloc rx buffers\n");
-			release_rx_pools(adapter);
-			return -1;
-		}
-
-		if (alloc_long_term_buff(adapter, &rx_pool->long_term_buff,
-					 rx_pool->size * rx_pool->buff_size)) {
-			release_rx_pools(adapter);
-			return -1;
+			rc = -ENOMEM;
+			goto out_release;
 		}
-
-		for (j = 0; j < rx_pool->size; ++j)
-			rx_pool->free_map[j] = j;
-
-		atomic_set(&rx_pool->available, 0);
-		rx_pool->next_alloc = 0;
-		rx_pool->next_free = 0;
 	}
 
-	return 0;
-}
-
-static int reset_one_tx_pool(struct ibmvnic_adapter *adapter,
-			     struct ibmvnic_tx_pool *tx_pool)
-{
-	int rc, i;
-
-	rc = reset_long_term_buff(adapter, &tx_pool->long_term_buff);
-	if (rc)
-		return rc;
+	adapter->prev_rx_pool_size = pool_size;
+	adapter->prev_rx_buf_sz = adapter->cur_rx_buf_sz;
 
-	memset(tx_pool->tx_buff, 0,
-	       tx_pool->num_buffers *
-	       sizeof(struct ibmvnic_tx_buff));
+update_ltb:
+	for (i = 0; i < num_pools; i++) {
+		rx_pool = &adapter->rx_pool[i];
+		dev_dbg(dev, "Updating LTB for rx pool %d [%d, %d]\n",
+			i, rx_pool->size, rx_pool->buff_size);
 
-	for (i = 0; i < tx_pool->num_buffers; i++)
-		tx_pool->free_map[i] = i;
+		rc = alloc_ltb_set(adapter, &rx_pool->ltb_set,
+				   rx_pool->size, rx_pool->buff_size);
+		if (rc)
+			goto out;
 
-	tx_pool->consumer_index = 0;
-	tx_pool->producer_index = 0;
+		for (j = 0; j < rx_pool->size; ++j) {
+			struct ibmvnic_rx_buff *rx_buff;
 
-	return 0;
-}
+			rx_pool->free_map[j] = j;
 
-static int reset_tx_pools(struct ibmvnic_adapter *adapter)
-{
-	int tx_scrqs;
-	int i, rc;
+			/* NOTE: Don't clear rx_buff->skb here - will leak
+			 * memory! replenish_rx_pool() will reuse skbs or
+			 * allocate as necessary.
+			 */
+			rx_buff = &rx_pool->rx_buff[j];
+			rx_buff->dma = 0;
+			rx_buff->data = 0;
+			rx_buff->size = 0;
+			rx_buff->pool_index = 0;
+		}
 
-	tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
-	for (i = 0; i < tx_scrqs; i++) {
-		rc = reset_one_tx_pool(adapter, &adapter->tso_pool[i]);
-		if (rc)
-			return rc;
-		rc = reset_one_tx_pool(adapter, &adapter->tx_pool[i]);
-		if (rc)
-			return rc;
+		/* Mark pool "empty" so replenish_rx_pool() will
+		 * update the LTB info for each buffer
+		 */
+		atomic_set(&rx_pool->available, 0);
+		rx_pool->next_alloc = 0;
+		rx_pool->next_free = 0;
+		/* replenish_rx_pool() may have called deactivate_rx_pools()
+		 * on failover. Ensure pool is active now.
+		 */
+		rx_pool->active = 1;
 	}
 
 	return 0;
+out_release:
+	release_rx_pools(adapter);
+out:
+	/* We failed to allocate one or more LTBs or map them on the VIOS.
+	 * Hold onto the pools and any LTBs that we did allocate/map.
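A userspace sketch of the partial-allocation idiom init_rx_pools() relies on here: record the intended pool count before the allocation loop, so the release path can walk exactly that many slots even if allocation failed part way. Names (sketch_adapter, sketch_init, sketch_release) are illustrative:

#include <stdlib.h>

struct sketch_pool {
	int *free_map;
};

struct sketch_adapter {
	struct sketch_pool *pools;
	int num_active;		/* set before the loop, not after it */
};

/* safe to call even after a partial (or failed) sketch_init() */
static void sketch_release(struct sketch_adapter *a)
{
	int i;

	for (i = 0; i < a->num_active; i++)
		free(a->pools[i].free_map);	/* free(NULL) is a no-op */
	free(a->pools);
	a->pools = NULL;
	a->num_active = 0;
}

static int sketch_init(struct sketch_adapter *a, int num, int size)
{
	int i;

	a->pools = calloc(num, sizeof(*a->pools));
	if (!a->pools)
		return -1;
	a->num_active = num;	/* early, so sketch_release() sees it */

	for (i = 0; i < num; i++) {
		a->pools[i].free_map = calloc(size, sizeof(int));
		if (!a->pools[i].free_map) {
			sketch_release(a);
			return -1;
		}
	}
	return 0;
}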
+ */ + return rc; } static void release_vpd_data(struct ibmvnic_adapter *adapter) @@ -678,13 +988,22 @@ static void release_one_tx_pool(struct ibmvnic_adapter *adapter, { kfree(tx_pool->tx_buff); kfree(tx_pool->free_map); - free_long_term_buff(adapter, &tx_pool->long_term_buff); + free_ltb_set(adapter, &tx_pool->ltb_set); } +/** + * release_tx_pools() - Release any tx pools attached to @adapter. + * @adapter: ibmvnic adapter + * + * Safe to call this multiple times - even if no pools are attached. + */ static void release_tx_pools(struct ibmvnic_adapter *adapter) { int i; + /* init_tx_pools() ensures that ->tx_pool and ->tso_pool are + * both NULL or both non-NULL. So we only need to check one. + */ if (!adapter->tx_pool) return; @@ -698,78 +1017,209 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter) kfree(adapter->tso_pool); adapter->tso_pool = NULL; adapter->num_active_tx_pools = 0; + adapter->prev_tx_pool_size = 0; } static int init_one_tx_pool(struct net_device *netdev, struct ibmvnic_tx_pool *tx_pool, - int num_entries, int buf_size) + int pool_size, int buf_size) { - struct ibmvnic_adapter *adapter = netdev_priv(netdev); int i; - tx_pool->tx_buff = kcalloc(num_entries, + tx_pool->tx_buff = kcalloc(pool_size, sizeof(struct ibmvnic_tx_buff), GFP_KERNEL); if (!tx_pool->tx_buff) - return -1; - - if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff, - num_entries * buf_size)) - return -1; + return -ENOMEM; - tx_pool->free_map = kcalloc(num_entries, sizeof(int), GFP_KERNEL); - if (!tx_pool->free_map) - return -1; + tx_pool->free_map = kcalloc(pool_size, sizeof(int), GFP_KERNEL); + if (!tx_pool->free_map) { + kfree(tx_pool->tx_buff); + tx_pool->tx_buff = NULL; + return -ENOMEM; + } - for (i = 0; i < num_entries; i++) + for (i = 0; i < pool_size; i++) tx_pool->free_map[i] = i; tx_pool->consumer_index = 0; tx_pool->producer_index = 0; - tx_pool->num_buffers = num_entries; + tx_pool->num_buffers = pool_size; tx_pool->buf_size = buf_size; return 0; } +/** + * reuse_tx_pools() - Check if the existing tx pools can be reused. + * @adapter: ibmvnic adapter + * + * Check if the existing tx pools in the adapter can be reused. The + * pools can be reused if the pool parameters (number of pools, + * number of buffers in the pool and mtu) have not changed. + * + * NOTE: This assumes that all pools have the same number of buffers + * which is the case currently. If that changes, we must fix this. + * + * Return: true if the tx pools can be reused, false otherwise. + */ +static bool reuse_tx_pools(struct ibmvnic_adapter *adapter) +{ + u64 old_num_pools, new_num_pools; + u64 old_pool_size, new_pool_size; + u64 old_mtu, new_mtu; + + if (!adapter->tx_pool) + return false; + + old_num_pools = adapter->num_active_tx_pools; + new_num_pools = adapter->num_active_tx_scrqs; + old_pool_size = adapter->prev_tx_pool_size; + new_pool_size = adapter->req_tx_entries_per_subcrq; + old_mtu = adapter->prev_mtu; + new_mtu = adapter->req_mtu; + + if (old_mtu != new_mtu || + old_num_pools != new_num_pools || + old_pool_size != new_pool_size) + return false; + + return true; +} + +/** + * init_tx_pools(): Initialize the set of transmit pools in the adapter. + * @netdev: net device associated with the vnic interface + * + * Initialize the set of transmit pools in the ibmvnic adapter associated + * with the net_device @netdev. If possible, reuse the existing tx pools. + * Otherwise free any existing pools and allocate a new set of pools + * before initializing them. 
+ *
+ * Return: 0 on success and negative value on error.
+ */
 static int init_tx_pools(struct net_device *netdev)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-	int tx_subcrqs;
-	int i, rc;
+	struct device *dev = &adapter->vdev->dev;
+	int num_pools;
+	u64 pool_size;		/* # of buffers in pool */
+	u64 buff_size;
+	int i, j, rc;
+
+	num_pools = adapter->req_tx_queues;
+
+	/* We must notify the VIOS about the LTB on all resets - but we only
+	 * need to alloc/populate pools if either the number of buffers or
+	 * size of each buffer in the pool has changed.
+	 */
+	if (reuse_tx_pools(adapter)) {
+		netdev_dbg(netdev, "Reusing tx pools\n");
+		goto update_ltb;
+	}
+
+	/* Allocate/populate the pools. */
+	release_tx_pools(adapter);
+
+	pool_size = adapter->req_tx_entries_per_subcrq;
+	num_pools = adapter->num_active_tx_scrqs;
 
-	tx_subcrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
-	adapter->tx_pool = kcalloc(tx_subcrqs,
+	adapter->tx_pool = kcalloc(num_pools,
 				   sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
 	if (!adapter->tx_pool)
-		return -1;
+		return -ENOMEM;
 
-	adapter->tso_pool = kcalloc(tx_subcrqs,
+	adapter->tso_pool = kcalloc(num_pools,
 				    sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
-	if (!adapter->tso_pool)
-		return -1;
+	/* To simplify release_tx_pools() ensure that ->tx_pool and
+	 * ->tso_pool are either both NULL or both non-NULL.
+	 */
+	if (!adapter->tso_pool) {
+		kfree(adapter->tx_pool);
+		adapter->tx_pool = NULL;
+		return -ENOMEM;
+	}
+
+	/* Set num_active_tx_pools early. If we fail below after partial
+	 * allocation, release_tx_pools() will know how many to look for.
+	 */
+	adapter->num_active_tx_pools = num_pools;
 
-	adapter->num_active_tx_pools = tx_subcrqs;
+	buff_size = adapter->req_mtu + VLAN_HLEN;
+	buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
+
+	for (i = 0; i < num_pools; i++) {
+		dev_dbg(dev, "Init tx pool %d [%llu, %llu]\n",
+			i, adapter->req_tx_entries_per_subcrq, buff_size);
 
-	for (i = 0; i < tx_subcrqs; i++) {
 		rc = init_one_tx_pool(netdev, &adapter->tx_pool[i],
-				      adapter->req_tx_entries_per_subcrq,
-				      adapter->req_mtu + VLAN_HLEN);
-		if (rc) {
-			release_tx_pools(adapter);
-			return rc;
-		}
+				      pool_size, buff_size);
+		if (rc)
+			goto out_release;
 
 		rc = init_one_tx_pool(netdev, &adapter->tso_pool[i],
 				      IBMVNIC_TSO_BUFS,
 				      IBMVNIC_TSO_BUF_SZ);
-		if (rc) {
-			release_tx_pools(adapter);
-			return rc;
-		}
+		if (rc)
+			goto out_release;
+	}
+
+	adapter->prev_tx_pool_size = pool_size;
+	adapter->prev_mtu = adapter->req_mtu;
+
+update_ltb:
+	/* NOTE: All tx_pools have the same number of buffers (which is
+	 *       same as pool_size). All tso_pools have IBMVNIC_TSO_BUFS
+	 *       buffers (see the calls to init_one_tx_pool() for these).
+	 *       For consistency, we use tx_pool->num_buffers and
+	 *       tso_pool->num_buffers below.
+ */ + rc = -1; + for (i = 0; i < num_pools; i++) { + struct ibmvnic_tx_pool *tso_pool; + struct ibmvnic_tx_pool *tx_pool; + + tx_pool = &adapter->tx_pool[i]; + + dev_dbg(dev, "Updating LTB for tx pool %d [%d, %d]\n", + i, tx_pool->num_buffers, tx_pool->buf_size); + + rc = alloc_ltb_set(adapter, &tx_pool->ltb_set, + tx_pool->num_buffers, tx_pool->buf_size); + if (rc) + goto out; + + tx_pool->consumer_index = 0; + tx_pool->producer_index = 0; + + for (j = 0; j < tx_pool->num_buffers; j++) + tx_pool->free_map[j] = j; + + tso_pool = &adapter->tso_pool[i]; + + dev_dbg(dev, "Updating LTB for tso pool %d [%d, %d]\n", + i, tso_pool->num_buffers, tso_pool->buf_size); + + rc = alloc_ltb_set(adapter, &tso_pool->ltb_set, + tso_pool->num_buffers, tso_pool->buf_size); + if (rc) + goto out; + + tso_pool->consumer_index = 0; + tso_pool->producer_index = 0; + + for (j = 0; j < tso_pool->num_buffers; j++) + tso_pool->free_map[j] = j; } return 0; +out_release: + release_tx_pools(adapter); +out: + /* We failed to allocate one or more LTBs or map them on the VIOS. + * Hold onto the pools and any LTBs that we did allocate/map. + */ + return rc; } static void ibmvnic_napi_enable(struct ibmvnic_adapter *adapter) @@ -812,7 +1262,7 @@ static int init_napi(struct ibmvnic_adapter *adapter) for (i = 0; i < adapter->req_rx_queues; i++) { netdev_dbg(adapter->netdev, "Adding napi[%d]\n", i); netif_napi_add(adapter->netdev, &adapter->napi[i], - ibmvnic_poll, NAPI_POLL_WEIGHT); + ibmvnic_poll); } adapter->num_active_rx_napi = adapter->req_rx_queues; @@ -837,36 +1287,72 @@ static void release_napi(struct ibmvnic_adapter *adapter) adapter->napi_enabled = false; } +static const char *adapter_state_to_string(enum vnic_state state) +{ + switch (state) { + case VNIC_PROBING: + return "PROBING"; + case VNIC_PROBED: + return "PROBED"; + case VNIC_OPENING: + return "OPENING"; + case VNIC_OPEN: + return "OPEN"; + case VNIC_CLOSING: + return "CLOSING"; + case VNIC_CLOSED: + return "CLOSED"; + case VNIC_REMOVING: + return "REMOVING"; + case VNIC_REMOVED: + return "REMOVED"; + case VNIC_DOWN: + return "DOWN"; + } + return "UNKNOWN"; +} + static int ibmvnic_login(struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - unsigned long timeout = msecs_to_jiffies(30000); + unsigned long timeout = msecs_to_jiffies(20000); int retry_count = 0; + int retries = 10; bool retry; int rc; do { retry = false; - if (retry_count > IBMVNIC_MAX_QUEUES) { + if (retry_count > retries) { netdev_warn(netdev, "Login attempts exceeded\n"); - return -1; + return -EACCES; } adapter->init_done_rc = 0; reinit_completion(&adapter->init_done); rc = send_login(adapter); - if (rc) { - netdev_warn(netdev, "Unable to login\n"); + if (rc) return rc; - } if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { - netdev_warn(netdev, "Login timed out\n"); - return -1; + netdev_warn(netdev, "Login timed out, retrying...\n"); + retry = true; + adapter->init_done_rc = 0; + retry_count++; + continue; } - if (adapter->init_done_rc == PARTIALSUCCESS) { + if (adapter->init_done_rc == ABORTED) { + netdev_warn(netdev, "Login aborted, retrying...\n"); + retry = true; + adapter->init_done_rc = 0; + retry_count++; + /* FW or device may be busy, so + * wait a bit before retrying login + */ + msleep(500); + } else if (adapter->init_done_rc == PARTIALSUCCESS) { retry_count++; release_sub_crqs(adapter, 1); @@ -875,35 +1361,37 @@ static int ibmvnic_login(struct net_device *netdev) "Received partial success, retrying...\n"); adapter->init_done_rc = 0; 
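Stepping back from the hunks for a moment: ibmvnic_login() builds up a bounded retry loop in which each attempt is classified into retryable outcomes (timeout, aborted, partial success) or a hard failure. A schematic sketch of that policy, where the attempt callback is a hypothetical stand-in for the CRQ round trip:

#include <errno.h>

enum sketch_outcome { SK_OK, SK_TIMEOUT, SK_ABORTED, SK_PARTIAL, SK_ERR };

/* one login round trip: send request, wait on completion, classify */
typedef enum sketch_outcome (*sketch_attempt_fn)(void *ctx);

static int sketch_login(sketch_attempt_fn attempt, void *ctx, int retries)
{
	int tries = 0;

	while (tries++ <= retries) {
		switch (attempt(ctx)) {
		case SK_OK:
			return 0;
		case SK_TIMEOUT:	/* device never answered: retry */
		case SK_ABORTED:	/* device reset mid-login: retry */
		case SK_PARTIAL:	/* renegotiate capabilities: retry */
			continue;
		case SK_ERR:
			return -EIO;	/* hard failure: give up now */
		}
	}
	return -EACCES;			/* login attempts exceeded */
}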
 			reinit_completion(&adapter->init_done);
-			send_cap_queries(adapter);
+			send_query_cap(adapter);
 			if (!wait_for_completion_timeout(&adapter->init_done,
 							 timeout)) {
 				netdev_warn(netdev,
 					    "Capabilities query timed out\n");
-				return -1;
+				return -ETIMEDOUT;
 			}
 
 			rc = init_sub_crqs(adapter);
 			if (rc) {
 				netdev_warn(netdev,
 					    "SCRQ initialization failed\n");
-				return -1;
+				return rc;
 			}
 
 			rc = init_sub_crq_irqs(adapter);
 			if (rc) {
 				netdev_warn(netdev,
 					    "SCRQ irq initialization failed\n");
-				return -1;
+				return rc;
 			}
 		} else if (adapter->init_done_rc) {
-			netdev_warn(netdev, "Adapter login failed\n");
-			return -1;
+			netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n",
+				    adapter->init_done_rc);
+			return -EIO;
 		}
 	} while (retry);
 
 	__ibmvnic_set_mac(netdev, adapter->mac_addr);
 
+	netdev_dbg(netdev, "[S:%s] Login succeeded\n", adapter_state_to_string(adapter->state));
 	return 0;
 }
 
@@ -923,17 +1411,15 @@ static void release_resources(struct ibmvnic_adapter *adapter)
 {
 	release_vpd_data(adapter);
 
-	release_tx_pools(adapter);
-	release_rx_pools(adapter);
-
 	release_napi(adapter);
-
+	release_login_buffer(adapter);
 	release_login_rsp_buffer(adapter);
 }
 
 static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
 {
 	struct net_device *netdev = adapter->netdev;
-	unsigned long timeout = msecs_to_jiffies(30000);
+	unsigned long timeout = msecs_to_jiffies(20000);
 	union ibmvnic_crq crq;
 	bool resend;
 	int rc;
@@ -958,10 +1444,10 @@ static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
 		if (!wait_for_completion_timeout(&adapter->init_done,
 						 timeout)) {
 			netdev_err(netdev, "timeout setting link state\n");
-			return -1;
+			return -ETIMEDOUT;
 		}
 
-		if (adapter->init_done_rc == 1) {
+		if (adapter->init_done_rc == PARTIALSUCCESS) {
 			/* Partial success, delay and re-send */
 			mdelay(1000);
 			resend = true;
@@ -1100,13 +1586,11 @@ static int init_resources(struct ibmvnic_adapter *adapter)
 		return rc;
 	}
 
-	adapter->map_id = 1;
-
 	rc = init_napi(adapter);
 	if (rc)
 		return rc;
 
-	send_map_query(adapter);
+	send_query_map(adapter);
 
 	rc = init_rx_pools(netdev);
 	if (rc)
@@ -1141,16 +1625,25 @@ static int __ibmvnic_open(struct net_device *netdev)
 		if (prev_state == VNIC_CLOSED)
 			enable_irq(adapter->tx_scrq[i]->irq);
 		enable_scrq_irq(adapter, adapter->tx_scrq[i]);
+		netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i));
 	}
 
 	rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP);
 	if (rc) {
-		for (i = 0; i < adapter->req_rx_queues; i++)
-			napi_disable(&adapter->napi[i]);
-		release_resources(adapter);
+		ibmvnic_napi_disable(adapter);
+		ibmvnic_disable_irqs(adapter);
 		return rc;
 	}
 
+	adapter->tx_queues_active = true;
+
+	/* Since queues were stopped until now, there shouldn't be anyone
+	 * in ibmvnic_complete_tx() or ibmvnic_xmit() so maybe we
+	 * don't need the synchronize_rcu()? Leaving it for consistency
+	 * with setting ->tx_queues_active = false.
+	 */
+	synchronize_rcu();
+
 	netif_tx_start_all_queues(netdev);
 
 	if (prev_state == VNIC_CLOSED) {
@@ -1167,29 +1660,59 @@ static int ibmvnic_open(struct net_device *netdev)
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 	int rc;
 
-	/* If device failover is pending, just set device state and return.
-	 * Device operation will be handled by reset routine.
+	ASSERT_RTNL();
+
+	/* If device failover is pending or we are about to reset, just set
+	 * device state and return. Device operation will be handled by reset
+	 * routine.
+	 *
+	 * It should be safe to overwrite the adapter->state here.
Since + * we hold the rtnl, either the reset has not actually started or + * the rtnl got dropped during the set_link_state() in do_reset(). + * In the former case, no one else is changing the state (again we + * have the rtnl) and in the latter case, do_reset() will detect and + * honor our setting below. */ - if (adapter->failover_pending) { + if (adapter->failover_pending || (test_bit(0, &adapter->resetting))) { + netdev_dbg(netdev, "[S:%s FOP:%d] Resetting, deferring open\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending); adapter->state = VNIC_OPEN; - return 0; + rc = 0; + goto out; } if (adapter->state != VNIC_CLOSED) { rc = ibmvnic_login(netdev); if (rc) - return rc; + goto out; rc = init_resources(adapter); if (rc) { netdev_err(netdev, "failed to initialize resources\n"); - release_resources(adapter); - return rc; + goto out; } } rc = __ibmvnic_open(netdev); +out: + /* If open failed and there is a pending failover or in-progress reset, + * set device state and return. Device operation will be handled by + * reset routine. See also comments above regarding rtnl. + */ + if (rc && + (adapter->failover_pending || (test_bit(0, &adapter->resetting)))) { + adapter->state = VNIC_OPEN; + rc = 0; + } + + if (rc) { + release_resources(adapter); + release_rx_pools(adapter); + release_tx_pools(adapter); + } + return rc; } @@ -1295,6 +1818,14 @@ static void ibmvnic_cleanup(struct net_device *netdev) struct ibmvnic_adapter *adapter = netdev_priv(netdev); /* ensure that transmissions are stopped if called by do_reset */ + + adapter->tx_queues_active = false; + + /* Ensure complete_tx() and ibmvnic_xmit() see ->tx_queues_active + * update so they don't restart a queue after we stop it below. + */ + synchronize_rcu(); + if (test_bit(0, &adapter->resetting)) netif_tx_disable(netdev); else @@ -1302,9 +1833,6 @@ static void ibmvnic_cleanup(struct net_device *netdev) ibmvnic_napi_disable(adapter); ibmvnic_disable_irqs(adapter); - - clean_rx_pools(adapter); - clean_tx_pools(adapter); } static int __ibmvnic_close(struct net_device *netdev) @@ -1314,10 +1842,8 @@ static int __ibmvnic_close(struct net_device *netdev) adapter->state = VNIC_CLOSING; rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN); - if (rc) - return rc; adapter->state = VNIC_CLOSED; - return 0; + return rc; } static int ibmvnic_close(struct net_device *netdev) @@ -1325,6 +1851,11 @@ static int ibmvnic_close(struct net_device *netdev) struct ibmvnic_adapter *adapter = netdev_priv(netdev); int rc; + netdev_dbg(netdev, "[S:%s FOP:%d FRR:%d] Closing\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, + adapter->force_reset_recovery); + /* If device failover is pending, just set device state and return. * Device operation will be handled by reset routine. */ @@ -1335,16 +1866,18 @@ static int ibmvnic_close(struct net_device *netdev) rc = __ibmvnic_close(netdev); ibmvnic_cleanup(netdev); + clean_rx_pools(adapter); + clean_tx_pools(adapter); return rc; } /** * build_hdr_data - creates L2/L3/L4 header data buffer - * @hdr_field - bitfield determining needed headers - * @skb - socket buffer - * @hdr_len - array of header lengths - * @tot_len - total length of data + * @hdr_field: bitfield determining needed headers + * @skb: socket buffer + * @hdr_len: array of header lengths + * @hdr_data: buffer to write the header to * * Reads hdr_field to determine which headers are needed by firmware. * Builds a buffer containing these headers. 
Saves individual header @@ -1401,11 +1934,11 @@ static int build_hdr_data(u8 hdr_field, struct sk_buff *skb, /** * create_hdr_descs - create header and header extension descriptors - * @hdr_field - bitfield determining needed headers - * @data - buffer containing header data - * @len - length of data buffer - * @hdr_len - array of individual header lengths - * @scrq_arr - descriptor array + * @hdr_field: bitfield determining needed headers + * @hdr_data: buffer containing header data + * @len: length of data buffer + * @hdr_len: array of individual header lengths + * @scrq_arr: descriptor array * * Creates header and, if needed, header extension descriptors and * places them in a descriptor array, scrq_arr @@ -1453,26 +1986,27 @@ static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len, /** * build_hdr_descs_arr - build a header descriptor array - * @skb - socket buffer - * @num_entries - number of descriptors to be sent - * @subcrq - first TX descriptor - * @hdr_field - bit field determining which headers will be sent + * @skb: tx socket buffer + * @indir_arr: indirect array + * @num_entries: number of descriptors to be sent + * @hdr_field: bit field determining which headers will be sent * * This function will build a TX descriptor array with applicable * L2/L3/L4 packet header descriptors to be sent by send_subcrq_indirect. */ -static void build_hdr_descs_arr(struct ibmvnic_tx_buff *txbuff, +static void build_hdr_descs_arr(struct sk_buff *skb, + union sub_crq *indir_arr, int *num_entries, u8 hdr_field) { int hdr_len[3] = {0, 0, 0}; + u8 hdr_data[140] = {0}; int tot_len; - u8 *hdr_data = txbuff->hdr_data; - tot_len = build_hdr_data(hdr_field, txbuff->skb, hdr_len, - txbuff->hdr_data); + tot_len = build_hdr_data(hdr_field, skb, hdr_len, + hdr_data); *num_entries += create_hdr_descs(hdr_field, hdr_data, tot_len, hdr_len, - txbuff->indir_arr + 1); + indir_arr + 1); } static int ibmvnic_xmit_workarounds(struct sk_buff *skb, @@ -1490,17 +2024,104 @@ static int ibmvnic_xmit_workarounds(struct sk_buff *skb, return 0; } +static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *tx_scrq) +{ + struct ibmvnic_ind_xmit_queue *ind_bufp; + struct ibmvnic_tx_buff *tx_buff; + struct ibmvnic_tx_pool *tx_pool; + union sub_crq tx_scrq_entry; + int queue_num; + int entries; + int index; + int i; + + ind_bufp = &tx_scrq->ind_buf; + entries = (u64)ind_bufp->index; + queue_num = tx_scrq->pool_index; + + for (i = entries - 1; i >= 0; --i) { + tx_scrq_entry = ind_bufp->indir_arr[i]; + if (tx_scrq_entry.v1.type != IBMVNIC_TX_DESC) + continue; + index = be32_to_cpu(tx_scrq_entry.v1.correlator); + if (index & IBMVNIC_TSO_POOL_MASK) { + tx_pool = &adapter->tso_pool[queue_num]; + index &= ~IBMVNIC_TSO_POOL_MASK; + } else { + tx_pool = &adapter->tx_pool[queue_num]; + } + tx_pool->free_map[tx_pool->consumer_index] = index; + tx_pool->consumer_index = tx_pool->consumer_index == 0 ? 
+ tx_pool->num_buffers - 1 : + tx_pool->consumer_index - 1; + tx_buff = &tx_pool->tx_buff[index]; + adapter->netdev->stats.tx_packets--; + adapter->netdev->stats.tx_bytes -= tx_buff->skb->len; + adapter->tx_stats_buffers[queue_num].packets--; + adapter->tx_stats_buffers[queue_num].bytes -= + tx_buff->skb->len; + dev_kfree_skb_any(tx_buff->skb); + tx_buff->skb = NULL; + adapter->netdev->stats.tx_dropped++; + } + + ind_bufp->index = 0; + + if (atomic_sub_return(entries, &tx_scrq->used) <= + (adapter->req_tx_entries_per_subcrq / 2) && + __netif_subqueue_stopped(adapter->netdev, queue_num)) { + rcu_read_lock(); + + if (adapter->tx_queues_active) { + netif_wake_subqueue(adapter->netdev, queue_num); + netdev_dbg(adapter->netdev, "Started queue %d\n", + queue_num); + } + + rcu_read_unlock(); + } +} + +static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *tx_scrq) +{ + struct ibmvnic_ind_xmit_queue *ind_bufp; + u64 dma_addr; + u64 entries; + u64 handle; + int rc; + + ind_bufp = &tx_scrq->ind_buf; + dma_addr = (u64)ind_bufp->indir_dma; + entries = (u64)ind_bufp->index; + handle = tx_scrq->handle; + + if (!entries) + return 0; + rc = send_subcrq_indirect(adapter, handle, dma_addr, entries); + if (rc) + ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq); + else + ind_bufp->index = 0; + return 0; +} + static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); int queue_num = skb_get_queue_mapping(skb); u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req; struct device *dev = &adapter->vdev->dev; + struct ibmvnic_ind_xmit_queue *ind_bufp; struct ibmvnic_tx_buff *tx_buff = NULL; struct ibmvnic_sub_crq_queue *tx_scrq; + struct ibmvnic_long_term_buff *ltb; struct ibmvnic_tx_pool *tx_pool; unsigned int tx_send_failed = 0; + netdev_tx_t ret = NETDEV_TX_OK; unsigned int tx_map_failed = 0; + union sub_crq indir_arr[16]; unsigned int tx_dropped = 0; unsigned int tx_packets = 0; unsigned int tx_bytes = 0; @@ -1511,14 +2132,15 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) unsigned int offset; int num_entries = 1; unsigned char *dst; - u64 *handle_array; - int index = 0; + int bufidx = 0; u8 proto = 0; - netdev_tx_t ret = NETDEV_TX_OK; - if (test_bit(0, &adapter->resetting)) { - if (!netif_subqueue_stopped(netdev, skb)) - netif_stop_subqueue(netdev, queue_num); + /* If a reset is in progress, drop the packet since + * the scrqs may get torn down. Otherwise use the + * rcu to ensure reset waits for us to complete. 
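A kernel-style sketch of the ->tx_queues_active protocol used in this area: transmit-side readers check the flag under rcu_read_lock() before waking a queue, and the reset side clears the flag and then calls synchronize_rcu() so no reader can wake a queue it is about to stop. sketch_adapter is an illustrative stand-in for the real adapter struct:

#include <linux/rcupdate.h>
#include <linux/netdevice.h>

struct sketch_adapter {
	struct net_device *netdev;
	bool tx_queues_active;
};

/* reader (xmit/completion paths): only wake a queue while active */
static void sketch_try_wake(struct sketch_adapter *a, int queue_num)
{
	rcu_read_lock();
	if (a->tx_queues_active)
		netif_wake_subqueue(a->netdev, queue_num);
	rcu_read_unlock();
}

/* writer (reset path): clear the flag, wait out current readers, then
 * stop the queues knowing nobody can restart them behind our back
 */
static void sketch_quiesce(struct sketch_adapter *a)
{
	a->tx_queues_active = false;
	synchronize_rcu();
	netif_tx_disable(a->netdev);
}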
+ */ + rcu_read_lock(); + if (!adapter->tx_queues_active) { dev_kfree_skb_any(skb); tx_send_failed++; @@ -1527,38 +2149,41 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) goto out; } + tx_scrq = adapter->tx_scrq[queue_num]; + txq = netdev_get_tx_queue(netdev, queue_num); + ind_bufp = &tx_scrq->ind_buf; + if (ibmvnic_xmit_workarounds(skb, netdev)) { tx_dropped++; tx_send_failed++; ret = NETDEV_TX_OK; + ibmvnic_tx_scrq_flush(adapter, tx_scrq); goto out; } + if (skb_is_gso(skb)) tx_pool = &adapter->tso_pool[queue_num]; else tx_pool = &adapter->tx_pool[queue_num]; - tx_scrq = adapter->tx_scrq[queue_num]; - txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb)); - handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + - be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs)); + bufidx = tx_pool->free_map[tx_pool->consumer_index]; - index = tx_pool->free_map[tx_pool->consumer_index]; - - if (index == IBMVNIC_INVALID_MAP) { + if (bufidx == IBMVNIC_INVALID_MAP) { dev_kfree_skb_any(skb); tx_send_failed++; tx_dropped++; + ibmvnic_tx_scrq_flush(adapter, tx_scrq); ret = NETDEV_TX_OK; goto out; } tx_pool->free_map[tx_pool->consumer_index] = IBMVNIC_INVALID_MAP; - offset = index * tx_pool->buf_size; - dst = tx_pool->long_term_buff.buff + offset; + map_txpool_buf_to_ltb(tx_pool, bufidx, <b, &offset); + + dst = ltb->buff + offset; memset(dst, 0, tx_pool->buf_size); - data_dma_addr = tx_pool->long_term_buff.addr + offset; + data_dma_addr = ltb->addr + offset; if (skb_shinfo(skb)->nr_frags) { int cur, i; @@ -1571,25 +2196,24 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - memcpy(dst + cur, - page_address(skb_frag_page(frag)) + - skb_frag_off(frag), skb_frag_size(frag)); + memcpy(dst + cur, skb_frag_address(frag), + skb_frag_size(frag)); cur += skb_frag_size(frag); } } else { skb_copy_from_linear_data(skb, dst, skb->len); } + /* post changes to long_term_buff *dst before VIOS accessing it */ + dma_wmb(); + tx_pool->consumer_index = (tx_pool->consumer_index + 1) % tx_pool->num_buffers; - tx_buff = &tx_pool->tx_buff[index]; + tx_buff = &tx_pool->tx_buff[bufidx]; tx_buff->skb = skb; - tx_buff->data_dma[0] = data_dma_addr; - tx_buff->data_len[0] = skb->len; - tx_buff->index = index; + tx_buff->index = bufidx; tx_buff->pool_index = queue_num; - tx_buff->last_frag = true; memset(&tx_crq, 0, sizeof(tx_crq)); tx_crq.v1.first = IBMVNIC_CRQ_CMD; @@ -1600,10 +2224,10 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) if (skb_is_gso(skb)) tx_crq.v1.correlator = - cpu_to_be32(index | IBMVNIC_TSO_POOL_MASK); + cpu_to_be32(bufidx | IBMVNIC_TSO_POOL_MASK); else - tx_crq.v1.correlator = cpu_to_be32(index); - tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id); + tx_crq.v1.correlator = cpu_to_be32(bufidx); + tx_crq.v1.dma_reg = cpu_to_be16(ltb->map_id); tx_crq.v1.sge_len = cpu_to_be32(skb->len); tx_crq.v1.ioba = cpu_to_be64(data_dma_addr); @@ -1634,55 +2258,29 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_crq.v1.mss = cpu_to_be16(skb_shinfo(skb)->gso_size); hdrs += 2; } - /* determine if l2/3/4 headers are sent to firmware */ - if ((*hdrs >> 7) & 1) { - build_hdr_descs_arr(tx_buff, &num_entries, *hdrs); - tx_crq.v1.n_crq_elem = num_entries; - tx_buff->num_entries = num_entries; - tx_buff->indir_arr[0] = tx_crq; - tx_buff->indir_dma = dma_map_single(dev, 
tx_buff->indir_arr, - sizeof(tx_buff->indir_arr), - DMA_TO_DEVICE); - if (dma_mapping_error(dev, tx_buff->indir_dma)) { - dev_kfree_skb_any(skb); - tx_buff->skb = NULL; - if (!firmware_has_feature(FW_FEATURE_CMO)) - dev_err(dev, "tx: unable to map descriptor array\n"); - tx_map_failed++; - tx_dropped++; - ret = NETDEV_TX_OK; - goto tx_err_out; - } - lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num], - (u64)tx_buff->indir_dma, - (u64)num_entries); - dma_unmap_single(dev, tx_buff->indir_dma, - sizeof(tx_buff->indir_arr), DMA_TO_DEVICE); - } else { - tx_buff->num_entries = num_entries; - lpar_rc = send_subcrq(adapter, handle_array[queue_num], - &tx_crq); - } - if (lpar_rc != H_SUCCESS) { - if (lpar_rc != H_CLOSED && lpar_rc != H_PARAMETER) - dev_err_ratelimited(dev, "tx: send failed\n"); - dev_kfree_skb_any(skb); - tx_buff->skb = NULL; - if (lpar_rc == H_CLOSED || adapter->failover_pending) { - /* Disable TX and report carrier off if queue is closed - * or pending failover. - * Firmware guarantees that a signal will be sent to the - * driver, triggering a reset or some other action. - */ - netif_tx_stop_all_queues(netdev); - netif_carrier_off(netdev); - } + if ((*hdrs >> 7) & 1) + build_hdr_descs_arr(skb, indir_arr, &num_entries, *hdrs); - tx_send_failed++; - tx_dropped++; - ret = NETDEV_TX_OK; - goto tx_err_out; + tx_crq.v1.n_crq_elem = num_entries; + tx_buff->num_entries = num_entries; + /* flush buffer if current entry can not fit */ + if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) { + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); + if (lpar_rc != H_SUCCESS) + goto tx_flush_err; + } + + indir_arr[0] = tx_crq; + memcpy(&ind_bufp->indir_arr[ind_bufp->index], &indir_arr[0], + num_entries * sizeof(struct ibmvnic_generic_scrq)); + ind_bufp->index += num_entries; + if (__netdev_tx_sent_queue(txq, skb->len, + netdev_xmit_more() && + ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) { + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); + if (lpar_rc != H_SUCCESS) + goto tx_err; } if (atomic_add_return(num_entries, &tx_scrq->used) @@ -1693,19 +2291,32 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_packets++; tx_bytes += skb->len; - txq->trans_start = jiffies; + txq_trans_cond_update(txq); ret = NETDEV_TX_OK; goto out; -tx_err_out: - /* roll back consumer index and map array*/ - if (tx_pool->consumer_index == 0) - tx_pool->consumer_index = - tx_pool->num_buffers - 1; - else - tx_pool->consumer_index--; - tx_pool->free_map[tx_pool->consumer_index] = index; +tx_flush_err: + dev_kfree_skb_any(skb); + tx_buff->skb = NULL; + tx_pool->consumer_index = tx_pool->consumer_index == 0 ? + tx_pool->num_buffers - 1 : + tx_pool->consumer_index - 1; + tx_dropped++; +tx_err: + if (lpar_rc != H_CLOSED && lpar_rc != H_PARAMETER) + dev_err_ratelimited(dev, "tx: send failed\n"); + + if (lpar_rc == H_CLOSED || adapter->failover_pending) { + /* Disable TX and report carrier off if queue is closed + * or pending failover. + * Firmware guarantees that a signal will be sent to the + * driver, triggering a reset or some other action. 
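A small sketch of the free_map rollback the tx error paths around here perform: taking a buffer advances the consumer index around the ring, and undoing a failed send steps it back and returns the index to the map. Plain C with illustrative names only:

struct sketch_ring {
	int *free_map;
	int num_buffers;
	int consumer;
};

/* take a buffer index off the ring; the consumer moves forward */
static int sketch_take(struct sketch_ring *r)
{
	int idx = r->free_map[r->consumer];

	r->free_map[r->consumer] = -1;		/* INVALID_MAP marker */
	r->consumer = (r->consumer + 1) % r->num_buffers;
	return idx;
}

/* undo sketch_take() after a failed send; the consumer steps back */
static void sketch_undo(struct sketch_ring *r, int idx)
{
	r->consumer = r->consumer == 0 ? r->num_buffers - 1
				       : r->consumer - 1;
	r->free_map[r->consumer] = idx;
}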
+		 */
+		netif_tx_stop_all_queues(netdev);
+		netif_carrier_off(netdev);
+	}
 out:
+	rcu_read_unlock();
 	netdev->stats.tx_dropped += tx_dropped;
 	netdev->stats.tx_bytes += tx_bytes;
 	netdev->stats.tx_packets += tx_packets;
@@ -1809,6 +2420,9 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
 	int rc;
 
 	rc = 0;
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
 	ether_addr_copy(adapter->mac_addr, addr->sa_data);
 	if (adapter->state != VNIC_PROBED)
 		rc = __ibmvnic_set_mac(netdev, addr->sa_data);
@@ -1816,105 +2430,80 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
 	return rc;
 }
 
-/**
- * do_change_param_reset returns zero if we are able to keep processing reset
- * events, or non-zero if we hit a fatal error and must halt.
+static const char *reset_reason_to_string(enum ibmvnic_reset_reason reason)
+{
+	switch (reason) {
+	case VNIC_RESET_FAILOVER:
+		return "FAILOVER";
+	case VNIC_RESET_MOBILITY:
+		return "MOBILITY";
+	case VNIC_RESET_FATAL:
+		return "FATAL";
+	case VNIC_RESET_NON_FATAL:
+		return "NON_FATAL";
+	case VNIC_RESET_TIMEOUT:
+		return "TIMEOUT";
+	case VNIC_RESET_CHANGE_PARAM:
+		return "CHANGE_PARAM";
+	case VNIC_RESET_PASSIVE_INIT:
+		return "PASSIVE_INIT";
+	}
+	return "UNKNOWN";
+}
+
+/*
+ * Initialize the init_done completion and return code values. We
+ * can get a transport event just after registering the CRQ and the
+ * tasklet will use this to communicate the transport event. To ensure
+ * we don't miss the notification/error, initialize these _before_
+ * registering the CRQ.
  */
-static int do_change_param_reset(struct ibmvnic_adapter *adapter,
-				 struct ibmvnic_rwi *rwi,
-				 u32 reset_state)
+static inline void reinit_init_done(struct ibmvnic_adapter *adapter)
 {
-	struct net_device *netdev = adapter->netdev;
-	int i, rc;
-
-	netdev_dbg(adapter->netdev, "Change param resetting driver (%d)\n",
-		   rwi->reset_reason);
-
-	netif_carrier_off(netdev);
-	adapter->reset_reason = rwi->reset_reason;
-
-	ibmvnic_cleanup(netdev);
-
-	if (reset_state == VNIC_OPEN) {
-		rc = __ibmvnic_close(netdev);
-		if (rc)
-			return rc;
-	}
-
-	release_resources(adapter);
-	release_sub_crqs(adapter, 1);
-	release_crq_queue(adapter);
-
-	adapter->state = VNIC_PROBED;
-
-	rc = init_crq_queue(adapter);
-
-	if (rc) {
-		netdev_err(adapter->netdev,
-			   "Couldn't initialize crq. rc=%d\n", rc);
-		return rc;
-	}
-
-	rc = ibmvnic_reset_init(adapter);
-	if (rc)
-		return IBMVNIC_INIT_FAILED;
-
-	/* If the adapter was in PROBE state prior to the reset,
-	 * exit here.
-	 */
-	if (reset_state == VNIC_PROBED)
-		return 0;
-
-	rc = ibmvnic_login(netdev);
-	if (rc) {
-		adapter->state = reset_state;
-		return rc;
-	}
-
-	rc = init_resources(adapter);
-	if (rc)
-		return rc;
-
-	ibmvnic_disable_irqs(adapter);
-
-	adapter->state = VNIC_CLOSED;
-
-	if (reset_state == VNIC_CLOSED)
-		return 0;
-
-	rc = __ibmvnic_open(netdev);
-	if (rc)
-		return IBMVNIC_OPEN_FAILED;
-
-	/* refresh device's multicast list */
-	ibmvnic_set_multi(netdev);
-
-	/* kick napi */
-	for (i = 0; i < adapter->req_rx_queues; i++)
-		napi_schedule(&adapter->napi[i]);
-
-	return 0;
+	reinit_completion(&adapter->init_done);
+	adapter->init_done_rc = 0;
 }
 
-/**
+/*
  * do_reset returns zero if we are able to keep processing reset events, or
  * non-zero if we hit a fatal error and must halt.
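A kernel-style sketch of the ordering reinit_init_done() (above) exists to guarantee: the completion and its return code are re-armed before the CRQ is (re)registered, so a transport event that fires immediately after registration is never missed. register_crq is a hypothetical callback, sketch_dev an illustrative type:

#include <linux/completion.h>

struct sketch_dev {
	struct completion init_done;
	int init_done_rc;
};

static int sketch_register(struct sketch_dev *d,
			   int (*register_crq)(struct sketch_dev *))
{
	/* arm first: the CRQ tasklet may complete() the moment the
	 * queue exists, possibly before register_crq() even returns
	 */
	reinit_completion(&d->init_done);
	d->init_done_rc = 0;

	return register_crq(d);
}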
*/ static int do_reset(struct ibmvnic_adapter *adapter, struct ibmvnic_rwi *rwi, u32 reset_state) { + struct net_device *netdev = adapter->netdev; u64 old_num_rx_queues, old_num_tx_queues; u64 old_num_rx_slots, old_num_tx_slots; - struct net_device *netdev = adapter->netdev; - int i, rc; + int rc; - netdev_dbg(adapter->netdev, "Re-setting driver (%d)\n", - rwi->reset_reason); + netdev_dbg(adapter->netdev, + "[S:%s FOP:%d] Reset reason: %s, reset_state: %s\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, + reset_reason_to_string(rwi->reset_reason), + adapter_state_to_string(reset_state)); - rtnl_lock(); + adapter->reset_reason = rwi->reset_reason; + /* requestor of VNIC_RESET_CHANGE_PARAM already has the rtnl lock */ + if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM)) + rtnl_lock(); + + /* Now that we have the rtnl lock, clear any pending failover. + * This will ensure ibmvnic_open() has either completed or will + * block until failover is complete. + */ + if (rwi->reset_reason == VNIC_RESET_FAILOVER) + adapter->failover_pending = false; + + /* read the state and check (again) after getting rtnl */ + reset_state = adapter->state; + + if (reset_state == VNIC_REMOVING || reset_state == VNIC_REMOVED) { + rc = -EBUSY; + goto out; + } netif_carrier_off(netdev); - adapter->reset_reason = rwi->reset_reason; old_num_rx_queues = adapter->req_rx_queues; old_num_tx_queues = adapter->req_tx_queues; @@ -1926,25 +2515,53 @@ static int do_reset(struct ibmvnic_adapter *adapter, if (reset_state == VNIC_OPEN && adapter->reset_reason != VNIC_RESET_MOBILITY && adapter->reset_reason != VNIC_RESET_FAILOVER) { - adapter->state = VNIC_CLOSING; + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { + rc = __ibmvnic_close(netdev); + if (rc) + goto out; + } else { + adapter->state = VNIC_CLOSING; - /* Release the RTNL lock before link state change and - * re-acquire after the link state change to allow - * linkwatch_event to grab the RTNL lock and run during - * a reset. - */ - rtnl_unlock(); - rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN); - rtnl_lock(); - if (rc) - goto out; + /* Release the RTNL lock before link state change and + * re-acquire after the link state change to allow + * linkwatch_event to grab the RTNL lock and run during + * a reset. + */ + rtnl_unlock(); + rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN); + rtnl_lock(); + if (rc) + goto out; - if (adapter->state != VNIC_CLOSING) { - rc = -1; - goto out; + if (adapter->state == VNIC_OPEN) { + /* When we dropped rtnl, ibmvnic_open() got + * it and noticed that we are resetting and + * set the adapter state to OPEN. Update our + * new "target" state, and resume the reset + * from VNIC_CLOSING state. 
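A schematic sketch of the retarget step described in the comment above: after retaking the rtnl around the link-state change, do_reset() rereads the adapter state; if ibmvnic_open() raced in and set OPEN, that becomes the new target and the reset resumes from CLOSING. Enum and names are illustrative:

enum sketch_state { SK_CLOSED, SK_CLOSING, SK_OPEN };

/* called with rtnl held, after it was dropped and retaken */
static int sketch_retarget(enum sketch_state *cur,
			   enum sketch_state *target)
{
	if (*cur == SK_OPEN) {		/* open raced with the reset */
		*target = SK_OPEN;	/* reopen once the reset is done */
		*cur = SK_CLOSING;	/* resume closing for now */
	}
	if (*cur != SK_CLOSING)
		return -1;		/* a third party moved the state */
	*cur = SK_CLOSED;
	return 0;
}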
+ */ + netdev_dbg(netdev, + "Open changed state from %s, updating.\n", + adapter_state_to_string(reset_state)); + reset_state = VNIC_OPEN; + adapter->state = VNIC_CLOSING; + } + + if (adapter->state != VNIC_CLOSING) { + /* If someone else changed the adapter state + * when we dropped the rtnl, fail the reset + */ + rc = -EAGAIN; + goto out; + } + adapter->state = VNIC_CLOSED; } + } - adapter->state = VNIC_CLOSED; + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { + release_resources(adapter); + release_sub_crqs(adapter, 1); + release_crq_queue(adapter); } if (adapter->reset_reason != VNIC_RESET_NON_FATAL) { @@ -1953,49 +2570,59 @@ static int do_reset(struct ibmvnic_adapter *adapter, */ adapter->state = VNIC_PROBED; - if (adapter->reset_reason == VNIC_RESET_MOBILITY) { + reinit_init_done(adapter); + + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { + rc = init_crq_queue(adapter); + } else if (adapter->reset_reason == VNIC_RESET_MOBILITY) { rc = ibmvnic_reenable_crq_queue(adapter); release_sub_crqs(adapter, 1); } else { rc = ibmvnic_reset_crq(adapter); - if (!rc) + if (rc == H_CLOSED || rc == H_SUCCESS) { rc = vio_enable_interrupts(adapter->vdev); + if (rc) + netdev_err(adapter->netdev, + "Reset failed to enable interrupts. rc=%d\n", + rc); + } } if (rc) { netdev_err(adapter->netdev, - "Couldn't initialize crq. rc=%d\n", rc); + "Reset couldn't initialize crq. rc=%d\n", rc); goto out; } - rc = ibmvnic_reset_init(adapter); - if (rc) { - rc = IBMVNIC_INIT_FAILED; + rc = ibmvnic_reset_init(adapter, true); + if (rc) goto out; - } - /* If the adapter was in PROBE state prior to the reset, + /* If the adapter was in PROBE or DOWN state prior to the reset, * exit here. */ - if (reset_state == VNIC_PROBED) { + if (reset_state == VNIC_PROBED || reset_state == VNIC_DOWN) { rc = 0; goto out; } rc = ibmvnic_login(netdev); - if (rc) { - adapter->state = reset_state; + if (rc) goto out; - } - if (adapter->req_rx_queues != old_num_rx_queues || + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { + rc = init_resources(adapter); + if (rc) + goto out; + } else if (adapter->req_rx_queues != old_num_rx_queues || adapter->req_tx_queues != old_num_tx_queues || adapter->req_rx_add_entries_per_subcrq != old_num_rx_slots || adapter->req_tx_entries_per_subcrq != - old_num_tx_slots) { - release_rx_pools(adapter); - release_tx_pools(adapter); + old_num_tx_slots || + !adapter->rx_pool || + !adapter->tso_pool || + !adapter->tx_pool) { release_napi(adapter); release_vpd_data(adapter); @@ -2004,13 +2631,21 @@ static int do_reset(struct ibmvnic_adapter *adapter, goto out; } else { - rc = reset_tx_pools(adapter); - if (rc) + rc = init_tx_pools(netdev); + if (rc) { + netdev_dbg(netdev, + "init tx pools failed (%d)\n", + rc); goto out; + } - rc = reset_rx_pools(adapter); - if (rc) + rc = init_rx_pools(netdev); + if (rc) { + netdev_dbg(netdev, + "init rx pools failed (%d)\n", + rc); goto out; + } } ibmvnic_disable_irqs(adapter); } @@ -2030,18 +2665,23 @@ static int do_reset(struct ibmvnic_adapter *adapter, /* refresh device's multicast list */ ibmvnic_set_multi(netdev); - /* kick napi */ - for (i = 0; i < adapter->req_rx_queues; i++) - napi_schedule(&adapter->napi[i]); - - if (adapter->reset_reason != VNIC_RESET_FAILOVER) - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev); + if (adapter->reset_reason == VNIC_RESET_FAILOVER || + adapter->reset_reason == VNIC_RESET_MOBILITY) + __netdev_notify_peers(netdev); rc = 0; out: - rtnl_unlock(); + /* restore the adapter state if reset failed */ + if (rc) + 
adapter->state = reset_state; + /* requestor of VNIC_RESET_CHANGE_PARAM should still hold the rtnl lock */ + if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM)) + rtnl_unlock(); + netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Reset done, rc %d\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, rc); return rc; } @@ -2051,8 +2691,16 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter, struct net_device *netdev = adapter->netdev; int rc; - netdev_dbg(adapter->netdev, "Hard resetting driver (%d)\n", - rwi->reset_reason); + netdev_dbg(adapter->netdev, "Hard resetting driver (%s)\n", + reset_reason_to_string(rwi->reset_reason)); + + /* read the state and check (again) after getting rtnl */ + reset_state = adapter->state; + + if (reset_state == VNIC_REMOVING || reset_state == VNIC_REMOVED) { + rc = -EBUSY; + goto out; + } netif_carrier_off(netdev); adapter->reset_reason = rwi->reset_reason; @@ -2067,45 +2715,54 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter, */ adapter->state = VNIC_PROBED; - reinit_completion(&adapter->init_done); + reinit_init_done(adapter); + rc = init_crq_queue(adapter); if (rc) { netdev_err(adapter->netdev, "Couldn't initialize crq. rc=%d\n", rc); - return rc; + goto out; } - rc = ibmvnic_init(adapter); + rc = ibmvnic_reset_init(adapter, false); if (rc) - return rc; + goto out; - /* If the adapter was in PROBE state prior to the reset, + /* If the adapter was in PROBE or DOWN state prior to the reset, * exit here. */ - if (reset_state == VNIC_PROBED) - return 0; + if (reset_state == VNIC_PROBED || reset_state == VNIC_DOWN) + goto out; rc = ibmvnic_login(netdev); - if (rc) { - adapter->state = VNIC_PROBED; - return 0; - } + if (rc) + goto out; rc = init_resources(adapter); if (rc) - return rc; + goto out; ibmvnic_disable_irqs(adapter); adapter->state = VNIC_CLOSED; if (reset_state == VNIC_CLOSED) - return 0; + goto out; rc = __ibmvnic_open(netdev); - if (rc) - return IBMVNIC_OPEN_FAILED; + if (rc) { + rc = IBMVNIC_OPEN_FAILED; + goto out; + } - return 0; + __netdev_notify_peers(netdev); +out: + /* restore adapter state if reset failed */ + if (rc) + adapter->state = reset_state; + netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Hard reset done, rc %d\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, rc); + return rc; } static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter) @@ -2127,33 +2784,154 @@ static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter) return rwi; } -static void free_all_rwi(struct ibmvnic_adapter *adapter) +/** + * do_passive_init - complete probing when partner device is detected. + * @adapter: ibmvnic_adapter struct + * + * If the ibmvnic device does not have a partner device to communicate with at boot + * and that partner device comes online at a later time, this function is called + * to complete the initialization process of ibmvnic device. + * Caller is expected to hold rtnl_lock(). + * + * Returns non-zero if sub-CRQs are not initialized properly leaving the device + * in the down state. + * Returns 0 upon success and the device is in PROBED state. 
+ */
+
+static int do_passive_init(struct ibmvnic_adapter *adapter)
 {
-	struct ibmvnic_rwi *rwi;
+	unsigned long timeout = msecs_to_jiffies(30000);
+	struct net_device *netdev = adapter->netdev;
+	struct device *dev = &adapter->vdev->dev;
+	int rc;
 
-	rwi = get_next_rwi(adapter);
-	while (rwi) {
-		kfree(rwi);
-		rwi = get_next_rwi(adapter);
+	netdev_dbg(netdev, "Partner device found, probing.\n");
+
+	adapter->state = VNIC_PROBING;
+	reinit_completion(&adapter->init_done);
+	adapter->init_done_rc = 0;
+	adapter->crq.active = true;
+
+	rc = send_crq_init_complete(adapter);
+	if (rc)
+		goto out;
+
+	rc = send_version_xchg(adapter);
+	if (rc)
+		netdev_dbg(adapter->netdev, "send_version_xchg failed, rc=%d\n", rc);
+
+	if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
+		dev_err(dev, "Initialization sequence timed out\n");
+		rc = -ETIMEDOUT;
+		goto out;
+	}
+
+	rc = init_sub_crqs(adapter);
+	if (rc) {
+		dev_err(dev, "Initialization of sub crqs failed, rc=%d\n", rc);
+		goto out;
 	}
+
+	rc = init_sub_crq_irqs(adapter);
+	if (rc) {
+		dev_err(dev, "Failed to initialize sub crq irqs, rc=%d\n", rc);
+		goto init_failed;
+	}
+
+	netdev->mtu = adapter->req_mtu - ETH_HLEN;
+	netdev->min_mtu = adapter->min_mtu - ETH_HLEN;
+	netdev->max_mtu = adapter->max_mtu - ETH_HLEN;
+
+	adapter->state = VNIC_PROBED;
+	netdev_dbg(netdev, "Probed successfully. Waiting for signal from partner device.\n");
+
+	return 0;
+
+init_failed:
+	release_sub_crqs(adapter, 1);
+out:
+	adapter->state = VNIC_DOWN;
+	return rc;
 }
 
 static void __ibmvnic_reset(struct work_struct *work)
 {
-	struct ibmvnic_rwi *rwi;
 	struct ibmvnic_adapter *adapter;
+	unsigned int timeout = 5000;
+	struct ibmvnic_rwi *tmprwi;
 	bool saved_state = false;
+	struct ibmvnic_rwi *rwi;
 	unsigned long flags;
+	struct device *dev;
+	bool need_reset;
+	int num_fails = 0;
 	u32 reset_state;
 	int rc = 0;
 
 	adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
+	dev = &adapter->vdev->dev;
+
+	/* Wait for ibmvnic_probe() to complete. If probe is taking too long
+	 * or if another reset is in progress, defer work for now. If probe
+	 * eventually fails it will flush and terminate our work.
+	 *
+	 * Three possibilities here:
+	 * 1. Adapter being removed - just return
+	 * 2. Timed out on probe or another reset in progress - delay the work
+	 * 3. Completed probe - perform any resets in queue
+	 */
+	if (adapter->state == VNIC_PROBING &&
+	    !wait_for_completion_timeout(&adapter->probe_done, timeout)) {
+		dev_err(dev, "Reset thread timed out on probe\n");
+		queue_delayed_work(system_long_wq,
+				   &adapter->ibmvnic_delayed_reset,
+				   IBMVNIC_RESET_DELAY);
+		return;
+	}
 
-	if (test_and_set_bit_lock(0, &adapter->resetting)) {
-		schedule_delayed_work(&adapter->ibmvnic_delayed_reset,
-				      IBMVNIC_RESET_DELAY);
+	/* adapter is done with probe (i.e state is never VNIC_PROBING now) */
+	if (adapter->state == VNIC_REMOVING)
 		return;
+
+	/* ->rwi_list is stable now (no one else is removing entries) */
+
+	/* ibmvnic_probe() may have purged the reset queue after we were
+	 * scheduled to process a reset so there may be no resets to process.
+	 * Before setting the ->resetting bit though, we have to make sure
+	 * that there is in fact a reset to process.
Otherwise we may race + * with ibmvnic_open() and end up leaving the vnic down: + * + * __ibmvnic_reset() ibmvnic_open() + * ----------------- -------------- + * + * set ->resetting bit + * find ->resetting bit is set + * set ->state to IBMVNIC_OPEN (i.e + * assume reset will open device) + * return + * find reset queue empty + * return + * + * Neither performed vnic login/open and vnic stays down + * + * If we hold the lock and conditionally set the bit, either we + * or ibmvnic_open() will complete the open. + */ + need_reset = false; + spin_lock(&adapter->rwi_lock); + if (!list_empty(&adapter->rwi_list)) { + if (test_and_set_bit_lock(0, &adapter->resetting)) { + queue_delayed_work(system_long_wq, + &adapter->ibmvnic_delayed_reset, + IBMVNIC_RESET_DELAY); + } else { + need_reset = true; + } } + spin_unlock(&adapter->rwi_lock); + + if (!need_reset) + return; rwi = get_next_rwi(adapter); while (rwi) { @@ -2169,15 +2947,23 @@ static void __ibmvnic_reset(struct work_struct *work) if (!saved_state) { reset_state = adapter->state; - adapter->state = VNIC_RESETTING; saved_state = true; } spin_unlock_irqrestore(&adapter->state_lock, flags); - if (rwi->reset_reason == VNIC_RESET_CHANGE_PARAM) { - /* CHANGE_PARAM requestor holds rtnl_lock */ - rc = do_change_param_reset(adapter, rwi, reset_state); + if (rwi->reset_reason == VNIC_RESET_PASSIVE_INIT) { + rtnl_lock(); + rc = do_passive_init(adapter); + rtnl_unlock(); + if (!rc) + netif_carrier_on(adapter->netdev); } else if (adapter->force_reset_recovery) { + /* Since we are doing a hard reset now, clear the + * failover_pending flag so we don't ignore any + * future MOBILITY or other resets. + */ + adapter->failover_pending = false; + /* Transport event occurred during previous reset */ if (adapter->wait_for_reset) { /* Previous was CHANGE_PARAM; caller locked */ @@ -2189,24 +2975,54 @@ static void __ibmvnic_reset(struct work_struct *work) rc = do_hard_reset(adapter, rwi, reset_state); rtnl_unlock(); } + if (rc) + num_fails++; + else + num_fails = 0; + + /* If auto-priority-failover is enabled we can get + * back to back failovers during resets, resulting + * in at least two failed resets (from high-priority + * backing device to low-priority one and then back) + * If resets continue to fail beyond that, give the + * adapter some time to settle down before retrying. + */ + if (num_fails >= 3) { + netdev_dbg(adapter->netdev, + "[S:%s] Hard reset failed %d times, waiting 60 secs\n", + adapter_state_to_string(adapter->state), + num_fails); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(60 * HZ); + } } else { rc = do_reset(adapter, rwi, reset_state); } - kfree(rwi); - if (rc == IBMVNIC_OPEN_FAILED) { - if (list_empty(&adapter->rwi_list)) - adapter->state = VNIC_CLOSED; - else - adapter->state = reset_state; - rc = 0; - } else if (rc && rc != IBMVNIC_INIT_FAILED && - !adapter->force_reset_recovery) - break; + tmprwi = rwi; + adapter->last_reset_time = jiffies; + + if (rc) + netdev_dbg(adapter->netdev, "Reset failed, rc=%d\n", rc); rwi = get_next_rwi(adapter); + /* + * If there are no resets queued and the previous reset failed, + * the adapter would be in an undefined state. So retry the + * previous reset as a hard reset. + * + * Else, free the previous rwi and, if there is another reset + * queued, process the new reset even if previous reset failed + * (the previous reset could have failed because of a fail + * over for instance, so process the fail over). 
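The race diagram above reduces to a simple rule: claim the ->resetting bit only while holding ->rwi_lock, and only when there is really work queued. Stripped of driver specifics, the claiming side looks roughly like this (hypothetical demo_* names, not the ibmvnic code itself):

#include <linux/bitops.h>
#include <linux/list.h>
#include <linux/spinlock.h>

struct demo_resetter {
	spinlock_t lock;		/* protects queue */
	struct list_head queue;		/* pending reset requests */
	unsigned long resetting;	/* bit 0: a worker owns the queue */
};

/* Returns true if the caller now owns reset processing. */
static bool demo_try_claim(struct demo_resetter *r)
{
	bool claimed = false;

	spin_lock(&r->lock);
	/* Only take ownership if there is actually work queued; an
	 * empty queue means the opener must perform the open itself.
	 */
	if (!list_empty(&r->queue) &&
	    !test_and_set_bit_lock(0, &r->resetting))
		claimed = true;
	spin_unlock(&r->lock);

	return claimed;
}

The opener tests the same bit, so exactly one of the two paths ends up responsible for bringing the interface up; the owner releases the bit with clear_bit_unlock() once the queue drains (in the real driver, losing the claim because another reset holds the bit requeues the work instead of dropping it).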
+	 */
+	if (!rwi && rc)
+		rwi = tmprwi;
+	else
+		kfree(tmprwi);
+
 		if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER ||
-			    rwi->reset_reason == VNIC_RESET_MOBILITY))
+			    rwi->reset_reason == VNIC_RESET_MOBILITY || rc))
 			adapter->force_reset_recovery = true;
 	}
 
@@ -2215,12 +3031,13 @@
 		complete(&adapter->reset_done);
 	}
 
-	if (rc) {
-		netdev_dbg(adapter->netdev, "Reset failed\n");
-		free_all_rwi(adapter);
-	}
-
 	clear_bit_unlock(0, &adapter->resetting);
+
+	netdev_dbg(adapter->netdev,
+		   "[S:%s FRR:%d WFR:%d] Done processing resets\n",
+		   adapter_state_to_string(adapter->state),
+		   adapter->force_reset_recovery,
+		   adapter->wait_for_reset);
 }
 
 static void __ibmvnic_delayed_reset(struct work_struct *work)
@@ -2232,36 +3049,45 @@
 	__ibmvnic_reset(&adapter->ibmvnic_reset);
 }
 
+static void flush_reset_queue(struct ibmvnic_adapter *adapter)
+{
+	struct list_head *entry, *tmp_entry;
+
+	if (!list_empty(&adapter->rwi_list)) {
+		list_for_each_safe(entry, tmp_entry, &adapter->rwi_list) {
+			list_del(entry);
+			kfree(list_entry(entry, struct ibmvnic_rwi, list));
+		}
+	}
+}
+
 static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
 			 enum ibmvnic_reset_reason reason)
 {
-	struct list_head *entry, *tmp_entry;
-	struct ibmvnic_rwi *rwi, *tmp;
 	struct net_device *netdev = adapter->netdev;
+	struct ibmvnic_rwi *rwi, *tmp;
 	unsigned long flags;
 	int ret;
 
+	spin_lock_irqsave(&adapter->rwi_lock, flags);
+
+	/* If failover is pending don't schedule any other reset.
+	 * Instead let the failover complete. If there is already a
+	 * failover reset scheduled, we will detect and drop the
+	 * duplicate reset when walking the ->rwi_list below.
+	 */
 	if (adapter->state == VNIC_REMOVING ||
 	    adapter->state == VNIC_REMOVED ||
-	    adapter->failover_pending) {
+	    (adapter->failover_pending && reason != VNIC_RESET_FAILOVER)) {
 		ret = EBUSY;
 		netdev_dbg(netdev, "Adapter removing or pending failover, skipping reset\n");
 		goto err;
 	}
 
-	if (adapter->state == VNIC_PROBING) {
-		netdev_warn(netdev, "Adapter reset during probe\n");
-		ret = adapter->init_done_rc = EAGAIN;
-		goto err;
-	}
-
-	spin_lock_irqsave(&adapter->rwi_lock, flags);
-
-	list_for_each(entry, &adapter->rwi_list) {
-		tmp = list_entry(entry, struct ibmvnic_rwi, list);
+	list_for_each_entry(tmp, &adapter->rwi_list, list) {
 		if (tmp->reset_reason == reason) {
-			netdev_dbg(netdev, "Skipping matching reset\n");
-			spin_unlock_irqrestore(&adapter->rwi_lock, flags);
+			netdev_dbg(netdev, "Skipping matching reset, reason=%s\n",
+				   reset_reason_to_string(reason));
 			ret = EBUSY;
 			goto err;
 		}
@@ -2269,26 +3095,29 @@
 
 	rwi = kzalloc(sizeof(*rwi), GFP_ATOMIC);
 	if (!rwi) {
-		spin_unlock_irqrestore(&adapter->rwi_lock, flags);
-		ibmvnic_close(netdev);
 		ret = ENOMEM;
 		goto err;
 	}
 	/* if we just received a transport event,
 	 * flush reset queue and process this reset
 	 */
-	if (adapter->force_reset_recovery && !list_empty(&adapter->rwi_list)) {
-		list_for_each_safe(entry, tmp_entry, &adapter->rwi_list)
-			list_del(entry);
-	}
+	if (adapter->force_reset_recovery)
+		flush_reset_queue(adapter);
+
 	rwi->reset_reason = reason;
 	list_add_tail(&rwi->list, &adapter->rwi_list);
-	spin_unlock_irqrestore(&adapter->rwi_lock, flags);
-	netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason);
-	schedule_work(&adapter->ibmvnic_reset);
+	netdev_dbg(adapter->netdev, "Scheduling reset (reason %s)\n",
+		   reset_reason_to_string(reason));
+
queue_work(system_long_wq, &adapter->ibmvnic_reset); - return 0; + ret = 0; err: + /* ibmvnic_close() below can block, so drop the lock first */ + spin_unlock_irqrestore(&adapter->rwi_lock, flags); + + if (ret == ENOMEM) + ibmvnic_close(netdev); + return -ret; } @@ -2296,6 +3125,18 @@ static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ibmvnic_adapter *adapter = netdev_priv(dev); + if (test_bit(0, &adapter->resetting)) { + netdev_err(adapter->netdev, + "Adapter is resetting, skip timeout reset\n"); + return; + } + /* No queuing up reset until at least 5 seconds (default watchdog val) + * after last reset + */ + if (time_before(jiffies, (adapter->last_reset_time + dev->watchdog_timeo))) { + netdev_dbg(dev, "Not yet time to tx timeout.\n"); + return; + } ibmvnic_reset(adapter, VNIC_RESET_TIMEOUT); } @@ -2314,10 +3155,17 @@ static void remove_buff_from_pool(struct ibmvnic_adapter *adapter, static int ibmvnic_poll(struct napi_struct *napi, int budget) { - struct net_device *netdev = napi->dev; - struct ibmvnic_adapter *adapter = netdev_priv(netdev); - int scrq_num = (int)(napi - adapter->napi); - int frames_processed = 0; + struct ibmvnic_sub_crq_queue *rx_scrq; + struct ibmvnic_adapter *adapter; + struct net_device *netdev; + int frames_processed; + int scrq_num; + + netdev = napi->dev; + adapter = netdev_priv(netdev); + scrq_num = (int)(napi - adapter->napi); + frames_processed = 0; + rx_scrq = adapter->rx_scrq[scrq_num]; restart_poll: while (frames_processed < budget) { @@ -2330,17 +3178,16 @@ restart_poll: if (unlikely(test_bit(0, &adapter->resetting) && adapter->reset_reason != VNIC_RESET_NON_FATAL)) { - enable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]); + enable_scrq_irq(adapter, rx_scrq); napi_complete_done(napi, frames_processed); return frames_processed; } - if (!pending_scrq(adapter, adapter->rx_scrq[scrq_num])) + if (!pending_scrq(adapter, rx_scrq)) break; - next = ibmvnic_next_scrq(adapter, adapter->rx_scrq[scrq_num]); - rx_buff = - (struct ibmvnic_rx_buff *)be64_to_cpu(next-> - rx_comp.correlator); + next = ibmvnic_next_scrq(adapter, rx_scrq); + rx_buff = (struct ibmvnic_rx_buff *) + be64_to_cpu(next->rx_comp.correlator); /* do error checking */ if (next->rx_comp.rc) { netdev_dbg(netdev, "rx buffer returned with rc %x\n", @@ -2361,6 +3208,8 @@ restart_poll: offset = be16_to_cpu(next->rx_comp.off_frame_data); flags = next->rx_comp.flags; skb = rx_buff->skb; + /* load long_term_buff before copying to skb */ + dma_rmb(); skb_copy_to_linear_data(skb, rx_buff->data + offset, length); @@ -2394,16 +3243,20 @@ restart_poll: frames_processed++; } - if (adapter->state != VNIC_CLOSING) + if (adapter->state != VNIC_CLOSING && + ((atomic_read(&adapter->rx_pool[scrq_num].available) < + adapter->req_rx_add_entries_per_subcrq / 2) || + frames_processed < budget)) replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]); - if (frames_processed < budget) { - enable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]); - napi_complete_done(napi, frames_processed); - if (pending_scrq(adapter, adapter->rx_scrq[scrq_num]) && - napi_reschedule(napi)) { - disable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]); - goto restart_poll; + if (napi_complete_done(napi, frames_processed)) { + enable_scrq_irq(adapter, rx_scrq); + if (pending_scrq(adapter, rx_scrq)) { + if (napi_reschedule(napi)) { + disable_scrq_irq(adapter, rx_scrq); + goto restart_poll; + } + } } } return frames_processed; @@ -2527,9 +3380,9 @@ static void ibmvnic_get_drvinfo(struct net_device *netdev, { struct 
ibmvnic_adapter *adapter = netdev_priv(netdev);
 
-	strlcpy(info->driver, ibmvnic_driver_name, sizeof(info->driver));
-	strlcpy(info->version, IBMVNIC_DRIVER_VERSION, sizeof(info->version));
-	strlcpy(info->fw_version, adapter->fw_version,
+	strscpy(info->driver, ibmvnic_driver_name, sizeof(info->driver));
+	strscpy(info->version, IBMVNIC_DRIVER_VERSION, sizeof(info->version));
+	strscpy(info->fw_version, adapter->fw_version,
 		sizeof(info->fw_version));
 }
 
@@ -2558,17 +3411,14 @@ static u32 ibmvnic_get_link(struct net_device *netdev)
 }
 
 static void ibmvnic_get_ringparam(struct net_device *netdev,
-				  struct ethtool_ringparam *ring)
+				  struct ethtool_ringparam *ring,
+				  struct kernel_ethtool_ringparam *kernel_ring,
+				  struct netlink_ext_ack *extack)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
-	if (adapter->priv_flags & IBMVNIC_USE_SERVER_MAXES) {
-		ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq;
-		ring->tx_max_pending = adapter->max_tx_entries_per_subcrq;
-	} else {
-		ring->rx_max_pending = IBMVNIC_MAX_QUEUE_SZ;
-		ring->tx_max_pending = IBMVNIC_MAX_QUEUE_SZ;
-	}
+	ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq;
+	ring->tx_max_pending = adapter->max_tx_entries_per_subcrq;
 	ring->rx_mini_max_pending = 0;
 	ring->rx_jumbo_max_pending = 0;
 	ring->rx_pending = adapter->req_rx_add_entries_per_subcrq;
@@ -2578,26 +3428,26 @@ static void ibmvnic_get_ringparam(struct net_device *netdev,
 }
 
 static int ibmvnic_set_ringparam(struct net_device *netdev,
-				 struct ethtool_ringparam *ring)
+				 struct ethtool_ringparam *ring,
+				 struct kernel_ethtool_ringparam *kernel_ring,
+				 struct netlink_ext_ack *extack)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-	int ret;
 
-	ret = 0;
+	if (ring->rx_pending > adapter->max_rx_add_entries_per_subcrq ||
+	    ring->tx_pending > adapter->max_tx_entries_per_subcrq) {
+		netdev_err(netdev, "Invalid request.\n");
+		netdev_err(netdev, "Max tx buffers = %llu\n",
+			   adapter->max_tx_entries_per_subcrq);
+		netdev_err(netdev, "Max rx buffers = %llu\n",
+			   adapter->max_rx_add_entries_per_subcrq);
+		return -EINVAL;
+	}
+
 	adapter->desired.rx_entries = ring->rx_pending;
 	adapter->desired.tx_entries = ring->tx_pending;
 
-	ret = wait_for_reset(adapter);
-
-	if (!ret &&
-	    (adapter->req_rx_add_entries_per_subcrq != ring->rx_pending ||
-	     adapter->req_tx_entries_per_subcrq != ring->tx_pending))
-		netdev_info(netdev,
-			    "Could not match full ringsize request.
Requested: RX %d, TX %d; Allowed: RX %llu, TX %llu\n", - ring->rx_pending, ring->tx_pending, - adapter->req_rx_add_entries_per_subcrq, - adapter->req_tx_entries_per_subcrq); - return ret; + return wait_for_reset(adapter); } static void ibmvnic_get_channels(struct net_device *netdev, @@ -2605,14 +3455,8 @@ static void ibmvnic_get_channels(struct net_device *netdev, { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - if (adapter->priv_flags & IBMVNIC_USE_SERVER_MAXES) { - channels->max_rx = adapter->max_rx_queues; - channels->max_tx = adapter->max_tx_queues; - } else { - channels->max_rx = IBMVNIC_MAX_QUEUES; - channels->max_tx = IBMVNIC_MAX_QUEUES; - } - + channels->max_rx = adapter->max_rx_queues; + channels->max_tx = adapter->max_tx_queues; channels->max_other = 0; channels->max_combined = 0; channels->rx_count = adapter->req_rx_queues; @@ -2625,23 +3469,11 @@ static int ibmvnic_set_channels(struct net_device *netdev, struct ethtool_channels *channels) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - int ret; - ret = 0; adapter->desired.rx_queues = channels->rx_count; adapter->desired.tx_queues = channels->tx_count; - ret = wait_for_reset(adapter); - - if (!ret && - (adapter->req_rx_queues != channels->rx_count || - adapter->req_tx_queues != channels->tx_count)) - netdev_info(netdev, - "Could not match full channels request. Requested: RX %d, TX %d; Allowed: RX %llu, TX %llu\n", - channels->rx_count, channels->tx_count, - adapter->req_rx_queues, adapter->req_tx_queues); - return ret; - + return wait_for_reset(adapter); } static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data) @@ -2649,43 +3481,32 @@ static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data) struct ibmvnic_adapter *adapter = netdev_priv(dev); int i; - switch (stringset) { - case ETH_SS_STATS: - for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); - i++, data += ETH_GSTRING_LEN) - memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN); + if (stringset != ETH_SS_STATS) + return; - for (i = 0; i < adapter->req_tx_queues; i++) { - snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i); - data += ETH_GSTRING_LEN; + for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++, data += ETH_GSTRING_LEN) + memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN); - snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i); - data += ETH_GSTRING_LEN; + for (i = 0; i < adapter->req_tx_queues; i++) { + snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i); + data += ETH_GSTRING_LEN; - snprintf(data, ETH_GSTRING_LEN, - "tx%d_dropped_packets", i); - data += ETH_GSTRING_LEN; - } + snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i); + data += ETH_GSTRING_LEN; - for (i = 0; i < adapter->req_rx_queues; i++) { - snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i); - data += ETH_GSTRING_LEN; + snprintf(data, ETH_GSTRING_LEN, "tx%d_dropped_packets", i); + data += ETH_GSTRING_LEN; + } - snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i); - data += ETH_GSTRING_LEN; + for (i = 0; i < adapter->req_rx_queues; i++) { + snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i); + data += ETH_GSTRING_LEN; - snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i); - data += ETH_GSTRING_LEN; - } - break; + snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i); + data += ETH_GSTRING_LEN; - case ETH_SS_PRIV_FLAGS: - for (i = 0; i < ARRAY_SIZE(ibmvnic_priv_flags); i++) - strcpy(data + i * ETH_GSTRING_LEN, - ibmvnic_priv_flags[i]); - break; - default: - return; + snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i); + data += ETH_GSTRING_LEN; } } @@ -2698,8 
+3519,6 @@ static int ibmvnic_get_sset_count(struct net_device *dev, int sset) return ARRAY_SIZE(ibmvnic_stats) + adapter->req_tx_queues * NUM_TX_STATS + adapter->req_rx_queues * NUM_RX_STATS; - case ETH_SS_PRIV_FLAGS: - return ARRAY_SIZE(ibmvnic_priv_flags); default: return -EOPNOTSUPP; } @@ -2730,8 +3549,8 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev, return; for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++) - data[i] = be64_to_cpu(IBMVNIC_GET_STAT(adapter, - ibmvnic_stats[i].offset)); + data[i] = be64_to_cpu(IBMVNIC_GET_STAT + (adapter, ibmvnic_stats[i].offset)); for (j = 0; j < adapter->req_tx_queues; j++) { data[i] = adapter->tx_stats_buffers[j].packets; @@ -2752,25 +3571,6 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev, } } -static u32 ibmvnic_get_priv_flags(struct net_device *netdev) -{ - struct ibmvnic_adapter *adapter = netdev_priv(netdev); - - return adapter->priv_flags; -} - -static int ibmvnic_set_priv_flags(struct net_device *netdev, u32 flags) -{ - struct ibmvnic_adapter *adapter = netdev_priv(netdev); - bool which_maxes = !!(flags & IBMVNIC_USE_SERVER_MAXES); - - if (which_maxes) - adapter->priv_flags |= IBMVNIC_USE_SERVER_MAXES; - else - adapter->priv_flags &= ~IBMVNIC_USE_SERVER_MAXES; - - return 0; -} static const struct ethtool_ops ibmvnic_ethtool_ops = { .get_drvinfo = ibmvnic_get_drvinfo, .get_msglevel = ibmvnic_get_msglevel, @@ -2784,8 +3584,6 @@ static const struct ethtool_ops ibmvnic_ethtool_ops = { .get_sset_count = ibmvnic_get_sset_count, .get_ethtool_stats = ibmvnic_get_ethtool_stats, .get_link_ksettings = ibmvnic_get_link_ksettings, - .get_priv_flags = ibmvnic_get_priv_flags, - .set_priv_flags = ibmvnic_set_priv_flags, }; /* Routines for managing CRQs/sCRQs */ @@ -2795,15 +3593,26 @@ static int reset_one_sub_crq_queue(struct ibmvnic_adapter *adapter, { int rc; + if (!scrq) { + netdev_dbg(adapter->netdev, "Invalid scrq reset.\n"); + return -EINVAL; + } + if (scrq->irq) { free_irq(scrq->irq, scrq); irq_dispose_mapping(scrq->irq); scrq->irq = 0; } - memset(scrq->msgs, 0, 4 * PAGE_SIZE); - atomic_set(&scrq->used, 0); - scrq->cur = 0; + if (scrq->msgs) { + memset(scrq->msgs, 0, 4 * PAGE_SIZE); + atomic_set(&scrq->used, 0); + scrq->cur = 0; + scrq->ind_buf.index = 0; + } else { + netdev_dbg(adapter->netdev, "Invalid scrq reset\n"); + return -EINVAL; + } rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token, 4 * PAGE_SIZE, &scrq->crq_num, &scrq->hw_irq); @@ -2814,6 +3623,9 @@ static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter) { int i, rc; + if (!adapter->tx_scrq || !adapter->rx_scrq) + return -EINVAL; + for (i = 0; i < adapter->req_tx_queues; i++) { netdev_dbg(adapter->netdev, "Re-setting tx_scrq[%d]\n", i); rc = reset_one_sub_crq_queue(adapter, adapter->tx_scrq[i]); @@ -2855,6 +3667,11 @@ static void release_sub_crq_queue(struct ibmvnic_adapter *adapter, } } + dma_free_coherent(dev, + IBMVNIC_IND_ARR_SZ, + scrq->ind_buf.indir_arr, + scrq->ind_buf.indir_dma); + dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE, DMA_BIDIRECTIONAL); free_pages((unsigned long)scrq->msgs, 2); @@ -2901,6 +3718,17 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter scrq->adapter = adapter; scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs); + scrq->ind_buf.index = 0; + + scrq->ind_buf.indir_arr = + dma_alloc_coherent(dev, + IBMVNIC_IND_ARR_SZ, + &scrq->ind_buf.indir_dma, + GFP_KERNEL); + + if (!scrq->ind_buf.indir_arr) + goto indir_failed; + spin_lock_init(&scrq->lock); netdev_dbg(adapter->netdev, @@ 
-2909,6 +3737,12 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
 
 	return scrq;
 
+indir_failed:
+	do {
+		rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
+					adapter->vdev->unit_address,
+					scrq->crq_num);
+	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
 reg_failed:
 	dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
 			 DMA_BIDIRECTIONAL);
@@ -2931,6 +3765,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free)
 
 			netdev_dbg(adapter->netdev, "Releasing tx_scrq[%d]\n",
 				   i);
+			ibmvnic_tx_scrq_clean_buffer(adapter, adapter->tx_scrq[i]);
 			if (adapter->tx_scrq[i]->irq) {
 				free_irq(adapter->tx_scrq[i]->irq,
 					 adapter->tx_scrq[i]);
@@ -2985,6 +3820,30 @@ static int disable_scrq_irq(struct ibmvnic_adapter *adapter,
 	return rc;
 }
 
+/* We cannot use the IRQ chip EOI handler because that has the
+ * unintended effect of changing the interrupt priority.
+ */
+static void ibmvnic_xics_eoi(struct device *dev, struct ibmvnic_sub_crq_queue *scrq)
+{
+	u64 val = 0xff000000 | scrq->hw_irq;
+	unsigned long rc;
+
+	rc = plpar_hcall_norets(H_EOI, val);
+	if (rc)
+		dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", val, rc);
+}
+
+/* Due to a firmware bug, the hypervisor can send an interrupt to a
+ * transmit or receive queue just prior to a partition migration.
+ * Force an EOI after migration.
+ */
+static void ibmvnic_clear_pending_interrupt(struct device *dev,
+					    struct ibmvnic_sub_crq_queue *scrq)
+{
+	if (!xive_enabled())
+		ibmvnic_xics_eoi(dev, scrq);
+}
+
 static int enable_scrq_irq(struct ibmvnic_adapter *adapter,
 			   struct ibmvnic_sub_crq_queue *scrq)
 {
@@ -2998,15 +3857,7 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter,
 
 	if (test_bit(0, &adapter->resetting) &&
 	    adapter->reset_reason == VNIC_RESET_MOBILITY) {
-		u64 val = (0xff000000) | scrq->hw_irq;
-
-		rc = plpar_hcall_norets(H_EOI, val);
-		/* H_EOI would fail with rc = H_FUNCTION when running
-		 * in XIVE mode which is expected, but not an error.
-		 */
-		if (rc && (rc != H_FUNCTION))
-			dev_err(dev, "H_EOI FAILED irq 0x%llx.
rc=%ld\n", - val, rc); + ibmvnic_clear_pending_interrupt(dev, scrq); } rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, @@ -3023,22 +3874,20 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, struct device *dev = &adapter->vdev->dev; struct ibmvnic_tx_pool *tx_pool; struct ibmvnic_tx_buff *txbuff; + struct netdev_queue *txq; union sub_crq *next; int index; - int i, j; + int i; restart_loop: while (pending_scrq(adapter, scrq)) { unsigned int pool = scrq->pool_index; int num_entries = 0; + int total_bytes = 0; + int num_packets = 0; next = ibmvnic_next_scrq(adapter, scrq); for (i = 0; i < next->tx_comp.num_comps; i++) { - if (next->tx_comp.rcs[i]) { - dev_err(dev, "tx error %x\n", - next->tx_comp.rcs[i]); - continue; - } index = be32_to_cpu(next->tx_comp.correlators[i]); if (index & IBMVNIC_TSO_POOL_MASK) { tx_pool = &adapter->tso_pool[pool]; @@ -3048,21 +3897,22 @@ restart_loop: } txbuff = &tx_pool->tx_buff[index]; - - for (j = 0; j < IBMVNIC_MAX_FRAGS_PER_CRQ; j++) { - if (!txbuff->data_dma[j]) - continue; - - txbuff->data_dma[j] = 0; - } - - if (txbuff->last_frag) { - dev_kfree_skb_any(txbuff->skb); + num_packets++; + num_entries += txbuff->num_entries; + if (txbuff->skb) { + total_bytes += txbuff->skb->len; + if (next->tx_comp.rcs[i]) { + dev_err(dev, "tx error %x\n", + next->tx_comp.rcs[i]); + dev_kfree_skb_irq(txbuff->skb); + } else { + dev_consume_skb_irq(txbuff->skb); + } txbuff->skb = NULL; + } else { + netdev_warn(adapter->netdev, + "TX completion received with NULL socket buffer\n"); } - - num_entries += txbuff->num_entries; - tx_pool->free_map[tx_pool->producer_index] = index; tx_pool->producer_index = (tx_pool->producer_index + 1) % @@ -3071,13 +3921,22 @@ restart_loop: /* remove tx_comp scrq*/ next->tx_comp.first = 0; + txq = netdev_get_tx_queue(adapter->netdev, scrq->pool_index); + netdev_tx_completed_queue(txq, num_packets, total_bytes); + if (atomic_sub_return(num_entries, &scrq->used) <= (adapter->req_tx_entries_per_subcrq / 2) && __netif_subqueue_stopped(adapter->netdev, scrq->pool_index)) { - netif_wake_subqueue(adapter->netdev, scrq->pool_index); - netdev_dbg(adapter->netdev, "Started queue %d\n", - scrq->pool_index); + rcu_read_lock(); + if (adapter->tx_queues_active) { + netif_wake_subqueue(adapter->netdev, + scrq->pool_index); + netdev_dbg(adapter->netdev, + "Started queue %d\n", + scrq->pool_index); + } + rcu_read_unlock(); } } @@ -3187,7 +4046,7 @@ req_rx_irq_failed: req_tx_irq_failed: for (j = 0; j < i; j++) { free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]); - irq_dispose_mapping(adapter->rx_scrq[j]->irq); + irq_dispose_mapping(adapter->tx_scrq[j]->irq); } release_sub_crqs(adapter, 1); return rc; @@ -3206,7 +4065,7 @@ static int init_sub_crqs(struct ibmvnic_adapter *adapter) allqueues = kcalloc(total_queues, sizeof(*allqueues), GFP_KERNEL); if (!allqueues) - return -1; + return -ENOMEM; for (i = 0; i < total_queues; i++) { allqueues[i] = init_sub_crq_queue(adapter); @@ -3275,19 +4134,33 @@ tx_failed: for (i = 0; i < registered_queues; i++) release_sub_crq_queue(adapter, allqueues[i], 1); kfree(allqueues); - return -1; + return -ENOMEM; } -static void ibmvnic_send_req_caps(struct ibmvnic_adapter *adapter, int retry) +static void send_request_cap(struct ibmvnic_adapter *adapter, int retry) { struct device *dev = &adapter->vdev->dev; union ibmvnic_crq crq; int max_entries; + int cap_reqs; + + /* We send out 6 or 7 REQUEST_CAPABILITY CRQs below (depending on + * the PROMISC flag). Initialize this count upfront. 
When the tasklet + * receives a response to all of these, it will send the next protocol + * message (QUERY_IP_OFFLOAD). + */ + if (!(adapter->netdev->flags & IFF_PROMISC) || + adapter->promisc_supported) + cap_reqs = 7; + else + cap_reqs = 6; if (!retry) { /* Sub-CRQ entries are 32 byte long */ int entries_page = 4 * PAGE_SIZE / (sizeof(u64) * 4); + atomic_set(&adapter->running_cap_crqs, cap_reqs); + if (adapter->min_tx_entries_per_subcrq > entries_page || adapter->min_rx_add_entries_per_subcrq > entries_page) { dev_err(dev, "Fatal, invalid entries per sub-crq\n"); @@ -3306,16 +4179,16 @@ static void ibmvnic_send_req_caps(struct ibmvnic_adapter *adapter, int retry) adapter->desired.rx_entries = adapter->max_rx_add_entries_per_subcrq; - max_entries = IBMVNIC_MAX_LTB_SIZE / + max_entries = IBMVNIC_LTB_SET_SIZE / (adapter->req_mtu + IBMVNIC_BUFFER_HLEN); if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * - adapter->desired.tx_entries > IBMVNIC_MAX_LTB_SIZE) { + adapter->desired.tx_entries > IBMVNIC_LTB_SET_SIZE) { adapter->desired.tx_entries = max_entries; } if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * - adapter->desired.rx_entries > IBMVNIC_MAX_LTB_SIZE) { + adapter->desired.rx_entries > IBMVNIC_LTB_SET_SIZE) { adapter->desired.rx_entries = max_entries; } @@ -3348,44 +4221,45 @@ static void ibmvnic_send_req_caps(struct ibmvnic_adapter *adapter, int retry) adapter->opt_rx_comp_queues; adapter->req_rx_add_queues = adapter->max_rx_add_queues; + } else { + atomic_add(cap_reqs, &adapter->running_cap_crqs); } - memset(&crq, 0, sizeof(crq)); crq.request_capability.first = IBMVNIC_CRQ_CMD; crq.request_capability.cmd = REQUEST_CAPABILITY; crq.request_capability.capability = cpu_to_be16(REQ_TX_QUEUES); crq.request_capability.number = cpu_to_be64(adapter->req_tx_queues); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_RX_QUEUES); crq.request_capability.number = cpu_to_be64(adapter->req_rx_queues); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_QUEUES); crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_queues); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_TX_ENTRIES_PER_SUBCRQ); crq.request_capability.number = cpu_to_be64(adapter->req_tx_entries_per_subcrq); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_ENTRIES_PER_SUBCRQ); crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_entries_per_subcrq); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_MTU); crq.request_capability.number = cpu_to_be64(adapter->req_mtu); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); if (adapter->netdev->flags & IFF_PROMISC) { @@ -3393,27 +4267,37 @@ static void ibmvnic_send_req_caps(struct ibmvnic_adapter *adapter, int retry) crq.request_capability.capability = cpu_to_be16(PROMISC_REQUESTED); crq.request_capability.number = cpu_to_be64(1); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); } } else { crq.request_capability.capability = cpu_to_be16(PROMISC_REQUESTED); crq.request_capability.number = cpu_to_be64(0); - 
atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); } + + /* Keep at end to catch any discrepancy between expected and actual + * CRQs sent. + */ + WARN_ON(cap_reqs != 0); } static int pending_scrq(struct ibmvnic_adapter *adapter, struct ibmvnic_sub_crq_queue *scrq) { union sub_crq *entry = &scrq->msgs[scrq->cur]; + int rc; - if (entry->generic.first & IBMVNIC_CRQ_CMD_RSP) - return 1; - else - return 0; + rc = !!(entry->generic.first & IBMVNIC_CRQ_CMD_RSP); + + /* Ensure that the SCRQ valid flag is loaded prior to loading the + * contents of the SCRQ descriptor + */ + dma_rmb(); + + return rc; } static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *adapter, @@ -3432,6 +4316,11 @@ static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *adapter, } spin_unlock_irqrestore(&scrq->lock, flags); + /* Ensure that the SCRQ valid flag is loaded prior to loading the + * contents of the SCRQ descriptor + */ + dma_rmb(); + return entry; } @@ -3470,38 +4359,6 @@ static void print_subcrq_error(struct device *dev, int rc, const char *func) } } -static int send_subcrq(struct ibmvnic_adapter *adapter, u64 remote_handle, - union sub_crq *sub_crq) -{ - unsigned int ua = adapter->vdev->unit_address; - struct device *dev = &adapter->vdev->dev; - u64 *u64_crq = (u64 *)sub_crq; - int rc; - - netdev_dbg(adapter->netdev, - "Sending sCRQ %016lx: %016lx %016lx %016lx %016lx\n", - (unsigned long int)cpu_to_be64(remote_handle), - (unsigned long int)cpu_to_be64(u64_crq[0]), - (unsigned long int)cpu_to_be64(u64_crq[1]), - (unsigned long int)cpu_to_be64(u64_crq[2]), - (unsigned long int)cpu_to_be64(u64_crq[3])); - - /* Make sure the hypervisor sees the complete request */ - mb(); - - rc = plpar_hcall_norets(H_SEND_SUB_CRQ, ua, - cpu_to_be64(remote_handle), - cpu_to_be64(u64_crq[0]), - cpu_to_be64(u64_crq[1]), - cpu_to_be64(u64_crq[2]), - cpu_to_be64(u64_crq[3])); - - if (rc) - print_subcrq_error(dev, rc, __func__); - - return rc; -} - static int send_subcrq_indirect(struct ibmvnic_adapter *adapter, u64 remote_handle, u64 ioba, u64 num_entries) { @@ -3510,7 +4367,7 @@ static int send_subcrq_indirect(struct ibmvnic_adapter *adapter, int rc; /* Make sure the hypervisor sees the complete request */ - mb(); + dma_wmb(); rc = plpar_hcall_norets(H_SEND_SUB_CRQ_INDIRECT, ua, cpu_to_be64(remote_handle), ioba, num_entries); @@ -3530,8 +4387,8 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter, int rc; netdev_dbg(adapter->netdev, "Sending CRQ: %016lx %016lx\n", - (unsigned long int)cpu_to_be64(u64_crq[0]), - (unsigned long int)cpu_to_be64(u64_crq[1])); + (unsigned long)cpu_to_be64(u64_crq[0]), + (unsigned long)cpu_to_be64(u64_crq[1])); if (!adapter->crq.active && crq->generic.first != IBMVNIC_CRQ_INIT_CMD) { @@ -3540,7 +4397,7 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter, } /* Make sure the hypervisor sees the complete request */ - mb(); + dma_wmb(); rc = plpar_hcall_norets(H_SEND_CRQ, ua, cpu_to_be64(u64_crq[0]), @@ -3549,8 +4406,7 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter, if (rc) { if (rc == H_CLOSED) { dev_warn(dev, "CRQ Queue closed\n"); - if (test_bit(0, &adapter->resetting)) - ibmvnic_reset(adapter, VNIC_RESET_FATAL); + /* do not reset, report the fail, wait for passive init from server */ } dev_warn(dev, "Send error (rc=%d)\n", rc); @@ -3561,26 +4417,31 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter, static int ibmvnic_send_crq_init(struct ibmvnic_adapter *adapter) { + struct device *dev = 
&adapter->vdev->dev; union ibmvnic_crq crq; + int retries = 100; + int rc; memset(&crq, 0, sizeof(crq)); crq.generic.first = IBMVNIC_CRQ_INIT_CMD; crq.generic.cmd = IBMVNIC_CRQ_INIT; netdev_dbg(adapter->netdev, "Sending CRQ init\n"); - return ibmvnic_send_crq(adapter, &crq); -} + do { + rc = ibmvnic_send_crq(adapter, &crq); + if (rc != H_CLOSED) + break; + retries--; + msleep(50); -static int send_version_xchg(struct ibmvnic_adapter *adapter) -{ - union ibmvnic_crq crq; + } while (retries > 0); - memset(&crq, 0, sizeof(crq)); - crq.version_exchange.first = IBMVNIC_CRQ_CMD; - crq.version_exchange.cmd = VERSION_EXCHANGE; - crq.version_exchange.version = cpu_to_be16(ibmvnic_version); + if (rc) { + dev_err(dev, "Failed to send init request, rc = %d\n", rc); + return rc; + } - return ibmvnic_send_crq(adapter, &crq); + return 0; } struct vnic_login_client_data { @@ -3615,21 +4476,21 @@ static void vnic_add_client_data(struct ibmvnic_adapter *adapter, vlcd->type = 1; len = strlen(os_name) + 1; vlcd->len = cpu_to_be16(len); - strncpy(vlcd->name, os_name, len); + strscpy(vlcd->name, os_name, len); vlcd = (struct vnic_login_client_data *)(vlcd->name + len); /* Type 2 - LPAR name */ vlcd->type = 2; len = strlen(utsname()->nodename) + 1; vlcd->len = cpu_to_be16(len); - strncpy(vlcd->name, utsname()->nodename, len); + strscpy(vlcd->name, utsname()->nodename, len); vlcd = (struct vnic_login_client_data *)(vlcd->name + len); /* Type 3 - device name */ vlcd->type = 3; len = strlen(adapter->netdev->name) + 1; vlcd->len = cpu_to_be16(len); - strncpy(vlcd->name, adapter->netdev->name, len); + strscpy(vlcd->name, adapter->netdev->name, len); } static int send_login(struct ibmvnic_adapter *adapter) @@ -3637,24 +4498,27 @@ static int send_login(struct ibmvnic_adapter *adapter) struct ibmvnic_login_rsp_buffer *login_rsp_buffer; struct ibmvnic_login_buffer *login_buffer; struct device *dev = &adapter->vdev->dev; + struct vnic_login_client_data *vlcd; dma_addr_t rsp_buffer_token; dma_addr_t buffer_token; size_t rsp_buffer_size; union ibmvnic_crq crq; + int client_data_len; size_t buffer_size; __be64 *tx_list_p; __be64 *rx_list_p; - int client_data_len; - struct vnic_login_client_data *vlcd; + int rc; int i; if (!adapter->tx_scrq || !adapter->rx_scrq) { netdev_err(adapter->netdev, "RX or TX queues are not allocated, device login failed\n"); - return -1; + return -ENOMEM; } + release_login_buffer(adapter); release_login_rsp_buffer(adapter); + client_data_len = vnic_client_data_len(adapter); buffer_size = @@ -3717,15 +4581,15 @@ static int send_login(struct ibmvnic_adapter *adapter) for (i = 0; i < adapter->req_tx_queues; i++) { if (adapter->tx_scrq[i]) { - tx_list_p[i] = cpu_to_be64(adapter->tx_scrq[i]-> - crq_num); + tx_list_p[i] = + cpu_to_be64(adapter->tx_scrq[i]->crq_num); } } for (i = 0; i < adapter->req_rx_queues; i++) { if (adapter->rx_scrq[i]) { - rx_list_p[i] = cpu_to_be64(adapter->rx_scrq[i]-> - crq_num); + rx_list_p[i] = + cpu_to_be64(adapter->rx_scrq[i]->crq_num); } } @@ -3741,7 +4605,7 @@ static int send_login(struct ibmvnic_adapter *adapter) netdev_dbg(adapter->netdev, "Login Buffer:\n"); for (i = 0; i < (adapter->login_buf_sz - 1) / 8 + 1; i++) { netdev_dbg(adapter->netdev, "%016lx\n", - ((unsigned long int *)(adapter->login_buf))[i]); + ((unsigned long *)(adapter->login_buf))[i]); } memset(&crq, 0, sizeof(crq)); @@ -3749,18 +4613,27 @@ static int send_login(struct ibmvnic_adapter *adapter) crq.login.cmd = LOGIN; crq.login.ioba = cpu_to_be32(buffer_token); crq.login.len = cpu_to_be32(buffer_size); - 
ibmvnic_send_crq(adapter, &crq);
+
+	adapter->login_pending = true;
+	rc = ibmvnic_send_crq(adapter, &crq);
+	if (rc) {
+		adapter->login_pending = false;
+		netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc);
+		goto buf_rsp_map_failed;
+	}
 
 	return 0;
 
 buf_rsp_map_failed:
 	kfree(login_rsp_buffer);
+	adapter->login_rsp_buf = NULL;
 buf_rsp_alloc_failed:
 	dma_unmap_single(dev, buffer_token, buffer_size, DMA_TO_DEVICE);
 buf_map_failed:
 	kfree(login_buffer);
+	adapter->login_buf = NULL;
 buf_alloc_failed:
-	return -1;
+	return -ENOMEM;
 }
 
 static int send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr,
@@ -3788,7 +4661,7 @@ static int send_request_unmap(struct ibmvnic_adapter *adapter, u8 map_id)
 	return ibmvnic_send_crq(adapter, &crq);
 }
 
-static void send_map_query(struct ibmvnic_adapter *adapter)
+static void send_query_map(struct ibmvnic_adapter *adapter)
 {
 	union ibmvnic_crq crq;
 
@@ -3799,120 +4672,241 @@ static void send_map_query(struct ibmvnic_adapter *adapter)
 }
 
 /* Send a series of CRQs requesting various capabilities of the VNIC server */
-static void send_cap_queries(struct ibmvnic_adapter *adapter)
+static void send_query_cap(struct ibmvnic_adapter *adapter)
 {
 	union ibmvnic_crq crq;
+	int cap_reqs;
+
+	/* We send out 25 QUERY_CAPABILITY CRQs below. Initialize this count
+	 * upfront. When the tasklet receives a response to all of these, it
+	 * can send out the next protocol message (REQUEST_CAPABILITY).
+	 */
+	cap_reqs = 25;
+
+	atomic_set(&adapter->running_cap_crqs, cap_reqs);
 
-	atomic_set(&adapter->running_cap_crqs, 0);
 	memset(&crq, 0, sizeof(crq));
 	crq.query_capability.first = IBMVNIC_CRQ_CMD;
 	crq.query_capability.cmd = QUERY_CAPABILITY;
 
 	crq.query_capability.capability = cpu_to_be16(MIN_TX_QUEUES);
-	atomic_inc(&adapter->running_cap_crqs);
 	ibmvnic_send_crq(adapter, &crq);
+	cap_reqs--;
 
 	crq.query_capability.capability = cpu_to_be16(MIN_RX_QUEUES);
-	atomic_inc(&adapter->running_cap_crqs);
 	ibmvnic_send_crq(adapter, &crq);
+	cap_reqs--;
 
 	crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_QUEUES);
-	atomic_inc(&adapter->running_cap_crqs);
 	ibmvnic_send_crq(adapter, &crq);
+	cap_reqs--;
 
 	crq.query_capability.capability = cpu_to_be16(MAX_TX_QUEUES);
-	atomic_inc(&adapter->running_cap_crqs);
 	ibmvnic_send_crq(adapter, &crq);
+	cap_reqs--;
 
 	crq.query_capability.capability = cpu_to_be16(MAX_RX_QUEUES);
-	atomic_inc(&adapter->running_cap_crqs);
 	ibmvnic_send_crq(adapter, &crq);
+	cap_reqs--;
 
 	crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_QUEUES);
-	atomic_inc(&adapter->running_cap_crqs);
 	ibmvnic_send_crq(adapter, &crq);
+	cap_reqs--;
 
 	crq.query_capability.capability =
 	    cpu_to_be16(MIN_TX_ENTRIES_PER_SUBCRQ);
-	atomic_inc(&adapter->running_cap_crqs);
 	ibmvnic_send_crq(adapter, &crq);
+	cap_reqs--;
 
 	crq.query_capability.capability =
 	    cpu_to_be16(MIN_RX_ADD_ENTRIES_PER_SUBCRQ);
-	atomic_inc(&adapter->running_cap_crqs);
 	ibmvnic_send_crq(adapter, &crq);
+	cap_reqs--;
 
 	crq.query_capability.capability =
 	    cpu_to_be16(MAX_TX_ENTRIES_PER_SUBCRQ);
-	atomic_inc(&adapter->running_cap_crqs);
 	ibmvnic_send_crq(adapter, &crq);
+	cap_reqs--;
 
 	crq.query_capability.capability =
 	    cpu_to_be16(MAX_RX_ADD_ENTRIES_PER_SUBCRQ);
-	atomic_inc(&adapter->running_cap_crqs);
 	ibmvnic_send_crq(adapter, &crq);
+	cap_reqs--;
 
 	crq.query_capability.capability = cpu_to_be16(TCP_IP_OFFLOAD);
-	atomic_inc(&adapter->running_cap_crqs);
 	ibmvnic_send_crq(adapter, &crq);
+	cap_reqs--;
 
 	crq.query_capability.capability = cpu_to_be16(PROMISC_SUPPORTED);
-	atomic_inc(&adapter->running_cap_crqs);
 	ibmvnic_send_crq(adapter,
&crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_MTU); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_MTU); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_MULTICAST_FILTERS); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(VLAN_HEADER_INSERTION); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(RX_VLAN_HEADER_INSERTION); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_TX_SG_ENTRIES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(RX_SG_SUPPORTED); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_TX_COMP_SUB_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_RX_COMP_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_RX_BUFADD_Q_PER_RX_COMP_Q); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_TX_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_RXBA_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(TX_RX_DESC_REQ); - atomic_inc(&adapter->running_cap_crqs); + + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + /* Keep at end to catch any discrepancy between expected and actual + * CRQs sent. 
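The accounting convention used by send_query_cap() and send_request_cap() is worth spelling out: the expected reply count is published with atomic_set() before the first send, and a local countdown plus the WARN_ON() above proves the function sent exactly that many. In sketch form (hypothetical demo_* names, not the driver's own helpers):

#include <linux/atomic.h>
#include <linux/bug.h>

/* Sender: publish the expected total *before* the first send, then
 * count down locally and assert the books balance at the end.
 */
static void demo_send_queries(atomic_t *outstanding)
{
	int cap_reqs = 3;	/* number of requests sent below */

	atomic_set(outstanding, cap_reqs);	/* visible before any reply */

	/* demo_send(QUERY_A); */
	cap_reqs--;
	/* demo_send(QUERY_B); */
	cap_reqs--;
	/* demo_send(QUERY_C); */
	cap_reqs--;

	WARN_ON(cap_reqs != 0);	/* catch a missed or duplicated send */
}

/* Response path: the reply that drains the counter advances the
 * protocol state machine.
 */
static void demo_handle_reply(atomic_t *outstanding)
{
	if (atomic_dec_return(outstanding) == 0) {
		/* all replies received: send the next protocol message */
	}
}

Publishing the total upfront is what prevents the response path from seeing the counter momentarily hit zero between two sends, which the old per-send atomic_inc() scheme allowed.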
+ */ + WARN_ON(cap_reqs != 0); +} + +static void send_query_ip_offload(struct ibmvnic_adapter *adapter) +{ + int buf_sz = sizeof(struct ibmvnic_query_ip_offload_buffer); + struct device *dev = &adapter->vdev->dev; + union ibmvnic_crq crq; + + adapter->ip_offload_tok = + dma_map_single(dev, + &adapter->ip_offload_buf, + buf_sz, + DMA_FROM_DEVICE); + + if (dma_mapping_error(dev, adapter->ip_offload_tok)) { + if (!firmware_has_feature(FW_FEATURE_CMO)) + dev_err(dev, "Couldn't map offload buffer\n"); + return; + } + + memset(&crq, 0, sizeof(crq)); + crq.query_ip_offload.first = IBMVNIC_CRQ_CMD; + crq.query_ip_offload.cmd = QUERY_IP_OFFLOAD; + crq.query_ip_offload.len = cpu_to_be32(buf_sz); + crq.query_ip_offload.ioba = + cpu_to_be32(adapter->ip_offload_tok); + + ibmvnic_send_crq(adapter, &crq); +} + +static void send_control_ip_offload(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_control_ip_offload_buffer *ctrl_buf = &adapter->ip_offload_ctrl; + struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf; + struct device *dev = &adapter->vdev->dev; + netdev_features_t old_hw_features = 0; + union ibmvnic_crq crq; + + adapter->ip_offload_ctrl_tok = + dma_map_single(dev, + ctrl_buf, + sizeof(adapter->ip_offload_ctrl), + DMA_TO_DEVICE); + + if (dma_mapping_error(dev, adapter->ip_offload_ctrl_tok)) { + dev_err(dev, "Couldn't map ip offload control buffer\n"); + return; + } + + ctrl_buf->len = cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); + ctrl_buf->version = cpu_to_be32(INITIAL_VERSION_IOB); + ctrl_buf->ipv4_chksum = buf->ipv4_chksum; + ctrl_buf->ipv6_chksum = buf->ipv6_chksum; + ctrl_buf->tcp_ipv4_chksum = buf->tcp_ipv4_chksum; + ctrl_buf->udp_ipv4_chksum = buf->udp_ipv4_chksum; + ctrl_buf->tcp_ipv6_chksum = buf->tcp_ipv6_chksum; + ctrl_buf->udp_ipv6_chksum = buf->udp_ipv6_chksum; + ctrl_buf->large_tx_ipv4 = buf->large_tx_ipv4; + ctrl_buf->large_tx_ipv6 = buf->large_tx_ipv6; + + /* large_rx disabled for now, additional features needed */ + ctrl_buf->large_rx_ipv4 = 0; + ctrl_buf->large_rx_ipv6 = 0; + + if (adapter->state != VNIC_PROBING) { + old_hw_features = adapter->netdev->hw_features; + adapter->netdev->hw_features = 0; + } + + adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; + + if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum) + adapter->netdev->hw_features |= NETIF_F_IP_CSUM; + + if (buf->tcp_ipv6_chksum || buf->udp_ipv6_chksum) + adapter->netdev->hw_features |= NETIF_F_IPV6_CSUM; + + if ((adapter->netdev->features & + (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) + adapter->netdev->hw_features |= NETIF_F_RXCSUM; + + if (buf->large_tx_ipv4) + adapter->netdev->hw_features |= NETIF_F_TSO; + if (buf->large_tx_ipv6) + adapter->netdev->hw_features |= NETIF_F_TSO6; + + if (adapter->state == VNIC_PROBING) { + adapter->netdev->features |= adapter->netdev->hw_features; + } else if (old_hw_features != adapter->netdev->hw_features) { + netdev_features_t tmp = 0; + + /* disable features no longer supported */ + adapter->netdev->features &= adapter->netdev->hw_features; + /* turn on features now supported if previously enabled */ + tmp = (old_hw_features ^ adapter->netdev->hw_features) & + adapter->netdev->hw_features; + adapter->netdev->features |= + tmp & adapter->netdev->wanted_features; + } + + memset(&crq, 0, sizeof(crq)); + crq.control_ip_offload.first = IBMVNIC_CRQ_CMD; + crq.control_ip_offload.cmd = CONTROL_IP_OFFLOAD; + crq.control_ip_offload.len = + cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); + crq.control_ip_offload.ioba = 
cpu_to_be32(adapter->ip_offload_ctrl_tok); ibmvnic_send_crq(adapter, &crq); } @@ -3977,7 +4971,7 @@ static void handle_vpd_rsp(union ibmvnic_crq *crq, complete: if (adapter->fw_version[0] == '\0') - strncpy((char *)adapter->fw_version, "N/A", 3 * sizeof(char)); + strscpy((char *)adapter->fw_version, "N/A", sizeof(adapter->fw_version)); complete(&adapter->fw_done); } @@ -3985,8 +4979,6 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) { struct device *dev = &adapter->vdev->dev; struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf; - netdev_features_t old_hw_features = 0; - union ibmvnic_crq crq; int i; dma_unmap_single(dev, adapter->ip_offload_tok, @@ -3995,7 +4987,7 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) netdev_dbg(adapter->netdev, "Query IP Offload Buffer:\n"); for (i = 0; i < (sizeof(adapter->ip_offload_buf) - 1) / 8 + 1; i++) netdev_dbg(adapter->netdev, "%016lx\n", - ((unsigned long int *)(buf))[i]); + ((unsigned long *)(buf))[i]); netdev_dbg(adapter->netdev, "ipv4_chksum = %d\n", buf->ipv4_chksum); netdev_dbg(adapter->netdev, "ipv6_chksum = %d\n", buf->ipv6_chksum); @@ -4036,74 +5028,7 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) netdev_dbg(adapter->netdev, "off_ipv6_ext_hd = %d\n", buf->off_ipv6_ext_headers); - adapter->ip_offload_ctrl_tok = - dma_map_single(dev, &adapter->ip_offload_ctrl, - sizeof(adapter->ip_offload_ctrl), DMA_TO_DEVICE); - - if (dma_mapping_error(dev, adapter->ip_offload_ctrl_tok)) { - dev_err(dev, "Couldn't map ip offload control buffer\n"); - return; - } - - adapter->ip_offload_ctrl.len = - cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); - adapter->ip_offload_ctrl.version = cpu_to_be32(INITIAL_VERSION_IOB); - adapter->ip_offload_ctrl.ipv4_chksum = buf->ipv4_chksum; - adapter->ip_offload_ctrl.ipv6_chksum = buf->ipv6_chksum; - adapter->ip_offload_ctrl.tcp_ipv4_chksum = buf->tcp_ipv4_chksum; - adapter->ip_offload_ctrl.udp_ipv4_chksum = buf->udp_ipv4_chksum; - adapter->ip_offload_ctrl.tcp_ipv6_chksum = buf->tcp_ipv6_chksum; - adapter->ip_offload_ctrl.udp_ipv6_chksum = buf->udp_ipv6_chksum; - adapter->ip_offload_ctrl.large_tx_ipv4 = buf->large_tx_ipv4; - adapter->ip_offload_ctrl.large_tx_ipv6 = buf->large_tx_ipv6; - - /* large_rx disabled for now, additional features needed */ - adapter->ip_offload_ctrl.large_rx_ipv4 = 0; - adapter->ip_offload_ctrl.large_rx_ipv6 = 0; - - if (adapter->state != VNIC_PROBING) { - old_hw_features = adapter->netdev->hw_features; - adapter->netdev->hw_features = 0; - } - - adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; - - if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum) - adapter->netdev->hw_features |= NETIF_F_IP_CSUM; - - if (buf->tcp_ipv6_chksum || buf->udp_ipv6_chksum) - adapter->netdev->hw_features |= NETIF_F_IPV6_CSUM; - - if ((adapter->netdev->features & - (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) - adapter->netdev->hw_features |= NETIF_F_RXCSUM; - - if (buf->large_tx_ipv4) - adapter->netdev->hw_features |= NETIF_F_TSO; - if (buf->large_tx_ipv6) - adapter->netdev->hw_features |= NETIF_F_TSO6; - - if (adapter->state == VNIC_PROBING) { - adapter->netdev->features |= adapter->netdev->hw_features; - } else if (old_hw_features != adapter->netdev->hw_features) { - netdev_features_t tmp = 0; - - /* disable features no longer supported */ - adapter->netdev->features &= adapter->netdev->hw_features; - /* turn on features now supported if previously enabled */ - tmp = (old_hw_features ^ 
adapter->netdev->hw_features) & - adapter->netdev->hw_features; - adapter->netdev->features |= - tmp & adapter->netdev->wanted_features; - } - - memset(&crq, 0, sizeof(crq)); - crq.control_ip_offload.first = IBMVNIC_CRQ_CMD; - crq.control_ip_offload.cmd = CONTROL_IP_OFFLOAD; - crq.control_ip_offload.len = - cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); - crq.control_ip_offload.ioba = cpu_to_be32(adapter->ip_offload_ctrl_tok); - ibmvnic_send_crq(adapter, &crq); + send_control_ip_offload(adapter); } static const char *ibmvnic_fw_err_cause(u16 cause) @@ -4160,7 +5085,11 @@ static int handle_change_mac_rsp(union ibmvnic_crq *crq, dev_err(dev, "Error %ld in CHANGE_MAC_ADDR_RSP\n", rc); goto out; } - ether_addr_copy(netdev->dev_addr, + /* crq->change_mac_addr.mac_addr is the requested one + * crq->change_mac_addr_rsp.mac_addr is the returned valid one. + */ + eth_hw_addr_set(netdev, &crq->change_mac_addr_rsp.mac_addr[0]); + ether_addr_copy(adapter->mac_addr, &crq->change_mac_addr_rsp.mac_addr[0]); out: complete(&adapter->fw_done); @@ -4175,6 +5104,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, char *name; atomic_dec(&adapter->running_cap_crqs); + netdev_dbg(adapter->netdev, "Outstanding request-caps: %d\n", + atomic_read(&adapter->running_cap_crqs)); switch (be16_to_cpu(crq->request_capability_rsp.capability)) { case REQ_TX_QUEUES: req_value = &adapter->req_tx_queues; @@ -4216,8 +5147,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, case PARTIALSUCCESS: dev_info(dev, "req=%lld, rsp=%ld in %s queue, retrying.\n", *req_value, - (long int)be64_to_cpu(crq->request_capability_rsp. - number), name); + (long)be64_to_cpu(crq->request_capability_rsp.number), + name); if (be16_to_cpu(crq->request_capability_rsp.capability) == REQ_MTU) { @@ -4229,7 +5160,7 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, be64_to_cpu(crq->request_capability_rsp.number); } - ibmvnic_send_req_caps(adapter, 1); + send_request_cap(adapter, 1); return; default: dev_err(dev, "Error %d in request cap rsp\n", @@ -4238,32 +5169,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, } /* Done receiving requested capabilities, query IP offload support */ - if (atomic_read(&adapter->running_cap_crqs) == 0) { - union ibmvnic_crq newcrq; - int buf_sz = sizeof(struct ibmvnic_query_ip_offload_buffer); - struct ibmvnic_query_ip_offload_buffer *ip_offload_buf = - &adapter->ip_offload_buf; - - adapter->wait_capability = false; - adapter->ip_offload_tok = dma_map_single(dev, ip_offload_buf, - buf_sz, - DMA_FROM_DEVICE); - - if (dma_mapping_error(dev, adapter->ip_offload_tok)) { - if (!firmware_has_feature(FW_FEATURE_CMO)) - dev_err(dev, "Couldn't map offload buffer\n"); - return; - } - - memset(&newcrq, 0, sizeof(newcrq)); - newcrq.query_ip_offload.first = IBMVNIC_CRQ_CMD; - newcrq.query_ip_offload.cmd = QUERY_IP_OFFLOAD; - newcrq.query_ip_offload.len = cpu_to_be32(buf_sz); - newcrq.query_ip_offload.ioba = - cpu_to_be32(adapter->ip_offload_tok); - - ibmvnic_send_crq(adapter, &newcrq); - } + if (atomic_read(&adapter->running_cap_crqs) == 0) + send_query_ip_offload(adapter); } static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, @@ -4273,8 +5180,22 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, struct net_device *netdev = adapter->netdev; struct ibmvnic_login_rsp_buffer *login_rsp = adapter->login_rsp_buf; struct ibmvnic_login_buffer *login = adapter->login_buf; + u64 *tx_handle_array; + u64 *rx_handle_array; + int num_tx_pools; + int num_rx_pools; + u64 
*size_array; int i; + /* CHECK: Test/set of login_pending does not need to be atomic + * because only ibmvnic_tasklet tests/clears this. + */ + if (!adapter->login_pending) { + netdev_warn(netdev, "Ignoring unexpected login response\n"); + return 0; + } + adapter->login_pending = false; + dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz, DMA_TO_DEVICE); dma_unmap_single(dev, adapter->login_rsp_buf_token, @@ -4290,12 +5211,20 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, return 0; } + if (adapter->failover_pending) { + adapter->init_done_rc = -EAGAIN; + netdev_dbg(netdev, "Failover pending, ignoring login response\n"); + complete(&adapter->init_done); + /* login response buffer will be released on reset */ + return 0; + } + netdev->mtu = adapter->req_mtu - ETH_HLEN; netdev_dbg(adapter->netdev, "Login Response Buffer:\n"); for (i = 0; i < (adapter->login_rsp_buf_sz - 1) / 8 + 1; i++) { netdev_dbg(adapter->netdev, "%016lx\n", - ((unsigned long int *)(adapter->login_rsp_buf))[i]); + ((unsigned long *)(adapter->login_rsp_buf))[i]); } /* Sanity checks */ @@ -4304,9 +5233,33 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, adapter->req_rx_add_queues != be32_to_cpu(login_rsp->num_rxadd_subcrqs))) { dev_err(dev, "FATAL: Inconsistent login and login rsp\n"); - ibmvnic_remove(adapter->vdev); + ibmvnic_reset(adapter, VNIC_RESET_FATAL); return -EIO; } + size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + + be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size)); + /* variable buffer sizes are not supported, so just read the + * first entry. + */ + adapter->cur_rx_buf_sz = be64_to_cpu(size_array[0]); + + num_tx_pools = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs); + num_rx_pools = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); + + tx_handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + + be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs)); + rx_handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + + be32_to_cpu(adapter->login_rsp_buf->off_rxadd_subcrqs)); + + for (i = 0; i < num_tx_pools; i++) + adapter->tx_scrq[i]->handle = tx_handle_array[i]; + + for (i = 0; i < num_rx_pools; i++) + adapter->rx_scrq[i]->handle = rx_handle_array[i]; + + adapter->num_active_tx_scrqs = num_tx_pools; + adapter->num_active_rx_scrqs = num_rx_pools; + release_login_rsp_buffer(adapter); release_login_buffer(adapter); complete(&adapter->init_done); @@ -4336,9 +5289,10 @@ static void handle_query_map_rsp(union ibmvnic_crq *crq, dev_err(dev, "Error %ld in QUERY_MAP_RSP\n", rc); return; } - netdev_dbg(netdev, "page_size = %d\ntot_pages = %d\nfree_pages = %d\n", - crq->query_map_rsp.page_size, crq->query_map_rsp.tot_pages, - crq->query_map_rsp.free_pages); + netdev_dbg(netdev, "page_size = %d\ntot_pages = %u\nfree_pages = %u\n", + crq->query_map_rsp.page_size, + __be32_to_cpu(crq->query_map_rsp.tot_pages), + __be32_to_cpu(crq->query_map_rsp.free_pages)); } static void handle_query_cap_rsp(union ibmvnic_crq *crq, @@ -4514,10 +5468,8 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq, } out: - if (atomic_read(&adapter->running_cap_crqs) == 0) { - adapter->wait_capability = false; - ibmvnic_send_req_caps(adapter, 0); - } + if (atomic_read(&adapter->running_cap_crqs) == 0) + send_request_cap(adapter, 0); } static int send_query_phys_parms(struct ibmvnic_adapter *adapter) @@ -4571,7 +5523,7 @@ static int handle_query_phys_parms_rsp(union ibmvnic_crq *crq, case IBMVNIC_1GBPS: adapter->speed = SPEED_1000; break; - case IBMVNIC_10GBP: + case 
IBMVNIC_10GBPS: adapter->speed = SPEED_10000; break; case IBMVNIC_25GBPS: @@ -4586,6 +5538,9 @@ static int handle_query_phys_parms_rsp(union ibmvnic_crq *crq, case IBMVNIC_100GBPS: adapter->speed = SPEED_100000; break; + case IBMVNIC_200GBPS: + adapter->speed = SPEED_200000; + break; default: if (netif_carrier_ok(netdev)) netdev_warn(netdev, "Unknown speed 0x%08x\n", rspeed); @@ -4611,20 +5566,46 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, long rc; netdev_dbg(netdev, "Handling CRQ: %016lx %016lx\n", - (unsigned long int)cpu_to_be64(u64_crq[0]), - (unsigned long int)cpu_to_be64(u64_crq[1])); + (unsigned long)cpu_to_be64(u64_crq[0]), + (unsigned long)cpu_to_be64(u64_crq[1])); switch (gen_crq->first) { case IBMVNIC_CRQ_INIT_RSP: switch (gen_crq->cmd) { case IBMVNIC_CRQ_INIT: dev_info(dev, "Partner initialized\n"); adapter->from_passive_init = true; - adapter->failover_pending = false; + /* Discard any stale login responses from prev reset. + * CHECK: should we clear even on INIT_COMPLETE? + */ + adapter->login_pending = false; + + if (adapter->state == VNIC_DOWN) + rc = ibmvnic_reset(adapter, VNIC_RESET_PASSIVE_INIT); + else + rc = ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); + + if (rc && rc != -EBUSY) { + /* We were unable to schedule the failover + * reset either because the adapter was still + * probing (eg: during kexec) or we could not + * allocate memory. Clear the failover_pending + * flag since no one else will. We ignore + * EBUSY because it means either FAILOVER reset + * is already scheduled or the adapter is + * being removed. + */ + netdev_err(netdev, + "Error %ld scheduling failover reset\n", + rc); + adapter->failover_pending = false; + } + if (!completion_done(&adapter->init_done)) { + if (!adapter->init_done_rc) + adapter->init_done_rc = -EAGAIN; complete(&adapter->init_done); - adapter->init_done_rc = -EIO; } - ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); + break; case IBMVNIC_CRQ_INIT_COMPLETE: dev_info(dev, "Partner initialization complete\n"); @@ -4645,6 +5626,13 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, adapter->fw_done_rc = -EIO; complete(&adapter->fw_done); } + + /* if we got here during crq-init, retry crq-init */ + if (!completion_done(&adapter->init_done)) { + adapter->init_done_rc = -EAGAIN; + complete(&adapter->init_done); + } + if (!completion_done(&adapter->stats_done)) complete(&adapter->stats_done); if (test_bit(0, &adapter->resetting)) @@ -4677,13 +5665,11 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, dev_err(dev, "Error %ld in VERSION_EXCHG_RSP\n", rc); break; } - dev_info(dev, "Partner protocol version is %d\n", - crq->version_exchange_rsp.version); - if (be16_to_cpu(crq->version_exchange_rsp.version) < - ibmvnic_version) - ibmvnic_version = + ibmvnic_version = be16_to_cpu(crq->version_exchange_rsp.version); - send_cap_queries(adapter); + dev_info(dev, "Partner protocol version is %d\n", + ibmvnic_version); + send_query_cap(adapter); break; case QUERY_CAPABILITY_RSP: handle_query_cap_rsp(crq, adapter); @@ -4780,33 +5766,27 @@ static irqreturn_t ibmvnic_interrupt(int irq, void *instance) return IRQ_HANDLED; } -static void ibmvnic_tasklet(void *data) +static void ibmvnic_tasklet(struct tasklet_struct *t) { - struct ibmvnic_adapter *adapter = data; + struct ibmvnic_adapter *adapter = from_tasklet(adapter, t, tasklet); struct ibmvnic_crq_queue *queue = &adapter->crq; union ibmvnic_crq *crq; unsigned long flags; - bool done = false; spin_lock_irqsave(&queue->lock, flags); - while (!done) { - /* Pull all the valid 
messages off the CRQ */ - while ((crq = ibmvnic_next_crq(adapter)) != NULL) { - ibmvnic_handle_crq(crq, adapter); - crq->generic.first = 0; - } - /* remain in tasklet until all - * capabilities responses are received + /* Pull all the valid messages off the CRQ */ + while ((crq = ibmvnic_next_crq(adapter)) != NULL) { + /* This barrier makes sure ibmvnic_next_crq()'s + * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded + * before ibmvnic_handle_crq()'s + * switch(gen_crq->first) and switch(gen_crq->cmd). */ - if (!adapter->wait_capability) - done = true; + dma_rmb(); + ibmvnic_handle_crq(crq, adapter); + crq->generic.first = 0; } - /* if capabilities CRQ's were sent in this tasklet, the following - * tasklet must wait until all responses are received - */ - if (atomic_read(&adapter->running_cap_crqs) != 0) - adapter->wait_capability = true; + spin_unlock_irqrestore(&queue->lock, flags); } @@ -4838,6 +5818,9 @@ static int ibmvnic_reset_crq(struct ibmvnic_adapter *adapter) } while (rc == H_BUSY || H_IS_LONG_BUSY(rc)); /* Clean out the queue */ + if (!crq->msgs) + return -EINVAL; + memset(crq->msgs, 0, PAGE_SIZE); crq->cur = 0; crq->active = false; @@ -4917,8 +5900,7 @@ static int init_crq_queue(struct ibmvnic_adapter *adapter) retrc = 0; - tasklet_init(&adapter->tasklet, (void *)ibmvnic_tasklet, - (unsigned long)adapter); + tasklet_setup(&adapter->tasklet, (void *)ibmvnic_tasklet); netdev_dbg(adapter->netdev, "registering irq 0x%x\n", vdev->irq); snprintf(crq->name, sizeof(crq->name), "ibmvnic-%x", @@ -4939,6 +5921,9 @@ static int init_crq_queue(struct ibmvnic_adapter *adapter) crq->cur = 0; spin_lock_init(&crq->lock); + /* process any CRQs that were queued before we enabled interrupts */ + tasklet_schedule(&adapter->tasklet); + return retrc; req_irq_failed: @@ -4954,44 +5939,57 @@ map_failed: return retrc; } -static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter) +static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) { struct device *dev = &adapter->vdev->dev; - unsigned long timeout = msecs_to_jiffies(30000); - u64 old_num_rx_queues, old_num_tx_queues; + unsigned long timeout = msecs_to_jiffies(20000); + u64 old_num_rx_queues = adapter->req_rx_queues; + u64 old_num_tx_queues = adapter->req_tx_queues; int rc; adapter->from_passive_init = false; - old_num_rx_queues = adapter->req_rx_queues; - old_num_tx_queues = adapter->req_tx_queues; + rc = ibmvnic_send_crq_init(adapter); + if (rc) { + dev_err(dev, "Send crq init failed with error %d\n", rc); + return rc; + } - reinit_completion(&adapter->init_done); - adapter->init_done_rc = 0; - ibmvnic_send_crq_init(adapter); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { dev_err(dev, "Initialization sequence timed out\n"); - return -1; + return -ETIMEDOUT; } if (adapter->init_done_rc) { release_crq_queue(adapter); + dev_err(dev, "CRQ-init failed, %d\n", adapter->init_done_rc); return adapter->init_done_rc; } if (adapter->from_passive_init) { adapter->state = VNIC_OPEN; adapter->from_passive_init = false; - return -1; + dev_err(dev, "CRQ-init failed, passive-init\n"); + return -EINVAL; } - if (test_bit(0, &adapter->resetting) && !adapter->wait_for_reset && + if (reset && + test_bit(0, &adapter->resetting) && !adapter->wait_for_reset && adapter->reset_reason != VNIC_RESET_MOBILITY) { if (adapter->req_rx_queues != old_num_rx_queues || adapter->req_tx_queues != old_num_tx_queues) { release_sub_crqs(adapter, 0); rc = init_sub_crqs(adapter); } else { + /* no need to reinitialize completely, but we do + * need 
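The CRQ teardown/reset paths retry hypervisor calls that report a busy condition, as in the loop above. A hedged sketch of the idiom, modeled on that loop (the helper name is illustrative):

static int example_free_crq(struct vio_dev *vdev)
{
	long rc;

	/* H_BUSY and the long-busy codes mean "try again", not failure */
	do {
		rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));

	return rc == H_SUCCESS ? 0 : -EIO;
}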
to clean up transmits that were in flight + * when we processed the reset. Failure to do so + * will confound the upper layer, usually TCP, by + * creating the illusion of transmits that are + * awaiting completion. + */ + clean_tx_pools(adapter); + rc = reset_sub_crq_queues(adapter); } } else { @@ -5013,48 +6011,6 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter) return rc; } -static int ibmvnic_init(struct ibmvnic_adapter *adapter) -{ - struct device *dev = &adapter->vdev->dev; - unsigned long timeout = msecs_to_jiffies(30000); - int rc; - - adapter->from_passive_init = false; - - adapter->init_done_rc = 0; - ibmvnic_send_crq_init(adapter); - if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { - dev_err(dev, "Initialization sequence timed out\n"); - return -1; - } - - if (adapter->init_done_rc) { - release_crq_queue(adapter); - return adapter->init_done_rc; - } - - if (adapter->from_passive_init) { - adapter->state = VNIC_OPEN; - adapter->from_passive_init = false; - return -1; - } - - rc = init_sub_crqs(adapter); - if (rc) { - dev_err(dev, "Initialization of sub crqs failed\n"); - release_crq_queue(adapter); - return rc; - } - - rc = init_sub_crq_irqs(adapter); - if (rc) { - dev_err(dev, "Failed to initialize sub crq irqs\n"); - release_crq_queue(adapter); - } - - return rc; -} - static struct device_attribute dev_attr_failover; static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) @@ -5062,6 +6018,8 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) struct ibmvnic_adapter *adapter; struct net_device *netdev; unsigned char *mac_addr_p; + unsigned long flags; + bool init_success; int rc; dev_dbg(&dev->dev, "entering ibmvnic_probe for UA 0x%x\n", @@ -5086,16 +6044,18 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) dev_set_drvdata(&dev->dev, netdev); adapter->vdev = dev; adapter->netdev = netdev; + adapter->login_pending = false; + memset(&adapter->map_ids, 0, sizeof(adapter->map_ids)); + /* map_ids start at 1, so ensure map_id 0 is always "in-use" */ + bitmap_set(adapter->map_ids, 0, 1); ether_addr_copy(adapter->mac_addr, mac_addr_p); - ether_addr_copy(netdev->dev_addr, adapter->mac_addr); + eth_hw_addr_set(netdev, adapter->mac_addr); netdev->irq = dev->irq; netdev->netdev_ops = &ibmvnic_netdev_ops; netdev->ethtool_ops = &ibmvnic_ethtool_ops; SET_NETDEV_DEV(netdev, &dev->dev); - spin_lock_init(&adapter->stats_lock); - INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset); INIT_DELAYED_WORK(&adapter->ibmvnic_delayed_reset, __ibmvnic_delayed_reset); @@ -5103,13 +6063,44 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) spin_lock_init(&adapter->rwi_lock); spin_lock_init(&adapter->state_lock); mutex_init(&adapter->fw_lock); + init_completion(&adapter->probe_done); init_completion(&adapter->init_done); init_completion(&adapter->fw_done); init_completion(&adapter->reset_done); init_completion(&adapter->stats_done); clear_bit(0, &adapter->resetting); + adapter->prev_rx_buf_sz = 0; + adapter->prev_mtu = 0; + init_success = false; do { + reinit_init_done(adapter); + + /* clear any failovers we got in the previous pass + * since we are reinitializing the CRQ + */ + adapter->failover_pending = false; + + /* If we had already initialized CRQ, we may have one or + * more resets queued already. Discard those and release + * the CRQ before initializing the CRQ again. 
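The probe path above seeds the new map_ids bitmap so that map id 0 can never be handed out. A sketch of how such a bitmap supports id allocation, assuming the usual find-first-zero-bit pattern (the helpers are illustrative; the allocator itself is not part of this hunk):

#include <linux/bitmap.h>

static int example_alloc_map_id(struct ibmvnic_adapter *adapter)
{
	int id = find_first_zero_bit(adapter->map_ids, MAX_MAP_ID);

	if (id >= MAX_MAP_ID)
		return -ENOSPC;
	bitmap_set(adapter->map_ids, id, 1);
	return id;	/* never 0: bit 0 was reserved at probe time */
}

static void example_free_map_id(struct ibmvnic_adapter *adapter, int id)
{
	bitmap_clear(adapter->map_ids, id, 1);
}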
+ */ + release_crq_queue(adapter); + + /* Since we are still in PROBING state, __ibmvnic_reset() + * will not access the ->rwi_list and since we released CRQ, + * we won't get _new_ transport events. But there maybe an + * ongoing ibmvnic_reset() call. So serialize access to + * rwi_list. If we win the race, ibvmnic_reset() could add + * a reset after we purged but thats ok - we just may end + * up with an extra reset (i.e similar to having two or more + * resets in the queue at once). + * CHECK. + */ + spin_lock_irqsave(&adapter->rwi_lock, flags); + flush_reset_queue(adapter); + spin_unlock_irqrestore(&adapter->rwi_lock, flags); + rc = init_crq_queue(adapter); if (rc) { dev_err(&dev->dev, "Couldn't initialize crq. rc=%d\n", @@ -5117,10 +6108,16 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) goto ibmvnic_init_fail; } - rc = ibmvnic_init(adapter); - if (rc && rc != EAGAIN) - goto ibmvnic_init_fail; - } while (rc == EAGAIN); + rc = ibmvnic_reset_init(adapter, false); + } while (rc == -EAGAIN); + + /* We are ignoring the error from ibmvnic_reset_init() assuming that the + * partner is not ready. CRQ is not active. When the partner becomes + * ready, we will do the passive init reset. + */ + + if (!rc) + init_success = true; rc = init_stats_buffers(adapter); if (rc) @@ -5130,15 +6127,24 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) if (rc) goto ibmvnic_stats_fail; - netdev->mtu = adapter->req_mtu - ETH_HLEN; - netdev->min_mtu = adapter->min_mtu - ETH_HLEN; - netdev->max_mtu = adapter->max_mtu - ETH_HLEN; - rc = device_create_file(&dev->dev, &dev_attr_failover); if (rc) goto ibmvnic_dev_file_err; netif_carrier_off(netdev); + + if (init_success) { + adapter->state = VNIC_PROBED; + netdev->mtu = adapter->req_mtu - ETH_HLEN; + netdev->min_mtu = adapter->min_mtu - ETH_HLEN; + netdev->max_mtu = adapter->max_mtu - ETH_HLEN; + } else { + adapter->state = VNIC_DOWN; + } + + adapter->wait_for_reset = false; + adapter->last_reset_time = jiffies; + rc = register_netdev(netdev); if (rc) { dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc); @@ -5146,9 +6152,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) } dev_info(&dev->dev, "ibmvnic registered\n"); - adapter->state = VNIC_PROBED; - - adapter->wait_for_reset = false; + complete(&adapter->probe_done); return 0; @@ -5164,31 +6168,53 @@ ibmvnic_stats_fail: ibmvnic_init_fail: release_sub_crqs(adapter, 1); release_crq_queue(adapter); + + /* cleanup worker thread after releasing CRQ so we don't get + * transport events (i.e new work items for the worker thread). + */ + adapter->state = VNIC_REMOVING; + complete(&adapter->probe_done); + flush_work(&adapter->ibmvnic_reset); + flush_delayed_work(&adapter->ibmvnic_delayed_reset); + + flush_reset_queue(adapter); + mutex_destroy(&adapter->fw_lock); free_netdev(netdev); return rc; } -static int ibmvnic_remove(struct vio_dev *dev) +static void ibmvnic_remove(struct vio_dev *dev) { struct net_device *netdev = dev_get_drvdata(&dev->dev); struct ibmvnic_adapter *adapter = netdev_priv(netdev); unsigned long flags; spin_lock_irqsave(&adapter->state_lock, flags); - if (adapter->state == VNIC_RESETTING) { - spin_unlock_irqrestore(&adapter->state_lock, flags); - return -EBUSY; - } + /* If ibmvnic_reset() is scheduling a reset, wait for it to + * finish. Then, set the state to REMOVING to prevent it from + * scheduling any more work and to have reset functions ignore + * any resets that have already been scheduled. 
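flush_reset_queue() is called under rwi_lock above but is not defined in this hunk. A plausible shape, assuming struct ibmvnic_rwi entries are chained on rwi_list through a list_head member (here called "list"); treat this as a sketch, not the driver's exact code:

static void example_flush_reset_queue(struct ibmvnic_adapter *adapter)
{
	struct list_head *entry, *tmp;

	/* caller holds rwi_lock; drop every queued reset work item */
	list_for_each_safe(entry, tmp, &adapter->rwi_list) {
		struct ibmvnic_rwi *rwi;

		rwi = list_entry(entry, struct ibmvnic_rwi, list);
		list_del(entry);
		kfree(rwi);
	}
}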
Drop the lock + * after setting state, so __ibmvnic_reset() which is called + * from the flush_work() below, can make progress. + */ + spin_lock(&adapter->rwi_lock); adapter->state = VNIC_REMOVING; + spin_unlock(&adapter->rwi_lock); + spin_unlock_irqrestore(&adapter->state_lock, flags); + flush_work(&adapter->ibmvnic_reset); + flush_delayed_work(&adapter->ibmvnic_delayed_reset); + rtnl_lock(); unregister_netdevice(netdev); release_resources(adapter); + release_rx_pools(adapter); + release_tx_pools(adapter); release_sub_crqs(adapter, 1); release_crq_queue(adapter); @@ -5202,8 +6228,6 @@ static int ibmvnic_remove(struct vio_dev *dev) device_remove_file(&dev->dev, &dev_attr_failover); free_netdev(netdev); dev_set_drvdata(&dev->dev, NULL); - - return 0; } static ssize_t failover_store(struct device *dev, struct device_attribute *attr, @@ -5223,7 +6247,7 @@ static ssize_t failover_store(struct device *dev, struct device_attribute *attr, if (rc) { netdev_err(netdev, "Couldn't retrieve session token, rc %ld\n", rc); - return -EINVAL; + goto last_resort; } session_token = (__be64)retbuf[0]; @@ -5232,14 +6256,20 @@ static ssize_t failover_store(struct device *dev, struct device_attribute *attr, rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, H_SESSION_ERR_DETECTED, session_token, 0, 0); if (rc) { - netdev_err(netdev, "Client initiated failover failed, rc %ld\n", + netdev_err(netdev, + "H_VIOCTL initiated failover failed, rc %ld\n", rc); - return -EINVAL; + goto last_resort; } return count; -} +last_resort: + netdev_dbg(netdev, "Trying to send CRQ_CMD, the last resort\n"); + ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); + + return count; +} static DEVICE_ATTR_WO(failover); static unsigned long ibmvnic_get_desired_dma(struct vio_dev *vdev) @@ -5264,8 +6294,7 @@ static unsigned long ibmvnic_get_desired_dma(struct vio_dev *vdev) for (i = 0; i < adapter->req_tx_queues + adapter->req_rx_queues; i++) ret += 4 * PAGE_SIZE; /* the scrq message queue */ - for (i = 0; i < be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); - i++) + for (i = 0; i < adapter->num_active_rx_pools; i++) ret += adapter->rx_pool[i].size * IOMMU_PAGE_ALIGN(adapter->rx_pool[i].buff_size, tbl); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index f8416e1d4cf0..e5c6ff3d0c47 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -18,8 +18,6 @@ #define IBMVNIC_NAME "ibmvnic" #define IBMVNIC_DRIVER_VERSION "1.0.1" #define IBMVNIC_INVALID_MAP -1 -#define IBMVNIC_STATS_TIMEOUT 1 -#define IBMVNIC_INIT_FAILED 2 #define IBMVNIC_OPEN_FAILED 3 /* basic structures plus 100 2k buffers */ @@ -31,21 +29,59 @@ #define IBMVNIC_BUFFS_PER_POOL 100 #define IBMVNIC_MAX_QUEUES 16 #define IBMVNIC_MAX_QUEUE_SZ 4096 +#define IBMVNIC_MAX_IND_DESCS 16 +#define IBMVNIC_IND_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32) #define IBMVNIC_TSO_BUF_SZ 65536 #define IBMVNIC_TSO_BUFS 64 #define IBMVNIC_TSO_POOL_MASK 0x80000000 -#define IBMVNIC_MAX_LTB_SIZE ((1 << (MAX_ORDER - 1)) * PAGE_SIZE) -#define IBMVNIC_BUFFER_HLEN 500 +/* A VNIC adapter has set of Rx and Tx pools (aka queues). Each Rx/Tx pool + * has a set of buffers. The size of each buffer is determined by the MTU. + * + * Each Rx/Tx pool is also associated with a DMA region that is shared + * with the "hardware" (VIOS) and used to send/receive packets. The DMA + * region is also referred to as a Long Term Buffer or LTB. 
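The remove path above nests rwi_lock inside state_lock, which is exactly the ordering the header changes below document ("take state lock first"). Distilled to its essentials, as a sketch:

static void example_set_state_removing(struct ibmvnic_adapter *adapter)
{
	unsigned long flags;

	spin_lock_irqsave(&adapter->state_lock, flags);	/* outer lock */
	spin_lock(&adapter->rwi_lock);			/* inner lock */
	adapter->state = VNIC_REMOVING;
	spin_unlock(&adapter->rwi_lock);
	spin_unlock_irqrestore(&adapter->state_lock, flags);
}

Keeping one global order for the two locks is what makes it safe for remove, reset scheduling, and probe to take them in combination without deadlocking.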
+ * + * The size of the DMA region required for an Rx/Tx pool depends on the + * number and size (MTU) of the buffers in the pool. At the max levels + * of 4096 jumbo frames (MTU=9000) we will need about 9K*4K = 36MB plus + * some padding. + * + * But the size of a single DMA region is limited by MAX_ORDER in the + * kernel (about 16MB currently). To support say 4K Jumbo frames, we + * use a set of LTBs (struct ltb_set) per pool. + * + * IBMVNIC_ONE_LTB_MAX - max size of each LTB supported by kernel + * IBMVNIC_ONE_LTB_SIZE - current max size of each LTB in an ltb_set + * (must be <= IBMVNIC_ONE_LTB_MAX) + * IBMVNIC_LTB_SET_SIZE - current size of all LTBs in an ltb_set + * + * Each VNIC can have upto 16 Rx, 16 Tx and 16 TSO pools. The TSO pools + * are of fixed length (IBMVNIC_TSO_BUF_SZ * IBMVNIC_TSO_BUFS) of 4MB. + * + * The Rx and Tx pools can have upto 4096 buffers. The max size of these + * buffers is about 9588 (for jumbo frames, including IBMVNIC_BUFFER_HLEN). + * So, setting the IBMVNIC_LTB_SET_SIZE for a pool to 4096 * 9588 ~= 38MB. + * + * There is a trade-off in setting IBMVNIC_ONE_LTB_SIZE. If it is large, + * the allocation of the LTB can fail when system is low in memory. If + * its too small, we would need several mappings for each of the Rx/ + * Tx/TSO pools but there is a limit of 255 mappings per vnic in the + * VNIC protocol. + * + * So setting IBMVNIC_ONE_LTB_SIZE to 8MB. With IBMVNIC_LTB_SET_SIZE set + * to 38MB, we will need 5 LTBs per Rx and Tx pool and 1 LTB per TSO + * pool for the 4MB. Thus the 16 Rx and Tx queues require 32 * 5 = 160 + * plus 16 for the TSO pools for a total of 176 LTB mappings per VNIC. + */ +#define IBMVNIC_ONE_LTB_MAX ((u32)((1 << (MAX_ORDER - 1)) * PAGE_SIZE)) +#define IBMVNIC_ONE_LTB_SIZE min((u32)(8 << 20), IBMVNIC_ONE_LTB_MAX) +#define IBMVNIC_LTB_SET_SIZE (38 << 20) +#define IBMVNIC_BUFFER_HLEN 500 #define IBMVNIC_RESET_DELAY 100 -static const char ibmvnic_priv_flags[][ETH_GSTRING_LEN] = { -#define IBMVNIC_USE_SERVER_MAXES 0x1 - "use-server-maxes" -}; - struct ibmvnic_login_buffer { __be32 len; __be32 version; @@ -224,8 +260,6 @@ struct ibmvnic_tx_comp_desc { #define IBMVNIC_TCP_CHKSUM 0x20 #define IBMVNIC_UDP_CHKSUM 0x08 -#define IBMVNIC_MAX_FRAGS_PER_CRQ 3 - struct ibmvnic_tx_desc { u8 first; u8 type; @@ -373,7 +407,7 @@ struct ibmvnic_phys_parms { #define IBMVNIC_10MBPS 0x40000000 #define IBMVNIC_100MBPS 0x20000000 #define IBMVNIC_1GBPS 0x10000000 -#define IBMVNIC_10GBP 0x08000000 +#define IBMVNIC_10GBPS 0x08000000 #define IBMVNIC_40GBPS 0x04000000 #define IBMVNIC_100GBPS 0x02000000 #define IBMVNIC_25GBPS 0x01000000 @@ -412,77 +446,6 @@ struct ibmvnic_control_ip_offload { struct ibmvnic_rc rc; } __packed __aligned(8); -struct ibmvnic_request_dump_size { - u8 first; - u8 cmd; - u8 reserved[6]; - __be32 len; - struct ibmvnic_rc rc; -} __packed __aligned(8); - -struct ibmvnic_request_dump { - u8 first; - u8 cmd; - u8 reserved1[2]; - __be32 ioba; - __be32 len; - u8 reserved2[4]; -} __packed __aligned(8); - -struct ibmvnic_request_dump_rsp { - u8 first; - u8 cmd; - u8 reserved[6]; - __be32 dumped_len; - struct ibmvnic_rc rc; -} __packed __aligned(8); - -struct ibmvnic_request_ras_comp_num { - u8 first; - u8 cmd; - u8 reserved1[2]; - __be32 num_components; - u8 reserved2[4]; - struct ibmvnic_rc rc; -} __packed __aligned(8); - -struct ibmvnic_request_ras_comps { - u8 first; - u8 cmd; - u8 reserved[2]; - __be32 ioba; - __be32 len; - struct ibmvnic_rc rc; -} __packed __aligned(8); - -struct ibmvnic_control_ras { - u8 first; - u8 cmd; - u8 
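The numbers in the comment above can be checked directly: DIV_ROUND_UP(38MB, 8MB) = 5 LTBs per Rx/Tx pool, so 32 pools * 5 + 16 TSO pools * 1 = 176 mappings, comfortably under the protocol limit of 255. A one-line sketch of the per-pool calculation (the helper name is illustrative):

static int example_ltbs_per_set(u32 pool_bytes)
{
	/* e.g. (38 << 20) bytes at (8 << 20) per LTB yields 5 */
	return DIV_ROUND_UP(pool_bytes, IBMVNIC_ONE_LTB_SIZE);
}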
correlator; - u8 level; - u8 op; -#define IBMVNIC_TRACE_LEVEL 1 -#define IBMVNIC_ERROR_LEVEL 2 -#define IBMVNIC_TRACE_PAUSE 3 -#define IBMVNIC_TRACE_RESUME 4 -#define IBMVNIC_TRACE_ON 5 -#define IBMVNIC_TRACE_OFF 6 -#define IBMVNIC_CHG_TRACE_BUFF_SZ 7 - u8 trace_buff_sz[3]; - u8 reserved[4]; - struct ibmvnic_rc rc; -} __packed __aligned(8); - -struct ibmvnic_collect_fw_trace { - u8 first; - u8 cmd; - u8 correlator; - u8 reserved; - __be32 ioba; - __be32 len; - struct ibmvnic_rc rc; -} __packed __aligned(8); - struct ibmvnic_request_statistics { u8 first; u8 cmd; @@ -494,15 +457,6 @@ struct ibmvnic_request_statistics { u8 reserved[4]; } __packed __aligned(8); -struct ibmvnic_request_debug_stats { - u8 first; - u8 cmd; - u8 reserved[2]; - __be32 ioba; - __be32 len; - struct ibmvnic_rc rc; -} __packed __aligned(8); - struct ibmvnic_error_indication { u8 first; u8 cmd; @@ -677,22 +631,8 @@ union ibmvnic_crq { struct ibmvnic_query_ip_offload query_ip_offload_rsp; struct ibmvnic_control_ip_offload control_ip_offload; struct ibmvnic_control_ip_offload control_ip_offload_rsp; - struct ibmvnic_request_dump_size request_dump_size; - struct ibmvnic_request_dump_size request_dump_size_rsp; - struct ibmvnic_request_dump request_dump; - struct ibmvnic_request_dump_rsp request_dump_rsp; - struct ibmvnic_request_ras_comp_num request_ras_comp_num; - struct ibmvnic_request_ras_comp_num request_ras_comp_num_rsp; - struct ibmvnic_request_ras_comps request_ras_comps; - struct ibmvnic_request_ras_comps request_ras_comps_rsp; - struct ibmvnic_control_ras control_ras; - struct ibmvnic_control_ras control_ras_rsp; - struct ibmvnic_collect_fw_trace collect_fw_trace; - struct ibmvnic_collect_fw_trace collect_fw_trace_rsp; struct ibmvnic_request_statistics request_statistics; struct ibmvnic_generic_crq request_statistics_rsp; - struct ibmvnic_request_debug_stats request_debug_stats; - struct ibmvnic_request_debug_stats request_debug_stats_rsp; struct ibmvnic_error_indication error_indication; struct ibmvnic_link_state_indication link_state_indication; struct ibmvnic_change_mac_addr change_mac_addr; @@ -845,6 +785,7 @@ struct ibmvnic_crq_queue { union ibmvnic_crq *msgs; int size, cur; dma_addr_t msg_token; + /* Used for serialization of msgs, cur */ spinlock_t lock; bool active; char name[32]; @@ -861,6 +802,12 @@ union sub_crq { struct ibmvnic_rx_buff_add_desc rx_add; }; +struct ibmvnic_ind_xmit_queue { + union sub_crq *indir_arr; + dma_addr_t indir_dma; + int index; +}; + struct ibmvnic_sub_crq_queue { union sub_crq *msgs; int size, cur; @@ -870,12 +817,15 @@ struct ibmvnic_sub_crq_queue { unsigned int irq; unsigned int pool_index; int scrq_num; + /* Used for serialization of msgs, cur */ spinlock_t lock; struct sk_buff *rx_skb_top; struct ibmvnic_adapter *adapter; + struct ibmvnic_ind_xmit_queue ind_buf; atomic_t used; char name[32]; -}; + u64 handle; +} ____cacheline_aligned; struct ibmvnic_long_term_buff { unsigned char *buff; @@ -884,16 +834,15 @@ struct ibmvnic_long_term_buff { u8 map_id; }; +struct ibmvnic_ltb_set { + int num_ltbs; + struct ibmvnic_long_term_buff *ltbs; +}; + struct ibmvnic_tx_buff { struct sk_buff *skb; - dma_addr_t data_dma[IBMVNIC_MAX_FRAGS_PER_CRQ]; - unsigned int data_len[IBMVNIC_MAX_FRAGS_PER_CRQ]; int index; int pool_index; - bool last_frag; - union sub_crq indir_arr[6]; - u8 hdr_data[140]; - dma_addr_t indir_dma; int num_entries; }; @@ -902,10 +851,10 @@ struct ibmvnic_tx_pool { int *free_map; int consumer_index; int producer_index; - struct ibmvnic_long_term_buff long_term_buff; + 
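The new ibmvnic_ind_xmit_queue above is what lets the xmit path batch descriptors and post them with one indirect hcall rather than one hcall per descriptor. A hedged sketch of a flush, assuming the H_SEND_SUB_CRQ_INDIRECT convention (unit address, remote queue handle, DMA address of the descriptor array, entry count); the helper and the handle byte order are illustrative:

static int example_flush_ind_buf(struct ibmvnic_adapter *adapter,
				 struct ibmvnic_sub_crq_queue *scrq)
{
	struct ibmvnic_ind_xmit_queue *ind = &scrq->ind_buf;
	long rc;

	rc = plpar_hcall_norets(H_SEND_SUB_CRQ_INDIRECT,
				adapter->vdev->unit_address,
				scrq->handle,
				(u64)ind->indir_dma,
				(u64)ind->index);
	ind->index = 0;		/* array consumed; reset the fill level */
	return rc == H_SUCCESS ? 0 : -EIO;
}

Amortizing the hypervisor transition across up to IBMVNIC_MAX_IND_DESCS descriptors is the point of the change; it pairs naturally with xmit_more-style batching.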
struct ibmvnic_ltb_set ltb_set; int num_buffers; int buf_size; -}; +} ____cacheline_aligned; struct ibmvnic_rx_buff { struct sk_buff *skb; @@ -917,7 +866,7 @@ struct ibmvnic_rx_buff { struct ibmvnic_rx_pool { struct ibmvnic_rx_buff *rx_buff; - int size; + int size; /* # of buffers in the pool */ int index; int buff_size; atomic_t available; @@ -925,8 +874,8 @@ struct ibmvnic_rx_pool { int next_free; int next_alloc; int active; - struct ibmvnic_long_term_buff long_term_buff; -}; + struct ibmvnic_ltb_set ltb_set; +} ____cacheline_aligned; struct ibmvnic_vpd { unsigned char *buff; @@ -942,14 +891,15 @@ enum vnic_state {VNIC_PROBING = 1, VNIC_CLOSED, VNIC_REMOVING, VNIC_REMOVED, - VNIC_RESETTING}; + VNIC_DOWN}; enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1, VNIC_RESET_MOBILITY, VNIC_RESET_FATAL, VNIC_RESET_NON_FATAL, VNIC_RESET_TIMEOUT, - VNIC_RESET_CHANGE_PARAM}; + VNIC_RESET_CHANGE_PARAM, + VNIC_RESET_PASSIVE_INIT}; struct ibmvnic_rwi { enum ibmvnic_reset_reason reset_reason; @@ -974,7 +924,6 @@ struct ibmvnic_adapter { struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl; dma_addr_t ip_offload_ctrl_tok; u32 msg_enable; - u32 priv_flags; /* Vital Product Data (VPD) */ struct ibmvnic_vpd *vpd; @@ -984,7 +933,6 @@ struct ibmvnic_adapter { struct ibmvnic_statistics stats; dma_addr_t stats_token; struct completion stats_done; - spinlock_t stats_lock; int replenish_no_mem; int replenish_add_buff_success; int replenish_add_buff_failure; @@ -1011,10 +959,9 @@ struct ibmvnic_adapter { int login_rsp_buf_sz; atomic_t running_cap_crqs; - bool wait_capability; - struct ibmvnic_sub_crq_queue **tx_scrq; - struct ibmvnic_sub_crq_queue **rx_scrq; + struct ibmvnic_sub_crq_queue **tx_scrq ____cacheline_aligned; + struct ibmvnic_sub_crq_queue **rx_scrq ____cacheline_aligned; /* rx structs */ struct napi_struct *napi; @@ -1023,6 +970,7 @@ struct ibmvnic_adapter { struct ibmvnic_tx_pool *tx_pool; struct ibmvnic_tx_pool *tso_pool; + struct completion probe_done; struct completion init_done; int init_done_rc; @@ -1057,6 +1005,7 @@ struct ibmvnic_adapter { u64 min_mtu; u64 max_mtu; u64 req_mtu; + u64 prev_mtu; u64 max_multicast_filters; u64 vlan_header_insertion; u64 rx_vlan_header_insertion; @@ -1069,29 +1018,45 @@ struct ibmvnic_adapter { u64 opt_tx_entries_per_subcrq; u64 opt_rxba_entries_per_subcrq; __be64 tx_rx_desc_req; - u8 map_id; +#define MAX_MAP_ID 255 + DECLARE_BITMAP(map_ids, MAX_MAP_ID); u32 num_active_rx_scrqs; u32 num_active_rx_pools; u32 num_active_rx_napi; u32 num_active_tx_scrqs; u32 num_active_tx_pools; + u32 prev_rx_pool_size; + u32 prev_tx_pool_size; + u32 cur_rx_buf_sz; + u32 prev_rx_buf_sz; + struct tasklet_struct tasklet; enum vnic_state state; + /* Used for serialization of state field. When taking both state + * and rwi locks, take state lock first. + */ + spinlock_t state_lock; enum ibmvnic_reset_reason reset_reason; - spinlock_t rwi_lock; struct list_head rwi_list; + /* Used for serialization of rwi_list. 
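With an ltb_set replacing the single long_term_buff in the pools above, a buffer's linear offset within a pool has to be translated into a specific LTB plus an offset inside it. A sketch of that translation, assuming each LTB records its mapped size (the full header carries an int size field not shown in this hunk; the helper name is illustrative):

static void example_map_pool_offset(struct ibmvnic_ltb_set *set,
				    unsigned int offset,
				    struct ibmvnic_long_term_buff **ltbp,
				    unsigned int *ltb_off)
{
	int i;

	for (i = 0; i < set->num_ltbs; i++) {
		if (offset < set->ltbs[i].size) {
			*ltbp = &set->ltbs[i];
			*ltb_off = offset;
			return;
		}
		offset -= set->ltbs[i].size;
	}
	*ltbp = NULL;	/* offset beyond the mapped region */
}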
When taking both state * and rwi locks, take state lock first */ + spinlock_t rwi_lock; struct work_struct ibmvnic_reset; struct delayed_work ibmvnic_delayed_reset; unsigned long resetting; - bool napi_enabled, from_passive_init; - + /* last device reset time */ + unsigned long last_reset_time; + + bool napi_enabled; + bool from_passive_init; + bool login_pending; + /* protected by rcu */ + bool tx_queues_active; bool failover_pending; bool force_reset_recovery; struct ibmvnic_tunables desired; struct ibmvnic_tunables fallback; - - /* Used for serializatin of state field */ - spinlock_t state_lock; };
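"protected by rcu" on tx_queues_active above describes the usual RCU flag pattern: readers sample the flag inside a read-side critical section, and the reset path clears it and waits out in-flight readers before touching the queues. A sketch of both sides (helper names illustrative):

static bool example_tx_active(struct ibmvnic_adapter *adapter)
{
	bool active;

	rcu_read_lock();
	active = adapter->tx_queues_active;
	rcu_read_unlock();
	return active;
}

static void example_quiesce_tx(struct ibmvnic_adapter *adapter)
{
	adapter->tx_queues_active = false;

	/* wait for in-flight readers before tearing the queues down */
	synchronize_rcu();
}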