diff options
Diffstat (limited to 'drivers/net/ethernet/ibm/ibmvnic.c')
-rw-r--r-- | drivers/net/ethernet/ibm/ibmvnic.c | 3195 |
1 files changed, 2112 insertions, 1083 deletions
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 4bd33245bad6..9282381a438f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -53,6 +53,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/irq.h> +#include <linux/irqdomain.h> #include <linux/kthread.h> #include <linux/seq_file.h> #include <linux/interrupt.h> @@ -60,6 +61,7 @@ #include <asm/hvcall.h> #include <linux/atomic.h> #include <asm/vio.h> +#include <asm/xive.h> #include <asm/iommu.h> #include <linux/uaccess.h> #include <asm/firmware.h> @@ -78,14 +80,11 @@ MODULE_LICENSE("GPL"); MODULE_VERSION(IBMVNIC_DRIVER_VERSION); static int ibmvnic_version = IBMVNIC_INITIAL_VERSION; -static int ibmvnic_remove(struct vio_dev *); static void release_sub_crqs(struct ibmvnic_adapter *, bool); static int ibmvnic_reset_crq(struct ibmvnic_adapter *); static int ibmvnic_send_crq_init(struct ibmvnic_adapter *); static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *); static int ibmvnic_send_crq(struct ibmvnic_adapter *, union ibmvnic_crq *); -static int send_subcrq(struct ibmvnic_adapter *adapter, u64 remote_handle, - union sub_crq *sub_crq); static int send_subcrq_indirect(struct ibmvnic_adapter *, u64, u64, u64); static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance); static int enable_scrq_irq(struct ibmvnic_adapter *, @@ -97,19 +96,23 @@ static int pending_scrq(struct ibmvnic_adapter *, static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *, struct ibmvnic_sub_crq_queue *); static int ibmvnic_poll(struct napi_struct *napi, int data); -static void send_map_query(struct ibmvnic_adapter *adapter); -static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8); +static void send_query_map(struct ibmvnic_adapter *adapter); +static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, u32, u8); static int send_request_unmap(struct ibmvnic_adapter *, u8); static int send_login(struct ibmvnic_adapter *adapter); -static void send_cap_queries(struct ibmvnic_adapter *adapter); +static void send_query_cap(struct ibmvnic_adapter *adapter); static int init_sub_crqs(struct ibmvnic_adapter *); static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter); -static int ibmvnic_init(struct ibmvnic_adapter *); -static int ibmvnic_reset_init(struct ibmvnic_adapter *); +static int ibmvnic_reset_init(struct ibmvnic_adapter *, bool reset); static void release_crq_queue(struct ibmvnic_adapter *); static int __ibmvnic_set_mac(struct net_device *, u8 *); static int init_crq_queue(struct ibmvnic_adapter *adapter); static int send_query_phys_parms(struct ibmvnic_adapter *adapter); +static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *tx_scrq); +static void free_long_term_buff(struct ibmvnic_adapter *adapter, + struct ibmvnic_long_term_buff *ltb); +static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter); struct ibmvnic_stat { char name[ETH_GSTRING_LEN]; @@ -118,7 +121,7 @@ struct ibmvnic_stat { #define IBMVNIC_STAT_OFF(stat) (offsetof(struct ibmvnic_adapter, stats) + \ offsetof(struct ibmvnic_statistics, stat)) -#define IBMVNIC_GET_STAT(a, off) (*((u64 *)(((unsigned long)(a)) + off))) +#define IBMVNIC_GET_STAT(a, off) (*((u64 *)(((unsigned long)(a)) + (off)))) static const struct ibmvnic_stat ibmvnic_stats[] = { {"rx_packets", IBMVNIC_STAT_OFF(rx_packets)}, @@ -145,6 +148,29 @@ static const struct ibmvnic_stat ibmvnic_stats[] = { {"internal_mac_rx_errors", IBMVNIC_STAT_OFF(internal_mac_rx_errors)}, }; +static int send_crq_init_complete(struct ibmvnic_adapter *adapter) +{ + union ibmvnic_crq crq; + + memset(&crq, 0, sizeof(crq)); + crq.generic.first = IBMVNIC_CRQ_INIT_CMD; + crq.generic.cmd = IBMVNIC_CRQ_INIT_COMPLETE; + + return ibmvnic_send_crq(adapter, &crq); +} + +static int send_version_xchg(struct ibmvnic_adapter *adapter) +{ + union ibmvnic_crq crq; + + memset(&crq, 0, sizeof(crq)); + crq.version_exchange.first = IBMVNIC_CRQ_CMD; + crq.version_exchange.cmd = VERSION_EXCHANGE; + crq.version_exchange.version = cpu_to_be16(ibmvnic_version); + + return ibmvnic_send_crq(adapter, &crq); +} + static long h_reg_sub_crq(unsigned long unit_address, unsigned long token, unsigned long length, unsigned long *number, unsigned long *irq) @@ -193,53 +219,108 @@ static int ibmvnic_wait_for_completion(struct ibmvnic_adapter *adapter, return -ETIMEDOUT; } +/** + * reuse_ltb() - Check if a long term buffer can be reused + * @ltb: The long term buffer to be checked + * @size: The size of the long term buffer. + * + * An LTB can be reused unless its size has changed. + * + * Return: Return true if the LTB can be reused, false otherwise. + */ +static bool reuse_ltb(struct ibmvnic_long_term_buff *ltb, int size) +{ + return (ltb->buff && ltb->size == size); +} + +/** + * alloc_long_term_buff() - Allocate a long term buffer (LTB) + * + * @adapter: ibmvnic adapter associated to the LTB + * @ltb: container object for the LTB + * @size: size of the LTB + * + * Allocate an LTB of the specified size and notify VIOS. + * + * If the given @ltb already has the correct size, reuse it. Otherwise if + * its non-NULL, free it. Then allocate a new one of the correct size. + * Notify the VIOS either way since we may now be working with a new VIOS. + * + * Allocating larger chunks of memory during resets, specially LPM or under + * low memory situations can cause resets to fail/timeout and for LPAR to + * lose connectivity. So hold onto the LTB even if we fail to communicate + * with the VIOS and reuse it on next open. Free LTB when adapter is closed. + * + * Return: 0 if we were able to allocate the LTB and notify the VIOS and + * a negative value otherwise. + */ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter, struct ibmvnic_long_term_buff *ltb, int size) { struct device *dev = &adapter->vdev->dev; + u64 prev = 0; int rc; - ltb->size = size; - ltb->buff = dma_alloc_coherent(dev, ltb->size, <b->addr, - GFP_KERNEL); + if (!reuse_ltb(ltb, size)) { + dev_dbg(dev, + "LTB size changed from 0x%llx to 0x%x, reallocating\n", + ltb->size, size); + prev = ltb->size; + free_long_term_buff(adapter, ltb); + } - if (!ltb->buff) { - dev_err(dev, "Couldn't alloc long term buffer\n"); - return -ENOMEM; + if (ltb->buff) { + dev_dbg(dev, "Reusing LTB [map %d, size 0x%llx]\n", + ltb->map_id, ltb->size); + } else { + ltb->buff = dma_alloc_coherent(dev, size, <b->addr, + GFP_KERNEL); + if (!ltb->buff) { + dev_err(dev, "Couldn't alloc long term buffer\n"); + return -ENOMEM; + } + ltb->size = size; + + ltb->map_id = find_first_zero_bit(adapter->map_ids, + MAX_MAP_ID); + bitmap_set(adapter->map_ids, ltb->map_id, 1); + + dev_dbg(dev, + "Allocated new LTB [map %d, size 0x%llx was 0x%llx]\n", + ltb->map_id, ltb->size, prev); } - ltb->map_id = adapter->map_id; - adapter->map_id++; + + /* Ensure ltb is zeroed - specially when reusing it. */ + memset(ltb->buff, 0, ltb->size); mutex_lock(&adapter->fw_lock); adapter->fw_done_rc = 0; reinit_completion(&adapter->fw_done); - rc = send_request_map(adapter, ltb->addr, - ltb->size, ltb->map_id); + + rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id); if (rc) { - dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); - mutex_unlock(&adapter->fw_lock); - return rc; + dev_err(dev, "send_request_map failed, rc = %d\n", rc); + goto out; } rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000); if (rc) { - dev_err(dev, - "Long term map request aborted or timed out,rc = %d\n", + dev_err(dev, "LTB map request aborted or timed out, rc = %d\n", rc); - dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); - mutex_unlock(&adapter->fw_lock); - return rc; + goto out; } if (adapter->fw_done_rc) { - dev_err(dev, "Couldn't map long term buffer,rc = %d\n", + dev_err(dev, "Couldn't map LTB, rc = %d\n", adapter->fw_done_rc); - dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); - mutex_unlock(&adapter->fw_lock); - return -1; + rc = -EIO; + goto out; } + rc = 0; +out: + /* don't free LTB on communication error - see function header */ mutex_unlock(&adapter->fw_lock); - return 0; + return rc; } static void free_long_term_buff(struct ibmvnic_adapter *adapter, @@ -250,55 +331,230 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter, if (!ltb->buff) return; + /* VIOS automatically unmaps the long term buffer at remote + * end for the following resets: + * FAILOVER, MOBILITY, TIMEOUT. + */ if (adapter->reset_reason != VNIC_RESET_FAILOVER && - adapter->reset_reason != VNIC_RESET_MOBILITY) + adapter->reset_reason != VNIC_RESET_MOBILITY && + adapter->reset_reason != VNIC_RESET_TIMEOUT) send_request_unmap(adapter, ltb->map_id); + dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); + + ltb->buff = NULL; + /* mark this map_id free */ + bitmap_clear(adapter->map_ids, ltb->map_id, 1); + ltb->map_id = 0; } -static int reset_long_term_buff(struct ibmvnic_adapter *adapter, - struct ibmvnic_long_term_buff *ltb) +/** + * free_ltb_set - free the given set of long term buffers (LTBS) + * @adapter: The ibmvnic adapter containing this ltb set + * @ltb_set: The ltb_set to be freed + * + * Free the set of LTBs in the given set. + */ + +static void free_ltb_set(struct ibmvnic_adapter *adapter, + struct ibmvnic_ltb_set *ltb_set) +{ + int i; + + for (i = 0; i < ltb_set->num_ltbs; i++) + free_long_term_buff(adapter, <b_set->ltbs[i]); + + kfree(ltb_set->ltbs); + ltb_set->ltbs = NULL; + ltb_set->num_ltbs = 0; +} + +/** + * alloc_ltb_set() - Allocate a set of long term buffers (LTBs) + * + * @adapter: ibmvnic adapter associated to the LTB + * @ltb_set: container object for the set of LTBs + * @num_buffs: Number of buffers in the LTB + * @buff_size: Size of each buffer in the LTB + * + * Allocate a set of LTBs to accommodate @num_buffs buffers of @buff_size + * each. We currently cap size each LTB to IBMVNIC_ONE_LTB_SIZE. If the + * new set of LTBs have fewer LTBs than the old set, free the excess LTBs. + * If new set needs more than in old set, allocate the remaining ones. + * Try and reuse as many LTBs as possible and avoid reallocation. + * + * Any changes to this allocation strategy must be reflected in + * map_rxpool_buff_to_ltb() and map_txpool_buff_to_ltb(). + */ +static int alloc_ltb_set(struct ibmvnic_adapter *adapter, + struct ibmvnic_ltb_set *ltb_set, int num_buffs, + int buff_size) { struct device *dev = &adapter->vdev->dev; + struct ibmvnic_ltb_set old_set; + struct ibmvnic_ltb_set new_set; + int rem_size; + int tot_size; /* size of all ltbs */ + int ltb_size; /* size of one ltb */ + int nltbs; int rc; + int n; + int i; - memset(ltb->buff, 0, ltb->size); + dev_dbg(dev, "%s() num_buffs %d, buff_size %d\n", __func__, num_buffs, + buff_size); - mutex_lock(&adapter->fw_lock); - adapter->fw_done_rc = 0; + ltb_size = rounddown(IBMVNIC_ONE_LTB_SIZE, buff_size); + tot_size = num_buffs * buff_size; - reinit_completion(&adapter->fw_done); - rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id); - if (rc) { - mutex_unlock(&adapter->fw_lock); - return rc; - } + if (ltb_size > tot_size) + ltb_size = tot_size; - rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000); - if (rc) { - dev_info(dev, - "Reset failed, long term map request timed out or aborted\n"); - mutex_unlock(&adapter->fw_lock); - return rc; + nltbs = tot_size / ltb_size; + if (tot_size % ltb_size) + nltbs++; + + old_set = *ltb_set; + + if (old_set.num_ltbs == nltbs) { + new_set = old_set; + } else { + int tmp = nltbs * sizeof(struct ibmvnic_long_term_buff); + + new_set.ltbs = kzalloc(tmp, GFP_KERNEL); + if (!new_set.ltbs) + return -ENOMEM; + + new_set.num_ltbs = nltbs; + + /* Free any excess ltbs in old set */ + for (i = new_set.num_ltbs; i < old_set.num_ltbs; i++) + free_long_term_buff(adapter, &old_set.ltbs[i]); + + /* Copy remaining ltbs to new set. All LTBs except the + * last one are of the same size. alloc_long_term_buff() + * will realloc if the size changes. + */ + n = min(old_set.num_ltbs, new_set.num_ltbs); + for (i = 0; i < n; i++) + new_set.ltbs[i] = old_set.ltbs[i]; + + /* Any additional ltbs in new set will have NULL ltbs for + * now and will be allocated in alloc_long_term_buff(). + */ + + /* We no longer need the old_set so free it. Note that we + * may have reused some ltbs from old set and freed excess + * ltbs above. So we only need to free the container now + * not the LTBs themselves. (i.e. dont free_ltb_set()!) + */ + kfree(old_set.ltbs); + old_set.ltbs = NULL; + old_set.num_ltbs = 0; + + /* Install the new set. If allocations fail below, we will + * retry later and know what size LTBs we need. + */ + *ltb_set = new_set; } - if (adapter->fw_done_rc) { - dev_info(dev, - "Reset failed, attempting to free and reallocate buffer\n"); - free_long_term_buff(adapter, ltb); - mutex_unlock(&adapter->fw_lock); - return alloc_long_term_buff(adapter, ltb, ltb->size); + i = 0; + rem_size = tot_size; + while (rem_size) { + if (ltb_size > rem_size) + ltb_size = rem_size; + + rem_size -= ltb_size; + + rc = alloc_long_term_buff(adapter, &new_set.ltbs[i], ltb_size); + if (rc) + goto out; + i++; } - mutex_unlock(&adapter->fw_lock); + + WARN_ON(i != new_set.num_ltbs); + return 0; +out: + /* We may have allocated one/more LTBs before failing and we + * want to try and reuse on next reset. So don't free ltb set. + */ + return rc; +} + +/** + * map_rxpool_buf_to_ltb - Map given rxpool buffer to offset in an LTB. + * @rxpool: The receive buffer pool containing buffer + * @bufidx: Index of buffer in rxpool + * @ltbp: (Output) pointer to the long term buffer containing the buffer + * @offset: (Output) offset of buffer in the LTB from @ltbp + * + * Map the given buffer identified by [rxpool, bufidx] to an LTB in the + * pool and its corresponding offset. Assume for now that each LTB is of + * different size but could possibly be optimized based on the allocation + * strategy in alloc_ltb_set(). + */ +static void map_rxpool_buf_to_ltb(struct ibmvnic_rx_pool *rxpool, + unsigned int bufidx, + struct ibmvnic_long_term_buff **ltbp, + unsigned int *offset) +{ + struct ibmvnic_long_term_buff *ltb; + int nbufs; /* # of buffers in one ltb */ + int i; + + WARN_ON(bufidx >= rxpool->size); + + for (i = 0; i < rxpool->ltb_set.num_ltbs; i++) { + ltb = &rxpool->ltb_set.ltbs[i]; + nbufs = ltb->size / rxpool->buff_size; + if (bufidx < nbufs) + break; + bufidx -= nbufs; + } + + *ltbp = ltb; + *offset = bufidx * rxpool->buff_size; +} + +/** + * map_txpool_buf_to_ltb - Map given txpool buffer to offset in an LTB. + * @txpool: The transmit buffer pool containing buffer + * @bufidx: Index of buffer in txpool + * @ltbp: (Output) pointer to the long term buffer (LTB) containing the buffer + * @offset: (Output) offset of buffer in the LTB from @ltbp + * + * Map the given buffer identified by [txpool, bufidx] to an LTB in the + * pool and its corresponding offset. + */ +static void map_txpool_buf_to_ltb(struct ibmvnic_tx_pool *txpool, + unsigned int bufidx, + struct ibmvnic_long_term_buff **ltbp, + unsigned int *offset) +{ + struct ibmvnic_long_term_buff *ltb; + int nbufs; /* # of buffers in one ltb */ + int i; + + WARN_ON_ONCE(bufidx >= txpool->num_buffers); + + for (i = 0; i < txpool->ltb_set.num_ltbs; i++) { + ltb = &txpool->ltb_set.ltbs[i]; + nbufs = ltb->size / txpool->buf_size; + if (bufidx < nbufs) + break; + bufidx -= nbufs; + } + + *ltbp = ltb; + *offset = bufidx * txpool->buf_size; } static void deactivate_rx_pools(struct ibmvnic_adapter *adapter) { int i; - for (i = 0; i < be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); - i++) + for (i = 0; i < adapter->num_active_rx_pools; i++) adapter->rx_pool[i].active = 0; } @@ -306,58 +562,77 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, struct ibmvnic_rx_pool *pool) { int count = pool->size - atomic_read(&pool->available); + u64 handle = adapter->rx_scrq[pool->index]->handle; struct device *dev = &adapter->vdev->dev; + struct ibmvnic_ind_xmit_queue *ind_bufp; + struct ibmvnic_sub_crq_queue *rx_scrq; + struct ibmvnic_long_term_buff *ltb; + union sub_crq *sub_crq; int buffers_added = 0; unsigned long lpar_rc; - union sub_crq sub_crq; struct sk_buff *skb; unsigned int offset; dma_addr_t dma_addr; unsigned char *dst; - u64 *handle_array; int shift = 0; - int index; + int bufidx; int i; if (!pool->active) return; - handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + - be32_to_cpu(adapter->login_rsp_buf-> - off_rxadd_subcrqs)); + rx_scrq = adapter->rx_scrq[pool->index]; + ind_bufp = &rx_scrq->ind_buf; + + /* netdev_skb_alloc() could have failed after we saved a few skbs + * in the indir_buf and we would not have sent them to VIOS yet. + * To account for them, start the loop at ind_bufp->index rather + * than 0. If we pushed all the skbs to VIOS, ind_bufp->index will + * be 0. + */ + for (i = ind_bufp->index; i < count; ++i) { + bufidx = pool->free_map[pool->next_free]; - for (i = 0; i < count; ++i) { - skb = alloc_skb(pool->buff_size, GFP_ATOMIC); + /* We maybe reusing the skb from earlier resets. Allocate + * only if necessary. But since the LTB may have changed + * during reset (see init_rx_pools()), update LTB below + * even if reusing skb. + */ + skb = pool->rx_buff[bufidx].skb; if (!skb) { - dev_err(dev, "Couldn't replenish rx buff\n"); - adapter->replenish_no_mem++; - break; + skb = netdev_alloc_skb(adapter->netdev, + pool->buff_size); + if (!skb) { + dev_err(dev, "Couldn't replenish rx buff\n"); + adapter->replenish_no_mem++; + break; + } } - index = pool->free_map[pool->next_free]; - - if (pool->rx_buff[index].skb) - dev_err(dev, "Inconsistent free_map!\n"); + pool->free_map[pool->next_free] = IBMVNIC_INVALID_MAP; + pool->next_free = (pool->next_free + 1) % pool->size; /* Copy the skb to the long term mapped DMA buffer */ - offset = index * pool->buff_size; - dst = pool->long_term_buff.buff + offset; + map_rxpool_buf_to_ltb(pool, bufidx, <b, &offset); + dst = ltb->buff + offset; memset(dst, 0, pool->buff_size); - dma_addr = pool->long_term_buff.addr + offset; - pool->rx_buff[index].data = dst; - - pool->free_map[pool->next_free] = IBMVNIC_INVALID_MAP; - pool->rx_buff[index].dma = dma_addr; - pool->rx_buff[index].skb = skb; - pool->rx_buff[index].pool_index = pool->index; - pool->rx_buff[index].size = pool->buff_size; - - memset(&sub_crq, 0, sizeof(sub_crq)); - sub_crq.rx_add.first = IBMVNIC_CRQ_CMD; - sub_crq.rx_add.correlator = - cpu_to_be64((u64)&pool->rx_buff[index]); - sub_crq.rx_add.ioba = cpu_to_be32(dma_addr); - sub_crq.rx_add.map_id = pool->long_term_buff.map_id; + dma_addr = ltb->addr + offset; + + /* add the skb to an rx_buff in the pool */ + pool->rx_buff[bufidx].data = dst; + pool->rx_buff[bufidx].dma = dma_addr; + pool->rx_buff[bufidx].skb = skb; + pool->rx_buff[bufidx].pool_index = pool->index; + pool->rx_buff[bufidx].size = pool->buff_size; + + /* queue the rx_buff for the next send_subcrq_indirect */ + sub_crq = &ind_bufp->indir_arr[ind_bufp->index++]; + memset(sub_crq, 0, sizeof(*sub_crq)); + sub_crq->rx_add.first = IBMVNIC_CRQ_CMD; + sub_crq->rx_add.correlator = + cpu_to_be64((u64)&pool->rx_buff[bufidx]); + sub_crq->rx_add.ioba = cpu_to_be32(dma_addr); + sub_crq->rx_add.map_id = ltb->map_id; /* The length field of the sCRQ is defined to be 24 bits so the * buffer size needs to be left shifted by a byte before it is @@ -367,16 +642,21 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, #ifdef __LITTLE_ENDIAN__ shift = 8; #endif - sub_crq.rx_add.len = cpu_to_be32(pool->buff_size << shift); - - lpar_rc = send_subcrq(adapter, handle_array[pool->index], - &sub_crq); - if (lpar_rc != H_SUCCESS) - goto failure; - - buffers_added++; - adapter->replenish_add_buff_success++; - pool->next_free = (pool->next_free + 1) % pool->size; + sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift); + + /* if send_subcrq_indirect queue is full, flush to VIOS */ + if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS || + i == count - 1) { + lpar_rc = + send_subcrq_indirect(adapter, handle, + (u64)ind_bufp->indir_dma, + (u64)ind_bufp->index); + if (lpar_rc != H_SUCCESS) + goto failure; + buffers_added += ind_bufp->index; + adapter->replenish_add_buff_success += ind_bufp->index; + ind_bufp->index = 0; + } } atomic_add(buffers_added, &pool->available); return; @@ -384,13 +664,22 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, failure: if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED) dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n"); - pool->free_map[pool->next_free] = index; - pool->rx_buff[index].skb = NULL; + for (i = ind_bufp->index - 1; i >= 0; --i) { + struct ibmvnic_rx_buff *rx_buff; - dev_kfree_skb_any(skb); - adapter->replenish_add_buff_failure++; + pool->next_free = pool->next_free == 0 ? + pool->size - 1 : pool->next_free - 1; + sub_crq = &ind_bufp->indir_arr[i]; + rx_buff = (struct ibmvnic_rx_buff *) + be64_to_cpu(sub_crq->rx_add.correlator); + bufidx = (int)(rx_buff - pool->rx_buff); + pool->free_map[pool->next_free] = bufidx; + dev_kfree_skb_any(pool->rx_buff[bufidx].skb); + pool->rx_buff[bufidx].skb = NULL; + } + adapter->replenish_add_buff_failure += ind_bufp->index; atomic_add(buffers_added, &pool->available); - + ind_bufp->index = 0; if (lpar_rc == H_CLOSED || adapter->failover_pending) { /* Disable buffer pool replenishment and report carrier off if * queue is closed or pending failover. @@ -407,11 +696,12 @@ static void replenish_pools(struct ibmvnic_adapter *adapter) int i; adapter->replenish_task_cycles++; - for (i = 0; i < be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); - i++) { + for (i = 0; i < adapter->num_active_rx_pools; i++) { if (adapter->rx_pool[i].active) replenish_rx_pool(adapter, &adapter->rx_pool[i]); } + + netdev_dbg(adapter->netdev, "Replenished %d pools\n", i); } static void release_stats_buffers(struct ibmvnic_adapter *adapter) @@ -458,13 +748,15 @@ static int init_stats_token(struct ibmvnic_adapter *adapter) { struct device *dev = &adapter->vdev->dev; dma_addr_t stok; + int rc; stok = dma_map_single(dev, &adapter->stats, sizeof(struct ibmvnic_statistics), DMA_FROM_DEVICE); - if (dma_mapping_error(dev, stok)) { - dev_err(dev, "Couldn't map stats buffer\n"); - return -1; + rc = dma_mapping_error(dev, stok); + if (rc) { + dev_err(dev, "Couldn't map stats buffer, rc = %d\n", rc); + return rc; } adapter->stats_token = stok; @@ -472,52 +764,12 @@ static int init_stats_token(struct ibmvnic_adapter *adapter) return 0; } -static int reset_rx_pools(struct ibmvnic_adapter *adapter) -{ - struct ibmvnic_rx_pool *rx_pool; - int rx_scrqs; - int i, j, rc; - u64 *size_array; - - size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + - be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size)); - - rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); - for (i = 0; i < rx_scrqs; i++) { - rx_pool = &adapter->rx_pool[i]; - - netdev_dbg(adapter->netdev, "Re-setting rx_pool[%d]\n", i); - - if (rx_pool->buff_size != be64_to_cpu(size_array[i])) { - free_long_term_buff(adapter, &rx_pool->long_term_buff); - rx_pool->buff_size = be64_to_cpu(size_array[i]); - rc = alloc_long_term_buff(adapter, - &rx_pool->long_term_buff, - rx_pool->size * - rx_pool->buff_size); - } else { - rc = reset_long_term_buff(adapter, - &rx_pool->long_term_buff); - } - - if (rc) - return rc; - - for (j = 0; j < rx_pool->size; j++) - rx_pool->free_map[j] = j; - - memset(rx_pool->rx_buff, 0, - rx_pool->size * sizeof(struct ibmvnic_rx_buff)); - - atomic_set(&rx_pool->available, 0); - rx_pool->next_alloc = 0; - rx_pool->next_free = 0; - rx_pool->active = 1; - } - - return 0; -} - +/** + * release_rx_pools() - Release any rx pools attached to @adapter. + * @adapter: ibmvnic adapter + * + * Safe to call this multiple times - even if no pools are attached. + */ static void release_rx_pools(struct ibmvnic_adapter *adapter) { struct ibmvnic_rx_pool *rx_pool; @@ -532,7 +784,8 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter) netdev_dbg(adapter->netdev, "Releasing rx_pool[%d]\n", i); kfree(rx_pool->free_map); - free_long_term_buff(adapter, &rx_pool->long_term_buff); + + free_ltb_set(adapter, &rx_pool->ltb_set); if (!rx_pool->rx_buff) continue; @@ -550,50 +803,112 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter) kfree(adapter->rx_pool); adapter->rx_pool = NULL; adapter->num_active_rx_pools = 0; + adapter->prev_rx_pool_size = 0; +} + +/** + * reuse_rx_pools() - Check if the existing rx pools can be reused. + * @adapter: ibmvnic adapter + * + * Check if the existing rx pools in the adapter can be reused. The + * pools can be reused if the pool parameters (number of pools, + * number of buffers in the pool and size of each buffer) have not + * changed. + * + * NOTE: This assumes that all pools have the same number of buffers + * which is the case currently. If that changes, we must fix this. + * + * Return: true if the rx pools can be reused, false otherwise. + */ +static bool reuse_rx_pools(struct ibmvnic_adapter *adapter) +{ + u64 old_num_pools, new_num_pools; + u64 old_pool_size, new_pool_size; + u64 old_buff_size, new_buff_size; + + if (!adapter->rx_pool) + return false; + + old_num_pools = adapter->num_active_rx_pools; + new_num_pools = adapter->req_rx_queues; + + old_pool_size = adapter->prev_rx_pool_size; + new_pool_size = adapter->req_rx_add_entries_per_subcrq; + + old_buff_size = adapter->prev_rx_buf_sz; + new_buff_size = adapter->cur_rx_buf_sz; + + if (old_buff_size != new_buff_size || + old_num_pools != new_num_pools || + old_pool_size != new_pool_size) + return false; + + return true; } +/** + * init_rx_pools(): Initialize the set of receiver pools in the adapter. + * @netdev: net device associated with the vnic interface + * + * Initialize the set of receiver pools in the ibmvnic adapter associated + * with the net_device @netdev. If possible, reuse the existing rx pools. + * Otherwise free any existing pools and allocate a new set of pools + * before initializing them. + * + * Return: 0 on success and negative value on error. + */ static int init_rx_pools(struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); struct device *dev = &adapter->vdev->dev; struct ibmvnic_rx_pool *rx_pool; - int rxadd_subcrqs; - u64 *size_array; - int i, j; + u64 num_pools; + u64 pool_size; /* # of buffers in one pool */ + u64 buff_size; + int i, j, rc; - rxadd_subcrqs = - be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); - size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + - be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size)); + pool_size = adapter->req_rx_add_entries_per_subcrq; + num_pools = adapter->req_rx_queues; + buff_size = adapter->cur_rx_buf_sz; - adapter->rx_pool = kcalloc(rxadd_subcrqs, + if (reuse_rx_pools(adapter)) { + dev_dbg(dev, "Reusing rx pools\n"); + goto update_ltb; + } + + /* Allocate/populate the pools. */ + release_rx_pools(adapter); + + adapter->rx_pool = kcalloc(num_pools, sizeof(struct ibmvnic_rx_pool), GFP_KERNEL); if (!adapter->rx_pool) { dev_err(dev, "Failed to allocate rx pools\n"); - return -1; + return -ENOMEM; } - adapter->num_active_rx_pools = rxadd_subcrqs; + /* Set num_active_rx_pools early. If we fail below after partial + * allocation, release_rx_pools() will know how many to look for. + */ + adapter->num_active_rx_pools = num_pools; - for (i = 0; i < rxadd_subcrqs; i++) { + for (i = 0; i < num_pools; i++) { rx_pool = &adapter->rx_pool[i]; netdev_dbg(adapter->netdev, "Initializing rx_pool[%d], %lld buffs, %lld bytes each\n", - i, adapter->req_rx_add_entries_per_subcrq, - be64_to_cpu(size_array[i])); + i, pool_size, buff_size); - rx_pool->size = adapter->req_rx_add_entries_per_subcrq; + rx_pool->size = pool_size; rx_pool->index = i; - rx_pool->buff_size = be64_to_cpu(size_array[i]); - rx_pool->active = 1; + rx_pool->buff_size = ALIGN(buff_size, L1_CACHE_BYTES); rx_pool->free_map = kcalloc(rx_pool->size, sizeof(int), GFP_KERNEL); if (!rx_pool->free_map) { - release_rx_pools(adapter); - return -1; + dev_err(dev, "Couldn't alloc free_map %d\n", i); + rc = -ENOMEM; + goto out_release; } rx_pool->rx_buff = kcalloc(rx_pool->size, @@ -601,65 +916,60 @@ static int init_rx_pools(struct net_device *netdev) GFP_KERNEL); if (!rx_pool->rx_buff) { dev_err(dev, "Couldn't alloc rx buffers\n"); - release_rx_pools(adapter); - return -1; - } - - if (alloc_long_term_buff(adapter, &rx_pool->long_term_buff, - rx_pool->size * rx_pool->buff_size)) { - release_rx_pools(adapter); - return -1; + rc = -ENOMEM; + goto out_release; } - - for (j = 0; j < rx_pool->size; ++j) - rx_pool->free_map[j] = j; - - atomic_set(&rx_pool->available, 0); - rx_pool->next_alloc = 0; - rx_pool->next_free = 0; } - return 0; -} - -static int reset_one_tx_pool(struct ibmvnic_adapter *adapter, - struct ibmvnic_tx_pool *tx_pool) -{ - int rc, i; - - rc = reset_long_term_buff(adapter, &tx_pool->long_term_buff); - if (rc) - return rc; + adapter->prev_rx_pool_size = pool_size; + adapter->prev_rx_buf_sz = adapter->cur_rx_buf_sz; - memset(tx_pool->tx_buff, 0, - tx_pool->num_buffers * - sizeof(struct ibmvnic_tx_buff)); +update_ltb: + for (i = 0; i < num_pools; i++) { + rx_pool = &adapter->rx_pool[i]; + dev_dbg(dev, "Updating LTB for rx pool %d [%d, %d]\n", + i, rx_pool->size, rx_pool->buff_size); - for (i = 0; i < tx_pool->num_buffers; i++) - tx_pool->free_map[i] = i; + rc = alloc_ltb_set(adapter, &rx_pool->ltb_set, + rx_pool->size, rx_pool->buff_size); + if (rc) + goto out; - tx_pool->consumer_index = 0; - tx_pool->producer_index = 0; + for (j = 0; j < rx_pool->size; ++j) { + struct ibmvnic_rx_buff *rx_buff; - return 0; -} + rx_pool->free_map[j] = j; -static int reset_tx_pools(struct ibmvnic_adapter *adapter) -{ - int tx_scrqs; - int i, rc; + /* NOTE: Don't clear rx_buff->skb here - will leak + * memory! replenish_rx_pool() will reuse skbs or + * allocate as necessary. + */ + rx_buff = &rx_pool->rx_buff[j]; + rx_buff->dma = 0; + rx_buff->data = 0; + rx_buff->size = 0; + rx_buff->pool_index = 0; + } - tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs); - for (i = 0; i < tx_scrqs; i++) { - rc = reset_one_tx_pool(adapter, &adapter->tso_pool[i]); - if (rc) - return rc; - rc = reset_one_tx_pool(adapter, &adapter->tx_pool[i]); - if (rc) - return rc; + /* Mark pool "empty" so replenish_rx_pools() will + * update the LTB info for each buffer + */ + atomic_set(&rx_pool->available, 0); + rx_pool->next_alloc = 0; + rx_pool->next_free = 0; + /* replenish_rx_pool() may have called deactivate_rx_pools() + * on failover. Ensure pool is active now. + */ + rx_pool->active = 1; } - return 0; +out_release: + release_rx_pools(adapter); +out: + /* We failed to allocate one or more LTBs or map them on the VIOS. + * Hold onto the pools and any LTBs that we did allocate/map. + */ + return rc; } static void release_vpd_data(struct ibmvnic_adapter *adapter) @@ -678,13 +988,22 @@ static void release_one_tx_pool(struct ibmvnic_adapter *adapter, { kfree(tx_pool->tx_buff); kfree(tx_pool->free_map); - free_long_term_buff(adapter, &tx_pool->long_term_buff); + free_ltb_set(adapter, &tx_pool->ltb_set); } +/** + * release_tx_pools() - Release any tx pools attached to @adapter. + * @adapter: ibmvnic adapter + * + * Safe to call this multiple times - even if no pools are attached. + */ static void release_tx_pools(struct ibmvnic_adapter *adapter) { int i; + /* init_tx_pools() ensures that ->tx_pool and ->tso_pool are + * both NULL or both non-NULL. So we only need to check one. + */ if (!adapter->tx_pool) return; @@ -698,78 +1017,209 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter) kfree(adapter->tso_pool); adapter->tso_pool = NULL; adapter->num_active_tx_pools = 0; + adapter->prev_tx_pool_size = 0; } static int init_one_tx_pool(struct net_device *netdev, struct ibmvnic_tx_pool *tx_pool, - int num_entries, int buf_size) + int pool_size, int buf_size) { - struct ibmvnic_adapter *adapter = netdev_priv(netdev); int i; - tx_pool->tx_buff = kcalloc(num_entries, + tx_pool->tx_buff = kcalloc(pool_size, sizeof(struct ibmvnic_tx_buff), GFP_KERNEL); if (!tx_pool->tx_buff) - return -1; - - if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff, - num_entries * buf_size)) - return -1; + return -ENOMEM; - tx_pool->free_map = kcalloc(num_entries, sizeof(int), GFP_KERNEL); - if (!tx_pool->free_map) - return -1; + tx_pool->free_map = kcalloc(pool_size, sizeof(int), GFP_KERNEL); + if (!tx_pool->free_map) { + kfree(tx_pool->tx_buff); + tx_pool->tx_buff = NULL; + return -ENOMEM; + } - for (i = 0; i < num_entries; i++) + for (i = 0; i < pool_size; i++) tx_pool->free_map[i] = i; tx_pool->consumer_index = 0; tx_pool->producer_index = 0; - tx_pool->num_buffers = num_entries; + tx_pool->num_buffers = pool_size; tx_pool->buf_size = buf_size; return 0; } +/** + * reuse_tx_pools() - Check if the existing tx pools can be reused. + * @adapter: ibmvnic adapter + * + * Check if the existing tx pools in the adapter can be reused. The + * pools can be reused if the pool parameters (number of pools, + * number of buffers in the pool and mtu) have not changed. + * + * NOTE: This assumes that all pools have the same number of buffers + * which is the case currently. If that changes, we must fix this. + * + * Return: true if the tx pools can be reused, false otherwise. + */ +static bool reuse_tx_pools(struct ibmvnic_adapter *adapter) +{ + u64 old_num_pools, new_num_pools; + u64 old_pool_size, new_pool_size; + u64 old_mtu, new_mtu; + + if (!adapter->tx_pool) + return false; + + old_num_pools = adapter->num_active_tx_pools; + new_num_pools = adapter->num_active_tx_scrqs; + old_pool_size = adapter->prev_tx_pool_size; + new_pool_size = adapter->req_tx_entries_per_subcrq; + old_mtu = adapter->prev_mtu; + new_mtu = adapter->req_mtu; + + if (old_mtu != new_mtu || + old_num_pools != new_num_pools || + old_pool_size != new_pool_size) + return false; + + return true; +} + +/** + * init_tx_pools(): Initialize the set of transmit pools in the adapter. + * @netdev: net device associated with the vnic interface + * + * Initialize the set of transmit pools in the ibmvnic adapter associated + * with the net_device @netdev. If possible, reuse the existing tx pools. + * Otherwise free any existing pools and allocate a new set of pools + * before initializing them. + * + * Return: 0 on success and negative value on error. + */ static int init_tx_pools(struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - int tx_subcrqs; - int i, rc; + struct device *dev = &adapter->vdev->dev; + int num_pools; + u64 pool_size; /* # of buffers in pool */ + u64 buff_size; + int i, j, rc; + + num_pools = adapter->req_tx_queues; + + /* We must notify the VIOS about the LTB on all resets - but we only + * need to alloc/populate pools if either the number of buffers or + * size of each buffer in the pool has changed. + */ + if (reuse_tx_pools(adapter)) { + netdev_dbg(netdev, "Reusing tx pools\n"); + goto update_ltb; + } + + /* Allocate/populate the pools. */ + release_tx_pools(adapter); + + pool_size = adapter->req_tx_entries_per_subcrq; + num_pools = adapter->num_active_tx_scrqs; - tx_subcrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs); - adapter->tx_pool = kcalloc(tx_subcrqs, + adapter->tx_pool = kcalloc(num_pools, sizeof(struct ibmvnic_tx_pool), GFP_KERNEL); if (!adapter->tx_pool) - return -1; + return -ENOMEM; - adapter->tso_pool = kcalloc(tx_subcrqs, + adapter->tso_pool = kcalloc(num_pools, sizeof(struct ibmvnic_tx_pool), GFP_KERNEL); - if (!adapter->tso_pool) - return -1; + /* To simplify release_tx_pools() ensure that ->tx_pool and + * ->tso_pool are either both NULL or both non-NULL. + */ + if (!adapter->tso_pool) { + kfree(adapter->tx_pool); + adapter->tx_pool = NULL; + return -ENOMEM; + } + + /* Set num_active_tx_pools early. If we fail below after partial + * allocation, release_tx_pools() will know how many to look for. + */ + adapter->num_active_tx_pools = num_pools; - adapter->num_active_tx_pools = tx_subcrqs; + buff_size = adapter->req_mtu + VLAN_HLEN; + buff_size = ALIGN(buff_size, L1_CACHE_BYTES); + + for (i = 0; i < num_pools; i++) { + dev_dbg(dev, "Init tx pool %d [%llu, %llu]\n", + i, adapter->req_tx_entries_per_subcrq, buff_size); - for (i = 0; i < tx_subcrqs; i++) { rc = init_one_tx_pool(netdev, &adapter->tx_pool[i], - adapter->req_tx_entries_per_subcrq, - adapter->req_mtu + VLAN_HLEN); - if (rc) { - release_tx_pools(adapter); - return rc; - } + pool_size, buff_size); + if (rc) + goto out_release; rc = init_one_tx_pool(netdev, &adapter->tso_pool[i], IBMVNIC_TSO_BUFS, IBMVNIC_TSO_BUF_SZ); - if (rc) { - release_tx_pools(adapter); - return rc; - } + if (rc) + goto out_release; + } + + adapter->prev_tx_pool_size = pool_size; + adapter->prev_mtu = adapter->req_mtu; + +update_ltb: + /* NOTE: All tx_pools have the same number of buffers (which is + * same as pool_size). All tso_pools have IBMVNIC_TSO_BUFS + * buffers (see calls init_one_tx_pool() for these). + * For consistency, we use tx_pool->num_buffers and + * tso_pool->num_buffers below. + */ + rc = -1; + for (i = 0; i < num_pools; i++) { + struct ibmvnic_tx_pool *tso_pool; + struct ibmvnic_tx_pool *tx_pool; + + tx_pool = &adapter->tx_pool[i]; + + dev_dbg(dev, "Updating LTB for tx pool %d [%d, %d]\n", + i, tx_pool->num_buffers, tx_pool->buf_size); + + rc = alloc_ltb_set(adapter, &tx_pool->ltb_set, + tx_pool->num_buffers, tx_pool->buf_size); + if (rc) + goto out; + + tx_pool->consumer_index = 0; + tx_pool->producer_index = 0; + + for (j = 0; j < tx_pool->num_buffers; j++) + tx_pool->free_map[j] = j; + + tso_pool = &adapter->tso_pool[i]; + + dev_dbg(dev, "Updating LTB for tso pool %d [%d, %d]\n", + i, tso_pool->num_buffers, tso_pool->buf_size); + + rc = alloc_ltb_set(adapter, &tso_pool->ltb_set, + tso_pool->num_buffers, tso_pool->buf_size); + if (rc) + goto out; + + tso_pool->consumer_index = 0; + tso_pool->producer_index = 0; + + for (j = 0; j < tso_pool->num_buffers; j++) + tso_pool->free_map[j] = j; } return 0; +out_release: + release_tx_pools(adapter); +out: + /* We failed to allocate one or more LTBs or map them on the VIOS. + * Hold onto the pools and any LTBs that we did allocate/map. + */ + return rc; } static void ibmvnic_napi_enable(struct ibmvnic_adapter *adapter) @@ -812,7 +1262,7 @@ static int init_napi(struct ibmvnic_adapter *adapter) for (i = 0; i < adapter->req_rx_queues; i++) { netdev_dbg(adapter->netdev, "Adding napi[%d]\n", i); netif_napi_add(adapter->netdev, &adapter->napi[i], - ibmvnic_poll, NAPI_POLL_WEIGHT); + ibmvnic_poll); } adapter->num_active_rx_napi = adapter->req_rx_queues; @@ -837,36 +1287,72 @@ static void release_napi(struct ibmvnic_adapter *adapter) adapter->napi_enabled = false; } +static const char *adapter_state_to_string(enum vnic_state state) +{ + switch (state) { + case VNIC_PROBING: + return "PROBING"; + case VNIC_PROBED: + return "PROBED"; + case VNIC_OPENING: + return "OPENING"; + case VNIC_OPEN: + return "OPEN"; + case VNIC_CLOSING: + return "CLOSING"; + case VNIC_CLOSED: + return "CLOSED"; + case VNIC_REMOVING: + return "REMOVING"; + case VNIC_REMOVED: + return "REMOVED"; + case VNIC_DOWN: + return "DOWN"; + } + return "UNKNOWN"; +} + static int ibmvnic_login(struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - unsigned long timeout = msecs_to_jiffies(30000); + unsigned long timeout = msecs_to_jiffies(20000); int retry_count = 0; + int retries = 10; bool retry; int rc; do { retry = false; - if (retry_count > IBMVNIC_MAX_QUEUES) { + if (retry_count > retries) { netdev_warn(netdev, "Login attempts exceeded\n"); - return -1; + return -EACCES; } adapter->init_done_rc = 0; reinit_completion(&adapter->init_done); rc = send_login(adapter); - if (rc) { - netdev_warn(netdev, "Unable to login\n"); + if (rc) return rc; - } if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { - netdev_warn(netdev, "Login timed out\n"); - return -1; + netdev_warn(netdev, "Login timed out, retrying...\n"); + retry = true; + adapter->init_done_rc = 0; + retry_count++; + continue; } - if (adapter->init_done_rc == PARTIALSUCCESS) { + if (adapter->init_done_rc == ABORTED) { + netdev_warn(netdev, "Login aborted, retrying...\n"); + retry = true; + adapter->init_done_rc = 0; + retry_count++; + /* FW or device may be busy, so + * wait a bit before retrying login + */ + msleep(500); + } else if (adapter->init_done_rc == PARTIALSUCCESS) { retry_count++; release_sub_crqs(adapter, 1); @@ -875,35 +1361,37 @@ static int ibmvnic_login(struct net_device *netdev) "Received partial success, retrying...\n"); adapter->init_done_rc = 0; reinit_completion(&adapter->init_done); - send_cap_queries(adapter); + send_query_cap(adapter); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { netdev_warn(netdev, "Capabilities query timed out\n"); - return -1; + return -ETIMEDOUT; } rc = init_sub_crqs(adapter); if (rc) { netdev_warn(netdev, "SCRQ initialization failed\n"); - return -1; + return rc; } rc = init_sub_crq_irqs(adapter); if (rc) { netdev_warn(netdev, "SCRQ irq initialization failed\n"); - return -1; + return rc; } } else if (adapter->init_done_rc) { - netdev_warn(netdev, "Adapter login failed\n"); - return -1; + netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n", + adapter->init_done_rc); + return -EIO; } } while (retry); __ibmvnic_set_mac(netdev, adapter->mac_addr); + netdev_dbg(netdev, "[S:%s] Login succeeded\n", adapter_state_to_string(adapter->state)); return 0; } @@ -923,17 +1411,15 @@ static void release_resources(struct ibmvnic_adapter *adapter) { release_vpd_data(adapter); - release_tx_pools(adapter); - release_rx_pools(adapter); - release_napi(adapter); + release_login_buffer(adapter); release_login_rsp_buffer(adapter); } static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state) { struct net_device *netdev = adapter->netdev; - unsigned long timeout = msecs_to_jiffies(30000); + unsigned long timeout = msecs_to_jiffies(20000); union ibmvnic_crq crq; bool resend; int rc; @@ -958,10 +1444,10 @@ static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state) if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { netdev_err(netdev, "timeout setting link state\n"); - return -1; + return -ETIMEDOUT; } - if (adapter->init_done_rc == 1) { + if (adapter->init_done_rc == PARTIALSUCCESS) { /* Partuial success, delay and re-send */ mdelay(1000); resend = true; @@ -1100,13 +1586,11 @@ static int init_resources(struct ibmvnic_adapter *adapter) return rc; } - adapter->map_id = 1; - rc = init_napi(adapter); if (rc) return rc; - send_map_query(adapter); + send_query_map(adapter); rc = init_rx_pools(netdev); if (rc) @@ -1141,16 +1625,25 @@ static int __ibmvnic_open(struct net_device *netdev) if (prev_state == VNIC_CLOSED) enable_irq(adapter->tx_scrq[i]->irq); enable_scrq_irq(adapter, adapter->tx_scrq[i]); + netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i)); } rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP); if (rc) { - for (i = 0; i < adapter->req_rx_queues; i++) - napi_disable(&adapter->napi[i]); - release_resources(adapter); + ibmvnic_napi_disable(adapter); + ibmvnic_disable_irqs(adapter); return rc; } + adapter->tx_queues_active = true; + + /* Since queues were stopped until now, there shouldn't be any + * one in ibmvnic_complete_tx() or ibmvnic_xmit() so maybe we + * don't need the synchronize_rcu()? Leaving it for consistency + * with setting ->tx_queues_active = false. + */ + synchronize_rcu(); + netif_tx_start_all_queues(netdev); if (prev_state == VNIC_CLOSED) { @@ -1167,29 +1660,59 @@ static int ibmvnic_open(struct net_device *netdev) struct ibmvnic_adapter *adapter = netdev_priv(netdev); int rc; - /* If device failover is pending, just set device state and return. - * Device operation will be handled by reset routine. + ASSERT_RTNL(); + + /* If device failover is pending or we are about to reset, just set + * device state and return. Device operation will be handled by reset + * routine. + * + * It should be safe to overwrite the adapter->state here. Since + * we hold the rtnl, either the reset has not actually started or + * the rtnl got dropped during the set_link_state() in do_reset(). + * In the former case, no one else is changing the state (again we + * have the rtnl) and in the latter case, do_reset() will detect and + * honor our setting below. */ - if (adapter->failover_pending) { + if (adapter->failover_pending || (test_bit(0, &adapter->resetting))) { + netdev_dbg(netdev, "[S:%s FOP:%d] Resetting, deferring open\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending); adapter->state = VNIC_OPEN; - return 0; + rc = 0; + goto out; } if (adapter->state != VNIC_CLOSED) { rc = ibmvnic_login(netdev); if (rc) - return rc; + goto out; rc = init_resources(adapter); if (rc) { netdev_err(netdev, "failed to initialize resources\n"); - release_resources(adapter); - return rc; + goto out; } } rc = __ibmvnic_open(netdev); +out: + /* If open failed and there is a pending failover or in-progress reset, + * set device state and return. Device operation will be handled by + * reset routine. See also comments above regarding rtnl. + */ + if (rc && + (adapter->failover_pending || (test_bit(0, &adapter->resetting)))) { + adapter->state = VNIC_OPEN; + rc = 0; + } + + if (rc) { + release_resources(adapter); + release_rx_pools(adapter); + release_tx_pools(adapter); + } + return rc; } @@ -1295,6 +1818,14 @@ static void ibmvnic_cleanup(struct net_device *netdev) struct ibmvnic_adapter *adapter = netdev_priv(netdev); /* ensure that transmissions are stopped if called by do_reset */ + + adapter->tx_queues_active = false; + + /* Ensure complete_tx() and ibmvnic_xmit() see ->tx_queues_active + * update so they don't restart a queue after we stop it below. + */ + synchronize_rcu(); + if (test_bit(0, &adapter->resetting)) netif_tx_disable(netdev); else @@ -1302,9 +1833,6 @@ static void ibmvnic_cleanup(struct net_device *netdev) ibmvnic_napi_disable(adapter); ibmvnic_disable_irqs(adapter); - - clean_rx_pools(adapter); - clean_tx_pools(adapter); } static int __ibmvnic_close(struct net_device *netdev) @@ -1314,10 +1842,8 @@ static int __ibmvnic_close(struct net_device *netdev) adapter->state = VNIC_CLOSING; rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN); - if (rc) - return rc; adapter->state = VNIC_CLOSED; - return 0; + return rc; } static int ibmvnic_close(struct net_device *netdev) @@ -1325,6 +1851,11 @@ static int ibmvnic_close(struct net_device *netdev) struct ibmvnic_adapter *adapter = netdev_priv(netdev); int rc; + netdev_dbg(netdev, "[S:%s FOP:%d FRR:%d] Closing\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, + adapter->force_reset_recovery); + /* If device failover is pending, just set device state and return. * Device operation will be handled by reset routine. */ @@ -1335,16 +1866,18 @@ static int ibmvnic_close(struct net_device *netdev) rc = __ibmvnic_close(netdev); ibmvnic_cleanup(netdev); + clean_rx_pools(adapter); + clean_tx_pools(adapter); return rc; } /** * build_hdr_data - creates L2/L3/L4 header data buffer - * @hdr_field - bitfield determining needed headers - * @skb - socket buffer - * @hdr_len - array of header lengths - * @tot_len - total length of data + * @hdr_field: bitfield determining needed headers + * @skb: socket buffer + * @hdr_len: array of header lengths + * @hdr_data: buffer to write the header to * * Reads hdr_field to determine which headers are needed by firmware. * Builds a buffer containing these headers. Saves individual header @@ -1401,11 +1934,11 @@ static int build_hdr_data(u8 hdr_field, struct sk_buff *skb, /** * create_hdr_descs - create header and header extension descriptors - * @hdr_field - bitfield determining needed headers - * @data - buffer containing header data - * @len - length of data buffer - * @hdr_len - array of individual header lengths - * @scrq_arr - descriptor array + * @hdr_field: bitfield determining needed headers + * @hdr_data: buffer containing header data + * @len: length of data buffer + * @hdr_len: array of individual header lengths + * @scrq_arr: descriptor array * * Creates header and, if needed, header extension descriptors and * places them in a descriptor array, scrq_arr @@ -1453,26 +1986,27 @@ static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len, /** * build_hdr_descs_arr - build a header descriptor array - * @skb - socket buffer - * @num_entries - number of descriptors to be sent - * @subcrq - first TX descriptor - * @hdr_field - bit field determining which headers will be sent + * @skb: tx socket buffer + * @indir_arr: indirect array + * @num_entries: number of descriptors to be sent + * @hdr_field: bit field determining which headers will be sent * * This function will build a TX descriptor array with applicable * L2/L3/L4 packet header descriptors to be sent by send_subcrq_indirect. */ -static void build_hdr_descs_arr(struct ibmvnic_tx_buff *txbuff, +static void build_hdr_descs_arr(struct sk_buff *skb, + union sub_crq *indir_arr, int *num_entries, u8 hdr_field) { int hdr_len[3] = {0, 0, 0}; + u8 hdr_data[140] = {0}; int tot_len; - u8 *hdr_data = txbuff->hdr_data; - tot_len = build_hdr_data(hdr_field, txbuff->skb, hdr_len, - txbuff->hdr_data); + tot_len = build_hdr_data(hdr_field, skb, hdr_len, + hdr_data); *num_entries += create_hdr_descs(hdr_field, hdr_data, tot_len, hdr_len, - txbuff->indir_arr + 1); + indir_arr + 1); } static int ibmvnic_xmit_workarounds(struct sk_buff *skb, @@ -1490,17 +2024,104 @@ static int ibmvnic_xmit_workarounds(struct sk_buff *skb, return 0; } +static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *tx_scrq) +{ + struct ibmvnic_ind_xmit_queue *ind_bufp; + struct ibmvnic_tx_buff *tx_buff; + struct ibmvnic_tx_pool *tx_pool; + union sub_crq tx_scrq_entry; + int queue_num; + int entries; + int index; + int i; + + ind_bufp = &tx_scrq->ind_buf; + entries = (u64)ind_bufp->index; + queue_num = tx_scrq->pool_index; + + for (i = entries - 1; i >= 0; --i) { + tx_scrq_entry = ind_bufp->indir_arr[i]; + if (tx_scrq_entry.v1.type != IBMVNIC_TX_DESC) + continue; + index = be32_to_cpu(tx_scrq_entry.v1.correlator); + if (index & IBMVNIC_TSO_POOL_MASK) { + tx_pool = &adapter->tso_pool[queue_num]; + index &= ~IBMVNIC_TSO_POOL_MASK; + } else { + tx_pool = &adapter->tx_pool[queue_num]; + } + tx_pool->free_map[tx_pool->consumer_index] = index; + tx_pool->consumer_index = tx_pool->consumer_index == 0 ? + tx_pool->num_buffers - 1 : + tx_pool->consumer_index - 1; + tx_buff = &tx_pool->tx_buff[index]; + adapter->netdev->stats.tx_packets--; + adapter->netdev->stats.tx_bytes -= tx_buff->skb->len; + adapter->tx_stats_buffers[queue_num].packets--; + adapter->tx_stats_buffers[queue_num].bytes -= + tx_buff->skb->len; + dev_kfree_skb_any(tx_buff->skb); + tx_buff->skb = NULL; + adapter->netdev->stats.tx_dropped++; + } + + ind_bufp->index = 0; + + if (atomic_sub_return(entries, &tx_scrq->used) <= + (adapter->req_tx_entries_per_subcrq / 2) && + __netif_subqueue_stopped(adapter->netdev, queue_num)) { + rcu_read_lock(); + + if (adapter->tx_queues_active) { + netif_wake_subqueue(adapter->netdev, queue_num); + netdev_dbg(adapter->netdev, "Started queue %d\n", + queue_num); + } + + rcu_read_unlock(); + } +} + +static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *tx_scrq) +{ + struct ibmvnic_ind_xmit_queue *ind_bufp; + u64 dma_addr; + u64 entries; + u64 handle; + int rc; + + ind_bufp = &tx_scrq->ind_buf; + dma_addr = (u64)ind_bufp->indir_dma; + entries = (u64)ind_bufp->index; + handle = tx_scrq->handle; + + if (!entries) + return 0; + rc = send_subcrq_indirect(adapter, handle, dma_addr, entries); + if (rc) + ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq); + else + ind_bufp->index = 0; + return 0; +} + static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); int queue_num = skb_get_queue_mapping(skb); u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req; struct device *dev = &adapter->vdev->dev; + struct ibmvnic_ind_xmit_queue *ind_bufp; struct ibmvnic_tx_buff *tx_buff = NULL; struct ibmvnic_sub_crq_queue *tx_scrq; + struct ibmvnic_long_term_buff *ltb; struct ibmvnic_tx_pool *tx_pool; unsigned int tx_send_failed = 0; + netdev_tx_t ret = NETDEV_TX_OK; unsigned int tx_map_failed = 0; + union sub_crq indir_arr[16]; unsigned int tx_dropped = 0; unsigned int tx_packets = 0; unsigned int tx_bytes = 0; @@ -1511,14 +2132,15 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) unsigned int offset; int num_entries = 1; unsigned char *dst; - u64 *handle_array; - int index = 0; + int bufidx = 0; u8 proto = 0; - netdev_tx_t ret = NETDEV_TX_OK; - if (test_bit(0, &adapter->resetting)) { - if (!netif_subqueue_stopped(netdev, skb)) - netif_stop_subqueue(netdev, queue_num); + /* If a reset is in progress, drop the packet since + * the scrqs may get torn down. Otherwise use the + * rcu to ensure reset waits for us to complete. + */ + rcu_read_lock(); + if (!adapter->tx_queues_active) { dev_kfree_skb_any(skb); tx_send_failed++; @@ -1527,38 +2149,41 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) goto out; } + tx_scrq = adapter->tx_scrq[queue_num]; + txq = netdev_get_tx_queue(netdev, queue_num); + ind_bufp = &tx_scrq->ind_buf; + if (ibmvnic_xmit_workarounds(skb, netdev)) { tx_dropped++; tx_send_failed++; ret = NETDEV_TX_OK; + ibmvnic_tx_scrq_flush(adapter, tx_scrq); goto out; } + if (skb_is_gso(skb)) tx_pool = &adapter->tso_pool[queue_num]; else tx_pool = &adapter->tx_pool[queue_num]; - tx_scrq = adapter->tx_scrq[queue_num]; - txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb)); - handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + - be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs)); + bufidx = tx_pool->free_map[tx_pool->consumer_index]; - index = tx_pool->free_map[tx_pool->consumer_index]; - - if (index == IBMVNIC_INVALID_MAP) { + if (bufidx == IBMVNIC_INVALID_MAP) { dev_kfree_skb_any(skb); tx_send_failed++; tx_dropped++; + ibmvnic_tx_scrq_flush(adapter, tx_scrq); ret = NETDEV_TX_OK; goto out; } tx_pool->free_map[tx_pool->consumer_index] = IBMVNIC_INVALID_MAP; - offset = index * tx_pool->buf_size; - dst = tx_pool->long_term_buff.buff + offset; + map_txpool_buf_to_ltb(tx_pool, bufidx, <b, &offset); + + dst = ltb->buff + offset; memset(dst, 0, tx_pool->buf_size); - data_dma_addr = tx_pool->long_term_buff.addr + offset; + data_dma_addr = ltb->addr + offset; if (skb_shinfo(skb)->nr_frags) { int cur, i; @@ -1571,25 +2196,24 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - memcpy(dst + cur, - page_address(skb_frag_page(frag)) + - skb_frag_off(frag), skb_frag_size(frag)); + memcpy(dst + cur, skb_frag_address(frag), + skb_frag_size(frag)); cur += skb_frag_size(frag); } } else { skb_copy_from_linear_data(skb, dst, skb->len); } + /* post changes to long_term_buff *dst before VIOS accessing it */ + dma_wmb(); + tx_pool->consumer_index = (tx_pool->consumer_index + 1) % tx_pool->num_buffers; - tx_buff = &tx_pool->tx_buff[index]; + tx_buff = &tx_pool->tx_buff[bufidx]; tx_buff->skb = skb; - tx_buff->data_dma[0] = data_dma_addr; - tx_buff->data_len[0] = skb->len; - tx_buff->index = index; + tx_buff->index = bufidx; tx_buff->pool_index = queue_num; - tx_buff->last_frag = true; memset(&tx_crq, 0, sizeof(tx_crq)); tx_crq.v1.first = IBMVNIC_CRQ_CMD; @@ -1600,10 +2224,10 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) if (skb_is_gso(skb)) tx_crq.v1.correlator = - cpu_to_be32(index | IBMVNIC_TSO_POOL_MASK); + cpu_to_be32(bufidx | IBMVNIC_TSO_POOL_MASK); else - tx_crq.v1.correlator = cpu_to_be32(index); - tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id); + tx_crq.v1.correlator = cpu_to_be32(bufidx); + tx_crq.v1.dma_reg = cpu_to_be16(ltb->map_id); tx_crq.v1.sge_len = cpu_to_be32(skb->len); tx_crq.v1.ioba = cpu_to_be64(data_dma_addr); @@ -1634,55 +2258,29 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_crq.v1.mss = cpu_to_be16(skb_shinfo(skb)->gso_size); hdrs += 2; } - /* determine if l2/3/4 headers are sent to firmware */ - if ((*hdrs >> 7) & 1) { - build_hdr_descs_arr(tx_buff, &num_entries, *hdrs); - tx_crq.v1.n_crq_elem = num_entries; - tx_buff->num_entries = num_entries; - tx_buff->indir_arr[0] = tx_crq; - tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr, - sizeof(tx_buff->indir_arr), - DMA_TO_DEVICE); - if (dma_mapping_error(dev, tx_buff->indir_dma)) { - dev_kfree_skb_any(skb); - tx_buff->skb = NULL; - if (!firmware_has_feature(FW_FEATURE_CMO)) - dev_err(dev, "tx: unable to map descriptor array\n"); - tx_map_failed++; - tx_dropped++; - ret = NETDEV_TX_OK; - goto tx_err_out; - } - lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num], - (u64)tx_buff->indir_dma, - (u64)num_entries); - dma_unmap_single(dev, tx_buff->indir_dma, - sizeof(tx_buff->indir_arr), DMA_TO_DEVICE); - } else { - tx_buff->num_entries = num_entries; - lpar_rc = send_subcrq(adapter, handle_array[queue_num], - &tx_crq); - } - if (lpar_rc != H_SUCCESS) { - if (lpar_rc != H_CLOSED && lpar_rc != H_PARAMETER) - dev_err_ratelimited(dev, "tx: send failed\n"); - dev_kfree_skb_any(skb); - tx_buff->skb = NULL; - if (lpar_rc == H_CLOSED || adapter->failover_pending) { - /* Disable TX and report carrier off if queue is closed - * or pending failover. - * Firmware guarantees that a signal will be sent to the - * driver, triggering a reset or some other action. - */ - netif_tx_stop_all_queues(netdev); - netif_carrier_off(netdev); - } + if ((*hdrs >> 7) & 1) + build_hdr_descs_arr(skb, indir_arr, &num_entries, *hdrs); - tx_send_failed++; - tx_dropped++; - ret = NETDEV_TX_OK; - goto tx_err_out; + tx_crq.v1.n_crq_elem = num_entries; + tx_buff->num_entries = num_entries; + /* flush buffer if current entry can not fit */ + if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) { + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); + if (lpar_rc != H_SUCCESS) + goto tx_flush_err; + } + + indir_arr[0] = tx_crq; + memcpy(&ind_bufp->indir_arr[ind_bufp->index], &indir_arr[0], + num_entries * sizeof(struct ibmvnic_generic_scrq)); + ind_bufp->index += num_entries; + if (__netdev_tx_sent_queue(txq, skb->len, + netdev_xmit_more() && + ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) { + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); + if (lpar_rc != H_SUCCESS) + goto tx_err; } if (atomic_add_return(num_entries, &tx_scrq->used) @@ -1693,19 +2291,32 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_packets++; tx_bytes += skb->len; - txq->trans_start = jiffies; + txq_trans_cond_update(txq); ret = NETDEV_TX_OK; goto out; -tx_err_out: - /* roll back consumer index and map array*/ - if (tx_pool->consumer_index == 0) - tx_pool->consumer_index = - tx_pool->num_buffers - 1; - else - tx_pool->consumer_index--; - tx_pool->free_map[tx_pool->consumer_index] = index; +tx_flush_err: + dev_kfree_skb_any(skb); + tx_buff->skb = NULL; + tx_pool->consumer_index = tx_pool->consumer_index == 0 ? + tx_pool->num_buffers - 1 : + tx_pool->consumer_index - 1; + tx_dropped++; +tx_err: + if (lpar_rc != H_CLOSED && lpar_rc != H_PARAMETER) + dev_err_ratelimited(dev, "tx: send failed\n"); + + if (lpar_rc == H_CLOSED || adapter->failover_pending) { + /* Disable TX and report carrier off if queue is closed + * or pending failover. + * Firmware guarantees that a signal will be sent to the + * driver, triggering a reset or some other action. + */ + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); + } out: + rcu_read_unlock(); netdev->stats.tx_dropped += tx_dropped; netdev->stats.tx_bytes += tx_bytes; netdev->stats.tx_packets += tx_packets; @@ -1809,6 +2420,9 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p) int rc; rc = 0; + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + ether_addr_copy(adapter->mac_addr, addr->sa_data); if (adapter->state != VNIC_PROBED) rc = __ibmvnic_set_mac(netdev, addr->sa_data); @@ -1816,105 +2430,80 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p) return rc; } -/** - * do_change_param_reset returns zero if we are able to keep processing reset - * events, or non-zero if we hit a fatal error and must halt. +static const char *reset_reason_to_string(enum ibmvnic_reset_reason reason) +{ + switch (reason) { + case VNIC_RESET_FAILOVER: + return "FAILOVER"; + case VNIC_RESET_MOBILITY: + return "MOBILITY"; + case VNIC_RESET_FATAL: + return "FATAL"; + case VNIC_RESET_NON_FATAL: + return "NON_FATAL"; + case VNIC_RESET_TIMEOUT: + return "TIMEOUT"; + case VNIC_RESET_CHANGE_PARAM: + return "CHANGE_PARAM"; + case VNIC_RESET_PASSIVE_INIT: + return "PASSIVE_INIT"; + } + return "UNKNOWN"; +} + +/* + * Initialize the init_done completion and return code values. We + * can get a transport event just after registering the CRQ and the + * tasklet will use this to communicate the transport event. To ensure + * we don't miss the notification/error, initialize these _before_ + * regisering the CRQ. */ -static int do_change_param_reset(struct ibmvnic_adapter *adapter, - struct ibmvnic_rwi *rwi, - u32 reset_state) +static inline void reinit_init_done(struct ibmvnic_adapter *adapter) { - struct net_device *netdev = adapter->netdev; - int i, rc; - - netdev_dbg(adapter->netdev, "Change param resetting driver (%d)\n", - rwi->reset_reason); - - netif_carrier_off(netdev); - adapter->reset_reason = rwi->reset_reason; - - ibmvnic_cleanup(netdev); - - if (reset_state == VNIC_OPEN) { - rc = __ibmvnic_close(netdev); - if (rc) - return rc; - } - - release_resources(adapter); - release_sub_crqs(adapter, 1); - release_crq_queue(adapter); - - adapter->state = VNIC_PROBED; - - rc = init_crq_queue(adapter); - - if (rc) { - netdev_err(adapter->netdev, - "Couldn't initialize crq. rc=%d\n", rc); - return rc; - } - - rc = ibmvnic_reset_init(adapter); - if (rc) - return IBMVNIC_INIT_FAILED; - - /* If the adapter was in PROBE state prior to the reset, - * exit here. - */ - if (reset_state == VNIC_PROBED) - return 0; - - rc = ibmvnic_login(netdev); - if (rc) { - adapter->state = reset_state; - return rc; - } - - rc = init_resources(adapter); - if (rc) - return rc; - - ibmvnic_disable_irqs(adapter); - - adapter->state = VNIC_CLOSED; - - if (reset_state == VNIC_CLOSED) - return 0; - - rc = __ibmvnic_open(netdev); - if (rc) - return IBMVNIC_OPEN_FAILED; - - /* refresh device's multicast list */ - ibmvnic_set_multi(netdev); - - /* kick napi */ - for (i = 0; i < adapter->req_rx_queues; i++) - napi_schedule(&adapter->napi[i]); - - return 0; + reinit_completion(&adapter->init_done); + adapter->init_done_rc = 0; } -/** +/* * do_reset returns zero if we are able to keep processing reset events, or * non-zero if we hit a fatal error and must halt. */ static int do_reset(struct ibmvnic_adapter *adapter, struct ibmvnic_rwi *rwi, u32 reset_state) { + struct net_device *netdev = adapter->netdev; u64 old_num_rx_queues, old_num_tx_queues; u64 old_num_rx_slots, old_num_tx_slots; - struct net_device *netdev = adapter->netdev; - int i, rc; + int rc; - netdev_dbg(adapter->netdev, "Re-setting driver (%d)\n", - rwi->reset_reason); + netdev_dbg(adapter->netdev, + "[S:%s FOP:%d] Reset reason: %s, reset_state: %s\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, + reset_reason_to_string(rwi->reset_reason), + adapter_state_to_string(reset_state)); - rtnl_lock(); + adapter->reset_reason = rwi->reset_reason; + /* requestor of VNIC_RESET_CHANGE_PARAM already has the rtnl lock */ + if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM)) + rtnl_lock(); + + /* Now that we have the rtnl lock, clear any pending failover. + * This will ensure ibmvnic_open() has either completed or will + * block until failover is complete. + */ + if (rwi->reset_reason == VNIC_RESET_FAILOVER) + adapter->failover_pending = false; + + /* read the state and check (again) after getting rtnl */ + reset_state = adapter->state; + + if (reset_state == VNIC_REMOVING || reset_state == VNIC_REMOVED) { + rc = -EBUSY; + goto out; + } netif_carrier_off(netdev); - adapter->reset_reason = rwi->reset_reason; old_num_rx_queues = adapter->req_rx_queues; old_num_tx_queues = adapter->req_tx_queues; @@ -1926,25 +2515,53 @@ static int do_reset(struct ibmvnic_adapter *adapter, if (reset_state == VNIC_OPEN && adapter->reset_reason != VNIC_RESET_MOBILITY && adapter->reset_reason != VNIC_RESET_FAILOVER) { - adapter->state = VNIC_CLOSING; + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { + rc = __ibmvnic_close(netdev); + if (rc) + goto out; + } else { + adapter->state = VNIC_CLOSING; - /* Release the RTNL lock before link state change and - * re-acquire after the link state change to allow - * linkwatch_event to grab the RTNL lock and run during - * a reset. - */ - rtnl_unlock(); - rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN); - rtnl_lock(); - if (rc) - goto out; + /* Release the RTNL lock before link state change and + * re-acquire after the link state change to allow + * linkwatch_event to grab the RTNL lock and run during + * a reset. + */ + rtnl_unlock(); + rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN); + rtnl_lock(); + if (rc) + goto out; - if (adapter->state != VNIC_CLOSING) { - rc = -1; - goto out; + if (adapter->state == VNIC_OPEN) { + /* When we dropped rtnl, ibmvnic_open() got + * it and noticed that we are resetting and + * set the adapter state to OPEN. Update our + * new "target" state, and resume the reset + * from VNIC_CLOSING state. + */ + netdev_dbg(netdev, + "Open changed state from %s, updating.\n", + adapter_state_to_string(reset_state)); + reset_state = VNIC_OPEN; + adapter->state = VNIC_CLOSING; + } + + if (adapter->state != VNIC_CLOSING) { + /* If someone else changed the adapter state + * when we dropped the rtnl, fail the reset + */ + rc = -EAGAIN; + goto out; + } + adapter->state = VNIC_CLOSED; } + } - adapter->state = VNIC_CLOSED; + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { + release_resources(adapter); + release_sub_crqs(adapter, 1); + release_crq_queue(adapter); } if (adapter->reset_reason != VNIC_RESET_NON_FATAL) { @@ -1953,49 +2570,59 @@ static int do_reset(struct ibmvnic_adapter *adapter, */ adapter->state = VNIC_PROBED; - if (adapter->reset_reason == VNIC_RESET_MOBILITY) { + reinit_init_done(adapter); + + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { + rc = init_crq_queue(adapter); + } else if (adapter->reset_reason == VNIC_RESET_MOBILITY) { rc = ibmvnic_reenable_crq_queue(adapter); release_sub_crqs(adapter, 1); } else { rc = ibmvnic_reset_crq(adapter); - if (!rc) + if (rc == H_CLOSED || rc == H_SUCCESS) { rc = vio_enable_interrupts(adapter->vdev); + if (rc) + netdev_err(adapter->netdev, + "Reset failed to enable interrupts. rc=%d\n", + rc); + } } if (rc) { netdev_err(adapter->netdev, - "Couldn't initialize crq. rc=%d\n", rc); + "Reset couldn't initialize crq. rc=%d\n", rc); goto out; } - rc = ibmvnic_reset_init(adapter); - if (rc) { - rc = IBMVNIC_INIT_FAILED; + rc = ibmvnic_reset_init(adapter, true); + if (rc) goto out; - } - /* If the adapter was in PROBE state prior to the reset, + /* If the adapter was in PROBE or DOWN state prior to the reset, * exit here. */ - if (reset_state == VNIC_PROBED) { + if (reset_state == VNIC_PROBED || reset_state == VNIC_DOWN) { rc = 0; goto out; } rc = ibmvnic_login(netdev); - if (rc) { - adapter->state = reset_state; + if (rc) goto out; - } - if (adapter->req_rx_queues != old_num_rx_queues || + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { + rc = init_resources(adapter); + if (rc) + goto out; + } else if (adapter->req_rx_queues != old_num_rx_queues || adapter->req_tx_queues != old_num_tx_queues || adapter->req_rx_add_entries_per_subcrq != old_num_rx_slots || adapter->req_tx_entries_per_subcrq != - old_num_tx_slots) { - release_rx_pools(adapter); - release_tx_pools(adapter); + old_num_tx_slots || + !adapter->rx_pool || + !adapter->tso_pool || + !adapter->tx_pool) { release_napi(adapter); release_vpd_data(adapter); @@ -2004,13 +2631,21 @@ static int do_reset(struct ibmvnic_adapter *adapter, goto out; } else { - rc = reset_tx_pools(adapter); - if (rc) + rc = init_tx_pools(netdev); + if (rc) { + netdev_dbg(netdev, + "init tx pools failed (%d)\n", + rc); goto out; + } - rc = reset_rx_pools(adapter); - if (rc) + rc = init_rx_pools(netdev); + if (rc) { + netdev_dbg(netdev, + "init rx pools failed (%d)\n", + rc); goto out; + } } ibmvnic_disable_irqs(adapter); } @@ -2030,18 +2665,23 @@ static int do_reset(struct ibmvnic_adapter *adapter, /* refresh device's multicast list */ ibmvnic_set_multi(netdev); - /* kick napi */ - for (i = 0; i < adapter->req_rx_queues; i++) - napi_schedule(&adapter->napi[i]); - - if (adapter->reset_reason != VNIC_RESET_FAILOVER) - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev); + if (adapter->reset_reason == VNIC_RESET_FAILOVER || + adapter->reset_reason == VNIC_RESET_MOBILITY) + __netdev_notify_peers(netdev); rc = 0; out: - rtnl_unlock(); + /* restore the adapter state if reset failed */ + if (rc) + adapter->state = reset_state; + /* requestor of VNIC_RESET_CHANGE_PARAM should still hold the rtnl lock */ + if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM)) + rtnl_unlock(); + netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Reset done, rc %d\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, rc); return rc; } @@ -2051,8 +2691,16 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter, struct net_device *netdev = adapter->netdev; int rc; - netdev_dbg(adapter->netdev, "Hard resetting driver (%d)\n", - rwi->reset_reason); + netdev_dbg(adapter->netdev, "Hard resetting driver (%s)\n", + reset_reason_to_string(rwi->reset_reason)); + + /* read the state and check (again) after getting rtnl */ + reset_state = adapter->state; + + if (reset_state == VNIC_REMOVING || reset_state == VNIC_REMOVED) { + rc = -EBUSY; + goto out; + } netif_carrier_off(netdev); adapter->reset_reason = rwi->reset_reason; @@ -2067,45 +2715,54 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter, */ adapter->state = VNIC_PROBED; - reinit_completion(&adapter->init_done); + reinit_init_done(adapter); + rc = init_crq_queue(adapter); if (rc) { netdev_err(adapter->netdev, "Couldn't initialize crq. rc=%d\n", rc); - return rc; + goto out; } - rc = ibmvnic_init(adapter); + rc = ibmvnic_reset_init(adapter, false); if (rc) - return rc; + goto out; - /* If the adapter was in PROBE state prior to the reset, + /* If the adapter was in PROBE or DOWN state prior to the reset, * exit here. */ - if (reset_state == VNIC_PROBED) - return 0; + if (reset_state == VNIC_PROBED || reset_state == VNIC_DOWN) + goto out; rc = ibmvnic_login(netdev); - if (rc) { - adapter->state = VNIC_PROBED; - return 0; - } + if (rc) + goto out; rc = init_resources(adapter); if (rc) - return rc; + goto out; ibmvnic_disable_irqs(adapter); adapter->state = VNIC_CLOSED; if (reset_state == VNIC_CLOSED) - return 0; + goto out; rc = __ibmvnic_open(netdev); - if (rc) - return IBMVNIC_OPEN_FAILED; + if (rc) { + rc = IBMVNIC_OPEN_FAILED; + goto out; + } - return 0; + __netdev_notify_peers(netdev); +out: + /* restore adapter state if reset failed */ + if (rc) + adapter->state = reset_state; + netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Hard reset done, rc %d\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, rc); + return rc; } static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter) @@ -2127,33 +2784,154 @@ static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter) return rwi; } -static void free_all_rwi(struct ibmvnic_adapter *adapter) +/** + * do_passive_init - complete probing when partner device is detected. + * @adapter: ibmvnic_adapter struct + * + * If the ibmvnic device does not have a partner device to communicate with at boot + * and that partner device comes online at a later time, this function is called + * to complete the initialization process of ibmvnic device. + * Caller is expected to hold rtnl_lock(). + * + * Returns non-zero if sub-CRQs are not initialized properly leaving the device + * in the down state. + * Returns 0 upon success and the device is in PROBED state. + */ + +static int do_passive_init(struct ibmvnic_adapter *adapter) { - struct ibmvnic_rwi *rwi; + unsigned long timeout = msecs_to_jiffies(30000); + struct net_device *netdev = adapter->netdev; + struct device *dev = &adapter->vdev->dev; + int rc; - rwi = get_next_rwi(adapter); - while (rwi) { - kfree(rwi); - rwi = get_next_rwi(adapter); + netdev_dbg(netdev, "Partner device found, probing.\n"); + + adapter->state = VNIC_PROBING; + reinit_completion(&adapter->init_done); + adapter->init_done_rc = 0; + adapter->crq.active = true; + + rc = send_crq_init_complete(adapter); + if (rc) + goto out; + + rc = send_version_xchg(adapter); + if (rc) + netdev_dbg(adapter->netdev, "send_version_xchg failed, rc=%d\n", rc); + + if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { + dev_err(dev, "Initialization sequence timed out\n"); + rc = -ETIMEDOUT; + goto out; + } + + rc = init_sub_crqs(adapter); + if (rc) { + dev_err(dev, "Initialization of sub crqs failed, rc=%d\n", rc); + goto out; } + + rc = init_sub_crq_irqs(adapter); + if (rc) { + dev_err(dev, "Failed to initialize sub crq irqs\n, rc=%d", rc); + goto init_failed; + } + + netdev->mtu = adapter->req_mtu - ETH_HLEN; + netdev->min_mtu = adapter->min_mtu - ETH_HLEN; + netdev->max_mtu = adapter->max_mtu - ETH_HLEN; + + adapter->state = VNIC_PROBED; + netdev_dbg(netdev, "Probed successfully. Waiting for signal from partner device.\n"); + + return 0; + +init_failed: + release_sub_crqs(adapter, 1); +out: + adapter->state = VNIC_DOWN; + return rc; } static void __ibmvnic_reset(struct work_struct *work) { - struct ibmvnic_rwi *rwi; struct ibmvnic_adapter *adapter; + unsigned int timeout = 5000; + struct ibmvnic_rwi *tmprwi; bool saved_state = false; + struct ibmvnic_rwi *rwi; unsigned long flags; + struct device *dev; + bool need_reset; + int num_fails = 0; u32 reset_state; int rc = 0; adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset); + dev = &adapter->vdev->dev; + + /* Wait for ibmvnic_probe() to complete. If probe is taking too long + * or if another reset is in progress, defer work for now. If probe + * eventually fails it will flush and terminate our work. + * + * Three possibilities here: + * 1. Adpater being removed - just return + * 2. Timed out on probe or another reset in progress - delay the work + * 3. Completed probe - perform any resets in queue + */ + if (adapter->state == VNIC_PROBING && + !wait_for_completion_timeout(&adapter->probe_done, timeout)) { + dev_err(dev, "Reset thread timed out on probe"); + queue_delayed_work(system_long_wq, + &adapter->ibmvnic_delayed_reset, + IBMVNIC_RESET_DELAY); + return; + } - if (test_and_set_bit_lock(0, &adapter->resetting)) { - schedule_delayed_work(&adapter->ibmvnic_delayed_reset, - IBMVNIC_RESET_DELAY); + /* adapter is done with probe (i.e state is never VNIC_PROBING now) */ + if (adapter->state == VNIC_REMOVING) return; + + /* ->rwi_list is stable now (no one else is removing entries) */ + + /* ibmvnic_probe() may have purged the reset queue after we were + * scheduled to process a reset so there maybe no resets to process. + * Before setting the ->resetting bit though, we have to make sure + * that there is infact a reset to process. Otherwise we may race + * with ibmvnic_open() and end up leaving the vnic down: + * + * __ibmvnic_reset() ibmvnic_open() + * ----------------- -------------- + * + * set ->resetting bit + * find ->resetting bit is set + * set ->state to IBMVNIC_OPEN (i.e + * assume reset will open device) + * return + * find reset queue empty + * return + * + * Neither performed vnic login/open and vnic stays down + * + * If we hold the lock and conditionally set the bit, either we + * or ibmvnic_open() will complete the open. + */ + need_reset = false; + spin_lock(&adapter->rwi_lock); + if (!list_empty(&adapter->rwi_list)) { + if (test_and_set_bit_lock(0, &adapter->resetting)) { + queue_delayed_work(system_long_wq, + &adapter->ibmvnic_delayed_reset, + IBMVNIC_RESET_DELAY); + } else { + need_reset = true; + } } + spin_unlock(&adapter->rwi_lock); + + if (!need_reset) + return; rwi = get_next_rwi(adapter); while (rwi) { @@ -2169,15 +2947,23 @@ static void __ibmvnic_reset(struct work_struct *work) if (!saved_state) { reset_state = adapter->state; - adapter->state = VNIC_RESETTING; saved_state = true; } spin_unlock_irqrestore(&adapter->state_lock, flags); - if (rwi->reset_reason == VNIC_RESET_CHANGE_PARAM) { - /* CHANGE_PARAM requestor holds rtnl_lock */ - rc = do_change_param_reset(adapter, rwi, reset_state); + if (rwi->reset_reason == VNIC_RESET_PASSIVE_INIT) { + rtnl_lock(); + rc = do_passive_init(adapter); + rtnl_unlock(); + if (!rc) + netif_carrier_on(adapter->netdev); } else if (adapter->force_reset_recovery) { + /* Since we are doing a hard reset now, clear the + * failover_pending flag so we don't ignore any + * future MOBILITY or other resets. + */ + adapter->failover_pending = false; + /* Transport event occurred during previous reset */ if (adapter->wait_for_reset) { /* Previous was CHANGE_PARAM; caller locked */ @@ -2189,24 +2975,54 @@ static void __ibmvnic_reset(struct work_struct *work) rc = do_hard_reset(adapter, rwi, reset_state); rtnl_unlock(); } + if (rc) + num_fails++; + else + num_fails = 0; + + /* If auto-priority-failover is enabled we can get + * back to back failovers during resets, resulting + * in at least two failed resets (from high-priority + * backing device to low-priority one and then back) + * If resets continue to fail beyond that, give the + * adapter some time to settle down before retrying. + */ + if (num_fails >= 3) { + netdev_dbg(adapter->netdev, + "[S:%s] Hard reset failed %d times, waiting 60 secs\n", + adapter_state_to_string(adapter->state), + num_fails); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(60 * HZ); + } } else { rc = do_reset(adapter, rwi, reset_state); } - kfree(rwi); - if (rc == IBMVNIC_OPEN_FAILED) { - if (list_empty(&adapter->rwi_list)) - adapter->state = VNIC_CLOSED; - else - adapter->state = reset_state; - rc = 0; - } else if (rc && rc != IBMVNIC_INIT_FAILED && - !adapter->force_reset_recovery) - break; + tmprwi = rwi; + adapter->last_reset_time = jiffies; + + if (rc) + netdev_dbg(adapter->netdev, "Reset failed, rc=%d\n", rc); rwi = get_next_rwi(adapter); + /* + * If there are no resets queued and the previous reset failed, + * the adapter would be in an undefined state. So retry the + * previous reset as a hard reset. + * + * Else, free the previous rwi and, if there is another reset + * queued, process the new reset even if previous reset failed + * (the previous reset could have failed because of a fail + * over for instance, so process the fail over). + */ + if (!rwi && rc) + rwi = tmprwi; + else + kfree(tmprwi); + if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER || - rwi->reset_reason == VNIC_RESET_MOBILITY)) + rwi->reset_reason == VNIC_RESET_MOBILITY || rc)) adapter->force_reset_recovery = true; } @@ -2215,12 +3031,13 @@ static void __ibmvnic_reset(struct work_struct *work) complete(&adapter->reset_done); } - if (rc) { - netdev_dbg(adapter->netdev, "Reset failed\n"); - free_all_rwi(adapter); - } - clear_bit_unlock(0, &adapter->resetting); + + netdev_dbg(adapter->netdev, + "[S:%s FRR:%d WFR:%d] Done processing resets\n", + adapter_state_to_string(adapter->state), + adapter->force_reset_recovery, + adapter->wait_for_reset); } static void __ibmvnic_delayed_reset(struct work_struct *work) @@ -2232,36 +3049,45 @@ static void __ibmvnic_delayed_reset(struct work_struct *work) __ibmvnic_reset(&adapter->ibmvnic_reset); } +static void flush_reset_queue(struct ibmvnic_adapter *adapter) +{ + struct list_head *entry, *tmp_entry; + + if (!list_empty(&adapter->rwi_list)) { + list_for_each_safe(entry, tmp_entry, &adapter->rwi_list) { + list_del(entry); + kfree(list_entry(entry, struct ibmvnic_rwi, list)); + } + } +} + static int ibmvnic_reset(struct ibmvnic_adapter *adapter, enum ibmvnic_reset_reason reason) { - struct list_head *entry, *tmp_entry; - struct ibmvnic_rwi *rwi, *tmp; struct net_device *netdev = adapter->netdev; + struct ibmvnic_rwi *rwi, *tmp; unsigned long flags; int ret; + spin_lock_irqsave(&adapter->rwi_lock, flags); + + /* If failover is pending don't schedule any other reset. + * Instead let the failover complete. If there is already a + * a failover reset scheduled, we will detect and drop the + * duplicate reset when walking the ->rwi_list below. + */ if (adapter->state == VNIC_REMOVING || adapter->state == VNIC_REMOVED || - adapter->failover_pending) { + (adapter->failover_pending && reason != VNIC_RESET_FAILOVER)) { ret = EBUSY; netdev_dbg(netdev, "Adapter removing or pending failover, skipping reset\n"); goto err; } - if (adapter->state == VNIC_PROBING) { - netdev_warn(netdev, "Adapter reset during probe\n"); - ret = adapter->init_done_rc = EAGAIN; - goto err; - } - - spin_lock_irqsave(&adapter->rwi_lock, flags); - - list_for_each(entry, &adapter->rwi_list) { - tmp = list_entry(entry, struct ibmvnic_rwi, list); + list_for_each_entry(tmp, &adapter->rwi_list, list) { if (tmp->reset_reason == reason) { - netdev_dbg(netdev, "Skipping matching reset\n"); - spin_unlock_irqrestore(&adapter->rwi_lock, flags); + netdev_dbg(netdev, "Skipping matching reset, reason=%s\n", + reset_reason_to_string(reason)); ret = EBUSY; goto err; } @@ -2269,26 +3095,29 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, rwi = kzalloc(sizeof(*rwi), GFP_ATOMIC); if (!rwi) { - spin_unlock_irqrestore(&adapter->rwi_lock, flags); - ibmvnic_close(netdev); ret = ENOMEM; goto err; } /* if we just received a transport event, * flush reset queue and process this reset */ - if (adapter->force_reset_recovery && !list_empty(&adapter->rwi_list)) { - list_for_each_safe(entry, tmp_entry, &adapter->rwi_list) - list_del(entry); - } + if (adapter->force_reset_recovery) + flush_reset_queue(adapter); + rwi->reset_reason = reason; list_add_tail(&rwi->list, &adapter->rwi_list); - spin_unlock_irqrestore(&adapter->rwi_lock, flags); - netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason); - schedule_work(&adapter->ibmvnic_reset); + netdev_dbg(adapter->netdev, "Scheduling reset (reason %s)\n", + reset_reason_to_string(reason)); + queue_work(system_long_wq, &adapter->ibmvnic_reset); - return 0; + ret = 0; err: + /* ibmvnic_close() below can block, so drop the lock first */ + spin_unlock_irqrestore(&adapter->rwi_lock, flags); + + if (ret == ENOMEM) + ibmvnic_close(netdev); + return -ret; } @@ -2296,6 +3125,18 @@ static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ibmvnic_adapter *adapter = netdev_priv(dev); + if (test_bit(0, &adapter->resetting)) { + netdev_err(adapter->netdev, + "Adapter is resetting, skip timeout reset\n"); + return; + } + /* No queuing up reset until at least 5 seconds (default watchdog val) + * after last reset + */ + if (time_before(jiffies, (adapter->last_reset_time + dev->watchdog_timeo))) { + netdev_dbg(dev, "Not yet time to tx timeout.\n"); + return; + } ibmvnic_reset(adapter, VNIC_RESET_TIMEOUT); } @@ -2314,10 +3155,17 @@ static void remove_buff_from_pool(struct ibmvnic_adapter *adapter, static int ibmvnic_poll(struct napi_struct *napi, int budget) { - struct net_device *netdev = napi->dev; - struct ibmvnic_adapter *adapter = netdev_priv(netdev); - int scrq_num = (int)(napi - adapter->napi); - int frames_processed = 0; + struct ibmvnic_sub_crq_queue *rx_scrq; + struct ibmvnic_adapter *adapter; + struct net_device *netdev; + int frames_processed; + int scrq_num; + + netdev = napi->dev; + adapter = netdev_priv(netdev); + scrq_num = (int)(napi - adapter->napi); + frames_processed = 0; + rx_scrq = adapter->rx_scrq[scrq_num]; restart_poll: while (frames_processed < budget) { @@ -2330,17 +3178,16 @@ restart_poll: if (unlikely(test_bit(0, &adapter->resetting) && adapter->reset_reason != VNIC_RESET_NON_FATAL)) { - enable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]); + enable_scrq_irq(adapter, rx_scrq); napi_complete_done(napi, frames_processed); return frames_processed; } - if (!pending_scrq(adapter, adapter->rx_scrq[scrq_num])) + if (!pending_scrq(adapter, rx_scrq)) break; - next = ibmvnic_next_scrq(adapter, adapter->rx_scrq[scrq_num]); - rx_buff = - (struct ibmvnic_rx_buff *)be64_to_cpu(next-> - rx_comp.correlator); + next = ibmvnic_next_scrq(adapter, rx_scrq); + rx_buff = (struct ibmvnic_rx_buff *) + be64_to_cpu(next->rx_comp.correlator); /* do error checking */ if (next->rx_comp.rc) { netdev_dbg(netdev, "rx buffer returned with rc %x\n", @@ -2361,6 +3208,8 @@ restart_poll: offset = be16_to_cpu(next->rx_comp.off_frame_data); flags = next->rx_comp.flags; skb = rx_buff->skb; + /* load long_term_buff before copying to skb */ + dma_rmb(); skb_copy_to_linear_data(skb, rx_buff->data + offset, length); @@ -2394,16 +3243,20 @@ restart_poll: frames_processed++; } - if (adapter->state != VNIC_CLOSING) + if (adapter->state != VNIC_CLOSING && + ((atomic_read(&adapter->rx_pool[scrq_num].available) < + adapter->req_rx_add_entries_per_subcrq / 2) || + frames_processed < budget)) replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]); - if (frames_processed < budget) { - enable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]); - napi_complete_done(napi, frames_processed); - if (pending_scrq(adapter, adapter->rx_scrq[scrq_num]) && - napi_reschedule(napi)) { - disable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]); - goto restart_poll; + if (napi_complete_done(napi, frames_processed)) { + enable_scrq_irq(adapter, rx_scrq); + if (pending_scrq(adapter, rx_scrq)) { + if (napi_reschedule(napi)) { + disable_scrq_irq(adapter, rx_scrq); + goto restart_poll; + } + } } } return frames_processed; @@ -2527,9 +3380,9 @@ static void ibmvnic_get_drvinfo(struct net_device *netdev, { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - strlcpy(info->driver, ibmvnic_driver_name, sizeof(info->driver)); - strlcpy(info->version, IBMVNIC_DRIVER_VERSION, sizeof(info->version)); - strlcpy(info->fw_version, adapter->fw_version, + strscpy(info->driver, ibmvnic_driver_name, sizeof(info->driver)); + strscpy(info->version, IBMVNIC_DRIVER_VERSION, sizeof(info->version)); + strscpy(info->fw_version, adapter->fw_version, sizeof(info->fw_version)); } @@ -2558,17 +3411,14 @@ static u32 ibmvnic_get_link(struct net_device *netdev) } static void ibmvnic_get_ringparam(struct net_device *netdev, - struct ethtool_ringparam *ring) + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - if (adapter->priv_flags & IBMVNIC_USE_SERVER_MAXES) { - ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq; - ring->tx_max_pending = adapter->max_tx_entries_per_subcrq; - } else { - ring->rx_max_pending = IBMVNIC_MAX_QUEUE_SZ; - ring->tx_max_pending = IBMVNIC_MAX_QUEUE_SZ; - } + ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq; + ring->tx_max_pending = adapter->max_tx_entries_per_subcrq; ring->rx_mini_max_pending = 0; ring->rx_jumbo_max_pending = 0; ring->rx_pending = adapter->req_rx_add_entries_per_subcrq; @@ -2578,26 +3428,26 @@ static void ibmvnic_get_ringparam(struct net_device *netdev, } static int ibmvnic_set_ringparam(struct net_device *netdev, - struct ethtool_ringparam *ring) + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - int ret; - ret = 0; + if (ring->rx_pending > adapter->max_rx_add_entries_per_subcrq || + ring->tx_pending > adapter->max_tx_entries_per_subcrq) { + netdev_err(netdev, "Invalid request.\n"); + netdev_err(netdev, "Max tx buffers = %llu\n", + adapter->max_rx_add_entries_per_subcrq); + netdev_err(netdev, "Max rx buffers = %llu\n", + adapter->max_tx_entries_per_subcrq); + return -EINVAL; + } + adapter->desired.rx_entries = ring->rx_pending; adapter->desired.tx_entries = ring->tx_pending; - ret = wait_for_reset(adapter); - - if (!ret && - (adapter->req_rx_add_entries_per_subcrq != ring->rx_pending || - adapter->req_tx_entries_per_subcrq != ring->tx_pending)) - netdev_info(netdev, - "Could not match full ringsize request. Requested: RX %d, TX %d; Allowed: RX %llu, TX %llu\n", - ring->rx_pending, ring->tx_pending, - adapter->req_rx_add_entries_per_subcrq, - adapter->req_tx_entries_per_subcrq); - return ret; + return wait_for_reset(adapter); } static void ibmvnic_get_channels(struct net_device *netdev, @@ -2605,14 +3455,8 @@ static void ibmvnic_get_channels(struct net_device *netdev, { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - if (adapter->priv_flags & IBMVNIC_USE_SERVER_MAXES) { - channels->max_rx = adapter->max_rx_queues; - channels->max_tx = adapter->max_tx_queues; - } else { - channels->max_rx = IBMVNIC_MAX_QUEUES; - channels->max_tx = IBMVNIC_MAX_QUEUES; - } - + channels->max_rx = adapter->max_rx_queues; + channels->max_tx = adapter->max_tx_queues; channels->max_other = 0; channels->max_combined = 0; channels->rx_count = adapter->req_rx_queues; @@ -2625,23 +3469,11 @@ static int ibmvnic_set_channels(struct net_device *netdev, struct ethtool_channels *channels) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - int ret; - ret = 0; adapter->desired.rx_queues = channels->rx_count; adapter->desired.tx_queues = channels->tx_count; - ret = wait_for_reset(adapter); - - if (!ret && - (adapter->req_rx_queues != channels->rx_count || - adapter->req_tx_queues != channels->tx_count)) - netdev_info(netdev, - "Could not match full channels request. Requested: RX %d, TX %d; Allowed: RX %llu, TX %llu\n", - channels->rx_count, channels->tx_count, - adapter->req_rx_queues, adapter->req_tx_queues); - return ret; - + return wait_for_reset(adapter); } static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data) @@ -2649,43 +3481,32 @@ static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data) struct ibmvnic_adapter *adapter = netdev_priv(dev); int i; - switch (stringset) { - case ETH_SS_STATS: - for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); - i++, data += ETH_GSTRING_LEN) - memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN); + if (stringset != ETH_SS_STATS) + return; - for (i = 0; i < adapter->req_tx_queues; i++) { - snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i); - data += ETH_GSTRING_LEN; + for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++, data += ETH_GSTRING_LEN) + memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN); - snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i); - data += ETH_GSTRING_LEN; + for (i = 0; i < adapter->req_tx_queues; i++) { + snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i); + data += ETH_GSTRING_LEN; - snprintf(data, ETH_GSTRING_LEN, - "tx%d_dropped_packets", i); - data += ETH_GSTRING_LEN; - } + snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i); + data += ETH_GSTRING_LEN; - for (i = 0; i < adapter->req_rx_queues; i++) { - snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i); - data += ETH_GSTRING_LEN; + snprintf(data, ETH_GSTRING_LEN, "tx%d_dropped_packets", i); + data += ETH_GSTRING_LEN; + } - snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i); - data += ETH_GSTRING_LEN; + for (i = 0; i < adapter->req_rx_queues; i++) { + snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i); + data += ETH_GSTRING_LEN; - snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i); - data += ETH_GSTRING_LEN; - } - break; + snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i); + data += ETH_GSTRING_LEN; - case ETH_SS_PRIV_FLAGS: - for (i = 0; i < ARRAY_SIZE(ibmvnic_priv_flags); i++) - strcpy(data + i * ETH_GSTRING_LEN, - ibmvnic_priv_flags[i]); - break; - default: - return; + snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i); + data += ETH_GSTRING_LEN; } } @@ -2698,8 +3519,6 @@ static int ibmvnic_get_sset_count(struct net_device *dev, int sset) return ARRAY_SIZE(ibmvnic_stats) + adapter->req_tx_queues * NUM_TX_STATS + adapter->req_rx_queues * NUM_RX_STATS; - case ETH_SS_PRIV_FLAGS: - return ARRAY_SIZE(ibmvnic_priv_flags); default: return -EOPNOTSUPP; } @@ -2730,8 +3549,8 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev, return; for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++) - data[i] = be64_to_cpu(IBMVNIC_GET_STAT(adapter, - ibmvnic_stats[i].offset)); + data[i] = be64_to_cpu(IBMVNIC_GET_STAT + (adapter, ibmvnic_stats[i].offset)); for (j = 0; j < adapter->req_tx_queues; j++) { data[i] = adapter->tx_stats_buffers[j].packets; @@ -2752,25 +3571,6 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev, } } -static u32 ibmvnic_get_priv_flags(struct net_device *netdev) -{ - struct ibmvnic_adapter *adapter = netdev_priv(netdev); - - return adapter->priv_flags; -} - -static int ibmvnic_set_priv_flags(struct net_device *netdev, u32 flags) -{ - struct ibmvnic_adapter *adapter = netdev_priv(netdev); - bool which_maxes = !!(flags & IBMVNIC_USE_SERVER_MAXES); - - if (which_maxes) - adapter->priv_flags |= IBMVNIC_USE_SERVER_MAXES; - else - adapter->priv_flags &= ~IBMVNIC_USE_SERVER_MAXES; - - return 0; -} static const struct ethtool_ops ibmvnic_ethtool_ops = { .get_drvinfo = ibmvnic_get_drvinfo, .get_msglevel = ibmvnic_get_msglevel, @@ -2784,8 +3584,6 @@ static const struct ethtool_ops ibmvnic_ethtool_ops = { .get_sset_count = ibmvnic_get_sset_count, .get_ethtool_stats = ibmvnic_get_ethtool_stats, .get_link_ksettings = ibmvnic_get_link_ksettings, - .get_priv_flags = ibmvnic_get_priv_flags, - .set_priv_flags = ibmvnic_set_priv_flags, }; /* Routines for managing CRQs/sCRQs */ @@ -2795,15 +3593,26 @@ static int reset_one_sub_crq_queue(struct ibmvnic_adapter *adapter, { int rc; + if (!scrq) { + netdev_dbg(adapter->netdev, "Invalid scrq reset.\n"); + return -EINVAL; + } + if (scrq->irq) { free_irq(scrq->irq, scrq); irq_dispose_mapping(scrq->irq); scrq->irq = 0; } - memset(scrq->msgs, 0, 4 * PAGE_SIZE); - atomic_set(&scrq->used, 0); - scrq->cur = 0; + if (scrq->msgs) { + memset(scrq->msgs, 0, 4 * PAGE_SIZE); + atomic_set(&scrq->used, 0); + scrq->cur = 0; + scrq->ind_buf.index = 0; + } else { + netdev_dbg(adapter->netdev, "Invalid scrq reset\n"); + return -EINVAL; + } rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token, 4 * PAGE_SIZE, &scrq->crq_num, &scrq->hw_irq); @@ -2814,6 +3623,9 @@ static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter) { int i, rc; + if (!adapter->tx_scrq || !adapter->rx_scrq) + return -EINVAL; + for (i = 0; i < adapter->req_tx_queues; i++) { netdev_dbg(adapter->netdev, "Re-setting tx_scrq[%d]\n", i); rc = reset_one_sub_crq_queue(adapter, adapter->tx_scrq[i]); @@ -2855,6 +3667,11 @@ static void release_sub_crq_queue(struct ibmvnic_adapter *adapter, } } + dma_free_coherent(dev, + IBMVNIC_IND_ARR_SZ, + scrq->ind_buf.indir_arr, + scrq->ind_buf.indir_dma); + dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE, DMA_BIDIRECTIONAL); free_pages((unsigned long)scrq->msgs, 2); @@ -2901,6 +3718,17 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter scrq->adapter = adapter; scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs); + scrq->ind_buf.index = 0; + + scrq->ind_buf.indir_arr = + dma_alloc_coherent(dev, + IBMVNIC_IND_ARR_SZ, + &scrq->ind_buf.indir_dma, + GFP_KERNEL); + + if (!scrq->ind_buf.indir_arr) + goto indir_failed; + spin_lock_init(&scrq->lock); netdev_dbg(adapter->netdev, @@ -2909,6 +3737,12 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter return scrq; +indir_failed: + do { + rc = plpar_hcall_norets(H_FREE_SUB_CRQ, + adapter->vdev->unit_address, + scrq->crq_num); + } while (rc == H_BUSY || rc == H_IS_LONG_BUSY(rc)); reg_failed: dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE, DMA_BIDIRECTIONAL); @@ -2931,6 +3765,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free) netdev_dbg(adapter->netdev, "Releasing tx_scrq[%d]\n", i); + ibmvnic_tx_scrq_clean_buffer(adapter, adapter->tx_scrq[i]); if (adapter->tx_scrq[i]->irq) { free_irq(adapter->tx_scrq[i]->irq, adapter->tx_scrq[i]); @@ -2985,6 +3820,30 @@ static int disable_scrq_irq(struct ibmvnic_adapter *adapter, return rc; } +/* We can not use the IRQ chip EOI handler because that has the + * unintended effect of changing the interrupt priority. + */ +static void ibmvnic_xics_eoi(struct device *dev, struct ibmvnic_sub_crq_queue *scrq) +{ + u64 val = 0xff000000 | scrq->hw_irq; + unsigned long rc; + + rc = plpar_hcall_norets(H_EOI, val); + if (rc) + dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", val, rc); +} + +/* Due to a firmware bug, the hypervisor can send an interrupt to a + * transmit or receive queue just prior to a partition migration. + * Force an EOI after migration. + */ +static void ibmvnic_clear_pending_interrupt(struct device *dev, + struct ibmvnic_sub_crq_queue *scrq) +{ + if (!xive_enabled()) + ibmvnic_xics_eoi(dev, scrq); +} + static int enable_scrq_irq(struct ibmvnic_adapter *adapter, struct ibmvnic_sub_crq_queue *scrq) { @@ -2998,15 +3857,7 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter, if (test_bit(0, &adapter->resetting) && adapter->reset_reason == VNIC_RESET_MOBILITY) { - u64 val = (0xff000000) | scrq->hw_irq; - - rc = plpar_hcall_norets(H_EOI, val); - /* H_EOI would fail with rc = H_FUNCTION when running - * in XIVE mode which is expected, but not an error. - */ - if (rc && (rc != H_FUNCTION)) - dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", - val, rc); + ibmvnic_clear_pending_interrupt(dev, scrq); } rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, @@ -3023,22 +3874,20 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, struct device *dev = &adapter->vdev->dev; struct ibmvnic_tx_pool *tx_pool; struct ibmvnic_tx_buff *txbuff; + struct netdev_queue *txq; union sub_crq *next; int index; - int i, j; + int i; restart_loop: while (pending_scrq(adapter, scrq)) { unsigned int pool = scrq->pool_index; int num_entries = 0; + int total_bytes = 0; + int num_packets = 0; next = ibmvnic_next_scrq(adapter, scrq); for (i = 0; i < next->tx_comp.num_comps; i++) { - if (next->tx_comp.rcs[i]) { - dev_err(dev, "tx error %x\n", - next->tx_comp.rcs[i]); - continue; - } index = be32_to_cpu(next->tx_comp.correlators[i]); if (index & IBMVNIC_TSO_POOL_MASK) { tx_pool = &adapter->tso_pool[pool]; @@ -3048,21 +3897,22 @@ restart_loop: } txbuff = &tx_pool->tx_buff[index]; - - for (j = 0; j < IBMVNIC_MAX_FRAGS_PER_CRQ; j++) { - if (!txbuff->data_dma[j]) - continue; - - txbuff->data_dma[j] = 0; - } - - if (txbuff->last_frag) { - dev_kfree_skb_any(txbuff->skb); + num_packets++; + num_entries += txbuff->num_entries; + if (txbuff->skb) { + total_bytes += txbuff->skb->len; + if (next->tx_comp.rcs[i]) { + dev_err(dev, "tx error %x\n", + next->tx_comp.rcs[i]); + dev_kfree_skb_irq(txbuff->skb); + } else { + dev_consume_skb_irq(txbuff->skb); + } txbuff->skb = NULL; + } else { + netdev_warn(adapter->netdev, + "TX completion received with NULL socket buffer\n"); } - - num_entries += txbuff->num_entries; - tx_pool->free_map[tx_pool->producer_index] = index; tx_pool->producer_index = (tx_pool->producer_index + 1) % @@ -3071,13 +3921,22 @@ restart_loop: /* remove tx_comp scrq*/ next->tx_comp.first = 0; + txq = netdev_get_tx_queue(adapter->netdev, scrq->pool_index); + netdev_tx_completed_queue(txq, num_packets, total_bytes); + if (atomic_sub_return(num_entries, &scrq->used) <= (adapter->req_tx_entries_per_subcrq / 2) && __netif_subqueue_stopped(adapter->netdev, scrq->pool_index)) { - netif_wake_subqueue(adapter->netdev, scrq->pool_index); - netdev_dbg(adapter->netdev, "Started queue %d\n", - scrq->pool_index); + rcu_read_lock(); + if (adapter->tx_queues_active) { + netif_wake_subqueue(adapter->netdev, + scrq->pool_index); + netdev_dbg(adapter->netdev, + "Started queue %d\n", + scrq->pool_index); + } + rcu_read_unlock(); } } @@ -3187,7 +4046,7 @@ req_rx_irq_failed: req_tx_irq_failed: for (j = 0; j < i; j++) { free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]); - irq_dispose_mapping(adapter->rx_scrq[j]->irq); + irq_dispose_mapping(adapter->tx_scrq[j]->irq); } release_sub_crqs(adapter, 1); return rc; @@ -3206,7 +4065,7 @@ static int init_sub_crqs(struct ibmvnic_adapter *adapter) allqueues = kcalloc(total_queues, sizeof(*allqueues), GFP_KERNEL); if (!allqueues) - return -1; + return -ENOMEM; for (i = 0; i < total_queues; i++) { allqueues[i] = init_sub_crq_queue(adapter); @@ -3275,19 +4134,33 @@ tx_failed: for (i = 0; i < registered_queues; i++) release_sub_crq_queue(adapter, allqueues[i], 1); kfree(allqueues); - return -1; + return -ENOMEM; } -static void ibmvnic_send_req_caps(struct ibmvnic_adapter *adapter, int retry) +static void send_request_cap(struct ibmvnic_adapter *adapter, int retry) { struct device *dev = &adapter->vdev->dev; union ibmvnic_crq crq; int max_entries; + int cap_reqs; + + /* We send out 6 or 7 REQUEST_CAPABILITY CRQs below (depending on + * the PROMISC flag). Initialize this count upfront. When the tasklet + * receives a response to all of these, it will send the next protocol + * message (QUERY_IP_OFFLOAD). + */ + if (!(adapter->netdev->flags & IFF_PROMISC) || + adapter->promisc_supported) + cap_reqs = 7; + else + cap_reqs = 6; if (!retry) { /* Sub-CRQ entries are 32 byte long */ int entries_page = 4 * PAGE_SIZE / (sizeof(u64) * 4); + atomic_set(&adapter->running_cap_crqs, cap_reqs); + if (adapter->min_tx_entries_per_subcrq > entries_page || adapter->min_rx_add_entries_per_subcrq > entries_page) { dev_err(dev, "Fatal, invalid entries per sub-crq\n"); @@ -3306,16 +4179,16 @@ static void ibmvnic_send_req_caps(struct ibmvnic_adapter *adapter, int retry) adapter->desired.rx_entries = adapter->max_rx_add_entries_per_subcrq; - max_entries = IBMVNIC_MAX_LTB_SIZE / + max_entries = IBMVNIC_LTB_SET_SIZE / (adapter->req_mtu + IBMVNIC_BUFFER_HLEN); if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * - adapter->desired.tx_entries > IBMVNIC_MAX_LTB_SIZE) { + adapter->desired.tx_entries > IBMVNIC_LTB_SET_SIZE) { adapter->desired.tx_entries = max_entries; } if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * - adapter->desired.rx_entries > IBMVNIC_MAX_LTB_SIZE) { + adapter->desired.rx_entries > IBMVNIC_LTB_SET_SIZE) { adapter->desired.rx_entries = max_entries; } @@ -3348,44 +4221,45 @@ static void ibmvnic_send_req_caps(struct ibmvnic_adapter *adapter, int retry) adapter->opt_rx_comp_queues; adapter->req_rx_add_queues = adapter->max_rx_add_queues; + } else { + atomic_add(cap_reqs, &adapter->running_cap_crqs); } - memset(&crq, 0, sizeof(crq)); crq.request_capability.first = IBMVNIC_CRQ_CMD; crq.request_capability.cmd = REQUEST_CAPABILITY; crq.request_capability.capability = cpu_to_be16(REQ_TX_QUEUES); crq.request_capability.number = cpu_to_be64(adapter->req_tx_queues); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_RX_QUEUES); crq.request_capability.number = cpu_to_be64(adapter->req_rx_queues); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_QUEUES); crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_queues); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_TX_ENTRIES_PER_SUBCRQ); crq.request_capability.number = cpu_to_be64(adapter->req_tx_entries_per_subcrq); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_ENTRIES_PER_SUBCRQ); crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_entries_per_subcrq); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_MTU); crq.request_capability.number = cpu_to_be64(adapter->req_mtu); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); if (adapter->netdev->flags & IFF_PROMISC) { @@ -3393,27 +4267,37 @@ static void ibmvnic_send_req_caps(struct ibmvnic_adapter *adapter, int retry) crq.request_capability.capability = cpu_to_be16(PROMISC_REQUESTED); crq.request_capability.number = cpu_to_be64(1); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); } } else { crq.request_capability.capability = cpu_to_be16(PROMISC_REQUESTED); crq.request_capability.number = cpu_to_be64(0); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); } + + /* Keep at end to catch any discrepancy between expected and actual + * CRQs sent. + */ + WARN_ON(cap_reqs != 0); } static int pending_scrq(struct ibmvnic_adapter *adapter, struct ibmvnic_sub_crq_queue *scrq) { union sub_crq *entry = &scrq->msgs[scrq->cur]; + int rc; - if (entry->generic.first & IBMVNIC_CRQ_CMD_RSP) - return 1; - else - return 0; + rc = !!(entry->generic.first & IBMVNIC_CRQ_CMD_RSP); + + /* Ensure that the SCRQ valid flag is loaded prior to loading the + * contents of the SCRQ descriptor + */ + dma_rmb(); + + return rc; } static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *adapter, @@ -3432,6 +4316,11 @@ static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *adapter, } spin_unlock_irqrestore(&scrq->lock, flags); + /* Ensure that the SCRQ valid flag is loaded prior to loading the + * contents of the SCRQ descriptor + */ + dma_rmb(); + return entry; } @@ -3470,38 +4359,6 @@ static void print_subcrq_error(struct device *dev, int rc, const char *func) } } -static int send_subcrq(struct ibmvnic_adapter *adapter, u64 remote_handle, - union sub_crq *sub_crq) -{ - unsigned int ua = adapter->vdev->unit_address; - struct device *dev = &adapter->vdev->dev; - u64 *u64_crq = (u64 *)sub_crq; - int rc; - - netdev_dbg(adapter->netdev, - "Sending sCRQ %016lx: %016lx %016lx %016lx %016lx\n", - (unsigned long int)cpu_to_be64(remote_handle), - (unsigned long int)cpu_to_be64(u64_crq[0]), - (unsigned long int)cpu_to_be64(u64_crq[1]), - (unsigned long int)cpu_to_be64(u64_crq[2]), - (unsigned long int)cpu_to_be64(u64_crq[3])); - - /* Make sure the hypervisor sees the complete request */ - mb(); - - rc = plpar_hcall_norets(H_SEND_SUB_CRQ, ua, - cpu_to_be64(remote_handle), - cpu_to_be64(u64_crq[0]), - cpu_to_be64(u64_crq[1]), - cpu_to_be64(u64_crq[2]), - cpu_to_be64(u64_crq[3])); - - if (rc) - print_subcrq_error(dev, rc, __func__); - - return rc; -} - static int send_subcrq_indirect(struct ibmvnic_adapter *adapter, u64 remote_handle, u64 ioba, u64 num_entries) { @@ -3510,7 +4367,7 @@ static int send_subcrq_indirect(struct ibmvnic_adapter *adapter, int rc; /* Make sure the hypervisor sees the complete request */ - mb(); + dma_wmb(); rc = plpar_hcall_norets(H_SEND_SUB_CRQ_INDIRECT, ua, cpu_to_be64(remote_handle), ioba, num_entries); @@ -3530,8 +4387,8 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter, int rc; netdev_dbg(adapter->netdev, "Sending CRQ: %016lx %016lx\n", - (unsigned long int)cpu_to_be64(u64_crq[0]), - (unsigned long int)cpu_to_be64(u64_crq[1])); + (unsigned long)cpu_to_be64(u64_crq[0]), + (unsigned long)cpu_to_be64(u64_crq[1])); if (!adapter->crq.active && crq->generic.first != IBMVNIC_CRQ_INIT_CMD) { @@ -3540,7 +4397,7 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter, } /* Make sure the hypervisor sees the complete request */ - mb(); + dma_wmb(); rc = plpar_hcall_norets(H_SEND_CRQ, ua, cpu_to_be64(u64_crq[0]), @@ -3549,8 +4406,7 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter, if (rc) { if (rc == H_CLOSED) { dev_warn(dev, "CRQ Queue closed\n"); - if (test_bit(0, &adapter->resetting)) - ibmvnic_reset(adapter, VNIC_RESET_FATAL); + /* do not reset, report the fail, wait for passive init from server */ } dev_warn(dev, "Send error (rc=%d)\n", rc); @@ -3561,26 +4417,31 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter, static int ibmvnic_send_crq_init(struct ibmvnic_adapter *adapter) { + struct device *dev = &adapter->vdev->dev; union ibmvnic_crq crq; + int retries = 100; + int rc; memset(&crq, 0, sizeof(crq)); crq.generic.first = IBMVNIC_CRQ_INIT_CMD; crq.generic.cmd = IBMVNIC_CRQ_INIT; netdev_dbg(adapter->netdev, "Sending CRQ init\n"); - return ibmvnic_send_crq(adapter, &crq); -} + do { + rc = ibmvnic_send_crq(adapter, &crq); + if (rc != H_CLOSED) + break; + retries--; + msleep(50); -static int send_version_xchg(struct ibmvnic_adapter *adapter) -{ - union ibmvnic_crq crq; + } while (retries > 0); - memset(&crq, 0, sizeof(crq)); - crq.version_exchange.first = IBMVNIC_CRQ_CMD; - crq.version_exchange.cmd = VERSION_EXCHANGE; - crq.version_exchange.version = cpu_to_be16(ibmvnic_version); + if (rc) { + dev_err(dev, "Failed to send init request, rc = %d\n", rc); + return rc; + } - return ibmvnic_send_crq(adapter, &crq); + return 0; } struct vnic_login_client_data { @@ -3615,21 +4476,21 @@ static void vnic_add_client_data(struct ibmvnic_adapter *adapter, vlcd->type = 1; len = strlen(os_name) + 1; vlcd->len = cpu_to_be16(len); - strncpy(vlcd->name, os_name, len); + strscpy(vlcd->name, os_name, len); vlcd = (struct vnic_login_client_data *)(vlcd->name + len); /* Type 2 - LPAR name */ vlcd->type = 2; len = strlen(utsname()->nodename) + 1; vlcd->len = cpu_to_be16(len); - strncpy(vlcd->name, utsname()->nodename, len); + strscpy(vlcd->name, utsname()->nodename, len); vlcd = (struct vnic_login_client_data *)(vlcd->name + len); /* Type 3 - device name */ vlcd->type = 3; len = strlen(adapter->netdev->name) + 1; vlcd->len = cpu_to_be16(len); - strncpy(vlcd->name, adapter->netdev->name, len); + strscpy(vlcd->name, adapter->netdev->name, len); } static int send_login(struct ibmvnic_adapter *adapter) @@ -3637,24 +4498,27 @@ static int send_login(struct ibmvnic_adapter *adapter) struct ibmvnic_login_rsp_buffer *login_rsp_buffer; struct ibmvnic_login_buffer *login_buffer; struct device *dev = &adapter->vdev->dev; + struct vnic_login_client_data *vlcd; dma_addr_t rsp_buffer_token; dma_addr_t buffer_token; size_t rsp_buffer_size; union ibmvnic_crq crq; + int client_data_len; size_t buffer_size; __be64 *tx_list_p; __be64 *rx_list_p; - int client_data_len; - struct vnic_login_client_data *vlcd; + int rc; int i; if (!adapter->tx_scrq || !adapter->rx_scrq) { netdev_err(adapter->netdev, "RX or TX queues are not allocated, device login failed\n"); - return -1; + return -ENOMEM; } + release_login_buffer(adapter); release_login_rsp_buffer(adapter); + client_data_len = vnic_client_data_len(adapter); buffer_size = @@ -3717,15 +4581,15 @@ static int send_login(struct ibmvnic_adapter *adapter) for (i = 0; i < adapter->req_tx_queues; i++) { if (adapter->tx_scrq[i]) { - tx_list_p[i] = cpu_to_be64(adapter->tx_scrq[i]-> - crq_num); + tx_list_p[i] = + cpu_to_be64(adapter->tx_scrq[i]->crq_num); } } for (i = 0; i < adapter->req_rx_queues; i++) { if (adapter->rx_scrq[i]) { - rx_list_p[i] = cpu_to_be64(adapter->rx_scrq[i]-> - crq_num); + rx_list_p[i] = + cpu_to_be64(adapter->rx_scrq[i]->crq_num); } } @@ -3741,7 +4605,7 @@ static int send_login(struct ibmvnic_adapter *adapter) netdev_dbg(adapter->netdev, "Login Buffer:\n"); for (i = 0; i < (adapter->login_buf_sz - 1) / 8 + 1; i++) { netdev_dbg(adapter->netdev, "%016lx\n", - ((unsigned long int *)(adapter->login_buf))[i]); + ((unsigned long *)(adapter->login_buf))[i]); } memset(&crq, 0, sizeof(crq)); @@ -3749,18 +4613,27 @@ static int send_login(struct ibmvnic_adapter *adapter) crq.login.cmd = LOGIN; crq.login.ioba = cpu_to_be32(buffer_token); crq.login.len = cpu_to_be32(buffer_size); - ibmvnic_send_crq(adapter, &crq); + + adapter->login_pending = true; + rc = ibmvnic_send_crq(adapter, &crq); + if (rc) { + adapter->login_pending = false; + netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc); + goto buf_rsp_map_failed; + } return 0; buf_rsp_map_failed: kfree(login_rsp_buffer); + adapter->login_rsp_buf = NULL; buf_rsp_alloc_failed: dma_unmap_single(dev, buffer_token, buffer_size, DMA_TO_DEVICE); buf_map_failed: kfree(login_buffer); + adapter->login_buf = NULL; buf_alloc_failed: - return -1; + return -ENOMEM; } static int send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr, @@ -3788,7 +4661,7 @@ static int send_request_unmap(struct ibmvnic_adapter *adapter, u8 map_id) return ibmvnic_send_crq(adapter, &crq); } -static void send_map_query(struct ibmvnic_adapter *adapter) +static void send_query_map(struct ibmvnic_adapter *adapter) { union ibmvnic_crq crq; @@ -3799,120 +4672,241 @@ static void send_map_query(struct ibmvnic_adapter *adapter) } /* Send a series of CRQs requesting various capabilities of the VNIC server */ -static void send_cap_queries(struct ibmvnic_adapter *adapter) +static void send_query_cap(struct ibmvnic_adapter *adapter) { union ibmvnic_crq crq; + int cap_reqs; + + /* We send out 25 QUERY_CAPABILITY CRQs below. Initialize this count + * upfront. When the tasklet receives a response to all of these, it + * can send out the next protocol messaage (REQUEST_CAPABILITY). + */ + cap_reqs = 25; + + atomic_set(&adapter->running_cap_crqs, cap_reqs); - atomic_set(&adapter->running_cap_crqs, 0); memset(&crq, 0, sizeof(crq)); crq.query_capability.first = IBMVNIC_CRQ_CMD; crq.query_capability.cmd = QUERY_CAPABILITY; crq.query_capability.capability = cpu_to_be16(MIN_TX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_RX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_TX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_RX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_TX_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_TX_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(TCP_IP_OFFLOAD); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(PROMISC_SUPPORTED); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_MTU); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_MTU); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_MULTICAST_FILTERS); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(VLAN_HEADER_INSERTION); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(RX_VLAN_HEADER_INSERTION); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_TX_SG_ENTRIES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(RX_SG_SUPPORTED); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_TX_COMP_SUB_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_RX_COMP_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_RX_BUFADD_Q_PER_RX_COMP_Q); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_TX_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_RXBA_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(TX_RX_DESC_REQ); - atomic_inc(&adapter->running_cap_crqs); + + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + /* Keep at end to catch any discrepancy between expected and actual + * CRQs sent. + */ + WARN_ON(cap_reqs != 0); +} + +static void send_query_ip_offload(struct ibmvnic_adapter *adapter) +{ + int buf_sz = sizeof(struct ibmvnic_query_ip_offload_buffer); + struct device *dev = &adapter->vdev->dev; + union ibmvnic_crq crq; + + adapter->ip_offload_tok = + dma_map_single(dev, + &adapter->ip_offload_buf, + buf_sz, + DMA_FROM_DEVICE); + + if (dma_mapping_error(dev, adapter->ip_offload_tok)) { + if (!firmware_has_feature(FW_FEATURE_CMO)) + dev_err(dev, "Couldn't map offload buffer\n"); + return; + } + + memset(&crq, 0, sizeof(crq)); + crq.query_ip_offload.first = IBMVNIC_CRQ_CMD; + crq.query_ip_offload.cmd = QUERY_IP_OFFLOAD; + crq.query_ip_offload.len = cpu_to_be32(buf_sz); + crq.query_ip_offload.ioba = + cpu_to_be32(adapter->ip_offload_tok); + + ibmvnic_send_crq(adapter, &crq); +} + +static void send_control_ip_offload(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_control_ip_offload_buffer *ctrl_buf = &adapter->ip_offload_ctrl; + struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf; + struct device *dev = &adapter->vdev->dev; + netdev_features_t old_hw_features = 0; + union ibmvnic_crq crq; + + adapter->ip_offload_ctrl_tok = + dma_map_single(dev, + ctrl_buf, + sizeof(adapter->ip_offload_ctrl), + DMA_TO_DEVICE); + + if (dma_mapping_error(dev, adapter->ip_offload_ctrl_tok)) { + dev_err(dev, "Couldn't map ip offload control buffer\n"); + return; + } + + ctrl_buf->len = cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); + ctrl_buf->version = cpu_to_be32(INITIAL_VERSION_IOB); + ctrl_buf->ipv4_chksum = buf->ipv4_chksum; + ctrl_buf->ipv6_chksum = buf->ipv6_chksum; + ctrl_buf->tcp_ipv4_chksum = buf->tcp_ipv4_chksum; + ctrl_buf->udp_ipv4_chksum = buf->udp_ipv4_chksum; + ctrl_buf->tcp_ipv6_chksum = buf->tcp_ipv6_chksum; + ctrl_buf->udp_ipv6_chksum = buf->udp_ipv6_chksum; + ctrl_buf->large_tx_ipv4 = buf->large_tx_ipv4; + ctrl_buf->large_tx_ipv6 = buf->large_tx_ipv6; + + /* large_rx disabled for now, additional features needed */ + ctrl_buf->large_rx_ipv4 = 0; + ctrl_buf->large_rx_ipv6 = 0; + + if (adapter->state != VNIC_PROBING) { + old_hw_features = adapter->netdev->hw_features; + adapter->netdev->hw_features = 0; + } + + adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; + + if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum) + adapter->netdev->hw_features |= NETIF_F_IP_CSUM; + + if (buf->tcp_ipv6_chksum || buf->udp_ipv6_chksum) + adapter->netdev->hw_features |= NETIF_F_IPV6_CSUM; + + if ((adapter->netdev->features & + (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) + adapter->netdev->hw_features |= NETIF_F_RXCSUM; + + if (buf->large_tx_ipv4) + adapter->netdev->hw_features |= NETIF_F_TSO; + if (buf->large_tx_ipv6) + adapter->netdev->hw_features |= NETIF_F_TSO6; + + if (adapter->state == VNIC_PROBING) { + adapter->netdev->features |= adapter->netdev->hw_features; + } else if (old_hw_features != adapter->netdev->hw_features) { + netdev_features_t tmp = 0; + + /* disable features no longer supported */ + adapter->netdev->features &= adapter->netdev->hw_features; + /* turn on features now supported if previously enabled */ + tmp = (old_hw_features ^ adapter->netdev->hw_features) & + adapter->netdev->hw_features; + adapter->netdev->features |= + tmp & adapter->netdev->wanted_features; + } + + memset(&crq, 0, sizeof(crq)); + crq.control_ip_offload.first = IBMVNIC_CRQ_CMD; + crq.control_ip_offload.cmd = CONTROL_IP_OFFLOAD; + crq.control_ip_offload.len = + cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); + crq.control_ip_offload.ioba = cpu_to_be32(adapter->ip_offload_ctrl_tok); ibmvnic_send_crq(adapter, &crq); } @@ -3977,7 +4971,7 @@ static void handle_vpd_rsp(union ibmvnic_crq *crq, complete: if (adapter->fw_version[0] == '\0') - strncpy((char *)adapter->fw_version, "N/A", 3 * sizeof(char)); + strscpy((char *)adapter->fw_version, "N/A", sizeof(adapter->fw_version)); complete(&adapter->fw_done); } @@ -3985,8 +4979,6 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) { struct device *dev = &adapter->vdev->dev; struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf; - netdev_features_t old_hw_features = 0; - union ibmvnic_crq crq; int i; dma_unmap_single(dev, adapter->ip_offload_tok, @@ -3995,7 +4987,7 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) netdev_dbg(adapter->netdev, "Query IP Offload Buffer:\n"); for (i = 0; i < (sizeof(adapter->ip_offload_buf) - 1) / 8 + 1; i++) netdev_dbg(adapter->netdev, "%016lx\n", - ((unsigned long int *)(buf))[i]); + ((unsigned long *)(buf))[i]); netdev_dbg(adapter->netdev, "ipv4_chksum = %d\n", buf->ipv4_chksum); netdev_dbg(adapter->netdev, "ipv6_chksum = %d\n", buf->ipv6_chksum); @@ -4036,74 +5028,7 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) netdev_dbg(adapter->netdev, "off_ipv6_ext_hd = %d\n", buf->off_ipv6_ext_headers); - adapter->ip_offload_ctrl_tok = - dma_map_single(dev, &adapter->ip_offload_ctrl, - sizeof(adapter->ip_offload_ctrl), DMA_TO_DEVICE); - - if (dma_mapping_error(dev, adapter->ip_offload_ctrl_tok)) { - dev_err(dev, "Couldn't map ip offload control buffer\n"); - return; - } - - adapter->ip_offload_ctrl.len = - cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); - adapter->ip_offload_ctrl.version = cpu_to_be32(INITIAL_VERSION_IOB); - adapter->ip_offload_ctrl.ipv4_chksum = buf->ipv4_chksum; - adapter->ip_offload_ctrl.ipv6_chksum = buf->ipv6_chksum; - adapter->ip_offload_ctrl.tcp_ipv4_chksum = buf->tcp_ipv4_chksum; - adapter->ip_offload_ctrl.udp_ipv4_chksum = buf->udp_ipv4_chksum; - adapter->ip_offload_ctrl.tcp_ipv6_chksum = buf->tcp_ipv6_chksum; - adapter->ip_offload_ctrl.udp_ipv6_chksum = buf->udp_ipv6_chksum; - adapter->ip_offload_ctrl.large_tx_ipv4 = buf->large_tx_ipv4; - adapter->ip_offload_ctrl.large_tx_ipv6 = buf->large_tx_ipv6; - - /* large_rx disabled for now, additional features needed */ - adapter->ip_offload_ctrl.large_rx_ipv4 = 0; - adapter->ip_offload_ctrl.large_rx_ipv6 = 0; - - if (adapter->state != VNIC_PROBING) { - old_hw_features = adapter->netdev->hw_features; - adapter->netdev->hw_features = 0; - } - - adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; - - if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum) - adapter->netdev->hw_features |= NETIF_F_IP_CSUM; - - if (buf->tcp_ipv6_chksum || buf->udp_ipv6_chksum) - adapter->netdev->hw_features |= NETIF_F_IPV6_CSUM; - - if ((adapter->netdev->features & - (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) - adapter->netdev->hw_features |= NETIF_F_RXCSUM; - - if (buf->large_tx_ipv4) - adapter->netdev->hw_features |= NETIF_F_TSO; - if (buf->large_tx_ipv6) - adapter->netdev->hw_features |= NETIF_F_TSO6; - - if (adapter->state == VNIC_PROBING) { - adapter->netdev->features |= adapter->netdev->hw_features; - } else if (old_hw_features != adapter->netdev->hw_features) { - netdev_features_t tmp = 0; - - /* disable features no longer supported */ - adapter->netdev->features &= adapter->netdev->hw_features; - /* turn on features now supported if previously enabled */ - tmp = (old_hw_features ^ adapter->netdev->hw_features) & - adapter->netdev->hw_features; - adapter->netdev->features |= - tmp & adapter->netdev->wanted_features; - } - - memset(&crq, 0, sizeof(crq)); - crq.control_ip_offload.first = IBMVNIC_CRQ_CMD; - crq.control_ip_offload.cmd = CONTROL_IP_OFFLOAD; - crq.control_ip_offload.len = - cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); - crq.control_ip_offload.ioba = cpu_to_be32(adapter->ip_offload_ctrl_tok); - ibmvnic_send_crq(adapter, &crq); + send_control_ip_offload(adapter); } static const char *ibmvnic_fw_err_cause(u16 cause) @@ -4160,7 +5085,11 @@ static int handle_change_mac_rsp(union ibmvnic_crq *crq, dev_err(dev, "Error %ld in CHANGE_MAC_ADDR_RSP\n", rc); goto out; } - ether_addr_copy(netdev->dev_addr, + /* crq->change_mac_addr.mac_addr is the requested one + * crq->change_mac_addr_rsp.mac_addr is the returned valid one. + */ + eth_hw_addr_set(netdev, &crq->change_mac_addr_rsp.mac_addr[0]); + ether_addr_copy(adapter->mac_addr, &crq->change_mac_addr_rsp.mac_addr[0]); out: complete(&adapter->fw_done); @@ -4175,6 +5104,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, char *name; atomic_dec(&adapter->running_cap_crqs); + netdev_dbg(adapter->netdev, "Outstanding request-caps: %d\n", + atomic_read(&adapter->running_cap_crqs)); switch (be16_to_cpu(crq->request_capability_rsp.capability)) { case REQ_TX_QUEUES: req_value = &adapter->req_tx_queues; @@ -4216,8 +5147,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, case PARTIALSUCCESS: dev_info(dev, "req=%lld, rsp=%ld in %s queue, retrying.\n", *req_value, - (long int)be64_to_cpu(crq->request_capability_rsp. - number), name); + (long)be64_to_cpu(crq->request_capability_rsp.number), + name); if (be16_to_cpu(crq->request_capability_rsp.capability) == REQ_MTU) { @@ -4229,7 +5160,7 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, be64_to_cpu(crq->request_capability_rsp.number); } - ibmvnic_send_req_caps(adapter, 1); + send_request_cap(adapter, 1); return; default: dev_err(dev, "Error %d in request cap rsp\n", @@ -4238,32 +5169,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, } /* Done receiving requested capabilities, query IP offload support */ - if (atomic_read(&adapter->running_cap_crqs) == 0) { - union ibmvnic_crq newcrq; - int buf_sz = sizeof(struct ibmvnic_query_ip_offload_buffer); - struct ibmvnic_query_ip_offload_buffer *ip_offload_buf = - &adapter->ip_offload_buf; - - adapter->wait_capability = false; - adapter->ip_offload_tok = dma_map_single(dev, ip_offload_buf, - buf_sz, - DMA_FROM_DEVICE); - - if (dma_mapping_error(dev, adapter->ip_offload_tok)) { - if (!firmware_has_feature(FW_FEATURE_CMO)) - dev_err(dev, "Couldn't map offload buffer\n"); - return; - } - - memset(&newcrq, 0, sizeof(newcrq)); - newcrq.query_ip_offload.first = IBMVNIC_CRQ_CMD; - newcrq.query_ip_offload.cmd = QUERY_IP_OFFLOAD; - newcrq.query_ip_offload.len = cpu_to_be32(buf_sz); - newcrq.query_ip_offload.ioba = - cpu_to_be32(adapter->ip_offload_tok); - - ibmvnic_send_crq(adapter, &newcrq); - } + if (atomic_read(&adapter->running_cap_crqs) == 0) + send_query_ip_offload(adapter); } static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, @@ -4273,8 +5180,22 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, struct net_device *netdev = adapter->netdev; struct ibmvnic_login_rsp_buffer *login_rsp = adapter->login_rsp_buf; struct ibmvnic_login_buffer *login = adapter->login_buf; + u64 *tx_handle_array; + u64 *rx_handle_array; + int num_tx_pools; + int num_rx_pools; + u64 *size_array; int i; + /* CHECK: Test/set of login_pending does not need to be atomic + * because only ibmvnic_tasklet tests/clears this. + */ + if (!adapter->login_pending) { + netdev_warn(netdev, "Ignoring unexpected login response\n"); + return 0; + } + adapter->login_pending = false; + dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz, DMA_TO_DEVICE); dma_unmap_single(dev, adapter->login_rsp_buf_token, @@ -4290,12 +5211,20 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, return 0; } + if (adapter->failover_pending) { + adapter->init_done_rc = -EAGAIN; + netdev_dbg(netdev, "Failover pending, ignoring login response\n"); + complete(&adapter->init_done); + /* login response buffer will be released on reset */ + return 0; + } + netdev->mtu = adapter->req_mtu - ETH_HLEN; netdev_dbg(adapter->netdev, "Login Response Buffer:\n"); for (i = 0; i < (adapter->login_rsp_buf_sz - 1) / 8 + 1; i++) { netdev_dbg(adapter->netdev, "%016lx\n", - ((unsigned long int *)(adapter->login_rsp_buf))[i]); + ((unsigned long *)(adapter->login_rsp_buf))[i]); } /* Sanity checks */ @@ -4304,9 +5233,33 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, adapter->req_rx_add_queues != be32_to_cpu(login_rsp->num_rxadd_subcrqs))) { dev_err(dev, "FATAL: Inconsistent login and login rsp\n"); - ibmvnic_remove(adapter->vdev); + ibmvnic_reset(adapter, VNIC_RESET_FATAL); return -EIO; } + size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + + be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size)); + /* variable buffer sizes are not supported, so just read the + * first entry. + */ + adapter->cur_rx_buf_sz = be64_to_cpu(size_array[0]); + + num_tx_pools = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs); + num_rx_pools = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); + + tx_handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + + be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs)); + rx_handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + + be32_to_cpu(adapter->login_rsp_buf->off_rxadd_subcrqs)); + + for (i = 0; i < num_tx_pools; i++) + adapter->tx_scrq[i]->handle = tx_handle_array[i]; + + for (i = 0; i < num_rx_pools; i++) + adapter->rx_scrq[i]->handle = rx_handle_array[i]; + + adapter->num_active_tx_scrqs = num_tx_pools; + adapter->num_active_rx_scrqs = num_rx_pools; + release_login_rsp_buffer(adapter); release_login_buffer(adapter); complete(&adapter->init_done); @@ -4336,9 +5289,10 @@ static void handle_query_map_rsp(union ibmvnic_crq *crq, dev_err(dev, "Error %ld in QUERY_MAP_RSP\n", rc); return; } - netdev_dbg(netdev, "page_size = %d\ntot_pages = %d\nfree_pages = %d\n", - crq->query_map_rsp.page_size, crq->query_map_rsp.tot_pages, - crq->query_map_rsp.free_pages); + netdev_dbg(netdev, "page_size = %d\ntot_pages = %u\nfree_pages = %u\n", + crq->query_map_rsp.page_size, + __be32_to_cpu(crq->query_map_rsp.tot_pages), + __be32_to_cpu(crq->query_map_rsp.free_pages)); } static void handle_query_cap_rsp(union ibmvnic_crq *crq, @@ -4514,10 +5468,8 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq, } out: - if (atomic_read(&adapter->running_cap_crqs) == 0) { - adapter->wait_capability = false; - ibmvnic_send_req_caps(adapter, 0); - } + if (atomic_read(&adapter->running_cap_crqs) == 0) + send_request_cap(adapter, 0); } static int send_query_phys_parms(struct ibmvnic_adapter *adapter) @@ -4571,7 +5523,7 @@ static int handle_query_phys_parms_rsp(union ibmvnic_crq *crq, case IBMVNIC_1GBPS: adapter->speed = SPEED_1000; break; - case IBMVNIC_10GBP: + case IBMVNIC_10GBPS: adapter->speed = SPEED_10000; break; case IBMVNIC_25GBPS: @@ -4586,6 +5538,9 @@ static int handle_query_phys_parms_rsp(union ibmvnic_crq *crq, case IBMVNIC_100GBPS: adapter->speed = SPEED_100000; break; + case IBMVNIC_200GBPS: + adapter->speed = SPEED_200000; + break; default: if (netif_carrier_ok(netdev)) netdev_warn(netdev, "Unknown speed 0x%08x\n", rspeed); @@ -4611,20 +5566,46 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, long rc; netdev_dbg(netdev, "Handling CRQ: %016lx %016lx\n", - (unsigned long int)cpu_to_be64(u64_crq[0]), - (unsigned long int)cpu_to_be64(u64_crq[1])); + (unsigned long)cpu_to_be64(u64_crq[0]), + (unsigned long)cpu_to_be64(u64_crq[1])); switch (gen_crq->first) { case IBMVNIC_CRQ_INIT_RSP: switch (gen_crq->cmd) { case IBMVNIC_CRQ_INIT: dev_info(dev, "Partner initialized\n"); adapter->from_passive_init = true; - adapter->failover_pending = false; + /* Discard any stale login responses from prev reset. + * CHECK: should we clear even on INIT_COMPLETE? + */ + adapter->login_pending = false; + + if (adapter->state == VNIC_DOWN) + rc = ibmvnic_reset(adapter, VNIC_RESET_PASSIVE_INIT); + else + rc = ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); + + if (rc && rc != -EBUSY) { + /* We were unable to schedule the failover + * reset either because the adapter was still + * probing (eg: during kexec) or we could not + * allocate memory. Clear the failover_pending + * flag since no one else will. We ignore + * EBUSY because it means either FAILOVER reset + * is already scheduled or the adapter is + * being removed. + */ + netdev_err(netdev, + "Error %ld scheduling failover reset\n", + rc); + adapter->failover_pending = false; + } + if (!completion_done(&adapter->init_done)) { + if (!adapter->init_done_rc) + adapter->init_done_rc = -EAGAIN; complete(&adapter->init_done); - adapter->init_done_rc = -EIO; } - ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); + break; case IBMVNIC_CRQ_INIT_COMPLETE: dev_info(dev, "Partner initialization complete\n"); @@ -4645,6 +5626,13 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, adapter->fw_done_rc = -EIO; complete(&adapter->fw_done); } + + /* if we got here during crq-init, retry crq-init */ + if (!completion_done(&adapter->init_done)) { + adapter->init_done_rc = -EAGAIN; + complete(&adapter->init_done); + } + if (!completion_done(&adapter->stats_done)) complete(&adapter->stats_done); if (test_bit(0, &adapter->resetting)) @@ -4677,13 +5665,11 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, dev_err(dev, "Error %ld in VERSION_EXCHG_RSP\n", rc); break; } - dev_info(dev, "Partner protocol version is %d\n", - crq->version_exchange_rsp.version); - if (be16_to_cpu(crq->version_exchange_rsp.version) < - ibmvnic_version) - ibmvnic_version = + ibmvnic_version = be16_to_cpu(crq->version_exchange_rsp.version); - send_cap_queries(adapter); + dev_info(dev, "Partner protocol version is %d\n", + ibmvnic_version); + send_query_cap(adapter); break; case QUERY_CAPABILITY_RSP: handle_query_cap_rsp(crq, adapter); @@ -4780,33 +5766,27 @@ static irqreturn_t ibmvnic_interrupt(int irq, void *instance) return IRQ_HANDLED; } -static void ibmvnic_tasklet(void *data) +static void ibmvnic_tasklet(struct tasklet_struct *t) { - struct ibmvnic_adapter *adapter = data; + struct ibmvnic_adapter *adapter = from_tasklet(adapter, t, tasklet); struct ibmvnic_crq_queue *queue = &adapter->crq; union ibmvnic_crq *crq; unsigned long flags; - bool done = false; spin_lock_irqsave(&queue->lock, flags); - while (!done) { - /* Pull all the valid messages off the CRQ */ - while ((crq = ibmvnic_next_crq(adapter)) != NULL) { - ibmvnic_handle_crq(crq, adapter); - crq->generic.first = 0; - } - /* remain in tasklet until all - * capabilities responses are received + /* Pull all the valid messages off the CRQ */ + while ((crq = ibmvnic_next_crq(adapter)) != NULL) { + /* This barrier makes sure ibmvnic_next_crq()'s + * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded + * before ibmvnic_handle_crq()'s + * switch(gen_crq->first) and switch(gen_crq->cmd). */ - if (!adapter->wait_capability) - done = true; + dma_rmb(); + ibmvnic_handle_crq(crq, adapter); + crq->generic.first = 0; } - /* if capabilities CRQ's were sent in this tasklet, the following - * tasklet must wait until all responses are received - */ - if (atomic_read(&adapter->running_cap_crqs) != 0) - adapter->wait_capability = true; + spin_unlock_irqrestore(&queue->lock, flags); } @@ -4838,6 +5818,9 @@ static int ibmvnic_reset_crq(struct ibmvnic_adapter *adapter) } while (rc == H_BUSY || H_IS_LONG_BUSY(rc)); /* Clean out the queue */ + if (!crq->msgs) + return -EINVAL; + memset(crq->msgs, 0, PAGE_SIZE); crq->cur = 0; crq->active = false; @@ -4917,8 +5900,7 @@ static int init_crq_queue(struct ibmvnic_adapter *adapter) retrc = 0; - tasklet_init(&adapter->tasklet, (void *)ibmvnic_tasklet, - (unsigned long)adapter); + tasklet_setup(&adapter->tasklet, (void *)ibmvnic_tasklet); netdev_dbg(adapter->netdev, "registering irq 0x%x\n", vdev->irq); snprintf(crq->name, sizeof(crq->name), "ibmvnic-%x", @@ -4939,6 +5921,9 @@ static int init_crq_queue(struct ibmvnic_adapter *adapter) crq->cur = 0; spin_lock_init(&crq->lock); + /* process any CRQs that were queued before we enabled interrupts */ + tasklet_schedule(&adapter->tasklet); + return retrc; req_irq_failed: @@ -4954,44 +5939,57 @@ map_failed: return retrc; } -static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter) +static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) { struct device *dev = &adapter->vdev->dev; - unsigned long timeout = msecs_to_jiffies(30000); - u64 old_num_rx_queues, old_num_tx_queues; + unsigned long timeout = msecs_to_jiffies(20000); + u64 old_num_rx_queues = adapter->req_rx_queues; + u64 old_num_tx_queues = adapter->req_tx_queues; int rc; adapter->from_passive_init = false; - old_num_rx_queues = adapter->req_rx_queues; - old_num_tx_queues = adapter->req_tx_queues; + rc = ibmvnic_send_crq_init(adapter); + if (rc) { + dev_err(dev, "Send crq init failed with error %d\n", rc); + return rc; + } - reinit_completion(&adapter->init_done); - adapter->init_done_rc = 0; - ibmvnic_send_crq_init(adapter); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { dev_err(dev, "Initialization sequence timed out\n"); - return -1; + return -ETIMEDOUT; } if (adapter->init_done_rc) { release_crq_queue(adapter); + dev_err(dev, "CRQ-init failed, %d\n", adapter->init_done_rc); return adapter->init_done_rc; } if (adapter->from_passive_init) { adapter->state = VNIC_OPEN; adapter->from_passive_init = false; - return -1; + dev_err(dev, "CRQ-init failed, passive-init\n"); + return -EINVAL; } - if (test_bit(0, &adapter->resetting) && !adapter->wait_for_reset && + if (reset && + test_bit(0, &adapter->resetting) && !adapter->wait_for_reset && adapter->reset_reason != VNIC_RESET_MOBILITY) { if (adapter->req_rx_queues != old_num_rx_queues || adapter->req_tx_queues != old_num_tx_queues) { release_sub_crqs(adapter, 0); rc = init_sub_crqs(adapter); } else { + /* no need to reinitialize completely, but we do + * need to clean up transmits that were in flight + * when we processed the reset. Failure to do so + * will confound the upper layer, usually TCP, by + * creating the illusion of transmits that are + * awaiting completion. + */ + clean_tx_pools(adapter); + rc = reset_sub_crq_queues(adapter); } } else { @@ -5013,48 +6011,6 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter) return rc; } -static int ibmvnic_init(struct ibmvnic_adapter *adapter) -{ - struct device *dev = &adapter->vdev->dev; - unsigned long timeout = msecs_to_jiffies(30000); - int rc; - - adapter->from_passive_init = false; - - adapter->init_done_rc = 0; - ibmvnic_send_crq_init(adapter); - if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { - dev_err(dev, "Initialization sequence timed out\n"); - return -1; - } - - if (adapter->init_done_rc) { - release_crq_queue(adapter); - return adapter->init_done_rc; - } - - if (adapter->from_passive_init) { - adapter->state = VNIC_OPEN; - adapter->from_passive_init = false; - return -1; - } - - rc = init_sub_crqs(adapter); - if (rc) { - dev_err(dev, "Initialization of sub crqs failed\n"); - release_crq_queue(adapter); - return rc; - } - - rc = init_sub_crq_irqs(adapter); - if (rc) { - dev_err(dev, "Failed to initialize sub crq irqs\n"); - release_crq_queue(adapter); - } - - return rc; -} - static struct device_attribute dev_attr_failover; static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) @@ -5062,6 +6018,8 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) struct ibmvnic_adapter *adapter; struct net_device *netdev; unsigned char *mac_addr_p; + unsigned long flags; + bool init_success; int rc; dev_dbg(&dev->dev, "entering ibmvnic_probe for UA 0x%x\n", @@ -5086,16 +6044,18 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) dev_set_drvdata(&dev->dev, netdev); adapter->vdev = dev; adapter->netdev = netdev; + adapter->login_pending = false; + memset(&adapter->map_ids, 0, sizeof(adapter->map_ids)); + /* map_ids start at 1, so ensure map_id 0 is always "in-use" */ + bitmap_set(adapter->map_ids, 0, 1); ether_addr_copy(adapter->mac_addr, mac_addr_p); - ether_addr_copy(netdev->dev_addr, adapter->mac_addr); + eth_hw_addr_set(netdev, adapter->mac_addr); netdev->irq = dev->irq; netdev->netdev_ops = &ibmvnic_netdev_ops; netdev->ethtool_ops = &ibmvnic_ethtool_ops; SET_NETDEV_DEV(netdev, &dev->dev); - spin_lock_init(&adapter->stats_lock); - INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset); INIT_DELAYED_WORK(&adapter->ibmvnic_delayed_reset, __ibmvnic_delayed_reset); @@ -5103,13 +6063,44 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) spin_lock_init(&adapter->rwi_lock); spin_lock_init(&adapter->state_lock); mutex_init(&adapter->fw_lock); + init_completion(&adapter->probe_done); init_completion(&adapter->init_done); init_completion(&adapter->fw_done); init_completion(&adapter->reset_done); init_completion(&adapter->stats_done); clear_bit(0, &adapter->resetting); + adapter->prev_rx_buf_sz = 0; + adapter->prev_mtu = 0; + init_success = false; do { + reinit_init_done(adapter); + + /* clear any failovers we got in the previous pass + * since we are reinitializing the CRQ + */ + adapter->failover_pending = false; + + /* If we had already initialized CRQ, we may have one or + * more resets queued already. Discard those and release + * the CRQ before initializing the CRQ again. + */ + release_crq_queue(adapter); + + /* Since we are still in PROBING state, __ibmvnic_reset() + * will not access the ->rwi_list and since we released CRQ, + * we won't get _new_ transport events. But there maybe an + * ongoing ibmvnic_reset() call. So serialize access to + * rwi_list. If we win the race, ibvmnic_reset() could add + * a reset after we purged but thats ok - we just may end + * up with an extra reset (i.e similar to having two or more + * resets in the queue at once). + * CHECK. + */ + spin_lock_irqsave(&adapter->rwi_lock, flags); + flush_reset_queue(adapter); + spin_unlock_irqrestore(&adapter->rwi_lock, flags); + rc = init_crq_queue(adapter); if (rc) { dev_err(&dev->dev, "Couldn't initialize crq. rc=%d\n", @@ -5117,10 +6108,16 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) goto ibmvnic_init_fail; } - rc = ibmvnic_init(adapter); - if (rc && rc != EAGAIN) - goto ibmvnic_init_fail; - } while (rc == EAGAIN); + rc = ibmvnic_reset_init(adapter, false); + } while (rc == -EAGAIN); + + /* We are ignoring the error from ibmvnic_reset_init() assuming that the + * partner is not ready. CRQ is not active. When the partner becomes + * ready, we will do the passive init reset. + */ + + if (!rc) + init_success = true; rc = init_stats_buffers(adapter); if (rc) @@ -5130,15 +6127,24 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) if (rc) goto ibmvnic_stats_fail; - netdev->mtu = adapter->req_mtu - ETH_HLEN; - netdev->min_mtu = adapter->min_mtu - ETH_HLEN; - netdev->max_mtu = adapter->max_mtu - ETH_HLEN; - rc = device_create_file(&dev->dev, &dev_attr_failover); if (rc) goto ibmvnic_dev_file_err; netif_carrier_off(netdev); + + if (init_success) { + adapter->state = VNIC_PROBED; + netdev->mtu = adapter->req_mtu - ETH_HLEN; + netdev->min_mtu = adapter->min_mtu - ETH_HLEN; + netdev->max_mtu = adapter->max_mtu - ETH_HLEN; + } else { + adapter->state = VNIC_DOWN; + } + + adapter->wait_for_reset = false; + adapter->last_reset_time = jiffies; + rc = register_netdev(netdev); if (rc) { dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc); @@ -5146,9 +6152,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) } dev_info(&dev->dev, "ibmvnic registered\n"); - adapter->state = VNIC_PROBED; - - adapter->wait_for_reset = false; + complete(&adapter->probe_done); return 0; @@ -5164,31 +6168,53 @@ ibmvnic_stats_fail: ibmvnic_init_fail: release_sub_crqs(adapter, 1); release_crq_queue(adapter); + + /* cleanup worker thread after releasing CRQ so we don't get + * transport events (i.e new work items for the worker thread). + */ + adapter->state = VNIC_REMOVING; + complete(&adapter->probe_done); + flush_work(&adapter->ibmvnic_reset); + flush_delayed_work(&adapter->ibmvnic_delayed_reset); + + flush_reset_queue(adapter); + mutex_destroy(&adapter->fw_lock); free_netdev(netdev); return rc; } -static int ibmvnic_remove(struct vio_dev *dev) +static void ibmvnic_remove(struct vio_dev *dev) { struct net_device *netdev = dev_get_drvdata(&dev->dev); struct ibmvnic_adapter *adapter = netdev_priv(netdev); unsigned long flags; spin_lock_irqsave(&adapter->state_lock, flags); - if (adapter->state == VNIC_RESETTING) { - spin_unlock_irqrestore(&adapter->state_lock, flags); - return -EBUSY; - } + /* If ibmvnic_reset() is scheduling a reset, wait for it to + * finish. Then, set the state to REMOVING to prevent it from + * scheduling any more work and to have reset functions ignore + * any resets that have already been scheduled. Drop the lock + * after setting state, so __ibmvnic_reset() which is called + * from the flush_work() below, can make progress. + */ + spin_lock(&adapter->rwi_lock); adapter->state = VNIC_REMOVING; + spin_unlock(&adapter->rwi_lock); + spin_unlock_irqrestore(&adapter->state_lock, flags); + flush_work(&adapter->ibmvnic_reset); + flush_delayed_work(&adapter->ibmvnic_delayed_reset); + rtnl_lock(); unregister_netdevice(netdev); release_resources(adapter); + release_rx_pools(adapter); + release_tx_pools(adapter); release_sub_crqs(adapter, 1); release_crq_queue(adapter); @@ -5202,8 +6228,6 @@ static int ibmvnic_remove(struct vio_dev *dev) device_remove_file(&dev->dev, &dev_attr_failover); free_netdev(netdev); dev_set_drvdata(&dev->dev, NULL); - - return 0; } static ssize_t failover_store(struct device *dev, struct device_attribute *attr, @@ -5223,7 +6247,7 @@ static ssize_t failover_store(struct device *dev, struct device_attribute *attr, if (rc) { netdev_err(netdev, "Couldn't retrieve session token, rc %ld\n", rc); - return -EINVAL; + goto last_resort; } session_token = (__be64)retbuf[0]; @@ -5232,14 +6256,20 @@ static ssize_t failover_store(struct device *dev, struct device_attribute *attr, rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, H_SESSION_ERR_DETECTED, session_token, 0, 0); if (rc) { - netdev_err(netdev, "Client initiated failover failed, rc %ld\n", + netdev_err(netdev, + "H_VIOCTL initiated failover failed, rc %ld\n", rc); - return -EINVAL; + goto last_resort; } return count; -} +last_resort: + netdev_dbg(netdev, "Trying to send CRQ_CMD, the last resort\n"); + ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); + + return count; +} static DEVICE_ATTR_WO(failover); static unsigned long ibmvnic_get_desired_dma(struct vio_dev *vdev) @@ -5264,8 +6294,7 @@ static unsigned long ibmvnic_get_desired_dma(struct vio_dev *vdev) for (i = 0; i < adapter->req_tx_queues + adapter->req_rx_queues; i++) ret += 4 * PAGE_SIZE; /* the scrq message queue */ - for (i = 0; i < be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); - i++) + for (i = 0; i < adapter->num_active_rx_pools; i++) ret += adapter->rx_pool[i].size * IOMMU_PAGE_ALIGN(adapter->rx_pool[i].buff_size, tbl); |