From 3df25e4c1e66a69097bde99990fb095b26125c82 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Thu, 11 Oct 2018 10:17:16 +0300 Subject: igc: Add interrupt support This patch set adds interrupt support for the igc interfaces. Signed-off-by: Sasha Neftin Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igc/igc.h | 127 ++++ drivers/net/ethernet/intel/igc/igc_defines.h | 40 + drivers/net/ethernet/intel/igc/igc_hw.h | 84 +++ drivers/net/ethernet/intel/igc/igc_main.c | 1016 ++++++++++++++++++++++++++ 4 files changed, 1267 insertions(+) (limited to 'drivers/net/ethernet/intel') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 2e819cac19e5..e595d135ea7b 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -28,6 +28,17 @@ extern char igc_driver_name[]; extern char igc_driver_version[]; +/* Interrupt defines */ +#define IGC_START_ITR 648 /* ~6000 ints/sec */ +#define IGC_FLAG_HAS_MSI BIT(0) +#define IGC_FLAG_QUEUE_PAIRS BIT(4) +#define IGC_FLAG_HAS_MSIX BIT(13) + +#define IGC_START_ITR 648 /* ~6000 ints/sec */ +#define IGC_4K_ITR 980 +#define IGC_20K_ITR 196 +#define IGC_70K_ITR 56 + /* Transmit and receive queues */ #define IGC_MAX_RX_QUEUES 4 #define IGC_MAX_TX_QUEUES 4 @@ -42,10 +53,96 @@ enum igc_state_t { __IGC_PTP_TX_IN_PROGRESS, }; +struct igc_tx_queue_stats { + u64 packets; + u64 bytes; + u64 restart_queue; +}; + +struct igc_rx_queue_stats { + u64 packets; + u64 bytes; + u64 drops; + u64 csum_err; + u64 alloc_failed; +}; + +struct igc_rx_packet_stats { + u64 ipv4_packets; /* IPv4 headers processed */ + u64 ipv4e_packets; /* IPv4E headers with extensions processed */ + u64 ipv6_packets; /* IPv6 headers processed */ + u64 ipv6e_packets; /* IPv6E headers with extensions processed */ + u64 tcp_packets; /* TCP headers processed */ + u64 udp_packets; /* UDP headers processed */ + u64 sctp_packets; /* SCTP headers processed */ + u64 nfs_packets; /* NFS headers processe */ + u64 other_packets; +}; + +struct igc_ring_container { + struct igc_ring *ring; /* pointer to linked list of rings */ + unsigned int total_bytes; /* total bytes processed this int */ + unsigned int total_packets; /* total packets processed this int */ + u16 work_limit; /* total work allowed per interrupt */ + u8 count; /* total number of rings in vector */ + u8 itr; /* current ITR setting for ring */ +}; + +struct igc_ring { + struct igc_q_vector *q_vector; /* backlink to q_vector */ + struct net_device *netdev; /* back pointer to net_device */ + struct device *dev; /* device for dma mapping */ + union { /* array of buffer info structs */ + struct igc_tx_buffer *tx_buffer_info; + struct igc_rx_buffer *rx_buffer_info; + }; + void *desc; /* descriptor ring memory */ + unsigned long flags; /* ring specific flags */ + void __iomem *tail; /* pointer to ring tail register */ + dma_addr_t dma; /* phys address of the ring */ + unsigned int size; /* length of desc. ring in bytes */ + + u16 count; /* number of desc. in the ring */ + u8 queue_index; /* logical index of the ring*/ + u8 reg_idx; /* physical index of the ring */ + + /* everything past this point are written often */ + u16 next_to_clean; + u16 next_to_use; + u16 next_to_alloc; + + union { + /* TX */ + struct { + struct igc_tx_queue_stats tx_stats; + }; + /* RX */ + struct { + struct igc_rx_queue_stats rx_stats; + struct igc_rx_packet_stats pkt_stats; + struct sk_buff *skb; + }; + }; +} ____cacheline_internodealigned_in_smp; + struct igc_q_vector { struct igc_adapter *adapter; /* backlink */ + void __iomem *itr_register; + u32 eims_value; /* EIMS mask value */ + + u16 itr_val; + u8 set_itr; + + struct igc_ring_container rx, tx; struct napi_struct napi; + + struct rcu_head rcu; /* to avoid race with update stats on free */ + char name[IFNAMSIZ + 9]; + struct net_device poll_dev; + + /* for dynamic allocation of rings associated with this q_vector */ + struct igc_ring ring[0] ____cacheline_internodealigned_in_smp; }; struct igc_mac_addr { @@ -65,13 +162,35 @@ struct igc_adapter { unsigned long state; unsigned int flags; unsigned int num_q_vectors; + + struct msix_entry *msix_entries; + + /* TX */ + u16 tx_work_limit; + int num_tx_queues; + struct igc_ring *tx_ring[IGC_MAX_TX_QUEUES]; + + /* RX */ + int num_rx_queues; + struct igc_ring *rx_ring[IGC_MAX_RX_QUEUES]; + + struct timer_list watchdog_timer; + struct timer_list dma_err_timer; + struct timer_list phy_info_timer; + u16 link_speed; u16 link_duplex; u8 port_num; u8 __iomem *io_addr; + /* Interrupt Throttle Rate */ + u32 rx_itr_setting; + u32 tx_itr_setting; + + struct work_struct reset_task; struct work_struct watchdog_task; + struct work_struct dma_err_task; int msg_enable; u32 max_frame_size; @@ -81,8 +200,16 @@ struct igc_adapter { /* structs defined in igc_hw.h */ struct igc_hw hw; + struct igc_hw_stats stats; struct igc_q_vector *q_vector[MAX_Q_VECTORS]; + u32 eims_enable_mask; + u32 eims_other; + + u16 tx_ring_count; + u16 rx_ring_count; + + u32 rss_queues; struct igc_mac_addr *mac_table; }; diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index c25f75ed9cd4..7262ad44dcf8 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -42,4 +42,44 @@ #define IGC_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ #define IGC_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ +/* Interrupt Cause Read */ +#define IGC_ICR_TXDW BIT(0) /* Transmit desc written back */ +#define IGC_ICR_TXQE BIT(1) /* Transmit Queue empty */ +#define IGC_ICR_LSC BIT(2) /* Link Status Change */ +#define IGC_ICR_RXSEQ BIT(3) /* Rx sequence error */ +#define IGC_ICR_RXDMT0 BIT(4) /* Rx desc min. threshold (0) */ +#define IGC_ICR_RXO BIT(6) /* Rx overrun */ +#define IGC_ICR_RXT0 BIT(7) /* Rx timer intr (ring 0) */ +#define IGC_ICR_DRSTA BIT(30) /* Device Reset Asserted */ +#define IGC_ICS_RXT0 IGC_ICR_RXT0 /* Rx timer intr */ + +#define IMS_ENABLE_MASK ( \ + IGC_IMS_RXT0 | \ + IGC_IMS_TXDW | \ + IGC_IMS_RXDMT0 | \ + IGC_IMS_RXSEQ | \ + IGC_IMS_LSC) + +/* Interrupt Mask Set */ +#define IGC_IMS_TXDW IGC_ICR_TXDW /* Tx desc written back */ +#define IGC_IMS_RXSEQ IGC_ICR_RXSEQ /* Rx sequence error */ +#define IGC_IMS_LSC IGC_ICR_LSC /* Link Status Change */ +#define IGC_IMS_DOUTSYNC IGC_ICR_DOUTSYNC /* NIC DMA out of sync */ +#define IGC_IMS_DRSTA IGC_ICR_DRSTA /* Device Reset Asserted */ +#define IGC_IMS_RXT0 IGC_ICR_RXT0 /* Rx timer intr */ +#define IGC_IMS_RXDMT0 IGC_ICR_RXDMT0 /* Rx desc min. threshold */ + +#define IGC_QVECTOR_MASK 0x7FFC /* Q-vector mask */ +#define IGC_ITR_VAL_MASK 0x04 /* ITR value mask */ + +#define IGC_ICR_DOUTSYNC 0x10000000 /* NIC DMA out of sync */ +#define IGC_EITR_CNT_IGNR 0x80000000 /* Don't reset counters on write */ +#define IGC_IVAR_VALID 0x80 +#define IGC_GPIE_NSICR 0x00000001 +#define IGC_GPIE_MSIX_MODE 0x00000010 +#define IGC_GPIE_EIAME 0x40000000 +#define IGC_GPIE_PBA 0x80000000 + +#define IGC_N0_QUEUE -1 + #endif /* _IGC_DEFINES_H_ */ diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h index 4cac2e8868e0..3905efb1bb1b 100644 --- a/drivers/net/ethernet/intel/igc/igc_hw.h +++ b/drivers/net/ethernet/intel/igc/igc_hw.h @@ -85,6 +85,90 @@ struct igc_hw { u8 revision_id; }; +/* Statistics counters collected by the MAC */ +struct igc_hw_stats { + u64 crcerrs; + u64 algnerrc; + u64 symerrs; + u64 rxerrc; + u64 mpc; + u64 scc; + u64 ecol; + u64 mcc; + u64 latecol; + u64 colc; + u64 dc; + u64 tncrs; + u64 sec; + u64 cexterr; + u64 rlec; + u64 xonrxc; + u64 xontxc; + u64 xoffrxc; + u64 xofftxc; + u64 fcruc; + u64 prc64; + u64 prc127; + u64 prc255; + u64 prc511; + u64 prc1023; + u64 prc1522; + u64 gprc; + u64 bprc; + u64 mprc; + u64 gptc; + u64 gorc; + u64 gotc; + u64 rnbc; + u64 ruc; + u64 rfc; + u64 roc; + u64 rjc; + u64 mgprc; + u64 mgpdc; + u64 mgptc; + u64 tor; + u64 tot; + u64 tpr; + u64 tpt; + u64 ptc64; + u64 ptc127; + u64 ptc255; + u64 ptc511; + u64 ptc1023; + u64 ptc1522; + u64 mptc; + u64 bptc; + u64 tsctc; + u64 tsctfc; + u64 iac; + u64 icrxptc; + u64 icrxatc; + u64 ictxptc; + u64 ictxatc; + u64 ictxqec; + u64 ictxqmtc; + u64 icrxdmtc; + u64 icrxoc; + u64 cbtmpc; + u64 htdpmc; + u64 cbrdpc; + u64 cbrmpc; + u64 rpthc; + u64 hgptc; + u64 htcbdpc; + u64 hgorc; + u64 hgotc; + u64 lenerrs; + u64 scvpc; + u64 hrmpc; + u64 doosync; + u64 o2bgptc; + u64 o2bspc; + u64 b2ospc; + u64 b2ogprc; +}; + s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value); s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value); void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 7c5b0d2f16bf..0fd66620cfa1 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -41,6 +41,22 @@ static int igc_sw_init(struct igc_adapter *); static void igc_configure(struct igc_adapter *adapter); static void igc_power_down_link(struct igc_adapter *adapter); static void igc_set_default_mac_filter(struct igc_adapter *adapter); +static void igc_write_itr(struct igc_q_vector *q_vector); +static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector); +static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx); +static void igc_set_interrupt_capability(struct igc_adapter *adapter, + bool msix); +static void igc_free_q_vectors(struct igc_adapter *adapter); +static void igc_irq_disable(struct igc_adapter *adapter); +static void igc_irq_enable(struct igc_adapter *adapter); +static void igc_configure_msix(struct igc_adapter *adapter); + +enum latency_range { + lowest_latency = 0, + low_latency = 1, + bulk_latency = 2, + latency_invalid = 255 +}; static void igc_reset(struct igc_adapter *adapter) { @@ -154,6 +170,7 @@ static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) */ static void igc_up(struct igc_adapter *adapter) { + struct igc_hw *hw = &adapter->hw; int i = 0; /* hardware has been reset, we need to reload some things */ @@ -163,6 +180,15 @@ static void igc_up(struct igc_adapter *adapter) for (i = 0; i < adapter->num_q_vectors; i++) napi_enable(&adapter->q_vector[i]->napi); + + if (adapter->msix_entries) + igc_configure_msix(adapter); + else + igc_assign_vector(adapter->q_vector[0], 0); + + /* Clear any pending interrupts. */ + rd32(IGC_ICR); + igc_irq_enable(adapter); } /** @@ -309,6 +335,958 @@ static void igc_set_default_mac_filter(struct igc_adapter *adapter) igc_rar_set_index(adapter, 0); } +/** + * igc_msix_other - msix other interrupt handler + * @irq: interrupt number + * @data: pointer to a q_vector + */ +static irqreturn_t igc_msix_other(int irq, void *data) +{ + struct igc_adapter *adapter = data; + struct igc_hw *hw = &adapter->hw; + u32 icr = rd32(IGC_ICR); + + /* reading ICR causes bit 31 of EICR to be cleared */ + if (icr & IGC_ICR_DRSTA) + schedule_work(&adapter->reset_task); + + if (icr & IGC_ICR_DOUTSYNC) { + /* HW is reporting DMA is out of sync */ + adapter->stats.doosync++; + } + + if (icr & IGC_ICR_LSC) { + hw->mac.get_link_status = 1; + /* guard against interrupt when we're going down */ + if (!test_bit(__IGC_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + } + + wr32(IGC_EIMS, adapter->eims_other); + + return IRQ_HANDLED; +} + +/** + * igc_write_ivar - configure ivar for given MSI-X vector + * @hw: pointer to the HW structure + * @msix_vector: vector number we are allocating to a given ring + * @index: row index of IVAR register to write within IVAR table + * @offset: column offset of in IVAR, should be multiple of 8 + * + * The IVAR table consists of 2 columns, + * each containing an cause allocation for an Rx and Tx ring, and a + * variable number of rows depending on the number of queues supported. + */ +static void igc_write_ivar(struct igc_hw *hw, int msix_vector, + int index, int offset) +{ + u32 ivar = array_rd32(IGC_IVAR0, index); + + /* clear any bits that are currently set */ + ivar &= ~((u32)0xFF << offset); + + /* write vector and valid bit */ + ivar |= (msix_vector | IGC_IVAR_VALID) << offset; + + array_wr32(IGC_IVAR0, index, ivar); +} + +static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + struct igc_hw *hw = &adapter->hw; + int rx_queue = IGC_N0_QUEUE; + int tx_queue = IGC_N0_QUEUE; + + if (q_vector->rx.ring) + rx_queue = q_vector->rx.ring->reg_idx; + if (q_vector->tx.ring) + tx_queue = q_vector->tx.ring->reg_idx; + + switch (hw->mac.type) { + case igc_i225: + if (rx_queue > IGC_N0_QUEUE) + igc_write_ivar(hw, msix_vector, + rx_queue >> 1, + (rx_queue & 0x1) << 4); + if (tx_queue > IGC_N0_QUEUE) + igc_write_ivar(hw, msix_vector, + tx_queue >> 1, + ((tx_queue & 0x1) << 4) + 8); + q_vector->eims_value = BIT(msix_vector); + break; + default: + WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n"); + break; + } + + /* add q_vector eims value to global eims_enable_mask */ + adapter->eims_enable_mask |= q_vector->eims_value; + + /* configure q_vector to set itr on first interrupt */ + q_vector->set_itr = 1; +} + +/** + * igc_configure_msix - Configure MSI-X hardware + * @adapter: Pointer to adapter structure + * + * igc_configure_msix sets up the hardware to properly + * generate MSI-X interrupts. + */ +static void igc_configure_msix(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + int i, vector = 0; + u32 tmp; + + adapter->eims_enable_mask = 0; + + /* set vector for other causes, i.e. link changes */ + switch (hw->mac.type) { + case igc_i225: + /* Turn on MSI-X capability first, or our settings + * won't stick. And it will take days to debug. + */ + wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE | + IGC_GPIE_PBA | IGC_GPIE_EIAME | + IGC_GPIE_NSICR); + + /* enable msix_other interrupt */ + adapter->eims_other = BIT(vector); + tmp = (vector++ | IGC_IVAR_VALID) << 8; + + wr32(IGC_IVAR_MISC, tmp); + break; + default: + /* do nothing, since nothing else supports MSI-X */ + break; + } /* switch (hw->mac.type) */ + + adapter->eims_enable_mask |= adapter->eims_other; + + for (i = 0; i < adapter->num_q_vectors; i++) + igc_assign_vector(adapter->q_vector[i], vector++); + + wrfl(); +} + +static irqreturn_t igc_msix_ring(int irq, void *data) +{ + struct igc_q_vector *q_vector = data; + + /* Write the ITR value calculated from the previous interrupt. */ + igc_write_itr(q_vector); + + napi_schedule(&q_vector->napi); + + return IRQ_HANDLED; +} + +/** + * igc_request_msix - Initialize MSI-X interrupts + * @adapter: Pointer to adapter structure + * + * igc_request_msix allocates MSI-X vectors and requests interrupts from the + * kernel. + */ +static int igc_request_msix(struct igc_adapter *adapter) +{ + int i = 0, err = 0, vector = 0, free_vector = 0; + struct net_device *netdev = adapter->netdev; + + err = request_irq(adapter->msix_entries[vector].vector, + &igc_msix_other, 0, netdev->name, adapter); + if (err) + goto err_out; + + for (i = 0; i < adapter->num_q_vectors; i++) { + struct igc_q_vector *q_vector = adapter->q_vector[i]; + + vector++; + + q_vector->itr_register = adapter->io_addr + IGC_EITR(vector); + + if (q_vector->rx.ring && q_vector->tx.ring) + sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, + q_vector->rx.ring->queue_index); + else if (q_vector->tx.ring) + sprintf(q_vector->name, "%s-tx-%u", netdev->name, + q_vector->tx.ring->queue_index); + else if (q_vector->rx.ring) + sprintf(q_vector->name, "%s-rx-%u", netdev->name, + q_vector->rx.ring->queue_index); + else + sprintf(q_vector->name, "%s-unused", netdev->name); + + err = request_irq(adapter->msix_entries[vector].vector, + igc_msix_ring, 0, q_vector->name, + q_vector); + if (err) + goto err_free; + } + + igc_configure_msix(adapter); + return 0; + +err_free: + /* free already assigned IRQs */ + free_irq(adapter->msix_entries[free_vector++].vector, adapter); + + vector--; + for (i = 0; i < vector; i++) { + free_irq(adapter->msix_entries[free_vector++].vector, + adapter->q_vector[i]); + } +err_out: + return err; +} + +/** + * igc_reset_q_vector - Reset config for interrupt vector + * @adapter: board private structure to initialize + * @v_idx: Index of vector to be reset + * + * If NAPI is enabled it will delete any references to the + * NAPI struct. This is preparation for igc_free_q_vector. + */ +static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx) +{ + struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; + + /* if we're coming from igc_set_interrupt_capability, the vectors are + * not yet allocated + */ + if (!q_vector) + return; + + if (q_vector->tx.ring) + adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; + + if (q_vector->rx.ring) + adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL; + + netif_napi_del(&q_vector->napi); +} + +static void igc_reset_interrupt_capability(struct igc_adapter *adapter) +{ + int v_idx = adapter->num_q_vectors; + + if (adapter->msix_entries) { + pci_disable_msix(adapter->pdev); + kfree(adapter->msix_entries); + adapter->msix_entries = NULL; + } else if (adapter->flags & IGC_FLAG_HAS_MSI) { + pci_disable_msi(adapter->pdev); + } + + while (v_idx--) + igc_reset_q_vector(adapter, v_idx); +} + +/** + * igc_clear_interrupt_scheme - reset the device to a state of no interrupts + * @adapter: Pointer to adapter structure + * + * This function resets the device so that it has 0 rx queues, tx queues, and + * MSI-X interrupts allocated. + */ +static void igc_clear_interrupt_scheme(struct igc_adapter *adapter) +{ + igc_free_q_vectors(adapter); + igc_reset_interrupt_capability(adapter); +} + +/** + * igc_free_q_vectors - Free memory allocated for interrupt vectors + * @adapter: board private structure to initialize + * + * This function frees the memory allocated to the q_vectors. In addition if + * NAPI is enabled it will delete any references to the NAPI struct prior + * to freeing the q_vector. + */ +static void igc_free_q_vectors(struct igc_adapter *adapter) +{ + int v_idx = adapter->num_q_vectors; + + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + adapter->num_q_vectors = 0; + + while (v_idx--) { + igc_reset_q_vector(adapter, v_idx); + igc_free_q_vector(adapter, v_idx); + } +} + +/** + * igc_free_q_vector - Free memory allocated for specific interrupt vector + * @adapter: board private structure to initialize + * @v_idx: Index of vector to be freed + * + * This function frees the memory allocated to the q_vector. + */ +static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx) +{ + struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; + + adapter->q_vector[v_idx] = NULL; + + /* igc_get_stats64() might access the rings on this vector, + * we must wait a grace period before freeing it. + */ + if (q_vector) + kfree_rcu(q_vector, rcu); +} + +/** + * igc_update_ring_itr - update the dynamic ITR value based on packet size + * @q_vector: pointer to q_vector + * + * Stores a new ITR value based on strictly on packet size. This + * algorithm is less sophisticated than that used in igc_update_itr, + * due to the difficulty of synchronizing statistics across multiple + * receive rings. The divisors and thresholds used by this function + * were determined based on theoretical maximum wire speed and testing + * data, in order to minimize response time while increasing bulk + * throughput. + * NOTE: This function is called only when operating in a multiqueue + * receive environment. + */ +static void igc_update_ring_itr(struct igc_q_vector *q_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + int new_val = q_vector->itr_val; + int avg_wire_size = 0; + unsigned int packets; + + /* For non-gigabit speeds, just fix the interrupt rate at 4000 + * ints/sec - ITR timer value of 120 ticks. + */ + switch (adapter->link_speed) { + case SPEED_10: + case SPEED_100: + new_val = IGC_4K_ITR; + goto set_itr_val; + default: + break; + } + + packets = q_vector->rx.total_packets; + if (packets) + avg_wire_size = q_vector->rx.total_bytes / packets; + + packets = q_vector->tx.total_packets; + if (packets) + avg_wire_size = max_t(u32, avg_wire_size, + q_vector->tx.total_bytes / packets); + + /* if avg_wire_size isn't set no work was done */ + if (!avg_wire_size) + goto clear_counts; + + /* Add 24 bytes to size to account for CRC, preamble, and gap */ + avg_wire_size += 24; + + /* Don't starve jumbo frames */ + avg_wire_size = min(avg_wire_size, 3000); + + /* Give a little boost to mid-size frames */ + if (avg_wire_size > 300 && avg_wire_size < 1200) + new_val = avg_wire_size / 3; + else + new_val = avg_wire_size / 2; + + /* conservative mode (itr 3) eliminates the lowest_latency setting */ + if (new_val < IGC_20K_ITR && + ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || + (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) + new_val = IGC_20K_ITR; + +set_itr_val: + if (new_val != q_vector->itr_val) { + q_vector->itr_val = new_val; + q_vector->set_itr = 1; + } +clear_counts: + q_vector->rx.total_bytes = 0; + q_vector->rx.total_packets = 0; + q_vector->tx.total_bytes = 0; + q_vector->tx.total_packets = 0; +} + +/** + * igc_update_itr - update the dynamic ITR value based on statistics + * @q_vector: pointer to q_vector + * @ring_container: ring info to update the itr for + * + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time + * while increasing bulk throughput. + * NOTE: These calculations are only valid when operating in a single- + * queue environment. + */ +static void igc_update_itr(struct igc_q_vector *q_vector, + struct igc_ring_container *ring_container) +{ + unsigned int packets = ring_container->total_packets; + unsigned int bytes = ring_container->total_bytes; + u8 itrval = ring_container->itr; + + /* no packets, exit with status unchanged */ + if (packets == 0) + return; + + switch (itrval) { + case lowest_latency: + /* handle TSO and jumbo frames */ + if (bytes / packets > 8000) + itrval = bulk_latency; + else if ((packets < 5) && (bytes > 512)) + itrval = low_latency; + break; + case low_latency: /* 50 usec aka 20000 ints/s */ + if (bytes > 10000) { + /* this if handles the TSO accounting */ + if (bytes / packets > 8000) + itrval = bulk_latency; + else if ((packets < 10) || ((bytes / packets) > 1200)) + itrval = bulk_latency; + else if ((packets > 35)) + itrval = lowest_latency; + } else if (bytes / packets > 2000) { + itrval = bulk_latency; + } else if (packets <= 2 && bytes < 512) { + itrval = lowest_latency; + } + break; + case bulk_latency: /* 250 usec aka 4000 ints/s */ + if (bytes > 25000) { + if (packets > 35) + itrval = low_latency; + } else if (bytes < 1500) { + itrval = low_latency; + } + break; + } + + /* clear work counters since we have the values we need */ + ring_container->total_bytes = 0; + ring_container->total_packets = 0; + + /* write updated itr to ring container */ + ring_container->itr = itrval; +} + +static void igc_set_itr(struct igc_q_vector *q_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + u32 new_itr = q_vector->itr_val; + u8 current_itr = 0; + + /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ + switch (adapter->link_speed) { + case SPEED_10: + case SPEED_100: + current_itr = 0; + new_itr = IGC_4K_ITR; + goto set_itr_now; + default: + break; + } + + igc_update_itr(q_vector, &q_vector->tx); + igc_update_itr(q_vector, &q_vector->rx); + + current_itr = max(q_vector->rx.itr, q_vector->tx.itr); + + /* conservative mode (itr 3) eliminates the lowest_latency setting */ + if (current_itr == lowest_latency && + ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || + (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) + current_itr = low_latency; + + switch (current_itr) { + /* counts and packets in update_itr are dependent on these numbers */ + case lowest_latency: + new_itr = IGC_70K_ITR; /* 70,000 ints/sec */ + break; + case low_latency: + new_itr = IGC_20K_ITR; /* 20,000 ints/sec */ + break; + case bulk_latency: + new_itr = IGC_4K_ITR; /* 4,000 ints/sec */ + break; + default: + break; + } + +set_itr_now: + if (new_itr != q_vector->itr_val) { + /* this attempts to bias the interrupt rate towards Bulk + * by adding intermediate steps when interrupt rate is + * increasing + */ + new_itr = new_itr > q_vector->itr_val ? + max((new_itr * q_vector->itr_val) / + (new_itr + (q_vector->itr_val >> 2)), + new_itr) : new_itr; + /* Don't write the value here; it resets the adapter's + * internal timer, and causes us to delay far longer than + * we should between interrupts. Instead, we write the ITR + * value at the beginning of the next interrupt so the timing + * ends up being correct. + */ + q_vector->itr_val = new_itr; + q_vector->set_itr = 1; + } +} + +static void igc_ring_irq_enable(struct igc_q_vector *q_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + struct igc_hw *hw = &adapter->hw; + + if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || + (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { + if (adapter->num_q_vectors == 1) + igc_set_itr(q_vector); + else + igc_update_ring_itr(q_vector); + } + + if (!test_bit(__IGC_DOWN, &adapter->state)) { + if (adapter->msix_entries) + wr32(IGC_EIMS, q_vector->eims_value); + else + igc_irq_enable(adapter); + } +} + +/** + * igc_poll - NAPI Rx polling callback + * @napi: napi polling structure + * @budget: count of how many packets we should handle + */ +static int igc_poll(struct napi_struct *napi, int budget) +{ + struct igc_q_vector *q_vector = container_of(napi, + struct igc_q_vector, + napi); + bool clean_complete = true; + int work_done = 0; + int cleaned = 0; + + if (q_vector->rx.ring) { + work_done += cleaned; + if (cleaned >= budget) + clean_complete = false; + } + + /* If all work not completed, return budget and keep polling */ + if (!clean_complete) + return budget; + + /* If not enough Rx work done, exit the polling mode */ + napi_complete_done(napi, work_done); + igc_ring_irq_enable(q_vector); + + return 0; +} + +/** + * igc_set_interrupt_capability - set MSI or MSI-X if supported + * @adapter: Pointer to adapter structure + * + * Attempt to configure interrupts using the best available + * capabilities of the hardware and kernel. + */ +static void igc_set_interrupt_capability(struct igc_adapter *adapter, + bool msix) +{ + int numvecs, i; + int err; + + if (!msix) + goto msi_only; + adapter->flags |= IGC_FLAG_HAS_MSIX; + + /* Number of supported queues. */ + adapter->num_rx_queues = adapter->rss_queues; + + adapter->num_tx_queues = adapter->rss_queues; + + /* start with one vector for every Rx queue */ + numvecs = adapter->num_rx_queues; + + /* if Tx handler is separate add 1 for every Tx queue */ + if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) + numvecs += adapter->num_tx_queues; + + /* store the number of vectors reserved for queues */ + adapter->num_q_vectors = numvecs; + + /* add 1 vector for link status interrupts */ + numvecs++; + + adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry), + GFP_KERNEL); + + if (!adapter->msix_entries) + return; + + /* populate entry values */ + for (i = 0; i < numvecs; i++) + adapter->msix_entries[i].entry = i; + + err = pci_enable_msix_range(adapter->pdev, + adapter->msix_entries, + numvecs, + numvecs); + if (err > 0) + return; + + kfree(adapter->msix_entries); + adapter->msix_entries = NULL; + + igc_reset_interrupt_capability(adapter); + +msi_only: + adapter->flags &= ~IGC_FLAG_HAS_MSIX; + + adapter->rss_queues = 1; + adapter->flags |= IGC_FLAG_QUEUE_PAIRS; + adapter->num_rx_queues = 1; + adapter->num_tx_queues = 1; + adapter->num_q_vectors = 1; + if (!pci_enable_msi(adapter->pdev)) + adapter->flags |= IGC_FLAG_HAS_MSI; +} + +static void igc_add_ring(struct igc_ring *ring, + struct igc_ring_container *head) +{ + head->ring = ring; + head->count++; +} + +/** + * igc_alloc_q_vector - Allocate memory for a single interrupt vector + * @adapter: board private structure to initialize + * @v_count: q_vectors allocated on adapter, used for ring interleaving + * @v_idx: index of vector in adapter struct + * @txr_count: total number of Tx rings to allocate + * @txr_idx: index of first Tx ring to allocate + * @rxr_count: total number of Rx rings to allocate + * @rxr_idx: index of first Rx ring to allocate + * + * We allocate one q_vector. If allocation fails we return -ENOMEM. + */ +static int igc_alloc_q_vector(struct igc_adapter *adapter, + unsigned int v_count, unsigned int v_idx, + unsigned int txr_count, unsigned int txr_idx, + unsigned int rxr_count, unsigned int rxr_idx) +{ + struct igc_q_vector *q_vector; + struct igc_ring *ring; + int ring_count, size; + + /* igc only supports 1 Tx and/or 1 Rx queue per vector */ + if (txr_count > 1 || rxr_count > 1) + return -ENOMEM; + + ring_count = txr_count + rxr_count; + size = sizeof(struct igc_q_vector) + + (sizeof(struct igc_ring) * ring_count); + + /* allocate q_vector and rings */ + q_vector = adapter->q_vector[v_idx]; + if (!q_vector) + q_vector = kzalloc(size, GFP_KERNEL); + else + memset(q_vector, 0, size); + if (!q_vector) + return -ENOMEM; + + /* initialize NAPI */ + netif_napi_add(adapter->netdev, &q_vector->napi, + igc_poll, 64); + + /* tie q_vector and adapter together */ + adapter->q_vector[v_idx] = q_vector; + q_vector->adapter = adapter; + + /* initialize work limits */ + q_vector->tx.work_limit = adapter->tx_work_limit; + + /* initialize ITR configuration */ + q_vector->itr_register = adapter->io_addr + IGC_EITR(0); + q_vector->itr_val = IGC_START_ITR; + + /* initialize pointer to rings */ + ring = q_vector->ring; + + /* initialize ITR */ + if (rxr_count) { + /* rx or rx/tx vector */ + if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) + q_vector->itr_val = adapter->rx_itr_setting; + } else { + /* tx only vector */ + if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) + q_vector->itr_val = adapter->tx_itr_setting; + } + + if (txr_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Tx values */ + igc_add_ring(ring, &q_vector->tx); + + /* apply Tx specific ring traits */ + ring->count = adapter->tx_ring_count; + ring->queue_index = txr_idx; + + /* assign ring to adapter */ + adapter->tx_ring[txr_idx] = ring; + + /* push pointer to next ring */ + ring++; + } + + if (rxr_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Rx values */ + igc_add_ring(ring, &q_vector->rx); + + /* apply Rx specific ring traits */ + ring->count = adapter->rx_ring_count; + ring->queue_index = rxr_idx; + + /* assign ring to adapter */ + adapter->rx_ring[rxr_idx] = ring; + } + + return 0; +} + +/** + * igc_alloc_q_vectors - Allocate memory for interrupt vectors + * @adapter: board private structure to initialize + * + * We allocate one q_vector per queue interrupt. If allocation fails we + * return -ENOMEM. + */ +static int igc_alloc_q_vectors(struct igc_adapter *adapter) +{ + int rxr_remaining = adapter->num_rx_queues; + int txr_remaining = adapter->num_tx_queues; + int rxr_idx = 0, txr_idx = 0, v_idx = 0; + int q_vectors = adapter->num_q_vectors; + int err; + + if (q_vectors >= (rxr_remaining + txr_remaining)) { + for (; rxr_remaining; v_idx++) { + err = igc_alloc_q_vector(adapter, q_vectors, v_idx, + 0, 0, 1, rxr_idx); + + if (err) + goto err_out; + + /* update counts and index */ + rxr_remaining--; + rxr_idx++; + } + } + + for (; v_idx < q_vectors; v_idx++) { + int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); + int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); + + err = igc_alloc_q_vector(adapter, q_vectors, v_idx, + tqpv, txr_idx, rqpv, rxr_idx); + + if (err) + goto err_out; + + /* update counts and index */ + rxr_remaining -= rqpv; + txr_remaining -= tqpv; + rxr_idx++; + txr_idx++; + } + + return 0; + +err_out: + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + adapter->num_q_vectors = 0; + + while (v_idx--) + igc_free_q_vector(adapter, v_idx); + + return -ENOMEM; +} + +/** + * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors + * @adapter: Pointer to adapter structure + * + * This function initializes the interrupts and allocates all of the queues. + */ +static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix) +{ + struct pci_dev *pdev = adapter->pdev; + int err = 0; + + igc_set_interrupt_capability(adapter, msix); + + err = igc_alloc_q_vectors(adapter); + if (err) { + dev_err(&pdev->dev, "Unable to allocate memory for vectors\n"); + goto err_alloc_q_vectors; + } + + return 0; + +err_alloc_q_vectors: + igc_reset_interrupt_capability(adapter); + return err; +} + +static void igc_free_irq(struct igc_adapter *adapter) +{ + if (adapter->msix_entries) { + int vector = 0, i; + + free_irq(adapter->msix_entries[vector++].vector, adapter); + + for (i = 0; i < adapter->num_q_vectors; i++) + free_irq(adapter->msix_entries[vector++].vector, + adapter->q_vector[i]); + } else { + free_irq(adapter->pdev->irq, adapter); + } +} + +/** + * igc_irq_disable - Mask off interrupt generation on the NIC + * @adapter: board private structure + */ +static void igc_irq_disable(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + if (adapter->msix_entries) { + u32 regval = rd32(IGC_EIAM); + + wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask); + wr32(IGC_EIMC, adapter->eims_enable_mask); + regval = rd32(IGC_EIAC); + wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask); + } + + wr32(IGC_IAM, 0); + wr32(IGC_IMC, ~0); + wrfl(); + + if (adapter->msix_entries) { + int vector = 0, i; + + synchronize_irq(adapter->msix_entries[vector++].vector); + + for (i = 0; i < adapter->num_q_vectors; i++) + synchronize_irq(adapter->msix_entries[vector++].vector); + } else { + synchronize_irq(adapter->pdev->irq); + } +} + +/** + * igc_irq_enable - Enable default interrupt generation settings + * @adapter: board private structure + */ +static void igc_irq_enable(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + if (adapter->msix_entries) { + u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA; + u32 regval = rd32(IGC_EIAC); + + wr32(IGC_EIAC, regval | adapter->eims_enable_mask); + regval = rd32(IGC_EIAM); + wr32(IGC_EIAM, regval | adapter->eims_enable_mask); + wr32(IGC_EIMS, adapter->eims_enable_mask); + wr32(IGC_IMS, ims); + } else { + wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA); + wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA); + } +} + +/** + * igc_request_irq - initialize interrupts + * @adapter: Pointer to adapter structure + * + * Attempts to configure interrupts using the best available + * capabilities of the hardware and kernel. + */ +static int igc_request_irq(struct igc_adapter *adapter) +{ + int err = 0; + + if (adapter->flags & IGC_FLAG_HAS_MSIX) { + err = igc_request_msix(adapter); + if (!err) + goto request_done; + /* fall back to MSI */ + + igc_clear_interrupt_scheme(adapter); + err = igc_init_interrupt_scheme(adapter, false); + if (err) + goto request_done; + igc_configure(adapter); + } + +request_done: + return err; +} + +static void igc_write_itr(struct igc_q_vector *q_vector) +{ + u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK; + + if (!q_vector->set_itr) + return; + + if (!itr_val) + itr_val = IGC_ITR_VAL_MASK; + + itr_val |= IGC_EITR_CNT_IGNR; + + writel(itr_val, q_vector->itr_register); + q_vector->set_itr = 0; +} + /** * igc_open - Called when a network interface is made active * @netdev: network interface device structure @@ -325,6 +1303,7 @@ static int __igc_open(struct net_device *netdev, bool resuming) { struct igc_adapter *adapter = netdev_priv(netdev); struct igc_hw *hw = &adapter->hw; + int err = 0; int i = 0; /* disallow open during test */ @@ -340,15 +1319,40 @@ static int __igc_open(struct net_device *netdev, bool resuming) igc_configure(adapter); + err = igc_request_irq(adapter); + if (err) + goto err_req_irq; + + /* Notify the stack of the actual queue counts. */ + netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues); + if (err) + goto err_set_queues; + + err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); + if (err) + goto err_set_queues; + clear_bit(__IGC_DOWN, &adapter->state); for (i = 0; i < adapter->num_q_vectors; i++) napi_enable(&adapter->q_vector[i]->napi); + /* Clear any pending interrupts. */ + rd32(IGC_ICR); + igc_irq_enable(adapter); + /* start the watchdog. */ hw->mac.get_link_status = 1; return IGC_SUCCESS; + +err_set_queues: + igc_free_irq(adapter); +err_req_irq: + igc_release_hw_control(adapter); + igc_power_down_link(adapter); + + return err; } static int igc_open(struct net_device *netdev) @@ -377,6 +1381,8 @@ static int __igc_close(struct net_device *netdev, bool suspending) igc_release_hw_control(adapter); + igc_free_irq(adapter); + return 0; } @@ -595,6 +1601,8 @@ static int igc_probe(struct pci_dev *pdev, err_register: igc_release_hw_control(adapter); err_sw_init: + igc_clear_interrupt_scheme(adapter); + iounmap(adapter->io_addr); err_ioremap: free_netdev(netdev); err_alloc_etherdev: @@ -672,6 +1680,14 @@ static int igc_sw_init(struct igc_adapter *adapter) adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + if (igc_init_interrupt_scheme(adapter, true)) { + dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); + return -ENOMEM; + } + + /* Explicitly disable IRQ since the NIC can be in any state. */ + igc_irq_disable(adapter); + set_bit(__IGC_DOWN, &adapter->state); return 0; -- cgit v1.2.3-59-g8ed1b