diff options
author | Kiran Patil <kiran.patil@intel.com> | 2015-11-06 15:26:02 -0800 |
---|---|---|
committer | Jeff Kirsher <jeffrey.t.kirsher@intel.com> | 2015-12-03 02:23:31 -0800 |
commit | 9c6c12595b73e580f103b0812837f015034d460e (patch) | |
tree | 08c5a9ea8b32e9be73b40f69ceb99cac6810547c /drivers/net/ethernet/intel/i40e | |
parent | i40evf: remove duplicate string (diff) | |
download | linux-dev-9c6c12595b73e580f103b0812837f015034d460e.tar.xz linux-dev-9c6c12595b73e580f103b0812837f015034d460e.zip |
i40e: Detection and recovery of TX queue hung logic moved to service_task from tx_timeout
This patch contains following changes:
- detection and recovery logic (issue SW interrupt) has been moved to
service_task from timeout function.
- added some more debug info from tx_timeout.
Logic to detect and recover TX queue hung is now two step process:
- service_task detects TX queue hung and sets a bit(hung_detected) if
it was not set.
- if bit was set (means this is back-back hung condition detected),
issue SW interrupt and clear the bit.
- napi_poll clears the bit unconditionally since it cleans TX/RX queues.
Change-ID: Ieed03a48927c845a988b3ff375090bf37caeb903
Signed-off-by: Kiran Patil <kiran.patil@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Diffstat (limited to 'drivers/net/ethernet/intel/i40e')
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e.h | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_main.c | 36 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_txrx.c | 2 |
3 files changed, 35 insertions, 6 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 0b9537b37412..dde7ae71bfb8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -579,6 +579,9 @@ struct i40e_q_vector { u8 num_ringpairs; /* total number of ring pairs in vector */ +#define I40E_Q_VECTOR_HUNG_DETECT 0 /* Bit Index for hung detection logic */ + unsigned long hung_detected; /* Set/Reset for hung_detection logic */ + cpumask_t affinity_mask; struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 9e6268b4295a..e19a5790a3c7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4368,17 +4368,41 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) else val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0); + /* Bail out if interrupts are disabled because napi_poll + * execution in-progress or will get scheduled soon. + * napi_poll cleans TX and RX queues and updates 'next_to_clean'. + */ + if (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK)) + return; + head = i40e_get_head(tx_ring); tx_pending = i40e_get_tx_pending(tx_ring); - /* Interrupts are disabled and TX pending is non-zero, - * trigger the SW interrupt (don't wait). Worst case - * there will be one extra interrupt which may result - * into not cleaning any queues because queues are cleaned. + /* HW is done executing descriptors, updated HEAD write back, + * but SW hasn't processed those descriptors. If interrupt is + * not generated from this point ON, it could result into + * dev_watchdog detecting timeout on those netdev_queue, + * hence proactively trigger SW interrupt. */ - if (tx_pending && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) - i40e_force_wb(vsi, tx_ring->q_vector); + if (tx_pending) { + /* NAPI Poll didn't run and clear since it was set */ + if (test_and_clear_bit(I40E_Q_VECTOR_HUNG_DETECT, + &tx_ring->q_vector->hung_detected)) { + netdev_info(vsi->netdev, "VSI_seid %d, Hung TX queue %d, tx_pending: %d, NTC:0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x\n", + vsi->seid, q_idx, tx_pending, + tx_ring->next_to_clean, head, + tx_ring->next_to_use, + readl(tx_ring->tail)); + netdev_info(vsi->netdev, "VSI_seid %d, Issuing force_wb for TX queue %d, Interrupt Reg: 0x%x\n", + vsi->seid, q_idx, val); + i40e_force_wb(vsi, tx_ring->q_vector); + } else { + /* First Chance - detected possible hung */ + set_bit(I40E_Q_VECTOR_HUNG_DETECT, + &tx_ring->q_vector->hung_detected); + } + } } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 7371df7b6fb0..1d7d01c723b9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1891,6 +1891,8 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) return 0; } + /* Clear hung_detected bit */ + clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &q_vector->hung_detected); /* Since the actual Tx work is minimal, we can give the Tx a larger * budget and be more aggressive about cleaning up the Tx descriptors. */ |