aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorKaike Wan <kaike.wan@intel.com>2018-11-28 10:19:04 -0800
committerJason Gunthorpe <jgg@mellanox.com>2018-12-06 19:50:08 -0700
commitc1a797c0818e0122c7ec8422edd971cfec9b15ea (patch)
tree3d11557a227b00b4f9c0efa75c5b34b34dd76e86 /drivers/infiniband
parentIB/hfi1: Dump pio info for non-user send contexts (diff)
downloadlinux-dev-c1a797c0818e0122c7ec8422edd971cfec9b15ea.tar.xz
linux-dev-c1a797c0818e0122c7ec8422edd971cfec9b15ea.zip
IB/hfi1: Ignore LNI errors before DC8051 transitions to Polling state
When it is requested to change its physical state back to Offline while in the process to go up, DC8051 will set the ERROR field in the DC8051_DBG_ERR_INFO_SET_BY_8051 register. This ERROR field will remain until the next time when DC8051 transitions from Offline to Polling. Subsequently, when the host requests DC8051 to change its physical state to Polling again, it may receive a DC8051 interrupt with the stale ERROR field still in DC8051_DBG_ERR_INFO_SET_BY_8051. If the host link state has been changed to Polling, this stale ERROR will force the host to transition to Offline state, resulting in a vicious cycle of Polling ->Offline->Polling->Offline. On the other hand, if the host link state is still Offline when the stale ERROR is received, the stale ERROR will be ignored, and the link will come up correctly. This patch implements the correct behavior by changing host link state to Polling only after DC8051 changes its physical state to Polling. Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Krzysztof Goreczny <krzysztof.goreczny@intel.com> Signed-off-by: Kaike Wan <kaike.wan@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c47
1 files changed, 46 insertions, 1 deletions
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 9b20479dc710..385c33745c9f 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1072,6 +1072,8 @@ static void log_state_transition(struct hfi1_pportdata *ppd, u32 state);
static void log_physical_state(struct hfi1_pportdata *ppd, u32 state);
static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
int msecs);
+static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd,
+ int msecs);
static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
static void handle_temp_err(struct hfi1_devdata *dd);
@@ -10770,13 +10772,15 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
break;
ppd->port_error_action = 0;
- ppd->host_link_state = HLS_DN_POLL;
if (quick_linkup) {
/* quick linkup does not go into polling */
ret = do_quick_linkup(dd);
} else {
ret1 = set_physical_link_state(dd, PLS_POLLING);
+ if (!ret1)
+ ret1 = wait_phys_link_out_of_offline(ppd,
+ 3000);
if (ret1 != HCMD_SUCCESS) {
dd_dev_err(dd,
"Failed to transition to Polling link state, return 0x%x\n",
@@ -10784,6 +10788,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
ret = -EINVAL;
}
}
+
+ /*
+ * Change the host link state after requesting DC8051 to
+ * change its physical state so that we can ignore any
+ * interrupt with stale LNI(XX) error, which will not be
+ * cleared until DC8051 transitions to Polling state.
+ */
+ ppd->host_link_state = HLS_DN_POLL;
ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
/*
@@ -12927,6 +12939,39 @@ static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
return read_state;
}
+/*
+ * wait_phys_link_out_of_offline - wait for any out of offline state
+ * @ppd: port device
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for any out of offline physical link
+ * state change to occur.
+ * Returns 0 if at least one state is reached, otherwise -ETIMEDOUT.
+ */
+static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd,
+ int msecs)
+{
+ u32 read_state;
+ unsigned long timeout;
+
+ timeout = jiffies + msecs_to_jiffies(msecs);
+ while (1) {
+ read_state = read_physical_state(ppd->dd);
+ if ((read_state & 0xF0) != PLS_OFFLINE)
+ break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(ppd->dd,
+ "timeout waiting for phy link out of offline. Read state 0x%x, %dms\n",
+ read_state, msecs);
+ return -ETIMEDOUT;
+ }
+ usleep_range(1950, 2050); /* sleep 2ms-ish */
+ }
+
+ log_state_transition(ppd, read_state);
+ return read_state;
+}
+
#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
(r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)