From f456393e195e0aa16029985f63cd93b601a0d315 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 30 Oct 2006 15:18:39 -0800 Subject: [SCSI] libsas: modify error handler to use scsi_eh_* functions This patch adds an EH done queue to sas_ha, converts the error handling strategy function and the sas_scsi_task_done functions in libsas to use the scsi_eh_* commands for error'd commands, and adds checks for the INITIATOR_ABORTED flag so that we do the right thing if a sas_task has been aborted by the initiator. Signed-off-by: Darrick J. Wong Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_init.c | 2 ++ drivers/scsi/libsas/sas_scsi_host.c | 29 +++++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) (limited to 'drivers/scsi/libsas') diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c index c836a237fb79..a2b479a65908 100644 --- a/drivers/scsi/libsas/sas_init.c +++ b/drivers/scsi/libsas/sas_init.c @@ -112,6 +112,8 @@ int sas_register_ha(struct sas_ha_struct *sas_ha) } } + INIT_LIST_HEAD(&sas_ha->eh_done_q); + return 0; Undo_ports: diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index e46e79355b77..6a97b07849b4 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include "../scsi_sas_internal.h" @@ -46,6 +47,7 @@ static void sas_scsi_task_done(struct sas_task *task) { struct task_status_struct *ts = &task->task_status; struct scsi_cmnd *sc = task->uldd_task; + struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(sc->device->host); unsigned ts_flags = task->task_state_flags; int hs = 0, stat = 0; @@ -116,7 +118,7 @@ static void sas_scsi_task_done(struct sas_task *task) sas_free_task(task); /* This is very ugly but this is how SCSI Core works. */ if (ts_flags & SAS_TASK_STATE_ABORTED) - scsi_finish_command(sc); + scsi_eh_finish_cmd(sc, &sas_ha->eh_done_q); else sc->scsi_done(sc); } @@ -307,6 +309,15 @@ static enum task_disposition sas_scsi_find_task(struct sas_task *task) spin_unlock_irqrestore(&core->task_queue_lock, flags); } + spin_lock_irqsave(&task->task_state_lock, flags); + if (task->task_state_flags & SAS_TASK_INITIATOR_ABORTED) { + spin_unlock_irqrestore(&task->task_state_lock, flags); + SAS_DPRINTK("%s: task 0x%p already aborted\n", + __FUNCTION__, task); + return TASK_IS_ABORTED; + } + spin_unlock_irqrestore(&task->task_state_lock, flags); + for (i = 0; i < 5; i++) { SAS_DPRINTK("%s: aborting task 0x%p\n", __FUNCTION__, task); res = si->dft->lldd_abort_task(task); @@ -409,13 +420,16 @@ Again: SAS_DPRINTK("going over list...\n"); list_for_each_entry_safe(cmd, n, &error_q, eh_entry) { struct sas_task *task = TO_SAS_TASK(cmd); + list_del_init(&cmd->eh_entry); + if (!task) { + SAS_DPRINTK("%s: taskless cmd?!\n", __FUNCTION__); + continue; + } SAS_DPRINTK("trying to find task 0x%p\n", task); - list_del_init(&cmd->eh_entry); res = sas_scsi_find_task(task); cmd->eh_eflags = 0; - shost->host_failed--; switch (res) { case TASK_IS_DONE: @@ -491,6 +505,7 @@ Again: } } out: + scsi_eh_flush_done_q(&ha->eh_done_q); SAS_DPRINTK("--- Exit %s\n", __FUNCTION__); return; clear_q: @@ -508,12 +523,18 @@ enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) unsigned long flags; if (!task) { - SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_HANDLED\n", + SAS_DPRINTK("command 0x%p, task 0x%p, gone: EH_HANDLED\n", cmd, task); return EH_HANDLED; } spin_lock_irqsave(&task->task_state_lock, flags); + if (task->task_state_flags & SAS_TASK_INITIATOR_ABORTED) { + spin_unlock_irqrestore(&task->task_state_lock, flags); + SAS_DPRINTK("command 0x%p, task 0x%p, aborted by initiator: " + "EH_NOT_HANDLED\n", cmd, task); + return EH_NOT_HANDLED; + } if (task->task_state_flags & SAS_TASK_STATE_DONE) { spin_unlock_irqrestore(&task->task_state_lock, flags); SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_HANDLED\n", -- cgit v1.2.3-59-g8ed1b From 79a5eb609b74e7b3638861c41b98eafa74920a1f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 30 Oct 2006 15:18:50 -0800 Subject: [SCSI] libsas: add sas_abort_task This patch adds an external function, sas_abort_task, to enable LLDDs to abort sas_tasks. It also adds a work_struct so that the actual work of aborting a task can be shifted from tasklet context (in the LLDD) onto the scsi_host's workqueue. Signed-off-by: Darrick J. Wong Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_scsi_host.c | 60 +++++++++++++++++++++++++++++++++++++ include/scsi/libsas.h | 4 +++ 2 files changed, 64 insertions(+) (limited to 'drivers/scsi/libsas') diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 6a97b07849b4..c5fd37522728 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -33,6 +33,7 @@ #include #include #include "../scsi_sas_internal.h" +#include "../scsi_transport_api.h" #include #include @@ -798,6 +799,64 @@ void sas_shutdown_queue(struct sas_ha_struct *sas_ha) spin_unlock_irqrestore(&core->task_queue_lock, flags); } +static int do_sas_task_abort(struct sas_task *task) +{ + struct scsi_cmnd *sc = task->uldd_task; + struct sas_internal *si = + to_sas_internal(task->dev->port->ha->core.shost->transportt); + unsigned long flags; + int res; + + spin_lock_irqsave(&task->task_state_lock, flags); + if (task->task_state_flags & SAS_TASK_STATE_ABORTED) { + spin_unlock_irqrestore(&task->task_state_lock, flags); + SAS_DPRINTK("%s: Task %p already aborted.\n", __FUNCTION__, + task); + return 0; + } + + task->task_state_flags |= SAS_TASK_INITIATOR_ABORTED; + if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) + task->task_state_flags |= SAS_TASK_STATE_ABORTED; + spin_unlock_irqrestore(&task->task_state_lock, flags); + + if (!si->dft->lldd_abort_task) + return -ENODEV; + + res = si->dft->lldd_abort_task(task); + if ((task->task_state_flags & SAS_TASK_STATE_DONE) || + (res == TMF_RESP_FUNC_COMPLETE)) + { + /* SMP commands don't have scsi_cmds(?) */ + if (!sc) { + task->task_done(task); + return 0; + } + scsi_req_abort_cmd(sc); + scsi_schedule_eh(sc->device->host); + return 0; + } + + spin_lock_irqsave(&task->task_state_lock, flags); + task->task_state_flags &= ~SAS_TASK_INITIATOR_ABORTED; + if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) + task->task_state_flags &= ~SAS_TASK_STATE_ABORTED; + spin_unlock_irqrestore(&task->task_state_lock, flags); + + return -EAGAIN; +} + +void sas_task_abort(struct sas_task *task) +{ + int i; + + for (i = 0; i < 5; i++) + if (!do_sas_task_abort(task)) + return; + + SAS_DPRINTK("%s: Could not kill task!\n", __FUNCTION__); +} + EXPORT_SYMBOL_GPL(sas_queuecommand); EXPORT_SYMBOL_GPL(sas_target_alloc); EXPORT_SYMBOL_GPL(sas_slave_configure); @@ -805,3 +864,4 @@ EXPORT_SYMBOL_GPL(sas_slave_destroy); EXPORT_SYMBOL_GPL(sas_change_queue_depth); EXPORT_SYMBOL_GPL(sas_change_queue_type); EXPORT_SYMBOL_GPL(sas_bios_param); +EXPORT_SYMBOL_GPL(sas_task_abort); diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 7bf2e8b9903c..a1fc20a47c50 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -528,6 +528,8 @@ struct sas_task { void *lldd_task; /* for use by LLDDs */ void *uldd_task; + + struct work_struct abort_work; }; @@ -627,4 +629,6 @@ void sas_unregister_dev(struct domain_device *); void sas_init_dev(struct domain_device *); +void sas_task_abort(struct sas_task *task); + #endif /* _SASLIB_H_ */ -- cgit v1.2.3-59-g8ed1b From dea22214790d1306f3a3444db13d2c726037b189 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 7 Nov 2006 17:28:55 -0800 Subject: [PATCH] aic94xx: handle REQ_DEVICE_RESET This patch implements a REQ_DEVICE_RESET handler for the aic94xx driver. Like the earlier REQ_TASK_ABORT patch, this patch defers the device reset to the Scsi_Host's workqueue, which has the added benefit of ensuring that the device reset does not happen at the same time that the abort tmfs are being processed. After the phy reset, the busted drive should go away and be re-detected later, which is indeed what I've seen on both a x260 and a x206m. Signed-off-by: Darrick J. Wong Signed-off-by: James Bottomley --- drivers/scsi/aic94xx/aic94xx_scb.c | 51 ++++++++++++++++++++++++++++++++----- drivers/scsi/libsas/sas_init.c | 2 +- drivers/scsi/libsas/sas_scsi_host.c | 1 + include/scsi/libsas.h | 1 + include/scsi/scsi_transport_sas.h | 2 ++ 5 files changed, 49 insertions(+), 8 deletions(-) (limited to 'drivers/scsi/libsas') diff --git a/drivers/scsi/aic94xx/aic94xx_scb.c b/drivers/scsi/aic94xx/aic94xx_scb.c index 1911c5d17875..a014418d670e 100644 --- a/drivers/scsi/aic94xx/aic94xx_scb.c +++ b/drivers/scsi/aic94xx/aic94xx_scb.c @@ -343,6 +343,27 @@ void asd_invalidate_edb(struct asd_ascb *ascb, int edb_id) } } +/* hard reset a phy later */ +static void do_phy_reset_later(void *data) +{ + struct sas_phy *sas_phy = data; + int error; + + ASD_DPRINTK("%s: About to hard reset phy %d\n", __FUNCTION__, + sas_phy->identify.phy_identifier); + /* Reset device port */ + error = sas_phy_reset(sas_phy, 1); + if (error) + ASD_DPRINTK("%s: Hard reset of phy %d failed (%d).\n", + __FUNCTION__, sas_phy->identify.phy_identifier, error); +} + +static void phy_reset_later(struct sas_phy *sas_phy, struct Scsi_Host *shost) +{ + INIT_WORK(&sas_phy->reset_work, do_phy_reset_later, sas_phy); + queue_work(shost->work_q, &sas_phy->reset_work); +} + /* start up the ABORT TASK tmf... */ static void task_kill_later(struct asd_ascb *ascb) { @@ -402,7 +423,9 @@ static void escb_tasklet_complete(struct asd_ascb *ascb, goto out; } case REQ_DEVICE_RESET: { - struct asd_ascb *a, *b; + struct Scsi_Host *shost = sas_ha->core.shost; + struct sas_phy *dev_phy; + struct asd_ascb *a; u16 conn_handle; conn_handle = *((u16*)(&dl->status_block[1])); @@ -412,17 +435,31 @@ static void escb_tasklet_complete(struct asd_ascb *ascb, dl->status_block[3]); /* Kill all pending tasks and reset the device */ - list_for_each_entry_safe(a, b, &asd_ha->seq.pend_q, list) { - struct sas_task *task = a->uldd_task; - struct domain_device *dev = task->dev; + dev_phy = NULL; + list_for_each_entry(a, &asd_ha->seq.pend_q, list) { + struct sas_task *task; + struct domain_device *dev; u16 x; - x = *((u16*)(&dev->lldd_dev)); - if (x == conn_handle) + task = a->uldd_task; + if (!task) + continue; + dev = task->dev; + + x = (u16)dev->lldd_dev; + if (x == conn_handle) { + dev_phy = dev->port->phy; task_kill_later(a); + } } - /* FIXME: Reset device port (huh?) */ + /* Reset device port */ + if (!dev_phy) { + ASD_DPRINTK("%s: No pending commands; can't reset.\n", + __FUNCTION__); + goto out; + } + phy_reset_later(dev_phy, shost); goto out; } case SIGNAL_NCQ_ERROR: diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c index a2b479a65908..0fb347b4b1a2 100644 --- a/drivers/scsi/libsas/sas_init.c +++ b/drivers/scsi/libsas/sas_init.c @@ -144,7 +144,7 @@ static int sas_get_linkerrors(struct sas_phy *phy) return sas_smp_get_phy_events(phy); } -static int sas_phy_reset(struct sas_phy *phy, int hard_reset) +int sas_phy_reset(struct sas_phy *phy, int hard_reset) { int ret; enum phy_func reset_type; diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index c5fd37522728..e064aac06b90 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -865,3 +865,4 @@ EXPORT_SYMBOL_GPL(sas_change_queue_depth); EXPORT_SYMBOL_GPL(sas_change_queue_type); EXPORT_SYMBOL_GPL(sas_bios_param); EXPORT_SYMBOL_GPL(sas_task_abort); +EXPORT_SYMBOL_GPL(sas_phy_reset); diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index a1fc20a47c50..29f6e1af1bf9 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -597,6 +597,7 @@ struct sas_domain_function_template { extern int sas_register_ha(struct sas_ha_struct *); extern int sas_unregister_ha(struct sas_ha_struct *); +int sas_phy_reset(struct sas_phy *phy, int hard_reset); extern int sas_queuecommand(struct scsi_cmnd *, void (*scsi_done)(struct scsi_cmnd *)); extern int sas_target_alloc(struct scsi_target *); diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h index 53024377f3b8..59633a82de47 100644 --- a/include/scsi/scsi_transport_sas.h +++ b/include/scsi/scsi_transport_sas.h @@ -73,6 +73,8 @@ struct sas_phy { /* for the list of phys belonging to a port */ struct list_head port_siblings; + + struct work_struct reset_work; }; #define dev_to_phy(d) \ -- cgit v1.2.3-59-g8ed1b From 024879ead9594acab30ce9e23c955086e2d967a0 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 15 Nov 2006 18:03:07 -0600 Subject: [SCSI] libsas: better error handling in sas_expander.c With async scanning, we're now tripping the BUG_ON in sas_ex_discover_end_dev(), so make the error handling here correct. Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_expander.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) (limited to 'drivers/scsi/libsas') diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index e34a93435497..d31e6fa466f7 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -597,10 +597,15 @@ static struct domain_device *sas_ex_discover_end_dev( child->iproto = phy->attached_iproto; memcpy(child->sas_addr, phy->attached_sas_addr, SAS_ADDR_SIZE); sas_hash_addr(child->hashed_sas_addr, child->sas_addr); - phy->port = sas_port_alloc(&parent->rphy->dev, phy_id); - BUG_ON(!phy->port); - /* FIXME: better error handling*/ - BUG_ON(sas_port_add(phy->port) != 0); + if (!phy->port) { + phy->port = sas_port_alloc(&parent->rphy->dev, phy_id); + if (unlikely(!phy->port)) + goto out_err; + if (unlikely(sas_port_add(phy->port) != 0)) { + sas_port_free(phy->port); + goto out_err; + } + } sas_ex_get_linkrate(parent, child, phy); if ((phy->attached_tproto & SAS_PROTO_STP) || phy->attached_sata_dev) { @@ -615,8 +620,7 @@ static struct domain_device *sas_ex_discover_end_dev( SAS_DPRINTK("report phy sata to %016llx:0x%x returned " "0x%x\n", SAS_ADDR(parent->sas_addr), phy_id, res); - kfree(child); - return NULL; + goto out_free; } memcpy(child->frame_rcvd, &child->sata_dev.rps_resp.rps.fis, sizeof(struct dev_to_host_fis)); @@ -627,14 +631,14 @@ static struct domain_device *sas_ex_discover_end_dev( "%016llx:0x%x returned 0x%x\n", SAS_ADDR(child->sas_addr), SAS_ADDR(parent->sas_addr), phy_id, res); - kfree(child); - return NULL; + goto out_free; } } else if (phy->attached_tproto & SAS_PROTO_SSP) { child->dev_type = SAS_END_DEV; rphy = sas_end_device_alloc(phy->port); /* FIXME: error handling */ - BUG_ON(!rphy); + if (unlikely(!rphy)) + goto out_free; child->tproto = phy->attached_tproto; sas_init_dev(child); @@ -651,9 +655,7 @@ static struct domain_device *sas_ex_discover_end_dev( "at %016llx:0x%x returned 0x%x\n", SAS_ADDR(child->sas_addr), SAS_ADDR(parent->sas_addr), phy_id, res); - /* FIXME: this kfrees list elements without removing them */ - //kfree(child); - return NULL; + goto out_list_del; } } else { SAS_DPRINTK("target proto 0x%x at %016llx:0x%x not handled\n", @@ -663,6 +665,16 @@ static struct domain_device *sas_ex_discover_end_dev( list_add_tail(&child->siblings, &parent_ex->children); return child; + + out_list_del: + list_del(&child->dev_list_node); + sas_rphy_free(rphy); + out_free: + sas_port_delete(phy->port); + out_err: + phy->port = NULL; + kfree(child); + return NULL; } static struct domain_device *sas_ex_discover_expander( -- cgit v1.2.3-59-g8ed1b