aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/nvme/host
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r--drivers/nvme/host/core.c195
-rw-r--r--drivers/nvme/host/fabrics.c25
-rw-r--r--drivers/nvme/host/fabrics.h5
-rw-r--r--drivers/nvme/host/fc.c2
-rw-r--r--drivers/nvme/host/lightnvm.c8
-rw-r--r--drivers/nvme/host/multipath.c5
-rw-r--r--drivers/nvme/host/nvme.h17
-rw-r--r--drivers/nvme/host/pci.c42
-rw-r--r--drivers/nvme/host/rdma.c4
-rw-r--r--drivers/nvme/host/tcp.c2
-rw-r--r--drivers/nvme/host/zns.c13
11 files changed, 214 insertions, 104 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 40ca71b29bb9..ce1b61519441 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -85,7 +85,7 @@ static LIST_HEAD(nvme_subsystems);
static DEFINE_MUTEX(nvme_subsystems_lock);
static DEFINE_IDA(nvme_instance_ida);
-static dev_t nvme_chr_devt;
+static dev_t nvme_ctrl_base_chr_devt;
static struct class *nvme_class;
static struct class *nvme_subsys_class;
@@ -93,16 +93,6 @@ static void nvme_put_subsystem(struct nvme_subsystem *subsys);
static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
unsigned nsid);
-static void nvme_update_bdev_size(struct gendisk *disk)
-{
- struct block_device *bdev = bdget_disk(disk, 0);
-
- if (bdev) {
- bd_set_nr_sectors(bdev, get_capacity(disk));
- bdput(bdev);
- }
-}
-
/*
* Prepare a queue for teardown.
*
@@ -119,8 +109,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
blk_set_queue_dying(ns->queue);
blk_mq_unquiesce_queue(ns->queue);
- set_capacity(ns->disk, 0);
- nvme_update_bdev_size(ns->disk);
+ set_capacity_and_notify(ns->disk, 0);
}
static void nvme_queue_scan(struct nvme_ctrl *ctrl)
@@ -148,6 +137,38 @@ int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_try_sched_reset);
+static void nvme_failfast_work(struct work_struct *work)
+{
+ struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
+ struct nvme_ctrl, failfast_work);
+
+ if (ctrl->state != NVME_CTRL_CONNECTING)
+ return;
+
+ set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+ dev_info(ctrl->device, "failfast expired\n");
+ nvme_kick_requeue_lists(ctrl);
+}
+
+static inline void nvme_start_failfast_work(struct nvme_ctrl *ctrl)
+{
+ if (!ctrl->opts || ctrl->opts->fast_io_fail_tmo == -1)
+ return;
+
+ schedule_delayed_work(&ctrl->failfast_work,
+ ctrl->opts->fast_io_fail_tmo * HZ);
+}
+
+static inline void nvme_stop_failfast_work(struct nvme_ctrl *ctrl)
+{
+ if (!ctrl->opts)
+ return;
+
+ cancel_delayed_work_sync(&ctrl->failfast_work);
+ clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+}
+
+
int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@@ -433,8 +454,17 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
}
spin_unlock_irqrestore(&ctrl->lock, flags);
- if (changed && ctrl->state == NVME_CTRL_LIVE)
+ if (!changed)
+ return false;
+
+ if (ctrl->state == NVME_CTRL_LIVE) {
+ if (old_state == NVME_CTRL_CONNECTING)
+ nvme_stop_failfast_work(ctrl);
nvme_kick_requeue_lists(ctrl);
+ } else if (ctrl->state == NVME_CTRL_CONNECTING &&
+ old_state == NVME_CTRL_RESETTING) {
+ nvme_start_failfast_work(ctrl);
+ }
return changed;
}
EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
@@ -518,29 +548,49 @@ static inline void nvme_clear_nvme_request(struct request *req)
}
}
-struct request *nvme_alloc_request(struct request_queue *q,
- struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid)
+static inline unsigned int nvme_req_op(struct nvme_command *cmd)
{
- unsigned op = nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN;
- struct request *req;
+ return nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN;
+}
- if (qid == NVME_QID_ANY) {
- req = blk_mq_alloc_request(q, op, flags);
- } else {
- req = blk_mq_alloc_request_hctx(q, op, flags,
- qid ? qid - 1 : 0);
- }
- if (IS_ERR(req))
- return req;
+static inline void nvme_init_request(struct request *req,
+ struct nvme_command *cmd)
+{
+ if (req->q->queuedata)
+ req->timeout = NVME_IO_TIMEOUT;
+ else /* no queuedata implies admin queue */
+ req->timeout = NVME_ADMIN_TIMEOUT;
req->cmd_flags |= REQ_FAILFAST_DRIVER;
nvme_clear_nvme_request(req);
nvme_req(req)->cmd = cmd;
+}
+struct request *nvme_alloc_request(struct request_queue *q,
+ struct nvme_command *cmd, blk_mq_req_flags_t flags)
+{
+ struct request *req;
+
+ req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags);
+ if (!IS_ERR(req))
+ nvme_init_request(req, cmd);
return req;
}
EXPORT_SYMBOL_GPL(nvme_alloc_request);
+struct request *nvme_alloc_request_qid(struct request_queue *q,
+ struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid)
+{
+ struct request *req;
+
+ req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags,
+ qid ? qid - 1 : 0);
+ if (!IS_ERR(req))
+ nvme_init_request(req, cmd);
+ return req;
+}
+EXPORT_SYMBOL_GPL(nvme_alloc_request_qid);
+
static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
{
struct nvme_command c;
@@ -897,11 +947,15 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
struct request *req;
int ret;
- req = nvme_alloc_request(q, cmd, flags, qid);
+ if (qid == NVME_QID_ANY)
+ req = nvme_alloc_request(q, cmd, flags);
+ else
+ req = nvme_alloc_request_qid(q, cmd, flags, qid);
if (IS_ERR(req))
return PTR_ERR(req);
- req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+ if (timeout)
+ req->timeout = timeout;
if (buffer && bufflen) {
ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
@@ -1067,11 +1121,12 @@ static int nvme_submit_user_cmd(struct request_queue *q,
void *meta = NULL;
int ret;
- req = nvme_alloc_request(q, cmd, 0, NVME_QID_ANY);
+ req = nvme_alloc_request(q, cmd, 0);
if (IS_ERR(req))
return PTR_ERR(req);
- req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+ if (timeout)
+ req->timeout = timeout;
nvme_req(req)->flags |= NVME_REQ_USERCMD;
if (ubuffer && bufflen) {
@@ -1141,8 +1196,8 @@ static int nvme_keep_alive(struct nvme_ctrl *ctrl)
{
struct request *rq;
- rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, BLK_MQ_REQ_RESERVED,
- NVME_QID_ANY);
+ rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd,
+ BLK_MQ_REQ_RESERVED);
if (IS_ERR(rq))
return PTR_ERR(rq);
@@ -1302,7 +1357,8 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
NVME_IDENTIFY_DATA_SIZE);
if (status) {
dev_warn(ctrl->device,
- "Identify Descriptors failed (%d)\n", status);
+ "Identify Descriptors failed (nsid=%u, status=0x%x)\n",
+ nsid, status);
goto free_data;
}
@@ -2053,15 +2109,14 @@ static void nvme_update_disk_info(struct gendisk *disk,
capacity = 0;
}
- set_capacity_revalidate_and_notify(disk, capacity, false);
+ set_capacity_and_notify(disk, capacity);
nvme_config_discard(disk, ns);
nvme_config_write_zeroes(disk, ns);
- if (id->nsattr & NVME_NS_ATTR_RO)
+ if ((id->nsattr & NVME_NS_ATTR_RO) ||
+ test_bit(NVME_NS_FORCE_RO, &ns->flags))
set_disk_ro(disk, true);
- else
- set_disk_ro(disk, false);
}
static inline bool nvme_first_scan(struct gendisk *disk)
@@ -2136,7 +2191,6 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
blk_stack_limits(&ns->head->disk->queue->limits,
&ns->queue->limits, 0);
blk_queue_update_readahead(ns->head->disk->queue);
- nvme_update_bdev_size(ns->head->disk);
blk_mq_unfreeze_queue(ns->head->disk->queue);
}
#endif
@@ -2263,13 +2317,13 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
cmd.common.cdw10 = cpu_to_le32(((u32)secp) << 24 | ((u32)spsp) << 8);
cmd.common.cdw11 = cpu_to_le32(len);
- return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len,
- ADMIN_TIMEOUT, NVME_QID_ANY, 1, 0, false);
+ return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len, 0,
+ NVME_QID_ANY, 1, 0, false);
}
EXPORT_SYMBOL_GPL(nvme_sec_submit);
#endif /* CONFIG_BLK_SED_OPAL */
-static const struct block_device_operations nvme_fops = {
+static const struct block_device_operations nvme_bdev_ops = {
.owner = THIS_MODULE,
.ioctl = nvme_ioctl,
.compat_ioctl = nvme_compat_ioctl,
@@ -2931,7 +2985,7 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
struct nvme_effects_log **log)
{
- struct nvme_cel *cel = xa_load(&ctrl->cels, csi);
+ struct nvme_effects_log *cel = xa_load(&ctrl->cels, csi);
int ret;
if (cel)
@@ -2942,16 +2996,15 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
return -ENOMEM;
ret = nvme_get_log(ctrl, 0x00, NVME_LOG_CMD_EFFECTS, 0, csi,
- &cel->log, sizeof(cel->log), 0);
+ cel, sizeof(*cel), 0);
if (ret) {
kfree(cel);
return ret;
}
- cel->csi = csi;
- xa_store(&ctrl->cels, cel->csi, cel, GFP_KERNEL);
+ xa_store(&ctrl->cels, csi, cel, GFP_KERNEL);
out:
- *log = &cel->log;
+ *log = cel;
return 0;
}
@@ -3277,7 +3330,7 @@ static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
{
struct gendisk *disk = dev_to_disk(dev);
- if (disk->fops == &nvme_fops)
+ if (disk->fops == &nvme_bdev_ops)
return nvme_get_ns_from_dev(dev)->head;
else
return disk->private_data;
@@ -3386,7 +3439,7 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
}
#ifdef CONFIG_NVME_MULTIPATH
if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
- if (dev_to_disk(dev)->fops != &nvme_fops) /* per-path attr */
+ if (dev_to_disk(dev)->fops != &nvme_bdev_ops) /* per-path attr */
return 0;
if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
return 0;
@@ -3807,7 +3860,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
struct gendisk *disk;
struct nvme_id_ns *id;
char disk_name[DISK_NAME_LEN];
- int node = ctrl->numa_node, flags = GENHD_FL_EXT_DEVT, ret;
+ int node = ctrl->numa_node, flags = GENHD_FL_EXT_DEVT;
if (nvme_identify_ns(ctrl, nsid, ids, &id))
return;
@@ -3831,8 +3884,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
ns->ctrl = ctrl;
kref_init(&ns->kref);
- ret = nvme_init_ns_head(ns, nsid, ids, id->nmic & NVME_NS_NMIC_SHARED);
- if (ret)
+ if (nvme_init_ns_head(ns, nsid, ids, id->nmic & NVME_NS_NMIC_SHARED))
goto out_free_queue;
nvme_set_disk_name(disk_name, ns, ctrl, &flags);
@@ -3840,7 +3892,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
if (!disk)
goto out_unlink_ns;
- disk->fops = &nvme_fops;
+ disk->fops = &nvme_bdev_ops;
disk->private_data = ns;
disk->queue = ns->queue;
disk->flags = flags;
@@ -3851,8 +3903,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
goto out_put_disk;
if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
- ret = nvme_nvm_register(ns, disk_name, node);
- if (ret) {
+ if (nvme_nvm_register(ns, disk_name, node)) {
dev_warn(ctrl->device, "LightNVM init failure\n");
goto out_put_disk;
}
@@ -3965,8 +4016,6 @@ out:
*/
if (ret && ret != -ENOMEM && !(ret > 0 && !(ret & NVME_SC_DNR)))
nvme_ns_remove(ns);
- else
- revalidate_disk_size(ns->disk, true);
}
static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
@@ -4045,8 +4094,11 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
ret = nvme_submit_sync_cmd(ctrl->admin_q, &cmd, ns_list,
NVME_IDENTIFY_DATA_SIZE);
- if (ret)
+ if (ret) {
+ dev_warn(ctrl->device,
+ "Identify NS List failed (status=0x%x)\n", ret);
goto free;
+ }
for (i = 0; i < nr_entries; i++) {
u32 nsid = le32_to_cpu(ns_list[i]);
@@ -4349,6 +4401,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
{
nvme_mpath_stop(ctrl);
nvme_stop_keep_alive(ctrl);
+ nvme_stop_failfast_work(ctrl);
flush_work(&ctrl->async_event_work);
cancel_work_sync(&ctrl->fw_act_work);
}
@@ -4376,6 +4429,19 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
+static void nvme_free_cels(struct nvme_ctrl *ctrl)
+{
+ struct nvme_effects_log *cel;
+ unsigned long i;
+
+ xa_for_each (&ctrl->cels, i, cel) {
+ xa_erase(&ctrl->cels, i);
+ kfree(cel);
+ }
+
+ xa_destroy(&ctrl->cels);
+}
+
static void nvme_free_ctrl(struct device *dev)
{
struct nvme_ctrl *ctrl =
@@ -4385,8 +4451,7 @@ static void nvme_free_ctrl(struct device *dev)
if (!subsys || ctrl->instance != subsys->instance)
ida_simple_remove(&nvme_instance_ida, ctrl->instance);
- xa_destroy(&ctrl->cels);
-
+ nvme_free_cels(ctrl);
nvme_mpath_uninit(ctrl);
__free_page(ctrl->discard_page);
@@ -4414,6 +4479,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
int ret;
ctrl->state = NVME_CTRL_NEW;
+ clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
spin_lock_init(&ctrl->lock);
mutex_init(&ctrl->scan_lock);
INIT_LIST_HEAD(&ctrl->namespaces);
@@ -4430,6 +4496,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
init_waitqueue_head(&ctrl->state_wq);
INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
+ INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work);
memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
@@ -4448,7 +4515,8 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
device_initialize(&ctrl->ctrl_device);
ctrl->device = &ctrl->ctrl_device;
- ctrl->device->devt = MKDEV(MAJOR(nvme_chr_devt), ctrl->instance);
+ ctrl->device->devt = MKDEV(MAJOR(nvme_ctrl_base_chr_devt),
+ ctrl->instance);
ctrl->device->class = nvme_class;
ctrl->device->parent = ctrl->dev;
ctrl->device->groups = nvme_dev_attr_groups;
@@ -4657,7 +4725,8 @@ static int __init nvme_core_init(void)
if (!nvme_delete_wq)
goto destroy_reset_wq;
- result = alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme");
+ result = alloc_chrdev_region(&nvme_ctrl_base_chr_devt, 0,
+ NVME_MINORS, "nvme");
if (result < 0)
goto destroy_delete_wq;
@@ -4678,7 +4747,7 @@ static int __init nvme_core_init(void)
destroy_class:
class_destroy(nvme_class);
unregister_chrdev:
- unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
+ unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
destroy_delete_wq:
destroy_workqueue(nvme_delete_wq);
destroy_reset_wq:
@@ -4693,7 +4762,7 @@ static void __exit nvme_core_exit(void)
{
class_destroy(nvme_subsys_class);
class_destroy(nvme_class);
- unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
+ unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
destroy_workqueue(nvme_delete_wq);
destroy_workqueue(nvme_reset_wq);
destroy_workqueue(nvme_wq);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 8575724734e0..72ac00173500 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -549,6 +549,7 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
{
if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
ctrl->state != NVME_CTRL_DEAD &&
+ !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
return BLK_STS_RESOURCE;
@@ -615,6 +616,7 @@ static const match_table_t opt_tokens = {
{ NVMF_OPT_NR_WRITE_QUEUES, "nr_write_queues=%d" },
{ NVMF_OPT_NR_POLL_QUEUES, "nr_poll_queues=%d" },
{ NVMF_OPT_TOS, "tos=%d" },
+ { NVMF_OPT_FAIL_FAST_TMO, "fast_io_fail_tmo=%d" },
{ NVMF_OPT_ERR, NULL }
};
@@ -634,6 +636,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
opts->kato = NVME_DEFAULT_KATO;
opts->duplicate_connect = false;
+ opts->fast_io_fail_tmo = NVMF_DEF_FAIL_FAST_TMO;
opts->hdr_digest = false;
opts->data_digest = false;
opts->tos = -1; /* < 0 == use transport default */
@@ -754,6 +757,17 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n");
ctrl_loss_tmo = token;
break;
+ case NVMF_OPT_FAIL_FAST_TMO:
+ if (match_int(args, &token)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (token >= 0)
+ pr_warn("I/O fail on reconnect controller after %d sec\n",
+ token);
+ opts->fast_io_fail_tmo = token;
+ break;
case NVMF_OPT_HOSTNQN:
if (opts->host) {
pr_err("hostnqn already user-assigned: %s\n",
@@ -884,11 +898,15 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->nr_poll_queues = 0;
opts->duplicate_connect = true;
}
- if (ctrl_loss_tmo < 0)
+ if (ctrl_loss_tmo < 0) {
opts->max_reconnects = -1;
- else
+ } else {
opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
opts->reconnect_delay);
+ if (ctrl_loss_tmo < opts->fast_io_fail_tmo)
+ pr_warn("failfast tmo (%d) larger than controller loss tmo (%d)\n",
+ opts->fast_io_fail_tmo, ctrl_loss_tmo);
+ }
if (!opts->host) {
kref_get(&nvmf_default_host->ref);
@@ -988,7 +1006,8 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
#define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
- NVMF_OPT_DISABLE_SQFLOW)
+ NVMF_OPT_DISABLE_SQFLOW |\
+ NVMF_OPT_FAIL_FAST_TMO)
static struct nvme_ctrl *
nvmf_create_ctrl(struct device *dev, const char *buf)
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index a9c1e3b4585e..733010d2eafd 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -15,6 +15,8 @@
#define NVMF_DEF_RECONNECT_DELAY 10
/* default to 600 seconds of reconnect attempts before giving up */
#define NVMF_DEF_CTRL_LOSS_TMO 600
+/* default is -1: the fail fast mechanism is disabled */
+#define NVMF_DEF_FAIL_FAST_TMO -1
/*
* Define a host as seen by the target. We allocate one at boot, but also
@@ -56,6 +58,7 @@ enum {
NVMF_OPT_NR_WRITE_QUEUES = 1 << 17,
NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
NVMF_OPT_TOS = 1 << 19,
+ NVMF_OPT_FAIL_FAST_TMO = 1 << 20,
};
/**
@@ -89,6 +92,7 @@ enum {
* @nr_write_queues: number of queues for write I/O
* @nr_poll_queues: number of queues for polling I/O
* @tos: type of service
+ * @fast_io_fail_tmo: Fast I/O fail timeout in seconds
*/
struct nvmf_ctrl_options {
unsigned mask;
@@ -111,6 +115,7 @@ struct nvmf_ctrl_options {
unsigned int nr_write_queues;
unsigned int nr_poll_queues;
int tos;
+ int fast_io_fail_tmo;
};
/*
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index f4c246462658..38373a0e86ef 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3479,7 +3479,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ctrl->lport->ops->fcprqst_priv_sz);
ctrl->admin_tag_set.driver_data = ctrl;
ctrl->admin_tag_set.nr_hw_queues = 1;
- ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
+ ctrl->admin_tag_set.timeout = NVME_ADMIN_TIMEOUT;
ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED;
ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 8e562d0f2c30..470cef3abec3 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -653,7 +653,7 @@ static struct request *nvme_nvm_alloc_request(struct request_queue *q,
nvme_nvm_rqtocmd(rqd, ns, cmd);
- rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
+ rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0);
if (IS_ERR(rq))
return rq;
@@ -767,14 +767,14 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
DECLARE_COMPLETION_ONSTACK(wait);
int ret = 0;
- rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0,
- NVME_QID_ANY);
+ rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0);
if (IS_ERR(rq)) {
ret = -ENOMEM;
goto err_cmd;
}
- rq->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+ if (timeout)
+ rq->timeout = timeout;
if (ppa_buf && ppa_len) {
ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 74896be40c17..9ac762b28811 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -279,6 +279,8 @@ static bool nvme_available_path(struct nvme_ns_head *head)
struct nvme_ns *ns;
list_for_each_entry_rcu(ns, &head->list, siblings) {
+ if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
+ continue;
switch (ns->ctrl->state) {
case NVME_CTRL_LIVE:
case NVME_CTRL_RESETTING:
@@ -312,8 +314,7 @@ blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
if (likely(ns)) {
bio->bi_disk = ns->disk;
bio->bi_opf |= REQ_NVME_MPATH;
- trace_block_bio_remap(bio->bi_disk->queue, bio,
- disk_devt(ns->head->disk),
+ trace_block_bio_remap(bio, disk_devt(ns->head->disk),
bio->bi_iter.bi_sector);
ret = submit_bio_noacct(bio);
} else if (nvme_available_path(head)) {
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index bc330bf0d3bd..7e49f61f81df 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -24,7 +24,7 @@ extern unsigned int nvme_io_timeout;
#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ)
extern unsigned int admin_timeout;
-#define ADMIN_TIMEOUT (admin_timeout * HZ)
+#define NVME_ADMIN_TIMEOUT (admin_timeout * HZ)
#define NVME_DEFAULT_KATO 5
#define NVME_KATO_GRACE 10
@@ -178,7 +178,8 @@ static inline u16 nvme_req_qid(struct request *req)
{
if (!req->q->queuedata)
return 0;
- return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(req)) + 1;
+
+ return req->mq_hctx->queue_num + 1;
}
/* The below value is the specific amount of delay needed before checking
@@ -226,12 +227,6 @@ struct nvme_fault_inject {
#endif
};
-struct nvme_cel {
- struct list_head entry;
- struct nvme_effects_log log;
- u8 csi;
-};
-
struct nvme_ctrl {
bool comp_seen;
enum nvme_ctrl_state state;
@@ -304,6 +299,7 @@ struct nvme_ctrl {
struct work_struct scan_work;
struct work_struct async_event_work;
struct delayed_work ka_work;
+ struct delayed_work failfast_work;
struct nvme_command ka_cmd;
struct work_struct fw_act_work;
unsigned long events;
@@ -337,6 +333,8 @@ struct nvme_ctrl {
u16 icdoff;
u16 maxcmd;
int nr_reconnects;
+ unsigned long flags;
+#define NVME_CTRL_FAILFAST_EXPIRED 0
struct nvmf_ctrl_options *opts;
struct page *discard_page;
@@ -448,6 +446,7 @@ struct nvme_ns {
#define NVME_NS_REMOVING 0
#define NVME_NS_DEAD 1
#define NVME_NS_ANA_PENDING 2
+#define NVME_NS_FORCE_RO 3
struct nvme_fault_inject fault_inject;
@@ -610,6 +609,8 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl);
#define NVME_QID_ANY -1
struct request *nvme_alloc_request(struct request_queue *q,
+ struct nvme_command *cmd, blk_mq_req_flags_t flags);
+struct request *nvme_alloc_request_qid(struct request_queue *q,
struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid);
void nvme_cleanup_cmd(struct request *req);
blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 0578ff253c47..b4385cb0ff60 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -292,9 +292,21 @@ static void nvme_dbbuf_init(struct nvme_dev *dev,
nvmeq->dbbuf_cq_ei = &dev->dbbuf_eis[cq_idx(qid, dev->db_stride)];
}
+static void nvme_dbbuf_free(struct nvme_queue *nvmeq)
+{
+ if (!nvmeq->qid)
+ return;
+
+ nvmeq->dbbuf_sq_db = NULL;
+ nvmeq->dbbuf_cq_db = NULL;
+ nvmeq->dbbuf_sq_ei = NULL;
+ nvmeq->dbbuf_cq_ei = NULL;
+}
+
static void nvme_dbbuf_set(struct nvme_dev *dev)
{
struct nvme_command c;
+ unsigned int i;
if (!dev->dbbuf_dbs)
return;
@@ -308,6 +320,9 @@ static void nvme_dbbuf_set(struct nvme_dev *dev)
dev_warn(dev->ctrl.device, "unable to set dbbuf\n");
/* Free memory and continue on */
nvme_dbbuf_dma_free(dev);
+
+ for (i = 1; i <= dev->online_queues; i++)
+ nvme_dbbuf_free(&dev->queues[i]);
}
}
@@ -1304,13 +1319,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
req->tag, nvmeq->qid);
abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd,
- BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
+ BLK_MQ_REQ_NOWAIT);
if (IS_ERR(abort_req)) {
atomic_inc(&dev->ctrl.abort_limit);
return BLK_EH_RESET_TIMER;
}
- abort_req->timeout = ADMIN_TIMEOUT;
abort_req->end_io_data = NULL;
blk_execute_rq_nowait(abort_req->q, NULL, abort_req, 0, abort_endio);
@@ -1607,7 +1621,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
dev->admin_tagset.nr_hw_queues = 1;
dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
- dev->admin_tagset.timeout = ADMIN_TIMEOUT;
+ dev->admin_tagset.timeout = NVME_ADMIN_TIMEOUT;
dev->admin_tagset.numa_node = dev->ctrl.numa_node;
dev->admin_tagset.cmd_size = sizeof(struct nvme_iod);
dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
@@ -2089,6 +2103,12 @@ static void nvme_disable_io_queues(struct nvme_dev *dev)
static unsigned int nvme_max_io_queues(struct nvme_dev *dev)
{
+ /*
+ * If tags are shared with admin queue (Apple bug), then
+ * make sure we only use one IO queue.
+ */
+ if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
+ return 1;
return num_possible_cpus() + dev->nr_write_queues + dev->nr_poll_queues;
}
@@ -2107,16 +2127,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
dev->nr_write_queues = write_queues;
dev->nr_poll_queues = poll_queues;
- /*
- * If tags are shared with admin queue (Apple bug), then
- * make sure we only use one IO queue.
- */
- if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
- nr_io_queues = 1;
- else
- nr_io_queues = min(nvme_max_io_queues(dev),
- dev->nr_allocated_queues - 1);
-
+ nr_io_queues = dev->nr_allocated_queues - 1;
result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
if (result < 0)
return result;
@@ -2219,11 +2230,10 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
cmd.delete_queue.opcode = opcode;
cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid);
- req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
+ req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT);
if (IS_ERR(req))
return PTR_ERR(req);
- req->timeout = ADMIN_TIMEOUT;
req->end_io_data = nvmeq;
init_completion(&nvmeq->delete_done);
@@ -2239,7 +2249,7 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
unsigned long timeout;
retry:
- timeout = ADMIN_TIMEOUT;
+ timeout = NVME_ADMIN_TIMEOUT;
while (nr_queues > 0) {
if (nvme_delete_queue(&dev->queues[nr_queues], opcode))
break;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 65e3d0ef36e1..cf6c49d09c82 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -797,7 +797,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
NVME_RDMA_DATA_SGL_SIZE;
set->driver_data = ctrl;
set->nr_hw_queues = 1;
- set->timeout = ADMIN_TIMEOUT;
+ set->timeout = NVME_ADMIN_TIMEOUT;
set->flags = BLK_MQ_F_NO_SCHED;
} else {
set = &ctrl->tag_set;
@@ -853,7 +853,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
return error;
ctrl->device = ctrl->queues[0].device;
- ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device);
+ ctrl->ctrl.numa_node = ibdev_to_node(ctrl->device->dev);
/* T10-PI support */
if (ctrl->device->dev->attrs.device_cap_flags &
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index c0c33320fe65..1ba659927442 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1568,7 +1568,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
set->cmd_size = sizeof(struct nvme_tcp_request);
set->driver_data = ctrl;
set->nr_hw_queues = 1;
- set->timeout = ADMIN_TIMEOUT;
+ set->timeout = NVME_ADMIN_TIMEOUT;
} else {
set = &ctrl->tag_set;
memset(set, 0, sizeof(*set));
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index 67e87e9f306f..1dfe9a3500e3 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -55,12 +55,17 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
int status;
/* Driver requires zone append support */
- if (!(le32_to_cpu(log->iocs[nvme_cmd_zone_append]) &
+ if ((le32_to_cpu(log->iocs[nvme_cmd_zone_append]) &
NVME_CMD_EFFECTS_CSUPP)) {
+ if (test_and_clear_bit(NVME_NS_FORCE_RO, &ns->flags))
+ dev_warn(ns->ctrl->device,
+ "Zone Append supported for zoned namespace:%d. Remove read-only mode\n",
+ ns->head->ns_id);
+ } else {
+ set_bit(NVME_NS_FORCE_RO, &ns->flags);
dev_warn(ns->ctrl->device,
- "append not supported for zoned namespace:%d\n",
- ns->head->ns_id);
- return -EINVAL;
+ "Zone Append not supported for zoned namespace:%d. Forcing to read-only mode\n",
+ ns->head->ns_id);
}
/* Lazily query controller append limit for the first zoned namespace */