aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/nvme/host/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host/core.c')
-rw-r--r--drivers/nvme/host/core.c198
1 files changed, 142 insertions, 56 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 6600e138945e..838b5e2058be 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -6,6 +6,7 @@
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
+#include <linux/blk-integrity.h>
#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/errno.h>
@@ -13,7 +14,6 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
-#include <linux/list_sort.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/pr.h>
@@ -119,25 +119,6 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
static void nvme_update_keep_alive(struct nvme_ctrl *ctrl,
struct nvme_command *cmd);
-/*
- * Prepare a queue for teardown.
- *
- * This must forcibly unquiesce queues to avoid blocking dispatch, and only set
- * the capacity to 0 after that to avoid blocking dispatchers that may be
- * holding bd_butex. This will end buffered writers dirtying pages that can't
- * be synced.
- */
-static void nvme_set_queue_dying(struct nvme_ns *ns)
-{
- if (test_and_set_bit(NVME_NS_DEAD, &ns->flags))
- return;
-
- blk_set_queue_dying(ns->queue);
- blk_mq_unquiesce_queue(ns->queue);
-
- set_capacity_and_notify(ns->disk, 0);
-}
-
void nvme_queue_scan(struct nvme_ctrl *ctrl)
{
/*
@@ -222,7 +203,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
static void nvme_do_delete_ctrl(struct nvme_ctrl *ctrl)
{
dev_info(ctrl->device,
- "Removing ctrl: NQN \"%s\"\n", ctrl->opts->subsysnqn);
+ "Removing ctrl: NQN \"%s\"\n", nvmf_ctrl_subsysnqn(ctrl));
flush_work(&ctrl->reset_work);
nvme_stop_ctrl(ctrl);
@@ -346,15 +327,19 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
return RETRY;
}
-static inline void nvme_end_req(struct request *req)
+static inline void nvme_end_req_zoned(struct request *req)
{
- blk_status_t status = nvme_error_status(nvme_req(req)->status);
-
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
req_op(req) == REQ_OP_ZONE_APPEND)
req->__sector = nvme_lba_to_sect(req->q->queuedata,
le64_to_cpu(nvme_req(req)->result.u64));
+}
+static inline void nvme_end_req(struct request *req)
+{
+ blk_status_t status = nvme_error_status(nvme_req(req)->status);
+
+ nvme_end_req_zoned(req);
nvme_trace_bio_complete(req);
blk_mq_end_request(req, status);
}
@@ -381,6 +366,13 @@ void nvme_complete_rq(struct request *req)
}
EXPORT_SYMBOL_GPL(nvme_complete_rq);
+void nvme_complete_batch_req(struct request *req)
+{
+ nvme_cleanup_cmd(req);
+ nvme_end_req_zoned(req);
+}
+EXPORT_SYMBOL_GPL(nvme_complete_batch_req);
+
/*
* Called to unwind from ->queue_rq on a failed command submission so that the
* multipathing code gets called to potentially failover to another path.
@@ -632,7 +624,7 @@ static inline void nvme_init_request(struct request *req,
req->cmd_flags |= REQ_FAILFAST_DRIVER;
if (req->mq_hctx->type == HCTX_TYPE_POLL)
- req->cmd_flags |= REQ_HIPRI;
+ req->cmd_flags |= REQ_POLLED;
nvme_clear_nvme_request(req);
memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd));
}
@@ -823,6 +815,7 @@ static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
static inline void nvme_setup_flush(struct nvme_ns *ns,
struct nvme_command *cmnd)
{
+ memset(cmnd, 0, sizeof(*cmnd));
cmnd->common.opcode = nvme_cmd_flush;
cmnd->common.nsid = cpu_to_le32(ns->head->ns_id);
}
@@ -874,6 +867,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
return BLK_STS_IOERR;
}
+ memset(cmnd, 0, sizeof(*cmnd));
cmnd->dsm.opcode = nvme_cmd_dsm;
cmnd->dsm.nsid = cpu_to_le32(ns->head->ns_id);
cmnd->dsm.nr = cpu_to_le32(segments - 1);
@@ -890,6 +884,8 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
struct request *req, struct nvme_command *cmnd)
{
+ memset(cmnd, 0, sizeof(*cmnd));
+
if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
return nvme_setup_discard(ns, req, cmnd);
@@ -923,9 +919,15 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
cmnd->rw.opcode = op;
+ cmnd->rw.flags = 0;
cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
+ cmnd->rw.rsvd2 = 0;
+ cmnd->rw.metadata = 0;
cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+ cmnd->rw.reftag = 0;
+ cmnd->rw.apptag = 0;
+ cmnd->rw.appmask = 0;
if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
@@ -979,12 +981,11 @@ EXPORT_SYMBOL_GPL(nvme_cleanup_cmd);
blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
{
struct nvme_command *cmd = nvme_req(req)->cmd;
+ struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
blk_status_t ret = BLK_STS_OK;
- if (!(req->rq_flags & RQF_DONTPREP)) {
+ if (!(req->rq_flags & RQF_DONTPREP))
nvme_clear_nvme_request(req);
- memset(cmd, 0, sizeof(*cmd));
- }
switch (req_op(req)) {
case REQ_OP_DRV_IN:
@@ -1027,7 +1028,8 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
return BLK_STS_IOERR;
}
- nvme_req(req)->genctr++;
+ if (!(ctrl->quirks & NVME_QUIRK_SKIP_CID_GEN))
+ nvme_req(req)->genctr++;
cmd->common.command_id = nvme_cid(req);
trace_nvme_setup_cmd(req, cmd);
return ret;
@@ -2599,6 +2601,24 @@ static ssize_t nvme_subsys_show_nqn(struct device *dev,
}
static SUBSYS_ATTR_RO(subsysnqn, S_IRUGO, nvme_subsys_show_nqn);
+static ssize_t nvme_subsys_show_type(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_subsystem *subsys =
+ container_of(dev, struct nvme_subsystem, dev);
+
+ switch (subsys->subtype) {
+ case NVME_NQN_DISC:
+ return sysfs_emit(buf, "discovery\n");
+ case NVME_NQN_NVME:
+ return sysfs_emit(buf, "nvm\n");
+ default:
+ return sysfs_emit(buf, "reserved\n");
+ }
+}
+static SUBSYS_ATTR_RO(subsystype, S_IRUGO, nvme_subsys_show_type);
+
#define nvme_subsys_show_str_function(field) \
static ssize_t subsys_##field##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
@@ -2619,6 +2639,7 @@ static struct attribute *nvme_subsys_attrs[] = {
&subsys_attr_serial.attr,
&subsys_attr_firmware_rev.attr,
&subsys_attr_subsysnqn.attr,
+ &subsys_attr_subsystype.attr,
#ifdef CONFIG_NVME_MULTIPATH
&subsys_attr_iopolicy.attr,
#endif
@@ -2689,6 +2710,21 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev));
subsys->vendor_id = le16_to_cpu(id->vid);
subsys->cmic = id->cmic;
+
+ /* Versions prior to 1.4 don't necessarily report a valid type */
+ if (id->cntrltype == NVME_CTRL_DISC ||
+ !strcmp(subsys->subnqn, NVME_DISC_SUBSYS_NAME))
+ subsys->subtype = NVME_NQN_DISC;
+ else
+ subsys->subtype = NVME_NQN_NVME;
+
+ if (nvme_discovery_ctrl(ctrl) && subsys->subtype != NVME_NQN_DISC) {
+ dev_err(ctrl->device,
+ "Subsystem %s is not a discovery controller",
+ subsys->subnqn);
+ kfree(subsys);
+ return -EINVAL;
+ }
subsys->awupf = le16_to_cpu(id->awupf);
#ifdef CONFIG_NVME_MULTIPATH
subsys->iopolicy = NVME_IOPOLICY_NUMA;
@@ -3549,10 +3585,15 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
return 0;
}
+static void nvme_cdev_rel(struct device *dev)
+{
+ ida_simple_remove(&nvme_ns_chr_minor_ida, MINOR(dev->devt));
+}
+
void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device)
{
cdev_device_del(cdev, cdev_device);
- ida_simple_remove(&nvme_ns_chr_minor_ida, MINOR(cdev_device->devt));
+ put_device(cdev_device);
}
int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
@@ -3565,14 +3606,14 @@ int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
return minor;
cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor);
cdev_device->class = nvme_ns_chr_class;
+ cdev_device->release = nvme_cdev_rel;
device_initialize(cdev_device);
cdev_init(cdev, fops);
cdev->owner = owner;
ret = cdev_device_add(cdev, cdev_device);
- if (ret) {
+ if (ret)
put_device(cdev_device);
- ida_simple_remove(&nvme_ns_chr_minor_ida, minor);
- }
+
return ret;
}
@@ -3604,11 +3645,9 @@ static int nvme_add_ns_cdev(struct nvme_ns *ns)
ns->ctrl->instance, ns->head->instance);
if (ret)
return ret;
- ret = nvme_cdev_add(&ns->cdev, &ns->cdev_device, &nvme_ns_chr_fops,
- ns->ctrl->ops->module);
- if (ret)
- kfree_const(ns->cdev_device.kobj.name);
- return ret;
+
+ return nvme_cdev_add(&ns->cdev, &ns->cdev_device, &nvme_ns_chr_fops,
+ ns->ctrl->ops->module);
}
static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
@@ -3716,15 +3755,6 @@ out_unlock:
return ret;
}
-static int ns_cmp(void *priv, const struct list_head *a,
- const struct list_head *b)
-{
- struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
- struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
-
- return nsa->head->ns_id - nsb->head->ns_id;
-}
-
struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns *ns, *ret = NULL;
@@ -3745,6 +3775,22 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
}
EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU);
+/*
+ * Add the namespace to the controller list while keeping the list ordered.
+ */
+static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns)
+{
+ struct nvme_ns *tmp;
+
+ list_for_each_entry_reverse(tmp, &ns->ctrl->namespaces, list) {
+ if (tmp->head->ns_id < ns->head->ns_id) {
+ list_add(&ns->list, &tmp->list);
+ return;
+ }
+ }
+ list_add(&ns->list, &ns->ctrl->namespaces);
+}
+
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
struct nvme_ns_ids *ids)
{
@@ -3795,9 +3841,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
goto out_unlink_ns;
down_write(&ctrl->namespaces_rwsem);
- list_add_tail(&ns->list, &ctrl->namespaces);
+ nvme_ns_add_to_ctrl_list(ns);
up_write(&ctrl->namespaces_rwsem);
-
nvme_get_ctrl(ctrl);
if (device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups))
@@ -4080,10 +4125,6 @@ static void nvme_scan_work(struct work_struct *work)
if (nvme_scan_ns_list(ctrl) != 0)
nvme_scan_ns_sequential(ctrl);
mutex_unlock(&ctrl->scan_lock);
-
- down_write(&ctrl->namespaces_rwsem);
- list_sort(NULL, &ctrl->namespaces, ns_cmp);
- up_write(&ctrl->namespaces_rwsem);
}
/*
@@ -4467,6 +4508,37 @@ out:
}
EXPORT_SYMBOL_GPL(nvme_init_ctrl);
+static void nvme_start_ns_queue(struct nvme_ns *ns)
+{
+ if (test_and_clear_bit(NVME_NS_STOPPED, &ns->flags))
+ blk_mq_unquiesce_queue(ns->queue);
+}
+
+static void nvme_stop_ns_queue(struct nvme_ns *ns)
+{
+ if (!test_and_set_bit(NVME_NS_STOPPED, &ns->flags))
+ blk_mq_quiesce_queue(ns->queue);
+}
+
+/*
+ * Prepare a queue for teardown.
+ *
+ * This must forcibly unquiesce queues to avoid blocking dispatch, and only set
+ * the capacity to 0 after that to avoid blocking dispatchers that may be
+ * holding bd_butex. This will end buffered writers dirtying pages that can't
+ * be synced.
+ */
+static void nvme_set_queue_dying(struct nvme_ns *ns)
+{
+ if (test_and_set_bit(NVME_NS_DEAD, &ns->flags))
+ return;
+
+ blk_set_queue_dying(ns->queue);
+ nvme_start_ns_queue(ns);
+
+ set_capacity_and_notify(ns->disk, 0);
+}
+
/**
* nvme_kill_queues(): Ends all namespace queues
* @ctrl: the dead controller that needs to end
@@ -4482,7 +4554,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
/* Forcibly unquiesce queues to avoid blocking dispatch */
if (ctrl->admin_q && !blk_queue_dying(ctrl->admin_q))
- blk_mq_unquiesce_queue(ctrl->admin_q);
+ nvme_start_admin_queue(ctrl);
list_for_each_entry(ns, &ctrl->namespaces, list)
nvme_set_queue_dying(ns);
@@ -4545,7 +4617,7 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl)
down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list)
- blk_mq_quiesce_queue(ns->queue);
+ nvme_stop_ns_queue(ns);
up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_stop_queues);
@@ -4556,11 +4628,25 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list)
- blk_mq_unquiesce_queue(ns->queue);
+ nvme_start_ns_queue(ns);
up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_start_queues);
+void nvme_stop_admin_queue(struct nvme_ctrl *ctrl)
+{
+ if (!test_and_set_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags))
+ blk_mq_quiesce_queue(ctrl->admin_q);
+}
+EXPORT_SYMBOL_GPL(nvme_stop_admin_queue);
+
+void nvme_start_admin_queue(struct nvme_ctrl *ctrl)
+{
+ if (test_and_clear_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags))
+ blk_mq_unquiesce_queue(ctrl->admin_q);
+}
+EXPORT_SYMBOL_GPL(nvme_start_admin_queue);
+
void nvme_sync_io_queues(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;