aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/nvme
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme')
-rw-r--r--drivers/nvme/host/core.c414
-rw-r--r--drivers/nvme/host/fabrics.c7
-rw-r--r--drivers/nvme/host/fabrics.h2
-rw-r--r--drivers/nvme/host/fc.c47
-rw-r--r--drivers/nvme/host/lightnvm.c315
-rw-r--r--drivers/nvme/host/nvme.h38
-rw-r--r--drivers/nvme/host/pci.c109
-rw-r--r--drivers/nvme/host/rdma.c73
-rw-r--r--drivers/nvme/host/scsi.c34
-rw-r--r--drivers/nvme/target/admin-cmd.c10
-rw-r--r--drivers/nvme/target/configfs.c1
-rw-r--r--drivers/nvme/target/core.c27
-rw-r--r--drivers/nvme/target/discovery.c4
-rw-r--r--drivers/nvme/target/fabrics-cmd.c6
-rw-r--r--drivers/nvme/target/fc.c44
-rw-r--r--drivers/nvme/target/fcloop.c4
-rw-r--r--drivers/nvme/target/loop.c5
-rw-r--r--drivers/nvme/target/nvmet.h2
-rw-r--r--drivers/nvme/target/rdma.c24
19 files changed, 879 insertions, 287 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index b40cfb076f02..9b3b57fef446 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -26,6 +26,7 @@
#include <linux/ptrace.h>
#include <linux/nvme_ioctl.h>
#include <linux/t10-pi.h>
+#include <linux/pm_qos.h>
#include <scsi/sg.h>
#include <asm/unaligned.h>
@@ -56,6 +57,11 @@ EXPORT_SYMBOL_GPL(nvme_max_retries);
static int nvme_char_major;
module_param(nvme_char_major, int, 0);
+static unsigned long default_ps_max_latency_us = 25000;
+module_param(default_ps_max_latency_us, ulong, 0644);
+MODULE_PARM_DESC(default_ps_max_latency_us,
+ "max power saving latency for new devices; use PM QOS to change per device");
+
static LIST_HEAD(nvme_ctrl_list);
static DEFINE_SPINLOCK(dev_list_lock);
@@ -208,18 +214,18 @@ EXPORT_SYMBOL_GPL(nvme_requeue_req);
struct request *nvme_alloc_request(struct request_queue *q,
struct nvme_command *cmd, unsigned int flags, int qid)
{
+ unsigned op = nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN;
struct request *req;
if (qid == NVME_QID_ANY) {
- req = blk_mq_alloc_request(q, nvme_is_write(cmd), flags);
+ req = blk_mq_alloc_request(q, op, flags);
} else {
- req = blk_mq_alloc_request_hctx(q, nvme_is_write(cmd), flags,
+ req = blk_mq_alloc_request_hctx(q, op, flags,
qid ? qid - 1 : 0);
}
if (IS_ERR(req))
return req;
- req->cmd_type = REQ_TYPE_DRV_PRIV;
req->cmd_flags |= REQ_FAILFAST_DRIVER;
nvme_req(req)->cmd = cmd;
@@ -238,26 +244,38 @@ static inline void nvme_setup_flush(struct nvme_ns *ns,
static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmnd)
{
+ unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
struct nvme_dsm_range *range;
- unsigned int nr_bytes = blk_rq_bytes(req);
+ struct bio *bio;
- range = kmalloc(sizeof(*range), GFP_ATOMIC);
+ range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
if (!range)
return BLK_MQ_RQ_QUEUE_BUSY;
- range->cattr = cpu_to_le32(0);
- range->nlb = cpu_to_le32(nr_bytes >> ns->lba_shift);
- range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+ __rq_for_each_bio(bio, req) {
+ u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
+ u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
+
+ range[n].cattr = cpu_to_le32(0);
+ range[n].nlb = cpu_to_le32(nlb);
+ range[n].slba = cpu_to_le64(slba);
+ n++;
+ }
+
+ if (WARN_ON_ONCE(n != segments)) {
+ kfree(range);
+ return BLK_MQ_RQ_QUEUE_ERROR;
+ }
memset(cmnd, 0, sizeof(*cmnd));
cmnd->dsm.opcode = nvme_cmd_dsm;
cmnd->dsm.nsid = cpu_to_le32(ns->ns_id);
- cmnd->dsm.nr = 0;
+ cmnd->dsm.nr = segments - 1;
cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
req->special_vec.bv_page = virt_to_page(range);
req->special_vec.bv_offset = offset_in_page(range);
- req->special_vec.bv_len = sizeof(*range);
+ req->special_vec.bv_len = sizeof(*range) * segments;
req->rq_flags |= RQF_SPECIAL_PAYLOAD;
return BLK_MQ_RQ_QUEUE_OK;
@@ -309,17 +327,27 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
{
int ret = BLK_MQ_RQ_QUEUE_OK;
- if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+ switch (req_op(req)) {
+ case REQ_OP_DRV_IN:
+ case REQ_OP_DRV_OUT:
memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd));
- else if (req_op(req) == REQ_OP_FLUSH)
+ break;
+ case REQ_OP_FLUSH:
nvme_setup_flush(ns, cmd);
- else if (req_op(req) == REQ_OP_DISCARD)
+ break;
+ case REQ_OP_DISCARD:
ret = nvme_setup_discard(ns, req, cmd);
- else
+ break;
+ case REQ_OP_READ:
+ case REQ_OP_WRITE:
nvme_setup_rw(ns, req, cmd);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return BLK_MQ_RQ_QUEUE_ERROR;
+ }
cmd->common.command_id = req->tag;
-
return ret;
}
EXPORT_SYMBOL_GPL(nvme_setup_cmd);
@@ -538,7 +566,7 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
c.identify.opcode = nvme_admin_identify;
- c.identify.cns = cpu_to_le32(NVME_ID_CNS_CTRL);
+ c.identify.cns = NVME_ID_CNS_CTRL;
*id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
if (!*id)
@@ -556,7 +584,7 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
struct nvme_command c = { };
c.identify.opcode = nvme_admin_identify;
- c.identify.cns = cpu_to_le32(NVME_ID_CNS_NS_ACTIVE_LIST);
+ c.identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST;
c.identify.nsid = cpu_to_le32(nsid);
return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
}
@@ -568,8 +596,9 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
int error;
/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
- c.identify.opcode = nvme_admin_identify,
- c.identify.nsid = cpu_to_le32(nsid),
+ c.identify.opcode = nvme_admin_identify;
+ c.identify.nsid = cpu_to_le32(nsid);
+ c.identify.cns = NVME_ID_CNS_NS;
*id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
if (!*id)
@@ -784,6 +813,13 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
return nvme_sg_io(ns, (void __user *)arg);
#endif
default:
+#ifdef CONFIG_NVM
+ if (ns->ndev)
+ return nvme_nvm_ioctl(ns, cmd, arg);
+#endif
+ if (is_sed_ioctl(cmd))
+ return sed_ioctl(ns->ctrl->opal_dev, cmd,
+ (void __user *) arg);
return -ENOTTY;
}
}
@@ -861,6 +897,9 @@ static void nvme_config_discard(struct nvme_ns *ns)
struct nvme_ctrl *ctrl = ns->ctrl;
u32 logical_block_size = queue_logical_block_size(ns->queue);
+ BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
+ NVME_DSM_MAX_RANGES);
+
if (ctrl->quirks & NVME_QUIRK_DISCARD_ZEROES)
ns->queue->limits.discard_zeroes_data = 1;
else
@@ -869,6 +908,7 @@ static void nvme_config_discard(struct nvme_ns *ns)
ns->queue->limits.discard_alignment = logical_block_size;
ns->queue->limits.discard_granularity = logical_block_size;
blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
+ blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
}
@@ -1051,6 +1091,28 @@ static const struct pr_ops nvme_pr_ops = {
.pr_clear = nvme_pr_clear,
};
+#ifdef CONFIG_BLK_SED_OPAL
+int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
+ bool send)
+{
+ struct nvme_ctrl *ctrl = data;
+ struct nvme_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ if (send)
+ cmd.common.opcode = nvme_admin_security_send;
+ else
+ cmd.common.opcode = nvme_admin_security_recv;
+ cmd.common.nsid = 0;
+ cmd.common.cdw10[0] = cpu_to_le32(((u32)secp) << 24 | ((u32)spsp) << 8);
+ cmd.common.cdw10[1] = cpu_to_le32(len);
+
+ return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len,
+ ADMIN_TIMEOUT, NVME_QID_ANY, 1, 0);
+}
+EXPORT_SYMBOL_GPL(nvme_sec_submit);
+#endif /* CONFIG_BLK_SED_OPAL */
+
static const struct block_device_operations nvme_fops = {
.owner = THIS_MODULE,
.ioctl = nvme_ioctl,
@@ -1106,12 +1168,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
if (ret)
return ret;
- /* Checking for ctrl->tagset is a trick to avoid sleeping on module
- * load, since we only need the quirk on reset_controller. Notice
- * that the HGST device needs this delay only in firmware activation
- * procedure; unfortunately we have no (easy) way to verify this.
- */
- if ((ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) && ctrl->tagset)
+ if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
msleep(NVME_QUIRK_DELAY_AMOUNT);
return nvme_wait_ready(ctrl, cap, false);
@@ -1193,14 +1250,184 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
}
- if (ctrl->stripe_size)
- blk_queue_chunk_sectors(q, ctrl->stripe_size >> 9);
+ if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE)
+ blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
blk_queue_virt_boundary(q, ctrl->page_size - 1);
if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
vwc = true;
blk_queue_write_cache(q, vwc, vwc);
}
+static void nvme_configure_apst(struct nvme_ctrl *ctrl)
+{
+ /*
+ * APST (Autonomous Power State Transition) lets us program a
+ * table of power state transitions that the controller will
+ * perform automatically. We configure it with a simple
+ * heuristic: we are willing to spend at most 2% of the time
+ * transitioning between power states. Therefore, when running
+ * in any given state, we will enter the next lower-power
+ * non-operational state after waiting 100 * (enlat + exlat)
+ * microseconds, as long as that state's total latency is under
+ * the requested maximum latency.
+ *
+ * We will not autonomously enter any non-operational state for
+ * which the total latency exceeds ps_max_latency_us. Users
+ * can set ps_max_latency_us to zero to turn off APST.
+ */
+
+ unsigned apste;
+ struct nvme_feat_auto_pst *table;
+ int ret;
+
+ /*
+ * If APST isn't supported or if we haven't been initialized yet,
+ * then don't do anything.
+ */
+ if (!ctrl->apsta)
+ return;
+
+ if (ctrl->npss > 31) {
+ dev_warn(ctrl->device, "NPSS is invalid; not using APST\n");
+ return;
+ }
+
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return;
+
+ if (ctrl->ps_max_latency_us == 0) {
+ /* Turn off APST. */
+ apste = 0;
+ } else {
+ __le64 target = cpu_to_le64(0);
+ int state;
+
+ /*
+ * Walk through all states from lowest- to highest-power.
+ * According to the spec, lower-numbered states use more
+ * power. NPSS, despite the name, is the index of the
+ * lowest-power state, not the number of states.
+ */
+ for (state = (int)ctrl->npss; state >= 0; state--) {
+ u64 total_latency_us, transition_ms;
+
+ if (target)
+ table->entries[state] = target;
+
+ /*
+ * Is this state a useful non-operational state for
+ * higher-power states to autonomously transition to?
+ */
+ if (!(ctrl->psd[state].flags &
+ NVME_PS_FLAGS_NON_OP_STATE))
+ continue;
+
+ total_latency_us =
+ (u64)le32_to_cpu(ctrl->psd[state].entry_lat) +
+ + le32_to_cpu(ctrl->psd[state].exit_lat);
+ if (total_latency_us > ctrl->ps_max_latency_us)
+ continue;
+
+ /*
+ * This state is good. Use it as the APST idle
+ * target for higher power states.
+ */
+ transition_ms = total_latency_us + 19;
+ do_div(transition_ms, 20);
+ if (transition_ms > (1 << 24) - 1)
+ transition_ms = (1 << 24) - 1;
+
+ target = cpu_to_le64((state << 3) |
+ (transition_ms << 8));
+ }
+
+ apste = 1;
+ }
+
+ ret = nvme_set_features(ctrl, NVME_FEAT_AUTO_PST, apste,
+ table, sizeof(*table), NULL);
+ if (ret)
+ dev_err(ctrl->device, "failed to set APST feature (%d)\n", ret);
+
+ kfree(table);
+}
+
+static void nvme_set_latency_tolerance(struct device *dev, s32 val)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ u64 latency;
+
+ switch (val) {
+ case PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT:
+ case PM_QOS_LATENCY_ANY:
+ latency = U64_MAX;
+ break;
+
+ default:
+ latency = val;
+ }
+
+ if (ctrl->ps_max_latency_us != latency) {
+ ctrl->ps_max_latency_us = latency;
+ nvme_configure_apst(ctrl);
+ }
+}
+
+struct nvme_core_quirk_entry {
+ /*
+ * NVMe model and firmware strings are padded with spaces. For
+ * simplicity, strings in the quirk table are padded with NULLs
+ * instead.
+ */
+ u16 vid;
+ const char *mn;
+ const char *fr;
+ unsigned long quirks;
+};
+
+static const struct nvme_core_quirk_entry core_quirks[] = {
+ /*
+ * Seen on a Samsung "SM951 NVMe SAMSUNG 256GB": using APST causes
+ * the controller to go out to lunch. It dies when the watchdog
+ * timer reads CSTS and gets 0xffffffff.
+ */
+ {
+ .vid = 0x144d,
+ .fr = "BXW75D0Q",
+ .quirks = NVME_QUIRK_NO_APST,
+ },
+};
+
+/* match is null-terminated but idstr is space-padded. */
+static bool string_matches(const char *idstr, const char *match, size_t len)
+{
+ size_t matchlen;
+
+ if (!match)
+ return true;
+
+ matchlen = strlen(match);
+ WARN_ON_ONCE(matchlen > len);
+
+ if (memcmp(idstr, match, matchlen))
+ return false;
+
+ for (; matchlen < len; matchlen++)
+ if (idstr[matchlen] != ' ')
+ return false;
+
+ return true;
+}
+
+static bool quirk_matches(const struct nvme_id_ctrl *id,
+ const struct nvme_core_quirk_entry *q)
+{
+ return q->vid == le16_to_cpu(id->vid) &&
+ string_matches(id->mn, q->mn, sizeof(id->mn)) &&
+ string_matches(id->fr, q->fr, sizeof(id->fr));
+}
+
/*
* Initialize the cached copies of the Identify data and various controller
* register in our nvme_ctrl structure. This should be called as soon as
@@ -1212,6 +1439,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
u64 cap;
int ret, page_shift;
u32 max_hw_sectors;
+ u8 prev_apsta;
ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
if (ret) {
@@ -1235,6 +1463,25 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
return -EIO;
}
+ if (!ctrl->identified) {
+ /*
+ * Check for quirks. Quirk can depend on firmware version,
+ * so, in principle, the set of quirks present can change
+ * across a reset. As a possible future enhancement, we
+ * could re-scan for quirks every time we reinitialize
+ * the device, but we'd have to make sure that the driver
+ * behaves intelligently if the quirks change.
+ */
+
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(core_quirks); i++) {
+ if (quirk_matches(id, &core_quirks[i]))
+ ctrl->quirks |= core_quirks[i].quirks;
+ }
+ }
+
+ ctrl->oacs = le16_to_cpu(id->oacs);
ctrl->vid = le16_to_cpu(id->vid);
ctrl->oncs = le16_to_cpup(&id->oncs);
atomic_set(&ctrl->abort_limit, id->acl + 1);
@@ -1250,23 +1497,15 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->max_hw_sectors =
min_not_zero(ctrl->max_hw_sectors, max_hw_sectors);
- if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) {
- unsigned int max_hw_sectors;
-
- ctrl->stripe_size = 1 << (id->vs[3] + page_shift);
- max_hw_sectors = ctrl->stripe_size >> (page_shift - 9);
- if (ctrl->max_hw_sectors) {
- ctrl->max_hw_sectors = min(max_hw_sectors,
- ctrl->max_hw_sectors);
- } else {
- ctrl->max_hw_sectors = max_hw_sectors;
- }
- }
-
nvme_set_queue_limits(ctrl, ctrl->admin_q);
ctrl->sgls = le32_to_cpu(id->sgls);
ctrl->kas = le16_to_cpu(id->kas);
+ ctrl->npss = id->npss;
+ prev_apsta = ctrl->apsta;
+ ctrl->apsta = (ctrl->quirks & NVME_QUIRK_NO_APST) ? 0 : id->apsta;
+ memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
+
if (ctrl->ops->is_fabrics) {
ctrl->icdoff = le16_to_cpu(id->icdoff);
ctrl->ioccsz = le32_to_cpu(id->ioccsz);
@@ -1290,6 +1529,16 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
}
kfree(id);
+
+ if (ctrl->apsta && !prev_apsta)
+ dev_pm_qos_expose_latency_tolerance(ctrl->device);
+ else if (!ctrl->apsta && prev_apsta)
+ dev_pm_qos_hide_latency_tolerance(ctrl->device);
+
+ nvme_configure_apst(ctrl);
+
+ ctrl->identified = true;
+
return ret;
}
EXPORT_SYMBOL_GPL(nvme_init_identify);
@@ -1539,6 +1788,29 @@ static ssize_t nvme_sysfs_show_transport(struct device *dev,
}
static DEVICE_ATTR(transport, S_IRUGO, nvme_sysfs_show_transport, NULL);
+static ssize_t nvme_sysfs_show_state(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ static const char *const state_name[] = {
+ [NVME_CTRL_NEW] = "new",
+ [NVME_CTRL_LIVE] = "live",
+ [NVME_CTRL_RESETTING] = "resetting",
+ [NVME_CTRL_RECONNECTING]= "reconnecting",
+ [NVME_CTRL_DELETING] = "deleting",
+ [NVME_CTRL_DEAD] = "dead",
+ };
+
+ if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) &&
+ state_name[ctrl->state])
+ return sprintf(buf, "%s\n", state_name[ctrl->state]);
+
+ return sprintf(buf, "unknown state\n");
+}
+
+static DEVICE_ATTR(state, S_IRUGO, nvme_sysfs_show_state, NULL);
+
static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -1571,6 +1843,7 @@ static struct attribute *nvme_dev_attrs[] = {
&dev_attr_transport.attr,
&dev_attr_subsysnqn.attr,
&dev_attr_address.attr,
+ &dev_attr_state.attr,
NULL
};
@@ -2027,6 +2300,14 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
list_add_tail(&ctrl->node, &nvme_ctrl_list);
spin_unlock(&dev_list_lock);
+ /*
+ * Initialize latency tolerance controls. The sysfs files won't
+ * be visible to userspace unless the device actually supports APST.
+ */
+ ctrl->device->power.set_latency_tolerance = nvme_set_latency_tolerance;
+ dev_pm_qos_update_user_latency_tolerance(ctrl->device,
+ min(default_ps_max_latency_us, (unsigned long)S32_MAX));
+
return 0;
out_release_instance:
nvme_release_instance(ctrl);
@@ -2052,9 +2333,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
* Revalidating a dead namespace sets capacity to 0. This will
* end buffered writers dirtying pages that can't be synced.
*/
- if (ns->disk && !test_and_set_bit(NVME_NS_DEAD, &ns->flags))
- revalidate_disk(ns->disk);
-
+ if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
+ continue;
+ revalidate_disk(ns->disk);
blk_set_queue_dying(ns->queue);
blk_mq_abort_requeue_list(ns->queue);
blk_mq_start_stopped_hw_queues(ns->queue, true);
@@ -2063,6 +2344,53 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_kill_queues);
+void nvme_unfreeze(struct nvme_ctrl *ctrl)
+{
+ struct nvme_ns *ns;
+
+ mutex_lock(&ctrl->namespaces_mutex);
+ list_for_each_entry(ns, &ctrl->namespaces, list)
+ blk_mq_unfreeze_queue(ns->queue);
+ mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_unfreeze);
+
+void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
+{
+ struct nvme_ns *ns;
+
+ mutex_lock(&ctrl->namespaces_mutex);
+ list_for_each_entry(ns, &ctrl->namespaces, list) {
+ timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout);
+ if (timeout <= 0)
+ break;
+ }
+ mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);
+
+void nvme_wait_freeze(struct nvme_ctrl *ctrl)
+{
+ struct nvme_ns *ns;
+
+ mutex_lock(&ctrl->namespaces_mutex);
+ list_for_each_entry(ns, &ctrl->namespaces, list)
+ blk_mq_freeze_queue_wait(ns->queue);
+ mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_wait_freeze);
+
+void nvme_start_freeze(struct nvme_ctrl *ctrl)
+{
+ struct nvme_ns *ns;
+
+ mutex_lock(&ctrl->namespaces_mutex);
+ list_for_each_entry(ns, &ctrl->namespaces, list)
+ blk_mq_freeze_queue_start(ns->queue);
+ mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_start_freeze);
+
void nvme_stop_queues(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 916d13608059..5b7386f69f4d 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -480,11 +480,16 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
* being implemented to the common NVMe fabrics library. Part of
* the overall init sequence of starting up a fabrics driver.
*/
-void nvmf_register_transport(struct nvmf_transport_ops *ops)
+int nvmf_register_transport(struct nvmf_transport_ops *ops)
{
+ if (!ops->create_ctrl)
+ return -EINVAL;
+
mutex_lock(&nvmf_transports_mutex);
list_add_tail(&ops->entry, &nvmf_transports);
mutex_unlock(&nvmf_transports_mutex);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(nvmf_register_transport);
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 924145c979f1..156018182ce4 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -128,7 +128,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl);
int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid);
-void nvmf_register_transport(struct nvmf_transport_ops *ops);
+int nvmf_register_transport(struct nvmf_transport_ops *ops);
void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
void nvmf_free_options(struct nvmf_ctrl_options *opts);
const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 771e2e761872..9690beb15e69 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1491,19 +1491,20 @@ static int
nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
{
struct nvme_fc_queue *queue = &ctrl->queues[1];
- int i, j, ret;
+ int i, ret;
for (i = 1; i < ctrl->queue_count; i++, queue++) {
ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize);
- if (ret) {
- for (j = i-1; j >= 0; j--)
- __nvme_fc_delete_hw_queue(ctrl,
- &ctrl->queues[j], j);
- return ret;
- }
+ if (ret)
+ goto delete_queues;
}
return 0;
+
+delete_queues:
+ for (; i >= 0; i--)
+ __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i);
+ return ret;
}
static int
@@ -1653,23 +1654,22 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
struct nvme_fc_fcp_op *op)
{
struct nvmefc_fcp_req *freq = &op->fcp_req;
- u32 map_len = nvme_map_len(rq);
enum dma_data_direction dir;
int ret;
freq->sg_cnt = 0;
- if (!map_len)
+ if (!blk_rq_payload_bytes(rq))
return 0;
freq->sg_table.sgl = freq->first_sgl;
- ret = sg_alloc_table_chained(&freq->sg_table, rq->nr_phys_segments,
- freq->sg_table.sgl);
+ ret = sg_alloc_table_chained(&freq->sg_table,
+ blk_rq_nr_phys_segments(rq), freq->sg_table.sgl);
if (ret)
return -ENOMEM;
op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl);
- WARN_ON(op->nents > rq->nr_phys_segments);
+ WARN_ON(op->nents > blk_rq_nr_phys_segments(rq));
dir = (rq_data_dir(rq) == WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl,
op->nents, dir);
@@ -1853,7 +1853,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
if (ret)
return ret;
- data_len = nvme_map_len(rq);
+ data_len = blk_rq_payload_bytes(rq);
if (data_len)
io_dir = ((rq_data_dir(rq) == WRITE) ?
NVMEFC_FCP_WRITE : NVMEFC_FCP_READ);
@@ -1937,7 +1937,7 @@ nvme_fc_complete_rq(struct request *rq)
return;
}
- if (rq->cmd_type == REQ_TYPE_DRV_PRIV)
+ if (blk_rq_is_passthrough(rq))
error = rq->errors;
else
error = nvme_error_status(rq->errors);
@@ -2353,18 +2353,6 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
/* sanity checks */
- /* FC-NVME supports 64-byte SQE only */
- if (ctrl->ctrl.ioccsz != 4) {
- dev_err(ctrl->ctrl.device, "ioccsz %d is not supported!\n",
- ctrl->ctrl.ioccsz);
- goto out_remove_admin_queue;
- }
- /* FC-NVME supports 16-byte CQE only */
- if (ctrl->ctrl.iorcsz != 1) {
- dev_err(ctrl->ctrl.device, "iorcsz %d is not supported!\n",
- ctrl->ctrl.iorcsz);
- goto out_remove_admin_queue;
- }
/* FC-NVME does not have other data in the capsule */
if (ctrl->ctrl.icdoff) {
dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n",
@@ -2401,8 +2389,8 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
WARN_ON_ONCE(!changed);
dev_info(ctrl->ctrl.device,
- "NVME-FC{%d}: new ctrl: NQN \"%s\" (%p)\n",
- ctrl->cnum, ctrl->ctrl.opts->subsysnqn, &ctrl);
+ "NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
+ ctrl->cnum, ctrl->ctrl.opts->subsysnqn);
kref_get(&ctrl->ctrl.kref);
@@ -2562,8 +2550,7 @@ static int __init nvme_fc_init_module(void)
if (!nvme_fc_wq)
return -ENOMEM;
- nvmf_register_transport(&nvme_fc_transport);
- return 0;
+ return nvmf_register_transport(&nvme_fc_transport);
}
static void __exit nvme_fc_exit_module(void)
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 588d4a34c083..21cac8523bd8 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -26,6 +26,8 @@
#include <linux/bitops.h>
#include <linux/lightnvm.h>
#include <linux/vmalloc.h>
+#include <linux/sched/sysctl.h>
+#include <uapi/linux/lightnvm.h>
enum nvme_nvm_admin_opcode {
nvme_nvm_admin_identity = 0xe2,
@@ -248,50 +250,48 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
{
struct nvme_nvm_id_group *src;
struct nvm_id_group *dst;
- int i, end;
-
- end = min_t(u32, 4, nvm_id->cgrps);
-
- for (i = 0; i < end; i++) {
- src = &nvme_nvm_id->groups[i];
- dst = &nvm_id->groups[i];
-
- dst->mtype = src->mtype;
- dst->fmtype = src->fmtype;
- dst->num_ch = src->num_ch;
- dst->num_lun = src->num_lun;
- dst->num_pln = src->num_pln;
-
- dst->num_pg = le16_to_cpu(src->num_pg);
- dst->num_blk = le16_to_cpu(src->num_blk);
- dst->fpg_sz = le16_to_cpu(src->fpg_sz);
- dst->csecs = le16_to_cpu(src->csecs);
- dst->sos = le16_to_cpu(src->sos);
-
- dst->trdt = le32_to_cpu(src->trdt);
- dst->trdm = le32_to_cpu(src->trdm);
- dst->tprt = le32_to_cpu(src->tprt);
- dst->tprm = le32_to_cpu(src->tprm);
- dst->tbet = le32_to_cpu(src->tbet);
- dst->tbem = le32_to_cpu(src->tbem);
- dst->mpos = le32_to_cpu(src->mpos);
- dst->mccap = le32_to_cpu(src->mccap);
-
- dst->cpar = le16_to_cpu(src->cpar);
-
- if (dst->fmtype == NVM_ID_FMTYPE_MLC) {
- memcpy(dst->lptbl.id, src->lptbl.id, 8);
- dst->lptbl.mlc.num_pairs =
- le16_to_cpu(src->lptbl.mlc.num_pairs);
-
- if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) {
- pr_err("nvm: number of MLC pairs not supported\n");
- return -EINVAL;
- }
- memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs,
- dst->lptbl.mlc.num_pairs);
+ if (nvme_nvm_id->cgrps != 1)
+ return -EINVAL;
+
+ src = &nvme_nvm_id->groups[0];
+ dst = &nvm_id->grp;
+
+ dst->mtype = src->mtype;
+ dst->fmtype = src->fmtype;
+ dst->num_ch = src->num_ch;
+ dst->num_lun = src->num_lun;
+ dst->num_pln = src->num_pln;
+
+ dst->num_pg = le16_to_cpu(src->num_pg);
+ dst->num_blk = le16_to_cpu(src->num_blk);
+ dst->fpg_sz = le16_to_cpu(src->fpg_sz);
+ dst->csecs = le16_to_cpu(src->csecs);
+ dst->sos = le16_to_cpu(src->sos);
+
+ dst->trdt = le32_to_cpu(src->trdt);
+ dst->trdm = le32_to_cpu(src->trdm);
+ dst->tprt = le32_to_cpu(src->tprt);
+ dst->tprm = le32_to_cpu(src->tprm);
+ dst->tbet = le32_to_cpu(src->tbet);
+ dst->tbem = le32_to_cpu(src->tbem);
+ dst->mpos = le32_to_cpu(src->mpos);
+ dst->mccap = le32_to_cpu(src->mccap);
+
+ dst->cpar = le16_to_cpu(src->cpar);
+
+ if (dst->fmtype == NVM_ID_FMTYPE_MLC) {
+ memcpy(dst->lptbl.id, src->lptbl.id, 8);
+ dst->lptbl.mlc.num_pairs =
+ le16_to_cpu(src->lptbl.mlc.num_pairs);
+
+ if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) {
+ pr_err("nvm: number of MLC pairs not supported\n");
+ return -EINVAL;
}
+
+ memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs,
+ dst->lptbl.mlc.num_pairs);
}
return 0;
@@ -321,7 +321,6 @@ static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
nvm_id->ver_id = nvme_nvm_id->ver_id;
nvm_id->vmnt = nvme_nvm_id->vmnt;
- nvm_id->cgrps = nvme_nvm_id->cgrps;
nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap);
nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom);
memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf,
@@ -372,7 +371,7 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
}
/* Transform physical address to target address space */
- nvmdev->mt->part_to_tgt(nvmdev, entries, cmd_nlb);
+ nvm_part_to_tgt(nvmdev, entries, cmd_nlb);
if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) {
ret = -EINTR;
@@ -485,7 +484,8 @@ static void nvme_nvm_end_io(struct request *rq, int error)
struct nvm_rq *rqd = rq->end_io_data;
rqd->ppa_status = nvme_req(rq)->result.u64;
- nvm_end_io(rqd, error);
+ rqd->error = error;
+ nvm_end_io(rqd);
kfree(nvme_req(rq)->cmd);
blk_mq_free_request(rq);
@@ -586,6 +586,224 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
.max_phys_sect = 64,
};
+static void nvme_nvm_end_user_vio(struct request *rq, int error)
+{
+ struct completion *waiting = rq->end_io_data;
+
+ complete(waiting);
+}
+
+static int nvme_nvm_submit_user_cmd(struct request_queue *q,
+ struct nvme_ns *ns,
+ struct nvme_nvm_command *vcmd,
+ void __user *ubuf, unsigned int bufflen,
+ void __user *meta_buf, unsigned int meta_len,
+ void __user *ppa_buf, unsigned int ppa_len,
+ u32 *result, u64 *status, unsigned int timeout)
+{
+ bool write = nvme_is_write((struct nvme_command *)vcmd);
+ struct nvm_dev *dev = ns->ndev;
+ struct gendisk *disk = ns->disk;
+ struct request *rq;
+ struct bio *bio = NULL;
+ __le64 *ppa_list = NULL;
+ dma_addr_t ppa_dma;
+ __le64 *metadata = NULL;
+ dma_addr_t metadata_dma;
+ DECLARE_COMPLETION_ONSTACK(wait);
+ int ret;
+
+ rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0,
+ NVME_QID_ANY);
+ if (IS_ERR(rq)) {
+ ret = -ENOMEM;
+ goto err_cmd;
+ }
+
+ rq->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+
+ rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
+ rq->end_io_data = &wait;
+
+ if (ppa_buf && ppa_len) {
+ ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma);
+ if (!ppa_list) {
+ ret = -ENOMEM;
+ goto err_rq;
+ }
+ if (copy_from_user(ppa_list, (void __user *)ppa_buf,
+ sizeof(u64) * (ppa_len + 1))) {
+ ret = -EFAULT;
+ goto err_ppa;
+ }
+ vcmd->ph_rw.spba = cpu_to_le64(ppa_dma);
+ } else {
+ vcmd->ph_rw.spba = cpu_to_le64((uintptr_t)ppa_buf);
+ }
+
+ if (ubuf && bufflen) {
+ ret = blk_rq_map_user(q, rq, NULL, ubuf, bufflen, GFP_KERNEL);
+ if (ret)
+ goto err_ppa;
+ bio = rq->bio;
+
+ if (meta_buf && meta_len) {
+ metadata = dma_pool_alloc(dev->dma_pool, GFP_KERNEL,
+ &metadata_dma);
+ if (!metadata) {
+ ret = -ENOMEM;
+ goto err_map;
+ }
+
+ if (write) {
+ if (copy_from_user(metadata,
+ (void __user *)meta_buf,
+ meta_len)) {
+ ret = -EFAULT;
+ goto err_meta;
+ }
+ }
+ vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma);
+ }
+
+ if (!disk)
+ goto submit;
+
+ bio->bi_bdev = bdget_disk(disk, 0);
+ if (!bio->bi_bdev) {
+ ret = -ENODEV;
+ goto err_meta;
+ }
+ }
+
+submit:
+ blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_user_vio);
+
+ wait_for_completion_io(&wait);
+
+ ret = nvme_error_status(rq->errors);
+ if (result)
+ *result = rq->errors & 0x7ff;
+ if (status)
+ *status = le64_to_cpu(nvme_req(rq)->result.u64);
+
+ if (metadata && !ret && !write) {
+ if (copy_to_user(meta_buf, (void *)metadata, meta_len))
+ ret = -EFAULT;
+ }
+err_meta:
+ if (meta_buf && meta_len)
+ dma_pool_free(dev->dma_pool, metadata, metadata_dma);
+err_map:
+ if (bio) {
+ if (disk && bio->bi_bdev)
+ bdput(bio->bi_bdev);
+ blk_rq_unmap_user(bio);
+ }
+err_ppa:
+ if (ppa_buf && ppa_len)
+ dma_pool_free(dev->dma_pool, ppa_list, ppa_dma);
+err_rq:
+ blk_mq_free_request(rq);
+err_cmd:
+ return ret;
+}
+
+static int nvme_nvm_submit_vio(struct nvme_ns *ns,
+ struct nvm_user_vio __user *uvio)
+{
+ struct nvm_user_vio vio;
+ struct nvme_nvm_command c;
+ unsigned int length;
+ int ret;
+
+ if (copy_from_user(&vio, uvio, sizeof(vio)))
+ return -EFAULT;
+ if (vio.flags)
+ return -EINVAL;
+
+ memset(&c, 0, sizeof(c));
+ c.ph_rw.opcode = vio.opcode;
+ c.ph_rw.nsid = cpu_to_le32(ns->ns_id);
+ c.ph_rw.control = cpu_to_le16(vio.control);
+ c.ph_rw.length = cpu_to_le16(vio.nppas);
+
+ length = (vio.nppas + 1) << ns->lba_shift;
+
+ ret = nvme_nvm_submit_user_cmd(ns->queue, ns, &c,
+ (void __user *)(uintptr_t)vio.addr, length,
+ (void __user *)(uintptr_t)vio.metadata,
+ vio.metadata_len,
+ (void __user *)(uintptr_t)vio.ppa_list, vio.nppas,
+ &vio.result, &vio.status, 0);
+
+ if (ret && copy_to_user(uvio, &vio, sizeof(vio)))
+ return -EFAULT;
+
+ return ret;
+}
+
+static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin,
+ struct nvm_passthru_vio __user *uvcmd)
+{
+ struct nvm_passthru_vio vcmd;
+ struct nvme_nvm_command c;
+ struct request_queue *q;
+ unsigned int timeout = 0;
+ int ret;
+
+ if (copy_from_user(&vcmd, uvcmd, sizeof(vcmd)))
+ return -EFAULT;
+ if ((vcmd.opcode != 0xF2) && (!capable(CAP_SYS_ADMIN)))
+ return -EACCES;
+ if (vcmd.flags)
+ return -EINVAL;
+
+ memset(&c, 0, sizeof(c));
+ c.common.opcode = vcmd.opcode;
+ c.common.nsid = cpu_to_le32(ns->ns_id);
+ c.common.cdw2[0] = cpu_to_le32(vcmd.cdw2);
+ c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3);
+ /* cdw11-12 */
+ c.ph_rw.length = cpu_to_le16(vcmd.nppas);
+ c.ph_rw.control = cpu_to_le32(vcmd.control);
+ c.common.cdw10[3] = cpu_to_le32(vcmd.cdw13);
+ c.common.cdw10[4] = cpu_to_le32(vcmd.cdw14);
+ c.common.cdw10[5] = cpu_to_le32(vcmd.cdw15);
+
+ if (vcmd.timeout_ms)
+ timeout = msecs_to_jiffies(vcmd.timeout_ms);
+
+ q = admin ? ns->ctrl->admin_q : ns->queue;
+
+ ret = nvme_nvm_submit_user_cmd(q, ns,
+ (struct nvme_nvm_command *)&c,
+ (void __user *)(uintptr_t)vcmd.addr, vcmd.data_len,
+ (void __user *)(uintptr_t)vcmd.metadata,
+ vcmd.metadata_len,
+ (void __user *)(uintptr_t)vcmd.ppa_list, vcmd.nppas,
+ &vcmd.result, &vcmd.status, timeout);
+
+ if (ret && copy_to_user(uvcmd, &vcmd, sizeof(vcmd)))
+ return -EFAULT;
+
+ return ret;
+}
+
+int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case NVME_NVM_IOCTL_ADMIN_VIO:
+ return nvme_nvm_user_vcmd(ns, 1, (void __user *)arg);
+ case NVME_NVM_IOCTL_IO_VIO:
+ return nvme_nvm_user_vcmd(ns, 0, (void __user *)arg);
+ case NVME_NVM_IOCTL_SUBMIT_VIO:
+ return nvme_nvm_submit_vio(ns, (void __user *)arg);
+ default:
+ return -ENOTTY;
+ }
+}
+
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
{
struct request_queue *q = ns->queue;
@@ -622,7 +840,7 @@ static ssize_t nvm_dev_attr_show(struct device *dev,
return 0;
id = &ndev->identity;
- grp = &id->groups[0];
+ grp = &id->grp;
attr = &dattr->attr;
if (strcmp(attr->name, "version") == 0) {
@@ -633,10 +851,9 @@ static ssize_t nvm_dev_attr_show(struct device *dev,
return scnprintf(page, PAGE_SIZE, "%u\n", id->cap);
} else if (strcmp(attr->name, "device_mode") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", id->dom);
+ /* kept for compatibility */
} else if (strcmp(attr->name, "media_manager") == 0) {
- if (!ndev->mt)
- return scnprintf(page, PAGE_SIZE, "%s\n", "none");
- return scnprintf(page, PAGE_SIZE, "%s\n", ndev->mt->name);
+ return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm");
} else if (strcmp(attr->name, "ppa_format") == 0) {
return scnprintf(page, PAGE_SIZE,
"0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index bd5321441d12..2aa20e3e5675 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -19,6 +19,7 @@
#include <linux/kref.h>
#include <linux/blk-mq.h>
#include <linux/lightnvm.h>
+#include <linux/sed-opal.h>
enum {
/*
@@ -77,6 +78,11 @@ enum nvme_quirks {
* readiness, which is done by reading the NVME_CSTS_RDY bit.
*/
NVME_QUIRK_DELAY_BEFORE_CHK_RDY = (1 << 3),
+
+ /*
+ * APST should not be used.
+ */
+ NVME_QUIRK_NO_APST = (1 << 4),
};
/*
@@ -111,6 +117,7 @@ enum nvme_ctrl_state {
struct nvme_ctrl {
enum nvme_ctrl_state state;
+ bool identified;
spinlock_t lock;
const struct nvme_ctrl_ops *ops;
struct request_queue *admin_q;
@@ -125,6 +132,8 @@ struct nvme_ctrl {
struct list_head node;
struct ida ns_ida;
+ struct opal_dev *opal_dev;
+
char name[12];
char serial[20];
char model[40];
@@ -135,22 +144,28 @@ struct nvme_ctrl {
u32 page_size;
u32 max_hw_sectors;
- u32 stripe_size;
u16 oncs;
u16 vid;
+ u16 oacs;
atomic_t abort_limit;
u8 event_limit;
u8 vwc;
u32 vs;
u32 sgls;
u16 kas;
+ u8 npss;
+ u8 apsta;
unsigned int kato;
bool subsystem;
unsigned long quirks;
+ struct nvme_id_power_state psd[32];
struct work_struct scan_work;
struct work_struct async_event_work;
struct delayed_work ka_work;
+ /* Power saving configuration */
+ u64 ps_max_latency_us;
+
/* Fabrics only */
u16 sqsize;
u32 ioccsz;
@@ -226,14 +241,6 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
return (sector >> (ns->lba_shift - 9));
}
-static inline unsigned nvme_map_len(struct request *rq)
-{
- if (req_op(rq) == REQ_OP_DISCARD)
- return sizeof(struct nvme_dsm_range);
- else
- return blk_rq_bytes(rq);
-}
-
static inline void nvme_cleanup_cmd(struct request *req)
{
if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
@@ -276,6 +283,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl);
void nvme_queue_scan(struct nvme_ctrl *ctrl);
void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
+int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
+ bool send);
+
#define NVME_NR_AERS 1
void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
union nvme_result *res);
@@ -284,6 +294,10 @@ void nvme_queue_async_events(struct nvme_ctrl *ctrl);
void nvme_stop_queues(struct nvme_ctrl *ctrl);
void nvme_start_queues(struct nvme_ctrl *ctrl);
void nvme_kill_queues(struct nvme_ctrl *ctrl);
+void nvme_unfreeze(struct nvme_ctrl *ctrl);
+void nvme_wait_freeze(struct nvme_ctrl *ctrl);
+void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
+void nvme_start_freeze(struct nvme_ctrl *ctrl);
#define NVME_QID_ANY -1
struct request *nvme_alloc_request(struct request_queue *q,
@@ -327,6 +341,7 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
void nvme_nvm_unregister(struct nvme_ns *ns);
int nvme_nvm_register_sysfs(struct nvme_ns *ns);
void nvme_nvm_unregister_sysfs(struct nvme_ns *ns);
+int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg);
#else
static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
int node)
@@ -344,6 +359,11 @@ static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *i
{
return 0;
}
+static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd,
+ unsigned long arg)
+{
+ return -ENOTTY;
+}
#endif /* CONFIG_NVM */
static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3d21a154dce7..26a5fd05fe88 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -43,6 +43,7 @@
#include <linux/types.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <asm/unaligned.h>
+#include <linux/sed-opal.h>
#include "nvme.h"
@@ -306,11 +307,11 @@ static __le64 **iod_list(struct request *req)
return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req));
}
-static int nvme_init_iod(struct request *rq, unsigned size,
- struct nvme_dev *dev)
+static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
int nseg = blk_rq_nr_phys_segments(rq);
+ unsigned int size = blk_rq_payload_bytes(rq);
if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
@@ -420,12 +421,11 @@ static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
}
#endif
-static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req,
- int total_len)
+static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct dma_pool *pool;
- int length = total_len;
+ int length = blk_rq_payload_bytes(req);
struct scatterlist *sg = iod->sg;
int dma_len = sg_dma_len(sg);
u64 dma_addr = sg_dma_address(sg);
@@ -501,7 +501,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req,
}
static int nvme_map_data(struct nvme_dev *dev, struct request *req,
- unsigned size, struct nvme_command *cmnd)
+ struct nvme_command *cmnd)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct request_queue *q = req->q;
@@ -519,7 +519,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
DMA_ATTR_NO_WARN))
goto out;
- if (!nvme_setup_prps(dev, req, size))
+ if (!nvme_setup_prps(dev, req))
goto out_unmap;
ret = BLK_MQ_RQ_QUEUE_ERROR;
@@ -580,7 +580,6 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_dev *dev = nvmeq->dev;
struct request *req = bd->rq;
struct nvme_command cmnd;
- unsigned map_len;
int ret = BLK_MQ_RQ_QUEUE_OK;
/*
@@ -590,7 +589,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
*/
if (ns && ns->ms && !blk_integrity_rq(req)) {
if (!(ns->pi_type && ns->ms == 8) &&
- req->cmd_type != REQ_TYPE_DRV_PRIV) {
+ !blk_rq_is_passthrough(req)) {
blk_mq_end_request(req, -EFAULT);
return BLK_MQ_RQ_QUEUE_OK;
}
@@ -600,13 +599,12 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
if (ret != BLK_MQ_RQ_QUEUE_OK)
return ret;
- map_len = nvme_map_len(req);
- ret = nvme_init_iod(req, map_len, dev);
+ ret = nvme_init_iod(req, dev);
if (ret != BLK_MQ_RQ_QUEUE_OK)
goto out_free_cmd;
if (blk_rq_nr_phys_segments(req))
- ret = nvme_map_data(dev, req, map_len, &cmnd);
+ ret = nvme_map_data(dev, req, &cmnd);
if (ret != BLK_MQ_RQ_QUEUE_OK)
goto out_cleanup_iod;
@@ -615,10 +613,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
spin_lock_irq(&nvmeq->q_lock);
if (unlikely(nvmeq->cq_vector < 0)) {
- if (ns && !test_bit(NVME_NS_DEAD, &ns->flags))
- ret = BLK_MQ_RQ_QUEUE_BUSY;
- else
- ret = BLK_MQ_RQ_QUEUE_ERROR;
+ ret = BLK_MQ_RQ_QUEUE_ERROR;
spin_unlock_irq(&nvmeq->q_lock);
goto out_cleanup_iod;
}
@@ -648,7 +643,7 @@ static void nvme_complete_rq(struct request *req)
return;
}
- if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+ if (blk_rq_is_passthrough(req))
error = req->errors;
else
error = nvme_error_status(req->errors);
@@ -712,15 +707,8 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
nvme_req(req)->result = cqe.result;
blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1);
-
}
- /* If the controller ignores the cq head doorbell and continuously
- * writes to the queue, it is theoretically possible to wrap around
- * the queue twice and mistakenly return IRQ_NONE. Linux only
- * requires that 0.1% of your interrupts are handled, so this isn't
- * a big problem.
- */
if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
return;
@@ -905,12 +893,11 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
return BLK_EH_HANDLED;
}
- iod->aborted = 1;
-
if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) {
atomic_inc(&dev->ctrl.abort_limit);
return BLK_EH_RESET_TIMER;
}
+ iod->aborted = 1;
memset(&cmd, 0, sizeof(cmd));
cmd.abort.opcode = nvme_admin_abort_cmd;
@@ -1051,9 +1038,10 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
}
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
- int depth)
+ int depth, int node)
{
- struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL);
+ struct nvme_queue *nvmeq = kzalloc_node(sizeof(*nvmeq), GFP_KERNEL,
+ node);
if (!nvmeq)
return NULL;
@@ -1188,6 +1176,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
dev->admin_tagset.timeout = ADMIN_TIMEOUT;
dev->admin_tagset.numa_node = dev_to_node(dev->dev);
dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
+ dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
dev->admin_tagset.driver_data = dev;
if (blk_mq_alloc_tag_set(&dev->admin_tagset))
@@ -1229,7 +1218,8 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
nvmeq = dev->queues[0];
if (!nvmeq) {
- nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
+ nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH,
+ dev_to_node(dev->dev));
if (!nvmeq)
return -ENOMEM;
}
@@ -1321,7 +1311,9 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
int ret = 0;
for (i = dev->queue_count; i <= dev->max_qid; i++) {
- if (!nvme_alloc_queue(dev, i, dev->q_depth)) {
+ /* vector == qid - 1, match nvme_create_queue */
+ if (!nvme_alloc_queue(dev, i, dev->q_depth,
+ pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) {
ret = -ENOMEM;
break;
}
@@ -1683,21 +1675,34 @@ static void nvme_pci_disable(struct nvme_dev *dev)
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
int i, queues;
- u32 csts = -1;
+ bool dead = true;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
del_timer_sync(&dev->watchdog_timer);
mutex_lock(&dev->shutdown_lock);
- if (pci_is_enabled(to_pci_dev(dev->dev))) {
- nvme_stop_queues(&dev->ctrl);
- csts = readl(dev->bar + NVME_REG_CSTS);
+ if (pci_is_enabled(pdev)) {
+ u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+ if (dev->ctrl.state == NVME_CTRL_LIVE)
+ nvme_start_freeze(&dev->ctrl);
+ dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
+ pdev->error_state != pci_channel_io_normal);
}
+ /*
+ * Give the controller a chance to complete all entered requests if
+ * doing a safe shutdown.
+ */
+ if (!dead && shutdown)
+ nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+ nvme_stop_queues(&dev->ctrl);
+
queues = dev->online_queues - 1;
for (i = dev->queue_count - 1; i > 0; i--)
nvme_suspend_queue(dev->queues[i]);
- if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
+ if (dead) {
/* A device might become IO incapable very soon during
* probe, before the admin queue is configured. Thus,
* queue_count can be 0 here.
@@ -1712,6 +1717,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
+
+ /*
+ * The driver will not be starting up queues again if shutting down so
+ * must flush all entered requests to their failed completion to avoid
+ * deadlocking blk-mq hot-cpu notifier.
+ */
+ if (shutdown)
+ nvme_start_queues(&dev->ctrl);
mutex_unlock(&dev->shutdown_lock);
}
@@ -1748,6 +1761,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
if (dev->ctrl.admin_q)
blk_put_queue(dev->ctrl.admin_q);
kfree(dev->queues);
+ free_opal_dev(dev->ctrl.opal_dev);
kfree(dev);
}
@@ -1764,6 +1778,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
static void nvme_reset_work(struct work_struct *work)
{
struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
+ bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
int result = -ENODEV;
if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING))
@@ -1796,6 +1811,17 @@ static void nvme_reset_work(struct work_struct *work)
if (result)
goto out;
+ if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) {
+ if (!dev->ctrl.opal_dev)
+ dev->ctrl.opal_dev =
+ init_opal_dev(&dev->ctrl, &nvme_sec_submit);
+ else if (was_suspend)
+ opal_unlock_from_suspend(dev->ctrl.opal_dev);
+ } else {
+ free_opal_dev(dev->ctrl.opal_dev);
+ dev->ctrl.opal_dev = NULL;
+ }
+
result = nvme_setup_io_queues(dev);
if (result)
goto out;
@@ -1821,7 +1847,9 @@ static void nvme_reset_work(struct work_struct *work)
nvme_remove_namespaces(&dev->ctrl);
} else {
nvme_start_queues(&dev->ctrl);
+ nvme_wait_freeze(&dev->ctrl);
nvme_dev_add(dev);
+ nvme_unfreeze(&dev->ctrl);
}
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
@@ -1909,10 +1937,10 @@ static int nvme_dev_map(struct nvme_dev *dev)
if (!dev->bar)
goto release;
- return 0;
+ return 0;
release:
- pci_release_mem_regions(pdev);
- return -ENODEV;
+ pci_release_mem_regions(pdev);
+ return -ENODEV;
}
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
@@ -2000,8 +2028,10 @@ static void nvme_remove(struct pci_dev *pdev)
pci_set_drvdata(pdev, NULL);
- if (!pci_device_is_present(pdev))
+ if (!pci_device_is_present(pdev)) {
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
+ nvme_dev_disable(dev, false);
+ }
flush_work(&dev->reset_work);
nvme_uninit_ctrl(&dev->ctrl);
@@ -2120,6 +2150,7 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
+ { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, nvme_id_table);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index f587af345889..779f516e7a4e 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -42,28 +42,6 @@
#define NVME_RDMA_MAX_INLINE_SEGMENTS 1
-static const char *const nvme_rdma_cm_status_strs[] = {
- [NVME_RDMA_CM_INVALID_LEN] = "invalid length",
- [NVME_RDMA_CM_INVALID_RECFMT] = "invalid record format",
- [NVME_RDMA_CM_INVALID_QID] = "invalid queue ID",
- [NVME_RDMA_CM_INVALID_HSQSIZE] = "invalid host SQ size",
- [NVME_RDMA_CM_INVALID_HRQSIZE] = "invalid host RQ size",
- [NVME_RDMA_CM_NO_RSC] = "resource not found",
- [NVME_RDMA_CM_INVALID_IRD] = "invalid IRD",
- [NVME_RDMA_CM_INVALID_ORD] = "Invalid ORD",
-};
-
-static const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
-{
- size_t index = status;
-
- if (index < ARRAY_SIZE(nvme_rdma_cm_status_strs) &&
- nvme_rdma_cm_status_strs[index])
- return nvme_rdma_cm_status_strs[index];
- else
- return "unrecognized reason";
-};
-
/*
* We handle AEN commands ourselves and don't even let the
* block layer know about them.
@@ -155,6 +133,10 @@ struct nvme_rdma_ctrl {
struct sockaddr addr;
struct sockaddr_in addr_in;
};
+ union {
+ struct sockaddr src_addr;
+ struct sockaddr_in src_addr_in;
+ };
struct nvme_ctrl ctrl;
};
@@ -567,6 +549,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
int idx, size_t queue_size)
{
struct nvme_rdma_queue *queue;
+ struct sockaddr *src_addr = NULL;
int ret;
queue = &ctrl->queues[idx];
@@ -589,7 +572,10 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
}
queue->cm_error = -ETIMEDOUT;
- ret = rdma_resolve_addr(queue->cm_id, NULL, &ctrl->addr,
+ if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
+ src_addr = &ctrl->src_addr;
+
+ ret = rdma_resolve_addr(queue->cm_id, src_addr, &ctrl->addr,
NVME_RDMA_CONNECT_TIMEOUT_MS);
if (ret) {
dev_info(ctrl->ctrl.device,
@@ -981,8 +967,7 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
}
static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
- struct request *rq, unsigned int map_len,
- struct nvme_command *c)
+ struct request *rq, struct nvme_command *c)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_device *dev = queue->device;
@@ -1014,9 +999,9 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
}
if (count == 1) {
- if (rq_data_dir(rq) == WRITE &&
- map_len <= nvme_rdma_inline_data_size(queue) &&
- nvme_rdma_queue_idx(queue))
+ if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
+ blk_rq_payload_bytes(rq) <=
+ nvme_rdma_inline_data_size(queue))
return nvme_rdma_map_sg_inline(queue, req, c);
if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
@@ -1066,7 +1051,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
* sequencer is not allocated in our driver's tagset and it's
* triggered to be freed by blk_cleanup_queue(). So we need to
* always mark it as signaled to ensure that the "wr_cqe", which is
- * embeded in request's payload, is not freed when __ib_process_cq()
+ * embedded in request's payload, is not freed when __ib_process_cq()
* calls wr_cqe->done().
*/
if ((++queue->sig_count % 32) == 0 || flush)
@@ -1266,7 +1251,7 @@ static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
dev = nvme_rdma_find_get_device(queue->cm_id);
if (!dev) {
- dev_err(queue->cm_id->device->dma_device,
+ dev_err(queue->cm_id->device->dev.parent,
"no client data found!\n");
return -ECONNREFUSED;
}
@@ -1422,9 +1407,9 @@ static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
struct request *rq)
{
if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
- struct nvme_command *cmd = (struct nvme_command *)rq->cmd;
+ struct nvme_command *cmd = nvme_req(rq)->cmd;
- if (rq->cmd_type != REQ_TYPE_DRV_PRIV ||
+ if (!blk_rq_is_passthrough(rq) ||
cmd->common.opcode != nvme_fabrics_command ||
cmd->fabrics.fctype != nvme_fabrics_type_connect)
return false;
@@ -1444,7 +1429,6 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_command *c = sqe->data;
bool flush = false;
struct ib_device *dev;
- unsigned int map_len;
int ret;
WARN_ON_ONCE(rq->tag < 0);
@@ -1462,8 +1446,7 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(rq);
- map_len = nvme_map_len(rq);
- ret = nvme_rdma_map_data(queue, rq, map_len, c);
+ ret = nvme_rdma_map_data(queue, rq, c);
if (ret < 0) {
dev_err(queue->ctrl->ctrl.device,
"Failed to map data (%d)\n", ret);
@@ -1474,7 +1457,7 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
ib_dma_sync_single_for_device(dev, sqe->dma,
sizeof(struct nvme_command), DMA_TO_DEVICE);
- if (rq->cmd_type == REQ_TYPE_FS && req_op(rq) == REQ_OP_FLUSH)
+ if (req_op(rq) == REQ_OP_FLUSH)
flush = true;
ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
@@ -1525,7 +1508,7 @@ static void nvme_rdma_complete_rq(struct request *rq)
return;
}
- if (rq->cmd_type == REQ_TYPE_DRV_PRIV)
+ if (blk_rq_is_passthrough(rq))
error = rq->errors;
else
error = nvme_error_status(rq->errors);
@@ -1908,6 +1891,16 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
goto out_free_ctrl;
}
+ if (opts->mask & NVMF_OPT_HOST_TRADDR) {
+ ret = nvme_rdma_parse_ipaddr(&ctrl->src_addr_in,
+ opts->host_traddr);
+ if (ret) {
+ pr_err("malformed src IP address passed: %s\n",
+ opts->host_traddr);
+ goto out_free_ctrl;
+ }
+ }
+
if (opts->mask & NVMF_OPT_TRSVCID) {
u16 port;
@@ -2019,7 +2012,8 @@ out_free_ctrl:
static struct nvmf_transport_ops nvme_rdma_transport = {
.name = "rdma",
.required_opts = NVMF_OPT_TRADDR,
- .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY,
+ .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
+ NVMF_OPT_HOST_TRADDR,
.create_ctrl = nvme_rdma_create_ctrl,
};
@@ -2066,8 +2060,7 @@ static int __init nvme_rdma_init_module(void)
return ret;
}
- nvmf_register_transport(&nvme_rdma_transport);
- return 0;
+ return nvmf_register_transport(&nvme_rdma_transport);
}
static void __exit nvme_rdma_cleanup_module(void)
diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
index b71e95044b43..f49ae2758bb7 100644
--- a/drivers/nvme/host/scsi.c
+++ b/drivers/nvme/host/scsi.c
@@ -43,6 +43,7 @@
#include <asm/unaligned.h>
#include <scsi/sg.h>
#include <scsi/scsi.h>
+#include <scsi/scsi_request.h>
#include "nvme.h"
@@ -2160,30 +2161,6 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
return nvme_trans_status_code(hdr, nvme_sc);
}
-static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- u8 immed, no_flush;
-
- immed = cmd[1] & 0x01;
- no_flush = cmd[4] & 0x04;
-
- if (immed != 0) {
- return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- } else {
- if (no_flush == 0) {
- /* Issue NVME FLUSH command prior to START STOP UNIT */
- int res = nvme_trans_synchronize_cache(ns, hdr);
- if (res)
- return res;
- }
-
- return 0;
- }
-}
-
static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd)
{
@@ -2371,12 +2348,14 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
{
- u8 cmd[BLK_MAX_CDB];
+ u8 cmd[16];
int retcode;
unsigned int opcode;
if (hdr->cmdp == NULL)
return -EMSGSIZE;
+ if (hdr->cmd_len > sizeof(cmd))
+ return -EINVAL;
if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len))
return -EFAULT;
@@ -2439,9 +2418,6 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
case SECURITY_PROTOCOL_OUT:
retcode = nvme_trans_security_protocol(ns, hdr, cmd);
break;
- case START_STOP:
- retcode = nvme_trans_start_stop(ns, hdr, cmd);
- break;
case SYNCHRONIZE_CACHE:
retcode = nvme_trans_synchronize_cache(ns, hdr);
break;
@@ -2478,8 +2454,6 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr)
return -EFAULT;
if (hdr.interface_id != 'S')
return -EINVAL;
- if (hdr.cmd_len > BLK_MAX_CDB)
- return -EINVAL;
/*
* A positive return code means a NVMe status, which has been
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index ec1ad2aa0a4c..a7bcff45f437 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -13,6 +13,8 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
+#include <linux/rculist.h>
+
#include <generated/utsrelease.h>
#include <asm/unaligned.h>
#include "nvmet.h"
@@ -41,7 +43,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->get_log_page.nsid);
if (!ns) {
status = NVME_SC_INVALID_NS;
- pr_err("nvmet : Counld not find namespace id : %d\n",
+ pr_err("nvmet : Could not find namespace id : %d\n",
le32_to_cpu(req->cmd->get_log_page.nsid));
goto out;
}
@@ -382,7 +384,6 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
{
struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]);
- u64 val;
u32 val32;
u16 status = 0;
@@ -392,8 +393,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
(subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16));
break;
case NVME_FEAT_KATO:
- val = le64_to_cpu(req->cmd->prop_set.value);
- val32 = val & 0xffff;
+ val32 = le32_to_cpu(req->cmd->common.cdw10[1]);
req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
nvmet_set_result(req, req->sq->ctrl->kato);
break;
@@ -511,7 +511,7 @@ int nvmet_parse_admin_cmd(struct nvmet_req *req)
break;
case nvme_admin_identify:
req->data_len = 4096;
- switch (le32_to_cpu(cmd->identify.cns)) {
+ switch (cmd->identify.cns) {
case NVME_ID_CNS_NS:
req->execute = nvmet_execute_identify_ns;
return 0;
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 6f5074153dcd..be8c800078e2 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -631,6 +631,7 @@ static void nvmet_subsys_release(struct config_item *item)
{
struct nvmet_subsys *subsys = to_subsys(item);
+ nvmet_subsys_del_ctrls(subsys);
nvmet_subsys_put(subsys);
}
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index b1d66ed655c9..11b0a0a5f661 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -14,9 +14,12 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
+#include <linux/rculist.h>
+
#include "nvmet.h"
static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
+static DEFINE_IDA(cntlid_ida);
/*
* This read/write semaphore is used to synchronize access to configuration
@@ -200,7 +203,7 @@ static void nvmet_keep_alive_timer(struct work_struct *work)
pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
ctrl->cntlid, ctrl->kato);
- ctrl->ops->delete_ctrl(ctrl);
+ nvmet_ctrl_fatal_error(ctrl);
}
static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
@@ -749,7 +752,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
if (!ctrl->sqs)
goto out_free_cqs;
- ret = ida_simple_get(&subsys->cntlid_ida,
+ ret = ida_simple_get(&cntlid_ida,
NVME_CNTLID_MIN, NVME_CNTLID_MAX,
GFP_KERNEL);
if (ret < 0) {
@@ -816,7 +819,10 @@ static void nvmet_ctrl_free(struct kref *ref)
list_del(&ctrl->subsys_entry);
mutex_unlock(&subsys->lock);
- ida_simple_remove(&subsys->cntlid_ida, ctrl->cntlid);
+ flush_work(&ctrl->async_event_work);
+ cancel_work_sync(&ctrl->fatal_err_work);
+
+ ida_simple_remove(&cntlid_ida, ctrl->cntlid);
nvmet_subsys_put(subsys);
kfree(ctrl->sqs);
@@ -915,9 +921,6 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
mutex_init(&subsys->lock);
INIT_LIST_HEAD(&subsys->namespaces);
INIT_LIST_HEAD(&subsys->ctrls);
-
- ida_init(&subsys->cntlid_ida);
-
INIT_LIST_HEAD(&subsys->hosts);
return subsys;
@@ -930,11 +933,20 @@ static void nvmet_subsys_free(struct kref *ref)
WARN_ON_ONCE(!list_empty(&subsys->namespaces));
- ida_destroy(&subsys->cntlid_ida);
kfree(subsys->subsysnqn);
kfree(subsys);
}
+void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
+{
+ struct nvmet_ctrl *ctrl;
+
+ mutex_lock(&subsys->lock);
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
+ ctrl->ops->delete_ctrl(ctrl);
+ mutex_unlock(&subsys->lock);
+}
+
void nvmet_subsys_put(struct nvmet_subsys *subsys)
{
kref_put(&subsys->ref, nvmet_subsys_free);
@@ -963,6 +975,7 @@ static void __exit nvmet_exit(void)
{
nvmet_exit_configfs();
nvmet_exit_discovery();
+ ida_destroy(&cntlid_ida);
BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 12f39eea569f..af8aabf05335 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -186,14 +186,14 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req)
}
case nvme_admin_identify:
req->data_len = 4096;
- switch (le32_to_cpu(cmd->identify.cns)) {
+ switch (cmd->identify.cns) {
case NVME_ID_CNS_CTRL:
req->execute =
nvmet_execute_identify_disc_ctrl;
return 0;
default:
pr_err("nvmet: unsupported identify cns %d\n",
- le32_to_cpu(cmd->identify.cns));
+ cmd->identify.cns);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
default:
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index f4088198cd0d..8bd022af3df6 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -153,8 +153,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
goto out;
}
- pr_info("creating controller %d for NQN %s.\n",
- ctrl->cntlid, ctrl->hostnqn);
+ pr_info("creating controller %d for subsystem %s for NQN %s.\n",
+ ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn);
req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid);
out:
@@ -220,7 +220,7 @@ int nvmet_parse_connect_cmd(struct nvmet_req *req)
req->ns = NULL;
- if (req->cmd->common.opcode != nvme_fabrics_command) {
+ if (cmd->common.opcode != nvme_fabrics_command) {
pr_err("invalid command 0x%x on unconnected queue.\n",
cmd->fabrics.opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 173e842f19c9..8f483ee7868c 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1314,7 +1314,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
(struct fcnvme_ls_disconnect_rqst *)iod->rqstbuf;
struct fcnvme_ls_disconnect_acc *acc =
(struct fcnvme_ls_disconnect_acc *)iod->rspbuf;
- struct nvmet_fc_tgt_queue *queue;
+ struct nvmet_fc_tgt_queue *queue = NULL;
struct nvmet_fc_tgt_assoc *assoc;
int ret = 0;
bool del_assoc = false;
@@ -1348,7 +1348,18 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
assoc = nvmet_fc_find_target_assoc(tgtport,
be64_to_cpu(rqst->associd.association_id));
iod->assoc = assoc;
- if (!assoc)
+ if (assoc) {
+ if (rqst->discon_cmd.scope ==
+ FCNVME_DISCONN_CONNECTION) {
+ queue = nvmet_fc_find_target_queue(tgtport,
+ be64_to_cpu(
+ rqst->discon_cmd.id));
+ if (!queue) {
+ nvmet_fc_tgt_a_put(assoc);
+ ret = VERR_NO_CONN;
+ }
+ }
+ } else
ret = VERR_NO_ASSOC;
}
@@ -1373,21 +1384,18 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
FCNVME_LS_DISCONNECT);
- if (rqst->discon_cmd.scope == FCNVME_DISCONN_CONNECTION) {
- queue = nvmet_fc_find_target_queue(tgtport,
- be64_to_cpu(rqst->discon_cmd.id));
- if (queue) {
- int qid = queue->qid;
+ /* are we to delete a Connection ID (queue) */
+ if (queue) {
+ int qid = queue->qid;
- nvmet_fc_delete_target_queue(queue);
+ nvmet_fc_delete_target_queue(queue);
- /* release the get taken by find_target_queue */
- nvmet_fc_tgt_q_put(queue);
+ /* release the get taken by find_target_queue */
+ nvmet_fc_tgt_q_put(queue);
- /* tear association down if io queue terminated */
- if (!qid)
- del_assoc = true;
- }
+ /* tear association down if io queue terminated */
+ if (!qid)
+ del_assoc = true;
}
/* release get taken in nvmet_fc_find_target_assoc */
@@ -1809,16 +1817,14 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
/* data no longer needed */
nvmet_fc_free_tgt_pgs(fod);
- if (fcpreq->fcp_error || abort)
- nvmet_req_complete(&fod->req, fcpreq->fcp_error);
-
+ nvmet_req_complete(&fod->req, fcpreq->fcp_error);
return;
}
switch (fcpreq->op) {
case NVMET_FCOP_WRITEDATA:
- if (abort || fcpreq->fcp_error ||
+ if (fcpreq->fcp_error ||
fcpreq->transferred_length != fcpreq->transfer_length) {
nvmet_req_complete(&fod->req,
NVME_SC_FC_TRANSPORT_ERROR);
@@ -1841,7 +1847,7 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq)
case NVMET_FCOP_READDATA:
case NVMET_FCOP_READDATA_RSP:
- if (abort || fcpreq->fcp_error ||
+ if (fcpreq->fcp_error ||
fcpreq->transferred_length != fcpreq->transfer_length) {
/* data no longer needed */
nvmet_fc_free_tgt_pgs(fod);
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index bcb8ebeb01c5..4e8e6a22bce1 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -845,7 +845,7 @@ fcloop_create_remote_port(struct device *dev, struct device_attribute *attr,
rport->lport = nport->lport;
nport->rport = rport;
- return ret ? ret : count;
+ return count;
}
@@ -952,7 +952,7 @@ fcloop_create_target_port(struct device *dev, struct device_attribute *attr,
tport->lport = nport->lport;
nport->tport = tport;
- return ret ? ret : count;
+ return count;
}
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 9aaa70071ae5..d1f06e7768ff 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -104,7 +104,7 @@ static void nvme_loop_complete_rq(struct request *req)
return;
}
- if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+ if (blk_rq_is_passthrough(req))
error = req->errors;
else
error = nvme_error_status(req->errors);
@@ -724,8 +724,7 @@ static int __init nvme_loop_init_module(void)
ret = nvmet_register_transport(&nvme_loop_ops);
if (ret)
return ret;
- nvmf_register_transport(&nvme_loop_transport);
- return 0;
+ return nvmf_register_transport(&nvme_loop_transport);
}
static void __exit nvme_loop_cleanup_module(void)
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 23d5eb1c944f..1370eee0a3c0 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -142,7 +142,6 @@ struct nvmet_subsys {
unsigned int max_nsid;
struct list_head ctrls;
- struct ida cntlid_ida;
struct list_head hosts;
bool allow_any_host;
@@ -282,6 +281,7 @@ void nvmet_ctrl_put(struct nvmet_ctrl *ctrl);
struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
enum nvme_subsys_type type);
void nvmet_subsys_put(struct nvmet_subsys *subsys);
+void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys);
struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid);
void nvmet_put_namespace(struct nvmet_ns *ns);
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 8c3760a78ac0..9aa1da3778b3 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -438,6 +438,10 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
{
struct ib_recv_wr *bad_wr;
+ ib_dma_sync_single_for_device(ndev->device,
+ cmd->sge[0].addr, cmd->sge[0].length,
+ DMA_FROM_DEVICE);
+
if (ndev->srq)
return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
return ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr);
@@ -538,6 +542,11 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
first_wr = &rsp->send_wr;
nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);
+
+ ib_dma_sync_single_for_device(rsp->queue->dev->device,
+ rsp->send_sge.addr, rsp->send_sge.length,
+ DMA_TO_DEVICE);
+
if (ib_post_send(cm_id->qp, first_wr, &bad_wr)) {
pr_err("sending cmd response failed\n");
nvmet_rdma_release_rsp(rsp);
@@ -698,6 +707,14 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
cmd->n_rdma = 0;
cmd->req.port = queue->port;
+
+ ib_dma_sync_single_for_cpu(queue->dev->device,
+ cmd->cmd->sge[0].addr, cmd->cmd->sge[0].length,
+ DMA_FROM_DEVICE);
+ ib_dma_sync_single_for_cpu(queue->dev->device,
+ cmd->send_sge.addr, cmd->send_sge.length,
+ DMA_TO_DEVICE);
+
if (!nvmet_req_init(&cmd->req, &queue->nvme_cq,
&queue->nvme_sq, &nvmet_rdma_ops))
return;
@@ -1024,6 +1041,9 @@ static int nvmet_rdma_cm_reject(struct rdma_cm_id *cm_id,
{
struct nvme_rdma_cm_rej rej;
+ pr_debug("rejecting connect request: status %d (%s)\n",
+ status, nvme_rdma_cm_msg(status));
+
rej.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0);
rej.sts = cpu_to_le16(status);
@@ -1074,7 +1094,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
queue->idx = ida_simple_get(&nvmet_rdma_queue_ida, 0, 0, GFP_KERNEL);
if (queue->idx < 0) {
ret = NVME_RDMA_CM_NO_RSC;
- goto out_free_queue;
+ goto out_destroy_sq;
}
ret = nvmet_rdma_alloc_rsps(queue);
@@ -1118,7 +1138,6 @@ out_destroy_sq:
out_free_queue:
kfree(queue);
out_reject:
- pr_debug("rejecting connect request with status code %d\n", ret);
nvmet_rdma_cm_reject(cm_id, ret);
return NULL;
}
@@ -1171,7 +1190,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
ndev = nvmet_rdma_find_get_device(cm_id);
if (!ndev) {
- pr_err("no client data!\n");
nvmet_rdma_cm_reject(cm_id, NVME_RDMA_CM_NO_RSC);
return -ECONNREFUSED;
}