aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/nvme
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme')
-rw-r--r--drivers/nvme/host/core.c88
-rw-r--r--drivers/nvme/host/fabrics.c2
-rw-r--r--drivers/nvme/host/fault_inject.c41
-rw-r--r--drivers/nvme/host/fc.c64
-rw-r--r--drivers/nvme/host/lightnvm.c2
-rw-r--r--drivers/nvme/host/multipath.c18
-rw-r--r--drivers/nvme/host/nvme.h43
-rw-r--r--drivers/nvme/host/pci.c167
-rw-r--r--drivers/nvme/host/rdma.c9
-rw-r--r--drivers/nvme/host/tcp.c9
-rw-r--r--drivers/nvme/host/trace.c90
-rw-r--r--drivers/nvme/host/trace.h66
-rw-r--r--drivers/nvme/target/Makefile3
-rw-r--r--drivers/nvme/target/admin-cmd.c3
-rw-r--r--drivers/nvme/target/configfs.c4
-rw-r--r--drivers/nvme/target/core.c12
-rw-r--r--drivers/nvme/target/discovery.c4
-rw-r--r--drivers/nvme/target/fabrics-cmd.c2
-rw-r--r--drivers/nvme/target/fc.c13
-rw-r--r--drivers/nvme/target/fcloop.c81
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c39
-rw-r--r--drivers/nvme/target/loop.c4
-rw-r--r--drivers/nvme/target/nvmet.h10
-rw-r--r--drivers/nvme/target/trace.c201
-rw-r--r--drivers/nvme/target/trace.h141
25 files changed, 928 insertions, 188 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 120fb593d1da..cc09b81fc7f4 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -11,6 +11,7 @@
#include <linux/hdreg.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/backing-dev.h>
#include <linux/list_sort.h>
#include <linux/slab.h>
#include <linux/types.h>
@@ -1113,15 +1114,15 @@ static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
return id;
}
-static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
- void *buffer, size_t buflen, u32 *result)
+static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
+ unsigned int dword11, void *buffer, size_t buflen, u32 *result)
{
struct nvme_command c;
union nvme_result res;
int ret;
memset(&c, 0, sizeof(c));
- c.features.opcode = nvme_admin_set_features;
+ c.features.opcode = op;
c.features.fid = cpu_to_le32(fid);
c.features.dword11 = cpu_to_le32(dword11);
@@ -1132,6 +1133,24 @@ static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword
return ret;
}
+int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid,
+ unsigned int dword11, void *buffer, size_t buflen,
+ u32 *result)
+{
+ return nvme_features(dev, nvme_admin_set_features, fid, dword11, buffer,
+ buflen, result);
+}
+EXPORT_SYMBOL_GPL(nvme_set_features);
+
+int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid,
+ unsigned int dword11, void *buffer, size_t buflen,
+ u32 *result)
+{
+ return nvme_features(dev, nvme_admin_get_features, fid, dword11, buffer,
+ buflen, result);
+}
+EXPORT_SYMBOL_GPL(nvme_get_features);
+
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
{
u32 q_count = (*count - 1) | ((*count - 1) << 16);
@@ -1608,6 +1627,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
{
sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9);
unsigned short bs = 1 << ns->lba_shift;
+ u32 atomic_bs, phys_bs, io_opt;
if (ns->lba_shift > PAGE_SHIFT) {
/* unsupported block size, set capacity to 0 later */
@@ -1616,9 +1636,37 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_mq_freeze_queue(disk->queue);
blk_integrity_unregister(disk);
+ if (id->nabo == 0) {
+ /*
+ * Bit 1 indicates whether NAWUPF is defined for this namespace
+ * and whether it should be used instead of AWUPF. If NAWUPF ==
+ * 0 then AWUPF must be used instead.
+ */
+ if (id->nsfeat & (1 << 1) && id->nawupf)
+ atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
+ else
+ atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
+ } else {
+ atomic_bs = bs;
+ }
+ phys_bs = bs;
+ io_opt = bs;
+ if (id->nsfeat & (1 << 4)) {
+ /* NPWG = Namespace Preferred Write Granularity */
+ phys_bs *= 1 + le16_to_cpu(id->npwg);
+ /* NOWS = Namespace Optimal Write Size */
+ io_opt *= 1 + le16_to_cpu(id->nows);
+ }
+
blk_queue_logical_block_size(disk->queue, bs);
- blk_queue_physical_block_size(disk->queue, bs);
- blk_queue_io_min(disk->queue, bs);
+ /*
+ * Linux filesystems assume writing a single physical block is
+ * an atomic operation. Hence limit the physical block size to the
+ * value of the Atomic Write Unit Power Fail parameter.
+ */
+ blk_queue_physical_block_size(disk->queue, min(phys_bs, atomic_bs));
+ blk_queue_io_min(disk->queue, phys_bs);
+ blk_queue_io_opt(disk->queue, io_opt);
if (ns->ms && !ns->ext &&
(ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
@@ -2368,8 +2416,8 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
lockdep_assert_held(&nvme_subsystems_lock);
list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) {
- if (ctrl->state == NVME_CTRL_DELETING ||
- ctrl->state == NVME_CTRL_DEAD)
+ if (tmp->state == NVME_CTRL_DELETING ||
+ tmp->state == NVME_CTRL_DEAD)
continue;
if (tmp->cntlid == ctrl->cntlid) {
@@ -2415,6 +2463,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev));
subsys->vendor_id = le16_to_cpu(id->vid);
subsys->cmic = id->cmic;
+ subsys->awupf = le16_to_cpu(id->awupf);
#ifdef CONFIG_NVME_MULTIPATH
subsys->iopolicy = NVME_IOPOLICY_NUMA;
#endif
@@ -3256,6 +3305,10 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
goto out_free_ns;
}
+ if (ctrl->opts && ctrl->opts->data_digest)
+ ns->queue->backing_dev_info->capabilities
+ |= BDI_CAP_STABLE_WRITES;
+
blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue);
if (ctrl->ops->flags & NVME_F_PCI_P2PDMA)
blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue);
@@ -3318,7 +3371,7 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups);
nvme_mpath_add_disk(ns, id);
- nvme_fault_inject_init(ns);
+ nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
kfree(id);
return 0;
@@ -3343,7 +3396,15 @@ static void nvme_ns_remove(struct nvme_ns *ns)
if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
return;
- nvme_fault_inject_fini(ns);
+ nvme_fault_inject_fini(&ns->fault_inject);
+
+ mutex_lock(&ns->ctrl->subsys->lock);
+ list_del_rcu(&ns->siblings);
+ mutex_unlock(&ns->ctrl->subsys->lock);
+ synchronize_rcu(); /* guarantee not available in head->list */
+ nvme_mpath_clear_current_path(ns);
+ synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */
+
if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
del_gendisk(ns->disk);
blk_cleanup_queue(ns->queue);
@@ -3351,16 +3412,10 @@ static void nvme_ns_remove(struct nvme_ns *ns)
blk_integrity_unregister(ns->disk);
}
- mutex_lock(&ns->ctrl->subsys->lock);
- list_del_rcu(&ns->siblings);
- nvme_mpath_clear_current_path(ns);
- mutex_unlock(&ns->ctrl->subsys->lock);
-
down_write(&ns->ctrl->namespaces_rwsem);
list_del_init(&ns->list);
up_write(&ns->ctrl->namespaces_rwsem);
- synchronize_srcu(&ns->head->srcu);
nvme_mpath_check_last_path(ns);
nvme_put_ns(ns);
}
@@ -3702,6 +3757,7 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl);
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
{
+ nvme_fault_inject_fini(&ctrl->fault_inject);
dev_pm_qos_hide_latency_tolerance(ctrl->device);
cdev_device_del(&ctrl->cdev, ctrl->device);
}
@@ -3797,6 +3853,8 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
dev_pm_qos_update_user_latency_tolerance(ctrl->device,
min(default_ps_max_latency_us, (unsigned long)S32_MAX));
+ nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
+
return 0;
out_free_name:
kfree_const(ctrl->device->kobj.name);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 5838f7cd53ac..1994d5b42f94 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -578,7 +578,7 @@ bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
switch (ctrl->state) {
case NVME_CTRL_NEW:
case NVME_CTRL_CONNECTING:
- if (req->cmd->common.opcode == nvme_fabrics_command &&
+ if (nvme_is_fabrics(req->cmd) &&
req->cmd->fabrics.fctype == nvme_fabrics_type_connect)
return true;
break;
diff --git a/drivers/nvme/host/fault_inject.c b/drivers/nvme/host/fault_inject.c
index 4cfd2c9222d4..1352159733b0 100644
--- a/drivers/nvme/host/fault_inject.c
+++ b/drivers/nvme/host/fault_inject.c
@@ -15,11 +15,10 @@ static DECLARE_FAULT_ATTR(fail_default_attr);
static char *fail_request;
module_param(fail_request, charp, 0000);
-void nvme_fault_inject_init(struct nvme_ns *ns)
+void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj,
+ const char *dev_name)
{
struct dentry *dir, *parent;
- char *name = ns->disk->disk_name;
- struct nvme_fault_inject *fault_inj = &ns->fault_inject;
struct fault_attr *attr = &fault_inj->attr;
/* set default fault injection attribute */
@@ -27,20 +26,20 @@ void nvme_fault_inject_init(struct nvme_ns *ns)
setup_fault_attr(&fail_default_attr, fail_request);
/* create debugfs directory and attribute */
- parent = debugfs_create_dir(name, NULL);
+ parent = debugfs_create_dir(dev_name, NULL);
if (!parent) {
- pr_warn("%s: failed to create debugfs directory\n", name);
+ pr_warn("%s: failed to create debugfs directory\n", dev_name);
return;
}
*attr = fail_default_attr;
dir = fault_create_debugfs_attr("fault_inject", parent, attr);
if (IS_ERR(dir)) {
- pr_warn("%s: failed to create debugfs attr\n", name);
+ pr_warn("%s: failed to create debugfs attr\n", dev_name);
debugfs_remove_recursive(parent);
return;
}
- ns->fault_inject.parent = parent;
+ fault_inj->parent = parent;
/* create debugfs for status code and dont_retry */
fault_inj->status = NVME_SC_INVALID_OPCODE;
@@ -49,29 +48,33 @@ void nvme_fault_inject_init(struct nvme_ns *ns)
debugfs_create_bool("dont_retry", 0600, dir, &fault_inj->dont_retry);
}
-void nvme_fault_inject_fini(struct nvme_ns *ns)
+void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inject)
{
/* remove debugfs directories */
- debugfs_remove_recursive(ns->fault_inject.parent);
+ debugfs_remove_recursive(fault_inject->parent);
}
void nvme_should_fail(struct request *req)
{
struct gendisk *disk = req->rq_disk;
- struct nvme_ns *ns = NULL;
+ struct nvme_fault_inject *fault_inject = NULL;
u16 status;
- /*
- * make sure this request is coming from a valid namespace
- */
- if (!disk)
- return;
+ if (disk) {
+ struct nvme_ns *ns = disk->private_data;
+
+ if (ns)
+ fault_inject = &ns->fault_inject;
+ else
+ WARN_ONCE(1, "No namespace found for request\n");
+ } else {
+ fault_inject = &nvme_req(req)->ctrl->fault_inject;
+ }
- ns = disk->private_data;
- if (ns && should_fail(&ns->fault_inject.attr, 1)) {
+ if (fault_inject && should_fail(&fault_inject->attr, 1)) {
/* inject status code and DNR bit */
- status = ns->fault_inject.status;
- if (ns->fault_inject.dont_retry)
+ status = fault_inject->status;
+ if (fault_inject->dont_retry)
status |= NVME_SC_DNR;
nvme_req(req)->status = status;
}
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index dd8169bbf0d2..232d8094091b 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -204,6 +204,9 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt);
static struct workqueue_struct *nvme_fc_wq;
+static bool nvme_fc_waiting_to_unload;
+static DECLARE_COMPLETION(nvme_fc_unload_proceed);
+
/*
* These items are short-term. They will eventually be moved into
* a generic FC class. See comments in module init.
@@ -229,6 +232,8 @@ nvme_fc_free_lport(struct kref *ref)
/* remove from transport list */
spin_lock_irqsave(&nvme_fc_lock, flags);
list_del(&lport->port_list);
+ if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list))
+ complete(&nvme_fc_unload_proceed);
spin_unlock_irqrestore(&nvme_fc_lock, flags);
ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
@@ -2112,7 +2117,8 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
freq->sg_table.sgl = freq->first_sgl;
ret = sg_alloc_table_chained(&freq->sg_table,
- blk_rq_nr_phys_segments(rq), freq->sg_table.sgl);
+ blk_rq_nr_phys_segments(rq), freq->sg_table.sgl,
+ SG_CHUNK_SIZE);
if (ret)
return -ENOMEM;
@@ -2122,7 +2128,7 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl,
op->nents, dir);
if (unlikely(freq->sg_cnt <= 0)) {
- sg_free_table_chained(&freq->sg_table, true);
+ sg_free_table_chained(&freq->sg_table, SG_CHUNK_SIZE);
freq->sg_cnt = 0;
return -EFAULT;
}
@@ -2148,7 +2154,7 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
nvme_cleanup_cmd(rq);
- sg_free_table_chained(&freq->sg_table, true);
+ sg_free_table_chained(&freq->sg_table, SG_CHUNK_SIZE);
freq->sg_cnt = 0;
}
@@ -2607,6 +2613,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
if (nvme_fc_ctlr_active_on_rport(ctrl))
return -ENOTUNIQ;
+ dev_info(ctrl->ctrl.device,
+ "NVME-FC{%d}: create association : host wwpn 0x%016llx "
+ " rport wwpn 0x%016llx: NQN \"%s\"\n",
+ ctrl->cnum, ctrl->lport->localport.port_name,
+ ctrl->rport->remoteport.port_name, ctrl->ctrl.opts->subsysnqn);
+
/*
* Create the admin queue
*/
@@ -3450,11 +3462,51 @@ out_destroy_wq:
return ret;
}
+static void
+nvme_fc_delete_controllers(struct nvme_fc_rport *rport)
+{
+ struct nvme_fc_ctrl *ctrl;
+
+ spin_lock(&rport->lock);
+ list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
+ dev_warn(ctrl->ctrl.device,
+ "NVME-FC{%d}: transport unloading: deleting ctrl\n",
+ ctrl->cnum);
+ nvme_delete_ctrl(&ctrl->ctrl);
+ }
+ spin_unlock(&rport->lock);
+}
+
+static void
+nvme_fc_cleanup_for_unload(void)
+{
+ struct nvme_fc_lport *lport;
+ struct nvme_fc_rport *rport;
+
+ list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
+ list_for_each_entry(rport, &lport->endp_list, endp_list) {
+ nvme_fc_delete_controllers(rport);
+ }
+ }
+}
+
static void __exit nvme_fc_exit_module(void)
{
- /* sanity check - all lports should be removed */
- if (!list_empty(&nvme_fc_lport_list))
- pr_warn("%s: localport list not empty\n", __func__);
+ unsigned long flags;
+ bool need_cleanup = false;
+
+ spin_lock_irqsave(&nvme_fc_lock, flags);
+ nvme_fc_waiting_to_unload = true;
+ if (!list_empty(&nvme_fc_lport_list)) {
+ need_cleanup = true;
+ nvme_fc_cleanup_for_unload();
+ }
+ spin_unlock_irqrestore(&nvme_fc_lock, flags);
+ if (need_cleanup) {
+ pr_info("%s: waiting for ctlr deletes\n", __func__);
+ wait_for_completion(&nvme_fc_unload_proceed);
+ pr_info("%s: ctrl deletes complete\n", __func__);
+ }
nvmf_unregister_transport(&nvme_fc_transport);
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 4f20a10b39d3..ba009d4c9dfa 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -660,7 +660,7 @@ static struct request *nvme_nvm_alloc_request(struct request_queue *q,
rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
if (rqd->bio)
- blk_init_request_from_bio(rq, rqd->bio);
+ blk_rq_append_bio(rq, &rqd->bio);
else
rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 499acf07d61a..a9a927677970 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -123,14 +123,20 @@ void nvme_mpath_clear_current_path(struct nvme_ns *ns)
}
}
+static bool nvme_path_is_disabled(struct nvme_ns *ns)
+{
+ return ns->ctrl->state != NVME_CTRL_LIVE ||
+ test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
+ test_bit(NVME_NS_REMOVING, &ns->flags);
+}
+
static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
{
int found_distance = INT_MAX, fallback_distance = INT_MAX, distance;
struct nvme_ns *found = NULL, *fallback = NULL, *ns;
list_for_each_entry_rcu(ns, &head->list, siblings) {
- if (ns->ctrl->state != NVME_CTRL_LIVE ||
- test_bit(NVME_NS_ANA_PENDING, &ns->flags))
+ if (nvme_path_is_disabled(ns))
continue;
if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA)
@@ -178,14 +184,16 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
{
struct nvme_ns *ns, *found, *fallback = NULL;
- if (list_is_singular(&head->list))
+ if (list_is_singular(&head->list)) {
+ if (nvme_path_is_disabled(old))
+ return NULL;
return old;
+ }
for (ns = nvme_next_ns(head, old);
ns != old;
ns = nvme_next_ns(head, ns)) {
- if (ns->ctrl->state != NVME_CTRL_LIVE ||
- test_bit(NVME_NS_ANA_PENDING, &ns->flags))
+ if (nvme_path_is_disabled(ns))
continue;
if (ns->ana_state == NVME_ANA_OPTIMIZED) {
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 55553d293a98..716a876119c8 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -146,6 +146,15 @@ enum nvme_ctrl_state {
NVME_CTRL_DEAD,
};
+struct nvme_fault_inject {
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+ struct fault_attr attr;
+ struct dentry *parent;
+ bool dont_retry; /* DNR, do not retry */
+ u16 status; /* status code */
+#endif
+};
+
struct nvme_ctrl {
bool comp_seen;
enum nvme_ctrl_state state;
@@ -247,6 +256,8 @@ struct nvme_ctrl {
struct page *discard_page;
unsigned long discard_page_busy;
+
+ struct nvme_fault_inject fault_inject;
};
enum nvme_iopolicy {
@@ -272,6 +283,7 @@ struct nvme_subsystem {
char firmware_rev[8];
u8 cmic;
u16 vendor_id;
+ u16 awupf; /* 0's based awupf value. */
struct ida ns_ida;
#ifdef CONFIG_NVME_MULTIPATH
enum nvme_iopolicy iopolicy;
@@ -313,15 +325,6 @@ struct nvme_ns_head {
#endif
};
-#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
-struct nvme_fault_inject {
- struct fault_attr attr;
- struct dentry *parent;
- bool dont_retry; /* DNR, do not retry */
- u16 status; /* status code */
-};
-#endif
-
struct nvme_ns {
struct list_head list;
@@ -349,9 +352,7 @@ struct nvme_ns {
#define NVME_NS_ANA_PENDING 2
u16 noiob;
-#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
struct nvme_fault_inject fault_inject;
-#endif
};
@@ -372,12 +373,18 @@ struct nvme_ctrl_ops {
};
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
-void nvme_fault_inject_init(struct nvme_ns *ns);
-void nvme_fault_inject_fini(struct nvme_ns *ns);
+void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj,
+ const char *dev_name);
+void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inject);
void nvme_should_fail(struct request *req);
#else
-static inline void nvme_fault_inject_init(struct nvme_ns *ns) {}
-static inline void nvme_fault_inject_fini(struct nvme_ns *ns) {}
+static inline void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj,
+ const char *dev_name)
+{
+}
+static inline void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inj)
+{
+}
static inline void nvme_should_fail(struct request *req) {}
#endif
@@ -459,6 +466,12 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
union nvme_result *result, void *buffer, unsigned bufflen,
unsigned timeout, int qid, int at_head,
blk_mq_req_flags_t flags, bool poll);
+int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid,
+ unsigned int dword11, void *buffer, size_t buflen,
+ u32 *result);
+int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid,
+ unsigned int dword11, void *buffer, size_t buflen,
+ u32 *result);
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 524d6bd6d095..bb970ca82517 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -18,6 +18,7 @@
#include <linux/mutex.h>
#include <linux/once.h>
#include <linux/pci.h>
+#include <linux/suspend.h>
#include <linux/t10-pi.h>
#include <linux/types.h>
#include <linux/io-64-nonatomic-lo-hi.h>
@@ -67,20 +68,14 @@ static int io_queue_depth = 1024;
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
-static int queue_count_set(const char *val, const struct kernel_param *kp);
-static const struct kernel_param_ops queue_count_ops = {
- .set = queue_count_set,
- .get = param_get_int,
-};
-
static int write_queues;
-module_param_cb(write_queues, &queue_count_ops, &write_queues, 0644);
+module_param(write_queues, int, 0644);
MODULE_PARM_DESC(write_queues,
"Number of queues to use for writes. If not set, reads and writes "
"will share a queue set.");
-static int poll_queues = 0;
-module_param_cb(poll_queues, &queue_count_ops, &poll_queues, 0644);
+static int poll_queues;
+module_param(poll_queues, int, 0644);
MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO.");
struct nvme_dev;
@@ -116,6 +111,7 @@ struct nvme_dev {
u32 cmbsz;
u32 cmbloc;
struct nvme_ctrl ctrl;
+ u32 last_ps;
mempool_t *iod_mempool;
@@ -144,19 +140,6 @@ static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
return param_set_int(val, kp);
}
-static int queue_count_set(const char *val, const struct kernel_param *kp)
-{
- int n, ret;
-
- ret = kstrtoint(val, 10, &n);
- if (ret)
- return ret;
- if (n > num_possible_cpus())
- n = num_possible_cpus();
-
- return param_set_int(val, kp);
-}
-
static inline unsigned int sq_idx(unsigned int qid, u32 stride)
{
return qid * 2 * stride;
@@ -1456,11 +1439,15 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth));
- nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
- nvmeq->sq_cmds);
- if (nvmeq->sq_dma_addr) {
- set_bit(NVMEQ_SQ_CMB, &nvmeq->flags);
- return 0;
+ if (nvmeq->sq_cmds) {
+ nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
+ nvmeq->sq_cmds);
+ if (nvmeq->sq_dma_addr) {
+ set_bit(NVMEQ_SQ_CMB, &nvmeq->flags);
+ return 0;
+ }
+
+ pci_free_p2pmem(pdev, nvmeq->sq_cmds, SQ_SIZE(depth));
}
}
@@ -2068,6 +2055,7 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
.priv = dev,
};
unsigned int irq_queues, this_p_queues;
+ unsigned int nr_cpus = num_possible_cpus();
/*
* Poll queues don't need interrupts, but we need at least one IO
@@ -2078,7 +2066,10 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
this_p_queues = nr_io_queues - 1;
irq_queues = 1;
} else {
- irq_queues = nr_io_queues - this_p_queues + 1;
+ if (nr_cpus < nr_io_queues - this_p_queues)
+ irq_queues = nr_cpus + 1;
+ else
+ irq_queues = nr_io_queues - this_p_queues + 1;
}
dev->io_queues[HCTX_TYPE_POLL] = this_p_queues;
@@ -2263,7 +2254,9 @@ static int nvme_dev_add(struct nvme_dev *dev)
if (!dev->ctrl.tagset) {
dev->tagset.ops = &nvme_mq_ops;
dev->tagset.nr_hw_queues = dev->online_queues - 1;
- dev->tagset.nr_maps = 2; /* default + read */
+ dev->tagset.nr_maps = 1; /* default */
+ if (dev->io_queues[HCTX_TYPE_READ])
+ dev->tagset.nr_maps++;
if (dev->io_queues[HCTX_TYPE_POLL])
dev->tagset.nr_maps++;
dev->tagset.timeout = NVME_IO_TIMEOUT;
@@ -2302,8 +2295,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
pci_set_master(pdev);
- if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) &&
- dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32)))
+ if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)))
goto disable;
if (readl(dev->bar + NVME_REG_CSTS) == -1) {
@@ -2464,10 +2456,8 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
kfree(dev);
}
-static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
+static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
{
- dev_warn(dev->ctrl.device, "Removing after probe failure status: %d\n", status);
-
nvme_get_ctrl(&dev->ctrl);
nvme_dev_disable(dev, false);
nvme_kill_queues(&dev->ctrl);
@@ -2480,11 +2470,13 @@ static void nvme_reset_work(struct work_struct *work)
struct nvme_dev *dev =
container_of(work, struct nvme_dev, ctrl.reset_work);
bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
- int result = -ENODEV;
+ int result;
enum nvme_ctrl_state new_state = NVME_CTRL_LIVE;
- if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING))
+ if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) {
+ result = -ENODEV;
goto out;
+ }
/*
* If we're called to reset a live controller first shut it down before
@@ -2511,7 +2503,8 @@ static void nvme_reset_work(struct work_struct *work)
* Limit the max command size to prevent iod->sg allocations going
* over a single page.
*/
- dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
+ dev->ctrl.max_hw_sectors = min_t(u32,
+ NVME_MAX_KB_SZ << 1, dma_max_mapping_size(dev->dev) >> 9);
dev->ctrl.max_segments = NVME_MAX_SEGS;
/*
@@ -2528,6 +2521,7 @@ static void nvme_reset_work(struct work_struct *work)
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
dev_warn(dev->ctrl.device,
"failed to mark controller CONNECTING\n");
+ result = -EBUSY;
goto out;
}
@@ -2588,6 +2582,7 @@ static void nvme_reset_work(struct work_struct *work)
if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) {
dev_warn(dev->ctrl.device,
"failed to mark controller state %d\n", new_state);
+ result = -ENODEV;
goto out;
}
@@ -2597,7 +2592,10 @@ static void nvme_reset_work(struct work_struct *work)
out_unlock:
mutex_unlock(&dev->shutdown_lock);
out:
- nvme_remove_dead_ctrl(dev, result);
+ if (result)
+ dev_warn(dev->ctrl.device,
+ "Removing after probe failure status: %d\n", result);
+ nvme_remove_dead_ctrl(dev);
}
static void nvme_remove_dead_ctrl_work(struct work_struct *work)
@@ -2835,16 +2833,94 @@ static void nvme_remove(struct pci_dev *pdev)
}
#ifdef CONFIG_PM_SLEEP
+static int nvme_get_power_state(struct nvme_ctrl *ctrl, u32 *ps)
+{
+ return nvme_get_features(ctrl, NVME_FEAT_POWER_MGMT, 0, NULL, 0, ps);
+}
+
+static int nvme_set_power_state(struct nvme_ctrl *ctrl, u32 ps)
+{
+ return nvme_set_features(ctrl, NVME_FEAT_POWER_MGMT, ps, NULL, 0, NULL);
+}
+
+static int nvme_resume(struct device *dev)
+{
+ struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
+ struct nvme_ctrl *ctrl = &ndev->ctrl;
+
+ if (pm_resume_via_firmware() || !ctrl->npss ||
+ nvme_set_power_state(ctrl, ndev->last_ps) != 0)
+ nvme_reset_ctrl(ctrl);
+ return 0;
+}
+
static int nvme_suspend(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
+ struct nvme_ctrl *ctrl = &ndev->ctrl;
+ int ret = -EBUSY;
+
+ /*
+ * The platform does not remove power for a kernel managed suspend so
+ * use host managed nvme power settings for lowest idle power if
+ * possible. This should have quicker resume latency than a full device
+ * shutdown. But if the firmware is involved after the suspend or the
+ * device does not support any non-default power states, shut down the
+ * device fully.
+ */
+ if (pm_suspend_via_firmware() || !ctrl->npss) {
+ nvme_dev_disable(ndev, true);
+ return 0;
+ }
+
+ nvme_start_freeze(ctrl);
+ nvme_wait_freeze(ctrl);
+ nvme_sync_queues(ctrl);
+
+ if (ctrl->state != NVME_CTRL_LIVE &&
+ ctrl->state != NVME_CTRL_ADMIN_ONLY)
+ goto unfreeze;
+
+ ndev->last_ps = 0;
+ ret = nvme_get_power_state(ctrl, &ndev->last_ps);
+ if (ret < 0)
+ goto unfreeze;
+
+ ret = nvme_set_power_state(ctrl, ctrl->npss);
+ if (ret < 0)
+ goto unfreeze;
+
+ if (ret) {
+ /*
+ * Clearing npss forces a controller reset on resume. The
+ * correct value will be resdicovered then.
+ */
+ nvme_dev_disable(ndev, true);
+ ctrl->npss = 0;
+ ret = 0;
+ goto unfreeze;
+ }
+ /*
+ * A saved state prevents pci pm from generically controlling the
+ * device's power. If we're using protocol specific settings, we don't
+ * want pci interfering.
+ */
+ pci_save_state(pdev);
+unfreeze:
+ nvme_unfreeze(ctrl);
+ return ret;
+}
+
+static int nvme_simple_suspend(struct device *dev)
+{
+ struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
nvme_dev_disable(ndev, true);
return 0;
}
-static int nvme_resume(struct device *dev)
+static int nvme_simple_resume(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
@@ -2852,9 +2928,16 @@ static int nvme_resume(struct device *dev)
nvme_reset_ctrl(&ndev->ctrl);
return 0;
}
-#endif
-static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);
+static const struct dev_pm_ops nvme_dev_pm_ops = {
+ .suspend = nvme_suspend,
+ .resume = nvme_resume,
+ .freeze = nvme_simple_suspend,
+ .thaw = nvme_simple_resume,
+ .poweroff = nvme_simple_suspend,
+ .restore = nvme_simple_resume,
+};
+#endif /* CONFIG_PM_SLEEP */
static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
pci_channel_state_t state)
@@ -2959,9 +3042,11 @@ static struct pci_driver nvme_driver = {
.probe = nvme_probe,
.remove = nvme_remove,
.shutdown = nvme_shutdown,
+#ifdef CONFIG_PM_SLEEP
.driver = {
.pm = &nvme_dev_pm_ops,
},
+#endif
.sriov_configure = pci_sriov_configure_simple,
.err_handler = &nvme_err_handler,
};
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 97f668a39ae1..a249db528d54 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -482,7 +482,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
queue->queue_size,
IB_MR_TYPE_MEM_REG,
- nvme_rdma_get_max_fr_pages(ibdev));
+ nvme_rdma_get_max_fr_pages(ibdev), 0);
if (ret) {
dev_err(queue->ctrl->ctrl.device,
"failed to initialize MR pool sized %d for QID %d\n",
@@ -1144,7 +1144,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
nvme_cleanup_cmd(rq);
- sg_free_table_chained(&req->sg_table, true);
+ sg_free_table_chained(&req->sg_table, SG_CHUNK_SIZE);
}
static int nvme_rdma_set_sg_null(struct nvme_command *c)
@@ -1259,7 +1259,8 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
req->sg_table.sgl = req->first_sgl;
ret = sg_alloc_table_chained(&req->sg_table,
- blk_rq_nr_phys_segments(rq), req->sg_table.sgl);
+ blk_rq_nr_phys_segments(rq), req->sg_table.sgl,
+ SG_CHUNK_SIZE);
if (ret)
return -ENOMEM;
@@ -1299,7 +1300,7 @@ out_unmap_sg:
req->nents, rq_data_dir(rq) ==
WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
out_free_table:
- sg_free_table_chained(&req->sg_table, true);
+ sg_free_table_chained(&req->sg_table, SG_CHUNK_SIZE);
return ret;
}
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 08a2501b9357..606b13d35d16 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -860,7 +860,14 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
else
flags |= MSG_MORE;
- ret = kernel_sendpage(queue->sock, page, offset, len, flags);
+ /* can't zcopy slab pages */
+ if (unlikely(PageSlab(page))) {
+ ret = sock_no_sendpage(queue->sock, page, offset, len,
+ flags);
+ } else {
+ ret = kernel_sendpage(queue->sock, page, offset, len,
+ flags);
+ }
if (ret <= 0)
return ret;
diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c
index 5f24ea7a28eb..9778eb0406b3 100644
--- a/drivers/nvme/host/trace.c
+++ b/drivers/nvme/host/trace.c
@@ -7,6 +7,17 @@
#include <asm/unaligned.h>
#include "trace.h"
+static const char *nvme_trace_delete_sq(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u16 sqid = get_unaligned_le16(cdw10);
+
+ trace_seq_printf(p, "sqid=%u", sqid);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
static const char *nvme_trace_create_sq(struct trace_seq *p, u8 *cdw10)
{
const char *ret = trace_seq_buffer_ptr(p);
@@ -23,6 +34,17 @@ static const char *nvme_trace_create_sq(struct trace_seq *p, u8 *cdw10)
return ret;
}
+static const char *nvme_trace_delete_cq(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u16 cqid = get_unaligned_le16(cdw10);
+
+ trace_seq_printf(p, "cqid=%u", cqid);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
static const char *nvme_trace_create_cq(struct trace_seq *p, u8 *cdw10)
{
const char *ret = trace_seq_buffer_ptr(p);
@@ -107,8 +129,12 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p,
u8 opcode, u8 *cdw10)
{
switch (opcode) {
+ case nvme_admin_delete_sq:
+ return nvme_trace_delete_sq(p, cdw10);
case nvme_admin_create_sq:
return nvme_trace_create_sq(p, cdw10);
+ case nvme_admin_delete_cq:
+ return nvme_trace_delete_cq(p, cdw10);
case nvme_admin_create_cq:
return nvme_trace_create_cq(p, cdw10);
case nvme_admin_identify:
@@ -135,6 +161,69 @@ const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p,
}
}
+static const char *nvme_trace_fabrics_property_set(struct trace_seq *p, u8 *spc)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u8 attrib = spc[0];
+ u32 ofst = get_unaligned_le32(spc + 4);
+ u64 value = get_unaligned_le64(spc + 8);
+
+ trace_seq_printf(p, "attrib=%u, ofst=0x%x, value=0x%llx",
+ attrib, ofst, value);
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *nvme_trace_fabrics_connect(struct trace_seq *p, u8 *spc)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u16 recfmt = get_unaligned_le16(spc);
+ u16 qid = get_unaligned_le16(spc + 2);
+ u16 sqsize = get_unaligned_le16(spc + 4);
+ u8 cattr = spc[6];
+ u32 kato = get_unaligned_le32(spc + 8);
+
+ trace_seq_printf(p, "recfmt=%u, qid=%u, sqsize=%u, cattr=%u, kato=%u",
+ recfmt, qid, sqsize, cattr, kato);
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *nvme_trace_fabrics_property_get(struct trace_seq *p, u8 *spc)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u8 attrib = spc[0];
+ u32 ofst = get_unaligned_le32(spc + 4);
+
+ trace_seq_printf(p, "attrib=%u, ofst=0x%x", attrib, ofst);
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *nvme_trace_fabrics_common(struct trace_seq *p, u8 *spc)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ trace_seq_printf(p, "specific=%*ph", 24, spc);
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+const char *nvme_trace_parse_fabrics_cmd(struct trace_seq *p,
+ u8 fctype, u8 *spc)
+{
+ switch (fctype) {
+ case nvme_fabrics_type_property_set:
+ return nvme_trace_fabrics_property_set(p, spc);
+ case nvme_fabrics_type_connect:
+ return nvme_trace_fabrics_connect(p, spc);
+ case nvme_fabrics_type_property_get:
+ return nvme_trace_fabrics_property_get(p, spc);
+ default:
+ return nvme_trace_fabrics_common(p, spc);
+ }
+}
+
const char *nvme_trace_disk_name(struct trace_seq *p, char *name)
{
const char *ret = trace_seq_buffer_ptr(p);
@@ -145,6 +234,5 @@ const char *nvme_trace_disk_name(struct trace_seq *p, char *name)
return ret;
}
-EXPORT_SYMBOL_GPL(nvme_trace_disk_name);
EXPORT_TRACEPOINT_SYMBOL_GPL(nvme_sq);
diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h
index e71502d141ed..daaf700eae79 100644
--- a/drivers/nvme/host/trace.h
+++ b/drivers/nvme/host/trace.h
@@ -16,59 +16,19 @@
#include "nvme.h"
-#define nvme_admin_opcode_name(opcode) { opcode, #opcode }
-#define show_admin_opcode_name(val) \
- __print_symbolic(val, \
- nvme_admin_opcode_name(nvme_admin_delete_sq), \
- nvme_admin_opcode_name(nvme_admin_create_sq), \
- nvme_admin_opcode_name(nvme_admin_get_log_page), \
- nvme_admin_opcode_name(nvme_admin_delete_cq), \
- nvme_admin_opcode_name(nvme_admin_create_cq), \
- nvme_admin_opcode_name(nvme_admin_identify), \
- nvme_admin_opcode_name(nvme_admin_abort_cmd), \
- nvme_admin_opcode_name(nvme_admin_set_features), \
- nvme_admin_opcode_name(nvme_admin_get_features), \
- nvme_admin_opcode_name(nvme_admin_async_event), \
- nvme_admin_opcode_name(nvme_admin_ns_mgmt), \
- nvme_admin_opcode_name(nvme_admin_activate_fw), \
- nvme_admin_opcode_name(nvme_admin_download_fw), \
- nvme_admin_opcode_name(nvme_admin_ns_attach), \
- nvme_admin_opcode_name(nvme_admin_keep_alive), \
- nvme_admin_opcode_name(nvme_admin_directive_send), \
- nvme_admin_opcode_name(nvme_admin_directive_recv), \
- nvme_admin_opcode_name(nvme_admin_dbbuf), \
- nvme_admin_opcode_name(nvme_admin_format_nvm), \
- nvme_admin_opcode_name(nvme_admin_security_send), \
- nvme_admin_opcode_name(nvme_admin_security_recv), \
- nvme_admin_opcode_name(nvme_admin_sanitize_nvm))
-
-#define nvme_opcode_name(opcode) { opcode, #opcode }
-#define show_nvm_opcode_name(val) \
- __print_symbolic(val, \
- nvme_opcode_name(nvme_cmd_flush), \
- nvme_opcode_name(nvme_cmd_write), \
- nvme_opcode_name(nvme_cmd_read), \
- nvme_opcode_name(nvme_cmd_write_uncor), \
- nvme_opcode_name(nvme_cmd_compare), \
- nvme_opcode_name(nvme_cmd_write_zeroes), \
- nvme_opcode_name(nvme_cmd_dsm), \
- nvme_opcode_name(nvme_cmd_resv_register), \
- nvme_opcode_name(nvme_cmd_resv_report), \
- nvme_opcode_name(nvme_cmd_resv_acquire), \
- nvme_opcode_name(nvme_cmd_resv_release))
-
-#define show_opcode_name(qid, opcode) \
- (qid ? show_nvm_opcode_name(opcode) : show_admin_opcode_name(opcode))
-
const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode,
u8 *cdw10);
const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, u8 opcode,
u8 *cdw10);
+const char *nvme_trace_parse_fabrics_cmd(struct trace_seq *p, u8 fctype,
+ u8 *spc);
-#define parse_nvme_cmd(qid, opcode, cdw10) \
- (qid ? \
- nvme_trace_parse_nvm_cmd(p, opcode, cdw10) : \
- nvme_trace_parse_admin_cmd(p, opcode, cdw10))
+#define parse_nvme_cmd(qid, opcode, fctype, cdw10) \
+ ((opcode) == nvme_fabrics_command ? \
+ nvme_trace_parse_fabrics_cmd(p, fctype, cdw10) : \
+ ((qid) ? \
+ nvme_trace_parse_nvm_cmd(p, opcode, cdw10) : \
+ nvme_trace_parse_admin_cmd(p, opcode, cdw10)))
const char *nvme_trace_disk_name(struct trace_seq *p, char *name);
#define __print_disk_name(name) \
@@ -93,6 +53,7 @@ TRACE_EVENT(nvme_setup_cmd,
__field(int, qid)
__field(u8, opcode)
__field(u8, flags)
+ __field(u8, fctype)
__field(u16, cid)
__field(u32, nsid)
__field(u64, metadata)
@@ -106,6 +67,7 @@ TRACE_EVENT(nvme_setup_cmd,
__entry->cid = cmd->common.command_id;
__entry->nsid = le32_to_cpu(cmd->common.nsid);
__entry->metadata = le64_to_cpu(cmd->common.metadata);
+ __entry->fctype = cmd->fabrics.fctype;
__assign_disk_name(__entry->disk, req->rq_disk);
memcpy(__entry->cdw10, &cmd->common.cdw10,
sizeof(__entry->cdw10));
@@ -114,8 +76,10 @@ TRACE_EVENT(nvme_setup_cmd,
__entry->ctrl_id, __print_disk_name(__entry->disk),
__entry->qid, __entry->cid, __entry->nsid,
__entry->flags, __entry->metadata,
- show_opcode_name(__entry->qid, __entry->opcode),
- parse_nvme_cmd(__entry->qid, __entry->opcode, __entry->cdw10))
+ show_opcode_name(__entry->qid, __entry->opcode,
+ __entry->fctype),
+ parse_nvme_cmd(__entry->qid, __entry->opcode,
+ __entry->fctype, __entry->cdw10))
);
TRACE_EVENT(nvme_complete_rq,
@@ -141,7 +105,7 @@ TRACE_EVENT(nvme_complete_rq,
__entry->status = nvme_req(req)->status;
__assign_disk_name(__entry->disk, req->rq_disk);
),
- TP_printk("nvme%d: %sqid=%d, cmdid=%u, res=%llu, retries=%u, flags=0x%x, status=%u",
+ TP_printk("nvme%d: %sqid=%d, cmdid=%u, res=%#llx, retries=%u, flags=0x%x, status=%#x",
__entry->ctrl_id, __print_disk_name(__entry->disk),
__entry->qid, __entry->cid, __entry->result,
__entry->retries, __entry->flags, __entry->status)
diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
index 8c3ad0fb6860..2b33836f3d3e 100644
--- a/drivers/nvme/target/Makefile
+++ b/drivers/nvme/target/Makefile
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
+ccflags-y += -I$(src)
+
obj-$(CONFIG_NVME_TARGET) += nvmet.o
obj-$(CONFIG_NVME_TARGET_LOOP) += nvme-loop.o
obj-$(CONFIG_NVME_TARGET_RDMA) += nvmet-rdma.o
@@ -14,3 +16,4 @@ nvmet-rdma-y += rdma.o
nvmet-fc-y += fc.o
nvme-fcloop-y += fcloop.o
nvmet-tcp-y += tcp.o
+nvmet-$(CONFIG_TRACING) += trace.o
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 9f72d515fc4b..4dc12ea52f23 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -442,6 +442,9 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
break;
}
+ if (ns->bdev)
+ nvmet_bdev_set_limits(ns->bdev, id);
+
/*
* We just provide a single LBA format that matches what the
* underlying device reports.
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 08dd5af357f7..cd52b9f15376 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -588,8 +588,10 @@ static struct config_group *nvmet_ns_make(struct config_group *group,
goto out;
ret = -EINVAL;
- if (nsid == 0 || nsid == NVME_NSID_ALL)
+ if (nsid == 0 || nsid == NVME_NSID_ALL) {
+ pr_err("invalid nsid %#x", nsid);
goto out;
+ }
ret = -ENOMEM;
ns = nvmet_ns_alloc(subsys, nsid);
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 7734a6acff85..dad0243c7c96 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -10,6 +10,9 @@
#include <linux/pci-p2pdma.h>
#include <linux/scatterlist.h>
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
#include "nvmet.h"
struct workqueue_struct *buffered_io_wq;
@@ -311,6 +314,7 @@ int nvmet_enable_port(struct nvmet_port *port)
port->inline_data_size = 0;
port->enabled = true;
+ port->tr_ops = ops;
return 0;
}
@@ -321,6 +325,7 @@ void nvmet_disable_port(struct nvmet_port *port)
lockdep_assert_held(&nvmet_config_sem);
port->enabled = false;
+ port->tr_ops = NULL;
ops = nvmet_transports[port->disc_addr.trtype];
ops->remove_port(port);
@@ -689,6 +694,9 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
if (unlikely(status))
nvmet_set_error(req, status);
+
+ trace_nvmet_req_complete(req);
+
if (req->ns)
nvmet_put_namespace(req->ns);
req->ops->queue_response(req);
@@ -848,6 +856,8 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
req->error_loc = NVMET_NO_ERROR_LOC;
req->error_slba = 0;
+ trace_nvmet_req_init(req, req->cmd);
+
/* no support for fused commands yet */
if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
req->error_loc = offsetof(struct nvme_common_command, flags);
@@ -871,7 +881,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
status = nvmet_parse_connect_cmd(req);
else if (likely(req->sq->qid != 0))
status = nvmet_parse_io_cmd(req);
- else if (req->cmd->common.opcode == nvme_fabrics_command)
+ else if (nvme_is_fabrics(req->cmd))
status = nvmet_parse_fabrics_cmd(req);
else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
status = nvmet_parse_discovery_cmd(req);
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 5baf269f3f8a..8efca26b4776 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -41,6 +41,10 @@ void nvmet_port_disc_changed(struct nvmet_port *port,
__nvmet_disc_changed(port, ctrl);
}
mutex_unlock(&nvmet_disc_subsys->lock);
+
+ /* If transport can signal change, notify transport */
+ if (port->tr_ops && port->tr_ops->discovery_chg)
+ port->tr_ops->discovery_chg(port);
}
static void __nvmet_subsys_disc_changed(struct nvmet_port *port,
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 3b9f79aba98f..d16b55ffe79f 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -268,7 +268,7 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
- if (cmd->common.opcode != nvme_fabrics_command) {
+ if (!nvme_is_fabrics(cmd)) {
pr_err("invalid command 0x%x on unconnected queue.\n",
cmd->fabrics.opcode);
req->error_loc = offsetof(struct nvme_common_command, opcode);
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 508661af0f50..ce8d819f86cc 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1806,7 +1806,7 @@ nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport,
*/
rspcnt = atomic_inc_return(&fod->queue->zrspcnt);
if (!(rspcnt % fod->queue->ersp_ratio) ||
- sqe->opcode == nvme_fabrics_command ||
+ nvme_is_fabrics((struct nvme_command *) sqe) ||
xfr_length != fod->req.transfer_len ||
(le16_to_cpu(cqe->status) & 0xFFFE) || cqewd[0] || cqewd[1] ||
(sqe->flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND)) ||
@@ -2549,6 +2549,16 @@ nvmet_fc_remove_port(struct nvmet_port *port)
kfree(pe);
}
+static void
+nvmet_fc_discovery_chg(struct nvmet_port *port)
+{
+ struct nvmet_fc_port_entry *pe = port->priv;
+ struct nvmet_fc_tgtport *tgtport = pe->tgtport;
+
+ if (tgtport && tgtport->ops->discovery_event)
+ tgtport->ops->discovery_event(&tgtport->fc_target_port);
+}
+
static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
.owner = THIS_MODULE,
.type = NVMF_TRTYPE_FC,
@@ -2557,6 +2567,7 @@ static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
.remove_port = nvmet_fc_remove_port,
.queue_response = nvmet_fc_fcp_nvme_cmd_done,
.delete_ctrl = nvmet_fc_delete_ctrl,
+ .discovery_chg = nvmet_fc_discovery_chg,
};
static int __init nvmet_fc_init_module(void)
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 381b5a90c48b..b50b53db3746 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -231,6 +231,11 @@ struct fcloop_lsreq {
int status;
};
+struct fcloop_rscn {
+ struct fcloop_tport *tport;
+ struct work_struct work;
+};
+
enum {
INI_IO_START = 0,
INI_IO_ACTIVE = 1,
@@ -348,6 +353,37 @@ fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport,
return 0;
}
+/*
+ * Simulate reception of RSCN and converting it to a initiator transport
+ * call to rescan a remote port.
+ */
+static void
+fcloop_tgt_rscn_work(struct work_struct *work)
+{
+ struct fcloop_rscn *tgt_rscn =
+ container_of(work, struct fcloop_rscn, work);
+ struct fcloop_tport *tport = tgt_rscn->tport;
+
+ if (tport->remoteport)
+ nvme_fc_rescan_remoteport(tport->remoteport);
+ kfree(tgt_rscn);
+}
+
+static void
+fcloop_tgt_discovery_evt(struct nvmet_fc_target_port *tgtport)
+{
+ struct fcloop_rscn *tgt_rscn;
+
+ tgt_rscn = kzalloc(sizeof(*tgt_rscn), GFP_KERNEL);
+ if (!tgt_rscn)
+ return;
+
+ tgt_rscn->tport = tgtport->private;
+ INIT_WORK(&tgt_rscn->work, fcloop_tgt_rscn_work);
+
+ schedule_work(&tgt_rscn->work);
+}
+
static void
fcloop_tfcp_req_free(struct kref *ref)
{
@@ -398,7 +434,7 @@ fcloop_fcp_recv_work(struct work_struct *work)
int ret = 0;
bool aborted = false;
- spin_lock(&tfcp_req->reqlock);
+ spin_lock_irq(&tfcp_req->reqlock);
switch (tfcp_req->inistate) {
case INI_IO_START:
tfcp_req->inistate = INI_IO_ACTIVE;
@@ -407,11 +443,11 @@ fcloop_fcp_recv_work(struct work_struct *work)
aborted = true;
break;
default:
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
WARN_ON(1);
return;
}
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
if (unlikely(aborted))
ret = -ECANCELED;
@@ -433,7 +469,7 @@ fcloop_fcp_abort_recv_work(struct work_struct *work)
struct nvmefc_fcp_req *fcpreq;
bool completed = false;
- spin_lock(&tfcp_req->reqlock);
+ spin_lock_irq(&tfcp_req->reqlock);
fcpreq = tfcp_req->fcpreq;
switch (tfcp_req->inistate) {
case INI_IO_ABORTED:
@@ -442,11 +478,11 @@ fcloop_fcp_abort_recv_work(struct work_struct *work)
completed = true;
break;
default:
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
WARN_ON(1);
return;
}
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
if (unlikely(completed)) {
/* remove reference taken in original abort downcall */
@@ -458,9 +494,9 @@ fcloop_fcp_abort_recv_work(struct work_struct *work)
nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport,
&tfcp_req->tgt_fcp_req);
- spin_lock(&tfcp_req->reqlock);
+ spin_lock_irq(&tfcp_req->reqlock);
tfcp_req->fcpreq = NULL;
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED);
/* call_host_done releases reference for abort downcall */
@@ -477,10 +513,10 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work)
container_of(work, struct fcloop_fcpreq, tio_done_work);
struct nvmefc_fcp_req *fcpreq;
- spin_lock(&tfcp_req->reqlock);
+ spin_lock_irq(&tfcp_req->reqlock);
fcpreq = tfcp_req->fcpreq;
tfcp_req->inistate = INI_IO_COMPLETED;
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
fcloop_call_host_done(fcpreq, tfcp_req, tfcp_req->status);
}
@@ -499,7 +535,7 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport,
if (!rport->targetport)
return -ECONNREFUSED;
- tfcp_req = kzalloc(sizeof(*tfcp_req), GFP_KERNEL);
+ tfcp_req = kzalloc(sizeof(*tfcp_req), GFP_ATOMIC);
if (!tfcp_req)
return -ENOMEM;
@@ -585,12 +621,12 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport,
int fcp_err = 0, active, aborted;
u8 op = tgt_fcpreq->op;
- spin_lock(&tfcp_req->reqlock);
+ spin_lock_irq(&tfcp_req->reqlock);
fcpreq = tfcp_req->fcpreq;
active = tfcp_req->active;
aborted = tfcp_req->aborted;
tfcp_req->active = true;
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
if (unlikely(active))
/* illegal - call while i/o active */
@@ -598,9 +634,9 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport,
if (unlikely(aborted)) {
/* target transport has aborted i/o prior */
- spin_lock(&tfcp_req->reqlock);
+ spin_lock_irq(&tfcp_req->reqlock);
tfcp_req->active = false;
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
tgt_fcpreq->transferred_length = 0;
tgt_fcpreq->fcp_error = -ECANCELED;
tgt_fcpreq->done(tgt_fcpreq);
@@ -657,9 +693,9 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport,
break;
}
- spin_lock(&tfcp_req->reqlock);
+ spin_lock_irq(&tfcp_req->reqlock);
tfcp_req->active = false;
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
tgt_fcpreq->transferred_length = xfrlen;
tgt_fcpreq->fcp_error = fcp_err;
@@ -679,9 +715,9 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
* (one doing io, other doing abort) and only kills ops posted
* after the abort request
*/
- spin_lock(&tfcp_req->reqlock);
+ spin_lock_irq(&tfcp_req->reqlock);
tfcp_req->aborted = true;
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
tfcp_req->status = NVME_SC_INTERNAL;
@@ -729,7 +765,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
return;
/* break initiator/target relationship for io */
- spin_lock(&tfcp_req->reqlock);
+ spin_lock_irq(&tfcp_req->reqlock);
switch (tfcp_req->inistate) {
case INI_IO_START:
case INI_IO_ACTIVE:
@@ -739,11 +775,11 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
abortio = false;
break;
default:
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
WARN_ON(1);
return;
}
- spin_unlock(&tfcp_req->reqlock);
+ spin_unlock_irq(&tfcp_req->reqlock);
if (abortio)
/* leave the reference while the work item is scheduled */
@@ -839,6 +875,7 @@ static struct nvmet_fc_target_template tgttemplate = {
.fcp_op = fcloop_fcp_op,
.fcp_abort = fcloop_tgt_fcp_abort,
.fcp_req_release = fcloop_fcp_req_release,
+ .discovery_event = fcloop_tgt_discovery_evt,
.max_hw_queues = FCLOOP_HW_QUEUES,
.max_sgl_segments = FCLOOP_SGL_SEGS,
.max_dif_sgl_segments = FCLOOP_SGL_SEGS,
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 7a1cf6437a6a..de0bff70ebb6 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -8,6 +8,45 @@
#include <linux/module.h>
#include "nvmet.h"
+void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
+{
+ const struct queue_limits *ql = &bdev_get_queue(bdev)->limits;
+ /* Number of physical blocks per logical block. */
+ const u32 ppl = ql->physical_block_size / ql->logical_block_size;
+ /* Physical blocks per logical block, 0's based. */
+ const __le16 ppl0b = to0based(ppl);
+
+ /*
+ * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
+ * NAWUPF, and NACWU are defined for this namespace and should be
+ * used by the host for this namespace instead of the AWUN, AWUPF,
+ * and ACWU fields in the Identify Controller data structure. If
+ * any of these fields are zero that means that the corresponding
+ * field from the identify controller data structure should be used.
+ */
+ id->nsfeat |= 1 << 1;
+ id->nawun = ppl0b;
+ id->nawupf = ppl0b;
+ id->nacwu = ppl0b;
+
+ /*
+ * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
+ * NOWS are defined for this namespace and should be used by
+ * the host for I/O optimization.
+ */
+ id->nsfeat |= 1 << 4;
+ /* NPWG = Namespace Preferred Write Granularity. 0's based */
+ id->npwg = ppl0b;
+ /* NPWA = Namespace Preferred Write Alignment. 0's based */
+ id->npwa = id->npwg;
+ /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
+ id->npdg = to0based(ql->discard_granularity / ql->logical_block_size);
+ /* NPDG = Namespace Preferred Deallocate Alignment */
+ id->npda = id->npdg;
+ /* NOWS = Namespace Optimal Write Size */
+ id->nows = to0based(ql->io_opt / ql->logical_block_size);
+}
+
int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
int ret;
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 9e211ad6bdd3..b16dc3981c69 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -77,7 +77,7 @@ static void nvme_loop_complete_rq(struct request *req)
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
nvme_cleanup_cmd(req);
- sg_free_table_chained(&iod->sg_table, true);
+ sg_free_table_chained(&iod->sg_table, SG_CHUNK_SIZE);
nvme_complete_rq(req);
}
@@ -157,7 +157,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
iod->sg_table.sgl = iod->first_sgl;
if (sg_alloc_table_chained(&iod->sg_table,
blk_rq_nr_phys_segments(req),
- iod->sg_table.sgl))
+ iod->sg_table.sgl, SG_CHUNK_SIZE))
return BLK_STS_RESOURCE;
iod->req.sg = iod->sg_table.sgl;
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index c25d88fc9dec..6ee66c610739 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -140,6 +140,7 @@ struct nvmet_port {
void *priv;
bool enabled;
int inline_data_size;
+ const struct nvmet_fabrics_ops *tr_ops;
};
static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
@@ -277,6 +278,7 @@ struct nvmet_fabrics_ops {
void (*disc_traddr)(struct nvmet_req *req,
struct nvmet_port *port, char *traddr);
u16 (*install_queue)(struct nvmet_sq *nvme_sq);
+ void (*discovery_chg)(struct nvmet_port *port);
};
#define NVMET_MAX_INLINE_BIOVEC 8
@@ -363,6 +365,7 @@ u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask);
void nvmet_execute_async_event(struct nvmet_req *req);
u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
+void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id);
u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req);
u16 nvmet_file_parse_io_cmd(struct nvmet_req *req);
u16 nvmet_parse_admin_cmd(struct nvmet_req *req);
@@ -490,4 +493,11 @@ static inline u32 nvmet_rw_len(struct nvmet_req *req)
}
u16 errno_to_nvme_status(struct nvmet_req *req, int errno);
+
+/* Convert a 32-bit number to a 16-bit 0's based number */
+static inline __le16 to0based(u32 a)
+{
+ return cpu_to_le16(max(1U, min(1U << 16, a)) - 1);
+}
+
#endif /* _NVMET_H */
diff --git a/drivers/nvme/target/trace.c b/drivers/nvme/target/trace.c
new file mode 100644
index 000000000000..6af11d493271
--- /dev/null
+++ b/drivers/nvme/target/trace.c
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVM Express target device driver tracepoints
+ * Copyright (c) 2018 Johannes Thumshirn, SUSE Linux GmbH
+ */
+
+#include <asm/unaligned.h>
+#include "trace.h"
+
+static const char *nvmet_trace_admin_identify(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u8 cns = cdw10[0];
+ u16 ctrlid = get_unaligned_le16(cdw10 + 2);
+
+ trace_seq_printf(p, "cns=%u, ctrlid=%u", cns, ctrlid);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvmet_trace_admin_get_features(struct trace_seq *p,
+ u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u8 fid = cdw10[0];
+ u8 sel = cdw10[1] & 0x7;
+ u32 cdw11 = get_unaligned_le32(cdw10 + 4);
+
+ trace_seq_printf(p, "fid=0x%x sel=0x%x cdw11=0x%x", fid, sel, cdw11);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvmet_trace_read_write(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u64 slba = get_unaligned_le64(cdw10);
+ u16 length = get_unaligned_le16(cdw10 + 8);
+ u16 control = get_unaligned_le16(cdw10 + 10);
+ u32 dsmgmt = get_unaligned_le32(cdw10 + 12);
+ u32 reftag = get_unaligned_le32(cdw10 + 16);
+
+ trace_seq_printf(p,
+ "slba=%llu, len=%u, ctrl=0x%x, dsmgmt=%u, reftag=%u",
+ slba, length, control, dsmgmt, reftag);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvmet_trace_dsm(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ trace_seq_printf(p, "nr=%u, attributes=%u",
+ get_unaligned_le32(cdw10),
+ get_unaligned_le32(cdw10 + 4));
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvmet_trace_common(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ trace_seq_printf(p, "cdw10=%*ph", 24, cdw10);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+const char *nvmet_trace_parse_admin_cmd(struct trace_seq *p,
+ u8 opcode, u8 *cdw10)
+{
+ switch (opcode) {
+ case nvme_admin_identify:
+ return nvmet_trace_admin_identify(p, cdw10);
+ case nvme_admin_get_features:
+ return nvmet_trace_admin_get_features(p, cdw10);
+ default:
+ return nvmet_trace_common(p, cdw10);
+ }
+}
+
+const char *nvmet_trace_parse_nvm_cmd(struct trace_seq *p,
+ u8 opcode, u8 *cdw10)
+{
+ switch (opcode) {
+ case nvme_cmd_read:
+ case nvme_cmd_write:
+ case nvme_cmd_write_zeroes:
+ return nvmet_trace_read_write(p, cdw10);
+ case nvme_cmd_dsm:
+ return nvmet_trace_dsm(p, cdw10);
+ default:
+ return nvmet_trace_common(p, cdw10);
+ }
+}
+
+static const char *nvmet_trace_fabrics_property_set(struct trace_seq *p,
+ u8 *spc)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u8 attrib = spc[0];
+ u32 ofst = get_unaligned_le32(spc + 4);
+ u64 value = get_unaligned_le64(spc + 8);
+
+ trace_seq_printf(p, "attrib=%u, ofst=0x%x, value=0x%llx",
+ attrib, ofst, value);
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *nvmet_trace_fabrics_connect(struct trace_seq *p,
+ u8 *spc)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u16 recfmt = get_unaligned_le16(spc);
+ u16 qid = get_unaligned_le16(spc + 2);
+ u16 sqsize = get_unaligned_le16(spc + 4);
+ u8 cattr = spc[6];
+ u32 kato = get_unaligned_le32(spc + 8);
+
+ trace_seq_printf(p, "recfmt=%u, qid=%u, sqsize=%u, cattr=%u, kato=%u",
+ recfmt, qid, sqsize, cattr, kato);
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *nvmet_trace_fabrics_property_get(struct trace_seq *p,
+ u8 *spc)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u8 attrib = spc[0];
+ u32 ofst = get_unaligned_le32(spc + 4);
+
+ trace_seq_printf(p, "attrib=%u, ofst=0x%x", attrib, ofst);
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *nvmet_trace_fabrics_common(struct trace_seq *p, u8 *spc)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ trace_seq_printf(p, "specific=%*ph", 24, spc);
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+const char *nvmet_trace_parse_fabrics_cmd(struct trace_seq *p,
+ u8 fctype, u8 *spc)
+{
+ switch (fctype) {
+ case nvme_fabrics_type_property_set:
+ return nvmet_trace_fabrics_property_set(p, spc);
+ case nvme_fabrics_type_connect:
+ return nvmet_trace_fabrics_connect(p, spc);
+ case nvme_fabrics_type_property_get:
+ return nvmet_trace_fabrics_property_get(p, spc);
+ default:
+ return nvmet_trace_fabrics_common(p, spc);
+ }
+}
+
+const char *nvmet_trace_disk_name(struct trace_seq *p, char *name)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ if (*name)
+ trace_seq_printf(p, "disk=%s, ", name);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ /*
+ * XXX: We don't know the controller instance before executing the
+ * connect command itself because the connect command for the admin
+ * queue will not provide the cntlid which will be allocated in this
+ * command. In case of io queues, the controller instance will be
+ * mapped by the extra data of the connect command.
+ * If we can know the extra data of the connect command in this stage,
+ * we can update this print statement later.
+ */
+ if (ctrl)
+ trace_seq_printf(p, "%d", ctrl->cntlid);
+ else
+ trace_seq_printf(p, "_");
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
diff --git a/drivers/nvme/target/trace.h b/drivers/nvme/target/trace.h
new file mode 100644
index 000000000000..e645caa882dd
--- /dev/null
+++ b/drivers/nvme/target/trace.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * NVM Express target device driver tracepoints
+ * Copyright (c) 2018 Johannes Thumshirn, SUSE Linux GmbH
+ *
+ * This is entirely based on drivers/nvme/host/trace.h
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM nvmet
+
+#if !defined(_TRACE_NVMET_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_NVMET_H
+
+#include <linux/nvme.h>
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "nvmet.h"
+
+const char *nvmet_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode,
+ u8 *cdw10);
+const char *nvmet_trace_parse_nvm_cmd(struct trace_seq *p, u8 opcode,
+ u8 *cdw10);
+const char *nvmet_trace_parse_fabrics_cmd(struct trace_seq *p, u8 fctype,
+ u8 *spc);
+
+#define parse_nvme_cmd(qid, opcode, fctype, cdw10) \
+ ((opcode) == nvme_fabrics_command ? \
+ nvmet_trace_parse_fabrics_cmd(p, fctype, cdw10) : \
+ (qid ? \
+ nvmet_trace_parse_nvm_cmd(p, opcode, cdw10) : \
+ nvmet_trace_parse_admin_cmd(p, opcode, cdw10)))
+
+const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl);
+#define __print_ctrl_name(ctrl) \
+ nvmet_trace_ctrl_name(p, ctrl)
+
+const char *nvmet_trace_disk_name(struct trace_seq *p, char *name);
+#define __print_disk_name(name) \
+ nvmet_trace_disk_name(p, name)
+
+#ifndef TRACE_HEADER_MULTI_READ
+static inline struct nvmet_ctrl *nvmet_req_to_ctrl(struct nvmet_req *req)
+{
+ return req->sq->ctrl;
+}
+
+static inline void __assign_disk_name(char *name, struct nvmet_req *req,
+ bool init)
+{
+ struct nvmet_ctrl *ctrl = nvmet_req_to_ctrl(req);
+ struct nvmet_ns *ns;
+
+ if ((init && req->sq->qid) || (!init && req->cq->qid)) {
+ ns = nvmet_find_namespace(ctrl, req->cmd->rw.nsid);
+ strncpy(name, ns->device_path, DISK_NAME_LEN);
+ return;
+ }
+
+ memset(name, 0, DISK_NAME_LEN);
+}
+#endif
+
+TRACE_EVENT(nvmet_req_init,
+ TP_PROTO(struct nvmet_req *req, struct nvme_command *cmd),
+ TP_ARGS(req, cmd),
+ TP_STRUCT__entry(
+ __field(struct nvme_command *, cmd)
+ __field(struct nvmet_ctrl *, ctrl)
+ __array(char, disk, DISK_NAME_LEN)
+ __field(int, qid)
+ __field(u16, cid)
+ __field(u8, opcode)
+ __field(u8, fctype)
+ __field(u8, flags)
+ __field(u32, nsid)
+ __field(u64, metadata)
+ __array(u8, cdw10, 24)
+ ),
+ TP_fast_assign(
+ __entry->cmd = cmd;
+ __entry->ctrl = nvmet_req_to_ctrl(req);
+ __assign_disk_name(__entry->disk, req, true);
+ __entry->qid = req->sq->qid;
+ __entry->cid = cmd->common.command_id;
+ __entry->opcode = cmd->common.opcode;
+ __entry->fctype = cmd->fabrics.fctype;
+ __entry->flags = cmd->common.flags;
+ __entry->nsid = le32_to_cpu(cmd->common.nsid);
+ __entry->metadata = le64_to_cpu(cmd->common.metadata);
+ memcpy(__entry->cdw10, &cmd->common.cdw10,
+ sizeof(__entry->cdw10));
+ ),
+ TP_printk("nvmet%s: %sqid=%d, cmdid=%u, nsid=%u, flags=%#x, "
+ "meta=%#llx, cmd=(%s, %s)",
+ __print_ctrl_name(__entry->ctrl),
+ __print_disk_name(__entry->disk),
+ __entry->qid, __entry->cid, __entry->nsid,
+ __entry->flags, __entry->metadata,
+ show_opcode_name(__entry->qid, __entry->opcode,
+ __entry->fctype),
+ parse_nvme_cmd(__entry->qid, __entry->opcode,
+ __entry->fctype, __entry->cdw10))
+);
+
+TRACE_EVENT(nvmet_req_complete,
+ TP_PROTO(struct nvmet_req *req),
+ TP_ARGS(req),
+ TP_STRUCT__entry(
+ __field(struct nvmet_ctrl *, ctrl)
+ __array(char, disk, DISK_NAME_LEN)
+ __field(int, qid)
+ __field(int, cid)
+ __field(u64, result)
+ __field(u16, status)
+ ),
+ TP_fast_assign(
+ __entry->ctrl = nvmet_req_to_ctrl(req);
+ __entry->qid = req->cq->qid;
+ __entry->cid = req->cqe->command_id;
+ __entry->result = le64_to_cpu(req->cqe->result.u64);
+ __entry->status = le16_to_cpu(req->cqe->status) >> 1;
+ __assign_disk_name(__entry->disk, req, false);
+ ),
+ TP_printk("nvmet%s: %sqid=%d, cmdid=%u, res=%#llx, status=%#x",
+ __print_ctrl_name(__entry->ctrl),
+ __print_disk_name(__entry->disk),
+ __entry->qid, __entry->cid, __entry->result, __entry->status)
+
+);
+
+#endif /* _TRACE_NVMET_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>