aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/nvme/host/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host/core.c')
-rw-r--r--drivers/nvme/host/core.c215
1 files changed, 141 insertions, 74 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2c43e12b70af..120fb593d1da 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -388,7 +388,7 @@ static void nvme_free_ns_head(struct kref *ref)
nvme_mpath_remove_disk(head);
ida_simple_remove(&head->subsys->ns_ida, head->instance);
list_del_init(&head->entry);
- cleanup_srcu_struct_quiesced(&head->srcu);
+ cleanup_srcu_struct(&head->srcu);
nvme_put_subsystem(head->subsys);
kfree(head);
}
@@ -1105,7 +1105,7 @@ static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
if (error) {
- dev_warn(ctrl->device, "Identify namespace failed\n");
+ dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error);
kfree(id);
return NULL;
}
@@ -1259,8 +1259,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
if (ctrl->effects)
effects = le32_to_cpu(ctrl->effects->acs[opcode]);
- else
- effects = nvme_known_admin_effects(opcode);
+ effects |= nvme_known_admin_effects(opcode);
/*
* For simplicity, IO to all namespaces is quiesced even if the command
@@ -1362,9 +1361,14 @@ static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
{
#ifdef CONFIG_NVME_MULTIPATH
if (disk->fops == &nvme_ns_head_ops) {
+ struct nvme_ns *ns;
+
*head = disk->private_data;
*srcu_idx = srcu_read_lock(&(*head)->srcu);
- return nvme_find_path(*head);
+ ns = nvme_find_path(*head);
+ if (!ns)
+ srcu_read_unlock(&(*head)->srcu, *srcu_idx);
+ return ns;
}
#endif
*head = NULL;
@@ -1378,42 +1382,56 @@ static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
srcu_read_unlock(&head->srcu, idx);
}
-static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned cmd, unsigned long arg)
+static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
{
+ struct nvme_ns_head *head = NULL;
+ void __user *argp = (void __user *)arg;
+ struct nvme_ns *ns;
+ int srcu_idx, ret;
+
+ ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
+ if (unlikely(!ns))
+ return -EWOULDBLOCK;
+
+ /*
+ * Handle ioctls that apply to the controller instead of the namespace
+ * seperately and drop the ns SRCU reference early. This avoids a
+ * deadlock when deleting namespaces using the passthrough interface.
+ */
+ if (cmd == NVME_IOCTL_ADMIN_CMD || is_sed_ioctl(cmd)) {
+ struct nvme_ctrl *ctrl = ns->ctrl;
+
+ nvme_get_ctrl(ns->ctrl);
+ nvme_put_ns_from_disk(head, srcu_idx);
+
+ if (cmd == NVME_IOCTL_ADMIN_CMD)
+ ret = nvme_user_cmd(ctrl, NULL, argp);
+ else
+ ret = sed_ioctl(ctrl->opal_dev, cmd, argp);
+
+ nvme_put_ctrl(ctrl);
+ return ret;
+ }
+
switch (cmd) {
case NVME_IOCTL_ID:
force_successful_syscall_return();
- return ns->head->ns_id;
- case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
+ ret = ns->head->ns_id;
+ break;
case NVME_IOCTL_IO_CMD:
- return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
+ ret = nvme_user_cmd(ns->ctrl, ns, argp);
+ break;
case NVME_IOCTL_SUBMIT_IO:
- return nvme_submit_io(ns, (void __user *)arg);
+ ret = nvme_submit_io(ns, argp);
+ break;
default:
-#ifdef CONFIG_NVM
if (ns->ndev)
- return nvme_nvm_ioctl(ns, cmd, arg);
-#endif
- if (is_sed_ioctl(cmd))
- return sed_ioctl(ns->ctrl->opal_dev, cmd,
- (void __user *) arg);
- return -ENOTTY;
+ ret = nvme_nvm_ioctl(ns, cmd, arg);
+ else
+ ret = -ENOTTY;
}
-}
-
-static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg)
-{
- struct nvme_ns_head *head = NULL;
- struct nvme_ns *ns;
- int srcu_idx, ret;
- ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
- if (unlikely(!ns))
- ret = -EWOULDBLOCK;
- else
- ret = nvme_ns_ioctl(ns, cmd, arg);
nvme_put_ns_from_disk(head, srcu_idx);
return ret;
}
@@ -1588,9 +1606,13 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
static void nvme_update_disk_info(struct gendisk *disk,
struct nvme_ns *ns, struct nvme_id_ns *id)
{
- sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9);
+ sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9);
unsigned short bs = 1 << ns->lba_shift;
+ if (ns->lba_shift > PAGE_SHIFT) {
+ /* unsupported block size, set capacity to 0 later */
+ bs = (1 << 9);
+ }
blk_mq_freeze_queue(disk->queue);
blk_integrity_unregister(disk);
@@ -1601,7 +1623,8 @@ static void nvme_update_disk_info(struct gendisk *disk,
if (ns->ms && !ns->ext &&
(ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
nvme_init_integrity(disk, ns->ms, ns->pi_type);
- if (ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk))
+ if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) ||
+ ns->lba_shift > PAGE_SHIFT)
capacity = 0;
set_capacity(disk, capacity);
@@ -2337,20 +2360,35 @@ static const struct attribute_group *nvme_subsys_attrs_groups[] = {
NULL,
};
-static int nvme_active_ctrls(struct nvme_subsystem *subsys)
+static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
+ struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
{
- int count = 0;
- struct nvme_ctrl *ctrl;
+ struct nvme_ctrl *tmp;
- mutex_lock(&subsys->lock);
- list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
- if (ctrl->state != NVME_CTRL_DELETING &&
- ctrl->state != NVME_CTRL_DEAD)
- count++;
+ lockdep_assert_held(&nvme_subsystems_lock);
+
+ list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) {
+ if (ctrl->state == NVME_CTRL_DELETING ||
+ ctrl->state == NVME_CTRL_DEAD)
+ continue;
+
+ if (tmp->cntlid == ctrl->cntlid) {
+ dev_err(ctrl->device,
+ "Duplicate cntlid %u with %s, rejecting\n",
+ ctrl->cntlid, dev_name(tmp->device));
+ return false;
+ }
+
+ if ((id->cmic & (1 << 1)) ||
+ (ctrl->opts && ctrl->opts->discovery_nqn))
+ continue;
+
+ dev_err(ctrl->device,
+ "Subsystem does not support multiple controllers\n");
+ return false;
}
- mutex_unlock(&subsys->lock);
- return count;
+ return true;
}
static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
@@ -2390,22 +2428,13 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
mutex_lock(&nvme_subsystems_lock);
found = __nvme_find_get_subsystem(subsys->subnqn);
if (found) {
- /*
- * Verify that the subsystem actually supports multiple
- * controllers, else bail out.
- */
- if (!(ctrl->opts && ctrl->opts->discovery_nqn) &&
- nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
- dev_err(ctrl->device,
- "ignoring ctrl due to duplicate subnqn (%s).\n",
- found->subnqn);
- nvme_put_subsystem(found);
- ret = -EINVAL;
- goto out_unlock;
- }
-
__nvme_release_subsystem(subsys);
subsys = found;
+
+ if (!nvme_validate_cntlid(subsys, ctrl, id)) {
+ ret = -EINVAL;
+ goto out_put_subsystem;
+ }
} else {
ret = device_add(&subsys->dev);
if (ret) {
@@ -2417,23 +2446,20 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
list_add_tail(&subsys->entry, &nvme_subsystems);
}
- ctrl->subsys = subsys;
- mutex_unlock(&nvme_subsystems_lock);
-
if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
dev_name(ctrl->device))) {
dev_err(ctrl->device,
"failed to create sysfs link from subsystem.\n");
- /* the transport driver will eventually put the subsystem */
- return -EINVAL;
+ goto out_put_subsystem;
}
- mutex_lock(&subsys->lock);
+ ctrl->subsys = subsys;
list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
- mutex_unlock(&subsys->lock);
-
+ mutex_unlock(&nvme_subsystems_lock);
return 0;
+out_put_subsystem:
+ nvme_put_subsystem(subsys);
out_unlock:
mutex_unlock(&nvme_subsystems_lock);
put_device(&subsys->dev);
@@ -2549,7 +2575,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->crdt[2] = le16_to_cpu(id->crdt3);
ctrl->oacs = le16_to_cpu(id->oacs);
- ctrl->oncs = le16_to_cpup(&id->oncs);
+ ctrl->oncs = le16_to_cpu(id->oncs);
+ ctrl->mtfa = le16_to_cpu(id->mtfa);
ctrl->oaes = le32_to_cpu(id->oaes);
atomic_set(&ctrl->abort_limit, id->acl + 1);
ctrl->vwc = id->vwc;
@@ -3373,7 +3400,8 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
{
struct nvme_ns *ns;
__le32 *ns_list;
- unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
+ unsigned i, j, nsid, prev = 0;
+ unsigned num_lists = DIV_ROUND_UP_ULL((u64)nn, 1024);
int ret = 0;
ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
@@ -3600,19 +3628,18 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
{
u32 aer_notice_type = (result & 0xff00) >> 8;
+ trace_nvme_async_event(ctrl, aer_notice_type);
+
switch (aer_notice_type) {
case NVME_AER_NOTICE_NS_CHANGED:
- trace_nvme_async_event(ctrl, aer_notice_type);
set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events);
nvme_queue_scan(ctrl);
break;
case NVME_AER_NOTICE_FW_ACT_STARTING:
- trace_nvme_async_event(ctrl, aer_notice_type);
queue_work(nvme_wq, &ctrl->fw_act_work);
break;
#ifdef CONFIG_NVME_MULTIPATH
case NVME_AER_NOTICE_ANA:
- trace_nvme_async_event(ctrl, aer_notice_type);
if (!ctrl->ana_log_buf)
break;
queue_work(nvme_wq, &ctrl->ana_work);
@@ -3675,6 +3702,7 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl);
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
{
+ dev_pm_qos_hide_latency_tolerance(ctrl->device);
cdev_device_del(&ctrl->cdev, ctrl->device);
}
EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
@@ -3691,10 +3719,10 @@ static void nvme_free_ctrl(struct device *dev)
__free_page(ctrl->discard_page);
if (subsys) {
- mutex_lock(&subsys->lock);
+ mutex_lock(&nvme_subsystems_lock);
list_del(&ctrl->subsys_entry);
- mutex_unlock(&subsys->lock);
sysfs_remove_link(&subsys->dev.kobj, dev_name(ctrl->device));
+ mutex_unlock(&nvme_subsystems_lock);
}
ctrl->ops->free_ctrl(ctrl);
@@ -3874,10 +3902,49 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_start_queues);
-int __init nvme_core_init(void)
+
+void nvme_sync_queues(struct nvme_ctrl *ctrl)
+{
+ struct nvme_ns *ns;
+
+ down_read(&ctrl->namespaces_rwsem);
+ list_for_each_entry(ns, &ctrl->namespaces, list)
+ blk_sync_queue(ns->queue);
+ up_read(&ctrl->namespaces_rwsem);
+}
+EXPORT_SYMBOL_GPL(nvme_sync_queues);
+
+/*
+ * Check we didn't inadvertently grow the command structure sizes:
+ */
+static inline void _nvme_check_size(void)
+{
+ BUILD_BUG_ON(sizeof(struct nvme_common_command) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_identify) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_features) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_download_firmware) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_dsm_cmd) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_write_zeroes_cmd) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_get_log_page_command) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
+ BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
+ BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
+ BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64);
+}
+
+
+static int __init nvme_core_init(void)
{
int result = -ENOMEM;
+ _nvme_check_size();
+
nvme_wq = alloc_workqueue("nvme-wq",
WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
if (!nvme_wq)
@@ -3924,7 +3991,7 @@ out:
return result;
}
-void __exit nvme_core_exit(void)
+static void __exit nvme_core_exit(void)
{
ida_destroy(&nvme_subsystems_ida);
class_destroy(nvme_subsys_class);