diff options
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/core.c | 414 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.c | 7 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.h | 2 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 47 | ||||
-rw-r--r-- | drivers/nvme/host/lightnvm.c | 315 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 38 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 109 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 73 | ||||
-rw-r--r-- | drivers/nvme/host/scsi.c | 34 | ||||
-rw-r--r-- | drivers/nvme/target/admin-cmd.c | 10 | ||||
-rw-r--r-- | drivers/nvme/target/configfs.c | 1 | ||||
-rw-r--r-- | drivers/nvme/target/core.c | 27 | ||||
-rw-r--r-- | drivers/nvme/target/discovery.c | 4 | ||||
-rw-r--r-- | drivers/nvme/target/fabrics-cmd.c | 6 | ||||
-rw-r--r-- | drivers/nvme/target/fc.c | 44 | ||||
-rw-r--r-- | drivers/nvme/target/fcloop.c | 4 | ||||
-rw-r--r-- | drivers/nvme/target/loop.c | 5 | ||||
-rw-r--r-- | drivers/nvme/target/nvmet.h | 2 | ||||
-rw-r--r-- | drivers/nvme/target/rdma.c | 24 |
19 files changed, 879 insertions, 287 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b40cfb076f02..9b3b57fef446 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -26,6 +26,7 @@ #include <linux/ptrace.h> #include <linux/nvme_ioctl.h> #include <linux/t10-pi.h> +#include <linux/pm_qos.h> #include <scsi/sg.h> #include <asm/unaligned.h> @@ -56,6 +57,11 @@ EXPORT_SYMBOL_GPL(nvme_max_retries); static int nvme_char_major; module_param(nvme_char_major, int, 0); +static unsigned long default_ps_max_latency_us = 25000; +module_param(default_ps_max_latency_us, ulong, 0644); +MODULE_PARM_DESC(default_ps_max_latency_us, + "max power saving latency for new devices; use PM QOS to change per device"); + static LIST_HEAD(nvme_ctrl_list); static DEFINE_SPINLOCK(dev_list_lock); @@ -208,18 +214,18 @@ EXPORT_SYMBOL_GPL(nvme_requeue_req); struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, unsigned int flags, int qid) { + unsigned op = nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN; struct request *req; if (qid == NVME_QID_ANY) { - req = blk_mq_alloc_request(q, nvme_is_write(cmd), flags); + req = blk_mq_alloc_request(q, op, flags); } else { - req = blk_mq_alloc_request_hctx(q, nvme_is_write(cmd), flags, + req = blk_mq_alloc_request_hctx(q, op, flags, qid ? qid - 1 : 0); } if (IS_ERR(req)) return req; - req->cmd_type = REQ_TYPE_DRV_PRIV; req->cmd_flags |= REQ_FAILFAST_DRIVER; nvme_req(req)->cmd = cmd; @@ -238,26 +244,38 @@ static inline void nvme_setup_flush(struct nvme_ns *ns, static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req, struct nvme_command *cmnd) { + unsigned short segments = blk_rq_nr_discard_segments(req), n = 0; struct nvme_dsm_range *range; - unsigned int nr_bytes = blk_rq_bytes(req); + struct bio *bio; - range = kmalloc(sizeof(*range), GFP_ATOMIC); + range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC); if (!range) return BLK_MQ_RQ_QUEUE_BUSY; - range->cattr = cpu_to_le32(0); - range->nlb = cpu_to_le32(nr_bytes >> ns->lba_shift); - range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); + __rq_for_each_bio(bio, req) { + u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector); + u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift; + + range[n].cattr = cpu_to_le32(0); + range[n].nlb = cpu_to_le32(nlb); + range[n].slba = cpu_to_le64(slba); + n++; + } + + if (WARN_ON_ONCE(n != segments)) { + kfree(range); + return BLK_MQ_RQ_QUEUE_ERROR; + } memset(cmnd, 0, sizeof(*cmnd)); cmnd->dsm.opcode = nvme_cmd_dsm; cmnd->dsm.nsid = cpu_to_le32(ns->ns_id); - cmnd->dsm.nr = 0; + cmnd->dsm.nr = segments - 1; cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); req->special_vec.bv_page = virt_to_page(range); req->special_vec.bv_offset = offset_in_page(range); - req->special_vec.bv_len = sizeof(*range); + req->special_vec.bv_len = sizeof(*range) * segments; req->rq_flags |= RQF_SPECIAL_PAYLOAD; return BLK_MQ_RQ_QUEUE_OK; @@ -309,17 +327,27 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, { int ret = BLK_MQ_RQ_QUEUE_OK; - if (req->cmd_type == REQ_TYPE_DRV_PRIV) + switch (req_op(req)) { + case REQ_OP_DRV_IN: + case REQ_OP_DRV_OUT: memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd)); - else if (req_op(req) == REQ_OP_FLUSH) + break; + case REQ_OP_FLUSH: nvme_setup_flush(ns, cmd); - else if (req_op(req) == REQ_OP_DISCARD) + break; + case REQ_OP_DISCARD: ret = nvme_setup_discard(ns, req, cmd); - else + break; + case REQ_OP_READ: + case REQ_OP_WRITE: nvme_setup_rw(ns, req, cmd); + break; + default: + WARN_ON_ONCE(1); + return BLK_MQ_RQ_QUEUE_ERROR; + } cmd->common.command_id = req->tag; - return ret; } EXPORT_SYMBOL_GPL(nvme_setup_cmd); @@ -538,7 +566,7 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ c.identify.opcode = nvme_admin_identify; - c.identify.cns = cpu_to_le32(NVME_ID_CNS_CTRL); + c.identify.cns = NVME_ID_CNS_CTRL; *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL); if (!*id) @@ -556,7 +584,7 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n struct nvme_command c = { }; c.identify.opcode = nvme_admin_identify; - c.identify.cns = cpu_to_le32(NVME_ID_CNS_NS_ACTIVE_LIST); + c.identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST; c.identify.nsid = cpu_to_le32(nsid); return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000); } @@ -568,8 +596,9 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid, int error; /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ - c.identify.opcode = nvme_admin_identify, - c.identify.nsid = cpu_to_le32(nsid), + c.identify.opcode = nvme_admin_identify; + c.identify.nsid = cpu_to_le32(nsid); + c.identify.cns = NVME_ID_CNS_NS; *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL); if (!*id) @@ -784,6 +813,13 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, return nvme_sg_io(ns, (void __user *)arg); #endif default: +#ifdef CONFIG_NVM + if (ns->ndev) + return nvme_nvm_ioctl(ns, cmd, arg); +#endif + if (is_sed_ioctl(cmd)) + return sed_ioctl(ns->ctrl->opal_dev, cmd, + (void __user *) arg); return -ENOTTY; } } @@ -861,6 +897,9 @@ static void nvme_config_discard(struct nvme_ns *ns) struct nvme_ctrl *ctrl = ns->ctrl; u32 logical_block_size = queue_logical_block_size(ns->queue); + BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < + NVME_DSM_MAX_RANGES); + if (ctrl->quirks & NVME_QUIRK_DISCARD_ZEROES) ns->queue->limits.discard_zeroes_data = 1; else @@ -869,6 +908,7 @@ static void nvme_config_discard(struct nvme_ns *ns) ns->queue->limits.discard_alignment = logical_block_size; ns->queue->limits.discard_granularity = logical_block_size; blk_queue_max_discard_sectors(ns->queue, UINT_MAX); + blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); } @@ -1051,6 +1091,28 @@ static const struct pr_ops nvme_pr_ops = { .pr_clear = nvme_pr_clear, }; +#ifdef CONFIG_BLK_SED_OPAL +int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, + bool send) +{ + struct nvme_ctrl *ctrl = data; + struct nvme_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + if (send) + cmd.common.opcode = nvme_admin_security_send; + else + cmd.common.opcode = nvme_admin_security_recv; + cmd.common.nsid = 0; + cmd.common.cdw10[0] = cpu_to_le32(((u32)secp) << 24 | ((u32)spsp) << 8); + cmd.common.cdw10[1] = cpu_to_le32(len); + + return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len, + ADMIN_TIMEOUT, NVME_QID_ANY, 1, 0); +} +EXPORT_SYMBOL_GPL(nvme_sec_submit); +#endif /* CONFIG_BLK_SED_OPAL */ + static const struct block_device_operations nvme_fops = { .owner = THIS_MODULE, .ioctl = nvme_ioctl, @@ -1106,12 +1168,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap) if (ret) return ret; - /* Checking for ctrl->tagset is a trick to avoid sleeping on module - * load, since we only need the quirk on reset_controller. Notice - * that the HGST device needs this delay only in firmware activation - * procedure; unfortunately we have no (easy) way to verify this. - */ - if ((ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) && ctrl->tagset) + if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) msleep(NVME_QUIRK_DELAY_AMOUNT); return nvme_wait_ready(ctrl, cap, false); @@ -1193,14 +1250,184 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } - if (ctrl->stripe_size) - blk_queue_chunk_sectors(q, ctrl->stripe_size >> 9); + if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) + blk_queue_chunk_sectors(q, ctrl->max_hw_sectors); blk_queue_virt_boundary(q, ctrl->page_size - 1); if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) vwc = true; blk_queue_write_cache(q, vwc, vwc); } +static void nvme_configure_apst(struct nvme_ctrl *ctrl) +{ + /* + * APST (Autonomous Power State Transition) lets us program a + * table of power state transitions that the controller will + * perform automatically. We configure it with a simple + * heuristic: we are willing to spend at most 2% of the time + * transitioning between power states. Therefore, when running + * in any given state, we will enter the next lower-power + * non-operational state after waiting 100 * (enlat + exlat) + * microseconds, as long as that state's total latency is under + * the requested maximum latency. + * + * We will not autonomously enter any non-operational state for + * which the total latency exceeds ps_max_latency_us. Users + * can set ps_max_latency_us to zero to turn off APST. + */ + + unsigned apste; + struct nvme_feat_auto_pst *table; + int ret; + + /* + * If APST isn't supported or if we haven't been initialized yet, + * then don't do anything. + */ + if (!ctrl->apsta) + return; + + if (ctrl->npss > 31) { + dev_warn(ctrl->device, "NPSS is invalid; not using APST\n"); + return; + } + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return; + + if (ctrl->ps_max_latency_us == 0) { + /* Turn off APST. */ + apste = 0; + } else { + __le64 target = cpu_to_le64(0); + int state; + + /* + * Walk through all states from lowest- to highest-power. + * According to the spec, lower-numbered states use more + * power. NPSS, despite the name, is the index of the + * lowest-power state, not the number of states. + */ + for (state = (int)ctrl->npss; state >= 0; state--) { + u64 total_latency_us, transition_ms; + + if (target) + table->entries[state] = target; + + /* + * Is this state a useful non-operational state for + * higher-power states to autonomously transition to? + */ + if (!(ctrl->psd[state].flags & + NVME_PS_FLAGS_NON_OP_STATE)) + continue; + + total_latency_us = + (u64)le32_to_cpu(ctrl->psd[state].entry_lat) + + + le32_to_cpu(ctrl->psd[state].exit_lat); + if (total_latency_us > ctrl->ps_max_latency_us) + continue; + + /* + * This state is good. Use it as the APST idle + * target for higher power states. + */ + transition_ms = total_latency_us + 19; + do_div(transition_ms, 20); + if (transition_ms > (1 << 24) - 1) + transition_ms = (1 << 24) - 1; + + target = cpu_to_le64((state << 3) | + (transition_ms << 8)); + } + + apste = 1; + } + + ret = nvme_set_features(ctrl, NVME_FEAT_AUTO_PST, apste, + table, sizeof(*table), NULL); + if (ret) + dev_err(ctrl->device, "failed to set APST feature (%d)\n", ret); + + kfree(table); +} + +static void nvme_set_latency_tolerance(struct device *dev, s32 val) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + u64 latency; + + switch (val) { + case PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT: + case PM_QOS_LATENCY_ANY: + latency = U64_MAX; + break; + + default: + latency = val; + } + + if (ctrl->ps_max_latency_us != latency) { + ctrl->ps_max_latency_us = latency; + nvme_configure_apst(ctrl); + } +} + +struct nvme_core_quirk_entry { + /* + * NVMe model and firmware strings are padded with spaces. For + * simplicity, strings in the quirk table are padded with NULLs + * instead. + */ + u16 vid; + const char *mn; + const char *fr; + unsigned long quirks; +}; + +static const struct nvme_core_quirk_entry core_quirks[] = { + /* + * Seen on a Samsung "SM951 NVMe SAMSUNG 256GB": using APST causes + * the controller to go out to lunch. It dies when the watchdog + * timer reads CSTS and gets 0xffffffff. + */ + { + .vid = 0x144d, + .fr = "BXW75D0Q", + .quirks = NVME_QUIRK_NO_APST, + }, +}; + +/* match is null-terminated but idstr is space-padded. */ +static bool string_matches(const char *idstr, const char *match, size_t len) +{ + size_t matchlen; + + if (!match) + return true; + + matchlen = strlen(match); + WARN_ON_ONCE(matchlen > len); + + if (memcmp(idstr, match, matchlen)) + return false; + + for (; matchlen < len; matchlen++) + if (idstr[matchlen] != ' ') + return false; + + return true; +} + +static bool quirk_matches(const struct nvme_id_ctrl *id, + const struct nvme_core_quirk_entry *q) +{ + return q->vid == le16_to_cpu(id->vid) && + string_matches(id->mn, q->mn, sizeof(id->mn)) && + string_matches(id->fr, q->fr, sizeof(id->fr)); +} + /* * Initialize the cached copies of the Identify data and various controller * register in our nvme_ctrl structure. This should be called as soon as @@ -1212,6 +1439,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) u64 cap; int ret, page_shift; u32 max_hw_sectors; + u8 prev_apsta; ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs); if (ret) { @@ -1235,6 +1463,25 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) return -EIO; } + if (!ctrl->identified) { + /* + * Check for quirks. Quirk can depend on firmware version, + * so, in principle, the set of quirks present can change + * across a reset. As a possible future enhancement, we + * could re-scan for quirks every time we reinitialize + * the device, but we'd have to make sure that the driver + * behaves intelligently if the quirks change. + */ + + int i; + + for (i = 0; i < ARRAY_SIZE(core_quirks); i++) { + if (quirk_matches(id, &core_quirks[i])) + ctrl->quirks |= core_quirks[i].quirks; + } + } + + ctrl->oacs = le16_to_cpu(id->oacs); ctrl->vid = le16_to_cpu(id->vid); ctrl->oncs = le16_to_cpup(&id->oncs); atomic_set(&ctrl->abort_limit, id->acl + 1); @@ -1250,23 +1497,15 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->max_hw_sectors = min_not_zero(ctrl->max_hw_sectors, max_hw_sectors); - if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) { - unsigned int max_hw_sectors; - - ctrl->stripe_size = 1 << (id->vs[3] + page_shift); - max_hw_sectors = ctrl->stripe_size >> (page_shift - 9); - if (ctrl->max_hw_sectors) { - ctrl->max_hw_sectors = min(max_hw_sectors, - ctrl->max_hw_sectors); - } else { - ctrl->max_hw_sectors = max_hw_sectors; - } - } - nvme_set_queue_limits(ctrl, ctrl->admin_q); ctrl->sgls = le32_to_cpu(id->sgls); ctrl->kas = le16_to_cpu(id->kas); + ctrl->npss = id->npss; + prev_apsta = ctrl->apsta; + ctrl->apsta = (ctrl->quirks & NVME_QUIRK_NO_APST) ? 0 : id->apsta; + memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd)); + if (ctrl->ops->is_fabrics) { ctrl->icdoff = le16_to_cpu(id->icdoff); ctrl->ioccsz = le32_to_cpu(id->ioccsz); @@ -1290,6 +1529,16 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) } kfree(id); + + if (ctrl->apsta && !prev_apsta) + dev_pm_qos_expose_latency_tolerance(ctrl->device); + else if (!ctrl->apsta && prev_apsta) + dev_pm_qos_hide_latency_tolerance(ctrl->device); + + nvme_configure_apst(ctrl); + + ctrl->identified = true; + return ret; } EXPORT_SYMBOL_GPL(nvme_init_identify); @@ -1539,6 +1788,29 @@ static ssize_t nvme_sysfs_show_transport(struct device *dev, } static DEVICE_ATTR(transport, S_IRUGO, nvme_sysfs_show_transport, NULL); +static ssize_t nvme_sysfs_show_state(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + static const char *const state_name[] = { + [NVME_CTRL_NEW] = "new", + [NVME_CTRL_LIVE] = "live", + [NVME_CTRL_RESETTING] = "resetting", + [NVME_CTRL_RECONNECTING]= "reconnecting", + [NVME_CTRL_DELETING] = "deleting", + [NVME_CTRL_DEAD] = "dead", + }; + + if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) && + state_name[ctrl->state]) + return sprintf(buf, "%s\n", state_name[ctrl->state]); + + return sprintf(buf, "unknown state\n"); +} + +static DEVICE_ATTR(state, S_IRUGO, nvme_sysfs_show_state, NULL); + static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev, struct device_attribute *attr, char *buf) @@ -1571,6 +1843,7 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_transport.attr, &dev_attr_subsysnqn.attr, &dev_attr_address.attr, + &dev_attr_state.attr, NULL }; @@ -2027,6 +2300,14 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, list_add_tail(&ctrl->node, &nvme_ctrl_list); spin_unlock(&dev_list_lock); + /* + * Initialize latency tolerance controls. The sysfs files won't + * be visible to userspace unless the device actually supports APST. + */ + ctrl->device->power.set_latency_tolerance = nvme_set_latency_tolerance; + dev_pm_qos_update_user_latency_tolerance(ctrl->device, + min(default_ps_max_latency_us, (unsigned long)S32_MAX)); + return 0; out_release_instance: nvme_release_instance(ctrl); @@ -2052,9 +2333,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) * Revalidating a dead namespace sets capacity to 0. This will * end buffered writers dirtying pages that can't be synced. */ - if (ns->disk && !test_and_set_bit(NVME_NS_DEAD, &ns->flags)) - revalidate_disk(ns->disk); - + if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags)) + continue; + revalidate_disk(ns->disk); blk_set_queue_dying(ns->queue); blk_mq_abort_requeue_list(ns->queue); blk_mq_start_stopped_hw_queues(ns->queue, true); @@ -2063,6 +2344,53 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_kill_queues); +void nvme_unfreeze(struct nvme_ctrl *ctrl) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->namespaces_mutex); + list_for_each_entry(ns, &ctrl->namespaces, list) + blk_mq_unfreeze_queue(ns->queue); + mutex_unlock(&ctrl->namespaces_mutex); +} +EXPORT_SYMBOL_GPL(nvme_unfreeze); + +void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->namespaces_mutex); + list_for_each_entry(ns, &ctrl->namespaces, list) { + timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout); + if (timeout <= 0) + break; + } + mutex_unlock(&ctrl->namespaces_mutex); +} +EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout); + +void nvme_wait_freeze(struct nvme_ctrl *ctrl) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->namespaces_mutex); + list_for_each_entry(ns, &ctrl->namespaces, list) + blk_mq_freeze_queue_wait(ns->queue); + mutex_unlock(&ctrl->namespaces_mutex); +} +EXPORT_SYMBOL_GPL(nvme_wait_freeze); + +void nvme_start_freeze(struct nvme_ctrl *ctrl) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->namespaces_mutex); + list_for_each_entry(ns, &ctrl->namespaces, list) + blk_mq_freeze_queue_start(ns->queue); + mutex_unlock(&ctrl->namespaces_mutex); +} +EXPORT_SYMBOL_GPL(nvme_start_freeze); + void nvme_stop_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 916d13608059..5b7386f69f4d 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -480,11 +480,16 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue); * being implemented to the common NVMe fabrics library. Part of * the overall init sequence of starting up a fabrics driver. */ -void nvmf_register_transport(struct nvmf_transport_ops *ops) +int nvmf_register_transport(struct nvmf_transport_ops *ops) { + if (!ops->create_ctrl) + return -EINVAL; + mutex_lock(&nvmf_transports_mutex); list_add_tail(&ops->entry, &nvmf_transports); mutex_unlock(&nvmf_transports_mutex); + + return 0; } EXPORT_SYMBOL_GPL(nvmf_register_transport); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 924145c979f1..156018182ce4 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -128,7 +128,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val); int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val); int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl); int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid); -void nvmf_register_transport(struct nvmf_transport_ops *ops); +int nvmf_register_transport(struct nvmf_transport_ops *ops); void nvmf_unregister_transport(struct nvmf_transport_ops *ops); void nvmf_free_options(struct nvmf_ctrl_options *opts); const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 771e2e761872..9690beb15e69 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1491,19 +1491,20 @@ static int nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) { struct nvme_fc_queue *queue = &ctrl->queues[1]; - int i, j, ret; + int i, ret; for (i = 1; i < ctrl->queue_count; i++, queue++) { ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize); - if (ret) { - for (j = i-1; j >= 0; j--) - __nvme_fc_delete_hw_queue(ctrl, - &ctrl->queues[j], j); - return ret; - } + if (ret) + goto delete_queues; } return 0; + +delete_queues: + for (; i >= 0; i--) + __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i); + return ret; } static int @@ -1653,23 +1654,22 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq, struct nvme_fc_fcp_op *op) { struct nvmefc_fcp_req *freq = &op->fcp_req; - u32 map_len = nvme_map_len(rq); enum dma_data_direction dir; int ret; freq->sg_cnt = 0; - if (!map_len) + if (!blk_rq_payload_bytes(rq)) return 0; freq->sg_table.sgl = freq->first_sgl; - ret = sg_alloc_table_chained(&freq->sg_table, rq->nr_phys_segments, - freq->sg_table.sgl); + ret = sg_alloc_table_chained(&freq->sg_table, + blk_rq_nr_phys_segments(rq), freq->sg_table.sgl); if (ret) return -ENOMEM; op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl); - WARN_ON(op->nents > rq->nr_phys_segments); + WARN_ON(op->nents > blk_rq_nr_phys_segments(rq)); dir = (rq_data_dir(rq) == WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE; freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents, dir); @@ -1853,7 +1853,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, if (ret) return ret; - data_len = nvme_map_len(rq); + data_len = blk_rq_payload_bytes(rq); if (data_len) io_dir = ((rq_data_dir(rq) == WRITE) ? NVMEFC_FCP_WRITE : NVMEFC_FCP_READ); @@ -1937,7 +1937,7 @@ nvme_fc_complete_rq(struct request *rq) return; } - if (rq->cmd_type == REQ_TYPE_DRV_PRIV) + if (blk_rq_is_passthrough(rq)) error = rq->errors; else error = nvme_error_status(rq->errors); @@ -2353,18 +2353,6 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, /* sanity checks */ - /* FC-NVME supports 64-byte SQE only */ - if (ctrl->ctrl.ioccsz != 4) { - dev_err(ctrl->ctrl.device, "ioccsz %d is not supported!\n", - ctrl->ctrl.ioccsz); - goto out_remove_admin_queue; - } - /* FC-NVME supports 16-byte CQE only */ - if (ctrl->ctrl.iorcsz != 1) { - dev_err(ctrl->ctrl.device, "iorcsz %d is not supported!\n", - ctrl->ctrl.iorcsz); - goto out_remove_admin_queue; - } /* FC-NVME does not have other data in the capsule */ if (ctrl->ctrl.icdoff) { dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", @@ -2401,8 +2389,8 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, WARN_ON_ONCE(!changed); dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: new ctrl: NQN \"%s\" (%p)\n", - ctrl->cnum, ctrl->ctrl.opts->subsysnqn, &ctrl); + "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", + ctrl->cnum, ctrl->ctrl.opts->subsysnqn); kref_get(&ctrl->ctrl.kref); @@ -2562,8 +2550,7 @@ static int __init nvme_fc_init_module(void) if (!nvme_fc_wq) return -ENOMEM; - nvmf_register_transport(&nvme_fc_transport); - return 0; + return nvmf_register_transport(&nvme_fc_transport); } static void __exit nvme_fc_exit_module(void) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 588d4a34c083..21cac8523bd8 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -26,6 +26,8 @@ #include <linux/bitops.h> #include <linux/lightnvm.h> #include <linux/vmalloc.h> +#include <linux/sched/sysctl.h> +#include <uapi/linux/lightnvm.h> enum nvme_nvm_admin_opcode { nvme_nvm_admin_identity = 0xe2, @@ -248,50 +250,48 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id) { struct nvme_nvm_id_group *src; struct nvm_id_group *dst; - int i, end; - - end = min_t(u32, 4, nvm_id->cgrps); - - for (i = 0; i < end; i++) { - src = &nvme_nvm_id->groups[i]; - dst = &nvm_id->groups[i]; - - dst->mtype = src->mtype; - dst->fmtype = src->fmtype; - dst->num_ch = src->num_ch; - dst->num_lun = src->num_lun; - dst->num_pln = src->num_pln; - - dst->num_pg = le16_to_cpu(src->num_pg); - dst->num_blk = le16_to_cpu(src->num_blk); - dst->fpg_sz = le16_to_cpu(src->fpg_sz); - dst->csecs = le16_to_cpu(src->csecs); - dst->sos = le16_to_cpu(src->sos); - - dst->trdt = le32_to_cpu(src->trdt); - dst->trdm = le32_to_cpu(src->trdm); - dst->tprt = le32_to_cpu(src->tprt); - dst->tprm = le32_to_cpu(src->tprm); - dst->tbet = le32_to_cpu(src->tbet); - dst->tbem = le32_to_cpu(src->tbem); - dst->mpos = le32_to_cpu(src->mpos); - dst->mccap = le32_to_cpu(src->mccap); - - dst->cpar = le16_to_cpu(src->cpar); - - if (dst->fmtype == NVM_ID_FMTYPE_MLC) { - memcpy(dst->lptbl.id, src->lptbl.id, 8); - dst->lptbl.mlc.num_pairs = - le16_to_cpu(src->lptbl.mlc.num_pairs); - - if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) { - pr_err("nvm: number of MLC pairs not supported\n"); - return -EINVAL; - } - memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs, - dst->lptbl.mlc.num_pairs); + if (nvme_nvm_id->cgrps != 1) + return -EINVAL; + + src = &nvme_nvm_id->groups[0]; + dst = &nvm_id->grp; + + dst->mtype = src->mtype; + dst->fmtype = src->fmtype; + dst->num_ch = src->num_ch; + dst->num_lun = src->num_lun; + dst->num_pln = src->num_pln; + + dst->num_pg = le16_to_cpu(src->num_pg); + dst->num_blk = le16_to_cpu(src->num_blk); + dst->fpg_sz = le16_to_cpu(src->fpg_sz); + dst->csecs = le16_to_cpu(src->csecs); + dst->sos = le16_to_cpu(src->sos); + + dst->trdt = le32_to_cpu(src->trdt); + dst->trdm = le32_to_cpu(src->trdm); + dst->tprt = le32_to_cpu(src->tprt); + dst->tprm = le32_to_cpu(src->tprm); + dst->tbet = le32_to_cpu(src->tbet); + dst->tbem = le32_to_cpu(src->tbem); + dst->mpos = le32_to_cpu(src->mpos); + dst->mccap = le32_to_cpu(src->mccap); + + dst->cpar = le16_to_cpu(src->cpar); + + if (dst->fmtype == NVM_ID_FMTYPE_MLC) { + memcpy(dst->lptbl.id, src->lptbl.id, 8); + dst->lptbl.mlc.num_pairs = + le16_to_cpu(src->lptbl.mlc.num_pairs); + + if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) { + pr_err("nvm: number of MLC pairs not supported\n"); + return -EINVAL; } + + memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs, + dst->lptbl.mlc.num_pairs); } return 0; @@ -321,7 +321,6 @@ static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id) nvm_id->ver_id = nvme_nvm_id->ver_id; nvm_id->vmnt = nvme_nvm_id->vmnt; - nvm_id->cgrps = nvme_nvm_id->cgrps; nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap); nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom); memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf, @@ -372,7 +371,7 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb, } /* Transform physical address to target address space */ - nvmdev->mt->part_to_tgt(nvmdev, entries, cmd_nlb); + nvm_part_to_tgt(nvmdev, entries, cmd_nlb); if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) { ret = -EINTR; @@ -485,7 +484,8 @@ static void nvme_nvm_end_io(struct request *rq, int error) struct nvm_rq *rqd = rq->end_io_data; rqd->ppa_status = nvme_req(rq)->result.u64; - nvm_end_io(rqd, error); + rqd->error = error; + nvm_end_io(rqd); kfree(nvme_req(rq)->cmd); blk_mq_free_request(rq); @@ -586,6 +586,224 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = { .max_phys_sect = 64, }; +static void nvme_nvm_end_user_vio(struct request *rq, int error) +{ + struct completion *waiting = rq->end_io_data; + + complete(waiting); +} + +static int nvme_nvm_submit_user_cmd(struct request_queue *q, + struct nvme_ns *ns, + struct nvme_nvm_command *vcmd, + void __user *ubuf, unsigned int bufflen, + void __user *meta_buf, unsigned int meta_len, + void __user *ppa_buf, unsigned int ppa_len, + u32 *result, u64 *status, unsigned int timeout) +{ + bool write = nvme_is_write((struct nvme_command *)vcmd); + struct nvm_dev *dev = ns->ndev; + struct gendisk *disk = ns->disk; + struct request *rq; + struct bio *bio = NULL; + __le64 *ppa_list = NULL; + dma_addr_t ppa_dma; + __le64 *metadata = NULL; + dma_addr_t metadata_dma; + DECLARE_COMPLETION_ONSTACK(wait); + int ret; + + rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0, + NVME_QID_ANY); + if (IS_ERR(rq)) { + ret = -ENOMEM; + goto err_cmd; + } + + rq->timeout = timeout ? timeout : ADMIN_TIMEOUT; + + rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; + rq->end_io_data = &wait; + + if (ppa_buf && ppa_len) { + ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma); + if (!ppa_list) { + ret = -ENOMEM; + goto err_rq; + } + if (copy_from_user(ppa_list, (void __user *)ppa_buf, + sizeof(u64) * (ppa_len + 1))) { + ret = -EFAULT; + goto err_ppa; + } + vcmd->ph_rw.spba = cpu_to_le64(ppa_dma); + } else { + vcmd->ph_rw.spba = cpu_to_le64((uintptr_t)ppa_buf); + } + + if (ubuf && bufflen) { + ret = blk_rq_map_user(q, rq, NULL, ubuf, bufflen, GFP_KERNEL); + if (ret) + goto err_ppa; + bio = rq->bio; + + if (meta_buf && meta_len) { + metadata = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, + &metadata_dma); + if (!metadata) { + ret = -ENOMEM; + goto err_map; + } + + if (write) { + if (copy_from_user(metadata, + (void __user *)meta_buf, + meta_len)) { + ret = -EFAULT; + goto err_meta; + } + } + vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma); + } + + if (!disk) + goto submit; + + bio->bi_bdev = bdget_disk(disk, 0); + if (!bio->bi_bdev) { + ret = -ENODEV; + goto err_meta; + } + } + +submit: + blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_user_vio); + + wait_for_completion_io(&wait); + + ret = nvme_error_status(rq->errors); + if (result) + *result = rq->errors & 0x7ff; + if (status) + *status = le64_to_cpu(nvme_req(rq)->result.u64); + + if (metadata && !ret && !write) { + if (copy_to_user(meta_buf, (void *)metadata, meta_len)) + ret = -EFAULT; + } +err_meta: + if (meta_buf && meta_len) + dma_pool_free(dev->dma_pool, metadata, metadata_dma); +err_map: + if (bio) { + if (disk && bio->bi_bdev) + bdput(bio->bi_bdev); + blk_rq_unmap_user(bio); + } +err_ppa: + if (ppa_buf && ppa_len) + dma_pool_free(dev->dma_pool, ppa_list, ppa_dma); +err_rq: + blk_mq_free_request(rq); +err_cmd: + return ret; +} + +static int nvme_nvm_submit_vio(struct nvme_ns *ns, + struct nvm_user_vio __user *uvio) +{ + struct nvm_user_vio vio; + struct nvme_nvm_command c; + unsigned int length; + int ret; + + if (copy_from_user(&vio, uvio, sizeof(vio))) + return -EFAULT; + if (vio.flags) + return -EINVAL; + + memset(&c, 0, sizeof(c)); + c.ph_rw.opcode = vio.opcode; + c.ph_rw.nsid = cpu_to_le32(ns->ns_id); + c.ph_rw.control = cpu_to_le16(vio.control); + c.ph_rw.length = cpu_to_le16(vio.nppas); + + length = (vio.nppas + 1) << ns->lba_shift; + + ret = nvme_nvm_submit_user_cmd(ns->queue, ns, &c, + (void __user *)(uintptr_t)vio.addr, length, + (void __user *)(uintptr_t)vio.metadata, + vio.metadata_len, + (void __user *)(uintptr_t)vio.ppa_list, vio.nppas, + &vio.result, &vio.status, 0); + + if (ret && copy_to_user(uvio, &vio, sizeof(vio))) + return -EFAULT; + + return ret; +} + +static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin, + struct nvm_passthru_vio __user *uvcmd) +{ + struct nvm_passthru_vio vcmd; + struct nvme_nvm_command c; + struct request_queue *q; + unsigned int timeout = 0; + int ret; + + if (copy_from_user(&vcmd, uvcmd, sizeof(vcmd))) + return -EFAULT; + if ((vcmd.opcode != 0xF2) && (!capable(CAP_SYS_ADMIN))) + return -EACCES; + if (vcmd.flags) + return -EINVAL; + + memset(&c, 0, sizeof(c)); + c.common.opcode = vcmd.opcode; + c.common.nsid = cpu_to_le32(ns->ns_id); + c.common.cdw2[0] = cpu_to_le32(vcmd.cdw2); + c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3); + /* cdw11-12 */ + c.ph_rw.length = cpu_to_le16(vcmd.nppas); + c.ph_rw.control = cpu_to_le32(vcmd.control); + c.common.cdw10[3] = cpu_to_le32(vcmd.cdw13); + c.common.cdw10[4] = cpu_to_le32(vcmd.cdw14); + c.common.cdw10[5] = cpu_to_le32(vcmd.cdw15); + + if (vcmd.timeout_ms) + timeout = msecs_to_jiffies(vcmd.timeout_ms); + + q = admin ? ns->ctrl->admin_q : ns->queue; + + ret = nvme_nvm_submit_user_cmd(q, ns, + (struct nvme_nvm_command *)&c, + (void __user *)(uintptr_t)vcmd.addr, vcmd.data_len, + (void __user *)(uintptr_t)vcmd.metadata, + vcmd.metadata_len, + (void __user *)(uintptr_t)vcmd.ppa_list, vcmd.nppas, + &vcmd.result, &vcmd.status, timeout); + + if (ret && copy_to_user(uvcmd, &vcmd, sizeof(vcmd))) + return -EFAULT; + + return ret; +} + +int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case NVME_NVM_IOCTL_ADMIN_VIO: + return nvme_nvm_user_vcmd(ns, 1, (void __user *)arg); + case NVME_NVM_IOCTL_IO_VIO: + return nvme_nvm_user_vcmd(ns, 0, (void __user *)arg); + case NVME_NVM_IOCTL_SUBMIT_VIO: + return nvme_nvm_submit_vio(ns, (void __user *)arg); + default: + return -ENOTTY; + } +} + int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) { struct request_queue *q = ns->queue; @@ -622,7 +840,7 @@ static ssize_t nvm_dev_attr_show(struct device *dev, return 0; id = &ndev->identity; - grp = &id->groups[0]; + grp = &id->grp; attr = &dattr->attr; if (strcmp(attr->name, "version") == 0) { @@ -633,10 +851,9 @@ static ssize_t nvm_dev_attr_show(struct device *dev, return scnprintf(page, PAGE_SIZE, "%u\n", id->cap); } else if (strcmp(attr->name, "device_mode") == 0) { return scnprintf(page, PAGE_SIZE, "%u\n", id->dom); + /* kept for compatibility */ } else if (strcmp(attr->name, "media_manager") == 0) { - if (!ndev->mt) - return scnprintf(page, PAGE_SIZE, "%s\n", "none"); - return scnprintf(page, PAGE_SIZE, "%s\n", ndev->mt->name); + return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm"); } else if (strcmp(attr->name, "ppa_format") == 0) { return scnprintf(page, PAGE_SIZE, "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index bd5321441d12..2aa20e3e5675 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -19,6 +19,7 @@ #include <linux/kref.h> #include <linux/blk-mq.h> #include <linux/lightnvm.h> +#include <linux/sed-opal.h> enum { /* @@ -77,6 +78,11 @@ enum nvme_quirks { * readiness, which is done by reading the NVME_CSTS_RDY bit. */ NVME_QUIRK_DELAY_BEFORE_CHK_RDY = (1 << 3), + + /* + * APST should not be used. + */ + NVME_QUIRK_NO_APST = (1 << 4), }; /* @@ -111,6 +117,7 @@ enum nvme_ctrl_state { struct nvme_ctrl { enum nvme_ctrl_state state; + bool identified; spinlock_t lock; const struct nvme_ctrl_ops *ops; struct request_queue *admin_q; @@ -125,6 +132,8 @@ struct nvme_ctrl { struct list_head node; struct ida ns_ida; + struct opal_dev *opal_dev; + char name[12]; char serial[20]; char model[40]; @@ -135,22 +144,28 @@ struct nvme_ctrl { u32 page_size; u32 max_hw_sectors; - u32 stripe_size; u16 oncs; u16 vid; + u16 oacs; atomic_t abort_limit; u8 event_limit; u8 vwc; u32 vs; u32 sgls; u16 kas; + u8 npss; + u8 apsta; unsigned int kato; bool subsystem; unsigned long quirks; + struct nvme_id_power_state psd[32]; struct work_struct scan_work; struct work_struct async_event_work; struct delayed_work ka_work; + /* Power saving configuration */ + u64 ps_max_latency_us; + /* Fabrics only */ u16 sqsize; u32 ioccsz; @@ -226,14 +241,6 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) return (sector >> (ns->lba_shift - 9)); } -static inline unsigned nvme_map_len(struct request *rq) -{ - if (req_op(rq) == REQ_OP_DISCARD) - return sizeof(struct nvme_dsm_range); - else - return blk_rq_bytes(rq); -} - static inline void nvme_cleanup_cmd(struct request *req) { if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { @@ -276,6 +283,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl); void nvme_queue_scan(struct nvme_ctrl *ctrl); void nvme_remove_namespaces(struct nvme_ctrl *ctrl); +int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, + bool send); + #define NVME_NR_AERS 1 void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, union nvme_result *res); @@ -284,6 +294,10 @@ void nvme_queue_async_events(struct nvme_ctrl *ctrl); void nvme_stop_queues(struct nvme_ctrl *ctrl); void nvme_start_queues(struct nvme_ctrl *ctrl); void nvme_kill_queues(struct nvme_ctrl *ctrl); +void nvme_unfreeze(struct nvme_ctrl *ctrl); +void nvme_wait_freeze(struct nvme_ctrl *ctrl); +void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); +void nvme_start_freeze(struct nvme_ctrl *ctrl); #define NVME_QID_ANY -1 struct request *nvme_alloc_request(struct request_queue *q, @@ -327,6 +341,7 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); void nvme_nvm_unregister(struct nvme_ns *ns); int nvme_nvm_register_sysfs(struct nvme_ns *ns); void nvme_nvm_unregister_sysfs(struct nvme_ns *ns); +int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg); #else static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) @@ -344,6 +359,11 @@ static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *i { return 0; } +static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, + unsigned long arg) +{ + return -ENOTTY; +} #endif /* CONFIG_NVM */ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3d21a154dce7..26a5fd05fe88 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -43,6 +43,7 @@ #include <linux/types.h> #include <linux/io-64-nonatomic-lo-hi.h> #include <asm/unaligned.h> +#include <linux/sed-opal.h> #include "nvme.h" @@ -306,11 +307,11 @@ static __le64 **iod_list(struct request *req) return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req)); } -static int nvme_init_iod(struct request *rq, unsigned size, - struct nvme_dev *dev) +static int nvme_init_iod(struct request *rq, struct nvme_dev *dev) { struct nvme_iod *iod = blk_mq_rq_to_pdu(rq); int nseg = blk_rq_nr_phys_segments(rq); + unsigned int size = blk_rq_payload_bytes(rq); if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC); @@ -420,12 +421,11 @@ static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi) } #endif -static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req, - int total_len) +static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct dma_pool *pool; - int length = total_len; + int length = blk_rq_payload_bytes(req); struct scatterlist *sg = iod->sg; int dma_len = sg_dma_len(sg); u64 dma_addr = sg_dma_address(sg); @@ -501,7 +501,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req, } static int nvme_map_data(struct nvme_dev *dev, struct request *req, - unsigned size, struct nvme_command *cmnd) + struct nvme_command *cmnd) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct request_queue *q = req->q; @@ -519,7 +519,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req, DMA_ATTR_NO_WARN)) goto out; - if (!nvme_setup_prps(dev, req, size)) + if (!nvme_setup_prps(dev, req)) goto out_unmap; ret = BLK_MQ_RQ_QUEUE_ERROR; @@ -580,7 +580,6 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_dev *dev = nvmeq->dev; struct request *req = bd->rq; struct nvme_command cmnd; - unsigned map_len; int ret = BLK_MQ_RQ_QUEUE_OK; /* @@ -590,7 +589,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, */ if (ns && ns->ms && !blk_integrity_rq(req)) { if (!(ns->pi_type && ns->ms == 8) && - req->cmd_type != REQ_TYPE_DRV_PRIV) { + !blk_rq_is_passthrough(req)) { blk_mq_end_request(req, -EFAULT); return BLK_MQ_RQ_QUEUE_OK; } @@ -600,13 +599,12 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, if (ret != BLK_MQ_RQ_QUEUE_OK) return ret; - map_len = nvme_map_len(req); - ret = nvme_init_iod(req, map_len, dev); + ret = nvme_init_iod(req, dev); if (ret != BLK_MQ_RQ_QUEUE_OK) goto out_free_cmd; if (blk_rq_nr_phys_segments(req)) - ret = nvme_map_data(dev, req, map_len, &cmnd); + ret = nvme_map_data(dev, req, &cmnd); if (ret != BLK_MQ_RQ_QUEUE_OK) goto out_cleanup_iod; @@ -615,10 +613,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, spin_lock_irq(&nvmeq->q_lock); if (unlikely(nvmeq->cq_vector < 0)) { - if (ns && !test_bit(NVME_NS_DEAD, &ns->flags)) - ret = BLK_MQ_RQ_QUEUE_BUSY; - else - ret = BLK_MQ_RQ_QUEUE_ERROR; + ret = BLK_MQ_RQ_QUEUE_ERROR; spin_unlock_irq(&nvmeq->q_lock); goto out_cleanup_iod; } @@ -648,7 +643,7 @@ static void nvme_complete_rq(struct request *req) return; } - if (req->cmd_type == REQ_TYPE_DRV_PRIV) + if (blk_rq_is_passthrough(req)) error = req->errors; else error = nvme_error_status(req->errors); @@ -712,15 +707,8 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); nvme_req(req)->result = cqe.result; blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1); - } - /* If the controller ignores the cq head doorbell and continuously - * writes to the queue, it is theoretically possible to wrap around - * the queue twice and mistakenly return IRQ_NONE. Linux only - * requires that 0.1% of your interrupts are handled, so this isn't - * a big problem. - */ if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) return; @@ -905,12 +893,11 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) return BLK_EH_HANDLED; } - iod->aborted = 1; - if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) { atomic_inc(&dev->ctrl.abort_limit); return BLK_EH_RESET_TIMER; } + iod->aborted = 1; memset(&cmd, 0, sizeof(cmd)); cmd.abort.opcode = nvme_admin_abort_cmd; @@ -1051,9 +1038,10 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, } static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, - int depth) + int depth, int node) { - struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL); + struct nvme_queue *nvmeq = kzalloc_node(sizeof(*nvmeq), GFP_KERNEL, + node); if (!nvmeq) return NULL; @@ -1188,6 +1176,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) dev->admin_tagset.timeout = ADMIN_TIMEOUT; dev->admin_tagset.numa_node = dev_to_node(dev->dev); dev->admin_tagset.cmd_size = nvme_cmd_size(dev); + dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED; dev->admin_tagset.driver_data = dev; if (blk_mq_alloc_tag_set(&dev->admin_tagset)) @@ -1229,7 +1218,8 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) nvmeq = dev->queues[0]; if (!nvmeq) { - nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH); + nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH, + dev_to_node(dev->dev)); if (!nvmeq) return -ENOMEM; } @@ -1321,7 +1311,9 @@ static int nvme_create_io_queues(struct nvme_dev *dev) int ret = 0; for (i = dev->queue_count; i <= dev->max_qid; i++) { - if (!nvme_alloc_queue(dev, i, dev->q_depth)) { + /* vector == qid - 1, match nvme_create_queue */ + if (!nvme_alloc_queue(dev, i, dev->q_depth, + pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) { ret = -ENOMEM; break; } @@ -1683,21 +1675,34 @@ static void nvme_pci_disable(struct nvme_dev *dev) static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) { int i, queues; - u32 csts = -1; + bool dead = true; + struct pci_dev *pdev = to_pci_dev(dev->dev); del_timer_sync(&dev->watchdog_timer); mutex_lock(&dev->shutdown_lock); - if (pci_is_enabled(to_pci_dev(dev->dev))) { - nvme_stop_queues(&dev->ctrl); - csts = readl(dev->bar + NVME_REG_CSTS); + if (pci_is_enabled(pdev)) { + u32 csts = readl(dev->bar + NVME_REG_CSTS); + + if (dev->ctrl.state == NVME_CTRL_LIVE) + nvme_start_freeze(&dev->ctrl); + dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) || + pdev->error_state != pci_channel_io_normal); } + /* + * Give the controller a chance to complete all entered requests if + * doing a safe shutdown. + */ + if (!dead && shutdown) + nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); + nvme_stop_queues(&dev->ctrl); + queues = dev->online_queues - 1; for (i = dev->queue_count - 1; i > 0; i--) nvme_suspend_queue(dev->queues[i]); - if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) { + if (dead) { /* A device might become IO incapable very soon during * probe, before the admin queue is configured. Thus, * queue_count can be 0 here. @@ -1712,6 +1717,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl); blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl); + + /* + * The driver will not be starting up queues again if shutting down so + * must flush all entered requests to their failed completion to avoid + * deadlocking blk-mq hot-cpu notifier. + */ + if (shutdown) + nvme_start_queues(&dev->ctrl); mutex_unlock(&dev->shutdown_lock); } @@ -1748,6 +1761,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) if (dev->ctrl.admin_q) blk_put_queue(dev->ctrl.admin_q); kfree(dev->queues); + free_opal_dev(dev->ctrl.opal_dev); kfree(dev); } @@ -1764,6 +1778,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status) static void nvme_reset_work(struct work_struct *work) { struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); + bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL); int result = -ENODEV; if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING)) @@ -1796,6 +1811,17 @@ static void nvme_reset_work(struct work_struct *work) if (result) goto out; + if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) { + if (!dev->ctrl.opal_dev) + dev->ctrl.opal_dev = + init_opal_dev(&dev->ctrl, &nvme_sec_submit); + else if (was_suspend) + opal_unlock_from_suspend(dev->ctrl.opal_dev); + } else { + free_opal_dev(dev->ctrl.opal_dev); + dev->ctrl.opal_dev = NULL; + } + result = nvme_setup_io_queues(dev); if (result) goto out; @@ -1821,7 +1847,9 @@ static void nvme_reset_work(struct work_struct *work) nvme_remove_namespaces(&dev->ctrl); } else { nvme_start_queues(&dev->ctrl); + nvme_wait_freeze(&dev->ctrl); nvme_dev_add(dev); + nvme_unfreeze(&dev->ctrl); } if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) { @@ -1909,10 +1937,10 @@ static int nvme_dev_map(struct nvme_dev *dev) if (!dev->bar) goto release; - return 0; + return 0; release: - pci_release_mem_regions(pdev); - return -ENODEV; + pci_release_mem_regions(pdev); + return -ENODEV; } static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) @@ -2000,8 +2028,10 @@ static void nvme_remove(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); - if (!pci_device_is_present(pdev)) + if (!pci_device_is_present(pdev)) { nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD); + nvme_dev_disable(dev, false); + } flush_work(&dev->reset_work); nvme_uninit_ctrl(&dev->ctrl); @@ -2120,6 +2150,7 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, + { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, { 0, } }; MODULE_DEVICE_TABLE(pci, nvme_id_table); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index f587af345889..779f516e7a4e 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -42,28 +42,6 @@ #define NVME_RDMA_MAX_INLINE_SEGMENTS 1 -static const char *const nvme_rdma_cm_status_strs[] = { - [NVME_RDMA_CM_INVALID_LEN] = "invalid length", - [NVME_RDMA_CM_INVALID_RECFMT] = "invalid record format", - [NVME_RDMA_CM_INVALID_QID] = "invalid queue ID", - [NVME_RDMA_CM_INVALID_HSQSIZE] = "invalid host SQ size", - [NVME_RDMA_CM_INVALID_HRQSIZE] = "invalid host RQ size", - [NVME_RDMA_CM_NO_RSC] = "resource not found", - [NVME_RDMA_CM_INVALID_IRD] = "invalid IRD", - [NVME_RDMA_CM_INVALID_ORD] = "Invalid ORD", -}; - -static const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status) -{ - size_t index = status; - - if (index < ARRAY_SIZE(nvme_rdma_cm_status_strs) && - nvme_rdma_cm_status_strs[index]) - return nvme_rdma_cm_status_strs[index]; - else - return "unrecognized reason"; -}; - /* * We handle AEN commands ourselves and don't even let the * block layer know about them. @@ -155,6 +133,10 @@ struct nvme_rdma_ctrl { struct sockaddr addr; struct sockaddr_in addr_in; }; + union { + struct sockaddr src_addr; + struct sockaddr_in src_addr_in; + }; struct nvme_ctrl ctrl; }; @@ -567,6 +549,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, int idx, size_t queue_size) { struct nvme_rdma_queue *queue; + struct sockaddr *src_addr = NULL; int ret; queue = &ctrl->queues[idx]; @@ -589,7 +572,10 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, } queue->cm_error = -ETIMEDOUT; - ret = rdma_resolve_addr(queue->cm_id, NULL, &ctrl->addr, + if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) + src_addr = &ctrl->src_addr; + + ret = rdma_resolve_addr(queue->cm_id, src_addr, &ctrl->addr, NVME_RDMA_CONNECT_TIMEOUT_MS); if (ret) { dev_info(ctrl->ctrl.device, @@ -981,8 +967,7 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, } static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, - struct request *rq, unsigned int map_len, - struct nvme_command *c) + struct request *rq, struct nvme_command *c) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_device *dev = queue->device; @@ -1014,9 +999,9 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, } if (count == 1) { - if (rq_data_dir(rq) == WRITE && - map_len <= nvme_rdma_inline_data_size(queue) && - nvme_rdma_queue_idx(queue)) + if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) && + blk_rq_payload_bytes(rq) <= + nvme_rdma_inline_data_size(queue)) return nvme_rdma_map_sg_inline(queue, req, c); if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) @@ -1066,7 +1051,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, * sequencer is not allocated in our driver's tagset and it's * triggered to be freed by blk_cleanup_queue(). So we need to * always mark it as signaled to ensure that the "wr_cqe", which is - * embeded in request's payload, is not freed when __ib_process_cq() + * embedded in request's payload, is not freed when __ib_process_cq() * calls wr_cqe->done(). */ if ((++queue->sig_count % 32) == 0 || flush) @@ -1266,7 +1251,7 @@ static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue) dev = nvme_rdma_find_get_device(queue->cm_id); if (!dev) { - dev_err(queue->cm_id->device->dma_device, + dev_err(queue->cm_id->device->dev.parent, "no client data found!\n"); return -ECONNREFUSED; } @@ -1422,9 +1407,9 @@ static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, struct request *rq) { if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) { - struct nvme_command *cmd = (struct nvme_command *)rq->cmd; + struct nvme_command *cmd = nvme_req(rq)->cmd; - if (rq->cmd_type != REQ_TYPE_DRV_PRIV || + if (!blk_rq_is_passthrough(rq) || cmd->common.opcode != nvme_fabrics_command || cmd->fabrics.fctype != nvme_fabrics_type_connect) return false; @@ -1444,7 +1429,6 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_command *c = sqe->data; bool flush = false; struct ib_device *dev; - unsigned int map_len; int ret; WARN_ON_ONCE(rq->tag < 0); @@ -1462,8 +1446,7 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(rq); - map_len = nvme_map_len(rq); - ret = nvme_rdma_map_data(queue, rq, map_len, c); + ret = nvme_rdma_map_data(queue, rq, c); if (ret < 0) { dev_err(queue->ctrl->ctrl.device, "Failed to map data (%d)\n", ret); @@ -1474,7 +1457,7 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(struct nvme_command), DMA_TO_DEVICE); - if (rq->cmd_type == REQ_TYPE_FS && req_op(rq) == REQ_OP_FLUSH) + if (req_op(rq) == REQ_OP_FLUSH) flush = true; ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, req->mr->need_inval ? &req->reg_wr.wr : NULL, flush); @@ -1525,7 +1508,7 @@ static void nvme_rdma_complete_rq(struct request *rq) return; } - if (rq->cmd_type == REQ_TYPE_DRV_PRIV) + if (blk_rq_is_passthrough(rq)) error = rq->errors; else error = nvme_error_status(rq->errors); @@ -1908,6 +1891,16 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, goto out_free_ctrl; } + if (opts->mask & NVMF_OPT_HOST_TRADDR) { + ret = nvme_rdma_parse_ipaddr(&ctrl->src_addr_in, + opts->host_traddr); + if (ret) { + pr_err("malformed src IP address passed: %s\n", + opts->host_traddr); + goto out_free_ctrl; + } + } + if (opts->mask & NVMF_OPT_TRSVCID) { u16 port; @@ -2019,7 +2012,8 @@ out_free_ctrl: static struct nvmf_transport_ops nvme_rdma_transport = { .name = "rdma", .required_opts = NVMF_OPT_TRADDR, - .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY, + .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY | + NVMF_OPT_HOST_TRADDR, .create_ctrl = nvme_rdma_create_ctrl, }; @@ -2066,8 +2060,7 @@ static int __init nvme_rdma_init_module(void) return ret; } - nvmf_register_transport(&nvme_rdma_transport); - return 0; + return nvmf_register_transport(&nvme_rdma_transport); } static void __exit nvme_rdma_cleanup_module(void) diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c index b71e95044b43..f49ae2758bb7 100644 --- a/drivers/nvme/host/scsi.c +++ b/drivers/nvme/host/scsi.c @@ -43,6 +43,7 @@ #include <asm/unaligned.h> #include <scsi/sg.h> #include <scsi/scsi.h> +#include <scsi/scsi_request.h> #include "nvme.h" @@ -2160,30 +2161,6 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns, return nvme_trans_status_code(hdr, nvme_sc); } -static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, - u8 *cmd) -{ - u8 immed, no_flush; - - immed = cmd[1] & 0x01; - no_flush = cmd[4] & 0x04; - - if (immed != 0) { - return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, - ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, - SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - } else { - if (no_flush == 0) { - /* Issue NVME FLUSH command prior to START STOP UNIT */ - int res = nvme_trans_synchronize_cache(ns, hdr); - if (res) - return res; - } - - return 0; - } -} - static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { @@ -2371,12 +2348,14 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr) { - u8 cmd[BLK_MAX_CDB]; + u8 cmd[16]; int retcode; unsigned int opcode; if (hdr->cmdp == NULL) return -EMSGSIZE; + if (hdr->cmd_len > sizeof(cmd)) + return -EINVAL; if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len)) return -EFAULT; @@ -2439,9 +2418,6 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr) case SECURITY_PROTOCOL_OUT: retcode = nvme_trans_security_protocol(ns, hdr, cmd); break; - case START_STOP: - retcode = nvme_trans_start_stop(ns, hdr, cmd); - break; case SYNCHRONIZE_CACHE: retcode = nvme_trans_synchronize_cache(ns, hdr); break; @@ -2478,8 +2454,6 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr) return -EFAULT; if (hdr.interface_id != 'S') return -EINVAL; - if (hdr.cmd_len > BLK_MAX_CDB) - return -EINVAL; /* * A positive return code means a NVMe status, which has been diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index ec1ad2aa0a4c..a7bcff45f437 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -13,6 +13,8 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> +#include <linux/rculist.h> + #include <generated/utsrelease.h> #include <asm/unaligned.h> #include "nvmet.h" @@ -41,7 +43,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->get_log_page.nsid); if (!ns) { status = NVME_SC_INVALID_NS; - pr_err("nvmet : Counld not find namespace id : %d\n", + pr_err("nvmet : Could not find namespace id : %d\n", le32_to_cpu(req->cmd->get_log_page.nsid)); goto out; } @@ -382,7 +384,6 @@ static void nvmet_execute_set_features(struct nvmet_req *req) { struct nvmet_subsys *subsys = req->sq->ctrl->subsys; u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]); - u64 val; u32 val32; u16 status = 0; @@ -392,8 +393,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req) (subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16)); break; case NVME_FEAT_KATO: - val = le64_to_cpu(req->cmd->prop_set.value); - val32 = val & 0xffff; + val32 = le32_to_cpu(req->cmd->common.cdw10[1]); req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000); nvmet_set_result(req, req->sq->ctrl->kato); break; @@ -511,7 +511,7 @@ int nvmet_parse_admin_cmd(struct nvmet_req *req) break; case nvme_admin_identify: req->data_len = 4096; - switch (le32_to_cpu(cmd->identify.cns)) { + switch (cmd->identify.cns) { case NVME_ID_CNS_NS: req->execute = nvmet_execute_identify_ns; return 0; diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 6f5074153dcd..be8c800078e2 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -631,6 +631,7 @@ static void nvmet_subsys_release(struct config_item *item) { struct nvmet_subsys *subsys = to_subsys(item); + nvmet_subsys_del_ctrls(subsys); nvmet_subsys_put(subsys); } diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b1d66ed655c9..11b0a0a5f661 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -14,9 +14,12 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/random.h> +#include <linux/rculist.h> + #include "nvmet.h" static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; +static DEFINE_IDA(cntlid_ida); /* * This read/write semaphore is used to synchronize access to configuration @@ -200,7 +203,7 @@ static void nvmet_keep_alive_timer(struct work_struct *work) pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n", ctrl->cntlid, ctrl->kato); - ctrl->ops->delete_ctrl(ctrl); + nvmet_ctrl_fatal_error(ctrl); } static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl) @@ -749,7 +752,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, if (!ctrl->sqs) goto out_free_cqs; - ret = ida_simple_get(&subsys->cntlid_ida, + ret = ida_simple_get(&cntlid_ida, NVME_CNTLID_MIN, NVME_CNTLID_MAX, GFP_KERNEL); if (ret < 0) { @@ -816,7 +819,10 @@ static void nvmet_ctrl_free(struct kref *ref) list_del(&ctrl->subsys_entry); mutex_unlock(&subsys->lock); - ida_simple_remove(&subsys->cntlid_ida, ctrl->cntlid); + flush_work(&ctrl->async_event_work); + cancel_work_sync(&ctrl->fatal_err_work); + + ida_simple_remove(&cntlid_ida, ctrl->cntlid); nvmet_subsys_put(subsys); kfree(ctrl->sqs); @@ -915,9 +921,6 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, mutex_init(&subsys->lock); INIT_LIST_HEAD(&subsys->namespaces); INIT_LIST_HEAD(&subsys->ctrls); - - ida_init(&subsys->cntlid_ida); - INIT_LIST_HEAD(&subsys->hosts); return subsys; @@ -930,11 +933,20 @@ static void nvmet_subsys_free(struct kref *ref) WARN_ON_ONCE(!list_empty(&subsys->namespaces)); - ida_destroy(&subsys->cntlid_ida); kfree(subsys->subsysnqn); kfree(subsys); } +void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys) +{ + struct nvmet_ctrl *ctrl; + + mutex_lock(&subsys->lock); + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) + ctrl->ops->delete_ctrl(ctrl); + mutex_unlock(&subsys->lock); +} + void nvmet_subsys_put(struct nvmet_subsys *subsys) { kref_put(&subsys->ref, nvmet_subsys_free); @@ -963,6 +975,7 @@ static void __exit nvmet_exit(void) { nvmet_exit_configfs(); nvmet_exit_discovery(); + ida_destroy(&cntlid_ida); BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024); BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024); diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 12f39eea569f..af8aabf05335 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -186,14 +186,14 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req) } case nvme_admin_identify: req->data_len = 4096; - switch (le32_to_cpu(cmd->identify.cns)) { + switch (cmd->identify.cns) { case NVME_ID_CNS_CTRL: req->execute = nvmet_execute_identify_disc_ctrl; return 0; default: pr_err("nvmet: unsupported identify cns %d\n", - le32_to_cpu(cmd->identify.cns)); + cmd->identify.cns); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } default: diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index f4088198cd0d..8bd022af3df6 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -153,8 +153,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) goto out; } - pr_info("creating controller %d for NQN %s.\n", - ctrl->cntlid, ctrl->hostnqn); + pr_info("creating controller %d for subsystem %s for NQN %s.\n", + ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn); req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid); out: @@ -220,7 +220,7 @@ int nvmet_parse_connect_cmd(struct nvmet_req *req) req->ns = NULL; - if (req->cmd->common.opcode != nvme_fabrics_command) { + if (cmd->common.opcode != nvme_fabrics_command) { pr_err("invalid command 0x%x on unconnected queue.\n", cmd->fabrics.opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 173e842f19c9..8f483ee7868c 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1314,7 +1314,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, (struct fcnvme_ls_disconnect_rqst *)iod->rqstbuf; struct fcnvme_ls_disconnect_acc *acc = (struct fcnvme_ls_disconnect_acc *)iod->rspbuf; - struct nvmet_fc_tgt_queue *queue; + struct nvmet_fc_tgt_queue *queue = NULL; struct nvmet_fc_tgt_assoc *assoc; int ret = 0; bool del_assoc = false; @@ -1348,7 +1348,18 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, assoc = nvmet_fc_find_target_assoc(tgtport, be64_to_cpu(rqst->associd.association_id)); iod->assoc = assoc; - if (!assoc) + if (assoc) { + if (rqst->discon_cmd.scope == + FCNVME_DISCONN_CONNECTION) { + queue = nvmet_fc_find_target_queue(tgtport, + be64_to_cpu( + rqst->discon_cmd.id)); + if (!queue) { + nvmet_fc_tgt_a_put(assoc); + ret = VERR_NO_CONN; + } + } + } else ret = VERR_NO_ASSOC; } @@ -1373,21 +1384,18 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, FCNVME_LS_DISCONNECT); - if (rqst->discon_cmd.scope == FCNVME_DISCONN_CONNECTION) { - queue = nvmet_fc_find_target_queue(tgtport, - be64_to_cpu(rqst->discon_cmd.id)); - if (queue) { - int qid = queue->qid; + /* are we to delete a Connection ID (queue) */ + if (queue) { + int qid = queue->qid; - nvmet_fc_delete_target_queue(queue); + nvmet_fc_delete_target_queue(queue); - /* release the get taken by find_target_queue */ - nvmet_fc_tgt_q_put(queue); + /* release the get taken by find_target_queue */ + nvmet_fc_tgt_q_put(queue); - /* tear association down if io queue terminated */ - if (!qid) - del_assoc = true; - } + /* tear association down if io queue terminated */ + if (!qid) + del_assoc = true; } /* release get taken in nvmet_fc_find_target_assoc */ @@ -1809,16 +1817,14 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) /* data no longer needed */ nvmet_fc_free_tgt_pgs(fod); - if (fcpreq->fcp_error || abort) - nvmet_req_complete(&fod->req, fcpreq->fcp_error); - + nvmet_req_complete(&fod->req, fcpreq->fcp_error); return; } switch (fcpreq->op) { case NVMET_FCOP_WRITEDATA: - if (abort || fcpreq->fcp_error || + if (fcpreq->fcp_error || fcpreq->transferred_length != fcpreq->transfer_length) { nvmet_req_complete(&fod->req, NVME_SC_FC_TRANSPORT_ERROR); @@ -1841,7 +1847,7 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) case NVMET_FCOP_READDATA: case NVMET_FCOP_READDATA_RSP: - if (abort || fcpreq->fcp_error || + if (fcpreq->fcp_error || fcpreq->transferred_length != fcpreq->transfer_length) { /* data no longer needed */ nvmet_fc_free_tgt_pgs(fod); diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index bcb8ebeb01c5..4e8e6a22bce1 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -845,7 +845,7 @@ fcloop_create_remote_port(struct device *dev, struct device_attribute *attr, rport->lport = nport->lport; nport->rport = rport; - return ret ? ret : count; + return count; } @@ -952,7 +952,7 @@ fcloop_create_target_port(struct device *dev, struct device_attribute *attr, tport->lport = nport->lport; nport->tport = tport; - return ret ? ret : count; + return count; } diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 9aaa70071ae5..d1f06e7768ff 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -104,7 +104,7 @@ static void nvme_loop_complete_rq(struct request *req) return; } - if (req->cmd_type == REQ_TYPE_DRV_PRIV) + if (blk_rq_is_passthrough(req)) error = req->errors; else error = nvme_error_status(req->errors); @@ -724,8 +724,7 @@ static int __init nvme_loop_init_module(void) ret = nvmet_register_transport(&nvme_loop_ops); if (ret) return ret; - nvmf_register_transport(&nvme_loop_transport); - return 0; + return nvmf_register_transport(&nvme_loop_transport); } static void __exit nvme_loop_cleanup_module(void) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 23d5eb1c944f..1370eee0a3c0 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -142,7 +142,6 @@ struct nvmet_subsys { unsigned int max_nsid; struct list_head ctrls; - struct ida cntlid_ida; struct list_head hosts; bool allow_any_host; @@ -282,6 +281,7 @@ void nvmet_ctrl_put(struct nvmet_ctrl *ctrl); struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, enum nvme_subsys_type type); void nvmet_subsys_put(struct nvmet_subsys *subsys); +void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys); struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid); void nvmet_put_namespace(struct nvmet_ns *ns); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 8c3760a78ac0..9aa1da3778b3 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -438,6 +438,10 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev, { struct ib_recv_wr *bad_wr; + ib_dma_sync_single_for_device(ndev->device, + cmd->sge[0].addr, cmd->sge[0].length, + DMA_FROM_DEVICE); + if (ndev->srq) return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr); return ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr); @@ -538,6 +542,11 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req) first_wr = &rsp->send_wr; nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd); + + ib_dma_sync_single_for_device(rsp->queue->dev->device, + rsp->send_sge.addr, rsp->send_sge.length, + DMA_TO_DEVICE); + if (ib_post_send(cm_id->qp, first_wr, &bad_wr)) { pr_err("sending cmd response failed\n"); nvmet_rdma_release_rsp(rsp); @@ -698,6 +707,14 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue, cmd->n_rdma = 0; cmd->req.port = queue->port; + + ib_dma_sync_single_for_cpu(queue->dev->device, + cmd->cmd->sge[0].addr, cmd->cmd->sge[0].length, + DMA_FROM_DEVICE); + ib_dma_sync_single_for_cpu(queue->dev->device, + cmd->send_sge.addr, cmd->send_sge.length, + DMA_TO_DEVICE); + if (!nvmet_req_init(&cmd->req, &queue->nvme_cq, &queue->nvme_sq, &nvmet_rdma_ops)) return; @@ -1024,6 +1041,9 @@ static int nvmet_rdma_cm_reject(struct rdma_cm_id *cm_id, { struct nvme_rdma_cm_rej rej; + pr_debug("rejecting connect request: status %d (%s)\n", + status, nvme_rdma_cm_msg(status)); + rej.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0); rej.sts = cpu_to_le16(status); @@ -1074,7 +1094,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, queue->idx = ida_simple_get(&nvmet_rdma_queue_ida, 0, 0, GFP_KERNEL); if (queue->idx < 0) { ret = NVME_RDMA_CM_NO_RSC; - goto out_free_queue; + goto out_destroy_sq; } ret = nvmet_rdma_alloc_rsps(queue); @@ -1118,7 +1138,6 @@ out_destroy_sq: out_free_queue: kfree(queue); out_reject: - pr_debug("rejecting connect request with status code %d\n", ret); nvmet_rdma_cm_reject(cm_id, ret); return NULL; } @@ -1171,7 +1190,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, ndev = nvmet_rdma_find_get_device(cm_id); if (!ndev) { - pr_err("no client data!\n"); nvmet_rdma_cm_reject(cm_id, NVME_RDMA_CM_NO_RSC); return -ECONNREFUSED; } |