author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-28 13:19:59 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-28 13:19:59 -0800 |
commit | 0e9da3fbf7d81f0f913b491c8de1ba7883d4f217 (patch) | |
tree | 2b3d25e3be60bf4ee40b4690c7bb9d6fa499ae69 /drivers/nvme/target/io-cmd-file.c | |
parent | Merge tag 'y2038-for-4.21' of ssh://gitolite.kernel.org:/pub/scm/linux/kernel/git/arnd/playground (diff) | |
parent | kyber: use sbitmap add_wait_queue/list_del wait helpers (diff) | |
Merge tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
"This is the main pull request for block/storage for 4.21.
Larger than usual, it was a busy round with lots of goodies queued up.
Most notable is the removal of the old IO stack, which has been a long
time coming. No new features for a while, everything coming in this
week has been fixes for things that were previously merged.
This contains:
- Use atomic counters instead of semaphores for mtip32xx (Arnd)
- Cleanup of the mtip32xx request setup (Christoph)
- Fix for circular locking dependency in loop (Jan, Tetsuo)
- bcache (Coly, Guoju, Shenghui)
* Optimizations for writeback caching
* Various fixes and improvements
- nvme (Chaitanya, Christoph, Sagi, Jay, me, Keith)
* host and target support for NVMe over TCP
* Error log page support
* Support for separate read/write/poll queues
* Much improved polling
* discard OOM fallback
* Tracepoint improvements
- lightnvm (Hans, Hua, Igor, Matias, Javier)
* Igor added packed metadata to pblk. Now drives without metadata
per LBA can be used as well.
* Fix from Geert on uninitialized value on chunk metadata reads.
* Fixes from Hans and Javier to pblk recovery and write path.
* Fix from Hua Su for a race condition in the pblk recovery
code.
* Scan optimization added to pblk recovery from Zhoujie.
* Small geometry cleanup from me.
- Conversion of the last few drivers that used the legacy path to
blk-mq (me)
- Removal of legacy IO path in SCSI (me, Christoph)
- Removal of legacy IO stack and schedulers (me)
- Support for much better polling, now without interrupts at all.
blk-mq adds support for multiple queue maps, which enables us to
have a map per type. This in turn enables nvme to have separate
completion queues for polling, which can then be interrupt-less.
Also means we're ready for async polled IO, which is hopefully
coming in the next release (see the queue-map sketch after this
message).
- Killing of (now) unused block exports (Christoph)
- Unification of the blk-rq-qos and blk-wbt wait handling (Josef)
- Support for zoned testing with null_blk (Masato)
- sx8 conversion to per-host tag sets (Christoph)
- IO priority improvements (Damien)
- mq-deadline zoned fix (Damien)
- Ref count blkcg series (Dennis)
- Lots of blk-mq improvements and speedups (me)
- sbitmap scalability improvements (me)
- Make core inflight IO accounting per-cpu (Mikulas)
- Export timeout setting in sysfs (Weiping)
- Cleanup the direct issue path (Jianchao)
- Export blk-wbt internals in block debugfs for easier debugging
(Ming)
- Lots of other fixes and improvements"
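The polling bullet above is the architectural headline of this round: with per-type queue maps, a driver's .map_queues callback fills one struct blk_mq_queue_map per hctx type, and requests flagged REQ_HIPRI are steered to the HCTX_TYPE_POLL map, whose hardware queues can run with no interrupt handler at all. A minimal sketch of such a callback, loosely modeled on what nvme-pci does in this cycle (my_dev, my_map_queues, and the fixed queue split are illustrative assumptions, not code from this merge):

```c
#include <linux/blk-mq.h>

/* Illustrative driver state; not taken from this merge. */
struct my_dev {
	unsigned int nr_irq_queues;	/* interrupt-driven hw queues */
	unsigned int nr_poll_queues;	/* IRQ-less, reaped by polling */
};

/*
 * Fill one queue map per hctx type: regular I/O completes via
 * interrupt on the first nr_irq_queues hardware queues, while
 * REQ_HIPRI requests land on the trailing poll-only queues.
 */
static int my_map_queues(struct blk_mq_tag_set *set)
{
	struct my_dev *dev = set->driver_data;
	struct blk_mq_queue_map *map;

	map = &set->map[HCTX_TYPE_DEFAULT];
	map->nr_queues = dev->nr_irq_queues;
	map->queue_offset = 0;
	blk_mq_map_queues(map);

	map = &set->map[HCTX_TYPE_POLL];
	map->nr_queues = dev->nr_poll_queues;
	map->queue_offset = dev->nr_irq_queues;
	blk_mq_map_queues(map);

	return 0;
}
```

The driver also has to size set->nr_maps to cover every populated map (compare "nvme-pci: only set nr_maps to 2 if poll queues are supported" in the shortlog below); completions on the poll map are then reaped via blk_poll() rather than an IRQ.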
* tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block: (364 commits)
kyber: use sbitmap add_wait_queue/list_del wait helpers
sbitmap: add helpers for add/del wait queue handling
block: save irq state in blkg_lookup_create()
dm: don't reuse bio for flushes
nvme-pci: trace SQ status on completions
nvme-rdma: implement polling queue map
nvme-fabrics: allow user to pass in nr_poll_queues
nvme-fabrics: allow nvmf_connect_io_queue to poll
nvme-core: optionally poll sync commands
block: make request_to_qc_t public
nvme-tcp: fix spelling mistake "attepmpt" -> "attempt"
nvme-tcp: fix endianess annotations
nvmet-tcp: fix endianess annotations
nvme-pci: refactor nvme_poll_irqdisable to make sparse happy
nvme-pci: only set nr_maps to 2 if poll queues are supported
nvmet: use a macro for default error location
nvmet: fix comparison of a u16 with -1
blk-mq: enable IO poll if .nr_queues of type poll > 0
blk-mq: change blk_mq_queue_busy() to blk_mq_queue_inflight()
blk-mq: skip zero-queue maps in blk_mq_map_swqueue
...
Diffstat (limited to 'drivers/nvme/target/io-cmd-file.c')
-rw-r--r-- | drivers/nvme/target/io-cmd-file.c | 165 |
1 file changed, 104 insertions, 61 deletions
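A pattern that repeats throughout the diff below is the replacement of hard-coded NVME_SC_INTERNAL / NVME_SC_LBA_RANGE codes with errno_to_nvme_status(req, err), part of the error log page work called out above: the helper translates a kernel errno into an NVMe status code and records error-log details on the request. A simplified sketch of the translation (an illustrative table only; under the assumption that the real nvmet-core helper also fills error-log fields such as req->error_slba and req->error_loc):

```c
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/nvme.h>

/* Illustrative errno -> NVMe status mapping; not the kernel's exact table. */
static u16 errno_to_nvme_status_sketch(int err)
{
	switch (err) {
	case 0:
		return NVME_SC_SUCCESS;
	case -ENOSPC:		/* e.g. access beyond the file-backed ns size */
		return NVME_SC_LBA_RANGE | NVME_SC_DNR;
	case -EOPNOTSUPP:	/* operation the backend cannot perform */
		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
	default:		/* anything else is an internal error */
		return NVME_SC_INTERNAL | NVME_SC_DNR;
	}
}
```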
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 01feebec29ea..517522305e5c 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -83,17 +83,16 @@ static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter)
 }
 
 static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
-		unsigned long nr_segs, size_t count)
+		unsigned long nr_segs, size_t count, int ki_flags)
 {
 	struct kiocb *iocb = &req->f.iocb;
 	ssize_t (*call_iter)(struct kiocb *iocb, struct iov_iter *iter);
 	struct iov_iter iter;
-	int ki_flags = 0, rw;
-	ssize_t ret;
+	int rw;
 
 	if (req->cmd->rw.opcode == nvme_cmd_write) {
 		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
-			ki_flags = IOCB_DSYNC;
+			ki_flags |= IOCB_DSYNC;
 		call_iter = req->ns->file->f_op->write_iter;
 		rw = WRITE;
 	} else {
@@ -107,17 +106,13 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
 	iocb->ki_filp = req->ns->file;
 	iocb->ki_flags = ki_flags | iocb_flags(req->ns->file);
 
-	ret = call_iter(iocb, &iter);
-
-	if (ret != -EIOCBQUEUED && iocb->ki_complete)
-		iocb->ki_complete(iocb, ret, 0);
-
-	return ret;
+	return call_iter(iocb, &iter);
 }
 
 static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
 {
 	struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb);
+	u16 status = NVME_SC_SUCCESS;
 
 	if (req->f.bvec != req->inline_bvec) {
 		if (likely(req->f.mpool_alloc == false))
@@ -126,11 +121,12 @@ static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
 			mempool_free(req->f.bvec, req->ns->bvec_pool);
 	}
 
-	nvmet_req_complete(req, ret != req->data_len ?
-			NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+	if (unlikely(ret != req->data_len))
+		status = errno_to_nvme_status(req, ret);
+	nvmet_req_complete(req, status);
 }
 
-static void nvmet_file_execute_rw(struct nvmet_req *req)
+static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
 {
 	ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE);
 	struct sg_page_iter sg_pg_iter;
@@ -140,30 +136,14 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
 	ssize_t ret = 0;
 	loff_t pos;
 
-	if (!req->sg_cnt || !nr_bvec) {
-		nvmet_req_complete(req, 0);
-		return;
-	}
+
+	if (req->f.mpool_alloc && nr_bvec > NVMET_MAX_MPOOL_BVEC)
+		is_sync = true;
 
 	pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
 	if (unlikely(pos + req->data_len > req->ns->size)) {
-		nvmet_req_complete(req, NVME_SC_LBA_RANGE | NVME_SC_DNR);
-		return;
-	}
-
-	if (nr_bvec > NVMET_MAX_INLINE_BIOVEC)
-		req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
-				GFP_KERNEL);
-	else
-		req->f.bvec = req->inline_bvec;
-
-	req->f.mpool_alloc = false;
-	if (unlikely(!req->f.bvec)) {
-		/* fallback under memory pressure */
-		req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL);
-		req->f.mpool_alloc = true;
-		if (nr_bvec > NVMET_MAX_MPOOL_BVEC)
-			is_sync = true;
+		nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC));
+		return true;
 	}
 
 	memset(&req->f.iocb, 0, sizeof(struct kiocb));
@@ -177,9 +157,10 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
 
 		if (unlikely(is_sync) &&
 		    (nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) {
-			ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len);
+			ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len, 0);
 			if (ret < 0)
-				goto out;
+				goto complete;
+
 			pos += len;
 			bv_cnt = 0;
 			len = 0;
@@ -187,35 +168,95 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
 		nr_bvec--;
 	}
 
-	if (WARN_ON_ONCE(total_len != req->data_len))
+	if (WARN_ON_ONCE(total_len != req->data_len)) {
 		ret = -EIO;
-out:
-	if (unlikely(is_sync || ret)) {
-		nvmet_file_io_done(&req->f.iocb, ret < 0 ? ret : total_len, 0);
-		return;
+		goto complete;
+	}
+
+	if (unlikely(is_sync)) {
+		ret = total_len;
+		goto complete;
 	}
-	req->f.iocb.ki_complete = nvmet_file_io_done;
-	nvmet_file_submit_bvec(req, pos, bv_cnt, total_len);
+
+	/*
+	 * A NULL ki_complete ask for synchronous execution, which we want
+	 * for the IOCB_NOWAIT case.
+	 */
+	if (!(ki_flags & IOCB_NOWAIT))
+		req->f.iocb.ki_complete = nvmet_file_io_done;
+
+	ret = nvmet_file_submit_bvec(req, pos, bv_cnt, total_len, ki_flags);
+
+	switch (ret) {
+	case -EIOCBQUEUED:
+		return true;
+	case -EAGAIN:
+		if (WARN_ON_ONCE(!(ki_flags & IOCB_NOWAIT)))
+			goto complete;
+		return false;
+	case -EOPNOTSUPP:
+		/*
+		 * For file systems returning error -EOPNOTSUPP, handle
+		 * IOCB_NOWAIT error case separately and retry without
+		 * IOCB_NOWAIT.
+		 */
+		if ((ki_flags & IOCB_NOWAIT))
+			return false;
+		break;
+	}
+
+complete:
+	nvmet_file_io_done(&req->f.iocb, ret, 0);
+	return true;
 }
 
 static void nvmet_file_buffered_io_work(struct work_struct *w)
 {
 	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
 
-	nvmet_file_execute_rw(req);
+	nvmet_file_execute_io(req, 0);
 }
 
-static void nvmet_file_execute_rw_buffered_io(struct nvmet_req *req)
+static void nvmet_file_submit_buffered_io(struct nvmet_req *req)
 {
 	INIT_WORK(&req->f.work, nvmet_file_buffered_io_work);
 	queue_work(buffered_io_wq, &req->f.work);
 }
 
+static void nvmet_file_execute_rw(struct nvmet_req *req)
+{
+	ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE);
+
+	if (!req->sg_cnt || !nr_bvec) {
+		nvmet_req_complete(req, 0);
+		return;
+	}
+
+	if (nr_bvec > NVMET_MAX_INLINE_BIOVEC)
+		req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
+				GFP_KERNEL);
+	else
+		req->f.bvec = req->inline_bvec;
+
+	if (unlikely(!req->f.bvec)) {
+		/* fallback under memory pressure */
+		req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL);
+		req->f.mpool_alloc = true;
+	} else
+		req->f.mpool_alloc = false;
+
+	if (req->ns->buffered_io) {
+		if (likely(!req->f.mpool_alloc) &&
+		    nvmet_file_execute_io(req, IOCB_NOWAIT))
+			return;
+		nvmet_file_submit_buffered_io(req);
+	} else
+		nvmet_file_execute_io(req, 0);
+}
+
 u16 nvmet_file_flush(struct nvmet_req *req)
 {
-	if (vfs_fsync(req->ns->file, 1) < 0)
-		return NVME_SC_INTERNAL | NVME_SC_DNR;
-	return 0;
+	return errno_to_nvme_status(req, vfs_fsync(req->ns->file, 1));
 }
 
 static void nvmet_file_flush_work(struct work_struct *w)
@@ -236,30 +277,34 @@ static void nvmet_file_execute_discard(struct nvmet_req *req)
 	int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
 	struct nvme_dsm_range range;
 	loff_t offset, len;
-	u16 ret;
+	u16 status = 0;
+	int ret;
 	int i;
 
 	for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
-		ret = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
+		status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
 				sizeof(range));
-		if (ret)
+		if (status)
 			break;
 
 		offset = le64_to_cpu(range.slba) << req->ns->blksize_shift;
 		len = le32_to_cpu(range.nlb);
 		len <<= req->ns->blksize_shift;
 		if (offset + len > req->ns->size) {
-			ret = NVME_SC_LBA_RANGE | NVME_SC_DNR;
+			req->error_slba = le64_to_cpu(range.slba);
+			status = errno_to_nvme_status(req, -ENOSPC);
 			break;
 		}
 
-		if (vfs_fallocate(req->ns->file, mode, offset, len)) {
-			ret = NVME_SC_INTERNAL | NVME_SC_DNR;
+		ret = vfs_fallocate(req->ns->file, mode, offset, len);
+		if (ret) {
+			req->error_slba = le64_to_cpu(range.slba);
+			status = errno_to_nvme_status(req, ret);
 			break;
 		}
 	}
 
-	nvmet_req_complete(req, ret);
+	nvmet_req_complete(req, status);
 }
 
 static void nvmet_file_dsm_work(struct work_struct *w)
@@ -299,12 +344,12 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w)
 			req->ns->blksize_shift);
 
 	if (unlikely(offset + len > req->ns->size)) {
-		nvmet_req_complete(req, NVME_SC_LBA_RANGE | NVME_SC_DNR);
+		nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC));
 		return;
 	}
 
 	ret = vfs_fallocate(req->ns->file, mode, offset, len);
-	nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+	nvmet_req_complete(req, ret < 0 ? errno_to_nvme_status(req, ret) : 0);
}
 
 static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
@@ -320,10 +365,7 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
 	switch (cmd->common.opcode) {
 	case nvme_cmd_read:
 	case nvme_cmd_write:
-		if (req->ns->buffered_io)
-			req->execute = nvmet_file_execute_rw_buffered_io;
-		else
-			req->execute = nvmet_file_execute_rw;
+		req->execute = nvmet_file_execute_rw;
 		req->data_len = nvmet_rw_len(req);
 		return 0;
 	case nvme_cmd_flush:
@@ -342,6 +384,7 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
 	default:
 		pr_err("unhandled cmd for file ns %d on qid %d\n",
 				cmd->common.opcode, req->sq->qid);
+		req->error_loc = offsetof(struct nvme_common_command, opcode);
 		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
 	}
 }
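The io-cmd-file.c rework above is the target-side version of an optimistic non-blocking pattern that user space reaches through RWF_NOWAIT: first try the buffered read or write inline with IOCB_NOWAIT, and only punt the request to the buffered_io_wq workqueue when the page cache cannot satisfy it without blocking (-EAGAIN) or the filesystem does not support the flag (-EOPNOTSUPP). For comparison, a self-contained user-space sketch of the same try-then-fall-back idea built on preadv2(), assuming Linux 4.14+ and glibc 2.26+ (read_nowait_with_fallback is an illustrative name, not an existing API):

```c
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

/*
 * Try a buffered read with RWF_NOWAIT first; if the data is not already
 * in the page cache (EAGAIN) or the filesystem lacks RWF_NOWAIT support
 * (EOPNOTSUPP), fall back to a normal, possibly blocking read. This is
 * the user-space analogue of the IOCB_NOWAIT fast path in the diff.
 */
static ssize_t read_nowait_with_fallback(int fd, void *buf, size_t len,
					 off_t off)
{
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	ssize_t ret;

	ret = preadv2(fd, &iov, 1, off, RWF_NOWAIT);
	if (ret >= 0)
		return ret;		/* fast path: served from the cache */
	if (errno != EAGAIN && errno != EOPNOTSUPP)
		return -1;		/* a real error */

	/* slow path: in nvmet, this is where the request is punted
	 * to buffered_io_wq and retried without the flag */
	return preadv2(fd, &iov, 1, off, 0);
}

int main(int argc, char **argv)
{
	char buf[4096];
	ssize_t n;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	n = read_nowait_with_fallback(fd, buf, sizeof(buf), 0);
	if (n < 0)
		perror("read");
	else
		printf("read %zd bytes\n", n);
	close(fd);
	return 0;
}
```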