From 0512a75b98f847c2e9a4b664013424e603e202f7 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 12 May 2020 17:55:47 +0900 Subject: block: Introduce REQ_OP_ZONE_APPEND Define REQ_OP_ZONE_APPEND to append-write sectors to a zone of a zoned block device. This is a no-merge write operation. A zone append write BIO must: * Target a zoned block device * Have a sector position indicating the start sector of the target zone * The target zone must be a sequential write zone * The BIO must not cross a zone boundary * The BIO size must not be split to ensure that a single range of LBAs is written with a single command. Implement these checks in generic_make_request_checks() using the helper function blk_check_zone_append(). To avoid write append BIO splitting, introduce the new max_zone_append_sectors queue limit attribute and ensure that a BIO size is always lower than this limit. Export this new limit through sysfs and check these limits in bio_full(). Also when a LLDD can't dispatch a request to a specific zone, it will return BLK_STS_ZONE_RESOURCE indicating this request needs to be delayed, e.g. because the zone it will be dispatched to is still write-locked. If this happens set the request aside in a local list to continue trying dispatching requests such as READ requests or WRITE/ZONE_APPEND requests targeting other zones. This way we can still keep a high queue depth without starving other requests even if one request can't be served due to zone write-locking. Finally, make sure that the bio sector position indicates the actual write position as indicated by the device on completion. Signed-off-by: Keith Busch [ jth: added zone-append specific add_page and merge_page helpers ] Signed-off-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K.
Petersen Signed-off-by: Jens Axboe --- block/bio.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 4 deletions(-) (limited to 'block/bio.c') diff --git a/block/bio.c b/block/bio.c index aad0a6dad4f9..3aa3c4ce2e5e 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1025,6 +1025,50 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) return 0; } +static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter) +{ + unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; + unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; + struct request_queue *q = bio->bi_disk->queue; + unsigned int max_append_sectors = queue_max_zone_append_sectors(q); + struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; + struct page **pages = (struct page **)bv; + ssize_t size, left; + unsigned len, i; + size_t offset; + + if (WARN_ON_ONCE(!max_append_sectors)) + return 0; + + /* + * Move page array up in the allocated memory for the bio vecs as far as + * possible so that we can start filling biovecs from the beginning + * without overwriting the temporary page array. + */ + BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2); + pages += entries_left * (PAGE_PTRS_PER_BVEC - 1); + + size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); + if (unlikely(size <= 0)) + return size ? 
size : -EFAULT; + + for (left = size, i = 0; left > 0; left -= len, i++) { + struct page *page = pages[i]; + bool same_page = false; + + len = min_t(size_t, PAGE_SIZE - offset, left); + if (bio_add_hw_page(q, bio, page, len, offset, + max_append_sectors, &same_page) != len) + return -EINVAL; + if (same_page) + put_page(page); + offset = 0; + } + + iov_iter_advance(iter, size); + return 0; +} + /** * bio_iov_iter_get_pages - add user or kernel pages to a bio * @bio: bio to add pages to @@ -1054,10 +1098,16 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) return -EINVAL; do { - if (is_bvec) - ret = __bio_iov_bvec_add_pages(bio, iter); - else - ret = __bio_iov_iter_get_pages(bio, iter); + if (bio_op(bio) == REQ_OP_ZONE_APPEND) { + if (WARN_ON_ONCE(is_bvec)) + return -EINVAL; + ret = __bio_iov_append_get_pages(bio, iter); + } else { + if (is_bvec) + ret = __bio_iov_bvec_add_pages(bio, iter); + else + ret = __bio_iov_iter_get_pages(bio, iter); + } } while (!ret && iov_iter_count(iter) && !bio_full(bio, 0)); if (is_bvec) @@ -1460,6 +1510,10 @@ struct bio *bio_split(struct bio *bio, int sectors, BUG_ON(sectors <= 0); BUG_ON(sectors >= bio_sectors(bio)); + /* Zone append commands cannot be split */ + if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND)) + return NULL; + split = bio_clone_fast(bio, gfp, bs); if (!split) return NULL; -- cgit v1.2.3-59-g8ed1b