diff options
Diffstat (limited to 'block/blk-core.c')
-rw-r--r-- | block/blk-core.c | 558 |
1 files changed, 229 insertions, 329 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index 5454db2fa263..ac1de7d73a45 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -18,6 +18,7 @@ #include <linux/blkdev.h> #include <linux/blk-mq.h> #include <linux/blk-pm.h> +#include <linux/blk-integrity.h> #include <linux/highmem.h> #include <linux/mm.h> #include <linux/pagemap.h> @@ -49,7 +50,7 @@ #include "blk-mq.h" #include "blk-mq-sched.h" #include "blk-pm.h" -#include "blk-rq-qos.h" +#include "blk-throttle.h" struct dentry *blk_debugfs_root; @@ -215,8 +216,7 @@ int blk_status_to_errno(blk_status_t status) } EXPORT_SYMBOL_GPL(blk_status_to_errno); -static void print_req_error(struct request *req, blk_status_t status, - const char *caller) +void blk_print_req_error(struct request *req, blk_status_t status) { int idx = (__force int)status; @@ -224,9 +224,9 @@ static void print_req_error(struct request *req, blk_status_t status, return; printk_ratelimited(KERN_ERR - "%s: %s error, dev %s, sector %llu op 0x%x:(%s) flags 0x%x " + "%s error, dev %s, sector %llu op 0x%x:(%s) flags 0x%x " "phys_seg %u prio class %u\n", - caller, blk_errors[idx].name, + blk_errors[idx].name, req->rq_disk ? req->rq_disk->disk_name : "?", blk_rq_pos(req), req_op(req), blk_op_str(req_op(req)), req->cmd_flags & ~REQ_OP_MASK, @@ -234,33 +234,6 @@ static void print_req_error(struct request *req, blk_status_t status, IOPRIO_PRIO_CLASS(req->ioprio)); } -static void req_bio_endio(struct request *rq, struct bio *bio, - unsigned int nbytes, blk_status_t error) -{ - if (error) - bio->bi_status = error; - - if (unlikely(rq->rq_flags & RQF_QUIET)) - bio_set_flag(bio, BIO_QUIET); - - bio_advance(bio, nbytes); - - if (req_op(rq) == REQ_OP_ZONE_APPEND && error == BLK_STS_OK) { - /* - * Partial zone append completions cannot be supported as the - * BIO fragments may end up not being written sequentially. - */ - if (bio->bi_iter.bi_size) - bio->bi_status = BLK_STS_IOERR; - else - bio->bi_iter.bi_sector = rq->__sector; - } - - /* don't actually finish bio if it's part of flush sequence */ - if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ)) - bio_endio(bio); -} - void blk_dump_rq_flags(struct request *rq, char *msg) { printk(KERN_INFO "%s: dev %s: flags=%llx\n", msg, @@ -337,23 +310,25 @@ void blk_put_queue(struct request_queue *q) } EXPORT_SYMBOL(blk_put_queue); -void blk_set_queue_dying(struct request_queue *q) +void blk_queue_start_drain(struct request_queue *q) { - blk_queue_flag_set(QUEUE_FLAG_DYING, q); - /* * When queue DYING flag is set, we need to block new req * entering queue, so we call blk_freeze_queue_start() to * prevent I/O from crossing blk_queue_enter(). */ blk_freeze_queue_start(q); - if (queue_is_mq(q)) blk_mq_wake_waiters(q); - /* Make blk_queue_enter() reexamine the DYING flag. */ wake_up_all(&q->mq_freeze_wq); } + +void blk_set_queue_dying(struct request_queue *q) +{ + blk_queue_flag_set(QUEUE_FLAG_DYING, q); + blk_queue_start_drain(q); +} EXPORT_SYMBOL_GPL(blk_set_queue_dying); /** @@ -385,13 +360,8 @@ void blk_cleanup_queue(struct request_queue *q) */ blk_freeze_queue(q); - rq_qos_exit(q); - blk_queue_flag_set(QUEUE_FLAG_DEAD, q); - /* for synchronous bio-based driver finish in-flight integrity i/o */ - blk_flush_integrity(); - blk_sync_queue(q); if (queue_is_mq(q)) blk_mq_exit_queue(q); @@ -406,7 +376,7 @@ void blk_cleanup_queue(struct request_queue *q) */ mutex_lock(&q->sysfs_lock); if (q->elevator) - blk_mq_sched_free_requests(q); + blk_mq_sched_free_rqs(q); mutex_unlock(&q->sysfs_lock); percpu_ref_exit(&q->q_usage_counter); @@ -416,6 +386,30 @@ void blk_cleanup_queue(struct request_queue *q) } EXPORT_SYMBOL(blk_cleanup_queue); +static bool blk_try_enter_queue(struct request_queue *q, bool pm) +{ + rcu_read_lock(); + if (!percpu_ref_tryget_live_rcu(&q->q_usage_counter)) + goto fail; + + /* + * The code that increments the pm_only counter must ensure that the + * counter is globally visible before the queue is unfrozen. + */ + if (blk_queue_pm_only(q) && + (!pm || queue_rpm_status(q) == RPM_SUSPENDED)) + goto fail_put; + + rcu_read_unlock(); + return true; + +fail_put: + blk_queue_exit(q); +fail: + rcu_read_unlock(); + return false; +} + /** * blk_queue_enter() - try to increase q->q_usage_counter * @q: request queue pointer @@ -425,40 +419,18 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) { const bool pm = flags & BLK_MQ_REQ_PM; - while (true) { - bool success = false; - - rcu_read_lock(); - if (percpu_ref_tryget_live(&q->q_usage_counter)) { - /* - * The code that increments the pm_only counter is - * responsible for ensuring that that counter is - * globally visible before the queue is unfrozen. - */ - if ((pm && queue_rpm_status(q) != RPM_SUSPENDED) || - !blk_queue_pm_only(q)) { - success = true; - } else { - percpu_ref_put(&q->q_usage_counter); - } - } - rcu_read_unlock(); - - if (success) - return 0; - + while (!blk_try_enter_queue(q, pm)) { if (flags & BLK_MQ_REQ_NOWAIT) return -EBUSY; /* - * read pair of barrier in blk_freeze_queue_start(), - * we need to order reading __PERCPU_REF_DEAD flag of - * .q_usage_counter and reading .mq_freeze_depth or - * queue dying flag, otherwise the following wait may - * never return if the two reads are reordered. + * read pair of barrier in blk_freeze_queue_start(), we need to + * order reading __PERCPU_REF_DEAD flag of .q_usage_counter and + * reading .mq_freeze_depth or queue dying flag, otherwise the + * following wait may never return if the two reads are + * reordered. */ smp_rmb(); - wait_event(q->mq_freeze_wq, (!q->mq_freeze_depth && blk_pm_resume_queue(pm, q)) || @@ -466,23 +438,44 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) if (blk_queue_dying(q)) return -ENODEV; } + + return 0; } static inline int bio_queue_enter(struct bio *bio) { - struct request_queue *q = bio->bi_bdev->bd_disk->queue; - bool nowait = bio->bi_opf & REQ_NOWAIT; - int ret; + struct request_queue *q = bdev_get_queue(bio->bi_bdev); + + while (!blk_try_enter_queue(q, false)) { + struct gendisk *disk = bio->bi_bdev->bd_disk; - ret = blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0); - if (unlikely(ret)) { - if (nowait && !blk_queue_dying(q)) + if (bio->bi_opf & REQ_NOWAIT) { + if (test_bit(GD_DEAD, &disk->state)) + goto dead; bio_wouldblock_error(bio); - else - bio_io_error(bio); + return -EBUSY; + } + + /* + * read pair of barrier in blk_freeze_queue_start(), we need to + * order reading __PERCPU_REF_DEAD flag of .q_usage_counter and + * reading .mq_freeze_depth or queue dying flag, otherwise the + * following wait may never return if the two reads are + * reordered. + */ + smp_rmb(); + wait_event(q->mq_freeze_wq, + (!q->mq_freeze_depth && + blk_pm_resume_queue(false, q)) || + test_bit(GD_DEAD, &disk->state)); + if (test_bit(GD_DEAD, &disk->state)) + goto dead; } - return ret; + return 0; +dead: + bio_io_error(bio); + return -ENODEV; } void blk_queue_exit(struct request_queue *q) @@ -535,7 +528,7 @@ struct request_queue *blk_alloc_queue(int node_id) q->node = node_id; - atomic_set(&q->nr_active_requests_shared_sbitmap, 0); + atomic_set(&q->nr_active_requests_shared_tags, 0); timer_setup(&q->timeout, blk_rq_timed_out_timer, 0); INIT_WORK(&q->timeout_work, blk_timeout_work); @@ -568,7 +561,7 @@ struct request_queue *blk_alloc_queue(int node_id) blk_queue_dma_alignment(q, 511); blk_set_default_limits(&q->limits); - q->nr_requests = BLKDEV_MAX_RQ; + q->nr_requests = BLKDEV_DEFAULT_RQ; return q; @@ -604,40 +597,13 @@ bool blk_get_queue(struct request_queue *q) } EXPORT_SYMBOL(blk_get_queue); -/** - * blk_get_request - allocate a request - * @q: request queue to allocate a request for - * @op: operation (REQ_OP_*) and REQ_* flags, e.g. REQ_SYNC. - * @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT. - */ -struct request *blk_get_request(struct request_queue *q, unsigned int op, - blk_mq_req_flags_t flags) -{ - struct request *req; - - WARN_ON_ONCE(op & REQ_NOWAIT); - WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM)); - - req = blk_mq_alloc_request(q, op, flags); - if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn) - q->mq_ops->initialize_rq_fn(req); - - return req; -} -EXPORT_SYMBOL(blk_get_request); - -void blk_put_request(struct request *req) -{ - blk_mq_free_request(req); -} -EXPORT_SYMBOL(blk_put_request); - static void handle_bad_sector(struct bio *bio, sector_t maxsector) { char b[BDEVNAME_SIZE]; - pr_info_ratelimited("attempt to access beyond end of device\n" + pr_info_ratelimited("%s: attempt to access beyond end of device\n" "%s: rw=%d, want=%llu, limit=%llu\n", + current->comm, bio_devname(bio, b), bio->bi_opf, bio_end_sector(bio), maxsector); } @@ -779,7 +745,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q, static noinline_for_stack bool submit_bio_checks(struct bio *bio) { struct block_device *bdev = bio->bi_bdev; - struct request_queue *q = bdev->bd_disk->queue; + struct request_queue *q = bdev_get_queue(bdev); blk_status_t status = BLK_STS_IOERR; struct blk_plug *plug; @@ -821,7 +787,7 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio) } if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) - bio_clear_hipri(bio); + bio_clear_polled(bio); switch (bio_op(bio)) { case REQ_OP_DISCARD: @@ -894,18 +860,22 @@ end_io: return false; } -static blk_qc_t __submit_bio(struct bio *bio) +static void __submit_bio(struct bio *bio) { struct gendisk *disk = bio->bi_bdev->bd_disk; - blk_qc_t ret = BLK_QC_T_NONE; - if (blk_crypto_bio_prep(&bio)) { - if (!disk->fops->submit_bio) - return blk_mq_submit_bio(bio); - ret = disk->fops->submit_bio(bio); + if (unlikely(bio_queue_enter(bio) != 0)) + return; + + if (!submit_bio_checks(bio) || !blk_crypto_bio_prep(&bio)) + goto queue_exit; + if (!disk->fops->submit_bio) { + blk_mq_submit_bio(bio); + return; } + disk->fops->submit_bio(bio); +queue_exit: blk_queue_exit(disk->queue); - return ret; } /* @@ -927,10 +897,9 @@ static blk_qc_t __submit_bio(struct bio *bio) * bio_list_on_stack[1] contains bios that were submitted before the current * ->submit_bio_bio, but that haven't been processed yet. */ -static blk_qc_t __submit_bio_noacct(struct bio *bio) +static void __submit_bio_noacct(struct bio *bio) { struct bio_list bio_list_on_stack[2]; - blk_qc_t ret = BLK_QC_T_NONE; BUG_ON(bio->bi_next); @@ -938,19 +907,16 @@ static blk_qc_t __submit_bio_noacct(struct bio *bio) current->bio_list = bio_list_on_stack; do { - struct request_queue *q = bio->bi_bdev->bd_disk->queue; + struct request_queue *q = bdev_get_queue(bio->bi_bdev); struct bio_list lower, same; - if (unlikely(bio_queue_enter(bio) != 0)) - continue; - /* * Create a fresh bio_list for all subordinate requests. */ bio_list_on_stack[1] = bio_list_on_stack[0]; bio_list_init(&bio_list_on_stack[0]); - ret = __submit_bio(bio); + __submit_bio(bio); /* * Sort new bios into those for a lower level and those for the @@ -959,7 +925,7 @@ static blk_qc_t __submit_bio_noacct(struct bio *bio) bio_list_init(&lower); bio_list_init(&same); while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) - if (q == bio->bi_bdev->bd_disk->queue) + if (q == bdev_get_queue(bio->bi_bdev)) bio_list_add(&same, bio); else bio_list_add(&lower, bio); @@ -973,33 +939,19 @@ static blk_qc_t __submit_bio_noacct(struct bio *bio) } while ((bio = bio_list_pop(&bio_list_on_stack[0]))); current->bio_list = NULL; - return ret; } -static blk_qc_t __submit_bio_noacct_mq(struct bio *bio) +static void __submit_bio_noacct_mq(struct bio *bio) { struct bio_list bio_list[2] = { }; - blk_qc_t ret = BLK_QC_T_NONE; current->bio_list = bio_list; do { - struct gendisk *disk = bio->bi_bdev->bd_disk; - - if (unlikely(bio_queue_enter(bio) != 0)) - continue; - - if (!blk_crypto_bio_prep(&bio)) { - blk_queue_exit(disk->queue); - ret = BLK_QC_T_NONE; - continue; - } - - ret = blk_mq_submit_bio(bio); + __submit_bio(bio); } while ((bio = bio_list_pop(&bio_list[0]))); current->bio_list = NULL; - return ret; } /** @@ -1011,25 +963,20 @@ static blk_qc_t __submit_bio_noacct_mq(struct bio *bio) * systems and other upper level users of the block layer should use * submit_bio() instead. */ -blk_qc_t submit_bio_noacct(struct bio *bio) +void submit_bio_noacct(struct bio *bio) { - if (!submit_bio_checks(bio)) - return BLK_QC_T_NONE; - /* * We only want one ->submit_bio to be active at a time, else stack * usage with stacked devices could be a problem. Use current->bio_list * to collect a list of requests submited by a ->submit_bio method while * it is active, and then process them after it returned. */ - if (current->bio_list) { + if (current->bio_list) bio_list_add(¤t->bio_list[0], bio); - return BLK_QC_T_NONE; - } - - if (!bio->bi_bdev->bd_disk->fops->submit_bio) - return __submit_bio_noacct_mq(bio); - return __submit_bio_noacct(bio); + else if (!bio->bi_bdev->bd_disk->fops->submit_bio) + __submit_bio_noacct_mq(bio); + else + __submit_bio_noacct(bio); } EXPORT_SYMBOL(submit_bio_noacct); @@ -1046,10 +993,10 @@ EXPORT_SYMBOL(submit_bio_noacct); * in @bio. The bio must NOT be touched by thecaller until ->bi_end_io() has * been called. */ -blk_qc_t submit_bio(struct bio *bio) +void submit_bio(struct bio *bio) { if (blkcg_punt_bio_submit(bio)) - return BLK_QC_T_NONE; + return; /* * If it's a regular read/write or a barrier with data attached, @@ -1060,7 +1007,7 @@ blk_qc_t submit_bio(struct bio *bio) if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) count = queue_logical_block_size( - bio->bi_bdev->bd_disk->queue) >> 9; + bdev_get_queue(bio->bi_bdev)) >> 9; else count = bio_sectors(bio); @@ -1081,20 +1028,93 @@ blk_qc_t submit_bio(struct bio *bio) if (unlikely(bio_op(bio) == REQ_OP_READ && bio_flagged(bio, BIO_WORKINGSET))) { unsigned long pflags; - blk_qc_t ret; psi_memstall_enter(&pflags); - ret = submit_bio_noacct(bio); + submit_bio_noacct(bio); psi_memstall_leave(&pflags); - - return ret; + return; } - return submit_bio_noacct(bio); + submit_bio_noacct(bio); } EXPORT_SYMBOL(submit_bio); /** + * bio_poll - poll for BIO completions + * @bio: bio to poll for + * @flags: BLK_POLL_* flags that control the behavior + * + * Poll for completions on queue associated with the bio. Returns number of + * completed entries found. + * + * Note: the caller must either be the context that submitted @bio, or + * be in a RCU critical section to prevent freeing of @bio. + */ +int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) +{ + struct request_queue *q = bdev_get_queue(bio->bi_bdev); + blk_qc_t cookie = READ_ONCE(bio->bi_cookie); + int ret; + + if (cookie == BLK_QC_T_NONE || + !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + return 0; + + if (current->plug) + blk_flush_plug(current->plug, false); + + if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT)) + return 0; + if (WARN_ON_ONCE(!queue_is_mq(q))) + ret = 0; /* not yet implemented, should not happen */ + else + ret = blk_mq_poll(q, cookie, iob, flags); + blk_queue_exit(q); + return ret; +} +EXPORT_SYMBOL_GPL(bio_poll); + +/* + * Helper to implement file_operations.iopoll. Requires the bio to be stored + * in iocb->private, and cleared before freeing the bio. + */ +int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob, + unsigned int flags) +{ + struct bio *bio; + int ret = 0; + + /* + * Note: the bio cache only uses SLAB_TYPESAFE_BY_RCU, so bio can + * point to a freshly allocated bio at this point. If that happens + * we have a few cases to consider: + * + * 1) the bio is beeing initialized and bi_bdev is NULL. We can just + * simply nothing in this case + * 2) the bio points to a not poll enabled device. bio_poll will catch + * this and return 0 + * 3) the bio points to a poll capable device, including but not + * limited to the one that the original bio pointed to. In this + * case we will call into the actual poll method and poll for I/O, + * even if we don't need to, but it won't cause harm either. + * + * For cases 2) and 3) above the RCU grace period ensures that bi_bdev + * is still allocated. Because partitions hold a reference to the whole + * device bdev and thus disk, the disk is also still valid. Grabbing + * a reference to the queue in bio_poll() ensures the hctxs and requests + * are still valid as well. + */ + rcu_read_lock(); + bio = READ_ONCE(kiocb->private); + if (bio && bio->bi_bdev) + ret = bio_poll(bio, iob, flags); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL_GPL(iocb_bio_iopoll); + +/** * blk_cloned_rq_check_limits - Helper function to check a cloned request * for the new queue limits * @q: the queue @@ -1169,8 +1189,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * if (blk_crypto_insert_cloned_request(rq)) return BLK_STS_IOERR; - if (blk_queue_io_stat(q)) - blk_account_io_start(rq); + blk_account_io_start(rq); /* * Since we have a scheduler attached on the top device, @@ -1238,41 +1257,19 @@ again: } } -static void blk_account_io_completion(struct request *req, unsigned int bytes) +void __blk_account_io_done(struct request *req, u64 now) { - if (req->part && blk_do_io_stat(req)) { - const int sgrp = op_stat_group(req_op(req)); + const int sgrp = op_stat_group(req_op(req)); - part_stat_lock(); - part_stat_add(req->part, sectors[sgrp], bytes >> 9); - part_stat_unlock(); - } -} - -void blk_account_io_done(struct request *req, u64 now) -{ - /* - * Account IO completion. flush_rq isn't accounted as a - * normal IO on queueing nor completion. Accounting the - * containing request is enough. - */ - if (req->part && blk_do_io_stat(req) && - !(req->rq_flags & RQF_FLUSH_SEQ)) { - const int sgrp = op_stat_group(req_op(req)); - - part_stat_lock(); - update_io_ticks(req->part, jiffies, true); - part_stat_inc(req->part, ios[sgrp]); - part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns); - part_stat_unlock(); - } + part_stat_lock(); + update_io_ticks(req->part, jiffies, true); + part_stat_inc(req->part, ios[sgrp]); + part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns); + part_stat_unlock(); } -void blk_account_io_start(struct request *rq) +void __blk_account_io_start(struct request *rq) { - if (!blk_do_io_stat(rq)) - return; - /* passthrough requests can hold bios that do not have ->bi_bdev set */ if (rq->bio && rq->bio->bi_bdev) rq->part = rq->bio->bi_bdev; @@ -1368,112 +1365,6 @@ void blk_steal_bios(struct bio_list *list, struct request *rq) } EXPORT_SYMBOL_GPL(blk_steal_bios); -/** - * blk_update_request - Complete multiple bytes without completing the request - * @req: the request being processed - * @error: block status code - * @nr_bytes: number of bytes to complete for @req - * - * Description: - * Ends I/O on a number of bytes attached to @req, but doesn't complete - * the request structure even if @req doesn't have leftover. - * If @req has leftover, sets it up for the next range of segments. - * - * Passing the result of blk_rq_bytes() as @nr_bytes guarantees - * %false return from this function. - * - * Note: - * The RQF_SPECIAL_PAYLOAD flag is ignored on purpose in this function - * except in the consistency check at the end of this function. - * - * Return: - * %false - this request doesn't have any more data - * %true - this request has more data - **/ -bool blk_update_request(struct request *req, blk_status_t error, - unsigned int nr_bytes) -{ - int total_bytes; - - trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes); - - if (!req->bio) - return false; - -#ifdef CONFIG_BLK_DEV_INTEGRITY - if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ && - error == BLK_STS_OK) - req->q->integrity.profile->complete_fn(req, nr_bytes); -#endif - - if (unlikely(error && !blk_rq_is_passthrough(req) && - !(req->rq_flags & RQF_QUIET))) - print_req_error(req, error, __func__); - - blk_account_io_completion(req, nr_bytes); - - total_bytes = 0; - while (req->bio) { - struct bio *bio = req->bio; - unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes); - - if (bio_bytes == bio->bi_iter.bi_size) - req->bio = bio->bi_next; - - /* Completion has already been traced */ - bio_clear_flag(bio, BIO_TRACE_COMPLETION); - req_bio_endio(req, bio, bio_bytes, error); - - total_bytes += bio_bytes; - nr_bytes -= bio_bytes; - - if (!nr_bytes) - break; - } - - /* - * completely done - */ - if (!req->bio) { - /* - * Reset counters so that the request stacking driver - * can find how many bytes remain in the request - * later. - */ - req->__data_len = 0; - return false; - } - - req->__data_len -= total_bytes; - - /* update sector only for requests with clear definition of sector */ - if (!blk_rq_is_passthrough(req)) - req->__sector += total_bytes >> 9; - - /* mixed attributes always follow the first bio */ - if (req->rq_flags & RQF_MIXED_MERGE) { - req->cmd_flags &= ~REQ_FAILFAST_MASK; - req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK; - } - - if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) { - /* - * If total number of sectors is less than the first segment - * size, something has gone terribly wrong. - */ - if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { - blk_dump_rq_flags(req, "request botched"); - req->__data_len = blk_rq_cur_bytes(req); - } - - /* recalculate the number of segments */ - req->nr_phys_segments = blk_recalc_rq_segments(req); - } - - return true; -} -EXPORT_SYMBOL_GPL(blk_update_request); - #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE /** * rq_flush_dcache_pages - Helper function to flush all pages in a request @@ -1621,6 +1512,32 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, } EXPORT_SYMBOL(kblockd_mod_delayed_work_on); +void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios) +{ + struct task_struct *tsk = current; + + /* + * If this is a nested plug, don't actually assign it. + */ + if (tsk->plug) + return; + + plug->mq_list = NULL; + plug->cached_rq = NULL; + plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT); + plug->rq_count = 0; + plug->multiple_queues = false; + plug->has_elevator = false; + plug->nowait = false; + INIT_LIST_HEAD(&plug->cb_list); + + /* + * Store ordering should not be needed here, since a potential + * preempt will imply a full memory barrier + */ + tsk->plug = plug; +} + /** * blk_start_plug - initialize blk_plug and track it inside the task_struct * @plug: The &struct blk_plug that needs to be initialized @@ -1646,25 +1563,7 @@ EXPORT_SYMBOL(kblockd_mod_delayed_work_on); */ void blk_start_plug(struct blk_plug *plug) { - struct task_struct *tsk = current; - - /* - * If this is a nested plug, don't actually assign it. - */ - if (tsk->plug) - return; - - INIT_LIST_HEAD(&plug->mq_list); - INIT_LIST_HEAD(&plug->cb_list); - plug->rq_count = 0; - plug->multiple_queues = false; - plug->nowait = false; - - /* - * Store ordering should not be needed here, since a potential - * preempt will imply a full memory barrier - */ - tsk->plug = plug; + blk_start_plug_nr_ios(plug, 1); } EXPORT_SYMBOL(blk_start_plug); @@ -1710,12 +1609,14 @@ struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data, } EXPORT_SYMBOL(blk_check_plugged); -void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) +void blk_flush_plug(struct blk_plug *plug, bool from_schedule) { - flush_plug_callbacks(plug, from_schedule); - - if (!list_empty(&plug->mq_list)) + if (!list_empty(&plug->cb_list)) + flush_plug_callbacks(plug, from_schedule); + if (!rq_list_empty(plug->mq_list)) blk_mq_flush_plug_list(plug, from_schedule); + if (unlikely(!from_schedule && plug->cached_rq)) + blk_mq_free_plug_rqs(plug); } /** @@ -1730,11 +1631,10 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) */ void blk_finish_plug(struct blk_plug *plug) { - if (plug != current->plug) - return; - blk_flush_plug_list(plug, false); - - current->plug = NULL; + if (plug == current->plug) { + blk_flush_plug(plug, false); + current->plug = NULL; + } } EXPORT_SYMBOL(blk_finish_plug); |