From 1e2a410ff71504a64d1af2e354287ac51aeac1b0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 6 Sep 2012 15:34:56 -0700 Subject: block: Ues bi_pool for bio_integrity_alloc() Now that bios keep track of where they were allocated from, bio_integrity_alloc_bioset() becomes redundant. Remove bio_integrity_alloc_bioset() and drop bio_set argument from the related functions and make them use bio->bi_pool. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: Martin K. Petersen Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 4b4dbdfbca89..95c493511be7 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2788,7 +2788,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, __bio_clone(bio, bio_src); if (bio_integrity(bio_src) && - bio_integrity_clone(bio, bio_src, gfp_mask, bs)) + bio_integrity_clone(bio, bio_src, gfp_mask)) goto free_and_out; if (bio_ctr && bio_ctr(bio, bio_src, data)) -- cgit v1.2.3-59-g8ed1b From 4254bba17d92d53822a56ebc2a0c1eb7e2a71155 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 6 Sep 2012 15:35:00 -0700 Subject: block: Kill bi_destructor Now that we've got generic code for freeing bios allocated from bio pools, this isn't needed anymore. This patch also makes bio_free() static, since without bi_destructor there should be no need for it to be called anywhere else. bio_free() is now only called from bio_put, so we can refactor those a bit - move some code from bio_put() to bio_free() and kill the redundant bio->bi_next = NULL. v5: Switch to BIO_KMALLOC_POOL ((void *)~0), per Boaz v6: BIO_KMALLOC_POOL now NULL, drop bio_free's EXPORT_SYMBOL v7: No #define BIO_KMALLOC_POOL anymore Signed-off-by: Kent Overstreet CC: Jens Axboe Signed-off-by: Jens Axboe --- Documentation/block/biodoc.txt | 5 ---- block/blk-core.c | 2 +- fs/bio.c | 64 +++++++++++++++++------------------------- include/linux/bio.h | 1 - include/linux/blk_types.h | 3 -- 5 files changed, 27 insertions(+), 48 deletions(-) (limited to 'block/blk-core.c') diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index e418dc0a7086..8df5e8e6dceb 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -465,7 +465,6 @@ struct bio { bio_end_io_t *bi_end_io; /* bi_end_io (bio) */ atomic_t bi_cnt; /* pin count: free when it hits zero */ void *bi_private; - bio_destructor_t *bi_destructor; /* bi_destructor (bio) */ }; With this multipage bio design: @@ -647,10 +646,6 @@ for a non-clone bio. There are the 6 pools setup for different size biovecs, so bio_alloc(gfp_mask, nr_iovecs) will allocate a vec_list of the given size from these slabs. 
-The bi_destructor() routine takes into account the possibility of the bio -having originated from a different source (see later discussions on -n/w to block transfers and kvec_cb) - The bio_get() routine may be used to hold an extra reference on a bio prior to i/o submission, if the bio fields are likely to be accessed after the i/o is issued (since the bio may otherwise get freed in case i/o completion diff --git a/block/blk-core.c b/block/blk-core.c index 95c493511be7..b776cc90a4e7 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2807,7 +2807,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, free_and_out: if (bio) - bio_free(bio, bs); + bio_put(bio); blk_rq_unprep_clone(rq); return -ENOMEM; diff --git a/fs/bio.c b/fs/bio.c index 919ee9aa5c57..736ef12f5191 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -233,26 +233,37 @@ fallback: return bvl; } -void bio_free(struct bio *bio, struct bio_set *bs) +static void __bio_free(struct bio *bio) { - void *p; - - if (bio_has_allocated_vec(bio)) - bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); + bio_disassociate_task(bio); if (bio_integrity(bio)) bio_integrity_free(bio); +} - /* - * If we have front padding, adjust the bio pointer before freeing - */ - p = bio; - if (bs->front_pad) +static void bio_free(struct bio *bio) +{ + struct bio_set *bs = bio->bi_pool; + void *p; + + __bio_free(bio); + + if (bs) { + if (bio_has_allocated_vec(bio)) + bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); + + /* + * If we have front padding, adjust the bio pointer before freeing + */ + p = bio; p -= bs->front_pad; - mempool_free(p, bs->bio_pool); + mempool_free(p, bs->bio_pool); + } else { + /* Bio was allocated by bio_kmalloc() */ + kfree(bio); + } } -EXPORT_SYMBOL(bio_free); void bio_init(struct bio *bio) { @@ -276,10 +287,7 @@ void bio_reset(struct bio *bio) { unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS); - if (bio_integrity(bio)) - bio_integrity_free(bio); - - bio_disassociate_task(bio); + __bio_free(bio); memset(bio, 0, BIO_RESET_BYTES); bio->bi_flags = flags|(1 << BIO_UPTODATE); @@ -362,13 +370,6 @@ struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) } EXPORT_SYMBOL(bio_alloc); -static void bio_kmalloc_destructor(struct bio *bio) -{ - if (bio_integrity(bio)) - bio_integrity_free(bio); - kfree(bio); -} - /** * bio_kmalloc - allocate a bio for I/O using kmalloc() * @gfp_mask: the GFP_ mask given to the slab allocator @@ -395,7 +396,6 @@ struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET; bio->bi_max_vecs = nr_iovecs; bio->bi_io_vec = bio->bi_inline_vecs; - bio->bi_destructor = bio_kmalloc_destructor; return bio; } @@ -431,20 +431,8 @@ void bio_put(struct bio *bio) /* * last put frees it */ - if (atomic_dec_and_test(&bio->bi_cnt)) { - bio_disassociate_task(bio); - bio->bi_next = NULL; - - /* - * This if statement is temporary - bi_pool is replacing - * bi_destructor, but bi_destructor will be taken out in another - * patch. 
- */ - if (bio->bi_pool) - bio_free(bio, bio->bi_pool); - else - bio->bi_destructor(bio); - } + if (atomic_dec_and_test(&bio->bi_cnt)) + bio_free(bio); } EXPORT_SYMBOL(bio_put); diff --git a/include/linux/bio.h b/include/linux/bio.h index 76f6c252baff..04944c91fae7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -216,7 +216,6 @@ extern struct bio *bio_alloc(gfp_t, unsigned int); extern struct bio *bio_kmalloc(gfp_t, unsigned int); extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); extern void bio_put(struct bio *); -extern void bio_free(struct bio *, struct bio_set *); extern void bio_endio(struct bio *, int); struct request_queue; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1b607c247d72..3eefbb291192 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -84,11 +84,8 @@ struct bio { struct bio_vec *bi_io_vec; /* the actual vec list */ - /* If bi_pool is non NULL, bi_destructor is not called */ struct bio_set *bi_pool; - bio_destructor_t *bi_destructor; /* destructor */ - /* * We can inline a number of vecs at the end of the bio, to avoid * double allocations for a small number of bio_vecs. This member -- cgit v1.2.3-59-g8ed1b From bf800ef1816b4283a885e55ad38068aec9711e4d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 6 Sep 2012 15:35:02 -0700 Subject: block: Add bio_clone_bioset(), bio_clone_kmalloc() Previously, there was bio_clone() but it only allocated from the fs bio set; as a result various users were open coding it and using __bio_clone(). This changes bio_clone() to become bio_clone_bioset(), and then we add bio_clone() and bio_clone_kmalloc() as wrappers around it, making use of the functionality the last patch added. This will also help in a later patch changing how bio cloning works.
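For reference, a minimal usage sketch of the two new helpers (hypothetical driver code: "my_bio_set", my_clone() and my_clone_kmalloc() are invented names, and the calls assume the interfaces added by this patch):

	#include <linux/bio.h>

	static struct bio_set *my_bio_set;	/* hypothetical pool, set up elsewhere with bioset_create() */

	/* Clone from the driver's own bio_set instead of fs_bio_set. */
	static struct bio *my_clone(struct bio *src, gfp_t gfp)
	{
		return bio_clone_bioset(src, gfp, my_bio_set);
	}

	/* Clone without a bio_set at all; the copy is backed by kmalloc(). */
	static struct bio *my_clone_kmalloc(struct bio *src, gfp_t gfp)
	{
		return bio_clone_kmalloc(src, gfp);
	}

bio_clone() itself keeps its old behaviour and is now just bio_clone_bioset(bio, gfp_mask, fs_bio_set).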
Signed-off-by: Kent Overstreet CC: Jens Axboe CC: NeilBrown CC: Alasdair Kergon CC: Boaz Harrosh CC: Jeff Garzik Acked-by: Jeff Garzik Signed-off-by: Jens Axboe --- block/blk-core.c | 8 +------- drivers/block/osdblk.c | 3 +-- drivers/md/dm-crypt.c | 7 +------ drivers/md/dm.c | 4 ++-- drivers/md/md.c | 20 +------------------- fs/bio.c | 11 +++++++---- fs/exofs/ore.c | 5 ++--- include/linux/bio.h | 17 ++++++++++++++--- 8 files changed, 29 insertions(+), 46 deletions(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index b776cc90a4e7..82aab2815858 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2781,16 +2781,10 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, blk_rq_init(NULL, rq); __rq_for_each_bio(bio_src, rq_src) { - bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs); + bio = bio_clone_bioset(bio_src, gfp_mask, bs); if (!bio) goto free_and_out; - __bio_clone(bio, bio_src); - - if (bio_integrity(bio_src) && - bio_integrity_clone(bio, bio_src, gfp_mask)) - goto free_and_out; - if (bio_ctr && bio_ctr(bio, bio_src, data)) goto free_and_out; diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 87311ebac0db..1bbc681688e4 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -266,11 +266,10 @@ static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask) struct bio *tmp, *new_chain = NULL, *tail = NULL; while (old_chain) { - tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); + tmp = bio_clone_kmalloc(old_chain, gfpmask); if (!tmp) goto err_out; - __bio_clone(tmp, old_chain); tmp->bi_bdev = NULL; gfpmask &= ~__GFP_WAIT; tmp->bi_next = NULL; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 3c0acba042b6..bbf459bca61d 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -979,19 +979,14 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) * copy the required bvecs because we need the original * one in order to decrypt the whole bio data *afterwards*. */ - clone = bio_alloc_bioset(gfp, bio_segments(base_bio), cc->bs); + clone = bio_clone_bioset(base_bio, gfp, cc->bs); if (!clone) return 1; crypt_inc_pending(io); clone_init(io, clone); - clone->bi_idx = 0; - clone->bi_vcnt = bio_segments(base_bio); - clone->bi_size = base_bio->bi_size; clone->bi_sector = cc->start + io->sector; - memcpy(clone->bi_io_vec, bio_iovec(base_bio), - sizeof(struct bio_vec) * clone->bi_vcnt); generic_make_request(clone); return 0; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 33470f01ea5e..837879716889 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1129,8 +1129,8 @@ static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush * and discard, so no need for concern about wasted bvec allocations. 
*/ - clone = bio_alloc_bioset(GFP_NOIO, ci->bio->bi_max_vecs, ci->md->bs); - __bio_clone(clone, ci->bio); + clone = bio_clone_bioset(ci->bio, GFP_NOIO, ci->md->bs); + if (len) { clone->bi_sector = ci->sector; clone->bi_size = to_bytes(len); diff --git a/drivers/md/md.c b/drivers/md/md.c index 457ca8451ddb..7a2b0793f66e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -173,28 +173,10 @@ EXPORT_SYMBOL_GPL(bio_alloc_mddev); struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, struct mddev *mddev) { - struct bio *b; - if (!mddev || !mddev->bio_set) return bio_clone(bio, gfp_mask); - b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, mddev->bio_set); - if (!b) - return NULL; - - __bio_clone(b, bio); - if (bio_integrity(bio)) { - int ret; - - ret = bio_integrity_clone(b, bio, gfp_mask); - - if (ret < 0) { - bio_put(b); - return NULL; - } - } - - return b; + return bio_clone_bioset(bio, gfp_mask, mddev->bio_set); } EXPORT_SYMBOL_GPL(bio_clone_mddev); diff --git a/fs/bio.c b/fs/bio.c index 191b9b86c272..13e956779e10 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -438,16 +438,19 @@ void __bio_clone(struct bio *bio, struct bio *bio_src) EXPORT_SYMBOL(__bio_clone); /** - * bio_clone - clone a bio + * bio_clone_bioset - clone a bio * @bio: bio to clone * @gfp_mask: allocation priority + * @bs: bio_set to allocate from * * Like __bio_clone, only also allocates the returned bio */ -struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) +struct bio *bio_clone_bioset(struct bio *bio, gfp_t gfp_mask, + struct bio_set *bs) { - struct bio *b = bio_alloc(gfp_mask, bio->bi_max_vecs); + struct bio *b; + b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, bs); if (!b) return NULL; @@ -466,7 +469,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) return b; } -EXPORT_SYMBOL(bio_clone); +EXPORT_SYMBOL(bio_clone_bioset); /** * bio_get_nr_vecs - return approx number of vecs diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index 1585db1aa365..f936cb50dc0d 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -814,8 +814,8 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) struct bio *bio; if (per_dev != master_dev) { - bio = bio_kmalloc(GFP_KERNEL, - master_dev->bio->bi_max_vecs); + bio = bio_clone_kmalloc(master_dev->bio, + GFP_KERNEL); if (unlikely(!bio)) { ORE_DBGMSG( "Failed to allocate BIO size=%u\n", @@ -824,7 +824,6 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) goto out; } - __bio_clone(bio, master_dev->bio); bio->bi_bdev = NULL; bio->bi_next = NULL; per_dev->offset = master_dev->offset; diff --git a/include/linux/bio.h b/include/linux/bio.h index fbe35b175555..52b9cbc3e4da 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -215,6 +215,9 @@ extern void bioset_free(struct bio_set *); extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); extern void bio_put(struct bio *); +extern void __bio_clone(struct bio *, struct bio *); +extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); + extern struct bio_set *fs_bio_set; static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) @@ -222,18 +225,26 @@ static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); } +static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) +{ + return bio_clone_bioset(bio, gfp_mask, fs_bio_set); +} + static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) { return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); } +static inline 
struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask) +{ + return bio_clone_bioset(bio, gfp_mask, NULL); + +} + extern void bio_endio(struct bio *, int); struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); -extern void __bio_clone(struct bio *, struct bio *); -extern struct bio *bio_clone(struct bio *, gfp_t); - extern void bio_init(struct bio *); extern void bio_reset(struct bio *); -- cgit v1.2.3-59-g8ed1b From e32463b2f7801d6561887c01db37b34958504635 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Fri, 31 Aug 2012 17:24:13 +0900 Subject: block: remove the duplicated setting for congestion_threshold Before call the blk_queue_congestion_threshold(), the blk_queue_congestion_threshold() is already called at blk_queue_make_rquest(). Because this code is the duplicated, it has removed. Signed-off-by: Jaehoon Chung Signed-off-by: Kyungmin Park Signed-off-by: Jens Axboe --- block/blk-core.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 82aab2815858..2d739ca10923 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -713,8 +713,6 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, if (elevator_init(q, NULL)) return NULL; - blk_queue_congestion_threshold(q); - /* all done, end the initial bypass */ blk_queue_bypass_end(q); return q; -- cgit v1.2.3-59-g8ed1b From e2a60da74fc8215c68509a89e9a69c66363153db Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 18 Sep 2012 12:19:25 -0400 Subject: block: Clean up special command handling logic Remove special-casing of non-rw fs style requests (discard). The nomerge flags are consolidated in blk_types.h, and rq_mergeable() and bio_mergeable() have been modified to use them. bio_is_rw() is used in place of bio_has_data() a few places. This is done to to distinguish true reads and writes from other fs type requests that carry a payload (e.g. write same). Signed-off-by: Martin K. Petersen Acked-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-core.c | 13 ++++++------- block/blk-merge.c | 22 +--------------------- block/blk.h | 5 ++--- block/elevator.c | 6 ++---- include/linux/bio.h | 23 +++++++++++++++++++++-- include/linux/blk_types.h | 4 ++++ include/linux/blkdev.h | 22 ++++++++++------------ 7 files changed, 46 insertions(+), 49 deletions(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 2d739ca10923..5cc29299f6ac 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1657,8 +1657,8 @@ generic_make_request_checks(struct bio *bio) goto end_io; } - if (unlikely(!(bio->bi_rw & REQ_DISCARD) && - nr_sectors > queue_max_hw_sectors(q))) { + if (likely(bio_is_rw(bio) && + nr_sectors > queue_max_hw_sectors(q))) { printk(KERN_ERR "bio too big device %s (%u > %u)\n", bdevname(bio->bi_bdev, b), bio_sectors(bio), @@ -1699,8 +1699,7 @@ generic_make_request_checks(struct bio *bio) if ((bio->bi_rw & REQ_DISCARD) && (!blk_queue_discard(q) || - ((bio->bi_rw & REQ_SECURE) && - !blk_queue_secdiscard(q)))) { + ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) { err = -EOPNOTSUPP; goto end_io; } @@ -1818,7 +1817,7 @@ void submit_bio(int rw, struct bio *bio) * If it's a regular read/write or a barrier with data attached, * go through the normal accounting stuff before submission. 
*/ - if (bio_has_data(bio) && !(rw & REQ_DISCARD)) { + if (bio_has_data(bio)) { if (rw & WRITE) { count_vm_events(PGPGOUT, count); } else { @@ -1864,7 +1863,7 @@ EXPORT_SYMBOL(submit_bio); */ int blk_rq_check_limits(struct request_queue *q, struct request *rq) { - if (rq->cmd_flags & REQ_DISCARD) + if (!rq_mergeable(rq)) return 0; if (blk_rq_sectors(rq) > queue_max_sectors(q) || @@ -2338,7 +2337,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) req->buffer = bio_data(req->bio); /* update sector only for requests with clear definition of sector */ - if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD)) + if (req->cmd_type == REQ_TYPE_FS) req->__sector += total_bytes >> 9; /* mixed attributes always follow the first bio */ diff --git a/block/blk-merge.c b/block/blk-merge.c index e76279e41162..86710ca408b8 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -417,18 +417,6 @@ static int attempt_merge(struct request_queue *q, struct request *req, if (!rq_mergeable(req) || !rq_mergeable(next)) return 0; - /* - * Don't merge file system requests and discard requests - */ - if ((req->cmd_flags & REQ_DISCARD) != (next->cmd_flags & REQ_DISCARD)) - return 0; - - /* - * Don't merge discard requests and secure discard requests - */ - if ((req->cmd_flags & REQ_SECURE) != (next->cmd_flags & REQ_SECURE)) - return 0; - /* * not contiguous */ @@ -521,15 +509,7 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq, bool blk_rq_merge_ok(struct request *rq, struct bio *bio) { - if (!rq_mergeable(rq)) - return false; - - /* don't merge file system requests and discard requests */ - if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD)) - return false; - - /* don't merge discard requests and secure discard requests */ - if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE)) + if (!rq_mergeable(rq) || !bio_mergeable(bio)) return false; /* different data direction or already started, don't merge */ diff --git a/block/blk.h b/block/blk.h index 2a0ea32d249f..ca51543b248c 100644 --- a/block/blk.h +++ b/block/blk.h @@ -171,14 +171,13 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) * * a) it's attached to a gendisk, and * b) the queue had IO stats enabled when this request was started, and - * c) it's a file system request or a discard request + * c) it's a file system request */ static inline int blk_do_io_stat(struct request *rq) { return rq->rq_disk && (rq->cmd_flags & REQ_IO_STAT) && - (rq->cmd_type == REQ_TYPE_FS || - (rq->cmd_flags & REQ_DISCARD)); + (rq->cmd_type == REQ_TYPE_FS); } /* diff --git a/block/elevator.c b/block/elevator.c index 6a55d418896f..9b1d42b62f20 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -562,8 +562,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) if (rq->cmd_flags & REQ_SOFTBARRIER) { /* barriers are scheduling boundary, update end_sector */ - if (rq->cmd_type == REQ_TYPE_FS || - (rq->cmd_flags & REQ_DISCARD)) { + if (rq->cmd_type == REQ_TYPE_FS) { q->end_sector = rq_end_sector(rq); q->boundary_rq = rq; } @@ -605,8 +604,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) if (elv_attempt_insert_merge(q, rq)) break; case ELEVATOR_INSERT_SORT: - BUG_ON(rq->cmd_type != REQ_TYPE_FS && - !(rq->cmd_flags & REQ_DISCARD)); + BUG_ON(rq->cmd_type != REQ_TYPE_FS); rq->cmd_flags |= REQ_SORTED; q->nr_sorted++; if (rq_mergeable(rq)) { diff --git a/include/linux/bio.h b/include/linux/bio.h index 
52b9cbc3e4da..e54305cacc98 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -386,9 +386,28 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, /* * Check whether this bio carries any data or not. A NULL bio is allowed. */ -static inline int bio_has_data(struct bio *bio) +static inline bool bio_has_data(struct bio *bio) { - return bio && bio->bi_io_vec != NULL; + if (bio && bio->bi_vcnt) + return true; + + return false; +} + +static inline bool bio_is_rw(struct bio *bio) +{ + if (!bio_has_data(bio)) + return false; + + return true; +} + +static inline bool bio_mergeable(struct bio *bio) +{ + if (bio->bi_rw & REQ_NOMERGE_FLAGS) + return false; + + return true; } /* diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 3eefbb291192..1b229664f573 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -194,6 +194,10 @@ enum rq_flag_bits { REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE) #define REQ_CLONE_MASK REQ_COMMON_MASK +/* This mask is used for both bio and request merge checking */ +#define REQ_NOMERGE_FLAGS \ + (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) + #define REQ_RAHEAD (1 << __REQ_RAHEAD) #define REQ_THROTTLED (1 << __REQ_THROTTLED) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4a2ab7c85393..3a6fea7460f1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -540,8 +540,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define blk_account_rq(rq) \ (((rq)->cmd_flags & REQ_STARTED) && \ - ((rq)->cmd_type == REQ_TYPE_FS || \ - ((rq)->cmd_flags & REQ_DISCARD))) + ((rq)->cmd_type == REQ_TYPE_FS)) #define blk_pm_request(rq) \ ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ @@ -595,17 +594,16 @@ static inline void blk_clear_rl_full(struct request_list *rl, bool sync) rl->flags &= ~flag; } +static inline bool rq_mergeable(struct request *rq) +{ + if (rq->cmd_type != REQ_TYPE_FS) + return false; -/* - * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may - * it already be started by driver. - */ -#define RQ_NOMERGE_FLAGS \ - (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_DISCARD) -#define rq_mergeable(rq) \ - (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ - (((rq)->cmd_flags & REQ_DISCARD) || \ - (rq)->cmd_type == REQ_TYPE_FS)) + if (rq->cmd_flags & REQ_NOMERGE_FLAGS) + return false; + + return true; +} /* * q->prep_rq_fn return values -- cgit v1.2.3-59-g8ed1b From f31dc1cd490539e2b62a126bc4dc2495b165d772 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 18 Sep 2012 12:19:26 -0400 Subject: block: Consolidate command flag and queue limit checks for merges - blk_check_merge_flags() verifies that cmd_flags / bi_rw are compatible. This function is called for both req-req and req-bio merging. - blk_rq_get_max_sectors() and blk_queue_get_max_sectors() can be used to query the maximum sector count for a given request or queue. The calls will return the right value from the queue limits given the type of command (RW, discard, write same, etc.) Signed-off-by: Martin K. 
Petersen Acked-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-core.c | 3 +-- block/blk-merge.c | 30 ++++++++++++------------------ include/linux/blkdev.h | 31 +++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 20 deletions(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 5cc29299f6ac..33eded00c5b1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1866,8 +1866,7 @@ int blk_rq_check_limits(struct request_queue *q, struct request *rq) if (!rq_mergeable(rq)) return 0; - if (blk_rq_sectors(rq) > queue_max_sectors(q) || - blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) { + if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) { printk(KERN_ERR "%s: over max size limit.\n", __func__); return -EIO; } diff --git a/block/blk-merge.c b/block/blk-merge.c index 86710ca408b8..642b862608a1 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -275,14 +275,8 @@ no_merge: int ll_back_merge_fn(struct request_queue *q, struct request *req, struct bio *bio) { - unsigned short max_sectors; - - if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC)) - max_sectors = queue_max_hw_sectors(q); - else - max_sectors = queue_max_sectors(q); - - if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) { + if (blk_rq_sectors(req) + bio_sectors(bio) > + blk_rq_get_max_sectors(req)) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; @@ -299,15 +293,8 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, int ll_front_merge_fn(struct request_queue *q, struct request *req, struct bio *bio) { - unsigned short max_sectors; - - if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC)) - max_sectors = queue_max_hw_sectors(q); - else - max_sectors = queue_max_sectors(q); - - - if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) { + if (blk_rq_sectors(req) + bio_sectors(bio) > + blk_rq_get_max_sectors(req)) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; @@ -338,7 +325,8 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, /* * Will it become too large? 
*/ - if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > queue_max_sectors(q)) + if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > + blk_rq_get_max_sectors(req)) return 0; total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; @@ -417,6 +405,9 @@ static int attempt_merge(struct request_queue *q, struct request *req, if (!rq_mergeable(req) || !rq_mergeable(next)) return 0; + if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags)) + return 0; + /* * not contiguous */ @@ -512,6 +503,9 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (!rq_mergeable(rq) || !bio_mergeable(bio)) return false; + if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw)) + return false; + /* different data direction or already started, don't merge */ if (bio_data_dir(bio) != rq_data_dir(rq)) return false; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3a6fea7460f1..90f7abe8f183 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -605,6 +605,18 @@ static inline bool rq_mergeable(struct request *rq) return true; } +static inline bool blk_check_merge_flags(unsigned int flags1, + unsigned int flags2) +{ + if ((flags1 & REQ_DISCARD) != (flags2 & REQ_DISCARD)) + return false; + + if ((flags1 & REQ_SECURE) != (flags2 & REQ_SECURE)) + return false; + + return true; +} + /* * q->prep_rq_fn return values */ @@ -800,6 +812,25 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) return blk_rq_cur_bytes(rq) >> 9; } +static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, + unsigned int cmd_flags) +{ + if (unlikely(cmd_flags & REQ_DISCARD)) + return q->limits.max_discard_sectors; + + return q->limits.max_sectors; +} + +static inline unsigned int blk_rq_get_max_sectors(struct request *rq) +{ + struct request_queue *q = rq->q; + + if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC)) + return q->limits.max_hw_sectors; + + return blk_queue_get_max_sectors(q, rq->cmd_flags); +} + /* * Request issue related functions. */ -- cgit v1.2.3-59-g8ed1b From 4363ac7c13a9a4b763c6e8d9fdbfc2468f3b8ca4 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 18 Sep 2012 12:19:27 -0400 Subject: block: Implement support for WRITE SAME The WRITE SAME command supported on some SCSI devices allows the same block to be efficiently replicated throughout a block range. Only a single logical block is transferred from the host and the storage device writes the same data to all blocks described by the I/O. This patch implements support for WRITE SAME in the block layer. The blkdev_issue_write_same() function can be used by filesystems and block drivers to replicate a buffer across a block range. This can be used to efficiently initialize software RAID devices, etc. Signed-off-by: Martin K. 
Petersen Acked-by: Mike Snitzer Signed-off-by: Jens Axboe --- Documentation/ABI/testing/sysfs-block | 14 +++++++ block/blk-core.c | 14 ++++++- block/blk-lib.c | 74 +++++++++++++++++++++++++++++++++++ block/blk-merge.c | 9 +++++ block/blk-settings.c | 16 ++++++++ block/blk-sysfs.c | 13 ++++++ drivers/md/raid0.c | 1 + fs/bio.c | 9 +++-- include/linux/bio.h | 3 ++ include/linux/blk_types.h | 5 ++- include/linux/blkdev.h | 29 ++++++++++++++ 11 files changed, 181 insertions(+), 6 deletions(-) (limited to 'block/blk-core.c') diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index c1eb41cb9876..279da08f7541 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block @@ -206,3 +206,17 @@ Description: when a discarded area is read the discard_zeroes_data parameter will be set to one. Otherwise it will be 0 and the result of reading a discarded area is undefined. + +What: /sys/block//queue/write_same_max_bytes +Date: January 2012 +Contact: Martin K. Petersen +Description: + Some devices support a write same operation in which a + single data block can be written to a range of several + contiguous blocks on storage. This can be used to wipe + areas on disk or to initialize drives in a RAID + configuration. write_same_max_bytes indicates how many + bytes can be written in a single write same command. If + write_same_max_bytes is 0, write same is not supported + by the device. + diff --git a/block/blk-core.c b/block/blk-core.c index 33eded00c5b1..3b080541098e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1704,6 +1704,11 @@ generic_make_request_checks(struct bio *bio) goto end_io; } + if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) { + err = -EOPNOTSUPP; + goto end_io; + } + /* * Various block parts want %current->io_context and lazy ioc * allocation ends up trading a lot of pain for a small amount of @@ -1809,8 +1814,6 @@ EXPORT_SYMBOL(generic_make_request); */ void submit_bio(int rw, struct bio *bio) { - int count = bio_sectors(bio); - bio->bi_rw |= rw; /* @@ -1818,6 +1821,13 @@ void submit_bio(int rw, struct bio *bio) * go through the normal accounting stuff before submission. */ if (bio_has_data(bio)) { + unsigned int count; + + if (unlikely(rw & REQ_WRITE_SAME)) + count = bdev_logical_block_size(bio->bi_bdev) >> 9; + else + count = bio_sectors(bio); + if (rw & WRITE) { count_vm_events(PGPGOUT, count); } else { diff --git a/block/blk-lib.c b/block/blk-lib.c index 19cc761cacb2..a062543c58ac 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -129,6 +129,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, } EXPORT_SYMBOL(blkdev_issue_discard); +/** + * blkdev_issue_write_same - queue a write same operation + * @bdev: target blockdev + * @sector: start sector + * @nr_sects: number of sectors to write + * @gfp_mask: memory allocation flags (for bio_alloc) + * @page: page containing data to write + * + * Description: + * Issue a write same request for the sectors in question. 
+ */ +int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask, + struct page *page) +{ + DECLARE_COMPLETION_ONSTACK(wait); + struct request_queue *q = bdev_get_queue(bdev); + unsigned int max_write_same_sectors; + struct bio_batch bb; + struct bio *bio; + int ret = 0; + + if (!q) + return -ENXIO; + + max_write_same_sectors = q->limits.max_write_same_sectors; + + if (max_write_same_sectors == 0) + return -EOPNOTSUPP; + + atomic_set(&bb.done, 1); + bb.flags = 1 << BIO_UPTODATE; + bb.wait = &wait; + + while (nr_sects) { + bio = bio_alloc(gfp_mask, 1); + if (!bio) { + ret = -ENOMEM; + break; + } + + bio->bi_sector = sector; + bio->bi_end_io = bio_batch_end_io; + bio->bi_bdev = bdev; + bio->bi_private = &bb; + bio->bi_vcnt = 1; + bio->bi_io_vec->bv_page = page; + bio->bi_io_vec->bv_offset = 0; + bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev); + + if (nr_sects > max_write_same_sectors) { + bio->bi_size = max_write_same_sectors << 9; + nr_sects -= max_write_same_sectors; + sector += max_write_same_sectors; + } else { + bio->bi_size = nr_sects << 9; + nr_sects = 0; + } + + atomic_inc(&bb.done); + submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio); + } + + /* Wait for bios in-flight */ + if (!atomic_dec_and_test(&bb.done)) + wait_for_completion(&wait); + + if (!test_bit(BIO_UPTODATE, &bb.flags)) + ret = -ENOTSUPP; + + return ret; +} +EXPORT_SYMBOL(blkdev_issue_write_same); + /** * blkdev_issue_zeroout - generate number of zero filed write bios * @bdev: blockdev to issue diff --git a/block/blk-merge.c b/block/blk-merge.c index 642b862608a1..936a110de0b9 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -419,6 +419,10 @@ static int attempt_merge(struct request_queue *q, struct request *req, || next->special) return 0; + if (req->cmd_flags & REQ_WRITE_SAME && + !blk_write_same_mergeable(req->bio, next->bio)) + return 0; + /* * If we are allowed to merge, then append bio list * from next to rq and release next. 
merge_requests_fn @@ -518,6 +522,11 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (bio_integrity(bio) != blk_integrity_rq(rq)) return false; + /* must be using the same buffer */ + if (rq->cmd_flags & REQ_WRITE_SAME && + !blk_write_same_mergeable(rq->bio, bio)) + return false; + return true; } diff --git a/block/blk-settings.c b/block/blk-settings.c index 565a6786032f..779bb7646bcd 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS; + lim->max_write_same_sectors = 0; lim->max_discard_sectors = 0; lim->discard_granularity = 0; lim->discard_alignment = 0; @@ -144,6 +145,7 @@ void blk_set_stacking_limits(struct queue_limits *lim) lim->max_segments = USHRT_MAX; lim->max_hw_sectors = UINT_MAX; lim->max_sectors = UINT_MAX; + lim->max_write_same_sectors = UINT_MAX; } EXPORT_SYMBOL(blk_set_stacking_limits); @@ -285,6 +287,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q, } EXPORT_SYMBOL(blk_queue_max_discard_sectors); +/** + * blk_queue_max_write_same_sectors - set max sectors for a single write same + * @q: the request queue for the device + * @max_write_same_sectors: maximum number of sectors to write per command + **/ +void blk_queue_max_write_same_sectors(struct request_queue *q, + unsigned int max_write_same_sectors) +{ + q->limits.max_write_same_sectors = max_write_same_sectors; +} +EXPORT_SYMBOL(blk_queue_max_write_same_sectors); + /** * blk_queue_max_segments - set max hw segments for a request for this queue * @q: the request queue for the device @@ -510,6 +524,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); + t->max_write_same_sectors = min(t->max_write_same_sectors, + b->max_write_same_sectors); t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index ea51d827a0bb..247dbfd42621 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -180,6 +180,13 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag return queue_var_show(queue_discard_zeroes_data(q), page); } +static ssize_t queue_write_same_max_show(struct request_queue *q, char *page) +{ + return sprintf(page, "%llu\n", + (unsigned long long)q->limits.max_write_same_sectors << 9); +} + + static ssize_t queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) { @@ -385,6 +392,11 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = { .show = queue_discard_zeroes_data_show, }; +static struct queue_sysfs_entry queue_write_same_max_entry = { + .attr = {.name = "write_same_max_bytes", .mode = S_IRUGO }, + .show = queue_write_same_max_show, +}; + static struct queue_sysfs_entry queue_nonrot_entry = { .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, .show = queue_show_nonrot, @@ -432,6 +444,7 @@ static struct attribute *default_attrs[] = { &queue_discard_granularity_entry.attr, &queue_discard_max_entry.attr, &queue_discard_zeroes_data_entry.attr, + &queue_write_same_max_entry.attr, &queue_nonrot_entry.attr, &queue_nomerges_entry.attr, &queue_rq_affinity_entry.attr, diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c 
index de63a1fc3737..a9e4fa95dfaa 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -422,6 +422,7 @@ static int raid0_run(struct mddev *mddev) if (md_check_no_bitmap(mddev)) return -EINVAL; blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); + blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors); /* if private is not null, we are here after takeover */ if (mddev->private == NULL) { diff --git a/fs/bio.c b/fs/bio.c index 13e956779e10..f855e0e1869c 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1487,9 +1487,12 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) bp->bv1 = bi->bi_io_vec[0]; bp->bv2 = bi->bi_io_vec[0]; - bp->bv2.bv_offset += first_sectors << 9; - bp->bv2.bv_len -= first_sectors << 9; - bp->bv1.bv_len = first_sectors << 9; + + if (bio_is_rw(bi)) { + bp->bv2.bv_offset += first_sectors << 9; + bp->bv2.bv_len -= first_sectors << 9; + bp->bv1.bv_len = first_sectors << 9; + } bp->bio1.bi_io_vec = &bp->bv1; bp->bio2.bi_io_vec = &bp->bv2; diff --git a/include/linux/bio.h b/include/linux/bio.h index e54305cacc98..820e7aaad4fd 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -399,6 +399,9 @@ static inline bool bio_is_rw(struct bio *bio) if (!bio_has_data(bio)) return false; + if (bio->bi_rw & REQ_WRITE_SAME) + return false; + return true; } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1b229664f573..cdf11191e645 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -147,6 +147,7 @@ enum rq_flag_bits { __REQ_PRIO, /* boost priority in cfq */ __REQ_DISCARD, /* request to discard sectors */ __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ + __REQ_WRITE_SAME, /* write same block many times */ __REQ_NOIDLE, /* don't anticipate more IO after this one */ __REQ_FUA, /* forced unit access */ @@ -185,13 +186,15 @@ enum rq_flag_bits { #define REQ_META (1 << __REQ_META) #define REQ_PRIO (1 << __REQ_PRIO) #define REQ_DISCARD (1 << __REQ_DISCARD) +#define REQ_WRITE_SAME (1 << __REQ_WRITE_SAME) #define REQ_NOIDLE (1 << __REQ_NOIDLE) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \ - REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE) + REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \ + REQ_SECURE) #define REQ_CLONE_MASK REQ_COMMON_MASK /* This mask is used for both bio and request merge checking */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 90f7abe8f183..1756001210d2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -270,6 +270,7 @@ struct queue_limits { unsigned int io_min; unsigned int io_opt; unsigned int max_discard_sectors; + unsigned int max_write_same_sectors; unsigned int discard_granularity; unsigned int discard_alignment; @@ -614,9 +615,20 @@ static inline bool blk_check_merge_flags(unsigned int flags1, if ((flags1 & REQ_SECURE) != (flags2 & REQ_SECURE)) return false; + if ((flags1 & REQ_WRITE_SAME) != (flags2 & REQ_WRITE_SAME)) + return false; + return true; } +static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b) +{ + if (bio_data(a) == bio_data(b)) + return true; + + return false; +} + /* * q->prep_rq_fn return values */ @@ -818,6 +830,9 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, if (unlikely(cmd_flags & REQ_DISCARD)) return q->limits.max_discard_sectors; + if (unlikely(cmd_flags & REQ_WRITE_SAME)) + return 
q->limits.max_write_same_sectors; + return q->limits.max_sectors; } @@ -886,6 +901,8 @@ extern void blk_queue_max_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); +extern void blk_queue_max_write_same_sectors(struct request_queue *q, + unsigned int max_write_same_sectors); extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); extern void blk_queue_alignment_offset(struct request_queue *q, @@ -1016,6 +1033,8 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); +extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask, struct page *page); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask); static inline int sb_issue_discard(struct super_block *sb, sector_t block, @@ -1193,6 +1212,16 @@ static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev) return queue_discard_zeroes_data(bdev_get_queue(bdev)); } +static inline unsigned int bdev_write_same(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q) + return q->limits.max_write_same_sectors; + + return 0; +} + static inline int queue_dma_alignment(struct request_queue *q) { return q ? q->dma_alignment : 511; -- cgit v1.2.3-59-g8ed1b From 749fefe6778e98dfefe3b8bb72a93875196ec554 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 20 Sep 2012 14:08:52 -0700 Subject: block: lift the initial queue bypass mode on blk_register_queue() instead of blk_init_allocated_queue() b82d4b197c ("blkcg: make request_queue bypassing on allocation") made request_queues bypassed on allocation to avoid switching on and off bypass mode on a queue being initialized. Some drivers allocate and then destroy a lot of queues without fully initializing them and incurring bypass latency overhead on each of them could add upto significant overhead. Unfortunately, blk_init_allocated_queue() is never used by queues of bio-based drivers, which means that all bio-based driver queues are in bypass mode even after initialization and registration complete successfully. Due to the limited way request_queues are used by bio drivers, this problem is hidden pretty well but it shows up when blk-throttle is used in combination with a bio-based driver. Trying to configure (echoing to cgroupfs file) blk-throttle for a bio-based driver hangs indefinitely in blkg_conf_prep() waiting for bypass mode to end. This patch moves the initial blk_queue_bypass_end() call from blk_init_allocated_queue() to blk_register_queue() which is called for any userland-visible queues regardless of its type. I believe this is correct because I don't think there is any block driver which needs or wants working elevator and blk-cgroup on a queue which isn't visible to userland. If there are such users, we need a different solution. 
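To make the affected path concrete, a rough sketch of how a bio-based driver ends up with such a queue (hypothetical code: my_make_request() and my_add_disk() are invented, only the call ordering reflects what is described above):

	#include <linux/blkdev.h>
	#include <linux/genhd.h>

	static void my_make_request(struct request_queue *q, struct bio *bio)
	{
		bio_endio(bio, 0);		/* placeholder completion */
	}

	static int my_add_disk(struct gendisk *disk)
	{
		struct request_queue *q;

		q = blk_alloc_queue(GFP_KERNEL);	/* starts with bypass enabled */
		if (!q)
			return -ENOMEM;

		blk_queue_make_request(q, my_make_request);

		/*
		 * blk_init_allocated_queue() is never called on this path, so
		 * until now the initial bypass was never lifted.
		 */
		disk->queue = q;
		add_disk(disk);		/* blk_register_queue() now ends the bypass */
		return 0;
	}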
Signed-off-by: Tejun Heo Reported-by: Joseph Glanville Cc: stable@vger.kernel.org Acked-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-core.c | 7 ++----- block/blk-sysfs.c | 6 ++++++ 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 3b080541098e..80e29c90723c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -608,8 +608,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) /* * A queue starts its life with bypass turned on to avoid * unnecessary bypass on/off overhead and nasty surprises during - * init. The initial bypass will be finished at the end of - * blk_init_allocated_queue(). + * init. The initial bypass will be finished when the queue is + * registered by blk_register_queue(). */ q->bypass_depth = 1; __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); @@ -712,9 +712,6 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, /* init elevator */ if (elevator_init(q, NULL)) return NULL; - - /* all done, end the initial bypass */ - blk_queue_bypass_end(q); return q; } EXPORT_SYMBOL(blk_init_allocated_queue); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 247dbfd42621..ce6204608822 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -561,6 +561,12 @@ int blk_register_queue(struct gendisk *disk) if (WARN_ON(!q)) return -ENXIO; + /* + * Initialization must be complete by now. Finish the initial + * bypass from queue allocation. + */ + blk_queue_bypass_end(q); + ret = blk_trace_init_sysfs(dev); if (ret) return ret; -- cgit v1.2.3-59-g8ed1b From 60ea8226cbd5c8301f9a39edc574ddabcb8150e0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 20 Sep 2012 14:09:30 -0700 Subject: block: fix request_queue->flags initialization A queue newly allocated with blk_alloc_queue_node() has only QUEUE_FLAG_BYPASS set. For request-based drivers, blk_init_allocated_queue() is called and q->queue_flags is overwritten with QUEUE_FLAG_DEFAULT which doesn't include BYPASS even though the initial bypass is still in effect. In blk_init_allocated_queue(), or QUEUE_FLAG_DEFAULT to q->queue_flags instead of overwriting. Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org Acked-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 80e29c90723c..a17869f337f7 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -696,7 +696,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, q->request_fn = rfn; q->prep_rq_fn = NULL; q->unprep_rq_fn = NULL; - q->queue_flags = QUEUE_FLAG_DEFAULT; + q->queue_flags |= QUEUE_FLAG_DEFAULT; /* Override internal queue lock with supplied lock pointer */ if (lock) -- cgit v1.2.3-59-g8ed1b
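As an aside on this last fix, a minimal sketch of why the OR matters (illustrative only; the real code lives in blk_alloc_queue_node() and blk_init_allocated_queue()):

	#include <linux/blkdev.h>
	#include <linux/bitops.h>

	static void queue_flags_sketch(struct request_queue *q)
	{
		/* blk_alloc_queue_node() has already done this: */
		__set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);

		/*
		 * Old: q->queue_flags = QUEUE_FLAG_DEFAULT; dropped the BYPASS
		 * bit even though q->bypass_depth was still 1.
		 *
		 * New: add the default flags without clearing anything.
		 */
		q->queue_flags |= QUEUE_FLAG_DEFAULT;
	}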