diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/Kconfig | 10 | ||||
-rw-r--r-- | block/Makefile | 1 | ||||
-rw-r--r-- | block/blk-core.c | 54 | ||||
-rw-r--r-- | block/blk-exec.c | 7 | ||||
-rw-r--r-- | block/blk-flush.c | 25 | ||||
-rw-r--r-- | block/blk-ioc.c | 40 | ||||
-rw-r--r-- | block/blk-lib.c | 5 | ||||
-rw-r--r-- | block/blk-softirq.c | 19 | ||||
-rw-r--r-- | block/blk-sysfs.c | 13 | ||||
-rw-r--r-- | block/blk-throttle.c | 14 | ||||
-rw-r--r-- | block/blk-timeout.c | 5 | ||||
-rw-r--r-- | block/blk.h | 2 | ||||
-rw-r--r-- | block/bsg-lib.c | 298 | ||||
-rw-r--r-- | block/bsg.c | 18 | ||||
-rw-r--r-- | block/cfq-iosched.c | 157 | ||||
-rw-r--r-- | block/compat_ioctl.c | 14 | ||||
-rw-r--r-- | block/deadline-iosched.c | 4 | ||||
-rw-r--r-- | block/elevator.c | 7 | ||||
-rw-r--r-- | block/genhd.c | 119 |
19 files changed, 595 insertions, 217 deletions
diff --git a/block/Kconfig b/block/Kconfig index 60be1e0455da..e97934eececa 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -65,6 +65,16 @@ config BLK_DEV_BSG If unsure, say Y. +config BLK_DEV_BSGLIB + bool "Block layer SG support v4 helper lib" + default n + select BLK_DEV_BSG + help + Subsystems will normally enable this if needed. Users will not + normally need to manually enable this. + + If unsure, say N. + config BLK_DEV_INTEGRITY bool "Block layer data integrity support" ---help--- diff --git a/block/Makefile b/block/Makefile index 0fec4b3fab51..514c6e4f427a 100644 --- a/block/Makefile +++ b/block/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o +obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o diff --git a/block/blk-core.c b/block/blk-core.c index d2f8f4049abd..90e1ffdeb415 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -839,6 +839,9 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) { struct request *rq; + if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) + return NULL; + BUG_ON(rw != READ && rw != WRITE); spin_lock_irq(q->queue_lock); @@ -1279,10 +1282,8 @@ get_rq: init_request_from_bio(req, bio); if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || - bio_flagged(bio, BIO_CPU_AFFINE)) { - req->cpu = blk_cpu_to_group(get_cpu()); - put_cpu(); - } + bio_flagged(bio, BIO_CPU_AFFINE)) + req->cpu = raw_smp_processor_id(); plug = current->plug; if (plug) { @@ -1302,7 +1303,10 @@ get_rq: plug->should_sort = 1; } list_add_tail(&req->queuelist, &plug->list); + plug->count++; drive_stat_acct(req, 1); + if (plug->count >= BLK_MAX_REQUEST_COUNT) + blk_flush_plug_list(plug, false); } else { spin_lock_irq(q->queue_lock); add_acct_request(q, req, where); @@ -1357,29 +1361,27 @@ static int __init setup_fail_make_request(char *str) } __setup("fail_make_request=", setup_fail_make_request); -static int should_fail_request(struct bio *bio) +static bool should_fail_request(struct hd_struct *part, unsigned int bytes) { - struct hd_struct *part = bio->bi_bdev->bd_part; - - if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail) - return should_fail(&fail_make_request, bio->bi_size); - - return 0; + return part->make_it_fail && should_fail(&fail_make_request, bytes); } static int __init fail_make_request_debugfs(void) { - return init_fault_attr_dentries(&fail_make_request, - "fail_make_request"); + struct dentry *dir = fault_create_debugfs_attr("fail_make_request", + NULL, &fail_make_request); + + return IS_ERR(dir) ? PTR_ERR(dir) : 0; } late_initcall(fail_make_request_debugfs); #else /* CONFIG_FAIL_MAKE_REQUEST */ -static inline int should_fail_request(struct bio *bio) +static inline bool should_fail_request(struct hd_struct *part, + unsigned int bytes) { - return 0; + return false; } #endif /* CONFIG_FAIL_MAKE_REQUEST */ @@ -1462,6 +1464,7 @@ static inline void __generic_make_request(struct bio *bio) old_dev = 0; do { char b[BDEVNAME_SIZE]; + struct hd_struct *part; q = bdev_get_queue(bio->bi_bdev); if (unlikely(!q)) { @@ -1485,7 +1488,10 @@ static inline void __generic_make_request(struct bio *bio) if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) goto end_io; - if (should_fail_request(bio)) + part = bio->bi_bdev->bd_part; + if (should_fail_request(part, bio->bi_size) || + should_fail_request(&part_to_disk(part)->part0, + bio->bi_size)) goto end_io; /* @@ -1696,15 +1702,14 @@ EXPORT_SYMBOL_GPL(blk_rq_check_limits); int blk_insert_cloned_request(struct request_queue *q, struct request *rq) { unsigned long flags; + int where = ELEVATOR_INSERT_BACK; if (blk_rq_check_limits(q, rq)) return -EIO; -#ifdef CONFIG_FAIL_MAKE_REQUEST - if (rq->rq_disk && rq->rq_disk->part0.make_it_fail && - should_fail(&fail_make_request, blk_rq_bytes(rq))) + if (rq->rq_disk && + should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq))) return -EIO; -#endif spin_lock_irqsave(q->queue_lock, flags); @@ -1714,7 +1719,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) */ BUG_ON(blk_queued_rq(rq)); - add_acct_request(q, rq, ELEVATOR_INSERT_BACK); + if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA)) + where = ELEVATOR_INSERT_FLUSH; + + add_acct_request(q, rq, where); spin_unlock_irqrestore(q->queue_lock, flags); return 0; @@ -2271,7 +2279,7 @@ static bool blk_end_bidi_request(struct request *rq, int error, * %false - we are done with this request * %true - still buffers pending for this request **/ -static bool __blk_end_bidi_request(struct request *rq, int error, +bool __blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes) { if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) @@ -2626,6 +2634,7 @@ void blk_start_plug(struct blk_plug *plug) INIT_LIST_HEAD(&plug->list); INIT_LIST_HEAD(&plug->cb_list); plug->should_sort = 0; + plug->count = 0; /* * If this is a nested plug, don't actually assign it. It will be @@ -2709,6 +2718,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) return; list_splice_init(&plug->list, &list); + plug->count = 0; if (plug->should_sort) { list_sort(NULL, &list, plug_rq_cmp); diff --git a/block/blk-exec.c b/block/blk-exec.c index 8a0e7ec056e7..a1ebceb332f9 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -50,6 +50,13 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, { int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; + if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { + rq->errors = -ENXIO; + if (rq->end_io) + rq->end_io(rq, rq->errors); + return; + } + rq->rq_disk = bd_disk; rq->end_io = done; WARN_ON(irqs_disabled()); diff --git a/block/blk-flush.c b/block/blk-flush.c index bb21e4c36f70..491eb30a242d 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -95,11 +95,12 @@ static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq) { unsigned int policy = 0; + if (blk_rq_sectors(rq)) + policy |= REQ_FSEQ_DATA; + if (fflags & REQ_FLUSH) { if (rq->cmd_flags & REQ_FLUSH) policy |= REQ_FSEQ_PREFLUSH; - if (blk_rq_sectors(rq)) - policy |= REQ_FSEQ_DATA; if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA)) policy |= REQ_FSEQ_POSTFLUSH; } @@ -122,7 +123,7 @@ static void blk_flush_restore_request(struct request *rq) /* make @rq a normal request */ rq->cmd_flags &= ~REQ_FLUSH_SEQ; - rq->end_io = NULL; + rq->end_io = rq->flush.saved_end_io; } /** @@ -300,9 +301,6 @@ void blk_insert_flush(struct request *rq) unsigned int fflags = q->flush_flags; /* may change, cache */ unsigned int policy = blk_flush_policy(fflags, rq); - BUG_ON(rq->end_io); - BUG_ON(!rq->bio || rq->bio != rq->biotail); - /* * @policy now records what operations need to be done. Adjust * REQ_FLUSH and FUA for the driver. @@ -312,6 +310,19 @@ void blk_insert_flush(struct request *rq) rq->cmd_flags &= ~REQ_FUA; /* + * An empty flush handed down from a stacking driver may + * translate into nothing if the underlying device does not + * advertise a write-back cache. In this case, simply + * complete the request. + */ + if (!policy) { + __blk_end_bidi_request(rq, 0, 0, 0); + return; + } + + BUG_ON(!rq->bio || rq->bio != rq->biotail); + + /* * If there's data but flush is not necessary, the request can be * processed directly without going through flush machinery. Queue * for normal execution. @@ -319,6 +330,7 @@ void blk_insert_flush(struct request *rq) if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { list_add_tail(&rq->queuelist, &q->queue_head); + blk_run_queue_async(q); return; } @@ -329,6 +341,7 @@ void blk_insert_flush(struct request *rq) memset(&rq->flush, 0, sizeof(rq->flush)); INIT_LIST_HEAD(&rq->flush.list); rq->cmd_flags |= REQ_FLUSH_SEQ; + rq->flush.saved_end_io = rq->end_io; /* Usually NULL */ rq->end_io = flush_data_end_io; blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 342eae9b0d3c..6f9bbd978653 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -82,26 +82,26 @@ void exit_io_context(struct task_struct *task) struct io_context *alloc_io_context(gfp_t gfp_flags, int node) { - struct io_context *ret; + struct io_context *ioc; - ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); - if (ret) { - atomic_long_set(&ret->refcount, 1); - atomic_set(&ret->nr_tasks, 1); - spin_lock_init(&ret->lock); - ret->ioprio_changed = 0; - ret->ioprio = 0; - ret->last_waited = 0; /* doesn't matter... */ - ret->nr_batch_requests = 0; /* because this is 0 */ - INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH); - INIT_HLIST_HEAD(&ret->cic_list); - ret->ioc_data = NULL; + ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); + if (ioc) { + atomic_long_set(&ioc->refcount, 1); + atomic_set(&ioc->nr_tasks, 1); + spin_lock_init(&ioc->lock); + ioc->ioprio_changed = 0; + ioc->ioprio = 0; + ioc->last_waited = 0; /* doesn't matter... */ + ioc->nr_batch_requests = 0; /* because this is 0 */ + INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH); + INIT_HLIST_HEAD(&ioc->cic_list); + ioc->ioc_data = NULL; #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) - ret->cgroup_changed = 0; + ioc->cgroup_changed = 0; #endif } - return ret; + return ioc; } /* @@ -139,19 +139,19 @@ struct io_context *current_io_context(gfp_t gfp_flags, int node) */ struct io_context *get_io_context(gfp_t gfp_flags, int node) { - struct io_context *ret = NULL; + struct io_context *ioc = NULL; /* * Check for unlikely race with exiting task. ioc ref count is * zero when ioc is being detached. */ do { - ret = current_io_context(gfp_flags, node); - if (unlikely(!ret)) + ioc = current_io_context(gfp_flags, node); + if (unlikely(!ioc)) break; - } while (!atomic_long_inc_not_zero(&ret->refcount)); + } while (!atomic_long_inc_not_zero(&ioc->refcount)); - return ret; + return ioc; } EXPORT_SYMBOL(get_io_context); diff --git a/block/blk-lib.c b/block/blk-lib.c index 78e627e2581d..2b461b496a78 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -59,7 +59,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, * granularity */ max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9); - if (q->limits.discard_granularity) { + if (unlikely(!max_discard_sectors)) { + /* Avoid infinite loop below. Being cautious never hurts. */ + return -EOPNOTSUPP; + } else if (q->limits.discard_granularity) { unsigned int disc_sects = q->limits.discard_granularity >> 9; max_discard_sectors &= ~(disc_sects - 1); diff --git a/block/blk-softirq.c b/block/blk-softirq.c index ee9c21602228..58340d0cb23a 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -103,24 +103,35 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = { void __blk_complete_request(struct request *req) { + int ccpu, cpu, group_cpu = NR_CPUS; struct request_queue *q = req->q; unsigned long flags; - int ccpu, cpu, group_cpu; BUG_ON(!q->softirq_done_fn); local_irq_save(flags); cpu = smp_processor_id(); - group_cpu = blk_cpu_to_group(cpu); /* * Select completion CPU */ - if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) + if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) { ccpu = req->cpu; - else + if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) { + ccpu = blk_cpu_to_group(ccpu); + group_cpu = blk_cpu_to_group(cpu); + } + } else ccpu = cpu; + /* + * If current CPU and requested CPU are in the same group, running + * softirq in current CPU. One might concern this is just like + * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is + * running in interrupt handler, and currently I/O controller doesn't + * support multiple interrupts, so current CPU is unique actually. This + * avoids IPI sending from current CPU to the first CPU of a group. + */ if (ccpu == cpu || ccpu == group_cpu) { struct list_head *list; do_local: diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index d935bd859c87..0ee17b5e7fb6 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -244,8 +244,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page) { bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags); + bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags); - return queue_var_show(set, page); + return queue_var_show(set << force, page); } static ssize_t @@ -257,10 +258,14 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) ret = queue_var_store(&val, page, count); spin_lock_irq(q->queue_lock); - if (val) + if (val) { queue_flag_set(QUEUE_FLAG_SAME_COMP, q); - else - queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); + if (val == 2) + queue_flag_set(QUEUE_FLAG_SAME_FORCE, q); + } else { + queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); + queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); + } spin_unlock_irq(q->queue_lock); #endif return ret; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index a62be8d0dc1b..a19f58c6fc3a 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -142,9 +142,9 @@ static inline struct throtl_grp *tg_of_blkg(struct blkio_group *blkg) return NULL; } -static inline int total_nr_queued(struct throtl_data *td) +static inline unsigned int total_nr_queued(struct throtl_data *td) { - return (td->nr_queued[0] + td->nr_queued[1]); + return td->nr_queued[0] + td->nr_queued[1]; } static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg) @@ -746,7 +746,7 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg, static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) { bool rw = bio_data_dir(bio); - bool sync = bio->bi_rw & REQ_SYNC; + bool sync = rw_is_sync(bio->bi_rw); /* Charge the bio to the group */ tg->bytes_disp[rw] += bio->bi_size; @@ -927,7 +927,7 @@ static int throtl_dispatch(struct request_queue *q) bio_list_init(&bio_list_on_stack); - throtl_log(td, "dispatch nr_queued=%lu read=%u write=%u", + throtl_log(td, "dispatch nr_queued=%u read=%u write=%u", total_nr_queued(td), td->nr_queued[READ], td->nr_queued[WRITE]); @@ -970,7 +970,7 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) struct delayed_work *dwork = &td->throtl_work; /* schedule work if limits changed even if no bio is queued */ - if (total_nr_queued(td) > 0 || td->limits_changed) { + if (total_nr_queued(td) || td->limits_changed) { /* * We might have a work scheduled to be executed in future. * Cancel that and schedule a new one. @@ -1150,7 +1150,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) if (tg_no_rule_group(tg, rw)) { blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, - rw, bio->bi_rw & REQ_SYNC); + rw, rw_is_sync(bio->bi_rw)); rcu_read_unlock(); return 0; } @@ -1204,7 +1204,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) } queue_bio: - throtl_log_tg(td, tg, "[%c] bio. bdisp=%u sz=%u bps=%llu" + throtl_log_tg(td, tg, "[%c] bio. bdisp=%llu sz=%u bps=%llu" " iodisp=%u iops=%u queued=%d/%d", rw == READ ? 'R' : 'W', tg->bytes_disp[rw], bio->bi_size, tg->bps[rw], diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 4f0c06c7a338..780354888958 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -28,7 +28,10 @@ int blk_should_fake_timeout(struct request_queue *q) static int __init fail_io_timeout_debugfs(void) { - return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout"); + struct dentry *dir = fault_create_debugfs_attr("fail_io_timeout", + NULL, &fail_io_timeout); + + return IS_ERR(dir) ? PTR_ERR(dir) : 0; } late_initcall(fail_io_timeout_debugfs); diff --git a/block/blk.h b/block/blk.h index d6586287adc9..20b900a377c9 100644 --- a/block/blk.h +++ b/block/blk.h @@ -17,6 +17,8 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq, struct bio *bio); void blk_dequeue_request(struct request *rq); void __blk_queue_free_tags(struct request_queue *q); +bool __blk_end_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, unsigned int bidi_bytes); void blk_rq_timed_out_timer(unsigned long data); void blk_delete_timer(struct request *); diff --git a/block/bsg-lib.c b/block/bsg-lib.c new file mode 100644 index 000000000000..6690e6e41037 --- /dev/null +++ b/block/bsg-lib.c @@ -0,0 +1,298 @@ +/* + * BSG helper library + * + * Copyright (C) 2008 James Smart, Emulex Corporation + * Copyright (C) 2011 Red Hat, Inc. All rights reserved. + * Copyright (C) 2011 Mike Christie + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#include <linux/slab.h> +#include <linux/blkdev.h> +#include <linux/delay.h> +#include <linux/scatterlist.h> +#include <linux/bsg-lib.h> +#include <linux/module.h> +#include <scsi/scsi_cmnd.h> + +/** + * bsg_destroy_job - routine to teardown/delete a bsg job + * @job: bsg_job that is to be torn down + */ +static void bsg_destroy_job(struct bsg_job *job) +{ + put_device(job->dev); /* release reference for the request */ + + kfree(job->request_payload.sg_list); + kfree(job->reply_payload.sg_list); + kfree(job); +} + +/** + * bsg_job_done - completion routine for bsg requests + * @job: bsg_job that is complete + * @result: job reply result + * @reply_payload_rcv_len: length of payload recvd + * + * The LLD should call this when the bsg job has completed. + */ +void bsg_job_done(struct bsg_job *job, int result, + unsigned int reply_payload_rcv_len) +{ + struct request *req = job->req; + struct request *rsp = req->next_rq; + int err; + + err = job->req->errors = result; + if (err < 0) + /* we're only returning the result field in the reply */ + job->req->sense_len = sizeof(u32); + else + job->req->sense_len = job->reply_len; + /* we assume all request payload was transferred, residual == 0 */ + req->resid_len = 0; + + if (rsp) { + WARN_ON(reply_payload_rcv_len > rsp->resid_len); + + /* set reply (bidi) residual */ + rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len); + } + blk_complete_request(req); +} +EXPORT_SYMBOL_GPL(bsg_job_done); + +/** + * bsg_softirq_done - softirq done routine for destroying the bsg requests + * @rq: BSG request that holds the job to be destroyed + */ +static void bsg_softirq_done(struct request *rq) +{ + struct bsg_job *job = rq->special; + + blk_end_request_all(rq, rq->errors); + bsg_destroy_job(job); +} + +static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req) +{ + size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments); + + BUG_ON(!req->nr_phys_segments); + + buf->sg_list = kzalloc(sz, GFP_KERNEL); + if (!buf->sg_list) + return -ENOMEM; + sg_init_table(buf->sg_list, req->nr_phys_segments); + buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list); + buf->payload_len = blk_rq_bytes(req); + return 0; +} + +/** + * bsg_create_job - create the bsg_job structure for the bsg request + * @dev: device that is being sent the bsg request + * @req: BSG request that needs a job structure + */ +static int bsg_create_job(struct device *dev, struct request *req) +{ + struct request *rsp = req->next_rq; + struct request_queue *q = req->q; + struct bsg_job *job; + int ret; + + BUG_ON(req->special); + + job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL); + if (!job) + return -ENOMEM; + + req->special = job; + job->req = req; + if (q->bsg_job_size) + job->dd_data = (void *)&job[1]; + job->request = req->cmd; + job->request_len = req->cmd_len; + job->reply = req->sense; + job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer + * allocated */ + if (req->bio) { + ret = bsg_map_buffer(&job->request_payload, req); + if (ret) + goto failjob_rls_job; + } + if (rsp && rsp->bio) { + ret = bsg_map_buffer(&job->reply_payload, rsp); + if (ret) + goto failjob_rls_rqst_payload; + } + job->dev = dev; + /* take a reference for the request */ + get_device(job->dev); + return 0; + +failjob_rls_rqst_payload: + kfree(job->request_payload.sg_list); +failjob_rls_job: + kfree(job); + return -ENOMEM; +} + +/* + * bsg_goose_queue - restart queue in case it was stopped + * @q: request q to be restarted + */ +void bsg_goose_queue(struct request_queue *q) +{ + if (!q) + return; + + blk_run_queue_async(q); +} +EXPORT_SYMBOL_GPL(bsg_goose_queue); + +/** + * bsg_request_fn - generic handler for bsg requests + * @q: request queue to manage + * + * On error the create_bsg_job function should return a -Exyz error value + * that will be set to the req->errors. + * + * Drivers/subsys should pass this to the queue init function. + */ +void bsg_request_fn(struct request_queue *q) +{ + struct device *dev = q->queuedata; + struct request *req; + struct bsg_job *job; + int ret; + + if (!get_device(dev)) + return; + + while (1) { + req = blk_fetch_request(q); + if (!req) + break; + spin_unlock_irq(q->queue_lock); + + ret = bsg_create_job(dev, req); + if (ret) { + req->errors = ret; + blk_end_request_all(req, ret); + spin_lock_irq(q->queue_lock); + continue; + } + + job = req->special; + ret = q->bsg_job_fn(job); + spin_lock_irq(q->queue_lock); + if (ret) + break; + } + + spin_unlock_irq(q->queue_lock); + put_device(dev); + spin_lock_irq(q->queue_lock); +} +EXPORT_SYMBOL_GPL(bsg_request_fn); + +/** + * bsg_setup_queue - Create and add the bsg hooks so we can receive requests + * @dev: device to attach bsg device to + * @q: request queue setup by caller + * @name: device to give bsg device + * @job_fn: bsg job handler + * @dd_job_size: size of LLD data needed for each job + * + * The caller should have setup the reuqest queue with bsg_request_fn + * as the request_fn. + */ +int bsg_setup_queue(struct device *dev, struct request_queue *q, + char *name, bsg_job_fn *job_fn, int dd_job_size) +{ + int ret; + + q->queuedata = dev; + q->bsg_job_size = dd_job_size; + q->bsg_job_fn = job_fn; + queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); + blk_queue_softirq_done(q, bsg_softirq_done); + blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); + + ret = bsg_register_queue(q, dev, name, NULL); + if (ret) { + printk(KERN_ERR "%s: bsg interface failed to " + "initialize - register queue\n", dev->kobj.name); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(bsg_setup_queue); + +/** + * bsg_remove_queue - Deletes the bsg dev from the q + * @q: the request_queue that is to be torn down. + * + * Notes: + * Before unregistering the queue empty any requests that are blocked + */ +void bsg_remove_queue(struct request_queue *q) +{ + struct request *req; /* block request */ + int counts; /* totals for request_list count and starved */ + + if (!q) + return; + + /* Stop taking in new requests */ + spin_lock_irq(q->queue_lock); + blk_stop_queue(q); + + /* drain all requests in the queue */ + while (1) { + /* need the lock to fetch a request + * this may fetch the same reqeust as the previous pass + */ + req = blk_fetch_request(q); + /* save requests in use and starved */ + counts = q->rq.count[0] + q->rq.count[1] + + q->rq.starved[0] + q->rq.starved[1]; + spin_unlock_irq(q->queue_lock); + /* any requests still outstanding? */ + if (counts == 0) + break; + + /* This may be the same req as the previous iteration, + * always send the blk_end_request_all after a prefetch. + * It is not okay to not end the request because the + * prefetch started the request. + */ + if (req) { + /* return -ENXIO to indicate that this queue is + * going away + */ + req->errors = -ENXIO; + blk_end_request_all(req, -ENXIO); + } + + msleep(200); /* allow bsg to possibly finish */ + spin_lock_irq(q->queue_lock); + } + bsg_unregister_queue(q); +} +EXPORT_SYMBOL_GPL(bsg_remove_queue); diff --git a/block/bsg.c b/block/bsg.c index 0c8b64a16484..702f1316bb8f 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -182,7 +182,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq, return -ENOMEM; } - if (copy_from_user(rq->cmd, (void *)(unsigned long)hdr->request, + if (copy_from_user(rq->cmd, (void __user *)(unsigned long)hdr->request, hdr->request_len)) return -EFAULT; @@ -249,7 +249,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, struct request *rq, *next_rq = NULL; int ret, rw; unsigned int dxfer_len; - void *dxferp = NULL; + void __user *dxferp = NULL; struct bsg_class_device *bcd = &q->bsg_dev; /* if the LLD has been removed then the bsg_unregister_queue will @@ -291,7 +291,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, rq->next_rq = next_rq; next_rq->cmd_type = rq->cmd_type; - dxferp = (void*)(unsigned long)hdr->din_xferp; + dxferp = (void __user *)(unsigned long)hdr->din_xferp; ret = blk_rq_map_user(q, next_rq, NULL, dxferp, hdr->din_xfer_len, GFP_KERNEL); if (ret) @@ -300,10 +300,10 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, if (hdr->dout_xfer_len) { dxfer_len = hdr->dout_xfer_len; - dxferp = (void*)(unsigned long)hdr->dout_xferp; + dxferp = (void __user *)(unsigned long)hdr->dout_xferp; } else if (hdr->din_xfer_len) { dxfer_len = hdr->din_xfer_len; - dxferp = (void*)(unsigned long)hdr->din_xferp; + dxferp = (void __user *)(unsigned long)hdr->din_xferp; } else dxfer_len = 0; @@ -445,7 +445,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, int len = min_t(unsigned int, hdr->max_response_len, rq->sense_len); - ret = copy_to_user((void*)(unsigned long)hdr->response, + ret = copy_to_user((void __user *)(unsigned long)hdr->response, rq->sense, len); if (!ret) hdr->response_len = len; @@ -606,7 +606,7 @@ bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) ret = __bsg_read(buf, count, bd, NULL, &bytes_read); *ppos = bytes_read; - if (!bytes_read || (bytes_read && err_block_err(ret))) + if (!bytes_read || err_block_err(ret)) bytes_read = ret; return bytes_read; @@ -686,7 +686,7 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) /* * return bytes written on non-fatal errors */ - if (!bytes_written || (bytes_written && err_block_err(ret))) + if (!bytes_written || err_block_err(ret)) bytes_written = ret; dprintk("%s: returning %Zd\n", bd->name, bytes_written); @@ -878,7 +878,7 @@ static unsigned int bsg_poll(struct file *file, poll_table *wait) spin_lock_irq(&bd->lock); if (!list_empty(&bd->done_list)) mask |= POLLIN | POLLRDNORM; - if (bd->queued_cmds >= bd->max_queue) + if (bd->queued_cmds < bd->max_queue) mask |= POLLOUT; spin_unlock_irq(&bd->lock); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 3c7b537bf908..a33bd4377c61 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -87,9 +87,10 @@ struct cfq_rb_root { unsigned count; unsigned total_weight; u64 min_vdisktime; + struct cfq_ttime ttime; }; -#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \ - .count = 0, .min_vdisktime = 0, } +#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \ + .ttime = {.last_end_request = jiffies,},} /* * Per process-grouping structure @@ -136,7 +137,7 @@ struct cfq_queue { /* io prio of this group */ unsigned short ioprio, org_ioprio; - unsigned short ioprio_class, org_ioprio_class; + unsigned short ioprio_class; pid_t pid; @@ -212,6 +213,7 @@ struct cfq_group { #endif /* number of requests that are on the dispatch list or inside driver */ int dispatched; + struct cfq_ttime ttime; }; /* @@ -393,6 +395,18 @@ CFQ_CFQQ_FNS(wait_busy); j++, st = i < IDLE_WORKLOAD ? \ &cfqg->service_trees[i][j]: NULL) \ +static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd, + struct cfq_ttime *ttime, bool group_idle) +{ + unsigned long slice; + if (!sample_valid(ttime->ttime_samples)) + return false; + if (group_idle) + slice = cfqd->cfq_group_idle; + else + slice = cfqd->cfq_slice_idle; + return ttime->ttime_mean > slice; +} static inline bool iops_mode(struct cfq_data *cfqd) { @@ -988,9 +1002,10 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, st->min_vdisktime); - cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u" - " sect=%u", used_sl, cfqq->slice_dispatch, charge, - iops_mode(cfqd), cfqq->nr_sectors); + cfq_log_cfqq(cfqq->cfqd, cfqq, + "sl_used=%u disp=%u charge=%u iops=%u sect=%lu", + used_sl, cfqq->slice_dispatch, charge, + iops_mode(cfqd), cfqq->nr_sectors); cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl, unaccounted_sl); cfq_blkiocg_set_start_empty_time(&cfqg->blkg); @@ -1004,8 +1019,8 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg) return NULL; } -void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg, - unsigned int weight) +static void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg, + unsigned int weight) { struct cfq_group *cfqg = cfqg_of_blkg(blkg); cfqg->new_weight = weight; @@ -1058,6 +1073,8 @@ static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd) *st = CFQ_RB_ROOT; RB_CLEAR_NODE(&cfqg->rb_node); + cfqg->ttime.last_end_request = jiffies; + /* * Take the initial reference that will be released on destroy * This can be thought of a joint reference by cgroup and @@ -1197,6 +1214,9 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg) hlist_del_init(&cfqg->cfqd_node); + BUG_ON(cfqd->nr_blkcg_linked_grps <= 0); + cfqd->nr_blkcg_linked_grps--; + /* * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. @@ -1234,7 +1254,7 @@ static void cfq_release_cfq_groups(struct cfq_data *cfqd) * it should not be NULL as even if elevator was exiting, cgroup deltion * path got to it first. */ -void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg) +static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg) { unsigned long flags; struct cfq_data *cfqd = key; @@ -1501,16 +1521,11 @@ static void cfq_add_rq_rb(struct request *rq) { struct cfq_queue *cfqq = RQ_CFQQ(rq); struct cfq_data *cfqd = cfqq->cfqd; - struct request *__alias, *prev; + struct request *prev; cfqq->queued[rq_is_sync(rq)]++; - /* - * looks a little odd, but the first insert might return an alias. - * if that happens, put the alias on the dispatch list - */ - while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL) - cfq_dispatch_insert(cfqd->queue, __alias); + elv_rb_add(&cfqq->sort_list, rq); if (!cfq_cfqq_on_rr(cfqq)) cfq_add_cfqq_rr(cfqd, cfqq); @@ -1968,7 +1983,8 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) * Otherwise, we do only if they are the last ones * in their service tree. */ - if (service_tree->count == 1 && cfq_cfqq_sync(cfqq)) + if (service_tree->count == 1 && cfq_cfqq_sync(cfqq) && + !cfq_io_thinktime_big(cfqd, &service_tree->ttime, false)) return true; cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", service_tree->count); @@ -2021,10 +2037,10 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) * slice, then don't idle. This avoids overrunning the allotted * time slice. */ - if (sample_valid(cic->ttime_samples) && - (cfqq->slice_end - jiffies < cic->ttime_mean)) { - cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%d", - cic->ttime_mean); + if (sample_valid(cic->ttime.ttime_samples) && + (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) { + cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu", + cic->ttime.ttime_mean); return; } @@ -2380,8 +2396,9 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) * this group, wait for requests to complete. */ check_group_idle: - if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 - && cfqq->cfqg->dispatched) { + if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 && + cfqq->cfqg->dispatched && + !cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) { cfqq = NULL; goto keep_queue; } @@ -2772,8 +2789,14 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, smp_wmb(); cic->key = cfqd_dead_key(cfqd); - if (ioc->ioc_data == cic) + rcu_read_lock(); + if (rcu_dereference(ioc->ioc_data) == cic) { + rcu_read_unlock(); + spin_lock(&ioc->lock); rcu_assign_pointer(ioc->ioc_data, NULL); + spin_unlock(&ioc->lock); + } else + rcu_read_unlock(); if (cic->cfqq[BLK_RW_ASYNC]) { cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]); @@ -2826,7 +2849,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO, cfqd->queue->node); if (cic) { - cic->last_end_request = jiffies; + cic->ttime.last_end_request = jiffies; INIT_LIST_HEAD(&cic->queue_list); INIT_HLIST_NODE(&cic->cic_list); cic->dtor = cfq_free_io_context; @@ -2876,7 +2899,6 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc) * elevate the priority of this queue */ cfqq->org_ioprio = cfqq->ioprio; - cfqq->org_ioprio_class = cfqq->ioprio_class; cfq_clear_cfqq_prio_changed(cfqq); } @@ -3080,7 +3102,8 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc, spin_lock_irqsave(&ioc->lock, flags); - BUG_ON(ioc->ioc_data == cic); + BUG_ON(rcu_dereference_check(ioc->ioc_data, + lockdep_is_held(&ioc->lock)) == cic); radix_tree_delete(&ioc->radix_root, cfqd->cic_index); hlist_del_rcu(&cic->cic_list); @@ -3213,14 +3236,28 @@ err: } static void -cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) +__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle) { - unsigned long elapsed = jiffies - cic->last_end_request; - unsigned long ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle); + unsigned long elapsed = jiffies - ttime->last_end_request; + elapsed = min(elapsed, 2UL * slice_idle); + + ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8; + ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8; + ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples; +} - cic->ttime_samples = (7*cic->ttime_samples + 256) / 8; - cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8; - cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples; +static void +cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq, + struct cfq_io_context *cic) +{ + if (cfq_cfqq_sync(cfqq)) { + __cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle); + __cfq_update_io_thinktime(&cfqq->service_tree->ttime, + cfqd->cfq_slice_idle); + } +#ifdef CONFIG_CFQ_GROUP_IOSCHED + __cfq_update_io_thinktime(&cfqq->cfqg->ttime, cfqd->cfq_group_idle); +#endif } static void @@ -3269,8 +3306,8 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) enable_idle = 0; - else if (sample_valid(cic->ttime_samples)) { - if (cic->ttime_mean > cfqd->cfq_slice_idle) + else if (sample_valid(cic->ttime.ttime_samples)) { + if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle) enable_idle = 0; else enable_idle = 1; @@ -3405,7 +3442,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (rq->cmd_flags & REQ_META) cfqq->meta_pending++; - cfq_update_io_thinktime(cfqd, cic); + cfq_update_io_thinktime(cfqd, cfqq, cic); cfq_update_io_seektime(cfqd, cfqq, rq); cfq_update_idle_window(cfqd, cfqq, cic); @@ -3512,12 +3549,16 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) if (cfqq->cfqg->nr_cfqq > 1) return false; + /* the only queue in the group, but think time is big */ + if (cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) + return false; + if (cfq_slice_used(cfqq)) return true; /* if slice left is less than think time, wait busy */ - if (cic && sample_valid(cic->ttime_samples) - && (cfqq->slice_end - jiffies < cic->ttime_mean)) + if (cic && sample_valid(cic->ttime.ttime_samples) + && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) return true; /* @@ -3558,11 +3599,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; if (sync) { - RQ_CIC(rq)->last_end_request = now; + struct cfq_rb_root *service_tree; + + RQ_CIC(rq)->ttime.last_end_request = now; + + if (cfq_cfqq_on_rr(cfqq)) + service_tree = cfqq->service_tree; + else + service_tree = service_tree_for(cfqq->cfqg, + cfqq_prio(cfqq), cfqq_type(cfqq)); + service_tree->ttime.last_end_request = now; if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now)) cfqd->last_delayed_sync = now; } +#ifdef CONFIG_CFQ_GROUP_IOSCHED + cfqq->cfqg->ttime.last_end_request = now; +#endif + /* * If this is the active queue, check if it needs to be expired, * or if we want to idle in case it has no pending requests. @@ -3608,30 +3662,6 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfq_schedule_dispatch(cfqd); } -/* - * we temporarily boost lower priority queues if they are holding fs exclusive - * resources. they are boosted to normal prio (CLASS_BE/4) - */ -static void cfq_prio_boost(struct cfq_queue *cfqq) -{ - if (has_fs_excl()) { - /* - * boost idle prio on transactions that would lock out other - * users of the filesystem - */ - if (cfq_class_idle(cfqq)) - cfqq->ioprio_class = IOPRIO_CLASS_BE; - if (cfqq->ioprio > IOPRIO_NORM) - cfqq->ioprio = IOPRIO_NORM; - } else { - /* - * unboost the queue (if needed) - */ - cfqq->ioprio_class = cfqq->org_ioprio_class; - cfqq->ioprio = cfqq->org_ioprio; - } -} - static inline int __cfq_may_queue(struct cfq_queue *cfqq) { if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) { @@ -3662,7 +3692,6 @@ static int cfq_may_queue(struct request_queue *q, int rw) cfqq = cic_to_cfqq(cic, rw_is_sync(rw)); if (cfqq) { cfq_init_prio_data(cfqq, cic->ioc); - cfq_prio_boost(cfqq); return __cfq_may_queue(cfqq); } diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index cc3eb78e333a..7b725020823c 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -208,19 +208,6 @@ static int compat_blkpg_ioctl(struct block_device *bdev, fmode_t mode, #define BLKBSZSET_32 _IOW(0x12, 113, int) #define BLKGETSIZE64_32 _IOR(0x12, 114, int) -struct compat_floppy_struct { - compat_uint_t size; - compat_uint_t sect; - compat_uint_t head; - compat_uint_t track; - compat_uint_t stretch; - unsigned char gap; - unsigned char rate; - unsigned char spec1; - unsigned char fmt_gap; - const compat_caddr_t name; -}; - struct compat_floppy_drive_params { char cmos; compat_ulong_t max_dtr; @@ -288,7 +275,6 @@ struct compat_floppy_write_errors { #define FDSETPRM32 _IOW(2, 0x42, struct compat_floppy_struct) #define FDDEFPRM32 _IOW(2, 0x43, struct compat_floppy_struct) -#define FDGETPRM32 _IOR(2, 0x04, struct compat_floppy_struct) #define FDSETDRVPRM32 _IOW(2, 0x90, struct compat_floppy_drive_params) #define FDGETDRVPRM32 _IOR(2, 0x11, struct compat_floppy_drive_params) #define FDGETDRVSTAT32 _IOR(2, 0x12, struct compat_floppy_drive_struct) diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 5139c0ea1864..c644137d9cd6 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -77,10 +77,8 @@ static void deadline_add_rq_rb(struct deadline_data *dd, struct request *rq) { struct rb_root *root = deadline_rb_root(dd, rq); - struct request *__alias; - while (unlikely(__alias = elv_rb_add(root, rq))) - deadline_move_request(dd, __alias); + elv_rb_add(root, rq); } static inline void diff --git a/block/elevator.c b/block/elevator.c index b0b38ce0dcb6..a3b64bc71d88 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -353,7 +353,7 @@ static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset) * RB-tree support functions for inserting/lookup/removal of requests * in a sorted RB tree. */ -struct request *elv_rb_add(struct rb_root *root, struct request *rq) +void elv_rb_add(struct rb_root *root, struct request *rq) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -365,15 +365,12 @@ struct request *elv_rb_add(struct rb_root *root, struct request *rq) if (blk_rq_pos(rq) < blk_rq_pos(__rq)) p = &(*p)->rb_left; - else if (blk_rq_pos(rq) > blk_rq_pos(__rq)) + else if (blk_rq_pos(rq) >= blk_rq_pos(__rq)) p = &(*p)->rb_right; - else - return __rq; } rb_link_node(&rq->rb_node, parent, p); rb_insert_color(&rq->rb_node, root); - return NULL; } EXPORT_SYMBOL(elv_rb_add); diff --git a/block/genhd.c b/block/genhd.c index 95822ae25cfe..e2f67902dd02 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -602,7 +602,7 @@ void add_disk(struct gendisk *disk) disk->major = MAJOR(devt); disk->first_minor = MINOR(devt); - /* Register BDI before referencing it from bdev */ + /* Register BDI before referencing it from bdev */ bdi = &disk->queue->backing_dev_info; bdi_register_dev(bdi, disk_devt(disk)); @@ -1018,14 +1018,6 @@ static const struct attribute_group *disk_attr_groups[] = { NULL }; -static void disk_free_ptbl_rcu_cb(struct rcu_head *head) -{ - struct disk_part_tbl *ptbl = - container_of(head, struct disk_part_tbl, rcu_head); - - kfree(ptbl); -} - /** * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way * @disk: disk to replace part_tbl for @@ -1046,7 +1038,7 @@ static void disk_replace_part_tbl(struct gendisk *disk, if (old_ptbl) { rcu_assign_pointer(old_ptbl->last_lookup, NULL); - call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb); + kfree_rcu(old_ptbl, rcu_head); } } @@ -1148,23 +1140,23 @@ static int diskstats_show(struct seq_file *seqf, void *v) "wsect wuse running use aveq" "\n\n"); */ - + disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); while ((hd = disk_part_iter_next(&piter))) { cpu = part_stat_lock(); part_round_stats(cpu, hd); part_stat_unlock(); - seq_printf(seqf, "%4d %7d %s %lu %lu %llu " - "%u %lu %lu %llu %u %u %u %u\n", + seq_printf(seqf, "%4d %7d %s %lu %lu %lu " + "%u %lu %lu %lu %u %u %u %u\n", MAJOR(part_devt(hd)), MINOR(part_devt(hd)), disk_name(gp, hd->partno, buf), part_stat_read(hd, ios[READ]), part_stat_read(hd, merges[READ]), - (unsigned long long)part_stat_read(hd, sectors[READ]), + part_stat_read(hd, sectors[READ]), jiffies_to_msecs(part_stat_read(hd, ticks[READ])), part_stat_read(hd, ios[WRITE]), part_stat_read(hd, merges[WRITE]), - (unsigned long long)part_stat_read(hd, sectors[WRITE]), + part_stat_read(hd, sectors[WRITE]), jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])), part_in_flight(hd), jiffies_to_msecs(part_stat_read(hd, io_ticks)), @@ -1172,7 +1164,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) ); } disk_part_iter_exit(&piter); - + return 0; } @@ -1371,6 +1363,7 @@ struct disk_events { struct gendisk *disk; /* the associated disk */ spinlock_t lock; + struct mutex block_mutex; /* protects blocking */ int block; /* event blocking depth */ unsigned int pending; /* events already sent out */ unsigned int clearing; /* events being cleared */ @@ -1414,22 +1407,44 @@ static unsigned long disk_events_poll_jiffies(struct gendisk *disk) return msecs_to_jiffies(intv_msecs); } -static void __disk_block_events(struct gendisk *disk, bool sync) +/** + * disk_block_events - block and flush disk event checking + * @disk: disk to block events for + * + * On return from this function, it is guaranteed that event checking + * isn't in progress and won't happen until unblocked by + * disk_unblock_events(). Events blocking is counted and the actual + * unblocking happens after the matching number of unblocks are done. + * + * Note that this intentionally does not block event checking from + * disk_clear_events(). + * + * CONTEXT: + * Might sleep. + */ +void disk_block_events(struct gendisk *disk) { struct disk_events *ev = disk->ev; unsigned long flags; bool cancel; + if (!ev) + return; + + /* + * Outer mutex ensures that the first blocker completes canceling + * the event work before further blockers are allowed to finish. + */ + mutex_lock(&ev->block_mutex); + spin_lock_irqsave(&ev->lock, flags); cancel = !ev->block++; spin_unlock_irqrestore(&ev->lock, flags); - if (cancel) { - if (sync) - cancel_delayed_work_sync(&disk->ev->dwork); - else - cancel_delayed_work(&disk->ev->dwork); - } + if (cancel) + cancel_delayed_work_sync(&disk->ev->dwork); + + mutex_unlock(&ev->block_mutex); } static void __disk_unblock_events(struct gendisk *disk, bool check_now) @@ -1461,27 +1476,6 @@ out_unlock: } /** - * disk_block_events - block and flush disk event checking - * @disk: disk to block events for - * - * On return from this function, it is guaranteed that event checking - * isn't in progress and won't happen until unblocked by - * disk_unblock_events(). Events blocking is counted and the actual - * unblocking happens after the matching number of unblocks are done. - * - * Note that this intentionally does not block event checking from - * disk_clear_events(). - * - * CONTEXT: - * Might sleep. - */ -void disk_block_events(struct gendisk *disk) -{ - if (disk->ev) - __disk_block_events(disk, true); -} - -/** * disk_unblock_events - unblock disk event checking * @disk: disk to unblock events for * @@ -1498,22 +1492,32 @@ void disk_unblock_events(struct gendisk *disk) } /** - * disk_check_events - schedule immediate event checking - * @disk: disk to check events for + * disk_flush_events - schedule immediate event checking and flushing + * @disk: disk to check and flush events for + * @mask: events to flush * - * Schedule immediate event checking on @disk if not blocked. + * Schedule immediate event checking on @disk if not blocked. Events in + * @mask are scheduled to be cleared from the driver. Note that this + * doesn't clear the events from @disk->ev. * * CONTEXT: - * Don't care. Safe to call from irq context. + * If @mask is non-zero must be called with bdev->bd_mutex held. */ -void disk_check_events(struct gendisk *disk) +void disk_flush_events(struct gendisk *disk, unsigned int mask) { - if (disk->ev) { - __disk_block_events(disk, false); - __disk_unblock_events(disk, true); + struct disk_events *ev = disk->ev; + + if (!ev) + return; + + spin_lock_irq(&ev->lock); + ev->clearing |= mask; + if (!ev->block) { + cancel_delayed_work(&ev->dwork); + queue_delayed_work(system_nrt_wq, &ev->dwork, 0); } + spin_unlock_irq(&ev->lock); } -EXPORT_SYMBOL_GPL(disk_check_events); /** * disk_clear_events - synchronously check, clear and return pending events @@ -1546,7 +1550,7 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) spin_unlock_irq(&ev->lock); /* uncondtionally schedule event check and wait for it to finish */ - __disk_block_events(disk, true); + disk_block_events(disk); queue_delayed_work(system_nrt_wq, &ev->dwork, 0); flush_delayed_work(&ev->dwork); __disk_unblock_events(disk, false); @@ -1664,7 +1668,7 @@ static ssize_t disk_events_poll_msecs_store(struct device *dev, if (intv < 0 && intv != -1) return -EINVAL; - __disk_block_events(disk, true); + disk_block_events(disk); disk->ev->poll_msecs = intv; __disk_unblock_events(disk, true); @@ -1703,7 +1707,7 @@ static int disk_events_set_dfl_poll_msecs(const char *val, mutex_lock(&disk_events_mutex); list_for_each_entry(ev, &disk_events, node) - disk_check_events(ev->disk); + disk_flush_events(ev->disk, 0); mutex_unlock(&disk_events_mutex); @@ -1750,6 +1754,7 @@ static void disk_add_events(struct gendisk *disk) INIT_LIST_HEAD(&ev->node); ev->disk = disk; spin_lock_init(&ev->lock); + mutex_init(&ev->block_mutex); ev->block = 1; ev->poll_msecs = -1; INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn); @@ -1770,7 +1775,7 @@ static void disk_del_events(struct gendisk *disk) if (!disk->ev) return; - __disk_block_events(disk, true); + disk_block_events(disk); mutex_lock(&disk_events_mutex); list_del_init(&disk->ev->node); |