aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig10
-rw-r--r--block/Makefile1
-rw-r--r--block/blk-core.c54
-rw-r--r--block/blk-exec.c7
-rw-r--r--block/blk-flush.c25
-rw-r--r--block/blk-ioc.c40
-rw-r--r--block/blk-lib.c5
-rw-r--r--block/blk-softirq.c19
-rw-r--r--block/blk-sysfs.c13
-rw-r--r--block/blk-throttle.c14
-rw-r--r--block/blk-timeout.c5
-rw-r--r--block/blk.h2
-rw-r--r--block/bsg-lib.c298
-rw-r--r--block/bsg.c18
-rw-r--r--block/cfq-iosched.c157
-rw-r--r--block/compat_ioctl.c14
-rw-r--r--block/deadline-iosched.c4
-rw-r--r--block/elevator.c7
-rw-r--r--block/genhd.c119
19 files changed, 595 insertions, 217 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 60be1e0455da..e97934eececa 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -65,6 +65,16 @@ config BLK_DEV_BSG
If unsure, say Y.
+config BLK_DEV_BSGLIB
+ bool "Block layer SG support v4 helper lib"
+ default n
+ select BLK_DEV_BSG
+ help
+ Subsystems will normally enable this if needed. Users will not
+ normally need to manually enable this.
+
+ If unsure, say N.
+
config BLK_DEV_INTEGRITY
bool "Block layer data integrity support"
---help---
diff --git a/block/Makefile b/block/Makefile
index 0fec4b3fab51..514c6e4f427a 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
+obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
diff --git a/block/blk-core.c b/block/blk-core.c
index d2f8f4049abd..90e1ffdeb415 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -839,6 +839,9 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
{
struct request *rq;
+ if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+ return NULL;
+
BUG_ON(rw != READ && rw != WRITE);
spin_lock_irq(q->queue_lock);
@@ -1279,10 +1282,8 @@ get_rq:
init_request_from_bio(req, bio);
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
- bio_flagged(bio, BIO_CPU_AFFINE)) {
- req->cpu = blk_cpu_to_group(get_cpu());
- put_cpu();
- }
+ bio_flagged(bio, BIO_CPU_AFFINE))
+ req->cpu = raw_smp_processor_id();
plug = current->plug;
if (plug) {
@@ -1302,7 +1303,10 @@ get_rq:
plug->should_sort = 1;
}
list_add_tail(&req->queuelist, &plug->list);
+ plug->count++;
drive_stat_acct(req, 1);
+ if (plug->count >= BLK_MAX_REQUEST_COUNT)
+ blk_flush_plug_list(plug, false);
} else {
spin_lock_irq(q->queue_lock);
add_acct_request(q, req, where);
@@ -1357,29 +1361,27 @@ static int __init setup_fail_make_request(char *str)
}
__setup("fail_make_request=", setup_fail_make_request);
-static int should_fail_request(struct bio *bio)
+static bool should_fail_request(struct hd_struct *part, unsigned int bytes)
{
- struct hd_struct *part = bio->bi_bdev->bd_part;
-
- if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail)
- return should_fail(&fail_make_request, bio->bi_size);
-
- return 0;
+ return part->make_it_fail && should_fail(&fail_make_request, bytes);
}
static int __init fail_make_request_debugfs(void)
{
- return init_fault_attr_dentries(&fail_make_request,
- "fail_make_request");
+ struct dentry *dir = fault_create_debugfs_attr("fail_make_request",
+ NULL, &fail_make_request);
+
+ return IS_ERR(dir) ? PTR_ERR(dir) : 0;
}
late_initcall(fail_make_request_debugfs);
#else /* CONFIG_FAIL_MAKE_REQUEST */
-static inline int should_fail_request(struct bio *bio)
+static inline bool should_fail_request(struct hd_struct *part,
+ unsigned int bytes)
{
- return 0;
+ return false;
}
#endif /* CONFIG_FAIL_MAKE_REQUEST */
@@ -1462,6 +1464,7 @@ static inline void __generic_make_request(struct bio *bio)
old_dev = 0;
do {
char b[BDEVNAME_SIZE];
+ struct hd_struct *part;
q = bdev_get_queue(bio->bi_bdev);
if (unlikely(!q)) {
@@ -1485,7 +1488,10 @@ static inline void __generic_make_request(struct bio *bio)
if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
goto end_io;
- if (should_fail_request(bio))
+ part = bio->bi_bdev->bd_part;
+ if (should_fail_request(part, bio->bi_size) ||
+ should_fail_request(&part_to_disk(part)->part0,
+ bio->bi_size))
goto end_io;
/*
@@ -1696,15 +1702,14 @@ EXPORT_SYMBOL_GPL(blk_rq_check_limits);
int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
{
unsigned long flags;
+ int where = ELEVATOR_INSERT_BACK;
if (blk_rq_check_limits(q, rq))
return -EIO;
-#ifdef CONFIG_FAIL_MAKE_REQUEST
- if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&
- should_fail(&fail_make_request, blk_rq_bytes(rq)))
+ if (rq->rq_disk &&
+ should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
return -EIO;
-#endif
spin_lock_irqsave(q->queue_lock, flags);
@@ -1714,7 +1719,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
*/
BUG_ON(blk_queued_rq(rq));
- add_acct_request(q, rq, ELEVATOR_INSERT_BACK);
+ if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA))
+ where = ELEVATOR_INSERT_FLUSH;
+
+ add_acct_request(q, rq, where);
spin_unlock_irqrestore(q->queue_lock, flags);
return 0;
@@ -2271,7 +2279,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
* %false - we are done with this request
* %true - still buffers pending for this request
**/
-static bool __blk_end_bidi_request(struct request *rq, int error,
+bool __blk_end_bidi_request(struct request *rq, int error,
unsigned int nr_bytes, unsigned int bidi_bytes)
{
if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
@@ -2626,6 +2634,7 @@ void blk_start_plug(struct blk_plug *plug)
INIT_LIST_HEAD(&plug->list);
INIT_LIST_HEAD(&plug->cb_list);
plug->should_sort = 0;
+ plug->count = 0;
/*
* If this is a nested plug, don't actually assign it. It will be
@@ -2709,6 +2718,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
return;
list_splice_init(&plug->list, &list);
+ plug->count = 0;
if (plug->should_sort) {
list_sort(NULL, &list, plug_rq_cmp);
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 8a0e7ec056e7..a1ebceb332f9 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -50,6 +50,13 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
{
int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
+ if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
+ rq->errors = -ENXIO;
+ if (rq->end_io)
+ rq->end_io(rq, rq->errors);
+ return;
+ }
+
rq->rq_disk = bd_disk;
rq->end_io = done;
WARN_ON(irqs_disabled());
diff --git a/block/blk-flush.c b/block/blk-flush.c
index bb21e4c36f70..491eb30a242d 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -95,11 +95,12 @@ static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
{
unsigned int policy = 0;
+ if (blk_rq_sectors(rq))
+ policy |= REQ_FSEQ_DATA;
+
if (fflags & REQ_FLUSH) {
if (rq->cmd_flags & REQ_FLUSH)
policy |= REQ_FSEQ_PREFLUSH;
- if (blk_rq_sectors(rq))
- policy |= REQ_FSEQ_DATA;
if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
policy |= REQ_FSEQ_POSTFLUSH;
}
@@ -122,7 +123,7 @@ static void blk_flush_restore_request(struct request *rq)
/* make @rq a normal request */
rq->cmd_flags &= ~REQ_FLUSH_SEQ;
- rq->end_io = NULL;
+ rq->end_io = rq->flush.saved_end_io;
}
/**
@@ -300,9 +301,6 @@ void blk_insert_flush(struct request *rq)
unsigned int fflags = q->flush_flags; /* may change, cache */
unsigned int policy = blk_flush_policy(fflags, rq);
- BUG_ON(rq->end_io);
- BUG_ON(!rq->bio || rq->bio != rq->biotail);
-
/*
* @policy now records what operations need to be done. Adjust
* REQ_FLUSH and FUA for the driver.
@@ -312,6 +310,19 @@ void blk_insert_flush(struct request *rq)
rq->cmd_flags &= ~REQ_FUA;
/*
+ * An empty flush handed down from a stacking driver may
+ * translate into nothing if the underlying device does not
+ * advertise a write-back cache. In this case, simply
+ * complete the request.
+ */
+ if (!policy) {
+ __blk_end_bidi_request(rq, 0, 0, 0);
+ return;
+ }
+
+ BUG_ON(!rq->bio || rq->bio != rq->biotail);
+
+ /*
* If there's data but flush is not necessary, the request can be
* processed directly without going through flush machinery. Queue
* for normal execution.
@@ -319,6 +330,7 @@ void blk_insert_flush(struct request *rq)
if ((policy & REQ_FSEQ_DATA) &&
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
list_add_tail(&rq->queuelist, &q->queue_head);
+ blk_run_queue_async(q);
return;
}
@@ -329,6 +341,7 @@ void blk_insert_flush(struct request *rq)
memset(&rq->flush, 0, sizeof(rq->flush));
INIT_LIST_HEAD(&rq->flush.list);
rq->cmd_flags |= REQ_FLUSH_SEQ;
+ rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
rq->end_io = flush_data_end_io;
blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 342eae9b0d3c..6f9bbd978653 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -82,26 +82,26 @@ void exit_io_context(struct task_struct *task)
struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
{
- struct io_context *ret;
+ struct io_context *ioc;
- ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
- if (ret) {
- atomic_long_set(&ret->refcount, 1);
- atomic_set(&ret->nr_tasks, 1);
- spin_lock_init(&ret->lock);
- ret->ioprio_changed = 0;
- ret->ioprio = 0;
- ret->last_waited = 0; /* doesn't matter... */
- ret->nr_batch_requests = 0; /* because this is 0 */
- INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
- INIT_HLIST_HEAD(&ret->cic_list);
- ret->ioc_data = NULL;
+ ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
+ if (ioc) {
+ atomic_long_set(&ioc->refcount, 1);
+ atomic_set(&ioc->nr_tasks, 1);
+ spin_lock_init(&ioc->lock);
+ ioc->ioprio_changed = 0;
+ ioc->ioprio = 0;
+ ioc->last_waited = 0; /* doesn't matter... */
+ ioc->nr_batch_requests = 0; /* because this is 0 */
+ INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
+ INIT_HLIST_HEAD(&ioc->cic_list);
+ ioc->ioc_data = NULL;
#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
- ret->cgroup_changed = 0;
+ ioc->cgroup_changed = 0;
#endif
}
- return ret;
+ return ioc;
}
/*
@@ -139,19 +139,19 @@ struct io_context *current_io_context(gfp_t gfp_flags, int node)
*/
struct io_context *get_io_context(gfp_t gfp_flags, int node)
{
- struct io_context *ret = NULL;
+ struct io_context *ioc = NULL;
/*
* Check for unlikely race with exiting task. ioc ref count is
* zero when ioc is being detached.
*/
do {
- ret = current_io_context(gfp_flags, node);
- if (unlikely(!ret))
+ ioc = current_io_context(gfp_flags, node);
+ if (unlikely(!ioc))
break;
- } while (!atomic_long_inc_not_zero(&ret->refcount));
+ } while (!atomic_long_inc_not_zero(&ioc->refcount));
- return ret;
+ return ioc;
}
EXPORT_SYMBOL(get_io_context);
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 78e627e2581d..2b461b496a78 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -59,7 +59,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
* granularity
*/
max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
- if (q->limits.discard_granularity) {
+ if (unlikely(!max_discard_sectors)) {
+ /* Avoid infinite loop below. Being cautious never hurts. */
+ return -EOPNOTSUPP;
+ } else if (q->limits.discard_granularity) {
unsigned int disc_sects = q->limits.discard_granularity >> 9;
max_discard_sectors &= ~(disc_sects - 1);
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index ee9c21602228..58340d0cb23a 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -103,24 +103,35 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = {
void __blk_complete_request(struct request *req)
{
+ int ccpu, cpu, group_cpu = NR_CPUS;
struct request_queue *q = req->q;
unsigned long flags;
- int ccpu, cpu, group_cpu;
BUG_ON(!q->softirq_done_fn);
local_irq_save(flags);
cpu = smp_processor_id();
- group_cpu = blk_cpu_to_group(cpu);
/*
* Select completion CPU
*/
- if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
+ if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) {
ccpu = req->cpu;
- else
+ if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) {
+ ccpu = blk_cpu_to_group(ccpu);
+ group_cpu = blk_cpu_to_group(cpu);
+ }
+ } else
ccpu = cpu;
+ /*
+ * If current CPU and requested CPU are in the same group, running
+ * softirq in current CPU. One might concern this is just like
+ * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
+ * running in interrupt handler, and currently I/O controller doesn't
+ * support multiple interrupts, so current CPU is unique actually. This
+ * avoids IPI sending from current CPU to the first CPU of a group.
+ */
if (ccpu == cpu || ccpu == group_cpu) {
struct list_head *list;
do_local:
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index d935bd859c87..0ee17b5e7fb6 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -244,8 +244,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
{
bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
+ bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags);
- return queue_var_show(set, page);
+ return queue_var_show(set << force, page);
}
static ssize_t
@@ -257,10 +258,14 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
ret = queue_var_store(&val, page, count);
spin_lock_irq(q->queue_lock);
- if (val)
+ if (val) {
queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
- else
- queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
+ if (val == 2)
+ queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
+ } else {
+ queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
+ queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
+ }
spin_unlock_irq(q->queue_lock);
#endif
return ret;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a62be8d0dc1b..a19f58c6fc3a 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -142,9 +142,9 @@ static inline struct throtl_grp *tg_of_blkg(struct blkio_group *blkg)
return NULL;
}
-static inline int total_nr_queued(struct throtl_data *td)
+static inline unsigned int total_nr_queued(struct throtl_data *td)
{
- return (td->nr_queued[0] + td->nr_queued[1]);
+ return td->nr_queued[0] + td->nr_queued[1];
}
static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg)
@@ -746,7 +746,7 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg,
static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
{
bool rw = bio_data_dir(bio);
- bool sync = bio->bi_rw & REQ_SYNC;
+ bool sync = rw_is_sync(bio->bi_rw);
/* Charge the bio to the group */
tg->bytes_disp[rw] += bio->bi_size;
@@ -927,7 +927,7 @@ static int throtl_dispatch(struct request_queue *q)
bio_list_init(&bio_list_on_stack);
- throtl_log(td, "dispatch nr_queued=%lu read=%u write=%u",
+ throtl_log(td, "dispatch nr_queued=%u read=%u write=%u",
total_nr_queued(td), td->nr_queued[READ],
td->nr_queued[WRITE]);
@@ -970,7 +970,7 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
struct delayed_work *dwork = &td->throtl_work;
/* schedule work if limits changed even if no bio is queued */
- if (total_nr_queued(td) > 0 || td->limits_changed) {
+ if (total_nr_queued(td) || td->limits_changed) {
/*
* We might have a work scheduled to be executed in future.
* Cancel that and schedule a new one.
@@ -1150,7 +1150,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
if (tg_no_rule_group(tg, rw)) {
blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
- rw, bio->bi_rw & REQ_SYNC);
+ rw, rw_is_sync(bio->bi_rw));
rcu_read_unlock();
return 0;
}
@@ -1204,7 +1204,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
}
queue_bio:
- throtl_log_tg(td, tg, "[%c] bio. bdisp=%u sz=%u bps=%llu"
+ throtl_log_tg(td, tg, "[%c] bio. bdisp=%llu sz=%u bps=%llu"
" iodisp=%u iops=%u queued=%d/%d",
rw == READ ? 'R' : 'W',
tg->bytes_disp[rw], bio->bi_size, tg->bps[rw],
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 4f0c06c7a338..780354888958 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -28,7 +28,10 @@ int blk_should_fake_timeout(struct request_queue *q)
static int __init fail_io_timeout_debugfs(void)
{
- return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout");
+ struct dentry *dir = fault_create_debugfs_attr("fail_io_timeout",
+ NULL, &fail_io_timeout);
+
+ return IS_ERR(dir) ? PTR_ERR(dir) : 0;
}
late_initcall(fail_io_timeout_debugfs);
diff --git a/block/blk.h b/block/blk.h
index d6586287adc9..20b900a377c9 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -17,6 +17,8 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
struct bio *bio);
void blk_dequeue_request(struct request *rq);
void __blk_queue_free_tags(struct request_queue *q);
+bool __blk_end_bidi_request(struct request *rq, int error,
+ unsigned int nr_bytes, unsigned int bidi_bytes);
void blk_rq_timed_out_timer(unsigned long data);
void blk_delete_timer(struct request *);
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
new file mode 100644
index 000000000000..6690e6e41037
--- /dev/null
+++ b/block/bsg-lib.c
@@ -0,0 +1,298 @@
+/*
+ * BSG helper library
+ *
+ * Copyright (C) 2008 James Smart, Emulex Corporation
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2011 Mike Christie
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/delay.h>
+#include <linux/scatterlist.h>
+#include <linux/bsg-lib.h>
+#include <linux/module.h>
+#include <scsi/scsi_cmnd.h>
+
+/**
+ * bsg_destroy_job - routine to teardown/delete a bsg job
+ * @job: bsg_job that is to be torn down
+ */
+static void bsg_destroy_job(struct bsg_job *job)
+{
+ put_device(job->dev); /* release reference for the request */
+
+ kfree(job->request_payload.sg_list);
+ kfree(job->reply_payload.sg_list);
+ kfree(job);
+}
+
+/**
+ * bsg_job_done - completion routine for bsg requests
+ * @job: bsg_job that is complete
+ * @result: job reply result
+ * @reply_payload_rcv_len: length of payload recvd
+ *
+ * The LLD should call this when the bsg job has completed.
+ */
+void bsg_job_done(struct bsg_job *job, int result,
+ unsigned int reply_payload_rcv_len)
+{
+ struct request *req = job->req;
+ struct request *rsp = req->next_rq;
+ int err;
+
+ err = job->req->errors = result;
+ if (err < 0)
+ /* we're only returning the result field in the reply */
+ job->req->sense_len = sizeof(u32);
+ else
+ job->req->sense_len = job->reply_len;
+ /* we assume all request payload was transferred, residual == 0 */
+ req->resid_len = 0;
+
+ if (rsp) {
+ WARN_ON(reply_payload_rcv_len > rsp->resid_len);
+
+ /* set reply (bidi) residual */
+ rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len);
+ }
+ blk_complete_request(req);
+}
+EXPORT_SYMBOL_GPL(bsg_job_done);
+
+/**
+ * bsg_softirq_done - softirq done routine for destroying the bsg requests
+ * @rq: BSG request that holds the job to be destroyed
+ */
+static void bsg_softirq_done(struct request *rq)
+{
+ struct bsg_job *job = rq->special;
+
+ blk_end_request_all(rq, rq->errors);
+ bsg_destroy_job(job);
+}
+
+static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
+{
+ size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments);
+
+ BUG_ON(!req->nr_phys_segments);
+
+ buf->sg_list = kzalloc(sz, GFP_KERNEL);
+ if (!buf->sg_list)
+ return -ENOMEM;
+ sg_init_table(buf->sg_list, req->nr_phys_segments);
+ buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
+ buf->payload_len = blk_rq_bytes(req);
+ return 0;
+}
+
+/**
+ * bsg_create_job - create the bsg_job structure for the bsg request
+ * @dev: device that is being sent the bsg request
+ * @req: BSG request that needs a job structure
+ */
+static int bsg_create_job(struct device *dev, struct request *req)
+{
+ struct request *rsp = req->next_rq;
+ struct request_queue *q = req->q;
+ struct bsg_job *job;
+ int ret;
+
+ BUG_ON(req->special);
+
+ job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
+ if (!job)
+ return -ENOMEM;
+
+ req->special = job;
+ job->req = req;
+ if (q->bsg_job_size)
+ job->dd_data = (void *)&job[1];
+ job->request = req->cmd;
+ job->request_len = req->cmd_len;
+ job->reply = req->sense;
+ job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer
+ * allocated */
+ if (req->bio) {
+ ret = bsg_map_buffer(&job->request_payload, req);
+ if (ret)
+ goto failjob_rls_job;
+ }
+ if (rsp && rsp->bio) {
+ ret = bsg_map_buffer(&job->reply_payload, rsp);
+ if (ret)
+ goto failjob_rls_rqst_payload;
+ }
+ job->dev = dev;
+ /* take a reference for the request */
+ get_device(job->dev);
+ return 0;
+
+failjob_rls_rqst_payload:
+ kfree(job->request_payload.sg_list);
+failjob_rls_job:
+ kfree(job);
+ return -ENOMEM;
+}
+
+/*
+ * bsg_goose_queue - restart queue in case it was stopped
+ * @q: request q to be restarted
+ */
+void bsg_goose_queue(struct request_queue *q)
+{
+ if (!q)
+ return;
+
+ blk_run_queue_async(q);
+}
+EXPORT_SYMBOL_GPL(bsg_goose_queue);
+
+/**
+ * bsg_request_fn - generic handler for bsg requests
+ * @q: request queue to manage
+ *
+ * On error the create_bsg_job function should return a -Exyz error value
+ * that will be set to the req->errors.
+ *
+ * Drivers/subsys should pass this to the queue init function.
+ */
+void bsg_request_fn(struct request_queue *q)
+{
+ struct device *dev = q->queuedata;
+ struct request *req;
+ struct bsg_job *job;
+ int ret;
+
+ if (!get_device(dev))
+ return;
+
+ while (1) {
+ req = blk_fetch_request(q);
+ if (!req)
+ break;
+ spin_unlock_irq(q->queue_lock);
+
+ ret = bsg_create_job(dev, req);
+ if (ret) {
+ req->errors = ret;
+ blk_end_request_all(req, ret);
+ spin_lock_irq(q->queue_lock);
+ continue;
+ }
+
+ job = req->special;
+ ret = q->bsg_job_fn(job);
+ spin_lock_irq(q->queue_lock);
+ if (ret)
+ break;
+ }
+
+ spin_unlock_irq(q->queue_lock);
+ put_device(dev);
+ spin_lock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL_GPL(bsg_request_fn);
+
+/**
+ * bsg_setup_queue - Create and add the bsg hooks so we can receive requests
+ * @dev: device to attach bsg device to
+ * @q: request queue setup by caller
+ * @name: device to give bsg device
+ * @job_fn: bsg job handler
+ * @dd_job_size: size of LLD data needed for each job
+ *
+ * The caller should have setup the reuqest queue with bsg_request_fn
+ * as the request_fn.
+ */
+int bsg_setup_queue(struct device *dev, struct request_queue *q,
+ char *name, bsg_job_fn *job_fn, int dd_job_size)
+{
+ int ret;
+
+ q->queuedata = dev;
+ q->bsg_job_size = dd_job_size;
+ q->bsg_job_fn = job_fn;
+ queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+ blk_queue_softirq_done(q, bsg_softirq_done);
+ blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
+
+ ret = bsg_register_queue(q, dev, name, NULL);
+ if (ret) {
+ printk(KERN_ERR "%s: bsg interface failed to "
+ "initialize - register queue\n", dev->kobj.name);
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(bsg_setup_queue);
+
+/**
+ * bsg_remove_queue - Deletes the bsg dev from the q
+ * @q: the request_queue that is to be torn down.
+ *
+ * Notes:
+ * Before unregistering the queue empty any requests that are blocked
+ */
+void bsg_remove_queue(struct request_queue *q)
+{
+ struct request *req; /* block request */
+ int counts; /* totals for request_list count and starved */
+
+ if (!q)
+ return;
+
+ /* Stop taking in new requests */
+ spin_lock_irq(q->queue_lock);
+ blk_stop_queue(q);
+
+ /* drain all requests in the queue */
+ while (1) {
+ /* need the lock to fetch a request
+ * this may fetch the same reqeust as the previous pass
+ */
+ req = blk_fetch_request(q);
+ /* save requests in use and starved */
+ counts = q->rq.count[0] + q->rq.count[1] +
+ q->rq.starved[0] + q->rq.starved[1];
+ spin_unlock_irq(q->queue_lock);
+ /* any requests still outstanding? */
+ if (counts == 0)
+ break;
+
+ /* This may be the same req as the previous iteration,
+ * always send the blk_end_request_all after a prefetch.
+ * It is not okay to not end the request because the
+ * prefetch started the request.
+ */
+ if (req) {
+ /* return -ENXIO to indicate that this queue is
+ * going away
+ */
+ req->errors = -ENXIO;
+ blk_end_request_all(req, -ENXIO);
+ }
+
+ msleep(200); /* allow bsg to possibly finish */
+ spin_lock_irq(q->queue_lock);
+ }
+ bsg_unregister_queue(q);
+}
+EXPORT_SYMBOL_GPL(bsg_remove_queue);
diff --git a/block/bsg.c b/block/bsg.c
index 0c8b64a16484..702f1316bb8f 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -182,7 +182,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
return -ENOMEM;
}
- if (copy_from_user(rq->cmd, (void *)(unsigned long)hdr->request,
+ if (copy_from_user(rq->cmd, (void __user *)(unsigned long)hdr->request,
hdr->request_len))
return -EFAULT;
@@ -249,7 +249,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
struct request *rq, *next_rq = NULL;
int ret, rw;
unsigned int dxfer_len;
- void *dxferp = NULL;
+ void __user *dxferp = NULL;
struct bsg_class_device *bcd = &q->bsg_dev;
/* if the LLD has been removed then the bsg_unregister_queue will
@@ -291,7 +291,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
rq->next_rq = next_rq;
next_rq->cmd_type = rq->cmd_type;
- dxferp = (void*)(unsigned long)hdr->din_xferp;
+ dxferp = (void __user *)(unsigned long)hdr->din_xferp;
ret = blk_rq_map_user(q, next_rq, NULL, dxferp,
hdr->din_xfer_len, GFP_KERNEL);
if (ret)
@@ -300,10 +300,10 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
if (hdr->dout_xfer_len) {
dxfer_len = hdr->dout_xfer_len;
- dxferp = (void*)(unsigned long)hdr->dout_xferp;
+ dxferp = (void __user *)(unsigned long)hdr->dout_xferp;
} else if (hdr->din_xfer_len) {
dxfer_len = hdr->din_xfer_len;
- dxferp = (void*)(unsigned long)hdr->din_xferp;
+ dxferp = (void __user *)(unsigned long)hdr->din_xferp;
} else
dxfer_len = 0;
@@ -445,7 +445,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
int len = min_t(unsigned int, hdr->max_response_len,
rq->sense_len);
- ret = copy_to_user((void*)(unsigned long)hdr->response,
+ ret = copy_to_user((void __user *)(unsigned long)hdr->response,
rq->sense, len);
if (!ret)
hdr->response_len = len;
@@ -606,7 +606,7 @@ bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
ret = __bsg_read(buf, count, bd, NULL, &bytes_read);
*ppos = bytes_read;
- if (!bytes_read || (bytes_read && err_block_err(ret)))
+ if (!bytes_read || err_block_err(ret))
bytes_read = ret;
return bytes_read;
@@ -686,7 +686,7 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
/*
* return bytes written on non-fatal errors
*/
- if (!bytes_written || (bytes_written && err_block_err(ret)))
+ if (!bytes_written || err_block_err(ret))
bytes_written = ret;
dprintk("%s: returning %Zd\n", bd->name, bytes_written);
@@ -878,7 +878,7 @@ static unsigned int bsg_poll(struct file *file, poll_table *wait)
spin_lock_irq(&bd->lock);
if (!list_empty(&bd->done_list))
mask |= POLLIN | POLLRDNORM;
- if (bd->queued_cmds >= bd->max_queue)
+ if (bd->queued_cmds < bd->max_queue)
mask |= POLLOUT;
spin_unlock_irq(&bd->lock);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 3c7b537bf908..a33bd4377c61 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -87,9 +87,10 @@ struct cfq_rb_root {
unsigned count;
unsigned total_weight;
u64 min_vdisktime;
+ struct cfq_ttime ttime;
};
-#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
- .count = 0, .min_vdisktime = 0, }
+#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \
+ .ttime = {.last_end_request = jiffies,},}
/*
* Per process-grouping structure
@@ -136,7 +137,7 @@ struct cfq_queue {
/* io prio of this group */
unsigned short ioprio, org_ioprio;
- unsigned short ioprio_class, org_ioprio_class;
+ unsigned short ioprio_class;
pid_t pid;
@@ -212,6 +213,7 @@ struct cfq_group {
#endif
/* number of requests that are on the dispatch list or inside driver */
int dispatched;
+ struct cfq_ttime ttime;
};
/*
@@ -393,6 +395,18 @@ CFQ_CFQQ_FNS(wait_busy);
j++, st = i < IDLE_WORKLOAD ? \
&cfqg->service_trees[i][j]: NULL) \
+static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd,
+ struct cfq_ttime *ttime, bool group_idle)
+{
+ unsigned long slice;
+ if (!sample_valid(ttime->ttime_samples))
+ return false;
+ if (group_idle)
+ slice = cfqd->cfq_group_idle;
+ else
+ slice = cfqd->cfq_slice_idle;
+ return ttime->ttime_mean > slice;
+}
static inline bool iops_mode(struct cfq_data *cfqd)
{
@@ -988,9 +1002,10 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
st->min_vdisktime);
- cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u"
- " sect=%u", used_sl, cfqq->slice_dispatch, charge,
- iops_mode(cfqd), cfqq->nr_sectors);
+ cfq_log_cfqq(cfqq->cfqd, cfqq,
+ "sl_used=%u disp=%u charge=%u iops=%u sect=%lu",
+ used_sl, cfqq->slice_dispatch, charge,
+ iops_mode(cfqd), cfqq->nr_sectors);
cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl,
unaccounted_sl);
cfq_blkiocg_set_start_empty_time(&cfqg->blkg);
@@ -1004,8 +1019,8 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
return NULL;
}
-void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
- unsigned int weight)
+static void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
+ unsigned int weight)
{
struct cfq_group *cfqg = cfqg_of_blkg(blkg);
cfqg->new_weight = weight;
@@ -1058,6 +1073,8 @@ static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
*st = CFQ_RB_ROOT;
RB_CLEAR_NODE(&cfqg->rb_node);
+ cfqg->ttime.last_end_request = jiffies;
+
/*
* Take the initial reference that will be released on destroy
* This can be thought of a joint reference by cgroup and
@@ -1197,6 +1214,9 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
hlist_del_init(&cfqg->cfqd_node);
+ BUG_ON(cfqd->nr_blkcg_linked_grps <= 0);
+ cfqd->nr_blkcg_linked_grps--;
+
/*
* Put the reference taken at the time of creation so that when all
* queues are gone, group can be destroyed.
@@ -1234,7 +1254,7 @@ static void cfq_release_cfq_groups(struct cfq_data *cfqd)
* it should not be NULL as even if elevator was exiting, cgroup deltion
* path got to it first.
*/
-void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
+static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
{
unsigned long flags;
struct cfq_data *cfqd = key;
@@ -1501,16 +1521,11 @@ static void cfq_add_rq_rb(struct request *rq)
{
struct cfq_queue *cfqq = RQ_CFQQ(rq);
struct cfq_data *cfqd = cfqq->cfqd;
- struct request *__alias, *prev;
+ struct request *prev;
cfqq->queued[rq_is_sync(rq)]++;
- /*
- * looks a little odd, but the first insert might return an alias.
- * if that happens, put the alias on the dispatch list
- */
- while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
- cfq_dispatch_insert(cfqd->queue, __alias);
+ elv_rb_add(&cfqq->sort_list, rq);
if (!cfq_cfqq_on_rr(cfqq))
cfq_add_cfqq_rr(cfqd, cfqq);
@@ -1968,7 +1983,8 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
* Otherwise, we do only if they are the last ones
* in their service tree.
*/
- if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
+ if (service_tree->count == 1 && cfq_cfqq_sync(cfqq) &&
+ !cfq_io_thinktime_big(cfqd, &service_tree->ttime, false))
return true;
cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
service_tree->count);
@@ -2021,10 +2037,10 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
* slice, then don't idle. This avoids overrunning the allotted
* time slice.
*/
- if (sample_valid(cic->ttime_samples) &&
- (cfqq->slice_end - jiffies < cic->ttime_mean)) {
- cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%d",
- cic->ttime_mean);
+ if (sample_valid(cic->ttime.ttime_samples) &&
+ (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) {
+ cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu",
+ cic->ttime.ttime_mean);
return;
}
@@ -2380,8 +2396,9 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
* this group, wait for requests to complete.
*/
check_group_idle:
- if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1
- && cfqq->cfqg->dispatched) {
+ if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 &&
+ cfqq->cfqg->dispatched &&
+ !cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) {
cfqq = NULL;
goto keep_queue;
}
@@ -2772,8 +2789,14 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
smp_wmb();
cic->key = cfqd_dead_key(cfqd);
- if (ioc->ioc_data == cic)
+ rcu_read_lock();
+ if (rcu_dereference(ioc->ioc_data) == cic) {
+ rcu_read_unlock();
+ spin_lock(&ioc->lock);
rcu_assign_pointer(ioc->ioc_data, NULL);
+ spin_unlock(&ioc->lock);
+ } else
+ rcu_read_unlock();
if (cic->cfqq[BLK_RW_ASYNC]) {
cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]);
@@ -2826,7 +2849,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO,
cfqd->queue->node);
if (cic) {
- cic->last_end_request = jiffies;
+ cic->ttime.last_end_request = jiffies;
INIT_LIST_HEAD(&cic->queue_list);
INIT_HLIST_NODE(&cic->cic_list);
cic->dtor = cfq_free_io_context;
@@ -2876,7 +2899,6 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
* elevate the priority of this queue
*/
cfqq->org_ioprio = cfqq->ioprio;
- cfqq->org_ioprio_class = cfqq->ioprio_class;
cfq_clear_cfqq_prio_changed(cfqq);
}
@@ -3080,7 +3102,8 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
spin_lock_irqsave(&ioc->lock, flags);
- BUG_ON(ioc->ioc_data == cic);
+ BUG_ON(rcu_dereference_check(ioc->ioc_data,
+ lockdep_is_held(&ioc->lock)) == cic);
radix_tree_delete(&ioc->radix_root, cfqd->cic_index);
hlist_del_rcu(&cic->cic_list);
@@ -3213,14 +3236,28 @@ err:
}
static void
-cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
+__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
{
- unsigned long elapsed = jiffies - cic->last_end_request;
- unsigned long ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle);
+ unsigned long elapsed = jiffies - ttime->last_end_request;
+ elapsed = min(elapsed, 2UL * slice_idle);
+
+ ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
+ ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8;
+ ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples;
+}
- cic->ttime_samples = (7*cic->ttime_samples + 256) / 8;
- cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8;
- cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples;
+static void
+cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+ struct cfq_io_context *cic)
+{
+ if (cfq_cfqq_sync(cfqq)) {
+ __cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle);
+ __cfq_update_io_thinktime(&cfqq->service_tree->ttime,
+ cfqd->cfq_slice_idle);
+ }
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+ __cfq_update_io_thinktime(&cfqq->cfqg->ttime, cfqd->cfq_group_idle);
+#endif
}
static void
@@ -3269,8 +3306,8 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
(!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
enable_idle = 0;
- else if (sample_valid(cic->ttime_samples)) {
- if (cic->ttime_mean > cfqd->cfq_slice_idle)
+ else if (sample_valid(cic->ttime.ttime_samples)) {
+ if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle)
enable_idle = 0;
else
enable_idle = 1;
@@ -3405,7 +3442,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
if (rq->cmd_flags & REQ_META)
cfqq->meta_pending++;
- cfq_update_io_thinktime(cfqd, cic);
+ cfq_update_io_thinktime(cfqd, cfqq, cic);
cfq_update_io_seektime(cfqd, cfqq, rq);
cfq_update_idle_window(cfqd, cfqq, cic);
@@ -3512,12 +3549,16 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
if (cfqq->cfqg->nr_cfqq > 1)
return false;
+ /* the only queue in the group, but think time is big */
+ if (cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true))
+ return false;
+
if (cfq_slice_used(cfqq))
return true;
/* if slice left is less than think time, wait busy */
- if (cic && sample_valid(cic->ttime_samples)
- && (cfqq->slice_end - jiffies < cic->ttime_mean))
+ if (cic && sample_valid(cic->ttime.ttime_samples)
+ && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean))
return true;
/*
@@ -3558,11 +3599,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
if (sync) {
- RQ_CIC(rq)->last_end_request = now;
+ struct cfq_rb_root *service_tree;
+
+ RQ_CIC(rq)->ttime.last_end_request = now;
+
+ if (cfq_cfqq_on_rr(cfqq))
+ service_tree = cfqq->service_tree;
+ else
+ service_tree = service_tree_for(cfqq->cfqg,
+ cfqq_prio(cfqq), cfqq_type(cfqq));
+ service_tree->ttime.last_end_request = now;
if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
cfqd->last_delayed_sync = now;
}
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+ cfqq->cfqg->ttime.last_end_request = now;
+#endif
+
/*
* If this is the active queue, check if it needs to be expired,
* or if we want to idle in case it has no pending requests.
@@ -3608,30 +3662,6 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfq_schedule_dispatch(cfqd);
}
-/*
- * we temporarily boost lower priority queues if they are holding fs exclusive
- * resources. they are boosted to normal prio (CLASS_BE/4)
- */
-static void cfq_prio_boost(struct cfq_queue *cfqq)
-{
- if (has_fs_excl()) {
- /*
- * boost idle prio on transactions that would lock out other
- * users of the filesystem
- */
- if (cfq_class_idle(cfqq))
- cfqq->ioprio_class = IOPRIO_CLASS_BE;
- if (cfqq->ioprio > IOPRIO_NORM)
- cfqq->ioprio = IOPRIO_NORM;
- } else {
- /*
- * unboost the queue (if needed)
- */
- cfqq->ioprio_class = cfqq->org_ioprio_class;
- cfqq->ioprio = cfqq->org_ioprio;
- }
-}
-
static inline int __cfq_may_queue(struct cfq_queue *cfqq)
{
if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
@@ -3662,7 +3692,6 @@ static int cfq_may_queue(struct request_queue *q, int rw)
cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
if (cfqq) {
cfq_init_prio_data(cfqq, cic->ioc);
- cfq_prio_boost(cfqq);
return __cfq_may_queue(cfqq);
}
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index cc3eb78e333a..7b725020823c 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -208,19 +208,6 @@ static int compat_blkpg_ioctl(struct block_device *bdev, fmode_t mode,
#define BLKBSZSET_32 _IOW(0x12, 113, int)
#define BLKGETSIZE64_32 _IOR(0x12, 114, int)
-struct compat_floppy_struct {
- compat_uint_t size;
- compat_uint_t sect;
- compat_uint_t head;
- compat_uint_t track;
- compat_uint_t stretch;
- unsigned char gap;
- unsigned char rate;
- unsigned char spec1;
- unsigned char fmt_gap;
- const compat_caddr_t name;
-};
-
struct compat_floppy_drive_params {
char cmos;
compat_ulong_t max_dtr;
@@ -288,7 +275,6 @@ struct compat_floppy_write_errors {
#define FDSETPRM32 _IOW(2, 0x42, struct compat_floppy_struct)
#define FDDEFPRM32 _IOW(2, 0x43, struct compat_floppy_struct)
-#define FDGETPRM32 _IOR(2, 0x04, struct compat_floppy_struct)
#define FDSETDRVPRM32 _IOW(2, 0x90, struct compat_floppy_drive_params)
#define FDGETDRVPRM32 _IOR(2, 0x11, struct compat_floppy_drive_params)
#define FDGETDRVSTAT32 _IOR(2, 0x12, struct compat_floppy_drive_struct)
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 5139c0ea1864..c644137d9cd6 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -77,10 +77,8 @@ static void
deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
{
struct rb_root *root = deadline_rb_root(dd, rq);
- struct request *__alias;
- while (unlikely(__alias = elv_rb_add(root, rq)))
- deadline_move_request(dd, __alias);
+ elv_rb_add(root, rq);
}
static inline void
diff --git a/block/elevator.c b/block/elevator.c
index b0b38ce0dcb6..a3b64bc71d88 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -353,7 +353,7 @@ static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
* RB-tree support functions for inserting/lookup/removal of requests
* in a sorted RB tree.
*/
-struct request *elv_rb_add(struct rb_root *root, struct request *rq)
+void elv_rb_add(struct rb_root *root, struct request *rq)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
@@ -365,15 +365,12 @@ struct request *elv_rb_add(struct rb_root *root, struct request *rq)
if (blk_rq_pos(rq) < blk_rq_pos(__rq))
p = &(*p)->rb_left;
- else if (blk_rq_pos(rq) > blk_rq_pos(__rq))
+ else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
p = &(*p)->rb_right;
- else
- return __rq;
}
rb_link_node(&rq->rb_node, parent, p);
rb_insert_color(&rq->rb_node, root);
- return NULL;
}
EXPORT_SYMBOL(elv_rb_add);
diff --git a/block/genhd.c b/block/genhd.c
index 95822ae25cfe..e2f67902dd02 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -602,7 +602,7 @@ void add_disk(struct gendisk *disk)
disk->major = MAJOR(devt);
disk->first_minor = MINOR(devt);
- /* Register BDI before referencing it from bdev */
+ /* Register BDI before referencing it from bdev */
bdi = &disk->queue->backing_dev_info;
bdi_register_dev(bdi, disk_devt(disk));
@@ -1018,14 +1018,6 @@ static const struct attribute_group *disk_attr_groups[] = {
NULL
};
-static void disk_free_ptbl_rcu_cb(struct rcu_head *head)
-{
- struct disk_part_tbl *ptbl =
- container_of(head, struct disk_part_tbl, rcu_head);
-
- kfree(ptbl);
-}
-
/**
* disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
* @disk: disk to replace part_tbl for
@@ -1046,7 +1038,7 @@ static void disk_replace_part_tbl(struct gendisk *disk,
if (old_ptbl) {
rcu_assign_pointer(old_ptbl->last_lookup, NULL);
- call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
+ kfree_rcu(old_ptbl, rcu_head);
}
}
@@ -1148,23 +1140,23 @@ static int diskstats_show(struct seq_file *seqf, void *v)
"wsect wuse running use aveq"
"\n\n");
*/
-
+
disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
while ((hd = disk_part_iter_next(&piter))) {
cpu = part_stat_lock();
part_round_stats(cpu, hd);
part_stat_unlock();
- seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
- "%u %lu %lu %llu %u %u %u %u\n",
+ seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
+ "%u %lu %lu %lu %u %u %u %u\n",
MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
disk_name(gp, hd->partno, buf),
part_stat_read(hd, ios[READ]),
part_stat_read(hd, merges[READ]),
- (unsigned long long)part_stat_read(hd, sectors[READ]),
+ part_stat_read(hd, sectors[READ]),
jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
part_stat_read(hd, ios[WRITE]),
part_stat_read(hd, merges[WRITE]),
- (unsigned long long)part_stat_read(hd, sectors[WRITE]),
+ part_stat_read(hd, sectors[WRITE]),
jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
part_in_flight(hd),
jiffies_to_msecs(part_stat_read(hd, io_ticks)),
@@ -1172,7 +1164,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
);
}
disk_part_iter_exit(&piter);
-
+
return 0;
}
@@ -1371,6 +1363,7 @@ struct disk_events {
struct gendisk *disk; /* the associated disk */
spinlock_t lock;
+ struct mutex block_mutex; /* protects blocking */
int block; /* event blocking depth */
unsigned int pending; /* events already sent out */
unsigned int clearing; /* events being cleared */
@@ -1414,22 +1407,44 @@ static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
return msecs_to_jiffies(intv_msecs);
}
-static void __disk_block_events(struct gendisk *disk, bool sync)
+/**
+ * disk_block_events - block and flush disk event checking
+ * @disk: disk to block events for
+ *
+ * On return from this function, it is guaranteed that event checking
+ * isn't in progress and won't happen until unblocked by
+ * disk_unblock_events(). Events blocking is counted and the actual
+ * unblocking happens after the matching number of unblocks are done.
+ *
+ * Note that this intentionally does not block event checking from
+ * disk_clear_events().
+ *
+ * CONTEXT:
+ * Might sleep.
+ */
+void disk_block_events(struct gendisk *disk)
{
struct disk_events *ev = disk->ev;
unsigned long flags;
bool cancel;
+ if (!ev)
+ return;
+
+ /*
+ * Outer mutex ensures that the first blocker completes canceling
+ * the event work before further blockers are allowed to finish.
+ */
+ mutex_lock(&ev->block_mutex);
+
spin_lock_irqsave(&ev->lock, flags);
cancel = !ev->block++;
spin_unlock_irqrestore(&ev->lock, flags);
- if (cancel) {
- if (sync)
- cancel_delayed_work_sync(&disk->ev->dwork);
- else
- cancel_delayed_work(&disk->ev->dwork);
- }
+ if (cancel)
+ cancel_delayed_work_sync(&disk->ev->dwork);
+
+ mutex_unlock(&ev->block_mutex);
}
static void __disk_unblock_events(struct gendisk *disk, bool check_now)
@@ -1461,27 +1476,6 @@ out_unlock:
}
/**
- * disk_block_events - block and flush disk event checking
- * @disk: disk to block events for
- *
- * On return from this function, it is guaranteed that event checking
- * isn't in progress and won't happen until unblocked by
- * disk_unblock_events(). Events blocking is counted and the actual
- * unblocking happens after the matching number of unblocks are done.
- *
- * Note that this intentionally does not block event checking from
- * disk_clear_events().
- *
- * CONTEXT:
- * Might sleep.
- */
-void disk_block_events(struct gendisk *disk)
-{
- if (disk->ev)
- __disk_block_events(disk, true);
-}
-
-/**
* disk_unblock_events - unblock disk event checking
* @disk: disk to unblock events for
*
@@ -1498,22 +1492,32 @@ void disk_unblock_events(struct gendisk *disk)
}
/**
- * disk_check_events - schedule immediate event checking
- * @disk: disk to check events for
+ * disk_flush_events - schedule immediate event checking and flushing
+ * @disk: disk to check and flush events for
+ * @mask: events to flush
*
- * Schedule immediate event checking on @disk if not blocked.
+ * Schedule immediate event checking on @disk if not blocked. Events in
+ * @mask are scheduled to be cleared from the driver. Note that this
+ * doesn't clear the events from @disk->ev.
*
* CONTEXT:
- * Don't care. Safe to call from irq context.
+ * If @mask is non-zero must be called with bdev->bd_mutex held.
*/
-void disk_check_events(struct gendisk *disk)
+void disk_flush_events(struct gendisk *disk, unsigned int mask)
{
- if (disk->ev) {
- __disk_block_events(disk, false);
- __disk_unblock_events(disk, true);
+ struct disk_events *ev = disk->ev;
+
+ if (!ev)
+ return;
+
+ spin_lock_irq(&ev->lock);
+ ev->clearing |= mask;
+ if (!ev->block) {
+ cancel_delayed_work(&ev->dwork);
+ queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
}
+ spin_unlock_irq(&ev->lock);
}
-EXPORT_SYMBOL_GPL(disk_check_events);
/**
* disk_clear_events - synchronously check, clear and return pending events
@@ -1546,7 +1550,7 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
spin_unlock_irq(&ev->lock);
/* uncondtionally schedule event check and wait for it to finish */
- __disk_block_events(disk, true);
+ disk_block_events(disk);
queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
flush_delayed_work(&ev->dwork);
__disk_unblock_events(disk, false);
@@ -1664,7 +1668,7 @@ static ssize_t disk_events_poll_msecs_store(struct device *dev,
if (intv < 0 && intv != -1)
return -EINVAL;
- __disk_block_events(disk, true);
+ disk_block_events(disk);
disk->ev->poll_msecs = intv;
__disk_unblock_events(disk, true);
@@ -1703,7 +1707,7 @@ static int disk_events_set_dfl_poll_msecs(const char *val,
mutex_lock(&disk_events_mutex);
list_for_each_entry(ev, &disk_events, node)
- disk_check_events(ev->disk);
+ disk_flush_events(ev->disk, 0);
mutex_unlock(&disk_events_mutex);
@@ -1750,6 +1754,7 @@ static void disk_add_events(struct gendisk *disk)
INIT_LIST_HEAD(&ev->node);
ev->disk = disk;
spin_lock_init(&ev->lock);
+ mutex_init(&ev->block_mutex);
ev->block = 1;
ev->poll_msecs = -1;
INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
@@ -1770,7 +1775,7 @@ static void disk_del_events(struct gendisk *disk)
if (!disk->ev)
return;
- __disk_block_events(disk, true);
+ disk_block_events(disk);
mutex_lock(&disk_events_mutex);
list_del_init(&disk->ev->node);