Diffstat (limited to 'block')
 block/Kconfig       |  16
 block/Makefile      |   1
 block/blk-core.c    |  98
 block/blk-merge.c   |  95
 block/blk-softirq.c |   2
 block/blk-sysfs.c   |  40
 block/blk-timeout.c |   9
 block/blk.h         |   2
 block/blktrace.c    | 860
 block/bsg.c         |  27
 block/cfq-iosched.c |   4
 block/cmd-filter.c  |   1
 block/elevator.c    |   2
 block/genhd.c       |  24
 block/scsi_ioctl.c  |  21
 15 files changed, 184 insertions(+), 1018 deletions(-)
diff --git a/block/Kconfig b/block/Kconfig
index 0cbb3b88b59a..e7d12782bcfb 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -44,22 +44,6 @@ config LBD
If unsure, say N.
-config BLK_DEV_IO_TRACE
- bool "Support for tracing block io actions"
- depends on SYSFS
- select RELAY
- select DEBUG_FS
- select TRACEPOINTS
- help
- Say Y here if you want to be able to trace the block layer actions
- on a given queue. Tracing allows you to see any traffic happening
- on a block device queue. For more information (and the userspace
- support tools needed), fetch the blktrace tools from:
-
- git://git.kernel.dk/blktrace.git
-
- If unsure, say N.
-
config BLK_DEV_BSG
bool "Block layer SG support v4 (EXPERIMENTAL)"
depends on EXPERIMENTAL
diff --git a/block/Makefile b/block/Makefile
index bfe73049f939..e9fa4dd690f2 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -13,6 +13,5 @@ obj-$(CONFIG_IOSCHED_AS) += as-iosched.o
obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
-obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o
diff --git a/block/blk-core.c b/block/blk-core.c
index 29bcfac6c688..25572802dac2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -484,11 +484,11 @@ static int blk_init_free_list(struct request_queue *q)
{
struct request_list *rl = &q->rq;
- rl->count[READ] = rl->count[WRITE] = 0;
- rl->starved[READ] = rl->starved[WRITE] = 0;
+ rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
+ rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
rl->elvpriv = 0;
- init_waitqueue_head(&rl->wait[READ]);
- init_waitqueue_head(&rl->wait[WRITE]);
+ init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
+ init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
mempool_free_slab, request_cachep, q->node);
@@ -603,13 +603,10 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
q->queue_flags = QUEUE_FLAG_DEFAULT;
q->queue_lock = lock;
- blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK);
-
+ /*
+ * This also sets hw/phys segments, boundary and size
+ */
blk_queue_make_request(q, __make_request);
- blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
-
- blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
- blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
q->sg_reserved_size = INT_MAX;
@@ -702,18 +699,18 @@ static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
ioc->last_waited = jiffies;
}
-static void __freed_request(struct request_queue *q, int rw)
+static void __freed_request(struct request_queue *q, int sync)
{
struct request_list *rl = &q->rq;
- if (rl->count[rw] < queue_congestion_off_threshold(q))
- blk_clear_queue_congested(q, rw);
+ if (rl->count[sync] < queue_congestion_off_threshold(q))
+ blk_clear_queue_congested(q, sync);
- if (rl->count[rw] + 1 <= q->nr_requests) {
- if (waitqueue_active(&rl->wait[rw]))
- wake_up(&rl->wait[rw]);
+ if (rl->count[sync] + 1 <= q->nr_requests) {
+ if (waitqueue_active(&rl->wait[sync]))
+ wake_up(&rl->wait[sync]);
- blk_clear_queue_full(q, rw);
+ blk_clear_queue_full(q, sync);
}
}
@@ -721,21 +718,20 @@ static void __freed_request(struct request_queue *q, int rw)
* A request has just been released. Account for it, update the full and
* congestion status, wake up any waiters. Called under q->queue_lock.
*/
-static void freed_request(struct request_queue *q, int rw, int priv)
+static void freed_request(struct request_queue *q, int sync, int priv)
{
struct request_list *rl = &q->rq;
- rl->count[rw]--;
+ rl->count[sync]--;
if (priv)
rl->elvpriv--;
- __freed_request(q, rw);
+ __freed_request(q, sync);
- if (unlikely(rl->starved[rw ^ 1]))
- __freed_request(q, rw ^ 1);
+ if (unlikely(rl->starved[sync ^ 1]))
+ __freed_request(q, sync ^ 1);
}
-#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
/*
* Get a free request, queue_lock must be held.
* Returns NULL on failure, with queue_lock held.
@@ -747,15 +743,15 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
struct request *rq = NULL;
struct request_list *rl = &q->rq;
struct io_context *ioc = NULL;
- const int rw = rw_flags & 0x01;
+ const bool is_sync = rw_is_sync(rw_flags) != 0;
int may_queue, priv;
may_queue = elv_may_queue(q, rw_flags);
if (may_queue == ELV_MQUEUE_NO)
goto rq_starved;
- if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
- if (rl->count[rw]+1 >= q->nr_requests) {
+ if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
+ if (rl->count[is_sync]+1 >= q->nr_requests) {
ioc = current_io_context(GFP_ATOMIC, q->node);
/*
* The queue will fill after this allocation, so set
@@ -763,9 +759,9 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
* This process will be allowed to complete a batch of
* requests, others will be blocked.
*/
- if (!blk_queue_full(q, rw)) {
+ if (!blk_queue_full(q, is_sync)) {
ioc_set_batching(q, ioc);
- blk_set_queue_full(q, rw);
+ blk_set_queue_full(q, is_sync);
} else {
if (may_queue != ELV_MQUEUE_MUST
&& !ioc_batching(q, ioc)) {
@@ -778,7 +774,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
}
}
}
- blk_set_queue_congested(q, rw);
+ blk_set_queue_congested(q, is_sync);
}
/*
@@ -786,11 +782,11 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
* limit of requests, otherwise we could have thousands of requests
* allocated with any setting of ->nr_requests
*/
- if (rl->count[rw] >= (3 * q->nr_requests / 2))
+ if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
goto out;
- rl->count[rw]++;
- rl->starved[rw] = 0;
+ rl->count[is_sync]++;
+ rl->starved[is_sync] = 0;
priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
if (priv)
@@ -808,7 +804,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
* wait queue, but this is pretty rare.
*/
spin_lock_irq(q->queue_lock);
- freed_request(q, rw, priv);
+ freed_request(q, is_sync, priv);
/*
* in the very unlikely event that allocation failed and no
@@ -818,8 +814,8 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
* rq mempool into READ and WRITE
*/
rq_starved:
- if (unlikely(rl->count[rw] == 0))
- rl->starved[rw] = 1;
+ if (unlikely(rl->count[is_sync] == 0))
+ rl->starved[is_sync] = 1;
goto out;
}
@@ -833,7 +829,7 @@ rq_starved:
if (ioc_batching(q, ioc))
ioc->nr_batch_requests--;
- trace_block_getrq(q, bio, rw);
+ trace_block_getrq(q, bio, rw_flags & 1);
out:
return rq;
}
@@ -847,7 +843,7 @@ out:
static struct request *get_request_wait(struct request_queue *q, int rw_flags,
struct bio *bio)
{
- const int rw = rw_flags & 0x01;
+ const bool is_sync = rw_is_sync(rw_flags) != 0;
struct request *rq;
rq = get_request(q, rw_flags, bio, GFP_NOIO);
@@ -856,10 +852,10 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
struct io_context *ioc;
struct request_list *rl = &q->rq;
- prepare_to_wait_exclusive(&rl->wait[rw], &wait,
+ prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
TASK_UNINTERRUPTIBLE);
- trace_block_sleeprq(q, bio, rw);
+ trace_block_sleeprq(q, bio, rw_flags & 1);
__generic_unplug_device(q);
spin_unlock_irq(q->queue_lock);
@@ -875,7 +871,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
ioc_set_batching(q, ioc);
spin_lock_irq(q->queue_lock);
- finish_wait(&rl->wait[rw], &wait);
+ finish_wait(&rl->wait[is_sync], &wait);
rq = get_request(q, rw_flags, bio, GFP_NOIO);
};
@@ -1066,19 +1062,22 @@ void __blk_put_request(struct request_queue *q, struct request *req)
elv_completed_request(q, req);
+ /* a bio still attached here indicates a bio leak */
+ WARN_ON(req->bio != NULL);
+
/*
* Request may not have originated from ll_rw_blk. if not,
* it didn't come out of our reserved rq pools
*/
if (req->cmd_flags & REQ_ALLOCED) {
- int rw = rq_data_dir(req);
+ int is_sync = rq_is_sync(req) != 0;
int priv = req->cmd_flags & REQ_ELVPRIV;
BUG_ON(!list_empty(&req->queuelist));
BUG_ON(!hlist_unhashed(&req->hash));
blk_free_request(q, req);
- freed_request(q, rw, priv);
+ freed_request(q, is_sync, priv);
}
}
EXPORT_SYMBOL_GPL(__blk_put_request);
@@ -1129,6 +1128,8 @@ void init_request_from_bio(struct request *req, struct bio *bio)
req->cmd_flags |= REQ_UNPLUG;
if (bio_rw_meta(bio))
req->cmd_flags |= REQ_RW_META;
+ if (bio_noidle(bio))
+ req->cmd_flags |= REQ_NOIDLE;
req->errors = 0;
req->hard_sector = req->sector = bio->bi_sector;
@@ -1137,6 +1138,15 @@ void init_request_from_bio(struct request *req, struct bio *bio)
blk_rq_bio_prep(req->q, req, bio);
}
+/*
+ * Only disable plugging for non-rotational devices if the device also
+ * does tagging; otherwise we need plugging for proper request merging
+ */
+static inline bool queue_should_plug(struct request_queue *q)
+{
+ return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
+}
+
static int __make_request(struct request_queue *q, struct bio *bio)
{
struct request *req;
@@ -1243,11 +1253,11 @@ get_rq:
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
bio_flagged(bio, BIO_CPU_AFFINE))
req->cpu = blk_cpu_to_group(smp_processor_id());
- if (!blk_queue_nonrot(q) && elv_queue_empty(q))
+ if (queue_should_plug(q) && elv_queue_empty(q))
blk_plug_device(q);
add_request(q, req);
out:
- if (unplug || blk_queue_nonrot(q))
+ if (unplug || !queue_should_plug(q))
__generic_unplug_device(q);
spin_unlock_irq(q->queue_lock);
return 0;
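
The blk-core.c hunks above re-key the request lists from data direction
(READ/WRITE) to synchronicity (BLK_RW_SYNC/BLK_RW_ASYNC): reads always
count as sync, writes only when explicitly marked. A minimal userspace
sketch of that classification (the flag bit values below are
illustrative assumptions, not the kernel's actual cmd_flags bits):

	#include <stdio.h>
	#include <stdbool.h>

	enum { BLK_RW_SYNC = 0, BLK_RW_ASYNC = 1 };

	#define REQ_RW      (1U << 0)	/* write if set, read if clear (assumed bit) */
	#define REQ_RW_SYNC (1U << 1)	/* write marked synchronous (assumed bit) */

	static bool rw_is_sync(unsigned int rw_flags)
	{
		/* reads are always sync; writes only when marked */
		return !(rw_flags & REQ_RW) || (rw_flags & REQ_RW_SYNC);
	}

	int main(void)
	{
		printf("read       -> list %d\n", rw_is_sync(0) ? BLK_RW_SYNC : BLK_RW_ASYNC);
		printf("write      -> list %d\n", rw_is_sync(REQ_RW) ? BLK_RW_SYNC : BLK_RW_ASYNC);
		printf("sync write -> list %d\n", rw_is_sync(REQ_RW | REQ_RW_SYNC) ? BLK_RW_SYNC : BLK_RW_ASYNC);
		return 0;
	}
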
diff --git a/block/blk-merge.c b/block/blk-merge.c
index b92f5b0866b0..e39cb24b7679 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -38,72 +38,77 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect)
}
}
-void blk_recalc_rq_segments(struct request *rq)
+static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
+ struct bio *bio)
{
- int nr_phys_segs;
unsigned int phys_size;
struct bio_vec *bv, *bvprv = NULL;
- int seg_size;
- int cluster;
- struct req_iterator iter;
- int high, highprv = 1;
- struct request_queue *q = rq->q;
+ int cluster, i, high, highprv = 1;
+ unsigned int seg_size, nr_phys_segs;
+ struct bio *fbio, *bbio;
- if (!rq->bio)
- return;
+ if (!bio)
+ return 0;
+ fbio = bio;
cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
seg_size = 0;
phys_size = nr_phys_segs = 0;
- rq_for_each_segment(bv, rq, iter) {
- /*
- * the trick here is making sure that a high page is never
- * considered part of another segment, since that might
- * change with the bounce page.
- */
- high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
- if (high || highprv)
- goto new_segment;
- if (cluster) {
- if (seg_size + bv->bv_len > q->max_segment_size)
- goto new_segment;
- if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
- goto new_segment;
- if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
+ for_each_bio(bio) {
+ bio_for_each_segment(bv, bio, i) {
+ /*
+ * the trick here is making sure that a high page is
+ * never considered part of another segment, since that
+ * might change with the bounce page.
+ */
+ high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
+ if (high || highprv)
goto new_segment;
+ if (cluster) {
+ if (seg_size + bv->bv_len > q->max_segment_size)
+ goto new_segment;
+ if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
+ goto new_segment;
+ if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
+ goto new_segment;
+
+ seg_size += bv->bv_len;
+ bvprv = bv;
+ continue;
+ }
+new_segment:
+ if (nr_phys_segs == 1 && seg_size >
+ fbio->bi_seg_front_size)
+ fbio->bi_seg_front_size = seg_size;
- seg_size += bv->bv_len;
+ nr_phys_segs++;
bvprv = bv;
- continue;
+ seg_size = bv->bv_len;
+ highprv = high;
}
-new_segment:
- if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
- rq->bio->bi_seg_front_size = seg_size;
-
- nr_phys_segs++;
- bvprv = bv;
- seg_size = bv->bv_len;
- highprv = high;
+ bbio = bio;
}
- if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
- rq->bio->bi_seg_front_size = seg_size;
- if (seg_size > rq->biotail->bi_seg_back_size)
- rq->biotail->bi_seg_back_size = seg_size;
+ if (nr_phys_segs == 1 && seg_size > fbio->bi_seg_front_size)
+ fbio->bi_seg_front_size = seg_size;
+ if (seg_size > bbio->bi_seg_back_size)
+ bbio->bi_seg_back_size = seg_size;
+
+ return nr_phys_segs;
+}
- rq->nr_phys_segments = nr_phys_segs;
+void blk_recalc_rq_segments(struct request *rq)
+{
+ rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio);
}
void blk_recount_segments(struct request_queue *q, struct bio *bio)
{
- struct request rq;
struct bio *nxt = bio->bi_next;
- rq.q = q;
- rq.bio = rq.biotail = bio;
+
bio->bi_next = NULL;
- blk_recalc_rq_segments(&rq);
+ bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio);
bio->bi_next = nxt;
- bio->bi_phys_segments = rq.nr_phys_segments;
bio->bi_flags |= (1 << BIO_SEG_VALID);
}
EXPORT_SYMBOL(blk_recount_segments);
@@ -398,6 +403,8 @@ static int attempt_merge(struct request_queue *q, struct request *req,
if (blk_rq_cpu_valid(next))
req->cpu = next->cpu;
+ /* ownership of bio passed from next to req */
+ next->bio = NULL;
__blk_put_request(q, next);
return 1;
}
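
The refactored __blk_recalc_rq_segments() above applies one rule per
biovec: extend the current physical segment while the pages are
contiguous and the running size stays within max_segment_size,
otherwise start a new segment. A simplified standalone sketch of that
counting rule (it deliberately omits the bounce-page, phys-mergeable
and segment-boundary checks):

	#include <stdio.h>

	struct vec { unsigned long addr; unsigned int len; };

	static unsigned int count_segments(const struct vec *v, int n,
					   unsigned int max_seg_size)
	{
		unsigned int nr_segs = 0, seg_size = 0;
		int i;

		for (i = 0; i < n; i++) {
			if (nr_segs &&
			    seg_size + v[i].len <= max_seg_size &&
			    v[i - 1].addr + v[i - 1].len == v[i].addr) {
				seg_size += v[i].len;	/* merge into current segment */
				continue;
			}
			nr_segs++;			/* start a new segment */
			seg_size = v[i].len;
		}
		return nr_segs;
	}

	int main(void)
	{
		struct vec v[] = {
			{ 0x1000, 0x200 }, { 0x1200, 0x200 },	/* contiguous pair */
			{ 0x8000, 0x200 },			/* gap: new segment */
		};
		printf("%u segments\n", count_segments(v, 3, 65536));	/* prints 2 */
		return 0;
	}
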
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index ce0efc6b26dc..ee9c21602228 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -64,7 +64,7 @@ static int raise_blk_irq(int cpu, struct request *rq)
data->info = rq;
data->flags = 0;
- __smp_call_function_single(cpu, data);
+ __smp_call_function_single(cpu, data, 0);
return 0;
}
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e29ddfc73cf4..3ff9bba3379a 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -48,28 +48,28 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
q->nr_requests = nr;
blk_queue_congestion_threshold(q);
- if (rl->count[READ] >= queue_congestion_on_threshold(q))
- blk_set_queue_congested(q, READ);
- else if (rl->count[READ] < queue_congestion_off_threshold(q))
- blk_clear_queue_congested(q, READ);
-
- if (rl->count[WRITE] >= queue_congestion_on_threshold(q))
- blk_set_queue_congested(q, WRITE);
- else if (rl->count[WRITE] < queue_congestion_off_threshold(q))
- blk_clear_queue_congested(q, WRITE);
-
- if (rl->count[READ] >= q->nr_requests) {
- blk_set_queue_full(q, READ);
- } else if (rl->count[READ]+1 <= q->nr_requests) {
- blk_clear_queue_full(q, READ);
- wake_up(&rl->wait[READ]);
+ if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
+ blk_set_queue_congested(q, BLK_RW_SYNC);
+ else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
+ blk_clear_queue_congested(q, BLK_RW_SYNC);
+
+ if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
+ blk_set_queue_congested(q, BLK_RW_ASYNC);
+ else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
+ blk_clear_queue_congested(q, BLK_RW_ASYNC);
+
+ if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
+ blk_set_queue_full(q, BLK_RW_SYNC);
+ } else if (rl->count[BLK_RW_SYNC]+1 <= q->nr_requests) {
+ blk_clear_queue_full(q, BLK_RW_SYNC);
+ wake_up(&rl->wait[BLK_RW_SYNC]);
}
- if (rl->count[WRITE] >= q->nr_requests) {
- blk_set_queue_full(q, WRITE);
- } else if (rl->count[WRITE]+1 <= q->nr_requests) {
- blk_clear_queue_full(q, WRITE);
- wake_up(&rl->wait[WRITE]);
+ if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
+ blk_set_queue_full(q, BLK_RW_ASYNC);
+ } else if (rl->count[BLK_RW_ASYNC]+1 <= q->nr_requests) {
+ blk_clear_queue_full(q, BLK_RW_ASYNC);
+ wake_up(&rl->wait[BLK_RW_ASYNC]);
}
spin_unlock_irq(q->queue_lock);
return ret;
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index a09535377a94..bbbdc4b8ccf2 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -209,12 +209,19 @@ void blk_abort_queue(struct request_queue *q)
{
unsigned long flags;
struct request *rq, *tmp;
+ LIST_HEAD(list);
spin_lock_irqsave(q->queue_lock, flags);
elv_abort_queue(q);
- list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list)
+ /*
+ * Splice entries to a local list to avoid deadlocking if entries
+ * get re-added to the timeout list by error handling
+ */
+ list_splice_init(&q->timeout_list, &list);
+
+ list_for_each_entry_safe(rq, tmp, &list, timeout_list)
blk_abort_request(rq);
spin_unlock_irqrestore(q->queue_lock, flags);
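
The blk_abort_queue() fix above is the classic splice-then-walk
pattern: move everything off the shared list onto a private head
before iterating, so a callback that re-adds entries to the shared
list cannot extend (or deadlock) the traversal. A toy userspace sketch
of the idea, with a minimal singly-linked list standing in for the
kernel's list_head API:

	#include <stdio.h>
	#include <stddef.h>

	struct node { struct node *next; int id; };

	/* detach the whole chain from *shared onto a local head */
	static struct node *splice_init(struct node **shared)
	{
		struct node *head = *shared;
		*shared = NULL;
		return head;
	}

	int main(void)
	{
		struct node c = { NULL, 3 }, b = { &c, 2 }, a = { &b, 1 };
		struct node *shared = &a;
		struct node *local = splice_init(&shared);

		for (; local; local = local->next) {
			printf("aborting rq %d\n", local->id);
			/* even if handling re-adds to 'shared', this walk ends */
		}
		return 0;
	}
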
diff --git a/block/blk.h b/block/blk.h
index 0dce92c37496..3ee94358b43d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -102,7 +102,7 @@ static inline int blk_cpu_to_group(int cpu)
const struct cpumask *mask = cpu_coregroup_mask(cpu);
return cpumask_first(mask);
#elif defined(CONFIG_SCHED_SMT)
- return first_cpu(per_cpu(cpu_sibling_map, cpu));
+ return cpumask_first(topology_thread_cpumask(cpu));
#else
return cpu;
#endif
diff --git a/block/blktrace.c b/block/blktrace.c
deleted file mode 100644
index 39cc3bfe56e4..000000000000
--- a/block/blktrace.c
+++ /dev/null
@@ -1,860 +0,0 @@
-/*
- * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- */
-#include <linux/kernel.h>
-#include <linux/blkdev.h>
-#include <linux/blktrace_api.h>
-#include <linux/percpu.h>
-#include <linux/init.h>
-#include <linux/mutex.h>
-#include <linux/debugfs.h>
-#include <linux/time.h>
-#include <trace/block.h>
-#include <asm/uaccess.h>
-
-static unsigned int blktrace_seq __read_mostly = 1;
-
-/* Global reference count of probes */
-static DEFINE_MUTEX(blk_probe_mutex);
-static atomic_t blk_probes_ref = ATOMIC_INIT(0);
-
-static int blk_register_tracepoints(void);
-static void blk_unregister_tracepoints(void);
-
-/*
- * Send out a notify message.
- */
-static void trace_note(struct blk_trace *bt, pid_t pid, int action,
- const void *data, size_t len)
-{
- struct blk_io_trace *t;
-
- t = relay_reserve(bt->rchan, sizeof(*t) + len);
- if (t) {
- const int cpu = smp_processor_id();
-
- t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
- t->time = ktime_to_ns(ktime_get());
- t->device = bt->dev;
- t->action = action;
- t->pid = pid;
- t->cpu = cpu;
- t->pdu_len = len;
- memcpy((void *) t + sizeof(*t), data, len);
- }
-}
-
-/*
- * Send out a notify for this process, if we haven't done so since a trace
- * started
- */
-static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk)
-{
- tsk->btrace_seq = blktrace_seq;
- trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm));
-}
-
-static void trace_note_time(struct blk_trace *bt)
-{
- struct timespec now;
- unsigned long flags;
- u32 words[2];
-
- getnstimeofday(&now);
- words[0] = now.tv_sec;
- words[1] = now.tv_nsec;
-
- local_irq_save(flags);
- trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words));
- local_irq_restore(flags);
-}
-
-void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
-{
- int n;
- va_list args;
- unsigned long flags;
- char *buf;
-
- local_irq_save(flags);
- buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
- va_start(args, fmt);
- n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
- va_end(args);
-
- trace_note(bt, 0, BLK_TN_MESSAGE, buf, n);
- local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(__trace_note_message);
-
-static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
- pid_t pid)
-{
- if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
- return 1;
- if (sector < bt->start_lba || sector > bt->end_lba)
- return 1;
- if (bt->pid && pid != bt->pid)
- return 1;
-
- return 0;
-}
-
-/*
- * Data direction bit lookup
- */
-static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) };
-
-/* The ilog2() calls fall out because they're constant */
-#define MASK_TC_BIT(rw, __name) ( (rw & (1 << BIO_RW_ ## __name)) << \
- (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name) )
-
-/*
- * The worker for the various blk_add_trace*() types. Fills out a
- * blk_io_trace structure and places it in a per-cpu subbuffer.
- */
-static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
- int rw, u32 what, int error, int pdu_len, void *pdu_data)
-{
- struct task_struct *tsk = current;
- struct blk_io_trace *t;
- unsigned long flags;
- unsigned long *sequence;
- pid_t pid;
- int cpu;
-
- if (unlikely(bt->trace_state != Blktrace_running))
- return;
-
- what |= ddir_act[rw & WRITE];
- what |= MASK_TC_BIT(rw, BARRIER);
- what |= MASK_TC_BIT(rw, SYNC);
- what |= MASK_TC_BIT(rw, AHEAD);
- what |= MASK_TC_BIT(rw, META);
- what |= MASK_TC_BIT(rw, DISCARD);
-
- pid = tsk->pid;
- if (unlikely(act_log_check(bt, what, sector, pid)))
- return;
-
- /*
- * A word about the locking here - we disable interrupts to reserve
- * some space in the relay per-cpu buffer, to prevent an irq
- * from coming in and stepping on our toes.
- */
- local_irq_save(flags);
-
- if (unlikely(tsk->btrace_seq != blktrace_seq))
- trace_note_tsk(bt, tsk);
-
- t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
- if (t) {
- cpu = smp_processor_id();
- sequence = per_cpu_ptr(bt->sequence, cpu);
-
- t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
- t->sequence = ++(*sequence);
- t->time = ktime_to_ns(ktime_get());
- t->sector = sector;
- t->bytes = bytes;
- t->action = what;
- t->pid = pid;
- t->device = bt->dev;
- t->cpu = cpu;
- t->error = error;
- t->pdu_len = pdu_len;
-
- if (pdu_len)
- memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
- }
-
- local_irq_restore(flags);
-}
-
-static struct dentry *blk_tree_root;
-static DEFINE_MUTEX(blk_tree_mutex);
-
-static void blk_trace_cleanup(struct blk_trace *bt)
-{
- debugfs_remove(bt->msg_file);
- debugfs_remove(bt->dropped_file);
- relay_close(bt->rchan);
- free_percpu(bt->sequence);
- free_percpu(bt->msg_data);
- kfree(bt);
- mutex_lock(&blk_probe_mutex);
- if (atomic_dec_and_test(&blk_probes_ref))
- blk_unregister_tracepoints();
- mutex_unlock(&blk_probe_mutex);
-}
-
-int blk_trace_remove(struct request_queue *q)
-{
- struct blk_trace *bt;
-
- bt = xchg(&q->blk_trace, NULL);
- if (!bt)
- return -EINVAL;
-
- if (bt->trace_state == Blktrace_setup ||
- bt->trace_state == Blktrace_stopped)
- blk_trace_cleanup(bt);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(blk_trace_remove);
-
-static int blk_dropped_open(struct inode *inode, struct file *filp)
-{
- filp->private_data = inode->i_private;
-
- return 0;
-}
-
-static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
- size_t count, loff_t *ppos)
-{
- struct blk_trace *bt = filp->private_data;
- char buf[16];
-
- snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped));
-
- return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
-}
-
-static const struct file_operations blk_dropped_fops = {
- .owner = THIS_MODULE,
- .open = blk_dropped_open,
- .read = blk_dropped_read,
-};
-
-static int blk_msg_open(struct inode *inode, struct file *filp)
-{
- filp->private_data = inode->i_private;
-
- return 0;
-}
-
-static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
- size_t count, loff_t *ppos)
-{
- char *msg;
- struct blk_trace *bt;
-
- if (count > BLK_TN_MAX_MSG)
- return -EINVAL;
-
- msg = kmalloc(count, GFP_KERNEL);
- if (msg == NULL)
- return -ENOMEM;
-
- if (copy_from_user(msg, buffer, count)) {
- kfree(msg);
- return -EFAULT;
- }
-
- bt = filp->private_data;
- __trace_note_message(bt, "%s", msg);
- kfree(msg);
-
- return count;
-}
-
-static const struct file_operations blk_msg_fops = {
- .owner = THIS_MODULE,
- .open = blk_msg_open,
- .write = blk_msg_write,
-};
-
-/*
- * Keep track of how many times we encountered a full subbuffer, to aid
- * the user space app in telling how many lost events there were.
- */
-static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
- void *prev_subbuf, size_t prev_padding)
-{
- struct blk_trace *bt;
-
- if (!relay_buf_full(buf))
- return 1;
-
- bt = buf->chan->private_data;
- atomic_inc(&bt->dropped);
- return 0;
-}
-
-static int blk_remove_buf_file_callback(struct dentry *dentry)
-{
- struct dentry *parent = dentry->d_parent;
- debugfs_remove(dentry);
-
- /*
- * this will fail for all but the last file, but that is ok. what we
- * care about is the top level buts->name directory going away, when
- * the last trace file is gone. Then we don't have to rmdir() that
- * manually on trace stop, so it nicely solves the issue with
- * force killing of running traces.
- */
-
- debugfs_remove(parent);
- return 0;
-}
-
-static struct dentry *blk_create_buf_file_callback(const char *filename,
- struct dentry *parent,
- int mode,
- struct rchan_buf *buf,
- int *is_global)
-{
- return debugfs_create_file(filename, mode, parent, buf,
- &relay_file_operations);
-}
-
-static struct rchan_callbacks blk_relay_callbacks = {
- .subbuf_start = blk_subbuf_start_callback,
- .create_buf_file = blk_create_buf_file_callback,
- .remove_buf_file = blk_remove_buf_file_callback,
-};
-
-/*
- * Setup everything required to start tracing
- */
-int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
- struct blk_user_trace_setup *buts)
-{
- struct blk_trace *old_bt, *bt = NULL;
- struct dentry *dir = NULL;
- int ret, i;
-
- if (!buts->buf_size || !buts->buf_nr)
- return -EINVAL;
-
- strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
- buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';
-
- /*
- * some device names have larger paths - convert the slashes
- * to underscores for this to work as expected
- */
- for (i = 0; i < strlen(buts->name); i++)
- if (buts->name[i] == '/')
- buts->name[i] = '_';
-
- ret = -ENOMEM;
- bt = kzalloc(sizeof(*bt), GFP_KERNEL);
- if (!bt)
- goto err;
-
- bt->sequence = alloc_percpu(unsigned long);
- if (!bt->sequence)
- goto err;
-
- bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG);
- if (!bt->msg_data)
- goto err;
-
- ret = -ENOENT;
-
- if (!blk_tree_root) {
- blk_tree_root = debugfs_create_dir("block", NULL);
- if (!blk_tree_root)
- return -ENOMEM;
- }
-
- dir = debugfs_create_dir(buts->name, blk_tree_root);
-
- if (!dir)
- goto err;
-
- bt->dir = dir;
- bt->dev = dev;
- atomic_set(&bt->dropped, 0);
-
- ret = -EIO;
- bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops);
- if (!bt->dropped_file)
- goto err;
-
- bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
- if (!bt->msg_file)
- goto err;
-
- bt->rchan = relay_open("trace", dir, buts->buf_size,
- buts->buf_nr, &blk_relay_callbacks, bt);
- if (!bt->rchan)
- goto err;
-
- bt->act_mask = buts->act_mask;
- if (!bt->act_mask)
- bt->act_mask = (u16) -1;
-
- bt->start_lba = buts->start_lba;
- bt->end_lba = buts->end_lba;
- if (!bt->end_lba)
- bt->end_lba = -1ULL;
-
- bt->pid = buts->pid;
- bt->trace_state = Blktrace_setup;
-
- mutex_lock(&blk_probe_mutex);
- if (atomic_add_return(1, &blk_probes_ref) == 1) {
- ret = blk_register_tracepoints();
- if (ret)
- goto probe_err;
- }
- mutex_unlock(&blk_probe_mutex);
-
- ret = -EBUSY;
- old_bt = xchg(&q->blk_trace, bt);
- if (old_bt) {
- (void) xchg(&q->blk_trace, old_bt);
- goto err;
- }
-
- return 0;
-probe_err:
- atomic_dec(&blk_probes_ref);
- mutex_unlock(&blk_probe_mutex);
-err:
- if (bt) {
- if (bt->msg_file)
- debugfs_remove(bt->msg_file);
- if (bt->dropped_file)
- debugfs_remove(bt->dropped_file);
- free_percpu(bt->sequence);
- free_percpu(bt->msg_data);
- if (bt->rchan)
- relay_close(bt->rchan);
- kfree(bt);
- }
- return ret;
-}
-
-int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
- char __user *arg)
-{
- struct blk_user_trace_setup buts;
- int ret;
-
- ret = copy_from_user(&buts, arg, sizeof(buts));
- if (ret)
- return -EFAULT;
-
- ret = do_blk_trace_setup(q, name, dev, &buts);
- if (ret)
- return ret;
-
- if (copy_to_user(arg, &buts, sizeof(buts)))
- return -EFAULT;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(blk_trace_setup);
-
-int blk_trace_startstop(struct request_queue *q, int start)
-{
- struct blk_trace *bt;
- int ret;
-
- if ((bt = q->blk_trace) == NULL)
- return -EINVAL;
-
- /*
- * For starting a trace, we can transition from a setup or stopped
- * trace. For stopping a trace, the state must be running
- */
- ret = -EINVAL;
- if (start) {
- if (bt->trace_state == Blktrace_setup ||
- bt->trace_state == Blktrace_stopped) {
- blktrace_seq++;
- smp_mb();
- bt->trace_state = Blktrace_running;
-
- trace_note_time(bt);
- ret = 0;
- }
- } else {
- if (bt->trace_state == Blktrace_running) {
- bt->trace_state = Blktrace_stopped;
- relay_flush(bt->rchan);
- ret = 0;
- }
- }
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(blk_trace_startstop);
-
-/**
- * blk_trace_ioctl: - handle the ioctls associated with tracing
- * @bdev: the block device
- * @cmd: the ioctl cmd
- * @arg: the argument data, if any
- *
- **/
-int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
-{
- struct request_queue *q;
- int ret, start = 0;
- char b[BDEVNAME_SIZE];
-
- q = bdev_get_queue(bdev);
- if (!q)
- return -ENXIO;
-
- mutex_lock(&bdev->bd_mutex);
-
- switch (cmd) {
- case BLKTRACESETUP:
- bdevname(bdev, b);
- ret = blk_trace_setup(q, b, bdev->bd_dev, arg);
- break;
- case BLKTRACESTART:
- start = 1;
- case BLKTRACESTOP:
- ret = blk_trace_startstop(q, start);
- break;
- case BLKTRACETEARDOWN:
- ret = blk_trace_remove(q);
- break;
- default:
- ret = -ENOTTY;
- break;
- }
-
- mutex_unlock(&bdev->bd_mutex);
- return ret;
-}
-
-/**
- * blk_trace_shutdown: - stop and cleanup trace structures
- * @q: the request queue associated with the device
- *
- **/
-void blk_trace_shutdown(struct request_queue *q)
-{
- if (q->blk_trace) {
- blk_trace_startstop(q, 0);
- blk_trace_remove(q);
- }
-}
-
-/*
- * blktrace probes
- */
-
-/**
- * blk_add_trace_rq - Add a trace for a request oriented action
- * @q: queue the io is for
- * @rq: the source request
- * @what: the action
- *
- * Description:
- * Records an action against a request. Will log the bio offset + size.
- *
- **/
-static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
- u32 what)
-{
- struct blk_trace *bt = q->blk_trace;
- int rw = rq->cmd_flags & 0x03;
-
- if (likely(!bt))
- return;
-
- if (blk_discard_rq(rq))
- rw |= (1 << BIO_RW_DISCARD);
-
- if (blk_pc_request(rq)) {
- what |= BLK_TC_ACT(BLK_TC_PC);
- __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
- sizeof(rq->cmd), rq->cmd);
- } else {
- what |= BLK_TC_ACT(BLK_TC_FS);
- __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
- rw, what, rq->errors, 0, NULL);
- }
-}
-
-static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq)
-{
- blk_add_trace_rq(q, rq, BLK_TA_ABORT);
-}
-
-static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq)
-{
- blk_add_trace_rq(q, rq, BLK_TA_INSERT);
-}
-
-static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq)
-{
- blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
-}
-
-static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq)
-{
- blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
-}
-
-static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq)
-{
- blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
-}
-
-/**
- * blk_add_trace_bio - Add a trace for a bio oriented action
- * @q: queue the io is for
- * @bio: the source bio
- * @what: the action
- *
- * Description:
- * Records an action against a bio. Will log the bio offset + size.
- *
- **/
-static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
- u32 what)
-{
- struct blk_trace *bt = q->blk_trace;
-
- if (likely(!bt))
- return;
-
- __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
- !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
-}
-
-static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio)
-{
- blk_add_trace_bio(q, bio, BLK_TA_BOUNCE);
-}
-
-static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio)
-{
- blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
-}
-
-static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio)
-{
- blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
-}
-
-static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio)
-{
- blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
-}
-
-static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio)
-{
- blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
-}
-
-static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw)
-{
- if (bio)
- blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
- else {
- struct blk_trace *bt = q->blk_trace;
-
- if (bt)
- __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL);
- }
-}
-
-
-static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw)
-{
- if (bio)
- blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
- else {
- struct blk_trace *bt = q->blk_trace;
-
- if (bt)
- __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL);
- }
-}
-
-static void blk_add_trace_plug(struct request_queue *q)
-{
- struct blk_trace *bt = q->blk_trace;
-
- if (bt)
- __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
-}
-
-static void blk_add_trace_unplug_io(struct request_queue *q)
-{
- struct blk_trace *bt = q->blk_trace;
-
- if (bt) {
- unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
- __be64 rpdu = cpu_to_be64(pdu);
-
- __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0,
- sizeof(rpdu), &rpdu);
- }
-}
-
-static void blk_add_trace_unplug_timer(struct request_queue *q)
-{
- struct blk_trace *bt = q->blk_trace;
-
- if (bt) {
- unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
- __be64 rpdu = cpu_to_be64(pdu);
-
- __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0,
- sizeof(rpdu), &rpdu);
- }
-}
-
-static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
- unsigned int pdu)
-{
- struct blk_trace *bt = q->blk_trace;
-
- if (bt) {
- __be64 rpdu = cpu_to_be64(pdu);
-
- __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
- BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE),
- sizeof(rpdu), &rpdu);
- }
-}
-
-/**
- * blk_add_trace_remap - Add a trace for a remap operation
- * @q: queue the io is for
- * @bio: the source bio
- * @dev: target device
- * @from: source sector
- * @to: target sector
- *
- * Description:
- * Device mapper or raid target sometimes need to split a bio because
- * it spans a stripe (or similar). Add a trace for that action.
- *
- **/
-static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
- dev_t dev, sector_t from, sector_t to)
-{
- struct blk_trace *bt = q->blk_trace;
- struct blk_io_trace_remap r;
-
- if (likely(!bt))
- return;
-
- r.device = cpu_to_be32(dev);
- r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
- r.sector = cpu_to_be64(to);
-
- __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
- !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
-}
-
-/**
- * blk_add_driver_data - Add binary message with driver-specific data
- * @q: queue the io is for
- * @rq: io request
- * @data: driver-specific data
- * @len: length of driver-specific data
- *
- * Description:
- * Some drivers might want to write driver-specific data per request.
- *
- **/
-void blk_add_driver_data(struct request_queue *q,
- struct request *rq,
- void *data, size_t len)
-{
- struct blk_trace *bt = q->blk_trace;
-
- if (likely(!bt))
- return;
-
- if (blk_pc_request(rq))
- __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
- rq->errors, len, data);
- else
- __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
- 0, BLK_TA_DRV_DATA, rq->errors, len, data);
-}
-EXPORT_SYMBOL_GPL(blk_add_driver_data);
-
-static int blk_register_tracepoints(void)
-{
- int ret;
-
- ret = register_trace_block_rq_abort(blk_add_trace_rq_abort);
- WARN_ON(ret);
- ret = register_trace_block_rq_insert(blk_add_trace_rq_insert);
- WARN_ON(ret);
- ret = register_trace_block_rq_issue(blk_add_trace_rq_issue);
- WARN_ON(ret);
- ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue);
- WARN_ON(ret);
- ret = register_trace_block_rq_complete(blk_add_trace_rq_complete);
- WARN_ON(ret);
- ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce);
- WARN_ON(ret);
- ret = register_trace_block_bio_complete(blk_add_trace_bio_complete);
- WARN_ON(ret);
- ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
- WARN_ON(ret);
- ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
- WARN_ON(ret);
- ret = register_trace_block_bio_queue(blk_add_trace_bio_queue);
- WARN_ON(ret);
- ret = register_trace_block_getrq(blk_add_trace_getrq);
- WARN_ON(ret);
- ret = register_trace_block_sleeprq(blk_add_trace_sleeprq);
- WARN_ON(ret);
- ret = register_trace_block_plug(blk_add_trace_plug);
- WARN_ON(ret);
- ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer);
- WARN_ON(ret);
- ret = register_trace_block_unplug_io(blk_add_trace_unplug_io);
- WARN_ON(ret);
- ret = register_trace_block_split(blk_add_trace_split);
- WARN_ON(ret);
- ret = register_trace_block_remap(blk_add_trace_remap);
- WARN_ON(ret);
- return 0;
-}
-
-static void blk_unregister_tracepoints(void)
-{
- unregister_trace_block_remap(blk_add_trace_remap);
- unregister_trace_block_split(blk_add_trace_split);
- unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
- unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer);
- unregister_trace_block_plug(blk_add_trace_plug);
- unregister_trace_block_sleeprq(blk_add_trace_sleeprq);
- unregister_trace_block_getrq(blk_add_trace_getrq);
- unregister_trace_block_bio_queue(blk_add_trace_bio_queue);
- unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
- unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
- unregister_trace_block_bio_complete(blk_add_trace_bio_complete);
- unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce);
- unregister_trace_block_rq_complete(blk_add_trace_rq_complete);
- unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue);
- unregister_trace_block_rq_issue(blk_add_trace_rq_issue);
- unregister_trace_block_rq_insert(blk_add_trace_rq_insert);
- unregister_trace_block_rq_abort(blk_add_trace_rq_abort);
-
- tracepoint_synchronize_unregister();
-}
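
One detail worth unpacking from the file removed above is the
MASK_TC_BIT() macro: it lifts a single BIO_RW_* request flag bit into
its BLK_TC_* action-mask position with one shift, since ilog2() of a
constant power-of-two mask folds at compile time. A standalone demo
with illustrative bit positions (the real values live in
blktrace_api.h and bio.h):

	#include <stdio.h>

	#define BIO_RW_SYNC	3		/* assumed request flag bit */
	#define BLK_TC_SYNC	(1U << 3)	/* assumed trace category bit */
	#define BLK_TC_SHIFT	16
	#define ILOG2_TC_SYNC	3		/* stands in for ilog2(BLK_TC_SYNC) */

	#define MASK_TC_BIT(rw, name) \
		(((rw) & (1U << BIO_RW_##name)) << \
		 (ILOG2_TC_##name + BLK_TC_SHIFT - BIO_RW_##name))

	int main(void)
	{
		unsigned int rw = 1U << BIO_RW_SYNC;

		/* bit 3 shifted by (3 + 16 - 3) = 16 lands at bit 19 */
		printf("0x%x\n", MASK_TC_BIT(rw, SYNC));	/* 0x80000 */
		return 0;
	}
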
diff --git a/block/bsg.c b/block/bsg.c
index d414bb5607e8..206060e795da 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -218,9 +218,6 @@ bsg_validate_sgv4_hdr(struct request_queue *q, struct sg_io_v4 *hdr, int *rw)
if (hdr->guard != 'Q')
return -EINVAL;
- if (hdr->dout_xfer_len > (q->max_sectors << 9) ||
- hdr->din_xfer_len > (q->max_sectors << 9))
- return -EIO;
switch (hdr->protocol) {
case BSG_PROTOCOL_SCSI:
@@ -244,7 +241,8 @@ bsg_validate_sgv4_hdr(struct request_queue *q, struct sg_io_v4 *hdr, int *rw)
* map sg_io_v4 to a request.
*/
static struct request *
-bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm)
+bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
+ u8 *sense)
{
struct request_queue *q = bd->queue;
struct request *rq, *next_rq = NULL;
@@ -306,6 +304,10 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm)
if (ret)
goto out;
}
+
+ rq->sense = sense;
+ rq->sense_len = 0;
+
return rq;
out:
if (rq->cmd != rq->__cmd)
@@ -348,8 +350,7 @@ static void bsg_rq_end_io(struct request *rq, int uptodate)
static void bsg_add_command(struct bsg_device *bd, struct request_queue *q,
struct bsg_command *bc, struct request *rq)
{
- rq->sense = bc->sense;
- rq->sense_len = 0;
+ int at_head = (0 == (bc->hdr.flags & BSG_FLAG_Q_AT_TAIL));
/*
* add bc command to busy queue and submit rq for io
@@ -366,7 +367,7 @@ static void bsg_add_command(struct bsg_device *bd, struct request_queue *q,
dprintk("%s: queueing rq %p, bc %p\n", bd->name, rq, bc);
rq->end_io_data = bc;
- blk_execute_rq_nowait(q, NULL, rq, 1, bsg_rq_end_io);
+ blk_execute_rq_nowait(q, NULL, rq, at_head, bsg_rq_end_io);
}
static struct bsg_command *bsg_next_done_cmd(struct bsg_device *bd)
@@ -419,7 +420,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
{
int ret = 0;
- dprintk("rq %p bio %p %u\n", rq, bio, rq->errors);
+ dprintk("rq %p bio %p 0x%x\n", rq, bio, rq->errors);
/*
* fill in all the output members
*/
@@ -635,7 +636,7 @@ static int __bsg_write(struct bsg_device *bd, const char __user *buf,
/*
* get a request, fill in the blanks, and add to request queue
*/
- rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm);
+ rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm, bc->sense);
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
rq = NULL;
@@ -922,18 +923,22 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct request *rq;
struct bio *bio, *bidi_bio = NULL;
struct sg_io_v4 hdr;
+ int at_head;
+ u8 sense[SCSI_SENSE_BUFFERSIZE];
if (copy_from_user(&hdr, uarg, sizeof(hdr)))
return -EFAULT;
- rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE);
+ rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE, sense);
if (IS_ERR(rq))
return PTR_ERR(rq);
bio = rq->bio;
if (rq->next_rq)
bidi_bio = rq->next_rq->bio;
- blk_execute_rq(bd->queue, NULL, rq, 0);
+
+ at_head = (0 == (hdr.flags & BSG_FLAG_Q_AT_TAIL));
+ blk_execute_rq(bd->queue, NULL, rq, at_head);
ret = blk_complete_sgv4_hdr_rq(rq, &hdr, bio, bidi_bio);
if (copy_to_user(uarg, &hdr, sizeof(hdr)))
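
Two behavioral changes run through the bsg.c hunks: the sense buffer
is now wired up when the request is mapped (so both the write path and
the ioctl path get one), and BSG_FLAG_Q_AT_TAIL lets userspace opt out
of the historical queue-head insertion. A small sketch of the at_head
decision (0x10 is the flag value assumed for the demo):

	#include <stdio.h>

	#define BSG_FLAG_Q_AT_TAIL 0x10	/* assumed value */

	static int queue_at_head(unsigned int flags)
	{
		/* default stays at the head; the flag requests tail insertion */
		return (flags & BSG_FLAG_Q_AT_TAIL) == 0;
	}

	int main(void)
	{
		printf("default  : at_head=%d\n", queue_at_head(0));
		printf("tail flag: at_head=%d\n", queue_at_head(BSG_FLAG_Q_AT_TAIL));
		return 0;
	}
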
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 664ebfd092ec..9e809345f71a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1992,8 +1992,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
}
if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
cfq_slice_expired(cfqd, 1);
- else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list))
+ else if (sync && !rq_noidle(rq) &&
+ RB_EMPTY_ROOT(&cfqq->sort_list)) {
cfq_arm_slice_timer(cfqd);
+ }
}
if (!cfqd->rq_in_driver)
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
index 504b275e1b90..572bbc2f900d 100644
--- a/block/cmd-filter.c
+++ b/block/cmd-filter.c
@@ -22,6 +22,7 @@
#include <linux/spinlock.h>
#include <linux/capability.h>
#include <linux/bitops.h>
+#include <linux/blkdev.h>
#include <scsi/scsi.h>
#include <linux/cdrom.h>
diff --git a/block/elevator.c b/block/elevator.c
index 98259eda0ef6..ca6788a0195a 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -677,7 +677,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
}
if (unplug_it && blk_queue_plugged(q)) {
- int nrq = q->rq.count[READ] + q->rq.count[WRITE]
+ int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC]
- q->in_flight;
if (nrq >= q->unplug_thresh)
diff --git a/block/genhd.c b/block/genhd.c
index 397960cf26af..a9ec910974c1 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -256,6 +256,22 @@ void blkdev_show(struct seq_file *seqf, off_t offset)
}
#endif /* CONFIG_PROC_FS */
+/**
+ * register_blkdev - register a new block device
+ *
+ * @major: the requested major device number [1..255]. If @major=0, try to
+ * allocate any unused major number.
+ * @name: the name of the new block device as a zero-terminated string
+ *
+ * The @name must be unique within the system.
+ *
+ * The return value depends on the @major input parameter.
+ * - if a major device number was requested in range [1..255] then the
+ * function returns zero on success, or a negative error code
+ * - if any unused major number was requested with @major=0 parameter
+ * then the return value is the allocated major number in range
+ * [1..255] or a negative error code otherwise
+ */
int register_blkdev(unsigned int major, const char *name)
{
struct blk_major_name **n, *p;
@@ -1087,6 +1103,14 @@ dev_t blk_lookup_devt(const char *name, int partno)
if (strcmp(dev_name(dev), name))
continue;
+ if (partno < disk->minors) {
+ /* We need to return the right devno, even
+ * if the partition doesn't exist yet.
+ */
+ devt = MKDEV(MAJOR(dev->devt),
+ MINOR(dev->devt) + partno);
+ break;
+ }
part = disk_get_part(disk, partno);
if (part) {
devt = part_devt(part);
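
The new kernel-doc for register_blkdev() above spells out the two call
conventions: a fixed major in [1..255] returns zero on success, while
major == 0 asks the kernel to allocate one and returns it. A minimal
module-style usage sketch ("sampleblk" is a hypothetical driver name):

	#include <linux/module.h>
	#include <linux/fs.h>	/* register_blkdev(), unregister_blkdev() */

	static int major;

	static int __init sampleblk_init(void)
	{
		/* major == 0: ask for any unused major number */
		major = register_blkdev(0, "sampleblk");
		if (major < 0)
			return major;	/* negative error code */
		pr_info("sampleblk: allocated major %d\n", major);
		return 0;
	}

	static void __exit sampleblk_exit(void)
	{
		unregister_blkdev(major, "sampleblk");
	}

	module_init(sampleblk_init);
	module_exit(sampleblk_exit);
	MODULE_LICENSE("GPL");
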
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index ee9c67d7e1be..626ee274c5c4 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -214,21 +214,10 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
return 0;
}
-/*
- * unmap a request that was previously mapped to this sg_io_hdr. handles
- * both sg and non-sg sg_io_hdr.
- */
-static int blk_unmap_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr)
-{
- blk_rq_unmap_user(rq->bio);
- blk_put_request(rq);
- return 0;
-}
-
static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
struct bio *bio)
{
- int r, ret = 0;
+ int ret = 0;
/*
* fill in all the output members
@@ -253,12 +242,10 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
ret = -EFAULT;
}
- rq->bio = bio;
- r = blk_unmap_sghdr_rq(rq, hdr);
- if (ret)
- r = ret;
+ blk_rq_unmap_user(bio);
+ blk_put_request(rq);
- return r;
+ return ret;
}
static int sg_io(struct request_queue *q, struct gendisk *bd_disk,