aboutsummaryrefslogtreecommitdiffstats
path: root/block/ll_rw_blk.c
diff options
context:
space:
mode:
Diffstat (limited to 'block/ll_rw_blk.c')
-rw-r--r--block/ll_rw_blk.c182
1 files changed, 133 insertions, 49 deletions
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 03d9c82b0fe7..062067fa7ead 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -28,6 +28,7 @@
#include <linux/writeback.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
+#include <linux/blktrace_api.h>
/*
* for max sense size
@@ -625,26 +626,31 @@ static inline int ordered_bio_endio(struct request *rq, struct bio *bio,
* Different hardware can have different requirements as to what pages
* it can do I/O directly to. A low level driver can call
* blk_queue_bounce_limit to have lower memory pages allocated as bounce
- * buffers for doing I/O to pages residing above @page. By default
- * the block layer sets this to the highest numbered "low" memory page.
+ * buffers for doing I/O to pages residing above @page.
**/
void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
{
unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
-
- /*
- * set appropriate bounce gfp mask -- unfortunately we don't have a
- * full 4GB zone, so we have to resort to low memory for any bounces.
- * ISA has its own < 16MB zone.
- */
- if (bounce_pfn < blk_max_low_pfn) {
- BUG_ON(dma_addr < BLK_BOUNCE_ISA);
+ int dma = 0;
+
+ q->bounce_gfp = GFP_NOIO;
+#if BITS_PER_LONG == 64
+ /* Assume anything <= 4GB can be handled by IOMMU.
+ Actually some IOMMUs can handle everything, but I don't
+ know of a way to test this here. */
+ if (bounce_pfn < (0xffffffff>>PAGE_SHIFT))
+ dma = 1;
+ q->bounce_pfn = max_low_pfn;
+#else
+ if (bounce_pfn < blk_max_low_pfn)
+ dma = 1;
+ q->bounce_pfn = bounce_pfn;
+#endif
+ if (dma) {
init_emergency_isa_pool();
q->bounce_gfp = GFP_NOIO | GFP_DMA;
- } else
- q->bounce_gfp = GFP_NOIO;
-
- q->bounce_pfn = bounce_pfn;
+ q->bounce_pfn = bounce_pfn;
+ }
}
EXPORT_SYMBOL(blk_queue_bounce_limit);
@@ -1551,8 +1557,10 @@ void blk_plug_device(request_queue_t *q)
if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
return;
- if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
+ if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
+ blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
+ }
}
EXPORT_SYMBOL(blk_plug_device);
@@ -1616,14 +1624,21 @@ static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
/*
* devices don't necessarily have an ->unplug_fn defined
*/
- if (q->unplug_fn)
+ if (q->unplug_fn) {
+ blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
+ q->rq.count[READ] + q->rq.count[WRITE]);
+
q->unplug_fn(q);
+ }
}
static void blk_unplug_work(void *data)
{
request_queue_t *q = data;
+ blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
+ q->rq.count[READ] + q->rq.count[WRITE]);
+
q->unplug_fn(q);
}
@@ -1631,6 +1646,9 @@ static void blk_unplug_timeout(unsigned long data)
{
request_queue_t *q = (request_queue_t *)data;
+ blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
+ q->rq.count[READ] + q->rq.count[WRITE]);
+
kblockd_schedule_work(&q->unplug_work);
}
@@ -1735,16 +1753,11 @@ EXPORT_SYMBOL(blk_run_queue);
* Hopefully the low level driver will have finished any
* outstanding requests first...
**/
-void blk_cleanup_queue(request_queue_t * q)
+static void blk_release_queue(struct kobject *kobj)
{
+ request_queue_t *q = container_of(kobj, struct request_queue, kobj);
struct request_list *rl = &q->rq;
- if (!atomic_dec_and_test(&q->refcnt))
- return;
-
- if (q->elevator)
- elevator_exit(q->elevator);
-
blk_sync_queue(q);
if (rl->rq_pool)
@@ -1753,9 +1766,30 @@ void blk_cleanup_queue(request_queue_t * q)
if (q->queue_tags)
__blk_queue_free_tags(q);
+ if (q->blk_trace)
+ blk_trace_shutdown(q);
+
kmem_cache_free(requestq_cachep, q);
}
+void blk_put_queue(request_queue_t *q)
+{
+ kobject_put(&q->kobj);
+}
+EXPORT_SYMBOL(blk_put_queue);
+
+void blk_cleanup_queue(request_queue_t * q)
+{
+ mutex_lock(&q->sysfs_lock);
+ set_bit(QUEUE_FLAG_DEAD, &q->queue_flags);
+ mutex_unlock(&q->sysfs_lock);
+
+ if (q->elevator)
+ elevator_exit(q->elevator);
+
+ blk_put_queue(q);
+}
+
EXPORT_SYMBOL(blk_cleanup_queue);
static int blk_init_free_list(request_queue_t *q)
@@ -1783,6 +1817,8 @@ request_queue_t *blk_alloc_queue(gfp_t gfp_mask)
}
EXPORT_SYMBOL(blk_alloc_queue);
+static struct kobj_type queue_ktype;
+
request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
request_queue_t *q;
@@ -1793,11 +1829,16 @@ request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
memset(q, 0, sizeof(*q));
init_timer(&q->unplug_timer);
- atomic_set(&q->refcnt, 1);
+
+ snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue");
+ q->kobj.ktype = &queue_ktype;
+ kobject_init(&q->kobj);
q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
q->backing_dev_info.unplug_io_data = q;
+ mutex_init(&q->sysfs_lock);
+
return q;
}
EXPORT_SYMBOL(blk_alloc_queue_node);
@@ -1849,8 +1890,10 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
return NULL;
q->node = node_id;
- if (blk_init_free_list(q))
- goto out_init;
+ if (blk_init_free_list(q)) {
+ kmem_cache_free(requestq_cachep, q);
+ return NULL;
+ }
/*
* if caller didn't supply a lock, they get per-queue locking with
@@ -1886,9 +1929,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
return q;
}
- blk_cleanup_queue(q);
-out_init:
- kmem_cache_free(requestq_cachep, q);
+ blk_put_queue(q);
return NULL;
}
EXPORT_SYMBOL(blk_init_queue_node);
@@ -1896,7 +1937,7 @@ EXPORT_SYMBOL(blk_init_queue_node);
int blk_get_queue(request_queue_t *q)
{
if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
- atomic_inc(&q->refcnt);
+ kobject_get(&q->kobj);
return 0;
}
@@ -2104,6 +2145,8 @@ rq_starved:
rq_init(q, rq);
rq->rl = rl;
+
+ blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
out:
return rq;
}
@@ -2132,6 +2175,8 @@ static struct request *get_request_wait(request_queue_t *q, int rw,
if (!rq) {
struct io_context *ioc;
+ blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
+
__generic_unplug_device(q);
spin_unlock_irq(q->queue_lock);
io_schedule();
@@ -2185,6 +2230,8 @@ EXPORT_SYMBOL(blk_get_request);
*/
void blk_requeue_request(request_queue_t *q, struct request *rq)
{
+ blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
+
if (blk_rq_tagged(rq))
blk_queue_end_tag(q, rq);
@@ -2819,6 +2866,8 @@ static int __make_request(request_queue_t *q, struct bio *bio)
if (!q->back_merge_fn(q, req, bio))
break;
+ blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
+
req->biotail->bi_next = bio;
req->biotail = bio;
req->nr_sectors = req->hard_nr_sectors += nr_sectors;
@@ -2834,6 +2883,8 @@ static int __make_request(request_queue_t *q, struct bio *bio)
if (!q->front_merge_fn(q, req, bio))
break;
+ blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
+
bio->bi_next = req->bio;
req->bio = bio;
@@ -2951,6 +3002,7 @@ void generic_make_request(struct bio *bio)
request_queue_t *q;
sector_t maxsector;
int ret, nr_sectors = bio_sectors(bio);
+ dev_t old_dev;
might_sleep();
/* Test device or partition size, when known. */
@@ -2977,6 +3029,8 @@ void generic_make_request(struct bio *bio)
* NOTE: we don't repeat the blk_size check for each new device.
* Stacking drivers are expected to know what they are doing.
*/
+ maxsector = -1;
+ old_dev = 0;
do {
char b[BDEVNAME_SIZE];
@@ -3009,6 +3063,15 @@ end_io:
*/
blk_partition_remap(bio);
+ if (maxsector != -1)
+ blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
+ maxsector);
+
+ blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
+
+ maxsector = bio->bi_sector;
+ old_dev = bio->bi_bdev->bd_dev;
+
ret = q->make_request_fn(q, bio);
} while (ret);
}
@@ -3128,6 +3191,8 @@ static int __end_that_request_first(struct request *req, int uptodate,
int total_bytes, bio_nbytes, error, next_idx = 0;
struct bio *bio;
+ blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
+
/*
* extend uptodate bool to allow < 0 value to be direct io error
*/
@@ -3472,10 +3537,12 @@ void put_io_context(struct io_context *ioc)
BUG_ON(atomic_read(&ioc->refcount) == 0);
if (atomic_dec_and_test(&ioc->refcount)) {
+ rcu_read_lock();
if (ioc->aic && ioc->aic->dtor)
ioc->aic->dtor(ioc->aic);
if (ioc->cic && ioc->cic->dtor)
ioc->cic->dtor(ioc->cic);
+ rcu_read_unlock();
kmem_cache_free(iocontext_cachep, ioc);
}
@@ -3609,10 +3676,13 @@ static ssize_t
queue_requests_store(struct request_queue *q, const char *page, size_t count)
{
struct request_list *rl = &q->rq;
+ unsigned long nr;
+ int ret = queue_var_store(&nr, page, count);
+ if (nr < BLKDEV_MIN_RQ)
+ nr = BLKDEV_MIN_RQ;
- int ret = queue_var_store(&q->nr_requests, page, count);
- if (q->nr_requests < BLKDEV_MIN_RQ)
- q->nr_requests = BLKDEV_MIN_RQ;
+ spin_lock_irq(q->queue_lock);
+ q->nr_requests = nr;
blk_queue_congestion_threshold(q);
if (rl->count[READ] >= queue_congestion_on_threshold(q))
@@ -3638,6 +3708,7 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
blk_clear_queue_full(q, WRITE);
wake_up(&rl->wait[WRITE]);
}
+ spin_unlock_irq(q->queue_lock);
return ret;
}
@@ -3753,13 +3824,19 @@ static ssize_t
queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
struct queue_sysfs_entry *entry = to_queue(attr);
- struct request_queue *q;
+ request_queue_t *q = container_of(kobj, struct request_queue, kobj);
+ ssize_t res;
- q = container_of(kobj, struct request_queue, kobj);
if (!entry->show)
return -EIO;
-
- return entry->show(q, page);
+ mutex_lock(&q->sysfs_lock);
+ if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
+ mutex_unlock(&q->sysfs_lock);
+ return -ENOENT;
+ }
+ res = entry->show(q, page);
+ mutex_unlock(&q->sysfs_lock);
+ return res;
}
static ssize_t
@@ -3767,13 +3844,20 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
const char *page, size_t length)
{
struct queue_sysfs_entry *entry = to_queue(attr);
- struct request_queue *q;
+ request_queue_t *q = container_of(kobj, struct request_queue, kobj);
+
+ ssize_t res;
- q = container_of(kobj, struct request_queue, kobj);
if (!entry->store)
return -EIO;
-
- return entry->store(q, page, length);
+ mutex_lock(&q->sysfs_lock);
+ if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
+ mutex_unlock(&q->sysfs_lock);
+ return -ENOENT;
+ }
+ res = entry->store(q, page, length);
+ mutex_unlock(&q->sysfs_lock);
+ return res;
}
static struct sysfs_ops queue_sysfs_ops = {
@@ -3784,6 +3868,7 @@ static struct sysfs_ops queue_sysfs_ops = {
static struct kobj_type queue_ktype = {
.sysfs_ops = &queue_sysfs_ops,
.default_attrs = default_attrs,
+ .release = blk_release_queue,
};
int blk_register_queue(struct gendisk *disk)
@@ -3796,19 +3881,17 @@ int blk_register_queue(struct gendisk *disk)
return -ENXIO;
q->kobj.parent = kobject_get(&disk->kobj);
- if (!q->kobj.parent)
- return -EBUSY;
- snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue");
- q->kobj.ktype = &queue_ktype;
-
- ret = kobject_register(&q->kobj);
+ ret = kobject_add(&q->kobj);
if (ret < 0)
return ret;
+ kobject_uevent(&q->kobj, KOBJ_ADD);
+
ret = elv_register_queue(q);
if (ret) {
- kobject_unregister(&q->kobj);
+ kobject_uevent(&q->kobj, KOBJ_REMOVE);
+ kobject_del(&q->kobj);
return ret;
}
@@ -3822,7 +3905,8 @@ void blk_unregister_queue(struct gendisk *disk)
if (q && q->request_fn) {
elv_unregister_queue(q);
- kobject_unregister(&q->kobj);
+ kobject_uevent(&q->kobj, KOBJ_REMOVE);
+ kobject_del(&q->kobj);
kobject_put(&disk->kobj);
}
}