aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig3
-rw-r--r--block/Kconfig.iosched2
-rw-r--r--block/blk-barrier.c3
-rw-r--r--block/blk-cgroup.c68
-rw-r--r--block/blk-cgroup.h13
-rw-r--r--block/blk-core.c45
-rw-r--r--block/blk-integrity.c3
-rw-r--r--block/blk-ioc.c8
-rw-r--r--block/blk-merge.c8
-rw-r--r--block/blk-settings.c265
-rw-r--r--block/blk-sysfs.c39
-rw-r--r--block/blk-tag.c1
-rw-r--r--block/blk-timeout.c12
-rw-r--r--block/bsg.c3
-rw-r--r--block/cfq-iosched.c281
-rw-r--r--block/compat_ioctl.c1
-rw-r--r--block/elevator.c15
-rw-r--r--block/genhd.c2
-rw-r--r--block/ioctl.c1
-rw-r--r--block/noop-iosched.c1
20 files changed, 401 insertions, 373 deletions
diff --git a/block/Kconfig b/block/Kconfig
index e20fbde0875c..f9e89f4d94bb 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -78,8 +78,9 @@ config BLK_DEV_INTEGRITY
Protection. If in doubt, say N.
config BLK_CGROUP
- bool
+ tristate "Block cgroup support"
depends on CGROUPS
+ depends on CFQ_GROUP_IOSCHED
default n
---help---
Generic block IO controller cgroup interface. This is the common
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index b71abfb0d726..fc71cf071fb2 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -23,6 +23,7 @@ config IOSCHED_DEADLINE
config IOSCHED_CFQ
tristate "CFQ I/O scheduler"
+ select BLK_CGROUP if CFQ_GROUP_IOSCHED
default y
---help---
The CFQ I/O scheduler tries to distribute bandwidth equally
@@ -35,7 +36,6 @@ config IOSCHED_CFQ
config CFQ_GROUP_IOSCHED
bool "CFQ Group Scheduling support"
depends on IOSCHED_CFQ && CGROUPS
- select BLK_CGROUP
default n
---help---
Enable group IO scheduling in CFQ.
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 8873b9b439ff..6d88544b677f 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -5,6 +5,7 @@
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+#include <linux/gfp.h>
#include "blk.h"
@@ -402,7 +403,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
* our current implementations need. If we'll ever need
* more the interface will need revisiting.
*/
- page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ page = alloc_page(gfp_mask | __GFP_ZERO);
if (!page)
goto out_free_bio;
if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 1fa2654db0a6..5fe03def34b2 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -15,6 +15,7 @@
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/err.h>
+#include <linux/slab.h>
#include "blk-cgroup.h"
static DEFINE_SPINLOCK(blkio_list_lock);
@@ -23,19 +24,30 @@ static LIST_HEAD(blkio_list);
struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);
-bool blkiocg_css_tryget(struct blkio_cgroup *blkcg)
-{
- if (!css_tryget(&blkcg->css))
- return false;
- return true;
-}
-EXPORT_SYMBOL_GPL(blkiocg_css_tryget);
+static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
+ struct cgroup *);
+static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
+ struct task_struct *, bool);
+static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
+ struct cgroup *, struct task_struct *, bool);
+static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
+static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
-void blkiocg_css_put(struct blkio_cgroup *blkcg)
-{
- css_put(&blkcg->css);
-}
-EXPORT_SYMBOL_GPL(blkiocg_css_put);
+struct cgroup_subsys blkio_subsys = {
+ .name = "blkio",
+ .create = blkiocg_create,
+ .can_attach = blkiocg_can_attach,
+ .attach = blkiocg_attach,
+ .destroy = blkiocg_destroy,
+ .populate = blkiocg_populate,
+#ifdef CONFIG_BLK_CGROUP
+ /* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */
+ .subsys_id = blkio_subsys_id,
+#endif
+ .use_id = 1,
+ .module = THIS_MODULE,
+};
+EXPORT_SYMBOL_GPL(blkio_subsys);
struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
@@ -147,16 +159,16 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
return -EINVAL;
blkcg = cgroup_to_blkio_cgroup(cgroup);
+ spin_lock(&blkio_list_lock);
spin_lock_irq(&blkcg->lock);
blkcg->weight = (unsigned int)val;
hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
- spin_lock(&blkio_list_lock);
list_for_each_entry(blkiop, &blkio_list, list)
blkiop->ops.blkio_update_group_weight_fn(blkg,
blkcg->weight);
- spin_unlock(&blkio_list_lock);
}
spin_unlock_irq(&blkcg->lock);
+ spin_unlock(&blkio_list_lock);
return 0;
}
@@ -267,7 +279,8 @@ remove_entry:
done:
free_css_id(&blkio_subsys, &blkcg->css);
rcu_read_unlock();
- kfree(blkcg);
+ if (blkcg != &blkio_root_cgroup)
+ kfree(blkcg);
}
static struct cgroup_subsys_state *
@@ -333,17 +346,6 @@ static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
task_unlock(tsk);
}
-struct cgroup_subsys blkio_subsys = {
- .name = "blkio",
- .create = blkiocg_create,
- .can_attach = blkiocg_can_attach,
- .attach = blkiocg_attach,
- .destroy = blkiocg_destroy,
- .populate = blkiocg_populate,
- .subsys_id = blkio_subsys_id,
- .use_id = 1,
-};
-
void blkio_policy_register(struct blkio_policy_type *blkiop)
{
spin_lock(&blkio_list_lock);
@@ -359,3 +361,17 @@ void blkio_policy_unregister(struct blkio_policy_type *blkiop)
spin_unlock(&blkio_list_lock);
}
EXPORT_SYMBOL_GPL(blkio_policy_unregister);
+
+static int __init init_cgroup_blkio(void)
+{
+ return cgroup_load_subsys(&blkio_subsys);
+}
+
+static void __exit exit_cgroup_blkio(void)
+{
+ cgroup_unload_subsys(&blkio_subsys);
+}
+
+module_init(init_cgroup_blkio);
+module_exit(exit_cgroup_blkio);
+MODULE_LICENSE("GPL");
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 4d316df863b4..8ccc20464dae 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -15,7 +15,13 @@
#include <linux/cgroup.h>
-#ifdef CONFIG_BLK_CGROUP
+#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
+
+#ifndef CONFIG_BLK_CGROUP
+/* When blk-cgroup is a module, its subsys_id isn't a compile-time constant */
+extern struct cgroup_subsys blkio_subsys;
+#define blkio_subsys_id blkio_subsys.subsys_id
+#endif
struct blkio_cgroup {
struct cgroup_subsys_state css;
@@ -43,9 +49,6 @@ struct blkio_group {
unsigned long sectors;
};
-extern bool blkiocg_css_tryget(struct blkio_cgroup *blkcg);
-extern void blkiocg_css_put(struct blkio_cgroup *blkcg);
-
typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
unsigned int weight);
@@ -94,7 +97,7 @@ static inline void blkiocg_update_blkio_group_dequeue_stats(
struct blkio_group *blkg, unsigned long dequeue) {}
#endif
-#ifdef CONFIG_BLK_CGROUP
+#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
extern struct blkio_cgroup blkio_root_cgroup;
extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
diff --git a/block/blk-core.c b/block/blk-core.c
index 718897e6d37f..9fe174dc74d1 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1147,7 +1147,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
*/
static inline bool queue_should_plug(struct request_queue *q)
{
- return !(blk_queue_nonrot(q) && blk_queue_queuing(q));
+ return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
}
static int __make_request(struct request_queue *q, struct bio *bio)
@@ -1490,9 +1490,9 @@ end_io:
/*
* We only want one ->make_request_fn to be active at a time,
* else stack usage with stacked devices could be a problem.
- * So use current->bio_{list,tail} to keep a list of requests
+ * So use current->bio_list to keep a list of requests
* submited by a make_request_fn function.
- * current->bio_tail is also used as a flag to say if
+ * current->bio_list is also used as a flag to say if
* generic_make_request is currently active in this task or not.
* If it is NULL, then no make_request is active. If it is non-NULL,
* then a make_request is active, and new requests should be added
@@ -1500,11 +1500,11 @@ end_io:
*/
void generic_make_request(struct bio *bio)
{
- if (current->bio_tail) {
+ struct bio_list bio_list_on_stack;
+
+ if (current->bio_list) {
/* make_request is active */
- *(current->bio_tail) = bio;
- bio->bi_next = NULL;
- current->bio_tail = &bio->bi_next;
+ bio_list_add(current->bio_list, bio);
return;
}
/* following loop may be a bit non-obvious, and so deserves some
@@ -1512,30 +1512,27 @@ void generic_make_request(struct bio *bio)
* Before entering the loop, bio->bi_next is NULL (as all callers
* ensure that) so we have a list with a single bio.
* We pretend that we have just taken it off a longer list, so
- * we assign bio_list to the next (which is NULL) and bio_tail
- * to &bio_list, thus initialising the bio_list of new bios to be
+ * we assign bio_list to a pointer to the bio_list_on_stack,
+ * thus initialising the bio_list of new bios to be
* added. __generic_make_request may indeed add some more bios
* through a recursive call to generic_make_request. If it
* did, we find a non-NULL value in bio_list and re-enter the loop
* from the top. In this case we really did just take the bio
- * of the top of the list (no pretending) and so fixup bio_list and
- * bio_tail or bi_next, and call into __generic_make_request again.
+ * of the top of the list (no pretending) and so remove it from
+ * bio_list, and call into __generic_make_request again.
*
* The loop was structured like this to make only one call to
* __generic_make_request (which is important as it is large and
* inlined) and to keep the structure simple.
*/
BUG_ON(bio->bi_next);
+ bio_list_init(&bio_list_on_stack);
+ current->bio_list = &bio_list_on_stack;
do {
- current->bio_list = bio->bi_next;
- if (bio->bi_next == NULL)
- current->bio_tail = &current->bio_list;
- else
- bio->bi_next = NULL;
__generic_make_request(bio);
- bio = current->bio_list;
+ bio = bio_list_pop(current->bio_list);
} while (bio);
- current->bio_tail = NULL; /* deactivate */
+ current->bio_list = NULL; /* deactivate */
}
EXPORT_SYMBOL(generic_make_request);
@@ -1617,8 +1614,7 @@ int blk_rq_check_limits(struct request_queue *q, struct request *rq)
* limitation.
*/
blk_recalc_rq_segments(rq);
- if (rq->nr_phys_segments > queue_max_phys_segments(q) ||
- rq->nr_phys_segments > queue_max_hw_segments(q)) {
+ if (rq->nr_phys_segments > queue_max_segments(q)) {
printk(KERN_ERR "%s: over max segments limit.\n", __func__);
return -EIO;
}
@@ -1859,15 +1855,8 @@ void blk_dequeue_request(struct request *rq)
* and to it is freed is accounted as io that is in progress at
* the driver side.
*/
- if (blk_account_rq(rq)) {
+ if (blk_account_rq(rq))
q->in_flight[rq_is_sync(rq)]++;
- /*
- * Mark this device as supporting hardware queuing, if
- * we have more IOs in flight than 4.
- */
- if (!blk_queue_queuing(q) && queue_in_flight(q) > 4)
- set_bit(QUEUE_FLAG_CQ, &q->queue_flags);
- }
}
/**
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 15c630813b1c..edce1ef7933d 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -24,6 +24,7 @@
#include <linux/mempool.h>
#include <linux/bio.h>
#include <linux/scatterlist.h>
+#include <linux/slab.h>
#include "blk.h"
@@ -278,7 +279,7 @@ static struct attribute *integrity_attrs[] = {
NULL,
};
-static struct sysfs_ops integrity_ops = {
+static const struct sysfs_ops integrity_ops = {
.show = &integrity_attr_show,
.store = &integrity_attr_store,
};
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index cbdabb0dd6d7..d22c4c55c406 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -7,6 +7,7 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
+#include <linux/slab.h>
#include "blk.h"
@@ -39,8 +40,6 @@ int put_io_context(struct io_context *ioc)
if (atomic_long_dec_and_test(&ioc->refcount)) {
rcu_read_lock();
- if (ioc->aic && ioc->aic->dtor)
- ioc->aic->dtor(ioc->aic);
cfq_dtor(ioc);
rcu_read_unlock();
@@ -76,8 +75,6 @@ void exit_io_context(struct task_struct *task)
task_unlock(task);
if (atomic_dec_and_test(&ioc->nr_tasks)) {
- if (ioc->aic && ioc->aic->exit)
- ioc->aic->exit(ioc->aic);
cfq_exit(ioc);
}
@@ -95,9 +92,8 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
spin_lock_init(&ret->lock);
ret->ioprio_changed = 0;
ret->ioprio = 0;
- ret->last_waited = jiffies; /* doesn't matter... */
+ ret->last_waited = 0; /* doesn't matter... */
ret->nr_batch_requests = 0; /* because this is 0 */
- ret->aic = NULL;
INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
INIT_HLIST_HEAD(&ret->cic_list);
ret->ioc_data = NULL;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 99cb5cf1f447..5e7dc9973458 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -206,8 +206,7 @@ static inline int ll_new_hw_segment(struct request_queue *q,
{
int nr_phys_segs = bio_phys_segments(q, bio);
- if (req->nr_phys_segments + nr_phys_segs > queue_max_hw_segments(q) ||
- req->nr_phys_segments + nr_phys_segs > queue_max_phys_segments(q)) {
+ if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) {
req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
@@ -300,10 +299,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
total_phys_segments--;
}
- if (total_phys_segments > queue_max_phys_segments(q))
- return 0;
-
- if (total_phys_segments > queue_max_hw_segments(q))
+ if (total_phys_segments > queue_max_segments(q))
return 0;
/* Merge is OK... */
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 6ae118d6e193..f5ed5a1187ba 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -8,7 +8,9 @@
#include <linux/blkdev.h>
#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
#include <linux/gcd.h>
+#include <linux/lcm.h>
#include <linux/jiffies.h>
+#include <linux/gfp.h>
#include "blk.h"
@@ -91,10 +93,9 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
*/
void blk_set_default_limits(struct queue_limits *lim)
{
- lim->max_phys_segments = MAX_PHYS_SEGMENTS;
- lim->max_hw_segments = MAX_HW_SEGMENTS;
+ lim->max_segments = BLK_MAX_SEGMENTS;
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
- lim->max_segment_size = MAX_SEGMENT_SIZE;
+ lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
lim->max_sectors = BLK_DEF_MAX_SECTORS;
lim->max_hw_sectors = INT_MAX;
lim->max_discard_sectors = 0;
@@ -154,7 +155,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
q->unplug_timer.data = (unsigned long)q;
blk_set_default_limits(&q->limits);
- blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
+ blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
/*
* If the caller didn't supply a lock, fall back to our embedded
@@ -210,37 +211,32 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask)
EXPORT_SYMBOL(blk_queue_bounce_limit);
/**
- * blk_queue_max_sectors - set max sectors for a request for this queue
+ * blk_queue_max_hw_sectors - set max sectors for a request for this queue
* @q: the request queue for the device
- * @max_sectors: max sectors in the usual 512b unit
+ * @max_hw_sectors: max hardware sectors in the usual 512b unit
*
* Description:
- * Enables a low level driver to set an upper limit on the size of
- * received requests.
+ * Enables a low level driver to set a hard upper limit,
+ * max_hw_sectors, on the size of requests. max_hw_sectors is set by
+ * the device driver based upon the combined capabilities of I/O
+ * controller and storage device.
+ *
+ * max_sectors is a soft limit imposed by the block layer for
+ * filesystem type requests. This value can be overridden on a
+ * per-device basis in /sys/block/<device>/queue/max_sectors_kb.
+ * The soft limit can not exceed max_hw_sectors.
**/
-void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors)
+void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
{
- if ((max_sectors << 9) < PAGE_CACHE_SIZE) {
- max_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
+ if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) {
+ max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
printk(KERN_INFO "%s: set to minimum %d\n",
- __func__, max_sectors);
- }
-
- if (BLK_DEF_MAX_SECTORS > max_sectors)
- q->limits.max_hw_sectors = q->limits.max_sectors = max_sectors;
- else {
- q->limits.max_sectors = BLK_DEF_MAX_SECTORS;
- q->limits.max_hw_sectors = max_sectors;
+ __func__, max_hw_sectors);
}
-}
-EXPORT_SYMBOL(blk_queue_max_sectors);
-void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors)
-{
- if (BLK_DEF_MAX_SECTORS > max_sectors)
- q->limits.max_hw_sectors = BLK_DEF_MAX_SECTORS;
- else
- q->limits.max_hw_sectors = max_sectors;
+ q->limits.max_hw_sectors = max_hw_sectors;
+ q->limits.max_sectors = min_t(unsigned int, max_hw_sectors,
+ BLK_DEF_MAX_SECTORS);
}
EXPORT_SYMBOL(blk_queue_max_hw_sectors);
@@ -257,41 +253,15 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
EXPORT_SYMBOL(blk_queue_max_discard_sectors);
/**
- * blk_queue_max_phys_segments - set max phys segments for a request for this queue
- * @q: the request queue for the device
- * @max_segments: max number of segments
- *
- * Description:
- * Enables a low level driver to set an upper limit on the number of
- * physical data segments in a request. This would be the largest sized
- * scatter list the driver could handle.
- **/
-void blk_queue_max_phys_segments(struct request_queue *q,
- unsigned short max_segments)
-{
- if (!max_segments) {
- max_segments = 1;
- printk(KERN_INFO "%s: set to minimum %d\n",
- __func__, max_segments);
- }
-
- q->limits.max_phys_segments = max_segments;
-}
-EXPORT_SYMBOL(blk_queue_max_phys_segments);
-
-/**
- * blk_queue_max_hw_segments - set max hw segments for a request for this queue
+ * blk_queue_max_segments - set max hw segments for a request for this queue
* @q: the request queue for the device
* @max_segments: max number of segments
*
* Description:
* Enables a low level driver to set an upper limit on the number of
- * hw data segments in a request. This would be the largest number of
- * address/length pairs the host adapter can actually give at once
- * to the device.
+ * hw data segments in a request.
**/
-void blk_queue_max_hw_segments(struct request_queue *q,
- unsigned short max_segments)
+void blk_queue_max_segments(struct request_queue *q, unsigned short max_segments)
{
if (!max_segments) {
max_segments = 1;
@@ -299,9 +269,9 @@ void blk_queue_max_hw_segments(struct request_queue *q,
__func__, max_segments);
}
- q->limits.max_hw_segments = max_segments;
+ q->limits.max_segments = max_segments;
}
-EXPORT_SYMBOL(blk_queue_max_hw_segments);
+EXPORT_SYMBOL(blk_queue_max_segments);
/**
* blk_queue_max_segment_size - set max segment size for blk_rq_map_sg
@@ -493,33 +463,31 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
}
EXPORT_SYMBOL(blk_queue_stack_limits);
-static unsigned int lcm(unsigned int a, unsigned int b)
-{
- if (a && b)
- return (a * b) / gcd(a, b);
- else if (b)
- return b;
-
- return a;
-}
-
/**
* blk_stack_limits - adjust queue_limits for stacked devices
- * @t: the stacking driver limits (top)
- * @b: the underlying queue limits (bottom)
- * @offset: offset to beginning of data within component device
+ * @t: the stacking driver limits (top device)
+ * @b: the underlying queue limits (bottom, component device)
+ * @start: first data sector within component device
*
* Description:
- * Merges two queue_limit structs. Returns 0 if alignment didn't
- * change. Returns -1 if adding the bottom device caused
- * misalignment.
+ * This function is used by stacking drivers like MD and DM to ensure
+ * that all component devices have compatible block sizes and
+ * alignments. The stacking driver must provide a queue_limits
+ * struct (top) and then iteratively call the stacking function for
+ * all component (bottom) devices. The stacking function will
+ * attempt to combine the values and ensure proper alignment.
+ *
+ * Returns 0 if the top and bottom queue_limits are compatible. The
+ * top device's block sizes and alignment offsets may be adjusted to
+ * ensure alignment with the bottom device. If no compatible sizes
+ * and alignments exist, -1 is returned and the resulting top
+ * queue_limits will have the misaligned flag set to indicate that
+ * the alignment_offset is undefined.
*/
int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
- sector_t offset)
+ sector_t start)
{
- int ret;
-
- ret = 0;
+ unsigned int top, bottom, alignment, ret = 0;
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
@@ -528,15 +496,31 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
b->seg_boundary_mask);
- t->max_phys_segments = min_not_zero(t->max_phys_segments,
- b->max_phys_segments);
-
- t->max_hw_segments = min_not_zero(t->max_hw_segments,
- b->max_hw_segments);
+ t->max_segments = min_not_zero(t->max_segments, b->max_segments);
t->max_segment_size = min_not_zero(t->max_segment_size,
b->max_segment_size);
+ t->misaligned |= b->misaligned;
+
+ alignment = queue_limit_alignment_offset(b, start);
+
+ /* Bottom device has different alignment. Check that it is
+ * compatible with the current top alignment.
+ */
+ if (t->alignment_offset != alignment) {
+
+ top = max(t->physical_block_size, t->io_min)
+ + t->alignment_offset;
+ bottom = max(b->physical_block_size, b->io_min) + alignment;
+
+ /* Verify that top and bottom intervals line up */
+ if (max(top, bottom) & (min(top, bottom) - 1)) {
+ t->misaligned = 1;
+ ret = -1;
+ }
+ }
+
t->logical_block_size = max(t->logical_block_size,
b->logical_block_size);
@@ -544,67 +528,99 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
b->physical_block_size);
t->io_min = max(t->io_min, b->io_min);
+ t->io_opt = lcm(t->io_opt, b->io_opt);
+
t->no_cluster |= b->no_cluster;
t->discard_zeroes_data &= b->discard_zeroes_data;
- /* Bottom device offset aligned? */
- if (offset &&
- (offset & (b->physical_block_size - 1)) != b->alignment_offset) {
+ /* Physical block size a multiple of the logical block size? */
+ if (t->physical_block_size & (t->logical_block_size - 1)) {
+ t->physical_block_size = t->logical_block_size;
t->misaligned = 1;
ret = -1;
}
- /*
- * Temporarily disable discard granularity. It's currently buggy
- * since we default to 0 for discard_granularity, hence this
- * "failure" will always trigger for non-zero offsets.
- */
-#if 0
- if (offset &&
- (offset & (b->discard_granularity - 1)) != b->discard_alignment) {
- t->discard_misaligned = 1;
+ /* Minimum I/O a multiple of the physical block size? */
+ if (t->io_min & (t->physical_block_size - 1)) {
+ t->io_min = t->physical_block_size;
+ t->misaligned = 1;
ret = -1;
}
-#endif
- /* If top has no alignment offset, inherit from bottom */
- if (!t->alignment_offset)
- t->alignment_offset =
- b->alignment_offset & (b->physical_block_size - 1);
+ /* Optimal I/O a multiple of the physical block size? */
+ if (t->io_opt & (t->physical_block_size - 1)) {
+ t->io_opt = 0;
+ t->misaligned = 1;
+ ret = -1;
+ }
- if (!t->discard_alignment)
- t->discard_alignment =
- b->discard_alignment & (b->discard_granularity - 1);
+ /* Find lowest common alignment_offset */
+ t->alignment_offset = lcm(t->alignment_offset, alignment)
+ & (max(t->physical_block_size, t->io_min) - 1);
- /* Top device aligned on logical block boundary? */
+ /* Verify that new alignment_offset is on a logical block boundary */
if (t->alignment_offset & (t->logical_block_size - 1)) {
t->misaligned = 1;
ret = -1;
}
- /* Find lcm() of optimal I/O size and granularity */
- t->io_opt = lcm(t->io_opt, b->io_opt);
- t->discard_granularity = lcm(t->discard_granularity,
- b->discard_granularity);
-
- /* Verify that optimal I/O size is a multiple of io_min */
- if (t->io_min && t->io_opt % t->io_min)
- ret = -1;
+ /* Discard alignment and granularity */
+ if (b->discard_granularity) {
+ alignment = queue_limit_discard_alignment(b, start);
+
+ if (t->discard_granularity != 0 &&
+ t->discard_alignment != alignment) {
+ top = t->discard_granularity + t->discard_alignment;
+ bottom = b->discard_granularity + alignment;
+
+ /* Verify that top and bottom intervals line up */
+ if (max(top, bottom) & (min(top, bottom) - 1))
+ t->discard_misaligned = 1;
+ }
+
+ t->max_discard_sectors = min_not_zero(t->max_discard_sectors,
+ b->max_discard_sectors);
+ t->discard_granularity = max(t->discard_granularity,
+ b->discard_granularity);
+ t->discard_alignment = lcm(t->discard_alignment, alignment) &
+ (t->discard_granularity - 1);
+ }
return ret;
}
EXPORT_SYMBOL(blk_stack_limits);
/**
+ * bdev_stack_limits - adjust queue limits for stacked drivers
+ * @t: the stacking driver limits (top device)
+ * @bdev: the component block_device (bottom)
+ * @start: first data sector within component device
+ *
+ * Description:
+ * Merges queue limits for a top device and a block_device. Returns
+ * 0 if alignment didn't change. Returns -1 if adding the bottom
+ * device caused misalignment.
+ */
+int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
+ sector_t start)
+{
+ struct request_queue *bq = bdev_get_queue(bdev);
+
+ start += get_start_sect(bdev);
+
+ return blk_stack_limits(t, &bq->limits, start);
+}
+EXPORT_SYMBOL(bdev_stack_limits);
+
+/**
* disk_stack_limits - adjust queue limits for stacked drivers
* @disk: MD/DM gendisk (top)
* @bdev: the underlying block device (bottom)
* @offset: offset to beginning of data within component device
*
* Description:
- * Merges the limits for two queues. Returns 0 if alignment
- * didn't change. Returns -1 if adding the bottom device caused
- * misalignment.
+ * Merges the limits for a top level gendisk and a bottom level
+ * block_device.
*/
void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
sector_t offset)
@@ -612,9 +628,7 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
struct request_queue *t = disk->queue;
struct request_queue *b = bdev_get_queue(bdev);
- offset += get_start_sect(bdev) << 9;
-
- if (blk_stack_limits(&t->limits, &b->limits, offset) < 0) {
+ if (bdev_stack_limits(&t->limits, bdev, offset >> 9) < 0) {
char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE];
disk_name(disk, 0, top);
@@ -686,22 +700,19 @@ EXPORT_SYMBOL(blk_queue_update_dma_pad);
* does is adjust the queue so that the buf is always appended
* silently to the scatterlist.
*
- * Note: This routine adjusts max_hw_segments to make room for
- * appending the drain buffer. If you call
- * blk_queue_max_hw_segments() or blk_queue_max_phys_segments() after
- * calling this routine, you must set the limit to one fewer than your
- * device can support otherwise there won't be room for the drain
- * buffer.
+ * Note: This routine adjusts max_hw_segments to make room for appending
+ * the drain buffer. If you call blk_queue_max_segments() after calling
+ * this routine, you must set the limit to one fewer than your device
+ * can support otherwise there won't be room for the drain buffer.
*/
int blk_queue_dma_drain(struct request_queue *q,
dma_drain_needed_fn *dma_drain_needed,
void *buf, unsigned int size)
{
- if (queue_max_hw_segments(q) < 2 || queue_max_phys_segments(q) < 2)
+ if (queue_max_segments(q) < 2)
return -EINVAL;
/* make room for appending the drain */
- blk_queue_max_hw_segments(q, queue_max_hw_segments(q) - 1);
- blk_queue_max_phys_segments(q, queue_max_phys_segments(q) - 1);
+ blk_queue_max_segments(q, queue_max_segments(q) - 1);
q->dma_drain_needed = dma_drain_needed;
q->dma_drain_buffer = buf;
q->dma_drain_size = size;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 8606c9543fdd..306759bbdf1b 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -2,6 +2,7 @@
* Functions related to sysfs handling
*/
#include <linux/kernel.h>
+#include <linux/slab.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
@@ -106,6 +107,19 @@ static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
return queue_var_show(max_sectors_kb, (page));
}
+static ssize_t queue_max_segments_show(struct request_queue *q, char *page)
+{
+ return queue_var_show(queue_max_segments(q), (page));
+}
+
+static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
+{
+ if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
+ return queue_var_show(queue_max_segment_size(q), (page));
+
+ return queue_var_show(PAGE_CACHE_SIZE, (page));
+}
+
static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
{
return queue_var_show(queue_logical_block_size(q), page);
@@ -189,7 +203,8 @@ static ssize_t queue_nonrot_store(struct request_queue *q, const char *page,
static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
{
- return queue_var_show(blk_queue_nomerges(q), page);
+ return queue_var_show((blk_queue_nomerges(q) << 1) |
+ blk_queue_noxmerges(q), page);
}
static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
@@ -199,10 +214,12 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
ssize_t ret = queue_var_store(&nm, page, count);
spin_lock_irq(q->queue_lock);
- if (nm)
+ queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
+ queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
+ if (nm == 2)
queue_flag_set(QUEUE_FLAG_NOMERGES, q);
- else
- queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
+ else if (nm)
+ queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
spin_unlock_irq(q->queue_lock);
return ret;
@@ -277,6 +294,16 @@ static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
.show = queue_max_hw_sectors_show,
};
+static struct queue_sysfs_entry queue_max_segments_entry = {
+ .attr = {.name = "max_segments", .mode = S_IRUGO },
+ .show = queue_max_segments_show,
+};
+
+static struct queue_sysfs_entry queue_max_segment_size_entry = {
+ .attr = {.name = "max_segment_size", .mode = S_IRUGO },
+ .show = queue_max_segment_size_show,
+};
+
static struct queue_sysfs_entry queue_iosched_entry = {
.attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
.show = elv_iosched_show,
@@ -352,6 +379,8 @@ static struct attribute *default_attrs[] = {
&queue_ra_entry.attr,
&queue_max_hw_sectors_entry.attr,
&queue_max_sectors_entry.attr,
+ &queue_max_segments_entry.attr,
+ &queue_max_segment_size_entry.attr,
&queue_iosched_entry.attr,
&queue_hw_sector_size_entry.attr,
&queue_logical_block_size_entry.attr,
@@ -447,7 +476,7 @@ static void blk_release_queue(struct kobject *kobj)
kmem_cache_free(blk_requestq_cachep, q);
}
-static struct sysfs_ops queue_sysfs_ops = {
+static const struct sysfs_ops queue_sysfs_ops = {
.show = queue_attr_show,
.store = queue_attr_store,
};
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 6b0f52c20964..ece65fc4c79b 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -5,6 +5,7 @@
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+#include <linux/slab.h>
#include "blk.h"
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 1ba7e0aca878..4f0c06c7a338 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -109,6 +109,7 @@ void blk_rq_timed_out_timer(unsigned long data)
struct request_queue *q = (struct request_queue *) data;
unsigned long flags, next = 0;
struct request *rq, *tmp;
+ int next_set = 0;
spin_lock_irqsave(q->queue_lock, flags);
@@ -122,16 +123,13 @@ void blk_rq_timed_out_timer(unsigned long data)
if (blk_mark_rq_complete(rq))
continue;
blk_rq_timed_out(rq);
- } else if (!next || time_after(next, rq->deadline))
+ } else if (!next_set || time_after(next, rq->deadline)) {
next = rq->deadline;
+ next_set = 1;
+ }
}
- /*
- * next can never be 0 here with the list non-empty, since we always
- * bump ->deadline to 1 so we can detect if the timer was ever added
- * or not. See comment in blk_add_timer()
- */
- if (next)
+ if (next_set)
mod_timer(&q->timeout, round_jiffies_up(next));
spin_unlock_irqrestore(q->queue_lock, flags);
diff --git a/block/bsg.c b/block/bsg.c
index a9fd2d84b53a..82d58829ba59 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -21,6 +21,7 @@
#include <linux/idr.h>
#include <linux/bsg.h>
#include <linux/smp_lock.h>
+#include <linux/slab.h>
#include <scsi/scsi.h>
#include <scsi/scsi_ioctl.h>
@@ -260,7 +261,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
return ERR_PTR(ret);
/*
- * map scatter-gather elements seperately and string them to request
+ * map scatter-gather elements separately and string them to request
*/
rq = blk_get_request(q, rw, GFP_KERNEL);
if (!rq)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index e2f80463ed0d..838834be115b 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -7,6 +7,7 @@
* Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
*/
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/jiffies.h>
@@ -19,7 +20,7 @@
* tunables
*/
/* max queue in one round of service */
-static const int cfq_quantum = 4;
+static const int cfq_quantum = 8;
static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
/* maximum backwards seek, in KiB */
static const int cfq_back_max = 16 * 1024;
@@ -42,16 +43,15 @@ static const int cfq_hist_divisor = 4;
*/
#define CFQ_MIN_TT (2)
-/*
- * Allow merged cfqqs to perform this amount of seeky I/O before
- * deciding to break the queues up again.
- */
-#define CFQQ_COOP_TOUT (HZ)
-
#define CFQ_SLICE_SCALE (5)
#define CFQ_HW_QUEUE_MIN (5)
#define CFQ_SERVICE_SHIFT 12
+#define CFQQ_SEEK_THR (sector_t)(8 * 100)
+#define CFQQ_CLOSE_THR (sector_t)(8 * 1024)
+#define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32)
+#define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8)
+
#define RQ_CIC(rq) \
((struct cfq_io_context *) (rq)->elevator_private)
#define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2)
@@ -80,11 +80,12 @@ struct cfq_rb_root {
struct rb_root rb;
struct rb_node *left;
unsigned count;
+ unsigned total_weight;
u64 min_vdisktime;
struct rb_node *active;
- unsigned total_weight;
};
-#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, }
+#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
+ .count = 0, .min_vdisktime = 0, }
/*
* Per process-grouping structure
@@ -118,11 +119,11 @@ struct cfq_queue {
/* time when queue got scheduled in to dispatch first request. */
unsigned long dispatch_start;
unsigned int allocated_slice;
+ unsigned int slice_dispatch;
/* time when first request from queue completed and slice started. */
unsigned long slice_start;
unsigned long slice_end;
long slice_resid;
- unsigned int slice_dispatch;
/* pending metadata requests */
int meta_pending;
@@ -133,14 +134,11 @@ struct cfq_queue {
unsigned short ioprio, org_ioprio;
unsigned short ioprio_class, org_ioprio_class;
- unsigned int seek_samples;
- u64 seek_total;
- sector_t seek_mean;
- sector_t last_request_pos;
- unsigned long seeky_start;
-
pid_t pid;
+ u32 seek_history;
+ sector_t last_request_pos;
+
struct cfq_rb_root *service_tree;
struct cfq_queue *new_cfqq;
struct cfq_group *cfqg;
@@ -208,8 +206,6 @@ struct cfq_data {
/* Root service tree for cfq_groups */
struct cfq_rb_root grp_service_tree;
struct cfq_group root_group;
- /* Number of active cfq groups on group service tree */
- int nr_groups;
/*
* The priority currently being served
@@ -229,8 +225,8 @@ struct cfq_data {
unsigned int busy_queues;
- int rq_in_driver[2];
- int sync_flight;
+ int rq_in_driver;
+ int rq_in_flight[2];
/*
* queue-depth detection
@@ -294,8 +290,7 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg,
enum wl_prio_t prio,
- enum wl_type_t type,
- struct cfq_data *cfqd)
+ enum wl_type_t type)
{
if (!cfqg)
return NULL;
@@ -317,6 +312,7 @@ enum cfqq_state_flags {
CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */
CFQ_CFQQ_FLAG_sync, /* synchronous queue */
CFQ_CFQQ_FLAG_coop, /* cfqq is shared */
+ CFQ_CFQQ_FLAG_split_coop, /* shared cfqq will be splitted */
CFQ_CFQQ_FLAG_deep, /* sync cfqq experienced large depth */
CFQ_CFQQ_FLAG_wait_busy, /* Waiting for next request */
};
@@ -345,6 +341,7 @@ CFQ_CFQQ_FNS(prio_changed);
CFQ_CFQQ_FNS(slice_new);
CFQ_CFQQ_FNS(sync);
CFQ_CFQQ_FNS(coop);
+CFQ_CFQQ_FNS(split_coop);
CFQ_CFQQ_FNS(deep);
CFQ_CFQQ_FNS(wait_busy);
#undef CFQ_CFQQ_FNS
@@ -422,11 +419,6 @@ static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
struct io_context *);
-static inline int rq_in_driver(struct cfq_data *cfqd)
-{
- return cfqd->rq_in_driver[0] + cfqd->rq_in_driver[1];
-}
-
static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
bool is_sync)
{
@@ -842,7 +834,6 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
__cfq_group_service_tree_add(st, cfqg);
cfqg->on_st = true;
- cfqd->nr_groups++;
st->total_weight += cfqg->weight;
}
@@ -863,7 +854,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
cfqg->on_st = false;
- cfqd->nr_groups--;
st->total_weight -= cfqg->weight;
if (!RB_EMPTY_NODE(&cfqg->rb_node))
cfq_rb_erase(&cfqg->rb_node, st);
@@ -958,11 +948,12 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
unsigned int major, minor;
- /* Do we need to take this reference */
- if (!blkiocg_css_tryget(blkcg))
- return NULL;;
-
cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
+ if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
+ sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
+ cfqg->blkg.dev = MKDEV(major, minor);
+ goto done;
+ }
if (cfqg || !create)
goto done;
@@ -992,7 +983,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
done:
- blkiocg_css_put(blkcg);
return cfqg;
}
@@ -1150,7 +1140,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
#endif
service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
- cfqq_type(cfqq), cfqd);
+ cfqq_type(cfqq));
if (cfq_class_idle(cfqq)) {
rb_key = CFQ_IDLE_DELAY;
parent = rb_last(&service_tree->rb);
@@ -1427,9 +1417,9 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
- cfqd->rq_in_driver[rq_is_sync(rq)]++;
+ cfqd->rq_in_driver++;
cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
- rq_in_driver(cfqd));
+ cfqd->rq_in_driver);
cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
}
@@ -1437,12 +1427,11 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
- const int sync = rq_is_sync(rq);
- WARN_ON(!cfqd->rq_in_driver[sync]);
- cfqd->rq_in_driver[sync]--;
+ WARN_ON(!cfqd->rq_in_driver);
+ cfqd->rq_in_driver--;
cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
- rq_in_driver(cfqd));
+ cfqd->rq_in_driver);
}
static void cfq_remove_request(struct request *rq)
@@ -1513,9 +1502,6 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
struct cfq_io_context *cic;
struct cfq_queue *cfqq;
- /* Deny merge if bio and rq don't belong to same cfq group */
- if ((RQ_CFQQ(rq))->cfqg != cfq_get_cfqg(cfqd, 0))
- return false;
/*
* Disallow merge of a sync bio into an async request.
*/
@@ -1538,7 +1524,8 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
struct cfq_queue *cfqq)
{
if (cfqq) {
- cfq_log_cfqq(cfqd, cfqq, "set_active");
+ cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d",
+ cfqd->serving_prio, cfqd->serving_type);
cfqq->slice_start = 0;
cfqq->dispatch_start = jiffies;
cfqq->allocated_slice = 0;
@@ -1574,6 +1561,15 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
cfq_clear_cfqq_wait_busy(cfqq);
/*
+ * If this cfqq is shared between multiple processes, check to
+ * make sure that those processes are still issuing I/Os within
+ * the mean seek distance. If not, it may be time to break the
+ * queues apart again.
+ */
+ if (cfq_cfqq_coop(cfqq) && CFQQ_SEEKY(cfqq))
+ cfq_mark_cfqq_split_coop(cfqq);
+
+ /*
* store what was left of this slice, if the queue idled/timed out
*/
if (timed_out && !cfq_cfqq_slice_new(cfqq)) {
@@ -1616,7 +1612,7 @@ static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
{
struct cfq_rb_root *service_tree =
service_tree_for(cfqd->serving_group, cfqd->serving_prio,
- cfqd->serving_type, cfqd);
+ cfqd->serving_type);
if (!cfqd->rq_queued)
return NULL;
@@ -1671,18 +1667,10 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
return cfqd->last_position - blk_rq_pos(rq);
}
-#define CFQQ_SEEK_THR 8 * 1024
-#define CFQQ_SEEKY(cfqq) ((cfqq)->seek_mean > CFQQ_SEEK_THR)
-
static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq,
struct request *rq)
{
- sector_t sdist = cfqq->seek_mean;
-
- if (!sample_valid(cfqq->seek_samples))
- sdist = CFQQ_SEEK_THR;
-
- return cfq_dist_from_last(cfqd, rq) <= sdist;
+ return cfq_dist_from_last(cfqd, rq) <= CFQQ_CLOSE_THR;
}
static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
@@ -1741,6 +1729,8 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
{
struct cfq_queue *cfqq;
+ if (cfq_class_idle(cur_cfqq))
+ return NULL;
if (!cfq_cfqq_sync(cur_cfqq))
return NULL;
if (CFQQ_SEEKY(cur_cfqq))
@@ -1807,7 +1797,11 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
* Otherwise, we do only if they are the last ones
* in their service tree.
*/
- return service_tree->count == 1;
+ if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
+ return 1;
+ cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
+ service_tree->count);
+ return 0;
}
static void cfq_arm_slice_timer(struct cfq_data *cfqd)
@@ -1852,8 +1846,11 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
* time slice.
*/
if (sample_valid(cic->ttime_samples) &&
- (cfqq->slice_end - jiffies < cic->ttime_mean))
+ (cfqq->slice_end - jiffies < cic->ttime_mean)) {
+ cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%d",
+ cic->ttime_mean);
return;
+ }
cfq_mark_cfqq_wait_request(cfqq);
@@ -1878,8 +1875,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
cfqq->dispatched++;
elv_dispatch_sort(q, rq);
- if (cfq_cfqq_sync(cfqq))
- cfqd->sync_flight++;
+ cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
cfqq->nr_sectors += blk_rq_sectors(rq);
}
@@ -1963,8 +1959,7 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
}
static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd,
- struct cfq_group *cfqg, enum wl_prio_t prio,
- bool prio_changed)
+ struct cfq_group *cfqg, enum wl_prio_t prio)
{
struct cfq_queue *queue;
int i;
@@ -1972,24 +1967,9 @@ static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd,
unsigned long lowest_key = 0;
enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
- if (prio_changed) {
- /*
- * When priorities switched, we prefer starting
- * from SYNC_NOIDLE (first choice), or just SYNC
- * over ASYNC
- */
- if (service_tree_for(cfqg, prio, cur_best, cfqd)->count)
- return cur_best;
- cur_best = SYNC_WORKLOAD;
- if (service_tree_for(cfqg, prio, cur_best, cfqd)->count)
- return cur_best;
-
- return ASYNC_WORKLOAD;
- }
-
- for (i = 0; i < 3; ++i) {
- /* otherwise, select the one with lowest rb_key */
- queue = cfq_rb_first(service_tree_for(cfqg, prio, i, cfqd));
+ for (i = 0; i <= SYNC_WORKLOAD; ++i) {
+ /* select the one with lowest rb_key */
+ queue = cfq_rb_first(service_tree_for(cfqg, prio, i));
if (queue &&
(!key_valid || time_before(queue->rb_key, lowest_key))) {
lowest_key = queue->rb_key;
@@ -2003,8 +1983,6 @@ static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd,
static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
{
- enum wl_prio_t previous_prio = cfqd->serving_prio;
- bool prio_changed;
unsigned slice;
unsigned count;
struct cfq_rb_root *st;
@@ -2032,24 +2010,19 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
* (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
* expiration time
*/
- prio_changed = (cfqd->serving_prio != previous_prio);
- st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type,
- cfqd);
+ st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type);
count = st->count;
/*
- * If priority didn't change, check workload expiration,
- * and that we still have other queues ready
+ * check workload expiration, and that we still have other queues ready
*/
- if (!prio_changed && count &&
- !time_after(jiffies, cfqd->workload_expires))
+ if (count && !time_after(jiffies, cfqd->workload_expires))
return;
/* otherwise select new workload type */
cfqd->serving_type =
- cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio, prio_changed);
- st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type,
- cfqd);
+ cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio);
+ st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type);
count = st->count;
/*
@@ -2085,6 +2058,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
slice = max(slice, 2 * cfqd->cfq_slice_idle);
slice = max_t(unsigned, slice, CFQ_MIN_TT);
+ cfq_log(cfqd, "workload slice:%d", slice);
cfqd->workload_expires = jiffies + slice;
cfqd->noidle_tree_requires_idle = false;
}
@@ -2232,16 +2206,32 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
struct cfq_queue *cfqq;
int dispatched = 0;
- while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL)
+ /* Expire the timeslice of the current active queue first */
+ cfq_slice_expired(cfqd, 0);
+ while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) {
+ __cfq_set_active_queue(cfqd, cfqq);
dispatched += __cfq_forced_dispatch_cfqq(cfqq);
+ }
- cfq_slice_expired(cfqd, 0);
BUG_ON(cfqd->busy_queues);
cfq_log(cfqd, "forced_dispatch=%d", dispatched);
return dispatched;
}
+static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
+ struct cfq_queue *cfqq)
+{
+ /* the queue hasn't finished any request, can't estimate */
+ if (cfq_cfqq_slice_new(cfqq))
+ return 1;
+ if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched,
+ cfqq->slice_end))
+ return 1;
+
+ return 0;
+}
+
static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
unsigned int max_dispatch;
@@ -2249,16 +2239,16 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
/*
* Drain async requests before we start sync IO
*/
- if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC])
+ if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_flight[BLK_RW_ASYNC])
return false;
/*
* If this is an async queue and we have sync IO in flight, let it wait
*/
- if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
+ if (cfqd->rq_in_flight[BLK_RW_SYNC] && !cfq_cfqq_sync(cfqq))
return false;
- max_dispatch = cfqd->cfq_quantum;
+ max_dispatch = max_t(unsigned int, cfqd->cfq_quantum / 2, 1);
if (cfq_class_idle(cfqq))
max_dispatch = 1;
@@ -2275,13 +2265,22 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
/*
* We have other queues, don't allow more IO from this one
*/
- if (cfqd->busy_queues > 1)
+ if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq))
return false;
/*
* Sole queue user, no limit
*/
- max_dispatch = -1;
+ if (cfqd->busy_queues == 1)
+ max_dispatch = -1;
+ else
+ /*
+ * Normally we start throttling cfqq when cfq_quantum/2
+ * requests have been dispatched. But we can drive
+ * deeper queue depths at the beginning of slice
+ * subjected to upper limit of cfq_quantum.
+ * */
+ max_dispatch = cfqd->cfq_quantum;
}
/*
@@ -3003,43 +3002,20 @@ static void
cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
struct request *rq)
{
- sector_t sdist;
- u64 total;
-
- if (!cfqq->last_request_pos)
- sdist = 0;
- else if (cfqq->last_request_pos < blk_rq_pos(rq))
- sdist = blk_rq_pos(rq) - cfqq->last_request_pos;
- else
- sdist = cfqq->last_request_pos - blk_rq_pos(rq);
+ sector_t sdist = 0;
+ sector_t n_sec = blk_rq_sectors(rq);
+ if (cfqq->last_request_pos) {
+ if (cfqq->last_request_pos < blk_rq_pos(rq))
+ sdist = blk_rq_pos(rq) - cfqq->last_request_pos;
+ else
+ sdist = cfqq->last_request_pos - blk_rq_pos(rq);
+ }
- /*
- * Don't allow the seek distance to get too large from the
- * odd fragment, pagein, etc
- */
- if (cfqq->seek_samples <= 60) /* second&third seek */
- sdist = min(sdist, (cfqq->seek_mean * 4) + 2*1024*1024);
+ cfqq->seek_history <<= 1;
+ if (blk_queue_nonrot(cfqd->queue))
+ cfqq->seek_history |= (n_sec < CFQQ_SECT_THR_NONROT);
else
- sdist = min(sdist, (cfqq->seek_mean * 4) + 2*1024*64);
-
- cfqq->seek_samples = (7*cfqq->seek_samples + 256) / 8;
- cfqq->seek_total = (7*cfqq->seek_total + (u64)256*sdist) / 8;
- total = cfqq->seek_total + (cfqq->seek_samples/2);
- do_div(total, cfqq->seek_samples);
- cfqq->seek_mean = (sector_t)total;
-
- /*
- * If this cfqq is shared between multiple processes, check to
- * make sure that those processes are still issuing I/Os within
- * the mean seek distance. If not, it may be time to break the
- * queues apart again.
- */
- if (cfq_cfqq_coop(cfqq)) {
- if (CFQQ_SEEKY(cfqq) && !cfqq->seeky_start)
- cfqq->seeky_start = jiffies;
- else if (!CFQQ_SEEKY(cfqq))
- cfqq->seeky_start = 0;
- }
+ cfqq->seek_history |= (sdist > CFQQ_SEEK_THR);
}
/*
@@ -3064,8 +3040,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
cfq_mark_cfqq_deep(cfqq);
if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
- (!cfq_cfqq_deep(cfqq) && sample_valid(cfqq->seek_samples)
- && CFQQ_SEEKY(cfqq)))
+ (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
enable_idle = 0;
else if (sample_valid(cic->ttime_samples)) {
if (cic->ttime_mean > cfqd->cfq_slice_idle)
@@ -3104,6 +3079,12 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
return true;
/*
+ * Don't allow a non-RT request to preempt an ongoing RT cfqq timeslice.
+ */
+ if (cfq_class_rt(cfqq) && !cfq_class_rt(new_cfqq))
+ return false;
+
+ /*
* if the new request is sync, but the currently running queue is
* not, let the sync request have priority.
*/
@@ -3245,14 +3226,14 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd)
{
struct cfq_queue *cfqq = cfqd->active_queue;
- if (rq_in_driver(cfqd) > cfqd->hw_tag_est_depth)
- cfqd->hw_tag_est_depth = rq_in_driver(cfqd);
+ if (cfqd->rq_in_driver > cfqd->hw_tag_est_depth)
+ cfqd->hw_tag_est_depth = cfqd->rq_in_driver;
if (cfqd->hw_tag == 1)
return;
if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
- rq_in_driver(cfqd) <= CFQ_HW_QUEUE_MIN)
+ cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
return;
/*
@@ -3262,7 +3243,7 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd)
*/
if (cfqq && cfq_cfqq_idle_window(cfqq) &&
cfqq->dispatched + cfqq->queued[0] + cfqq->queued[1] <
- CFQ_HW_QUEUE_MIN && rq_in_driver(cfqd) < CFQ_HW_QUEUE_MIN)
+ CFQ_HW_QUEUE_MIN && cfqd->rq_in_driver < CFQ_HW_QUEUE_MIN)
return;
if (cfqd->hw_tag_samples++ < 50)
@@ -3315,13 +3296,12 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfq_update_hw_tag(cfqd);
- WARN_ON(!cfqd->rq_in_driver[sync]);
+ WARN_ON(!cfqd->rq_in_driver);
WARN_ON(!cfqq->dispatched);
- cfqd->rq_in_driver[sync]--;
+ cfqd->rq_in_driver--;
cfqq->dispatched--;
- if (cfq_cfqq_sync(cfqq))
- cfqd->sync_flight--;
+ cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
if (sync) {
RQ_CIC(rq)->last_end_request = now;
@@ -3348,6 +3328,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
if (cfq_should_wait_busy(cfqd, cfqq)) {
cfqq->slice_end = jiffies + cfqd->cfq_slice_idle;
cfq_mark_cfqq_wait_busy(cfqq);
+ cfq_log_cfqq(cfqd, cfqq, "will busy wait");
}
/*
@@ -3375,7 +3356,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
}
}
- if (!rq_in_driver(cfqd))
+ if (!cfqd->rq_in_driver)
cfq_schedule_dispatch(cfqd);
}
@@ -3474,14 +3455,6 @@ cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic,
return cic_to_cfqq(cic, 1);
}
-static int should_split_cfqq(struct cfq_queue *cfqq)
-{
- if (cfqq->seeky_start &&
- time_after(jiffies, cfqq->seeky_start + CFQQ_COOP_TOUT))
- return 1;
- return 0;
-}
-
/*
* Returns NULL if a new cfqq should be allocated, or the old cfqq if this
* was the last process referring to said cfqq.
@@ -3490,9 +3463,9 @@ static struct cfq_queue *
split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq)
{
if (cfqq_process_refs(cfqq) == 1) {
- cfqq->seeky_start = 0;
cfqq->pid = current->pid;
cfq_clear_cfqq_coop(cfqq);
+ cfq_clear_cfqq_split_coop(cfqq);
return cfqq;
}
@@ -3531,7 +3504,7 @@ new_queue:
/*
* If the queue was seeky for too long, break it apart.
*/
- if (cfq_cfqq_coop(cfqq) && should_split_cfqq(cfqq)) {
+ if (cfq_cfqq_coop(cfqq) && cfq_cfqq_split_coop(cfqq)) {
cfq_log_cfqq(cfqd, cfqq, "breaking apart cfqq");
cfqq = split_cfqq(cic, cfqq);
if (!cfqq)
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 4eb8e9ea4af5..f26051f44681 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -6,6 +6,7 @@
#include <linux/elevator.h>
#include <linux/fd.h>
#include <linux/hdreg.h>
+#include <linux/slab.h>
#include <linux/syscalls.h>
#include <linux/smp_lock.h>
#include <linux/types.h>
diff --git a/block/elevator.c b/block/elevator.c
index 9ad5ccc4c5ee..76e3702d5381 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -154,7 +154,7 @@ static struct elevator_type *elevator_get(const char *name)
spin_unlock(&elv_list_lock);
- sprintf(elv, "%s-iosched", name);
+ snprintf(elv, sizeof(elv), "%s-iosched", name);
request_module("%s", elv);
spin_lock(&elv_list_lock);
@@ -474,6 +474,15 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
int ret;
/*
+ * Levels of merges:
+ * nomerges: No merges at all attempted
+ * noxmerges: Only simple one-hit cache try
+ * merges: All merge tries attempted
+ */
+ if (blk_queue_nomerges(q))
+ return ELEVATOR_NO_MERGE;
+
+ /*
* First try one-hit cache.
*/
if (q->last_merge) {
@@ -484,7 +493,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
}
}
- if (blk_queue_nomerges(q))
+ if (blk_queue_noxmerges(q))
return ELEVATOR_NO_MERGE;
/*
@@ -883,7 +892,7 @@ elv_attr_store(struct kobject *kobj, struct attribute *attr,
return error;
}
-static struct sysfs_ops elv_sysfs_ops = {
+static const struct sysfs_ops elv_sysfs_ops = {
.show = elv_attr_show,
.store = elv_attr_store,
};
diff --git a/block/genhd.c b/block/genhd.c
index b11a4ad7d571..d13ba76a169c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -867,7 +867,7 @@ static ssize_t disk_discard_alignment_show(struct device *dev,
{
struct gendisk *disk = dev_to_disk(dev);
- return sprintf(buf, "%u\n", queue_discard_alignment(disk->queue));
+ return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
}
static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
diff --git a/block/ioctl.c b/block/ioctl.c
index be48ea51faee..8905d2a2a717 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -1,5 +1,6 @@
#include <linux/capability.h>
#include <linux/blkdev.h>
+#include <linux/gfp.h>
#include <linux/blkpg.h>
#include <linux/hdreg.h>
#include <linux/backing-dev.h>
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index 3a0d369d08c7..232c4b38cd37 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -5,6 +5,7 @@
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/init.h>
struct noop_data {