aboutsummaryrefslogtreecommitdiffstats
path: root/block/cfq-iosched.c
diff options
context:
space:
mode:
Diffstat (limited to 'block/cfq-iosched.c')
-rw-r--r--block/cfq-iosched.c608
1 files changed, 226 insertions, 382 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 457295253566..39c43307dc6c 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -17,6 +17,8 @@
#include "blk.h"
#include "cfq.h"
+static struct blkio_policy_type blkio_policy_cfq;
+
/*
* tunables
*/
@@ -206,11 +208,7 @@ struct cfq_group {
unsigned long saved_workload_slice;
enum wl_type_t saved_workload;
enum wl_prio_t saved_serving_prio;
- struct blkio_group blkg;
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
- struct hlist_node cfqd_node;
- int ref;
-#endif
+
/* number of requests that are on the dispatch list or inside driver */
int dispatched;
struct cfq_ttime ttime;
@@ -220,6 +218,10 @@ struct cfq_io_cq {
struct io_cq icq; /* must be the first member */
struct cfq_queue *cfqq[2];
struct cfq_ttime ttime;
+ int ioprio; /* the current ioprio */
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+ uint64_t blkcg_id; /* the current blkcg ID */
+#endif
};
/*
@@ -229,7 +231,7 @@ struct cfq_data {
struct request_queue *queue;
/* Root service tree for cfq_groups */
struct cfq_rb_root grp_service_tree;
- struct cfq_group root_group;
+ struct cfq_group *root_group;
/*
* The priority currently being served
@@ -302,12 +304,6 @@ struct cfq_data {
struct cfq_queue oom_cfqq;
unsigned long last_delayed_sync;
-
- /* List of cfq groups being managed on this device*/
- struct hlist_head cfqg_list;
-
- /* Number of groups which are on blkcg->blkg_list */
- unsigned int nr_blkcg_linked_grps;
};
static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
@@ -371,20 +367,48 @@ CFQ_CFQQ_FNS(wait_busy);
#undef CFQ_CFQQ_FNS
#ifdef CONFIG_CFQ_GROUP_IOSCHED
+static inline struct cfq_group *blkg_to_cfqg(struct blkio_group *blkg)
+{
+ return blkg_to_pdata(blkg, &blkio_policy_cfq);
+}
+
+static inline struct blkio_group *cfqg_to_blkg(struct cfq_group *cfqg)
+{
+ return pdata_to_blkg(cfqg, &blkio_policy_cfq);
+}
+
+static inline void cfqg_get(struct cfq_group *cfqg)
+{
+ return blkg_get(cfqg_to_blkg(cfqg));
+}
+
+static inline void cfqg_put(struct cfq_group *cfqg)
+{
+ return blkg_put(cfqg_to_blkg(cfqg));
+}
+
#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \
cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \
- blkg_path(&(cfqq)->cfqg->blkg), ##args)
+ blkg_path(cfqg_to_blkg((cfqq)->cfqg)), ##args)
#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) \
blk_add_trace_msg((cfqd)->queue, "%s " fmt, \
- blkg_path(&(cfqg)->blkg), ##args) \
+ blkg_path(cfqg_to_blkg((cfqg))), ##args) \
+
+#else /* CONFIG_CFQ_GROUP_IOSCHED */
+
+static inline struct cfq_group *blkg_to_cfqg(struct blkio_group *blkg) { return NULL; }
+static inline struct blkio_group *cfqg_to_blkg(struct cfq_group *cfqg) { return NULL; }
+static inline void cfqg_get(struct cfq_group *cfqg) { }
+static inline void cfqg_put(struct cfq_group *cfqg) { }
-#else
#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args)
#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0)
-#endif
+
+#endif /* CONFIG_CFQ_GROUP_IOSCHED */
+
#define cfq_log(cfqd, fmt, args...) \
blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
@@ -465,8 +489,9 @@ static inline int cfqg_busy_async_queues(struct cfq_data *cfqd,
}
static void cfq_dispatch_insert(struct request_queue *, struct request *);
-static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
- struct io_context *, gfp_t);
+static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, bool is_sync,
+ struct cfq_io_cq *cic, struct bio *bio,
+ gfp_t gfp_mask);
static inline struct cfq_io_cq *icq_to_cic(struct io_cq *icq)
{
@@ -935,7 +960,8 @@ cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
cfq_group_service_tree_del(st, cfqg);
cfqg->saved_workload_slice = 0;
- cfq_blkiocg_update_dequeue_stats(&cfqg->blkg, 1);
+ cfq_blkiocg_update_dequeue_stats(cfqg_to_blkg(cfqg),
+ &blkio_policy_cfq, 1);
}
static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
@@ -1007,178 +1033,70 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
"sl_used=%u disp=%u charge=%u iops=%u sect=%lu",
used_sl, cfqq->slice_dispatch, charge,
iops_mode(cfqd), cfqq->nr_sectors);
- cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl,
- unaccounted_sl);
- cfq_blkiocg_set_start_empty_time(&cfqg->blkg);
-}
-
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
-{
- if (blkg)
- return container_of(blkg, struct cfq_group, blkg);
- return NULL;
-}
-
-static void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
- unsigned int weight)
-{
- struct cfq_group *cfqg = cfqg_of_blkg(blkg);
- cfqg->new_weight = weight;
- cfqg->needs_update = true;
+ cfq_blkiocg_update_timeslice_used(cfqg_to_blkg(cfqg), &blkio_policy_cfq,
+ used_sl, unaccounted_sl);
+ cfq_blkiocg_set_start_empty_time(cfqg_to_blkg(cfqg), &blkio_policy_cfq);
}
-static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd,
- struct cfq_group *cfqg, struct blkio_cgroup *blkcg)
-{
- struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
- unsigned int major, minor;
-
- /*
- * Add group onto cgroup list. It might happen that bdi->dev is
- * not initialized yet. Initialize this new group without major
- * and minor info and this info will be filled in once a new thread
- * comes for IO.
- */
- if (bdi->dev) {
- sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
- cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
- (void *)cfqd, MKDEV(major, minor));
- } else
- cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
- (void *)cfqd, 0);
-
- cfqd->nr_blkcg_linked_grps++;
- cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
-
- /* Add group on cfqd list */
- hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
-}
-
-/*
- * Should be called from sleepable context. No request queue lock as per
- * cpu stats are allocated dynamically and alloc_percpu needs to be called
- * from sleepable context.
+/**
+ * cfq_init_cfqg_base - initialize base part of a cfq_group
+ * @cfqg: cfq_group to initialize
+ *
+ * Initialize the base part which is used whether %CONFIG_CFQ_GROUP_IOSCHED
+ * is enabled or not.
*/
-static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
+static void cfq_init_cfqg_base(struct cfq_group *cfqg)
{
- struct cfq_group *cfqg = NULL;
- int i, j, ret;
struct cfq_rb_root *st;
-
- cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
- if (!cfqg)
- return NULL;
+ int i, j;
for_each_cfqg_st(cfqg, i, j, st)
*st = CFQ_RB_ROOT;
RB_CLEAR_NODE(&cfqg->rb_node);
cfqg->ttime.last_end_request = jiffies;
-
- /*
- * Take the initial reference that will be released on destroy
- * This can be thought of a joint reference by cgroup and
- * elevator which will be dropped by either elevator exit
- * or cgroup deletion path depending on who is exiting first.
- */
- cfqg->ref = 1;
-
- ret = blkio_alloc_blkg_stats(&cfqg->blkg);
- if (ret) {
- kfree(cfqg);
- return NULL;
- }
-
- return cfqg;
}
-static struct cfq_group *
-cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg)
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+static void cfq_update_blkio_group_weight(struct request_queue *q,
+ struct blkio_group *blkg,
+ unsigned int weight)
{
- struct cfq_group *cfqg = NULL;
- void *key = cfqd;
- struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
- unsigned int major, minor;
+ struct cfq_group *cfqg = blkg_to_cfqg(blkg);
- /*
- * This is the common case when there are no blkio cgroups.
- * Avoid lookup in this case
- */
- if (blkcg == &blkio_root_cgroup)
- cfqg = &cfqd->root_group;
- else
- cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
+ cfqg->new_weight = weight;
+ cfqg->needs_update = true;
+}
- if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
- sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
- cfqg->blkg.dev = MKDEV(major, minor);
- }
+static void cfq_init_blkio_group(struct blkio_group *blkg)
+{
+ struct cfq_group *cfqg = blkg_to_cfqg(blkg);
- return cfqg;
+ cfq_init_cfqg_base(cfqg);
+ cfqg->weight = blkg->blkcg->weight;
}
/*
* Search for the cfq group current task belongs to. request_queue lock must
* be held.
*/
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
+static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
+ struct blkio_cgroup *blkcg)
{
- struct blkio_cgroup *blkcg;
- struct cfq_group *cfqg = NULL, *__cfqg = NULL;
struct request_queue *q = cfqd->queue;
+ struct cfq_group *cfqg = NULL;
- rcu_read_lock();
- blkcg = task_blkio_cgroup(current);
- cfqg = cfq_find_cfqg(cfqd, blkcg);
- if (cfqg) {
- rcu_read_unlock();
- return cfqg;
- }
-
- /*
- * Need to allocate a group. Allocation of group also needs allocation
- * of per cpu stats which in-turn takes a mutex() and can block. Hence
- * we need to drop rcu lock and queue_lock before we call alloc.
- *
- * Not taking any queue reference here and assuming that queue is
- * around by the time we return. CFQ queue allocation code does
- * the same. It might be racy though.
- */
-
- rcu_read_unlock();
- spin_unlock_irq(q->queue_lock);
-
- cfqg = cfq_alloc_cfqg(cfqd);
-
- spin_lock_irq(q->queue_lock);
-
- rcu_read_lock();
- blkcg = task_blkio_cgroup(current);
-
- /*
- * If some other thread already allocated the group while we were
- * not holding queue lock, free up the group
- */
- __cfqg = cfq_find_cfqg(cfqd, blkcg);
+ /* avoid lookup for the common case where there's no blkio cgroup */
+ if (blkcg == &blkio_root_cgroup) {
+ cfqg = cfqd->root_group;
+ } else {
+ struct blkio_group *blkg;
- if (__cfqg) {
- kfree(cfqg);
- rcu_read_unlock();
- return __cfqg;
+ blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_PROP, false);
+ if (!IS_ERR(blkg))
+ cfqg = blkg_to_cfqg(blkg);
}
- if (!cfqg)
- cfqg = &cfqd->root_group;
-
- cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg);
- rcu_read_unlock();
- return cfqg;
-}
-
-static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
-{
- cfqg->ref++;
return cfqg;
}
@@ -1186,94 +1104,18 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
{
/* Currently, all async queues are mapped to root group */
if (!cfq_cfqq_sync(cfqq))
- cfqg = &cfqq->cfqd->root_group;
+ cfqg = cfqq->cfqd->root_group;
cfqq->cfqg = cfqg;
/* cfqq reference on cfqg */
- cfqq->cfqg->ref++;
-}
-
-static void cfq_put_cfqg(struct cfq_group *cfqg)
-{
- struct cfq_rb_root *st;
- int i, j;
-
- BUG_ON(cfqg->ref <= 0);
- cfqg->ref--;
- if (cfqg->ref)
- return;
- for_each_cfqg_st(cfqg, i, j, st)
- BUG_ON(!RB_EMPTY_ROOT(&st->rb));
- free_percpu(cfqg->blkg.stats_cpu);
- kfree(cfqg);
-}
-
-static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
-{
- /* Something wrong if we are trying to remove same group twice */
- BUG_ON(hlist_unhashed(&cfqg->cfqd_node));
-
- hlist_del_init(&cfqg->cfqd_node);
-
- BUG_ON(cfqd->nr_blkcg_linked_grps <= 0);
- cfqd->nr_blkcg_linked_grps--;
-
- /*
- * Put the reference taken at the time of creation so that when all
- * queues are gone, group can be destroyed.
- */
- cfq_put_cfqg(cfqg);
-}
-
-static void cfq_release_cfq_groups(struct cfq_data *cfqd)
-{
- struct hlist_node *pos, *n;
- struct cfq_group *cfqg;
-
- hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) {
- /*
- * If cgroup removal path got to blk_group first and removed
- * it from cgroup list, then it will take care of destroying
- * cfqg also.
- */
- if (!cfq_blkiocg_del_blkio_group(&cfqg->blkg))
- cfq_destroy_cfqg(cfqd, cfqg);
- }
-}
-
-/*
- * Blk cgroup controller notification saying that blkio_group object is being
- * delinked as associated cgroup object is going away. That also means that
- * no new IO will come in this group. So get rid of this group as soon as
- * any pending IO in the group is finished.
- *
- * This function is called under rcu_read_lock(). key is the rcu protected
- * pointer. That means "key" is a valid cfq_data pointer as long as we are rcu
- * read lock.
- *
- * "key" was fetched from blkio_group under blkio_cgroup->lock. That means
- * it should not be NULL as even if elevator was exiting, cgroup deltion
- * path got to it first.
- */
-static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
-{
- unsigned long flags;
- struct cfq_data *cfqd = key;
-
- spin_lock_irqsave(cfqd->queue->queue_lock, flags);
- cfq_destroy_cfqg(cfqd, cfqg_of_blkg(blkg));
- spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
+ cfqg_get(cfqg);
}
#else /* GROUP_IOSCHED */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
+static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
+ struct blkio_cgroup *blkcg)
{
- return &cfqd->root_group;
-}
-
-static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
-{
- return cfqg;
+ return cfqd->root_group;
}
static inline void
@@ -1281,9 +1123,6 @@ cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
cfqq->cfqg = cfqg;
}
-static void cfq_release_cfq_groups(struct cfq_data *cfqd) {}
-static inline void cfq_put_cfqg(struct cfq_group *cfqg) {}
-
#endif /* GROUP_IOSCHED */
/*
@@ -1550,12 +1389,14 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
{
elv_rb_del(&cfqq->sort_list, rq);
cfqq->queued[rq_is_sync(rq)]--;
- cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
- rq_data_dir(rq), rq_is_sync(rq));
+ cfq_blkiocg_update_io_remove_stats(cfqg_to_blkg(RQ_CFQG(rq)),
+ &blkio_policy_cfq, rq_data_dir(rq),
+ rq_is_sync(rq));
cfq_add_rq_rb(rq);
- cfq_blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg,
- &cfqq->cfqd->serving_group->blkg, rq_data_dir(rq),
- rq_is_sync(rq));
+ cfq_blkiocg_update_io_add_stats(cfqg_to_blkg(RQ_CFQG(rq)),
+ &blkio_policy_cfq,
+ cfqg_to_blkg(cfqq->cfqd->serving_group),
+ rq_data_dir(rq), rq_is_sync(rq));
}
static struct request *
@@ -1611,8 +1452,9 @@ static void cfq_remove_request(struct request *rq)
cfq_del_rq_rb(rq);
cfqq->cfqd->rq_queued--;
- cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
- rq_data_dir(rq), rq_is_sync(rq));
+ cfq_blkiocg_update_io_remove_stats(cfqg_to_blkg(RQ_CFQG(rq)),
+ &blkio_policy_cfq, rq_data_dir(rq),
+ rq_is_sync(rq));
if (rq->cmd_flags & REQ_PRIO) {
WARN_ON(!cfqq->prio_pending);
cfqq->prio_pending--;
@@ -1647,8 +1489,9 @@ static void cfq_merged_request(struct request_queue *q, struct request *req,
static void cfq_bio_merged(struct request_queue *q, struct request *req,
struct bio *bio)
{
- cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(req))->blkg,
- bio_data_dir(bio), cfq_bio_sync(bio));
+ cfq_blkiocg_update_io_merged_stats(cfqg_to_blkg(RQ_CFQG(req)),
+ &blkio_policy_cfq, bio_data_dir(bio),
+ cfq_bio_sync(bio));
}
static void
@@ -1670,8 +1513,9 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
if (cfqq->next_rq == next)
cfqq->next_rq = rq;
cfq_remove_request(next);
- cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg,
- rq_data_dir(next), rq_is_sync(next));
+ cfq_blkiocg_update_io_merged_stats(cfqg_to_blkg(RQ_CFQG(rq)),
+ &blkio_policy_cfq, rq_data_dir(next),
+ rq_is_sync(next));
cfqq = RQ_CFQQ(next);
/*
@@ -1712,7 +1556,8 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
del_timer(&cfqd->idle_slice_timer);
- cfq_blkiocg_update_idle_time_stats(&cfqq->cfqg->blkg);
+ cfq_blkiocg_update_idle_time_stats(cfqg_to_blkg(cfqq->cfqg),
+ &blkio_policy_cfq);
}
static void __cfq_set_active_queue(struct cfq_data *cfqd,
@@ -1721,7 +1566,8 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
if (cfqq) {
cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d",
cfqd->serving_prio, cfqd->serving_type);
- cfq_blkiocg_update_avg_queue_size_stats(&cfqq->cfqg->blkg);
+ cfq_blkiocg_update_avg_queue_size_stats(cfqg_to_blkg(cfqq->cfqg),
+ &blkio_policy_cfq);
cfqq->slice_start = 0;
cfqq->dispatch_start = jiffies;
cfqq->allocated_slice = 0;
@@ -2042,7 +1888,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
* task has exited, don't wait
*/
cic = cfqd->active_cic;
- if (!cic || !atomic_read(&cic->icq.ioc->nr_tasks))
+ if (!cic || !atomic_read(&cic->icq.ioc->active_ref))
return;
/*
@@ -2069,7 +1915,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
sl = cfqd->cfq_slice_idle;
mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
- cfq_blkiocg_update_set_idle_time_stats(&cfqq->cfqg->blkg);
+ cfq_blkiocg_update_set_idle_time_stats(cfqg_to_blkg(cfqq->cfqg),
+ &blkio_policy_cfq);
cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl,
group_idle ? 1 : 0);
}
@@ -2092,8 +1939,9 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
cfqq->nr_sectors += blk_rq_sectors(rq);
- cfq_blkiocg_update_dispatch_stats(&cfqq->cfqg->blkg, blk_rq_bytes(rq),
- rq_data_dir(rq), rq_is_sync(rq));
+ cfq_blkiocg_update_dispatch_stats(cfqg_to_blkg(cfqq->cfqg),
+ &blkio_policy_cfq, blk_rq_bytes(rq),
+ rq_data_dir(rq), rq_is_sync(rq));
}
/*
@@ -2675,7 +2523,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
BUG_ON(cfq_cfqq_on_rr(cfqq));
kmem_cache_free(cfq_pool, cfqq);
- cfq_put_cfqg(cfqg);
+ cfqg_put(cfqg);
}
static void cfq_put_cooperator(struct cfq_queue *cfqq)
@@ -2734,7 +2582,7 @@ static void cfq_exit_icq(struct io_cq *icq)
}
}
-static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
+static void cfq_init_prio_data(struct cfq_queue *cfqq, struct cfq_io_cq *cic)
{
struct task_struct *tsk = current;
int ioprio_class;
@@ -2742,7 +2590,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
if (!cfq_cfqq_prio_changed(cfqq))
return;
- ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio);
+ ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio);
switch (ioprio_class) {
default:
printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
@@ -2754,11 +2602,11 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
cfqq->ioprio_class = task_nice_ioclass(tsk);
break;
case IOPRIO_CLASS_RT:
- cfqq->ioprio = task_ioprio(ioc);
+ cfqq->ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
cfqq->ioprio_class = IOPRIO_CLASS_RT;
break;
case IOPRIO_CLASS_BE:
- cfqq->ioprio = task_ioprio(ioc);
+ cfqq->ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
cfqq->ioprio_class = IOPRIO_CLASS_BE;
break;
case IOPRIO_CLASS_IDLE:
@@ -2776,19 +2624,24 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
cfq_clear_cfqq_prio_changed(cfqq);
}
-static void changed_ioprio(struct cfq_io_cq *cic)
+static void check_ioprio_changed(struct cfq_io_cq *cic, struct bio *bio)
{
+ int ioprio = cic->icq.ioc->ioprio;
struct cfq_data *cfqd = cic_to_cfqd(cic);
struct cfq_queue *cfqq;
- if (unlikely(!cfqd))
+ /*
+ * Check whether ioprio has changed. The condition may trigger
+ * spuriously on a newly created cic but there's no harm.
+ */
+ if (unlikely(!cfqd) || likely(cic->ioprio == ioprio))
return;
cfqq = cic->cfqq[BLK_RW_ASYNC];
if (cfqq) {
struct cfq_queue *new_cfqq;
- new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->icq.ioc,
- GFP_ATOMIC);
+ new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic, bio,
+ GFP_ATOMIC);
if (new_cfqq) {
cic->cfqq[BLK_RW_ASYNC] = new_cfqq;
cfq_put_queue(cfqq);
@@ -2798,6 +2651,8 @@ static void changed_ioprio(struct cfq_io_cq *cic)
cfqq = cic->cfqq[BLK_RW_SYNC];
if (cfqq)
cfq_mark_cfqq_prio_changed(cfqq);
+
+ cic->ioprio = ioprio;
}
static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
@@ -2821,17 +2676,24 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
}
#ifdef CONFIG_CFQ_GROUP_IOSCHED
-static void changed_cgroup(struct cfq_io_cq *cic)
+static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
{
- struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1);
struct cfq_data *cfqd = cic_to_cfqd(cic);
- struct request_queue *q;
+ struct cfq_queue *sync_cfqq;
+ uint64_t id;
- if (unlikely(!cfqd))
- return;
+ rcu_read_lock();
+ id = bio_blkio_cgroup(bio)->id;
+ rcu_read_unlock();
- q = cfqd->queue;
+ /*
+ * Check whether blkcg has changed. The condition may trigger
+ * spuriously on a newly created cic but there's no harm.
+ */
+ if (unlikely(!cfqd) || likely(cic->blkcg_id == id))
+ return;
+ sync_cfqq = cic_to_cfqq(cic, 1);
if (sync_cfqq) {
/*
* Drop reference to sync queue. A new sync queue will be
@@ -2841,21 +2703,26 @@ static void changed_cgroup(struct cfq_io_cq *cic)
cic_set_cfqq(cic, NULL, 1);
cfq_put_queue(sync_cfqq);
}
+
+ cic->blkcg_id = id;
}
+#else
+static inline void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) { }
#endif /* CONFIG_CFQ_GROUP_IOSCHED */
static struct cfq_queue *
-cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
- struct io_context *ioc, gfp_t gfp_mask)
+cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
+ struct bio *bio, gfp_t gfp_mask)
{
+ struct blkio_cgroup *blkcg;
struct cfq_queue *cfqq, *new_cfqq = NULL;
- struct cfq_io_cq *cic;
struct cfq_group *cfqg;
retry:
- cfqg = cfq_get_cfqg(cfqd);
- cic = cfq_cic_lookup(cfqd, ioc);
- /* cic always exists here */
+ rcu_read_lock();
+
+ blkcg = bio_blkio_cgroup(bio);
+ cfqg = cfq_lookup_create_cfqg(cfqd, blkcg);
cfqq = cic_to_cfqq(cic, is_sync);
/*
@@ -2868,6 +2735,7 @@ retry:
cfqq = new_cfqq;
new_cfqq = NULL;
} else if (gfp_mask & __GFP_WAIT) {
+ rcu_read_unlock();
spin_unlock_irq(cfqd->queue->queue_lock);
new_cfqq = kmem_cache_alloc_node(cfq_pool,
gfp_mask | __GFP_ZERO,
@@ -2883,7 +2751,7 @@ retry:
if (cfqq) {
cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
- cfq_init_prio_data(cfqq, ioc);
+ cfq_init_prio_data(cfqq, cic);
cfq_link_cfqq_cfqg(cfqq, cfqg);
cfq_log_cfqq(cfqd, cfqq, "alloced");
} else
@@ -2893,6 +2761,7 @@ retry:
if (new_cfqq)
kmem_cache_free(cfq_pool, new_cfqq);
+ rcu_read_unlock();
return cfqq;
}
@@ -2902,6 +2771,9 @@ cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio)
switch (ioprio_class) {
case IOPRIO_CLASS_RT:
return &cfqd->async_cfqq[0][ioprio];
+ case IOPRIO_CLASS_NONE:
+ ioprio = IOPRIO_NORM;
+ /* fall through */
case IOPRIO_CLASS_BE:
return &cfqd->async_cfqq[1][ioprio];
case IOPRIO_CLASS_IDLE:
@@ -2912,11 +2784,11 @@ cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio)
}
static struct cfq_queue *
-cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
- gfp_t gfp_mask)
+cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
+ struct bio *bio, gfp_t gfp_mask)
{
- const int ioprio = task_ioprio(ioc);
- const int ioprio_class = task_ioprio_class(ioc);
+ const int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio);
+ const int ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
struct cfq_queue **async_cfqq = NULL;
struct cfq_queue *cfqq = NULL;
@@ -2926,7 +2798,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
}
if (!cfqq)
- cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
+ cfqq = cfq_find_alloc_queue(cfqd, is_sync, cic, bio, gfp_mask);
/*
* pin the queue now that it's allocated, scheduler exit will prune it
@@ -3008,7 +2880,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE))
enable_idle = 0;
- else if (!atomic_read(&cic->icq.ioc->nr_tasks) ||
+ else if (!atomic_read(&cic->icq.ioc->active_ref) ||
!cfqd->cfq_slice_idle ||
(!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
enable_idle = 0;
@@ -3173,7 +3045,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
__blk_run_queue(cfqd->queue);
} else {
cfq_blkiocg_update_idle_time_stats(
- &cfqq->cfqg->blkg);
+ cfqg_to_blkg(cfqq->cfqg),
+ &blkio_policy_cfq);
cfq_mark_cfqq_must_dispatch(cfqq);
}
}
@@ -3195,14 +3068,15 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
struct cfq_queue *cfqq = RQ_CFQQ(rq);
cfq_log_cfqq(cfqd, cfqq, "insert_request");
- cfq_init_prio_data(cfqq, RQ_CIC(rq)->icq.ioc);
+ cfq_init_prio_data(cfqq, RQ_CIC(rq));
rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]);
list_add_tail(&rq->queuelist, &cfqq->fifo);
cfq_add_rq_rb(rq);
- cfq_blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg,
- &cfqd->serving_group->blkg, rq_data_dir(rq),
- rq_is_sync(rq));
+ cfq_blkiocg_update_io_add_stats(cfqg_to_blkg(RQ_CFQG(rq)),
+ &blkio_policy_cfq,
+ cfqg_to_blkg(cfqd->serving_group),
+ rq_data_dir(rq), rq_is_sync(rq));
cfq_rq_enqueued(cfqd, cfqq, rq);
}
@@ -3298,9 +3172,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfqd->rq_in_driver--;
cfqq->dispatched--;
(RQ_CFQG(rq))->dispatched--;
- cfq_blkiocg_update_completion_stats(&cfqq->cfqg->blkg,
- rq_start_time_ns(rq), rq_io_start_time_ns(rq),
- rq_data_dir(rq), rq_is_sync(rq));
+ cfq_blkiocg_update_completion_stats(cfqg_to_blkg(cfqq->cfqg),
+ &blkio_policy_cfq, rq_start_time_ns(rq),
+ rq_io_start_time_ns(rq), rq_data_dir(rq),
+ rq_is_sync(rq));
cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
@@ -3397,7 +3272,7 @@ static int cfq_may_queue(struct request_queue *q, int rw)
cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
if (cfqq) {
- cfq_init_prio_data(cfqq, cic->icq.ioc);
+ cfq_init_prio_data(cfqq, cic);
return __cfq_may_queue(cfqq);
}
@@ -3419,7 +3294,7 @@ static void cfq_put_request(struct request *rq)
cfqq->allocated[rw]--;
/* Put down rq reference on cfqg */
- cfq_put_cfqg(RQ_CFQG(rq));
+ cfqg_put(RQ_CFQG(rq));
rq->elv.priv[0] = NULL;
rq->elv.priv[1] = NULL;
@@ -3463,32 +3338,25 @@ split_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq)
* Allocate cfq data structures associated with this request.
*/
static int
-cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
+cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
+ gfp_t gfp_mask)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq);
const int rw = rq_data_dir(rq);
const bool is_sync = rq_is_sync(rq);
struct cfq_queue *cfqq;
- unsigned int changed;
might_sleep_if(gfp_mask & __GFP_WAIT);
spin_lock_irq(q->queue_lock);
- /* handle changed notifications */
- changed = icq_get_changed(&cic->icq);
- if (unlikely(changed & ICQ_IOPRIO_CHANGED))
- changed_ioprio(cic);
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
- if (unlikely(changed & ICQ_CGROUP_CHANGED))
- changed_cgroup(cic);
-#endif
-
+ check_ioprio_changed(cic, bio);
+ check_blkcg_changed(cic, bio);
new_queue:
cfqq = cic_to_cfqq(cic, is_sync);
if (!cfqq || cfqq == &cfqd->oom_cfqq) {
- cfqq = cfq_get_queue(cfqd, is_sync, cic->icq.ioc, gfp_mask);
+ cfqq = cfq_get_queue(cfqd, is_sync, cic, bio, gfp_mask);
cic_set_cfqq(cic, cfqq, is_sync);
} else {
/*
@@ -3514,8 +3382,9 @@ new_queue:
cfqq->allocated[rw]++;
cfqq->ref++;
+ cfqg_get(cfqq->cfqg);
rq->elv.priv[0] = cfqq;
- rq->elv.priv[1] = cfq_ref_get_cfqg(cfqq->cfqg);
+ rq->elv.priv[1] = cfqq->cfqg;
spin_unlock_irq(q->queue_lock);
return 0;
}
@@ -3612,7 +3481,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
{
struct cfq_data *cfqd = e->elevator_data;
struct request_queue *q = cfqd->queue;
- bool wait = false;
cfq_shutdown_timer_wq(cfqd);
@@ -3622,89 +3490,59 @@ static void cfq_exit_queue(struct elevator_queue *e)
__cfq_slice_expired(cfqd, cfqd->active_queue, 0);
cfq_put_async_queues(cfqd);
- cfq_release_cfq_groups(cfqd);
-
- /*
- * If there are groups which we could not unlink from blkcg list,
- * wait for a rcu period for them to be freed.
- */
- if (cfqd->nr_blkcg_linked_grps)
- wait = true;
spin_unlock_irq(q->queue_lock);
cfq_shutdown_timer_wq(cfqd);
- /*
- * Wait for cfqg->blkg->key accessors to exit their grace periods.
- * Do this wait only if there are other unlinked groups out
- * there. This can happen if cgroup deletion path claimed the
- * responsibility of cleaning up a group before queue cleanup code
- * get to the group.
- *
- * Do not call synchronize_rcu() unconditionally as there are drivers
- * which create/delete request queue hundreds of times during scan/boot
- * and synchronize_rcu() can take significant time and slow down boot.
- */
- if (wait)
- synchronize_rcu();
-
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
- /* Free up per cpu stats for root group */
- free_percpu(cfqd->root_group.blkg.stats_cpu);
+#ifndef CONFIG_CFQ_GROUP_IOSCHED
+ kfree(cfqd->root_group);
#endif
+ update_root_blkg_pd(q, BLKIO_POLICY_PROP);
kfree(cfqd);
}
-static void *cfq_init_queue(struct request_queue *q)
+static int cfq_init_queue(struct request_queue *q)
{
struct cfq_data *cfqd;
- int i, j;
- struct cfq_group *cfqg;
- struct cfq_rb_root *st;
+ struct blkio_group *blkg __maybe_unused;
+ int i;
cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
if (!cfqd)
- return NULL;
+ return -ENOMEM;
+
+ cfqd->queue = q;
+ q->elevator->elevator_data = cfqd;
/* Init root service tree */
cfqd->grp_service_tree = CFQ_RB_ROOT;
- /* Init root group */
- cfqg = &cfqd->root_group;
- for_each_cfqg_st(cfqg, i, j, st)
- *st = CFQ_RB_ROOT;
- RB_CLEAR_NODE(&cfqg->rb_node);
-
- /* Give preference to root group over other groups */
- cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT;
-
+ /* Init root group and prefer root group over other groups by default */
#ifdef CONFIG_CFQ_GROUP_IOSCHED
- /*
- * Set root group reference to 2. One reference will be dropped when
- * all groups on cfqd->cfqg_list are being deleted during queue exit.
- * Other reference will remain there as we don't want to delete this
- * group as it is statically allocated and gets destroyed when
- * throtl_data goes away.
- */
- cfqg->ref = 2;
+ rcu_read_lock();
+ spin_lock_irq(q->queue_lock);
- if (blkio_alloc_blkg_stats(&cfqg->blkg)) {
- kfree(cfqg);
+ blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_PROP,
+ true);
+ if (!IS_ERR(blkg))
+ cfqd->root_group = blkg_to_cfqg(blkg);
+
+ spin_unlock_irq(q->queue_lock);
+ rcu_read_unlock();
+#else
+ cfqd->root_group = kzalloc_node(sizeof(*cfqd->root_group),
+ GFP_KERNEL, cfqd->queue->node);
+ if (cfqd->root_group)
+ cfq_init_cfqg_base(cfqd->root_group);
+#endif
+ if (!cfqd->root_group) {
kfree(cfqd);
- return NULL;
+ return -ENOMEM;
}
- rcu_read_lock();
-
- cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
- (void *)cfqd, 0);
- rcu_read_unlock();
- cfqd->nr_blkcg_linked_grps++;
+ cfqd->root_group->weight = 2*BLKIO_WEIGHT_DEFAULT;
- /* Add group on cfqd->cfqg_list */
- hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
-#endif
/*
* Not strictly needed (since RB_ROOT just clears the node and we
* zeroed cfqd on alloc), but better be safe in case someone decides
@@ -3716,13 +3554,17 @@ static void *cfq_init_queue(struct request_queue *q)
/*
* Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
* Grab a permanent reference to it, so that the normal code flow
- * will not attempt to free it.
+ * will not attempt to free it. oom_cfqq is linked to root_group
+ * but shouldn't hold a reference as it'll never be unlinked. Lose
+ * the reference from linking right away.
*/
cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
cfqd->oom_cfqq.ref++;
- cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
- cfqd->queue = q;
+ spin_lock_irq(q->queue_lock);
+ cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, cfqd->root_group);
+ cfqg_put(cfqd->root_group);
+ spin_unlock_irq(q->queue_lock);
init_timer(&cfqd->idle_slice_timer);
cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
@@ -3747,7 +3589,7 @@ static void *cfq_init_queue(struct request_queue *q)
* second, in order to have larger depth for async operations.
*/
cfqd->last_delayed_sync = jiffies - HZ;
- return cfqd;
+ return 0;
}
/*
@@ -3873,13 +3715,12 @@ static struct elevator_type iosched_cfq = {
#ifdef CONFIG_CFQ_GROUP_IOSCHED
static struct blkio_policy_type blkio_policy_cfq = {
.ops = {
- .blkio_unlink_group_fn = cfq_unlink_blkio_group,
+ .blkio_init_group_fn = cfq_init_blkio_group,
.blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
},
.plid = BLKIO_POLICY_PROP,
+ .pdata_size = sizeof(struct cfq_group),
};
-#else
-static struct blkio_policy_type blkio_policy_cfq;
#endif
static int __init cfq_init(void)
@@ -3910,14 +3751,17 @@ static int __init cfq_init(void)
return ret;
}
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
blkio_policy_register(&blkio_policy_cfq);
-
+#endif
return 0;
}
static void __exit cfq_exit(void)
{
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
blkio_policy_unregister(&blkio_policy_cfq);
+#endif
elv_unregister(&iosched_cfq);
kmem_cache_destroy(cfq_pool);
}