From 32e380aedc3de454c06ce1c254fe3bea35a676e1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:14:54 -0800 Subject: blkcg: make CONFIG_BLK_CGROUP bool Block cgroup core can be built as a module; however, it isn't too useful as blk-throttle can only be built-in and cfq-iosched is usually the default built-in scheduler. Scheduled blkcg cleanup requires calling into blkcg from block core. To simplify that, disallow building blkcg as a module by making CONFIG_BLK_CGROUP bool. If building blkcg core as a module really matters, which I doubt, we can revisit it after blkcg API cleanup. -v2: Vivek pointed out that IOSCHED_CFQ was incorrectly updated to depend on BLK_CGROUP. Fixed. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 6f3ace7e792f..355168772f51 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -24,13 +24,7 @@ enum blkio_policy_id { /* Max limits for throttle policy */ #define THROTL_IOPS_MAX UINT_MAX -#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) - -#ifndef CONFIG_BLK_CGROUP -/* When blk-cgroup is a module, its subsys_id isn't a compile-time constant */ -extern struct cgroup_subsys blkio_subsys; -#define blkio_subsys_id blkio_subsys.subsys_id -#endif +#ifdef CONFIG_BLK_CGROUP enum stat_type { /* Total time spent (in ns) between request dispatch to the driver and @@ -303,7 +297,7 @@ static inline void blkiocg_update_idle_time_stats(struct blkio_group *blkg) {} static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg) {} #endif -#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) +#ifdef CONFIG_BLK_CGROUP extern struct blkio_cgroup blkio_root_cgroup; extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk); -- cgit v1.2.3-59-g8ed1b From 72e06c255181537d0b3e1f657a9ed81655d745b1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:00 -0800 Subject: blkcg: shoot down blkio_groups on elevator switch Elevator switch may involve changes to blkcg policies. Implement shooting down of blkio_groups. Combined with the previous bypass updates, the end goal is updating blkcg core such that it can ensure that the blkcgs being affected become quiescent and don't have any per-blkg data hanging around before commencing any policy updates. Until queues are made aware of the policies that apply to them, as an interim step, all per-policy blkg data will be shot down. * blk-throtl doesn't need this change as it can't be disabled for a live queue; however, update it anyway as the scheduled blkg unification requires this behavior change. This means that blk-throtl configuration will be unnecessarily lost over elevator switch. This oddity will be removed after blkcg learns to associate individual policies with request_queues. * blk-throtl doesn't shoot down root_tg. This is to ease transition. Unified blkg will always have a persistent root group and not shooting down root_tg for now eases transition to that point by avoiding having to update td->root_tg and is safe as blk-throtl can never be disabled. -v2: Vivek pointed out that the group list is not guaranteed to be empty on return from the clear function if it raced cgroup removal and lost. Fix it by waiting a bit and retrying.
This kludge will soon be removed once locking is updated such that blkg is never in a limbo state between blkcg and request_queue locks. blk-throtl no longer shoots down root_tg to avoid breaking td->root_tg. Also, nest queue_lock inside blkio_list_lock, not the other way around, to avoid introducing a possible deadlock via the blkcg lock. -v3: blkcg_clear_queue() repositioned and renamed to blkg_destroy_all() to increase consistency with later changes. cfq_clear_queue() updated to check q->elevator before dereferencing it to avoid a NULL dereference on not fully initialized queues (used by a later change). Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 34 +++++++++++++++++++++++++++++++++- block/blk-cgroup.h | 5 ++++- block/blk-throttle.c | 27 +++++++++++++++++++++++++-- block/cfq-iosched.c | 20 +++++++++++++++++++- block/elevator.c | 3 +++ 5 files changed, 84 insertions(+), 5 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 970a717a056f..159aef59589f 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -17,8 +17,9 @@ #include #include #include -#include "blk-cgroup.h" #include +#include +#include "blk-cgroup.h" #define MAX_KEY_LEN 100 @@ -546,6 +547,37 @@ struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) } EXPORT_SYMBOL_GPL(blkiocg_lookup_group); +void blkg_destroy_all(struct request_queue *q) +{ + struct blkio_policy_type *pol; + + while (true) { + bool done = true; + + spin_lock(&blkio_list_lock); + spin_lock_irq(q->queue_lock); + + /* + * clear_queue_fn() might return with non-empty group list + * if it raced cgroup removal and lost. cgroup removal is + * guaranteed to make forward progress and retrying after a + * while is enough. This ugliness is scheduled to be + * removed after locking update. 
+ */ + list_for_each_entry(pol, &blkio_list, list) + if (!pol->ops.blkio_clear_queue_fn(q)) + done = false; + + spin_unlock_irq(q->queue_lock); + spin_unlock(&blkio_list_lock); + + if (done) + break; + + msleep(10); /* just some random duration I like */ + } +} + static void blkio_reset_stats_cpu(struct blkio_group *blkg) { struct blkio_group_stats_cpu *stats_cpu; diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 355168772f51..e5cfcbd4d2f4 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -203,7 +203,7 @@ extern unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev); typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg); - +typedef bool (blkio_clear_queue_fn)(struct request_queue *q); typedef void (blkio_update_group_weight_fn) (void *key, struct blkio_group *blkg, unsigned int weight); typedef void (blkio_update_group_read_bps_fn) (void * key, @@ -217,6 +217,7 @@ typedef void (blkio_update_group_write_iops_fn) (void *key, struct blkio_policy_ops { blkio_unlink_group_fn *blkio_unlink_group_fn; + blkio_clear_queue_fn *blkio_clear_queue_fn; blkio_update_group_weight_fn *blkio_update_group_weight_fn; blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn; blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn; @@ -233,6 +234,7 @@ struct blkio_policy_type { /* Blkio controller policy registration */ extern void blkio_policy_register(struct blkio_policy_type *); extern void blkio_policy_unregister(struct blkio_policy_type *); +extern void blkg_destroy_all(struct request_queue *q); static inline char *blkg_path(struct blkio_group *blkg) { @@ -249,6 +251,7 @@ struct blkio_policy_type { static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { } static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { } +static inline void blkg_destroy_all(struct request_queue *q) { } static inline char *blkg_path(struct blkio_group *blkg) { return NULL; } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 702c0e64e09f..3699ab40d494 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -989,12 +989,17 @@ throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg) td->nr_undestroyed_grps--; } -static void throtl_release_tgs(struct throtl_data *td) +static bool throtl_release_tgs(struct throtl_data *td, bool release_root) { struct hlist_node *pos, *n; struct throtl_grp *tg; + bool empty = true; hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) { + /* skip root? */ + if (!release_root && tg == td->root_tg) + continue; + /* * If cgroup removal path got to blk_group first and removed * it from cgroup list, then it will take care of destroying @@ -1002,7 +1007,10 @@ static void throtl_release_tgs(struct throtl_data *td) */ if (!blkiocg_del_blkio_group(&tg->blkg)) throtl_destroy_tg(td, tg); + else + empty = false; } + return empty; } /* @@ -1029,6 +1037,20 @@ void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg) spin_unlock_irqrestore(td->queue->queue_lock, flags); } +static bool throtl_clear_queue(struct request_queue *q) +{ + lockdep_assert_held(q->queue_lock); + + /* + * Clear tgs but leave the root one alone. This is necessary + * because root_tg is expected to be persistent and safe because + * blk-throtl can never be disabled while @q is alive. This is a + * kludge to prepare for unified blkg. This whole function will be + * removed soon. 
+ */ + return throtl_release_tgs(q->td, false); + } + static void throtl_update_blkio_group_common(struct throtl_data *td, struct throtl_grp *tg) { @@ -1097,6 +1119,7 @@ static void throtl_shutdown_wq(struct request_queue *q) static struct blkio_policy_type blkio_policy_throtl = { .ops = { .blkio_unlink_group_fn = throtl_unlink_blkio_group, + .blkio_clear_queue_fn = throtl_clear_queue, .blkio_update_group_read_bps_fn = throtl_update_blkio_group_read_bps, .blkio_update_group_write_bps_fn = @@ -1282,7 +1305,7 @@ void blk_throtl_exit(struct request_queue *q) throtl_shutdown_wq(q); spin_lock_irq(q->queue_lock); - throtl_release_tgs(td); + throtl_release_tgs(td, true); /* If there are other groups */ if (td->nr_undestroyed_grps > 0) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 72680a6715fc..61693d3404d0 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1225,10 +1225,11 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg) cfq_put_cfqg(cfqg); } -static void cfq_release_cfq_groups(struct cfq_data *cfqd) +static bool cfq_release_cfq_groups(struct cfq_data *cfqd) { struct hlist_node *pos, *n; struct cfq_group *cfqg; + bool empty = true; hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) { /* @@ -1238,7 +1239,10 @@ static void cfq_release_cfq_groups(struct cfq_data *cfqd) */ if (!cfq_blkiocg_del_blkio_group(&cfqg->blkg)) cfq_destroy_cfqg(cfqd, cfqg); + else + empty = false; } + return empty; } /* @@ -1265,6 +1269,19 @@ static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg) spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); } +static struct elevator_type iosched_cfq; + +static bool cfq_clear_queue(struct request_queue *q) +{ + lockdep_assert_held(q->queue_lock); + + /* shoot down blkgs iff the current elevator is cfq */ + if (!q->elevator || q->elevator->type != &iosched_cfq) + return true; + + return cfq_release_cfq_groups(q->elevator->elevator_data); +} + #else /* GROUP_IOSCHED */ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd) { @@ -3875,6 +3892,7 @@ static struct elevator_type iosched_cfq = { static struct blkio_policy_type blkio_policy_cfq = { .ops = { .blkio_unlink_group_fn = cfq_unlink_blkio_group, + .blkio_clear_queue_fn = cfq_clear_queue, .blkio_update_group_weight_fn = cfq_update_blkio_group_weight, }, .plid = BLKIO_POLICY_PROP, diff --git a/block/elevator.c b/block/elevator.c index 0bdea0ed03a3..8c7561fd2c79 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -38,6 +38,7 @@ #include #include "blk.h" +#include "blk-cgroup.h" static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); @@ -894,6 +895,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) ioc_clear_queue(q); spin_unlock_irq(q->queue_lock); + blkg_destroy_all(q); + /* allocate, init and register new elevator */ err = -ENOMEM; q->elevator = elevator_alloc(q, new_e); -- cgit v1.2.3-59-g8ed1b From ca32aefc7f2539ed88d42763330d54ee3e61769a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:03 -0800 Subject: blkcg: use q and plid instead of opaque void * for blkio_group association blkio_group is the association between a block cgroup and a queue for a given policy. Using an opaque void * for the association makes things confusing and hinders factoring of common code. Use request_queue * and, if necessary, policy id instead. This will help block cgroup API cleanup. 
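To make the new association concrete, here is a minimal stand-alone C sketch of the lookup this commit moves to: a blkio_group is matched by its request_queue pointer plus policy id instead of an opaque void * key. Only the struct and field names and the match condition are taken from the hunks below; the flat array, the simplified types, lookup_group() and main() are illustrative stand-ins for the kernel's RCU-protected hlist walk in blkiocg_lookup_group().

/*
 * Toy model: find the group for a (request_queue, policy id) pair.
 * Mirrors the new "blkg->q == q && blkg->plid == plid" test; the array
 * replaces the per-cgroup hlist and there is no RCU or locking here.
 */
#include <stdio.h>
#include <stddef.h>

enum blkio_policy_id { BLKIO_POLICY_PROP, BLKIO_POLICY_THROTL };

struct request_queue { int id; };

struct blkio_group {
	struct request_queue *q;	/* was: void *key */
	enum blkio_policy_id plid;
};

static struct blkio_group *lookup_group(struct blkio_group *groups, size_t nr,
					struct request_queue *q,
					enum blkio_policy_id plid)
{
	for (size_t i = 0; i < nr; i++)
		if (groups[i].q == q && groups[i].plid == plid)
			return &groups[i];
	return NULL;
}

int main(void)
{
	struct request_queue q0 = { 0 }, q1 = { 1 };
	struct blkio_group groups[] = {
		{ &q0, BLKIO_POLICY_PROP },
		{ &q0, BLKIO_POLICY_THROTL },
		{ &q1, BLKIO_POLICY_PROP },
	};
	struct blkio_group *blkg = lookup_group(groups, 3, &q0, BLKIO_POLICY_THROTL);

	printf("q0/THROTL group found: %s\n", blkg ? "yes" : "no");
	return 0;
}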
Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 32 +++++++++++++++----------------- block/blk-cgroup.h | 22 ++++++++++++---------- block/blk-throttle.c | 50 +++++++++++++++++++++++--------------------------- block/cfq-iosched.c | 30 ++++++++++++++++-------------- block/cfq.h | 7 ++++--- 5 files changed, 70 insertions(+), 71 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 159aef59589f..5e50ca1f5b47 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -129,7 +129,7 @@ blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight) if (blkiop->plid != blkg->plid) continue; if (blkiop->ops.blkio_update_group_weight_fn) - blkiop->ops.blkio_update_group_weight_fn(blkg->key, + blkiop->ops.blkio_update_group_weight_fn(blkg->q, blkg, weight); } } @@ -147,12 +147,12 @@ static inline void blkio_update_group_bps(struct blkio_group *blkg, u64 bps, if (fileid == BLKIO_THROTL_read_bps_device && blkiop->ops.blkio_update_group_read_bps_fn) - blkiop->ops.blkio_update_group_read_bps_fn(blkg->key, + blkiop->ops.blkio_update_group_read_bps_fn(blkg->q, blkg, bps); if (fileid == BLKIO_THROTL_write_bps_device && blkiop->ops.blkio_update_group_write_bps_fn) - blkiop->ops.blkio_update_group_write_bps_fn(blkg->key, + blkiop->ops.blkio_update_group_write_bps_fn(blkg->q, blkg, bps); } } @@ -170,12 +170,12 @@ static inline void blkio_update_group_iops(struct blkio_group *blkg, if (fileid == BLKIO_THROTL_read_iops_device && blkiop->ops.blkio_update_group_read_iops_fn) - blkiop->ops.blkio_update_group_read_iops_fn(blkg->key, + blkiop->ops.blkio_update_group_read_iops_fn(blkg->q, blkg, iops); if (fileid == BLKIO_THROTL_write_iops_device && blkiop->ops.blkio_update_group_write_iops_fn) - blkiop->ops.blkio_update_group_write_iops_fn(blkg->key, + blkiop->ops.blkio_update_group_write_iops_fn(blkg->q, blkg,iops); } } @@ -478,14 +478,14 @@ int blkio_alloc_blkg_stats(struct blkio_group *blkg) EXPORT_SYMBOL_GPL(blkio_alloc_blkg_stats); void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, - struct blkio_group *blkg, void *key, dev_t dev, + struct blkio_group *blkg, struct request_queue *q, dev_t dev, enum blkio_policy_id plid) { unsigned long flags; spin_lock_irqsave(&blkcg->lock, flags); spin_lock_init(&blkg->stats_lock); - rcu_assign_pointer(blkg->key, key); + rcu_assign_pointer(blkg->q, q); blkg->blkcg_id = css_id(&blkcg->css); hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); blkg->plid = plid; @@ -531,18 +531,16 @@ int blkiocg_del_blkio_group(struct blkio_group *blkg) EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group); /* called under rcu_read_lock(). 
*/ -struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) +struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, + struct request_queue *q, + enum blkio_policy_id plid) { struct blkio_group *blkg; struct hlist_node *n; - void *__key; - hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) { - __key = blkg->key; - if (__key == key) + hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) + if (blkg->q == q && blkg->plid == plid) return blkg; - } - return NULL; } EXPORT_SYMBOL_GPL(blkiocg_lookup_group); @@ -1582,7 +1580,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); unsigned long flags; struct blkio_group *blkg; - void *key; + struct request_queue *q; struct blkio_policy_type *blkiop; struct blkio_policy_node *pn, *pntmp; @@ -1597,7 +1595,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group, blkcg_node); - key = rcu_dereference(blkg->key); + q = rcu_dereference(blkg->q); __blkiocg_del_blkio_group(blkg); spin_unlock_irqrestore(&blkcg->lock, flags); @@ -1611,7 +1609,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) list_for_each_entry(blkiop, &blkio_list, list) { if (blkiop->plid != blkg->plid) continue; - blkiop->ops.blkio_unlink_group_fn(key, blkg); + blkiop->ops.blkio_unlink_group_fn(q, blkg); } spin_unlock(&blkio_list_lock); } while (1); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index e5cfcbd4d2f4..41c960b99c57 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -153,8 +153,8 @@ struct blkio_group_stats_cpu { }; struct blkio_group { - /* An rcu protected unique identifier for the group */ - void *key; + /* Pointer to the associated request_queue, RCU protected */ + struct request_queue __rcu *q; struct hlist_node blkcg_node; unsigned short blkcg_id; /* Store cgroup path */ @@ -202,17 +202,18 @@ extern unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, extern unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev); -typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg); +typedef void (blkio_unlink_group_fn)(struct request_queue *q, + struct blkio_group *blkg); typedef bool (blkio_clear_queue_fn)(struct request_queue *q); -typedef void (blkio_update_group_weight_fn) (void *key, +typedef void (blkio_update_group_weight_fn)(struct request_queue *q, struct blkio_group *blkg, unsigned int weight); -typedef void (blkio_update_group_read_bps_fn) (void * key, +typedef void (blkio_update_group_read_bps_fn)(struct request_queue *q, struct blkio_group *blkg, u64 read_bps); -typedef void (blkio_update_group_write_bps_fn) (void *key, +typedef void (blkio_update_group_write_bps_fn)(struct request_queue *q, struct blkio_group *blkg, u64 write_bps); -typedef void (blkio_update_group_read_iops_fn) (void *key, +typedef void (blkio_update_group_read_iops_fn)(struct request_queue *q, struct blkio_group *blkg, unsigned int read_iops); -typedef void (blkio_update_group_write_iops_fn) (void *key, +typedef void (blkio_update_group_write_iops_fn)(struct request_queue *q, struct blkio_group *blkg, unsigned int write_iops); struct blkio_policy_ops { @@ -305,12 +306,13 @@ extern struct blkio_cgroup blkio_root_cgroup; extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk); 
extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, - struct blkio_group *blkg, void *key, dev_t dev, + struct blkio_group *blkg, struct request_queue *q, dev_t dev, enum blkio_policy_id plid); extern int blkio_alloc_blkg_stats(struct blkio_group *blkg); extern int blkiocg_del_blkio_group(struct blkio_group *blkg); extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, - void *key); + struct request_queue *q, + enum blkio_policy_id plid); void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time, unsigned long unaccounted_time); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index c252df9169db..6613de78e364 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -252,7 +252,7 @@ static void throtl_init_add_tg_lists(struct throtl_data *td, __throtl_tg_fill_dev_details(td, tg); /* Add group onto cgroup list */ - blkiocg_add_blkio_group(blkcg, &tg->blkg, (void *)td, + blkiocg_add_blkio_group(blkcg, &tg->blkg, td->queue, tg->blkg.dev, BLKIO_POLICY_THROTL); tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev); @@ -288,7 +288,6 @@ static struct throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) { struct throtl_grp *tg = NULL; - void *key = td; /* * This is the common case when there are no blkio cgroups. @@ -297,7 +296,8 @@ throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) if (blkcg == &blkio_root_cgroup) tg = td->root_tg; else - tg = tg_of_blkg(blkiocg_lookup_group(blkcg, key)); + tg = tg_of_blkg(blkiocg_lookup_group(blkcg, td->queue, + BLKIO_POLICY_THROTL)); __throtl_tg_fill_dev_details(td, tg); return tg; @@ -1012,22 +1012,22 @@ static bool throtl_release_tgs(struct throtl_data *td, bool release_root) * no new IO will come in this group. So get rid of this group as soon as * any pending IO in the group is finished. * - * This function is called under rcu_read_lock(). key is the rcu protected - * pointer. That means "key" is a valid throtl_data pointer as long as we are - * rcu read lock. + * This function is called under rcu_read_lock(). @q is the rcu protected + * pointer. That means @q is a valid request_queue pointer as long as we + * are rcu read lock. * - * "key" was fetched from blkio_group under blkio_cgroup->lock. That means + * @q was fetched from blkio_group under blkio_cgroup->lock. That means * it should not be NULL as even if queue was going away, cgroup deltion * path got to it first. */ -void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg) +void throtl_unlink_blkio_group(struct request_queue *q, + struct blkio_group *blkg) { unsigned long flags; - struct throtl_data *td = key; - spin_lock_irqsave(td->queue->queue_lock, flags); - throtl_destroy_tg(td, tg_of_blkg(blkg)); - spin_unlock_irqrestore(td->queue->queue_lock, flags); + spin_lock_irqsave(q->queue_lock, flags); + throtl_destroy_tg(q->td, tg_of_blkg(blkg)); + spin_unlock_irqrestore(q->queue_lock, flags); } static bool throtl_clear_queue(struct request_queue *q) @@ -1054,52 +1054,48 @@ static void throtl_update_blkio_group_common(struct throtl_data *td, } /* - * For all update functions, key should be a valid pointer because these + * For all update functions, @q should be a valid pointer because these * update functions are called under blkcg_lock, that means, blkg is - * valid and in turn key is valid. queue exit path can not race because + * valid and in turn @q is valid. 
queue exit path can not race because * of blkcg_lock * * Can not take queue lock in update functions as queue lock under blkcg_lock * is not allowed. Under other paths we take blkcg_lock under queue_lock. */ -static void throtl_update_blkio_group_read_bps(void *key, +static void throtl_update_blkio_group_read_bps(struct request_queue *q, struct blkio_group *blkg, u64 read_bps) { - struct throtl_data *td = key; struct throtl_grp *tg = tg_of_blkg(blkg); tg->bps[READ] = read_bps; - throtl_update_blkio_group_common(td, tg); + throtl_update_blkio_group_common(q->td, tg); } -static void throtl_update_blkio_group_write_bps(void *key, +static void throtl_update_blkio_group_write_bps(struct request_queue *q, struct blkio_group *blkg, u64 write_bps) { - struct throtl_data *td = key; struct throtl_grp *tg = tg_of_blkg(blkg); tg->bps[WRITE] = write_bps; - throtl_update_blkio_group_common(td, tg); + throtl_update_blkio_group_common(q->td, tg); } -static void throtl_update_blkio_group_read_iops(void *key, +static void throtl_update_blkio_group_read_iops(struct request_queue *q, struct blkio_group *blkg, unsigned int read_iops) { - struct throtl_data *td = key; struct throtl_grp *tg = tg_of_blkg(blkg); tg->iops[READ] = read_iops; - throtl_update_blkio_group_common(td, tg); + throtl_update_blkio_group_common(q->td, tg); } -static void throtl_update_blkio_group_write_iops(void *key, +static void throtl_update_blkio_group_write_iops(struct request_queue *q, struct blkio_group *blkg, unsigned int write_iops) { - struct throtl_data *td = key; struct throtl_grp *tg = tg_of_blkg(blkg); tg->iops[WRITE] = write_iops; - throtl_update_blkio_group_common(td, tg); + throtl_update_blkio_group_common(q->td, tg); } static void throtl_shutdown_wq(struct request_queue *q) @@ -1306,7 +1302,7 @@ void blk_throtl_exit(struct request_queue *q) spin_unlock_irq(q->queue_lock); /* - * Wait for tg->blkg->key accessors to exit their grace periods. + * Wait for tg->blkg->q accessors to exit their grace periods. * Do this wait only if there are other undestroyed groups out * there (other than root group). 
This can happen if cgroup deletion * path claimed the responsibility of cleaning up a group before diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 0f7a81fc7c73..37e2da9cbb09 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1020,7 +1020,8 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg) return NULL; } -static void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg, +static void cfq_update_blkio_group_weight(struct request_queue *q, + struct blkio_group *blkg, unsigned int weight) { struct cfq_group *cfqg = cfqg_of_blkg(blkg); @@ -1043,10 +1044,10 @@ static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd, if (bdi->dev) { sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, - (void *)cfqd, MKDEV(major, minor)); + cfqd->queue, MKDEV(major, minor)); } else cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, - (void *)cfqd, 0); + cfqd->queue, 0); cfqd->nr_blkcg_linked_grps++; cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev); @@ -1097,7 +1098,6 @@ static struct cfq_group * cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg) { struct cfq_group *cfqg = NULL; - void *key = cfqd; struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info; unsigned int major, minor; @@ -1108,7 +1108,8 @@ cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg) if (blkcg == &blkio_root_cgroup) cfqg = &cfqd->root_group; else - cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key)); + cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, cfqd->queue, + BLKIO_POLICY_PROP)); if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) { sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); @@ -1247,21 +1248,22 @@ static bool cfq_release_cfq_groups(struct cfq_data *cfqd) * any pending IO in the group is finished. * * This function is called under rcu_read_lock(). key is the rcu protected - * pointer. That means "key" is a valid cfq_data pointer as long as we are rcu - * read lock. + * pointer. That means @q is a valid request_queue pointer as long as we + * are rcu read lock. * - * "key" was fetched from blkio_group under blkio_cgroup->lock. That means + * @q was fetched from blkio_group under blkio_cgroup->lock. That means * it should not be NULL as even if elevator was exiting, cgroup deltion * path got to it first. 
*/ -static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg) +static void cfq_unlink_blkio_group(struct request_queue *q, + struct blkio_group *blkg) { - unsigned long flags; - struct cfq_data *cfqd = key; + struct cfq_data *cfqd = q->elevator->elevator_data; + unsigned long flags; - spin_lock_irqsave(cfqd->queue->queue_lock, flags); + spin_lock_irqsave(q->queue_lock, flags); cfq_destroy_cfqg(cfqd, cfqg_of_blkg(blkg)); - spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); + spin_unlock_irqrestore(q->queue_lock, flags); } static struct elevator_type iosched_cfq; @@ -3718,7 +3720,7 @@ static int cfq_init_queue(struct request_queue *q) rcu_read_lock(); cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, - (void *)cfqd, 0); + cfqd->queue, 0); rcu_read_unlock(); cfqd->nr_blkcg_linked_grps++; diff --git a/block/cfq.h b/block/cfq.h index 2a155927e37c..343b78a61df3 100644 --- a/block/cfq.h +++ b/block/cfq.h @@ -68,8 +68,9 @@ static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, } static inline void cfq_blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, - struct blkio_group *blkg, void *key, dev_t dev) { - blkiocg_add_blkio_group(blkcg, blkg, key, dev, BLKIO_POLICY_PROP); + struct blkio_group *blkg, struct request_queue *q, dev_t dev) +{ + blkiocg_add_blkio_group(blkcg, blkg, q, dev, BLKIO_POLICY_PROP); } static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg) @@ -105,7 +106,7 @@ static inline void cfq_blkiocg_update_dispatch_stats(struct blkio_group *blkg, static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, uint64_t start_time, uint64_t io_start_time, bool direction, bool sync) {} static inline void cfq_blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, - struct blkio_group *blkg, void *key, dev_t dev) {} + struct blkio_group *blkg, struct request_queue *q, dev_t dev) {} static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; -- cgit v1.2.3-59-g8ed1b From 035d10b2fa7e5f7e9bf9465dbc39c35affd5ac32 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:04 -0800 Subject: blkcg: add blkio_policy[] array and allow one policy per policy ID Block cgroup policies are maintained in a linked list and, theoretically, multiple policies sharing the same policy ID are allowed. This patch temporarily restricts one policy per plid and adds blkio_policy[] array which indexes registered policy types by plid. Both the restriction and blkio_policy[] array are transitional and will be removed once API cleanup is complete. 
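As a quick illustration of the one-policy-per-plid rule this message describes, a small user-space sketch follows; the enum and the plid-indexed array mirror the patch below, while assert() stands in for the kernel's BUG_ON() and the blkio_list_lock is omitted.

/* Toy model of plid-indexed policy registration. */
#include <assert.h>
#include <stddef.h>

enum blkio_policy_id {
	BLKIO_POLICY_PROP = 0,
	BLKIO_POLICY_THROTL,
	BLKIO_NR_POLICIES,
};

struct blkio_policy_type {
	enum blkio_policy_id plid;
	const char *name;
};

static struct blkio_policy_type *blkio_policy[BLKIO_NR_POLICIES];

static void blkio_policy_register(struct blkio_policy_type *blkiop)
{
	assert(!blkio_policy[blkiop->plid]);	/* only one policy per plid */
	blkio_policy[blkiop->plid] = blkiop;
}

static void blkio_policy_unregister(struct blkio_policy_type *blkiop)
{
	assert(blkio_policy[blkiop->plid] == blkiop);
	blkio_policy[blkiop->plid] = NULL;
}

int main(void)
{
	struct blkio_policy_type throtl = { BLKIO_POLICY_THROTL, "throttle" };

	blkio_policy_register(&throtl);
	/* registering a second policy with the same plid would trip the assert */
	blkio_policy_unregister(&throtl);
	return 0;
}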
Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 10 ++++++++++ block/blk-cgroup.h | 2 ++ 2 files changed, 12 insertions(+) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 5e50ca1f5b47..f1b08d3cba55 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -29,6 +29,8 @@ static LIST_HEAD(blkio_list); struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT }; EXPORT_SYMBOL_GPL(blkio_root_cgroup); +static struct blkio_policy_type *blkio_policy[BLKIO_NR_POLICIES]; + static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *, struct cgroup *); static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *, @@ -1694,7 +1696,11 @@ static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, void blkio_policy_register(struct blkio_policy_type *blkiop) { spin_lock(&blkio_list_lock); + + BUG_ON(blkio_policy[blkiop->plid]); + blkio_policy[blkiop->plid] = blkiop; list_add_tail(&blkiop->list, &blkio_list); + spin_unlock(&blkio_list_lock); } EXPORT_SYMBOL_GPL(blkio_policy_register); @@ -1702,7 +1708,11 @@ EXPORT_SYMBOL_GPL(blkio_policy_register); void blkio_policy_unregister(struct blkio_policy_type *blkiop) { spin_lock(&blkio_list_lock); + + BUG_ON(blkio_policy[blkiop->plid] != blkiop); + blkio_policy[blkiop->plid] = NULL; list_del_init(&blkiop->list); + spin_unlock(&blkio_list_lock); } EXPORT_SYMBOL_GPL(blkio_policy_unregister); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 41c960b99c57..562fa55d97b1 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -19,6 +19,8 @@ enum blkio_policy_id { BLKIO_POLICY_PROP = 0, /* Proportional Bandwidth division */ BLKIO_POLICY_THROTL, /* Throttling */ + + BLKIO_NR_POLICIES, }; /* Max limits for throttle policy */ -- cgit v1.2.3-59-g8ed1b From cd1604fab4f95f7cfc227d3955fd7ae14da61f38 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:06 -0800 Subject: blkcg: factor out blkio_group creation Currently both blk-throttle and cfq-iosched implement their own blkio_group creation code in throtl_get_tg() and cfq_get_cfqg(). This patch factors out the common code into blkg_lookup_create(), which returns an ERR_PTR value so that transitional failures due to queue bypass can be distinguished from other failures. * New blkio_policy_ops methods blkio_alloc_group_fn() and blkio_link_group_fn added. Both are transitional and will be removed once the blkg management code is fully moved into blk-cgroup.c. * blkio_alloc_group_fn() allocates a policy-specific blkg which is usually a larger data structure with blkg as the first entry and initializes it. Note that initialization of blkg proper, including percpu stats, is the responsibility of blk-cgroup proper. Note that default config (weight, bps...) initialization is done from this method; otherwise, we end up violating locking order between blkcg and q locks via blkcg_get_CONF() functions. * blkio_link_group_fn() is called under queue_lock and is responsible for linking the blkg to the queue. The blkcg side is handled by blk-cgroup proper. * The common blkg creation function is named blkg_lookup_create() and blkiocg_lookup_group() is renamed to blkg_lookup() for consistency. Also, throtl / cfq related functions are similarly [re]named for consistency. This simplifies blkcg policy implementations and enables further cleanup. 
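A compact, self-contained sketch of the contract blkg_lookup_create() gives its callers: return an existing group if one is found, create one otherwise, and report transitional failures as error pointers the caller can tell apart from success. The ERR_PTR()/IS_ERR() macros are simplified re-implementations for this toy, and the static pool and bypass flag replace the kernel's allocation, RCU and queue locking, so treat this as an outline of the behavior rather than the actual code.

#include <stdio.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

/* minimal stand-ins for the kernel's error-pointer helpers */
#define ERR_PTR(err)	((void *)(intptr_t)(err))
#define IS_ERR(ptr)	((uintptr_t)(ptr) >= (uintptr_t)-4095)
#define PTR_ERR(ptr)	((long)(intptr_t)(ptr))

struct blkio_group { int q_id; bool used; };

static struct blkio_group pool[4];

static struct blkio_group *blkg_lookup_create(int q_id, bool bypassing)
{
	size_t i;

	/* a bypassing queue refuses to create new groups */
	if (bypassing)
		return ERR_PTR(-EBUSY);

	for (i = 0; i < 4; i++)			/* lookup first */
		if (pool[i].used && pool[i].q_id == q_id)
			return &pool[i];

	for (i = 0; i < 4; i++) {		/* otherwise create */
		if (!pool[i].used) {
			pool[i].used = true;
			pool[i].q_id = q_id;
			return &pool[i];
		}
	}
	return ERR_PTR(-ENOMEM);
}

int main(void)
{
	struct blkio_group *blkg = blkg_lookup_create(1, false);

	if (IS_ERR(blkg))
		printf("create failed: %ld\n", PTR_ERR(blkg));
	else
		printf("got group for queue %d\n", blkg->q_id);

	blkg = blkg_lookup_create(2, true);	/* bypassing queue */
	if (IS_ERR(blkg))
		printf("bypassing queue: error %ld\n", PTR_ERR(blkg));
	return 0;
}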
-v2: Vivek noticed that blkg_lookup_create() incorrectly tested blk_queue_dead() instead of blk_queue_bypass(), leading a user of the function to end up creating a new blkg on a bypassing queue. This is a bug introduced while relocating bypass patches before this one. Fixed. -v3: ERR_PTR patch folded into this one. @for_root added to blkg_lookup_create() to allow creating the root group on a bypassed queue during elevator switch. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 117 +++++++++++++++++++++++++++----------- block/blk-cgroup.h | 30 +++++----- block/blk-throttle.c | 155 +++++++++++++++++---------------------------- block/cfq-iosched.c | 131 +++++++++++++------------------------------ block/cfq.h | 8 --- 5 files changed, 193 insertions(+), 248 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index f1b08d3cba55..bc9891496318 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -465,38 +465,93 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction, } EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats); -/* - * This function allocates the per cpu stats for blkio_group. Should be called - * from sleepable context as alloc_per_cpu() requires that. - */ -int blkio_alloc_blkg_stats(struct blkio_group *blkg) +struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, + struct request_queue *q, + enum blkio_policy_id plid, + bool for_root) + __releases(q->queue_lock) __acquires(q->queue_lock) { - /* Allocate memory for per cpu stats */ - blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu); - if (!blkg->stats_cpu) - return -ENOMEM; - return 0; -} -EXPORT_SYMBOL_GPL(blkio_alloc_blkg_stats); + struct blkio_policy_type *pol = blkio_policy[plid]; + struct blkio_group *blkg, *new_blkg; -void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, - struct blkio_group *blkg, struct request_queue *q, dev_t dev, - enum blkio_policy_id plid) -{ - unsigned long flags; + WARN_ON_ONCE(!rcu_read_lock_held()); + lockdep_assert_held(q->queue_lock); - spin_lock_irqsave(&blkcg->lock, flags); - spin_lock_init(&blkg->stats_lock); - rcu_assign_pointer(blkg->q, q); - blkg->blkcg_id = css_id(&blkcg->css); + /* + * This could be the first entry point of blkcg implementation and + * we shouldn't allow anything to go through for a bypassing queue. + * The following can be removed if blkg lookup is guaranteed to + * fail on a bypassing queue. + */ + if (unlikely(blk_queue_bypass(q)) && !for_root) + return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY); + + blkg = blkg_lookup(blkcg, q, plid); + if (blkg) + return blkg; + + if (!css_tryget(&blkcg->css)) + return ERR_PTR(-EINVAL); + + /* + * Allocate and initialize. + * + * FIXME: The following is broken. Percpu memory allocation + * requires %GFP_KERNEL context and can't be performed from IO + * path. Allocation here should inherently be atomic and the + * following lock dancing can be removed once the broken percpu + * allocation is fixed. 
+ */ + spin_unlock_irq(q->queue_lock); + rcu_read_unlock(); + + new_blkg = pol->ops.blkio_alloc_group_fn(q, blkcg); + if (new_blkg) { + new_blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu); + + spin_lock_init(&new_blkg->stats_lock); + rcu_assign_pointer(new_blkg->q, q); + new_blkg->blkcg_id = css_id(&blkcg->css); + new_blkg->plid = plid; + cgroup_path(blkcg->css.cgroup, new_blkg->path, + sizeof(new_blkg->path)); + } + + rcu_read_lock(); + spin_lock_irq(q->queue_lock); + css_put(&blkcg->css); + + /* did bypass get turned on inbetween? */ + if (unlikely(blk_queue_bypass(q)) && !for_root) { + blkg = ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY); + goto out; + } + + /* did someone beat us to it? */ + blkg = blkg_lookup(blkcg, q, plid); + if (unlikely(blkg)) + goto out; + + /* did alloc fail? */ + if (unlikely(!new_blkg || !new_blkg->stats_cpu)) { + blkg = ERR_PTR(-ENOMEM); + goto out; + } + + /* insert */ + spin_lock(&blkcg->lock); + swap(blkg, new_blkg); hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); - blkg->plid = plid; - spin_unlock_irqrestore(&blkcg->lock, flags); - /* Need to take css reference ? */ - cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path)); - blkg->dev = dev; + pol->ops.blkio_link_group_fn(q, blkg); + spin_unlock(&blkcg->lock); +out: + if (new_blkg) { + free_percpu(new_blkg->stats_cpu); + kfree(new_blkg); + } + return blkg; } -EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group); +EXPORT_SYMBOL_GPL(blkg_lookup_create); static void __blkiocg_del_blkio_group(struct blkio_group *blkg) { @@ -533,9 +588,9 @@ int blkiocg_del_blkio_group(struct blkio_group *blkg) EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group); /* called under rcu_read_lock(). */ -struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, - struct request_queue *q, - enum blkio_policy_id plid) +struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, + struct request_queue *q, + enum blkio_policy_id plid) { struct blkio_group *blkg; struct hlist_node *n; @@ -545,7 +600,7 @@ struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, return blkg; return NULL; } -EXPORT_SYMBOL_GPL(blkiocg_lookup_group); +EXPORT_SYMBOL_GPL(blkg_lookup); void blkg_destroy_all(struct request_queue *q) { diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 562fa55d97b1..2600ae7e6f60 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -204,6 +204,10 @@ extern unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, extern unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev); +typedef struct blkio_group *(blkio_alloc_group_fn)(struct request_queue *q, + struct blkio_cgroup *blkcg); +typedef void (blkio_link_group_fn)(struct request_queue *q, + struct blkio_group *blkg); typedef void (blkio_unlink_group_fn)(struct request_queue *q, struct blkio_group *blkg); typedef bool (blkio_clear_queue_fn)(struct request_queue *q); @@ -219,6 +223,8 @@ typedef void (blkio_update_group_write_iops_fn)(struct request_queue *q, struct blkio_group *blkg, unsigned int write_iops); struct blkio_policy_ops { + blkio_alloc_group_fn *blkio_alloc_group_fn; + blkio_link_group_fn *blkio_link_group_fn; blkio_unlink_group_fn *blkio_unlink_group_fn; blkio_clear_queue_fn *blkio_clear_queue_fn; blkio_update_group_weight_fn *blkio_update_group_weight_fn; @@ -307,14 +313,14 @@ static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg) {} extern struct blkio_cgroup blkio_root_cgroup; extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); extern struct blkio_cgroup 
*task_blkio_cgroup(struct task_struct *tsk); -extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, - struct blkio_group *blkg, struct request_queue *q, dev_t dev, - enum blkio_policy_id plid); -extern int blkio_alloc_blkg_stats(struct blkio_group *blkg); extern int blkiocg_del_blkio_group(struct blkio_group *blkg); -extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, - struct request_queue *q, - enum blkio_policy_id plid); +extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, + struct request_queue *q, + enum blkio_policy_id plid); +struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, + struct request_queue *q, + enum blkio_policy_id plid, + bool for_root); void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time, unsigned long unaccounted_time); @@ -335,17 +341,11 @@ cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; } static inline struct blkio_cgroup * task_blkio_cgroup(struct task_struct *tsk) { return NULL; } -static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, - struct blkio_group *blkg, void *key, dev_t dev, - enum blkio_policy_id plid) {} - -static inline int blkio_alloc_blkg_stats(struct blkio_group *blkg) { return 0; } - static inline int blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; } -static inline struct blkio_group * -blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; } +static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, + void *key) { return NULL; } static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time, unsigned long unaccounted_time) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index aeeb798d1cda..2ae637b9e80c 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -181,17 +181,25 @@ static void throtl_put_tg(struct throtl_grp *tg) call_rcu(&tg->rcu_head, throtl_free_tg); } -static void throtl_init_group(struct throtl_grp *tg) +static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q, + struct blkio_cgroup *blkcg) { + struct throtl_grp *tg; + + tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, q->node); + if (!tg) + return NULL; + INIT_HLIST_NODE(&tg->tg_node); RB_CLEAR_NODE(&tg->rb_node); bio_list_init(&tg->bio_lists[0]); bio_list_init(&tg->bio_lists[1]); tg->limits_changed = false; - /* Practically unlimited BW */ - tg->bps[0] = tg->bps[1] = -1; - tg->iops[0] = tg->iops[1] = -1; + tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev); + tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev); + tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev); + tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev); /* * Take the initial reference that will be released on destroy @@ -200,14 +208,8 @@ static void throtl_init_group(struct throtl_grp *tg) * exit or cgroup deletion path depending on who is exiting first. 
*/ atomic_set(&tg->ref, 1); -} -/* Should be called with rcu read lock held (needed for blkcg) */ -static void -throtl_add_group_to_td_list(struct throtl_data *td, struct throtl_grp *tg) -{ - hlist_add_head(&tg->tg_node, &td->tg_list); - td->nr_undestroyed_grps++; + return &tg->blkg; } static void @@ -246,119 +248,62 @@ throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg) spin_unlock_irq(td->queue->queue_lock); } -static void throtl_init_add_tg_lists(struct throtl_data *td, - struct throtl_grp *tg, struct blkio_cgroup *blkcg) +static void throtl_link_blkio_group(struct request_queue *q, + struct blkio_group *blkg) { - __throtl_tg_fill_dev_details(td, tg); - - /* Add group onto cgroup list */ - blkiocg_add_blkio_group(blkcg, &tg->blkg, td->queue, - tg->blkg.dev, BLKIO_POLICY_THROTL); - - tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev); - tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev); - tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev); - tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev); - - throtl_add_group_to_td_list(td, tg); -} - -/* Should be called without queue lock and outside of rcu period */ -static struct throtl_grp *throtl_alloc_tg(struct throtl_data *td) -{ - struct throtl_grp *tg = NULL; - int ret; - - tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node); - if (!tg) - return NULL; - - ret = blkio_alloc_blkg_stats(&tg->blkg); + struct throtl_data *td = q->td; + struct throtl_grp *tg = tg_of_blkg(blkg); - if (ret) { - kfree(tg); - return NULL; - } + __throtl_tg_fill_dev_details(td, tg); - throtl_init_group(tg); - return tg; + hlist_add_head(&tg->tg_node, &td->tg_list); + td->nr_undestroyed_grps++; } static struct -throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) +throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) { struct throtl_grp *tg = NULL; /* * This is the common case when there are no blkio cgroups. - * Avoid lookup in this case - */ + * Avoid lookup in this case + */ if (blkcg == &blkio_root_cgroup) tg = td->root_tg; else - tg = tg_of_blkg(blkiocg_lookup_group(blkcg, td->queue, - BLKIO_POLICY_THROTL)); + tg = tg_of_blkg(blkg_lookup(blkcg, td->queue, + BLKIO_POLICY_THROTL)); __throtl_tg_fill_dev_details(td, tg); return tg; } -static struct throtl_grp *throtl_get_tg(struct throtl_data *td, - struct blkio_cgroup *blkcg) +static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, + struct blkio_cgroup *blkcg) { - struct throtl_grp *tg = NULL, *__tg = NULL; struct request_queue *q = td->queue; - - /* no throttling for dead queue */ - if (unlikely(blk_queue_bypass(q))) - return NULL; - - tg = throtl_find_tg(td, blkcg); - if (tg) - return tg; - - if (!css_tryget(&blkcg->css)) - return NULL; - - /* - * Need to allocate a group. Allocation of group also needs allocation - * of per cpu stats which in-turn takes a mutex() and can block. Hence - * we need to drop rcu lock and queue_lock before we call alloc. - */ - spin_unlock_irq(q->queue_lock); - rcu_read_unlock(); - - tg = throtl_alloc_tg(td); - - /* Group allocated and queue is still alive. take the lock */ - rcu_read_lock(); - spin_lock_irq(q->queue_lock); - css_put(&blkcg->css); - - /* Make sure @q is still alive */ - if (unlikely(blk_queue_bypass(q))) { - kfree(tg); - return NULL; - } + struct throtl_grp *tg = NULL; /* - * If some other thread already allocated the group while we were - * not holding queue lock, free up the group + * This is the common case when there are no blkio cgroups. 
+ * Avoid lookup in this case */ - __tg = throtl_find_tg(td, blkcg); + if (blkcg == &blkio_root_cgroup) { + tg = td->root_tg; + } else { + struct blkio_group *blkg; - if (__tg) { - kfree(tg); - return __tg; - } + blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_THROTL, false); - /* Group allocation failed. Account the IO to root group */ - if (!tg) { - tg = td->root_tg; - return tg; + /* if %NULL and @q is alive, fall back to root_tg */ + if (!IS_ERR(blkg)) + tg = tg_of_blkg(blkg); + else if (!blk_queue_dead(q)) + tg = td->root_tg; } - throtl_init_add_tg_lists(td, tg, blkcg); + __throtl_tg_fill_dev_details(td, tg); return tg; } @@ -1107,6 +1052,8 @@ static void throtl_shutdown_wq(struct request_queue *q) static struct blkio_policy_type blkio_policy_throtl = { .ops = { + .blkio_alloc_group_fn = throtl_alloc_blkio_group, + .blkio_link_group_fn = throtl_link_blkio_group, .blkio_unlink_group_fn = throtl_unlink_blkio_group, .blkio_clear_queue_fn = throtl_clear_queue, .blkio_update_group_read_bps_fn = @@ -1141,7 +1088,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) */ rcu_read_lock(); blkcg = task_blkio_cgroup(current); - tg = throtl_find_tg(td, blkcg); + tg = throtl_lookup_tg(td, blkcg); if (tg) { throtl_tg_fill_dev_details(td, tg); @@ -1157,7 +1104,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) * IO group */ spin_lock_irq(q->queue_lock); - tg = throtl_get_tg(td, blkcg); + tg = throtl_lookup_create_tg(td, blkcg); if (unlikely(!tg)) goto out_unlock; @@ -1252,6 +1199,7 @@ void blk_throtl_drain(struct request_queue *q) int blk_throtl_init(struct request_queue *q) { struct throtl_data *td; + struct blkio_group *blkg; td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node); if (!td) @@ -1262,13 +1210,17 @@ int blk_throtl_init(struct request_queue *q) td->limits_changed = false; INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work); - /* alloc and Init root group. */ + q->td = td; td->queue = q; + /* alloc and init root group. 
*/ rcu_read_lock(); spin_lock_irq(q->queue_lock); - td->root_tg = throtl_get_tg(td, &blkio_root_cgroup); + blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_THROTL, + true); + if (!IS_ERR(blkg)) + td->root_tg = tg_of_blkg(blkg); spin_unlock_irq(q->queue_lock); rcu_read_unlock(); @@ -1277,9 +1229,6 @@ int blk_throtl_init(struct request_queue *q) kfree(td); return -ENOMEM; } - - /* Attach throtl data to request queue */ - q->td = td; return 0; } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 1c3f41b9d5dd..acef564578c3 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1048,10 +1048,12 @@ static void cfq_update_blkio_group_weight(struct request_queue *q, cfqg->needs_update = true; } -static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd, - struct cfq_group *cfqg, struct blkio_cgroup *blkcg) +static void cfq_link_blkio_group(struct request_queue *q, + struct blkio_group *blkg) { - struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info; + struct cfq_data *cfqd = q->elevator->elevator_data; + struct backing_dev_info *bdi = &q->backing_dev_info; + struct cfq_group *cfqg = cfqg_of_blkg(blkg); unsigned int major, minor; /* @@ -1062,34 +1064,26 @@ static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd, */ if (bdi->dev) { sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); - cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, - cfqd->queue, MKDEV(major, minor)); - } else - cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, - cfqd->queue, 0); + blkg->dev = MKDEV(major, minor); + } cfqd->nr_blkcg_linked_grps++; - cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev); /* Add group on cfqd list */ hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list); } -/* - * Should be called from sleepable context. No request queue lock as per - * cpu stats are allocated dynamically and alloc_percpu needs to be called - * from sleepable context. - */ -static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd) +static struct blkio_group *cfq_alloc_blkio_group(struct request_queue *q, + struct blkio_cgroup *blkcg) { struct cfq_group *cfqg; - int ret; - cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node); + cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, q->node); if (!cfqg) return NULL; cfq_init_cfqg_base(cfqg); + cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev); /* * Take the initial reference that will be released on destroy @@ -1099,90 +1093,38 @@ static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd) */ cfqg->ref = 1; - ret = blkio_alloc_blkg_stats(&cfqg->blkg); - if (ret) { - kfree(cfqg); - return NULL; - } - - return cfqg; -} - -static struct cfq_group * -cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg) -{ - struct cfq_group *cfqg = NULL; - struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info; - unsigned int major, minor; - - /* - * This is the common case when there are no blkio cgroups. - * Avoid lookup in this case - */ - if (blkcg == &blkio_root_cgroup) - cfqg = cfqd->root_group; - else - cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, cfqd->queue, - BLKIO_POLICY_PROP)); - - if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) { - sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); - cfqg->blkg.dev = MKDEV(major, minor); - } - - return cfqg; + return &cfqg->blkg; } /* * Search for the cfq group current task belongs to. request_queue lock must * be held. 
*/ -static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, - struct blkio_cgroup *blkcg) +static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd, + struct blkio_cgroup *blkcg) { - struct cfq_group *cfqg = NULL, *__cfqg = NULL; struct request_queue *q = cfqd->queue; + struct backing_dev_info *bdi = &q->backing_dev_info; + struct cfq_group *cfqg = NULL; - cfqg = cfq_find_cfqg(cfqd, blkcg); - if (cfqg) - return cfqg; - - if (!css_tryget(&blkcg->css)) - return NULL; - - /* - * Need to allocate a group. Allocation of group also needs allocation - * of per cpu stats which in-turn takes a mutex() and can block. Hence - * we need to drop rcu lock and queue_lock before we call alloc. - * - * Not taking any queue reference here and assuming that queue is - * around by the time we return. CFQ queue allocation code does - * the same. It might be racy though. - */ - rcu_read_unlock(); - spin_unlock_irq(q->queue_lock); - - cfqg = cfq_alloc_cfqg(cfqd); + /* avoid lookup for the common case where there's no blkio cgroup */ + if (blkcg == &blkio_root_cgroup) { + cfqg = cfqd->root_group; + } else { + struct blkio_group *blkg; - spin_lock_irq(q->queue_lock); - rcu_read_lock(); - css_put(&blkcg->css); + blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_PROP, false); + if (!IS_ERR(blkg)) + cfqg = cfqg_of_blkg(blkg); + } - /* - * If some other thread already allocated the group while we were - * not holding queue lock, free up the group - */ - __cfqg = cfq_find_cfqg(cfqd, blkcg); + if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) { + unsigned int major, minor; - if (__cfqg) { - kfree(cfqg); - return __cfqg; + sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); + cfqg->blkg.dev = MKDEV(major, minor); } - if (!cfqg) - cfqg = cfqd->root_group; - - cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg); return cfqg; } @@ -1294,8 +1236,8 @@ static bool cfq_clear_queue(struct request_queue *q) } #else /* GROUP_IOSCHED */ -static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, - struct blkio_cgroup *blkcg) +static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd, + struct blkio_cgroup *blkcg) { return cfqd->root_group; } @@ -2887,7 +2829,8 @@ retry: blkcg = task_blkio_cgroup(current); - cfqg = cfq_get_cfqg(cfqd, blkcg); + cfqg = cfq_lookup_create_cfqg(cfqd, blkcg); + cic = cfq_cic_lookup(cfqd, ioc); /* cic always exists here */ cfqq = cic_to_cfqq(cic, is_sync); @@ -3694,6 +3637,7 @@ static void cfq_exit_queue(struct elevator_queue *e) static int cfq_init_queue(struct request_queue *q) { struct cfq_data *cfqd; + struct blkio_group *blkg __maybe_unused; int i; cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); @@ -3711,7 +3655,10 @@ static int cfq_init_queue(struct request_queue *q) rcu_read_lock(); spin_lock_irq(q->queue_lock); - cfqd->root_group = cfq_get_cfqg(cfqd, &blkio_root_cgroup); + blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_PROP, + true); + if (!IS_ERR(blkg)) + cfqd->root_group = cfqg_of_blkg(blkg); spin_unlock_irq(q->queue_lock); rcu_read_unlock(); @@ -3897,6 +3844,8 @@ static struct elevator_type iosched_cfq = { #ifdef CONFIG_CFQ_GROUP_IOSCHED static struct blkio_policy_type blkio_policy_cfq = { .ops = { + .blkio_alloc_group_fn = cfq_alloc_blkio_group, + .blkio_link_group_fn = cfq_link_blkio_group, .blkio_unlink_group_fn = cfq_unlink_blkio_group, .blkio_clear_queue_fn = cfq_clear_queue, .blkio_update_group_weight_fn = cfq_update_blkio_group_weight, diff --git a/block/cfq.h b/block/cfq.h index 343b78a61df3..398760194e11 100644 --- 
a/block/cfq.h +++ b/block/cfq.h @@ -67,12 +67,6 @@ static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, direction, sync); } -static inline void cfq_blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, - struct blkio_group *blkg, struct request_queue *q, dev_t dev) -{ - blkiocg_add_blkio_group(blkcg, blkg, q, dev, BLKIO_POLICY_PROP); -} - static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg) { return blkiocg_del_blkio_group(blkg); @@ -105,8 +99,6 @@ static inline void cfq_blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes, bool direction, bool sync) {} static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, uint64_t start_time, uint64_t io_start_time, bool direction, bool sync) {} -static inline void cfq_blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, - struct blkio_group *blkg, struct request_queue *q, dev_t dev) {} static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; -- cgit v1.2.3-59-g8ed1b From e56da7e287967667474a58c4f60c286279e3f487 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:07 -0800 Subject: blkcg: don't allow or retain configuration of missing devices blkcg is very peculiar in that it allows setting and remembering configurations for non-existent devices by maintaining separate data structures for configuration. This behavior is completely out of the usual norms and outright confusing; furthermore, it uses the dev_t number to match the configuration to devices, which is unpredictable to begin with and becomes completely unusable if EXT_DEVT is fully used. It is wholly unnecessary - we already have a fully functional userland mechanism to program devices being hotplugged, which has full access to device identification, connection topology and filesystem information. Add a new struct blkio_group_conf, which contains all blkcg configurations, to blkio_group and let blkio_group, which can be created iff the associated device exists and is removed when the associated device goes away, carry all configurations. Note that, after this patch, all newly created blkgs will always have the default configuration (unlimited for throttling and blkcg's weight for propio). This patch makes blkio_policy_node meaningless but doesn't remove it. The next patch will. -v2: Updated to retry after a short sleep if blkg lookup/creation failed due to the queue being temporarily bypassed as indicated by an -EBUSY return. Pointed out by Vivek. 
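To make the behavior change concrete, a small illustrative sketch: configuration now lives in a blkio_group_conf embedded in the group, so it can only be set for a device that actually exists (and goes away with it), and a freshly created group starts from defaults. The struct layout follows the patch below; blkg_init_conf(), the chosen default constants and main() are made up for illustration (the kernel applies -1/unlimited in the throttle policy and the cgroup's weight in cfq).

#include <stdio.h>
#include <stdint.h>
#include <limits.h>

struct blkio_group_conf {
	unsigned int weight;
	unsigned int iops[2];
	uint64_t bps[2];
};

struct blkio_group {
	struct blkio_group_conf conf;
};

/* illustrative: default config applied when a group is created */
static void blkg_init_conf(struct blkio_group *blkg, unsigned int cgroup_weight)
{
	blkg->conf.weight = cgroup_weight;
	blkg->conf.bps[0] = blkg->conf.bps[1] = UINT64_MAX;	/* unlimited */
	blkg->conf.iops[0] = blkg->conf.iops[1] = UINT_MAX;	/* unlimited */
}

int main(void)
{
	struct blkio_group blkg;

	blkg_init_conf(&blkg, 500);
	printf("weight=%u read_bps=%llu\n", blkg.conf.weight,
	       (unsigned long long)blkg.conf.bps[0]);
	return 0;
}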
Signed-off-by: Tejun Heo Cc: Vivek Goyal Cc: Kay Sievers Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 94 ++++++++++++++++++++++++++++++++++++++++------------ block/blk-cgroup.h | 9 +++++ block/blk-throttle.c | 8 ++--- block/cfq-iosched.c | 2 +- 4 files changed, 87 insertions(+), 26 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index bc9891496318..fe8ce148017a 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -855,9 +855,12 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, } static int blkio_policy_parse_and_set(char *buf, - struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid) + struct blkio_policy_node *newpn, + enum blkio_policy_id plid, int fileid, + struct blkio_cgroup *blkcg) { struct gendisk *disk = NULL; + struct blkio_group *blkg = NULL; char *s[4], *p, *major_s = NULL, *minor_s = NULL; unsigned long major, minor; int i = 0, ret = -EINVAL; @@ -903,11 +906,25 @@ static int blkio_policy_parse_and_set(char *buf, goto out; /* For rule removal, do not check for device presence. */ - if (temp) { - disk = get_gendisk(dev, &part); - if (!disk || part) { - ret = -ENODEV; - goto out; + disk = get_gendisk(dev, &part); + + if ((!disk || part) && temp) { + ret = -ENODEV; + goto out; + } + + rcu_read_lock(); + + if (disk && !part) { + spin_lock_irq(disk->queue->queue_lock); + blkg = blkg_lookup_create(blkcg, disk->queue, plid, false); + spin_unlock_irq(disk->queue->queue_lock); + + if (IS_ERR(blkg)) { + ret = PTR_ERR(blkg); + if (ret == -EBUSY) + goto out_unlock; + blkg = NULL; } } @@ -917,25 +934,46 @@ static int blkio_policy_parse_and_set(char *buf, case BLKIO_POLICY_PROP: if ((temp < BLKIO_WEIGHT_MIN && temp > 0) || temp > BLKIO_WEIGHT_MAX) - goto out; + goto out_unlock; newpn->plid = plid; newpn->fileid = fileid; newpn->val.weight = temp; + if (blkg) + blkg->conf.weight = temp; break; case BLKIO_POLICY_THROTL: switch(fileid) { case BLKIO_THROTL_read_bps_device: + if (blkg) + blkg->conf.bps[READ] = temp; + newpn->plid = plid; + newpn->fileid = fileid; + newpn->val.bps = temp; + break; case BLKIO_THROTL_write_bps_device: + if (blkg) + blkg->conf.bps[WRITE] = temp; newpn->plid = plid; newpn->fileid = fileid; newpn->val.bps = temp; break; case BLKIO_THROTL_read_iops_device: + if (temp > THROTL_IOPS_MAX) + goto out_unlock; + + if (blkg) + blkg->conf.iops[READ] = temp; + newpn->plid = plid; + newpn->fileid = fileid; + newpn->val.iops = (unsigned int)temp; + break; case BLKIO_THROTL_write_iops_device: if (temp > THROTL_IOPS_MAX) - goto out; + goto out_unlock; + if (blkg) + blkg->conf.iops[WRITE] = temp; newpn->plid = plid; newpn->fileid = fileid; newpn->val.iops = (unsigned int)temp; @@ -946,8 +984,21 @@ static int blkio_policy_parse_and_set(char *buf, BUG(); } ret = 0; +out_unlock: + rcu_read_unlock(); out: put_disk(disk); + + /* + * If queue was bypassing, we should retry. Do so after a short + * msleep(). It isn't strictly necessary but queue can be + * bypassing for some time and it's always nice to avoid busy + * looping. + */ + if (ret == -EBUSY) { + msleep(10); + return restart_syscall(); + } return ret; } @@ -1095,26 +1146,29 @@ static void blkio_update_policy_rule(struct blkio_policy_node *oldpn, static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg, struct blkio_group *blkg, struct blkio_policy_node *pn) { - unsigned int weight, iops; - u64 bps; + struct blkio_group_conf *conf = &blkg->conf; switch(pn->plid) { case BLKIO_POLICY_PROP: - weight = pn->val.weight ? 
pn->val.weight : - blkcg->weight; - blkio_update_group_weight(blkg, weight); + blkio_update_group_weight(blkg, conf->weight ?: blkcg->weight); break; case BLKIO_POLICY_THROTL: switch(pn->fileid) { case BLKIO_THROTL_read_bps_device: + blkio_update_group_bps(blkg, conf->bps[READ] ?: -1, + pn->fileid); + break; case BLKIO_THROTL_write_bps_device: - bps = pn->val.bps ? pn->val.bps : (-1); - blkio_update_group_bps(blkg, bps, pn->fileid); + blkio_update_group_bps(blkg, conf->bps[WRITE] ?: -1, + pn->fileid); break; case BLKIO_THROTL_read_iops_device: + blkio_update_group_iops(blkg, conf->iops[READ] ?: -1, + pn->fileid); + break; case BLKIO_THROTL_write_iops_device: - iops = pn->val.iops ? pn->val.iops : (-1); - blkio_update_group_iops(blkg, iops, pn->fileid); + blkio_update_group_iops(blkg, conf->iops[WRITE] ?: -1, + pn->fileid); break; } break; @@ -1152,7 +1206,7 @@ static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft, int ret = 0; char *buf; struct blkio_policy_node *newpn, *pn; - struct blkio_cgroup *blkcg; + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); int keep_newpn = 0; enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private); int fileid = BLKIOFILE_ATTR(cft->private); @@ -1167,12 +1221,10 @@ static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft, goto free_buf; } - ret = blkio_policy_parse_and_set(buf, newpn, plid, fileid); + ret = blkio_policy_parse_and_set(buf, newpn, plid, fileid, blkcg); if (ret) goto free_newpn; - blkcg = cgroup_to_blkio_cgroup(cgrp); - spin_lock_irq(&blkcg->lock); pn = blkio_policy_search_node(blkcg, newpn->dev, plid, fileid); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 2600ae7e6f60..81efe718a1c6 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -154,6 +154,12 @@ struct blkio_group_stats_cpu { struct u64_stats_sync syncp; }; +struct blkio_group_conf { + unsigned int weight; + unsigned int iops[2]; + u64 bps[2]; +}; + struct blkio_group { /* Pointer to the associated request_queue, RCU protected */ struct request_queue __rcu *q; @@ -166,6 +172,9 @@ struct blkio_group { /* policy which owns this blk group */ enum blkio_policy_id plid; + /* Configuration */ + struct blkio_group_conf conf; + /* Need to serialize the stats in the case of reset/update */ spinlock_t stats_lock; struct blkio_group_stats stats; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 2ae637b9e80c..791b10719e43 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -196,10 +196,10 @@ static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q, bio_list_init(&tg->bio_lists[1]); tg->limits_changed = false; - tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev); - tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev); - tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev); - tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev); + tg->bps[READ] = -1; + tg->bps[WRITE] = -1; + tg->iops[READ] = -1; + tg->iops[WRITE] = -1; /* * Take the initial reference that will be released on destroy diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index acef564578c3..08d4fdd188fa 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1083,7 +1083,7 @@ static struct blkio_group *cfq_alloc_blkio_group(struct request_queue *q, return NULL; cfq_init_cfqg_base(cfqg); - cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev); + cfqg->weight = blkcg->weight; /* * Take the initial reference that will be released on destroy -- cgit v1.2.3-59-g8ed1b From 
4bfd482e73b30284cb21e10834ce729fa81aa256 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:08 -0800 Subject: blkcg: kill blkio_policy_node Now that blkcg configuration lives in blkg's, blkio_policy_node is no longer necessary. Kill it. blkio_policy_parse_and_set() now fails if invoked for missing device and functions to print out configurations are updated to print from blkg's. cftype_blkg_same_policy() is dropped along with other policy functions for consistency. Its one line is open coded in the only user - blkio_read_blkg_stats(). -v2: Update to reflect the retry-on-bypass logic change of the previous patch. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 437 ++++++++--------------------------------------------- block/blk-cgroup.h | 32 ---- 2 files changed, 59 insertions(+), 410 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index fe8ce148017a..adf61c99258c 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -59,54 +59,6 @@ struct cgroup_subsys blkio_subsys = { }; EXPORT_SYMBOL_GPL(blkio_subsys); -static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg, - struct blkio_policy_node *pn) -{ - list_add(&pn->node, &blkcg->policy_list); -} - -static inline bool cftype_blkg_same_policy(struct cftype *cft, - struct blkio_group *blkg) -{ - enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private); - - if (blkg->plid == plid) - return 1; - - return 0; -} - -/* Determines if policy node matches cgroup file being accessed */ -static inline bool pn_matches_cftype(struct cftype *cft, - struct blkio_policy_node *pn) -{ - enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private); - int fileid = BLKIOFILE_ATTR(cft->private); - - return (plid == pn->plid && fileid == pn->fileid); -} - -/* Must be called with blkcg->lock held */ -static inline void blkio_policy_delete_node(struct blkio_policy_node *pn) -{ - list_del(&pn->node); -} - -/* Must be called with blkcg->lock held */ -static struct blkio_policy_node * -blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev, - enum blkio_policy_id plid, int fileid) -{ - struct blkio_policy_node *pn; - - list_for_each_entry(pn, &blkcg->policy_list, node) { - if (pn->dev == dev && pn->plid == plid && pn->fileid == fileid) - return pn; - } - - return NULL; -} - struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), @@ -854,10 +806,8 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, return disk_total; } -static int blkio_policy_parse_and_set(char *buf, - struct blkio_policy_node *newpn, - enum blkio_policy_id plid, int fileid, - struct blkio_cgroup *blkcg) +static int blkio_policy_parse_and_set(char *buf, enum blkio_policy_id plid, + int fileid, struct blkio_cgroup *blkcg) { struct gendisk *disk = NULL; struct blkio_group *blkg = NULL; @@ -905,78 +855,51 @@ static int blkio_policy_parse_and_set(char *buf, if (strict_strtoull(s[1], 10, &temp)) goto out; - /* For rule removal, do not check for device presence. 
*/ disk = get_gendisk(dev, &part); - - if ((!disk || part) && temp) { - ret = -ENODEV; + if (!disk || part) goto out; - } rcu_read_lock(); - if (disk && !part) { - spin_lock_irq(disk->queue->queue_lock); - blkg = blkg_lookup_create(blkcg, disk->queue, plid, false); - spin_unlock_irq(disk->queue->queue_lock); + spin_lock_irq(disk->queue->queue_lock); + blkg = blkg_lookup_create(blkcg, disk->queue, plid, false); + spin_unlock_irq(disk->queue->queue_lock); - if (IS_ERR(blkg)) { - ret = PTR_ERR(blkg); - if (ret == -EBUSY) - goto out_unlock; - blkg = NULL; - } + if (IS_ERR(blkg)) { + ret = PTR_ERR(blkg); + goto out_unlock; } - newpn->dev = dev; - switch (plid) { case BLKIO_POLICY_PROP: if ((temp < BLKIO_WEIGHT_MIN && temp > 0) || temp > BLKIO_WEIGHT_MAX) goto out_unlock; - newpn->plid = plid; - newpn->fileid = fileid; - newpn->val.weight = temp; - if (blkg) - blkg->conf.weight = temp; + blkg->conf.weight = temp; + blkio_update_group_weight(blkg, temp ?: blkcg->weight); break; case BLKIO_POLICY_THROTL: switch(fileid) { case BLKIO_THROTL_read_bps_device: - if (blkg) - blkg->conf.bps[READ] = temp; - newpn->plid = plid; - newpn->fileid = fileid; - newpn->val.bps = temp; + blkg->conf.bps[READ] = temp; + blkio_update_group_bps(blkg, temp ?: -1, fileid); break; case BLKIO_THROTL_write_bps_device: - if (blkg) - blkg->conf.bps[WRITE] = temp; - newpn->plid = plid; - newpn->fileid = fileid; - newpn->val.bps = temp; + blkg->conf.bps[WRITE] = temp; + blkio_update_group_bps(blkg, temp ?: -1, fileid); break; case BLKIO_THROTL_read_iops_device: if (temp > THROTL_IOPS_MAX) goto out_unlock; - - if (blkg) - blkg->conf.iops[READ] = temp; - newpn->plid = plid; - newpn->fileid = fileid; - newpn->val.iops = (unsigned int)temp; + blkg->conf.iops[READ] = temp; + blkio_update_group_iops(blkg, temp ?: -1, fileid); break; case BLKIO_THROTL_write_iops_device: if (temp > THROTL_IOPS_MAX) goto out_unlock; - - if (blkg) - blkg->conf.iops[WRITE] = temp; - newpn->plid = plid; - newpn->fileid = fileid; - newpn->val.iops = (unsigned int)temp; + blkg->conf.iops[WRITE] = temp; + blkio_update_group_iops(blkg, temp ?: -1, fileid); break; } break; @@ -1002,212 +925,12 @@ out: return ret; } -unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg, - dev_t dev) -{ - struct blkio_policy_node *pn; - unsigned long flags; - unsigned int weight; - - spin_lock_irqsave(&blkcg->lock, flags); - - pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP, - BLKIO_PROP_weight_device); - if (pn) - weight = pn->val.weight; - else - weight = blkcg->weight; - - spin_unlock_irqrestore(&blkcg->lock, flags); - - return weight; -} -EXPORT_SYMBOL_GPL(blkcg_get_weight); - -uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev) -{ - struct blkio_policy_node *pn; - unsigned long flags; - uint64_t bps = -1; - - spin_lock_irqsave(&blkcg->lock, flags); - pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, - BLKIO_THROTL_read_bps_device); - if (pn) - bps = pn->val.bps; - spin_unlock_irqrestore(&blkcg->lock, flags); - - return bps; -} - -uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, dev_t dev) -{ - struct blkio_policy_node *pn; - unsigned long flags; - uint64_t bps = -1; - - spin_lock_irqsave(&blkcg->lock, flags); - pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, - BLKIO_THROTL_write_bps_device); - if (pn) - bps = pn->val.bps; - spin_unlock_irqrestore(&blkcg->lock, flags); - - return bps; -} - -unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, dev_t dev) -{ - struct blkio_policy_node *pn; - unsigned 
long flags; - unsigned int iops = -1; - - spin_lock_irqsave(&blkcg->lock, flags); - pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, - BLKIO_THROTL_read_iops_device); - if (pn) - iops = pn->val.iops; - spin_unlock_irqrestore(&blkcg->lock, flags); - - return iops; -} - -unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev) -{ - struct blkio_policy_node *pn; - unsigned long flags; - unsigned int iops = -1; - - spin_lock_irqsave(&blkcg->lock, flags); - pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, - BLKIO_THROTL_write_iops_device); - if (pn) - iops = pn->val.iops; - spin_unlock_irqrestore(&blkcg->lock, flags); - - return iops; -} - -/* Checks whether user asked for deleting a policy rule */ -static bool blkio_delete_rule_command(struct blkio_policy_node *pn) -{ - switch(pn->plid) { - case BLKIO_POLICY_PROP: - if (pn->val.weight == 0) - return 1; - break; - case BLKIO_POLICY_THROTL: - switch(pn->fileid) { - case BLKIO_THROTL_read_bps_device: - case BLKIO_THROTL_write_bps_device: - if (pn->val.bps == 0) - return 1; - break; - case BLKIO_THROTL_read_iops_device: - case BLKIO_THROTL_write_iops_device: - if (pn->val.iops == 0) - return 1; - } - break; - default: - BUG(); - } - - return 0; -} - -static void blkio_update_policy_rule(struct blkio_policy_node *oldpn, - struct blkio_policy_node *newpn) -{ - switch(oldpn->plid) { - case BLKIO_POLICY_PROP: - oldpn->val.weight = newpn->val.weight; - break; - case BLKIO_POLICY_THROTL: - switch(newpn->fileid) { - case BLKIO_THROTL_read_bps_device: - case BLKIO_THROTL_write_bps_device: - oldpn->val.bps = newpn->val.bps; - break; - case BLKIO_THROTL_read_iops_device: - case BLKIO_THROTL_write_iops_device: - oldpn->val.iops = newpn->val.iops; - } - break; - default: - BUG(); - } -} - -/* - * Some rules/values in blkg have changed. Propagate those to respective - * policies. - */ -static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg, - struct blkio_group *blkg, struct blkio_policy_node *pn) -{ - struct blkio_group_conf *conf = &blkg->conf; - - switch(pn->plid) { - case BLKIO_POLICY_PROP: - blkio_update_group_weight(blkg, conf->weight ?: blkcg->weight); - break; - case BLKIO_POLICY_THROTL: - switch(pn->fileid) { - case BLKIO_THROTL_read_bps_device: - blkio_update_group_bps(blkg, conf->bps[READ] ?: -1, - pn->fileid); - break; - case BLKIO_THROTL_write_bps_device: - blkio_update_group_bps(blkg, conf->bps[WRITE] ?: -1, - pn->fileid); - break; - case BLKIO_THROTL_read_iops_device: - blkio_update_group_iops(blkg, conf->iops[READ] ?: -1, - pn->fileid); - break; - case BLKIO_THROTL_write_iops_device: - blkio_update_group_iops(blkg, conf->iops[WRITE] ?: -1, - pn->fileid); - break; - } - break; - default: - BUG(); - } -} - -/* - * A policy node rule has been updated. Propagate this update to all the - * block groups which might be affected by this update. 
- */ -static void blkio_update_policy_node_blkg(struct blkio_cgroup *blkcg, - struct blkio_policy_node *pn) -{ - struct blkio_group *blkg; - struct hlist_node *n; - - spin_lock(&blkio_list_lock); - spin_lock_irq(&blkcg->lock); - - hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { - if (pn->dev != blkg->dev || pn->plid != blkg->plid) - continue; - blkio_update_blkg_policy(blkcg, blkg, pn); - } - - spin_unlock_irq(&blkcg->lock); - spin_unlock(&blkio_list_lock); -} - static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft, const char *buffer) { int ret = 0; char *buf; - struct blkio_policy_node *newpn, *pn; struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - int keep_newpn = 0; enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private); int fileid = BLKIOFILE_ATTR(cft->private); @@ -1215,69 +938,42 @@ static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft, if (!buf) return -ENOMEM; - newpn = kzalloc(sizeof(*newpn), GFP_KERNEL); - if (!newpn) { - ret = -ENOMEM; - goto free_buf; - } - - ret = blkio_policy_parse_and_set(buf, newpn, plid, fileid, blkcg); - if (ret) - goto free_newpn; - - spin_lock_irq(&blkcg->lock); - - pn = blkio_policy_search_node(blkcg, newpn->dev, plid, fileid); - if (!pn) { - if (!blkio_delete_rule_command(newpn)) { - blkio_policy_insert_node(blkcg, newpn); - keep_newpn = 1; - } - spin_unlock_irq(&blkcg->lock); - goto update_io_group; - } - - if (blkio_delete_rule_command(newpn)) { - blkio_policy_delete_node(pn); - kfree(pn); - spin_unlock_irq(&blkcg->lock); - goto update_io_group; - } - spin_unlock_irq(&blkcg->lock); - - blkio_update_policy_rule(pn, newpn); - -update_io_group: - blkio_update_policy_node_blkg(blkcg, newpn); - -free_newpn: - if (!keep_newpn) - kfree(newpn); -free_buf: + ret = blkio_policy_parse_and_set(buf, plid, fileid, blkcg); kfree(buf); return ret; } -static void -blkio_print_policy_node(struct seq_file *m, struct blkio_policy_node *pn) +static void blkio_print_group_conf(struct cftype *cft, struct blkio_group *blkg, + struct seq_file *m) { - switch(pn->plid) { + int fileid = BLKIOFILE_ATTR(cft->private); + int rw = WRITE; + + switch (blkg->plid) { case BLKIO_POLICY_PROP: - if (pn->fileid == BLKIO_PROP_weight_device) - seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev), - MINOR(pn->dev), pn->val.weight); + if (blkg->conf.weight) + seq_printf(m, "%u:%u\t%u\n", MAJOR(blkg->dev), + MINOR(blkg->dev), blkg->conf.weight); break; case BLKIO_POLICY_THROTL: - switch(pn->fileid) { + switch (fileid) { case BLKIO_THROTL_read_bps_device: + rw = READ; case BLKIO_THROTL_write_bps_device: - seq_printf(m, "%u:%u\t%llu\n", MAJOR(pn->dev), - MINOR(pn->dev), pn->val.bps); + if (blkg->conf.bps[rw]) + seq_printf(m, "%u:%u\t%llu\n", + MAJOR(blkg->dev), + MINOR(blkg->dev), + blkg->conf.bps[rw]); break; case BLKIO_THROTL_read_iops_device: + rw = READ; case BLKIO_THROTL_write_iops_device: - seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev), - MINOR(pn->dev), pn->val.iops); + if (blkg->conf.iops[rw]) + seq_printf(m, "%u:%u\t%u\n", + MAJOR(blkg->dev), + MINOR(blkg->dev), + blkg->conf.iops[rw]); break; } break; @@ -1287,20 +983,17 @@ blkio_print_policy_node(struct seq_file *m, struct blkio_policy_node *pn) } /* cgroup files which read their data from policy nodes end up here */ -static void blkio_read_policy_node_files(struct cftype *cft, - struct blkio_cgroup *blkcg, struct seq_file *m) +static void blkio_read_conf(struct cftype *cft, struct blkio_cgroup *blkcg, + struct seq_file *m) { - struct blkio_policy_node *pn; + struct blkio_group *blkg; 
+ struct hlist_node *n; - if (!list_empty(&blkcg->policy_list)) { - spin_lock_irq(&blkcg->lock); - list_for_each_entry(pn, &blkcg->policy_list, node) { - if (!pn_matches_cftype(cft, pn)) - continue; - blkio_print_policy_node(m, pn); - } - spin_unlock_irq(&blkcg->lock); - } + spin_lock_irq(&blkcg->lock); + hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) + if (BLKIOFILE_POLICY(cft->private) == blkg->plid) + blkio_print_group_conf(cft, blkg, m); + spin_unlock_irq(&blkcg->lock); } static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft, @@ -1316,7 +1009,7 @@ static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft, case BLKIO_POLICY_PROP: switch(name) { case BLKIO_PROP_weight_device: - blkio_read_policy_node_files(cft, blkcg, m); + blkio_read_conf(cft, blkcg, m); return 0; default: BUG(); @@ -1328,7 +1021,7 @@ static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft, case BLKIO_THROTL_write_bps_device: case BLKIO_THROTL_read_iops_device: case BLKIO_THROTL_write_iops_device: - blkio_read_policy_node_files(cft, blkcg, m); + blkio_read_conf(cft, blkcg, m); return 0; default: BUG(); @@ -1352,7 +1045,7 @@ static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg, rcu_read_lock(); hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) { if (blkg->dev) { - if (!cftype_blkg_same_policy(cft, blkg)) + if (BLKIOFILE_POLICY(cft->private) != blkg->plid) continue; if (pcpu) cgroup_total += blkio_get_stat_cpu(blkg, cb, @@ -1451,11 +1144,10 @@ static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft, return 0; } -static int blkio_weight_write(struct blkio_cgroup *blkcg, u64 val) +static int blkio_weight_write(struct blkio_cgroup *blkcg, int plid, u64 val) { struct blkio_group *blkg; struct hlist_node *n; - struct blkio_policy_node *pn; if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX) return -EINVAL; @@ -1464,14 +1156,10 @@ static int blkio_weight_write(struct blkio_cgroup *blkcg, u64 val) spin_lock_irq(&blkcg->lock); blkcg->weight = (unsigned int)val; - hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { - pn = blkio_policy_search_node(blkcg, blkg->dev, - BLKIO_POLICY_PROP, BLKIO_PROP_weight_device); - if (pn) - continue; + hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) + if (blkg->plid == plid && !blkg->conf.weight) + blkio_update_group_weight(blkg, blkcg->weight); - blkio_update_group_weight(blkg, blkcg->weight); - } spin_unlock_irq(&blkcg->lock); spin_unlock(&blkio_list_lock); return 0; @@ -1510,7 +1198,7 @@ blkiocg_file_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) case BLKIO_POLICY_PROP: switch(name) { case BLKIO_PROP_weight: - return blkio_weight_write(blkcg, val); + return blkio_weight_write(blkcg, plid, val); } break; default: @@ -1691,7 +1379,6 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) struct blkio_group *blkg; struct request_queue *q; struct blkio_policy_type *blkiop; - struct blkio_policy_node *pn, *pntmp; rcu_read_lock(); do { @@ -1723,11 +1410,6 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) spin_unlock(&blkio_list_lock); } while (1); - list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) { - blkio_policy_delete_node(pn); - kfree(pn); - } - free_css_id(&blkio_subsys, &blkcg->css); rcu_read_unlock(); if (blkcg != &blkio_root_cgroup) @@ -1754,7 +1436,6 @@ done: spin_lock_init(&blkcg->lock); INIT_HLIST_HEAD(&blkcg->blkg_list); - INIT_LIST_HEAD(&blkcg->policy_list); return &blkcg->css; } diff 
--git a/block/blk-cgroup.h b/block/blk-cgroup.h index 81efe718a1c6..9a5c68d7cc92 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -112,7 +112,6 @@ struct blkio_cgroup { unsigned int weight; spinlock_t lock; struct hlist_head blkg_list; - struct list_head policy_list; /* list of blkio_policy_node */ }; struct blkio_group_stats { @@ -182,37 +181,6 @@ struct blkio_group { struct blkio_group_stats_cpu __percpu *stats_cpu; }; -struct blkio_policy_node { - struct list_head node; - dev_t dev; - /* This node belongs to max bw policy or porportional weight policy */ - enum blkio_policy_id plid; - /* cgroup file to which this rule belongs to */ - int fileid; - - union { - unsigned int weight; - /* - * Rate read/write in terms of bytes per second - * Whether this rate represents read or write is determined - * by file type "fileid". - */ - u64 bps; - unsigned int iops; - } val; -}; - -extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg, - dev_t dev); -extern uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, - dev_t dev); -extern uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, - dev_t dev); -extern unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, - dev_t dev); -extern unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, - dev_t dev); - typedef struct blkio_group *(blkio_alloc_group_fn)(struct request_queue *q, struct blkio_cgroup *blkcg); typedef void (blkio_link_group_fn)(struct request_queue *q, -- cgit v1.2.3-59-g8ed1b From 7a4dd281ec66224f802093962d1d903d86b09560 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:09 -0800 Subject: blkcg: kill the mind-bending blkg->dev blkg->dev is a dev_t recording the device number of the block device for the associated request_queue. It is used to identify the associated block device when printing out configuration or stats. This is redundant to begin with: a blkg is an association between a cgroup and a request_queue, the request_queue is of course reachable from the blkg, and synchronization conventions are in place for safe q dereferencing, so storing the device number separately was never necessary. Furthermore, it's initialized by sscanf()ing the device name of backing_dev_info. The mind boggles. Anyway, if a blkg is visible under rcu lock, we *know* that the associated request_queue hasn't gone away yet and its bdi is registered and alive - a blkg can't be created for a request_queue which hasn't been fully initialized, and the queue can't go away before the blkg is removed. Let the stat and conf read functions get the device name from blkg->q->backing_dev_info.dev and pass it down to the printing functions, and remove blkg->dev.
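As a rough user-space illustration of the round trip the removed code performed (a sketch assuming plain C with glibc's makedev()/major()/minor(); the "8:16" name is just an example value, not taken from the patch), the old scheme parsed the bdi device name into a dev_t only to format it back into "major:minor" for output, whereas after this patch the name taken from the bdi is printed as-is.

#include <stdio.h>
#include <sys/sysmacros.h>
#include <sys/types.h>

int main(void)
{
	const char *bdi_name = "8:16";	/* example bdi device name */
	unsigned int major_nr, minor_nr;
	dev_t dev;

	/* Old scheme: parse the name into a dev_t at group creation time... */
	if (sscanf(bdi_name, "%u:%u", &major_nr, &minor_nr) != 2)
		return 1;
	dev = makedev(major_nr, minor_nr);

	/* ...only to format it back into "major:minor" when printing stats. */
	printf("old: %u:%u\n", major(dev), minor(dev));

	/* New scheme: just print the device name the bdi already has. */
	printf("new: %s\n", bdi_name);
	return 0;
}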
Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 86 +++++++++++++++++++++++++++------------------------- block/blk-cgroup.h | 2 -- block/blk-throttle.c | 51 ++----------------------------- block/cfq-iosched.c | 21 ------------- 4 files changed, 47 insertions(+), 113 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index adf61c99258c..8742af3be84b 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -662,10 +662,10 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) return 0; } -static void blkio_get_key_name(enum stat_sub_type type, dev_t dev, char *str, - int chars_left, bool diskname_only) +static void blkio_get_key_name(enum stat_sub_type type, const char *dname, + char *str, int chars_left, bool diskname_only) { - snprintf(str, chars_left, "%d:%d", MAJOR(dev), MINOR(dev)); + snprintf(str, chars_left, "%s", dname); chars_left -= strlen(str); if (chars_left <= 0) { printk(KERN_WARNING @@ -696,9 +696,9 @@ static void blkio_get_key_name(enum stat_sub_type type, dev_t dev, char *str, } static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val, - struct cgroup_map_cb *cb, dev_t dev) + struct cgroup_map_cb *cb, const char *dname) { - blkio_get_key_name(0, dev, str, chars_left, true); + blkio_get_key_name(0, dname, str, chars_left, true); cb->fill(cb, str, val); return val; } @@ -730,7 +730,8 @@ static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg, } static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, - struct cgroup_map_cb *cb, dev_t dev, enum stat_type_cpu type) + struct cgroup_map_cb *cb, const char *dname, + enum stat_type_cpu type) { uint64_t disk_total, val; char key_str[MAX_KEY_LEN]; @@ -738,12 +739,14 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, if (type == BLKIO_STAT_CPU_SECTORS) { val = blkio_read_stat_cpu(blkg, type, 0); - return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, val, cb, dev); + return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, val, cb, + dname); } for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL; sub_type++) { - blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false); + blkio_get_key_name(sub_type, dname, key_str, MAX_KEY_LEN, + false); val = blkio_read_stat_cpu(blkg, type, sub_type); cb->fill(cb, key_str, val); } @@ -751,14 +754,16 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, disk_total = blkio_read_stat_cpu(blkg, type, BLKIO_STAT_READ) + blkio_read_stat_cpu(blkg, type, BLKIO_STAT_WRITE); - blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false); + blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN, + false); cb->fill(cb, key_str, disk_total); return disk_total; } /* This should be called with blkg->stats_lock held */ static uint64_t blkio_get_stat(struct blkio_group *blkg, - struct cgroup_map_cb *cb, dev_t dev, enum stat_type type) + struct cgroup_map_cb *cb, const char *dname, + enum stat_type type) { uint64_t disk_total; char key_str[MAX_KEY_LEN]; @@ -766,11 +771,11 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, if (type == BLKIO_STAT_TIME) return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, - blkg->stats.time, cb, dev); + blkg->stats.time, cb, dname); #ifdef CONFIG_DEBUG_BLK_CGROUP if (type == BLKIO_STAT_UNACCOUNTED_TIME) return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, - blkg->stats.unaccounted_time, cb, dev); + blkg->stats.unaccounted_time, cb, dname); if (type == BLKIO_STAT_AVG_QUEUE_SIZE) { uint64_t sum = 
blkg->stats.avg_queue_size_sum; uint64_t samples = blkg->stats.avg_queue_size_samples; @@ -778,30 +783,33 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, do_div(sum, samples); else sum = 0; - return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, sum, cb, dev); + return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, + sum, cb, dname); } if (type == BLKIO_STAT_GROUP_WAIT_TIME) return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, - blkg->stats.group_wait_time, cb, dev); + blkg->stats.group_wait_time, cb, dname); if (type == BLKIO_STAT_IDLE_TIME) return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, - blkg->stats.idle_time, cb, dev); + blkg->stats.idle_time, cb, dname); if (type == BLKIO_STAT_EMPTY_TIME) return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, - blkg->stats.empty_time, cb, dev); + blkg->stats.empty_time, cb, dname); if (type == BLKIO_STAT_DEQUEUE) return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, - blkg->stats.dequeue, cb, dev); + blkg->stats.dequeue, cb, dname); #endif for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL; sub_type++) { - blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false); + blkio_get_key_name(sub_type, dname, key_str, MAX_KEY_LEN, + false); cb->fill(cb, key_str, blkg->stats.stat_arr[type][sub_type]); } disk_total = blkg->stats.stat_arr[type][BLKIO_STAT_READ] + blkg->stats.stat_arr[type][BLKIO_STAT_WRITE]; - blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false); + blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN, + false); cb->fill(cb, key_str, disk_total); return disk_total; } @@ -946,14 +954,15 @@ static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft, static void blkio_print_group_conf(struct cftype *cft, struct blkio_group *blkg, struct seq_file *m) { + const char *dname = dev_name(blkg->q->backing_dev_info.dev); int fileid = BLKIOFILE_ATTR(cft->private); int rw = WRITE; switch (blkg->plid) { case BLKIO_POLICY_PROP: if (blkg->conf.weight) - seq_printf(m, "%u:%u\t%u\n", MAJOR(blkg->dev), - MINOR(blkg->dev), blkg->conf.weight); + seq_printf(m, "%s\t%u\n", + dname, blkg->conf.weight); break; case BLKIO_POLICY_THROTL: switch (fileid) { @@ -961,19 +970,15 @@ static void blkio_print_group_conf(struct cftype *cft, struct blkio_group *blkg, rw = READ; case BLKIO_THROTL_write_bps_device: if (blkg->conf.bps[rw]) - seq_printf(m, "%u:%u\t%llu\n", - MAJOR(blkg->dev), - MINOR(blkg->dev), - blkg->conf.bps[rw]); + seq_printf(m, "%s\t%llu\n", + dname, blkg->conf.bps[rw]); break; case BLKIO_THROTL_read_iops_device: rw = READ; case BLKIO_THROTL_write_iops_device: if (blkg->conf.iops[rw]) - seq_printf(m, "%u:%u\t%u\n", - MAJOR(blkg->dev), - MINOR(blkg->dev), - blkg->conf.iops[rw]); + seq_printf(m, "%s\t%u\n", + dname, blkg->conf.iops[rw]); break; } break; @@ -1044,18 +1049,17 @@ static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg, rcu_read_lock(); hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) { - if (blkg->dev) { - if (BLKIOFILE_POLICY(cft->private) != blkg->plid) - continue; - if (pcpu) - cgroup_total += blkio_get_stat_cpu(blkg, cb, - blkg->dev, type); - else { - spin_lock_irq(&blkg->stats_lock); - cgroup_total += blkio_get_stat(blkg, cb, - blkg->dev, type); - spin_unlock_irq(&blkg->stats_lock); - } + const char *dname = dev_name(blkg->q->backing_dev_info.dev); + + if (BLKIOFILE_POLICY(cft->private) != blkg->plid) + continue; + if (pcpu) + cgroup_total += blkio_get_stat_cpu(blkg, cb, dname, + type); + else { + spin_lock_irq(&blkg->stats_lock); + cgroup_total += blkio_get_stat(blkg, cb, dname, 
type); + spin_unlock_irq(&blkg->stats_lock); } } if (show_total) diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 9a5c68d7cc92..7ebecf6ea8f1 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -166,8 +166,6 @@ struct blkio_group { unsigned short blkcg_id; /* Store cgroup path */ char path[128]; - /* The device MKDEV(major, minor), this group has been created for */ - dev_t dev; /* policy which owns this blk group */ enum blkio_policy_id plid; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 791b10719e43..52a429397d3b 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -212,50 +212,12 @@ static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q, return &tg->blkg; } -static void -__throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg) -{ - struct backing_dev_info *bdi = &td->queue->backing_dev_info; - unsigned int major, minor; - - if (!tg || tg->blkg.dev) - return; - - /* - * Fill in device details for a group which might not have been - * filled at group creation time as queue was being instantiated - * and driver had not attached a device yet - */ - if (bdi->dev && dev_name(bdi->dev)) { - sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); - tg->blkg.dev = MKDEV(major, minor); - } -} - -/* - * Should be called with without queue lock held. Here queue lock will be - * taken rarely. It will be taken only once during life time of a group - * if need be - */ -static void -throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg) -{ - if (!tg || tg->blkg.dev) - return; - - spin_lock_irq(td->queue->queue_lock); - __throtl_tg_fill_dev_details(td, tg); - spin_unlock_irq(td->queue->queue_lock); -} - static void throtl_link_blkio_group(struct request_queue *q, struct blkio_group *blkg) { struct throtl_data *td = q->td; struct throtl_grp *tg = tg_of_blkg(blkg); - __throtl_tg_fill_dev_details(td, tg); - hlist_add_head(&tg->tg_node, &td->tg_list); td->nr_undestroyed_grps++; } @@ -263,20 +225,14 @@ static void throtl_link_blkio_group(struct request_queue *q, static struct throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) { - struct throtl_grp *tg = NULL; - /* * This is the common case when there are no blkio cgroups. 
* Avoid lookup in this case */ if (blkcg == &blkio_root_cgroup) - tg = td->root_tg; - else - tg = tg_of_blkg(blkg_lookup(blkcg, td->queue, - BLKIO_POLICY_THROTL)); + return td->root_tg; - __throtl_tg_fill_dev_details(td, tg); - return tg; + return tg_of_blkg(blkg_lookup(blkcg, td->queue, BLKIO_POLICY_THROTL)); } static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, @@ -303,7 +259,6 @@ static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, tg = td->root_tg; } - __throtl_tg_fill_dev_details(td, tg); return tg; } @@ -1090,8 +1045,6 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) blkcg = task_blkio_cgroup(current); tg = throtl_lookup_tg(td, blkcg); if (tg) { - throtl_tg_fill_dev_details(td, tg); - if (tg_no_rule_group(tg, rw)) { blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, rw, rw_is_sync(bio->bi_rw)); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 08d4fdd188fa..f67d109eb974 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1052,20 +1052,7 @@ static void cfq_link_blkio_group(struct request_queue *q, struct blkio_group *blkg) { struct cfq_data *cfqd = q->elevator->elevator_data; - struct backing_dev_info *bdi = &q->backing_dev_info; struct cfq_group *cfqg = cfqg_of_blkg(blkg); - unsigned int major, minor; - - /* - * Add group onto cgroup list. It might happen that bdi->dev is - * not initialized yet. Initialize this new group without major - * and minor info and this info will be filled in once a new thread - * comes for IO. - */ - if (bdi->dev) { - sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); - blkg->dev = MKDEV(major, minor); - } cfqd->nr_blkcg_linked_grps++; @@ -1104,7 +1091,6 @@ static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg) { struct request_queue *q = cfqd->queue; - struct backing_dev_info *bdi = &q->backing_dev_info; struct cfq_group *cfqg = NULL; /* avoid lookup for the common case where there's no blkio cgroup */ @@ -1118,13 +1104,6 @@ static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd, cfqg = cfqg_of_blkg(blkg); } - if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) { - unsigned int major, minor; - - sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); - cfqg->blkg.dev = MKDEV(major, minor); - } - return cfqg; } -- cgit v1.2.3-59-g8ed1b From 7ee9c5620504906e98451dc9a1945b2b9e892cb8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:11 -0800 Subject: blkcg: let blkio_group point to blkio_cgroup directly Currently, blkg points to the associated blkcg via its css_id. This unnecessarily complicates dereferencing blkcg. Let blkg hold a reference to the associated blkcg and point directly to it and disable css_id on blkio_subsys. This change requires splitting blkiocg_destroy() into blkiocg_pre_destroy() and blkiocg_destroy() so that all blkg's can be destroyed and all the blkcg references held by them dropped during cgroup removal. 
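The direct-pointer-plus-reference scheme can be sketched with a small user-space analog in C (hypothetical struct names, not the kernel data structures): rather than storing an ID and looking the owner up on every access, the member pins its group with a reference taken at creation and dropped at destruction, so the pointer is always safe to dereference.

#include <stdio.h>
#include <stdlib.h>

struct group {			/* analog of blkio_cgroup */
	int refcnt;
	const char *name;
};

struct member {			/* analog of blkio_group (blkg) */
	struct group *grp;	/* direct pointer, pinned by a reference */
};

static void group_get(struct group *g) { g->refcnt++; }

static void group_put(struct group *g)
{
	if (--g->refcnt == 0) {
		printf("freeing group %s\n", g->name);
		free(g);
	}
}

static struct member *member_create(struct group *g)
{
	struct member *m = malloc(sizeof(*m));

	group_get(g);		/* member holds a reference to its group */
	m->grp = g;
	return m;
}

static void member_destroy(struct member *m)
{
	group_put(m->grp);	/* drop the reference taken at creation */
	free(m);
}

int main(void)
{
	struct group *g = malloc(sizeof(*g));
	struct member *m;

	g->refcnt = 1;		/* creator's reference */
	g->name = "blkcg-analog";

	m = member_create(g);
	printf("member's group: %s\n", m->grp->name);

	group_put(g);		/* creator drops its ref; group stays alive */
	member_destroy(m);	/* last ref dropped here, group is freed */
	return 0;
}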
Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 43 ++++++++++++++++++++++++------------------- block/blk-cgroup.h | 2 +- block/blk-throttle.c | 3 +++ block/cfq-iosched.c | 4 ++++ 4 files changed, 32 insertions(+), 20 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 76942360872b..d42d826ece39 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -37,6 +37,7 @@ static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *, struct cgroup_taskset *); static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *, struct cgroup_taskset *); +static int blkiocg_pre_destroy(struct cgroup_subsys *, struct cgroup *); static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *); static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *); @@ -51,10 +52,10 @@ struct cgroup_subsys blkio_subsys = { .create = blkiocg_create, .can_attach = blkiocg_can_attach, .attach = blkiocg_attach, + .pre_destroy = blkiocg_pre_destroy, .destroy = blkiocg_destroy, .populate = blkiocg_populate, .subsys_id = blkio_subsys_id, - .use_id = 1, .module = THIS_MODULE, }; EXPORT_SYMBOL_GPL(blkio_subsys); @@ -442,6 +443,7 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, if (blkg) return blkg; + /* blkg holds a reference to blkcg */ if (!css_tryget(&blkcg->css)) return ERR_PTR(-EINVAL); @@ -463,15 +465,16 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, spin_lock_init(&new_blkg->stats_lock); rcu_assign_pointer(new_blkg->q, q); - new_blkg->blkcg_id = css_id(&blkcg->css); + new_blkg->blkcg = blkcg; new_blkg->plid = plid; cgroup_path(blkcg->css.cgroup, new_blkg->path, sizeof(new_blkg->path)); + } else { + css_put(&blkcg->css); } rcu_read_lock(); spin_lock_irq(q->queue_lock); - css_put(&blkcg->css); /* did bypass get turned on inbetween? 
*/ if (unlikely(blk_queue_bypass(q)) && !for_root) { @@ -500,6 +503,7 @@ out: if (new_blkg) { free_percpu(new_blkg->stats_cpu); kfree(new_blkg); + css_put(&blkcg->css); } return blkg; } @@ -508,7 +512,6 @@ EXPORT_SYMBOL_GPL(blkg_lookup_create); static void __blkiocg_del_blkio_group(struct blkio_group *blkg) { hlist_del_init_rcu(&blkg->blkcg_node); - blkg->blkcg_id = 0; } /* @@ -517,24 +520,17 @@ static void __blkiocg_del_blkio_group(struct blkio_group *blkg) */ int blkiocg_del_blkio_group(struct blkio_group *blkg) { - struct blkio_cgroup *blkcg; + struct blkio_cgroup *blkcg = blkg->blkcg; unsigned long flags; - struct cgroup_subsys_state *css; int ret = 1; - rcu_read_lock(); - css = css_lookup(&blkio_subsys, blkg->blkcg_id); - if (css) { - blkcg = container_of(css, struct blkio_cgroup, css); - spin_lock_irqsave(&blkcg->lock, flags); - if (!hlist_unhashed(&blkg->blkcg_node)) { - __blkiocg_del_blkio_group(blkg); - ret = 0; - } - spin_unlock_irqrestore(&blkcg->lock, flags); + spin_lock_irqsave(&blkcg->lock, flags); + if (!hlist_unhashed(&blkg->blkcg_node)) { + __blkiocg_del_blkio_group(blkg); + ret = 0; } + spin_unlock_irqrestore(&blkcg->lock, flags); - rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group); @@ -1387,7 +1383,8 @@ static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup) ARRAY_SIZE(blkio_files)); } -static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) +static int blkiocg_pre_destroy(struct cgroup_subsys *subsys, + struct cgroup *cgroup) { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); unsigned long flags; @@ -1396,6 +1393,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) struct blkio_policy_type *blkiop; rcu_read_lock(); + do { spin_lock_irqsave(&blkcg->lock, flags); @@ -1425,8 +1423,15 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) spin_unlock(&blkio_list_lock); } while (1); - free_css_id(&blkio_subsys, &blkcg->css); rcu_read_unlock(); + + return 0; +} + +static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) +{ + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); + if (blkcg != &blkio_root_cgroup) kfree(blkcg); } diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 7ebecf6ea8f1..ca1fc637bd6e 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -163,7 +163,7 @@ struct blkio_group { /* Pointer to the associated request_queue, RCU protected */ struct request_queue __rcu *q; struct hlist_node blkcg_node; - unsigned short blkcg_id; + struct blkio_cgroup *blkcg; /* Store cgroup path */ char path[128]; /* policy which owns this blk group */ diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 52a429397d3b..fe6a442b8482 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -169,6 +169,9 @@ static void throtl_put_tg(struct throtl_grp *tg) if (!atomic_dec_and_test(&tg->ref)) return; + /* release the extra blkcg reference this blkg has been holding */ + css_put(&tg->blkg.blkcg->css); + /* * A group is freed in rcu manner. 
But having an rcu lock does not * mean that one can access all the fields of blkg and assume these diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index f67d109eb974..9ef86fbfc9ae 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1133,6 +1133,10 @@ static void cfq_put_cfqg(struct cfq_group *cfqg) cfqg->ref--; if (cfqg->ref) return; + + /* release the extra blkcg reference this blkg has been holding */ + css_put(&cfqg->blkg.blkcg->css); + for_each_cfqg_st(cfqg, i, j, st) BUG_ON(!RB_EMPTY_ROOT(&st->rb)); free_percpu(cfqg->blkg.stats_cpu); -- cgit v1.2.3-59-g8ed1b From 5efd611351d1a847c72d74fb12ff4bd187c0cb2c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:12 -0800 Subject: blkcg: add blkcg_{init|drain|exit}_queue() Currently block core calls directly into blk-throttle for init, drain and exit. This patch adds blkcg_{init|drain|exit}_queue(), which wrap the blk-throttle functions. This is to give more control and visibility to the blkcg core layer for proper layering. Further patches will add logic common to blkcg policies to the functions. While at it, collapse blk_throtl_release() into blk_throtl_exit(). There's no reason to keep them separate. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 42 ++++++++++++++++++++++++++++++++++++++++++ block/blk-cgroup.h | 7 +++++++ block/blk-core.c | 7 ++++--- block/blk-sysfs.c | 4 ++-- block/blk-throttle.c | 3 --- block/blk.h | 2 -- 6 files changed, 55 insertions(+), 10 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index d42d826ece39..b302ce1d662b 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -20,6 +20,7 @@ #include #include #include "blk-cgroup.h" +#include "blk.h" #define MAX_KEY_LEN 100 @@ -1459,6 +1460,47 @@ done: return &blkcg->css; } +/** + * blkcg_init_queue - initialize blkcg part of request queue + * @q: request_queue to initialize + * + * Called from blk_alloc_queue_node(). Responsible for initializing blkcg + * part of new request_queue @q. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +int blkcg_init_queue(struct request_queue *q) +{ + might_sleep(); + + return blk_throtl_init(q); +} + +/** + * blkcg_drain_queue - drain blkcg part of request_queue + * @q: request_queue to drain + * + * Called from blk_drain_queue(). Responsible for draining blkcg part. + */ +void blkcg_drain_queue(struct request_queue *q) +{ + lockdep_assert_held(q->queue_lock); + + blk_throtl_drain(q); +} + +/** + * blkcg_exit_queue - exit and release blkcg part of request_queue + * @q: request_queue being released + * + * Called from blk_release_queue(). Responsible for exiting blkcg part. 
+ */ +void blkcg_exit_queue(struct request_queue *q) +{ + blk_throtl_exit(q); +} + /* * We cannot support shared io contexts, as we have no mean to support * two tasks with the same ioc in two different groups without major rework diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index ca1fc637bd6e..3bc171080e93 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -215,6 +215,10 @@ struct blkio_policy_type { enum blkio_policy_id plid; }; +extern int blkcg_init_queue(struct request_queue *q); +extern void blkcg_drain_queue(struct request_queue *q); +extern void blkcg_exit_queue(struct request_queue *q); + /* Blkio controller policy registration */ extern void blkio_policy_register(struct blkio_policy_type *); extern void blkio_policy_unregister(struct blkio_policy_type *); @@ -233,6 +237,9 @@ struct blkio_group { struct blkio_policy_type { }; +static inline int blkcg_init_queue(struct request_queue *q) { return 0; } +static inline void blkcg_drain_queue(struct request_queue *q) { } +static inline void blkcg_exit_queue(struct request_queue *q) { } static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { } static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { } static inline void blkg_destroy_all(struct request_queue *q) { } diff --git a/block/blk-core.c b/block/blk-core.c index 5a1b8cc03003..c3434c6395b9 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -34,6 +34,7 @@ #include #include "blk.h" +#include "blk-cgroup.h" EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); @@ -280,7 +281,7 @@ EXPORT_SYMBOL(blk_stop_queue); * * This function does not cancel any asynchronous activity arising * out of elevator or throttling code. That would require elevaotor_exit() - * and blk_throtl_exit() to be called with queue lock initialized. + * and blkcg_exit_queue() to be called with queue lock initialized. * */ void blk_sync_queue(struct request_queue *q) @@ -372,7 +373,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all) if (q->elevator) elv_drain_elevator(q); - blk_throtl_drain(q); + blkcg_drain_queue(q); /* * This function might be called on a queue which failed @@ -562,7 +563,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) */ q->queue_lock = &q->__queue_lock; - if (blk_throtl_init(q)) + if (blkcg_init_queue(q)) goto fail_id; return q; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index cf150011d808..00cdc987b525 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -9,6 +9,7 @@ #include #include "blk.h" +#include "blk-cgroup.h" struct queue_sysfs_entry { struct attribute attr; @@ -486,7 +487,7 @@ static void blk_release_queue(struct kobject *kobj) elevator_exit(q->elevator); } - blk_throtl_exit(q); + blkcg_exit_queue(q); if (rl->rq_pool) mempool_destroy(rl->rq_pool); @@ -494,7 +495,6 @@ static void blk_release_queue(struct kobject *kobj) if (q->queue_tags) __blk_queue_free_tags(q); - blk_throtl_release(q); blk_trace_shutdown(q); bdi_destroy(&q->backing_dev_info); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index fe6a442b8482..ac6d0fe6e4ee 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1226,10 +1226,7 @@ void blk_throtl_exit(struct request_queue *q) * it. 
*/ throtl_shutdown_wq(q); -} -void blk_throtl_release(struct request_queue *q) -{ kfree(q->td); } diff --git a/block/blk.h b/block/blk.h index 7422f3133c5d..de15f920b38f 100644 --- a/block/blk.h +++ b/block/blk.h @@ -236,7 +236,6 @@ extern bool blk_throtl_bio(struct request_queue *q, struct bio *bio); extern void blk_throtl_drain(struct request_queue *q); extern int blk_throtl_init(struct request_queue *q); extern void blk_throtl_exit(struct request_queue *q); -extern void blk_throtl_release(struct request_queue *q); #else /* CONFIG_BLK_DEV_THROTTLING */ static inline bool blk_throtl_bio(struct request_queue *q, struct bio *bio) { @@ -245,7 +244,6 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct bio *bio) static inline void blk_throtl_drain(struct request_queue *q) { } static inline int blk_throtl_init(struct request_queue *q) { return 0; } static inline void blk_throtl_exit(struct request_queue *q) { } -static inline void blk_throtl_release(struct request_queue *q) { } #endif /* CONFIG_BLK_DEV_THROTTLING */ #endif /* BLK_INTERNAL_H */ -- cgit v1.2.3-59-g8ed1b From 0381411e4b1a52cee134eb73750e5e3cc1155d09 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:14 -0800 Subject: blkcg: let blkcg core handle policy private data allocation Currently, blkg's are embedded in private data blkcg policy private data structure and thus allocated and freed by policies. This leads to duplicate codes in policies, hinders implementing common part in blkcg core with strong semantics, and forces duplicate blkg's for the same cgroup-q association. This patch introduces struct blkg_policy_data which is a separate data structure chained from blkg. Policies specifies the amount of private data it needs in its blkio_policy_type->pdata_size and blkcg core takes care of allocating them along with blkg which can be accessed using blkg_to_pdata(). blkg can be determined from pdata using pdata_to_blkg(). blkio_alloc_group_fn() method is accordingly updated to blkio_init_group_fn(). For consistency, tg_of_blkg() and cfqg_of_blkg() are replaced with blkg_to_tg() and blkg_to_cfqg() respectively, and functions to map in the reverse direction are added. Except that policy specific data now lives in a separate data structure from blkg, this patch doesn't introduce any functional difference. This will be used to unify blkg's for different policies. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 86 +++++++++++++++++++++++++++++++++---------- block/blk-cgroup.h | 53 ++++++++++++++++++++++++-- block/blk-throttle.c | 79 +++++++++++++++++++-------------------- block/cfq-iosched.c | 102 ++++++++++++++++++++++++++------------------------- 4 files changed, 209 insertions(+), 111 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 266c0707d588..14367499cfed 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -422,6 +422,70 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction, } EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats); +/** + * blkg_free - free a blkg + * @blkg: blkg to free + * + * Free @blkg which may be partially allocated. 
+ */ +static void blkg_free(struct blkio_group *blkg) +{ + if (blkg) { + free_percpu(blkg->stats_cpu); + kfree(blkg->pd); + kfree(blkg); + } +} + +/** + * blkg_alloc - allocate a blkg + * @blkcg: block cgroup the new blkg is associated with + * @q: request_queue the new blkg is associated with + * @pol: policy the new blkg is associated with + * + * Allocate a new blkg assocating @blkcg and @q for @pol. + * + * FIXME: Should be called with queue locked but currently isn't due to + * percpu stat breakage. + */ +static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, + struct request_queue *q, + struct blkio_policy_type *pol) +{ + struct blkio_group *blkg; + + /* alloc and init base part */ + blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node); + if (!blkg) + return NULL; + + spin_lock_init(&blkg->stats_lock); + rcu_assign_pointer(blkg->q, q); + blkg->blkcg = blkcg; + blkg->plid = pol->plid; + cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path)); + + /* alloc per-policy data */ + blkg->pd = kzalloc_node(sizeof(*blkg->pd) + pol->pdata_size, GFP_ATOMIC, + q->node); + if (!blkg->pd) { + blkg_free(blkg); + return NULL; + } + + /* broken, read comment in the callsite */ + blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu); + if (!blkg->stats_cpu) { + blkg_free(blkg); + return NULL; + } + + /* attach pd to blkg and invoke per-policy init */ + blkg->pd->blkg = blkg; + pol->ops.blkio_init_group_fn(blkg); + return blkg; +} + struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, struct request_queue *q, enum blkio_policy_id plid, @@ -463,19 +527,7 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, spin_unlock_irq(q->queue_lock); rcu_read_unlock(); - new_blkg = pol->ops.blkio_alloc_group_fn(q, blkcg); - if (new_blkg) { - new_blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu); - - spin_lock_init(&new_blkg->stats_lock); - rcu_assign_pointer(new_blkg->q, q); - new_blkg->blkcg = blkcg; - new_blkg->plid = plid; - cgroup_path(blkcg->css.cgroup, new_blkg->path, - sizeof(new_blkg->path)); - } else { - css_put(&blkcg->css); - } + new_blkg = blkg_alloc(blkcg, q, pol); rcu_read_lock(); spin_lock_irq(q->queue_lock); @@ -492,7 +544,7 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, goto out; /* did alloc fail? 
*/ - if (unlikely(!new_blkg || !new_blkg->stats_cpu)) { + if (unlikely(!new_blkg)) { blkg = ERR_PTR(-ENOMEM); goto out; } @@ -504,11 +556,7 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, pol->ops.blkio_link_group_fn(q, blkg); spin_unlock(&blkcg->lock); out: - if (new_blkg) { - free_percpu(new_blkg->stats_cpu); - kfree(new_blkg); - css_put(&blkcg->css); - } + blkg_free(new_blkg); return blkg; } EXPORT_SYMBOL_GPL(blkg_lookup_create); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 3bc171080e93..9537819c29c6 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -159,6 +159,15 @@ struct blkio_group_conf { u64 bps[2]; }; +/* per-blkg per-policy data */ +struct blkg_policy_data { + /* the blkg this per-policy data belongs to */ + struct blkio_group *blkg; + + /* pol->pdata_size bytes of private data used by policy impl */ + char pdata[] __aligned(__alignof__(unsigned long long)); +}; + struct blkio_group { /* Pointer to the associated request_queue, RCU protected */ struct request_queue __rcu *q; @@ -177,10 +186,11 @@ struct blkio_group { struct blkio_group_stats stats; /* Per cpu stats pointer */ struct blkio_group_stats_cpu __percpu *stats_cpu; + + struct blkg_policy_data *pd; }; -typedef struct blkio_group *(blkio_alloc_group_fn)(struct request_queue *q, - struct blkio_cgroup *blkcg); +typedef void (blkio_init_group_fn)(struct blkio_group *blkg); typedef void (blkio_link_group_fn)(struct request_queue *q, struct blkio_group *blkg); typedef void (blkio_unlink_group_fn)(struct request_queue *q, @@ -198,7 +208,7 @@ typedef void (blkio_update_group_write_iops_fn)(struct request_queue *q, struct blkio_group *blkg, unsigned int write_iops); struct blkio_policy_ops { - blkio_alloc_group_fn *blkio_alloc_group_fn; + blkio_init_group_fn *blkio_init_group_fn; blkio_link_group_fn *blkio_link_group_fn; blkio_unlink_group_fn *blkio_unlink_group_fn; blkio_clear_queue_fn *blkio_clear_queue_fn; @@ -213,6 +223,7 @@ struct blkio_policy_type { struct list_head list; struct blkio_policy_ops ops; enum blkio_policy_id plid; + size_t pdata_size; /* policy specific private data size */ }; extern int blkcg_init_queue(struct request_queue *q); @@ -224,6 +235,38 @@ extern void blkio_policy_register(struct blkio_policy_type *); extern void blkio_policy_unregister(struct blkio_policy_type *); extern void blkg_destroy_all(struct request_queue *q); +/** + * blkg_to_pdata - get policy private data + * @blkg: blkg of interest + * @pol: policy of interest + * + * Return pointer to private data associated with the @blkg-@pol pair. + */ +static inline void *blkg_to_pdata(struct blkio_group *blkg, + struct blkio_policy_type *pol) +{ + return blkg ? blkg->pd->pdata : NULL; +} + +/** + * pdata_to_blkg - get blkg associated with policy private data + * @pdata: policy private data of interest + * @pol: policy @pdata is for + * + * @pdata is policy private data for @pol. Determine the blkg it's + * associated with. 
+ */ +static inline struct blkio_group *pdata_to_blkg(void *pdata, + struct blkio_policy_type *pol) +{ + if (pdata) { + struct blkg_policy_data *pd = + container_of(pdata, struct blkg_policy_data, pdata); + return pd->blkg; + } + return NULL; +} + static inline char *blkg_path(struct blkio_group *blkg) { return blkg->path; @@ -244,6 +287,10 @@ static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { } static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { } static inline void blkg_destroy_all(struct request_queue *q) { } +static inline void *blkg_to_pdata(struct blkio_group *blkg, + struct blkio_policy_type *pol) { return NULL; } +static inline struct blkio_group *pdata_to_blkg(void *pdata, + struct blkio_policy_type *pol) { return NULL; } static inline char *blkg_path(struct blkio_group *blkg) { return NULL; } #endif diff --git a/block/blk-throttle.c b/block/blk-throttle.c index ac6d0fe6e4ee..9c8a12477e13 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -21,6 +21,8 @@ static int throtl_quantum = 32; /* Throttling is performed over 100ms slice and after that slice is renewed */ static unsigned long throtl_slice = HZ/10; /* 100 ms */ +static struct blkio_policy_type blkio_policy_throtl; + /* A workqueue to queue throttle related work */ static struct workqueue_struct *kthrotld_workqueue; static void throtl_schedule_delayed_work(struct throtl_data *td, @@ -52,7 +54,6 @@ struct throtl_grp { */ unsigned long disptime; - struct blkio_group blkg; atomic_t ref; unsigned int flags; @@ -108,6 +109,16 @@ struct throtl_data int limits_changed; }; +static inline struct throtl_grp *blkg_to_tg(struct blkio_group *blkg) +{ + return blkg_to_pdata(blkg, &blkio_policy_throtl); +} + +static inline struct blkio_group *tg_to_blkg(struct throtl_grp *tg) +{ + return pdata_to_blkg(tg, &blkio_policy_throtl); +} + enum tg_state_flags { THROTL_TG_FLAG_on_rr = 0, /* on round-robin busy list */ }; @@ -130,19 +141,11 @@ THROTL_TG_FNS(on_rr); #define throtl_log_tg(td, tg, fmt, args...) \ blk_add_trace_msg((td)->queue, "throtl %s " fmt, \ - blkg_path(&(tg)->blkg), ##args); \ + blkg_path(tg_to_blkg(tg)), ##args); \ #define throtl_log(td, fmt, args...) \ blk_add_trace_msg((td)->queue, "throtl " fmt, ##args) -static inline struct throtl_grp *tg_of_blkg(struct blkio_group *blkg) -{ - if (blkg) - return container_of(blkg, struct throtl_grp, blkg); - - return NULL; -} - static inline unsigned int total_nr_queued(struct throtl_data *td) { return td->nr_queued[0] + td->nr_queued[1]; @@ -156,21 +159,24 @@ static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg) static void throtl_free_tg(struct rcu_head *head) { - struct throtl_grp *tg; + struct throtl_grp *tg = container_of(head, struct throtl_grp, rcu_head); + struct blkio_group *blkg = tg_to_blkg(tg); - tg = container_of(head, struct throtl_grp, rcu_head); - free_percpu(tg->blkg.stats_cpu); - kfree(tg); + free_percpu(blkg->stats_cpu); + kfree(blkg->pd); + kfree(blkg); } static void throtl_put_tg(struct throtl_grp *tg) { + struct blkio_group *blkg = tg_to_blkg(tg); + BUG_ON(atomic_read(&tg->ref) <= 0); if (!atomic_dec_and_test(&tg->ref)) return; /* release the extra blkcg reference this blkg has been holding */ - css_put(&tg->blkg.blkcg->css); + css_put(&blkg->blkcg->css); /* * A group is freed in rcu manner. 
But having an rcu lock does not @@ -184,14 +190,9 @@ static void throtl_put_tg(struct throtl_grp *tg) call_rcu(&tg->rcu_head, throtl_free_tg); } -static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q, - struct blkio_cgroup *blkcg) +static void throtl_init_blkio_group(struct blkio_group *blkg) { - struct throtl_grp *tg; - - tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, q->node); - if (!tg) - return NULL; + struct throtl_grp *tg = blkg_to_tg(blkg); INIT_HLIST_NODE(&tg->tg_node); RB_CLEAR_NODE(&tg->rb_node); @@ -211,15 +212,13 @@ static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q, * exit or cgroup deletion path depending on who is exiting first. */ atomic_set(&tg->ref, 1); - - return &tg->blkg; } static void throtl_link_blkio_group(struct request_queue *q, struct blkio_group *blkg) { struct throtl_data *td = q->td; - struct throtl_grp *tg = tg_of_blkg(blkg); + struct throtl_grp *tg = blkg_to_tg(blkg); hlist_add_head(&tg->tg_node, &td->tg_list); td->nr_undestroyed_grps++; @@ -235,7 +234,7 @@ throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) if (blkcg == &blkio_root_cgroup) return td->root_tg; - return tg_of_blkg(blkg_lookup(blkcg, td->queue, BLKIO_POLICY_THROTL)); + return blkg_to_tg(blkg_lookup(blkcg, td->queue, BLKIO_POLICY_THROTL)); } static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, @@ -257,7 +256,7 @@ static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, /* if %NULL and @q is alive, fall back to root_tg */ if (!IS_ERR(blkg)) - tg = tg_of_blkg(blkg); + tg = blkg_to_tg(blkg); else if (!blk_queue_dead(q)) tg = td->root_tg; } @@ -639,7 +638,7 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) tg->bytes_disp[rw] += bio->bi_size; tg->io_disp[rw]++; - blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, rw, sync); + blkiocg_update_dispatch_stats(tg_to_blkg(tg), bio->bi_size, rw, sync); } static void throtl_add_bio_tg(struct throtl_data *td, struct throtl_grp *tg, @@ -901,7 +900,7 @@ static bool throtl_release_tgs(struct throtl_data *td, bool release_root) * it from cgroup list, then it will take care of destroying * cfqg also. 
*/ - if (!blkiocg_del_blkio_group(&tg->blkg)) + if (!blkiocg_del_blkio_group(tg_to_blkg(tg))) throtl_destroy_tg(td, tg); else empty = false; @@ -929,7 +928,7 @@ void throtl_unlink_blkio_group(struct request_queue *q, unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - throtl_destroy_tg(q->td, tg_of_blkg(blkg)); + throtl_destroy_tg(q->td, blkg_to_tg(blkg)); spin_unlock_irqrestore(q->queue_lock, flags); } @@ -968,7 +967,7 @@ static void throtl_update_blkio_group_common(struct throtl_data *td, static void throtl_update_blkio_group_read_bps(struct request_queue *q, struct blkio_group *blkg, u64 read_bps) { - struct throtl_grp *tg = tg_of_blkg(blkg); + struct throtl_grp *tg = blkg_to_tg(blkg); tg->bps[READ] = read_bps; throtl_update_blkio_group_common(q->td, tg); @@ -977,7 +976,7 @@ static void throtl_update_blkio_group_read_bps(struct request_queue *q, static void throtl_update_blkio_group_write_bps(struct request_queue *q, struct blkio_group *blkg, u64 write_bps) { - struct throtl_grp *tg = tg_of_blkg(blkg); + struct throtl_grp *tg = blkg_to_tg(blkg); tg->bps[WRITE] = write_bps; throtl_update_blkio_group_common(q->td, tg); @@ -986,7 +985,7 @@ static void throtl_update_blkio_group_write_bps(struct request_queue *q, static void throtl_update_blkio_group_read_iops(struct request_queue *q, struct blkio_group *blkg, unsigned int read_iops) { - struct throtl_grp *tg = tg_of_blkg(blkg); + struct throtl_grp *tg = blkg_to_tg(blkg); tg->iops[READ] = read_iops; throtl_update_blkio_group_common(q->td, tg); @@ -995,7 +994,7 @@ static void throtl_update_blkio_group_read_iops(struct request_queue *q, static void throtl_update_blkio_group_write_iops(struct request_queue *q, struct blkio_group *blkg, unsigned int write_iops) { - struct throtl_grp *tg = tg_of_blkg(blkg); + struct throtl_grp *tg = blkg_to_tg(blkg); tg->iops[WRITE] = write_iops; throtl_update_blkio_group_common(q->td, tg); @@ -1010,7 +1009,7 @@ static void throtl_shutdown_wq(struct request_queue *q) static struct blkio_policy_type blkio_policy_throtl = { .ops = { - .blkio_alloc_group_fn = throtl_alloc_blkio_group, + .blkio_init_group_fn = throtl_init_blkio_group, .blkio_link_group_fn = throtl_link_blkio_group, .blkio_unlink_group_fn = throtl_unlink_blkio_group, .blkio_clear_queue_fn = throtl_clear_queue, @@ -1024,6 +1023,7 @@ static struct blkio_policy_type blkio_policy_throtl = { throtl_update_blkio_group_write_iops, }, .plid = BLKIO_POLICY_THROTL, + .pdata_size = sizeof(struct throtl_grp), }; bool blk_throtl_bio(struct request_queue *q, struct bio *bio) @@ -1049,8 +1049,9 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) tg = throtl_lookup_tg(td, blkcg); if (tg) { if (tg_no_rule_group(tg, rw)) { - blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, - rw, rw_is_sync(bio->bi_rw)); + blkiocg_update_dispatch_stats(tg_to_blkg(tg), + bio->bi_size, rw, + rw_is_sync(bio->bi_rw)); goto out_unlock_rcu; } } @@ -1176,7 +1177,7 @@ int blk_throtl_init(struct request_queue *q) blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_THROTL, true); if (!IS_ERR(blkg)) - td->root_tg = tg_of_blkg(blkg); + td->root_tg = blkg_to_tg(blkg); spin_unlock_irq(q->queue_lock); rcu_read_unlock(); @@ -1207,7 +1208,7 @@ void blk_throtl_exit(struct request_queue *q) spin_unlock_irq(q->queue_lock); /* - * Wait for tg->blkg->q accessors to exit their grace periods. + * Wait for tg_to_blkg(tg)->q accessors to exit their grace periods. * Do this wait only if there are other undestroyed groups out * there (other than root group). 
This can happen if cgroup deletion * path claimed the responsibility of cleaning up a group before diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 9ef86fbfc9ae..c7449db52a86 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -17,6 +17,8 @@ #include "blk.h" #include "cfq.h" +static struct blkio_policy_type blkio_policy_cfq; + /* * tunables */ @@ -206,7 +208,6 @@ struct cfq_group { unsigned long saved_workload_slice; enum wl_type_t saved_workload; enum wl_prio_t saved_serving_prio; - struct blkio_group blkg; #ifdef CONFIG_CFQ_GROUP_IOSCHED struct hlist_node cfqd_node; int ref; @@ -310,6 +311,16 @@ struct cfq_data { unsigned int nr_blkcg_linked_grps; }; +static inline struct cfq_group *blkg_to_cfqg(struct blkio_group *blkg) +{ + return blkg_to_pdata(blkg, &blkio_policy_cfq); +} + +static inline struct blkio_group *cfqg_to_blkg(struct cfq_group *cfqg) +{ + return pdata_to_blkg(cfqg, &blkio_policy_cfq); +} + static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd); static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg, @@ -374,11 +385,11 @@ CFQ_CFQQ_FNS(wait_busy); #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \ cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ - blkg_path(&(cfqq)->cfqg->blkg), ##args) + blkg_path(cfqg_to_blkg((cfqq)->cfqg)), ##args) #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) \ blk_add_trace_msg((cfqd)->queue, "%s " fmt, \ - blkg_path(&(cfqg)->blkg), ##args) \ + blkg_path(cfqg_to_blkg((cfqg))), ##args) \ #else #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ @@ -935,7 +946,7 @@ cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg) cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); cfq_group_service_tree_del(st, cfqg); cfqg->saved_workload_slice = 0; - cfq_blkiocg_update_dequeue_stats(&cfqg->blkg, 1); + cfq_blkiocg_update_dequeue_stats(cfqg_to_blkg(cfqg), 1); } static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq, @@ -1007,9 +1018,9 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, "sl_used=%u disp=%u charge=%u iops=%u sect=%lu", used_sl, cfqq->slice_dispatch, charge, iops_mode(cfqd), cfqq->nr_sectors); - cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl, + cfq_blkiocg_update_timeslice_used(cfqg_to_blkg(cfqg), used_sl, unaccounted_sl); - cfq_blkiocg_set_start_empty_time(&cfqg->blkg); + cfq_blkiocg_set_start_empty_time(cfqg_to_blkg(cfqg)); } /** @@ -1032,18 +1043,12 @@ static void cfq_init_cfqg_base(struct cfq_group *cfqg) } #ifdef CONFIG_CFQ_GROUP_IOSCHED -static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg) -{ - if (blkg) - return container_of(blkg, struct cfq_group, blkg); - return NULL; -} - static void cfq_update_blkio_group_weight(struct request_queue *q, struct blkio_group *blkg, unsigned int weight) { - struct cfq_group *cfqg = cfqg_of_blkg(blkg); + struct cfq_group *cfqg = blkg_to_cfqg(blkg); + cfqg->new_weight = weight; cfqg->needs_update = true; } @@ -1052,7 +1057,7 @@ static void cfq_link_blkio_group(struct request_queue *q, struct blkio_group *blkg) { struct cfq_data *cfqd = q->elevator->elevator_data; - struct cfq_group *cfqg = cfqg_of_blkg(blkg); + struct cfq_group *cfqg = blkg_to_cfqg(blkg); cfqd->nr_blkcg_linked_grps++; @@ -1060,17 +1065,12 @@ static void cfq_link_blkio_group(struct request_queue *q, hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list); } -static struct blkio_group *cfq_alloc_blkio_group(struct request_queue *q, - struct blkio_cgroup *blkcg) +static void 
cfq_init_blkio_group(struct blkio_group *blkg) { - struct cfq_group *cfqg; - - cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, q->node); - if (!cfqg) - return NULL; + struct cfq_group *cfqg = blkg_to_cfqg(blkg); cfq_init_cfqg_base(cfqg); - cfqg->weight = blkcg->weight; + cfqg->weight = blkg->blkcg->weight; /* * Take the initial reference that will be released on destroy @@ -1079,8 +1079,6 @@ static struct blkio_group *cfq_alloc_blkio_group(struct request_queue *q, * or cgroup deletion path depending on who is exiting first. */ cfqg->ref = 1; - - return &cfqg->blkg; } /* @@ -1101,7 +1099,7 @@ static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd, blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_PROP, false); if (!IS_ERR(blkg)) - cfqg = cfqg_of_blkg(blkg); + cfqg = blkg_to_cfqg(blkg); } return cfqg; @@ -1126,6 +1124,7 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) static void cfq_put_cfqg(struct cfq_group *cfqg) { + struct blkio_group *blkg = cfqg_to_blkg(cfqg); struct cfq_rb_root *st; int i, j; @@ -1135,12 +1134,13 @@ static void cfq_put_cfqg(struct cfq_group *cfqg) return; /* release the extra blkcg reference this blkg has been holding */ - css_put(&cfqg->blkg.blkcg->css); + css_put(&blkg->blkcg->css); for_each_cfqg_st(cfqg, i, j, st) BUG_ON(!RB_EMPTY_ROOT(&st->rb)); - free_percpu(cfqg->blkg.stats_cpu); - kfree(cfqg); + free_percpu(blkg->stats_cpu); + kfree(blkg->pd); + kfree(blkg); } static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg) @@ -1172,7 +1172,7 @@ static bool cfq_release_cfq_groups(struct cfq_data *cfqd) * it from cgroup list, then it will take care of destroying * cfqg also. */ - if (!cfq_blkiocg_del_blkio_group(&cfqg->blkg)) + if (!cfq_blkiocg_del_blkio_group(cfqg_to_blkg(cfqg))) cfq_destroy_cfqg(cfqd, cfqg); else empty = false; @@ -1201,7 +1201,7 @@ static void cfq_unlink_blkio_group(struct request_queue *q, unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - cfq_destroy_cfqg(cfqd, cfqg_of_blkg(blkg)); + cfq_destroy_cfqg(cfqd, blkg_to_cfqg(blkg)); spin_unlock_irqrestore(q->queue_lock, flags); } @@ -1504,12 +1504,12 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) { elv_rb_del(&cfqq->sort_list, rq); cfqq->queued[rq_is_sync(rq)]--; - cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, + cfq_blkiocg_update_io_remove_stats(cfqg_to_blkg(RQ_CFQG(rq)), rq_data_dir(rq), rq_is_sync(rq)); cfq_add_rq_rb(rq); - cfq_blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg, - &cfqq->cfqd->serving_group->blkg, rq_data_dir(rq), - rq_is_sync(rq)); + cfq_blkiocg_update_io_add_stats(cfqg_to_blkg(RQ_CFQG(rq)), + cfqg_to_blkg(cfqq->cfqd->serving_group), + rq_data_dir(rq), rq_is_sync(rq)); } static struct request * @@ -1565,7 +1565,7 @@ static void cfq_remove_request(struct request *rq) cfq_del_rq_rb(rq); cfqq->cfqd->rq_queued--; - cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, + cfq_blkiocg_update_io_remove_stats(cfqg_to_blkg(RQ_CFQG(rq)), rq_data_dir(rq), rq_is_sync(rq)); if (rq->cmd_flags & REQ_PRIO) { WARN_ON(!cfqq->prio_pending); @@ -1601,7 +1601,7 @@ static void cfq_merged_request(struct request_queue *q, struct request *req, static void cfq_bio_merged(struct request_queue *q, struct request *req, struct bio *bio) { - cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(req))->blkg, + cfq_blkiocg_update_io_merged_stats(cfqg_to_blkg(RQ_CFQG(req)), bio_data_dir(bio), cfq_bio_sync(bio)); } @@ -1624,7 +1624,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, if 
(cfqq->next_rq == next) cfqq->next_rq = rq; cfq_remove_request(next); - cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg, + cfq_blkiocg_update_io_merged_stats(cfqg_to_blkg(RQ_CFQG(rq)), rq_data_dir(next), rq_is_sync(next)); cfqq = RQ_CFQQ(next); @@ -1666,7 +1666,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) { del_timer(&cfqd->idle_slice_timer); - cfq_blkiocg_update_idle_time_stats(&cfqq->cfqg->blkg); + cfq_blkiocg_update_idle_time_stats(cfqg_to_blkg(cfqq->cfqg)); } static void __cfq_set_active_queue(struct cfq_data *cfqd, @@ -1675,7 +1675,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, if (cfqq) { cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d", cfqd->serving_prio, cfqd->serving_type); - cfq_blkiocg_update_avg_queue_size_stats(&cfqq->cfqg->blkg); + cfq_blkiocg_update_avg_queue_size_stats(cfqg_to_blkg(cfqq->cfqg)); cfqq->slice_start = 0; cfqq->dispatch_start = jiffies; cfqq->allocated_slice = 0; @@ -2023,7 +2023,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) sl = cfqd->cfq_slice_idle; mod_timer(&cfqd->idle_slice_timer, jiffies + sl); - cfq_blkiocg_update_set_idle_time_stats(&cfqq->cfqg->blkg); + cfq_blkiocg_update_set_idle_time_stats(cfqg_to_blkg(cfqq->cfqg)); cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl, group_idle ? 1 : 0); } @@ -2046,8 +2046,9 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++; cfqq->nr_sectors += blk_rq_sectors(rq); - cfq_blkiocg_update_dispatch_stats(&cfqq->cfqg->blkg, blk_rq_bytes(rq), - rq_data_dir(rq), rq_is_sync(rq)); + cfq_blkiocg_update_dispatch_stats(cfqg_to_blkg(cfqq->cfqg), + blk_rq_bytes(rq), rq_data_dir(rq), + rq_is_sync(rq)); } /* @@ -3135,7 +3136,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, __blk_run_queue(cfqd->queue); } else { cfq_blkiocg_update_idle_time_stats( - &cfqq->cfqg->blkg); + cfqg_to_blkg(cfqq->cfqg)); cfq_mark_cfqq_must_dispatch(cfqq); } } @@ -3162,9 +3163,9 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); list_add_tail(&rq->queuelist, &cfqq->fifo); cfq_add_rq_rb(rq); - cfq_blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg, - &cfqd->serving_group->blkg, rq_data_dir(rq), - rq_is_sync(rq)); + cfq_blkiocg_update_io_add_stats(cfqg_to_blkg(RQ_CFQG(rq)), + cfqg_to_blkg(cfqd->serving_group), + rq_data_dir(rq), rq_is_sync(rq)); cfq_rq_enqueued(cfqd, cfqq, rq); } @@ -3260,7 +3261,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfqd->rq_in_driver--; cfqq->dispatched--; (RQ_CFQG(rq))->dispatched--; - cfq_blkiocg_update_completion_stats(&cfqq->cfqg->blkg, + cfq_blkiocg_update_completion_stats(cfqg_to_blkg(cfqq->cfqg), rq_start_time_ns(rq), rq_io_start_time_ns(rq), rq_data_dir(rq), rq_is_sync(rq)); @@ -3641,7 +3642,7 @@ static int cfq_init_queue(struct request_queue *q) blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_PROP, true); if (!IS_ERR(blkg)) - cfqd->root_group = cfqg_of_blkg(blkg); + cfqd->root_group = blkg_to_cfqg(blkg); spin_unlock_irq(q->queue_lock); rcu_read_unlock(); @@ -3827,13 +3828,14 @@ static struct elevator_type iosched_cfq = { #ifdef CONFIG_CFQ_GROUP_IOSCHED static struct blkio_policy_type blkio_policy_cfq = { .ops = { - .blkio_alloc_group_fn = cfq_alloc_blkio_group, + .blkio_init_group_fn = cfq_init_blkio_group, 
.blkio_link_group_fn = cfq_link_blkio_group, .blkio_unlink_group_fn = cfq_unlink_blkio_group, .blkio_clear_queue_fn = cfq_clear_queue, .blkio_update_group_weight_fn = cfq_update_blkio_group_weight, }, .plid = BLKIO_POLICY_PROP, + .pdata_size = sizeof(struct cfq_group), }; #endif -- cgit v1.2.3-59-g8ed1b From 1adaf3dde37a8b9b59ea59c5f58fed7761178383 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:15 -0800 Subject: blkcg: move refcnt to blkcg core Currently, blkcg policy implementations manage blkg refcnt duplicating mostly identical code in both policies. This patch moves refcnt to blkg and let blkcg core handle refcnt and freeing of blkgs. * cfq blkgs now also get freed via RCU. * cfq blkgs lose RB_EMPTY_ROOT() sanity check on blkg free. If necessary, we can add blkio_exit_group_fn() to resurrect this. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 24 ++++++++++++++++++++++ block/blk-cgroup.h | 35 +++++++++++++++++++++++++++++++ block/blk-throttle.c | 58 ++++------------------------------------------------ block/cfq-iosched.c | 58 +++++++++------------------------------------------- 4 files changed, 73 insertions(+), 102 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 14367499cfed..3b6a0e1265aa 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -463,6 +463,7 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, rcu_assign_pointer(blkg->q, q); blkg->blkcg = blkcg; blkg->plid = pol->plid; + blkg->refcnt = 1; cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path)); /* alloc per-policy data */ @@ -633,6 +634,29 @@ void blkg_destroy_all(struct request_queue *q) } } +static void blkg_rcu_free(struct rcu_head *rcu_head) +{ + blkg_free(container_of(rcu_head, struct blkio_group, rcu_head)); +} + +void __blkg_release(struct blkio_group *blkg) +{ + /* release the extra blkcg reference this blkg has been holding */ + css_put(&blkg->blkcg->css); + + /* + * A group is freed in rcu manner. But having an rcu lock does not + * mean that one can access all the fields of blkg and assume these + * are valid. For example, don't try to follow throtl_data and + * request queue links. + * + * Having a reference to blkg under an rcu allows acess to only + * values local to groups like group stats and group rate limits + */ + call_rcu(&blkg->rcu_head, blkg_rcu_free); +} +EXPORT_SYMBOL_GPL(__blkg_release); + static void blkio_reset_stats_cpu(struct blkio_group *blkg) { struct blkio_group_stats_cpu *stats_cpu; diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 9537819c29c6..7da106843f01 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -177,6 +177,8 @@ struct blkio_group { char path[128]; /* policy which owns this blk group */ enum blkio_policy_id plid; + /* reference count */ + int refcnt; /* Configuration */ struct blkio_group_conf conf; @@ -188,6 +190,8 @@ struct blkio_group { struct blkio_group_stats_cpu __percpu *stats_cpu; struct blkg_policy_data *pd; + + struct rcu_head rcu_head; }; typedef void (blkio_init_group_fn)(struct blkio_group *blkg); @@ -272,6 +276,35 @@ static inline char *blkg_path(struct blkio_group *blkg) return blkg->path; } +/** + * blkg_get - get a blkg reference + * @blkg: blkg to get + * + * The caller should be holding queue_lock and an existing reference. 
+ */ +static inline void blkg_get(struct blkio_group *blkg) +{ + lockdep_assert_held(blkg->q->queue_lock); + WARN_ON_ONCE(!blkg->refcnt); + blkg->refcnt++; +} + +void __blkg_release(struct blkio_group *blkg); + +/** + * blkg_put - put a blkg reference + * @blkg: blkg to put + * + * The caller should be holding queue_lock. + */ +static inline void blkg_put(struct blkio_group *blkg) +{ + lockdep_assert_held(blkg->q->queue_lock); + WARN_ON_ONCE(blkg->refcnt <= 0); + if (!--blkg->refcnt) + __blkg_release(blkg); +} + #else struct blkio_group { @@ -292,6 +325,8 @@ static inline void *blkg_to_pdata(struct blkio_group *blkg, static inline struct blkio_group *pdata_to_blkg(void *pdata, struct blkio_policy_type *pol) { return NULL; } static inline char *blkg_path(struct blkio_group *blkg) { return NULL; } +static inline void blkg_get(struct blkio_group *blkg) { } +static inline void blkg_put(struct blkio_group *blkg) { } #endif diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 9c8a12477e13..153ba509446b 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -54,7 +54,6 @@ struct throtl_grp { */ unsigned long disptime; - atomic_t ref; unsigned int flags; /* Two lists for READ and WRITE */ @@ -80,8 +79,6 @@ struct throtl_grp { /* Some throttle limits got updated for the group */ int limits_changed; - - struct rcu_head rcu_head; }; struct throtl_data @@ -151,45 +148,6 @@ static inline unsigned int total_nr_queued(struct throtl_data *td) return td->nr_queued[0] + td->nr_queued[1]; } -static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg) -{ - atomic_inc(&tg->ref); - return tg; -} - -static void throtl_free_tg(struct rcu_head *head) -{ - struct throtl_grp *tg = container_of(head, struct throtl_grp, rcu_head); - struct blkio_group *blkg = tg_to_blkg(tg); - - free_percpu(blkg->stats_cpu); - kfree(blkg->pd); - kfree(blkg); -} - -static void throtl_put_tg(struct throtl_grp *tg) -{ - struct blkio_group *blkg = tg_to_blkg(tg); - - BUG_ON(atomic_read(&tg->ref) <= 0); - if (!atomic_dec_and_test(&tg->ref)) - return; - - /* release the extra blkcg reference this blkg has been holding */ - css_put(&blkg->blkcg->css); - - /* - * A group is freed in rcu manner. But having an rcu lock does not - * mean that one can access all the fields of blkg and assume these - * are valid. For example, don't try to follow throtl_data and - * request queue links. - * - * Having a reference to blkg under an rcu allows acess to only - * values local to groups like group stats and group rate limits - */ - call_rcu(&tg->rcu_head, throtl_free_tg); -} - static void throtl_init_blkio_group(struct blkio_group *blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); @@ -204,14 +162,6 @@ static void throtl_init_blkio_group(struct blkio_group *blkg) tg->bps[WRITE] = -1; tg->iops[READ] = -1; tg->iops[WRITE] = -1; - - /* - * Take the initial reference that will be released on destroy - * This can be thought of a joint reference by cgroup and - * request queue which will be dropped by either request queue - * exit or cgroup deletion path depending on who is exiting first. 
- */ - atomic_set(&tg->ref, 1); } static void throtl_link_blkio_group(struct request_queue *q, @@ -648,7 +598,7 @@ static void throtl_add_bio_tg(struct throtl_data *td, struct throtl_grp *tg, bio_list_add(&tg->bio_lists[rw], bio); /* Take a bio reference on tg */ - throtl_ref_get_tg(tg); + blkg_get(tg_to_blkg(tg)); tg->nr_queued[rw]++; td->nr_queued[rw]++; throtl_enqueue_tg(td, tg); @@ -681,8 +631,8 @@ static void tg_dispatch_one_bio(struct throtl_data *td, struct throtl_grp *tg, bio = bio_list_pop(&tg->bio_lists[rw]); tg->nr_queued[rw]--; - /* Drop bio reference on tg */ - throtl_put_tg(tg); + /* Drop bio reference on blkg */ + blkg_put(tg_to_blkg(tg)); BUG_ON(td->nr_queued[rw] <= 0); td->nr_queued[rw]--; @@ -880,7 +830,7 @@ throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg) * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. */ - throtl_put_tg(tg); + blkg_put(tg_to_blkg(tg)); td->nr_undestroyed_grps--; } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index c7449db52a86..86980023339a 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -210,7 +210,6 @@ struct cfq_group { enum wl_prio_t saved_serving_prio; #ifdef CONFIG_CFQ_GROUP_IOSCHED struct hlist_node cfqd_node; - int ref; #endif /* number of requests that are on the dispatch list or inside driver */ int dispatched; @@ -1071,14 +1070,6 @@ static void cfq_init_blkio_group(struct blkio_group *blkg) cfq_init_cfqg_base(cfqg); cfqg->weight = blkg->blkcg->weight; - - /* - * Take the initial reference that will be released on destroy - * This can be thought of a joint reference by cgroup and - * elevator which will be dropped by either elevator exit - * or cgroup deletion path depending on who is exiting first. - */ - cfqg->ref = 1; } /* @@ -1105,12 +1096,6 @@ static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd, return cfqg; } -static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) -{ - cfqg->ref++; - return cfqg; -} - static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) { /* Currently, all async queues are mapped to root group */ @@ -1119,28 +1104,7 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) cfqq->cfqg = cfqg; /* cfqq reference on cfqg */ - cfqq->cfqg->ref++; -} - -static void cfq_put_cfqg(struct cfq_group *cfqg) -{ - struct blkio_group *blkg = cfqg_to_blkg(cfqg); - struct cfq_rb_root *st; - int i, j; - - BUG_ON(cfqg->ref <= 0); - cfqg->ref--; - if (cfqg->ref) - return; - - /* release the extra blkcg reference this blkg has been holding */ - css_put(&blkg->blkcg->css); - - for_each_cfqg_st(cfqg, i, j, st) - BUG_ON(!RB_EMPTY_ROOT(&st->rb)); - free_percpu(blkg->stats_cpu); - kfree(blkg->pd); - kfree(blkg); + blkg_get(cfqg_to_blkg(cfqg)); } static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg) @@ -1157,7 +1121,7 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg) * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. 
*/ - cfq_put_cfqg(cfqg); + blkg_put(cfqg_to_blkg(cfqg)); } static bool cfq_release_cfq_groups(struct cfq_data *cfqd) @@ -1225,18 +1189,12 @@ static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd, return cfqd->root_group; } -static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) -{ - return cfqg; -} - static inline void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) { cfqq->cfqg = cfqg; } static void cfq_release_cfq_groups(struct cfq_data *cfqd) {} -static inline void cfq_put_cfqg(struct cfq_group *cfqg) {} #endif /* GROUP_IOSCHED */ @@ -2630,7 +2588,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq) BUG_ON(cfq_cfqq_on_rr(cfqq)); kmem_cache_free(cfq_pool, cfqq); - cfq_put_cfqg(cfqg); + blkg_put(cfqg_to_blkg(cfqg)); } static void cfq_put_cooperator(struct cfq_queue *cfqq) @@ -3382,7 +3340,7 @@ static void cfq_put_request(struct request *rq) cfqq->allocated[rw]--; /* Put down rq reference on cfqg */ - cfq_put_cfqg(RQ_CFQG(rq)); + blkg_put(cfqg_to_blkg(RQ_CFQG(rq))); rq->elv.priv[0] = NULL; rq->elv.priv[1] = NULL; @@ -3477,8 +3435,9 @@ new_queue: cfqq->allocated[rw]++; cfqq->ref++; + blkg_get(cfqg_to_blkg(cfqq->cfqg)); rq->elv.priv[0] = cfqq; - rq->elv.priv[1] = cfq_ref_get_cfqg(cfqq->cfqg); + rq->elv.priv[1] = cfqq->cfqg; spin_unlock_irq(q->queue_lock); return 0; } @@ -3676,8 +3635,11 @@ static int cfq_init_queue(struct request_queue *q) */ cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0); cfqd->oom_cfqq.ref++; + + spin_lock_irq(q->queue_lock); cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, cfqd->root_group); - cfq_put_cfqg(cfqd->root_group); + blkg_put(cfqg_to_blkg(cfqd->root_group)); + spin_unlock_irq(q->queue_lock); init_timer(&cfqd->idle_slice_timer); cfqd->idle_slice_timer.function = cfq_idle_slice_timer; -- cgit v1.2.3-59-g8ed1b From 549d3aa872cd1aec1ee540fd93afd9611faa0def Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:16 -0800 Subject: blkcg: make blkg->pd an array and move configuration and stats into it To prepare for unifying blkgs for different policies, make blkg->pd an array with BLKIO_NR_POLICIES elements and move blkg->conf, ->stats, and ->stats_cpu into blkg_policy_data. This patch doesn't introduce any functional difference. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 150 +++++++++++++++++++++++++++++++++-------------------- block/blk-cgroup.h | 18 +++---- 2 files changed, 102 insertions(+), 66 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 3b6a0e1265aa..0eb39981e7c2 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -184,12 +184,14 @@ static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync) static void blkio_set_start_group_wait_time(struct blkio_group *blkg, struct blkio_group *curr_blkg) { - if (blkio_blkg_waiting(&blkg->stats)) + struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + + if (blkio_blkg_waiting(&pd->stats)) return; if (blkg == curr_blkg) return; - blkg->stats.start_group_wait_time = sched_clock(); - blkio_mark_blkg_waiting(&blkg->stats); + pd->stats.start_group_wait_time = sched_clock(); + blkio_mark_blkg_waiting(&pd->stats); } /* This should be called with the blkg->stats_lock held. 
*/ @@ -222,24 +224,26 @@ static void blkio_end_empty_time(struct blkio_group_stats *stats) void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg) { + struct blkg_policy_data *pd = blkg->pd[blkg->plid]; unsigned long flags; spin_lock_irqsave(&blkg->stats_lock, flags); - BUG_ON(blkio_blkg_idling(&blkg->stats)); - blkg->stats.start_idle_time = sched_clock(); - blkio_mark_blkg_idling(&blkg->stats); + BUG_ON(blkio_blkg_idling(&pd->stats)); + pd->stats.start_idle_time = sched_clock(); + blkio_mark_blkg_idling(&pd->stats); spin_unlock_irqrestore(&blkg->stats_lock, flags); } EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats); void blkiocg_update_idle_time_stats(struct blkio_group *blkg) { + struct blkg_policy_data *pd = blkg->pd[blkg->plid]; unsigned long flags; unsigned long long now; struct blkio_group_stats *stats; spin_lock_irqsave(&blkg->stats_lock, flags); - stats = &blkg->stats; + stats = &pd->stats; if (blkio_blkg_idling(stats)) { now = sched_clock(); if (time_after64(now, stats->start_idle_time)) @@ -252,11 +256,12 @@ EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats); void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg) { + struct blkg_policy_data *pd = blkg->pd[blkg->plid]; unsigned long flags; struct blkio_group_stats *stats; spin_lock_irqsave(&blkg->stats_lock, flags); - stats = &blkg->stats; + stats = &pd->stats; stats->avg_queue_size_sum += stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] + stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]; @@ -268,11 +273,12 @@ EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats); void blkiocg_set_start_empty_time(struct blkio_group *blkg) { + struct blkg_policy_data *pd = blkg->pd[blkg->plid]; unsigned long flags; struct blkio_group_stats *stats; spin_lock_irqsave(&blkg->stats_lock, flags); - stats = &blkg->stats; + stats = &pd->stats; if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] || stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) { @@ -299,7 +305,9 @@ EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time); void blkiocg_update_dequeue_stats(struct blkio_group *blkg, unsigned long dequeue) { - blkg->stats.dequeue += dequeue; + struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + + pd->stats.dequeue += dequeue; } EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats); #else @@ -312,12 +320,13 @@ void blkiocg_update_io_add_stats(struct blkio_group *blkg, struct blkio_group *curr_blkg, bool direction, bool sync) { + struct blkg_policy_data *pd = blkg->pd[blkg->plid]; unsigned long flags; spin_lock_irqsave(&blkg->stats_lock, flags); - blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED], 1, direction, + blkio_add_stat(pd->stats.stat_arr[BLKIO_STAT_QUEUED], 1, direction, sync); - blkio_end_empty_time(&blkg->stats); + blkio_end_empty_time(&pd->stats); blkio_set_start_group_wait_time(blkg, curr_blkg); spin_unlock_irqrestore(&blkg->stats_lock, flags); } @@ -326,10 +335,11 @@ EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats); void blkiocg_update_io_remove_stats(struct blkio_group *blkg, bool direction, bool sync) { + struct blkg_policy_data *pd = blkg->pd[blkg->plid]; unsigned long flags; spin_lock_irqsave(&blkg->stats_lock, flags); - blkio_check_and_dec_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED], + blkio_check_and_dec_stat(pd->stats.stat_arr[BLKIO_STAT_QUEUED], direction, sync); spin_unlock_irqrestore(&blkg->stats_lock, flags); } @@ -338,12 +348,13 @@ EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats); void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time, unsigned long unaccounted_time) { + 
struct blkg_policy_data *pd = blkg->pd[blkg->plid]; unsigned long flags; spin_lock_irqsave(&blkg->stats_lock, flags); - blkg->stats.time += time; + pd->stats.time += time; #ifdef CONFIG_DEBUG_BLK_CGROUP - blkg->stats.unaccounted_time += unaccounted_time; + pd->stats.unaccounted_time += unaccounted_time; #endif spin_unlock_irqrestore(&blkg->stats_lock, flags); } @@ -356,6 +367,7 @@ EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used); void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes, bool direction, bool sync) { + struct blkg_policy_data *pd = blkg->pd[blkg->plid]; struct blkio_group_stats_cpu *stats_cpu; unsigned long flags; @@ -366,7 +378,7 @@ void blkiocg_update_dispatch_stats(struct blkio_group *blkg, */ local_irq_save(flags); - stats_cpu = this_cpu_ptr(blkg->stats_cpu); + stats_cpu = this_cpu_ptr(pd->stats_cpu); u64_stats_update_begin(&stats_cpu->syncp); stats_cpu->sectors += bytes >> 9; @@ -382,12 +394,13 @@ EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats); void blkiocg_update_completion_stats(struct blkio_group *blkg, uint64_t start_time, uint64_t io_start_time, bool direction, bool sync) { + struct blkg_policy_data *pd = blkg->pd[blkg->plid]; struct blkio_group_stats *stats; unsigned long flags; unsigned long long now = sched_clock(); spin_lock_irqsave(&blkg->stats_lock, flags); - stats = &blkg->stats; + stats = &pd->stats; if (time_after64(now, io_start_time)) blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME], now - io_start_time, direction, sync); @@ -402,6 +415,7 @@ EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats); void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction, bool sync) { + struct blkg_policy_data *pd = blkg->pd[blkg->plid]; struct blkio_group_stats_cpu *stats_cpu; unsigned long flags; @@ -412,7 +426,7 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction, */ local_irq_save(flags); - stats_cpu = this_cpu_ptr(blkg->stats_cpu); + stats_cpu = this_cpu_ptr(pd->stats_cpu); u64_stats_update_begin(&stats_cpu->syncp); blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_MERGED], 1, @@ -430,11 +444,17 @@ EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats); */ static void blkg_free(struct blkio_group *blkg) { - if (blkg) { - free_percpu(blkg->stats_cpu); - kfree(blkg->pd); - kfree(blkg); + struct blkg_policy_data *pd; + + if (!blkg) + return; + + pd = blkg->pd[blkg->plid]; + if (pd) { + free_percpu(pd->stats_cpu); + kfree(pd); } + kfree(blkg); } /** @@ -453,6 +473,7 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, struct blkio_policy_type *pol) { struct blkio_group *blkg; + struct blkg_policy_data *pd; /* alloc and init base part */ blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node); @@ -466,23 +487,26 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, blkg->refcnt = 1; cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path)); - /* alloc per-policy data */ - blkg->pd = kzalloc_node(sizeof(*blkg->pd) + pol->pdata_size, GFP_ATOMIC, - q->node); - if (!blkg->pd) { + /* alloc per-policy data and attach it to blkg */ + pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC, + q->node); + if (!pd) { blkg_free(blkg); return NULL; } + blkg->pd[pol->plid] = pd; + pd->blkg = blkg; + /* broken, read comment in the callsite */ - blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu); - if (!blkg->stats_cpu) { + + pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu); + if (!pd->stats_cpu) { blkg_free(blkg); return NULL; } - /* attach pd to blkg and 
invoke per-policy init */ - blkg->pd->blkg = blkg; + /* invoke per-policy init */ pol->ops.blkio_init_group_fn(blkg); return blkg; } @@ -659,6 +683,7 @@ EXPORT_SYMBOL_GPL(__blkg_release); static void blkio_reset_stats_cpu(struct blkio_group *blkg) { + struct blkg_policy_data *pd = blkg->pd[blkg->plid]; struct blkio_group_stats_cpu *stats_cpu; int i, j, k; /* @@ -673,7 +698,7 @@ static void blkio_reset_stats_cpu(struct blkio_group *blkg) * unless this becomes a real issue. */ for_each_possible_cpu(i) { - stats_cpu = per_cpu_ptr(blkg->stats_cpu, i); + stats_cpu = per_cpu_ptr(pd->stats_cpu, i); stats_cpu->sectors = 0; for(j = 0; j < BLKIO_STAT_CPU_NR; j++) for (k = 0; k < BLKIO_STAT_TOTAL; k++) @@ -698,8 +723,10 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) blkcg = cgroup_to_blkio_cgroup(cgroup); spin_lock_irq(&blkcg->lock); hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { + struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + spin_lock(&blkg->stats_lock); - stats = &blkg->stats; + stats = &pd->stats; #ifdef CONFIG_DEBUG_BLK_CGROUP idling = blkio_blkg_idling(stats); waiting = blkio_blkg_waiting(stats); @@ -779,13 +806,14 @@ static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val, static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg, enum stat_type_cpu type, enum stat_sub_type sub_type) { + struct blkg_policy_data *pd = blkg->pd[blkg->plid]; int cpu; struct blkio_group_stats_cpu *stats_cpu; u64 val = 0, tval; for_each_possible_cpu(cpu) { unsigned int start; - stats_cpu = per_cpu_ptr(blkg->stats_cpu, cpu); + stats_cpu = per_cpu_ptr(pd->stats_cpu, cpu); do { start = u64_stats_fetch_begin(&stats_cpu->syncp); @@ -837,20 +865,21 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, struct cgroup_map_cb *cb, const char *dname, enum stat_type type) { + struct blkg_policy_data *pd = blkg->pd[blkg->plid]; uint64_t disk_total; char key_str[MAX_KEY_LEN]; enum stat_sub_type sub_type; if (type == BLKIO_STAT_TIME) return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, - blkg->stats.time, cb, dname); + pd->stats.time, cb, dname); #ifdef CONFIG_DEBUG_BLK_CGROUP if (type == BLKIO_STAT_UNACCOUNTED_TIME) return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, - blkg->stats.unaccounted_time, cb, dname); + pd->stats.unaccounted_time, cb, dname); if (type == BLKIO_STAT_AVG_QUEUE_SIZE) { - uint64_t sum = blkg->stats.avg_queue_size_sum; - uint64_t samples = blkg->stats.avg_queue_size_samples; + uint64_t sum = pd->stats.avg_queue_size_sum; + uint64_t samples = pd->stats.avg_queue_size_samples; if (samples) do_div(sum, samples); else @@ -860,26 +889,26 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, } if (type == BLKIO_STAT_GROUP_WAIT_TIME) return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, - blkg->stats.group_wait_time, cb, dname); + pd->stats.group_wait_time, cb, dname); if (type == BLKIO_STAT_IDLE_TIME) return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, - blkg->stats.idle_time, cb, dname); + pd->stats.idle_time, cb, dname); if (type == BLKIO_STAT_EMPTY_TIME) return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, - blkg->stats.empty_time, cb, dname); + pd->stats.empty_time, cb, dname); if (type == BLKIO_STAT_DEQUEUE) return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, - blkg->stats.dequeue, cb, dname); + pd->stats.dequeue, cb, dname); #endif for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL; sub_type++) { blkio_get_key_name(sub_type, dname, key_str, MAX_KEY_LEN, false); - cb->fill(cb, key_str, blkg->stats.stat_arr[type][sub_type]); + 
cb->fill(cb, key_str, pd->stats.stat_arr[type][sub_type]); } - disk_total = blkg->stats.stat_arr[type][BLKIO_STAT_READ] + - blkg->stats.stat_arr[type][BLKIO_STAT_WRITE]; + disk_total = pd->stats.stat_arr[type][BLKIO_STAT_READ] + + pd->stats.stat_arr[type][BLKIO_STAT_WRITE]; blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN, false); cb->fill(cb, key_str, disk_total); @@ -891,6 +920,7 @@ static int blkio_policy_parse_and_set(char *buf, enum blkio_policy_id plid, { struct gendisk *disk = NULL; struct blkio_group *blkg = NULL; + struct blkg_policy_data *pd; char *s[4], *p, *major_s = NULL, *minor_s = NULL; unsigned long major, minor; int i = 0, ret = -EINVAL; @@ -950,35 +980,37 @@ static int blkio_policy_parse_and_set(char *buf, enum blkio_policy_id plid, goto out_unlock; } + pd = blkg->pd[plid]; + switch (plid) { case BLKIO_POLICY_PROP: if ((temp < BLKIO_WEIGHT_MIN && temp > 0) || temp > BLKIO_WEIGHT_MAX) goto out_unlock; - blkg->conf.weight = temp; + pd->conf.weight = temp; blkio_update_group_weight(blkg, temp ?: blkcg->weight); break; case BLKIO_POLICY_THROTL: switch(fileid) { case BLKIO_THROTL_read_bps_device: - blkg->conf.bps[READ] = temp; + pd->conf.bps[READ] = temp; blkio_update_group_bps(blkg, temp ?: -1, fileid); break; case BLKIO_THROTL_write_bps_device: - blkg->conf.bps[WRITE] = temp; + pd->conf.bps[WRITE] = temp; blkio_update_group_bps(blkg, temp ?: -1, fileid); break; case BLKIO_THROTL_read_iops_device: if (temp > THROTL_IOPS_MAX) goto out_unlock; - blkg->conf.iops[READ] = temp; + pd->conf.iops[READ] = temp; blkio_update_group_iops(blkg, temp ?: -1, fileid); break; case BLKIO_THROTL_write_iops_device: if (temp > THROTL_IOPS_MAX) goto out_unlock; - blkg->conf.iops[WRITE] = temp; + pd->conf.iops[WRITE] = temp; blkio_update_group_iops(blkg, temp ?: -1, fileid); break; } @@ -1034,6 +1066,7 @@ static const char *blkg_dev_name(struct blkio_group *blkg) static void blkio_print_group_conf(struct cftype *cft, struct blkio_group *blkg, struct seq_file *m) { + struct blkg_policy_data *pd = blkg->pd[blkg->plid]; const char *dname = blkg_dev_name(blkg); int fileid = BLKIOFILE_ATTR(cft->private); int rw = WRITE; @@ -1043,25 +1076,25 @@ static void blkio_print_group_conf(struct cftype *cft, struct blkio_group *blkg, switch (blkg->plid) { case BLKIO_POLICY_PROP: - if (blkg->conf.weight) + if (pd->conf.weight) seq_printf(m, "%s\t%u\n", - dname, blkg->conf.weight); + dname, pd->conf.weight); break; case BLKIO_POLICY_THROTL: switch (fileid) { case BLKIO_THROTL_read_bps_device: rw = READ; case BLKIO_THROTL_write_bps_device: - if (blkg->conf.bps[rw]) + if (pd->conf.bps[rw]) seq_printf(m, "%s\t%llu\n", - dname, blkg->conf.bps[rw]); + dname, pd->conf.bps[rw]); break; case BLKIO_THROTL_read_iops_device: rw = READ; case BLKIO_THROTL_write_iops_device: - if (blkg->conf.iops[rw]) + if (pd->conf.iops[rw]) seq_printf(m, "%s\t%u\n", - dname, blkg->conf.iops[rw]); + dname, pd->conf.iops[rw]); break; } break; @@ -1243,9 +1276,12 @@ static int blkio_weight_write(struct blkio_cgroup *blkcg, int plid, u64 val) spin_lock_irq(&blkcg->lock); blkcg->weight = (unsigned int)val; - hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) - if (blkg->plid == plid && !blkg->conf.weight) + hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { + struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + + if (blkg->plid == plid && !pd->conf.weight) blkio_update_group_weight(blkg, blkcg->weight); + } spin_unlock_irq(&blkcg->lock); spin_unlock(&blkio_list_lock); diff --git a/block/blk-cgroup.h 
b/block/blk-cgroup.h index 7da106843f01..5dffd436f30d 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -164,6 +164,13 @@ struct blkg_policy_data { /* the blkg this per-policy data belongs to */ struct blkio_group *blkg; + /* Configuration */ + struct blkio_group_conf conf; + + struct blkio_group_stats stats; + /* Per cpu stats pointer */ + struct blkio_group_stats_cpu __percpu *stats_cpu; + /* pol->pdata_size bytes of private data used by policy impl */ char pdata[] __aligned(__alignof__(unsigned long long)); }; @@ -180,16 +187,9 @@ struct blkio_group { /* reference count */ int refcnt; - /* Configuration */ - struct blkio_group_conf conf; - /* Need to serialize the stats in the case of reset/update */ spinlock_t stats_lock; - struct blkio_group_stats stats; - /* Per cpu stats pointer */ - struct blkio_group_stats_cpu __percpu *stats_cpu; - - struct blkg_policy_data *pd; + struct blkg_policy_data *pd[BLKIO_NR_POLICIES]; struct rcu_head rcu_head; }; @@ -249,7 +249,7 @@ extern void blkg_destroy_all(struct request_queue *q); static inline void *blkg_to_pdata(struct blkio_group *blkg, struct blkio_policy_type *pol) { - return blkg ? blkg->pd->pdata : NULL; + return blkg ? blkg->pd[pol->plid]->pdata : NULL; } /** -- cgit v1.2.3-59-g8ed1b From c1768268f9424410761da57ea71107acae7b03cc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:17 -0800 Subject: blkcg: don't use blkg->plid in stat related functions blkg is scheduled to be unified for all policies and thus there won't be one-to-one mapping from blkg to policy. Update stat related functions to take explicit @pol or @plid arguments and not use blkg->plid. This is painful for now but most of specific stat interface functions will be replaced with a handful of generic helpers. 
Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 150 +++++++++++++++++++++++++++++---------------------- block/blk-cgroup.h | 80 ++++++++++++++++----------- block/blk-throttle.c | 4 +- block/cfq-iosched.c | 44 +++++++++------ block/cfq.h | 96 ++++++++++++++++++++------------- 5 files changed, 224 insertions(+), 150 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 0eb39981e7c2..91f9824be5cc 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -78,14 +78,14 @@ struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk) } EXPORT_SYMBOL_GPL(task_blkio_cgroup); -static inline void -blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight) +static inline void blkio_update_group_weight(struct blkio_group *blkg, + int plid, unsigned int weight) { struct blkio_policy_type *blkiop; list_for_each_entry(blkiop, &blkio_list, list) { /* If this policy does not own the blkg, do not send updates */ - if (blkiop->plid != blkg->plid) + if (blkiop->plid != plid) continue; if (blkiop->ops.blkio_update_group_weight_fn) blkiop->ops.blkio_update_group_weight_fn(blkg->q, @@ -93,15 +93,15 @@ blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight) } } -static inline void blkio_update_group_bps(struct blkio_group *blkg, u64 bps, - int fileid) +static inline void blkio_update_group_bps(struct blkio_group *blkg, int plid, + u64 bps, int fileid) { struct blkio_policy_type *blkiop; list_for_each_entry(blkiop, &blkio_list, list) { /* If this policy does not own the blkg, do not send updates */ - if (blkiop->plid != blkg->plid) + if (blkiop->plid != plid) continue; if (fileid == BLKIO_THROTL_read_bps_device @@ -117,14 +117,15 @@ static inline void blkio_update_group_bps(struct blkio_group *blkg, u64 bps, } static inline void blkio_update_group_iops(struct blkio_group *blkg, - unsigned int iops, int fileid) + int plid, unsigned int iops, + int fileid) { struct blkio_policy_type *blkiop; list_for_each_entry(blkiop, &blkio_list, list) { /* If this policy does not own the blkg, do not send updates */ - if (blkiop->plid != blkg->plid) + if (blkiop->plid != plid) continue; if (fileid == BLKIO_THROTL_read_iops_device @@ -182,9 +183,10 @@ static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync) #ifdef CONFIG_DEBUG_BLK_CGROUP /* This should be called with the blkg->stats_lock held. 
*/ static void blkio_set_start_group_wait_time(struct blkio_group *blkg, - struct blkio_group *curr_blkg) + struct blkio_policy_type *pol, + struct blkio_group *curr_blkg) { - struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + struct blkg_policy_data *pd = blkg->pd[pol->plid]; if (blkio_blkg_waiting(&pd->stats)) return; @@ -222,9 +224,10 @@ static void blkio_end_empty_time(struct blkio_group_stats *stats) blkio_clear_blkg_empty(stats); } -void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg) +void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol) { - struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + struct blkg_policy_data *pd = blkg->pd[pol->plid]; unsigned long flags; spin_lock_irqsave(&blkg->stats_lock, flags); @@ -235,9 +238,10 @@ void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg) } EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats); -void blkiocg_update_idle_time_stats(struct blkio_group *blkg) +void blkiocg_update_idle_time_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol) { - struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + struct blkg_policy_data *pd = blkg->pd[pol->plid]; unsigned long flags; unsigned long long now; struct blkio_group_stats *stats; @@ -254,9 +258,10 @@ void blkiocg_update_idle_time_stats(struct blkio_group *blkg) } EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats); -void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg) +void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol) { - struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + struct blkg_policy_data *pd = blkg->pd[pol->plid]; unsigned long flags; struct blkio_group_stats *stats; @@ -271,9 +276,10 @@ void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg) } EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats); -void blkiocg_set_start_empty_time(struct blkio_group *blkg) +void blkiocg_set_start_empty_time(struct blkio_group *blkg, + struct blkio_policy_type *pol) { - struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + struct blkg_policy_data *pd = blkg->pd[pol->plid]; unsigned long flags; struct blkio_group_stats *stats; @@ -303,39 +309,43 @@ void blkiocg_set_start_empty_time(struct blkio_group *blkg) EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time); void blkiocg_update_dequeue_stats(struct blkio_group *blkg, - unsigned long dequeue) + struct blkio_policy_type *pol, + unsigned long dequeue) { - struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + struct blkg_policy_data *pd = blkg->pd[pol->plid]; pd->stats.dequeue += dequeue; } EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats); #else static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg, - struct blkio_group *curr_blkg) {} -static inline void blkio_end_empty_time(struct blkio_group_stats *stats) {} + struct blkio_policy_type *pol, + struct blkio_group *curr_blkg) { } +static inline void blkio_end_empty_time(struct blkio_group_stats *stats) { } #endif void blkiocg_update_io_add_stats(struct blkio_group *blkg, - struct blkio_group *curr_blkg, bool direction, - bool sync) + struct blkio_policy_type *pol, + struct blkio_group *curr_blkg, bool direction, + bool sync) { - struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + struct blkg_policy_data *pd = blkg->pd[pol->plid]; unsigned long flags; spin_lock_irqsave(&blkg->stats_lock, flags); blkio_add_stat(pd->stats.stat_arr[BLKIO_STAT_QUEUED], 1, direction, sync); blkio_end_empty_time(&pd->stats); - 
blkio_set_start_group_wait_time(blkg, curr_blkg); + blkio_set_start_group_wait_time(blkg, pol, curr_blkg); spin_unlock_irqrestore(&blkg->stats_lock, flags); } EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats); void blkiocg_update_io_remove_stats(struct blkio_group *blkg, - bool direction, bool sync) + struct blkio_policy_type *pol, + bool direction, bool sync) { - struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + struct blkg_policy_data *pd = blkg->pd[pol->plid]; unsigned long flags; spin_lock_irqsave(&blkg->stats_lock, flags); @@ -345,10 +355,12 @@ void blkiocg_update_io_remove_stats(struct blkio_group *blkg, } EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats); -void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time, - unsigned long unaccounted_time) +void blkiocg_update_timeslice_used(struct blkio_group *blkg, + struct blkio_policy_type *pol, + unsigned long time, + unsigned long unaccounted_time) { - struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + struct blkg_policy_data *pd = blkg->pd[pol->plid]; unsigned long flags; spin_lock_irqsave(&blkg->stats_lock, flags); @@ -365,9 +377,10 @@ EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used); * is valid. */ void blkiocg_update_dispatch_stats(struct blkio_group *blkg, - uint64_t bytes, bool direction, bool sync) + struct blkio_policy_type *pol, + uint64_t bytes, bool direction, bool sync) { - struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + struct blkg_policy_data *pd = blkg->pd[pol->plid]; struct blkio_group_stats_cpu *stats_cpu; unsigned long flags; @@ -392,9 +405,12 @@ void blkiocg_update_dispatch_stats(struct blkio_group *blkg, EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats); void blkiocg_update_completion_stats(struct blkio_group *blkg, - uint64_t start_time, uint64_t io_start_time, bool direction, bool sync) + struct blkio_policy_type *pol, + uint64_t start_time, + uint64_t io_start_time, bool direction, + bool sync) { - struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + struct blkg_policy_data *pd = blkg->pd[pol->plid]; struct blkio_group_stats *stats; unsigned long flags; unsigned long long now = sched_clock(); @@ -412,10 +428,11 @@ void blkiocg_update_completion_stats(struct blkio_group *blkg, EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats); /* Merged stats are per cpu. 
*/ -void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction, - bool sync) +void blkiocg_update_io_merged_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol, + bool direction, bool sync) { - struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + struct blkg_policy_data *pd = blkg->pd[pol->plid]; struct blkio_group_stats_cpu *stats_cpu; unsigned long flags; @@ -681,9 +698,9 @@ void __blkg_release(struct blkio_group *blkg) } EXPORT_SYMBOL_GPL(__blkg_release); -static void blkio_reset_stats_cpu(struct blkio_group *blkg) +static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid) { - struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + struct blkg_policy_data *pd = blkg->pd[plid]; struct blkio_group_stats_cpu *stats_cpu; int i, j, k; /* @@ -754,7 +771,7 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) spin_unlock(&blkg->stats_lock); /* Reset Per cpu stats which don't take blkg->stats_lock */ - blkio_reset_stats_cpu(blkg); + blkio_reset_stats_cpu(blkg, blkg->plid); } spin_unlock_irq(&blkcg->lock); @@ -803,10 +820,10 @@ static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val, } -static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg, +static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg, int plid, enum stat_type_cpu type, enum stat_sub_type sub_type) { - struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + struct blkg_policy_data *pd = blkg->pd[plid]; int cpu; struct blkio_group_stats_cpu *stats_cpu; u64 val = 0, tval; @@ -829,7 +846,7 @@ static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg, return val; } -static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, +static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid, struct cgroup_map_cb *cb, const char *dname, enum stat_type_cpu type) { @@ -838,7 +855,7 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, enum stat_sub_type sub_type; if (type == BLKIO_STAT_CPU_SECTORS) { - val = blkio_read_stat_cpu(blkg, type, 0); + val = blkio_read_stat_cpu(blkg, plid, type, 0); return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, val, cb, dname); } @@ -847,12 +864,12 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, sub_type++) { blkio_get_key_name(sub_type, dname, key_str, MAX_KEY_LEN, false); - val = blkio_read_stat_cpu(blkg, type, sub_type); + val = blkio_read_stat_cpu(blkg, plid, type, sub_type); cb->fill(cb, key_str, val); } - disk_total = blkio_read_stat_cpu(blkg, type, BLKIO_STAT_READ) + - blkio_read_stat_cpu(blkg, type, BLKIO_STAT_WRITE); + disk_total = blkio_read_stat_cpu(blkg, plid, type, BLKIO_STAT_READ) + + blkio_read_stat_cpu(blkg, plid, type, BLKIO_STAT_WRITE); blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN, false); @@ -861,11 +878,11 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, } /* This should be called with blkg->stats_lock held */ -static uint64_t blkio_get_stat(struct blkio_group *blkg, +static uint64_t blkio_get_stat(struct blkio_group *blkg, int plid, struct cgroup_map_cb *cb, const char *dname, enum stat_type type) { - struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + struct blkg_policy_data *pd = blkg->pd[plid]; uint64_t disk_total; char key_str[MAX_KEY_LEN]; enum stat_sub_type sub_type; @@ -989,29 +1006,29 @@ static int blkio_policy_parse_and_set(char *buf, enum blkio_policy_id plid, goto out_unlock; pd->conf.weight = temp; - blkio_update_group_weight(blkg, temp ?: blkcg->weight); + blkio_update_group_weight(blkg, plid, temp ?: blkcg->weight); 
break; case BLKIO_POLICY_THROTL: switch(fileid) { case BLKIO_THROTL_read_bps_device: pd->conf.bps[READ] = temp; - blkio_update_group_bps(blkg, temp ?: -1, fileid); + blkio_update_group_bps(blkg, plid, temp ?: -1, fileid); break; case BLKIO_THROTL_write_bps_device: pd->conf.bps[WRITE] = temp; - blkio_update_group_bps(blkg, temp ?: -1, fileid); + blkio_update_group_bps(blkg, plid, temp ?: -1, fileid); break; case BLKIO_THROTL_read_iops_device: if (temp > THROTL_IOPS_MAX) goto out_unlock; pd->conf.iops[READ] = temp; - blkio_update_group_iops(blkg, temp ?: -1, fileid); + blkio_update_group_iops(blkg, plid, temp ?: -1, fileid); break; case BLKIO_THROTL_write_iops_device: if (temp > THROTL_IOPS_MAX) goto out_unlock; pd->conf.iops[WRITE] = temp; - blkio_update_group_iops(blkg, temp ?: -1, fileid); + blkio_update_group_iops(blkg, plid, temp ?: -1, fileid); break; } break; @@ -1066,15 +1083,16 @@ static const char *blkg_dev_name(struct blkio_group *blkg) static void blkio_print_group_conf(struct cftype *cft, struct blkio_group *blkg, struct seq_file *m) { - struct blkg_policy_data *pd = blkg->pd[blkg->plid]; - const char *dname = blkg_dev_name(blkg); + int plid = BLKIOFILE_POLICY(cft->private); int fileid = BLKIOFILE_ATTR(cft->private); + struct blkg_policy_data *pd = blkg->pd[plid]; + const char *dname = blkg_dev_name(blkg); int rw = WRITE; if (!dname) return; - switch (blkg->plid) { + switch (plid) { case BLKIO_POLICY_PROP: if (pd->conf.weight) seq_printf(m, "%s\t%u\n", @@ -1166,15 +1184,17 @@ static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg, rcu_read_lock(); hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) { const char *dname = blkg_dev_name(blkg); + int plid = BLKIOFILE_POLICY(cft->private); - if (!dname || BLKIOFILE_POLICY(cft->private) != blkg->plid) + if (!dname || plid != blkg->plid) continue; - if (pcpu) - cgroup_total += blkio_get_stat_cpu(blkg, cb, dname, - type); - else { + if (pcpu) { + cgroup_total += blkio_get_stat_cpu(blkg, plid, + cb, dname, type); + } else { spin_lock_irq(&blkg->stats_lock); - cgroup_total += blkio_get_stat(blkg, cb, dname, type); + cgroup_total += blkio_get_stat(blkg, plid, + cb, dname, type); spin_unlock_irq(&blkg->stats_lock); } } @@ -1280,7 +1300,7 @@ static int blkio_weight_write(struct blkio_cgroup *blkcg, int plid, u64 val) struct blkg_policy_data *pd = blkg->pd[blkg->plid]; if (blkg->plid == plid && !pd->conf.weight) - blkio_update_group_weight(blkg, blkcg->weight); + blkio_update_group_weight(blkg, plid, blkcg->weight); } spin_unlock_irq(&blkcg->lock); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 5dffd436f30d..60e96b4be4ce 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -335,12 +335,17 @@ static inline void blkg_put(struct blkio_group *blkg) { } #define BLKIO_WEIGHT_DEFAULT 500 #ifdef CONFIG_DEBUG_BLK_CGROUP -void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg); +void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol); void blkiocg_update_dequeue_stats(struct blkio_group *blkg, - unsigned long dequeue); -void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg); -void blkiocg_update_idle_time_stats(struct blkio_group *blkg); -void blkiocg_set_start_empty_time(struct blkio_group *blkg); + struct blkio_policy_type *pol, + unsigned long dequeue); +void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol); +void blkiocg_update_idle_time_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol); 
+void blkiocg_set_start_empty_time(struct blkio_group *blkg, + struct blkio_policy_type *pol); #define BLKG_FLAG_FNS(name) \ static inline void blkio_mark_blkg_##name( \ @@ -363,14 +368,16 @@ BLKG_FLAG_FNS(idling) BLKG_FLAG_FNS(empty) #undef BLKG_FLAG_FNS #else -static inline void blkiocg_update_avg_queue_size_stats( - struct blkio_group *blkg) {} +static inline void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol) { } static inline void blkiocg_update_dequeue_stats(struct blkio_group *blkg, - unsigned long dequeue) {} -static inline void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg) -{} -static inline void blkiocg_update_idle_time_stats(struct blkio_group *blkg) {} -static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg) {} + struct blkio_policy_type *pol, unsigned long dequeue) { } +static inline void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol) { } +static inline void blkiocg_update_idle_time_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol) { } +static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg, + struct blkio_policy_type *pol) { } #endif #ifdef CONFIG_BLK_CGROUP @@ -386,18 +393,27 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, enum blkio_policy_id plid, bool for_root); void blkiocg_update_timeslice_used(struct blkio_group *blkg, - unsigned long time, - unsigned long unaccounted_time); -void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes, - bool direction, bool sync); + struct blkio_policy_type *pol, + unsigned long time, + unsigned long unaccounted_time); +void blkiocg_update_dispatch_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol, + uint64_t bytes, bool direction, bool sync); void blkiocg_update_completion_stats(struct blkio_group *blkg, - uint64_t start_time, uint64_t io_start_time, bool direction, bool sync); -void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction, - bool sync); + struct blkio_policy_type *pol, + uint64_t start_time, + uint64_t io_start_time, bool direction, + bool sync); +void blkiocg_update_io_merged_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol, + bool direction, bool sync); void blkiocg_update_io_add_stats(struct blkio_group *blkg, - struct blkio_group *curr_blkg, bool direction, bool sync); + struct blkio_policy_type *pol, + struct blkio_group *curr_blkg, bool direction, + bool sync); void blkiocg_update_io_remove_stats(struct blkio_group *blkg, - bool direction, bool sync); + struct blkio_policy_type *pol, + bool direction, bool sync); #else struct cgroup; static inline struct blkio_cgroup * @@ -411,19 +427,23 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; } static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, void *key) { return NULL; } static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg, - unsigned long time, - unsigned long unaccounted_time) -{} + struct blkio_policy_type *pol, unsigned long time, + unsigned long unaccounted_time) { } static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg, - uint64_t bytes, bool direction, bool sync) {} + struct blkio_policy_type *pol, uint64_t bytes, + bool direction, bool sync) { } static inline void blkiocg_update_completion_stats(struct blkio_group *blkg, - uint64_t start_time, uint64_t io_start_time, bool direction, - bool sync) {} + struct blkio_policy_type *pol, uint64_t 
start_time, + uint64_t io_start_time, bool direction, bool sync) { } static inline void blkiocg_update_io_merged_stats(struct blkio_group *blkg, - bool direction, bool sync) {} + struct blkio_policy_type *pol, bool direction, + bool sync) { } static inline void blkiocg_update_io_add_stats(struct blkio_group *blkg, - struct blkio_group *curr_blkg, bool direction, bool sync) {} + struct blkio_policy_type *pol, + struct blkio_group *curr_blkg, bool direction, + bool sync) { } static inline void blkiocg_update_io_remove_stats(struct blkio_group *blkg, - bool direction, bool sync) {} + struct blkio_policy_type *pol, bool direction, + bool sync) { } #endif #endif /* _BLK_CGROUP_H */ diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 153ba509446b..b2fddaf20b98 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -588,7 +588,8 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) tg->bytes_disp[rw] += bio->bi_size; tg->io_disp[rw]++; - blkiocg_update_dispatch_stats(tg_to_blkg(tg), bio->bi_size, rw, sync); + blkiocg_update_dispatch_stats(tg_to_blkg(tg), &blkio_policy_throtl, + bio->bi_size, rw, sync); } static void throtl_add_bio_tg(struct throtl_data *td, struct throtl_grp *tg, @@ -1000,6 +1001,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) if (tg) { if (tg_no_rule_group(tg, rw)) { blkiocg_update_dispatch_stats(tg_to_blkg(tg), + &blkio_policy_throtl, bio->bi_size, rw, rw_is_sync(bio->bi_rw)); goto out_unlock_rcu; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 86980023339a..11dd9d7f2edb 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -945,7 +945,8 @@ cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg) cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); cfq_group_service_tree_del(st, cfqg); cfqg->saved_workload_slice = 0; - cfq_blkiocg_update_dequeue_stats(cfqg_to_blkg(cfqg), 1); + cfq_blkiocg_update_dequeue_stats(cfqg_to_blkg(cfqg), + &blkio_policy_cfq, 1); } static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq, @@ -1017,9 +1018,9 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, "sl_used=%u disp=%u charge=%u iops=%u sect=%lu", used_sl, cfqq->slice_dispatch, charge, iops_mode(cfqd), cfqq->nr_sectors); - cfq_blkiocg_update_timeslice_used(cfqg_to_blkg(cfqg), used_sl, - unaccounted_sl); - cfq_blkiocg_set_start_empty_time(cfqg_to_blkg(cfqg)); + cfq_blkiocg_update_timeslice_used(cfqg_to_blkg(cfqg), &blkio_policy_cfq, + used_sl, unaccounted_sl); + cfq_blkiocg_set_start_empty_time(cfqg_to_blkg(cfqg), &blkio_policy_cfq); } /** @@ -1463,9 +1464,11 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) elv_rb_del(&cfqq->sort_list, rq); cfqq->queued[rq_is_sync(rq)]--; cfq_blkiocg_update_io_remove_stats(cfqg_to_blkg(RQ_CFQG(rq)), - rq_data_dir(rq), rq_is_sync(rq)); + &blkio_policy_cfq, rq_data_dir(rq), + rq_is_sync(rq)); cfq_add_rq_rb(rq); cfq_blkiocg_update_io_add_stats(cfqg_to_blkg(RQ_CFQG(rq)), + &blkio_policy_cfq, cfqg_to_blkg(cfqq->cfqd->serving_group), rq_data_dir(rq), rq_is_sync(rq)); } @@ -1524,7 +1527,8 @@ static void cfq_remove_request(struct request *rq) cfqq->cfqd->rq_queued--; cfq_blkiocg_update_io_remove_stats(cfqg_to_blkg(RQ_CFQG(rq)), - rq_data_dir(rq), rq_is_sync(rq)); + &blkio_policy_cfq, rq_data_dir(rq), + rq_is_sync(rq)); if (rq->cmd_flags & REQ_PRIO) { WARN_ON(!cfqq->prio_pending); cfqq->prio_pending--; @@ -1560,7 +1564,8 @@ static void cfq_bio_merged(struct request_queue *q, struct request *req, struct bio *bio) 
{ cfq_blkiocg_update_io_merged_stats(cfqg_to_blkg(RQ_CFQG(req)), - bio_data_dir(bio), cfq_bio_sync(bio)); + &blkio_policy_cfq, bio_data_dir(bio), + cfq_bio_sync(bio)); } static void @@ -1583,7 +1588,8 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, cfqq->next_rq = rq; cfq_remove_request(next); cfq_blkiocg_update_io_merged_stats(cfqg_to_blkg(RQ_CFQG(rq)), - rq_data_dir(next), rq_is_sync(next)); + &blkio_policy_cfq, rq_data_dir(next), + rq_is_sync(next)); cfqq = RQ_CFQQ(next); /* @@ -1624,7 +1630,8 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) { del_timer(&cfqd->idle_slice_timer); - cfq_blkiocg_update_idle_time_stats(cfqg_to_blkg(cfqq->cfqg)); + cfq_blkiocg_update_idle_time_stats(cfqg_to_blkg(cfqq->cfqg), + &blkio_policy_cfq); } static void __cfq_set_active_queue(struct cfq_data *cfqd, @@ -1633,7 +1640,8 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, if (cfqq) { cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d", cfqd->serving_prio, cfqd->serving_type); - cfq_blkiocg_update_avg_queue_size_stats(cfqg_to_blkg(cfqq->cfqg)); + cfq_blkiocg_update_avg_queue_size_stats(cfqg_to_blkg(cfqq->cfqg), + &blkio_policy_cfq); cfqq->slice_start = 0; cfqq->dispatch_start = jiffies; cfqq->allocated_slice = 0; @@ -1981,7 +1989,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) sl = cfqd->cfq_slice_idle; mod_timer(&cfqd->idle_slice_timer, jiffies + sl); - cfq_blkiocg_update_set_idle_time_stats(cfqg_to_blkg(cfqq->cfqg)); + cfq_blkiocg_update_set_idle_time_stats(cfqg_to_blkg(cfqq->cfqg), + &blkio_policy_cfq); cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl, group_idle ? 1 : 0); } @@ -2005,8 +2014,8 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++; cfqq->nr_sectors += blk_rq_sectors(rq); cfq_blkiocg_update_dispatch_stats(cfqg_to_blkg(cfqq->cfqg), - blk_rq_bytes(rq), rq_data_dir(rq), - rq_is_sync(rq)); + &blkio_policy_cfq, blk_rq_bytes(rq), + rq_data_dir(rq), rq_is_sync(rq)); } /* @@ -3094,7 +3103,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, __blk_run_queue(cfqd->queue); } else { cfq_blkiocg_update_idle_time_stats( - cfqg_to_blkg(cfqq->cfqg)); + cfqg_to_blkg(cfqq->cfqg), + &blkio_policy_cfq); cfq_mark_cfqq_must_dispatch(cfqq); } } @@ -3122,6 +3132,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) list_add_tail(&rq->queuelist, &cfqq->fifo); cfq_add_rq_rb(rq); cfq_blkiocg_update_io_add_stats(cfqg_to_blkg(RQ_CFQG(rq)), + &blkio_policy_cfq, cfqg_to_blkg(cfqd->serving_group), rq_data_dir(rq), rq_is_sync(rq)); cfq_rq_enqueued(cfqd, cfqq, rq); @@ -3220,8 +3231,9 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfqq->dispatched--; (RQ_CFQG(rq))->dispatched--; cfq_blkiocg_update_completion_stats(cfqg_to_blkg(cfqq->cfqg), - rq_start_time_ns(rq), rq_io_start_time_ns(rq), - rq_data_dir(rq), rq_is_sync(rq)); + &blkio_policy_cfq, rq_start_time_ns(rq), + rq_io_start_time_ns(rq), rq_data_dir(rq), + rq_is_sync(rq)); cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; diff --git a/block/cfq.h b/block/cfq.h index 398760194e11..5584e1b63ca8 100644 --- a/block/cfq.h +++ b/block/cfq.h @@ -4,67 +4,79 @@ #ifdef CONFIG_CFQ_GROUP_IOSCHED static inline void cfq_blkiocg_update_io_add_stats(struct blkio_group *blkg, - struct blkio_group *curr_blkg, bool direction, bool sync) + struct blkio_policy_type *pol, + struct blkio_group 
*curr_blkg, + bool direction, bool sync) { - blkiocg_update_io_add_stats(blkg, curr_blkg, direction, sync); + blkiocg_update_io_add_stats(blkg, pol, curr_blkg, direction, sync); } static inline void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg, - unsigned long dequeue) + struct blkio_policy_type *pol, unsigned long dequeue) { - blkiocg_update_dequeue_stats(blkg, dequeue); + blkiocg_update_dequeue_stats(blkg, pol, dequeue); } static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg, - unsigned long time, unsigned long unaccounted_time) + struct blkio_policy_type *pol, unsigned long time, + unsigned long unaccounted_time) { - blkiocg_update_timeslice_used(blkg, time, unaccounted_time); + blkiocg_update_timeslice_used(blkg, pol, time, unaccounted_time); } -static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg) +static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg, + struct blkio_policy_type *pol) { - blkiocg_set_start_empty_time(blkg); + blkiocg_set_start_empty_time(blkg, pol); } static inline void cfq_blkiocg_update_io_remove_stats(struct blkio_group *blkg, - bool direction, bool sync) + struct blkio_policy_type *pol, bool direction, + bool sync) { - blkiocg_update_io_remove_stats(blkg, direction, sync); + blkiocg_update_io_remove_stats(blkg, pol, direction, sync); } static inline void cfq_blkiocg_update_io_merged_stats(struct blkio_group *blkg, - bool direction, bool sync) + struct blkio_policy_type *pol, bool direction, + bool sync) { - blkiocg_update_io_merged_stats(blkg, direction, sync); + blkiocg_update_io_merged_stats(blkg, pol, direction, sync); } -static inline void cfq_blkiocg_update_idle_time_stats(struct blkio_group *blkg) +static inline void cfq_blkiocg_update_idle_time_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol) { - blkiocg_update_idle_time_stats(blkg); + blkiocg_update_idle_time_stats(blkg, pol); } static inline void -cfq_blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg) +cfq_blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol) { - blkiocg_update_avg_queue_size_stats(blkg); + blkiocg_update_avg_queue_size_stats(blkg, pol); } static inline void -cfq_blkiocg_update_set_idle_time_stats(struct blkio_group *blkg) +cfq_blkiocg_update_set_idle_time_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol) { - blkiocg_update_set_idle_time_stats(blkg); + blkiocg_update_set_idle_time_stats(blkg, pol); } static inline void cfq_blkiocg_update_dispatch_stats(struct blkio_group *blkg, - uint64_t bytes, bool direction, bool sync) + struct blkio_policy_type *pol, uint64_t bytes, + bool direction, bool sync) { - blkiocg_update_dispatch_stats(blkg, bytes, direction, sync); + blkiocg_update_dispatch_stats(blkg, pol, bytes, direction, sync); } -static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, uint64_t start_time, uint64_t io_start_time, bool direction, bool sync) +static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol, uint64_t start_time, + uint64_t io_start_time, bool direction, bool sync) { - blkiocg_update_completion_stats(blkg, start_time, io_start_time, - direction, sync); + blkiocg_update_completion_stats(blkg, pol, start_time, io_start_time, + direction, sync); } static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg) @@ -74,30 +86,38 @@ static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg) #else /* 
CFQ_GROUP_IOSCHED */ static inline void cfq_blkiocg_update_io_add_stats(struct blkio_group *blkg, - struct blkio_group *curr_blkg, bool direction, bool sync) {} - + struct blkio_policy_type *pol, + struct blkio_group *curr_blkg, bool direction, + bool sync) { } static inline void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg, - unsigned long dequeue) {} - + struct blkio_policy_type *pol, unsigned long dequeue) { } static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg, - unsigned long time, unsigned long unaccounted_time) {} -static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg) {} + struct blkio_policy_type *pol, unsigned long time, + unsigned long unaccounted_time) { } +static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg, + struct blkio_policy_type *pol) { } static inline void cfq_blkiocg_update_io_remove_stats(struct blkio_group *blkg, - bool direction, bool sync) {} + struct blkio_policy_type *pol, bool direction, + bool sync) { } static inline void cfq_blkiocg_update_io_merged_stats(struct blkio_group *blkg, - bool direction, bool sync) {} -static inline void cfq_blkiocg_update_idle_time_stats(struct blkio_group *blkg) -{ -} + struct blkio_policy_type *pol, bool direction, + bool sync) { } +static inline void cfq_blkiocg_update_idle_time_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol) { } static inline void -cfq_blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg) {} +cfq_blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol) { } static inline void -cfq_blkiocg_update_set_idle_time_stats(struct blkio_group *blkg) {} +cfq_blkiocg_update_set_idle_time_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol) { } static inline void cfq_blkiocg_update_dispatch_stats(struct blkio_group *blkg, - uint64_t bytes, bool direction, bool sync) {} -static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, uint64_t start_time, uint64_t io_start_time, bool direction, bool sync) {} + struct blkio_policy_type *pol, uint64_t bytes, + bool direction, bool sync) { } +static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol, uint64_t start_time, + uint64_t io_start_time, bool direction, bool sync) { } static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg) { -- cgit v1.2.3-59-g8ed1b From 4eef3049986e8397d5003916aed8cad6567a5e02 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:18 -0800 Subject: blkcg: move per-queue blkg list heads and counters to queue and blkg Currently, specific policy implementations are responsible for maintaining list and number of blkgs. This duplicates code unnecessarily, and hinders factoring common code and providing blkcg API with better defined semantics. After this patch, request_queue hosts list heads and counters and blkg has list nodes for both policies. This patch only relocates the necessary fields and the next patch will actually move management code into blkcg core. Note that request_queue->blkg_list[] and ->nr_blkgs[] are hardcoded to have 2 elements. This is to avoid include dependency and will be removed by the next patch. This patch doesn't introduce any behavior change. -v2: Now unnecessary conditional on CONFIG_BLK_CGROUP_MODULE removed as pointed out by Vivek. 
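As a rough sketch of the layout this patch moves to (simplified, illustrative declarations only; list_head here is a stand-in for the kernel's struct list_head, and only the fields relevant to this change are shown), the queue now hosts the per-policy list heads and counters while each blkg carries one queue-list node per policy:

    /* illustrative sketch, not the real kernel headers */
    struct list_head { struct list_head *next, *prev; };

    #define BLKIO_NR_POLICIES 2  /* BLKIO_POLICY_PROP and BLKIO_POLICY_THROTL */

    struct blkio_group {
            struct list_head q_node[BLKIO_NR_POLICIES]; /* one per-queue link per policy */
            /* ... blkcg pointer, per-policy data, stats ... */
    };

    struct request_queue {
            /* per-policy blkg lists and counters; hardcoded to 2 entries for now */
            struct list_head blkg_list[2];
            int nr_blkgs[2];
            /* ... */
    };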
Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 2 ++ block/blk-cgroup.h | 1 + block/blk-core.c | 4 ++++ block/blk-throttle.c | 49 +++++++++++++++++++++++-------------------------- block/cfq-iosched.c | 47 +++++++++++++++++++---------------------------- include/linux/blkdev.h | 5 +++++ 6 files changed, 54 insertions(+), 54 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 91f9824be5cc..e940972ccd66 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -499,6 +499,8 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, spin_lock_init(&blkg->stats_lock); rcu_assign_pointer(blkg->q, q); + INIT_LIST_HEAD(&blkg->q_node[0]); + INIT_LIST_HEAD(&blkg->q_node[1]); blkg->blkcg = blkcg; blkg->plid = pol->plid; blkg->refcnt = 1; diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 60e96b4be4ce..ae96f196d469 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -178,6 +178,7 @@ struct blkg_policy_data { struct blkio_group { /* Pointer to the associated request_queue, RCU protected */ struct request_queue __rcu *q; + struct list_head q_node[BLKIO_NR_POLICIES]; struct hlist_node blkcg_node; struct blkio_cgroup *blkcg; /* Store cgroup path */ diff --git a/block/blk-core.c b/block/blk-core.c index c3434c6395b9..83a47fcf5946 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -547,6 +547,10 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) INIT_LIST_HEAD(&q->queue_head); INIT_LIST_HEAD(&q->timeout_list); INIT_LIST_HEAD(&q->icq_list); +#ifdef CONFIG_BLK_CGROUP + INIT_LIST_HEAD(&q->blkg_list[0]); + INIT_LIST_HEAD(&q->blkg_list[1]); +#endif INIT_LIST_HEAD(&q->flush_queue[0]); INIT_LIST_HEAD(&q->flush_queue[1]); INIT_LIST_HEAD(&q->flush_data_in_flight); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index b2fddaf20b98..c15d38307e1d 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -41,9 +41,6 @@ struct throtl_rb_root { #define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node) struct throtl_grp { - /* List of throtl groups on the request queue*/ - struct hlist_node tg_node; - /* active throtl group service_tree member */ struct rb_node rb_node; @@ -83,9 +80,6 @@ struct throtl_grp { struct throtl_data { - /* List of throtl groups */ - struct hlist_head tg_list; - /* service tree for active throtl groups */ struct throtl_rb_root tg_service_tree; @@ -152,7 +146,6 @@ static void throtl_init_blkio_group(struct blkio_group *blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); - INIT_HLIST_NODE(&tg->tg_node); RB_CLEAR_NODE(&tg->rb_node); bio_list_init(&tg->bio_lists[0]); bio_list_init(&tg->bio_lists[1]); @@ -167,11 +160,9 @@ static void throtl_init_blkio_group(struct blkio_group *blkg) static void throtl_link_blkio_group(struct request_queue *q, struct blkio_group *blkg) { - struct throtl_data *td = q->td; - struct throtl_grp *tg = blkg_to_tg(blkg); - - hlist_add_head(&tg->tg_node, &td->tg_list); - td->nr_undestroyed_grps++; + list_add(&blkg->q_node[BLKIO_POLICY_THROTL], + &q->blkg_list[BLKIO_POLICY_THROTL]); + q->nr_blkgs[BLKIO_POLICY_THROTL]++; } static struct @@ -711,8 +702,8 @@ static int throtl_select_dispatch(struct throtl_data *td, struct bio_list *bl) static void throtl_process_limit_change(struct throtl_data *td) { - struct throtl_grp *tg; - struct hlist_node *pos, *n; + struct request_queue *q = td->queue; + struct blkio_group *blkg, *n; if (!td->limits_changed) return; @@ -721,7 +712,10 @@ static void 
throtl_process_limit_change(struct throtl_data *td) throtl_log(td, "limits changed"); - hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) { + list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_THROTL], + q_node[BLKIO_POLICY_THROTL]) { + struct throtl_grp *tg = blkg_to_tg(blkg); + if (!tg->limits_changed) continue; @@ -822,26 +816,31 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) static void throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg) { + struct blkio_group *blkg = tg_to_blkg(tg); + /* Something wrong if we are trying to remove same group twice */ - BUG_ON(hlist_unhashed(&tg->tg_node)); + WARN_ON_ONCE(list_empty(&blkg->q_node[BLKIO_POLICY_THROTL])); - hlist_del_init(&tg->tg_node); + list_del_init(&blkg->q_node[BLKIO_POLICY_THROTL]); /* * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. */ blkg_put(tg_to_blkg(tg)); - td->nr_undestroyed_grps--; + td->queue->nr_blkgs[BLKIO_POLICY_THROTL]--; } static bool throtl_release_tgs(struct throtl_data *td, bool release_root) { - struct hlist_node *pos, *n; - struct throtl_grp *tg; + struct request_queue *q = td->queue; + struct blkio_group *blkg, *n; bool empty = true; - hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) { + list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_THROTL], + q_node[BLKIO_POLICY_THROTL]) { + struct throtl_grp *tg = blkg_to_tg(blkg); + /* skip root? */ if (!release_root && tg == td->root_tg) continue; @@ -851,7 +850,7 @@ static bool throtl_release_tgs(struct throtl_data *td, bool release_root) * it from cgroup list, then it will take care of destroying * cfqg also. */ - if (!blkiocg_del_blkio_group(tg_to_blkg(tg))) + if (!blkiocg_del_blkio_group(blkg)) throtl_destroy_tg(td, tg); else empty = false; @@ -1114,7 +1113,6 @@ int blk_throtl_init(struct request_queue *q) if (!td) return -ENOMEM; - INIT_HLIST_HEAD(&td->tg_list); td->tg_service_tree = THROTL_RB_ROOT; td->limits_changed = false; INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work); @@ -1144,7 +1142,7 @@ int blk_throtl_init(struct request_queue *q) void blk_throtl_exit(struct request_queue *q) { struct throtl_data *td = q->td; - bool wait = false; + bool wait; BUG_ON(!td); @@ -1154,8 +1152,7 @@ void blk_throtl_exit(struct request_queue *q) throtl_release_tgs(td, true); /* If there are other groups */ - if (td->nr_undestroyed_grps > 0) - wait = true; + wait = q->nr_blkgs[BLKIO_POLICY_THROTL]; spin_unlock_irq(q->queue_lock); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 11dd9d7f2edb..e846803280a6 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -208,9 +208,7 @@ struct cfq_group { unsigned long saved_workload_slice; enum wl_type_t saved_workload; enum wl_prio_t saved_serving_prio; -#ifdef CONFIG_CFQ_GROUP_IOSCHED - struct hlist_node cfqd_node; -#endif + /* number of requests that are on the dispatch list or inside driver */ int dispatched; struct cfq_ttime ttime; @@ -302,12 +300,6 @@ struct cfq_data { struct cfq_queue oom_cfqq; unsigned long last_delayed_sync; - - /* List of cfq groups being managed on this device*/ - struct hlist_head cfqg_list; - - /* Number of groups which are on blkcg->blkg_list */ - unsigned int nr_blkcg_linked_grps; }; static inline struct cfq_group *blkg_to_cfqg(struct blkio_group *blkg) @@ -1056,13 +1048,9 @@ static void cfq_update_blkio_group_weight(struct request_queue *q, static void cfq_link_blkio_group(struct request_queue *q, struct blkio_group *blkg) { - struct 
cfq_data *cfqd = q->elevator->elevator_data; - struct cfq_group *cfqg = blkg_to_cfqg(blkg); - - cfqd->nr_blkcg_linked_grps++; - - /* Add group on cfqd list */ - hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list); + list_add(&blkg->q_node[BLKIO_POLICY_PROP], + &q->blkg_list[BLKIO_POLICY_PROP]); + q->nr_blkgs[BLKIO_POLICY_PROP]++; } static void cfq_init_blkio_group(struct blkio_group *blkg) @@ -1110,13 +1098,15 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg) { + struct blkio_group *blkg = cfqg_to_blkg(cfqg); + /* Something wrong if we are trying to remove same group twice */ - BUG_ON(hlist_unhashed(&cfqg->cfqd_node)); + BUG_ON(list_empty(&blkg->q_node[BLKIO_POLICY_PROP])); - hlist_del_init(&cfqg->cfqd_node); + list_del_init(&blkg->q_node[BLKIO_POLICY_PROP]); - BUG_ON(cfqd->nr_blkcg_linked_grps <= 0); - cfqd->nr_blkcg_linked_grps--; + BUG_ON(cfqd->queue->nr_blkgs[BLKIO_POLICY_PROP] <= 0); + cfqd->queue->nr_blkgs[BLKIO_POLICY_PROP]--; /* * Put the reference taken at the time of creation so that when all @@ -1127,18 +1117,19 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg) static bool cfq_release_cfq_groups(struct cfq_data *cfqd) { - struct hlist_node *pos, *n; - struct cfq_group *cfqg; + struct request_queue *q = cfqd->queue; + struct blkio_group *blkg, *n; bool empty = true; - hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) { + list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_PROP], + q_node[BLKIO_POLICY_PROP]) { /* * If cgroup removal path got to blk_group first and removed * it from cgroup list, then it will take care of destroying * cfqg also. */ - if (!cfq_blkiocg_del_blkio_group(cfqg_to_blkg(cfqg))) - cfq_destroy_cfqg(cfqd, cfqg); + if (!cfq_blkiocg_del_blkio_group(blkg)) + cfq_destroy_cfqg(cfqd, blkg_to_cfqg(blkg)); else empty = false; } @@ -3558,13 +3549,13 @@ static void cfq_exit_queue(struct elevator_queue *e) cfq_put_async_queues(cfqd); cfq_release_cfq_groups(cfqd); +#ifdef CONFIG_BLK_CGROUP /* * If there are groups which we could not unlink from blkcg list, * wait for a rcu period for them to be freed. */ - if (cfqd->nr_blkcg_linked_grps) - wait = true; - + wait = q->nr_blkgs[BLKIO_POLICY_PROP]; +#endif spin_unlock_irq(q->queue_lock); cfq_shutdown_timer_wq(cfqd); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e8c0bbd06b9a..f4e35edea70f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -362,6 +362,11 @@ struct request_queue { struct list_head timeout_list; struct list_head icq_list; +#ifdef CONFIG_BLK_CGROUP + /* XXX: array size hardcoded to avoid include dependency (temporary) */ + struct list_head blkg_list[2]; + int nr_blkgs[2]; +#endif struct queue_limits limits; -- cgit v1.2.3-59-g8ed1b From 03aa264ac15637b6f98374270bcdf31400965505 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:19 -0800 Subject: blkcg: let blkcg core manage per-queue blkg list and counter With the previous patch to move blkg list heads and counters to request_queue and blkg, the logic to manage them in both policies is almost identical and can be moved to blkcg core. This patch moves blkg link logic into blkg_lookup_create(), implements common blkg unlink code in blkg_destroy(), and updates blkg_destroy_all() so that it's policy-specific and can skip the root group.
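In condensed form (a sketch distilled from the hunks that follow, not a verbatim quote of the final code), the policies stop keeping private release loops and simply ask blkcg core to shoot down their groups:

    /* blk-throttle exit: destroy every group, including the root group */
    blkg_destroy_all(q, BLKIO_POLICY_THROTL, true);

    /* elevator switch / bypass: clear all policies but keep the root groups */
    for (i = 0; i < BLKIO_NR_POLICIES; i++)
            blkg_destroy_all(q, i, false);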
The updated blkg_destroy_all() is now used to both clear queue for bypassing and elv switching, and release all blkgs on q exit. This patch introduces a race window where policy [de]registration may race against queue blkg clearing. This can only be a problem on cfq unload and shouldn't be a real problem in practice (and we have many other places where this race already exists). Future patches will remove these unlikely races. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 72 +++++++++++++++++++++++++++-------- block/blk-cgroup.h | 15 +++----- block/blk-throttle.c | 99 +----------------------------------------------- block/cfq-iosched.c | 100 +++---------------------------------------------- block/elevator.c | 5 ++- include/linux/blkdev.h | 4 +- 6 files changed, 74 insertions(+), 221 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index e940972ccd66..2ca9a15db0f7 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -596,8 +596,11 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, /* insert */ spin_lock(&blkcg->lock); swap(blkg, new_blkg); + hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); - pol->ops.blkio_link_group_fn(q, blkg); + list_add(&blkg->q_node[plid], &q->blkg_list[plid]); + q->nr_blkgs[plid]++; + spin_unlock(&blkcg->lock); out: blkg_free(new_blkg); @@ -646,36 +649,69 @@ struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, } EXPORT_SYMBOL_GPL(blkg_lookup); -void blkg_destroy_all(struct request_queue *q) +static void blkg_destroy(struct blkio_group *blkg, enum blkio_policy_id plid) +{ + struct request_queue *q = blkg->q; + + lockdep_assert_held(q->queue_lock); + + /* Something wrong if we are trying to remove same group twice */ + WARN_ON_ONCE(list_empty(&blkg->q_node[plid])); + list_del_init(&blkg->q_node[plid]); + + WARN_ON_ONCE(q->nr_blkgs[plid] <= 0); + q->nr_blkgs[plid]--; + + /* + * Put the reference taken at the time of creation so that when all + * queues are gone, group can be destroyed. + */ + blkg_put(blkg); +} + +void blkg_destroy_all(struct request_queue *q, enum blkio_policy_id plid, + bool destroy_root) { - struct blkio_policy_type *pol; + struct blkio_group *blkg, *n; while (true) { bool done = true; - spin_lock(&blkio_list_lock); spin_lock_irq(q->queue_lock); - /* - * clear_queue_fn() might return with non-empty group list - * if it raced cgroup removal and lost. cgroup removal is - * guaranteed to make forward progress and retrying after a - * while is enough. This ugliness is scheduled to be - * removed after locking update. - */ - list_for_each_entry(pol, &blkio_list, list) - if (!pol->ops.blkio_clear_queue_fn(q)) + list_for_each_entry_safe(blkg, n, &q->blkg_list[plid], + q_node[plid]) { + /* skip root? */ + if (!destroy_root && blkg->blkcg == &blkio_root_cgroup) + continue; + + /* + * If cgroup removal path got to blk_group first + * and removed it from cgroup list, then it will + * take care of destroying cfqg also. + */ + if (!blkiocg_del_blkio_group(blkg)) + blkg_destroy(blkg, plid); + else done = false; + } spin_unlock_irq(q->queue_lock); - spin_unlock(&blkio_list_lock); + /* + * Group list may not be empty if we raced cgroup removal + * and lost. cgroup removal is guaranteed to make forward + * progress and retrying after a while is enough. This + * ugliness is scheduled to be removed after locking + * update. 
+ */ if (done) break; msleep(10); /* just some random duration I like */ } } +EXPORT_SYMBOL_GPL(blkg_destroy_all); static void blkg_rcu_free(struct rcu_head *rcu_head) { @@ -1549,11 +1585,13 @@ static int blkiocg_pre_destroy(struct cgroup_subsys *subsys, * this event. */ spin_lock(&blkio_list_lock); + spin_lock_irqsave(q->queue_lock, flags); list_for_each_entry(blkiop, &blkio_list, list) { if (blkiop->plid != blkg->plid) continue; - blkiop->ops.blkio_unlink_group_fn(q, blkg); + blkg_destroy(blkg, blkiop->plid); } + spin_unlock_irqrestore(q->queue_lock, flags); spin_unlock(&blkio_list_lock); } while (1); @@ -1695,12 +1733,14 @@ static void blkcg_bypass_start(void) __acquires(&all_q_mutex) { struct request_queue *q; + int i; mutex_lock(&all_q_mutex); list_for_each_entry(q, &all_q_list, all_q_node) { blk_queue_bypass_start(q); - blkg_destroy_all(q); + for (i = 0; i < BLKIO_NR_POLICIES; i++) + blkg_destroy_all(q, i, false); } } diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index ae96f196d469..83ce5fa0a604 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -196,11 +196,6 @@ struct blkio_group { }; typedef void (blkio_init_group_fn)(struct blkio_group *blkg); -typedef void (blkio_link_group_fn)(struct request_queue *q, - struct blkio_group *blkg); -typedef void (blkio_unlink_group_fn)(struct request_queue *q, - struct blkio_group *blkg); -typedef bool (blkio_clear_queue_fn)(struct request_queue *q); typedef void (blkio_update_group_weight_fn)(struct request_queue *q, struct blkio_group *blkg, unsigned int weight); typedef void (blkio_update_group_read_bps_fn)(struct request_queue *q, @@ -214,9 +209,6 @@ typedef void (blkio_update_group_write_iops_fn)(struct request_queue *q, struct blkio_policy_ops { blkio_init_group_fn *blkio_init_group_fn; - blkio_link_group_fn *blkio_link_group_fn; - blkio_unlink_group_fn *blkio_unlink_group_fn; - blkio_clear_queue_fn *blkio_clear_queue_fn; blkio_update_group_weight_fn *blkio_update_group_weight_fn; blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn; blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn; @@ -238,7 +230,8 @@ extern void blkcg_exit_queue(struct request_queue *q); /* Blkio controller policy registration */ extern void blkio_policy_register(struct blkio_policy_type *); extern void blkio_policy_unregister(struct blkio_policy_type *); -extern void blkg_destroy_all(struct request_queue *q); +extern void blkg_destroy_all(struct request_queue *q, + enum blkio_policy_id plid, bool destroy_root); /** * blkg_to_pdata - get policy private data @@ -319,7 +312,9 @@ static inline void blkcg_drain_queue(struct request_queue *q) { } static inline void blkcg_exit_queue(struct request_queue *q) { } static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { } static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { } -static inline void blkg_destroy_all(struct request_queue *q) { } +static inline void blkg_destroy_all(struct request_queue *q, + enum blkio_policy_id plid, + bool destory_root) { } static inline void *blkg_to_pdata(struct blkio_group *blkg, struct blkio_policy_type *pol) { return NULL; } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index c15d38307e1d..132941260e58 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -157,14 +157,6 @@ static void throtl_init_blkio_group(struct blkio_group *blkg) tg->iops[WRITE] = -1; } -static void throtl_link_blkio_group(struct request_queue *q, - struct blkio_group *blkg) -{ - 
list_add(&blkg->q_node[BLKIO_POLICY_THROTL], - &q->blkg_list[BLKIO_POLICY_THROTL]); - q->nr_blkgs[BLKIO_POLICY_THROTL]++; -} - static struct throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) { @@ -813,89 +805,6 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) } } -static void -throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg) -{ - struct blkio_group *blkg = tg_to_blkg(tg); - - /* Something wrong if we are trying to remove same group twice */ - WARN_ON_ONCE(list_empty(&blkg->q_node[BLKIO_POLICY_THROTL])); - - list_del_init(&blkg->q_node[BLKIO_POLICY_THROTL]); - - /* - * Put the reference taken at the time of creation so that when all - * queues are gone, group can be destroyed. - */ - blkg_put(tg_to_blkg(tg)); - td->queue->nr_blkgs[BLKIO_POLICY_THROTL]--; -} - -static bool throtl_release_tgs(struct throtl_data *td, bool release_root) -{ - struct request_queue *q = td->queue; - struct blkio_group *blkg, *n; - bool empty = true; - - list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_THROTL], - q_node[BLKIO_POLICY_THROTL]) { - struct throtl_grp *tg = blkg_to_tg(blkg); - - /* skip root? */ - if (!release_root && tg == td->root_tg) - continue; - - /* - * If cgroup removal path got to blk_group first and removed - * it from cgroup list, then it will take care of destroying - * cfqg also. - */ - if (!blkiocg_del_blkio_group(blkg)) - throtl_destroy_tg(td, tg); - else - empty = false; - } - return empty; -} - -/* - * Blk cgroup controller notification saying that blkio_group object is being - * delinked as associated cgroup object is going away. That also means that - * no new IO will come in this group. So get rid of this group as soon as - * any pending IO in the group is finished. - * - * This function is called under rcu_read_lock(). @q is the rcu protected - * pointer. That means @q is a valid request_queue pointer as long as we - * are rcu read lock. - * - * @q was fetched from blkio_group under blkio_cgroup->lock. That means - * it should not be NULL as even if queue was going away, cgroup deltion - * path got to it first. - */ -void throtl_unlink_blkio_group(struct request_queue *q, - struct blkio_group *blkg) -{ - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - throtl_destroy_tg(q->td, blkg_to_tg(blkg)); - spin_unlock_irqrestore(q->queue_lock, flags); -} - -static bool throtl_clear_queue(struct request_queue *q) -{ - lockdep_assert_held(q->queue_lock); - - /* - * Clear tgs but leave the root one alone. This is necessary - * because root_tg is expected to be persistent and safe because - * blk-throtl can never be disabled while @q is alive. This is a - * kludge to prepare for unified blkg. This whole function will be - * removed soon. 
- */ - return throtl_release_tgs(q->td, false); -} - static void throtl_update_blkio_group_common(struct throtl_data *td, struct throtl_grp *tg) { @@ -960,9 +869,6 @@ static void throtl_shutdown_wq(struct request_queue *q) static struct blkio_policy_type blkio_policy_throtl = { .ops = { .blkio_init_group_fn = throtl_init_blkio_group, - .blkio_link_group_fn = throtl_link_blkio_group, - .blkio_unlink_group_fn = throtl_unlink_blkio_group, - .blkio_clear_queue_fn = throtl_clear_queue, .blkio_update_group_read_bps_fn = throtl_update_blkio_group_read_bps, .blkio_update_group_write_bps_fn = @@ -1148,12 +1054,11 @@ void blk_throtl_exit(struct request_queue *q) throtl_shutdown_wq(q); - spin_lock_irq(q->queue_lock); - throtl_release_tgs(td, true); + blkg_destroy_all(q, BLKIO_POLICY_THROTL, true); /* If there are other groups */ + spin_lock_irq(q->queue_lock); wait = q->nr_blkgs[BLKIO_POLICY_THROTL]; - spin_unlock_irq(q->queue_lock); /* diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index e846803280a6..dc73690dec44 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1045,14 +1045,6 @@ static void cfq_update_blkio_group_weight(struct request_queue *q, cfqg->needs_update = true; } -static void cfq_link_blkio_group(struct request_queue *q, - struct blkio_group *blkg) -{ - list_add(&blkg->q_node[BLKIO_POLICY_PROP], - &q->blkg_list[BLKIO_POLICY_PROP]); - q->nr_blkgs[BLKIO_POLICY_PROP]++; -} - static void cfq_init_blkio_group(struct blkio_group *blkg) { struct cfq_group *cfqg = blkg_to_cfqg(blkg); @@ -1096,84 +1088,6 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) blkg_get(cfqg_to_blkg(cfqg)); } -static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg) -{ - struct blkio_group *blkg = cfqg_to_blkg(cfqg); - - /* Something wrong if we are trying to remove same group twice */ - BUG_ON(list_empty(&blkg->q_node[BLKIO_POLICY_PROP])); - - list_del_init(&blkg->q_node[BLKIO_POLICY_PROP]); - - BUG_ON(cfqd->queue->nr_blkgs[BLKIO_POLICY_PROP] <= 0); - cfqd->queue->nr_blkgs[BLKIO_POLICY_PROP]--; - - /* - * Put the reference taken at the time of creation so that when all - * queues are gone, group can be destroyed. - */ - blkg_put(cfqg_to_blkg(cfqg)); -} - -static bool cfq_release_cfq_groups(struct cfq_data *cfqd) -{ - struct request_queue *q = cfqd->queue; - struct blkio_group *blkg, *n; - bool empty = true; - - list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_PROP], - q_node[BLKIO_POLICY_PROP]) { - /* - * If cgroup removal path got to blk_group first and removed - * it from cgroup list, then it will take care of destroying - * cfqg also. - */ - if (!cfq_blkiocg_del_blkio_group(blkg)) - cfq_destroy_cfqg(cfqd, blkg_to_cfqg(blkg)); - else - empty = false; - } - return empty; -} - -/* - * Blk cgroup controller notification saying that blkio_group object is being - * delinked as associated cgroup object is going away. That also means that - * no new IO will come in this group. So get rid of this group as soon as - * any pending IO in the group is finished. - * - * This function is called under rcu_read_lock(). key is the rcu protected - * pointer. That means @q is a valid request_queue pointer as long as we - * are rcu read lock. - * - * @q was fetched from blkio_group under blkio_cgroup->lock. That means - * it should not be NULL as even if elevator was exiting, cgroup deltion - * path got to it first. 
- */ -static void cfq_unlink_blkio_group(struct request_queue *q, - struct blkio_group *blkg) -{ - struct cfq_data *cfqd = q->elevator->elevator_data; - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - cfq_destroy_cfqg(cfqd, blkg_to_cfqg(blkg)); - spin_unlock_irqrestore(q->queue_lock, flags); -} - -static struct elevator_type iosched_cfq; - -static bool cfq_clear_queue(struct request_queue *q) -{ - lockdep_assert_held(q->queue_lock); - - /* shoot down blkgs iff the current elevator is cfq */ - if (!q->elevator || q->elevator->type != &iosched_cfq) - return true; - - return cfq_release_cfq_groups(q->elevator->elevator_data); -} - #else /* GROUP_IOSCHED */ static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg) @@ -1186,8 +1100,6 @@ cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) { cfqq->cfqg = cfqg; } -static void cfq_release_cfq_groups(struct cfq_data *cfqd) {} - #endif /* GROUP_IOSCHED */ /* @@ -3547,17 +3459,20 @@ static void cfq_exit_queue(struct elevator_queue *e) __cfq_slice_expired(cfqd, cfqd->active_queue, 0); cfq_put_async_queues(cfqd); - cfq_release_cfq_groups(cfqd); + + spin_unlock_irq(q->queue_lock); + + blkg_destroy_all(q, BLKIO_POLICY_PROP, true); #ifdef CONFIG_BLK_CGROUP /* * If there are groups which we could not unlink from blkcg list, * wait for a rcu period for them to be freed. */ + spin_lock_irq(q->queue_lock); wait = q->nr_blkgs[BLKIO_POLICY_PROP]; -#endif spin_unlock_irq(q->queue_lock); - +#endif cfq_shutdown_timer_wq(cfqd); /* @@ -3794,9 +3709,6 @@ static struct elevator_type iosched_cfq = { static struct blkio_policy_type blkio_policy_cfq = { .ops = { .blkio_init_group_fn = cfq_init_blkio_group, - .blkio_link_group_fn = cfq_link_blkio_group, - .blkio_unlink_group_fn = cfq_unlink_blkio_group, - .blkio_clear_queue_fn = cfq_clear_queue, .blkio_update_group_weight_fn = cfq_update_blkio_group_weight, }, .plid = BLKIO_POLICY_PROP, diff --git a/block/elevator.c b/block/elevator.c index 8c7561fd2c79..d4d39dab841a 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -876,7 +876,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) { struct elevator_queue *old = q->elevator; bool registered = old->registered; - int err; + int i, err; /* * Turn on BYPASS and drain all requests w/ elevator private data. @@ -895,7 +895,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) ioc_clear_queue(q); spin_unlock_irq(q->queue_lock); - blkg_destroy_all(q); + for (i = 0; i < BLKIO_NR_POLICIES; i++) + blkg_destroy_all(q, i, false); /* allocate, init and register new elevator */ err = -ENOMEM; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f4e35edea70f..b4d1d4bfc168 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -364,8 +364,8 @@ struct request_queue { struct list_head icq_list; #ifdef CONFIG_BLK_CGROUP /* XXX: array size hardcoded to avoid include dependency (temporary) */ - struct list_head blkg_list[2]; - int nr_blkgs[2]; + struct list_head blkg_list; + int nr_blkgs; #endif struct queue_limits limits; -- cgit v1.2.3-59-g8ed1b From e8989fae38d9831c72b20375a206a919ca468c52 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:20 -0800 Subject: blkcg: unify blkg's for blkcg policies Currently, blkg is per cgroup-queue-policy combination. This is unnatural and leads to various convolutions in partially used duplicate fields in blkg, config / stat access, and general management of blkgs. 
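As a hedged sketch of what per cgroup-queue-policy versus per cgroup-queue means for users of the API (illustrative fragments only; the actual signatures appear in the diff below), the policy id drops out of the lookup and each policy instead finds its data hanging off the shared blkg:

    /* before: one blkg per (cgroup, queue, policy) */
    blkg = blkg_lookup(blkcg, q, plid);

    /* after: one blkg per (cgroup, queue); per-policy data lives in blkg->pd[] */
    blkg = blkg_lookup(blkcg, q);
    pd = blkg->pd[plid];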
This patch makes blkg's per cgroup-queue and lets them serve all policies. blkgs are now created and destroyed by blkcg core proper. This will allow further consolidation of common management logic into blkcg core and API with better defined semantics and layering. As a transitional step to untangle blkg management, elvswitch and policy [de]registration, all blkgs except the root blkg are being shot down during elvswitch and bypass. This patch adds blkg_root_update() to update root blkg in place on policy change. This is hacky and racy but should be good enough as an interim step until we get locking simplified and switch over to proper in-place update for all blkgs. -v2: Root blkgs need to be updated on elvswitch too and blkg_alloc() comment wasn't updated according to the function change. Fixed. Both pointed out by Vivek. -v3: v2 updated blkg_destroy_all() to invoke update_root_blkg_pd() for all policies. This freed root pd during elvswitch before the last queue finished exiting and led to an oops. Directly invoke update_root_blkg_pd() only on BLKIO_POLICY_PROP from cfq_exit_queue(). This also is closer to what will be done with proper in-place blkg update. Reported by Vivek. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 224 +++++++++++++++++++++++++++++++-------------------- block/blk-cgroup.h | 15 ++-- block/blk-core.c | 3 +- block/blk-sysfs.c | 4 +- block/blk-throttle.c | 9 +-- block/cfq-iosched.c | 5 +- block/elevator.c | 5 +- 7 files changed, 154 insertions(+), 111 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 2ca9a15db0f7..cad5f15cf49b 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -461,16 +461,20 @@ EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats); */ static void blkg_free(struct blkio_group *blkg) { - struct blkg_policy_data *pd; + int i; if (!blkg) return; - pd = blkg->pd[blkg->plid]; - if (pd) { - free_percpu(pd->stats_cpu); - kfree(pd); + for (i = 0; i < BLKIO_NR_POLICIES; i++) { + struct blkg_policy_data *pd = blkg->pd[i]; + + if (pd) { + free_percpu(pd->stats_cpu); + kfree(pd); + } } + kfree(blkg); } @@ -478,19 +482,17 @@ static void blkg_free(struct blkio_group *blkg) * blkg_alloc - allocate a blkg * @blkcg: block cgroup the new blkg is associated with * @q: request_queue the new blkg is associated with - * @pol: policy the new blkg is associated with * - * Allocate a new blkg assocating @blkcg and @q for @pol. + * Allocate a new blkg assocating @blkcg and @q. * * FIXME: Should be called with queue locked but currently isn't due to * percpu stat breakage.
*/ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, - struct request_queue *q, - struct blkio_policy_type *pol) + struct request_queue *q) { struct blkio_group *blkg; - struct blkg_policy_data *pd; + int i; /* alloc and init base part */ blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node); @@ -499,34 +501,45 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, spin_lock_init(&blkg->stats_lock); rcu_assign_pointer(blkg->q, q); - INIT_LIST_HEAD(&blkg->q_node[0]); - INIT_LIST_HEAD(&blkg->q_node[1]); + INIT_LIST_HEAD(&blkg->q_node); blkg->blkcg = blkcg; - blkg->plid = pol->plid; blkg->refcnt = 1; cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path)); - /* alloc per-policy data and attach it to blkg */ - pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC, - q->node); - if (!pd) { - blkg_free(blkg); - return NULL; - } + for (i = 0; i < BLKIO_NR_POLICIES; i++) { + struct blkio_policy_type *pol = blkio_policy[i]; + struct blkg_policy_data *pd; - blkg->pd[pol->plid] = pd; - pd->blkg = blkg; + if (!pol) + continue; + + /* alloc per-policy data and attach it to blkg */ + pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC, + q->node); + if (!pd) { + blkg_free(blkg); + return NULL; + } - /* broken, read comment in the callsite */ + blkg->pd[i] = pd; + pd->blkg = blkg; - pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu); - if (!pd->stats_cpu) { - blkg_free(blkg); - return NULL; + /* broken, read comment in the callsite */ + pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu); + if (!pd->stats_cpu) { + blkg_free(blkg); + return NULL; + } } /* invoke per-policy init */ - pol->ops.blkio_init_group_fn(blkg); + for (i = 0; i < BLKIO_NR_POLICIES; i++) { + struct blkio_policy_type *pol = blkio_policy[i]; + + if (pol) + pol->ops.blkio_init_group_fn(blkg); + } + return blkg; } @@ -536,7 +549,6 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, bool for_root) __releases(q->queue_lock) __acquires(q->queue_lock) { - struct blkio_policy_type *pol = blkio_policy[plid]; struct blkio_group *blkg, *new_blkg; WARN_ON_ONCE(!rcu_read_lock_held()); @@ -551,7 +563,7 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, if (unlikely(blk_queue_bypass(q)) && !for_root) return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY); - blkg = blkg_lookup(blkcg, q, plid); + blkg = blkg_lookup(blkcg, q); if (blkg) return blkg; @@ -571,7 +583,7 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, spin_unlock_irq(q->queue_lock); rcu_read_unlock(); - new_blkg = blkg_alloc(blkcg, q, pol); + new_blkg = blkg_alloc(blkcg, q); rcu_read_lock(); spin_lock_irq(q->queue_lock); @@ -583,7 +595,7 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, } /* did someone beat us to it? */ - blkg = blkg_lookup(blkcg, q, plid); + blkg = blkg_lookup(blkcg, q); if (unlikely(blkg)) goto out; @@ -598,8 +610,8 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, swap(blkg, new_blkg); hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); - list_add(&blkg->q_node[plid], &q->blkg_list[plid]); - q->nr_blkgs[plid]++; + list_add(&blkg->q_node, &q->blkg_list); + q->nr_blkgs++; spin_unlock(&blkcg->lock); out: @@ -636,31 +648,30 @@ EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group); /* called under rcu_read_lock(). 
*/ struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, - struct request_queue *q, - enum blkio_policy_id plid) + struct request_queue *q) { struct blkio_group *blkg; struct hlist_node *n; hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) - if (blkg->q == q && blkg->plid == plid) + if (blkg->q == q) return blkg; return NULL; } EXPORT_SYMBOL_GPL(blkg_lookup); -static void blkg_destroy(struct blkio_group *blkg, enum blkio_policy_id plid) +static void blkg_destroy(struct blkio_group *blkg) { struct request_queue *q = blkg->q; lockdep_assert_held(q->queue_lock); /* Something wrong if we are trying to remove same group twice */ - WARN_ON_ONCE(list_empty(&blkg->q_node[plid])); - list_del_init(&blkg->q_node[plid]); + WARN_ON_ONCE(list_empty(&blkg->q_node)); + list_del_init(&blkg->q_node); - WARN_ON_ONCE(q->nr_blkgs[plid] <= 0); - q->nr_blkgs[plid]--; + WARN_ON_ONCE(q->nr_blkgs <= 0); + q->nr_blkgs--; /* * Put the reference taken at the time of creation so that when all @@ -669,8 +680,40 @@ static void blkg_destroy(struct blkio_group *blkg, enum blkio_policy_id plid) blkg_put(blkg); } -void blkg_destroy_all(struct request_queue *q, enum blkio_policy_id plid, - bool destroy_root) +/* + * XXX: This updates blkg policy data in-place for root blkg, which is + * necessary across elevator switch and policy registration as root blkgs + * aren't shot down. This broken and racy implementation is temporary. + * Eventually, blkg shoot down will be replaced by proper in-place update. + */ +void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid) +{ + struct blkio_policy_type *pol = blkio_policy[plid]; + struct blkio_group *blkg = blkg_lookup(&blkio_root_cgroup, q); + struct blkg_policy_data *pd; + + if (!blkg) + return; + + kfree(blkg->pd[plid]); + blkg->pd[plid] = NULL; + + if (!pol) + return; + + pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL); + WARN_ON_ONCE(!pd); + + pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu); + WARN_ON_ONCE(!pd->stats_cpu); + + blkg->pd[plid] = pd; + pd->blkg = blkg; + pol->ops.blkio_init_group_fn(blkg); +} +EXPORT_SYMBOL_GPL(update_root_blkg_pd); + +void blkg_destroy_all(struct request_queue *q, bool destroy_root) { struct blkio_group *blkg, *n; @@ -679,8 +722,7 @@ void blkg_destroy_all(struct request_queue *q, enum blkio_policy_id plid, spin_lock_irq(q->queue_lock); - list_for_each_entry_safe(blkg, n, &q->blkg_list[plid], - q_node[plid]) { + list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { /* skip root? */ if (!destroy_root && blkg->blkcg == &blkio_root_cgroup) continue; @@ -691,7 +733,7 @@ void blkg_destroy_all(struct request_queue *q, enum blkio_policy_id plid, * take care of destroying cfqg also. 
*/ if (!blkiocg_del_blkio_group(blkg)) - blkg_destroy(blkg, plid); + blkg_destroy(blkg); else done = false; } @@ -776,43 +818,49 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) #endif blkcg = cgroup_to_blkio_cgroup(cgroup); + spin_lock(&blkio_list_lock); spin_lock_irq(&blkcg->lock); hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { - struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + struct blkio_policy_type *pol; - spin_lock(&blkg->stats_lock); - stats = &pd->stats; + list_for_each_entry(pol, &blkio_list, list) { + struct blkg_policy_data *pd = blkg->pd[pol->plid]; + + spin_lock(&blkg->stats_lock); + stats = &pd->stats; #ifdef CONFIG_DEBUG_BLK_CGROUP - idling = blkio_blkg_idling(stats); - waiting = blkio_blkg_waiting(stats); - empty = blkio_blkg_empty(stats); + idling = blkio_blkg_idling(stats); + waiting = blkio_blkg_waiting(stats); + empty = blkio_blkg_empty(stats); #endif - for (i = 0; i < BLKIO_STAT_TOTAL; i++) - queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i]; - memset(stats, 0, sizeof(struct blkio_group_stats)); - for (i = 0; i < BLKIO_STAT_TOTAL; i++) - stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i]; + for (i = 0; i < BLKIO_STAT_TOTAL; i++) + queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i]; + memset(stats, 0, sizeof(struct blkio_group_stats)); + for (i = 0; i < BLKIO_STAT_TOTAL; i++) + stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i]; #ifdef CONFIG_DEBUG_BLK_CGROUP - if (idling) { - blkio_mark_blkg_idling(stats); - stats->start_idle_time = now; - } - if (waiting) { - blkio_mark_blkg_waiting(stats); - stats->start_group_wait_time = now; - } - if (empty) { - blkio_mark_blkg_empty(stats); - stats->start_empty_time = now; - } + if (idling) { + blkio_mark_blkg_idling(stats); + stats->start_idle_time = now; + } + if (waiting) { + blkio_mark_blkg_waiting(stats); + stats->start_group_wait_time = now; + } + if (empty) { + blkio_mark_blkg_empty(stats); + stats->start_empty_time = now; + } #endif - spin_unlock(&blkg->stats_lock); + spin_unlock(&blkg->stats_lock); - /* Reset Per cpu stats which don't take blkg->stats_lock */ - blkio_reset_stats_cpu(blkg, blkg->plid); + /* Reset Per cpu stats which don't take blkg->stats_lock */ + blkio_reset_stats_cpu(blkg, pol->plid); + } } spin_unlock_irq(&blkcg->lock); + spin_unlock(&blkio_list_lock); return 0; } @@ -1168,8 +1216,7 @@ static void blkio_read_conf(struct cftype *cft, struct blkio_cgroup *blkcg, spin_lock_irq(&blkcg->lock); hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) - if (BLKIOFILE_POLICY(cft->private) == blkg->plid) - blkio_print_group_conf(cft, blkg, m); + blkio_print_group_conf(cft, blkg, m); spin_unlock_irq(&blkcg->lock); } @@ -1224,7 +1271,7 @@ static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg, const char *dname = blkg_dev_name(blkg); int plid = BLKIOFILE_POLICY(cft->private); - if (!dname || plid != blkg->plid) + if (!dname) continue; if (pcpu) { cgroup_total += blkio_get_stat_cpu(blkg, plid, @@ -1335,9 +1382,9 @@ static int blkio_weight_write(struct blkio_cgroup *blkcg, int plid, u64 val) blkcg->weight = (unsigned int)val; hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { - struct blkg_policy_data *pd = blkg->pd[blkg->plid]; + struct blkg_policy_data *pd = blkg->pd[plid]; - if (blkg->plid == plid && !pd->conf.weight) + if (!pd->conf.weight) blkio_update_group_weight(blkg, plid, blkcg->weight); } @@ -1560,7 +1607,6 @@ static int blkiocg_pre_destroy(struct cgroup_subsys *subsys, unsigned long flags; struct blkio_group *blkg; struct request_queue 
*q; - struct blkio_policy_type *blkiop; rcu_read_lock(); @@ -1586,11 +1632,7 @@ static int blkiocg_pre_destroy(struct cgroup_subsys *subsys, */ spin_lock(&blkio_list_lock); spin_lock_irqsave(q->queue_lock, flags); - list_for_each_entry(blkiop, &blkio_list, list) { - if (blkiop->plid != blkg->plid) - continue; - blkg_destroy(blkg, blkiop->plid); - } + blkg_destroy(blkg); spin_unlock_irqrestore(q->queue_lock, flags); spin_unlock(&blkio_list_lock); } while (1); @@ -1684,6 +1726,8 @@ void blkcg_exit_queue(struct request_queue *q) list_del_init(&q->all_q_node); mutex_unlock(&all_q_mutex); + blkg_destroy_all(q, true); + blk_throtl_exit(q); } @@ -1733,14 +1777,12 @@ static void blkcg_bypass_start(void) __acquires(&all_q_mutex) { struct request_queue *q; - int i; mutex_lock(&all_q_mutex); list_for_each_entry(q, &all_q_list, all_q_node) { blk_queue_bypass_start(q); - for (i = 0; i < BLKIO_NR_POLICIES; i++) - blkg_destroy_all(q, i, false); + blkg_destroy_all(q, false); } } @@ -1757,6 +1799,8 @@ static void blkcg_bypass_end(void) void blkio_policy_register(struct blkio_policy_type *blkiop) { + struct request_queue *q; + blkcg_bypass_start(); spin_lock(&blkio_list_lock); @@ -1765,12 +1809,16 @@ void blkio_policy_register(struct blkio_policy_type *blkiop) list_add_tail(&blkiop->list, &blkio_list); spin_unlock(&blkio_list_lock); + list_for_each_entry(q, &all_q_list, all_q_node) + update_root_blkg_pd(q, blkiop->plid); blkcg_bypass_end(); } EXPORT_SYMBOL_GPL(blkio_policy_register); void blkio_policy_unregister(struct blkio_policy_type *blkiop) { + struct request_queue *q; + blkcg_bypass_start(); spin_lock(&blkio_list_lock); @@ -1779,6 +1827,8 @@ void blkio_policy_unregister(struct blkio_policy_type *blkiop) list_del_init(&blkiop->list); spin_unlock(&blkio_list_lock); + list_for_each_entry(q, &all_q_list, all_q_node) + update_root_blkg_pd(q, blkiop->plid); blkcg_bypass_end(); } EXPORT_SYMBOL_GPL(blkio_policy_unregister); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 83ce5fa0a604..6e8ee86a2870 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -178,13 +178,11 @@ struct blkg_policy_data { struct blkio_group { /* Pointer to the associated request_queue, RCU protected */ struct request_queue __rcu *q; - struct list_head q_node[BLKIO_NR_POLICIES]; + struct list_head q_node; struct hlist_node blkcg_node; struct blkio_cgroup *blkcg; /* Store cgroup path */ char path[128]; - /* policy which owns this blk group */ - enum blkio_policy_id plid; /* reference count */ int refcnt; @@ -230,8 +228,9 @@ extern void blkcg_exit_queue(struct request_queue *q); /* Blkio controller policy registration */ extern void blkio_policy_register(struct blkio_policy_type *); extern void blkio_policy_unregister(struct blkio_policy_type *); -extern void blkg_destroy_all(struct request_queue *q, - enum blkio_policy_id plid, bool destroy_root); +extern void blkg_destroy_all(struct request_queue *q, bool destroy_root); +extern void update_root_blkg_pd(struct request_queue *q, + enum blkio_policy_id plid); /** * blkg_to_pdata - get policy private data @@ -313,8 +312,9 @@ static inline void blkcg_exit_queue(struct request_queue *q) { } static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { } static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { } static inline void blkg_destroy_all(struct request_queue *q, - enum blkio_policy_id plid, bool destory_root) { } +static inline void update_root_blkg_pd(struct request_queue *q, + enum blkio_policy_id plid) { } static inline void 
*blkg_to_pdata(struct blkio_group *blkg, struct blkio_policy_type *pol) { return NULL; } @@ -382,8 +382,7 @@ extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk); extern int blkiocg_del_blkio_group(struct blkio_group *blkg); extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, - struct request_queue *q, - enum blkio_policy_id plid); + struct request_queue *q); struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, struct request_queue *q, enum blkio_policy_id plid, diff --git a/block/blk-core.c b/block/blk-core.c index 83a47fcf5946..05693f403e46 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -548,8 +548,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) INIT_LIST_HEAD(&q->timeout_list); INIT_LIST_HEAD(&q->icq_list); #ifdef CONFIG_BLK_CGROUP - INIT_LIST_HEAD(&q->blkg_list[0]); - INIT_LIST_HEAD(&q->blkg_list[1]); + INIT_LIST_HEAD(&q->blkg_list); #endif INIT_LIST_HEAD(&q->flush_queue[0]); INIT_LIST_HEAD(&q->flush_queue[1]); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 00cdc987b525..aa41b47c22d2 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -480,6 +480,8 @@ static void blk_release_queue(struct kobject *kobj) blk_sync_queue(q); + blkcg_exit_queue(q); + if (q->elevator) { spin_lock_irq(q->queue_lock); ioc_clear_queue(q); @@ -487,8 +489,6 @@ static void blk_release_queue(struct kobject *kobj) elevator_exit(q->elevator); } - blkcg_exit_queue(q); - if (rl->rq_pool) mempool_destroy(rl->rq_pool); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 132941260e58..e35ee7aeea69 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -167,7 +167,7 @@ throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) if (blkcg == &blkio_root_cgroup) return td->root_tg; - return blkg_to_tg(blkg_lookup(blkcg, td->queue, BLKIO_POLICY_THROTL)); + return blkg_to_tg(blkg_lookup(blkcg, td->queue)); } static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, @@ -704,8 +704,7 @@ static void throtl_process_limit_change(struct throtl_data *td) throtl_log(td, "limits changed"); - list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_THROTL], - q_node[BLKIO_POLICY_THROTL]) { + list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { struct throtl_grp *tg = blkg_to_tg(blkg); if (!tg->limits_changed) @@ -1054,11 +1053,9 @@ void blk_throtl_exit(struct request_queue *q) throtl_shutdown_wq(q); - blkg_destroy_all(q, BLKIO_POLICY_THROTL, true); - /* If there are other groups */ spin_lock_irq(q->queue_lock); - wait = q->nr_blkgs[BLKIO_POLICY_THROTL]; + wait = q->nr_blkgs; spin_unlock_irq(q->queue_lock); /* diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index dc73690dec44..393eaa59913b 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3462,15 +3462,13 @@ static void cfq_exit_queue(struct elevator_queue *e) spin_unlock_irq(q->queue_lock); - blkg_destroy_all(q, BLKIO_POLICY_PROP, true); - #ifdef CONFIG_BLK_CGROUP /* * If there are groups which we could not unlink from blkcg list, * wait for a rcu period for them to be freed. 
*/ spin_lock_irq(q->queue_lock); - wait = q->nr_blkgs[BLKIO_POLICY_PROP]; + wait = q->nr_blkgs; spin_unlock_irq(q->queue_lock); #endif cfq_shutdown_timer_wq(cfqd); @@ -3492,6 +3490,7 @@ static void cfq_exit_queue(struct elevator_queue *e) #ifndef CONFIG_CFQ_GROUP_IOSCHED kfree(cfqd->root_group); #endif + update_root_blkg_pd(q, BLKIO_POLICY_PROP); kfree(cfqd); } diff --git a/block/elevator.c b/block/elevator.c index d4d39dab841a..451654fadab0 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -876,7 +876,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) { struct elevator_queue *old = q->elevator; bool registered = old->registered; - int i, err; + int err; /* * Turn on BYPASS and drain all requests w/ elevator private data. @@ -895,8 +895,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) ioc_clear_queue(q); spin_unlock_irq(q->queue_lock); - for (i = 0; i < BLKIO_NR_POLICIES; i++) - blkg_destroy_all(q, i, false); + blkg_destroy_all(q, false); /* allocate, init and register new elevator */ err = -ENOMEM; -- cgit v1.2.3-59-g8ed1b From 9f13ef678efd977487fc0c2e489f17c9a8c67a3e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:21 -0800 Subject: blkcg: use double locking instead of RCU for blkg synchronization blkgs are chained from both blkcgs and request_queues and thus subjected to two locks - blkcg->lock and q->queue_lock. As both blkcg and q can go away anytime, locking during removal is tricky. It's currently solved by wrapping removal inside RCU, which makes the synchronization complex. There are three locks to worry about - the outer RCU, q lock and blkcg lock, and it leads to nasty subtle complications like conditional synchronize_rcu() on queue exit paths. For all other paths, blkcg lock is naturally nested inside q lock and the only exception is blkcg removal path, which is a very cold path and can be implemented as clumsy but conceptually-simple reverse double lock dancing. This patch updates blkg removal path such that blkgs are removed while holding both q and blkcg locks, which is trivial for request queue exit path - blkg_destroy_all(). The blkcg removal path, blkiocg_pre_destroy(), implements reverse double lock dancing essentially identical to ioc_release_fn(). This simplifies blkg locking - no half-dead blkgs to worry about. Now unnecessary RCU annotations will be removed by the next patch. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 136 ++++++++++++++++++++--------------------------------- block/blk-cgroup.h | 4 -- block/cfq.h | 10 ---- 3 files changed, 51 insertions(+), 99 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index cad5f15cf49b..e9e3b038c702 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -620,32 +620,6 @@ out: } EXPORT_SYMBOL_GPL(blkg_lookup_create); -static void __blkiocg_del_blkio_group(struct blkio_group *blkg) -{ - hlist_del_init_rcu(&blkg->blkcg_node); -} - -/* - * returns 0 if blkio_group was still on cgroup list. Otherwise returns 1 - * indicating that blk_group was unhashed by the time we got to it. 
- */ -int blkiocg_del_blkio_group(struct blkio_group *blkg) -{ - struct blkio_cgroup *blkcg = blkg->blkcg; - unsigned long flags; - int ret = 1; - - spin_lock_irqsave(&blkcg->lock, flags); - if (!hlist_unhashed(&blkg->blkcg_node)) { - __blkiocg_del_blkio_group(blkg); - ret = 0; - } - spin_unlock_irqrestore(&blkcg->lock, flags); - - return ret; -} -EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group); - /* called under rcu_read_lock(). */ struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, struct request_queue *q) @@ -663,12 +637,16 @@ EXPORT_SYMBOL_GPL(blkg_lookup); static void blkg_destroy(struct blkio_group *blkg) { struct request_queue *q = blkg->q; + struct blkio_cgroup *blkcg = blkg->blkcg; lockdep_assert_held(q->queue_lock); + lockdep_assert_held(&blkcg->lock); /* Something wrong if we are trying to remove same group twice */ WARN_ON_ONCE(list_empty(&blkg->q_node)); + WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node)); list_del_init(&blkg->q_node); + hlist_del_init_rcu(&blkg->blkcg_node); WARN_ON_ONCE(q->nr_blkgs <= 0); q->nr_blkgs--; @@ -713,45 +691,33 @@ void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid) } EXPORT_SYMBOL_GPL(update_root_blkg_pd); +/** + * blkg_destroy_all - destroy all blkgs associated with a request_queue + * @q: request_queue of interest + * @destroy_root: whether to destroy root blkg or not + * + * Destroy blkgs associated with @q. If @destroy_root is %true, all are + * destroyed; otherwise, root blkg is left alone. + */ void blkg_destroy_all(struct request_queue *q, bool destroy_root) { struct blkio_group *blkg, *n; - while (true) { - bool done = true; - - spin_lock_irq(q->queue_lock); - - list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { - /* skip root? */ - if (!destroy_root && blkg->blkcg == &blkio_root_cgroup) - continue; - - /* - * If cgroup removal path got to blk_group first - * and removed it from cgroup list, then it will - * take care of destroying cfqg also. - */ - if (!blkiocg_del_blkio_group(blkg)) - blkg_destroy(blkg); - else - done = false; - } + spin_lock_irq(q->queue_lock); - spin_unlock_irq(q->queue_lock); + list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { + struct blkio_cgroup *blkcg = blkg->blkcg; - /* - * Group list may not be empty if we raced cgroup removal - * and lost. cgroup removal is guaranteed to make forward - * progress and retrying after a while is enough. This - * ugliness is scheduled to be removed after locking - * update. - */ - if (done) - break; + /* skip root? */ + if (!destroy_root && blkg->blkcg == &blkio_root_cgroup) + continue; - msleep(10); /* just some random duration I like */ + spin_lock(&blkcg->lock); + blkg_destroy(blkg); + spin_unlock(&blkcg->lock); } + + spin_unlock_irq(q->queue_lock); } EXPORT_SYMBOL_GPL(blkg_destroy_all); @@ -1600,45 +1566,45 @@ static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup) ARRAY_SIZE(blkio_files)); } +/** + * blkiocg_pre_destroy - cgroup pre_destroy callback + * @subsys: cgroup subsys + * @cgroup: cgroup of interest + * + * This function is called when @cgroup is about to go away and responsible + * for shooting down all blkgs associated with @cgroup. blkgs should be + * removed while holding both q and blkcg locks. As blkcg lock is nested + * inside q lock, this function performs reverse double lock dancing. + * + * This is the blkcg counterpart of ioc_release_fn(). 
+ */ static int blkiocg_pre_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); - unsigned long flags; - struct blkio_group *blkg; - struct request_queue *q; rcu_read_lock(); + spin_lock_irq(&blkcg->lock); - do { - spin_lock_irqsave(&blkcg->lock, flags); + while (!hlist_empty(&blkcg->blkg_list)) { + struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first, + struct blkio_group, blkcg_node); + struct request_queue *q = rcu_dereference(blkg->q); - if (hlist_empty(&blkcg->blkg_list)) { - spin_unlock_irqrestore(&blkcg->lock, flags); - break; + if (spin_trylock(q->queue_lock)) { + blkg_destroy(blkg); + spin_unlock(q->queue_lock); + } else { + spin_unlock_irq(&blkcg->lock); + rcu_read_unlock(); + cpu_relax(); + rcu_read_lock(); + spin_lock(&blkcg->lock); } + } - blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group, - blkcg_node); - q = rcu_dereference(blkg->q); - __blkiocg_del_blkio_group(blkg); - - spin_unlock_irqrestore(&blkcg->lock, flags); - - /* - * This blkio_group is being unlinked as associated cgroup is - * going away. Let all the IO controlling policies know about - * this event. - */ - spin_lock(&blkio_list_lock); - spin_lock_irqsave(q->queue_lock, flags); - blkg_destroy(blkg); - spin_unlock_irqrestore(q->queue_lock, flags); - spin_unlock(&blkio_list_lock); - } while (1); - + spin_unlock_irq(&blkcg->lock); rcu_read_unlock(); - return 0; } diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 6e8ee86a2870..df73040a6a5f 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -380,7 +380,6 @@ static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg, extern struct blkio_cgroup blkio_root_cgroup; extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk); -extern int blkiocg_del_blkio_group(struct blkio_group *blkg); extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, struct request_queue *q); struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, @@ -416,9 +415,6 @@ cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; } static inline struct blkio_cgroup * task_blkio_cgroup(struct task_struct *tsk) { return NULL; } -static inline int -blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; } - static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, void *key) { return NULL; } static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg, diff --git a/block/cfq.h b/block/cfq.h index 5584e1b63ca8..c8b15ef57e5d 100644 --- a/block/cfq.h +++ b/block/cfq.h @@ -79,11 +79,6 @@ static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, direction, sync); } -static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg) -{ - return blkiocg_del_blkio_group(blkg); -} - #else /* CFQ_GROUP_IOSCHED */ static inline void cfq_blkiocg_update_io_add_stats(struct blkio_group *blkg, struct blkio_policy_type *pol, @@ -119,10 +114,5 @@ static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, struct blkio_policy_type *pol, uint64_t start_time, uint64_t io_start_time, bool direction, bool sync) { } -static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg) -{ - return 0; -} - #endif /* CFQ_GROUP_IOSCHED */ #endif -- cgit v1.2.3-59-g8ed1b From c875f4d0250a1f070fa26087a73bdd8f54c48100 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:22 -0800 Subject: blkcg: drop 
unnecessary RCU locking Now that blkg additions / removals are always done under both q and blkcg locks, the only places RCU locking is necessary are blkg_lookup[_create]() for lookup w/o blkcg lock. This patch drops unncessary RCU locking replacing it with plain blkcg locking as necessary. * blkiocg_pre_destroy() already perform proper locking and don't need RCU. Dropped. * blkio_read_blkg_stats() now uses blkcg->lock instead of RCU read lock. This isn't a hot path. * Now unnecessary synchronize_rcu() from queue exit paths removed. This makes q->nr_blkgs unnecessary. Dropped. * RCU annotation on blkg->q removed. -v2: Vivek pointed out that blkg_lookup_create() still needs to be called under rcu_read_lock(). Updated. -v3: After the update, stats_lock locking in blkio_read_blkg_stats() shouldn't be using _irq variant as it otherwise ends up enabling irq while blkcg->lock is locked. Fixed. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 24 +++++++++--------------- block/blk-cgroup.h | 4 ++-- block/blk-throttle.c | 33 +-------------------------------- block/cfq-iosched.c | 24 ------------------------ include/linux/blkdev.h | 1 - 5 files changed, 12 insertions(+), 74 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index e9e3b038c702..27d39a810cb6 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -500,7 +500,7 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, return NULL; spin_lock_init(&blkg->stats_lock); - rcu_assign_pointer(blkg->q, q); + blkg->q = q; INIT_LIST_HEAD(&blkg->q_node); blkg->blkcg = blkcg; blkg->refcnt = 1; @@ -611,7 +611,6 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); list_add(&blkg->q_node, &q->blkg_list); - q->nr_blkgs++; spin_unlock(&blkcg->lock); out: @@ -648,9 +647,6 @@ static void blkg_destroy(struct blkio_group *blkg) list_del_init(&blkg->q_node); hlist_del_init_rcu(&blkg->blkcg_node); - WARN_ON_ONCE(q->nr_blkgs <= 0); - q->nr_blkgs--; - /* * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. 
@@ -1232,8 +1228,9 @@ static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg, struct hlist_node *n; uint64_t cgroup_total = 0; - rcu_read_lock(); - hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) { + spin_lock_irq(&blkcg->lock); + + hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { const char *dname = blkg_dev_name(blkg); int plid = BLKIOFILE_POLICY(cft->private); @@ -1243,15 +1240,16 @@ static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg, cgroup_total += blkio_get_stat_cpu(blkg, plid, cb, dname, type); } else { - spin_lock_irq(&blkg->stats_lock); + spin_lock(&blkg->stats_lock); cgroup_total += blkio_get_stat(blkg, plid, cb, dname, type); - spin_unlock_irq(&blkg->stats_lock); + spin_unlock(&blkg->stats_lock); } } if (show_total) cb->fill(cb, "Total", cgroup_total); - rcu_read_unlock(); + + spin_unlock_irq(&blkcg->lock); return 0; } @@ -1583,28 +1581,24 @@ static int blkiocg_pre_destroy(struct cgroup_subsys *subsys, { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); - rcu_read_lock(); spin_lock_irq(&blkcg->lock); while (!hlist_empty(&blkcg->blkg_list)) { struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group, blkcg_node); - struct request_queue *q = rcu_dereference(blkg->q); + struct request_queue *q = blkg->q; if (spin_trylock(q->queue_lock)) { blkg_destroy(blkg); spin_unlock(q->queue_lock); } else { spin_unlock_irq(&blkcg->lock); - rcu_read_unlock(); cpu_relax(); - rcu_read_lock(); spin_lock(&blkcg->lock); } } spin_unlock_irq(&blkcg->lock); - rcu_read_unlock(); return 0; } diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index df73040a6a5f..66eaefefcbd2 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -176,8 +176,8 @@ struct blkg_policy_data { }; struct blkio_group { - /* Pointer to the associated request_queue, RCU protected */ - struct request_queue __rcu *q; + /* Pointer to the associated request_queue */ + struct request_queue *q; struct list_head q_node; struct hlist_node blkcg_node; struct blkio_cgroup *blkcg; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index e35ee7aeea69..bfa5168249eb 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1046,39 +1046,8 @@ int blk_throtl_init(struct request_queue *q) void blk_throtl_exit(struct request_queue *q) { - struct throtl_data *td = q->td; - bool wait; - - BUG_ON(!td); - + BUG_ON(!q->td); throtl_shutdown_wq(q); - - /* If there are other groups */ - spin_lock_irq(q->queue_lock); - wait = q->nr_blkgs; - spin_unlock_irq(q->queue_lock); - - /* - * Wait for tg_to_blkg(tg)->q accessors to exit their grace periods. - * Do this wait only if there are other undestroyed groups out - * there (other than root group). This can happen if cgroup deletion - * path claimed the responsibility of cleaning up a group before - * queue cleanup code get to the group. - * - * Do not call synchronize_rcu() unconditionally as there are drivers - * which create/delete request queue hundreds of times during scan/boot - * and synchronize_rcu() can take significant time and slow down boot. - */ - if (wait) - synchronize_rcu(); - - /* - * Just being safe to make sure after previous flush if some body did - * update limits through cgroup and another work got queued, cancel - * it. 
- */ - throtl_shutdown_wq(q); - kfree(q->td); } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 393eaa59913b..9e386d9bcb79 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3449,7 +3449,6 @@ static void cfq_exit_queue(struct elevator_queue *e) { struct cfq_data *cfqd = e->elevator_data; struct request_queue *q = cfqd->queue; - bool wait = false; cfq_shutdown_timer_wq(cfqd); @@ -3462,31 +3461,8 @@ static void cfq_exit_queue(struct elevator_queue *e) spin_unlock_irq(q->queue_lock); -#ifdef CONFIG_BLK_CGROUP - /* - * If there are groups which we could not unlink from blkcg list, - * wait for a rcu period for them to be freed. - */ - spin_lock_irq(q->queue_lock); - wait = q->nr_blkgs; - spin_unlock_irq(q->queue_lock); -#endif cfq_shutdown_timer_wq(cfqd); - /* - * Wait for cfqg->blkg->key accessors to exit their grace periods. - * Do this wait only if there are other unlinked groups out - * there. This can happen if cgroup deletion path claimed the - * responsibility of cleaning up a group before queue cleanup code - * get to the group. - * - * Do not call synchronize_rcu() unconditionally as there are drivers - * which create/delete request queue hundreds of times during scan/boot - * and synchronize_rcu() can take significant time and slow down boot. - */ - if (wait) - synchronize_rcu(); - #ifndef CONFIG_CFQ_GROUP_IOSCHED kfree(cfqd->root_group); #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b4d1d4bfc168..33f1b29e53f4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -365,7 +365,6 @@ struct request_queue { #ifdef CONFIG_BLK_CGROUP /* XXX: array size hardcoded to avoid include dependency (temporary) */ struct list_head blkg_list; - int nr_blkgs; #endif struct queue_limits limits; -- cgit v1.2.3-59-g8ed1b From 4f85cb96d9d2fbbb7160db855a6beee1baced5e5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:28 -0800 Subject: block: make block cgroup policies follow bio task association Implement bio_blkio_cgroup() which returns the blkcg associated with the bio if exists or %current's blkcg, and use it in blk-throttle and cfq-iosched propio. This makes both cgroup policies honor task association for the bio instead of always assuming %current. As nobody is using bio_set_task() yet, this doesn't introduce any behavior change. 
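At each call site the behavioural change boils down to a one-line substitution; the blk-throttle hunk below reduces to the following (excerpted from the diff, context trimmed):

	rcu_read_lock();
	blkcg = bio_blkio_cgroup(bio);		/* was task_blkio_cgroup(current) */
	tg = throtl_lookup_tg(td, blkcg);

The cfq-iosched side is the same substitution, plus plumbing the bio down to cfq_get_queue() so the lookup can see it.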
Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 11 +++++++++-- block/blk-cgroup.h | 4 ++-- block/blk-throttle.c | 2 +- block/cfq-iosched.c | 21 +++++++++++---------- 4 files changed, 23 insertions(+), 15 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 27d39a810cb6..ee962f327ba5 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -71,12 +71,19 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) } EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup); -struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk) +static struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk) { return container_of(task_subsys_state(tsk, blkio_subsys_id), struct blkio_cgroup, css); } -EXPORT_SYMBOL_GPL(task_blkio_cgroup); + +struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio) +{ + if (bio && bio->bi_css) + return container_of(bio->bi_css, struct blkio_cgroup, css); + return task_blkio_cgroup(current); +} +EXPORT_SYMBOL_GPL(bio_blkio_cgroup); static inline void blkio_update_group_weight(struct blkio_group *blkg, int plid, unsigned int weight) diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 66eaefefcbd2..98cd8533378f 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -379,7 +379,7 @@ static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg, #ifdef CONFIG_BLK_CGROUP extern struct blkio_cgroup blkio_root_cgroup; extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); -extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk); +extern struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio); extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, struct request_queue *q); struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, @@ -413,7 +413,7 @@ struct cgroup; static inline struct blkio_cgroup * cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; } static inline struct blkio_cgroup * -task_blkio_cgroup(struct task_struct *tsk) { return NULL; } +bio_blkio_cgroup(struct bio *bio) { return NULL; } static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, void *key) { return NULL; } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index bfa5168249eb..08b7ab292a80 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -900,7 +900,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) * just update the dispatch stats in lockless manner and return. 
*/ rcu_read_lock(); - blkcg = task_blkio_cgroup(current); + blkcg = bio_blkio_cgroup(bio); tg = throtl_lookup_tg(td, blkcg); if (tg) { if (tg_no_rule_group(tg, rw)) { diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index abac87337d70..f2387b50f82e 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -467,8 +467,9 @@ static inline int cfqg_busy_async_queues(struct cfq_data *cfqd, } static void cfq_dispatch_insert(struct request_queue *, struct request *); -static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool, - struct io_context *, gfp_t); +static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, bool is_sync, + struct io_context *ioc, struct bio *bio, + gfp_t gfp_mask); static inline struct cfq_io_cq *icq_to_cic(struct io_cq *icq) { @@ -2601,7 +2602,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc) cfq_clear_cfqq_prio_changed(cfqq); } -static void changed_ioprio(struct cfq_io_cq *cic) +static void changed_ioprio(struct cfq_io_cq *cic, struct bio *bio) { struct cfq_data *cfqd = cic_to_cfqd(cic); struct cfq_queue *cfqq; @@ -2613,7 +2614,7 @@ static void changed_ioprio(struct cfq_io_cq *cic) if (cfqq) { struct cfq_queue *new_cfqq; new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->icq.ioc, - GFP_ATOMIC); + bio, GFP_ATOMIC); if (new_cfqq) { cic->cfqq[BLK_RW_ASYNC] = new_cfqq; cfq_put_queue(cfqq); @@ -2671,7 +2672,7 @@ static void changed_cgroup(struct cfq_io_cq *cic) static struct cfq_queue * cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, - struct io_context *ioc, gfp_t gfp_mask) + struct io_context *ioc, struct bio *bio, gfp_t gfp_mask) { struct blkio_cgroup *blkcg; struct cfq_queue *cfqq, *new_cfqq = NULL; @@ -2681,7 +2682,7 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, retry: rcu_read_lock(); - blkcg = task_blkio_cgroup(current); + blkcg = bio_blkio_cgroup(bio); cfqg = cfq_lookup_create_cfqg(cfqd, blkcg); @@ -2746,7 +2747,7 @@ cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio) static struct cfq_queue * cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc, - gfp_t gfp_mask) + struct bio *bio, gfp_t gfp_mask) { const int ioprio = task_ioprio(ioc); const int ioprio_class = task_ioprio_class(ioc); @@ -2759,7 +2760,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc, } if (!cfqq) - cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask); + cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, bio, gfp_mask); /* * pin the queue now that it's allocated, scheduler exit will prune it @@ -3316,7 +3317,7 @@ cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio, /* handle changed notifications */ changed = icq_get_changed(&cic->icq); if (unlikely(changed & ICQ_IOPRIO_CHANGED)) - changed_ioprio(cic); + changed_ioprio(cic, bio); #ifdef CONFIG_CFQ_GROUP_IOSCHED if (unlikely(changed & ICQ_CGROUP_CHANGED)) changed_cgroup(cic); @@ -3325,7 +3326,7 @@ cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio, new_queue: cfqq = cic_to_cfqq(cic, is_sync); if (!cfqq || cfqq == &cfqd->oom_cfqq) { - cfqq = cfq_get_queue(cfqd, is_sync, cic->icq.ioc, gfp_mask); + cfqq = cfq_get_queue(cfqd, is_sync, cic->icq.ioc, bio, gfp_mask); cic_set_cfqq(cic, cfqq, is_sync); } else { /* -- cgit v1.2.3-59-g8ed1b From 1cd9e039fc258f91fe38b97b3c622b13a3b8a795 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Thu, 8 Mar 2012 10:53:56 -0800 Subject: blkcg: alloc per cpu stats from worker thread in a delayed manner Current per cpu stat 
allocation assumes GFP_KERNEL allocation flag. But in IO path there are times when we want GFP_NOIO semantics. As there is no way to pass the allocation flags to alloc_percpu(), this patch delays the allocation of stats using a worker thread. v2-> tejun suggested following changes. Changed the patch accordingly. - move alloc_node location in structure - reduce the size of names of some of the fields - Reduce the scope of locking of alloc_list_lock - Simplified stat_alloc_fn() by allocating stats for all policies in one go and then assigning these to a group. v3 -> Andrew suggested to put some comments in the code. Also raised concerns about trying to allocate infinitely in case of allocation failure. I have changed the logic to sleep for 10ms before retrying. That should take care of non-preemptible UP kernels. v4 -> Tejun had more suggestions. - drop list_for_each_entry_all() - instead of msleep() use queue_delayed_work() - Some cleanups realted to more compact coding. v5-> tejun suggested more cleanups leading to more compact code. tj: - Relocated pcpu_stats into blkio_stat_alloc_fn(). - Minor comment update. - This also fixes suspicious RCU usage warning caused by invoking cgroup_path() from blkg_alloc() without holding RCU read lock. Now that blkg_alloc() doesn't require sleepable context, RCU read lock from blkg_lookup_create() is maintained throughout blkg_alloc(). Signed-off-by: Vivek Goyal Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 129 ++++++++++++++++++++++++++++++++++++----------------- block/blk-cgroup.h | 2 + 2 files changed, 91 insertions(+), 40 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index ee962f327ba5..622fb4143226 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -30,6 +30,13 @@ static LIST_HEAD(blkio_list); static DEFINE_MUTEX(all_q_mutex); static LIST_HEAD(all_q_list); +/* List of groups pending per cpu stats allocation */ +static DEFINE_SPINLOCK(alloc_list_lock); +static LIST_HEAD(alloc_list); + +static void blkio_stat_alloc_fn(struct work_struct *); +static DECLARE_DELAYED_WORK(blkio_stat_alloc_work, blkio_stat_alloc_fn); + struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT }; EXPORT_SYMBOL_GPL(blkio_root_cgroup); @@ -391,6 +398,10 @@ void blkiocg_update_dispatch_stats(struct blkio_group *blkg, struct blkio_group_stats_cpu *stats_cpu; unsigned long flags; + /* If per cpu stats are not allocated yet, don't do any accounting. */ + if (pd->stats_cpu == NULL) + return; + /* * Disabling interrupts to provide mutual exclusion between two * writes on same cpu. It probably is not needed for 64bit. Not @@ -443,6 +454,10 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg, struct blkio_group_stats_cpu *stats_cpu; unsigned long flags; + /* If per cpu stats are not allocated yet, don't do any accounting. */ + if (pd->stats_cpu == NULL) + return; + /* * Disabling interrupts to provide mutual exclusion between two * writes on same cpu. It probably is not needed for 64bit. Not @@ -460,6 +475,60 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg, } EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats); +/* + * Worker for allocating per cpu stat for blk groups. This is scheduled on + * the system_nrt_wq once there are some groups on the alloc_list waiting + * for allocation. 
+ */ +static void blkio_stat_alloc_fn(struct work_struct *work) +{ + static void *pcpu_stats[BLKIO_NR_POLICIES]; + struct delayed_work *dwork = to_delayed_work(work); + struct blkio_group *blkg; + int i; + bool empty = false; + +alloc_stats: + for (i = 0; i < BLKIO_NR_POLICIES; i++) { + if (pcpu_stats[i] != NULL) + continue; + + pcpu_stats[i] = alloc_percpu(struct blkio_group_stats_cpu); + + /* Allocation failed. Try again after some time. */ + if (pcpu_stats[i] == NULL) { + queue_delayed_work(system_nrt_wq, dwork, + msecs_to_jiffies(10)); + return; + } + } + + spin_lock_irq(&blkio_list_lock); + spin_lock(&alloc_list_lock); + + /* cgroup got deleted or queue exited. */ + if (!list_empty(&alloc_list)) { + blkg = list_first_entry(&alloc_list, struct blkio_group, + alloc_node); + for (i = 0; i < BLKIO_NR_POLICIES; i++) { + struct blkg_policy_data *pd = blkg->pd[i]; + + if (blkio_policy[i] && pd && !pd->stats_cpu) + swap(pd->stats_cpu, pcpu_stats[i]); + } + + list_del_init(&blkg->alloc_node); + } + + empty = list_empty(&alloc_list); + + spin_unlock(&alloc_list_lock); + spin_unlock_irq(&blkio_list_lock); + + if (!empty) + goto alloc_stats; +} + /** * blkg_free - free a blkg * @blkg: blkg to free @@ -491,9 +560,6 @@ static void blkg_free(struct blkio_group *blkg) * @q: request_queue the new blkg is associated with * * Allocate a new blkg assocating @blkcg and @q. - * - * FIXME: Should be called with queue locked but currently isn't due to - * percpu stat breakage. */ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, struct request_queue *q) @@ -509,6 +575,7 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, spin_lock_init(&blkg->stats_lock); blkg->q = q; INIT_LIST_HEAD(&blkg->q_node); + INIT_LIST_HEAD(&blkg->alloc_node); blkg->blkcg = blkcg; blkg->refcnt = 1; cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path)); @@ -530,13 +597,6 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, blkg->pd[i] = pd; pd->blkg = blkg; - - /* broken, read comment in the callsite */ - pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu); - if (!pd->stats_cpu) { - blkg_free(blkg); - return NULL; - } } /* invoke per-policy init */ @@ -556,7 +616,7 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, bool for_root) __releases(q->queue_lock) __acquires(q->queue_lock) { - struct blkio_group *blkg, *new_blkg; + struct blkio_group *blkg; WARN_ON_ONCE(!rcu_read_lock_held()); lockdep_assert_held(q->queue_lock); @@ -580,48 +640,27 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, /* * Allocate and initialize. - * - * FIXME: The following is broken. Percpu memory allocation - * requires %GFP_KERNEL context and can't be performed from IO - * path. Allocation here should inherently be atomic and the - * following lock dancing can be removed once the broken percpu - * allocation is fixed. */ - spin_unlock_irq(q->queue_lock); - rcu_read_unlock(); - - new_blkg = blkg_alloc(blkcg, q); - - rcu_read_lock(); - spin_lock_irq(q->queue_lock); - - /* did bypass get turned on inbetween? */ - if (unlikely(blk_queue_bypass(q)) && !for_root) { - blkg = ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY); - goto out; - } - - /* did someone beat us to it? */ - blkg = blkg_lookup(blkcg, q); - if (unlikely(blkg)) - goto out; + blkg = blkg_alloc(blkcg, q); /* did alloc fail? 
*/ - if (unlikely(!new_blkg)) { + if (unlikely(!blkg)) { blkg = ERR_PTR(-ENOMEM); goto out; } /* insert */ spin_lock(&blkcg->lock); - swap(blkg, new_blkg); - hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); list_add(&blkg->q_node, &q->blkg_list); - spin_unlock(&blkcg->lock); + + spin_lock(&alloc_list_lock); + list_add(&blkg->alloc_node, &alloc_list); + /* Queue per cpu stat allocation from worker thread. */ + queue_delayed_work(system_nrt_wq, &blkio_stat_alloc_work, 0); + spin_unlock(&alloc_list_lock); out: - blkg_free(new_blkg); return blkg; } EXPORT_SYMBOL_GPL(blkg_lookup_create); @@ -654,6 +693,10 @@ static void blkg_destroy(struct blkio_group *blkg) list_del_init(&blkg->q_node); hlist_del_init_rcu(&blkg->blkcg_node); + spin_lock(&alloc_list_lock); + list_del_init(&blkg->alloc_node); + spin_unlock(&alloc_list_lock); + /* * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. @@ -752,6 +795,9 @@ static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid) struct blkg_policy_data *pd = blkg->pd[plid]; struct blkio_group_stats_cpu *stats_cpu; int i, j, k; + + if (pd->stats_cpu == NULL) + return; /* * Note: On 64 bit arch this should not be an issue. This has the * possibility of returning some inconsistent value on 32bit arch @@ -883,6 +929,9 @@ static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg, int plid, struct blkio_group_stats_cpu *stats_cpu; u64 val = 0, tval; + if (pd->stats_cpu == NULL) + return val; + for_each_possible_cpu(cpu) { unsigned int start; stats_cpu = per_cpu_ptr(pd->stats_cpu, cpu); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 98cd8533378f..1de32fe0e2af 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -190,6 +190,8 @@ struct blkio_group { spinlock_t stats_lock; struct blkg_policy_data *pd[BLKIO_NR_POLICIES]; + /* List of blkg waiting for per cpu stats memory to be allocated */ + struct list_head alloc_node; struct rcu_head rcu_head; }; -- cgit v1.2.3-59-g8ed1b From 5fe224d2d5fbf8f020b30d0ba69fed7856923752 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Mar 2012 10:53:57 -0800 Subject: blkcg: don't use percpu for merged stats With recent plug merge updates, merged stats are no longer called for plug merges and now only updated while holding queue_lock. As stats_lock is scheduled to be removed, there's no reason to use percpu for merged stats. Don't use percpu for merged stats. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 26 ++++++-------------------- block/blk-cgroup.h | 6 +++--- 2 files changed, 9 insertions(+), 23 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 622fb4143226..6eedf3afa276 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -451,27 +451,13 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction, bool sync) { struct blkg_policy_data *pd = blkg->pd[pol->plid]; - struct blkio_group_stats_cpu *stats_cpu; + struct blkio_group_stats *stats; unsigned long flags; - /* If per cpu stats are not allocated yet, don't do any accounting. */ - if (pd->stats_cpu == NULL) - return; - - /* - * Disabling interrupts to provide mutual exclusion between two - * writes on same cpu. It probably is not needed for 64bit. Not - * optimizing that case yet. 
- */ - local_irq_save(flags); - - stats_cpu = this_cpu_ptr(pd->stats_cpu); - - u64_stats_update_begin(&stats_cpu->syncp); - blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_MERGED], 1, - direction, sync); - u64_stats_update_end(&stats_cpu->syncp); - local_irq_restore(flags); + spin_lock_irqsave(&blkg->stats_lock, flags); + stats = &pd->stats; + blkio_add_stat(stats->stat_arr[BLKIO_STAT_MERGED], 1, direction, sync); + spin_unlock_irqrestore(&blkg->stats_lock, flags); } EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats); @@ -1342,7 +1328,7 @@ static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft, BLKIO_STAT_WAIT_TIME, 1, 0); case BLKIO_PROP_io_merged: return blkio_read_blkg_stats(blkcg, cft, cb, - BLKIO_STAT_CPU_MERGED, 1, 1); + BLKIO_STAT_MERGED, 1, 0); case BLKIO_PROP_io_queued: return blkio_read_blkg_stats(blkcg, cft, cb, BLKIO_STAT_QUEUED, 1, 0); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 1de32fe0e2af..6c8e3e345426 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -29,10 +29,12 @@ enum blkio_policy_id { #ifdef CONFIG_BLK_CGROUP enum stat_type { + /* Number of IOs merged */ + BLKIO_STAT_MERGED, /* Total time spent (in ns) between request dispatch to the driver and * request completion for IOs doen by this cgroup. This may not be * accurate when NCQ is turned on. */ - BLKIO_STAT_SERVICE_TIME = 0, + BLKIO_STAT_SERVICE_TIME, /* Total time spent waiting in scheduler queue in ns */ BLKIO_STAT_WAIT_TIME, /* Number of IOs queued up */ @@ -57,8 +59,6 @@ enum stat_type_cpu { BLKIO_STAT_CPU_SERVICE_BYTES, /* Total IOs serviced, post merge */ BLKIO_STAT_CPU_SERVICED, - /* Number of IOs merged */ - BLKIO_STAT_CPU_MERGED, BLKIO_STAT_CPU_NR }; -- cgit v1.2.3-59-g8ed1b From 997a026c80c3cc05f82e589aced1f0011c17d376 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Mar 2012 10:53:58 -0800 Subject: blkcg: simplify stat reset blkiocg_reset_stats() implements stat reset for blkio.reset_stats cgroupfs file. This feature is very unconventional and something which shouldn't have been merged. It's only useful when there's only one user or tool looking at the stats. As soon as multiple users and/or tools are involved, it becomes useless as resetting disrupts other usages. There are very good reasons why all other stats expect readers to read values at the start and end of a period and subtract to determine delta over the period. The implementation is rather complex - some fields shouldn't be cleared and it saves some fields, resets whole and restores for some reason. Reset of percpu stats is also racy. The comment points to 64bit store atomicity for the reason but even without that stores for zero can simply race with other CPUs doing RMW and get clobbered. Simplify reset by * Clear selectively instead of resetting and restoring. * Grouping debug stat fields to be reset and using memset() over them. * Not caring about stats_lock. * Using memset() to reset percpu stats. 
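The "group the debug fields and memset() over them" point depends on the clearable fields sitting contiguously in struct blkio_group_stats, so the byte range can be computed with offsetof(). A minimal generic sketch of the pattern, with illustrative names rather than the patch's own:

	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>

	struct gstats {
		uint64_t time;			/* cleared explicitly, field by field */
		uint64_t unaccounted_time;	/* first field covered by the memset() */
		uint64_t idle_time;
		uint64_t empty_time;
		uint64_t start_group_wait_time;	/* fields from here on are preserved */
		uint16_t flags;
	};

	#define DEBUG_CLEAR_START	offsetof(struct gstats, unaccounted_time)
	#define DEBUG_CLEAR_SIZE	(offsetof(struct gstats, start_group_wait_time) - \
					 DEBUG_CLEAR_START)

	static void reset_debug_stats(struct gstats *stats)
	{
		/* zero everything in [unaccounted_time, start_group_wait_time) */
		memset((char *)stats + DEBUG_CLEAR_START, 0, DEBUG_CLEAR_SIZE);
	}

The patch itself builds the same construction as BLKG_STATS_DEBUG_CLEAR_START/SIZE in blk-cgroup.h, as shown in the hunks below.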
Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 80 +++++++++++++++++------------------------------------- block/blk-cgroup.h | 14 ++++++++-- 2 files changed, 37 insertions(+), 57 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 6eedf3afa276..759bc58154c2 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -779,83 +779,53 @@ EXPORT_SYMBOL_GPL(__blkg_release); static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid) { struct blkg_policy_data *pd = blkg->pd[plid]; - struct blkio_group_stats_cpu *stats_cpu; - int i, j, k; + int cpu; if (pd->stats_cpu == NULL) return; - /* - * Note: On 64 bit arch this should not be an issue. This has the - * possibility of returning some inconsistent value on 32bit arch - * as 64bit update on 32bit is non atomic. Taking care of this - * corner case makes code very complicated, like sending IPIs to - * cpus, taking care of stats of offline cpus etc. - * - * reset stats is anyway more of a debug feature and this sounds a - * corner case. So I am not complicating the code yet until and - * unless this becomes a real issue. - */ - for_each_possible_cpu(i) { - stats_cpu = per_cpu_ptr(pd->stats_cpu, i); - stats_cpu->sectors = 0; - for(j = 0; j < BLKIO_STAT_CPU_NR; j++) - for (k = 0; k < BLKIO_STAT_TOTAL; k++) - stats_cpu->stat_arr_cpu[j][k] = 0; + + for_each_possible_cpu(cpu) { + struct blkio_group_stats_cpu *sc = + per_cpu_ptr(pd->stats_cpu, cpu); + + sc->sectors = 0; + memset(sc->stat_arr_cpu, 0, sizeof(sc->stat_arr_cpu)); } } static int blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) { - struct blkio_cgroup *blkcg; + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); struct blkio_group *blkg; - struct blkio_group_stats *stats; struct hlist_node *n; - uint64_t queued[BLKIO_STAT_TOTAL]; int i; -#ifdef CONFIG_DEBUG_BLK_CGROUP - bool idling, waiting, empty; - unsigned long long now = sched_clock(); -#endif - blkcg = cgroup_to_blkio_cgroup(cgroup); spin_lock(&blkio_list_lock); spin_lock_irq(&blkcg->lock); + + /* + * Note that stat reset is racy - it doesn't synchronize against + * stat updates. This is a debug feature which shouldn't exist + * anyway. If you get hit by a race, retry. 
+ */ hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { struct blkio_policy_type *pol; list_for_each_entry(pol, &blkio_list, list) { struct blkg_policy_data *pd = blkg->pd[pol->plid]; - - spin_lock(&blkg->stats_lock); - stats = &pd->stats; + struct blkio_group_stats *stats = &pd->stats; + + /* queued stats shouldn't be cleared */ + for (i = 0; i < ARRAY_SIZE(stats->stat_arr); i++) + if (i != BLKIO_STAT_QUEUED) + memset(stats->stat_arr[i], 0, + sizeof(stats->stat_arr[i])); + stats->time = 0; #ifdef CONFIG_DEBUG_BLK_CGROUP - idling = blkio_blkg_idling(stats); - waiting = blkio_blkg_waiting(stats); - empty = blkio_blkg_empty(stats); + memset((void *)stats + BLKG_STATS_DEBUG_CLEAR_START, 0, + BLKG_STATS_DEBUG_CLEAR_SIZE); #endif - for (i = 0; i < BLKIO_STAT_TOTAL; i++) - queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i]; - memset(stats, 0, sizeof(struct blkio_group_stats)); - for (i = 0; i < BLKIO_STAT_TOTAL; i++) - stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i]; -#ifdef CONFIG_DEBUG_BLK_CGROUP - if (idling) { - blkio_mark_blkg_idling(stats); - stats->start_idle_time = now; - } - if (waiting) { - blkio_mark_blkg_waiting(stats); - stats->start_group_wait_time = now; - } - if (empty) { - blkio_mark_blkg_empty(stats); - stats->start_empty_time = now; - } -#endif - spin_unlock(&blkg->stats_lock); - - /* Reset Per cpu stats which don't take blkg->stats_lock */ blkio_reset_stats_cpu(blkg, pol->plid); } } diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 6c8e3e345426..1fa3c5e8d87f 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -131,21 +131,31 @@ struct blkio_group_stats { /* Total time spent waiting for it to be assigned a timeslice. */ uint64_t group_wait_time; - uint64_t start_group_wait_time; /* Time spent idling for this blkio_group */ uint64_t idle_time; - uint64_t start_idle_time; /* * Total time when we have requests queued and do not contain the * current active queue. */ uint64_t empty_time; + + /* fields after this shouldn't be cleared on stat reset */ + uint64_t start_group_wait_time; + uint64_t start_idle_time; uint64_t start_empty_time; uint16_t flags; #endif }; +#ifdef CONFIG_DEBUG_BLK_CGROUP +#define BLKG_STATS_DEBUG_CLEAR_START \ + offsetof(struct blkio_group_stats, unaccounted_time) +#define BLKG_STATS_DEBUG_CLEAR_SIZE \ + (offsetof(struct blkio_group_stats, start_group_wait_time) - \ + BLKG_STATS_DEBUG_CLEAR_START) +#endif + /* Per cpu blkio group stats */ struct blkio_group_stats_cpu { uint64_t sectors; -- cgit v1.2.3-59-g8ed1b From c4c76a05382c7d05e0b911daa58a827399e9ba1a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Mar 2012 10:53:59 -0800 Subject: blkcg: restructure blkio_get_stat() Restructure blkio_get_stat() to prepare for removal of stats_lock. * Define BLKIO_STAT_ARR_NR explicitly to denote which stats have subtypes instead of using BLKIO_STAT_QUEUED. * Separate out stat acquisition and printing. After this, there are only two users of blkio_fill_stat(). Just open code it. * The code was mixing MAX_KEY_LEN and MAX_KEY_LEN - 1. There's no need to subtract one. Use MAX_KEY_LEN consistently. 
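The boundary the first point refers to is easiest to see as an enum sketch: every type below the marker has per-direction subtypes stored in stat_arr[], everything at or above it is a single value (names abbreviated from the header hunk below, debug-only entries omitted):

	enum stat_type {
		STAT_MERGED,		/* these have READ/WRITE/SYNC/ASYNC subtypes ... */
		STAT_SERVICE_TIME,
		STAT_WAIT_TIME,
		STAT_QUEUED,		/* ... up to and including this one */
		STAT_TIME,		/* single-valued from here on */
		STAT_UNACCOUNTED_TIME,
	};

	/* types lower than this live in stat_arr[] and have subtypes */
	#define STAT_ARR_NR	(STAT_QUEUED + 1)

With the marker named explicitly, blkio_get_stat() can test "type >= STAT_ARR_NR" for the single-valued case instead of comparing against BLKIO_STAT_QUEUED directly.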
Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 100 +++++++++++++++++++++++++++-------------------------- block/blk-cgroup.h | 6 +++- 2 files changed, 56 insertions(+), 50 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 759bc58154c2..80887bc3a049 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -868,15 +868,6 @@ static void blkio_get_key_name(enum stat_sub_type type, const char *dname, } } -static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val, - struct cgroup_map_cb *cb, const char *dname) -{ - blkio_get_key_name(0, dname, str, chars_left, true); - cb->fill(cb, str, val); - return val; -} - - static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg, int plid, enum stat_type_cpu type, enum stat_sub_type sub_type) { @@ -916,8 +907,9 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid, if (type == BLKIO_STAT_CPU_SECTORS) { val = blkio_read_stat_cpu(blkg, plid, type, 0); - return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, val, cb, - dname); + blkio_get_key_name(0, dname, key_str, MAX_KEY_LEN, true); + cb->fill(cb, key_str, val); + return val; } for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL; @@ -942,50 +934,60 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, int plid, struct cgroup_map_cb *cb, const char *dname, enum stat_type type) { - struct blkg_policy_data *pd = blkg->pd[plid]; - uint64_t disk_total; + struct blkio_group_stats *stats = &blkg->pd[plid]->stats; + uint64_t v = 0, disk_total = 0; char key_str[MAX_KEY_LEN]; - enum stat_sub_type sub_type; + int st; - if (type == BLKIO_STAT_TIME) - return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, - pd->stats.time, cb, dname); + if (type >= BLKIO_STAT_ARR_NR) { + switch (type) { + case BLKIO_STAT_TIME: + v = stats->time; + break; #ifdef CONFIG_DEBUG_BLK_CGROUP - if (type == BLKIO_STAT_UNACCOUNTED_TIME) - return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, - pd->stats.unaccounted_time, cb, dname); - if (type == BLKIO_STAT_AVG_QUEUE_SIZE) { - uint64_t sum = pd->stats.avg_queue_size_sum; - uint64_t samples = pd->stats.avg_queue_size_samples; - if (samples) - do_div(sum, samples); - else - sum = 0; - return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, - sum, cb, dname); - } - if (type == BLKIO_STAT_GROUP_WAIT_TIME) - return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, - pd->stats.group_wait_time, cb, dname); - if (type == BLKIO_STAT_IDLE_TIME) - return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, - pd->stats.idle_time, cb, dname); - if (type == BLKIO_STAT_EMPTY_TIME) - return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, - pd->stats.empty_time, cb, dname); - if (type == BLKIO_STAT_DEQUEUE) - return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, - pd->stats.dequeue, cb, dname); + case BLKIO_STAT_UNACCOUNTED_TIME: + v = stats->unaccounted_time; + break; + case BLKIO_STAT_AVG_QUEUE_SIZE: { + uint64_t samples = stats->avg_queue_size_samples; + + if (samples) { + v = stats->avg_queue_size_sum; + do_div(v, samples); + } + break; + } + case BLKIO_STAT_IDLE_TIME: + v = stats->idle_time; + break; + case BLKIO_STAT_EMPTY_TIME: + v = stats->empty_time; + break; + case BLKIO_STAT_DEQUEUE: + v = stats->dequeue; + break; + case BLKIO_STAT_GROUP_WAIT_TIME: + v = stats->group_wait_time; + break; #endif + default: + WARN_ON_ONCE(1); + } - for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL; - sub_type++) { - blkio_get_key_name(sub_type, dname, key_str, MAX_KEY_LEN, - false); - cb->fill(cb, key_str, 
pd->stats.stat_arr[type][sub_type]); + blkio_get_key_name(0, dname, key_str, MAX_KEY_LEN, true); + cb->fill(cb, key_str, v); + return v; } - disk_total = pd->stats.stat_arr[type][BLKIO_STAT_READ] + - pd->stats.stat_arr[type][BLKIO_STAT_WRITE]; + + for (st = BLKIO_STAT_READ; st < BLKIO_STAT_TOTAL; st++) { + v = stats->stat_arr[type][st]; + + blkio_get_key_name(st, dname, key_str, MAX_KEY_LEN, false); + cb->fill(cb, key_str, v); + if (st == BLKIO_STAT_READ || st == BLKIO_STAT_WRITE) + disk_total += v; + } + blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN, false); cb->fill(cb, key_str, disk_total); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 1fa3c5e8d87f..8bdcf504f94b 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -39,6 +39,7 @@ enum stat_type { BLKIO_STAT_WAIT_TIME, /* Number of IOs queued up */ BLKIO_STAT_QUEUED, + /* All the single valued stats go below this */ BLKIO_STAT_TIME, #ifdef CONFIG_DEBUG_BLK_CGROUP @@ -52,6 +53,9 @@ enum stat_type { #endif }; +/* Types lower than this live in stat_arr and have subtypes */ +#define BLKIO_STAT_ARR_NR (BLKIO_STAT_QUEUED + 1) + /* Per cpu stats */ enum stat_type_cpu { BLKIO_STAT_CPU_SECTORS, @@ -117,7 +121,7 @@ struct blkio_cgroup { struct blkio_group_stats { /* total disk time and nr sectors dispatched by this group */ uint64_t time; - uint64_t stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL]; + uint64_t stat_arr[BLKIO_STAT_ARR_NR][BLKIO_STAT_TOTAL]; #ifdef CONFIG_DEBUG_BLK_CGROUP /* Time not charged to this cgroup */ uint64_t unaccounted_time; -- cgit v1.2.3-59-g8ed1b From edf1b879e308d37b5b7c414476ab39f79650a253 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Mar 2012 10:54:00 -0800 Subject: blkcg: remove blkio_group->stats_lock With recent plug merge updates, all non-percpu stat updates happen under queue_lock making stats_lock unnecessary to synchronize stat updates. The only synchronization necessary is stat reading, which can be done using u64_stats_sync instead. This patch removes blkio_group->stats_lock and adds blkio_group_stats->syncp for reader synchronization. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 209 ++++++++++++++++++++++++++--------------------------- block/blk-cgroup.h | 3 +- 2 files changed, 103 insertions(+), 109 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 80887bc3a049..b15a51711bca 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -156,7 +156,7 @@ static inline void blkio_update_group_iops(struct blkio_group *blkg, /* * Add to the appropriate stat variable depending on the request type. - * This should be called with the blkg->stats_lock held. + * This should be called with queue_lock held. */ static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction, bool sync) @@ -174,7 +174,7 @@ static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction, /* * Decrements the appropriate stat variable if non-zero depending on the * request type. Panics on value being zero. - * This should be called with the blkg->stats_lock held. + * This should be called with the queue_lock held. */ static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync) { @@ -195,7 +195,7 @@ static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync) } #ifdef CONFIG_DEBUG_BLK_CGROUP -/* This should be called with the blkg->stats_lock held. */ +/* This should be called with the queue_lock held. 
*/ static void blkio_set_start_group_wait_time(struct blkio_group *blkg, struct blkio_policy_type *pol, struct blkio_group *curr_blkg) @@ -210,7 +210,7 @@ static void blkio_set_start_group_wait_time(struct blkio_group *blkg, blkio_mark_blkg_waiting(&pd->stats); } -/* This should be called with the blkg->stats_lock held. */ +/* This should be called with the queue_lock held. */ static void blkio_update_group_wait_time(struct blkio_group_stats *stats) { unsigned long long now; @@ -224,7 +224,7 @@ static void blkio_update_group_wait_time(struct blkio_group_stats *stats) blkio_clear_blkg_waiting(stats); } -/* This should be called with the blkg->stats_lock held. */ +/* This should be called with the queue_lock held. */ static void blkio_end_empty_time(struct blkio_group_stats *stats) { unsigned long long now; @@ -241,84 +241,74 @@ static void blkio_end_empty_time(struct blkio_group_stats *stats) void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg, struct blkio_policy_type *pol) { - struct blkg_policy_data *pd = blkg->pd[pol->plid]; - unsigned long flags; + struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - spin_lock_irqsave(&blkg->stats_lock, flags); - BUG_ON(blkio_blkg_idling(&pd->stats)); - pd->stats.start_idle_time = sched_clock(); - blkio_mark_blkg_idling(&pd->stats); - spin_unlock_irqrestore(&blkg->stats_lock, flags); + lockdep_assert_held(blkg->q->queue_lock); + BUG_ON(blkio_blkg_idling(stats)); + + stats->start_idle_time = sched_clock(); + blkio_mark_blkg_idling(stats); } EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats); void blkiocg_update_idle_time_stats(struct blkio_group *blkg, struct blkio_policy_type *pol) { - struct blkg_policy_data *pd = blkg->pd[pol->plid]; - unsigned long flags; - unsigned long long now; - struct blkio_group_stats *stats; + struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + + lockdep_assert_held(blkg->q->queue_lock); - spin_lock_irqsave(&blkg->stats_lock, flags); - stats = &pd->stats; if (blkio_blkg_idling(stats)) { - now = sched_clock(); - if (time_after64(now, stats->start_idle_time)) + unsigned long long now = sched_clock(); + + if (time_after64(now, stats->start_idle_time)) { + u64_stats_update_begin(&stats->syncp); stats->idle_time += now - stats->start_idle_time; + u64_stats_update_end(&stats->syncp); + } blkio_clear_blkg_idling(stats); } - spin_unlock_irqrestore(&blkg->stats_lock, flags); } EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats); void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg, struct blkio_policy_type *pol) { - struct blkg_policy_data *pd = blkg->pd[pol->plid]; - unsigned long flags; - struct blkio_group_stats *stats; + struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - spin_lock_irqsave(&blkg->stats_lock, flags); - stats = &pd->stats; + lockdep_assert_held(blkg->q->queue_lock); + + u64_stats_update_begin(&stats->syncp); stats->avg_queue_size_sum += stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] + stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]; stats->avg_queue_size_samples++; blkio_update_group_wait_time(stats); - spin_unlock_irqrestore(&blkg->stats_lock, flags); + u64_stats_update_end(&stats->syncp); } EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats); void blkiocg_set_start_empty_time(struct blkio_group *blkg, struct blkio_policy_type *pol) { - struct blkg_policy_data *pd = blkg->pd[pol->plid]; - unsigned long flags; - struct blkio_group_stats *stats; + struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - 
spin_lock_irqsave(&blkg->stats_lock, flags); - stats = &pd->stats; + lockdep_assert_held(blkg->q->queue_lock); if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] || - stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) { - spin_unlock_irqrestore(&blkg->stats_lock, flags); + stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) return; - } /* * group is already marked empty. This can happen if cfqq got new * request in parent group and moved to this group while being added * to service tree. Just ignore the event and move on. */ - if(blkio_blkg_empty(stats)) { - spin_unlock_irqrestore(&blkg->stats_lock, flags); + if (blkio_blkg_empty(stats)) return; - } stats->start_empty_time = sched_clock(); blkio_mark_blkg_empty(stats); - spin_unlock_irqrestore(&blkg->stats_lock, flags); } EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time); @@ -328,6 +318,8 @@ void blkiocg_update_dequeue_stats(struct blkio_group *blkg, { struct blkg_policy_data *pd = blkg->pd[pol->plid]; + lockdep_assert_held(blkg->q->queue_lock); + pd->stats.dequeue += dequeue; } EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats); @@ -343,15 +335,16 @@ void blkiocg_update_io_add_stats(struct blkio_group *blkg, struct blkio_group *curr_blkg, bool direction, bool sync) { - struct blkg_policy_data *pd = blkg->pd[pol->plid]; - unsigned long flags; + struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + + lockdep_assert_held(blkg->q->queue_lock); + + u64_stats_update_begin(&stats->syncp); + blkio_add_stat(stats->stat_arr[BLKIO_STAT_QUEUED], 1, direction, sync); + blkio_end_empty_time(stats); + u64_stats_update_end(&stats->syncp); - spin_lock_irqsave(&blkg->stats_lock, flags); - blkio_add_stat(pd->stats.stat_arr[BLKIO_STAT_QUEUED], 1, direction, - sync); - blkio_end_empty_time(&pd->stats); blkio_set_start_group_wait_time(blkg, pol, curr_blkg); - spin_unlock_irqrestore(&blkg->stats_lock, flags); } EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats); @@ -359,13 +352,14 @@ void blkiocg_update_io_remove_stats(struct blkio_group *blkg, struct blkio_policy_type *pol, bool direction, bool sync) { - struct blkg_policy_data *pd = blkg->pd[pol->plid]; - unsigned long flags; + struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - spin_lock_irqsave(&blkg->stats_lock, flags); - blkio_check_and_dec_stat(pd->stats.stat_arr[BLKIO_STAT_QUEUED], - direction, sync); - spin_unlock_irqrestore(&blkg->stats_lock, flags); + lockdep_assert_held(blkg->q->queue_lock); + + u64_stats_update_begin(&stats->syncp); + blkio_check_and_dec_stat(stats->stat_arr[BLKIO_STAT_QUEUED], direction, + sync); + u64_stats_update_end(&stats->syncp); } EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats); @@ -374,15 +368,16 @@ void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time, unsigned long unaccounted_time) { - struct blkg_policy_data *pd = blkg->pd[pol->plid]; - unsigned long flags; + struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + + lockdep_assert_held(blkg->q->queue_lock); - spin_lock_irqsave(&blkg->stats_lock, flags); - pd->stats.time += time; + u64_stats_update_begin(&stats->syncp); + stats->time += time; #ifdef CONFIG_DEBUG_BLK_CGROUP - pd->stats.unaccounted_time += unaccounted_time; + stats->unaccounted_time += unaccounted_time; #endif - spin_unlock_irqrestore(&blkg->stats_lock, flags); + u64_stats_update_end(&stats->syncp); } EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used); @@ -428,20 +423,19 @@ void blkiocg_update_completion_stats(struct blkio_group *blkg, uint64_t io_start_time, bool direction, bool sync) { - struct 
blkg_policy_data *pd = blkg->pd[pol->plid]; - struct blkio_group_stats *stats; - unsigned long flags; + struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; unsigned long long now = sched_clock(); - spin_lock_irqsave(&blkg->stats_lock, flags); - stats = &pd->stats; + lockdep_assert_held(blkg->q->queue_lock); + + u64_stats_update_begin(&stats->syncp); if (time_after64(now, io_start_time)) blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME], now - io_start_time, direction, sync); if (time_after64(io_start_time, start_time)) blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME], io_start_time - start_time, direction, sync); - spin_unlock_irqrestore(&blkg->stats_lock, flags); + u64_stats_update_end(&stats->syncp); } EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats); @@ -450,14 +444,13 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg, struct blkio_policy_type *pol, bool direction, bool sync) { - struct blkg_policy_data *pd = blkg->pd[pol->plid]; - struct blkio_group_stats *stats; - unsigned long flags; + struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - spin_lock_irqsave(&blkg->stats_lock, flags); - stats = &pd->stats; + lockdep_assert_held(blkg->q->queue_lock); + + u64_stats_update_begin(&stats->syncp); blkio_add_stat(stats->stat_arr[BLKIO_STAT_MERGED], 1, direction, sync); - spin_unlock_irqrestore(&blkg->stats_lock, flags); + u64_stats_update_end(&stats->syncp); } EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats); @@ -558,7 +551,6 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, if (!blkg) return NULL; - spin_lock_init(&blkg->stats_lock); blkg->q = q; INIT_LIST_HEAD(&blkg->q_node); INIT_LIST_HEAD(&blkg->alloc_node); @@ -929,7 +921,6 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid, return disk_total; } -/* This should be called with blkg->stats_lock held */ static uint64_t blkio_get_stat(struct blkio_group *blkg, int plid, struct cgroup_map_cb *cb, const char *dname, enum stat_type type) @@ -937,42 +928,46 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, int plid, struct blkio_group_stats *stats = &blkg->pd[plid]->stats; uint64_t v = 0, disk_total = 0; char key_str[MAX_KEY_LEN]; + unsigned int sync_start; int st; if (type >= BLKIO_STAT_ARR_NR) { - switch (type) { - case BLKIO_STAT_TIME: - v = stats->time; - break; + do { + sync_start = u64_stats_fetch_begin(&stats->syncp); + switch (type) { + case BLKIO_STAT_TIME: + v = stats->time; + break; #ifdef CONFIG_DEBUG_BLK_CGROUP - case BLKIO_STAT_UNACCOUNTED_TIME: - v = stats->unaccounted_time; - break; - case BLKIO_STAT_AVG_QUEUE_SIZE: { - uint64_t samples = stats->avg_queue_size_samples; + case BLKIO_STAT_UNACCOUNTED_TIME: + v = stats->unaccounted_time; + break; + case BLKIO_STAT_AVG_QUEUE_SIZE: { + uint64_t samples = stats->avg_queue_size_samples; - if (samples) { - v = stats->avg_queue_size_sum; - do_div(v, samples); + if (samples) { + v = stats->avg_queue_size_sum; + do_div(v, samples); + } + break; } - break; - } - case BLKIO_STAT_IDLE_TIME: - v = stats->idle_time; - break; - case BLKIO_STAT_EMPTY_TIME: - v = stats->empty_time; - break; - case BLKIO_STAT_DEQUEUE: - v = stats->dequeue; - break; - case BLKIO_STAT_GROUP_WAIT_TIME: - v = stats->group_wait_time; - break; + case BLKIO_STAT_IDLE_TIME: + v = stats->idle_time; + break; + case BLKIO_STAT_EMPTY_TIME: + v = stats->empty_time; + break; + case BLKIO_STAT_DEQUEUE: + v = stats->dequeue; + break; + case BLKIO_STAT_GROUP_WAIT_TIME: + v = stats->group_wait_time; + break; #endif - default: - 
WARN_ON_ONCE(1); - } + default: + WARN_ON_ONCE(1); + } + } while (u64_stats_fetch_retry(&stats->syncp, sync_start)); blkio_get_key_name(0, dname, key_str, MAX_KEY_LEN, true); cb->fill(cb, key_str, v); @@ -980,7 +975,10 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, int plid, } for (st = BLKIO_STAT_READ; st < BLKIO_STAT_TOTAL; st++) { - v = stats->stat_arr[type][st]; + do { + sync_start = u64_stats_fetch_begin(&stats->syncp); + v = stats->stat_arr[type][st]; + } while (u64_stats_fetch_retry(&stats->syncp, sync_start)); blkio_get_key_name(st, dname, key_str, MAX_KEY_LEN, false); cb->fill(cb, key_str, v); @@ -1250,15 +1248,12 @@ static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg, if (!dname) continue; - if (pcpu) { + if (pcpu) cgroup_total += blkio_get_stat_cpu(blkg, plid, cb, dname, type); - } else { - spin_lock(&blkg->stats_lock); + else cgroup_total += blkio_get_stat(blkg, plid, cb, dname, type); - spin_unlock(&blkg->stats_lock); - } } if (show_total) cb->fill(cb, "Total", cgroup_total); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 8bdcf504f94b..9df5ab04b6d6 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -119,6 +119,7 @@ struct blkio_cgroup { }; struct blkio_group_stats { + struct u64_stats_sync syncp; /* total disk time and nr sectors dispatched by this group */ uint64_t time; uint64_t stat_arr[BLKIO_STAT_ARR_NR][BLKIO_STAT_TOTAL]; @@ -200,8 +201,6 @@ struct blkio_group { /* reference count */ int refcnt; - /* Need to serialize the stats in the case of reset/update */ - spinlock_t stats_lock; struct blkg_policy_data *pd[BLKIO_NR_POLICIES]; /* List of blkg waiting for per cpu stats memory to be allocated */ -- cgit v1.2.3-59-g8ed1b From 9a9e8a26da4c2c5ddc60999bdea957935fb22b6b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 19 Mar 2012 15:10:56 -0700 Subject: blkcg: add blkcg->id Add 64bit unique id to blkcg. This will be used by policies which want blkcg identity test to tell whether the associated blkcg has changed. 
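A policy that caches state derived from a task's blkcg can use the new id as a cheap generation check: remember the id of the blkcg it last looked at and treat a mismatch as "association changed". A minimal sketch of that pattern, assuming a hypothetical per-policy structure with a cached_blkcg_id field (the structure and helper name are illustrative, not part of this series):

    /*
     * Illustrative only: return true if @blkcg differs from the one this
     * policy data was last associated with, and remember the new id.
     */
    static bool my_policy_blkcg_changed(struct my_policy_data *mpd,
                                        struct blkio_cgroup *blkcg)
    {
            if (mpd->cached_blkcg_id == blkcg->id)
                    return false;           /* same blkcg as before */

            mpd->cached_blkcg_id = blkcg->id;
            return true;
    }

Because the id comes from a monotonically increasing 64bit sequence, a value is effectively never reused, which is what makes it usable as an identity test where a bare pointer comparison could be fooled by the old blkcg's memory being recycled.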
Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 3 +++ block/blk-cgroup.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b15a51711bca..30e07308db24 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "blk-cgroup.h" #include "blk.h" @@ -1622,6 +1623,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) static struct cgroup_subsys_state * blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup) { + static atomic64_t id_seq = ATOMIC64_INIT(0); struct blkio_cgroup *blkcg; struct cgroup *parent = cgroup->parent; @@ -1635,6 +1637,7 @@ blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup) return ERR_PTR(-ENOMEM); blkcg->weight = BLKIO_WEIGHT_DEFAULT; + blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */ done: spin_lock_init(&blkcg->lock); INIT_HLIST_HEAD(&blkcg->blkg_list); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 9df5ab04b6d6..1cb8f7643258 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -116,6 +116,9 @@ struct blkio_cgroup { unsigned int weight; spinlock_t lock; struct hlist_head blkg_list; + + /* for policies to test whether associated blkcg has changed */ + uint64_t id; }; struct blkio_group_stats { -- cgit v1.2.3-59-g8ed1b From aaec55a002a29bf940588dc03253099a4cd543bf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:42 -0700 Subject: blkcg: remove unused @pol and @plid parameters @pol to blkg_to_pdata() and @plid to blkg_lookup_create() are no longer necessary. Drop them. Signed-off-by: Tejun Heo --- block/blk-cgroup.c | 3 +-- block/blk-cgroup.h | 8 ++------ block/blk-throttle.c | 7 +++---- block/cfq-iosched.c | 7 +++---- 4 files changed, 9 insertions(+), 16 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 4fdeb46b4436..55ccbae6c434 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -568,7 +568,6 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, struct request_queue *q, - enum blkio_policy_id plid, bool for_root) __releases(q->queue_lock) __acquires(q->queue_lock) { @@ -1027,7 +1026,7 @@ static int blkio_policy_parse_and_set(char *buf, enum blkio_policy_id plid, rcu_read_lock(); spin_lock_irq(disk->queue->queue_lock); - blkg = blkg_lookup_create(blkcg, disk->queue, plid, false); + blkg = blkg_lookup_create(blkcg, disk->queue, false); spin_unlock_irq(disk->queue->queue_lock); if (IS_ERR(blkg)) { diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 1cb8f7643258..1add3dcfc19d 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -266,13 +266,10 @@ static inline void *blkg_to_pdata(struct blkio_group *blkg, /** * pdata_to_blkg - get blkg associated with policy private data * @pdata: policy private data of interest - * @pol: policy @pdata is for * - * @pdata is policy private data for @pol. Determine the blkg it's - * associated with. + * @pdata is policy private data. Determine the blkg it's associated with. 
*/ -static inline struct blkio_group *pdata_to_blkg(void *pdata, - struct blkio_policy_type *pol) +static inline struct blkio_group *pdata_to_blkg(void *pdata) { if (pdata) { struct blkg_policy_data *pd = @@ -402,7 +399,6 @@ extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, struct request_queue *q); struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, struct request_queue *q, - enum blkio_policy_id plid, bool for_root); void blkiocg_update_timeslice_used(struct blkio_group *blkg, struct blkio_policy_type *pol, diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 4ba141820a2e..1cc6c23de2c1 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -107,7 +107,7 @@ static inline struct throtl_grp *blkg_to_tg(struct blkio_group *blkg) static inline struct blkio_group *tg_to_blkg(struct throtl_grp *tg) { - return pdata_to_blkg(tg, &blkio_policy_throtl); + return pdata_to_blkg(tg); } enum tg_state_flags { @@ -185,7 +185,7 @@ static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, } else { struct blkio_group *blkg; - blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_THROTL, false); + blkg = blkg_lookup_create(blkcg, q, false); /* if %NULL and @q is alive, fall back to root_tg */ if (!IS_ERR(blkg)) @@ -1033,8 +1033,7 @@ int blk_throtl_init(struct request_queue *q) rcu_read_lock(); spin_lock_irq(q->queue_lock); - blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_THROTL, - true); + blkg = blkg_lookup_create(&blkio_root_cgroup, q, true); if (!IS_ERR(blkg)) td->root_tg = blkg_to_tg(blkg); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 39c43307dc6c..8cca6161d0bc 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -374,7 +374,7 @@ static inline struct cfq_group *blkg_to_cfqg(struct blkio_group *blkg) static inline struct blkio_group *cfqg_to_blkg(struct cfq_group *cfqg) { - return pdata_to_blkg(cfqg, &blkio_policy_cfq); + return pdata_to_blkg(cfqg); } static inline void cfqg_get(struct cfq_group *cfqg) @@ -1092,7 +1092,7 @@ static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd, } else { struct blkio_group *blkg; - blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_PROP, false); + blkg = blkg_lookup_create(blkcg, q, false); if (!IS_ERR(blkg)) cfqg = blkg_to_cfqg(blkg); } @@ -3523,8 +3523,7 @@ static int cfq_init_queue(struct request_queue *q) rcu_read_lock(); spin_lock_irq(q->queue_lock); - blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_PROP, - true); + blkg = blkg_lookup_create(&blkio_root_cgroup, q, true); if (!IS_ERR(blkg)) cfqd->root_group = blkg_to_cfqg(blkg); -- cgit v1.2.3-59-g8ed1b From 2aa4a1523b40a065bc3a31e20097ea7a618ec3de Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:42 -0700 Subject: blkcg: BLKIO_STAT_CPU_SECTORS doesn't have subcounters BLKIO_STAT_CPU_SECTORS doesn't need read/write/sync/async subcounters and is counted by blkio_group_stats_cpu->sectors; however, it still holds a member in blkio_group_stats_cpu->stat_arr_cpu. Rearrange stat_type_cpu and define BLKIO_STAT_CPU_ARR_NR and use it for stat_arr_cpu[] size so that only SERVICE_BYTES and SERVICED have subcounters. 
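The sizing trick is the same one BLKIO_STAT_ARR_NR applies to the non-percpu counters: keep every array-backed type at the front of the enum and derive the array dimension from the last such entry, so single-valued stats stop reserving unused subcounter rows. A stand-alone illustration of the idiom (names invented, plain C):

    #include <stdint.h>

    enum stat_cpu {
            STAT_CPU_SERVICE_BYTES,         /* has rd/wr/sync/async subcounters */
            STAT_CPU_SERVICED,              /* has subcounters */

            /* single valued stats go below this */
            STAT_CPU_SECTORS,
    };

    /* only the entries above the single-valued ones get an array row */
    #define STAT_CPU_ARR_NR (STAT_CPU_SERVICED + 1)

    struct stats_cpu {
            uint64_t sectors;                       /* plain counter */
            uint64_t arr[STAT_CPU_ARR_NR][4];       /* 4 = rd/wr/sync/async */
    };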
Signed-off-by: Tejun Heo --- block/blk-cgroup.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 1add3dcfc19d..2060d812ae8e 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -58,14 +58,17 @@ enum stat_type { /* Per cpu stats */ enum stat_type_cpu { - BLKIO_STAT_CPU_SECTORS, /* Total bytes transferred */ BLKIO_STAT_CPU_SERVICE_BYTES, /* Total IOs serviced, post merge */ BLKIO_STAT_CPU_SERVICED, - BLKIO_STAT_CPU_NR + + /* All the single valued stats go below this */ + BLKIO_STAT_CPU_SECTORS, }; +#define BLKIO_STAT_CPU_ARR_NR (BLKIO_STAT_CPU_SERVICED + 1) + enum stat_sub_type { BLKIO_STAT_READ = 0, BLKIO_STAT_WRITE, @@ -167,7 +170,7 @@ struct blkio_group_stats { /* Per cpu blkio group stats */ struct blkio_group_stats_cpu { uint64_t sectors; - uint64_t stat_arr_cpu[BLKIO_STAT_CPU_NR][BLKIO_STAT_TOTAL]; + uint64_t stat_arr_cpu[BLKIO_STAT_CPU_ARR_NR][BLKIO_STAT_TOTAL]; struct u64_stats_sync syncp; }; -- cgit v1.2.3-59-g8ed1b From edcb0722c654947908388df660791abd41e6617c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:42 -0700 Subject: blkcg: introduce blkg_stat and blkg_rwstat blkcg uses u64_stats_sync to avoid reading wrong u64 statistic values on 32bit archs and some stat counters have subtypes to distinguish read/writes and sync/async IOs. The stat code paths are confusing and involve a lot of going back and forth between blkcg core and specific policy implementations, and synchronization and subtype handling are open coded in blkcg core. This patch introduces struct blkg_stat and blkg_rwstat which, with accompanying operations, encapsulate stat updating and accessing with proper synchronization. blkg_stat is simple u64 counter with 64bit read-access protection. blkg_rwstat is the one with rw and [a]sync subcounters and takes @rw flags to distinguish IO subtypes (%REQ_WRITE and %REQ_SYNC) and replaces stat_sub_type indexed arrays. All counters in blkio_group_stats and blkio_group_stats_cpu are replaced with either blkg_stat or blkg_rwstat along with all users. This does add one u64_stats_sync per counter and increase stats_sync operations but they're empty/noops on 64bit archs and blkcg doesn't have too many counters, especially with DEBUG_BLK_CGROUP off. While the currently resulting code isn't necessarily simpler at the moment, this will enable further clean up of blkcg stats code. - BLKIO_STAT_{READ|WRITE|SYNC|ASYNC|TOTAL} renamed to BLKG_RWSTAT_{READ|WRITE|SYNC|ASYNC|TOTAL}. - blkg_stat_add() replaces blkio_add_stat() and blkio_check_and_dec_stat(). Note that BUG_ON() on underflow in the latter function no longer exists. It's *way* better to have underflowed stat counters than oopsing. - blkio_group_stats->dequeue is now a proper u64 stat counter instead of ulong. - reset_stats() updated to clear each stat counters individually and BLKG_STATS_DEBUG_CLEAR_{START|SIZE} are removed. - Some functions reconstruct rw flags from direction and sync booleans. This will be removed by future patches. 
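With the two helpers in place, an update path relies only on the queue_lock it already holds, while readers run locklessly; the u64_stats_sync handling is hidden inside the accessors. A rough usage sketch in the style of the converted call sites (variable names such as slice_ns stand in for whatever the caller already has):

    /* writer side: queue_lock held, map direction/sync onto REQ_* flags */
    int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);

    blkg_rwstat_add(&stats->queued, rw, 1);        /* one more IO queued */
    blkg_stat_add(&stats->time, slice_ns);         /* accumulate disk time */

    /* reader side: no lock, the accessors retry until the value is consistent */
    uint64_t queued = blkg_rwstat_sum(&stats->queued);
    uint64_t time   = blkg_stat_read(&stats->time);

When the individual read/write/sync/async counts are needed, blkg_rwstat_read() returns the whole four-way snapshot by value instead.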
Signed-off-by: Tejun Heo --- block/blk-cgroup.c | 289 +++++++++++++++++++++++------------------------------ block/blk-cgroup.h | 211 +++++++++++++++++++++++++++++--------- 2 files changed, 293 insertions(+), 207 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 55ccbae6c434..09ac462ba89e 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -132,46 +132,6 @@ static inline void blkio_update_group_iops(struct blkio_group *blkg, } } -/* - * Add to the appropriate stat variable depending on the request type. - * This should be called with queue_lock held. - */ -static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction, - bool sync) -{ - if (direction) - stat[BLKIO_STAT_WRITE] += add; - else - stat[BLKIO_STAT_READ] += add; - if (sync) - stat[BLKIO_STAT_SYNC] += add; - else - stat[BLKIO_STAT_ASYNC] += add; -} - -/* - * Decrements the appropriate stat variable if non-zero depending on the - * request type. Panics on value being zero. - * This should be called with the queue_lock held. - */ -static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync) -{ - if (direction) { - BUG_ON(stat[BLKIO_STAT_WRITE] == 0); - stat[BLKIO_STAT_WRITE]--; - } else { - BUG_ON(stat[BLKIO_STAT_READ] == 0); - stat[BLKIO_STAT_READ]--; - } - if (sync) { - BUG_ON(stat[BLKIO_STAT_SYNC] == 0); - stat[BLKIO_STAT_SYNC]--; - } else { - BUG_ON(stat[BLKIO_STAT_ASYNC] == 0); - stat[BLKIO_STAT_ASYNC]--; - } -} - #ifdef CONFIG_DEBUG_BLK_CGROUP /* This should be called with the queue_lock held. */ static void blkio_set_start_group_wait_time(struct blkio_group *blkg, @@ -198,7 +158,8 @@ static void blkio_update_group_wait_time(struct blkio_group_stats *stats) now = sched_clock(); if (time_after64(now, stats->start_group_wait_time)) - stats->group_wait_time += now - stats->start_group_wait_time; + blkg_stat_add(&stats->group_wait_time, + now - stats->start_group_wait_time); blkio_clear_blkg_waiting(stats); } @@ -212,7 +173,8 @@ static void blkio_end_empty_time(struct blkio_group_stats *stats) now = sched_clock(); if (time_after64(now, stats->start_empty_time)) - stats->empty_time += now - stats->start_empty_time; + blkg_stat_add(&stats->empty_time, + now - stats->start_empty_time); blkio_clear_blkg_empty(stats); } @@ -239,11 +201,9 @@ void blkiocg_update_idle_time_stats(struct blkio_group *blkg, if (blkio_blkg_idling(stats)) { unsigned long long now = sched_clock(); - if (time_after64(now, stats->start_idle_time)) { - u64_stats_update_begin(&stats->syncp); - stats->idle_time += now - stats->start_idle_time; - u64_stats_update_end(&stats->syncp); - } + if (time_after64(now, stats->start_idle_time)) + blkg_stat_add(&stats->idle_time, + now - stats->start_idle_time); blkio_clear_blkg_idling(stats); } } @@ -256,13 +216,10 @@ void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg, lockdep_assert_held(blkg->q->queue_lock); - u64_stats_update_begin(&stats->syncp); - stats->avg_queue_size_sum += - stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] + - stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]; - stats->avg_queue_size_samples++; + blkg_stat_add(&stats->avg_queue_size_sum, + blkg_rwstat_sum(&stats->queued)); + blkg_stat_add(&stats->avg_queue_size_samples, 1); blkio_update_group_wait_time(stats); - u64_stats_update_end(&stats->syncp); } EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats); @@ -273,8 +230,7 @@ void blkiocg_set_start_empty_time(struct blkio_group *blkg, lockdep_assert_held(blkg->q->queue_lock); - if 
(stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] || - stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) + if (blkg_rwstat_sum(&stats->queued)) return; /* @@ -298,7 +254,7 @@ void blkiocg_update_dequeue_stats(struct blkio_group *blkg, lockdep_assert_held(blkg->q->queue_lock); - pd->stats.dequeue += dequeue; + blkg_stat_add(&pd->stats.dequeue, dequeue); } EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats); #else @@ -314,14 +270,12 @@ void blkiocg_update_io_add_stats(struct blkio_group *blkg, bool sync) { struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); lockdep_assert_held(blkg->q->queue_lock); - u64_stats_update_begin(&stats->syncp); - blkio_add_stat(stats->stat_arr[BLKIO_STAT_QUEUED], 1, direction, sync); + blkg_rwstat_add(&stats->queued, rw, 1); blkio_end_empty_time(stats); - u64_stats_update_end(&stats->syncp); - blkio_set_start_group_wait_time(blkg, pol, curr_blkg); } EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats); @@ -331,13 +285,11 @@ void blkiocg_update_io_remove_stats(struct blkio_group *blkg, bool direction, bool sync) { struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); lockdep_assert_held(blkg->q->queue_lock); - u64_stats_update_begin(&stats->syncp); - blkio_check_and_dec_stat(stats->stat_arr[BLKIO_STAT_QUEUED], direction, - sync); - u64_stats_update_end(&stats->syncp); + blkg_rwstat_add(&stats->queued, rw, -1); } EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats); @@ -350,12 +302,10 @@ void blkiocg_update_timeslice_used(struct blkio_group *blkg, lockdep_assert_held(blkg->q->queue_lock); - u64_stats_update_begin(&stats->syncp); - stats->time += time; + blkg_stat_add(&stats->time, time); #ifdef CONFIG_DEBUG_BLK_CGROUP - stats->unaccounted_time += unaccounted_time; + blkg_stat_add(&stats->unaccounted_time, unaccounted_time); #endif - u64_stats_update_end(&stats->syncp); } EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used); @@ -367,6 +317,7 @@ void blkiocg_update_dispatch_stats(struct blkio_group *blkg, struct blkio_policy_type *pol, uint64_t bytes, bool direction, bool sync) { + int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); struct blkg_policy_data *pd = blkg->pd[pol->plid]; struct blkio_group_stats_cpu *stats_cpu; unsigned long flags; @@ -384,13 +335,10 @@ void blkiocg_update_dispatch_stats(struct blkio_group *blkg, stats_cpu = this_cpu_ptr(pd->stats_cpu); - u64_stats_update_begin(&stats_cpu->syncp); - stats_cpu->sectors += bytes >> 9; - blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICED], - 1, direction, sync); - blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICE_BYTES], - bytes, direction, sync); - u64_stats_update_end(&stats_cpu->syncp); + blkg_stat_add(&stats_cpu->sectors, bytes >> 9); + blkg_rwstat_add(&stats_cpu->serviced, rw, 1); + blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes); + local_irq_restore(flags); } EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats); @@ -403,17 +351,15 @@ void blkiocg_update_completion_stats(struct blkio_group *blkg, { struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; unsigned long long now = sched_clock(); + int rw = (direction ? REQ_WRITE : 0) | (sync ? 
REQ_SYNC : 0); lockdep_assert_held(blkg->q->queue_lock); - u64_stats_update_begin(&stats->syncp); if (time_after64(now, io_start_time)) - blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME], - now - io_start_time, direction, sync); + blkg_rwstat_add(&stats->service_time, rw, now - io_start_time); if (time_after64(io_start_time, start_time)) - blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME], - io_start_time - start_time, direction, sync); - u64_stats_update_end(&stats->syncp); + blkg_rwstat_add(&stats->wait_time, rw, + io_start_time - start_time); } EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats); @@ -423,12 +369,11 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction, bool sync) { struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); lockdep_assert_held(blkg->q->queue_lock); - u64_stats_update_begin(&stats->syncp); - blkio_add_stat(stats->stat_arr[BLKIO_STAT_MERGED], 1, direction, sync); - u64_stats_update_end(&stats->syncp); + blkg_rwstat_add(&stats->merged, rw, 1); } EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats); @@ -757,8 +702,9 @@ static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid) struct blkio_group_stats_cpu *sc = per_cpu_ptr(pd->stats_cpu, cpu); - sc->sectors = 0; - memset(sc->stat_arr_cpu, 0, sizeof(sc->stat_arr_cpu)); + blkg_rwstat_reset(&sc->service_bytes); + blkg_rwstat_reset(&sc->serviced); + blkg_stat_reset(&sc->sectors); } } @@ -768,7 +714,6 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); struct blkio_group *blkg; struct hlist_node *n; - int i; spin_lock(&blkio_list_lock); spin_lock_irq(&blkcg->lock); @@ -786,14 +731,18 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) struct blkio_group_stats *stats = &pd->stats; /* queued stats shouldn't be cleared */ - for (i = 0; i < ARRAY_SIZE(stats->stat_arr); i++) - if (i != BLKIO_STAT_QUEUED) - memset(stats->stat_arr[i], 0, - sizeof(stats->stat_arr[i])); - stats->time = 0; + blkg_rwstat_reset(&stats->merged); + blkg_rwstat_reset(&stats->service_time); + blkg_rwstat_reset(&stats->wait_time); + blkg_stat_reset(&stats->time); #ifdef CONFIG_DEBUG_BLK_CGROUP - memset((void *)stats + BLKG_STATS_DEBUG_CLEAR_START, 0, - BLKG_STATS_DEBUG_CLEAR_SIZE); + blkg_stat_reset(&stats->unaccounted_time); + blkg_stat_reset(&stats->avg_queue_size_sum); + blkg_stat_reset(&stats->avg_queue_size_samples); + blkg_stat_reset(&stats->dequeue); + blkg_stat_reset(&stats->group_wait_time); + blkg_stat_reset(&stats->idle_time); + blkg_stat_reset(&stats->empty_time); #endif blkio_reset_stats_cpu(blkg, pol->plid); } @@ -804,7 +753,7 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) return 0; } -static void blkio_get_key_name(enum stat_sub_type type, const char *dname, +static void blkio_get_key_name(enum blkg_rwstat_type type, const char *dname, char *str, int chars_left, bool diskname_only) { snprintf(str, chars_left, "%s", dname); @@ -817,19 +766,19 @@ static void blkio_get_key_name(enum stat_sub_type type, const char *dname, if (diskname_only) return; switch (type) { - case BLKIO_STAT_READ: + case BLKG_RWSTAT_READ: strlcat(str, " Read", chars_left); break; - case BLKIO_STAT_WRITE: + case BLKG_RWSTAT_WRITE: strlcat(str, " Write", chars_left); break; - case BLKIO_STAT_SYNC: + case BLKG_RWSTAT_SYNC: strlcat(str, " Sync", chars_left); break; - case BLKIO_STAT_ASYNC: + case BLKG_RWSTAT_ASYNC: 
strlcat(str, " Async", chars_left); break; - case BLKIO_STAT_TOTAL: + case BLKG_RWSTAT_TOTAL: strlcat(str, " Total", chars_left); break; default: @@ -838,29 +787,34 @@ static void blkio_get_key_name(enum stat_sub_type type, const char *dname, } static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg, int plid, - enum stat_type_cpu type, enum stat_sub_type sub_type) + enum stat_type_cpu type, + enum blkg_rwstat_type sub_type) { struct blkg_policy_data *pd = blkg->pd[plid]; + u64 val = 0; int cpu; - struct blkio_group_stats_cpu *stats_cpu; - u64 val = 0, tval; if (pd->stats_cpu == NULL) return val; for_each_possible_cpu(cpu) { - unsigned int start; - stats_cpu = per_cpu_ptr(pd->stats_cpu, cpu); - - do { - start = u64_stats_fetch_begin(&stats_cpu->syncp); - if (type == BLKIO_STAT_CPU_SECTORS) - tval = stats_cpu->sectors; - else - tval = stats_cpu->stat_arr_cpu[type][sub_type]; - } while(u64_stats_fetch_retry(&stats_cpu->syncp, start)); - - val += tval; + struct blkio_group_stats_cpu *stats_cpu = + per_cpu_ptr(pd->stats_cpu, cpu); + struct blkg_rwstat rws; + + switch (type) { + case BLKIO_STAT_CPU_SECTORS: + val += blkg_stat_read(&stats_cpu->sectors); + break; + case BLKIO_STAT_CPU_SERVICE_BYTES: + rws = blkg_rwstat_read(&stats_cpu->service_bytes); + val += rws.cnt[sub_type]; + break; + case BLKIO_STAT_CPU_SERVICED: + rws = blkg_rwstat_read(&stats_cpu->serviced); + val += rws.cnt[sub_type]; + break; + } } return val; @@ -872,7 +826,7 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid, { uint64_t disk_total, val; char key_str[MAX_KEY_LEN]; - enum stat_sub_type sub_type; + enum blkg_rwstat_type sub_type; if (type == BLKIO_STAT_CPU_SECTORS) { val = blkio_read_stat_cpu(blkg, plid, type, 0); @@ -881,7 +835,7 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid, return val; } - for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL; + for (sub_type = BLKG_RWSTAT_READ; sub_type < BLKG_RWSTAT_NR; sub_type++) { blkio_get_key_name(sub_type, dname, key_str, MAX_KEY_LEN, false); @@ -889,10 +843,10 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid, cb->fill(cb, key_str, val); } - disk_total = blkio_read_stat_cpu(blkg, plid, type, BLKIO_STAT_READ) + - blkio_read_stat_cpu(blkg, plid, type, BLKIO_STAT_WRITE); + disk_total = blkio_read_stat_cpu(blkg, plid, type, BLKG_RWSTAT_READ) + + blkio_read_stat_cpu(blkg, plid, type, BLKG_RWSTAT_WRITE); - blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN, + blkio_get_key_name(BLKG_RWSTAT_TOTAL, dname, key_str, MAX_KEY_LEN, false); cb->fill(cb, key_str, disk_total); return disk_total; @@ -905,65 +859,76 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, int plid, struct blkio_group_stats *stats = &blkg->pd[plid]->stats; uint64_t v = 0, disk_total = 0; char key_str[MAX_KEY_LEN]; - unsigned int sync_start; + struct blkg_rwstat rws = { }; int st; if (type >= BLKIO_STAT_ARR_NR) { - do { - sync_start = u64_stats_fetch_begin(&stats->syncp); - switch (type) { - case BLKIO_STAT_TIME: - v = stats->time; - break; + switch (type) { + case BLKIO_STAT_TIME: + v = blkg_stat_read(&stats->time); + break; #ifdef CONFIG_DEBUG_BLK_CGROUP - case BLKIO_STAT_UNACCOUNTED_TIME: - v = stats->unaccounted_time; - break; - case BLKIO_STAT_AVG_QUEUE_SIZE: { - uint64_t samples = stats->avg_queue_size_samples; + case BLKIO_STAT_UNACCOUNTED_TIME: + v = blkg_stat_read(&stats->unaccounted_time); + break; + case BLKIO_STAT_AVG_QUEUE_SIZE: { + uint64_t samples; - if (samples) { - v = stats->avg_queue_size_sum; 
- do_div(v, samples); - } - break; + samples = blkg_stat_read(&stats->avg_queue_size_samples); + if (samples) { + v = blkg_stat_read(&stats->avg_queue_size_sum); + do_div(v, samples); } - case BLKIO_STAT_IDLE_TIME: - v = stats->idle_time; - break; - case BLKIO_STAT_EMPTY_TIME: - v = stats->empty_time; - break; - case BLKIO_STAT_DEQUEUE: - v = stats->dequeue; - break; - case BLKIO_STAT_GROUP_WAIT_TIME: - v = stats->group_wait_time; - break; + break; + } + case BLKIO_STAT_IDLE_TIME: + v = blkg_stat_read(&stats->idle_time); + break; + case BLKIO_STAT_EMPTY_TIME: + v = blkg_stat_read(&stats->empty_time); + break; + case BLKIO_STAT_DEQUEUE: + v = blkg_stat_read(&stats->dequeue); + break; + case BLKIO_STAT_GROUP_WAIT_TIME: + v = blkg_stat_read(&stats->group_wait_time); + break; #endif - default: - WARN_ON_ONCE(1); - } - } while (u64_stats_fetch_retry(&stats->syncp, sync_start)); + default: + WARN_ON_ONCE(1); + } blkio_get_key_name(0, dname, key_str, MAX_KEY_LEN, true); cb->fill(cb, key_str, v); return v; } - for (st = BLKIO_STAT_READ; st < BLKIO_STAT_TOTAL; st++) { - do { - sync_start = u64_stats_fetch_begin(&stats->syncp); - v = stats->stat_arr[type][st]; - } while (u64_stats_fetch_retry(&stats->syncp, sync_start)); + switch (type) { + case BLKIO_STAT_MERGED: + rws = blkg_rwstat_read(&stats->merged); + break; + case BLKIO_STAT_SERVICE_TIME: + rws = blkg_rwstat_read(&stats->service_time); + break; + case BLKIO_STAT_WAIT_TIME: + rws = blkg_rwstat_read(&stats->wait_time); + break; + case BLKIO_STAT_QUEUED: + rws = blkg_rwstat_read(&stats->queued); + break; + default: + WARN_ON_ONCE(true); + break; + } + for (st = BLKG_RWSTAT_READ; st < BLKG_RWSTAT_NR; st++) { blkio_get_key_name(st, dname, key_str, MAX_KEY_LEN, false); - cb->fill(cb, key_str, v); - if (st == BLKIO_STAT_READ || st == BLKIO_STAT_WRITE) - disk_total += v; + cb->fill(cb, key_str, rws.cnt[st]); + if (st == BLKG_RWSTAT_READ || st == BLKG_RWSTAT_WRITE) + disk_total += rws.cnt[st]; } - blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN, + blkio_get_key_name(BLKG_RWSTAT_TOTAL, dname, key_str, MAX_KEY_LEN, false); cb->fill(cb, key_str, disk_total); return disk_total; diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 2060d812ae8e..7578df3938b4 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -69,12 +69,14 @@ enum stat_type_cpu { #define BLKIO_STAT_CPU_ARR_NR (BLKIO_STAT_CPU_SERVICED + 1) -enum stat_sub_type { - BLKIO_STAT_READ = 0, - BLKIO_STAT_WRITE, - BLKIO_STAT_SYNC, - BLKIO_STAT_ASYNC, - BLKIO_STAT_TOTAL +enum blkg_rwstat_type { + BLKG_RWSTAT_READ, + BLKG_RWSTAT_WRITE, + BLKG_RWSTAT_SYNC, + BLKG_RWSTAT_ASYNC, + + BLKG_RWSTAT_NR, + BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR, }; /* blkg state flags */ @@ -124,54 +126,58 @@ struct blkio_cgroup { uint64_t id; }; +struct blkg_stat { + struct u64_stats_sync syncp; + uint64_t cnt; +}; + +struct blkg_rwstat { + struct u64_stats_sync syncp; + uint64_t cnt[BLKG_RWSTAT_NR]; +}; + struct blkio_group_stats { - struct u64_stats_sync syncp; + /* number of ios merged */ + struct blkg_rwstat merged; + /* total time spent on device in ns, may not be accurate w/ queueing */ + struct blkg_rwstat service_time; + /* total time spent waiting in scheduler queue in ns */ + struct blkg_rwstat wait_time; + /* number of IOs queued up */ + struct blkg_rwstat queued; /* total disk time and nr sectors dispatched by this group */ - uint64_t time; - uint64_t stat_arr[BLKIO_STAT_ARR_NR][BLKIO_STAT_TOTAL]; + struct blkg_stat time; #ifdef CONFIG_DEBUG_BLK_CGROUP - /* Time not charged to this cgroup 
*/ - uint64_t unaccounted_time; - - /* Sum of number of IOs queued across all samples */ - uint64_t avg_queue_size_sum; - /* Count of samples taken for average */ - uint64_t avg_queue_size_samples; - /* How many times this group has been removed from service tree */ - unsigned long dequeue; - - /* Total time spent waiting for it to be assigned a timeslice. */ - uint64_t group_wait_time; - - /* Time spent idling for this blkio_group */ - uint64_t idle_time; - /* - * Total time when we have requests queued and do not contain the - * current active queue. - */ - uint64_t empty_time; - + /* time not charged to this cgroup */ + struct blkg_stat unaccounted_time; + /* sum of number of ios queued across all samples */ + struct blkg_stat avg_queue_size_sum; + /* count of samples taken for average */ + struct blkg_stat avg_queue_size_samples; + /* how many times this group has been removed from service tree */ + struct blkg_stat dequeue; + /* total time spent waiting for it to be assigned a timeslice. */ + struct blkg_stat group_wait_time; + /* time spent idling for this blkio_group */ + struct blkg_stat idle_time; + /* total time with empty current active q with other requests queued */ + struct blkg_stat empty_time; /* fields after this shouldn't be cleared on stat reset */ - uint64_t start_group_wait_time; - uint64_t start_idle_time; - uint64_t start_empty_time; - uint16_t flags; + uint64_t start_group_wait_time; + uint64_t start_idle_time; + uint64_t start_empty_time; + uint16_t flags; #endif }; -#ifdef CONFIG_DEBUG_BLK_CGROUP -#define BLKG_STATS_DEBUG_CLEAR_START \ - offsetof(struct blkio_group_stats, unaccounted_time) -#define BLKG_STATS_DEBUG_CLEAR_SIZE \ - (offsetof(struct blkio_group_stats, start_group_wait_time) - \ - BLKG_STATS_DEBUG_CLEAR_START) -#endif - /* Per cpu blkio group stats */ struct blkio_group_stats_cpu { - uint64_t sectors; - uint64_t stat_arr_cpu[BLKIO_STAT_CPU_ARR_NR][BLKIO_STAT_TOTAL]; - struct u64_stats_sync syncp; + /* total bytes transferred */ + struct blkg_rwstat service_bytes; + /* total IOs serviced, post merge */ + struct blkg_rwstat serviced; + /* total sectors transferred */ + struct blkg_stat sectors; }; struct blkio_group_conf { @@ -316,6 +322,121 @@ static inline void blkg_put(struct blkio_group *blkg) __blkg_release(blkg); } +/** + * blkg_stat_add - add a value to a blkg_stat + * @stat: target blkg_stat + * @val: value to add + * + * Add @val to @stat. The caller is responsible for synchronizing calls to + * this function. + */ +static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val) +{ + u64_stats_update_begin(&stat->syncp); + stat->cnt += val; + u64_stats_update_end(&stat->syncp); +} + +/** + * blkg_stat_read - read the current value of a blkg_stat + * @stat: blkg_stat to read + * + * Read the current value of @stat. This function can be called without + * synchroniztion and takes care of u64 atomicity. + */ +static inline uint64_t blkg_stat_read(struct blkg_stat *stat) +{ + unsigned int start; + uint64_t v; + + do { + start = u64_stats_fetch_begin(&stat->syncp); + v = stat->cnt; + } while (u64_stats_fetch_retry(&stat->syncp, start)); + + return v; +} + +/** + * blkg_stat_reset - reset a blkg_stat + * @stat: blkg_stat to reset + */ +static inline void blkg_stat_reset(struct blkg_stat *stat) +{ + stat->cnt = 0; +} + +/** + * blkg_rwstat_add - add a value to a blkg_rwstat + * @rwstat: target blkg_rwstat + * @rw: mask of REQ_{WRITE|SYNC} + * @val: value to add + * + * Add @val to @rwstat. The counters are chosen according to @rw. 
The + * caller is responsible for synchronizing calls to this function. + */ +static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, + int rw, uint64_t val) +{ + u64_stats_update_begin(&rwstat->syncp); + + if (rw & REQ_WRITE) + rwstat->cnt[BLKG_RWSTAT_WRITE] += val; + else + rwstat->cnt[BLKG_RWSTAT_READ] += val; + if (rw & REQ_SYNC) + rwstat->cnt[BLKG_RWSTAT_SYNC] += val; + else + rwstat->cnt[BLKG_RWSTAT_ASYNC] += val; + + u64_stats_update_end(&rwstat->syncp); +} + +/** + * blkg_rwstat_read - read the current values of a blkg_rwstat + * @rwstat: blkg_rwstat to read + * + * Read the current snapshot of @rwstat and return it as the return value. + * This function can be called without synchronization and takes care of + * u64 atomicity. + */ +static struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) +{ + unsigned int start; + struct blkg_rwstat tmp; + + do { + start = u64_stats_fetch_begin(&rwstat->syncp); + tmp = *rwstat; + } while (u64_stats_fetch_retry(&rwstat->syncp, start)); + + return tmp; +} + +/** + * blkg_rwstat_sum - read the total count of a blkg_rwstat + * @rwstat: blkg_rwstat to read + * + * Return the total count of @rwstat regardless of the IO direction. This + * function can be called without synchronization and takes care of u64 + * atomicity. + */ +static inline uint64_t blkg_rwstat_sum(struct blkg_rwstat *rwstat) +{ + struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); + + return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]; +} + +/** + * blkg_rwstat_reset - reset a blkg_rwstat + * @rwstat: blkg_rwstat to reset + */ +static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) +{ + memset(rwstat->cnt, 0, sizeof(rwstat->cnt)); +} + #else struct blkio_group { -- cgit v1.2.3-59-g8ed1b From d3d32e69fa368e131b25ee68806aa3fd3916cec1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:42 -0700 Subject: blkcg: restructure statistics printing blkcg stats handling is a mess. None of the stats has much to do with blkcg core but they are all implemented in blkcg core. Code sharing is achieved by mixing common code with hard-coded cases for each stat counter. This patch restructures statistics printing such that * Common logic exists as helper functions and specific print functions use the helpers to implement specific cases. * Printing functions serving multiple counters don't require hardcoded switching on specific counters. * Printing uses read_seq_string callback (other methods will be phased out). This change enables further cleanups and relocating stats code to the policy implementation it belongs to. 
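Concretely, a per-device stat file now reduces to a cftype whose ->private packs the policy id and the offset of the member inside the stats structure, with a generic read_seq_string callback doing the blkg walking and printing. A sketch of what wiring up one more simple u64 counter would look like under this scheme (the file name is hypothetical, not added by this patch):

    {
            .name = "example_time",         /* hypothetical debug file */
            .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
                            offsetof(struct blkio_group_stats, time)),
            .read_seq_string = blkcg_print_stat,
    },

Derived values that cannot be expressed as a plain member offset, such as avg_queue_size, instead supply their own prfill function built on __blkg_prfill_u64() and invoke blkcg_print_blkgs() directly.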
Signed-off-by: Tejun Heo --- block/blk-cgroup.c | 557 +++++++++++++++++++++++------------------------------ block/blk-cgroup.h | 60 +----- 2 files changed, 243 insertions(+), 374 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 09ac462ba89e..951e7f3a8c89 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -753,186 +753,227 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) return 0; } -static void blkio_get_key_name(enum blkg_rwstat_type type, const char *dname, - char *str, int chars_left, bool diskname_only) -{ - snprintf(str, chars_left, "%s", dname); - chars_left -= strlen(str); - if (chars_left <= 0) { - printk(KERN_WARNING - "Possibly incorrect cgroup stat display format"); - return; - } - if (diskname_only) - return; - switch (type) { - case BLKG_RWSTAT_READ: - strlcat(str, " Read", chars_left); - break; - case BLKG_RWSTAT_WRITE: - strlcat(str, " Write", chars_left); - break; - case BLKG_RWSTAT_SYNC: - strlcat(str, " Sync", chars_left); - break; - case BLKG_RWSTAT_ASYNC: - strlcat(str, " Async", chars_left); - break; - case BLKG_RWSTAT_TOTAL: - strlcat(str, " Total", chars_left); - break; - default: - strlcat(str, " Invalid", chars_left); - } +static const char *blkg_dev_name(struct blkio_group *blkg) +{ + /* some drivers (floppy) instantiate a queue w/o disk registered */ + if (blkg->q->backing_dev_info.dev) + return dev_name(blkg->q->backing_dev_info.dev); + return NULL; } -static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg, int plid, - enum stat_type_cpu type, - enum blkg_rwstat_type sub_type) +/** + * blkcg_print_blkgs - helper for printing per-blkg data + * @sf: seq_file to print to + * @blkcg: blkcg of interest + * @prfill: fill function to print out a blkg + * @pol: policy in question + * @data: data to be passed to @prfill + * @show_total: to print out sum of prfill return values or not + * + * This function invokes @prfill on each blkg of @blkcg if pd for the + * policy specified by @pol exists. @prfill is invoked with @sf, the + * policy data and @data. If @show_total is %true, the sum of the return + * values from @prfill is printed with "Total" label at the end. + * + * This is to be used to construct print functions for + * cftype->read_seq_string method. + */ +static void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg, + u64 (*prfill)(struct seq_file *, + struct blkg_policy_data *, int), + int pol, int data, bool show_total) { - struct blkg_policy_data *pd = blkg->pd[plid]; - u64 val = 0; - int cpu; + struct blkio_group *blkg; + struct hlist_node *n; + u64 total = 0; - if (pd->stats_cpu == NULL) - return val; + spin_lock_irq(&blkcg->lock); + hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) + if (blkg->pd[pol]) + total += prfill(sf, blkg->pd[pol], data); + spin_unlock_irq(&blkcg->lock); + + if (show_total) + seq_printf(sf, "Total %llu\n", (unsigned long long)total); +} + +/** + * __blkg_prfill_u64 - prfill helper for a single u64 value + * @sf: seq_file to print to + * @pd: policy data of interest + * @v: value to print + * + * Print @v to @sf for the device assocaited with @pd. 
+ */ +static u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, + u64 v) +{ + const char *dname = blkg_dev_name(pd->blkg); + + if (!dname) + return 0; + + seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v); + return v; +} + +/** + * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat + * @sf: seq_file to print to + * @pd: policy data of interest + * @rwstat: rwstat to print + * + * Print @rwstat to @sf for the device assocaited with @pd. + */ +static u64 __blkg_prfill_rwstat(struct seq_file *sf, + struct blkg_policy_data *pd, + const struct blkg_rwstat *rwstat) +{ + static const char *rwstr[] = { + [BLKG_RWSTAT_READ] = "Read", + [BLKG_RWSTAT_WRITE] = "Write", + [BLKG_RWSTAT_SYNC] = "Sync", + [BLKG_RWSTAT_ASYNC] = "Async", + }; + const char *dname = blkg_dev_name(pd->blkg); + u64 v; + int i; + + if (!dname) + return 0; + + for (i = 0; i < BLKG_RWSTAT_NR; i++) + seq_printf(sf, "%s %s %llu\n", dname, rwstr[i], + (unsigned long long)rwstat->cnt[i]); + + v = rwstat->cnt[BLKG_RWSTAT_READ] + rwstat->cnt[BLKG_RWSTAT_WRITE]; + seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v); + return v; +} + +static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, + int off) +{ + return __blkg_prfill_u64(sf, pd, + blkg_stat_read((void *)&pd->stats + off)); +} + +static u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, + int off) +{ + struct blkg_rwstat rwstat = blkg_rwstat_read((void *)&pd->stats + off); + + return __blkg_prfill_rwstat(sf, pd, &rwstat); +} + +/* print blkg_stat specified by BLKCG_STAT_PRIV() */ +static int blkcg_print_stat(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + + blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, + BLKCG_STAT_POL(cft->private), + BLKCG_STAT_OFF(cft->private), false); + return 0; +} + +/* print blkg_rwstat specified by BLKCG_STAT_PRIV() */ +static int blkcg_print_rwstat(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + + blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, + BLKCG_STAT_POL(cft->private), + BLKCG_STAT_OFF(cft->private), true); + return 0; +} + +static u64 blkg_prfill_cpu_stat(struct seq_file *sf, + struct blkg_policy_data *pd, int off) +{ + u64 v = 0; + int cpu; for_each_possible_cpu(cpu) { - struct blkio_group_stats_cpu *stats_cpu = + struct blkio_group_stats_cpu *sc = per_cpu_ptr(pd->stats_cpu, cpu); - struct blkg_rwstat rws; - switch (type) { - case BLKIO_STAT_CPU_SECTORS: - val += blkg_stat_read(&stats_cpu->sectors); - break; - case BLKIO_STAT_CPU_SERVICE_BYTES: - rws = blkg_rwstat_read(&stats_cpu->service_bytes); - val += rws.cnt[sub_type]; - break; - case BLKIO_STAT_CPU_SERVICED: - rws = blkg_rwstat_read(&stats_cpu->serviced); - val += rws.cnt[sub_type]; - break; - } + v += blkg_stat_read((void *)sc + off); } - return val; + return __blkg_prfill_u64(sf, pd, v); } -static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid, - struct cgroup_map_cb *cb, const char *dname, - enum stat_type_cpu type) +static u64 blkg_prfill_cpu_rwstat(struct seq_file *sf, + struct blkg_policy_data *pd, int off) { - uint64_t disk_total, val; - char key_str[MAX_KEY_LEN]; - enum blkg_rwstat_type sub_type; + struct blkg_rwstat rwstat = { }, tmp; + int i, cpu; - if (type == BLKIO_STAT_CPU_SECTORS) { - val = blkio_read_stat_cpu(blkg, plid, type, 0); - blkio_get_key_name(0, dname, key_str, MAX_KEY_LEN, true); - cb->fill(cb, 
key_str, val); - return val; - } + for_each_possible_cpu(cpu) { + struct blkio_group_stats_cpu *sc = + per_cpu_ptr(pd->stats_cpu, cpu); - for (sub_type = BLKG_RWSTAT_READ; sub_type < BLKG_RWSTAT_NR; - sub_type++) { - blkio_get_key_name(sub_type, dname, key_str, MAX_KEY_LEN, - false); - val = blkio_read_stat_cpu(blkg, plid, type, sub_type); - cb->fill(cb, key_str, val); + tmp = blkg_rwstat_read((void *)sc + off); + for (i = 0; i < BLKG_RWSTAT_NR; i++) + rwstat.cnt[i] += tmp.cnt[i]; } - disk_total = blkio_read_stat_cpu(blkg, plid, type, BLKG_RWSTAT_READ) + - blkio_read_stat_cpu(blkg, plid, type, BLKG_RWSTAT_WRITE); - - blkio_get_key_name(BLKG_RWSTAT_TOTAL, dname, key_str, MAX_KEY_LEN, - false); - cb->fill(cb, key_str, disk_total); - return disk_total; + return __blkg_prfill_rwstat(sf, pd, &rwstat); } -static uint64_t blkio_get_stat(struct blkio_group *blkg, int plid, - struct cgroup_map_cb *cb, const char *dname, - enum stat_type type) +/* print per-cpu blkg_stat specified by BLKCG_STAT_PRIV() */ +static int blkcg_print_cpu_stat(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) { - struct blkio_group_stats *stats = &blkg->pd[plid]->stats; - uint64_t v = 0, disk_total = 0; - char key_str[MAX_KEY_LEN]; - struct blkg_rwstat rws = { }; - int st; + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - if (type >= BLKIO_STAT_ARR_NR) { - switch (type) { - case BLKIO_STAT_TIME: - v = blkg_stat_read(&stats->time); - break; -#ifdef CONFIG_DEBUG_BLK_CGROUP - case BLKIO_STAT_UNACCOUNTED_TIME: - v = blkg_stat_read(&stats->unaccounted_time); - break; - case BLKIO_STAT_AVG_QUEUE_SIZE: { - uint64_t samples; + blkcg_print_blkgs(sf, blkcg, blkg_prfill_cpu_stat, + BLKCG_STAT_POL(cft->private), + BLKCG_STAT_OFF(cft->private), false); + return 0; +} - samples = blkg_stat_read(&stats->avg_queue_size_samples); - if (samples) { - v = blkg_stat_read(&stats->avg_queue_size_sum); - do_div(v, samples); - } - break; - } - case BLKIO_STAT_IDLE_TIME: - v = blkg_stat_read(&stats->idle_time); - break; - case BLKIO_STAT_EMPTY_TIME: - v = blkg_stat_read(&stats->empty_time); - break; - case BLKIO_STAT_DEQUEUE: - v = blkg_stat_read(&stats->dequeue); - break; - case BLKIO_STAT_GROUP_WAIT_TIME: - v = blkg_stat_read(&stats->group_wait_time); - break; -#endif - default: - WARN_ON_ONCE(1); - } +/* print per-cpu blkg_rwstat specified by BLKCG_STAT_PRIV() */ +static int blkcg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - blkio_get_key_name(0, dname, key_str, MAX_KEY_LEN, true); - cb->fill(cb, key_str, v); - return v; - } + blkcg_print_blkgs(sf, blkcg, blkg_prfill_cpu_rwstat, + BLKCG_STAT_POL(cft->private), + BLKCG_STAT_OFF(cft->private), true); + return 0; +} - switch (type) { - case BLKIO_STAT_MERGED: - rws = blkg_rwstat_read(&stats->merged); - break; - case BLKIO_STAT_SERVICE_TIME: - rws = blkg_rwstat_read(&stats->service_time); - break; - case BLKIO_STAT_WAIT_TIME: - rws = blkg_rwstat_read(&stats->wait_time); - break; - case BLKIO_STAT_QUEUED: - rws = blkg_rwstat_read(&stats->queued); - break; - default: - WARN_ON_ONCE(true); - break; - } +#ifdef CONFIG_DEBUG_BLK_CGROUP +static u64 blkg_prfill_avg_queue_size(struct seq_file *sf, + struct blkg_policy_data *pd, int off) +{ + u64 samples = blkg_stat_read(&pd->stats.avg_queue_size_samples); + u64 v = 0; - for (st = BLKG_RWSTAT_READ; st < BLKG_RWSTAT_NR; st++) { - blkio_get_key_name(st, dname, key_str, MAX_KEY_LEN, false); - cb->fill(cb, key_str, rws.cnt[st]); - if 
(st == BLKG_RWSTAT_READ || st == BLKG_RWSTAT_WRITE) - disk_total += rws.cnt[st]; + if (samples) { + v = blkg_stat_read(&pd->stats.avg_queue_size_sum); + do_div(v, samples); } + __blkg_prfill_u64(sf, pd, v); + return 0; +} + +/* print avg_queue_size */ +static int blkcg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - blkio_get_key_name(BLKG_RWSTAT_TOTAL, dname, key_str, MAX_KEY_LEN, - false); - cb->fill(cb, key_str, disk_total); - return disk_total; + blkcg_print_blkgs(sf, blkcg, blkg_prfill_avg_queue_size, + BLKIO_POLICY_PROP, 0, false); + return 0; } +#endif /* CONFIG_DEBUG_BLK_CGROUP */ static int blkio_policy_parse_and_set(char *buf, enum blkio_policy_id plid, int fileid, struct blkio_cgroup *blkcg) @@ -1074,14 +1115,6 @@ static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft, return ret; } -static const char *blkg_dev_name(struct blkio_group *blkg) -{ - /* some drivers (floppy) instantiate a queue w/o disk registered */ - if (blkg->q->backing_dev_info.dev) - return dev_name(blkg->q->backing_dev_info.dev); - return NULL; -} - static void blkio_print_group_conf(struct cftype *cft, struct blkio_group *blkg, struct seq_file *m) { @@ -1174,116 +1207,6 @@ static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft, return 0; } -static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg, - struct cftype *cft, struct cgroup_map_cb *cb, - enum stat_type type, bool show_total, bool pcpu) -{ - struct blkio_group *blkg; - struct hlist_node *n; - uint64_t cgroup_total = 0; - - spin_lock_irq(&blkcg->lock); - - hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { - const char *dname = blkg_dev_name(blkg); - int plid = BLKIOFILE_POLICY(cft->private); - - if (!dname) - continue; - if (pcpu) - cgroup_total += blkio_get_stat_cpu(blkg, plid, - cb, dname, type); - else - cgroup_total += blkio_get_stat(blkg, plid, - cb, dname, type); - } - if (show_total) - cb->fill(cb, "Total", cgroup_total); - - spin_unlock_irq(&blkcg->lock); - return 0; -} - -/* All map kind of cgroup file get serviced by this function */ -static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft, - struct cgroup_map_cb *cb) -{ - struct blkio_cgroup *blkcg; - enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private); - int name = BLKIOFILE_ATTR(cft->private); - - blkcg = cgroup_to_blkio_cgroup(cgrp); - - switch(plid) { - case BLKIO_POLICY_PROP: - switch(name) { - case BLKIO_PROP_time: - return blkio_read_blkg_stats(blkcg, cft, cb, - BLKIO_STAT_TIME, 0, 0); - case BLKIO_PROP_sectors: - return blkio_read_blkg_stats(blkcg, cft, cb, - BLKIO_STAT_CPU_SECTORS, 0, 1); - case BLKIO_PROP_io_service_bytes: - return blkio_read_blkg_stats(blkcg, cft, cb, - BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1); - case BLKIO_PROP_io_serviced: - return blkio_read_blkg_stats(blkcg, cft, cb, - BLKIO_STAT_CPU_SERVICED, 1, 1); - case BLKIO_PROP_io_service_time: - return blkio_read_blkg_stats(blkcg, cft, cb, - BLKIO_STAT_SERVICE_TIME, 1, 0); - case BLKIO_PROP_io_wait_time: - return blkio_read_blkg_stats(blkcg, cft, cb, - BLKIO_STAT_WAIT_TIME, 1, 0); - case BLKIO_PROP_io_merged: - return blkio_read_blkg_stats(blkcg, cft, cb, - BLKIO_STAT_MERGED, 1, 0); - case BLKIO_PROP_io_queued: - return blkio_read_blkg_stats(blkcg, cft, cb, - BLKIO_STAT_QUEUED, 1, 0); -#ifdef CONFIG_DEBUG_BLK_CGROUP - case BLKIO_PROP_unaccounted_time: - return blkio_read_blkg_stats(blkcg, cft, cb, - BLKIO_STAT_UNACCOUNTED_TIME, 0, 0); - case 
BLKIO_PROP_dequeue: - return blkio_read_blkg_stats(blkcg, cft, cb, - BLKIO_STAT_DEQUEUE, 0, 0); - case BLKIO_PROP_avg_queue_size: - return blkio_read_blkg_stats(blkcg, cft, cb, - BLKIO_STAT_AVG_QUEUE_SIZE, 0, 0); - case BLKIO_PROP_group_wait_time: - return blkio_read_blkg_stats(blkcg, cft, cb, - BLKIO_STAT_GROUP_WAIT_TIME, 0, 0); - case BLKIO_PROP_idle_time: - return blkio_read_blkg_stats(blkcg, cft, cb, - BLKIO_STAT_IDLE_TIME, 0, 0); - case BLKIO_PROP_empty_time: - return blkio_read_blkg_stats(blkcg, cft, cb, - BLKIO_STAT_EMPTY_TIME, 0, 0); -#endif - default: - BUG(); - } - break; - case BLKIO_POLICY_THROTL: - switch(name){ - case BLKIO_THROTL_io_service_bytes: - return blkio_read_blkg_stats(blkcg, cft, cb, - BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1); - case BLKIO_THROTL_io_serviced: - return blkio_read_blkg_stats(blkcg, cft, cb, - BLKIO_STAT_CPU_SERVICED, 1, 1); - default: - BUG(); - } - break; - default: - BUG(); - } - - return 0; -} - static int blkio_weight_write(struct blkio_cgroup *blkcg, int plid, u64 val) { struct blkio_group *blkg; @@ -1369,51 +1292,51 @@ struct cftype blkio_files[] = { }, { .name = "time", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, - BLKIO_PROP_time), - .read_map = blkiocg_file_read_map, + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats, time)), + .read_seq_string = blkcg_print_stat, }, { .name = "sectors", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, - BLKIO_PROP_sectors), - .read_map = blkiocg_file_read_map, + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats_cpu, sectors)), + .read_seq_string = blkcg_print_cpu_stat, }, { .name = "io_service_bytes", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, - BLKIO_PROP_io_service_bytes), - .read_map = blkiocg_file_read_map, + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats_cpu, service_bytes)), + .read_seq_string = blkcg_print_cpu_rwstat, }, { .name = "io_serviced", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, - BLKIO_PROP_io_serviced), - .read_map = blkiocg_file_read_map, + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats_cpu, serviced)), + .read_seq_string = blkcg_print_cpu_rwstat, }, { .name = "io_service_time", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, - BLKIO_PROP_io_service_time), - .read_map = blkiocg_file_read_map, + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats, service_time)), + .read_seq_string = blkcg_print_rwstat, }, { .name = "io_wait_time", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, - BLKIO_PROP_io_wait_time), - .read_map = blkiocg_file_read_map, + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats, wait_time)), + .read_seq_string = blkcg_print_rwstat, }, { .name = "io_merged", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, - BLKIO_PROP_io_merged), - .read_map = blkiocg_file_read_map, + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats, merged)), + .read_seq_string = blkcg_print_rwstat, }, { .name = "io_queued", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, - BLKIO_PROP_io_queued), - .read_map = blkiocg_file_read_map, + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats, queued)), + .read_seq_string = blkcg_print_rwstat, }, { .name = "reset_stats", @@ -1457,54 +1380,52 @@ struct cftype blkio_files[] = { }, { .name = "throttle.io_service_bytes", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL, - 
BLKIO_THROTL_io_service_bytes), - .read_map = blkiocg_file_read_map, + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, + offsetof(struct blkio_group_stats_cpu, service_bytes)), + .read_seq_string = blkcg_print_cpu_rwstat, }, { .name = "throttle.io_serviced", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL, - BLKIO_THROTL_io_serviced), - .read_map = blkiocg_file_read_map, + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, + offsetof(struct blkio_group_stats_cpu, serviced)), + .read_seq_string = blkcg_print_cpu_rwstat, }, #endif /* CONFIG_BLK_DEV_THROTTLING */ #ifdef CONFIG_DEBUG_BLK_CGROUP { .name = "avg_queue_size", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, - BLKIO_PROP_avg_queue_size), - .read_map = blkiocg_file_read_map, + .read_seq_string = blkcg_print_avg_queue_size, }, { .name = "group_wait_time", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, - BLKIO_PROP_group_wait_time), - .read_map = blkiocg_file_read_map, + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats, group_wait_time)), + .read_seq_string = blkcg_print_stat, }, { .name = "idle_time", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, - BLKIO_PROP_idle_time), - .read_map = blkiocg_file_read_map, + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats, idle_time)), + .read_seq_string = blkcg_print_stat, }, { .name = "empty_time", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, - BLKIO_PROP_empty_time), - .read_map = blkiocg_file_read_map, + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats, empty_time)), + .read_seq_string = blkcg_print_stat, }, { .name = "dequeue", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, - BLKIO_PROP_dequeue), - .read_map = blkiocg_file_read_map, + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats, dequeue)), + .read_seq_string = blkcg_print_stat, }, { .name = "unaccounted_time", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, - BLKIO_PROP_unaccounted_time), - .read_map = blkiocg_file_read_map, + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats, unaccounted_time)), + .read_seq_string = blkcg_print_stat, }, #endif { } /* terminate */ diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 7578df3938b4..7331d7965a5e 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -28,46 +28,10 @@ enum blkio_policy_id { #ifdef CONFIG_BLK_CGROUP -enum stat_type { - /* Number of IOs merged */ - BLKIO_STAT_MERGED, - /* Total time spent (in ns) between request dispatch to the driver and - * request completion for IOs doen by this cgroup. This may not be - * accurate when NCQ is turned on. 
*/ - BLKIO_STAT_SERVICE_TIME, - /* Total time spent waiting in scheduler queue in ns */ - BLKIO_STAT_WAIT_TIME, - /* Number of IOs queued up */ - BLKIO_STAT_QUEUED, - - /* All the single valued stats go below this */ - BLKIO_STAT_TIME, -#ifdef CONFIG_DEBUG_BLK_CGROUP - /* Time not charged to this cgroup */ - BLKIO_STAT_UNACCOUNTED_TIME, - BLKIO_STAT_AVG_QUEUE_SIZE, - BLKIO_STAT_IDLE_TIME, - BLKIO_STAT_EMPTY_TIME, - BLKIO_STAT_GROUP_WAIT_TIME, - BLKIO_STAT_DEQUEUE -#endif -}; - -/* Types lower than this live in stat_arr and have subtypes */ -#define BLKIO_STAT_ARR_NR (BLKIO_STAT_QUEUED + 1) - -/* Per cpu stats */ -enum stat_type_cpu { - /* Total bytes transferred */ - BLKIO_STAT_CPU_SERVICE_BYTES, - /* Total IOs serviced, post merge */ - BLKIO_STAT_CPU_SERVICED, - - /* All the single valued stats go below this */ - BLKIO_STAT_CPU_SECTORS, -}; - -#define BLKIO_STAT_CPU_ARR_NR (BLKIO_STAT_CPU_SERVICED + 1) +/* cft->private [un]packing for stat printing */ +#define BLKCG_STAT_PRIV(pol, off) (((unsigned)(pol) << 16) | (off)) +#define BLKCG_STAT_POL(prv) ((unsigned)(prv) >> 16) +#define BLKCG_STAT_OFF(prv) ((unsigned)(prv) & 0xffff) enum blkg_rwstat_type { BLKG_RWSTAT_READ, @@ -90,20 +54,6 @@ enum blkg_state_flags { enum blkcg_file_name_prop { BLKIO_PROP_weight = 1, BLKIO_PROP_weight_device, - BLKIO_PROP_io_service_bytes, - BLKIO_PROP_io_serviced, - BLKIO_PROP_time, - BLKIO_PROP_sectors, - BLKIO_PROP_unaccounted_time, - BLKIO_PROP_io_service_time, - BLKIO_PROP_io_wait_time, - BLKIO_PROP_io_merged, - BLKIO_PROP_io_queued, - BLKIO_PROP_avg_queue_size, - BLKIO_PROP_group_wait_time, - BLKIO_PROP_idle_time, - BLKIO_PROP_empty_time, - BLKIO_PROP_dequeue, }; /* cgroup files owned by throttle policy */ @@ -112,8 +62,6 @@ enum blkcg_file_name_throtl { BLKIO_THROTL_write_bps_device, BLKIO_THROTL_read_iops_device, BLKIO_THROTL_write_iops_device, - BLKIO_THROTL_io_service_bytes, - BLKIO_THROTL_io_serviced, }; struct blkio_cgroup { -- cgit v1.2.3-59-g8ed1b From c4682aec9caaca1fcfd1dd4b59cef47af22cbdc6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:43 -0700 Subject: blkcg: restructure configuration printing Similarly to the previous stat restructuring, this patch restructures conf printing code such that, * Conf printing uses the same helpers as stat. * Printing function doesn't require hardcoded switching on the config being printed. Note that this isn't complete yet for throttle confs. The next patch will convert setting for these confs and will complete the transition. * Printing uses read_seq_string callback (other methods will be phased out). Note that blkio_group_conf.iops[2] is changed to u64 so that they can be manipulated with the same functions. This is transitional and will go away later. After this patch, per-device configurations - weight, bps and iops - use __blkg_prfill_u64() for printing which uses white space as delimiter instead of tab. 
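To make the new shape concrete, here is an illustrative condensation of the hunks below; it uses only helpers introduced in this series and assumes nothing beyond them. A per-device configuration reader is now just a small prfill callback paired with the shared blkcg_print_blkgs() iterator, e.g. for weight_device:

    static u64 blkg_prfill_weight_device(struct seq_file *sf,
                                         struct blkg_policy_data *pd, int off)
    {
            /* nothing to print if no per-device weight is configured */
            if (!pd->conf.weight)
                    return 0;
            /* emits "<devname> <weight>\n" for this blkg's device */
            return __blkg_prfill_u64(sf, pd, pd->conf.weight);
    }

    static int blkcg_print_weight_device(struct cgroup *cgrp,
                                         struct cftype *cft,
                                         struct seq_file *sf)
    {
            /* walk the cgroup's blkgs and apply the prfill callback */
            blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp),
                              blkg_prfill_weight_device,
                              BLKIO_POLICY_PROP, 0, false);
            return 0;
    }

The throttle bps/iops files reuse the same pairing through blkg_prfill_conf_u64() and blkcg_print_conf_u64(), selecting the field to print by offset instead of a hardcoded switch.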
Signed-off-by: Tejun Heo --- block/blk-cgroup.c | 156 +++++++++++++++++++---------------------------------- block/blk-cgroup.h | 3 +- 2 files changed, 55 insertions(+), 104 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 13cae77e8bf6..a9723a8dc983 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1115,95 +1115,28 @@ static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft, return ret; } -static void blkio_print_group_conf(struct cftype *cft, struct blkio_group *blkg, - struct seq_file *m) +/* for propio conf */ +static u64 blkg_prfill_weight_device(struct seq_file *sf, + struct blkg_policy_data *pd, int off) { - int plid = BLKIOFILE_POLICY(cft->private); - int fileid = BLKIOFILE_ATTR(cft->private); - struct blkg_policy_data *pd = blkg->pd[plid]; - const char *dname = blkg_dev_name(blkg); - int rw = WRITE; - - if (!dname) - return; - - switch (plid) { - case BLKIO_POLICY_PROP: - if (pd->conf.weight) - seq_printf(m, "%s\t%u\n", - dname, pd->conf.weight); - break; - case BLKIO_POLICY_THROTL: - switch (fileid) { - case BLKIO_THROTL_read_bps_device: - rw = READ; - case BLKIO_THROTL_write_bps_device: - if (pd->conf.bps[rw]) - seq_printf(m, "%s\t%llu\n", - dname, pd->conf.bps[rw]); - break; - case BLKIO_THROTL_read_iops_device: - rw = READ; - case BLKIO_THROTL_write_iops_device: - if (pd->conf.iops[rw]) - seq_printf(m, "%s\t%u\n", - dname, pd->conf.iops[rw]); - break; - } - break; - default: - BUG(); - } + if (!pd->conf.weight) + return 0; + return __blkg_prfill_u64(sf, pd, pd->conf.weight); } -/* cgroup files which read their data from policy nodes end up here */ -static void blkio_read_conf(struct cftype *cft, struct blkio_cgroup *blkcg, - struct seq_file *m) +static int blkcg_print_weight_device(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) { - struct blkio_group *blkg; - struct hlist_node *n; - - spin_lock_irq(&blkcg->lock); - hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) - blkio_print_group_conf(cft, blkg, m); - spin_unlock_irq(&blkcg->lock); + blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), + blkg_prfill_weight_device, BLKIO_POLICY_PROP, 0, + false); + return 0; } -static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *m) +static int blkcg_print_weight(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) { - struct blkio_cgroup *blkcg; - enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private); - int name = BLKIOFILE_ATTR(cft->private); - - blkcg = cgroup_to_blkio_cgroup(cgrp); - - switch(plid) { - case BLKIO_POLICY_PROP: - switch(name) { - case BLKIO_PROP_weight_device: - blkio_read_conf(cft, blkcg, m); - return 0; - default: - BUG(); - } - break; - case BLKIO_POLICY_THROTL: - switch(name){ - case BLKIO_THROTL_read_bps_device: - case BLKIO_THROTL_write_bps_device: - case BLKIO_THROTL_read_iops_device: - case BLKIO_THROTL_write_iops_device: - blkio_read_conf(cft, blkcg, m); - return 0; - default: - BUG(); - } - break; - default: - BUG(); - } - + seq_printf(sf, "%u\n", cgroup_to_blkio_cgroup(cgrp)->weight); return 0; } @@ -1233,40 +1166,59 @@ static int blkcg_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) return 0; } -static u64 blkiocg_file_read_u64 (struct cgroup *cgrp, struct cftype *cft) { - struct blkio_cgroup *blkcg; - enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private); - int name = BLKIOFILE_ATTR(cft->private); +/* for blk-throttle conf */ +#ifdef CONFIG_BLK_DEV_THROTTLING +static u64 
blkg_prfill_conf_u64(struct seq_file *sf, + struct blkg_policy_data *pd, int off) +{ + u64 v = *(u64 *)((void *)&pd->conf + off); - blkcg = cgroup_to_blkio_cgroup(cgrp); + if (!v) + return 0; + return __blkg_prfill_u64(sf, pd, v); +} - switch(plid) { - case BLKIO_POLICY_PROP: - switch(name) { - case BLKIO_PROP_weight: - return (u64)blkcg->weight; - } +static int blkcg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + int off; + + switch (BLKIOFILE_ATTR(cft->private)) { + case BLKIO_THROTL_read_bps_device: + off = offsetof(struct blkio_group_conf, bps[READ]); + break; + case BLKIO_THROTL_write_bps_device: + off = offsetof(struct blkio_group_conf, bps[WRITE]); + break; + case BLKIO_THROTL_read_iops_device: + off = offsetof(struct blkio_group_conf, iops[READ]); + break; + case BLKIO_THROTL_write_iops_device: + off = offsetof(struct blkio_group_conf, iops[WRITE]); break; default: - BUG(); + return -EINVAL; } + + blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), + blkg_prfill_conf_u64, BLKIO_POLICY_THROTL, + off, false); return 0; } +#endif struct cftype blkio_files[] = { { .name = "weight_device", .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, BLKIO_PROP_weight_device), - .read_seq_string = blkiocg_file_read, + .read_seq_string = blkcg_print_weight_device, .write_string = blkiocg_file_write, .max_write_len = 256, }, { .name = "weight", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, - BLKIO_PROP_weight), - .read_u64 = blkiocg_file_read_u64, + .read_seq_string = blkcg_print_weight, .write_u64 = blkcg_set_weight, }, { @@ -1326,7 +1278,7 @@ struct cftype blkio_files[] = { .name = "throttle.read_bps_device", .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL, BLKIO_THROTL_read_bps_device), - .read_seq_string = blkiocg_file_read, + .read_seq_string = blkcg_print_conf_u64, .write_string = blkiocg_file_write, .max_write_len = 256, }, @@ -1335,7 +1287,7 @@ struct cftype blkio_files[] = { .name = "throttle.write_bps_device", .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL, BLKIO_THROTL_write_bps_device), - .read_seq_string = blkiocg_file_read, + .read_seq_string = blkcg_print_conf_u64, .write_string = blkiocg_file_write, .max_write_len = 256, }, @@ -1344,7 +1296,7 @@ struct cftype blkio_files[] = { .name = "throttle.read_iops_device", .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL, BLKIO_THROTL_read_iops_device), - .read_seq_string = blkiocg_file_read, + .read_seq_string = blkcg_print_conf_u64, .write_string = blkiocg_file_write, .max_write_len = 256, }, @@ -1353,7 +1305,7 @@ struct cftype blkio_files[] = { .name = "throttle.write_iops_device", .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL, BLKIO_THROTL_write_iops_device), - .read_seq_string = blkiocg_file_read, + .read_seq_string = blkcg_print_conf_u64, .write_string = blkiocg_file_write, .max_write_len = 256, }, diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 7331d7965a5e..b67eefa706c3 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -52,7 +52,6 @@ enum blkg_state_flags { /* cgroup files owned by proportional weight policy */ enum blkcg_file_name_prop { - BLKIO_PROP_weight = 1, BLKIO_PROP_weight_device, }; @@ -130,7 +129,7 @@ struct blkio_group_stats_cpu { struct blkio_group_conf { unsigned int weight; - unsigned int iops[2]; + u64 iops[2]; u64 bps[2]; }; -- cgit v1.2.3-59-g8ed1b From 3a8b31d396b296df4b8594429d86d415d3409432 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:43 -0700 Subject: blkcg: restructure blkio_group configruation setting As part of userland 
interface restructuring, this patch updates per-blkio_group configuration setting. Instead of funneling everything through a master function which has hard-coded cases for each config file it may handle, the common part is factored into blkg_conf_prep() and blkg_conf_finish() and different configuration setters are implemented using the helpers. While this doesn't result in immediate LOC reduction, this enables further cleanups and more modular implementation. Signed-off-by: Tejun Heo --- block/blk-cgroup.c | 274 ++++++++++++++++++++++++++++------------------------- block/blk-cgroup.h | 13 --- 2 files changed, 147 insertions(+), 140 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index a9723a8dc983..1e1ee2af7b5f 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -43,12 +43,6 @@ EXPORT_SYMBOL_GPL(blkio_root_cgroup); static struct blkio_policy_type *blkio_policy[BLKIO_NR_POLICIES]; -/* for encoding cft->private value on file */ -#define BLKIOFILE_PRIVATE(x, val) (((x) << 16) | (val)) -/* What policy owns the file, proportional or throttle */ -#define BLKIOFILE_POLICY(val) (((val) >> 16) & 0xffff) -#define BLKIOFILE_ATTR(val) ((val) & 0xffff) - struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), @@ -86,7 +80,7 @@ static inline void blkio_update_group_weight(struct blkio_group *blkg, } static inline void blkio_update_group_bps(struct blkio_group *blkg, int plid, - u64 bps, int fileid) + u64 bps, int rw) { struct blkio_policy_type *blkiop; @@ -96,21 +90,18 @@ static inline void blkio_update_group_bps(struct blkio_group *blkg, int plid, if (blkiop->plid != plid) continue; - if (fileid == BLKIO_THROTL_read_bps_device - && blkiop->ops.blkio_update_group_read_bps_fn) + if (rw == READ && blkiop->ops.blkio_update_group_read_bps_fn) blkiop->ops.blkio_update_group_read_bps_fn(blkg->q, blkg, bps); - if (fileid == BLKIO_THROTL_write_bps_device - && blkiop->ops.blkio_update_group_write_bps_fn) + if (rw == WRITE && blkiop->ops.blkio_update_group_write_bps_fn) blkiop->ops.blkio_update_group_write_bps_fn(blkg->q, blkg, bps); } } -static inline void blkio_update_group_iops(struct blkio_group *blkg, - int plid, unsigned int iops, - int fileid) +static inline void blkio_update_group_iops(struct blkio_group *blkg, int plid, + u64 iops, int rw) { struct blkio_policy_type *blkiop; @@ -120,13 +111,11 @@ static inline void blkio_update_group_iops(struct blkio_group *blkg, if (blkiop->plid != plid) continue; - if (fileid == BLKIO_THROTL_read_iops_device - && blkiop->ops.blkio_update_group_read_iops_fn) + if (rw == READ && blkiop->ops.blkio_update_group_read_iops_fn) blkiop->ops.blkio_update_group_read_iops_fn(blkg->q, blkg, iops); - if (fileid == BLKIO_THROTL_write_iops_device - && blkiop->ops.blkio_update_group_write_iops_fn) + if (rw == WRITE && blkiop->ops.blkio_update_group_write_iops_fn) blkiop->ops.blkio_update_group_write_iops_fn(blkg->q, blkg,iops); } @@ -975,19 +964,40 @@ static int blkcg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft, } #endif /* CONFIG_DEBUG_BLK_CGROUP */ -static int blkio_policy_parse_and_set(char *buf, enum blkio_policy_id plid, - int fileid, struct blkio_cgroup *blkcg) +struct blkg_conf_ctx { + struct gendisk *disk; + struct blkio_group *blkg; + u64 v; +}; + +/** + * blkg_conf_prep - parse and prepare for per-blkg config update + * @blkcg: target block cgroup + * @input: input string + * @ctx: blkg_conf_ctx to be filled + * + * 
Parse per-blkg config update from @input and initialize @ctx with the + * result. @ctx->blkg points to the blkg to be updated and @ctx->v the new + * value. This function returns with RCU read locked and must be paired + * with blkg_conf_finish(). + */ +static int blkg_conf_prep(struct blkio_cgroup *blkcg, const char *input, + struct blkg_conf_ctx *ctx) + __acquires(rcu) { - struct gendisk *disk = NULL; - struct blkio_group *blkg = NULL; - struct blkg_policy_data *pd; - char *s[4], *p, *major_s = NULL, *minor_s = NULL; + struct gendisk *disk; + struct blkio_group *blkg; + char *buf, *s[4], *p, *major_s, *minor_s; unsigned long major, minor; int i = 0, ret = -EINVAL; int part; dev_t dev; u64 temp; + buf = kstrdup(input, GFP_KERNEL); + if (!buf) + return -ENOMEM; + memset(s, 0, sizeof(s)); while ((p = strsep(&buf, " ")) != NULL) { @@ -1037,82 +1047,42 @@ static int blkio_policy_parse_and_set(char *buf, enum blkio_policy_id plid, if (IS_ERR(blkg)) { ret = PTR_ERR(blkg); - goto out_unlock; - } - - pd = blkg->pd[plid]; - - switch (plid) { - case BLKIO_POLICY_PROP: - if ((temp < BLKIO_WEIGHT_MIN && temp > 0) || - temp > BLKIO_WEIGHT_MAX) - goto out_unlock; - - pd->conf.weight = temp; - blkio_update_group_weight(blkg, plid, temp ?: blkcg->weight); - break; - case BLKIO_POLICY_THROTL: - switch(fileid) { - case BLKIO_THROTL_read_bps_device: - pd->conf.bps[READ] = temp; - blkio_update_group_bps(blkg, plid, temp ?: -1, fileid); - break; - case BLKIO_THROTL_write_bps_device: - pd->conf.bps[WRITE] = temp; - blkio_update_group_bps(blkg, plid, temp ?: -1, fileid); - break; - case BLKIO_THROTL_read_iops_device: - if (temp > THROTL_IOPS_MAX) - goto out_unlock; - pd->conf.iops[READ] = temp; - blkio_update_group_iops(blkg, plid, temp ?: -1, fileid); - break; - case BLKIO_THROTL_write_iops_device: - if (temp > THROTL_IOPS_MAX) - goto out_unlock; - pd->conf.iops[WRITE] = temp; - blkio_update_group_iops(blkg, plid, temp ?: -1, fileid); - break; + rcu_read_unlock(); + put_disk(disk); + /* + * If queue was bypassing, we should retry. Do so after a + * short msleep(). It isn't strictly necessary but queue + * can be bypassing for some time and it's always nice to + * avoid busy looping. + */ + if (ret == -EBUSY) { + msleep(10); + ret = restart_syscall(); } - break; - default: - BUG(); + goto out; } + + ctx->disk = disk; + ctx->blkg = blkg; + ctx->v = temp; ret = 0; -out_unlock: - rcu_read_unlock(); out: - put_disk(disk); - - /* - * If queue was bypassing, we should retry. Do so after a short - * msleep(). It isn't strictly necessary but queue can be - * bypassing for some time and it's always nice to avoid busy - * looping. - */ - if (ret == -EBUSY) { - msleep(10); - return restart_syscall(); - } + kfree(buf); return ret; } -static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft, - const char *buffer) +/** + * blkg_conf_finish - finish up per-blkg config update + * @ctx: blkg_conf_ctx intiailized by blkg_conf_prep() + * + * Finish up after per-blkg config update. This function must be paired + * with blkg_conf_prep(). 
+ */ +static void blkg_conf_finish(struct blkg_conf_ctx *ctx) + __releases(rcu) { - int ret = 0; - char *buf; - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private); - int fileid = BLKIOFILE_ATTR(cft->private); - - buf = kstrdup(buffer, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - ret = blkio_policy_parse_and_set(buf, plid, fileid, blkcg); - kfree(buf); - return ret; + rcu_read_unlock(); + put_disk(ctx->disk); } /* for propio conf */ @@ -1140,6 +1110,32 @@ static int blkcg_print_weight(struct cgroup *cgrp, struct cftype *cft, return 0; } +static int blkcg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + struct blkg_policy_data *pd; + struct blkg_conf_ctx ctx; + int ret; + + ret = blkg_conf_prep(blkcg, buf, &ctx); + if (ret) + return ret; + + ret = -EINVAL; + pd = ctx.blkg->pd[BLKIO_POLICY_PROP]; + if (pd && (!ctx.v || (ctx.v >= BLKIO_WEIGHT_MIN && + ctx.v <= BLKIO_WEIGHT_MAX))) { + pd->conf.weight = ctx.v; + blkio_update_group_weight(ctx.blkg, BLKIO_POLICY_PROP, + ctx.v ?: blkcg->weight); + ret = 0; + } + + blkg_conf_finish(&ctx); + return ret; +} + static int blkcg_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); @@ -1181,39 +1177,67 @@ static u64 blkg_prfill_conf_u64(struct seq_file *sf, static int blkcg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { - int off; - - switch (BLKIOFILE_ATTR(cft->private)) { - case BLKIO_THROTL_read_bps_device: - off = offsetof(struct blkio_group_conf, bps[READ]); - break; - case BLKIO_THROTL_write_bps_device: - off = offsetof(struct blkio_group_conf, bps[WRITE]); - break; - case BLKIO_THROTL_read_iops_device: - off = offsetof(struct blkio_group_conf, iops[READ]); - break; - case BLKIO_THROTL_write_iops_device: - off = offsetof(struct blkio_group_conf, iops[WRITE]); - break; - default: - return -EINVAL; - } - blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), blkg_prfill_conf_u64, BLKIO_POLICY_THROTL, - off, false); + cft->private, false); return 0; } + +static int blkcg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft, + const char *buf, int rw, + void (*update)(struct blkio_group *, int, u64, int)) +{ + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + struct blkg_policy_data *pd; + struct blkg_conf_ctx ctx; + int ret; + + ret = blkg_conf_prep(blkcg, buf, &ctx); + if (ret) + return ret; + + ret = -EINVAL; + pd = ctx.blkg->pd[BLKIO_POLICY_THROTL]; + if (pd) { + *(u64 *)((void *)&pd->conf + cft->private) = ctx.v; + update(ctx.blkg, BLKIO_POLICY_THROTL, ctx.v ?: -1, rw); + ret = 0; + } + + blkg_conf_finish(&ctx); + return ret; +} + +static int blkcg_set_conf_bps_r(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + return blkcg_set_conf_u64(cgrp, cft, buf, READ, blkio_update_group_bps); +} + +static int blkcg_set_conf_bps_w(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + return blkcg_set_conf_u64(cgrp, cft, buf, WRITE, blkio_update_group_bps); +} + +static int blkcg_set_conf_iops_r(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + return blkcg_set_conf_u64(cgrp, cft, buf, READ, blkio_update_group_iops); +} + +static int blkcg_set_conf_iops_w(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + return blkcg_set_conf_u64(cgrp, cft, buf, WRITE, blkio_update_group_iops); +} #endif struct cftype blkio_files[] = { { .name = 
"weight_device", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, - BLKIO_PROP_weight_device), .read_seq_string = blkcg_print_weight_device, - .write_string = blkiocg_file_write, + .write_string = blkcg_set_weight_device, .max_write_len = 256, }, { @@ -1276,37 +1300,33 @@ struct cftype blkio_files[] = { #ifdef CONFIG_BLK_DEV_THROTTLING { .name = "throttle.read_bps_device", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL, - BLKIO_THROTL_read_bps_device), + .private = offsetof(struct blkio_group_conf, bps[READ]), .read_seq_string = blkcg_print_conf_u64, - .write_string = blkiocg_file_write, + .write_string = blkcg_set_conf_bps_r, .max_write_len = 256, }, { .name = "throttle.write_bps_device", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL, - BLKIO_THROTL_write_bps_device), + .private = offsetof(struct blkio_group_conf, bps[WRITE]), .read_seq_string = blkcg_print_conf_u64, - .write_string = blkiocg_file_write, + .write_string = blkcg_set_conf_bps_w, .max_write_len = 256, }, { .name = "throttle.read_iops_device", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL, - BLKIO_THROTL_read_iops_device), + .private = offsetof(struct blkio_group_conf, iops[READ]), .read_seq_string = blkcg_print_conf_u64, - .write_string = blkiocg_file_write, + .write_string = blkcg_set_conf_iops_r, .max_write_len = 256, }, { .name = "throttle.write_iops_device", - .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL, - BLKIO_THROTL_write_iops_device), + .private = offsetof(struct blkio_group_conf, iops[WRITE]), .read_seq_string = blkcg_print_conf_u64, - .write_string = blkiocg_file_write, + .write_string = blkcg_set_conf_iops_w, .max_write_len = 256, }, { diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index b67eefa706c3..108ffbf0d184 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -50,19 +50,6 @@ enum blkg_state_flags { BLKG_empty, }; -/* cgroup files owned by proportional weight policy */ -enum blkcg_file_name_prop { - BLKIO_PROP_weight_device, -}; - -/* cgroup files owned by throttle policy */ -enum blkcg_file_name_throtl { - BLKIO_THROTL_read_bps_device, - BLKIO_THROTL_write_bps_device, - BLKIO_THROTL_read_iops_device, - BLKIO_THROTL_write_iops_device, -}; - struct blkio_cgroup { struct cgroup_subsys_state css; unsigned int weight; -- cgit v1.2.3-59-g8ed1b From 829fdb50004de78f1bd187e428d72edcd9721cb8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:43 -0700 Subject: blkcg: export conf/stat helpers to prepare for reorganization conf/stat handling is about to be moved to policy implementation from blkcg core. Export conf/stat helpers from blkcg core so that blk-throttle and cfq-iosched can use them. Signed-off-by: Tejun Heo --- block/blk-cgroup.c | 52 +++++++++++++++++++++++++--------------------------- block/blk-cgroup.h | 27 +++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 27 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b07a501839e7..53976f2704a5 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -11,7 +11,6 @@ * Nauman Rafique */ #include -#include #include #include #include @@ -767,10 +766,9 @@ static const char *blkg_dev_name(struct blkio_group *blkg) * This is to be used to construct print functions for * cftype->read_seq_string method. 
*/ -static void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg, - u64 (*prfill)(struct seq_file *, - struct blkg_policy_data *, int), - int pol, int data, bool show_total) +void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg, + u64 (*prfill)(struct seq_file *, struct blkg_policy_data *, int), + int pol, int data, bool show_total) { struct blkio_group *blkg; struct hlist_node *n; @@ -785,6 +783,7 @@ static void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg, if (show_total) seq_printf(sf, "Total %llu\n", (unsigned long long)total); } +EXPORT_SYMBOL_GPL(blkcg_print_blkgs); /** * __blkg_prfill_u64 - prfill helper for a single u64 value @@ -794,8 +793,7 @@ static void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg, * * Print @v to @sf for the device assocaited with @pd. */ -static u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, - u64 v) +u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v) { const char *dname = blkg_dev_name(pd->blkg); @@ -805,6 +803,7 @@ static u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v); return v; } +EXPORT_SYMBOL_GPL(__blkg_prfill_u64); /** * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat @@ -814,9 +813,8 @@ static u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, * * Print @rwstat to @sf for the device assocaited with @pd. */ -static u64 __blkg_prfill_rwstat(struct seq_file *sf, - struct blkg_policy_data *pd, - const struct blkg_rwstat *rwstat) +u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, + const struct blkg_rwstat *rwstat) { static const char *rwstr[] = { [BLKG_RWSTAT_READ] = "Read", @@ -856,8 +854,8 @@ static u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, } /* print blkg_stat specified by BLKCG_STAT_PRIV() */ -static int blkcg_print_stat(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf) +int blkcg_print_stat(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); @@ -866,10 +864,11 @@ static int blkcg_print_stat(struct cgroup *cgrp, struct cftype *cft, BLKCG_STAT_OFF(cft->private), false); return 0; } +EXPORT_SYMBOL_GPL(blkcg_print_stat); /* print blkg_rwstat specified by BLKCG_STAT_PRIV() */ -static int blkcg_print_rwstat(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf) +int blkcg_print_rwstat(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); @@ -878,6 +877,7 @@ static int blkcg_print_rwstat(struct cgroup *cgrp, struct cftype *cft, BLKCG_STAT_OFF(cft->private), true); return 0; } +EXPORT_SYMBOL_GPL(blkcg_print_rwstat); static u64 blkg_prfill_cpu_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off) @@ -914,8 +914,8 @@ static u64 blkg_prfill_cpu_rwstat(struct seq_file *sf, } /* print per-cpu blkg_stat specified by BLKCG_STAT_PRIV() */ -static int blkcg_print_cpu_stat(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf) +int blkcg_print_cpu_stat(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); @@ -924,10 +924,11 @@ static int blkcg_print_cpu_stat(struct cgroup *cgrp, struct cftype *cft, BLKCG_STAT_OFF(cft->private), false); return 0; } +EXPORT_SYMBOL_GPL(blkcg_print_cpu_stat); /* print per-cpu blkg_rwstat 
specified by BLKCG_STAT_PRIV() */ -static int blkcg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf) +int blkcg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); @@ -936,6 +937,7 @@ static int blkcg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, BLKCG_STAT_OFF(cft->private), true); return 0; } +EXPORT_SYMBOL_GPL(blkcg_print_cpu_rwstat); #ifdef CONFIG_DEBUG_BLK_CGROUP static u64 blkg_prfill_avg_queue_size(struct seq_file *sf, @@ -964,12 +966,6 @@ static int blkcg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft, } #endif /* CONFIG_DEBUG_BLK_CGROUP */ -struct blkg_conf_ctx { - struct gendisk *disk; - struct blkio_group *blkg; - u64 v; -}; - /** * blkg_conf_prep - parse and prepare for per-blkg config update * @blkcg: target block cgroup @@ -981,8 +977,8 @@ struct blkg_conf_ctx { * value. This function returns with RCU read locked and must be paired * with blkg_conf_finish(). */ -static int blkg_conf_prep(struct blkio_cgroup *blkcg, const char *input, - struct blkg_conf_ctx *ctx) +int blkg_conf_prep(struct blkio_cgroup *blkcg, const char *input, + struct blkg_conf_ctx *ctx) __acquires(rcu) { struct gendisk *disk; @@ -1026,6 +1022,7 @@ static int blkg_conf_prep(struct blkio_cgroup *blkcg, const char *input, ctx->v = v; return 0; } +EXPORT_SYMBOL_GPL(blkg_conf_prep); /** * blkg_conf_finish - finish up per-blkg config update @@ -1034,12 +1031,13 @@ static int blkg_conf_prep(struct blkio_cgroup *blkcg, const char *input, * Finish up after per-blkg config update. This function must be paired * with blkg_conf_prep(). */ -static void blkg_conf_finish(struct blkg_conf_ctx *ctx) +void blkg_conf_finish(struct blkg_conf_ctx *ctx) __releases(rcu) { rcu_read_unlock(); put_disk(ctx->disk); } +EXPORT_SYMBOL_GPL(blkg_conf_finish); /* for propio conf */ static u64 blkg_prfill_weight_device(struct seq_file *sf, diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 108ffbf0d184..361ecfa4d28d 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -15,6 +15,7 @@ #include #include +#include enum blkio_policy_id { BLKIO_POLICY_PROP = 0, /* Proportional Bandwidth division */ @@ -193,6 +194,32 @@ extern void blkg_destroy_all(struct request_queue *q, bool destroy_root); extern void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid); +void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg, + u64 (*prfill)(struct seq_file *, struct blkg_policy_data *, int), + int pol, int data, bool show_total); +u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); +u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, + const struct blkg_rwstat *rwstat); +int blkcg_print_stat(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf); +int blkcg_print_rwstat(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf); +int blkcg_print_cpu_stat(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf); +int blkcg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf); + +struct blkg_conf_ctx { + struct gendisk *disk; + struct blkio_group *blkg; + u64 v; +}; + +int blkg_conf_prep(struct blkio_cgroup *blkcg, const char *input, + struct blkg_conf_ctx *ctx); +void blkg_conf_finish(struct blkg_conf_ctx *ctx); + + /** * blkg_to_pdata - get policy private data * @blkg: blkg of interest -- cgit v1.2.3-59-g8ed1b From 44ea53de46a8b01a65ae6217f47e00b516725190 Mon Sep 17 
00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:43 -0700 Subject: blkcg: implement blkio_policy_type->cftypes Add blkiop->cftypes which is added and removed together with the policy. This will be used to move conf/stat handling to the policies. Signed-off-by: Tejun Heo --- block/blk-cgroup.c | 6 ++++++ block/blk-cgroup.h | 1 + 2 files changed, 7 insertions(+) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 53976f2704a5..4e714f8ddcd2 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1538,6 +1538,9 @@ void blkio_policy_register(struct blkio_policy_type *blkiop) list_for_each_entry(q, &all_q_list, all_q_node) update_root_blkg_pd(q, blkiop->plid); blkcg_bypass_end(); + + if (blkiop->cftypes) + WARN_ON(cgroup_add_cftypes(&blkio_subsys, blkiop->cftypes)); } EXPORT_SYMBOL_GPL(blkio_policy_register); @@ -1545,6 +1548,9 @@ void blkio_policy_unregister(struct blkio_policy_type *blkiop) { struct request_queue *q; + if (blkiop->cftypes) + cgroup_rm_cftypes(&blkio_subsys, blkiop->cftypes); + blkcg_bypass_start(); spin_lock(&blkio_list_lock); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 361ecfa4d28d..fa744d57bebb 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -181,6 +181,7 @@ struct blkio_policy_type { struct blkio_policy_ops ops; enum blkio_policy_id plid; size_t pdata_size; /* policy specific private data size */ + struct cftype *cftypes; /* cgroup files for the policy */ }; extern int blkcg_init_queue(struct request_queue *q); -- cgit v1.2.3-59-g8ed1b From 60c2bc2d5a12369deef395cda41638d7e6b6bf19 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:43 -0700 Subject: blkcg: move conf/stat file handling code to policies blkcg conf/stat handling is convoluted in that details which belong to specific policy implementations are all out in blkcg core and then policies hook into core layer to access and manipulate confs and stats. This sadly achieves both inflexibility (confs/stats can't be modified without messing with blkcg core) and complexity (all the call-ins and call-backs). The previous patches restructured conf and stat handling code such that they can be separated out. This patch relocates the file handling part. All conf/stat file handling code which belongs to BLKIO_POLICY_PROP is moved to cfq-iosched.c and all BKLIO_POLICY_THROTL code to blk-throtl.c. The move is verbatim except for blkio_update_group_{weight|bps|iops}() callbacks which relays conf changes to policies. The configuration settings are handled in policies themselves so the relaying isn't necessary. Conf setting functions are modified to directly call per-policy update functions and the relaying mechanism is dropped. 
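In outline, and as a condensed sketch of the blk-throttle hunks below (all names come from this series), each policy now owns its cftype table and exposes it through the ->cftypes member added by the previous patch; blkio_policy_register() then adds the files to the blkio subsys via cgroup_add_cftypes():

    static struct cftype throtl_files[] = {
            {
                    .name            = "throttle.read_bps_device",
                    .private         = offsetof(struct blkio_group_conf, bps[READ]),
                    .read_seq_string = blkcg_print_conf_u64,
                    .write_string    = blkcg_set_conf_bps_r,
                    .max_write_len   = 256,
            },
            /* ... remaining throttle conf and stat files ... */
            { }     /* terminate */
    };

    static struct blkio_policy_type blkio_policy_throtl = {
            .ops            = { .blkio_init_group_fn = throtl_init_blkio_group, },
            .plid           = BLKIO_POLICY_THROTL,
            .pdata_size     = sizeof(struct throtl_grp),
            .cftypes        = throtl_files,  /* registered/removed with the policy */
    };

cfq-iosched follows the same pattern with cfq_blkcg_files for the proportional-weight and debug files, so blkcg core no longer needs per-policy knowledge of the cgroup files.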
Signed-off-by: Tejun Heo --- block/blk-cgroup.c | 373 --------------------------------------------------- block/blk-cgroup.h | 15 --- block/blk-throttle.c | 163 ++++++++++++++++++---- block/cfq-iosched.c | 202 +++++++++++++++++++++++++++- 4 files changed, 333 insertions(+), 420 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 4e714f8ddcd2..b963fb4b3995 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -63,63 +63,6 @@ struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio) } EXPORT_SYMBOL_GPL(bio_blkio_cgroup); -static inline void blkio_update_group_weight(struct blkio_group *blkg, - int plid, unsigned int weight) -{ - struct blkio_policy_type *blkiop; - - list_for_each_entry(blkiop, &blkio_list, list) { - /* If this policy does not own the blkg, do not send updates */ - if (blkiop->plid != plid) - continue; - if (blkiop->ops.blkio_update_group_weight_fn) - blkiop->ops.blkio_update_group_weight_fn(blkg->q, - blkg, weight); - } -} - -static inline void blkio_update_group_bps(struct blkio_group *blkg, int plid, - u64 bps, int rw) -{ - struct blkio_policy_type *blkiop; - - list_for_each_entry(blkiop, &blkio_list, list) { - - /* If this policy does not own the blkg, do not send updates */ - if (blkiop->plid != plid) - continue; - - if (rw == READ && blkiop->ops.blkio_update_group_read_bps_fn) - blkiop->ops.blkio_update_group_read_bps_fn(blkg->q, - blkg, bps); - - if (rw == WRITE && blkiop->ops.blkio_update_group_write_bps_fn) - blkiop->ops.blkio_update_group_write_bps_fn(blkg->q, - blkg, bps); - } -} - -static inline void blkio_update_group_iops(struct blkio_group *blkg, int plid, - u64 iops, int rw) -{ - struct blkio_policy_type *blkiop; - - list_for_each_entry(blkiop, &blkio_list, list) { - - /* If this policy does not own the blkg, do not send updates */ - if (blkiop->plid != plid) - continue; - - if (rw == READ && blkiop->ops.blkio_update_group_read_iops_fn) - blkiop->ops.blkio_update_group_read_iops_fn(blkg->q, - blkg, iops); - - if (rw == WRITE && blkiop->ops.blkio_update_group_write_iops_fn) - blkiop->ops.blkio_update_group_write_iops_fn(blkg->q, - blkg,iops); - } -} - #ifdef CONFIG_DEBUG_BLK_CGROUP /* This should be called with the queue_lock held. 
*/ static void blkio_set_start_group_wait_time(struct blkio_group *blkg, @@ -939,33 +882,6 @@ int blkcg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, } EXPORT_SYMBOL_GPL(blkcg_print_cpu_rwstat); -#ifdef CONFIG_DEBUG_BLK_CGROUP -static u64 blkg_prfill_avg_queue_size(struct seq_file *sf, - struct blkg_policy_data *pd, int off) -{ - u64 samples = blkg_stat_read(&pd->stats.avg_queue_size_samples); - u64 v = 0; - - if (samples) { - v = blkg_stat_read(&pd->stats.avg_queue_size_sum); - do_div(v, samples); - } - __blkg_prfill_u64(sf, pd, v); - return 0; -} - -/* print avg_queue_size */ -static int blkcg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf) -{ - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - - blkcg_print_blkgs(sf, blkcg, blkg_prfill_avg_queue_size, - BLKIO_POLICY_PROP, 0, false); - return 0; -} -#endif /* CONFIG_DEBUG_BLK_CGROUP */ - /** * blkg_conf_prep - parse and prepare for per-blkg config update * @blkcg: target block cgroup @@ -1039,300 +955,11 @@ void blkg_conf_finish(struct blkg_conf_ctx *ctx) } EXPORT_SYMBOL_GPL(blkg_conf_finish); -/* for propio conf */ -static u64 blkg_prfill_weight_device(struct seq_file *sf, - struct blkg_policy_data *pd, int off) -{ - if (!pd->conf.weight) - return 0; - return __blkg_prfill_u64(sf, pd, pd->conf.weight); -} - -static int blkcg_print_weight_device(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf) -{ - blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), - blkg_prfill_weight_device, BLKIO_POLICY_PROP, 0, - false); - return 0; -} - -static int blkcg_print_weight(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf) -{ - seq_printf(sf, "%u\n", cgroup_to_blkio_cgroup(cgrp)->weight); - return 0; -} - -static int blkcg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, - const char *buf) -{ - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - struct blkg_policy_data *pd; - struct blkg_conf_ctx ctx; - int ret; - - ret = blkg_conf_prep(blkcg, buf, &ctx); - if (ret) - return ret; - - ret = -EINVAL; - pd = ctx.blkg->pd[BLKIO_POLICY_PROP]; - if (pd && (!ctx.v || (ctx.v >= BLKIO_WEIGHT_MIN && - ctx.v <= BLKIO_WEIGHT_MAX))) { - pd->conf.weight = ctx.v; - blkio_update_group_weight(ctx.blkg, BLKIO_POLICY_PROP, - ctx.v ?: blkcg->weight); - ret = 0; - } - - blkg_conf_finish(&ctx); - return ret; -} - -static int blkcg_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) -{ - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - struct blkio_group *blkg; - struct hlist_node *n; - - if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX) - return -EINVAL; - - spin_lock(&blkio_list_lock); - spin_lock_irq(&blkcg->lock); - blkcg->weight = (unsigned int)val; - - hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { - struct blkg_policy_data *pd = blkg->pd[BLKIO_POLICY_PROP]; - - if (pd && !pd->conf.weight) - blkio_update_group_weight(blkg, BLKIO_POLICY_PROP, - blkcg->weight); - } - - spin_unlock_irq(&blkcg->lock); - spin_unlock(&blkio_list_lock); - return 0; -} - -/* for blk-throttle conf */ -#ifdef CONFIG_BLK_DEV_THROTTLING -static u64 blkg_prfill_conf_u64(struct seq_file *sf, - struct blkg_policy_data *pd, int off) -{ - u64 v = *(u64 *)((void *)&pd->conf + off); - - if (!v) - return 0; - return __blkg_prfill_u64(sf, pd, v); -} - -static int blkcg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf) -{ - blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), - blkg_prfill_conf_u64, BLKIO_POLICY_THROTL, - cft->private, 
false); - return 0; -} - -static int blkcg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft, - const char *buf, int rw, - void (*update)(struct blkio_group *, int, u64, int)) -{ - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - struct blkg_policy_data *pd; - struct blkg_conf_ctx ctx; - int ret; - - ret = blkg_conf_prep(blkcg, buf, &ctx); - if (ret) - return ret; - - ret = -EINVAL; - pd = ctx.blkg->pd[BLKIO_POLICY_THROTL]; - if (pd) { - *(u64 *)((void *)&pd->conf + cft->private) = ctx.v; - update(ctx.blkg, BLKIO_POLICY_THROTL, ctx.v ?: -1, rw); - ret = 0; - } - - blkg_conf_finish(&ctx); - return ret; -} - -static int blkcg_set_conf_bps_r(struct cgroup *cgrp, struct cftype *cft, - const char *buf) -{ - return blkcg_set_conf_u64(cgrp, cft, buf, READ, blkio_update_group_bps); -} - -static int blkcg_set_conf_bps_w(struct cgroup *cgrp, struct cftype *cft, - const char *buf) -{ - return blkcg_set_conf_u64(cgrp, cft, buf, WRITE, blkio_update_group_bps); -} - -static int blkcg_set_conf_iops_r(struct cgroup *cgrp, struct cftype *cft, - const char *buf) -{ - return blkcg_set_conf_u64(cgrp, cft, buf, READ, blkio_update_group_iops); -} - -static int blkcg_set_conf_iops_w(struct cgroup *cgrp, struct cftype *cft, - const char *buf) -{ - return blkcg_set_conf_u64(cgrp, cft, buf, WRITE, blkio_update_group_iops); -} -#endif - struct cftype blkio_files[] = { - { - .name = "weight_device", - .read_seq_string = blkcg_print_weight_device, - .write_string = blkcg_set_weight_device, - .max_write_len = 256, - }, - { - .name = "weight", - .read_seq_string = blkcg_print_weight, - .write_u64 = blkcg_set_weight, - }, - { - .name = "time", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, time)), - .read_seq_string = blkcg_print_stat, - }, - { - .name = "sectors", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats_cpu, sectors)), - .read_seq_string = blkcg_print_cpu_stat, - }, - { - .name = "io_service_bytes", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats_cpu, service_bytes)), - .read_seq_string = blkcg_print_cpu_rwstat, - }, - { - .name = "io_serviced", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats_cpu, serviced)), - .read_seq_string = blkcg_print_cpu_rwstat, - }, - { - .name = "io_service_time", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, service_time)), - .read_seq_string = blkcg_print_rwstat, - }, - { - .name = "io_wait_time", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, wait_time)), - .read_seq_string = blkcg_print_rwstat, - }, - { - .name = "io_merged", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, merged)), - .read_seq_string = blkcg_print_rwstat, - }, - { - .name = "io_queued", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, queued)), - .read_seq_string = blkcg_print_rwstat, - }, { .name = "reset_stats", .write_u64 = blkiocg_reset_stats, }, -#ifdef CONFIG_BLK_DEV_THROTTLING - { - .name = "throttle.read_bps_device", - .private = offsetof(struct blkio_group_conf, bps[READ]), - .read_seq_string = blkcg_print_conf_u64, - .write_string = blkcg_set_conf_bps_r, - .max_write_len = 256, - }, - - { - .name = "throttle.write_bps_device", - .private = offsetof(struct blkio_group_conf, bps[WRITE]), - .read_seq_string = blkcg_print_conf_u64, - .write_string = blkcg_set_conf_bps_w, - .max_write_len = 256, - }, - - 
{ - .name = "throttle.read_iops_device", - .private = offsetof(struct blkio_group_conf, iops[READ]), - .read_seq_string = blkcg_print_conf_u64, - .write_string = blkcg_set_conf_iops_r, - .max_write_len = 256, - }, - - { - .name = "throttle.write_iops_device", - .private = offsetof(struct blkio_group_conf, iops[WRITE]), - .read_seq_string = blkcg_print_conf_u64, - .write_string = blkcg_set_conf_iops_w, - .max_write_len = 256, - }, - { - .name = "throttle.io_service_bytes", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, - offsetof(struct blkio_group_stats_cpu, service_bytes)), - .read_seq_string = blkcg_print_cpu_rwstat, - }, - { - .name = "throttle.io_serviced", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, - offsetof(struct blkio_group_stats_cpu, serviced)), - .read_seq_string = blkcg_print_cpu_rwstat, - }, -#endif /* CONFIG_BLK_DEV_THROTTLING */ - -#ifdef CONFIG_DEBUG_BLK_CGROUP - { - .name = "avg_queue_size", - .read_seq_string = blkcg_print_avg_queue_size, - }, - { - .name = "group_wait_time", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, group_wait_time)), - .read_seq_string = blkcg_print_stat, - }, - { - .name = "idle_time", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, idle_time)), - .read_seq_string = blkcg_print_stat, - }, - { - .name = "empty_time", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, empty_time)), - .read_seq_string = blkcg_print_stat, - }, - { - .name = "dequeue", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, dequeue)), - .read_seq_string = blkcg_print_stat, - }, - { - .name = "unaccounted_time", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, unaccounted_time)), - .read_seq_string = blkcg_print_stat, - }, -#endif { } /* terminate */ }; diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index fa744d57bebb..ba64b2857571 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -156,24 +156,9 @@ struct blkio_group { }; typedef void (blkio_init_group_fn)(struct blkio_group *blkg); -typedef void (blkio_update_group_weight_fn)(struct request_queue *q, - struct blkio_group *blkg, unsigned int weight); -typedef void (blkio_update_group_read_bps_fn)(struct request_queue *q, - struct blkio_group *blkg, u64 read_bps); -typedef void (blkio_update_group_write_bps_fn)(struct request_queue *q, - struct blkio_group *blkg, u64 write_bps); -typedef void (blkio_update_group_read_iops_fn)(struct request_queue *q, - struct blkio_group *blkg, unsigned int read_iops); -typedef void (blkio_update_group_write_iops_fn)(struct request_queue *q, - struct blkio_group *blkg, unsigned int write_iops); struct blkio_policy_ops { blkio_init_group_fn *blkio_init_group_fn; - blkio_update_group_weight_fn *blkio_update_group_weight_fn; - blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn; - blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn; - blkio_update_group_read_iops_fn *blkio_update_group_read_iops_fn; - blkio_update_group_write_iops_fn *blkio_update_group_write_iops_fn; }; struct blkio_policy_type { diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 1cc6c23de2c1..fb6f25778fb2 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -804,6 +804,11 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) } } +/* + * Can not take queue lock in update functions as queue lock under + * blkcg_lock is not allowed. 
Under other paths we take blkcg_lock under + * queue_lock. + */ static void throtl_update_blkio_group_common(struct throtl_data *td, struct throtl_grp *tg) { @@ -813,51 +818,158 @@ static void throtl_update_blkio_group_common(struct throtl_data *td, throtl_schedule_delayed_work(td, 0); } -/* - * For all update functions, @q should be a valid pointer because these - * update functions are called under blkcg_lock, that means, blkg is - * valid and in turn @q is valid. queue exit path can not race because - * of blkcg_lock - * - * Can not take queue lock in update functions as queue lock under blkcg_lock - * is not allowed. Under other paths we take blkcg_lock under queue_lock. - */ -static void throtl_update_blkio_group_read_bps(struct request_queue *q, - struct blkio_group *blkg, u64 read_bps) +static u64 blkg_prfill_conf_u64(struct seq_file *sf, + struct blkg_policy_data *pd, int off) +{ + u64 v = *(u64 *)((void *)&pd->conf + off); + + if (!v) + return 0; + return __blkg_prfill_u64(sf, pd, v); +} + +static int blkcg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), + blkg_prfill_conf_u64, BLKIO_POLICY_THROTL, + cft->private, false); + return 0; +} + +static void throtl_update_blkio_group_read_bps(struct blkio_group *blkg, + u64 read_bps) { struct throtl_grp *tg = blkg_to_tg(blkg); tg->bps[READ] = read_bps; - throtl_update_blkio_group_common(q->td, tg); + throtl_update_blkio_group_common(blkg->q->td, tg); } -static void throtl_update_blkio_group_write_bps(struct request_queue *q, - struct blkio_group *blkg, u64 write_bps) +static void throtl_update_blkio_group_write_bps(struct blkio_group *blkg, + u64 write_bps) { struct throtl_grp *tg = blkg_to_tg(blkg); tg->bps[WRITE] = write_bps; - throtl_update_blkio_group_common(q->td, tg); + throtl_update_blkio_group_common(blkg->q->td, tg); } -static void throtl_update_blkio_group_read_iops(struct request_queue *q, - struct blkio_group *blkg, unsigned int read_iops) +static void throtl_update_blkio_group_read_iops(struct blkio_group *blkg, + u64 read_iops) { struct throtl_grp *tg = blkg_to_tg(blkg); tg->iops[READ] = read_iops; - throtl_update_blkio_group_common(q->td, tg); + throtl_update_blkio_group_common(blkg->q->td, tg); } -static void throtl_update_blkio_group_write_iops(struct request_queue *q, - struct blkio_group *blkg, unsigned int write_iops) +static void throtl_update_blkio_group_write_iops(struct blkio_group *blkg, + u64 write_iops) { struct throtl_grp *tg = blkg_to_tg(blkg); tg->iops[WRITE] = write_iops; - throtl_update_blkio_group_common(q->td, tg); + throtl_update_blkio_group_common(blkg->q->td, tg); +} + +static int blkcg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft, + const char *buf, + void (*update)(struct blkio_group *, u64)) +{ + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + struct blkg_policy_data *pd; + struct blkg_conf_ctx ctx; + int ret; + + ret = blkg_conf_prep(blkcg, buf, &ctx); + if (ret) + return ret; + + ret = -EINVAL; + pd = ctx.blkg->pd[BLKIO_POLICY_THROTL]; + if (pd) { + *(u64 *)((void *)&pd->conf + cft->private) = ctx.v; + update(ctx.blkg, ctx.v ?: -1); + ret = 0; + } + + blkg_conf_finish(&ctx); + return ret; } +static int blkcg_set_conf_bps_r(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + return blkcg_set_conf_u64(cgrp, cft, buf, + throtl_update_blkio_group_read_bps); +} + +static int blkcg_set_conf_bps_w(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + return 
blkcg_set_conf_u64(cgrp, cft, buf, + throtl_update_blkio_group_write_bps); +} + +static int blkcg_set_conf_iops_r(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + return blkcg_set_conf_u64(cgrp, cft, buf, + throtl_update_blkio_group_read_iops); +} + +static int blkcg_set_conf_iops_w(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + return blkcg_set_conf_u64(cgrp, cft, buf, + throtl_update_blkio_group_write_iops); +} + +static struct cftype throtl_files[] = { + { + .name = "throttle.read_bps_device", + .private = offsetof(struct blkio_group_conf, bps[READ]), + .read_seq_string = blkcg_print_conf_u64, + .write_string = blkcg_set_conf_bps_r, + .max_write_len = 256, + }, + { + .name = "throttle.write_bps_device", + .private = offsetof(struct blkio_group_conf, bps[WRITE]), + .read_seq_string = blkcg_print_conf_u64, + .write_string = blkcg_set_conf_bps_w, + .max_write_len = 256, + }, + { + .name = "throttle.read_iops_device", + .private = offsetof(struct blkio_group_conf, iops[READ]), + .read_seq_string = blkcg_print_conf_u64, + .write_string = blkcg_set_conf_iops_r, + .max_write_len = 256, + }, + { + .name = "throttle.write_iops_device", + .private = offsetof(struct blkio_group_conf, iops[WRITE]), + .read_seq_string = blkcg_print_conf_u64, + .write_string = blkcg_set_conf_iops_w, + .max_write_len = 256, + }, + { + .name = "throttle.io_service_bytes", + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, + offsetof(struct blkio_group_stats_cpu, service_bytes)), + .read_seq_string = blkcg_print_cpu_rwstat, + }, + { + .name = "throttle.io_serviced", + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, + offsetof(struct blkio_group_stats_cpu, serviced)), + .read_seq_string = blkcg_print_cpu_rwstat, + }, + { } /* terminate */ +}; + static void throtl_shutdown_wq(struct request_queue *q) { struct throtl_data *td = q->td; @@ -868,17 +980,10 @@ static void throtl_shutdown_wq(struct request_queue *q) static struct blkio_policy_type blkio_policy_throtl = { .ops = { .blkio_init_group_fn = throtl_init_blkio_group, - .blkio_update_group_read_bps_fn = - throtl_update_blkio_group_read_bps, - .blkio_update_group_write_bps_fn = - throtl_update_blkio_group_write_bps, - .blkio_update_group_read_iops_fn = - throtl_update_blkio_group_read_iops, - .blkio_update_group_write_iops_fn = - throtl_update_blkio_group_write_iops, }, .plid = BLKIO_POLICY_THROTL, .pdata_size = sizeof(struct throtl_grp), + .cftypes = throtl_files, }; bool blk_throtl_bio(struct request_queue *q, struct bio *bio) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 8cca6161d0bc..119e061a7675 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1058,8 +1058,7 @@ static void cfq_init_cfqg_base(struct cfq_group *cfqg) } #ifdef CONFIG_CFQ_GROUP_IOSCHED -static void cfq_update_blkio_group_weight(struct request_queue *q, - struct blkio_group *blkg, +static void cfq_update_blkio_group_weight(struct blkio_group *blkg, unsigned int weight) { struct cfq_group *cfqg = blkg_to_cfqg(blkg); @@ -1111,6 +1110,203 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) cfqg_get(cfqg); } +static u64 blkg_prfill_weight_device(struct seq_file *sf, + struct blkg_policy_data *pd, int off) +{ + if (!pd->conf.weight) + return 0; + return __blkg_prfill_u64(sf, pd, pd->conf.weight); +} + +static int blkcg_print_weight_device(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), + blkg_prfill_weight_device, BLKIO_POLICY_PROP, 0, + false); 
+ return 0; +} + +static int blkcg_print_weight(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + seq_printf(sf, "%u\n", cgroup_to_blkio_cgroup(cgrp)->weight); + return 0; +} + +static int blkcg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + struct blkg_policy_data *pd; + struct blkg_conf_ctx ctx; + int ret; + + ret = blkg_conf_prep(blkcg, buf, &ctx); + if (ret) + return ret; + + ret = -EINVAL; + pd = ctx.blkg->pd[BLKIO_POLICY_PROP]; + if (pd && (!ctx.v || (ctx.v >= BLKIO_WEIGHT_MIN && + ctx.v <= BLKIO_WEIGHT_MAX))) { + pd->conf.weight = ctx.v; + cfq_update_blkio_group_weight(ctx.blkg, ctx.v ?: blkcg->weight); + ret = 0; + } + + blkg_conf_finish(&ctx); + return ret; +} + +static int blkcg_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) +{ + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + struct blkio_group *blkg; + struct hlist_node *n; + + if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX) + return -EINVAL; + + spin_lock_irq(&blkcg->lock); + blkcg->weight = (unsigned int)val; + + hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { + struct blkg_policy_data *pd = blkg->pd[BLKIO_POLICY_PROP]; + + if (pd && !pd->conf.weight) + cfq_update_blkio_group_weight(blkg, blkcg->weight); + } + + spin_unlock_irq(&blkcg->lock); + return 0; +} + +#ifdef CONFIG_DEBUG_BLK_CGROUP +static u64 blkg_prfill_avg_queue_size(struct seq_file *sf, + struct blkg_policy_data *pd, int off) +{ + u64 samples = blkg_stat_read(&pd->stats.avg_queue_size_samples); + u64 v = 0; + + if (samples) { + v = blkg_stat_read(&pd->stats.avg_queue_size_sum); + do_div(v, samples); + } + __blkg_prfill_u64(sf, pd, v); + return 0; +} + +/* print avg_queue_size */ +static int blkcg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + + blkcg_print_blkgs(sf, blkcg, blkg_prfill_avg_queue_size, + BLKIO_POLICY_PROP, 0, false); + return 0; +} +#endif /* CONFIG_DEBUG_BLK_CGROUP */ + +static struct cftype cfq_blkcg_files[] = { + { + .name = "weight_device", + .read_seq_string = blkcg_print_weight_device, + .write_string = blkcg_set_weight_device, + .max_write_len = 256, + }, + { + .name = "weight", + .read_seq_string = blkcg_print_weight, + .write_u64 = blkcg_set_weight, + }, + { + .name = "time", + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats, time)), + .read_seq_string = blkcg_print_stat, + }, + { + .name = "sectors", + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats_cpu, sectors)), + .read_seq_string = blkcg_print_cpu_stat, + }, + { + .name = "io_service_bytes", + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats_cpu, service_bytes)), + .read_seq_string = blkcg_print_cpu_rwstat, + }, + { + .name = "io_serviced", + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats_cpu, serviced)), + .read_seq_string = blkcg_print_cpu_rwstat, + }, + { + .name = "io_service_time", + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats, service_time)), + .read_seq_string = blkcg_print_rwstat, + }, + { + .name = "io_wait_time", + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats, wait_time)), + .read_seq_string = blkcg_print_rwstat, + }, + { + .name = "io_merged", + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + 
offsetof(struct blkio_group_stats, merged)), + .read_seq_string = blkcg_print_rwstat, + }, + { + .name = "io_queued", + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats, queued)), + .read_seq_string = blkcg_print_rwstat, + }, +#ifdef CONFIG_DEBUG_BLK_CGROUP + { + .name = "avg_queue_size", + .read_seq_string = blkcg_print_avg_queue_size, + }, + { + .name = "group_wait_time", + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats, group_wait_time)), + .read_seq_string = blkcg_print_stat, + }, + { + .name = "idle_time", + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats, idle_time)), + .read_seq_string = blkcg_print_stat, + }, + { + .name = "empty_time", + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats, empty_time)), + .read_seq_string = blkcg_print_stat, + }, + { + .name = "dequeue", + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats, dequeue)), + .read_seq_string = blkcg_print_stat, + }, + { + .name = "unaccounted_time", + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, + offsetof(struct blkio_group_stats, unaccounted_time)), + .read_seq_string = blkcg_print_stat, + }, +#endif /* CONFIG_DEBUG_BLK_CGROUP */ + { } /* terminate */ +}; #else /* GROUP_IOSCHED */ static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg) @@ -3715,10 +3911,10 @@ static struct elevator_type iosched_cfq = { static struct blkio_policy_type blkio_policy_cfq = { .ops = { .blkio_init_group_fn = cfq_init_blkio_group, - .blkio_update_group_weight_fn = cfq_update_blkio_group_weight, }, .plid = BLKIO_POLICY_PROP, .pdata_size = sizeof(struct cfq_group), + .cftypes = cfq_blkcg_files, }; #endif -- cgit v1.2.3-59-g8ed1b From 629ed0b10209ffc4e1d439e5508d52d5e3a090b8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:44 -0700 Subject: blkcg: move statistics update code to policies As with conf/stats file handling code, there's no reason for stat update code to live in blkcg core with policies calling into update them. The current organization is both inflexible and complex. This patch moves stat update code to specific policies. All blkiocg_update_*_stats() functions which deal with BLKIO_POLICY_PROP stats are collapsed into their cfq_blkiocg_update_*_stats() counterparts. blkiocg_update_dispatch_stats() is used by both policies and duplicated as throtl_update_dispatch_stats() and cfq_blkiocg_update_dispatch_stats(). This will be cleaned up later. Signed-off-by: Tejun Heo --- block/blk-cgroup.c | 245 -------------------------------------------- block/blk-cgroup.h | 94 ----------------- block/blk-throttle.c | 37 +++++-- block/cfq-iosched.c | 280 +++++++++++++++++++++++++++++++++++++++++---------- 4 files changed, 259 insertions(+), 397 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b963fb4b3995..821a0a393e85 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -63,251 +63,6 @@ struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio) } EXPORT_SYMBOL_GPL(bio_blkio_cgroup); -#ifdef CONFIG_DEBUG_BLK_CGROUP -/* This should be called with the queue_lock held. 
*/ -static void blkio_set_start_group_wait_time(struct blkio_group *blkg, - struct blkio_policy_type *pol, - struct blkio_group *curr_blkg) -{ - struct blkg_policy_data *pd = blkg->pd[pol->plid]; - - if (blkio_blkg_waiting(&pd->stats)) - return; - if (blkg == curr_blkg) - return; - pd->stats.start_group_wait_time = sched_clock(); - blkio_mark_blkg_waiting(&pd->stats); -} - -/* This should be called with the queue_lock held. */ -static void blkio_update_group_wait_time(struct blkio_group_stats *stats) -{ - unsigned long long now; - - if (!blkio_blkg_waiting(stats)) - return; - - now = sched_clock(); - if (time_after64(now, stats->start_group_wait_time)) - blkg_stat_add(&stats->group_wait_time, - now - stats->start_group_wait_time); - blkio_clear_blkg_waiting(stats); -} - -/* This should be called with the queue_lock held. */ -static void blkio_end_empty_time(struct blkio_group_stats *stats) -{ - unsigned long long now; - - if (!blkio_blkg_empty(stats)) - return; - - now = sched_clock(); - if (time_after64(now, stats->start_empty_time)) - blkg_stat_add(&stats->empty_time, - now - stats->start_empty_time); - blkio_clear_blkg_empty(stats); -} - -void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol) -{ - struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - - lockdep_assert_held(blkg->q->queue_lock); - BUG_ON(blkio_blkg_idling(stats)); - - stats->start_idle_time = sched_clock(); - blkio_mark_blkg_idling(stats); -} -EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats); - -void blkiocg_update_idle_time_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol) -{ - struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - - lockdep_assert_held(blkg->q->queue_lock); - - if (blkio_blkg_idling(stats)) { - unsigned long long now = sched_clock(); - - if (time_after64(now, stats->start_idle_time)) - blkg_stat_add(&stats->idle_time, - now - stats->start_idle_time); - blkio_clear_blkg_idling(stats); - } -} -EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats); - -void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol) -{ - struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - - lockdep_assert_held(blkg->q->queue_lock); - - blkg_stat_add(&stats->avg_queue_size_sum, - blkg_rwstat_sum(&stats->queued)); - blkg_stat_add(&stats->avg_queue_size_samples, 1); - blkio_update_group_wait_time(stats); -} -EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats); - -void blkiocg_set_start_empty_time(struct blkio_group *blkg, - struct blkio_policy_type *pol) -{ - struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - - lockdep_assert_held(blkg->q->queue_lock); - - if (blkg_rwstat_sum(&stats->queued)) - return; - - /* - * group is already marked empty. This can happen if cfqq got new - * request in parent group and moved to this group while being added - * to service tree. Just ignore the event and move on. 
- */ - if (blkio_blkg_empty(stats)) - return; - - stats->start_empty_time = sched_clock(); - blkio_mark_blkg_empty(stats); -} -EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time); - -void blkiocg_update_dequeue_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, - unsigned long dequeue) -{ - struct blkg_policy_data *pd = blkg->pd[pol->plid]; - - lockdep_assert_held(blkg->q->queue_lock); - - blkg_stat_add(&pd->stats.dequeue, dequeue); -} -EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats); -#else -static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg, - struct blkio_policy_type *pol, - struct blkio_group *curr_blkg) { } -static inline void blkio_end_empty_time(struct blkio_group_stats *stats) { } -#endif - -void blkiocg_update_io_add_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, - struct blkio_group *curr_blkg, bool direction, - bool sync) -{ - struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); - - lockdep_assert_held(blkg->q->queue_lock); - - blkg_rwstat_add(&stats->queued, rw, 1); - blkio_end_empty_time(stats); - blkio_set_start_group_wait_time(blkg, pol, curr_blkg); -} -EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats); - -void blkiocg_update_io_remove_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, - bool direction, bool sync) -{ - struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); - - lockdep_assert_held(blkg->q->queue_lock); - - blkg_rwstat_add(&stats->queued, rw, -1); -} -EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats); - -void blkiocg_update_timeslice_used(struct blkio_group *blkg, - struct blkio_policy_type *pol, - unsigned long time, - unsigned long unaccounted_time) -{ - struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - - lockdep_assert_held(blkg->q->queue_lock); - - blkg_stat_add(&stats->time, time); -#ifdef CONFIG_DEBUG_BLK_CGROUP - blkg_stat_add(&stats->unaccounted_time, unaccounted_time); -#endif -} -EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used); - -/* - * should be called under rcu read lock or queue lock to make sure blkg pointer - * is valid. - */ -void blkiocg_update_dispatch_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, - uint64_t bytes, bool direction, bool sync) -{ - int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); - struct blkg_policy_data *pd = blkg->pd[pol->plid]; - struct blkio_group_stats_cpu *stats_cpu; - unsigned long flags; - - /* If per cpu stats are not allocated yet, don't do any accounting. */ - if (pd->stats_cpu == NULL) - return; - - /* - * Disabling interrupts to provide mutual exclusion between two - * writes on same cpu. It probably is not needed for 64bit. Not - * optimizing that case yet. - */ - local_irq_save(flags); - - stats_cpu = this_cpu_ptr(pd->stats_cpu); - - blkg_stat_add(&stats_cpu->sectors, bytes >> 9); - blkg_rwstat_add(&stats_cpu->serviced, rw, 1); - blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes); - - local_irq_restore(flags); -} -EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats); - -void blkiocg_update_completion_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, - uint64_t start_time, - uint64_t io_start_time, bool direction, - bool sync) -{ - struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - unsigned long long now = sched_clock(); - int rw = (direction ? REQ_WRITE : 0) | (sync ? 
REQ_SYNC : 0); - - lockdep_assert_held(blkg->q->queue_lock); - - if (time_after64(now, io_start_time)) - blkg_rwstat_add(&stats->service_time, rw, now - io_start_time); - if (time_after64(io_start_time, start_time)) - blkg_rwstat_add(&stats->wait_time, rw, - io_start_time - start_time); -} -EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats); - -/* Merged stats are per cpu. */ -void blkiocg_update_io_merged_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, - bool direction, bool sync) -{ - struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); - - lockdep_assert_held(blkg->q->queue_lock); - - blkg_rwstat_add(&stats->merged, rw, 1); -} -EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats); - /* * Worker for allocating per cpu stat for blk groups. This is scheduled on * the system_nrt_wq once there are some groups on the alloc_list waiting diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index ba64b2857571..0b0a176ee007 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -44,13 +44,6 @@ enum blkg_rwstat_type { BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR, }; -/* blkg state flags */ -enum blkg_state_flags { - BLKG_waiting = 0, - BLKG_idling, - BLKG_empty, -}; - struct blkio_cgroup { struct cgroup_subsys_state css; unsigned int weight; @@ -416,52 +409,6 @@ static inline void blkg_put(struct blkio_group *blkg) { } #define BLKIO_WEIGHT_MAX 1000 #define BLKIO_WEIGHT_DEFAULT 500 -#ifdef CONFIG_DEBUG_BLK_CGROUP -void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol); -void blkiocg_update_dequeue_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, - unsigned long dequeue); -void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol); -void blkiocg_update_idle_time_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol); -void blkiocg_set_start_empty_time(struct blkio_group *blkg, - struct blkio_policy_type *pol); - -#define BLKG_FLAG_FNS(name) \ -static inline void blkio_mark_blkg_##name( \ - struct blkio_group_stats *stats) \ -{ \ - stats->flags |= (1 << BLKG_##name); \ -} \ -static inline void blkio_clear_blkg_##name( \ - struct blkio_group_stats *stats) \ -{ \ - stats->flags &= ~(1 << BLKG_##name); \ -} \ -static inline int blkio_blkg_##name(struct blkio_group_stats *stats) \ -{ \ - return (stats->flags & (1 << BLKG_##name)) != 0; \ -} \ - -BLKG_FLAG_FNS(waiting) -BLKG_FLAG_FNS(idling) -BLKG_FLAG_FNS(empty) -#undef BLKG_FLAG_FNS -#else -static inline void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol) { } -static inline void blkiocg_update_dequeue_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, unsigned long dequeue) { } -static inline void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol) { } -static inline void blkiocg_update_idle_time_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol) { } -static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg, - struct blkio_policy_type *pol) { } -#endif - #ifdef CONFIG_BLK_CGROUP extern struct blkio_cgroup blkio_root_cgroup; extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); @@ -471,28 +418,6 @@ extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, struct request_queue *q, bool for_root); -void blkiocg_update_timeslice_used(struct 
blkio_group *blkg, - struct blkio_policy_type *pol, - unsigned long time, - unsigned long unaccounted_time); -void blkiocg_update_dispatch_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, - uint64_t bytes, bool direction, bool sync); -void blkiocg_update_completion_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, - uint64_t start_time, - uint64_t io_start_time, bool direction, - bool sync); -void blkiocg_update_io_merged_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, - bool direction, bool sync); -void blkiocg_update_io_add_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, - struct blkio_group *curr_blkg, bool direction, - bool sync); -void blkiocg_update_io_remove_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, - bool direction, bool sync); #else struct cgroup; static inline struct blkio_cgroup * @@ -502,24 +427,5 @@ bio_blkio_cgroup(struct bio *bio) { return NULL; } static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, void *key) { return NULL; } -static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg, - struct blkio_policy_type *pol, unsigned long time, - unsigned long unaccounted_time) { } -static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, uint64_t bytes, - bool direction, bool sync) { } -static inline void blkiocg_update_completion_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, uint64_t start_time, - uint64_t io_start_time, bool direction, bool sync) { } -static inline void blkiocg_update_io_merged_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, bool direction, - bool sync) { } -static inline void blkiocg_update_io_add_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, - struct blkio_group *curr_blkg, bool direction, - bool sync) { } -static inline void blkiocg_update_io_remove_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, bool direction, - bool sync) { } #endif #endif /* _BLK_CGROUP_H */ diff --git a/block/blk-throttle.c b/block/blk-throttle.c index fb6f25778fb2..5d647edc02a1 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -562,17 +562,42 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg, return 0; } +static void throtl_update_dispatch_stats(struct blkio_group *blkg, u64 bytes, + int rw) +{ + struct blkg_policy_data *pd = blkg->pd[BLKIO_POLICY_THROTL]; + struct blkio_group_stats_cpu *stats_cpu; + unsigned long flags; + + /* If per cpu stats are not allocated yet, don't do any accounting. */ + if (pd->stats_cpu == NULL) + return; + + /* + * Disabling interrupts to provide mutual exclusion between two + * writes on same cpu. It probably is not needed for 64bit. Not + * optimizing that case yet. 
+ */ + local_irq_save(flags); + + stats_cpu = this_cpu_ptr(pd->stats_cpu); + + blkg_stat_add(&stats_cpu->sectors, bytes >> 9); + blkg_rwstat_add(&stats_cpu->serviced, rw, 1); + blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes); + + local_irq_restore(flags); +} + static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) { bool rw = bio_data_dir(bio); - bool sync = rw_is_sync(bio->bi_rw); /* Charge the bio to the group */ tg->bytes_disp[rw] += bio->bi_size; tg->io_disp[rw]++; - blkiocg_update_dispatch_stats(tg_to_blkg(tg), &blkio_policy_throtl, - bio->bi_size, rw, sync); + throtl_update_dispatch_stats(tg_to_blkg(tg), bio->bi_size, bio->bi_rw); } static void throtl_add_bio_tg(struct throtl_data *td, struct throtl_grp *tg, @@ -1012,10 +1037,8 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) tg = throtl_lookup_tg(td, blkcg); if (tg) { if (tg_no_rule_group(tg, rw)) { - blkiocg_update_dispatch_stats(tg_to_blkg(tg), - &blkio_policy_throtl, - bio->bi_size, rw, - rw_is_sync(bio->bi_rw)); + throtl_update_dispatch_stats(tg_to_blkg(tg), + bio->bi_size, bio->bi_rw); goto out_unlock_rcu; } } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 2e13e9e689bd..49913804e8dd 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -15,6 +15,7 @@ #include #include #include "blk.h" +#include "blk-cgroup.h" static struct blkio_policy_type blkio_policy_cfq; @@ -365,9 +366,177 @@ CFQ_CFQQ_FNS(deep); CFQ_CFQQ_FNS(wait_busy); #undef CFQ_CFQQ_FNS -#ifdef CONFIG_CFQ_GROUP_IOSCHED +#if defined(CONFIG_CFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) -#include "blk-cgroup.h" +/* blkg state flags */ +enum blkg_state_flags { + BLKG_waiting = 0, + BLKG_idling, + BLKG_empty, +}; + +#define BLKG_FLAG_FNS(name) \ +static inline void blkio_mark_blkg_##name( \ + struct blkio_group_stats *stats) \ +{ \ + stats->flags |= (1 << BLKG_##name); \ +} \ +static inline void blkio_clear_blkg_##name( \ + struct blkio_group_stats *stats) \ +{ \ + stats->flags &= ~(1 << BLKG_##name); \ +} \ +static inline int blkio_blkg_##name(struct blkio_group_stats *stats) \ +{ \ + return (stats->flags & (1 << BLKG_##name)) != 0; \ +} \ + +BLKG_FLAG_FNS(waiting) +BLKG_FLAG_FNS(idling) +BLKG_FLAG_FNS(empty) +#undef BLKG_FLAG_FNS + +/* This should be called with the queue_lock held. */ +static void blkio_update_group_wait_time(struct blkio_group_stats *stats) +{ + unsigned long long now; + + if (!blkio_blkg_waiting(stats)) + return; + + now = sched_clock(); + if (time_after64(now, stats->start_group_wait_time)) + blkg_stat_add(&stats->group_wait_time, + now - stats->start_group_wait_time); + blkio_clear_blkg_waiting(stats); +} + +/* This should be called with the queue_lock held. */ +static void blkio_set_start_group_wait_time(struct blkio_group *blkg, + struct blkio_policy_type *pol, + struct blkio_group *curr_blkg) +{ + struct blkg_policy_data *pd = blkg->pd[pol->plid]; + + if (blkio_blkg_waiting(&pd->stats)) + return; + if (blkg == curr_blkg) + return; + pd->stats.start_group_wait_time = sched_clock(); + blkio_mark_blkg_waiting(&pd->stats); +} + +/* This should be called with the queue_lock held. 
*/ +static void blkio_end_empty_time(struct blkio_group_stats *stats) +{ + unsigned long long now; + + if (!blkio_blkg_empty(stats)) + return; + + now = sched_clock(); + if (time_after64(now, stats->start_empty_time)) + blkg_stat_add(&stats->empty_time, + now - stats->start_empty_time); + blkio_clear_blkg_empty(stats); +} + +static void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol, + unsigned long dequeue) +{ + struct blkg_policy_data *pd = blkg->pd[pol->plid]; + + lockdep_assert_held(blkg->q->queue_lock); + + blkg_stat_add(&pd->stats.dequeue, dequeue); +} + +static void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg, + struct blkio_policy_type *pol) +{ + struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + + lockdep_assert_held(blkg->q->queue_lock); + + if (blkg_rwstat_sum(&stats->queued)) + return; + + /* + * group is already marked empty. This can happen if cfqq got new + * request in parent group and moved to this group while being added + * to service tree. Just ignore the event and move on. + */ + if (blkio_blkg_empty(stats)) + return; + + stats->start_empty_time = sched_clock(); + blkio_mark_blkg_empty(stats); +} + +static void cfq_blkiocg_update_idle_time_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol) +{ + struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + + lockdep_assert_held(blkg->q->queue_lock); + + if (blkio_blkg_idling(stats)) { + unsigned long long now = sched_clock(); + + if (time_after64(now, stats->start_idle_time)) + blkg_stat_add(&stats->idle_time, + now - stats->start_idle_time); + blkio_clear_blkg_idling(stats); + } +} + +static void cfq_blkiocg_update_set_idle_time_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol) +{ + struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + + lockdep_assert_held(blkg->q->queue_lock); + BUG_ON(blkio_blkg_idling(stats)); + + stats->start_idle_time = sched_clock(); + blkio_mark_blkg_idling(stats); +} + +static void cfq_blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol) +{ + struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + + lockdep_assert_held(blkg->q->queue_lock); + + blkg_stat_add(&stats->avg_queue_size_sum, + blkg_rwstat_sum(&stats->queued)); + blkg_stat_add(&stats->avg_queue_size_samples, 1); + blkio_update_group_wait_time(stats); +} + +#else /* CONFIG_CFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */ + +static void blkio_set_start_group_wait_time(struct blkio_group *blkg, + struct blkio_policy_type *pol, + struct blkio_group *curr_blkg) { } +static void blkio_end_empty_time(struct blkio_group_stats *stats) { } +static void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol, + unsigned long dequeue) { } +static void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg, + struct blkio_policy_type *pol) { } +static void cfq_blkiocg_update_idle_time_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol) { } +static void cfq_blkiocg_update_set_idle_time_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol) { } +static void cfq_blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg, + struct blkio_policy_type *pol) { } + +#endif /* CONFIG_CFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */ + +#ifdef CONFIG_CFQ_GROUP_IOSCHED static inline struct cfq_group *blkg_to_cfqg(struct blkio_group *blkg) { @@ -403,75 +572,98 @@ static inline void cfq_blkiocg_update_io_add_stats(struct blkio_group 
*blkg, struct blkio_group *curr_blkg, bool direction, bool sync) { - blkiocg_update_io_add_stats(blkg, pol, curr_blkg, direction, sync); -} + struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); -static inline void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, unsigned long dequeue) -{ - blkiocg_update_dequeue_stats(blkg, pol, dequeue); + lockdep_assert_held(blkg->q->queue_lock); + + blkg_rwstat_add(&stats->queued, rw, 1); + blkio_end_empty_time(stats); + blkio_set_start_group_wait_time(blkg, pol, curr_blkg); } static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg, struct blkio_policy_type *pol, unsigned long time, unsigned long unaccounted_time) { - blkiocg_update_timeslice_used(blkg, pol, time, unaccounted_time); -} + struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; -static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg, - struct blkio_policy_type *pol) -{ - blkiocg_set_start_empty_time(blkg, pol); + lockdep_assert_held(blkg->q->queue_lock); + + blkg_stat_add(&stats->time, time); +#ifdef CONFIG_DEBUG_BLK_CGROUP + blkg_stat_add(&stats->unaccounted_time, unaccounted_time); +#endif } static inline void cfq_blkiocg_update_io_remove_stats(struct blkio_group *blkg, struct blkio_policy_type *pol, bool direction, bool sync) { - blkiocg_update_io_remove_stats(blkg, pol, direction, sync); + struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); + + lockdep_assert_held(blkg->q->queue_lock); + + blkg_rwstat_add(&stats->queued, rw, -1); } static inline void cfq_blkiocg_update_io_merged_stats(struct blkio_group *blkg, struct blkio_policy_type *pol, bool direction, bool sync) { - blkiocg_update_io_merged_stats(blkg, pol, direction, sync); -} + struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); -static inline void cfq_blkiocg_update_idle_time_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol) -{ - blkiocg_update_idle_time_stats(blkg, pol); -} + lockdep_assert_held(blkg->q->queue_lock); -static inline void -cfq_blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol) -{ - blkiocg_update_avg_queue_size_stats(blkg, pol); -} - -static inline void -cfq_blkiocg_update_set_idle_time_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol) -{ - blkiocg_update_set_idle_time_stats(blkg, pol); + blkg_rwstat_add(&stats->merged, rw, 1); } static inline void cfq_blkiocg_update_dispatch_stats(struct blkio_group *blkg, struct blkio_policy_type *pol, uint64_t bytes, bool direction, bool sync) { - blkiocg_update_dispatch_stats(blkg, pol, bytes, direction, sync); + int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); + struct blkg_policy_data *pd = blkg->pd[pol->plid]; + struct blkio_group_stats_cpu *stats_cpu; + unsigned long flags; + + /* If per cpu stats are not allocated yet, don't do any accounting. */ + if (pd->stats_cpu == NULL) + return; + + /* + * Disabling interrupts to provide mutual exclusion between two + * writes on same cpu. It probably is not needed for 64bit. Not + * optimizing that case yet. 
+ */ + local_irq_save(flags); + + stats_cpu = this_cpu_ptr(pd->stats_cpu); + + blkg_stat_add(&stats_cpu->sectors, bytes >> 9); + blkg_rwstat_add(&stats_cpu->serviced, rw, 1); + blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes); + + local_irq_restore(flags); } static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, struct blkio_policy_type *pol, uint64_t start_time, uint64_t io_start_time, bool direction, bool sync) { - blkiocg_update_completion_stats(blkg, pol, start_time, io_start_time, - direction, sync); + struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + unsigned long long now = sched_clock(); + int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); + + lockdep_assert_held(blkg->q->queue_lock); + + if (time_after64(now, io_start_time)) + blkg_rwstat_add(&stats->service_time, rw, now - io_start_time); + if (time_after64(io_start_time, start_time)) + blkg_rwstat_add(&stats->wait_time, rw, + io_start_time - start_time); } #else /* CONFIG_CFQ_GROUP_IOSCHED */ @@ -489,29 +681,15 @@ static inline void cfq_blkiocg_update_io_add_stats(struct blkio_group *blkg, struct blkio_policy_type *pol, struct blkio_group *curr_blkg, bool direction, bool sync) { } -static inline void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, unsigned long dequeue) { } static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg, struct blkio_policy_type *pol, unsigned long time, unsigned long unaccounted_time) { } -static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg, - struct blkio_policy_type *pol) { } static inline void cfq_blkiocg_update_io_remove_stats(struct blkio_group *blkg, struct blkio_policy_type *pol, bool direction, bool sync) { } static inline void cfq_blkiocg_update_io_merged_stats(struct blkio_group *blkg, struct blkio_policy_type *pol, bool direction, bool sync) { } -static inline void cfq_blkiocg_update_idle_time_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol) { } -static inline void -cfq_blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol) { } - -static inline void -cfq_blkiocg_update_set_idle_time_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol) { } - static inline void cfq_blkiocg_update_dispatch_stats(struct blkio_group *blkg, struct blkio_policy_type *pol, uint64_t bytes, bool direction, bool sync) { } -- cgit v1.2.3-59-g8ed1b From 41b38b6d540f951c49315d8573e6f6195a6e736d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:44 -0700 Subject: blkcg: cfq doesn't need per-cpu dispatch stats blkio_group_stats_cpu is used to count dispatch stats using per-cpu counters. This is used by both blk-throtl and cfq-iosched but the sharing is rather silly. * cfq-iosched doesn't need per-cpu dispatch stats. cfq always updates those stats while holding queue_lock. * blk-throtl needs per-cpu dispatch stats but only service_bytes and serviced. It doesn't make use of sectors. This patch makes cfq add and use global stats for service_bytes, serviced and sectors, removes per-cpu sectors counter and moves per-cpu stat printing code to blk-throttle.c. 
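
[Editorial sketch, not part of the original patch: the trade-off described above can be illustrated in plain userspace C. With the per-cpu scheme that blk-throtl keeps, each CPU bumps only its own counters on the dispatch path, and a user-visible total is computed by summing all CPUs when the cgroup file is read; cfq, which already holds queue_lock on every update, can use one global counter instead. Every name below is invented for the sketch and does not exist in the kernel.]

#include <stdint.h>
#include <stdio.h>

#define NR_CPUS		4	/* illustrative; the kernel walks for_each_possible_cpu() */
#define RWSTAT_NR	2	/* READ/WRITE buckets; the real blkg_rwstat has four */

/* Stand-in for per-cpu dispatch counters (cf. blkio_group_stats_cpu). */
struct dispatch_stats_cpu {
	uint64_t service_bytes[RWSTAT_NR];
	uint64_t serviced[RWSTAT_NR];
};

static struct dispatch_stats_cpu stats_cpu[NR_CPUS];

/*
 * Update path: each CPU touches only its own slot, so no shared lock is
 * needed (the kernel additionally disables interrupts on that CPU to keep
 * the two adds atomic with respect to each other).
 */
static void update_dispatch(int cpu, int rw, uint64_t bytes)
{
	stats_cpu[cpu].service_bytes[rw] += bytes;
	stats_cpu[cpu].serviced[rw] += 1;
}

/*
 * Read path: the total is assembled by summing every CPU's slot, which is
 * what the per-cpu prfill helper moved into blk-throttle.c does.
 */
static uint64_t read_service_bytes(int rw)
{
	uint64_t sum = 0;
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		sum += stats_cpu[cpu].service_bytes[rw];
	return sum;
}

int main(void)
{
	update_dispatch(0, 0, 4096);	/* a read dispatched on CPU 0 */
	update_dispatch(1, 1, 8192);	/* a write dispatched on CPU 1 */
	printf("read bytes:  %llu\n", (unsigned long long)read_service_bytes(0));
	printf("write bytes: %llu\n", (unsigned long long)read_service_bytes(1));
	return 0;
}

[The point of the sketch: per-cpu counters make updates contention-free at the cost of an O(NR_CPUS) read, which is worthwhile for blk-throtl's hot dispatch path but unnecessary for cfq, whose updates already run under queue_lock.]
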
Signed-off-by: Tejun Heo --- block/blk-cgroup.c | 63 ++-------------------------------------------------- block/blk-cgroup.h | 12 +++++----- block/blk-throttle.c | 31 +++++++++++++++++++++++++- block/cfq-iosched.c | 37 +++++++++--------------------- 4 files changed, 48 insertions(+), 95 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 821a0a393e85..19ee29f1b7c5 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -390,7 +390,6 @@ static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid) blkg_rwstat_reset(&sc->service_bytes); blkg_rwstat_reset(&sc->serviced); - blkg_stat_reset(&sc->sectors); } } @@ -417,6 +416,8 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) struct blkio_group_stats *stats = &pd->stats; /* queued stats shouldn't be cleared */ + blkg_rwstat_reset(&stats->service_bytes); + blkg_rwstat_reset(&stats->serviced); blkg_rwstat_reset(&stats->merged); blkg_rwstat_reset(&stats->service_time); blkg_rwstat_reset(&stats->wait_time); @@ -577,66 +578,6 @@ int blkcg_print_rwstat(struct cgroup *cgrp, struct cftype *cft, } EXPORT_SYMBOL_GPL(blkcg_print_rwstat); -static u64 blkg_prfill_cpu_stat(struct seq_file *sf, - struct blkg_policy_data *pd, int off) -{ - u64 v = 0; - int cpu; - - for_each_possible_cpu(cpu) { - struct blkio_group_stats_cpu *sc = - per_cpu_ptr(pd->stats_cpu, cpu); - - v += blkg_stat_read((void *)sc + off); - } - - return __blkg_prfill_u64(sf, pd, v); -} - -static u64 blkg_prfill_cpu_rwstat(struct seq_file *sf, - struct blkg_policy_data *pd, int off) -{ - struct blkg_rwstat rwstat = { }, tmp; - int i, cpu; - - for_each_possible_cpu(cpu) { - struct blkio_group_stats_cpu *sc = - per_cpu_ptr(pd->stats_cpu, cpu); - - tmp = blkg_rwstat_read((void *)sc + off); - for (i = 0; i < BLKG_RWSTAT_NR; i++) - rwstat.cnt[i] += tmp.cnt[i]; - } - - return __blkg_prfill_rwstat(sf, pd, &rwstat); -} - -/* print per-cpu blkg_stat specified by BLKCG_STAT_PRIV() */ -int blkcg_print_cpu_stat(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf) -{ - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - - blkcg_print_blkgs(sf, blkcg, blkg_prfill_cpu_stat, - BLKCG_STAT_POL(cft->private), - BLKCG_STAT_OFF(cft->private), false); - return 0; -} -EXPORT_SYMBOL_GPL(blkcg_print_cpu_stat); - -/* print per-cpu blkg_rwstat specified by BLKCG_STAT_PRIV() */ -int blkcg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf) -{ - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - - blkcg_print_blkgs(sf, blkcg, blkg_prfill_cpu_rwstat, - BLKCG_STAT_POL(cft->private), - BLKCG_STAT_OFF(cft->private), true); - return 0; -} -EXPORT_SYMBOL_GPL(blkcg_print_cpu_rwstat); - /** * blkg_conf_prep - parse and prepare for per-blkg config update * @blkcg: target block cgroup diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 0b0a176ee007..c82de47ae69f 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -65,6 +65,10 @@ struct blkg_rwstat { }; struct blkio_group_stats { + /* total bytes transferred */ + struct blkg_rwstat service_bytes; + /* total IOs serviced, post merge */ + struct blkg_rwstat serviced; /* number of ios merged */ struct blkg_rwstat merged; /* total time spent on device in ns, may not be accurate w/ queueing */ @@ -73,6 +77,8 @@ struct blkio_group_stats { struct blkg_rwstat wait_time; /* number of IOs queued up */ struct blkg_rwstat queued; + /* total sectors transferred */ + struct blkg_stat sectors; /* total disk time and nr sectors 
dispatched by this group */ struct blkg_stat time; #ifdef CONFIG_DEBUG_BLK_CGROUP @@ -104,8 +110,6 @@ struct blkio_group_stats_cpu { struct blkg_rwstat service_bytes; /* total IOs serviced, post merge */ struct blkg_rwstat serviced; - /* total sectors transferred */ - struct blkg_stat sectors; }; struct blkio_group_conf { @@ -183,10 +187,6 @@ int blkcg_print_stat(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf); int blkcg_print_rwstat(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf); -int blkcg_print_cpu_stat(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf); -int blkcg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf); struct blkg_conf_ctx { struct gendisk *disk; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 5d647edc02a1..cb259bc46f43 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -582,7 +582,6 @@ static void throtl_update_dispatch_stats(struct blkio_group *blkg, u64 bytes, stats_cpu = this_cpu_ptr(pd->stats_cpu); - blkg_stat_add(&stats_cpu->sectors, bytes >> 9); blkg_rwstat_add(&stats_cpu->serviced, rw, 1); blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes); @@ -843,6 +842,36 @@ static void throtl_update_blkio_group_common(struct throtl_data *td, throtl_schedule_delayed_work(td, 0); } +static u64 blkg_prfill_cpu_rwstat(struct seq_file *sf, + struct blkg_policy_data *pd, int off) +{ + struct blkg_rwstat rwstat = { }, tmp; + int i, cpu; + + for_each_possible_cpu(cpu) { + struct blkio_group_stats_cpu *sc = + per_cpu_ptr(pd->stats_cpu, cpu); + + tmp = blkg_rwstat_read((void *)sc + off); + for (i = 0; i < BLKG_RWSTAT_NR; i++) + rwstat.cnt[i] += tmp.cnt[i]; + } + + return __blkg_prfill_rwstat(sf, pd, &rwstat); +} + +/* print per-cpu blkg_rwstat specified by BLKCG_STAT_PRIV() */ +static int blkcg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + + blkcg_print_blkgs(sf, blkcg, blkg_prfill_cpu_rwstat, + BLKCG_STAT_POL(cft->private), + BLKCG_STAT_OFF(cft->private), true); + return 0; +} + static u64 blkg_prfill_conf_u64(struct seq_file *sf, struct blkg_policy_data *pd, int off) { diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 49913804e8dd..effd89489506 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -624,29 +624,12 @@ static inline void cfq_blkiocg_update_dispatch_stats(struct blkio_group *blkg, struct blkio_policy_type *pol, uint64_t bytes, bool direction, bool sync) { + struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); - struct blkg_policy_data *pd = blkg->pd[pol->plid]; - struct blkio_group_stats_cpu *stats_cpu; - unsigned long flags; - /* If per cpu stats are not allocated yet, don't do any accounting. */ - if (pd->stats_cpu == NULL) - return; - - /* - * Disabling interrupts to provide mutual exclusion between two - * writes on same cpu. It probably is not needed for 64bit. Not - * optimizing that case yet. 
- */ - local_irq_save(flags); - - stats_cpu = this_cpu_ptr(pd->stats_cpu); - - blkg_stat_add(&stats_cpu->sectors, bytes >> 9); - blkg_rwstat_add(&stats_cpu->serviced, rw, 1); - blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes); - - local_irq_restore(flags); + blkg_stat_add(&stats->sectors, bytes >> 9); + blkg_rwstat_add(&stats->serviced, rw, 1); + blkg_rwstat_add(&stats->service_bytes, rw, bytes); } static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, @@ -1520,20 +1503,20 @@ static struct cftype cfq_blkcg_files[] = { { .name = "sectors", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats_cpu, sectors)), - .read_seq_string = blkcg_print_cpu_stat, + offsetof(struct blkio_group_stats, sectors)), + .read_seq_string = blkcg_print_stat, }, { .name = "io_service_bytes", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats_cpu, service_bytes)), - .read_seq_string = blkcg_print_cpu_rwstat, + offsetof(struct blkio_group_stats, service_bytes)), + .read_seq_string = blkcg_print_rwstat, }, { .name = "io_serviced", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats_cpu, serviced)), - .read_seq_string = blkcg_print_cpu_rwstat, + offsetof(struct blkio_group_stats, serviced)), + .read_seq_string = blkcg_print_rwstat, }, { .name = "io_service_time", -- cgit v1.2.3-59-g8ed1b From 9ade5ea4ce57d3596eaee6a57cd212a483674058 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:44 -0700 Subject: blkcg: add blkio_policy_ops operations for exit and stat reset Add blkio_policy_ops->blkio_exit_group_fn() and ->blkio_reset_group_stats_fn(). These will be used to further modularize blkcg policy implementation. Signed-off-by: Tejun Heo --- block/blk-cgroup.c | 16 ++++++++++++---- block/blk-cgroup.h | 4 ++++ 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 19ee29f1b7c5..2e6fb7d91805 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -131,12 +131,17 @@ static void blkg_free(struct blkio_group *blkg) return; for (i = 0; i < BLKIO_NR_POLICIES; i++) { + struct blkio_policy_type *pol = blkio_policy[i]; struct blkg_policy_data *pd = blkg->pd[i]; - if (pd) { - free_percpu(pd->stats_cpu); - kfree(pd); - } + if (!pd) + continue; + + if (pol && pol->ops.blkio_exit_group_fn) + pol->ops.blkio_exit_group_fn(blkg); + + free_percpu(pd->stats_cpu); + kfree(pd); } kfree(blkg); @@ -432,6 +437,9 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) blkg_stat_reset(&stats->empty_time); #endif blkio_reset_stats_cpu(blkg, pol->plid); + + if (pol->ops.blkio_reset_group_stats_fn) + pol->ops.blkio_reset_group_stats_fn(blkg); } } diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index c82de47ae69f..d0ee649e8bbb 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -153,9 +153,13 @@ struct blkio_group { }; typedef void (blkio_init_group_fn)(struct blkio_group *blkg); +typedef void (blkio_exit_group_fn)(struct blkio_group *blkg); +typedef void (blkio_reset_group_stats_fn)(struct blkio_group *blkg); struct blkio_policy_ops { blkio_init_group_fn *blkio_init_group_fn; + blkio_exit_group_fn *blkio_exit_group_fn; + blkio_reset_group_stats_fn *blkio_reset_group_stats_fn; }; struct blkio_policy_type { -- cgit v1.2.3-59-g8ed1b From 155fead9b6347ead90e0b0396cb108a6ba6126c6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:44 -0700 Subject: blkcg: move blkio_group_stats 
to cfq-iosched.c blkio_group_stats contains only fields used by cfq and has no reason to be defined in blkcg core. * Move blkio_group_stats to cfq-iosched.c and rename it to cfqg_stats. * blkg_policy_data->stats is replaced with cfq_group->stats. blkg_prfill_[rw]stat() are updated to use offset against pd->pdata instead. * All related macros / functions are renamed so that they have cfqg_ prefix and the unnecessary @pol arguments are dropped. * All stat functions now take cfq_group * instead of blkio_group *. * lockdep assertion on queue lock dropped. Elevator runs under queue lock by default. There isn't much to be gained by adding lockdep assertions at stat function level. * cfqg_stats_reset() implemented for blkio_reset_group_stats_fn method so that cfqg->stats can be reset. Signed-off-by: Tejun Heo --- block/blk-cgroup.c | 23 +-- block/blk-cgroup.h | 41 ------ block/cfq-iosched.c | 407 ++++++++++++++++++++++++---------------------------- 3 files changed, 193 insertions(+), 278 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 2e6fb7d91805..cfdda44f4a0b 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -417,25 +417,6 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) struct blkio_policy_type *pol; list_for_each_entry(pol, &blkio_list, list) { - struct blkg_policy_data *pd = blkg->pd[pol->plid]; - struct blkio_group_stats *stats = &pd->stats; - - /* queued stats shouldn't be cleared */ - blkg_rwstat_reset(&stats->service_bytes); - blkg_rwstat_reset(&stats->serviced); - blkg_rwstat_reset(&stats->merged); - blkg_rwstat_reset(&stats->service_time); - blkg_rwstat_reset(&stats->wait_time); - blkg_stat_reset(&stats->time); -#ifdef CONFIG_DEBUG_BLK_CGROUP - blkg_stat_reset(&stats->unaccounted_time); - blkg_stat_reset(&stats->avg_queue_size_sum); - blkg_stat_reset(&stats->avg_queue_size_samples); - blkg_stat_reset(&stats->dequeue); - blkg_stat_reset(&stats->group_wait_time); - blkg_stat_reset(&stats->idle_time); - blkg_stat_reset(&stats->empty_time); -#endif blkio_reset_stats_cpu(blkg, pol->plid); if (pol->ops.blkio_reset_group_stats_fn) @@ -549,13 +530,13 @@ static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off) { return __blkg_prfill_u64(sf, pd, - blkg_stat_read((void *)&pd->stats + off)); + blkg_stat_read((void *)pd->pdata + off)); } static u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat rwstat = blkg_rwstat_read((void *)&pd->stats + off); + struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd->pdata + off); return __blkg_prfill_rwstat(sf, pd, &rwstat); } diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index d0ee649e8bbb..791570394e87 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -64,46 +64,6 @@ struct blkg_rwstat { uint64_t cnt[BLKG_RWSTAT_NR]; }; -struct blkio_group_stats { - /* total bytes transferred */ - struct blkg_rwstat service_bytes; - /* total IOs serviced, post merge */ - struct blkg_rwstat serviced; - /* number of ios merged */ - struct blkg_rwstat merged; - /* total time spent on device in ns, may not be accurate w/ queueing */ - struct blkg_rwstat service_time; - /* total time spent waiting in scheduler queue in ns */ - struct blkg_rwstat wait_time; - /* number of IOs queued up */ - struct blkg_rwstat queued; - /* total sectors transferred */ - struct blkg_stat sectors; - /* total disk time and nr sectors dispatched by this group */ - struct blkg_stat time; -#ifdef 
CONFIG_DEBUG_BLK_CGROUP - /* time not charged to this cgroup */ - struct blkg_stat unaccounted_time; - /* sum of number of ios queued across all samples */ - struct blkg_stat avg_queue_size_sum; - /* count of samples taken for average */ - struct blkg_stat avg_queue_size_samples; - /* how many times this group has been removed from service tree */ - struct blkg_stat dequeue; - /* total time spent waiting for it to be assigned a timeslice. */ - struct blkg_stat group_wait_time; - /* time spent idling for this blkio_group */ - struct blkg_stat idle_time; - /* total time with empty current active q with other requests queued */ - struct blkg_stat empty_time; - /* fields after this shouldn't be cleared on stat reset */ - uint64_t start_group_wait_time; - uint64_t start_idle_time; - uint64_t start_empty_time; - uint16_t flags; -#endif -}; - /* Per cpu blkio group stats */ struct blkio_group_stats_cpu { /* total bytes transferred */ @@ -126,7 +86,6 @@ struct blkg_policy_data { /* Configuration */ struct blkio_group_conf conf; - struct blkio_group_stats stats; /* Per cpu stats pointer */ struct blkio_group_stats_cpu __percpu *stats_cpu; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index effd89489506..a1f37dfd1b8b 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -173,6 +173,48 @@ enum wl_type_t { SYNC_WORKLOAD = 2 }; +struct cfqg_stats { +#ifdef CONFIG_CFQ_GROUP_IOSCHED + /* total bytes transferred */ + struct blkg_rwstat service_bytes; + /* total IOs serviced, post merge */ + struct blkg_rwstat serviced; + /* number of ios merged */ + struct blkg_rwstat merged; + /* total time spent on device in ns, may not be accurate w/ queueing */ + struct blkg_rwstat service_time; + /* total time spent waiting in scheduler queue in ns */ + struct blkg_rwstat wait_time; + /* number of IOs queued up */ + struct blkg_rwstat queued; + /* total sectors transferred */ + struct blkg_stat sectors; + /* total disk time and nr sectors dispatched by this group */ + struct blkg_stat time; +#ifdef CONFIG_DEBUG_BLK_CGROUP + /* time not charged to this cgroup */ + struct blkg_stat unaccounted_time; + /* sum of number of ios queued across all samples */ + struct blkg_stat avg_queue_size_sum; + /* count of samples taken for average */ + struct blkg_stat avg_queue_size_samples; + /* how many times this group has been removed from service tree */ + struct blkg_stat dequeue; + /* total time spent waiting for it to be assigned a timeslice. 
*/ + struct blkg_stat group_wait_time; + /* time spent idling for this blkio_group */ + struct blkg_stat idle_time; + /* total time with empty current active q with other requests queued */ + struct blkg_stat empty_time; + /* fields after this shouldn't be cleared on stat reset */ + uint64_t start_group_wait_time; + uint64_t start_idle_time; + uint64_t start_empty_time; + uint16_t flags; +#endif /* CONFIG_DEBUG_BLK_CGROUP */ +#endif /* CONFIG_CFQ_GROUP_IOSCHED */ +}; + /* This is per cgroup per device grouping structure */ struct cfq_group { /* group service_tree member */ @@ -212,6 +254,7 @@ struct cfq_group { /* number of requests that are on the dispatch list or inside driver */ int dispatched; struct cfq_ttime ttime; + struct cfqg_stats stats; }; struct cfq_io_cq { @@ -368,96 +411,84 @@ CFQ_CFQQ_FNS(wait_busy); #if defined(CONFIG_CFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) -/* blkg state flags */ -enum blkg_state_flags { - BLKG_waiting = 0, - BLKG_idling, - BLKG_empty, +/* cfqg stats flags */ +enum cfqg_stats_flags { + CFQG_stats_waiting = 0, + CFQG_stats_idling, + CFQG_stats_empty, }; -#define BLKG_FLAG_FNS(name) \ -static inline void blkio_mark_blkg_##name( \ - struct blkio_group_stats *stats) \ +#define CFQG_FLAG_FNS(name) \ +static inline void cfqg_stats_mark_##name(struct cfqg_stats *stats) \ { \ - stats->flags |= (1 << BLKG_##name); \ + stats->flags |= (1 << CFQG_stats_##name); \ } \ -static inline void blkio_clear_blkg_##name( \ - struct blkio_group_stats *stats) \ +static inline void cfqg_stats_clear_##name(struct cfqg_stats *stats) \ { \ - stats->flags &= ~(1 << BLKG_##name); \ + stats->flags &= ~(1 << CFQG_stats_##name); \ } \ -static inline int blkio_blkg_##name(struct blkio_group_stats *stats) \ +static inline int cfqg_stats_##name(struct cfqg_stats *stats) \ { \ - return (stats->flags & (1 << BLKG_##name)) != 0; \ + return (stats->flags & (1 << CFQG_stats_##name)) != 0; \ } \ -BLKG_FLAG_FNS(waiting) -BLKG_FLAG_FNS(idling) -BLKG_FLAG_FNS(empty) -#undef BLKG_FLAG_FNS +CFQG_FLAG_FNS(waiting) +CFQG_FLAG_FNS(idling) +CFQG_FLAG_FNS(empty) +#undef CFQG_FLAG_FNS /* This should be called with the queue_lock held. */ -static void blkio_update_group_wait_time(struct blkio_group_stats *stats) +static void cfqg_stats_update_group_wait_time(struct cfqg_stats *stats) { unsigned long long now; - if (!blkio_blkg_waiting(stats)) + if (!cfqg_stats_waiting(stats)) return; now = sched_clock(); if (time_after64(now, stats->start_group_wait_time)) blkg_stat_add(&stats->group_wait_time, now - stats->start_group_wait_time); - blkio_clear_blkg_waiting(stats); + cfqg_stats_clear_waiting(stats); } /* This should be called with the queue_lock held. */ -static void blkio_set_start_group_wait_time(struct blkio_group *blkg, - struct blkio_policy_type *pol, - struct blkio_group *curr_blkg) +static void cfqg_stats_set_start_group_wait_time(struct cfq_group *cfqg, + struct cfq_group *curr_cfqg) { - struct blkg_policy_data *pd = blkg->pd[pol->plid]; + struct cfqg_stats *stats = &cfqg->stats; - if (blkio_blkg_waiting(&pd->stats)) + if (cfqg_stats_waiting(stats)) return; - if (blkg == curr_blkg) + if (cfqg == curr_cfqg) return; - pd->stats.start_group_wait_time = sched_clock(); - blkio_mark_blkg_waiting(&pd->stats); + stats->start_group_wait_time = sched_clock(); + cfqg_stats_mark_waiting(stats); } /* This should be called with the queue_lock held. 
*/ -static void blkio_end_empty_time(struct blkio_group_stats *stats) +static void cfqg_stats_end_empty_time(struct cfqg_stats *stats) { unsigned long long now; - if (!blkio_blkg_empty(stats)) + if (!cfqg_stats_empty(stats)) return; now = sched_clock(); if (time_after64(now, stats->start_empty_time)) blkg_stat_add(&stats->empty_time, now - stats->start_empty_time); - blkio_clear_blkg_empty(stats); + cfqg_stats_clear_empty(stats); } -static void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, - unsigned long dequeue) +static void cfqg_stats_update_dequeue(struct cfq_group *cfqg) { - struct blkg_policy_data *pd = blkg->pd[pol->plid]; - - lockdep_assert_held(blkg->q->queue_lock); - - blkg_stat_add(&pd->stats.dequeue, dequeue); + blkg_stat_add(&cfqg->stats.dequeue, 1); } -static void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg, - struct blkio_policy_type *pol) +static void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg) { - struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - - lockdep_assert_held(blkg->q->queue_lock); + struct cfqg_stats *stats = &cfqg->stats; if (blkg_rwstat_sum(&stats->queued)) return; @@ -467,72 +498,57 @@ static void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg, * request in parent group and moved to this group while being added * to service tree. Just ignore the event and move on. */ - if (blkio_blkg_empty(stats)) + if (cfqg_stats_empty(stats)) return; stats->start_empty_time = sched_clock(); - blkio_mark_blkg_empty(stats); + cfqg_stats_mark_empty(stats); } -static void cfq_blkiocg_update_idle_time_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol) +static void cfqg_stats_update_idle_time(struct cfq_group *cfqg) { - struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + struct cfqg_stats *stats = &cfqg->stats; - lockdep_assert_held(blkg->q->queue_lock); - - if (blkio_blkg_idling(stats)) { + if (cfqg_stats_idling(stats)) { unsigned long long now = sched_clock(); if (time_after64(now, stats->start_idle_time)) blkg_stat_add(&stats->idle_time, now - stats->start_idle_time); - blkio_clear_blkg_idling(stats); + cfqg_stats_clear_idling(stats); } } -static void cfq_blkiocg_update_set_idle_time_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol) +static void cfqg_stats_set_start_idle_time(struct cfq_group *cfqg) { - struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + struct cfqg_stats *stats = &cfqg->stats; - lockdep_assert_held(blkg->q->queue_lock); - BUG_ON(blkio_blkg_idling(stats)); + BUG_ON(cfqg_stats_idling(stats)); stats->start_idle_time = sched_clock(); - blkio_mark_blkg_idling(stats); + cfqg_stats_mark_idling(stats); } -static void cfq_blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol) +static void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) { - struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - - lockdep_assert_held(blkg->q->queue_lock); + struct cfqg_stats *stats = &cfqg->stats; blkg_stat_add(&stats->avg_queue_size_sum, blkg_rwstat_sum(&stats->queued)); blkg_stat_add(&stats->avg_queue_size_samples, 1); - blkio_update_group_wait_time(stats); + cfqg_stats_update_group_wait_time(stats); } #else /* CONFIG_CFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */ -static void blkio_set_start_group_wait_time(struct blkio_group *blkg, - struct blkio_policy_type *pol, - struct blkio_group *curr_blkg) { } -static void blkio_end_empty_time(struct blkio_group_stats *stats) { } 
-static void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, - unsigned long dequeue) { } -static void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg, - struct blkio_policy_type *pol) { } -static void cfq_blkiocg_update_idle_time_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol) { } -static void cfq_blkiocg_update_set_idle_time_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol) { } -static void cfq_blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol) { } +static void cfqg_stats_set_start_group_wait_time(struct cfq_group *cfqg, + struct cfq_group *curr_cfqg) { } +static void cfqg_stats_end_empty_time(struct cfqg_stats *stats) { } +static void cfqg_stats_update_dequeue(struct cfq_group *cfqg) { } +static void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg) { } +static void cfqg_stats_update_idle_time(struct cfq_group *cfqg) { } +static void cfqg_stats_set_start_idle_time(struct cfq_group *cfqg) { } +static void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) { } #endif /* CONFIG_CFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */ @@ -567,80 +583,46 @@ static inline void cfqg_put(struct cfq_group *cfqg) blk_add_trace_msg((cfqd)->queue, "%s " fmt, \ blkg_path(cfqg_to_blkg((cfqg))), ##args) \ -static inline void cfq_blkiocg_update_io_add_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, - struct blkio_group *curr_blkg, - bool direction, bool sync) +static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, + struct cfq_group *curr_cfqg, int rw) { - struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); - - lockdep_assert_held(blkg->q->queue_lock); - - blkg_rwstat_add(&stats->queued, rw, 1); - blkio_end_empty_time(stats); - blkio_set_start_group_wait_time(blkg, pol, curr_blkg); + blkg_rwstat_add(&cfqg->stats.queued, rw, 1); + cfqg_stats_end_empty_time(&cfqg->stats); + cfqg_stats_set_start_group_wait_time(cfqg, curr_cfqg); } -static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg, - struct blkio_policy_type *pol, unsigned long time, - unsigned long unaccounted_time) +static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, + unsigned long time, unsigned long unaccounted_time) { - struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - - lockdep_assert_held(blkg->q->queue_lock); - - blkg_stat_add(&stats->time, time); + blkg_stat_add(&cfqg->stats.time, time); #ifdef CONFIG_DEBUG_BLK_CGROUP - blkg_stat_add(&stats->unaccounted_time, unaccounted_time); + blkg_stat_add(&cfqg->stats.unaccounted_time, unaccounted_time); #endif } -static inline void cfq_blkiocg_update_io_remove_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, bool direction, - bool sync) +static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw) { - struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); - - lockdep_assert_held(blkg->q->queue_lock); - - blkg_rwstat_add(&stats->queued, rw, -1); + blkg_rwstat_add(&cfqg->stats.queued, rw, -1); } -static inline void cfq_blkiocg_update_io_merged_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, bool direction, - bool sync) +static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw) { - struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - int rw = (direction ? 
REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); - - lockdep_assert_held(blkg->q->queue_lock); - - blkg_rwstat_add(&stats->merged, rw, 1); + blkg_rwstat_add(&cfqg->stats.merged, rw, 1); } -static inline void cfq_blkiocg_update_dispatch_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, uint64_t bytes, - bool direction, bool sync) +static inline void cfqg_stats_update_dispatch(struct cfq_group *cfqg, + uint64_t bytes, int rw) { - struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; - int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); - - blkg_stat_add(&stats->sectors, bytes >> 9); - blkg_rwstat_add(&stats->serviced, rw, 1); - blkg_rwstat_add(&stats->service_bytes, rw, bytes); + blkg_stat_add(&cfqg->stats.sectors, bytes >> 9); + blkg_rwstat_add(&cfqg->stats.serviced, rw, 1); + blkg_rwstat_add(&cfqg->stats.service_bytes, rw, bytes); } -static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, uint64_t start_time, - uint64_t io_start_time, bool direction, bool sync) +static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, + uint64_t start_time, uint64_t io_start_time, int rw) { - struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + struct cfqg_stats *stats = &cfqg->stats; unsigned long long now = sched_clock(); - int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); - - lockdep_assert_held(blkg->q->queue_lock); if (time_after64(now, io_start_time)) blkg_rwstat_add(&stats->service_time, rw, now - io_start_time); @@ -649,6 +631,29 @@ static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, io_start_time - start_time); } +static void cfqg_stats_reset(struct blkio_group *blkg) +{ + struct cfq_group *cfqg = blkg_to_cfqg(blkg); + struct cfqg_stats *stats = &cfqg->stats; + + /* queued stats shouldn't be cleared */ + blkg_rwstat_reset(&stats->service_bytes); + blkg_rwstat_reset(&stats->serviced); + blkg_rwstat_reset(&stats->merged); + blkg_rwstat_reset(&stats->service_time); + blkg_rwstat_reset(&stats->wait_time); + blkg_stat_reset(&stats->time); +#ifdef CONFIG_DEBUG_BLK_CGROUP + blkg_stat_reset(&stats->unaccounted_time); + blkg_stat_reset(&stats->avg_queue_size_sum); + blkg_stat_reset(&stats->avg_queue_size_samples); + blkg_stat_reset(&stats->dequeue); + blkg_stat_reset(&stats->group_wait_time); + blkg_stat_reset(&stats->idle_time); + blkg_stat_reset(&stats->empty_time); +#endif +} + #else /* CONFIG_CFQ_GROUP_IOSCHED */ static inline struct cfq_group *blkg_to_cfqg(struct blkio_group *blkg) { return NULL; } @@ -660,25 +665,16 @@ static inline void cfqg_put(struct cfq_group *cfqg) { } blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args) #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) 
do {} while (0) -static inline void cfq_blkiocg_update_io_add_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, - struct blkio_group *curr_blkg, bool direction, - bool sync) { } -static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg, - struct blkio_policy_type *pol, unsigned long time, - unsigned long unaccounted_time) { } -static inline void cfq_blkiocg_update_io_remove_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, bool direction, - bool sync) { } -static inline void cfq_blkiocg_update_io_merged_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, bool direction, - bool sync) { } -static inline void cfq_blkiocg_update_dispatch_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, uint64_t bytes, - bool direction, bool sync) { } -static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, - struct blkio_policy_type *pol, uint64_t start_time, - uint64_t io_start_time, bool direction, bool sync) { } +static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, + struct cfq_group *curr_cfqg, int rw) { } +static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, + unsigned long time, unsigned long unaccounted_time) { } +static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw) { } +static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw) { } +static inline void cfqg_stats_update_dispatch(struct cfq_group *cfqg, + uint64_t bytes, int rw) { } +static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, + uint64_t start_time, uint64_t io_start_time, int rw) { } #endif /* CONFIG_CFQ_GROUP_IOSCHED */ @@ -1233,8 +1229,7 @@ cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg) cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); cfq_group_service_tree_del(st, cfqg); cfqg->saved_workload_slice = 0; - cfq_blkiocg_update_dequeue_stats(cfqg_to_blkg(cfqg), - &blkio_policy_cfq, 1); + cfqg_stats_update_dequeue(cfqg); } static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq, @@ -1306,9 +1301,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, "sl_used=%u disp=%u charge=%u iops=%u sect=%lu", used_sl, cfqq->slice_dispatch, charge, iops_mode(cfqd), cfqq->nr_sectors); - cfq_blkiocg_update_timeslice_used(cfqg_to_blkg(cfqg), &blkio_policy_cfq, - used_sl, unaccounted_sl); - cfq_blkiocg_set_start_empty_time(cfqg_to_blkg(cfqg), &blkio_policy_cfq); + cfqg_stats_update_timeslice_used(cfqg, used_sl, unaccounted_sl); + cfqg_stats_set_start_empty_time(cfqg); } /** @@ -1456,14 +1450,15 @@ static int blkcg_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) } #ifdef CONFIG_DEBUG_BLK_CGROUP -static u64 blkg_prfill_avg_queue_size(struct seq_file *sf, +static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - u64 samples = blkg_stat_read(&pd->stats.avg_queue_size_samples); + struct cfq_group *cfqg = (void *)pd->pdata; + u64 samples = blkg_stat_read(&cfqg->stats.avg_queue_size_samples); u64 v = 0; if (samples) { - v = blkg_stat_read(&pd->stats.avg_queue_size_sum); + v = blkg_stat_read(&cfqg->stats.avg_queue_size_sum); do_div(v, samples); } __blkg_prfill_u64(sf, pd, v); @@ -1471,12 +1466,12 @@ static u64 blkg_prfill_avg_queue_size(struct seq_file *sf, } /* print avg_queue_size */ -static int blkcg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf) +static int cfqg_print_avg_queue_size(struct cgroup *cgrp, 
struct cftype *cft, + struct seq_file *sf) { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - blkcg_print_blkgs(sf, blkcg, blkg_prfill_avg_queue_size, + blkcg_print_blkgs(sf, blkcg, cfqg_prfill_avg_queue_size, BLKIO_POLICY_PROP, 0, false); return 0; } @@ -1497,84 +1492,84 @@ static struct cftype cfq_blkcg_files[] = { { .name = "time", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, time)), + offsetof(struct cfq_group, stats.time)), .read_seq_string = blkcg_print_stat, }, { .name = "sectors", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, sectors)), + offsetof(struct cfq_group, stats.sectors)), .read_seq_string = blkcg_print_stat, }, { .name = "io_service_bytes", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, service_bytes)), + offsetof(struct cfq_group, stats.service_bytes)), .read_seq_string = blkcg_print_rwstat, }, { .name = "io_serviced", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, serviced)), + offsetof(struct cfq_group, stats.serviced)), .read_seq_string = blkcg_print_rwstat, }, { .name = "io_service_time", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, service_time)), + offsetof(struct cfq_group, stats.service_time)), .read_seq_string = blkcg_print_rwstat, }, { .name = "io_wait_time", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, wait_time)), + offsetof(struct cfq_group, stats.wait_time)), .read_seq_string = blkcg_print_rwstat, }, { .name = "io_merged", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, merged)), + offsetof(struct cfq_group, stats.merged)), .read_seq_string = blkcg_print_rwstat, }, { .name = "io_queued", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, queued)), + offsetof(struct cfq_group, stats.queued)), .read_seq_string = blkcg_print_rwstat, }, #ifdef CONFIG_DEBUG_BLK_CGROUP { .name = "avg_queue_size", - .read_seq_string = blkcg_print_avg_queue_size, + .read_seq_string = cfqg_print_avg_queue_size, }, { .name = "group_wait_time", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, group_wait_time)), + offsetof(struct cfq_group, stats.group_wait_time)), .read_seq_string = blkcg_print_stat, }, { .name = "idle_time", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, idle_time)), + offsetof(struct cfq_group, stats.idle_time)), .read_seq_string = blkcg_print_stat, }, { .name = "empty_time", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, empty_time)), + offsetof(struct cfq_group, stats.empty_time)), .read_seq_string = blkcg_print_stat, }, { .name = "dequeue", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, dequeue)), + offsetof(struct cfq_group, stats.dequeue)), .read_seq_string = blkcg_print_stat, }, { .name = "unaccounted_time", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct blkio_group_stats, unaccounted_time)), + offsetof(struct cfq_group, stats.unaccounted_time)), .read_seq_string = blkcg_print_stat, }, #endif /* CONFIG_DEBUG_BLK_CGROUP */ @@ -1858,14 +1853,10 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) { elv_rb_del(&cfqq->sort_list, rq); cfqq->queued[rq_is_sync(rq)]--; - cfq_blkiocg_update_io_remove_stats(cfqg_to_blkg(RQ_CFQG(rq)), - &blkio_policy_cfq, rq_data_dir(rq), - rq_is_sync(rq)); + 
cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags); cfq_add_rq_rb(rq); - cfq_blkiocg_update_io_add_stats(cfqg_to_blkg(RQ_CFQG(rq)), - &blkio_policy_cfq, - cfqg_to_blkg(cfqq->cfqd->serving_group), - rq_data_dir(rq), rq_is_sync(rq)); + cfqg_stats_update_io_add(RQ_CFQG(rq), cfqq->cfqd->serving_group, + rq->cmd_flags); } static struct request * @@ -1921,9 +1912,7 @@ static void cfq_remove_request(struct request *rq) cfq_del_rq_rb(rq); cfqq->cfqd->rq_queued--; - cfq_blkiocg_update_io_remove_stats(cfqg_to_blkg(RQ_CFQG(rq)), - &blkio_policy_cfq, rq_data_dir(rq), - rq_is_sync(rq)); + cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags); if (rq->cmd_flags & REQ_PRIO) { WARN_ON(!cfqq->prio_pending); cfqq->prio_pending--; @@ -1958,9 +1947,7 @@ static void cfq_merged_request(struct request_queue *q, struct request *req, static void cfq_bio_merged(struct request_queue *q, struct request *req, struct bio *bio) { - cfq_blkiocg_update_io_merged_stats(cfqg_to_blkg(RQ_CFQG(req)), - &blkio_policy_cfq, bio_data_dir(bio), - cfq_bio_sync(bio)); + cfqg_stats_update_io_merged(RQ_CFQG(req), bio->bi_rw); } static void @@ -1982,9 +1969,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, if (cfqq->next_rq == next) cfqq->next_rq = rq; cfq_remove_request(next); - cfq_blkiocg_update_io_merged_stats(cfqg_to_blkg(RQ_CFQG(rq)), - &blkio_policy_cfq, rq_data_dir(next), - rq_is_sync(next)); + cfqg_stats_update_io_merged(RQ_CFQG(rq), next->cmd_flags); cfqq = RQ_CFQQ(next); /* @@ -2025,8 +2010,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) { del_timer(&cfqd->idle_slice_timer); - cfq_blkiocg_update_idle_time_stats(cfqg_to_blkg(cfqq->cfqg), - &blkio_policy_cfq); + cfqg_stats_update_idle_time(cfqq->cfqg); } static void __cfq_set_active_queue(struct cfq_data *cfqd, @@ -2035,8 +2019,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, if (cfqq) { cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d", cfqd->serving_prio, cfqd->serving_type); - cfq_blkiocg_update_avg_queue_size_stats(cfqg_to_blkg(cfqq->cfqg), - &blkio_policy_cfq); + cfqg_stats_update_avg_queue_size(cfqq->cfqg); cfqq->slice_start = 0; cfqq->dispatch_start = jiffies; cfqq->allocated_slice = 0; @@ -2384,8 +2367,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) sl = cfqd->cfq_slice_idle; mod_timer(&cfqd->idle_slice_timer, jiffies + sl); - cfq_blkiocg_update_set_idle_time_stats(cfqg_to_blkg(cfqq->cfqg), - &blkio_policy_cfq); + cfqg_stats_set_start_idle_time(cfqq->cfqg); cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl, group_idle ? 
1 : 0); } @@ -2408,9 +2390,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++; cfqq->nr_sectors += blk_rq_sectors(rq); - cfq_blkiocg_update_dispatch_stats(cfqg_to_blkg(cfqq->cfqg), - &blkio_policy_cfq, blk_rq_bytes(rq), - rq_data_dir(rq), rq_is_sync(rq)); + cfqg_stats_update_dispatch(cfqq->cfqg, blk_rq_bytes(rq), rq->cmd_flags); } /* @@ -3513,9 +3493,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfq_clear_cfqq_wait_request(cfqq); __blk_run_queue(cfqd->queue); } else { - cfq_blkiocg_update_idle_time_stats( - cfqg_to_blkg(cfqq->cfqg), - &blkio_policy_cfq); + cfqg_stats_update_idle_time(cfqq->cfqg); cfq_mark_cfqq_must_dispatch(cfqq); } } @@ -3542,10 +3520,8 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); list_add_tail(&rq->queuelist, &cfqq->fifo); cfq_add_rq_rb(rq); - cfq_blkiocg_update_io_add_stats(cfqg_to_blkg(RQ_CFQG(rq)), - &blkio_policy_cfq, - cfqg_to_blkg(cfqd->serving_group), - rq_data_dir(rq), rq_is_sync(rq)); + cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, + rq->cmd_flags); cfq_rq_enqueued(cfqd, cfqq, rq); } @@ -3641,10 +3617,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfqd->rq_in_driver--; cfqq->dispatched--; (RQ_CFQG(rq))->dispatched--; - cfq_blkiocg_update_completion_stats(cfqg_to_blkg(cfqq->cfqg), - &blkio_policy_cfq, rq_start_time_ns(rq), - rq_io_start_time_ns(rq), rq_data_dir(rq), - rq_is_sync(rq)); + cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq), + rq_io_start_time_ns(rq), rq->cmd_flags); cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; @@ -4184,6 +4158,7 @@ static struct elevator_type iosched_cfq = { static struct blkio_policy_type blkio_policy_cfq = { .ops = { .blkio_init_group_fn = cfq_init_blkio_group, + .blkio_reset_group_stats_fn = cfqg_stats_reset, }, .plid = BLKIO_POLICY_PROP, .pdata_size = sizeof(struct cfq_group), -- cgit v1.2.3-59-g8ed1b From 8a3d26151f24e2a2ffa550890144c3d54d2edb15 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:44 -0700 Subject: blkcg: move blkio_group_stats_cpu and friends to blk-throttle.c blkio_group_stats_cpu is used only by blk-throtl and has no reason to be defined in blkcg core. * Move blkio_group_stats_cpu to blk-throttle.c and rename it to tg_stats_cpu. * blkg_policy_data->stats_cpu is replaced with throtl_grp->stats_cpu. prfill functions updated accordingly. * All related macros / functions are renamed so that they have tg_ prefix and the unnecessary @pol arguments are dropped. * Per-cpu stats allocation code is also moved from blk-cgroup.c to blk-throttle.c and gets simplified to only deal with BLKIO_POLICY_THROTL. percpu stat free is performed by the exit method throtl_exit_blkio_group(). * throtl_reset_group_stats() implemented for blkio_reset_group_stats_fn method so that tg->stats_cpu can be reset. 
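For orientation, a minimal sketch (not part of the patch) of what a read-side helper looks like once the per-cpu counters live in throtl_grp: the not-yet-allocated case has to be tolerated because the percpu allocation is deferred to a work item, and the counters are summed across CPUs at print time. example_tg_sum_serviced() is a hypothetical name; the real code does this inside tg_prfill_cpu_rwstat().

/*
 * Hypothetical helper, for illustration only.  Sums the "serviced"
 * rwstat of a throtl_grp across all CPUs.  ->stats_cpu may still be
 * NULL because the percpu allocation happens from a work item.
 */
static u64 example_tg_sum_serviced(struct throtl_grp *tg)
{
	u64 total = 0;
	int cpu, i;

	if (tg->stats_cpu == NULL)
		return 0;

	for_each_possible_cpu(cpu) {
		struct tg_stats_cpu *sc = per_cpu_ptr(tg->stats_cpu, cpu);
		struct blkg_rwstat tmp = blkg_rwstat_read(&sc->serviced);

		for (i = 0; i < BLKG_RWSTAT_NR; i++)
			total += tmp.cnt[i];
	}
	return total;
}
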
Signed-off-by: Tejun Heo --- block/blk-cgroup.c | 98 +-------------------------------------- block/blk-cgroup.h | 13 ------ block/blk-throttle.c | 128 +++++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 114 insertions(+), 125 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index cfdda44f4a0b..16f6ee65a593 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -30,13 +30,6 @@ static LIST_HEAD(blkio_list); static DEFINE_MUTEX(all_q_mutex); static LIST_HEAD(all_q_list); -/* List of groups pending per cpu stats allocation */ -static DEFINE_SPINLOCK(alloc_list_lock); -static LIST_HEAD(alloc_list); - -static void blkio_stat_alloc_fn(struct work_struct *); -static DECLARE_DELAYED_WORK(blkio_stat_alloc_work, blkio_stat_alloc_fn); - struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT }; EXPORT_SYMBOL_GPL(blkio_root_cgroup); @@ -63,60 +56,6 @@ struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio) } EXPORT_SYMBOL_GPL(bio_blkio_cgroup); -/* - * Worker for allocating per cpu stat for blk groups. This is scheduled on - * the system_nrt_wq once there are some groups on the alloc_list waiting - * for allocation. - */ -static void blkio_stat_alloc_fn(struct work_struct *work) -{ - static void *pcpu_stats[BLKIO_NR_POLICIES]; - struct delayed_work *dwork = to_delayed_work(work); - struct blkio_group *blkg; - int i; - bool empty = false; - -alloc_stats: - for (i = 0; i < BLKIO_NR_POLICIES; i++) { - if (pcpu_stats[i] != NULL) - continue; - - pcpu_stats[i] = alloc_percpu(struct blkio_group_stats_cpu); - - /* Allocation failed. Try again after some time. */ - if (pcpu_stats[i] == NULL) { - queue_delayed_work(system_nrt_wq, dwork, - msecs_to_jiffies(10)); - return; - } - } - - spin_lock_irq(&blkio_list_lock); - spin_lock(&alloc_list_lock); - - /* cgroup got deleted or queue exited. */ - if (!list_empty(&alloc_list)) { - blkg = list_first_entry(&alloc_list, struct blkio_group, - alloc_node); - for (i = 0; i < BLKIO_NR_POLICIES; i++) { - struct blkg_policy_data *pd = blkg->pd[i]; - - if (blkio_policy[i] && pd && !pd->stats_cpu) - swap(pd->stats_cpu, pcpu_stats[i]); - } - - list_del_init(&blkg->alloc_node); - } - - empty = list_empty(&alloc_list); - - spin_unlock(&alloc_list_lock); - spin_unlock_irq(&blkio_list_lock); - - if (!empty) - goto alloc_stats; -} - /** * blkg_free - free a blkg * @blkg: blkg to free @@ -140,7 +79,6 @@ static void blkg_free(struct blkio_group *blkg) if (pol && pol->ops.blkio_exit_group_fn) pol->ops.blkio_exit_group_fn(blkg); - free_percpu(pd->stats_cpu); kfree(pd); } @@ -167,7 +105,6 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, blkg->q = q; INIT_LIST_HEAD(&blkg->q_node); - INIT_LIST_HEAD(&blkg->alloc_node); blkg->blkcg = blkcg; blkg->refcnt = 1; cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path)); @@ -245,12 +182,6 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); list_add(&blkg->q_node, &q->blkg_list); spin_unlock(&blkcg->lock); - - spin_lock(&alloc_list_lock); - list_add(&blkg->alloc_node, &alloc_list); - /* Queue per cpu stat allocation from worker thread. 
*/ - queue_delayed_work(system_nrt_wq, &blkio_stat_alloc_work, 0); - spin_unlock(&alloc_list_lock); out: return blkg; } @@ -284,10 +215,6 @@ static void blkg_destroy(struct blkio_group *blkg) list_del_init(&blkg->q_node); hlist_del_init_rcu(&blkg->blkcg_node); - spin_lock(&alloc_list_lock); - list_del_init(&blkg->alloc_node); - spin_unlock(&alloc_list_lock); - /* * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. @@ -319,9 +246,6 @@ void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid) pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL); WARN_ON_ONCE(!pd); - pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu); - WARN_ON_ONCE(!pd->stats_cpu); - blkg->pd[plid] = pd; pd->blkg = blkg; pol->ops.blkio_init_group_fn(blkg); @@ -381,23 +305,6 @@ void __blkg_release(struct blkio_group *blkg) } EXPORT_SYMBOL_GPL(__blkg_release); -static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid) -{ - struct blkg_policy_data *pd = blkg->pd[plid]; - int cpu; - - if (pd->stats_cpu == NULL) - return; - - for_each_possible_cpu(cpu) { - struct blkio_group_stats_cpu *sc = - per_cpu_ptr(pd->stats_cpu, cpu); - - blkg_rwstat_reset(&sc->service_bytes); - blkg_rwstat_reset(&sc->serviced); - } -} - static int blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) { @@ -416,12 +323,9 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { struct blkio_policy_type *pol; - list_for_each_entry(pol, &blkio_list, list) { - blkio_reset_stats_cpu(blkg, pol->plid); - + list_for_each_entry(pol, &blkio_list, list) if (pol->ops.blkio_reset_group_stats_fn) pol->ops.blkio_reset_group_stats_fn(blkg); - } } spin_unlock_irq(&blkcg->lock); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 791570394e87..e368dd00b8cf 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -64,14 +64,6 @@ struct blkg_rwstat { uint64_t cnt[BLKG_RWSTAT_NR]; }; -/* Per cpu blkio group stats */ -struct blkio_group_stats_cpu { - /* total bytes transferred */ - struct blkg_rwstat service_bytes; - /* total IOs serviced, post merge */ - struct blkg_rwstat serviced; -}; - struct blkio_group_conf { unsigned int weight; u64 iops[2]; @@ -86,9 +78,6 @@ struct blkg_policy_data { /* Configuration */ struct blkio_group_conf conf; - /* Per cpu stats pointer */ - struct blkio_group_stats_cpu __percpu *stats_cpu; - /* pol->pdata_size bytes of private data used by policy impl */ char pdata[] __aligned(__alignof__(unsigned long long)); }; @@ -106,8 +95,6 @@ struct blkio_group { struct blkg_policy_data *pd[BLKIO_NR_POLICIES]; - /* List of blkg waiting for per cpu stats memory to be allocated */ - struct list_head alloc_node; struct rcu_head rcu_head; }; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index cb259bc46f43..27f7960dd421 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -40,6 +40,14 @@ struct throtl_rb_root { #define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node) +/* Per-cpu group stats */ +struct tg_stats_cpu { + /* total bytes transferred */ + struct blkg_rwstat service_bytes; + /* total IOs serviced, post merge */ + struct blkg_rwstat serviced; +}; + struct throtl_grp { /* active throtl group service_tree member */ struct rb_node rb_node; @@ -76,6 +84,12 @@ struct throtl_grp { /* Some throttle limits got updated for the group */ int limits_changed; + + /* Per cpu stats pointer */ + struct tg_stats_cpu 
__percpu *stats_cpu; + + /* List of tgs waiting for per cpu stats memory to be allocated */ + struct list_head stats_alloc_node; }; struct throtl_data @@ -100,6 +114,13 @@ struct throtl_data int limits_changed; }; +/* list and work item to allocate percpu group stats */ +static DEFINE_SPINLOCK(tg_stats_alloc_lock); +static LIST_HEAD(tg_stats_alloc_list); + +static void tg_stats_alloc_fn(struct work_struct *); +static DECLARE_DELAYED_WORK(tg_stats_alloc_work, tg_stats_alloc_fn); + static inline struct throtl_grp *blkg_to_tg(struct blkio_group *blkg) { return blkg_to_pdata(blkg, &blkio_policy_throtl); @@ -142,6 +163,44 @@ static inline unsigned int total_nr_queued(struct throtl_data *td) return td->nr_queued[0] + td->nr_queued[1]; } +/* + * Worker for allocating per cpu stat for tgs. This is scheduled on the + * system_nrt_wq once there are some groups on the alloc_list waiting for + * allocation. + */ +static void tg_stats_alloc_fn(struct work_struct *work) +{ + static struct tg_stats_cpu *stats_cpu; /* this fn is non-reentrant */ + struct delayed_work *dwork = to_delayed_work(work); + bool empty = false; + +alloc_stats: + if (!stats_cpu) { + stats_cpu = alloc_percpu(struct tg_stats_cpu); + if (!stats_cpu) { + /* allocation failed, try again after some time */ + queue_delayed_work(system_nrt_wq, dwork, + msecs_to_jiffies(10)); + return; + } + } + + spin_lock_irq(&tg_stats_alloc_lock); + + if (!list_empty(&tg_stats_alloc_list)) { + struct throtl_grp *tg = list_first_entry(&tg_stats_alloc_list, + struct throtl_grp, + stats_alloc_node); + swap(tg->stats_cpu, stats_cpu); + list_del_init(&tg->stats_alloc_node); + } + + empty = list_empty(&tg_stats_alloc_list); + spin_unlock_irq(&tg_stats_alloc_lock); + if (!empty) + goto alloc_stats; +} + static void throtl_init_blkio_group(struct blkio_group *blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); @@ -155,6 +214,43 @@ static void throtl_init_blkio_group(struct blkio_group *blkg) tg->bps[WRITE] = -1; tg->iops[READ] = -1; tg->iops[WRITE] = -1; + + /* + * Ugh... We need to perform per-cpu allocation for tg->stats_cpu + * but percpu allocator can't be called from IO path. Queue tg on + * tg_stats_alloc_list and allocate from work item. + */ + spin_lock(&tg_stats_alloc_lock); + list_add(&tg->stats_alloc_node, &tg_stats_alloc_list); + queue_delayed_work(system_nrt_wq, &tg_stats_alloc_work, 0); + spin_unlock(&tg_stats_alloc_lock); +} + +static void throtl_exit_blkio_group(struct blkio_group *blkg) +{ + struct throtl_grp *tg = blkg_to_tg(blkg); + + spin_lock(&tg_stats_alloc_lock); + list_del_init(&tg->stats_alloc_node); + spin_unlock(&tg_stats_alloc_lock); + + free_percpu(tg->stats_cpu); +} + +static void throtl_reset_group_stats(struct blkio_group *blkg) +{ + struct throtl_grp *tg = blkg_to_tg(blkg); + int cpu; + + if (tg->stats_cpu == NULL) + return; + + for_each_possible_cpu(cpu) { + struct tg_stats_cpu *sc = per_cpu_ptr(tg->stats_cpu, cpu); + + blkg_rwstat_reset(&sc->service_bytes); + blkg_rwstat_reset(&sc->serviced); + } } static struct @@ -565,12 +661,12 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg, static void throtl_update_dispatch_stats(struct blkio_group *blkg, u64 bytes, int rw) { - struct blkg_policy_data *pd = blkg->pd[BLKIO_POLICY_THROTL]; - struct blkio_group_stats_cpu *stats_cpu; + struct throtl_grp *tg = blkg_to_tg(blkg); + struct tg_stats_cpu *stats_cpu; unsigned long flags; /* If per cpu stats are not allocated yet, don't do any accounting. 
*/ - if (pd->stats_cpu == NULL) + if (tg->stats_cpu == NULL) return; /* @@ -580,7 +676,7 @@ static void throtl_update_dispatch_stats(struct blkio_group *blkg, u64 bytes, */ local_irq_save(flags); - stats_cpu = this_cpu_ptr(pd->stats_cpu); + stats_cpu = this_cpu_ptr(tg->stats_cpu); blkg_rwstat_add(&stats_cpu->serviced, rw, 1); blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes); @@ -842,15 +938,15 @@ static void throtl_update_blkio_group_common(struct throtl_data *td, throtl_schedule_delayed_work(td, 0); } -static u64 blkg_prfill_cpu_rwstat(struct seq_file *sf, - struct blkg_policy_data *pd, int off) +static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, + struct blkg_policy_data *pd, int off) { + struct throtl_grp *tg = (void *)pd->pdata; struct blkg_rwstat rwstat = { }, tmp; int i, cpu; for_each_possible_cpu(cpu) { - struct blkio_group_stats_cpu *sc = - per_cpu_ptr(pd->stats_cpu, cpu); + struct tg_stats_cpu *sc = per_cpu_ptr(tg->stats_cpu, cpu); tmp = blkg_rwstat_read((void *)sc + off); for (i = 0; i < BLKG_RWSTAT_NR; i++) @@ -861,12 +957,12 @@ static u64 blkg_prfill_cpu_rwstat(struct seq_file *sf, } /* print per-cpu blkg_rwstat specified by BLKCG_STAT_PRIV() */ -static int blkcg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf) +static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - blkcg_print_blkgs(sf, blkcg, blkg_prfill_cpu_rwstat, + blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, BLKCG_STAT_POL(cft->private), BLKCG_STAT_OFF(cft->private), true); return 0; @@ -1012,14 +1108,14 @@ static struct cftype throtl_files[] = { { .name = "throttle.io_service_bytes", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, - offsetof(struct blkio_group_stats_cpu, service_bytes)), - .read_seq_string = blkcg_print_cpu_rwstat, + offsetof(struct tg_stats_cpu, service_bytes)), + .read_seq_string = tg_print_cpu_rwstat, }, { .name = "throttle.io_serviced", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, - offsetof(struct blkio_group_stats_cpu, serviced)), - .read_seq_string = blkcg_print_cpu_rwstat, + offsetof(struct tg_stats_cpu, serviced)), + .read_seq_string = tg_print_cpu_rwstat, }, { } /* terminate */ }; @@ -1034,6 +1130,8 @@ static void throtl_shutdown_wq(struct request_queue *q) static struct blkio_policy_type blkio_policy_throtl = { .ops = { .blkio_init_group_fn = throtl_init_blkio_group, + .blkio_exit_group_fn = throtl_exit_blkio_group, + .blkio_reset_group_stats_fn = throtl_reset_group_stats, }, .plid = BLKIO_POLICY_THROTL, .pdata_size = sizeof(struct throtl_grp), -- cgit v1.2.3-59-g8ed1b From 3381cb8d2ef1523dbaeec99161d766c25f1e52d6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:44 -0700 Subject: blkcg: move blkio_group_conf->weight to cfq blkio_group_conf->weight is owned by cfq and has no reason to be defined in blkcg core. Replace it with cfq_group->dev_weight and let conf setting functions directly set it. If dev_weight is zero, the cfqg doesn't have device specific weight configured. Also, rename BLKIO_WEIGHT_* constants to CFQ_WEIGHT_* and rename blkio_cgroup->weight to blkio_cgroup->cfq_weight. We eventually want per-policy storage in blkio_cgroup but just mark the ownership of the field for now. 
Signed-off-by: Tejun Heo --- block/blk-cgroup.c | 4 +-- block/blk-cgroup.h | 14 +++++----- block/cfq-iosched.c | 77 ++++++++++++++++++++++++----------------------------- 3 files changed, 45 insertions(+), 50 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 16f6ee65a593..c0e239b8455e 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -30,7 +30,7 @@ static LIST_HEAD(blkio_list); static DEFINE_MUTEX(all_q_mutex); static LIST_HEAD(all_q_list); -struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT }; +struct blkio_cgroup blkio_root_cgroup = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT }; EXPORT_SYMBOL_GPL(blkio_root_cgroup); static struct blkio_policy_type *blkio_policy[BLKIO_NR_POLICIES]; @@ -611,7 +611,7 @@ static struct cgroup_subsys_state *blkiocg_create(struct cgroup *cgroup) if (!blkcg) return ERR_PTR(-ENOMEM); - blkcg->weight = BLKIO_WEIGHT_DEFAULT; + blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT; blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */ done: spin_lock_init(&blkcg->lock); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index e368dd00b8cf..386db2974a08 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -29,6 +29,11 @@ enum blkio_policy_id { #ifdef CONFIG_BLK_CGROUP +/* CFQ specific, out here for blkcg->cfq_weight */ +#define CFQ_WEIGHT_MIN 10 +#define CFQ_WEIGHT_MAX 1000 +#define CFQ_WEIGHT_DEFAULT 500 + /* cft->private [un]packing for stat printing */ #define BLKCG_STAT_PRIV(pol, off) (((unsigned)(pol) << 16) | (off)) #define BLKCG_STAT_POL(prv) ((unsigned)(prv) >> 16) @@ -46,12 +51,14 @@ enum blkg_rwstat_type { struct blkio_cgroup { struct cgroup_subsys_state css; - unsigned int weight; spinlock_t lock; struct hlist_head blkg_list; /* for policies to test whether associated blkcg has changed */ uint64_t id; + + /* TODO: per-policy storage in blkio_cgroup */ + unsigned int cfq_weight; /* belongs to cfq */ }; struct blkg_stat { @@ -65,7 +72,6 @@ struct blkg_rwstat { }; struct blkio_group_conf { - unsigned int weight; u64 iops[2]; u64 bps[2]; }; @@ -355,10 +361,6 @@ static inline void blkg_put(struct blkio_group *blkg) { } #endif -#define BLKIO_WEIGHT_MIN 10 -#define BLKIO_WEIGHT_MAX 1000 -#define BLKIO_WEIGHT_DEFAULT 500 - #ifdef CONFIG_BLK_CGROUP extern struct blkio_cgroup blkio_root_cgroup; extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index a1f37dfd1b8b..adab10de6eea 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -224,7 +224,7 @@ struct cfq_group { u64 vdisktime; unsigned int weight; unsigned int new_weight; - bool needs_update; + unsigned int dev_weight; /* number of cfqq currently on this group */ int nr_cfqq; @@ -838,7 +838,7 @@ static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg) { u64 d = delta << CFQ_SERVICE_SHIFT; - d = d * BLKIO_WEIGHT_DEFAULT; + d = d * CFQ_WEIGHT_DEFAULT; do_div(d, cfqg->weight); return d; } @@ -1165,9 +1165,9 @@ static void cfq_update_group_weight(struct cfq_group *cfqg) { BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); - if (cfqg->needs_update) { + if (cfqg->new_weight) { cfqg->weight = cfqg->new_weight; - cfqg->needs_update = false; + cfqg->new_weight = 0; } } @@ -1325,21 +1325,12 @@ static void cfq_init_cfqg_base(struct cfq_group *cfqg) } #ifdef CONFIG_CFQ_GROUP_IOSCHED -static void cfq_update_blkio_group_weight(struct blkio_group *blkg, - unsigned int weight) -{ - struct cfq_group *cfqg = blkg_to_cfqg(blkg); - - 
cfqg->new_weight = weight; - cfqg->needs_update = true; -} - static void cfq_init_blkio_group(struct blkio_group *blkg) { struct cfq_group *cfqg = blkg_to_cfqg(blkg); cfq_init_cfqg_base(cfqg); - cfqg->weight = blkg->blkcg->weight; + cfqg->weight = blkg->blkcg->cfq_weight; } /* @@ -1377,36 +1368,38 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) cfqg_get(cfqg); } -static u64 blkg_prfill_weight_device(struct seq_file *sf, +static u64 cfqg_prfill_weight_device(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - if (!pd->conf.weight) + struct cfq_group *cfqg = (void *)pd->pdata; + + if (!cfqg->dev_weight) return 0; - return __blkg_prfill_u64(sf, pd, pd->conf.weight); + return __blkg_prfill_u64(sf, pd, cfqg->dev_weight); } -static int blkcg_print_weight_device(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf) +static int cfqg_print_weight_device(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) { blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), - blkg_prfill_weight_device, BLKIO_POLICY_PROP, 0, + cfqg_prfill_weight_device, BLKIO_POLICY_PROP, 0, false); return 0; } -static int blkcg_print_weight(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf) +static int cfq_print_weight(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) { - seq_printf(sf, "%u\n", cgroup_to_blkio_cgroup(cgrp)->weight); + seq_printf(sf, "%u\n", cgroup_to_blkio_cgroup(cgrp)->cfq_weight); return 0; } -static int blkcg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, - const char *buf) +static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, + const char *buf) { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - struct blkg_policy_data *pd; struct blkg_conf_ctx ctx; + struct cfq_group *cfqg; int ret; ret = blkg_conf_prep(blkcg, buf, &ctx); @@ -1414,11 +1407,11 @@ static int blkcg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, return ret; ret = -EINVAL; - pd = ctx.blkg->pd[BLKIO_POLICY_PROP]; - if (pd && (!ctx.v || (ctx.v >= BLKIO_WEIGHT_MIN && - ctx.v <= BLKIO_WEIGHT_MAX))) { - pd->conf.weight = ctx.v; - cfq_update_blkio_group_weight(ctx.blkg, ctx.v ?: blkcg->weight); + cfqg = blkg_to_cfqg(ctx.blkg); + if (cfqg && (!ctx.v || (ctx.v >= CFQ_WEIGHT_MIN && + ctx.v <= CFQ_WEIGHT_MAX))) { + cfqg->dev_weight = ctx.v; + cfqg->new_weight = cfqg->dev_weight ?: blkcg->cfq_weight; ret = 0; } @@ -1426,23 +1419,23 @@ static int blkcg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, return ret; } -static int blkcg_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) +static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); struct blkio_group *blkg; struct hlist_node *n; - if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX) + if (val < CFQ_WEIGHT_MIN || val > CFQ_WEIGHT_MAX) return -EINVAL; spin_lock_irq(&blkcg->lock); - blkcg->weight = (unsigned int)val; + blkcg->cfq_weight = (unsigned int)val; hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { - struct blkg_policy_data *pd = blkg->pd[BLKIO_POLICY_PROP]; + struct cfq_group *cfqg = blkg_to_cfqg(blkg); - if (pd && !pd->conf.weight) - cfq_update_blkio_group_weight(blkg, blkcg->weight); + if (cfqg && !cfqg->dev_weight) + cfqg->new_weight = blkcg->cfq_weight; } spin_unlock_irq(&blkcg->lock); @@ -1480,14 +1473,14 @@ static int cfqg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft, static struct cftype cfq_blkcg_files[] = { { 
.name = "weight_device", - .read_seq_string = blkcg_print_weight_device, - .write_string = blkcg_set_weight_device, + .read_seq_string = cfqg_print_weight_device, + .write_string = cfqg_set_weight_device, .max_write_len = 256, }, { .name = "weight", - .read_seq_string = blkcg_print_weight, - .write_u64 = blkcg_set_weight, + .read_seq_string = cfq_print_weight, + .write_u64 = cfq_set_weight, }, { .name = "time", @@ -3983,7 +3976,7 @@ static int cfq_init_queue(struct request_queue *q) return -ENOMEM; } - cfqd->root_group->weight = 2*BLKIO_WEIGHT_DEFAULT; + cfqd->root_group->weight = 2 * CFQ_WEIGHT_DEFAULT; /* * Not strictly needed (since RB_ROOT just clears the node and we -- cgit v1.2.3-59-g8ed1b From af133ceb261033eb43c03d161a991c3b772e8c56 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:44 -0700 Subject: blkcg: move blkio_group_conf->iops and ->bps to blk-throttle blkio_cgroup_conf->iops and ->bps are owned by blk-throttle and has no reason to be defined in blkcg core. Drop them and let conf setting functions directly manipulate throtl_grp->bps[] and ->iops[]. This makes blkio_group_conf empty. Drop it. Signed-off-by: Tejun Heo --- block/blk-cgroup.h | 8 --- block/blk-throttle.c | 153 +++++++++++++++++++-------------------------------- 2 files changed, 58 insertions(+), 103 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 386db2974a08..a77ab1ac53a7 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -71,19 +71,11 @@ struct blkg_rwstat { uint64_t cnt[BLKG_RWSTAT_NR]; }; -struct blkio_group_conf { - u64 iops[2]; - u64 bps[2]; -}; - /* per-blkg per-policy data */ struct blkg_policy_data { /* the blkg this per-policy data belongs to */ struct blkio_group *blkg; - /* Configuration */ - struct blkio_group_conf conf; - /* pol->pdata_size bytes of private data used by policy impl */ char pdata[] __aligned(__alignof__(unsigned long long)); }; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 27f7960dd421..004964bb6fdd 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -924,20 +924,6 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) } } -/* - * Can not take queue lock in update functions as queue lock under - * blkcg_lock is not allowed. Under other paths we take blkcg_lock under - * queue_lock. 
- */ -static void throtl_update_blkio_group_common(struct throtl_data *td, - struct throtl_grp *tg) -{ - xchg(&tg->limits_changed, true); - xchg(&td->limits_changed, true); - /* Schedule a work now to process the limit change */ - throtl_schedule_delayed_work(td, 0); -} - static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, int off) { @@ -968,68 +954,48 @@ static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, return 0; } -static u64 blkg_prfill_conf_u64(struct seq_file *sf, - struct blkg_policy_data *pd, int off) +static u64 tg_prfill_conf_u64(struct seq_file *sf, struct blkg_policy_data *pd, + int off) { - u64 v = *(u64 *)((void *)&pd->conf + off); + u64 v = *(u64 *)((void *)pd->pdata + off); - if (!v) + if (v == -1) return 0; return __blkg_prfill_u64(sf, pd, v); } -static int blkcg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf) +static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd, + int off) { - blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), - blkg_prfill_conf_u64, BLKIO_POLICY_THROTL, - cft->private, false); - return 0; -} + unsigned int v = *(unsigned int *)((void *)pd->pdata + off); -static void throtl_update_blkio_group_read_bps(struct blkio_group *blkg, - u64 read_bps) -{ - struct throtl_grp *tg = blkg_to_tg(blkg); - - tg->bps[READ] = read_bps; - throtl_update_blkio_group_common(blkg->q->td, tg); -} - -static void throtl_update_blkio_group_write_bps(struct blkio_group *blkg, - u64 write_bps) -{ - struct throtl_grp *tg = blkg_to_tg(blkg); - - tg->bps[WRITE] = write_bps; - throtl_update_blkio_group_common(blkg->q->td, tg); + if (v == -1) + return 0; + return __blkg_prfill_u64(sf, pd, v); } -static void throtl_update_blkio_group_read_iops(struct blkio_group *blkg, - u64 read_iops) +static int tg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) { - struct throtl_grp *tg = blkg_to_tg(blkg); - - tg->iops[READ] = read_iops; - throtl_update_blkio_group_common(blkg->q->td, tg); + blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), tg_prfill_conf_u64, + BLKIO_POLICY_THROTL, cft->private, false); + return 0; } -static void throtl_update_blkio_group_write_iops(struct blkio_group *blkg, - u64 write_iops) +static int tg_print_conf_uint(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) { - struct throtl_grp *tg = blkg_to_tg(blkg); - - tg->iops[WRITE] = write_iops; - throtl_update_blkio_group_common(blkg->q->td, tg); + blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), tg_prfill_conf_uint, + BLKIO_POLICY_THROTL, cft->private, false); + return 0; } -static int blkcg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft, - const char *buf, - void (*update)(struct blkio_group *, u64)) +static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf, + bool is_u64) { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - struct blkg_policy_data *pd; struct blkg_conf_ctx ctx; + struct throtl_grp *tg; int ret; ret = blkg_conf_prep(blkcg, buf, &ctx); @@ -1037,10 +1003,23 @@ static int blkcg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft, return ret; ret = -EINVAL; - pd = ctx.blkg->pd[BLKIO_POLICY_THROTL]; - if (pd) { - *(u64 *)((void *)&pd->conf + cft->private) = ctx.v; - update(ctx.blkg, ctx.v ?: -1); + tg = blkg_to_tg(ctx.blkg); + if (tg) { + struct throtl_data *td = ctx.blkg->q->td; + + if (!ctx.v) + ctx.v = -1; + + if (is_u64) + *(u64 *)((void *)tg + cft->private) = ctx.v; + else + *(unsigned int *)((void *)tg + 
cft->private) = ctx.v; + + /* XXX: we don't need the following deferred processing */ + xchg(&tg->limits_changed, true); + xchg(&td->limits_changed, true); + throtl_schedule_delayed_work(td, 0); + ret = 0; } @@ -1048,61 +1027,45 @@ static int blkcg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft, return ret; } -static int blkcg_set_conf_bps_r(struct cgroup *cgrp, struct cftype *cft, - const char *buf) -{ - return blkcg_set_conf_u64(cgrp, cft, buf, - throtl_update_blkio_group_read_bps); -} - -static int blkcg_set_conf_bps_w(struct cgroup *cgrp, struct cftype *cft, - const char *buf) -{ - return blkcg_set_conf_u64(cgrp, cft, buf, - throtl_update_blkio_group_write_bps); -} - -static int blkcg_set_conf_iops_r(struct cgroup *cgrp, struct cftype *cft, - const char *buf) +static int tg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft, + const char *buf) { - return blkcg_set_conf_u64(cgrp, cft, buf, - throtl_update_blkio_group_read_iops); + return tg_set_conf(cgrp, cft, buf, true); } -static int blkcg_set_conf_iops_w(struct cgroup *cgrp, struct cftype *cft, - const char *buf) +static int tg_set_conf_uint(struct cgroup *cgrp, struct cftype *cft, + const char *buf) { - return blkcg_set_conf_u64(cgrp, cft, buf, - throtl_update_blkio_group_write_iops); + return tg_set_conf(cgrp, cft, buf, false); } static struct cftype throtl_files[] = { { .name = "throttle.read_bps_device", - .private = offsetof(struct blkio_group_conf, bps[READ]), - .read_seq_string = blkcg_print_conf_u64, - .write_string = blkcg_set_conf_bps_r, + .private = offsetof(struct throtl_grp, bps[READ]), + .read_seq_string = tg_print_conf_u64, + .write_string = tg_set_conf_u64, .max_write_len = 256, }, { .name = "throttle.write_bps_device", - .private = offsetof(struct blkio_group_conf, bps[WRITE]), - .read_seq_string = blkcg_print_conf_u64, - .write_string = blkcg_set_conf_bps_w, + .private = offsetof(struct throtl_grp, bps[WRITE]), + .read_seq_string = tg_print_conf_u64, + .write_string = tg_set_conf_u64, .max_write_len = 256, }, { .name = "throttle.read_iops_device", - .private = offsetof(struct blkio_group_conf, iops[READ]), - .read_seq_string = blkcg_print_conf_u64, - .write_string = blkcg_set_conf_iops_r, + .private = offsetof(struct throtl_grp, iops[READ]), + .read_seq_string = tg_print_conf_uint, + .write_string = tg_set_conf_uint, .max_write_len = 256, }, { .name = "throttle.write_iops_device", - .private = offsetof(struct blkio_group_conf, iops[WRITE]), - .read_seq_string = blkcg_print_conf_u64, - .write_string = blkcg_set_conf_iops_w, + .private = offsetof(struct throtl_grp, iops[WRITE]), + .read_seq_string = tg_print_conf_uint, + .write_string = tg_set_conf_uint, .max_write_len = 256, }, { -- cgit v1.2.3-59-g8ed1b From d366e7ec41882791c970dfb7c67b737be8c3a174 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:44 -0700 Subject: blkcg: pass around pd->pdata instead of pd itself in prfill functions Now that all conf and stat fields are moved into policy specific blkio_policy_data->pdata areas, there's no reason to use blkio_policy_data itself in prfill functions. Pass around @pd->pdata instead of @pd. 
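For reference, a minimal sketch (not part of the patch) of the callback shape after this change: prfill functions receive the policy's pdata directly and cast it to their own type, while pdata_to_blkg() still recovers the owning blkg when __blkg_prfill_u64() needs the device name. example_prfill_dev_weight() and example_print_dev_weight() are hypothetical names mirroring the cfq weight_device pair.

/* Hypothetical policy callbacks, for illustration only. */
static u64 example_prfill_dev_weight(struct seq_file *sf, void *pdata, int off)
{
	struct cfq_group *cfqg = pdata;		/* pdata is pd->pdata */

	if (!cfqg->dev_weight)
		return 0;			/* nothing to print for this device */
	return __blkg_prfill_u64(sf, pdata, cfqg->dev_weight);
}

static int example_print_dev_weight(struct cgroup *cgrp, struct cftype *cft,
				    struct seq_file *sf)
{
	blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp),
			  example_prfill_dev_weight, BLKIO_POLICY_PROP, 0,
			  false);
	return 0;
}
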
Signed-off-by: Tejun Heo --- block/blk-cgroup.c | 33 +++++++++++++++------------------ block/blk-cgroup.h | 6 +++--- block/blk-throttle.c | 21 +++++++++------------ block/cfq-iosched.c | 14 ++++++-------- 4 files changed, 33 insertions(+), 41 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index c0e239b8455e..7de19d7954c7 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -359,7 +359,7 @@ static const char *blkg_dev_name(struct blkio_group *blkg) * cftype->read_seq_string method. */ void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg, - u64 (*prfill)(struct seq_file *, struct blkg_policy_data *, int), + u64 (*prfill)(struct seq_file *, void *, int), int pol, int data, bool show_total) { struct blkio_group *blkg; @@ -369,7 +369,7 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg, spin_lock_irq(&blkcg->lock); hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) if (blkg->pd[pol]) - total += prfill(sf, blkg->pd[pol], data); + total += prfill(sf, blkg->pd[pol]->pdata, data); spin_unlock_irq(&blkcg->lock); if (show_total) @@ -380,14 +380,14 @@ EXPORT_SYMBOL_GPL(blkcg_print_blkgs); /** * __blkg_prfill_u64 - prfill helper for a single u64 value * @sf: seq_file to print to - * @pd: policy data of interest + * @pdata: policy private data of interest * @v: value to print * - * Print @v to @sf for the device assocaited with @pd. + * Print @v to @sf for the device assocaited with @pdata. */ -u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v) +u64 __blkg_prfill_u64(struct seq_file *sf, void *pdata, u64 v) { - const char *dname = blkg_dev_name(pd->blkg); + const char *dname = blkg_dev_name(pdata_to_blkg(pdata)); if (!dname) return 0; @@ -400,12 +400,12 @@ EXPORT_SYMBOL_GPL(__blkg_prfill_u64); /** * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat * @sf: seq_file to print to - * @pd: policy data of interest + * @pdata: policy private data of interest * @rwstat: rwstat to print * - * Print @rwstat to @sf for the device assocaited with @pd. + * Print @rwstat to @sf for the device assocaited with @pdata. 
*/ -u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, +u64 __blkg_prfill_rwstat(struct seq_file *sf, void *pdata, const struct blkg_rwstat *rwstat) { static const char *rwstr[] = { @@ -414,7 +414,7 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, [BLKG_RWSTAT_SYNC] = "Sync", [BLKG_RWSTAT_ASYNC] = "Async", }; - const char *dname = blkg_dev_name(pd->blkg); + const char *dname = blkg_dev_name(pdata_to_blkg(pdata)); u64 v; int i; @@ -430,19 +430,16 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, return v; } -static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, - int off) +static u64 blkg_prfill_stat(struct seq_file *sf, void *pdata, int off) { - return __blkg_prfill_u64(sf, pd, - blkg_stat_read((void *)pd->pdata + off)); + return __blkg_prfill_u64(sf, pdata, blkg_stat_read(pdata + off)); } -static u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, - int off) +static u64 blkg_prfill_rwstat(struct seq_file *sf, void *pdata, int off) { - struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd->pdata + off); + struct blkg_rwstat rwstat = blkg_rwstat_read(pdata + off); - return __blkg_prfill_rwstat(sf, pd, &rwstat); + return __blkg_prfill_rwstat(sf, pdata, &rwstat); } /* print blkg_stat specified by BLKCG_STAT_PRIV() */ diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index a77ab1ac53a7..c930895bfac9 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -126,10 +126,10 @@ extern void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid); void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg, - u64 (*prfill)(struct seq_file *, struct blkg_policy_data *, int), + u64 (*prfill)(struct seq_file *, void *, int), int pol, int data, bool show_total); -u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); -u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, +u64 __blkg_prfill_u64(struct seq_file *sf, void *pdata, u64 v); +u64 __blkg_prfill_rwstat(struct seq_file *sf, void *pdata, const struct blkg_rwstat *rwstat); int blkcg_print_stat(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 004964bb6fdd..bd6dbfe1e4e9 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -924,10 +924,9 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) } } -static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, - struct blkg_policy_data *pd, int off) +static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, void *pdata, int off) { - struct throtl_grp *tg = (void *)pd->pdata; + struct throtl_grp *tg = pdata; struct blkg_rwstat rwstat = { }, tmp; int i, cpu; @@ -939,7 +938,7 @@ static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, rwstat.cnt[i] += tmp.cnt[i]; } - return __blkg_prfill_rwstat(sf, pd, &rwstat); + return __blkg_prfill_rwstat(sf, pdata, &rwstat); } /* print per-cpu blkg_rwstat specified by BLKCG_STAT_PRIV() */ @@ -954,24 +953,22 @@ static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, return 0; } -static u64 tg_prfill_conf_u64(struct seq_file *sf, struct blkg_policy_data *pd, - int off) +static u64 tg_prfill_conf_u64(struct seq_file *sf, void *pdata, int off) { - u64 v = *(u64 *)((void *)pd->pdata + off); + u64 v = *(u64 *)(pdata + off); if (v == -1) return 0; - return __blkg_prfill_u64(sf, pd, v); + return __blkg_prfill_u64(sf, pdata, v); } -static u64 tg_prfill_conf_uint(struct 
seq_file *sf, struct blkg_policy_data *pd, - int off) +static u64 tg_prfill_conf_uint(struct seq_file *sf, void *pdata, int off) { - unsigned int v = *(unsigned int *)((void *)pd->pdata + off); + unsigned int v = *(unsigned int *)(pdata + off); if (v == -1) return 0; - return __blkg_prfill_u64(sf, pd, v); + return __blkg_prfill_u64(sf, pdata, v); } static int tg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft, diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index adab10de6eea..fd505f721f61 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1368,14 +1368,13 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) cfqg_get(cfqg); } -static u64 cfqg_prfill_weight_device(struct seq_file *sf, - struct blkg_policy_data *pd, int off) +static u64 cfqg_prfill_weight_device(struct seq_file *sf, void *pdata, int off) { - struct cfq_group *cfqg = (void *)pd->pdata; + struct cfq_group *cfqg = pdata; if (!cfqg->dev_weight) return 0; - return __blkg_prfill_u64(sf, pd, cfqg->dev_weight); + return __blkg_prfill_u64(sf, pdata, cfqg->dev_weight); } static int cfqg_print_weight_device(struct cgroup *cgrp, struct cftype *cft, @@ -1443,10 +1442,9 @@ static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) } #ifdef CONFIG_DEBUG_BLK_CGROUP -static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, - struct blkg_policy_data *pd, int off) +static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, void *pdata, int off) { - struct cfq_group *cfqg = (void *)pd->pdata; + struct cfq_group *cfqg = pdata; u64 samples = blkg_stat_read(&cfqg->stats.avg_queue_size_samples); u64 v = 0; @@ -1454,7 +1452,7 @@ static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, v = blkg_stat_read(&cfqg->stats.avg_queue_size_sum); do_div(v, samples); } - __blkg_prfill_u64(sf, pd, v); + __blkg_prfill_u64(sf, pdata, v); return 0; } -- cgit v1.2.3-59-g8ed1b From 5bc4afb1ec6aa562fac4d9aba34d957ee42f5813 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:45 -0700 Subject: blkcg: drop BLKCG_STAT_{PRIV|POL|OFF} macros Now that all stat handling code lives in policy implementations, there's no need to encode policy ID in cft->private. * Export blkcg_prfill_[rw]stat() from blkcg, remove blkcg_print_[rw]stat(), and implement cfqg_print_[rw]stat() which use hard-code BLKIO_POLICY_PROP. * Use cft->private for offset of the target field directly and drop BLKCG_STAT_{PRIV|POL|OFF}(). Signed-off-by: Tejun Heo --- block/blk-cgroup.c | 48 +++++++++++++---------------- block/blk-cgroup.h | 11 ++----- block/blk-throttle.c | 12 +++----- block/cfq-iosched.c | 85 ++++++++++++++++++++++++++++------------------------ 4 files changed, 72 insertions(+), 84 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 7de19d7954c7..9449c383b7ba 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -430,43 +430,35 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, void *pdata, return v; } -static u64 blkg_prfill_stat(struct seq_file *sf, void *pdata, int off) +/** + * blkg_prfill_stat - prfill callback for blkg_stat + * @sf: seq_file to print to + * @pdata: policy private data of interest + * @off: offset to the blkg_stat in @pdata + * + * prfill callback for printing a blkg_stat. 
+ */ +u64 blkg_prfill_stat(struct seq_file *sf, void *pdata, int off) { return __blkg_prfill_u64(sf, pdata, blkg_stat_read(pdata + off)); } +EXPORT_SYMBOL_GPL(blkg_prfill_stat); -static u64 blkg_prfill_rwstat(struct seq_file *sf, void *pdata, int off) +/** + * blkg_prfill_rwstat - prfill callback for blkg_rwstat + * @sf: seq_file to print to + * @pdata: policy private data of interest + * @off: offset to the blkg_rwstat in @pdata + * + * prfill callback for printing a blkg_rwstat. + */ +u64 blkg_prfill_rwstat(struct seq_file *sf, void *pdata, int off) { struct blkg_rwstat rwstat = blkg_rwstat_read(pdata + off); return __blkg_prfill_rwstat(sf, pdata, &rwstat); } - -/* print blkg_stat specified by BLKCG_STAT_PRIV() */ -int blkcg_print_stat(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf) -{ - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - - blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, - BLKCG_STAT_POL(cft->private), - BLKCG_STAT_OFF(cft->private), false); - return 0; -} -EXPORT_SYMBOL_GPL(blkcg_print_stat); - -/* print blkg_rwstat specified by BLKCG_STAT_PRIV() */ -int blkcg_print_rwstat(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf) -{ - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - - blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, - BLKCG_STAT_POL(cft->private), - BLKCG_STAT_OFF(cft->private), true); - return 0; -} -EXPORT_SYMBOL_GPL(blkcg_print_rwstat); +EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); /** * blkg_conf_prep - parse and prepare for per-blkg config update diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index c930895bfac9..ca0ff7c0ffb6 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -34,11 +34,6 @@ enum blkio_policy_id { #define CFQ_WEIGHT_MAX 1000 #define CFQ_WEIGHT_DEFAULT 500 -/* cft->private [un]packing for stat printing */ -#define BLKCG_STAT_PRIV(pol, off) (((unsigned)(pol) << 16) | (off)) -#define BLKCG_STAT_POL(prv) ((unsigned)(prv) >> 16) -#define BLKCG_STAT_OFF(prv) ((unsigned)(prv) & 0xffff) - enum blkg_rwstat_type { BLKG_RWSTAT_READ, BLKG_RWSTAT_WRITE, @@ -131,10 +126,8 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg, u64 __blkg_prfill_u64(struct seq_file *sf, void *pdata, u64 v); u64 __blkg_prfill_rwstat(struct seq_file *sf, void *pdata, const struct blkg_rwstat *rwstat); -int blkcg_print_stat(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf); -int blkcg_print_rwstat(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf); +u64 blkg_prfill_stat(struct seq_file *sf, void *pdata, int off); +u64 blkg_prfill_rwstat(struct seq_file *sf, void *pdata, int off); struct blkg_conf_ctx { struct gendisk *disk; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index bd6dbfe1e4e9..60240142f5ae 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -941,15 +941,13 @@ static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, void *pdata, int off) return __blkg_prfill_rwstat(sf, pdata, &rwstat); } -/* print per-cpu blkg_rwstat specified by BLKCG_STAT_PRIV() */ static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, - BLKCG_STAT_POL(cft->private), - BLKCG_STAT_OFF(cft->private), true); + blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, BLKIO_POLICY_THROTL, + cft->private, true); return 0; } @@ -1067,14 +1065,12 @@ static struct cftype throtl_files[] = { }, { .name = "throttle.io_service_bytes", - 
.private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, - offsetof(struct tg_stats_cpu, service_bytes)), + .private = offsetof(struct tg_stats_cpu, service_bytes), .read_seq_string = tg_print_cpu_rwstat, }, { .name = "throttle.io_serviced", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, - offsetof(struct tg_stats_cpu, serviced)), + .private = offsetof(struct tg_stats_cpu, serviced), .read_seq_string = tg_print_cpu_rwstat, }, { } /* terminate */ diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index fd505f721f61..cff8b5ba6208 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1441,6 +1441,26 @@ static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) return 0; } +static int cfqg_print_stat(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + + blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, BLKIO_POLICY_PROP, + cft->private, false); + return 0; +} + +static int cfqg_print_rwstat(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + + blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, BLKIO_POLICY_PROP, + cft->private, true); + return 0; +} + #ifdef CONFIG_DEBUG_BLK_CGROUP static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, void *pdata, int off) { @@ -1482,51 +1502,43 @@ static struct cftype cfq_blkcg_files[] = { }, { .name = "time", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct cfq_group, stats.time)), - .read_seq_string = blkcg_print_stat, + .private = offsetof(struct cfq_group, stats.time), + .read_seq_string = cfqg_print_stat, }, { .name = "sectors", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct cfq_group, stats.sectors)), - .read_seq_string = blkcg_print_stat, + .private = offsetof(struct cfq_group, stats.sectors), + .read_seq_string = cfqg_print_stat, }, { .name = "io_service_bytes", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct cfq_group, stats.service_bytes)), - .read_seq_string = blkcg_print_rwstat, + .private = offsetof(struct cfq_group, stats.service_bytes), + .read_seq_string = cfqg_print_rwstat, }, { .name = "io_serviced", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct cfq_group, stats.serviced)), - .read_seq_string = blkcg_print_rwstat, + .private = offsetof(struct cfq_group, stats.serviced), + .read_seq_string = cfqg_print_rwstat, }, { .name = "io_service_time", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct cfq_group, stats.service_time)), - .read_seq_string = blkcg_print_rwstat, + .private = offsetof(struct cfq_group, stats.service_time), + .read_seq_string = cfqg_print_rwstat, }, { .name = "io_wait_time", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct cfq_group, stats.wait_time)), - .read_seq_string = blkcg_print_rwstat, + .private = offsetof(struct cfq_group, stats.wait_time), + .read_seq_string = cfqg_print_rwstat, }, { .name = "io_merged", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct cfq_group, stats.merged)), - .read_seq_string = blkcg_print_rwstat, + .private = offsetof(struct cfq_group, stats.merged), + .read_seq_string = cfqg_print_rwstat, }, { .name = "io_queued", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct cfq_group, stats.queued)), - .read_seq_string = blkcg_print_rwstat, + .private = offsetof(struct cfq_group, stats.queued), + .read_seq_string = cfqg_print_rwstat, }, #ifdef CONFIG_DEBUG_BLK_CGROUP { @@ -1535,33 +1547,28 
@@ static struct cftype cfq_blkcg_files[] = { }, { .name = "group_wait_time", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct cfq_group, stats.group_wait_time)), - .read_seq_string = blkcg_print_stat, + .private = offsetof(struct cfq_group, stats.group_wait_time), + .read_seq_string = cfqg_print_stat, }, { .name = "idle_time", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct cfq_group, stats.idle_time)), - .read_seq_string = blkcg_print_stat, + .private = offsetof(struct cfq_group, stats.idle_time), + .read_seq_string = cfqg_print_stat, }, { .name = "empty_time", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct cfq_group, stats.empty_time)), - .read_seq_string = blkcg_print_stat, + .private = offsetof(struct cfq_group, stats.empty_time), + .read_seq_string = cfqg_print_stat, }, { .name = "dequeue", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct cfq_group, stats.dequeue)), - .read_seq_string = blkcg_print_stat, + .private = offsetof(struct cfq_group, stats.dequeue), + .read_seq_string = cfqg_print_stat, }, { .name = "unaccounted_time", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP, - offsetof(struct cfq_group, stats.unaccounted_time)), - .read_seq_string = blkcg_print_stat, + .private = offsetof(struct cfq_group, stats.unaccounted_time), + .read_seq_string = cfqg_print_stat, }, #endif /* CONFIG_DEBUG_BLK_CGROUP */ { } /* terminate */ -- cgit v1.2.3-59-g8ed1b From f48ec1d7885281a9c6cd7779d61f321d1b1fd741 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Apr 2012 13:11:25 -0700 Subject: cfq: fix build breakage & warnings * CFQ_WEIGHT_* defined inside CONFIG_BLK_CGROUP causes cfq-iosched.c compile failure when the config is disabled. Move it outside the ifdef block. * Dummy cfqg_stats_*() definitions were lacking inline modifiers causing unused functions warning if !CONFIG_CFQ_GROUP_IOSCHED. Add them. 
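For reference, a minimal sketch of the warning the inline modifiers silence; the stub names below are made up for illustration. gcc's -Wunused-function fires for a plain file-scope static that a given configuration never calls, but not for a static inline one:

	/* never referenced when !CONFIG_CFQ_GROUP_IOSCHED */
	static void stats_stub(void) { }		/* warning: defined but not used */
	static inline void stats_stub2(void) { }	/* no warning */

Marking the dummy definitions static inline therefore keeps !CONFIG_CFQ_GROUP_IOSCHED builds warning-free.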
Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.h | 4 ++-- block/cfq-iosched.c | 17 ++++++++--------- 2 files changed, 10 insertions(+), 11 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index ca0ff7c0ffb6..64392ac76ce8 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -27,13 +27,13 @@ enum blkio_policy_id { /* Max limits for throttle policy */ #define THROTL_IOPS_MAX UINT_MAX -#ifdef CONFIG_BLK_CGROUP - /* CFQ specific, out here for blkcg->cfq_weight */ #define CFQ_WEIGHT_MIN 10 #define CFQ_WEIGHT_MAX 1000 #define CFQ_WEIGHT_DEFAULT 500 +#ifdef CONFIG_BLK_CGROUP + enum blkg_rwstat_type { BLKG_RWSTAT_READ, BLKG_RWSTAT_WRITE, diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index cff8b5ba6208..7a8c3e0ab3a2 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -17,7 +17,7 @@ #include "blk.h" #include "blk-cgroup.h" -static struct blkio_policy_type blkio_policy_cfq; +static struct blkio_policy_type blkio_policy_cfq __maybe_unused; /* * tunables @@ -541,14 +541,13 @@ static void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) #else /* CONFIG_CFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */ -static void cfqg_stats_set_start_group_wait_time(struct cfq_group *cfqg, - struct cfq_group *curr_cfqg) { } -static void cfqg_stats_end_empty_time(struct cfqg_stats *stats) { } -static void cfqg_stats_update_dequeue(struct cfq_group *cfqg) { } -static void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg) { } -static void cfqg_stats_update_idle_time(struct cfq_group *cfqg) { } -static void cfqg_stats_set_start_idle_time(struct cfq_group *cfqg) { } -static void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) { } +static inline void cfqg_stats_set_start_group_wait_time(struct cfq_group *cfqg, struct cfq_group *curr_cfqg) { } +static inline void cfqg_stats_end_empty_time(struct cfqg_stats *stats) { } +static inline void cfqg_stats_update_dequeue(struct cfq_group *cfqg) { } +static inline void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg) { } +static inline void cfqg_stats_update_idle_time(struct cfq_group *cfqg) { } +static inline void cfqg_stats_set_start_idle_time(struct cfq_group *cfqg) { } +static inline void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) { } #endif /* CONFIG_CFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */ -- cgit v1.2.3-59-g8ed1b From bc0d6501a844392ab6ad419d7ca5af4693b6afac Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Apr 2012 13:11:26 -0700 Subject: blkcg: kill blkio_list and replace blkio_list_lock with a mutex With blkio_policy[], blkio_list is redundant and hinders with per-queue policy activation. Remove it. Also, replace blkio_list_lock with a mutex blkcg_pol_mutex and let it protect the whole [un]registration. This is to prepare for per-queue policy activation and doesn't cause any functional difference. 
Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 32 +++++++++++++++++--------------- block/blk-cgroup.h | 1 - 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 9449c383b7ba..af665fe7f4f2 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -24,9 +24,7 @@ #define MAX_KEY_LEN 100 -static DEFINE_SPINLOCK(blkio_list_lock); -static LIST_HEAD(blkio_list); - +static DEFINE_MUTEX(blkcg_pol_mutex); static DEFINE_MUTEX(all_q_mutex); static LIST_HEAD(all_q_list); @@ -311,8 +309,9 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); struct blkio_group *blkg; struct hlist_node *n; + int i; - spin_lock(&blkio_list_lock); + mutex_lock(&blkcg_pol_mutex); spin_lock_irq(&blkcg->lock); /* @@ -321,15 +320,16 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) * anyway. If you get hit by a race, retry. */ hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { - struct blkio_policy_type *pol; + for (i = 0; i < BLKIO_NR_POLICIES; i++) { + struct blkio_policy_type *pol = blkio_policy[i]; - list_for_each_entry(pol, &blkio_list, list) - if (pol->ops.blkio_reset_group_stats_fn) + if (pol && pol->ops.blkio_reset_group_stats_fn) pol->ops.blkio_reset_group_stats_fn(blkg); + } } spin_unlock_irq(&blkcg->lock); - spin_unlock(&blkio_list_lock); + mutex_unlock(&blkcg_pol_mutex); return 0; } @@ -732,20 +732,21 @@ void blkio_policy_register(struct blkio_policy_type *blkiop) { struct request_queue *q; + mutex_lock(&blkcg_pol_mutex); + blkcg_bypass_start(); - spin_lock(&blkio_list_lock); BUG_ON(blkio_policy[blkiop->plid]); blkio_policy[blkiop->plid] = blkiop; - list_add_tail(&blkiop->list, &blkio_list); - - spin_unlock(&blkio_list_lock); list_for_each_entry(q, &all_q_list, all_q_node) update_root_blkg_pd(q, blkiop->plid); + blkcg_bypass_end(); if (blkiop->cftypes) WARN_ON(cgroup_add_cftypes(&blkio_subsys, blkiop->cftypes)); + + mutex_unlock(&blkcg_pol_mutex); } EXPORT_SYMBOL_GPL(blkio_policy_register); @@ -753,19 +754,20 @@ void blkio_policy_unregister(struct blkio_policy_type *blkiop) { struct request_queue *q; + mutex_lock(&blkcg_pol_mutex); + if (blkiop->cftypes) cgroup_rm_cftypes(&blkio_subsys, blkiop->cftypes); blkcg_bypass_start(); - spin_lock(&blkio_list_lock); BUG_ON(blkio_policy[blkiop->plid] != blkiop); blkio_policy[blkiop->plid] = NULL; - list_del_init(&blkiop->list); - spin_unlock(&blkio_list_lock); list_for_each_entry(q, &all_q_list, all_q_node) update_root_blkg_pd(q, blkiop->plid); blkcg_bypass_end(); + + mutex_unlock(&blkcg_pol_mutex); } EXPORT_SYMBOL_GPL(blkio_policy_unregister); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 64392ac76ce8..c772581c9011 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -102,7 +102,6 @@ struct blkio_policy_ops { }; struct blkio_policy_type { - struct list_head list; struct blkio_policy_ops ops; enum blkio_policy_id plid; size_t pdata_size; /* policy specific private data size */ -- cgit v1.2.3-59-g8ed1b From ec399347d39fb2337ebace928cf4a2855bd0ec37 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Apr 2012 13:11:27 -0700 Subject: blkcg: use @pol instead of @plid in update_root_blkg_pd() and blkcg_print_blkgs() The two functions were taking "enum blkio_policy_id plid". Make them take "const struct blkio_policy_type *pol" instead. 
This is to prepare for per-queue policy activation and doesn't cause any functional difference. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 21 +++++++++++---------- block/blk-cgroup.h | 7 ++++--- block/blk-throttle.c | 6 +++--- block/cfq-iosched.c | 10 +++++----- 4 files changed, 23 insertions(+), 21 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index af665fe7f4f2..b1231524a097 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -226,17 +226,17 @@ static void blkg_destroy(struct blkio_group *blkg) * aren't shot down. This broken and racy implementation is temporary. * Eventually, blkg shoot down will be replaced by proper in-place update. */ -void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid) +void update_root_blkg_pd(struct request_queue *q, + const struct blkio_policy_type *pol) { - struct blkio_policy_type *pol = blkio_policy[plid]; struct blkio_group *blkg = blkg_lookup(&blkio_root_cgroup, q); struct blkg_policy_data *pd; if (!blkg) return; - kfree(blkg->pd[plid]); - blkg->pd[plid] = NULL; + kfree(blkg->pd[pol->plid]); + blkg->pd[pol->plid] = NULL; if (!pol) return; @@ -244,7 +244,7 @@ void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid) pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL); WARN_ON_ONCE(!pd); - blkg->pd[plid] = pd; + blkg->pd[pol->plid] = pd; pd->blkg = blkg; pol->ops.blkio_init_group_fn(blkg); } @@ -360,7 +360,8 @@ static const char *blkg_dev_name(struct blkio_group *blkg) */ void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg, u64 (*prfill)(struct seq_file *, void *, int), - int pol, int data, bool show_total) + const struct blkio_policy_type *pol, int data, + bool show_total) { struct blkio_group *blkg; struct hlist_node *n; @@ -368,8 +369,8 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg, spin_lock_irq(&blkcg->lock); hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) - if (blkg->pd[pol]) - total += prfill(sf, blkg->pd[pol]->pdata, data); + if (blkg->pd[pol->plid]) + total += prfill(sf, blkg->pd[pol->plid]->pdata, data); spin_unlock_irq(&blkcg->lock); if (show_total) @@ -739,7 +740,7 @@ void blkio_policy_register(struct blkio_policy_type *blkiop) BUG_ON(blkio_policy[blkiop->plid]); blkio_policy[blkiop->plid] = blkiop; list_for_each_entry(q, &all_q_list, all_q_node) - update_root_blkg_pd(q, blkiop->plid); + update_root_blkg_pd(q, blkiop); blkcg_bypass_end(); @@ -765,7 +766,7 @@ void blkio_policy_unregister(struct blkio_policy_type *blkiop) blkio_policy[blkiop->plid] = NULL; list_for_each_entry(q, &all_q_list, all_q_node) - update_root_blkg_pd(q, blkiop->plid); + update_root_blkg_pd(q, blkiop); blkcg_bypass_end(); mutex_unlock(&blkcg_pol_mutex); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index c772581c9011..26949731108f 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -117,11 +117,12 @@ extern void blkio_policy_register(struct blkio_policy_type *); extern void blkio_policy_unregister(struct blkio_policy_type *); extern void blkg_destroy_all(struct request_queue *q, bool destroy_root); extern void update_root_blkg_pd(struct request_queue *q, - enum blkio_policy_id plid); + const struct blkio_policy_type *pol); void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg, u64 (*prfill)(struct seq_file *, void *, int), - int pol, int data, bool show_total); + const struct blkio_policy_type *pol, int data, + bool show_total); 
u64 __blkg_prfill_u64(struct seq_file *sf, void *pdata, u64 v); u64 __blkg_prfill_rwstat(struct seq_file *sf, void *pdata, const struct blkg_rwstat *rwstat); @@ -333,7 +334,7 @@ static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { } static inline void blkg_destroy_all(struct request_queue *q, bool destory_root) { } static inline void update_root_blkg_pd(struct request_queue *q, - enum blkio_policy_id plid) { } + const struct blkio_policy_type *pol) { } static inline void *blkg_to_pdata(struct blkio_group *blkg, struct blkio_policy_type *pol) { return NULL; } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 60240142f5ae..07c17c27a628 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -946,7 +946,7 @@ static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, BLKIO_POLICY_THROTL, + blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, &blkio_policy_throtl, cft->private, true); return 0; } @@ -973,7 +973,7 @@ static int tg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), tg_prfill_conf_u64, - BLKIO_POLICY_THROTL, cft->private, false); + &blkio_policy_throtl, cft->private, false); return 0; } @@ -981,7 +981,7 @@ static int tg_print_conf_uint(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), tg_prfill_conf_uint, - BLKIO_POLICY_THROTL, cft->private, false); + &blkio_policy_throtl, cft->private, false); return 0; } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 7a8c3e0ab3a2..d02f0ae9637f 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1380,7 +1380,7 @@ static int cfqg_print_weight_device(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), - cfqg_prfill_weight_device, BLKIO_POLICY_PROP, 0, + cfqg_prfill_weight_device, &blkio_policy_cfq, 0, false); return 0; } @@ -1445,7 +1445,7 @@ static int cfqg_print_stat(struct cgroup *cgrp, struct cftype *cft, { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, BLKIO_POLICY_PROP, + blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, &blkio_policy_cfq, cft->private, false); return 0; } @@ -1455,7 +1455,7 @@ static int cfqg_print_rwstat(struct cgroup *cgrp, struct cftype *cft, { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, BLKIO_POLICY_PROP, + blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, &blkio_policy_cfq, cft->private, true); return 0; } @@ -1482,7 +1482,7 @@ static int cfqg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft, struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); blkcg_print_blkgs(sf, blkcg, cfqg_prfill_avg_queue_size, - BLKIO_POLICY_PROP, 0, false); + &blkio_policy_cfq, 0, false); return 0; } #endif /* CONFIG_DEBUG_BLK_CGROUP */ @@ -3938,7 +3938,7 @@ static void cfq_exit_queue(struct elevator_queue *e) #ifndef CONFIG_CFQ_GROUP_IOSCHED kfree(cfqd->root_group); #endif - update_root_blkg_pd(q, BLKIO_POLICY_PROP); + update_root_blkg_pd(q, &blkio_policy_cfq); kfree(cfqd); } -- cgit v1.2.3-59-g8ed1b From 8bd435b30ecacb69bbb8b2d3e251f770b807c5b2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Apr 2012 13:11:28 -0700 Subject: blkcg: remove static policy ID enums Remove BLKIO_POLICY_* enums and let 
blkio_policy_register() allocate @pol->plid dynamically on registration. The maximum number of blkcg policies which can be registered at the same time is defined by BLKCG_MAX_POLS constant added to include/linux/blkdev.h. Note that blkio_policy_register() now may fail. Policy init functions updated accordingly and unnecessary ifdefs removed from cfq_init(). Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 59 +++++++++++++++++++++++++++++++++++++------------- block/blk-cgroup.h | 15 ++++--------- block/blk-throttle.c | 4 +--- block/cfq-iosched.c | 25 +++++++++++---------- include/linux/blkdev.h | 7 +++++- 5 files changed, 69 insertions(+), 41 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b1231524a097..2d4d7d6d9ae9 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -31,7 +31,7 @@ static LIST_HEAD(all_q_list); struct blkio_cgroup blkio_root_cgroup = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT }; EXPORT_SYMBOL_GPL(blkio_root_cgroup); -static struct blkio_policy_type *blkio_policy[BLKIO_NR_POLICIES]; +static struct blkio_policy_type *blkio_policy[BLKCG_MAX_POLS]; struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) { @@ -67,7 +67,7 @@ static void blkg_free(struct blkio_group *blkg) if (!blkg) return; - for (i = 0; i < BLKIO_NR_POLICIES; i++) { + for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkio_policy_type *pol = blkio_policy[i]; struct blkg_policy_data *pd = blkg->pd[i]; @@ -107,7 +107,7 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, blkg->refcnt = 1; cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path)); - for (i = 0; i < BLKIO_NR_POLICIES; i++) { + for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkio_policy_type *pol = blkio_policy[i]; struct blkg_policy_data *pd; @@ -127,7 +127,7 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, } /* invoke per-policy init */ - for (i = 0; i < BLKIO_NR_POLICIES; i++) { + for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkio_policy_type *pol = blkio_policy[i]; if (pol) @@ -320,7 +320,7 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) * anyway. If you get hit by a race, retry. */ hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { - for (i = 0; i < BLKIO_NR_POLICIES; i++) { + for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkio_policy_type *pol = blkio_policy[i]; if (pol && pol->ops.blkio_reset_group_stats_fn) @@ -729,46 +729,75 @@ struct cgroup_subsys blkio_subsys = { }; EXPORT_SYMBOL_GPL(blkio_subsys); -void blkio_policy_register(struct blkio_policy_type *blkiop) +/** + * blkio_policy_register - register a blkcg policy + * @blkiop: blkcg policy to register + * + * Register @blkiop with blkcg core. Might sleep and @blkiop may be + * modified on successful registration. Returns 0 on success and -errno on + * failure. 
+ */ +int blkio_policy_register(struct blkio_policy_type *blkiop) { struct request_queue *q; + int i, ret; mutex_lock(&blkcg_pol_mutex); - blkcg_bypass_start(); + /* find an empty slot */ + ret = -ENOSPC; + for (i = 0; i < BLKCG_MAX_POLS; i++) + if (!blkio_policy[i]) + break; + if (i >= BLKCG_MAX_POLS) + goto out_unlock; - BUG_ON(blkio_policy[blkiop->plid]); - blkio_policy[blkiop->plid] = blkiop; + /* register and update blkgs */ + blkiop->plid = i; + blkio_policy[i] = blkiop; + + blkcg_bypass_start(); list_for_each_entry(q, &all_q_list, all_q_node) update_root_blkg_pd(q, blkiop); - blkcg_bypass_end(); + /* everything is in place, add intf files for the new policy */ if (blkiop->cftypes) WARN_ON(cgroup_add_cftypes(&blkio_subsys, blkiop->cftypes)); - + ret = 0; +out_unlock: mutex_unlock(&blkcg_pol_mutex); + return ret; } EXPORT_SYMBOL_GPL(blkio_policy_register); +/** + * blkiop_policy_unregister - unregister a blkcg policy + * @blkiop: blkcg policy to unregister + * + * Undo blkio_policy_register(@blkiop). Might sleep. + */ void blkio_policy_unregister(struct blkio_policy_type *blkiop) { struct request_queue *q; mutex_lock(&blkcg_pol_mutex); + if (WARN_ON(blkio_policy[blkiop->plid] != blkiop)) + goto out_unlock; + + /* kill the intf files first */ if (blkiop->cftypes) cgroup_rm_cftypes(&blkio_subsys, blkiop->cftypes); - blkcg_bypass_start(); - - BUG_ON(blkio_policy[blkiop->plid] != blkiop); + /* unregister and update blkgs */ blkio_policy[blkiop->plid] = NULL; + blkcg_bypass_start(); list_for_each_entry(q, &all_q_list, all_q_node) update_root_blkg_pd(q, blkiop); blkcg_bypass_end(); - +out_unlock: mutex_unlock(&blkcg_pol_mutex); } EXPORT_SYMBOL_GPL(blkio_policy_unregister); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 26949731108f..be80d6eb6531 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -17,13 +17,6 @@ #include #include -enum blkio_policy_id { - BLKIO_POLICY_PROP = 0, /* Proportional Bandwidth division */ - BLKIO_POLICY_THROTL, /* Throttling */ - - BLKIO_NR_POLICIES, -}; - /* Max limits for throttle policy */ #define THROTL_IOPS_MAX UINT_MAX @@ -86,7 +79,7 @@ struct blkio_group { /* reference count */ int refcnt; - struct blkg_policy_data *pd[BLKIO_NR_POLICIES]; + struct blkg_policy_data *pd[BLKCG_MAX_POLS]; struct rcu_head rcu_head; }; @@ -103,7 +96,7 @@ struct blkio_policy_ops { struct blkio_policy_type { struct blkio_policy_ops ops; - enum blkio_policy_id plid; + int plid; size_t pdata_size; /* policy specific private data size */ struct cftype *cftypes; /* cgroup files for the policy */ }; @@ -113,7 +106,7 @@ extern void blkcg_drain_queue(struct request_queue *q); extern void blkcg_exit_queue(struct request_queue *q); /* Blkio controller policy registration */ -extern void blkio_policy_register(struct blkio_policy_type *); +extern int blkio_policy_register(struct blkio_policy_type *); extern void blkio_policy_unregister(struct blkio_policy_type *); extern void blkg_destroy_all(struct request_queue *q, bool destroy_root); extern void update_root_blkg_pd(struct request_queue *q, @@ -329,7 +322,7 @@ struct blkio_policy_type { static inline int blkcg_init_queue(struct request_queue *q) { return 0; } static inline void blkcg_drain_queue(struct request_queue *q) { } static inline void blkcg_exit_queue(struct request_queue *q) { } -static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { } +static inline int blkio_policy_register(struct blkio_policy_type *blkiop) { return 0; } static inline void blkio_policy_unregister(struct blkio_policy_type 
*blkiop) { } static inline void blkg_destroy_all(struct request_queue *q, bool destory_root) { } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 07c17c27a628..0dc4645aa7fe 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1089,7 +1089,6 @@ static struct blkio_policy_type blkio_policy_throtl = { .blkio_exit_group_fn = throtl_exit_blkio_group, .blkio_reset_group_stats_fn = throtl_reset_group_stats, }, - .plid = BLKIO_POLICY_THROTL, .pdata_size = sizeof(struct throtl_grp), .cftypes = throtl_files, }; @@ -1271,8 +1270,7 @@ static int __init throtl_init(void) if (!kthrotld_workqueue) panic("Failed to create kthrotld\n"); - blkio_policy_register(&blkio_policy_throtl); - return 0; + return blkio_policy_register(&blkio_policy_throtl); } module_init(throtl_init); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index d02f0ae9637f..08db2fc70c29 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -4157,7 +4157,6 @@ static struct blkio_policy_type blkio_policy_cfq = { .blkio_init_group_fn = cfq_init_blkio_group, .blkio_reset_group_stats_fn = cfqg_stats_reset, }, - .plid = BLKIO_POLICY_PROP, .pdata_size = sizeof(struct cfq_group), .cftypes = cfq_blkcg_files, }; @@ -4181,27 +4180,31 @@ static int __init cfq_init(void) #else cfq_group_idle = 0; #endif + + ret = blkio_policy_register(&blkio_policy_cfq); + if (ret) + return ret; + cfq_pool = KMEM_CACHE(cfq_queue, 0); if (!cfq_pool) - return -ENOMEM; + goto err_pol_unreg; ret = elv_register(&iosched_cfq); - if (ret) { - kmem_cache_destroy(cfq_pool); - return ret; - } + if (ret) + goto err_free_pool; -#ifdef CONFIG_CFQ_GROUP_IOSCHED - blkio_policy_register(&blkio_policy_cfq); -#endif return 0; + +err_free_pool: + kmem_cache_destroy(cfq_pool); +err_pol_unreg: + blkio_policy_unregister(&blkio_policy_cfq); + return ret; } static void __exit cfq_exit(void) { -#ifdef CONFIG_CFQ_GROUP_IOSCHED blkio_policy_unregister(&blkio_policy_cfq); -#endif elv_unregister(&iosched_cfq); kmem_cache_destroy(cfq_pool); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 33f1b29e53f4..d2c69f8c188a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -35,6 +35,12 @@ struct bsg_job; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ +/* + * Maximum number of blkcg policies allowed to be registered concurrently. + * Defined here to simplify include dependency. + */ +#define BLKCG_MAX_POLS 2 + struct request; typedef void (rq_end_io_fn)(struct request *, int); @@ -363,7 +369,6 @@ struct request_queue { struct list_head icq_list; #ifdef CONFIG_BLK_CGROUP - /* XXX: array size hardcoded to avoid include dependency (temporary) */ struct list_head blkg_list; #endif -- cgit v1.2.3-59-g8ed1b From da8b066262e12d1d0a3b1e6d3486e500169bf730 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Apr 2012 13:11:29 -0700 Subject: blkcg: make blkg_conf_prep() take @pol and return with queue lock held Add @pol to blkg_conf_prep() and let it return with queue lock held (to be released by blkg_conf_finish()). Note that @pol isn't used yet. This is to prepare for per-queue policy activation and doesn't cause any visible difference. 
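For config writers the calling convention stays the prep/finish pair; the only change visible to callers is the extra @pol argument, with the update between the two calls now running under the queue lock in addition to the RCU read lock. Roughly, for a hypothetical policy "foo" (blkio_policy_foo, blkg_to_foo() and foo_update_config() are placeholders, not real symbols):

	static int foo_set_conf(struct cgroup *cgrp, struct cftype *cft,
				const char *buf)
	{
		struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
		struct blkg_conf_ctx ctx;
		int ret;

		ret = blkg_conf_prep(blkcg, &blkio_policy_foo, buf, &ctx);
		if (ret)
			return ret;

		/* RCU read lock and ctx.blkg->q->queue_lock are held here */
		foo_update_config(blkg_to_foo(ctx.blkg), ctx.v);

		blkg_conf_finish(&ctx);		/* drops the queue lock and RCU */
		return 0;
	}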
Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 17 ++++++++++------- block/blk-cgroup.h | 3 ++- block/blk-throttle.c | 2 +- block/cfq-iosched.c | 2 +- 4 files changed, 14 insertions(+), 10 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 2d4d7d6d9ae9..f6581a090b9d 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -464,17 +464,19 @@ EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); /** * blkg_conf_prep - parse and prepare for per-blkg config update * @blkcg: target block cgroup + * @pol: target policy * @input: input string * @ctx: blkg_conf_ctx to be filled * * Parse per-blkg config update from @input and initialize @ctx with the * result. @ctx->blkg points to the blkg to be updated and @ctx->v the new - * value. This function returns with RCU read locked and must be paired - * with blkg_conf_finish(). + * value. This function returns with RCU read lock and queue lock held and + * must be paired with blkg_conf_finish(). */ -int blkg_conf_prep(struct blkio_cgroup *blkcg, const char *input, +int blkg_conf_prep(struct blkio_cgroup *blkcg, + const struct blkio_policy_type *pol, const char *input, struct blkg_conf_ctx *ctx) - __acquires(rcu) + __acquires(rcu) __acquires(disk->queue->queue_lock) { struct gendisk *disk; struct blkio_group *blkg; @@ -490,14 +492,14 @@ int blkg_conf_prep(struct blkio_cgroup *blkcg, const char *input, return -EINVAL; rcu_read_lock(); - spin_lock_irq(disk->queue->queue_lock); + blkg = blkg_lookup_create(blkcg, disk->queue, false); - spin_unlock_irq(disk->queue->queue_lock); if (IS_ERR(blkg)) { ret = PTR_ERR(blkg); rcu_read_unlock(); + spin_unlock_irq(disk->queue->queue_lock); put_disk(disk); /* * If queue was bypassing, we should retry. Do so after a @@ -527,8 +529,9 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep); * with blkg_conf_prep(). 
*/ void blkg_conf_finish(struct blkg_conf_ctx *ctx) - __releases(rcu) + __releases(ctx->disk->queue->queue_lock) __releases(rcu) { + spin_unlock_irq(ctx->disk->queue->queue_lock); rcu_read_unlock(); put_disk(ctx->disk); } diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index be80d6eb6531..df1c7b290c22 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -128,7 +128,8 @@ struct blkg_conf_ctx { u64 v; }; -int blkg_conf_prep(struct blkio_cgroup *blkcg, const char *input, +int blkg_conf_prep(struct blkio_cgroup *blkcg, + const struct blkio_policy_type *pol, const char *input, struct blkg_conf_ctx *ctx); void blkg_conf_finish(struct blkg_conf_ctx *ctx); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 0dc4645aa7fe..6f1bfdf9a1b7 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -993,7 +993,7 @@ static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf, struct throtl_grp *tg; int ret; - ret = blkg_conf_prep(blkcg, buf, &ctx); + ret = blkg_conf_prep(blkcg, &blkio_policy_throtl, buf, &ctx); if (ret) return ret; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 08db2fc70c29..de95f9a2acf8 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1400,7 +1400,7 @@ static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, struct cfq_group *cfqg; int ret; - ret = blkg_conf_prep(blkcg, buf, &ctx); + ret = blkg_conf_prep(blkcg, &blkio_policy_cfq, buf, &ctx); if (ret) return ret; -- cgit v1.2.3-59-g8ed1b From a2b1693bac45ea3fe3ba612fd22c45f17449f610 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Apr 2012 13:11:33 -0700 Subject: blkcg: implement per-queue policy activation All blkcg policies were assumed to be enabled on all request_queues. Due to various implementation obstacles, during the recent blkcg core updates, this was temporarily implemented as shooting down all !root blkgs on elevator switch and policy [de]registration combined with half-broken in-place root blkg updates. In addition to being buggy and racy, this meant losing all blkcg configurations across those events. Now that blkcg is cleaned up enough, this patch replaces the temporary implementation with proper per-queue policy activation. Each blkcg policy should call the new blkcg_[de]activate_policy() to enable and disable the policy on a specific queue. blkcg_activate_policy() allocates and installs policy data for the policy for all existing blkgs. blkcg_deactivate_policy() does the reverse. If a policy is not enabled for a given queue, blkg printing / config functions skip the respective blkg for the queue. blkcg_activate_policy() also takes care of root blkg creation, and cfq_init_queue() and blk_throtl_init() are updated accordingly. This replaces blkcg_bypass_{start|end}() and update_root_blkg_pd() unnecessary. Dropped. v2: cfq_init_queue() was returning uninitialized @ret on root_group alloc failure if !CONFIG_CFQ_GROUP_IOSCHED. Fixed. 
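With this in place, a policy's queue init/exit boils down to an activate/deactivate pair; sketched below for a hypothetical policy "foo", following the same pattern blk-throttle and cfq now use (blkio_policy_foo and the foo_* names are placeholders):

	static int foo_init_queue(struct request_queue *q)
	{
		/* creates the root blkg and allocates per-policy data
		 * for all existing blkgs on @q */
		return blkcg_activate_policy(q, &blkio_policy_foo);
	}

	static void foo_exit_queue(struct request_queue *q)
	{
		/* invokes ->blkio_exit_group_fn() and frees the per-policy data */
		blkcg_deactivate_policy(q, &blkio_policy_foo);
	}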
Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 228 +++++++++++++++++++++++++++++++++---------------- block/blk-cgroup.h | 15 +++- block/blk-throttle.c | 52 +++++------ block/cfq-iosched.c | 37 ++++---- block/elevator.c | 2 - include/linux/blkdev.h | 1 + 6 files changed, 201 insertions(+), 134 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index d6e4555c982f..d6d59ad105b4 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -54,6 +54,17 @@ struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio) } EXPORT_SYMBOL_GPL(bio_blkio_cgroup); +static bool blkcg_policy_enabled(struct request_queue *q, + const struct blkio_policy_type *pol) +{ + return pol && test_bit(pol->plid, q->blkcg_pols); +} + +static size_t blkg_pd_size(const struct blkio_policy_type *pol) +{ + return sizeof(struct blkg_policy_data) + pol->pdata_size; +} + /** * blkg_free - free a blkg * @blkg: blkg to free @@ -111,12 +122,11 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, struct blkio_policy_type *pol = blkio_policy[i]; struct blkg_policy_data *pd; - if (!pol) + if (!blkcg_policy_enabled(q, pol)) continue; /* alloc per-policy data and attach it to blkg */ - pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC, - q->node); + pd = kzalloc_node(blkg_pd_size(pol), GFP_ATOMIC, q->node); if (!pd) { blkg_free(blkg); return NULL; @@ -130,7 +140,7 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkio_policy_type *pol = blkio_policy[i]; - if (pol) + if (blkcg_policy_enabled(blkg->q, pol)) pol->ops.blkio_init_group_fn(blkg); } @@ -236,36 +246,6 @@ static void blkg_destroy(struct blkio_group *blkg) blkg_put(blkg); } -/* - * XXX: This updates blkg policy data in-place for root blkg, which is - * necessary across elevator switch and policy registration as root blkgs - * aren't shot down. This broken and racy implementation is temporary. - * Eventually, blkg shoot down will be replaced by proper in-place update. 
- */ -void update_root_blkg_pd(struct request_queue *q, - const struct blkio_policy_type *pol) -{ - struct blkio_group *blkg = blkg_lookup(&blkio_root_cgroup, q); - struct blkg_policy_data *pd; - - if (!blkg) - return; - - kfree(blkg->pd[pol->plid]); - blkg->pd[pol->plid] = NULL; - - if (!pol) - return; - - pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL); - WARN_ON_ONCE(!pd); - - blkg->pd[pol->plid] = pd; - pd->blkg = blkg; - pol->ops.blkio_init_group_fn(blkg); -} -EXPORT_SYMBOL_GPL(update_root_blkg_pd); - /** * blkg_destroy_all - destroy all blkgs associated with a request_queue * @q: request_queue of interest @@ -339,7 +319,8 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkio_policy_type *pol = blkio_policy[i]; - if (pol && pol->ops.blkio_reset_group_stats_fn) + if (blkcg_policy_enabled(blkg->q, pol) && + pol->ops.blkio_reset_group_stats_fn) pol->ops.blkio_reset_group_stats_fn(blkg); } } @@ -385,7 +366,7 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg, spin_lock_irq(&blkcg->lock); hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) - if (blkg->pd[pol->plid]) + if (blkcg_policy_enabled(blkg->q, pol)) total += prfill(sf, blkg->pd[pol->plid]->pdata, data); spin_unlock_irq(&blkcg->lock); @@ -510,7 +491,10 @@ int blkg_conf_prep(struct blkio_cgroup *blkcg, rcu_read_lock(); spin_lock_irq(disk->queue->queue_lock); - blkg = blkg_lookup_create(blkcg, disk->queue, false); + if (blkcg_policy_enabled(disk->queue, pol)) + blkg = blkg_lookup_create(blkcg, disk->queue, false); + else + blkg = ERR_PTR(-EINVAL); if (IS_ERR(blkg)) { ret = PTR_ERR(blkg); @@ -712,30 +696,6 @@ static int blkiocg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) return ret; } -static void blkcg_bypass_start(void) - __acquires(&all_q_mutex) -{ - struct request_queue *q; - - mutex_lock(&all_q_mutex); - - list_for_each_entry(q, &all_q_list, all_q_node) { - blk_queue_bypass_start(q); - blkg_destroy_all(q, false); - } -} - -static void blkcg_bypass_end(void) - __releases(&all_q_mutex) -{ - struct request_queue *q; - - list_for_each_entry(q, &all_q_list, all_q_node) - blk_queue_bypass_end(q); - - mutex_unlock(&all_q_mutex); -} - struct cgroup_subsys blkio_subsys = { .name = "blkio", .create = blkiocg_create, @@ -748,6 +708,139 @@ struct cgroup_subsys blkio_subsys = { }; EXPORT_SYMBOL_GPL(blkio_subsys); +/** + * blkcg_activate_policy - activate a blkcg policy on a request_queue + * @q: request_queue of interest + * @pol: blkcg policy to activate + * + * Activate @pol on @q. Requires %GFP_KERNEL context. @q goes through + * bypass mode to populate its blkgs with policy_data for @pol. + * + * Activation happens with @q bypassed, so nobody would be accessing blkgs + * from IO path. Update of each blkg is protected by both queue and blkcg + * locks so that holding either lock and testing blkcg_policy_enabled() is + * always enough for dereferencing policy data. + * + * The caller is responsible for synchronizing [de]activations and policy + * [un]registerations. Returns 0 on success, -errno on failure. 
+ */ +int blkcg_activate_policy(struct request_queue *q, + const struct blkio_policy_type *pol) +{ + LIST_HEAD(pds); + struct blkio_group *blkg; + struct blkg_policy_data *pd, *n; + int cnt = 0, ret; + + if (blkcg_policy_enabled(q, pol)) + return 0; + + blk_queue_bypass_start(q); + + /* make sure the root blkg exists and count the existing blkgs */ + spin_lock_irq(q->queue_lock); + + rcu_read_lock(); + blkg = blkg_lookup_create(&blkio_root_cgroup, q, true); + rcu_read_unlock(); + + if (IS_ERR(blkg)) { + ret = PTR_ERR(blkg); + goto out_unlock; + } + q->root_blkg = blkg; + + list_for_each_entry(blkg, &q->blkg_list, q_node) + cnt++; + + spin_unlock_irq(q->queue_lock); + + /* allocate policy_data for all existing blkgs */ + while (cnt--) { + pd = kzalloc_node(blkg_pd_size(pol), GFP_KERNEL, q->node); + if (!pd) { + ret = -ENOMEM; + goto out_free; + } + list_add_tail(&pd->alloc_node, &pds); + } + + /* + * Install the allocated pds. With @q bypassing, no new blkg + * should have been created while the queue lock was dropped. + */ + spin_lock_irq(q->queue_lock); + + list_for_each_entry(blkg, &q->blkg_list, q_node) { + if (WARN_ON(list_empty(&pds))) { + /* umm... this shouldn't happen, just abort */ + ret = -ENOMEM; + goto out_unlock; + } + pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node); + list_del_init(&pd->alloc_node); + + /* grab blkcg lock too while installing @pd on @blkg */ + spin_lock(&blkg->blkcg->lock); + + blkg->pd[pol->plid] = pd; + pd->blkg = blkg; + pol->ops.blkio_init_group_fn(blkg); + + spin_unlock(&blkg->blkcg->lock); + } + + __set_bit(pol->plid, q->blkcg_pols); + ret = 0; +out_unlock: + spin_unlock_irq(q->queue_lock); +out_free: + blk_queue_bypass_end(q); + list_for_each_entry_safe(pd, n, &pds, alloc_node) + kfree(pd); + return ret; +} +EXPORT_SYMBOL_GPL(blkcg_activate_policy); + +/** + * blkcg_deactivate_policy - deactivate a blkcg policy on a request_queue + * @q: request_queue of interest + * @pol: blkcg policy to deactivate + * + * Deactivate @pol on @q. Follows the same synchronization rules as + * blkcg_activate_policy(). 
+ */ +void blkcg_deactivate_policy(struct request_queue *q, + const struct blkio_policy_type *pol) +{ + struct blkio_group *blkg; + + if (!blkcg_policy_enabled(q, pol)) + return; + + blk_queue_bypass_start(q); + spin_lock_irq(q->queue_lock); + + __clear_bit(pol->plid, q->blkcg_pols); + + list_for_each_entry(blkg, &q->blkg_list, q_node) { + /* grab blkcg lock too while removing @pd from @blkg */ + spin_lock(&blkg->blkcg->lock); + + if (pol->ops.blkio_exit_group_fn) + pol->ops.blkio_exit_group_fn(blkg); + + kfree(blkg->pd[pol->plid]); + blkg->pd[pol->plid] = NULL; + + spin_unlock(&blkg->blkcg->lock); + } + + spin_unlock_irq(q->queue_lock); + blk_queue_bypass_end(q); +} +EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); + /** * blkio_policy_register - register a blkcg policy * @blkiop: blkcg policy to register @@ -758,7 +851,6 @@ EXPORT_SYMBOL_GPL(blkio_subsys); */ int blkio_policy_register(struct blkio_policy_type *blkiop) { - struct request_queue *q; int i, ret; mutex_lock(&blkcg_pol_mutex); @@ -775,11 +867,6 @@ int blkio_policy_register(struct blkio_policy_type *blkiop) blkiop->plid = i; blkio_policy[i] = blkiop; - blkcg_bypass_start(); - list_for_each_entry(q, &all_q_list, all_q_node) - update_root_blkg_pd(q, blkiop); - blkcg_bypass_end(); - /* everything is in place, add intf files for the new policy */ if (blkiop->cftypes) WARN_ON(cgroup_add_cftypes(&blkio_subsys, blkiop->cftypes)); @@ -798,8 +885,6 @@ EXPORT_SYMBOL_GPL(blkio_policy_register); */ void blkio_policy_unregister(struct blkio_policy_type *blkiop) { - struct request_queue *q; - mutex_lock(&blkcg_pol_mutex); if (WARN_ON(blkio_policy[blkiop->plid] != blkiop)) @@ -811,11 +896,6 @@ void blkio_policy_unregister(struct blkio_policy_type *blkiop) /* unregister and update blkgs */ blkio_policy[blkiop->plid] = NULL; - - blkcg_bypass_start(); - list_for_each_entry(q, &all_q_list, all_q_node) - update_root_blkg_pd(q, blkiop); - blkcg_bypass_end(); out_unlock: mutex_unlock(&blkcg_pol_mutex); } diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index df1c7b290c22..66253a7c8ff4 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -64,6 +64,9 @@ struct blkg_policy_data { /* the blkg this per-policy data belongs to */ struct blkio_group *blkg; + /* used during policy activation */ + struct list_head alloc_node; + /* pol->pdata_size bytes of private data used by policy impl */ char pdata[] __aligned(__alignof__(unsigned long long)); }; @@ -108,9 +111,11 @@ extern void blkcg_exit_queue(struct request_queue *q); /* Blkio controller policy registration */ extern int blkio_policy_register(struct blkio_policy_type *); extern void blkio_policy_unregister(struct blkio_policy_type *); +extern int blkcg_activate_policy(struct request_queue *q, + const struct blkio_policy_type *pol); +extern void blkcg_deactivate_policy(struct request_queue *q, + const struct blkio_policy_type *pol); extern void blkg_destroy_all(struct request_queue *q, bool destroy_root); -extern void update_root_blkg_pd(struct request_queue *q, - const struct blkio_policy_type *pol); void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg, u64 (*prfill)(struct seq_file *, void *, int), @@ -325,10 +330,12 @@ static inline void blkcg_drain_queue(struct request_queue *q) { } static inline void blkcg_exit_queue(struct request_queue *q) { } static inline int blkio_policy_register(struct blkio_policy_type *blkiop) { return 0; } static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { } +static inline int blkcg_activate_policy(struct request_queue *q, 
+ const struct blkio_policy_type *pol) { return 0; } +static inline void blkcg_deactivate_policy(struct request_queue *q, + const struct blkio_policy_type *pol) { } static inline void blkg_destroy_all(struct request_queue *q, bool destory_root) { } -static inline void update_root_blkg_pd(struct request_queue *q, - const struct blkio_policy_type *pol) { } static inline void *blkg_to_pdata(struct blkio_group *blkg, struct blkio_policy_type *pol) { return NULL; } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 8c520fad6885..2fc964e06ea4 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -995,35 +995,31 @@ static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf, struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); struct blkg_conf_ctx ctx; struct throtl_grp *tg; + struct throtl_data *td; int ret; ret = blkg_conf_prep(blkcg, &blkio_policy_throtl, buf, &ctx); if (ret) return ret; - ret = -EINVAL; tg = blkg_to_tg(ctx.blkg); - if (tg) { - struct throtl_data *td = ctx.blkg->q->td; - - if (!ctx.v) - ctx.v = -1; + td = ctx.blkg->q->td; - if (is_u64) - *(u64 *)((void *)tg + cft->private) = ctx.v; - else - *(unsigned int *)((void *)tg + cft->private) = ctx.v; + if (!ctx.v) + ctx.v = -1; - /* XXX: we don't need the following deferred processing */ - xchg(&tg->limits_changed, true); - xchg(&td->limits_changed, true); - throtl_schedule_delayed_work(td, 0); + if (is_u64) + *(u64 *)((void *)tg + cft->private) = ctx.v; + else + *(unsigned int *)((void *)tg + cft->private) = ctx.v; - ret = 0; - } + /* XXX: we don't need the following deferred processing */ + xchg(&tg->limits_changed, true); + xchg(&td->limits_changed, true); + throtl_schedule_delayed_work(td, 0); blkg_conf_finish(&ctx); - return ret; + return 0; } static int tg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft, @@ -1230,7 +1226,7 @@ void blk_throtl_drain(struct request_queue *q) int blk_throtl_init(struct request_queue *q) { struct throtl_data *td; - struct blkio_group *blkg; + int ret; td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node); if (!td) @@ -1243,28 +1239,18 @@ int blk_throtl_init(struct request_queue *q) q->td = td; td->queue = q; - /* alloc and init root group. 
*/ - rcu_read_lock(); - spin_lock_irq(q->queue_lock); - - blkg = blkg_lookup_create(&blkio_root_cgroup, q, true); - if (!IS_ERR(blkg)) - q->root_blkg = blkg; - - spin_unlock_irq(q->queue_lock); - rcu_read_unlock(); - - if (!q->root_blkg) { + /* activate policy */ + ret = blkcg_activate_policy(q, &blkio_policy_throtl); + if (ret) kfree(td); - return -ENOMEM; - } - return 0; + return ret; } void blk_throtl_exit(struct request_queue *q) { BUG_ON(!q->td); throtl_shutdown_wq(q); + blkcg_deactivate_policy(q, &blkio_policy_throtl); kfree(q->td); } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 86440e04f3ee..0203652e1f34 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1406,8 +1406,7 @@ static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, ret = -EINVAL; cfqg = blkg_to_cfqg(ctx.blkg); - if (cfqg && (!ctx.v || (ctx.v >= CFQ_WEIGHT_MIN && - ctx.v <= CFQ_WEIGHT_MAX))) { + if (!ctx.v || (ctx.v >= CFQ_WEIGHT_MIN && ctx.v <= CFQ_WEIGHT_MAX)) { cfqg->dev_weight = ctx.v; cfqg->new_weight = cfqg->dev_weight ?: blkcg->cfq_weight; ret = 0; @@ -3938,7 +3937,7 @@ static void cfq_exit_queue(struct elevator_queue *e) #ifndef CONFIG_CFQ_GROUP_IOSCHED kfree(cfqd->root_group); #endif - update_root_blkg_pd(q, &blkio_policy_cfq); + blkcg_deactivate_policy(q, &blkio_policy_cfq); kfree(cfqd); } @@ -3946,7 +3945,7 @@ static int cfq_init_queue(struct request_queue *q) { struct cfq_data *cfqd; struct blkio_group *blkg __maybe_unused; - int i; + int i, ret; cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); if (!cfqd) @@ -3960,28 +3959,20 @@ static int cfq_init_queue(struct request_queue *q) /* Init root group and prefer root group over other groups by default */ #ifdef CONFIG_CFQ_GROUP_IOSCHED - rcu_read_lock(); - spin_lock_irq(q->queue_lock); - - blkg = blkg_lookup_create(&blkio_root_cgroup, q, true); - if (!IS_ERR(blkg)) { - q->root_blkg = blkg; - cfqd->root_group = blkg_to_cfqg(blkg); - } + ret = blkcg_activate_policy(q, &blkio_policy_cfq); + if (ret) + goto out_free; - spin_unlock_irq(q->queue_lock); - rcu_read_unlock(); + cfqd->root_group = blkg_to_cfqg(q->root_blkg); #else + ret = -ENOMEM; cfqd->root_group = kzalloc_node(sizeof(*cfqd->root_group), GFP_KERNEL, cfqd->queue->node); - if (cfqd->root_group) - cfq_init_cfqg_base(cfqd->root_group); -#endif - if (!cfqd->root_group) { - kfree(cfqd); - return -ENOMEM; - } + if (!cfqd->root_group) + goto out_free; + cfq_init_cfqg_base(cfqd->root_group); +#endif cfqd->root_group->weight = 2 * CFQ_WEIGHT_DEFAULT; /* @@ -4031,6 +4022,10 @@ static int cfq_init_queue(struct request_queue *q) */ cfqd->last_delayed_sync = jiffies - HZ; return 0; + +out_free: + kfree(cfqd); + return ret; } /* diff --git a/block/elevator.c b/block/elevator.c index be3ab6df0fea..6a55d418896f 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -896,8 +896,6 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) ioc_clear_queue(q); spin_unlock_irq(q->queue_lock); - blkg_destroy_all(q, false); - /* allocate, init and register new elevator */ err = -ENOMEM; q->elevator = elevator_alloc(q, new_e); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b01c377fd739..68720ab275d4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -370,6 +370,7 @@ struct request_queue { struct list_head icq_list; #ifdef CONFIG_BLK_CGROUP + DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS); struct blkio_group *root_blkg; struct list_head blkg_list; #endif -- cgit v1.2.3-59-g8ed1b From 
3c96cb32d318f323c1bf972a4c66821f8499e34d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Apr 2012 13:11:34 -0700 Subject: blkcg: drop stuff unused after per-queue policy activation update * All_q_list is unused. Drop all_q_{mutex|list}. * @for_root of blkg_lookup_create() is always %false when called from outside blk-cgroup.c proper. Factor out __blkg_lookup_create() so that it doesn't check whether @q is bypassing and use the underscored version for the @for_root callsite. * blkg_destroy_all() is used only from blkcg proper and @destroy_root is always %true. Make it static and drop @destroy_root. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 61 +++++++++++++++++----------------------------------- block/blk-cgroup.h | 6 +----- block/blk-throttle.c | 2 +- block/cfq-iosched.c | 2 +- 4 files changed, 23 insertions(+), 48 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index d6d59ad105b4..10f0d2fc0b23 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -25,8 +25,6 @@ #define MAX_KEY_LEN 100 static DEFINE_MUTEX(blkcg_pol_mutex); -static DEFINE_MUTEX(all_q_mutex); -static LIST_HEAD(all_q_list); struct blkio_cgroup blkio_root_cgroup = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT }; EXPORT_SYMBOL_GPL(blkio_root_cgroup); @@ -179,9 +177,8 @@ struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, } EXPORT_SYMBOL_GPL(blkg_lookup); -struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, - struct request_queue *q, - bool for_root) +static struct blkio_group *__blkg_lookup_create(struct blkio_cgroup *blkcg, + struct request_queue *q) __releases(q->queue_lock) __acquires(q->queue_lock) { struct blkio_group *blkg; @@ -189,13 +186,6 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, WARN_ON_ONCE(!rcu_read_lock_held()); lockdep_assert_held(q->queue_lock); - /* - * This could be the first entry point of blkcg implementation and - * we shouldn't allow anything to go through for a bypassing queue. - */ - if (unlikely(blk_queue_bypass(q)) && !for_root) - return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY); - blkg = __blkg_lookup(blkcg, q); if (blkg) return blkg; @@ -223,6 +213,18 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, out: return blkg; } + +struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, + struct request_queue *q) +{ + /* + * This could be the first entry point of blkcg implementation and + * we shouldn't allow anything to go through for a bypassing queue. + */ + if (unlikely(blk_queue_bypass(q))) + return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY); + return __blkg_lookup_create(blkcg, q); +} EXPORT_SYMBOL_GPL(blkg_lookup_create); static void blkg_destroy(struct blkio_group *blkg) @@ -249,12 +251,10 @@ static void blkg_destroy(struct blkio_group *blkg) /** * blkg_destroy_all - destroy all blkgs associated with a request_queue * @q: request_queue of interest - * @destroy_root: whether to destroy root blkg or not * - * Destroy blkgs associated with @q. If @destroy_root is %true, all are - * destroyed; otherwise, root blkg is left alone. + * Destroy all blkgs associated with @q. 
*/ -void blkg_destroy_all(struct request_queue *q, bool destroy_root) +static void blkg_destroy_all(struct request_queue *q) { struct blkio_group *blkg, *n; @@ -263,10 +263,6 @@ void blkg_destroy_all(struct request_queue *q, bool destroy_root) list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { struct blkio_cgroup *blkcg = blkg->blkcg; - /* skip root? */ - if (!destroy_root && blkg->blkcg == &blkio_root_cgroup) - continue; - spin_lock(&blkcg->lock); blkg_destroy(blkg); spin_unlock(&blkcg->lock); @@ -274,7 +270,6 @@ void blkg_destroy_all(struct request_queue *q, bool destroy_root) spin_unlock_irq(q->queue_lock); } -EXPORT_SYMBOL_GPL(blkg_destroy_all); static void blkg_rcu_free(struct rcu_head *rcu_head) { @@ -492,7 +487,7 @@ int blkg_conf_prep(struct blkio_cgroup *blkcg, spin_lock_irq(disk->queue->queue_lock); if (blkcg_policy_enabled(disk->queue, pol)) - blkg = blkg_lookup_create(blkcg, disk->queue, false); + blkg = blkg_lookup_create(blkcg, disk->queue); else blkg = ERR_PTR(-EINVAL); @@ -625,20 +620,9 @@ done: */ int blkcg_init_queue(struct request_queue *q) { - int ret; - might_sleep(); - ret = blk_throtl_init(q); - if (ret) - return ret; - - mutex_lock(&all_q_mutex); - INIT_LIST_HEAD(&q->all_q_node); - list_add_tail(&q->all_q_node, &all_q_list); - mutex_unlock(&all_q_mutex); - - return 0; + return blk_throtl_init(q); } /** @@ -662,12 +646,7 @@ void blkcg_drain_queue(struct request_queue *q) */ void blkcg_exit_queue(struct request_queue *q) { - mutex_lock(&all_q_mutex); - list_del_init(&q->all_q_node); - mutex_unlock(&all_q_mutex); - - blkg_destroy_all(q, true); - + blkg_destroy_all(q); blk_throtl_exit(q); } @@ -741,7 +720,7 @@ int blkcg_activate_policy(struct request_queue *q, spin_lock_irq(q->queue_lock); rcu_read_lock(); - blkg = blkg_lookup_create(&blkio_root_cgroup, q, true); + blkg = __blkg_lookup_create(&blkio_root_cgroup, q); rcu_read_unlock(); if (IS_ERR(blkg)) { diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 66253a7c8ff4..222063d36355 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -115,7 +115,6 @@ extern int blkcg_activate_policy(struct request_queue *q, const struct blkio_policy_type *pol); extern void blkcg_deactivate_policy(struct request_queue *q, const struct blkio_policy_type *pol); -extern void blkg_destroy_all(struct request_queue *q, bool destroy_root); void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg, u64 (*prfill)(struct seq_file *, void *, int), @@ -334,8 +333,6 @@ static inline int blkcg_activate_policy(struct request_queue *q, const struct blkio_policy_type *pol) { return 0; } static inline void blkcg_deactivate_policy(struct request_queue *q, const struct blkio_policy_type *pol) { } -static inline void blkg_destroy_all(struct request_queue *q, - bool destory_root) { } static inline void *blkg_to_pdata(struct blkio_group *blkg, struct blkio_policy_type *pol) { return NULL; } @@ -354,8 +351,7 @@ extern struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio); extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, struct request_queue *q); struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, - struct request_queue *q, - bool for_root); + struct request_queue *q); #else struct cgroup; static inline struct blkio_cgroup * diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 2fc964e06ea4..e2aaf27e1f10 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -285,7 +285,7 @@ static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, } else { struct blkio_group 
*blkg; - blkg = blkg_lookup_create(blkcg, q, false); + blkg = blkg_lookup_create(blkcg, q); /* if %NULL and @q is alive, fall back to root_tg */ if (!IS_ERR(blkg)) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 0203652e1f34..eb07eb64e85b 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1348,7 +1348,7 @@ static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd, } else { struct blkio_group *blkg; - blkg = blkg_lookup_create(blkcg, q, false); + blkg = blkg_lookup_create(blkcg, q); if (!IS_ERR(blkg)) cfqg = blkg_to_cfqg(blkg); } -- cgit v1.2.3-59-g8ed1b From c94bed89995e638e43a6663177358b9d20617361 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Apr 2012 13:57:22 -0700 Subject: blkcg: blkg_rwstat_read() was missing inline blkg_rwstat_read() in blk-cgroup.h was missing inline modifier causing compile warning depending on configuration. Add it. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 222063d36355..ef6550a67a69 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -279,7 +279,7 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, * This function can be called without synchronization and takes care of * u64 atomicity. */ -static struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) +static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) { unsigned int start; struct blkg_rwstat tmp; -- cgit v1.2.3-59-g8ed1b From 54e7ed12bad1e3aa2a28558fab6850240465f973 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Apr 2012 13:57:23 -0700 Subject: blkcg: remove blkio_group->path[] blkio_group->path[] stores the path of the associated cgroup and is used only for debug messages. Just format the path from blkg->cgroup when printing debug messages. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 1 - block/blk-cgroup.h | 21 +++++++++++++++++---- block/blk-throttle.c | 9 ++++++--- block/cfq-iosched.c | 21 ++++++++++++++------- 4 files changed, 37 insertions(+), 15 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b1807d4ecedb..63337024e4d7 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -114,7 +114,6 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, INIT_LIST_HEAD(&blkg->q_node); blkg->blkcg = blkcg; blkg->refcnt = 1; - cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path)); for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkio_policy_type *pol = blkio_policy[i]; diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index ef6550a67a69..c524267e4f7f 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -77,8 +77,6 @@ struct blkio_group { struct list_head q_node; struct hlist_node blkcg_node; struct blkio_cgroup *blkcg; - /* Store cgroup path */ - char path[128]; /* reference count */ int refcnt; @@ -167,9 +165,24 @@ static inline struct blkio_group *pdata_to_blkg(void *pdata) return NULL; } -static inline char *blkg_path(struct blkio_group *blkg) +/** + * blkg_path - format cgroup path of blkg + * @blkg: blkg of interest + * @buf: target buffer + * @buflen: target buffer length + * + * Format the path of the cgroup of @blkg into @buf. 
+ */ +static inline int blkg_path(struct blkio_group *blkg, char *buf, int buflen) { - return blkg->path; + int ret; + + rcu_read_lock(); + ret = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); + rcu_read_unlock(); + if (ret) + strncpy(buf, "", buflen); + return ret; } /** diff --git a/block/blk-throttle.c b/block/blk-throttle.c index e2aaf27e1f10..e9b7a47f6da0 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -155,9 +155,12 @@ static inline int throtl_tg_##name(const struct throtl_grp *tg) \ THROTL_TG_FNS(on_rr); -#define throtl_log_tg(td, tg, fmt, args...) \ - blk_add_trace_msg((td)->queue, "throtl %s " fmt, \ - blkg_path(tg_to_blkg(tg)), ##args); \ +#define throtl_log_tg(td, tg, fmt, args...) do { \ + char __pbuf[128]; \ + \ + blkg_path(tg_to_blkg(tg), __pbuf, sizeof(__pbuf)); \ + blk_add_trace_msg((td)->queue, "throtl %s " fmt, __pbuf, ##args); \ +} while (0) #define throtl_log(td, fmt, args...) \ blk_add_trace_msg((td)->queue, "throtl " fmt, ##args) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index eb07eb64e85b..901286b5f5cb 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -573,14 +573,21 @@ static inline void cfqg_put(struct cfq_group *cfqg) return blkg_put(cfqg_to_blkg(cfqg)); } -#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ +#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) do { \ + char __pbuf[128]; \ + \ + blkg_path(cfqg_to_blkg((cfqq)->cfqg), __pbuf, sizeof(__pbuf)); \ blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \ - cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ - blkg_path(cfqg_to_blkg((cfqq)->cfqg)), ##args) - -#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) \ - blk_add_trace_msg((cfqd)->queue, "%s " fmt, \ - blkg_path(cfqg_to_blkg((cfqg))), ##args) \ + cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ + __pbuf, ##args); \ +} while (0) + +#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do { \ + char __pbuf[128]; \ + \ + blkg_path(cfqg_to_blkg(cfqg), __pbuf, sizeof(__pbuf)); \ + blk_add_trace_msg((cfqd)->queue, "%s " fmt, __pbuf, ##args); \ +} while (0) static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, struct cfq_group *curr_cfqg, int rw) -- cgit v1.2.3-59-g8ed1b From 36558c8a30e121f97b5852ae33e28081af21bdbf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Apr 2012 13:57:24 -0700 Subject: blkcg: style cleanups for blk-cgroup.h * Update indentation on struct field declarations. * Uniformly don't use "extern" on function declarations. * Merge the two #ifdef CONFIG_BLK_CGROUP blocks. All changes in this patch are cosmetic. 
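Any other policy wanting the cgroup path in its trace output would follow the same pattern as the converted throtl/cfq macros in the previous patch: format the path into a caller-owned stack buffer via the new blkg_path(blkg, buf, buflen) and hand that buffer to blk_add_trace_msg(). The macro below is only an illustrative sketch and not part of this series; the "foo" policy name and its queue argument are made up.

/*
 * Hypothetical policy-side log macro, mirroring throtl_log_tg() and
 * cfq_log_cfqg() above.  Assumes a 128 byte buffer like the real macros.
 */
#define foo_log_blkg(q, blkg, fmt, args...) do {			\
	char __pbuf[128];						\
									\
	/* format the cgroup path of @blkg into the local buffer */	\
	blkg_path((blkg), __pbuf, sizeof(__pbuf));			\
	blk_add_trace_msg((q), "foo %s " fmt, __pbuf, ##args);		\
} while (0)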
Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.h | 108 ++++++++++++++++++++++++++--------------------------- 1 file changed, 52 insertions(+), 56 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index c524267e4f7f..b347aa08d166 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -38,15 +38,15 @@ enum blkg_rwstat_type { }; struct blkio_cgroup { - struct cgroup_subsys_state css; - spinlock_t lock; - struct hlist_head blkg_list; + struct cgroup_subsys_state css; + spinlock_t lock; + struct hlist_head blkg_list; /* for policies to test whether associated blkcg has changed */ - uint64_t id; + uint64_t id; /* TODO: per-policy storage in blkio_cgroup */ - unsigned int cfq_weight; /* belongs to cfq */ + unsigned int cfq_weight; /* belongs to cfq */ }; struct blkg_stat { @@ -62,27 +62,27 @@ struct blkg_rwstat { /* per-blkg per-policy data */ struct blkg_policy_data { /* the blkg this per-policy data belongs to */ - struct blkio_group *blkg; + struct blkio_group *blkg; /* used during policy activation */ - struct list_head alloc_node; + struct list_head alloc_node; /* pol->pdata_size bytes of private data used by policy impl */ - char pdata[] __aligned(__alignof__(unsigned long long)); + char pdata[] __aligned(__alignof__(unsigned long long)); }; struct blkio_group { /* Pointer to the associated request_queue */ - struct request_queue *q; - struct list_head q_node; - struct hlist_node blkcg_node; - struct blkio_cgroup *blkcg; + struct request_queue *q; + struct list_head q_node; + struct hlist_node blkcg_node; + struct blkio_cgroup *blkcg; /* reference count */ - int refcnt; + int refcnt; - struct blkg_policy_data *pd[BLKCG_MAX_POLS]; + struct blkg_policy_data *pd[BLKCG_MAX_POLS]; - struct rcu_head rcu_head; + struct rcu_head rcu_head; }; typedef void (blkio_init_group_fn)(struct blkio_group *blkg); @@ -90,29 +90,39 @@ typedef void (blkio_exit_group_fn)(struct blkio_group *blkg); typedef void (blkio_reset_group_stats_fn)(struct blkio_group *blkg); struct blkio_policy_ops { - blkio_init_group_fn *blkio_init_group_fn; - blkio_exit_group_fn *blkio_exit_group_fn; - blkio_reset_group_stats_fn *blkio_reset_group_stats_fn; + blkio_init_group_fn *blkio_init_group_fn; + blkio_exit_group_fn *blkio_exit_group_fn; + blkio_reset_group_stats_fn *blkio_reset_group_stats_fn; }; struct blkio_policy_type { - struct blkio_policy_ops ops; - int plid; - size_t pdata_size; /* policy specific private data size */ - struct cftype *cftypes; /* cgroup files for the policy */ + struct blkio_policy_ops ops; + int plid; + /* policy specific private data size */ + size_t pdata_size; + /* cgroup files for the policy */ + struct cftype *cftypes; }; -extern int blkcg_init_queue(struct request_queue *q); -extern void blkcg_drain_queue(struct request_queue *q); -extern void blkcg_exit_queue(struct request_queue *q); +extern struct blkio_cgroup blkio_root_cgroup; + +struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); +struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio); +struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, + struct request_queue *q); +struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, + struct request_queue *q); +int blkcg_init_queue(struct request_queue *q); +void blkcg_drain_queue(struct request_queue *q); +void blkcg_exit_queue(struct request_queue *q); /* Blkio controller policy registration */ -extern int blkio_policy_register(struct blkio_policy_type *); -extern void 
blkio_policy_unregister(struct blkio_policy_type *); -extern int blkcg_activate_policy(struct request_queue *q, - const struct blkio_policy_type *pol); -extern void blkcg_deactivate_policy(struct request_queue *q, - const struct blkio_policy_type *pol); +int blkio_policy_register(struct blkio_policy_type *); +void blkio_policy_unregister(struct blkio_policy_type *); +int blkcg_activate_policy(struct request_queue *q, + const struct blkio_policy_type *pol); +void blkcg_deactivate_policy(struct request_queue *q, + const struct blkio_policy_type *pol); void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg, u64 (*prfill)(struct seq_file *, void *, int), @@ -125,9 +135,9 @@ u64 blkg_prfill_stat(struct seq_file *sf, void *pdata, int off); u64 blkg_prfill_rwstat(struct seq_file *sf, void *pdata, int off); struct blkg_conf_ctx { - struct gendisk *disk; - struct blkio_group *blkg; - u64 v; + struct gendisk *disk; + struct blkio_group *blkg; + u64 v; }; int blkg_conf_prep(struct blkio_cgroup *blkcg, @@ -329,7 +339,9 @@ static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) memset(rwstat->cnt, 0, sizeof(rwstat->cnt)); } -#else +#else /* CONFIG_BLK_CGROUP */ + +struct cgroup; struct blkio_group { }; @@ -337,6 +349,9 @@ struct blkio_group { struct blkio_policy_type { }; +static inline struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; } +static inline struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio) { return NULL; } +static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, void *key) { return NULL; } static inline int blkcg_init_queue(struct request_queue *q) { return 0; } static inline void blkcg_drain_queue(struct request_queue *q) { } static inline void blkcg_exit_queue(struct request_queue *q) { } @@ -355,24 +370,5 @@ static inline char *blkg_path(struct blkio_group *blkg) { return NULL; } static inline void blkg_get(struct blkio_group *blkg) { } static inline void blkg_put(struct blkio_group *blkg) { } -#endif - -#ifdef CONFIG_BLK_CGROUP -extern struct blkio_cgroup blkio_root_cgroup; -extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); -extern struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio); -extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, - struct request_queue *q); -struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, - struct request_queue *q); -#else -struct cgroup; -static inline struct blkio_cgroup * -cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; } -static inline struct blkio_cgroup * -bio_blkio_cgroup(struct bio *bio) { return NULL; } - -static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, - void *key) { return NULL; } -#endif -#endif /* _BLK_CGROUP_H */ +#endif /* CONFIG_BLK_CGROUP */ +#endif /* _BLK_CGROUP_H */ -- cgit v1.2.3-59-g8ed1b From 3c798398e393e5f9502dbab2b51e6c25e2e8f2ac Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Apr 2012 13:57:25 -0700 Subject: blkcg: mass rename of blkcg API During the recent blkcg cleanup, most of blkcg API has changed to such extent that mass renaming wouldn't cause any noticeable pain. Take the chance and cleanup the naming. * Rename blkio_cgroup to blkcg. * Drop blkio / blkiocg prefixes and consistently use blkcg. * Rename blkio_group to blkcg_gq, which is consistent with io_cq but keep the blkg prefix / variable name. * Rename policy method type and field names to signify they're dealing with policy data. * Rename blkio_policy_type to blkcg_policy. 
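As an illustration of the renamed surface, a policy declaration and its registration change shape as sketched below. This is not part of the patch; "foo" and its helpers are made-up names, with the blkcg_policy_throtl and blkcg_policy_cfq conversions in the diff below being the real instances.

/* before the rename */
static struct blkio_policy_type blkio_policy_foo = {
	.ops = {
		.blkio_init_group_fn		= foo_init_blkio_group,
		.blkio_reset_group_stats_fn	= foo_reset_group_stats,
	},
	.pdata_size	= sizeof(struct foo_grp),
	.cftypes	= foo_files,
};

	ret = blkio_policy_register(&blkio_policy_foo);

/* after the rename */
static struct blkcg_policy blkcg_policy_foo = {
	.ops = {
		.pd_init_fn		= foo_pd_init,
		.pd_reset_stats_fn	= foo_pd_reset_stats,
	},
	.pdata_size	= sizeof(struct foo_grp),
	.cftypes	= foo_files,
};

	ret = blkcg_policy_register(&blkcg_policy_foo);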
This patch doesn't cause any functional change. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 202 ++++++++++++++++++++++++------------------------- block/blk-cgroup.h | 109 +++++++++++++------------- block/blk-throttle.c | 72 +++++++++--------- block/cfq-iosched.c | 78 +++++++++---------- include/linux/blkdev.h | 4 +- 5 files changed, 230 insertions(+), 235 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 63337024e4d7..997570329517 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -26,39 +26,39 @@ static DEFINE_MUTEX(blkcg_pol_mutex); -struct blkio_cgroup blkio_root_cgroup = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT }; -EXPORT_SYMBOL_GPL(blkio_root_cgroup); +struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT }; +EXPORT_SYMBOL_GPL(blkcg_root); -static struct blkio_policy_type *blkio_policy[BLKCG_MAX_POLS]; +static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; -struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) +struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), - struct blkio_cgroup, css); + struct blkcg, css); } -EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup); +EXPORT_SYMBOL_GPL(cgroup_to_blkcg); -static struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk) +static struct blkcg *task_blkcg(struct task_struct *tsk) { return container_of(task_subsys_state(tsk, blkio_subsys_id), - struct blkio_cgroup, css); + struct blkcg, css); } -struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio) +struct blkcg *bio_blkcg(struct bio *bio) { if (bio && bio->bi_css) - return container_of(bio->bi_css, struct blkio_cgroup, css); - return task_blkio_cgroup(current); + return container_of(bio->bi_css, struct blkcg, css); + return task_blkcg(current); } -EXPORT_SYMBOL_GPL(bio_blkio_cgroup); +EXPORT_SYMBOL_GPL(bio_blkcg); static bool blkcg_policy_enabled(struct request_queue *q, - const struct blkio_policy_type *pol) + const struct blkcg_policy *pol) { return pol && test_bit(pol->plid, q->blkcg_pols); } -static size_t blkg_pd_size(const struct blkio_policy_type *pol) +static size_t blkg_pd_size(const struct blkcg_policy *pol) { return sizeof(struct blkg_policy_data) + pol->pdata_size; } @@ -69,7 +69,7 @@ static size_t blkg_pd_size(const struct blkio_policy_type *pol) * * Free @blkg which may be partially allocated. */ -static void blkg_free(struct blkio_group *blkg) +static void blkg_free(struct blkcg_gq *blkg) { int i; @@ -77,14 +77,14 @@ static void blkg_free(struct blkio_group *blkg) return; for (i = 0; i < BLKCG_MAX_POLS; i++) { - struct blkio_policy_type *pol = blkio_policy[i]; + struct blkcg_policy *pol = blkcg_policy[i]; struct blkg_policy_data *pd = blkg->pd[i]; if (!pd) continue; - if (pol && pol->ops.blkio_exit_group_fn) - pol->ops.blkio_exit_group_fn(blkg); + if (pol && pol->ops.pd_exit_fn) + pol->ops.pd_exit_fn(blkg); kfree(pd); } @@ -99,10 +99,9 @@ static void blkg_free(struct blkio_group *blkg) * * Allocate a new blkg assocating @blkcg and @q. 
*/ -static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, - struct request_queue *q) +static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q) { - struct blkio_group *blkg; + struct blkcg_gq *blkg; int i; /* alloc and init base part */ @@ -116,7 +115,7 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, blkg->refcnt = 1; for (i = 0; i < BLKCG_MAX_POLS; i++) { - struct blkio_policy_type *pol = blkio_policy[i]; + struct blkcg_policy *pol = blkcg_policy[i]; struct blkg_policy_data *pd; if (!blkcg_policy_enabled(q, pol)) @@ -135,19 +134,19 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, /* invoke per-policy init */ for (i = 0; i < BLKCG_MAX_POLS; i++) { - struct blkio_policy_type *pol = blkio_policy[i]; + struct blkcg_policy *pol = blkcg_policy[i]; if (blkcg_policy_enabled(blkg->q, pol)) - pol->ops.blkio_init_group_fn(blkg); + pol->ops.pd_init_fn(blkg); } return blkg; } -static struct blkio_group *__blkg_lookup(struct blkio_cgroup *blkcg, - struct request_queue *q) +static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, + struct request_queue *q) { - struct blkio_group *blkg; + struct blkcg_gq *blkg; struct hlist_node *n; hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) @@ -165,8 +164,7 @@ static struct blkio_group *__blkg_lookup(struct blkio_cgroup *blkcg, * under RCU read lock and is guaranteed to return %NULL if @q is bypassing * - see blk_queue_bypass_start() for details. */ -struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, - struct request_queue *q) +struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q) { WARN_ON_ONCE(!rcu_read_lock_held()); @@ -176,11 +174,11 @@ struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, } EXPORT_SYMBOL_GPL(blkg_lookup); -static struct blkio_group *__blkg_lookup_create(struct blkio_cgroup *blkcg, - struct request_queue *q) +static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q) __releases(q->queue_lock) __acquires(q->queue_lock) { - struct blkio_group *blkg; + struct blkcg_gq *blkg; WARN_ON_ONCE(!rcu_read_lock_held()); lockdep_assert_held(q->queue_lock); @@ -213,8 +211,8 @@ out: return blkg; } -struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, - struct request_queue *q) +struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q) { /* * This could be the first entry point of blkcg implementation and @@ -226,10 +224,10 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, } EXPORT_SYMBOL_GPL(blkg_lookup_create); -static void blkg_destroy(struct blkio_group *blkg) +static void blkg_destroy(struct blkcg_gq *blkg) { struct request_queue *q = blkg->q; - struct blkio_cgroup *blkcg = blkg->blkcg; + struct blkcg *blkcg = blkg->blkcg; lockdep_assert_held(q->queue_lock); lockdep_assert_held(&blkcg->lock); @@ -255,12 +253,12 @@ static void blkg_destroy(struct blkio_group *blkg) */ static void blkg_destroy_all(struct request_queue *q) { - struct blkio_group *blkg, *n; + struct blkcg_gq *blkg, *n; lockdep_assert_held(q->queue_lock); list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { - struct blkio_cgroup *blkcg = blkg->blkcg; + struct blkcg *blkcg = blkg->blkcg; spin_lock(&blkcg->lock); blkg_destroy(blkg); @@ -270,10 +268,10 @@ static void blkg_destroy_all(struct request_queue *q) static void blkg_rcu_free(struct rcu_head *rcu_head) { - blkg_free(container_of(rcu_head, struct blkio_group, rcu_head)); + blkg_free(container_of(rcu_head, struct 
blkcg_gq, rcu_head)); } -void __blkg_release(struct blkio_group *blkg) +void __blkg_release(struct blkcg_gq *blkg) { /* release the extra blkcg reference this blkg has been holding */ css_put(&blkg->blkcg->css); @@ -291,11 +289,11 @@ void __blkg_release(struct blkio_group *blkg) } EXPORT_SYMBOL_GPL(__blkg_release); -static int -blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) +static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, + u64 val) { - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); - struct blkio_group *blkg; + struct blkcg *blkcg = cgroup_to_blkcg(cgroup); + struct blkcg_gq *blkg; struct hlist_node *n; int i; @@ -309,11 +307,11 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) */ hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { for (i = 0; i < BLKCG_MAX_POLS; i++) { - struct blkio_policy_type *pol = blkio_policy[i]; + struct blkcg_policy *pol = blkcg_policy[i]; if (blkcg_policy_enabled(blkg->q, pol) && - pol->ops.blkio_reset_group_stats_fn) - pol->ops.blkio_reset_group_stats_fn(blkg); + pol->ops.pd_reset_stats_fn) + pol->ops.pd_reset_stats_fn(blkg); } } @@ -322,7 +320,7 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) return 0; } -static const char *blkg_dev_name(struct blkio_group *blkg) +static const char *blkg_dev_name(struct blkcg_gq *blkg) { /* some drivers (floppy) instantiate a queue w/o disk registered */ if (blkg->q->backing_dev_info.dev) @@ -347,12 +345,12 @@ static const char *blkg_dev_name(struct blkio_group *blkg) * This is to be used to construct print functions for * cftype->read_seq_string method. */ -void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg, +void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, u64 (*prfill)(struct seq_file *, void *, int), - const struct blkio_policy_type *pol, int data, + const struct blkcg_policy *pol, int data, bool show_total) { - struct blkio_group *blkg; + struct blkcg_gq *blkg; struct hlist_node *n; u64 total = 0; @@ -462,13 +460,12 @@ EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); * value. This function returns with RCU read lock and queue lock held and * must be paired with blkg_conf_finish(). */ -int blkg_conf_prep(struct blkio_cgroup *blkcg, - const struct blkio_policy_type *pol, const char *input, - struct blkg_conf_ctx *ctx) +int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, + const char *input, struct blkg_conf_ctx *ctx) __acquires(rcu) __acquires(disk->queue->queue_lock) { struct gendisk *disk; - struct blkio_group *blkg; + struct blkcg_gq *blkg; unsigned int major, minor; unsigned long long v; int part, ret; @@ -529,16 +526,16 @@ void blkg_conf_finish(struct blkg_conf_ctx *ctx) } EXPORT_SYMBOL_GPL(blkg_conf_finish); -struct cftype blkio_files[] = { +struct cftype blkcg_files[] = { { .name = "reset_stats", - .write_u64 = blkiocg_reset_stats, + .write_u64 = blkcg_reset_stats, }, { } /* terminate */ }; /** - * blkiocg_pre_destroy - cgroup pre_destroy callback + * blkcg_pre_destroy - cgroup pre_destroy callback * @cgroup: cgroup of interest * * This function is called when @cgroup is about to go away and responsible @@ -548,15 +545,15 @@ struct cftype blkio_files[] = { * * This is the blkcg counterpart of ioc_release_fn(). 
*/ -static int blkiocg_pre_destroy(struct cgroup *cgroup) +static int blkcg_pre_destroy(struct cgroup *cgroup) { - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); + struct blkcg *blkcg = cgroup_to_blkcg(cgroup); spin_lock_irq(&blkcg->lock); while (!hlist_empty(&blkcg->blkg_list)) { - struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first, - struct blkio_group, blkcg_node); + struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first, + struct blkcg_gq, blkcg_node); struct request_queue *q = blkg->q; if (spin_trylock(q->queue_lock)) { @@ -573,22 +570,22 @@ static int blkiocg_pre_destroy(struct cgroup *cgroup) return 0; } -static void blkiocg_destroy(struct cgroup *cgroup) +static void blkcg_destroy(struct cgroup *cgroup) { - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); + struct blkcg *blkcg = cgroup_to_blkcg(cgroup); - if (blkcg != &blkio_root_cgroup) + if (blkcg != &blkcg_root) kfree(blkcg); } -static struct cgroup_subsys_state *blkiocg_create(struct cgroup *cgroup) +static struct cgroup_subsys_state *blkcg_create(struct cgroup *cgroup) { static atomic64_t id_seq = ATOMIC64_INIT(0); - struct blkio_cgroup *blkcg; + struct blkcg *blkcg; struct cgroup *parent = cgroup->parent; if (!parent) { - blkcg = &blkio_root_cgroup; + blkcg = &blkcg_root; goto done; } @@ -656,7 +653,7 @@ void blkcg_exit_queue(struct request_queue *q) * of the main cic data structures. For now we allow a task to change * its cgroup only if it's the only owner of its ioc. */ -static int blkiocg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) +static int blkcg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *task; struct io_context *ioc; @@ -677,12 +674,12 @@ static int blkiocg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) struct cgroup_subsys blkio_subsys = { .name = "blkio", - .create = blkiocg_create, - .can_attach = blkiocg_can_attach, - .pre_destroy = blkiocg_pre_destroy, - .destroy = blkiocg_destroy, + .create = blkcg_create, + .can_attach = blkcg_can_attach, + .pre_destroy = blkcg_pre_destroy, + .destroy = blkcg_destroy, .subsys_id = blkio_subsys_id, - .base_cftypes = blkio_files, + .base_cftypes = blkcg_files, .module = THIS_MODULE, }; EXPORT_SYMBOL_GPL(blkio_subsys); @@ -704,10 +701,10 @@ EXPORT_SYMBOL_GPL(blkio_subsys); * [un]registerations. Returns 0 on success, -errno on failure. */ int blkcg_activate_policy(struct request_queue *q, - const struct blkio_policy_type *pol) + const struct blkcg_policy *pol) { LIST_HEAD(pds); - struct blkio_group *blkg; + struct blkcg_gq *blkg; struct blkg_policy_data *pd, *n; int cnt = 0, ret; @@ -720,7 +717,7 @@ int blkcg_activate_policy(struct request_queue *q, spin_lock_irq(q->queue_lock); rcu_read_lock(); - blkg = __blkg_lookup_create(&blkio_root_cgroup, q); + blkg = __blkg_lookup_create(&blkcg_root, q); rcu_read_unlock(); if (IS_ERR(blkg)) { @@ -764,7 +761,7 @@ int blkcg_activate_policy(struct request_queue *q, blkg->pd[pol->plid] = pd; pd->blkg = blkg; - pol->ops.blkio_init_group_fn(blkg); + pol->ops.pd_init_fn(blkg); spin_unlock(&blkg->blkcg->lock); } @@ -790,9 +787,9 @@ EXPORT_SYMBOL_GPL(blkcg_activate_policy); * blkcg_activate_policy(). 
*/ void blkcg_deactivate_policy(struct request_queue *q, - const struct blkio_policy_type *pol) + const struct blkcg_policy *pol) { - struct blkio_group *blkg; + struct blkcg_gq *blkg; if (!blkcg_policy_enabled(q, pol)) return; @@ -810,8 +807,8 @@ void blkcg_deactivate_policy(struct request_queue *q, /* grab blkcg lock too while removing @pd from @blkg */ spin_lock(&blkg->blkcg->lock); - if (pol->ops.blkio_exit_group_fn) - pol->ops.blkio_exit_group_fn(blkg); + if (pol->ops.pd_exit_fn) + pol->ops.pd_exit_fn(blkg); kfree(blkg->pd[pol->plid]); blkg->pd[pol->plid] = NULL; @@ -825,14 +822,13 @@ void blkcg_deactivate_policy(struct request_queue *q, EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); /** - * blkio_policy_register - register a blkcg policy - * @blkiop: blkcg policy to register + * blkcg_policy_register - register a blkcg policy + * @pol: blkcg policy to register * - * Register @blkiop with blkcg core. Might sleep and @blkiop may be - * modified on successful registration. Returns 0 on success and -errno on - * failure. + * Register @pol with blkcg core. Might sleep and @pol may be modified on + * successful registration. Returns 0 on success and -errno on failure. */ -int blkio_policy_register(struct blkio_policy_type *blkiop) +int blkcg_policy_register(struct blkcg_policy *pol) { int i, ret; @@ -841,45 +837,45 @@ int blkio_policy_register(struct blkio_policy_type *blkiop) /* find an empty slot */ ret = -ENOSPC; for (i = 0; i < BLKCG_MAX_POLS; i++) - if (!blkio_policy[i]) + if (!blkcg_policy[i]) break; if (i >= BLKCG_MAX_POLS) goto out_unlock; /* register and update blkgs */ - blkiop->plid = i; - blkio_policy[i] = blkiop; + pol->plid = i; + blkcg_policy[i] = pol; /* everything is in place, add intf files for the new policy */ - if (blkiop->cftypes) - WARN_ON(cgroup_add_cftypes(&blkio_subsys, blkiop->cftypes)); + if (pol->cftypes) + WARN_ON(cgroup_add_cftypes(&blkio_subsys, pol->cftypes)); ret = 0; out_unlock: mutex_unlock(&blkcg_pol_mutex); return ret; } -EXPORT_SYMBOL_GPL(blkio_policy_register); +EXPORT_SYMBOL_GPL(blkcg_policy_register); /** - * blkiop_policy_unregister - unregister a blkcg policy - * @blkiop: blkcg policy to unregister + * blkcg_policy_unregister - unregister a blkcg policy + * @pol: blkcg policy to unregister * - * Undo blkio_policy_register(@blkiop). Might sleep. + * Undo blkcg_policy_register(@pol). Might sleep. 
*/ -void blkio_policy_unregister(struct blkio_policy_type *blkiop) +void blkcg_policy_unregister(struct blkcg_policy *pol) { mutex_lock(&blkcg_pol_mutex); - if (WARN_ON(blkio_policy[blkiop->plid] != blkiop)) + if (WARN_ON(blkcg_policy[pol->plid] != pol)) goto out_unlock; /* kill the intf files first */ - if (blkiop->cftypes) - cgroup_rm_cftypes(&blkio_subsys, blkiop->cftypes); + if (pol->cftypes) + cgroup_rm_cftypes(&blkio_subsys, pol->cftypes); /* unregister and update blkgs */ - blkio_policy[blkiop->plid] = NULL; + blkcg_policy[pol->plid] = NULL; out_unlock: mutex_unlock(&blkcg_pol_mutex); } -EXPORT_SYMBOL_GPL(blkio_policy_unregister); +EXPORT_SYMBOL_GPL(blkcg_policy_unregister); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index b347aa08d166..a443b84d2c16 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -37,7 +37,7 @@ enum blkg_rwstat_type { BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR, }; -struct blkio_cgroup { +struct blkcg { struct cgroup_subsys_state css; spinlock_t lock; struct hlist_head blkg_list; @@ -45,7 +45,7 @@ struct blkio_cgroup { /* for policies to test whether associated blkcg has changed */ uint64_t id; - /* TODO: per-policy storage in blkio_cgroup */ + /* TODO: per-policy storage in blkcg */ unsigned int cfq_weight; /* belongs to cfq */ }; @@ -62,7 +62,7 @@ struct blkg_rwstat { /* per-blkg per-policy data */ struct blkg_policy_data { /* the blkg this per-policy data belongs to */ - struct blkio_group *blkg; + struct blkcg_gq *blkg; /* used during policy activation */ struct list_head alloc_node; @@ -71,12 +71,13 @@ struct blkg_policy_data { char pdata[] __aligned(__alignof__(unsigned long long)); }; -struct blkio_group { +/* association between a blk cgroup and a request queue */ +struct blkcg_gq { /* Pointer to the associated request_queue */ struct request_queue *q; struct list_head q_node; struct hlist_node blkcg_node; - struct blkio_cgroup *blkcg; + struct blkcg *blkcg; /* reference count */ int refcnt; @@ -85,18 +86,18 @@ struct blkio_group { struct rcu_head rcu_head; }; -typedef void (blkio_init_group_fn)(struct blkio_group *blkg); -typedef void (blkio_exit_group_fn)(struct blkio_group *blkg); -typedef void (blkio_reset_group_stats_fn)(struct blkio_group *blkg); +typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); +typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg); +typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg); -struct blkio_policy_ops { - blkio_init_group_fn *blkio_init_group_fn; - blkio_exit_group_fn *blkio_exit_group_fn; - blkio_reset_group_stats_fn *blkio_reset_group_stats_fn; +struct blkcg_policy_ops { + blkcg_pol_init_pd_fn *pd_init_fn; + blkcg_pol_exit_pd_fn *pd_exit_fn; + blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; }; -struct blkio_policy_type { - struct blkio_policy_ops ops; +struct blkcg_policy { + struct blkcg_policy_ops ops; int plid; /* policy specific private data size */ size_t pdata_size; @@ -104,29 +105,28 @@ struct blkio_policy_type { struct cftype *cftypes; }; -extern struct blkio_cgroup blkio_root_cgroup; +extern struct blkcg blkcg_root; -struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); -struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio); -struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, - struct request_queue *q); -struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg, - struct request_queue *q); +struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup); +struct blkcg *bio_blkcg(struct bio *bio); +struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, 
struct request_queue *q); +struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q); int blkcg_init_queue(struct request_queue *q); void blkcg_drain_queue(struct request_queue *q); void blkcg_exit_queue(struct request_queue *q); /* Blkio controller policy registration */ -int blkio_policy_register(struct blkio_policy_type *); -void blkio_policy_unregister(struct blkio_policy_type *); +int blkcg_policy_register(struct blkcg_policy *pol); +void blkcg_policy_unregister(struct blkcg_policy *pol); int blkcg_activate_policy(struct request_queue *q, - const struct blkio_policy_type *pol); + const struct blkcg_policy *pol); void blkcg_deactivate_policy(struct request_queue *q, - const struct blkio_policy_type *pol); + const struct blkcg_policy *pol); -void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg, +void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, u64 (*prfill)(struct seq_file *, void *, int), - const struct blkio_policy_type *pol, int data, + const struct blkcg_policy *pol, int data, bool show_total); u64 __blkg_prfill_u64(struct seq_file *sf, void *pdata, u64 v); u64 __blkg_prfill_rwstat(struct seq_file *sf, void *pdata, @@ -136,13 +136,12 @@ u64 blkg_prfill_rwstat(struct seq_file *sf, void *pdata, int off); struct blkg_conf_ctx { struct gendisk *disk; - struct blkio_group *blkg; + struct blkcg_gq *blkg; u64 v; }; -int blkg_conf_prep(struct blkio_cgroup *blkcg, - const struct blkio_policy_type *pol, const char *input, - struct blkg_conf_ctx *ctx); +int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, + const char *input, struct blkg_conf_ctx *ctx); void blkg_conf_finish(struct blkg_conf_ctx *ctx); @@ -153,8 +152,8 @@ void blkg_conf_finish(struct blkg_conf_ctx *ctx); * * Return pointer to private data associated with the @blkg-@pol pair. */ -static inline void *blkg_to_pdata(struct blkio_group *blkg, - struct blkio_policy_type *pol) +static inline void *blkg_to_pdata(struct blkcg_gq *blkg, + struct blkcg_policy *pol) { return blkg ? blkg->pd[pol->plid]->pdata : NULL; } @@ -165,7 +164,7 @@ static inline void *blkg_to_pdata(struct blkio_group *blkg, * * @pdata is policy private data. Determine the blkg it's associated with. */ -static inline struct blkio_group *pdata_to_blkg(void *pdata) +static inline struct blkcg_gq *pdata_to_blkg(void *pdata) { if (pdata) { struct blkg_policy_data *pd = @@ -183,7 +182,7 @@ static inline struct blkio_group *pdata_to_blkg(void *pdata) * * Format the path of the cgroup of @blkg into @buf. */ -static inline int blkg_path(struct blkio_group *blkg, char *buf, int buflen) +static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) { int ret; @@ -201,14 +200,14 @@ static inline int blkg_path(struct blkio_group *blkg, char *buf, int buflen) * * The caller should be holding queue_lock and an existing reference. */ -static inline void blkg_get(struct blkio_group *blkg) +static inline void blkg_get(struct blkcg_gq *blkg) { lockdep_assert_held(blkg->q->queue_lock); WARN_ON_ONCE(!blkg->refcnt); blkg->refcnt++; } -void __blkg_release(struct blkio_group *blkg); +void __blkg_release(struct blkcg_gq *blkg); /** * blkg_put - put a blkg reference @@ -216,7 +215,7 @@ void __blkg_release(struct blkio_group *blkg); * * The caller should be holding queue_lock. 
*/ -static inline void blkg_put(struct blkio_group *blkg) +static inline void blkg_put(struct blkcg_gq *blkg) { lockdep_assert_held(blkg->q->queue_lock); WARN_ON_ONCE(blkg->refcnt <= 0); @@ -343,32 +342,32 @@ static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) struct cgroup; -struct blkio_group { +struct blkcg_gq { }; -struct blkio_policy_type { +struct blkcg_policy { }; -static inline struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; } -static inline struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio) { return NULL; } -static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg, void *key) { return NULL; } +static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; } +static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } +static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } static inline int blkcg_init_queue(struct request_queue *q) { return 0; } static inline void blkcg_drain_queue(struct request_queue *q) { } static inline void blkcg_exit_queue(struct request_queue *q) { } -static inline int blkio_policy_register(struct blkio_policy_type *blkiop) { return 0; } -static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { } +static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } +static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { } static inline int blkcg_activate_policy(struct request_queue *q, - const struct blkio_policy_type *pol) { return 0; } + const struct blkcg_policy *pol) { return 0; } static inline void blkcg_deactivate_policy(struct request_queue *q, - const struct blkio_policy_type *pol) { } - -static inline void *blkg_to_pdata(struct blkio_group *blkg, - struct blkio_policy_type *pol) { return NULL; } -static inline struct blkio_group *pdata_to_blkg(void *pdata, - struct blkio_policy_type *pol) { return NULL; } -static inline char *blkg_path(struct blkio_group *blkg) { return NULL; } -static inline void blkg_get(struct blkio_group *blkg) { } -static inline void blkg_put(struct blkio_group *blkg) { } + const struct blkcg_policy *pol) { } + +static inline void *blkg_to_pdata(struct blkcg_gq *blkg, + struct blkcg_policy *pol) { return NULL; } +static inline struct blkcg_gq *pdata_to_blkg(void *pdata, + struct blkcg_policy *pol) { return NULL; } +static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } +static inline void blkg_get(struct blkcg_gq *blkg) { } +static inline void blkg_put(struct blkcg_gq *blkg) { } #endif /* CONFIG_BLK_CGROUP */ #endif /* _BLK_CGROUP_H */ diff --git a/block/blk-throttle.c b/block/blk-throttle.c index e9b7a47f6da0..00c7eff66ecf 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -21,7 +21,7 @@ static int throtl_quantum = 32; /* Throttling is performed over 100ms slice and after that slice is renewed */ static unsigned long throtl_slice = HZ/10; /* 100 ms */ -static struct blkio_policy_type blkio_policy_throtl; +static struct blkcg_policy blkcg_policy_throtl; /* A workqueue to queue throttle related work */ static struct workqueue_struct *kthrotld_workqueue; @@ -120,12 +120,12 @@ static LIST_HEAD(tg_stats_alloc_list); static void tg_stats_alloc_fn(struct work_struct *); static DECLARE_DELAYED_WORK(tg_stats_alloc_work, tg_stats_alloc_fn); -static inline struct throtl_grp *blkg_to_tg(struct blkio_group *blkg) +static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg) { - return blkg_to_pdata(blkg, 
&blkio_policy_throtl); + return blkg_to_pdata(blkg, &blkcg_policy_throtl); } -static inline struct blkio_group *tg_to_blkg(struct throtl_grp *tg) +static inline struct blkcg_gq *tg_to_blkg(struct throtl_grp *tg) { return pdata_to_blkg(tg); } @@ -208,7 +208,7 @@ alloc_stats: goto alloc_stats; } -static void throtl_init_blkio_group(struct blkio_group *blkg) +static void throtl_pd_init(struct blkcg_gq *blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); @@ -233,7 +233,7 @@ static void throtl_init_blkio_group(struct blkio_group *blkg) spin_unlock(&tg_stats_alloc_lock); } -static void throtl_exit_blkio_group(struct blkio_group *blkg) +static void throtl_pd_exit(struct blkcg_gq *blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); @@ -244,7 +244,7 @@ static void throtl_exit_blkio_group(struct blkio_group *blkg) free_percpu(tg->stats_cpu); } -static void throtl_reset_group_stats(struct blkio_group *blkg) +static void throtl_pd_reset_stats(struct blkcg_gq *blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); int cpu; @@ -260,33 +260,33 @@ static void throtl_reset_group_stats(struct blkio_group *blkg) } } -static struct -throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) +static struct throtl_grp *throtl_lookup_tg(struct throtl_data *td, + struct blkcg *blkcg) { /* - * This is the common case when there are no blkio cgroups. - * Avoid lookup in this case + * This is the common case when there are no blkcgs. Avoid lookup + * in this case */ - if (blkcg == &blkio_root_cgroup) + if (blkcg == &blkcg_root) return td_root_tg(td); return blkg_to_tg(blkg_lookup(blkcg, td->queue)); } static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, - struct blkio_cgroup *blkcg) + struct blkcg *blkcg) { struct request_queue *q = td->queue; struct throtl_grp *tg = NULL; /* - * This is the common case when there are no blkio cgroups. - * Avoid lookup in this case + * This is the common case when there are no blkcgs. 
Avoid lookup + * in this case */ - if (blkcg == &blkio_root_cgroup) { + if (blkcg == &blkcg_root) { tg = td_root_tg(td); } else { - struct blkio_group *blkg; + struct blkcg_gq *blkg; blkg = blkg_lookup_create(blkcg, q); @@ -665,7 +665,7 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg, return 0; } -static void throtl_update_dispatch_stats(struct blkio_group *blkg, u64 bytes, +static void throtl_update_dispatch_stats(struct blkcg_gq *blkg, u64 bytes, int rw) { struct throtl_grp *tg = blkg_to_tg(blkg); @@ -822,7 +822,7 @@ static int throtl_select_dispatch(struct throtl_data *td, struct bio_list *bl) static void throtl_process_limit_change(struct throtl_data *td) { struct request_queue *q = td->queue; - struct blkio_group *blkg, *n; + struct blkcg_gq *blkg, *n; if (!td->limits_changed) return; @@ -951,9 +951,9 @@ static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, void *pdata, int off) static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + struct blkcg *blkcg = cgroup_to_blkcg(cgrp); - blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, &blkio_policy_throtl, + blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, &blkcg_policy_throtl, cft->private, true); return 0; } @@ -979,29 +979,29 @@ static u64 tg_prfill_conf_uint(struct seq_file *sf, void *pdata, int off) static int tg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { - blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), tg_prfill_conf_u64, - &blkio_policy_throtl, cft->private, false); + blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), tg_prfill_conf_u64, + &blkcg_policy_throtl, cft->private, false); return 0; } static int tg_print_conf_uint(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { - blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), tg_prfill_conf_uint, - &blkio_policy_throtl, cft->private, false); + blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), tg_prfill_conf_uint, + &blkcg_policy_throtl, cft->private, false); return 0; } static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf, bool is_u64) { - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + struct blkcg *blkcg = cgroup_to_blkcg(cgrp); struct blkg_conf_ctx ctx; struct throtl_grp *tg; struct throtl_data *td; int ret; - ret = blkg_conf_prep(blkcg, &blkio_policy_throtl, buf, &ctx); + ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx); if (ret) return ret; @@ -1086,11 +1086,11 @@ static void throtl_shutdown_wq(struct request_queue *q) cancel_delayed_work_sync(&td->throtl_work); } -static struct blkio_policy_type blkio_policy_throtl = { +static struct blkcg_policy blkcg_policy_throtl = { .ops = { - .blkio_init_group_fn = throtl_init_blkio_group, - .blkio_exit_group_fn = throtl_exit_blkio_group, - .blkio_reset_group_stats_fn = throtl_reset_group_stats, + .pd_init_fn = throtl_pd_init, + .pd_exit_fn = throtl_pd_exit, + .pd_reset_stats_fn = throtl_pd_reset_stats, }, .pdata_size = sizeof(struct throtl_grp), .cftypes = throtl_files, @@ -1101,7 +1101,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) struct throtl_data *td = q->td; struct throtl_grp *tg; bool rw = bio_data_dir(bio), update_disptime = true; - struct blkio_cgroup *blkcg; + struct blkcg *blkcg; bool throttled = false; if (bio->bi_rw & REQ_THROTTLED) { @@ -1118,7 +1118,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) * just update the dispatch stats in lockless manner and return. 
*/ rcu_read_lock(); - blkcg = bio_blkio_cgroup(bio); + blkcg = bio_blkcg(bio); tg = throtl_lookup_tg(td, blkcg); if (tg) { if (tg_no_rule_group(tg, rw)) { @@ -1243,7 +1243,7 @@ int blk_throtl_init(struct request_queue *q) td->queue = q; /* activate policy */ - ret = blkcg_activate_policy(q, &blkio_policy_throtl); + ret = blkcg_activate_policy(q, &blkcg_policy_throtl); if (ret) kfree(td); return ret; @@ -1253,7 +1253,7 @@ void blk_throtl_exit(struct request_queue *q) { BUG_ON(!q->td); throtl_shutdown_wq(q); - blkcg_deactivate_policy(q, &blkio_policy_throtl); + blkcg_deactivate_policy(q, &blkcg_policy_throtl); kfree(q->td); } @@ -1263,7 +1263,7 @@ static int __init throtl_init(void) if (!kthrotld_workqueue) panic("Failed to create kthrotld\n"); - return blkio_policy_register(&blkio_policy_throtl); + return blkcg_policy_register(&blkcg_policy_throtl); } module_init(throtl_init); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 901286b5f5cb..792218281d91 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -17,7 +17,7 @@ #include "blk.h" #include "blk-cgroup.h" -static struct blkio_policy_type blkio_policy_cfq __maybe_unused; +static struct blkcg_policy blkcg_policy_cfq __maybe_unused; /* * tunables @@ -202,7 +202,7 @@ struct cfqg_stats { struct blkg_stat dequeue; /* total time spent waiting for it to be assigned a timeslice. */ struct blkg_stat group_wait_time; - /* time spent idling for this blkio_group */ + /* time spent idling for this blkcg_gq */ struct blkg_stat idle_time; /* total time with empty current active q with other requests queued */ struct blkg_stat empty_time; @@ -553,12 +553,12 @@ static inline void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) { } #ifdef CONFIG_CFQ_GROUP_IOSCHED -static inline struct cfq_group *blkg_to_cfqg(struct blkio_group *blkg) +static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg) { - return blkg_to_pdata(blkg, &blkio_policy_cfq); + return blkg_to_pdata(blkg, &blkcg_policy_cfq); } -static inline struct blkio_group *cfqg_to_blkg(struct cfq_group *cfqg) +static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg) { return pdata_to_blkg(cfqg); } @@ -637,7 +637,7 @@ static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, io_start_time - start_time); } -static void cfqg_stats_reset(struct blkio_group *blkg) +static void cfq_pd_reset_stats(struct blkcg_gq *blkg) { struct cfq_group *cfqg = blkg_to_cfqg(blkg); struct cfqg_stats *stats = &cfqg->stats; @@ -662,8 +662,8 @@ static void cfqg_stats_reset(struct blkio_group *blkg) #else /* CONFIG_CFQ_GROUP_IOSCHED */ -static inline struct cfq_group *blkg_to_cfqg(struct blkio_group *blkg) { return NULL; } -static inline struct blkio_group *cfqg_to_blkg(struct cfq_group *cfqg) { return NULL; } +static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg) { return NULL; } +static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg) { return NULL; } static inline void cfqg_get(struct cfq_group *cfqg) { } static inline void cfqg_put(struct cfq_group *cfqg) { } @@ -1331,7 +1331,7 @@ static void cfq_init_cfqg_base(struct cfq_group *cfqg) } #ifdef CONFIG_CFQ_GROUP_IOSCHED -static void cfq_init_blkio_group(struct blkio_group *blkg) +static void cfq_pd_init(struct blkcg_gq *blkg) { struct cfq_group *cfqg = blkg_to_cfqg(blkg); @@ -1344,16 +1344,16 @@ static void cfq_init_blkio_group(struct blkio_group *blkg) * be held. 
*/ static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd, - struct blkio_cgroup *blkcg) + struct blkcg *blkcg) { struct request_queue *q = cfqd->queue; struct cfq_group *cfqg = NULL; - /* avoid lookup for the common case where there's no blkio cgroup */ - if (blkcg == &blkio_root_cgroup) { + /* avoid lookup for the common case where there's no blkcg */ + if (blkcg == &blkcg_root) { cfqg = cfqd->root_group; } else { - struct blkio_group *blkg; + struct blkcg_gq *blkg; blkg = blkg_lookup_create(blkcg, q); if (!IS_ERR(blkg)) @@ -1386,8 +1386,8 @@ static u64 cfqg_prfill_weight_device(struct seq_file *sf, void *pdata, int off) static int cfqg_print_weight_device(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { - blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), - cfqg_prfill_weight_device, &blkio_policy_cfq, 0, + blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), + cfqg_prfill_weight_device, &blkcg_policy_cfq, 0, false); return 0; } @@ -1395,19 +1395,19 @@ static int cfqg_print_weight_device(struct cgroup *cgrp, struct cftype *cft, static int cfq_print_weight(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { - seq_printf(sf, "%u\n", cgroup_to_blkio_cgroup(cgrp)->cfq_weight); + seq_printf(sf, "%u\n", cgroup_to_blkcg(cgrp)->cfq_weight); return 0; } static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, const char *buf) { - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + struct blkcg *blkcg = cgroup_to_blkcg(cgrp); struct blkg_conf_ctx ctx; struct cfq_group *cfqg; int ret; - ret = blkg_conf_prep(blkcg, &blkio_policy_cfq, buf, &ctx); + ret = blkg_conf_prep(blkcg, &blkcg_policy_cfq, buf, &ctx); if (ret) return ret; @@ -1425,8 +1425,8 @@ static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) { - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - struct blkio_group *blkg; + struct blkcg *blkcg = cgroup_to_blkcg(cgrp); + struct blkcg_gq *blkg; struct hlist_node *n; if (val < CFQ_WEIGHT_MIN || val > CFQ_WEIGHT_MAX) @@ -1449,9 +1449,9 @@ static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) static int cfqg_print_stat(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + struct blkcg *blkcg = cgroup_to_blkcg(cgrp); - blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, &blkio_policy_cfq, + blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, &blkcg_policy_cfq, cft->private, false); return 0; } @@ -1459,9 +1459,9 @@ static int cfqg_print_stat(struct cgroup *cgrp, struct cftype *cft, static int cfqg_print_rwstat(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + struct blkcg *blkcg = cgroup_to_blkcg(cgrp); - blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, &blkio_policy_cfq, + blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, &blkcg_policy_cfq, cft->private, true); return 0; } @@ -1485,10 +1485,10 @@ static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, void *pdata, int off) static int cfqg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + struct blkcg *blkcg = cgroup_to_blkcg(cgrp); blkcg_print_blkgs(sf, blkcg, cfqg_prfill_avg_queue_size, - &blkio_policy_cfq, 0, false); + &blkcg_policy_cfq, 0, false); return 0; } #endif /* CONFIG_DEBUG_BLK_CGROUP */ @@ -1580,7 +1580,7 @@ 
static struct cftype cfq_blkcg_files[] = { }; #else /* GROUP_IOSCHED */ static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd, - struct blkio_cgroup *blkcg) + struct blkcg *blkcg) { return cfqd->root_group; } @@ -3135,7 +3135,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) uint64_t id; rcu_read_lock(); - id = bio_blkio_cgroup(bio)->id; + id = bio_blkcg(bio)->id; rcu_read_unlock(); /* @@ -3166,14 +3166,14 @@ static struct cfq_queue * cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, struct bio *bio, gfp_t gfp_mask) { - struct blkio_cgroup *blkcg; + struct blkcg *blkcg; struct cfq_queue *cfqq, *new_cfqq = NULL; struct cfq_group *cfqg; retry: rcu_read_lock(); - blkcg = bio_blkio_cgroup(bio); + blkcg = bio_blkcg(bio); cfqg = cfq_lookup_create_cfqg(cfqd, blkcg); cfqq = cic_to_cfqq(cic, is_sync); @@ -3944,14 +3944,14 @@ static void cfq_exit_queue(struct elevator_queue *e) #ifndef CONFIG_CFQ_GROUP_IOSCHED kfree(cfqd->root_group); #endif - blkcg_deactivate_policy(q, &blkio_policy_cfq); + blkcg_deactivate_policy(q, &blkcg_policy_cfq); kfree(cfqd); } static int cfq_init_queue(struct request_queue *q) { struct cfq_data *cfqd; - struct blkio_group *blkg __maybe_unused; + struct blkcg_gq *blkg __maybe_unused; int i, ret; cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); @@ -3966,7 +3966,7 @@ static int cfq_init_queue(struct request_queue *q) /* Init root group and prefer root group over other groups by default */ #ifdef CONFIG_CFQ_GROUP_IOSCHED - ret = blkcg_activate_policy(q, &blkio_policy_cfq); + ret = blkcg_activate_policy(q, &blkcg_policy_cfq); if (ret) goto out_free; @@ -4156,10 +4156,10 @@ static struct elevator_type iosched_cfq = { }; #ifdef CONFIG_CFQ_GROUP_IOSCHED -static struct blkio_policy_type blkio_policy_cfq = { +static struct blkcg_policy blkcg_policy_cfq = { .ops = { - .blkio_init_group_fn = cfq_init_blkio_group, - .blkio_reset_group_stats_fn = cfqg_stats_reset, + .pd_init_fn = cfq_pd_init, + .pd_reset_stats_fn = cfq_pd_reset_stats, }, .pdata_size = sizeof(struct cfq_group), .cftypes = cfq_blkcg_files, @@ -4185,7 +4185,7 @@ static int __init cfq_init(void) cfq_group_idle = 0; #endif - ret = blkio_policy_register(&blkio_policy_cfq); + ret = blkcg_policy_register(&blkcg_policy_cfq); if (ret) return ret; @@ -4202,13 +4202,13 @@ static int __init cfq_init(void) err_free_pool: kmem_cache_destroy(cfq_pool); err_pol_unreg: - blkio_policy_unregister(&blkio_policy_cfq); + blkcg_policy_unregister(&blkcg_policy_cfq); return ret; } static void __exit cfq_exit(void) { - blkio_policy_unregister(&blkio_policy_cfq); + blkcg_policy_unregister(&blkcg_policy_cfq); elv_unregister(&iosched_cfq); kmem_cache_destroy(cfq_pool); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 68720ab275d4..af33fb1adfee 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -31,7 +31,7 @@ struct blk_trace; struct request; struct sg_io_hdr; struct bsg_job; -struct blkio_group; +struct blkcg_gq; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -371,7 +371,7 @@ struct request_queue { struct list_head icq_list; #ifdef CONFIG_BLK_CGROUP DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS); - struct blkio_group *root_blkg; + struct blkcg_gq *root_blkg; struct list_head blkg_list; #endif -- cgit v1.2.3-59-g8ed1b From f95a04afa80c0f4ddd645ef6a84ed118b5d1ad46 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Apr 2012 13:57:26 -0700 Subject: blkcg: embed struct blkg_policy_data in policy 
specific data Currently blkg_policy_data carries policy specific data as char flex array instead of being embedded in policy specific data. This was forced by oddities around blkg allocation which are all gone now. This patch makes blkg_policy_data embedded in policy specific data - throtl_grp and cfq_group so that it's more conventional and consistent with how io_cq is handled. * blkcg_policy->pdata_size is renamed to ->pd_size. * Functions which used to take void *pdata now takes struct blkg_policy_data *pd. * blkg_to_pdata/pdata_to_blkg() updated to blkg_to_pd/pd_to_blkg(). * Dummy struct blkg_policy_data definition added. Dummy pdata_to_blkg() definition was unused and inconsistent with the non-dummy version - correct dummy pd_to_blkg() added. * throtl and cfq updated accordingly. * As dummy blkg_to_pd/pd_to_blkg() are provided, blkg_to_cfqg/cfqg_to_blkg() don't need to be ifdef'd. Moved outside ifdef block. This patch doesn't introduce any functional change. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 52 ++++++++++++++++++++++----------------------- block/blk-cgroup.h | 60 +++++++++++++++++++++++++++++----------------------- block/blk-throttle.c | 37 +++++++++++++++++++++----------- block/cfq-iosched.c | 46 +++++++++++++++++++++++----------------- 4 files changed, 112 insertions(+), 83 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 997570329517..3d495528a765 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -58,11 +58,6 @@ static bool blkcg_policy_enabled(struct request_queue *q, return pol && test_bit(pol->plid, q->blkcg_pols); } -static size_t blkg_pd_size(const struct blkcg_policy *pol) -{ - return sizeof(struct blkg_policy_data) + pol->pdata_size; -} - /** * blkg_free - free a blkg * @blkg: blkg to free @@ -122,7 +117,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q) continue; /* alloc per-policy data and attach it to blkg */ - pd = kzalloc_node(blkg_pd_size(pol), GFP_ATOMIC, q->node); + pd = kzalloc_node(pol->pd_size, GFP_ATOMIC, q->node); if (!pd) { blkg_free(blkg); return NULL; @@ -346,7 +341,8 @@ static const char *blkg_dev_name(struct blkcg_gq *blkg) * cftype->read_seq_string method. */ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, - u64 (*prfill)(struct seq_file *, void *, int), + u64 (*prfill)(struct seq_file *, + struct blkg_policy_data *, int), const struct blkcg_policy *pol, int data, bool show_total) { @@ -357,7 +353,7 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, spin_lock_irq(&blkcg->lock); hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) if (blkcg_policy_enabled(blkg->q, pol)) - total += prfill(sf, blkg->pd[pol->plid]->pdata, data); + total += prfill(sf, blkg->pd[pol->plid], data); spin_unlock_irq(&blkcg->lock); if (show_total) @@ -368,14 +364,14 @@ EXPORT_SYMBOL_GPL(blkcg_print_blkgs); /** * __blkg_prfill_u64 - prfill helper for a single u64 value * @sf: seq_file to print to - * @pdata: policy private data of interest + * @pd: policy private data of interest * @v: value to print * - * Print @v to @sf for the device assocaited with @pdata. + * Print @v to @sf for the device assocaited with @pd. 
*/ -u64 __blkg_prfill_u64(struct seq_file *sf, void *pdata, u64 v) +u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v) { - const char *dname = blkg_dev_name(pdata_to_blkg(pdata)); + const char *dname = blkg_dev_name(pd->blkg); if (!dname) return 0; @@ -388,12 +384,12 @@ EXPORT_SYMBOL_GPL(__blkg_prfill_u64); /** * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat * @sf: seq_file to print to - * @pdata: policy private data of interest + * @pd: policy private data of interest * @rwstat: rwstat to print * - * Print @rwstat to @sf for the device assocaited with @pdata. + * Print @rwstat to @sf for the device assocaited with @pd. */ -u64 __blkg_prfill_rwstat(struct seq_file *sf, void *pdata, +u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, const struct blkg_rwstat *rwstat) { static const char *rwstr[] = { @@ -402,7 +398,7 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, void *pdata, [BLKG_RWSTAT_SYNC] = "Sync", [BLKG_RWSTAT_ASYNC] = "Async", }; - const char *dname = blkg_dev_name(pdata_to_blkg(pdata)); + const char *dname = blkg_dev_name(pd->blkg); u64 v; int i; @@ -421,30 +417,31 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, void *pdata, /** * blkg_prfill_stat - prfill callback for blkg_stat * @sf: seq_file to print to - * @pdata: policy private data of interest - * @off: offset to the blkg_stat in @pdata + * @pd: policy private data of interest + * @off: offset to the blkg_stat in @pd * * prfill callback for printing a blkg_stat. */ -u64 blkg_prfill_stat(struct seq_file *sf, void *pdata, int off) +u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - return __blkg_prfill_u64(sf, pdata, blkg_stat_read(pdata + off)); + return __blkg_prfill_u64(sf, pd, blkg_stat_read((void *)pd + off)); } EXPORT_SYMBOL_GPL(blkg_prfill_stat); /** * blkg_prfill_rwstat - prfill callback for blkg_rwstat * @sf: seq_file to print to - * @pdata: policy private data of interest - * @off: offset to the blkg_rwstat in @pdata + * @pd: policy private data of interest + * @off: offset to the blkg_rwstat in @pd * * prfill callback for printing a blkg_rwstat. */ -u64 blkg_prfill_rwstat(struct seq_file *sf, void *pdata, int off) +u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, + int off) { - struct blkg_rwstat rwstat = blkg_rwstat_read(pdata + off); + struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd + off); - return __blkg_prfill_rwstat(sf, pdata, &rwstat); + return __blkg_prfill_rwstat(sf, pd, &rwstat); } EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); @@ -733,7 +730,7 @@ int blkcg_activate_policy(struct request_queue *q, /* allocate policy_data for all existing blkgs */ while (cnt--) { - pd = kzalloc_node(blkg_pd_size(pol), GFP_KERNEL, q->node); + pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node); if (!pd) { ret = -ENOMEM; goto out_free; @@ -832,6 +829,9 @@ int blkcg_policy_register(struct blkcg_policy *pol) { int i, ret; + if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data))) + return -EINVAL; + mutex_lock(&blkcg_pol_mutex); /* find an empty slot */ diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index a443b84d2c16..18b021e1c05f 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -59,16 +59,25 @@ struct blkg_rwstat { uint64_t cnt[BLKG_RWSTAT_NR]; }; -/* per-blkg per-policy data */ +/* + * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a + * request_queue (q). This is used by blkcg policies which need to track + * information per blkcg - q pair. 
+ * + * There can be multiple active blkcg policies and each has its private + * data on each blkg, the size of which is determined by + * blkcg_policy->pd_size. blkcg core allocates and frees such areas + * together with blkg and invokes pd_init/exit_fn() methods. + * + * Such private data must embed struct blkg_policy_data (pd) at the + * beginning and pd_size can't be smaller than pd. + */ struct blkg_policy_data { /* the blkg this per-policy data belongs to */ struct blkcg_gq *blkg; /* used during policy activation */ struct list_head alloc_node; - - /* pol->pdata_size bytes of private data used by policy impl */ - char pdata[] __aligned(__alignof__(unsigned long long)); }; /* association between a blk cgroup and a request queue */ @@ -100,7 +109,7 @@ struct blkcg_policy { struct blkcg_policy_ops ops; int plid; /* policy specific private data size */ - size_t pdata_size; + size_t pd_size; /* cgroup files for the policy */ struct cftype *cftypes; }; @@ -125,14 +134,16 @@ void blkcg_deactivate_policy(struct request_queue *q, const struct blkcg_policy *pol); void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, - u64 (*prfill)(struct seq_file *, void *, int), + u64 (*prfill)(struct seq_file *, + struct blkg_policy_data *, int), const struct blkcg_policy *pol, int data, bool show_total); -u64 __blkg_prfill_u64(struct seq_file *sf, void *pdata, u64 v); -u64 __blkg_prfill_rwstat(struct seq_file *sf, void *pdata, +u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); +u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, const struct blkg_rwstat *rwstat); -u64 blkg_prfill_stat(struct seq_file *sf, void *pdata, int off); -u64 blkg_prfill_rwstat(struct seq_file *sf, void *pdata, int off); +u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); +u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, + int off); struct blkg_conf_ctx { struct gendisk *disk; @@ -152,26 +163,21 @@ void blkg_conf_finish(struct blkg_conf_ctx *ctx); * * Return pointer to private data associated with the @blkg-@pol pair. */ -static inline void *blkg_to_pdata(struct blkcg_gq *blkg, - struct blkcg_policy *pol) +static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, + struct blkcg_policy *pol) { - return blkg ? blkg->pd[pol->plid]->pdata : NULL; + return blkg ? blkg->pd[pol->plid] : NULL; } /** * pdata_to_blkg - get blkg associated with policy private data - * @pdata: policy private data of interest + * @pd: policy private data of interest * - * @pdata is policy private data. Determine the blkg it's associated with. + * @pd is policy private data. Determine the blkg it's associated with. */ -static inline struct blkcg_gq *pdata_to_blkg(void *pdata) +static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { - if (pdata) { - struct blkg_policy_data *pd = - container_of(pdata, struct blkg_policy_data, pdata); - return pd->blkg; - } - return NULL; + return pd ? 
pd->blkg : NULL; } /** @@ -342,6 +348,9 @@ static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) struct cgroup; +struct blkg_policy_data { +}; + struct blkcg_gq { }; @@ -361,10 +370,9 @@ static inline int blkcg_activate_policy(struct request_queue *q, static inline void blkcg_deactivate_policy(struct request_queue *q, const struct blkcg_policy *pol) { } -static inline void *blkg_to_pdata(struct blkcg_gq *blkg, - struct blkcg_policy *pol) { return NULL; } -static inline struct blkcg_gq *pdata_to_blkg(void *pdata, - struct blkcg_policy *pol) { return NULL; } +static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, + struct blkcg_policy *pol) { return NULL; } +static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } static inline void blkg_get(struct blkcg_gq *blkg) { } static inline void blkg_put(struct blkcg_gq *blkg) { } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 00c7eff66ecf..6a0a17a83862 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -49,6 +49,9 @@ struct tg_stats_cpu { }; struct throtl_grp { + /* must be the first member */ + struct blkg_policy_data pd; + /* active throtl group service_tree member */ struct rb_node rb_node; @@ -120,14 +123,19 @@ static LIST_HEAD(tg_stats_alloc_list); static void tg_stats_alloc_fn(struct work_struct *); static DECLARE_DELAYED_WORK(tg_stats_alloc_work, tg_stats_alloc_fn); +static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd) +{ + return pd ? container_of(pd, struct throtl_grp, pd) : NULL; +} + static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg) { - return blkg_to_pdata(blkg, &blkcg_policy_throtl); + return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl)); } static inline struct blkcg_gq *tg_to_blkg(struct throtl_grp *tg) { - return pdata_to_blkg(tg); + return pd_to_blkg(&tg->pd); } static inline struct throtl_grp *td_root_tg(struct throtl_data *td) @@ -931,9 +939,10 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) } } -static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, void *pdata, int off) +static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, + struct blkg_policy_data *pd, int off) { - struct throtl_grp *tg = pdata; + struct throtl_grp *tg = pd_to_tg(pd); struct blkg_rwstat rwstat = { }, tmp; int i, cpu; @@ -945,7 +954,7 @@ static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, void *pdata, int off) rwstat.cnt[i] += tmp.cnt[i]; } - return __blkg_prfill_rwstat(sf, pdata, &rwstat); + return __blkg_prfill_rwstat(sf, pd, &rwstat); } static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, @@ -958,22 +967,26 @@ static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, return 0; } -static u64 tg_prfill_conf_u64(struct seq_file *sf, void *pdata, int off) +static u64 tg_prfill_conf_u64(struct seq_file *sf, struct blkg_policy_data *pd, + int off) { - u64 v = *(u64 *)(pdata + off); + struct throtl_grp *tg = pd_to_tg(pd); + u64 v = *(u64 *)((void *)tg + off); if (v == -1) return 0; - return __blkg_prfill_u64(sf, pdata, v); + return __blkg_prfill_u64(sf, pd, v); } -static u64 tg_prfill_conf_uint(struct seq_file *sf, void *pdata, int off) +static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd, + int off) { - unsigned int v = *(unsigned int *)(pdata + off); + struct throtl_grp *tg = pd_to_tg(pd); + unsigned int v = *(unsigned int *)((void *)tg + off); if (v == -1) return 0; - return 
__blkg_prfill_u64(sf, pdata, v); + return __blkg_prfill_u64(sf, pd, v); } static int tg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft, @@ -1092,7 +1105,7 @@ static struct blkcg_policy blkcg_policy_throtl = { .pd_exit_fn = throtl_pd_exit, .pd_reset_stats_fn = throtl_pd_reset_stats, }, - .pdata_size = sizeof(struct throtl_grp), + .pd_size = sizeof(struct throtl_grp), .cftypes = throtl_files, }; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 792218281d91..7865cc38ea77 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -217,6 +217,9 @@ struct cfqg_stats { /* This is per cgroup per device grouping structure */ struct cfq_group { + /* must be the first member */ + struct blkg_policy_data pd; + /* group service_tree member */ struct rb_node rb_node; @@ -409,6 +412,21 @@ CFQ_CFQQ_FNS(deep); CFQ_CFQQ_FNS(wait_busy); #undef CFQ_CFQQ_FNS +static inline struct cfq_group *pd_to_cfqg(struct blkg_policy_data *pd) +{ + return pd ? container_of(pd, struct cfq_group, pd) : NULL; +} + +static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg) +{ + return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq)); +} + +static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg) +{ + return pd_to_blkg(&cfqg->pd); +} + #if defined(CONFIG_CFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) /* cfqg stats flags */ @@ -553,16 +571,6 @@ static inline void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) { } #ifdef CONFIG_CFQ_GROUP_IOSCHED -static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg) -{ - return blkg_to_pdata(blkg, &blkcg_policy_cfq); -} - -static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg) -{ - return pdata_to_blkg(cfqg); -} - static inline void cfqg_get(struct cfq_group *cfqg) { return blkg_get(cfqg_to_blkg(cfqg)); @@ -662,8 +670,6 @@ static void cfq_pd_reset_stats(struct blkcg_gq *blkg) #else /* CONFIG_CFQ_GROUP_IOSCHED */ -static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg) { return NULL; } -static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg) { return NULL; } static inline void cfqg_get(struct cfq_group *cfqg) { } static inline void cfqg_put(struct cfq_group *cfqg) { } @@ -1374,13 +1380,14 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) cfqg_get(cfqg); } -static u64 cfqg_prfill_weight_device(struct seq_file *sf, void *pdata, int off) +static u64 cfqg_prfill_weight_device(struct seq_file *sf, + struct blkg_policy_data *pd, int off) { - struct cfq_group *cfqg = pdata; + struct cfq_group *cfqg = pd_to_cfqg(pd); if (!cfqg->dev_weight) return 0; - return __blkg_prfill_u64(sf, pdata, cfqg->dev_weight); + return __blkg_prfill_u64(sf, pd, cfqg->dev_weight); } static int cfqg_print_weight_device(struct cgroup *cgrp, struct cftype *cft, @@ -1467,9 +1474,10 @@ static int cfqg_print_rwstat(struct cgroup *cgrp, struct cftype *cft, } #ifdef CONFIG_DEBUG_BLK_CGROUP -static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, void *pdata, int off) +static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, + struct blkg_policy_data *pd, int off) { - struct cfq_group *cfqg = pdata; + struct cfq_group *cfqg = pd_to_cfqg(pd); u64 samples = blkg_stat_read(&cfqg->stats.avg_queue_size_samples); u64 v = 0; @@ -1477,7 +1485,7 @@ static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, void *pdata, int off) v = blkg_stat_read(&cfqg->stats.avg_queue_size_sum); do_div(v, samples); } - __blkg_prfill_u64(sf, pdata, v); + __blkg_prfill_u64(sf, pd, v); return 0; } @@ -4161,7 +4169,7 @@ 
static struct blkcg_policy blkcg_policy_cfq = { .pd_init_fn = cfq_pd_init, .pd_reset_stats_fn = cfq_pd_reset_stats, }, - .pdata_size = sizeof(struct cfq_group), + .pd_size = sizeof(struct cfq_group), .cftypes = cfq_blkcg_files, }; #endif -- cgit v1.2.3-59-g8ed1b From f9fcc2d3919b8eb575b3cee9274feefafb641bca Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Apr 2012 13:57:27 -0700 Subject: blkcg: collapse blkcg_policy_ops into blkcg_policy There's no reason to keep blkcg_policy_ops separate. Collapse it into blkcg_policy. This patch doesn't introduce any functional change. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 16 ++++++++-------- block/blk-cgroup.h | 12 +++++------- block/blk-throttle.c | 13 ++++++------- block/cfq-iosched.c | 11 +++++------ 4 files changed, 24 insertions(+), 28 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 3d495528a765..82283859727e 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -78,8 +78,8 @@ static void blkg_free(struct blkcg_gq *blkg) if (!pd) continue; - if (pol && pol->ops.pd_exit_fn) - pol->ops.pd_exit_fn(blkg); + if (pol && pol->pd_exit_fn) + pol->pd_exit_fn(blkg); kfree(pd); } @@ -132,7 +132,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q) struct blkcg_policy *pol = blkcg_policy[i]; if (blkcg_policy_enabled(blkg->q, pol)) - pol->ops.pd_init_fn(blkg); + pol->pd_init_fn(blkg); } return blkg; @@ -305,8 +305,8 @@ static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, struct blkcg_policy *pol = blkcg_policy[i]; if (blkcg_policy_enabled(blkg->q, pol) && - pol->ops.pd_reset_stats_fn) - pol->ops.pd_reset_stats_fn(blkg); + pol->pd_reset_stats_fn) + pol->pd_reset_stats_fn(blkg); } } @@ -758,7 +758,7 @@ int blkcg_activate_policy(struct request_queue *q, blkg->pd[pol->plid] = pd; pd->blkg = blkg; - pol->ops.pd_init_fn(blkg); + pol->pd_init_fn(blkg); spin_unlock(&blkg->blkcg->lock); } @@ -804,8 +804,8 @@ void blkcg_deactivate_policy(struct request_queue *q, /* grab blkcg lock too while removing @pd from @blkg */ spin_lock(&blkg->blkcg->lock); - if (pol->ops.pd_exit_fn) - pol->ops.pd_exit_fn(blkg); + if (pol->pd_exit_fn) + pol->pd_exit_fn(blkg); kfree(blkg->pd[pol->plid]); blkg->pd[pol->plid] = NULL; diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 18b021e1c05f..44cb9086ed42 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -99,19 +99,17 @@ typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg); typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg); -struct blkcg_policy_ops { - blkcg_pol_init_pd_fn *pd_init_fn; - blkcg_pol_exit_pd_fn *pd_exit_fn; - blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; -}; - struct blkcg_policy { - struct blkcg_policy_ops ops; int plid; /* policy specific private data size */ size_t pd_size; /* cgroup files for the policy */ struct cftype *cftypes; + + /* operations */ + blkcg_pol_init_pd_fn *pd_init_fn; + blkcg_pol_exit_pd_fn *pd_exit_fn; + blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; }; extern struct blkcg blkcg_root; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 6a0a17a83862..46310ec93d1c 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1100,13 +1100,12 @@ static void throtl_shutdown_wq(struct request_queue *q) } static struct blkcg_policy blkcg_policy_throtl = { - .ops = { - .pd_init_fn = throtl_pd_init, - .pd_exit_fn = 
throtl_pd_exit, - .pd_reset_stats_fn = throtl_pd_reset_stats, - }, - .pd_size = sizeof(struct throtl_grp), - .cftypes = throtl_files, + .pd_size = sizeof(struct throtl_grp), + .cftypes = throtl_files, + + .pd_init_fn = throtl_pd_init, + .pd_exit_fn = throtl_pd_exit, + .pd_reset_stats_fn = throtl_pd_reset_stats, }; bool blk_throtl_bio(struct request_queue *q, struct bio *bio) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 7865cc38ea77..832b2ac8cb8d 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -4165,12 +4165,11 @@ static struct elevator_type iosched_cfq = { #ifdef CONFIG_CFQ_GROUP_IOSCHED static struct blkcg_policy blkcg_policy_cfq = { - .ops = { - .pd_init_fn = cfq_pd_init, - .pd_reset_stats_fn = cfq_pd_reset_stats, - }, - .pd_size = sizeof(struct cfq_group), - .cftypes = cfq_blkcg_files, + .pd_size = sizeof(struct cfq_group), + .cftypes = cfq_blkcg_files, + + .pd_init_fn = cfq_pd_init, + .pd_reset_stats_fn = cfq_pd_reset_stats, }; #endif -- cgit v1.2.3-59-g8ed1b From a637120e49021d197e9578cba545bbaa459cbb51 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 19 Apr 2012 16:29:24 -0700 Subject: blkcg: use radix tree to index blkgs from blkcg blkg lookup is currently performed by traversing linked list anchored at blkcg->blkg_list. This is very unscalable and with blk-throttle enabled and enough request queues on the system, this can get very ugly quickly (blk-throttle performs look up on every bio submission). This patch makes blkcg use radix tree to index blkgs combined with simple last-looked-up hint. This is mostly identical to how icqs are indexed from ioc. Note that because __blkg_lookup() may be invoked without holding queue lock, hint is only updated from __blkg_lookup_create(). Due to cfq's cfqq caching, this makes hint updates overly lazy. This will be improved with scheduled blkcg aware request allocation. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 52 ++++++++++++++++++++++++++++++++++++++++++++-------- block/blk-cgroup.h | 6 ++++++ 2 files changed, 50 insertions(+), 8 deletions(-) (limited to 'block/blk-cgroup.h') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 30a7a9c58b38..02cf6335e9bd 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -142,11 +142,21 @@ static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q) { struct blkcg_gq *blkg; - struct hlist_node *n; - hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) - if (blkg->q == q) - return blkg; + blkg = rcu_dereference(blkcg->blkg_hint); + if (blkg && blkg->q == q) + return blkg; + + /* + * Hint didn't match. Look up from the radix tree. Note that we + * may not be holding queue_lock and thus are not sure whether + * @blkg from blkg_tree has already been removed or not, so we + * can't update hint to the lookup result. Leave it to the caller. 
+ */ + blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id); + if (blkg && blkg->q == q) + return blkg; + return NULL; } @@ -179,9 +189,12 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, WARN_ON_ONCE(!rcu_read_lock_held()); lockdep_assert_held(q->queue_lock); + /* lookup and update hint on success, see __blkg_lookup() for details */ blkg = __blkg_lookup(blkcg, q); - if (blkg) + if (blkg) { + rcu_assign_pointer(blkcg->blkg_hint, blkg); return blkg; + } /* blkg holds a reference to blkcg */ if (!css_tryget(&blkcg->css)) @@ -194,12 +207,24 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, goto err_put; /* insert */ + ret = radix_tree_preload(GFP_ATOMIC); + if (ret) + goto err_free; + spin_lock(&blkcg->lock); - hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); - list_add(&blkg->q_node, &q->blkg_list); + ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg); + if (likely(!ret)) { + hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); + list_add(&blkg->q_node, &q->blkg_list); + } spin_unlock(&blkcg->lock); - return blkg; + radix_tree_preload_end(); + + if (!ret) + return blkg; +err_free: + blkg_free(blkg); err_put: css_put(&blkcg->css); return ERR_PTR(ret); @@ -229,9 +254,19 @@ static void blkg_destroy(struct blkcg_gq *blkg) /* Something wrong if we are trying to remove same group twice */ WARN_ON_ONCE(list_empty(&blkg->q_node)); WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node)); + + radix_tree_delete(&blkcg->blkg_tree, blkg->q->id); list_del_init(&blkg->q_node); hlist_del_init_rcu(&blkg->blkcg_node); + /* + * Both setting lookup hint to and clearing it from @blkg are done + * under queue_lock. If it's not pointing to @blkg now, it never + * will. Hint assignment itself can race safely. + */ + if (rcu_dereference_raw(blkcg->blkg_hint) == blkg) + rcu_assign_pointer(blkcg->blkg_hint, NULL); + /* * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. @@ -593,6 +628,7 @@ static struct cgroup_subsys_state *blkcg_create(struct cgroup *cgroup) blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */ done: spin_lock_init(&blkcg->lock); + INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); INIT_HLIST_HEAD(&blkcg->blkg_list); return &blkcg->css; diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 44cb9086ed42..8ac457ce7783 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -16,6 +16,7 @@ #include #include #include +#include /* Max limits for throttle policy */ #define THROTL_IOPS_MAX UINT_MAX @@ -37,9 +38,14 @@ enum blkg_rwstat_type { BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR, }; +struct blkcg_gq; + struct blkcg { struct cgroup_subsys_state css; spinlock_t lock; + + struct radix_tree_root blkg_tree; + struct blkcg_gq *blkg_hint; struct hlist_head blkg_list; /* for policies to test whether associated blkcg has changed */ -- cgit v1.2.3-59-g8ed1b
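
For reference, the pd embedding introduced in the first patch above relies on the usual container_of() round trip: the policy struct keeps struct blkg_policy_data as its first member, blkcg core only ever handles the blkg_policy_data pointer, and the policy recovers its own struct from it. Below is a minimal, self-contained userspace sketch of that idiom; the names (sample_pd, sample_grp, pd_to_grp) are illustrative stand-ins and not kernel symbols.

/*
 * Minimal userspace sketch of the "embed the generic struct as the first
 * member, convert with container_of()" idiom used above for
 * blkg_policy_data inside throtl_grp/cfq_group.  All identifiers here
 * are made up for illustration.
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct sample_pd {			/* stand-in for blkg_policy_data */
	int blkg_id;			/* stand-in for the pd->blkg back-pointer */
};

struct sample_grp {			/* stand-in for throtl_grp / cfq_group */
	struct sample_pd pd;		/* kept first, matching the convention above */
	unsigned int weight;		/* policy-private field */
};

/* the generic core hands out pd; the policy recovers its own struct */
static struct sample_grp *pd_to_grp(struct sample_pd *pd)
{
	return pd ? container_of(pd, struct sample_grp, pd) : NULL;
}

int main(void)
{
	struct sample_grp grp = { .pd = { .blkg_id = 7 }, .weight = 500 };
	struct sample_pd *pd = &grp.pd;	/* what the core would pass around */

	/* round trip: pd -> policy struct, then read a private field */
	printf("blkg_id=%d weight=%u\n", pd->blkg_id, pd_to_grp(pd)->weight);
	return 0;
}

Keeping the blkg_policy_data member first means the pointer blkcg core stores in blkg->pd[] and the policy's own struct pointer coincide, so a single pol->pd_size allocation of the policy struct can be passed around as a plain struct blkg_policy_data * and converted back with container_of() at no cost.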