about summary refs log tree commit diff stats
path: root/block/blk-throttle.c
diff options
context:
space:
mode:
Diffstat (limited to 'block/blk-throttle.c')
-rw-r--r-- block/blk-throttle.c 128
1 files changed, 71 insertions, 57 deletions
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index fee3325edf27..b771c4299982 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -15,10 +15,10 @@
#include "blk-cgroup-rwstat.h"
/* Max dispatch from a group in 1 round */
-static int throtl_grp_quantum = 8;
+#define THROTL_GRP_QUANTUM 8
/* Total max dispatch from all groups in one round */
-static int throtl_quantum = 32;
+#define THROTL_QUANTUM 32
/* Throttling is performed over a slice and after that slice is renewed */
#define DFL_THROTL_SLICE_HD (HZ / 10)
@@ -150,7 +150,7 @@ struct throtl_grp {
/* user configured IOPS limits */
unsigned int iops_conf[2][LIMIT_CNT];
- /* Number of bytes disptached in current slice */
+ /* Number of bytes dispatched in current slice */
uint64_t bytes_disp[2];
/* Number of bio's dispatched in current slice */
unsigned int io_disp[2];
@@ -423,12 +423,13 @@ static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn,
*/
static struct bio *throtl_peek_queued(struct list_head *queued)
{
- struct throtl_qnode *qn = list_first_entry(queued, struct throtl_qnode, node);
+ struct throtl_qnode *qn;
struct bio *bio;
if (list_empty(queued))
return NULL;
+ qn = list_first_entry(queued, struct throtl_qnode, node);
bio = bio_list_peek(&qn->bios);
WARN_ON_ONCE(!bio);
return bio;
@@ -451,12 +452,13 @@ static struct bio *throtl_peek_queued(struct list_head *queued)
static struct bio *throtl_pop_queued(struct list_head *queued,
struct throtl_grp **tg_to_put)
{
- struct throtl_qnode *qn = list_first_entry(queued, struct throtl_qnode, node);
+ struct throtl_qnode *qn;
struct bio *bio;
if (list_empty(queued))
return NULL;
+ qn = list_first_entry(queued, struct throtl_qnode, node);
bio = bio_list_pop(&qn->bios);
WARN_ON_ONCE(!bio);
@@ -636,9 +638,6 @@ static struct throtl_grp *
throtl_rb_first(struct throtl_service_queue *parent_sq)
{
struct rb_node *n;
- /* Service tree is empty */
- if (!parent_sq->nr_pending)
- return NULL;
n = rb_first_cached(&parent_sq->pending_tree);
WARN_ON_ONCE(!n);
@@ -692,29 +691,21 @@ static void tg_service_queue_add(struct throtl_grp *tg)
leftmost);
}
-static void __throtl_enqueue_tg(struct throtl_grp *tg)
-{
- tg_service_queue_add(tg);
- tg->flags |= THROTL_TG_PENDING;
- tg->service_queue.parent_sq->nr_pending++;
-}
-
static void throtl_enqueue_tg(struct throtl_grp *tg)
{
- if (!(tg->flags & THROTL_TG_PENDING))
- __throtl_enqueue_tg(tg);
-}
-
-static void __throtl_dequeue_tg(struct throtl_grp *tg)
-{
- throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq);
- tg->flags &= ~THROTL_TG_PENDING;
+ if (!(tg->flags & THROTL_TG_PENDING)) {
+ tg_service_queue_add(tg);
+ tg->flags |= THROTL_TG_PENDING;
+ tg->service_queue.parent_sq->nr_pending++;
+ }
}
static void throtl_dequeue_tg(struct throtl_grp *tg)
{
- if (tg->flags & THROTL_TG_PENDING)
- __throtl_dequeue_tg(tg);
+ if (tg->flags & THROTL_TG_PENDING) {
+ throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq);
+ tg->flags &= ~THROTL_TG_PENDING;
+ }
}
/* Call with queue lock held */
@@ -817,7 +808,7 @@ static inline void throtl_set_slice_end(struct throtl_grp *tg, bool rw,
static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw,
unsigned long jiffy_end)
{
- tg->slice_end[rw] = roundup(jiffy_end, tg->td->throtl_slice);
+ throtl_set_slice_end(tg, rw, jiffy_end);
throtl_log(&tg->service_queue,
"[%c] extend slice start=%lu end=%lu jiffies=%lu",
rw == READ ? 'R' : 'W', tg->slice_start[rw],
@@ -852,7 +843,7 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
/*
* A bio has been dispatched. Also adjust slice_end. It might happen
* that initially cgroup limit was very low resulting in high
- * slice_end, but later limit was bumped up and bio was dispached
+ * slice_end, but later limit was bumped up and bio was dispatched
* sooner, then we need to reduce slice_end. A high bogus slice_end
* is bad because it does not allow new slice to start.
*/
@@ -894,13 +885,19 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
}
static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
- unsigned long *wait)
+ u32 iops_limit, unsigned long *wait)
{
bool rw = bio_data_dir(bio);
unsigned int io_allowed;
unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
u64 tmp;
+ if (iops_limit == UINT_MAX) {
+ if (wait)
+ *wait = 0;
+ return true;
+ }
+
jiffy_elapsed = jiffies - tg->slice_start[rw];
/* Round up to the next throttle slice, wait time must be nonzero */
@@ -913,7 +910,7 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
* have been trimmed.
*/
- tmp = (u64)tg_iops_limit(tg, rw) * jiffy_elapsed_rnd;
+ tmp = (u64)iops_limit * jiffy_elapsed_rnd;
do_div(tmp, HZ);
if (tmp > UINT_MAX)
@@ -936,13 +933,19 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
}
static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
- unsigned long *wait)
+ u64 bps_limit, unsigned long *wait)
{
bool rw = bio_data_dir(bio);
u64 bytes_allowed, extra_bytes, tmp;
unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
unsigned int bio_size = throtl_bio_data_size(bio);
+ if (bps_limit == U64_MAX) {
+ if (wait)
+ *wait = 0;
+ return true;
+ }
+
jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];
/* Slice has just started. Consider one slice interval */
@@ -951,7 +954,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
- tmp = tg_bps_limit(tg, rw) * jiffy_elapsed_rnd;
+ tmp = bps_limit * jiffy_elapsed_rnd;
do_div(tmp, HZ);
bytes_allowed = tmp;
@@ -963,7 +966,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
/* Calc approx time to dispatch */
extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed;
- jiffy_wait = div64_u64(extra_bytes * HZ, tg_bps_limit(tg, rw));
+ jiffy_wait = div64_u64(extra_bytes * HZ, bps_limit);
if (!jiffy_wait)
jiffy_wait = 1;
@@ -987,6 +990,8 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
{
bool rw = bio_data_dir(bio);
unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0;
+ u64 bps_limit = tg_bps_limit(tg, rw);
+ u32 iops_limit = tg_iops_limit(tg, rw);
/*
* Currently whole state machine of group depends on first bio
@@ -998,8 +1003,7 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
bio != throtl_peek_queued(&tg->service_queue.queued[rw]));
/* If tg->bps = -1, then BW is unlimited */
- if (tg_bps_limit(tg, rw) == U64_MAX &&
- tg_iops_limit(tg, rw) == UINT_MAX) {
+ if (bps_limit == U64_MAX && iops_limit == UINT_MAX) {
if (wait)
*wait = 0;
return true;
@@ -1021,8 +1025,8 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
jiffies + tg->td->throtl_slice);
}
- if (tg_with_in_bps_limit(tg, bio, &bps_wait) &&
- tg_with_in_iops_limit(tg, bio, &iops_wait)) {
+ if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) &&
+ tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) {
if (wait)
*wait = 0;
return true;
@@ -1082,7 +1086,7 @@ static void throtl_add_bio_tg(struct bio *bio, struct throtl_qnode *qn,
* If @tg doesn't currently have any bios queued in the same
* direction, queueing @bio can change when @tg should be
* dispatched. Mark that @tg was empty. This is automatically
- * cleaered on the next tg_update_disptime().
+ * cleared on the next tg_update_disptime().
*/
if (!sq->nr_queued[rw])
tg->flags |= THROTL_TG_WAS_EMPTY;
@@ -1175,8 +1179,8 @@ static int throtl_dispatch_tg(struct throtl_grp *tg)
{
struct throtl_service_queue *sq = &tg->service_queue;
unsigned int nr_reads = 0, nr_writes = 0;
- unsigned int max_nr_reads = throtl_grp_quantum*3/4;
- unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads;
+ unsigned int max_nr_reads = THROTL_GRP_QUANTUM * 3 / 4;
+ unsigned int max_nr_writes = THROTL_GRP_QUANTUM - max_nr_reads;
struct bio *bio;
/* Try to dispatch 75% READS and 25% WRITES */
@@ -1209,9 +1213,13 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
unsigned int nr_disp = 0;
while (1) {
- struct throtl_grp *tg = throtl_rb_first(parent_sq);
+ struct throtl_grp *tg;
struct throtl_service_queue *sq;
+ if (!parent_sq->nr_pending)
+ break;
+
+ tg = throtl_rb_first(parent_sq);
if (!tg)
break;
@@ -1226,7 +1234,7 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
if (sq->nr_queued[0] || sq->nr_queued[1])
tg_update_disptime(tg);
- if (nr_disp >= throtl_quantum)
+ if (nr_disp >= THROTL_QUANTUM)
break;
}
@@ -1303,7 +1311,7 @@ again:
}
}
} else {
- /* reached the top-level, queue issueing */
+ /* reached the top-level, queue issuing */
queue_work(kthrotld_workqueue, &td->dispatch_work);
}
out_unlock:
@@ -1314,8 +1322,8 @@ out_unlock:
* blk_throtl_dispatch_work_fn - work function for throtl_data->dispatch_work
* @work: work item being executed
*
- * This function is queued for execution when bio's reach the bio_lists[]
- * of throtl_data->service_queue. Those bio's are ready and issued by this
+ * This function is queued for execution when bios reach the bio_lists[]
+ * of throtl_data->service_queue. Those bios are ready and issued by this
* function.
*/
static void blk_throtl_dispatch_work_fn(struct work_struct *work)
@@ -1428,8 +1436,8 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global)
* that a group's limit are dropped suddenly and we don't want to
* account recently dispatched IO with new low rate.
*/
- throtl_start_new_slice(tg, 0);
- throtl_start_new_slice(tg, 1);
+ throtl_start_new_slice(tg, READ);
+ throtl_start_new_slice(tg, WRITE);
if (tg->flags & THROTL_TG_PENDING) {
tg_update_disptime(tg);
@@ -1674,13 +1682,13 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
goto out_finish;
ret = -EINVAL;
- if (!strcmp(tok, "rbps"))
+ if (!strcmp(tok, "rbps") && val > 1)
v[0] = val;
- else if (!strcmp(tok, "wbps"))
+ else if (!strcmp(tok, "wbps") && val > 1)
v[1] = val;
- else if (!strcmp(tok, "riops"))
+ else if (!strcmp(tok, "riops") && val > 1)
v[2] = min_t(u64, val, UINT_MAX);
- else if (!strcmp(tok, "wiops"))
+ else if (!strcmp(tok, "wiops") && val > 1)
v[3] = min_t(u64, val, UINT_MAX);
else if (off == LIMIT_LOW && !strcmp(tok, "idle"))
idle_time = val;
@@ -1957,7 +1965,7 @@ static void throtl_upgrade_state(struct throtl_data *td)
queue_work(kthrotld_workqueue, &td->dispatch_work);
}
-static void throtl_downgrade_state(struct throtl_data *td, int new)
+static void throtl_downgrade_state(struct throtl_data *td)
{
td->scale /= 2;
@@ -1967,7 +1975,7 @@ static void throtl_downgrade_state(struct throtl_data *td, int new)
return;
}
- td->limit_index = new;
+ td->limit_index = LIMIT_LOW;
td->low_downgrade_time = jiffies;
}
@@ -2054,7 +2062,7 @@ static void throtl_downgrade_check(struct throtl_grp *tg)
* cgroups
*/
if (throtl_hierarchy_can_downgrade(tg))
- throtl_downgrade_state(tg->td, LIMIT_LOW);
+ throtl_downgrade_state(tg->td);
tg->last_bytes_disp[READ] = 0;
tg->last_bytes_disp[WRITE] = 0;
@@ -2064,10 +2072,14 @@ static void throtl_downgrade_check(struct throtl_grp *tg)
static void blk_throtl_update_idletime(struct throtl_grp *tg)
{
- unsigned long now = ktime_get_ns() >> 10;
+ unsigned long now;
unsigned long last_finish_time = tg->last_finish_time;
- if (now <= last_finish_time || last_finish_time == 0 ||
+ if (last_finish_time == 0)
+ return;
+
+ now = ktime_get_ns() >> 10;
+ if (now <= last_finish_time ||
last_finish_time == tg->checked_last_finish_time)
return;
@@ -2083,7 +2095,7 @@ static void throtl_update_latency_buckets(struct throtl_data *td)
unsigned long last_latency[2] = { 0 };
unsigned long latency[2];
- if (!blk_queue_nonrot(td->queue))
+ if (!blk_queue_nonrot(td->queue) || !td->limit_valid[LIMIT_LOW])
return;
if (time_before(jiffies, td->last_calculate_time + HZ))
return;
@@ -2230,7 +2242,7 @@ again:
/*
* @bio passed through this layer without being throttled.
- * Climb up the ladder. If we''re already at the top, it
+ * Climb up the ladder. If we're already at the top, it
* can be executed directly.
*/
qn = &tg->qnode_on_parent[rw];
@@ -2321,6 +2333,8 @@ void blk_throtl_bio_endio(struct bio *bio)
if (!blkg)
return;
tg = blkg_to_tg(blkg);
+ if (!tg->td->limit_valid[LIMIT_LOW])
+ return;
finish_time_ns = ktime_get_ns();
tg->last_finish_time = finish_time_ns >> 10;