From 1aa50d020c7148f5f0bde15ca80fe6f91a8c5a4e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 1 Sep 2020 14:52:44 -0400 Subject: blk-iocost: calculate iocg->usages[] from iocg->local_stat.usage_us Currently, iocg->usages[] which are used to guide inuse adjustments are calculated from vtime deltas. This, however, assumes that the hierarchical inuse weight at the time of calculation held for the entire period, which often isn't true and can lead to significant errors. Now that we have absolute usage information collected, we can derive iocg->usages[] from iocg->local_stat.usage_us so that inuse adjustment decisions are made based on actual absolute usage. The calculated usage is clamped between 1 and WEIGHT_ONE and WEIGHT_ONE is also used to signal saturation regardless of the current hierarchical inuse weight. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/trace/events/iocost.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/iocost.h b/include/trace/events/iocost.h index c2f580fd371b..a905ecc0342f 100644 --- a/include/trace/events/iocost.h +++ b/include/trace/events/iocost.h @@ -26,7 +26,6 @@ TRACE_EVENT(iocost_iocg_activate, __field(u64, vrate) __field(u64, last_period) __field(u64, cur_period) - __field(u64, last_vtime) __field(u64, vtime) __field(u32, weight) __field(u32, inuse) @@ -42,7 +41,6 @@ TRACE_EVENT(iocost_iocg_activate, __entry->vrate = now->vrate; __entry->last_period = last_period; __entry->cur_period = cur_period; - __entry->last_vtime = iocg->last_vtime; __entry->vtime = vtime; __entry->weight = iocg->weight; __entry->inuse = iocg->inuse; @@ -51,13 +49,12 @@ TRACE_EVENT(iocost_iocg_activate, ), TP_printk("[%s:%s] now=%llu:%llu vrate=%llu " - "period=%llu->%llu vtime=%llu->%llu " + "period=%llu->%llu vtime=%llu " "weight=%u/%u hweight=%llu/%llu", __get_str(devname), __get_str(cgroup), __entry->now, __entry->vnow, __entry->vrate, __entry->last_period, __entry->cur_period, - __entry->last_vtime, __entry->vtime, - __entry->inuse, __entry->weight, + __entry->vtime, __entry->inuse, __entry->weight, __entry->hweight_inuse, __entry->hweight_active ) ); -- cgit v1.2.3-59-g8ed1b From 065655c862fedf4b04e1b28b83ca6f338d81cf0b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 1 Sep 2020 14:52:46 -0400 Subject: blk-iocost: decouple vrate adjustment from surplus transfers Budget donations are inaccurate and could take multiple periods to converge. To prevent triggering vrate adjustments while surplus transfers were catching up, vrate adjustment was suppressed if donations were increasing, which was indicated by non-zero nr_surpluses. This entangling won't be necessary with the scheduled rewrite of donation mechanism which will make it precise and immediate. Let's decouple the two in preparation. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-iocost.c | 19 +++++++------------ include/trace/events/iocost.h | 13 ++++--------- 2 files changed, 11 insertions(+), 21 deletions(-) (limited to 'include/trace') diff --git a/block/blk-iocost.c b/block/blk-iocost.c index c1cd66cfa2a8..a3889a8b0a33 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1508,7 +1508,7 @@ static void ioc_timer_fn(struct timer_list *timer) struct ioc_gq *iocg, *tiocg; struct ioc_now now; LIST_HEAD(surpluses); - int nr_surpluses = 0, nr_shortages = 0, nr_lagging = 0; + int nr_shortages = 0, nr_lagging = 0; u32 ppm_rthr = MILLION - ioc->params.qos[QOS_RPPM]; u32 ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM]; u32 missed_ppm[2], rq_wait_pct; @@ -1640,10 +1640,8 @@ static void ioc_timer_fn(struct timer_list *timer) atomic64_add(delta, &iocg->vtime); atomic64_add(delta, &iocg->done_vtime); /* if usage is sufficiently low, maybe it can donate */ - if (surplus_adjusted_hweight_inuse(usage, hw_inuse)) { + if (surplus_adjusted_hweight_inuse(usage, hw_inuse)) list_add(&iocg->surplus_list, &surpluses); - nr_surpluses++; - } } else if (hw_inuse < hw_active) { u32 new_hwi, new_inuse; @@ -1673,7 +1671,7 @@ static void ioc_timer_fn(struct timer_list *timer) } } - if (!nr_shortages || !nr_surpluses) + if (!nr_shortages || list_empty(&surpluses)) goto skip_surplus_transfers; /* there are both shortages and surpluses, transfer surpluses */ @@ -1738,11 +1736,9 @@ skip_surplus_transfers: /* * If there are IOs spanning multiple periods, wait - * them out before pushing the device harder. If - * there are surpluses, let redistribution work it - * out first. + * them out before pushing the device harder. */ - if (!nr_lagging && !nr_surpluses) + if (!nr_lagging) ioc->busy_level--; } else { /* @@ -1796,15 +1792,14 @@ skip_surplus_transfers: } trace_iocost_ioc_vrate_adj(ioc, vrate, missed_ppm, rq_wait_pct, - nr_lagging, nr_shortages, - nr_surpluses); + nr_lagging, nr_shortages); atomic64_set(&ioc->vtime_rate, vrate); ioc_refresh_margins(ioc); } else if (ioc->busy_level != prev_busy_level || nr_lagging) { trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate), missed_ppm, rq_wait_pct, nr_lagging, - nr_shortages, nr_surpluses); + nr_shortages); } ioc_refresh_params(ioc, false); diff --git a/include/trace/events/iocost.h b/include/trace/events/iocost.h index a905ecc0342f..ee024fe8fef6 100644 --- a/include/trace/events/iocost.h +++ b/include/trace/events/iocost.h @@ -128,11 +128,9 @@ DEFINE_EVENT(iocg_inuse_update, iocost_inuse_reset, TRACE_EVENT(iocost_ioc_vrate_adj, TP_PROTO(struct ioc *ioc, u64 new_vrate, u32 *missed_ppm, - u32 rq_wait_pct, int nr_lagging, int nr_shortages, - int nr_surpluses), + u32 rq_wait_pct, int nr_lagging, int nr_shortages), - TP_ARGS(ioc, new_vrate, missed_ppm, rq_wait_pct, nr_lagging, nr_shortages, - nr_surpluses), + TP_ARGS(ioc, new_vrate, missed_ppm, rq_wait_pct, nr_lagging, nr_shortages), TP_STRUCT__entry ( __string(devname, ioc_name(ioc)) @@ -144,7 +142,6 @@ TRACE_EVENT(iocost_ioc_vrate_adj, __field(u32, rq_wait_pct) __field(int, nr_lagging) __field(int, nr_shortages) - __field(int, nr_surpluses) ), TP_fast_assign( @@ -157,15 +154,13 @@ TRACE_EVENT(iocost_ioc_vrate_adj, __entry->rq_wait_pct = rq_wait_pct; __entry->nr_lagging = nr_lagging; __entry->nr_shortages = nr_shortages; - __entry->nr_surpluses = nr_surpluses; ), - TP_printk("[%s] vrate=%llu->%llu busy=%d missed_ppm=%u:%u rq_wait_pct=%u lagging=%d shortages=%d surpluses=%d", + TP_printk("[%s] vrate=%llu->%llu busy=%d missed_ppm=%u:%u rq_wait_pct=%u lagging=%d shortages=%d", __get_str(devname), __entry->old_vrate, __entry->new_vrate, __entry->busy_level, __entry->read_missed_ppm, __entry->write_missed_ppm, - __entry->rq_wait_pct, __entry->nr_lagging, __entry->nr_shortages, - __entry->nr_surpluses + __entry->rq_wait_pct, __entry->nr_lagging, __entry->nr_shortages ) ); -- cgit v1.2.3-59-g8ed1b From 046037551721e8831f6718ac2149887f6bb1f802 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 1 Sep 2020 14:52:55 -0400 Subject: blk-iocost: restore inuse update tracepoints Update and restore the inuse update tracepoints. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-iocost.c | 16 ++++++++++++++++ include/trace/events/iocost.h | 6 +++--- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'include/trace') diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 0270a504e6b5..9366527d8c12 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1919,6 +1919,12 @@ static void transfer_surpluses(struct list_head *surpluses, struct ioc_now *now) inuse = DIV64_U64_ROUND_UP( parent->child_adjusted_sum * iocg->hweight_after_donation, parent->hweight_inuse); + + TRACE_IOCG_PATH(inuse_transfer, iocg, now, + iocg->inuse, inuse, + iocg->hweight_inuse, + iocg->hweight_after_donation); + __propagate_weights(iocg, iocg->active, inuse, true, now); } @@ -2076,6 +2082,10 @@ static void ioc_timer_fn(struct timer_list *timer) iocg->hweight_after_donation = new_hwi; list_add(&iocg->surplus_list, &surpluses); } else { + TRACE_IOCG_PATH(inuse_shortage, iocg, &now, + iocg->inuse, iocg->active, + iocg->hweight_inuse, new_hwi); + __propagate_weights(iocg, iocg->active, iocg->active, true, &now); nr_shortages++; @@ -2248,11 +2258,13 @@ static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime, struct ioc *ioc = iocg->ioc; struct ioc_margins *margins = &ioc->margins; u32 adj_step = DIV_ROUND_UP(iocg->active * INUSE_ADJ_STEP_PCT, 100); + u32 __maybe_unused old_inuse = iocg->inuse, __maybe_unused old_hwi; u32 hwi; s64 margin; u64 cost, new_inuse; current_hweight(iocg, NULL, &hwi); + old_hwi = hwi; cost = abs_cost_to_cost(abs_cost, hwi); margin = now->vnow - vtime - cost; @@ -2287,6 +2299,10 @@ static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime, iocg->inuse != iocg->active); spin_unlock_irq(&ioc->lock); + + TRACE_IOCG_PATH(inuse_adjust, iocg, now, + old_inuse, iocg->inuse, old_hwi, hwi); + return cost; } diff --git a/include/trace/events/iocost.h b/include/trace/events/iocost.h index ee024fe8fef6..b350860d2e71 100644 --- a/include/trace/events/iocost.h +++ b/include/trace/events/iocost.h @@ -95,7 +95,7 @@ DECLARE_EVENT_CLASS(iocg_inuse_update, ) ); -DEFINE_EVENT(iocg_inuse_update, iocost_inuse_takeback, +DEFINE_EVENT(iocg_inuse_update, iocost_inuse_shortage, TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, u32 old_inuse, u32 new_inuse, @@ -105,7 +105,7 @@ DEFINE_EVENT(iocg_inuse_update, iocost_inuse_takeback, old_hw_inuse, new_hw_inuse) ); -DEFINE_EVENT(iocg_inuse_update, iocost_inuse_giveaway, +DEFINE_EVENT(iocg_inuse_update, iocost_inuse_transfer, TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, u32 old_inuse, u32 new_inuse, @@ -115,7 +115,7 @@ DEFINE_EVENT(iocg_inuse_update, iocost_inuse_giveaway, old_hw_inuse, new_hw_inuse) ); -DEFINE_EVENT(iocg_inuse_update, iocost_inuse_reset, +DEFINE_EVENT(iocg_inuse_update, iocost_inuse_adjust, TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, u32 old_inuse, u32 new_inuse, -- cgit v1.2.3-59-g8ed1b