Diffstat:
 kernel/sched/fair.c | 102
 1 file changed, 73 insertions(+), 29 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4037e19bbca2..2fe3aa853e4d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3413,9 +3413,9 @@ void set_task_rq_fair(struct sched_entity *se,
  * _IFF_ we look at the pure running and runnable sums. Because they
  * represent the very same entity, just at different points in the hierarchy.
  *
- *
- * Per the above update_tg_cfs_util() is trivial (and still 'wrong') and
- * simply copies the running sum over.
+ * Per the above update_tg_cfs_util() is trivial and simply copies the running
+ * sum over (but still wrong, because the group entity and group rq do not have
+ * their PELT windows aligned).
  *
  * However, update_tg_cfs_runnable() is more complex. So we have:
  *
@@ -3424,11 +3424,11 @@ void set_task_rq_fair(struct sched_entity *se,
  * And since, like util, the runnable part should be directly transferable,
  * the following would _appear_ to be the straightforward approach:
  *
- *   grq->avg.load_avg = grq->load.weight * grq->avg.running_avg	(3)
+ *   grq->avg.load_avg = grq->load.weight * grq->avg.runnable_avg	(3)
  *
  * And per (1) we have:
  *
- *   ge->avg.running_avg == grq->avg.running_avg
+ *   ge->avg.runnable_avg == grq->avg.runnable_avg
  *
  * Which gives:
  *
@@ -3447,27 +3447,28 @@ void set_task_rq_fair(struct sched_entity *se,
  * to (shortly) return to us. This only works by keeping the weights as
  * integral part of the sum. We therefore cannot decompose as per (3).
  *
- * OK, so what then?
+ * Another reason this doesn't work is that runnable isn't a 0-sum entity.
+ * Imagine a rq with 2 tasks that each are runnable 2/3 of the time. Then the
+ * rq itself is runnable anywhere between 2/3 and 1 depending on how the
+ * runnable sections of these tasks overlap (or not). If they were to perfectly
+ * align, the rq as a whole would be runnable 2/3 of the time. If however we
+ * always have at least 1 runnable task, the rq as a whole is always runnable.
  *
+ * So we'll have to approximate.. :/
  *
- * Another way to look at things is:
+ * Given the constraint:
  *
- *   grq->avg.load_avg = \Sum se->avg.load_avg
+ *   ge->avg.running_sum <= ge->avg.runnable_sum <= LOAD_AVG_MAX
  *
- * Therefore, per (2):
+ * We can construct a rule that adds runnable to a rq by assuming minimal
+ * overlap.
  *
- *   grq->avg.load_avg = \Sum se->load.weight * se->avg.runnable_avg
+ * On removal, we'll assume each task is equally runnable; which yields:
  *
- * And the very thing we're propagating is a change in that sum (someone
- * joined/left). So we can easily know the runnable change, which would be, per
- * (2) the already tracked se->load_avg divided by the corresponding
- * se->weight.
+ *   grq->avg.runnable_sum = grq->avg.load_sum / grq->load.weight
  *
- * Basically (4) but in differential form:
+ * XXX: only do this for the part of runnable > running ?
  *
- *   d(runnable_avg) += se->avg.load_avg / se->load.weight
- *								   (5)
- *   ge->avg.load_avg += ge->load.weight * d(runnable_avg)
  */
 static inline void
@@ -3479,6 +3480,14 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 	if (!delta)
 		return;
 
+	/*
+	 * The relation between sum and avg is:
+	 *
+	 *   LOAD_AVG_MAX - 1024 + sa->period_contrib
+	 *
+	 * however, the PELT windows are not aligned between grq and gse.
+	 */
+
 	/* Set new sched_entity's utilization */
 	se->avg.util_avg = gcfs_rq->avg.util_avg;
 	se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
@@ -3491,33 +3500,68 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 static inline void
 update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
-	long runnable_sum = gcfs_rq->prop_runnable_sum;
-	long runnable_load_avg, load_avg;
-	s64 runnable_load_sum, load_sum;
+	long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
+	unsigned long runnable_load_avg, load_avg;
+	u64 runnable_load_sum, load_sum = 0;
+	s64 delta_sum;
 
 	if (!runnable_sum)
 		return;
 
 	gcfs_rq->prop_runnable_sum = 0;
 
+	if (runnable_sum >= 0) {
+		/*
+		 * Add runnable; clip at LOAD_AVG_MAX. Reflects that until
+		 * the CPU is saturated running == runnable.
+		 */
+		runnable_sum += se->avg.load_sum;
+		runnable_sum = min(runnable_sum, (long)LOAD_AVG_MAX);
+	} else {
+		/*
+		 * Estimate the new unweighted runnable_sum of the gcfs_rq by
+		 * assuming all tasks are equally runnable.
+		 */
+		if (scale_load_down(gcfs_rq->load.weight)) {
+			load_sum = div_s64(gcfs_rq->avg.load_sum,
+				scale_load_down(gcfs_rq->load.weight));
+		}
+
+		/* But make sure to not inflate se's runnable */
+		runnable_sum = min(se->avg.load_sum, load_sum);
+	}
+
+	/*
+	 * runnable_sum can't be lower than running_sum.
+	 * As running_sum is scaled with CPU capacity whereas the runnable_sum
+	 * is not, we rescale running_sum first.
+	 */
+	running_sum = se->avg.util_sum /
+		arch_scale_cpu_capacity(NULL, cpu_of(rq_of(cfs_rq)));
+	runnable_sum = max(runnable_sum, running_sum);
+
 	load_sum = (s64)se_weight(se) * runnable_sum;
 	load_avg = div_s64(load_sum, LOAD_AVG_MAX);
 
-	add_positive(&se->avg.load_sum, runnable_sum);
-	add_positive(&se->avg.load_avg, load_avg);
+	delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
+	delta_avg = load_avg - se->avg.load_avg;
 
-	add_positive(&cfs_rq->avg.load_avg, load_avg);
-	add_positive(&cfs_rq->avg.load_sum, load_sum);
+	se->avg.load_sum = runnable_sum;
+	se->avg.load_avg = load_avg;
+	add_positive(&cfs_rq->avg.load_avg, delta_avg);
+	add_positive(&cfs_rq->avg.load_sum, delta_sum);
 
 	runnable_load_sum = (s64)se_runnable(se) * runnable_sum;
 	runnable_load_avg = div_s64(runnable_load_sum, LOAD_AVG_MAX);
+
+	delta_sum = runnable_load_sum - se_weight(se) * se->avg.runnable_load_sum;
+	delta_avg = runnable_load_avg - se->avg.runnable_load_avg;
 
-	add_positive(&se->avg.runnable_load_sum, runnable_sum);
-	add_positive(&se->avg.runnable_load_avg, runnable_load_avg);
+	se->avg.runnable_load_sum = runnable_sum;
+	se->avg.runnable_load_avg = runnable_load_avg;
 
 	if (se->on_rq) {
-		add_positive(&cfs_rq->avg.runnable_load_avg, runnable_load_avg);
-		add_positive(&cfs_rq->avg.runnable_load_sum, runnable_load_sum);
+		add_positive(&cfs_rq->avg.runnable_load_avg, delta_avg);
+		add_positive(&cfs_rq->avg.runnable_load_sum, delta_sum);
 	}
 }
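
A note on the overlap example in the new comment: with per-task runnable fractions f1 and f2, the rq as a whole is runnable at least max(f1, f2) of the time (runnable sections perfectly aligned) and at most min(1, f1 + f2) of the time (no overlap at all). A minimal userspace sketch of these bounds (illustrative C, not kernel code; the helper names are made up for this note):

#include <stdio.h>

/* Lower bound: the runnable sections overlap perfectly. */
static double rq_runnable_min(double f1, double f2)
{
	return f1 > f2 ? f1 : f2;
}

/* Upper bound: the sections never overlap, capped at always-runnable. */
static double rq_runnable_max(double f1, double f2)
{
	double s = f1 + f2;
	return s < 1.0 ? s : 1.0;
}

int main(void)
{
	double f = 2.0 / 3.0;	/* each task runnable 2/3 of the time */

	/* Prints "rq runnable in [0.667, 1.000]", matching the comment. */
	printf("rq runnable in [%.3f, %.3f]\n",
	       rq_runnable_min(f, f), rq_runnable_max(f, f));
	return 0;
}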
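To see the resulting add/remove approximation in isolation: addition assumes minimal overlap and clips at the PELT maximum, removal assumes each task was equally runnable per unit of weight, and the result is floored by the running sum. A hedged standalone model (plain C; LOAD_AVG_MAX is the familiar series limit used illustratively, and propagate_runnable() is a made-up name, not a kernel function — the real code also rescales running_sum via arch_scale_cpu_capacity(), which is elided here):

#include <stdio.h>

#define LOAD_AVG_MAX 47742	/* illustrative PELT maximum */

static long lmin(long a, long b) { return a < b ? a : b; }
static long lmax(long a, long b) { return a > b ? a : b; }

/*
 * Toy model of the rule above: 'delta' is the runnable_sum change
 * bubbling up from the group rq (positive when tasks joined, negative
 * when they left).
 */
static long propagate_runnable(long se_load_sum,    /* ge's current unweighted sum */
			       long se_running_sum, /* ge's running_sum, already rescaled */
			       long grq_load_sum,   /* group rq's weighted load_sum */
			       long grq_weight,     /* group rq's load.weight */
			       long delta)
{
	long runnable_sum;

	if (delta >= 0) {
		/* Addition: assume minimal overlap; clip at the PELT maximum. */
		runnable_sum = lmin(se_load_sum + delta, LOAD_AVG_MAX);
	} else {
		/* Removal: assume every task was equally runnable... */
		long est = grq_weight ? grq_load_sum / grq_weight : 0;

		/* ...but never inflate the entity's own runnable. */
		runnable_sum = lmin(se_load_sum, est);
	}

	/* runnable can never be lower than running. */
	return lmax(runnable_sum, se_running_sum);
}

int main(void)
{
	/* A large join saturates at LOAD_AVG_MAX (prints 47742). */
	printf("add:    %ld\n", propagate_runnable(40000, 10000, 0, 0, 24000));
	/* A leave falls back to the equal-runnability estimate (prints 20000). */
	printf("remove: %ld\n", propagate_runnable(40000, 10000, 40960000, 2048, -10000));
	return 0;
}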
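For reference, the sum/avg relation the new update_tg_cfs_util() comment alludes to is avg = sum / (LOAD_AVG_MAX - 1024 + period_contrib); update_tg_cfs_util() divides by plain LOAD_AVG_MAX instead, because grq and gse carry different period_contrib values (their PELT windows are not aligned). A small illustration of how much the divider can vary (userspace C under that assumption, not kernel code):

#include <stdio.h>

#define LOAD_AVG_MAX 47742	/* illustrative PELT series maximum */

/* avg as a function of the partially elapsed 1024us window. */
static double pelt_avg(double sum, unsigned int period_contrib)
{
	return sum / (double)(LOAD_AVG_MAX - 1024 + period_contrib);
}

int main(void)
{
	double sum = 24000.0;

	/* The divider ranges over [LOAD_AVG_MAX - 1024, LOAD_AVG_MAX - 1]. */
	printf("avg @ contrib 0:    %.4f\n", pelt_avg(sum, 0));    /* /46718 */
	printf("avg @ contrib 1023: %.4f\n", pelt_avg(sum, 1023)); /* /47741 */
	return 0;
}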
