diff options
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r-- | kernel/sched/core.c | 97 |
1 files changed, 58 insertions, 39 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fc1dfc007604..a2694ba82874 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -269,7 +269,6 @@ static void __hrtick_start(void *arg) rq_lock(rq, &rf); __hrtick_restart(rq); - rq->hrtick_csd_pending = 0; rq_unlock(rq, &rf); } @@ -293,12 +292,10 @@ void hrtick_start(struct rq *rq, u64 delay) hrtimer_set_expires(timer, time); - if (rq == this_rq()) { + if (rq == this_rq()) __hrtick_restart(rq); - } else if (!rq->hrtick_csd_pending) { + else smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); - rq->hrtick_csd_pending = 1; - } } #else @@ -322,8 +319,6 @@ void hrtick_start(struct rq *rq, u64 delay) static void hrtick_rq_init(struct rq *rq) { #ifdef CONFIG_SMP - rq->hrtick_csd_pending = 0; - rq->hrtick_csd.flags = 0; rq->hrtick_csd.func = __hrtick_start; rq->hrtick_csd.info = rq; @@ -552,27 +547,32 @@ void resched_cpu(int cpu) */ int get_nohz_timer_target(void) { - int i, cpu = smp_processor_id(); + int i, cpu = smp_processor_id(), default_cpu = -1; struct sched_domain *sd; - if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER)) - return cpu; + if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) { + if (!idle_cpu(cpu)) + return cpu; + default_cpu = cpu; + } rcu_read_lock(); for_each_domain(cpu, sd) { - for_each_cpu(i, sched_domain_span(sd)) { + for_each_cpu_and(i, sched_domain_span(sd), + housekeeping_cpumask(HK_FLAG_TIMER)) { if (cpu == i) continue; - if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) { + if (!idle_cpu(i)) { cpu = i; goto unlock; } } } - if (!housekeeping_cpu(cpu, HK_FLAG_TIMER)) - cpu = housekeeping_any_cpu(HK_FLAG_TIMER); + if (default_cpu == -1) + default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER); + cpu = default_cpu; unlock: rcu_read_unlock(); return cpu; @@ -756,7 +756,6 @@ static void set_load_weight(struct task_struct *p, bool update_load) if (task_has_idle_policy(p)) { load->weight = scale_load(WEIGHT_IDLEPRIO); load->inv_weight = WMULT_IDLEPRIO; - p->se.runnable_weight = load->weight; return; } @@ -769,7 +768,6 @@ static void set_load_weight(struct task_struct *p, bool update_load) } else { load->weight = scale_load(sched_prio_to_weight[prio]); load->inv_weight = sched_prio_to_wmult[prio]; - p->se.runnable_weight = load->weight; } } @@ -1442,17 +1440,6 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) #ifdef CONFIG_SMP -static inline bool is_per_cpu_kthread(struct task_struct *p) -{ - if (!(p->flags & PF_KTHREAD)) - return false; - - if (p->nr_cpus_allowed != 1) - return false; - - return true; -} - /* * Per-CPU kthreads are allowed to run on !active && online CPUs, see * __set_cpus_allowed_ptr() and select_fallback_rq(). @@ -1658,7 +1645,12 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, if (cpumask_equal(p->cpus_ptr, new_mask)) goto out; - dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); + /* + * Picking a ~random cpu helps in cases where we are changing affinity + * for groups of tasks (ie. cpuset), so that load balancing is not + * immediately required to distribute the tasks within their new mask. + */ + dest_cpu = cpumask_any_and_distribute(cpu_valid_mask, new_mask); if (dest_cpu >= nr_cpu_ids) { ret = -EINVAL; goto out; @@ -3584,6 +3576,17 @@ unsigned long long task_sched_runtime(struct task_struct *p) return ns; } +DEFINE_PER_CPU(unsigned long, thermal_pressure); + +void arch_set_thermal_pressure(struct cpumask *cpus, + unsigned long th_pressure) +{ + int cpu; + + for_each_cpu(cpu, cpus) + WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure); +} + /* * This function gets called by the timer code, with HZ frequency. * We call it with interrupts disabled. @@ -3594,12 +3597,16 @@ void scheduler_tick(void) struct rq *rq = cpu_rq(cpu); struct task_struct *curr = rq->curr; struct rq_flags rf; + unsigned long thermal_pressure; + arch_scale_freq_tick(); sched_clock_tick(); rq_lock(rq, &rf); update_rq_clock(rq); + thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq)); + update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure); curr->sched_class->task_tick(rq, curr, 0); calc_global_load_tick(rq); psi_task_tick(rq); @@ -3669,28 +3676,31 @@ static void sched_tick_remote(struct work_struct *work) * statistics and checks timeslices in a time-independent way, regardless * of when exactly it is running. */ - if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu)) + if (!tick_nohz_tick_stopped_cpu(cpu)) goto out_requeue; rq_lock_irq(rq, &rf); curr = rq->curr; - if (is_idle_task(curr) || cpu_is_offline(cpu)) + if (cpu_is_offline(cpu)) goto out_unlock; update_rq_clock(rq); - delta = rq_clock_task(rq) - curr->se.exec_start; - /* - * Make sure the next tick runs within a reasonable - * amount of time. - */ - WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); + if (!is_idle_task(curr)) { + /* + * Make sure the next tick runs within a reasonable + * amount of time. + */ + delta = rq_clock_task(rq) - curr->se.exec_start; + WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); + } curr->sched_class->task_tick(rq, curr, 0); + calc_load_nohz_remote(rq); out_unlock: rq_unlock_irq(rq, &rf); - out_requeue: + /* * Run the remote tick once per second (1Hz). This arbitrary * frequency is large enough to avoid overload but short enough @@ -4076,6 +4086,8 @@ static void __sched notrace __schedule(bool preempt) */ ++*switch_count; + psi_sched_switch(prev, next, !task_on_rq_queued(prev)); + trace_sched_switch(preempt, prev, next); /* Also unlocks the rq: */ @@ -7063,8 +7075,15 @@ void sched_move_task(struct task_struct *tsk) if (queued) enqueue_task(rq, tsk, queue_flags); - if (running) + if (running) { set_next_task(rq, tsk); + /* + * After changing group, the running task may have joined a + * throttled one but it's still the running task. Trigger a + * resched to make sure that task can still run. + */ + resched_curr(rq); + } task_rq_unlock(rq, tsk, &rf); } @@ -7260,7 +7279,7 @@ capacity_from_percent(char *buf) &req.percent); if (req.ret) return req; - if (req.percent > UCLAMP_PERCENT_SCALE) { + if ((u64)req.percent > UCLAMP_PERCENT_SCALE) { req.ret = -ERANGE; return req; } |