diff options
Diffstat (limited to 'kernel/sched/cputime.c')
-rw-r--r-- | kernel/sched/cputime.c | 161 |
1 files changed, 69 insertions, 92 deletions
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index cff3e656566d..95fc77853743 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -2,7 +2,6 @@ /* * Simple CPU accounting cgroup controller */ -#include "sched.h" #ifdef CONFIG_IRQ_TIME_ACCOUNTING @@ -44,12 +43,13 @@ static void irqtime_account_delta(struct irqtime *irqtime, u64 delta, } /* - * Called before incrementing preempt_count on {soft,}irq_enter + * Called after incrementing preempt_count on {soft,}irq_enter * and before decrementing preempt_count on {soft,}irq_exit. */ -void irqtime_account_irq(struct task_struct *curr) +void irqtime_account_irq(struct task_struct *curr, unsigned int offset) { struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime); + unsigned int pc; s64 delta; int cpu; @@ -59,6 +59,7 @@ void irqtime_account_irq(struct task_struct *curr) cpu = smp_processor_id(); delta = sched_clock_cpu(cpu) - irqtime->irq_start_time; irqtime->irq_start_time += delta; + pc = irq_count() - offset; /* * We do not account for softirq time from ksoftirqd here. @@ -66,12 +67,11 @@ void irqtime_account_irq(struct task_struct *curr) * in that case, so as not to confuse scheduler with a special task * that do not consume any time, but still wants to run. */ - if (hardirq_count()) + if (pc & HARDIRQ_MASK) irqtime_account_delta(irqtime, delta, CPUTIME_IRQ); - else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) + else if ((pc & SOFTIRQ_OFFSET) && curr != this_cpu_ksoftirqd()) irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ); } -EXPORT_SYMBOL_GPL(irqtime_account_irq); static u64 irqtime_tick_accounted(u64 maxtime) { @@ -147,10 +147,10 @@ void account_guest_time(struct task_struct *p, u64 cputime) /* Add guest time to cpustat. */ if (task_nice(p) > 0) { - cpustat[CPUTIME_NICE] += cputime; + task_group_account_field(p, CPUTIME_NICE, cputime); cpustat[CPUTIME_GUEST_NICE] += cputime; } else { - cpustat[CPUTIME_USER] += cputime; + task_group_account_field(p, CPUTIME_USER, cputime); cpustat[CPUTIME_GUEST] += cputime; } } @@ -226,6 +226,21 @@ void account_idle_time(u64 cputime) cpustat[CPUTIME_IDLE] += cputime; } + +#ifdef CONFIG_SCHED_CORE +/* + * Account for forceidle time due to core scheduling. + * + * REQUIRES: schedstat is enabled. + */ +void __account_forceidle_time(struct task_struct *p, u64 delta) +{ + __schedstat_add(p->stats.core_forceidle_sum, delta); + + task_group_account_field(p, CPUTIME_FORCEIDLE, delta); +} +#endif + /* * When a guest is interrupted for a longer amount of time, missed clock * ticks are not redelivered later. Due to that, this function may on @@ -418,24 +433,21 @@ void vtime_task_switch(struct task_struct *prev) } # endif -/* - * Archs that account the whole time spent in the idle task - * (outside irq) as idle time can rely on this and just implement - * vtime_account_kernel() and vtime_account_idle(). Archs that - * have other meaning of the idle time (s390 only includes the - * time spent by the CPU when it's in low power mode) must override - * vtime_account(). - */ -#ifndef __ARCH_HAS_VTIME_ACCOUNT -void vtime_account_irq_enter(struct task_struct *tsk) +void vtime_account_irq(struct task_struct *tsk, unsigned int offset) { - if (!in_interrupt() && is_idle_task(tsk)) + unsigned int pc = irq_count() - offset; + + if (pc & HARDIRQ_OFFSET) { + vtime_account_hardirq(tsk); + } else if (pc & SOFTIRQ_OFFSET) { + vtime_account_softirq(tsk); + } else if (!IS_ENABLED(CONFIG_HAVE_VIRT_CPU_ACCOUNTING_IDLE) && + is_idle_task(tsk)) { vtime_account_idle(tsk); - else + } else { vtime_account_kernel(tsk); + } } -EXPORT_SYMBOL_GPL(vtime_account_irq_enter); -#endif /* __ARCH_HAS_VTIME_ACCOUNT */ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, u64 *ut, u64 *st) @@ -520,50 +532,6 @@ void account_idle_ticks(unsigned long ticks) } /* - * Perform (stime * rtime) / total, but avoid multiplication overflow by - * losing precision when the numbers are big. - */ -static u64 scale_stime(u64 stime, u64 rtime, u64 total) -{ - u64 scaled; - - for (;;) { - /* Make sure "rtime" is the bigger of stime/rtime */ - if (stime > rtime) - swap(rtime, stime); - - /* Make sure 'total' fits in 32 bits */ - if (total >> 32) - goto drop_precision; - - /* Does rtime (and thus stime) fit in 32 bits? */ - if (!(rtime >> 32)) - break; - - /* Can we just balance rtime/stime rather than dropping bits? */ - if (stime >> 31) - goto drop_precision; - - /* We can grow stime and shrink rtime and try to make them both fit */ - stime <<= 1; - rtime >>= 1; - continue; - -drop_precision: - /* We drop from rtime, it has more bits than stime */ - rtime >>= 1; - total >>= 1; - } - - /* - * Make sure gcc understands that this is a 32x32->64 multiply, - * followed by a 64/32->64 divide. - */ - scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total); - return scaled; -} - -/* * Adjust tick based cputime random precision against scheduler runtime * accounting. * @@ -609,7 +577,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, /* * If either stime or utime are 0, assume all runtime is userspace. - * Once a task gets some ticks, the monotonicy code at 'update:' + * Once a task gets some ticks, the monotonicity code at 'update:' * will ensure things converge to the observed ratio. */ if (stime == 0) { @@ -622,7 +590,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, goto update; } - stime = scale_stime(stime, rtime, stime + utime); + stime = mul_u64_u64_div_u64(stime, rtime, stime + utime); update: /* @@ -661,7 +629,8 @@ void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) .sum_exec_runtime = p->se.sum_exec_runtime, }; - task_cputime(p, &cputime.utime, &cputime.stime); + if (task_cputime(p, &cputime.utime, &cputime.stime)) + cputime.sum_exec_runtime = task_sched_runtime(p); cputime_adjust(&cputime, &p->prev_cputime, ut, st); } EXPORT_SYMBOL_GPL(task_cputime_adjusted); @@ -874,19 +843,21 @@ u64 task_gtime(struct task_struct *t) * add up the pending nohz execution time since the last * cputime snapshot. */ -void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) +bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime) { struct vtime *vtime = &t->vtime; unsigned int seq; u64 delta; + int ret; if (!vtime_accounting_enabled()) { *utime = t->utime; *stime = t->stime; - return; + return false; } do { + ret = false; seq = read_seqcount_begin(&vtime->seqcount); *utime = t->utime; @@ -896,6 +867,7 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) if (vtime->state < VTIME_SYS) continue; + ret = true; delta = vtime_delta(vtime); /* @@ -907,10 +879,14 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) else *utime += vtime->utime + delta; } while (read_seqcount_retry(&vtime->seqcount, seq)); + + return ret; } -static int vtime_state_check(struct vtime *vtime, int cpu) +static int vtime_state_fetch(struct vtime *vtime, int cpu) { + int state = READ_ONCE(vtime->state); + /* * We raced against a context switch, fetch the * kcpustat task again. @@ -927,10 +903,10 @@ static int vtime_state_check(struct vtime *vtime, int cpu) * * Case 1) is ok but 2) is not. So wait for a safe VTIME state. */ - if (vtime->state == VTIME_INACTIVE) + if (state == VTIME_INACTIVE) return -EAGAIN; - return 0; + return state; } static u64 kcpustat_user_vtime(struct vtime *vtime) @@ -949,14 +925,15 @@ static int kcpustat_field_vtime(u64 *cpustat, { struct vtime *vtime = &tsk->vtime; unsigned int seq; - int err; do { + int state; + seq = read_seqcount_begin(&vtime->seqcount); - err = vtime_state_check(vtime, cpu); - if (err < 0) - return err; + state = vtime_state_fetch(vtime, cpu); + if (state < 0) + return state; *val = cpustat[usage]; @@ -969,7 +946,7 @@ static int kcpustat_field_vtime(u64 *cpustat, */ switch (usage) { case CPUTIME_SYSTEM: - if (vtime->state == VTIME_SYS) + if (state == VTIME_SYS) *val += vtime->stime + vtime_delta(vtime); break; case CPUTIME_USER: @@ -981,11 +958,11 @@ static int kcpustat_field_vtime(u64 *cpustat, *val += kcpustat_user_vtime(vtime); break; case CPUTIME_GUEST: - if (vtime->state == VTIME_GUEST && task_nice(tsk) <= 0) + if (state == VTIME_GUEST && task_nice(tsk) <= 0) *val += vtime->gtime + vtime_delta(vtime); break; case CPUTIME_GUEST_NICE: - if (vtime->state == VTIME_GUEST && task_nice(tsk) > 0) + if (state == VTIME_GUEST && task_nice(tsk) > 0) *val += vtime->gtime + vtime_delta(vtime); break; default: @@ -1000,12 +977,12 @@ u64 kcpustat_field(struct kernel_cpustat *kcpustat, enum cpu_usage_stat usage, int cpu) { u64 *cpustat = kcpustat->cpustat; + u64 val = cpustat[usage]; struct rq *rq; - u64 val; int err; if (!vtime_accounting_enabled_cpu(cpu)) - return cpustat[usage]; + return val; rq = cpu_rq(cpu); @@ -1036,23 +1013,23 @@ static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst, { struct vtime *vtime = &tsk->vtime; unsigned int seq; - int err; do { u64 *cpustat; u64 delta; + int state; seq = read_seqcount_begin(&vtime->seqcount); - err = vtime_state_check(vtime, cpu); - if (err < 0) - return err; + state = vtime_state_fetch(vtime, cpu); + if (state < 0) + return state; *dst = *src; cpustat = dst->cpustat; /* Task is sleeping, dead or idle, nothing to add */ - if (vtime->state < VTIME_SYS) + if (state < VTIME_SYS) continue; delta = vtime_delta(vtime); @@ -1061,15 +1038,15 @@ static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst, * Task runs either in user (including guest) or kernel space, * add pending nohz time to the right place. */ - if (vtime->state == VTIME_SYS) { + if (state == VTIME_SYS) { cpustat[CPUTIME_SYSTEM] += vtime->stime + delta; - } else if (vtime->state == VTIME_USER) { + } else if (state == VTIME_USER) { if (task_nice(tsk) > 0) cpustat[CPUTIME_NICE] += vtime->utime + delta; else cpustat[CPUTIME_USER] += vtime->utime + delta; } else { - WARN_ON_ONCE(vtime->state != VTIME_GUEST); + WARN_ON_ONCE(state != VTIME_GUEST); if (task_nice(tsk) > 0) { cpustat[CPUTIME_GUEST_NICE] += vtime->gtime + delta; cpustat[CPUTIME_NICE] += vtime->gtime + delta; @@ -1080,7 +1057,7 @@ static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst, } } while (read_seqcount_retry(&vtime->seqcount, seq)); - return err; + return 0; } void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu) |