From 5a1c95580f1d89c8a736bb02ecd82a8858388b8a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Thu, 21 Nov 2019 03:44:25 +0100
Subject: sched/cputime: Support other fields on kcpustat_field()

Provide support for the user, nice, guest and guest_nice fields through
kcpustat_field().

Whether we account the delta to a nice or a non-nice field is decided
from the nice value snapshot taken at the time we call kcpustat_field().
If the nice value of the task has been changed since the last vtime
update, we may have an inaccurate distribution of the nice vs. unnice
cputime.

However, this is considered a minor issue compared to the proper fix,
which would involve interrupting the target on nice updates, something
undesirable on nohz_full CPUs.

Signed-off-by: Frederic Weisbecker
Cc: Peter Zijlstra
Cc: Wanpeng Li
Cc: Yauheni Kaliuta
Link: https://lkml.kernel.org/r/20191121024430.19938-2-frederic@kernel.org
Signed-off-by: Ingo Molnar
---
 kernel/sched/cputime.c | 54 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 11 deletions(-)

(limited to 'kernel/sched/cputime.c')

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index e0cd20693ef5..27b5406222fc 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -912,11 +912,21 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 	} while (read_seqcount_retry(&vtime->seqcount, seq));
 }
 
+static u64 kcpustat_user_vtime(struct vtime *vtime)
+{
+	if (vtime->state == VTIME_USER)
+		return vtime->utime + vtime_delta(vtime);
+	else if (vtime->state == VTIME_GUEST)
+		return vtime->gtime + vtime_delta(vtime);
+	return 0;
+}
+
 static int kcpustat_field_vtime(u64 *cpustat,
-				struct vtime *vtime,
+				struct task_struct *tsk,
 				enum cpu_usage_stat usage,
 				int cpu, u64 *val)
 {
+	struct vtime *vtime = &tsk->vtime;
 	unsigned int seq;
 	int err;
 
@@ -946,9 +956,37 @@ static int kcpustat_field_vtime(u64 *cpustat,
 
 		*val = cpustat[usage];
 
-		if (vtime->state == VTIME_SYS)
-			*val += vtime->stime + vtime_delta(vtime);
-
+		/*
+		 * Nice VS unnice cputime accounting may be inaccurate if
+		 * the nice value has changed since the last vtime update.
+		 * But proper fix would involve interrupting target on nice
+		 * updates which is a no go on nohz_full (although the scheduler
+		 * may still interrupt the target if rescheduling is needed...)
+		 */
+		switch (usage) {
+		case CPUTIME_SYSTEM:
+			if (vtime->state == VTIME_SYS)
+				*val += vtime->stime + vtime_delta(vtime);
+			break;
+		case CPUTIME_USER:
+			if (task_nice(tsk) <= 0)
+				*val += kcpustat_user_vtime(vtime);
+			break;
+		case CPUTIME_NICE:
+			if (task_nice(tsk) > 0)
+				*val += kcpustat_user_vtime(vtime);
+			break;
+		case CPUTIME_GUEST:
+			if (vtime->state == VTIME_GUEST && task_nice(tsk) <= 0)
+				*val += vtime->gtime + vtime_delta(vtime);
+			break;
+		case CPUTIME_GUEST_NICE:
+			if (vtime->state == VTIME_GUEST && task_nice(tsk) > 0)
+				*val += vtime->gtime + vtime_delta(vtime);
+			break;
+		default:
+			break;
+		}
 	} while (read_seqcount_retry(&vtime->seqcount, seq));
 
 	return 0;
@@ -965,15 +1003,10 @@ u64 kcpustat_field(struct kernel_cpustat *kcpustat,
 	if (!vtime_accounting_enabled_cpu(cpu))
 		return cpustat[usage];
 
-	/* Only support sys vtime for now */
-	if (usage != CPUTIME_SYSTEM)
-		return cpustat[usage];
-
 	rq = cpu_rq(cpu);
 
 	for (;;) {
 		struct task_struct *curr;
-		struct vtime *vtime;
 
 		rcu_read_lock();
 		curr = rcu_dereference(rq->curr);
@@ -982,8 +1015,7 @@ u64 kcpustat_field(struct kernel_cpustat *kcpustat,
 			return cpustat[usage];
 		}
 
-		vtime = &curr->vtime;
-		err = kcpustat_field_vtime(cpustat, vtime, usage, cpu, &val);
+		err = kcpustat_field_vtime(cpustat, curr, usage, cpu, &val);
 		rcu_read_unlock();
 
 		if (!err)
--
cgit v1.2.3-59-g8ed1b
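
For context, a minimal sketch of how a /proc/stat-style consumer could read the
newly supported fields through kcpustat_field(). This is an illustration under
assumptions, not part of the patch; the helper name get_nice_time_ns() is
hypothetical, while kcpustat_field(), kcpustat_cpu() and CPUTIME_NICE are the
existing kernel interfaces touched above.

	#include <linux/kernel_stat.h>
	#include <linux/cpumask.h>

	/* Hypothetical helper: total "nice" cputime across CPUs, in nanoseconds. */
	static u64 get_nice_time_ns(void)
	{
		u64 nice = 0;
		int cpu;

		for_each_possible_cpu(cpu) {
			/*
			 * On a vtime-accounting (nohz_full) CPU this now folds in
			 * the not-yet-flushed delta of the currently running task,
			 * attributed to CPUTIME_NICE or CPUTIME_USER based on a
			 * snapshot of the task's current nice value.
			 */
			nice += kcpustat_field(&kcpustat_cpu(cpu), CPUTIME_NICE, cpu);
		}

		return nice;
	}

The same call pattern applies to CPUTIME_USER, CPUTIME_GUEST and
CPUTIME_GUEST_NICE; before this patch, only CPUTIME_SYSTEM folded in the vtime
delta and the other fields simply returned the raw cpustat value.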