From 5c87579e65ee4f419b2369407f82326d38b5d2d8 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 30 Sep 2006 23:27:17 -0700 Subject: [PATCH] maximum latency tracking infrastructure Add infrastructure to track "maximum allowable latency" for power saving policies. The reason for adding this infrastructure is that power management in the idle loop needs to make a tradeoff between latency and power savings (deeper power save modes have a longer latency before code can run again). The code that makes this tradeoff today just uses a rather simple algorithm; however, this is not good enough: there are devices and use cases that require a lower latency than the higher power saving states provide. An example would be audio playback, but another example is the ipw2100 wireless driver that right now has a very direct and ugly acpi hook to disable some higher power states randomly when it gets certain types of errors. The proposed solution is to have an interface where drivers can * announce the maximum latency (in microseconds) that they can deal with * modify this latency * give up their constraint and a function where the code that decides on power saving strategy can query the current global desired maximum. This patch has a user on each side: on the consumer side, ACPI is patched to use this; on the producer side, the ipw2100 driver is patched. A generic maximum latency of 2 timer ticks is also registered (any more and you lose accurate time tracking, after all). While the existing users of the patch are x86 specific, the infrastructure is not. I'd like to ask the arch maintainers of other architectures if the infrastructure is generic enough for their use (assuming the architecture has such a tradeoff as a concept at all), and the sound/multimedia driver owners to look at the driver-facing API to see if this is something they can use. 
[akpm@osdl.org: cleanups] Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Acked-by: Jesse Barnes Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/Makefile') diff --git a/kernel/Makefile b/kernel/Makefile index d62ec66c1af2..e210e8cf7237 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -8,7 +8,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ - hrtimer.o rwsem.o + hrtimer.o rwsem.o latency.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ -- cgit v1.3-8-gc7d7 From f3cef7a99469afc159fec3a61b42dc7ca5b6824f Mon Sep 17 00:00:00 2001 From: Jay Lan Date: Sat, 30 Sep 2006 23:28:55 -0700 Subject: [PATCH] csa: basic accounting over taskstats Add some basic accounting fields to the taskstats struct, and add a new kernel/tsacct.c to handle basic accounting data upon exit. A hook is added to taskstats.c to invoke the basic accounting data handling. 
Signed-off-by: Jay Lan Cc: Shailabh Nagar Cc: Balbir Singh Cc: Jes Sorensen Cc: Chris Sturtivant Cc: Tony Ernst Cc: Guillaume Thouvenin Cc: "Michal Piotrowski" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/taskstats.h | 29 ++++++++++++++---- include/linux/tsacct_kern.h | 19 ++++++++++++ kernel/Makefile | 2 +- kernel/taskstats.c | 4 +++ kernel/tsacct.c | 72 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 120 insertions(+), 6 deletions(-) create mode 100644 include/linux/tsacct_kern.h create mode 100644 kernel/tsacct.c (limited to 'kernel/Makefile') diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h index f1cb6cddd19d..af93a63a5092 100644 --- a/include/linux/taskstats.h +++ b/include/linux/taskstats.h @@ -2,6 +2,7 @@ * * Copyright (C) Shailabh Nagar, IBM Corp. 2006 * (C) Balbir Singh, IBM Corp. 2006 + * (C) Jay Lan, SGI, 2006 * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2.1 of the GNU Lesser General Public License @@ -29,16 +30,18 @@ * c) add new fields after version comment; maintain 64-bit alignment */ -#define TASKSTATS_VERSION 1 + +#define TASKSTATS_VERSION 2 +#define TS_COMM_LEN 16 /* should sync up with TASK_COMM_LEN + * in linux/sched.h */ struct taskstats { /* Version 1 */ __u16 version; - __u16 padding[3]; /* Userspace should not interpret the padding - * field which can be replaced by useful - * fields if struct taskstats is extended. 
- */ + __u32 ac_exitcode; /* Exit status */ + __u8 ac_flag; /* Record flags */ + __u8 ac_nice; /* task_nice */ /* Delay accounting fields start * @@ -88,6 +91,22 @@ struct taskstats { __u64 cpu_run_virtual_total; /* Delay accounting fields end */ /* version 1 ends here */ + + /* Basic Accounting Fields start */ + char ac_comm[TS_COMM_LEN]; /* Command name */ + __u8 ac_sched; /* Scheduling discipline */ + __u8 ac_pad[3]; + __u32 ac_uid; /* User ID */ + __u32 ac_gid; /* Group ID */ + __u32 ac_pid; /* Process ID */ + __u32 ac_ppid; /* Parent process ID */ + __u32 ac_btime; /* Begin time [sec since 1970] */ + __u64 ac_etime; /* Elapsed time [usec] */ + __u64 ac_utime; /* User CPU time [usec] */ + __u64 ac_stime; /* System CPU time [usec] */ + __u64 ac_minflt; /* Minor Page Fault */ + __u64 ac_majflt; /* Major Page Fault */ + /* Basic Accounting Fields end */ }; diff --git a/include/linux/tsacct_kern.h b/include/linux/tsacct_kern.h new file mode 100644 index 000000000000..7e8196a02118 --- /dev/null +++ b/include/linux/tsacct_kern.h @@ -0,0 +1,19 @@ +/* + * tsacct_kern.h - kernel header for system accounting over taskstats interface + * + * Copyright (C) Jay Lan SGI + */ + +#ifndef _LINUX_TSACCT_KERN_H +#define _LINUX_TSACCT_KERN_H + +#include + +#ifdef CONFIG_TASKSTATS +extern void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk); +#else +static inline void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) +{} +#endif /* CONFIG_TASKSTATS */ + +#endif diff --git a/kernel/Makefile b/kernel/Makefile index e210e8cf7237..aacaafb28b9d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -49,7 +49,7 @@ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o -obj-$(CONFIG_TASKSTATS) += taskstats.o +obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) # According to Alan Modra , the 
-fno-omit-frame-pointer is diff --git a/kernel/taskstats.c b/kernel/taskstats.c index c451af2ddb50..6c38dce88e8c 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -198,7 +199,10 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk, */ delayacct_add_tsk(stats, tsk); + + /* fill in basic acct fields */ stats->version = TASKSTATS_VERSION; + bacct_add_tsk(stats, tsk); /* Define err: label here if needed */ put_task_struct(tsk); diff --git a/kernel/tsacct.c b/kernel/tsacct.c new file mode 100644 index 000000000000..899067950a88 --- /dev/null +++ b/kernel/tsacct.c @@ -0,0 +1,72 @@ +/* + * tsacct.c - System accounting over taskstats interface + * + * Copyright (C) Jay Lan, + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ */
+
+#include
+#include
+#include
+#include
+
+
+#define USEC_PER_TICK	(USEC_PER_SEC/HZ)
+/*
+ * bacct_add_tsk - fill in the basic accounting fields of a taskstats record
+ * @stats: record to fill in
+ * @tsk:   task whose accounting data is being reported
+ *
+ * Elapsed time (ac_etime, in usec) is the monotonic uptime minus the start
+ * time of @tsk's thread group leader; ac_btime is the current wall-clock
+ * second minus the whole seconds of that elapsed time.  The exit code and
+ * the AFORK flag are only reported when @tsk is itself the thread group
+ * leader.
+ *
+ * NOTE(review): ac_flag is only OR-ed into here, so the record is presumably
+ * zeroed by the caller before this function runs -- confirm in fill_pid().
+ */
+void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
+{
+	struct timespec uptime, ts;
+	s64 ac_etime;
+
+	/* ac_comm must be able to hold a full task comm[] */
+	BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN);
+
+	/*
+	 * Calculate the elapsed time of the task being reported.  This has
+	 * to be based on @tsk rather than on current: fill_pid() hands in
+	 * the task it was asked about, which need not be the calling task.
+	 */
+	do_posix_clock_monotonic_gettime(&uptime);
+	ts = timespec_sub(uptime, tsk->group_leader->start_time);
+	/* rebase elapsed time to usec */
+	ac_etime = timespec_to_ns(&ts);
+	do_div(ac_etime, NSEC_PER_USEC);
+	stats->ac_etime = ac_etime;
+	/* begin time = wall-clock now minus elapsed seconds */
+	stats->ac_btime = xtime.tv_sec - ts.tv_sec;
+	if (thread_group_leader(tsk)) {
+		stats->ac_exitcode = tsk->exit_code;
+		if (tsk->flags & PF_FORKNOEXEC)
+			stats->ac_flag |= AFORK;
+	}
+	if (tsk->flags & PF_SUPERPRIV)
+		stats->ac_flag |= ASU;
+	if (tsk->flags & PF_DUMPCORE)
+		stats->ac_flag |= ACORE;
+	if (tsk->flags & PF_SIGNALED)
+		stats->ac_flag |= AXSIG;
+	stats->ac_nice	 = task_nice(tsk);
+	stats->ac_sched	 = tsk->policy;
+	stats->ac_uid	 = tsk->uid;
+	stats->ac_gid	 = tsk->gid;
+	stats->ac_pid	 = tsk->pid;
+	stats->ac_ppid	 = (tsk->parent) ? tsk->parent->pid : 0;
+	/*
+	 * cputime is converted via milliseconds, so these usec values have
+	 * at most millisecond resolution.
+	 */
+	stats->ac_utime	 = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC;
+	stats->ac_stime	 = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC;
+	stats->ac_minflt = tsk->min_flt;
+	stats->ac_majflt = tsk->maj_flt;
+	/* Each process gets a minimum of one usec cpu time */
+	if ((stats->ac_utime == 0) && (stats->ac_stime == 0)) {
+		stats->ac_stime = 1;
+	}
+
+	/*
+	 * The BUILD_BUG_ON above guarantees ac_comm is at least as large as
+	 * TASK_COMM_LEN.  Presumably tsk->comm is NUL-terminated within
+	 * TASK_COMM_LEN, in which case strncpy() terminates and zero-pads
+	 * ac_comm -- TODO confirm.
+	 */
+	strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm));
+}
+
-- cgit v1.3-8-gc7d7