aboutsummaryrefslogtreecommitdiffstats
path: root/tools/power/cpupower/utils/idle_monitor
diff options
context:
space:
mode:
authorDominik Brodowski <linux@dominikbrodowski.net>2011-03-30 16:30:11 +0200
committerDominik Brodowski <linux@dominikbrodowski.net>2011-07-29 18:35:36 +0200
commit7fe2f6399a84760a9af8896ac152728250f82adb (patch)
treefa4bf236359b8d6d9f8d6ff823ddd3e839da5768 /tools/power/cpupower/utils/idle_monitor
parentLinux 3.0 (diff)
downloadlinux-dev-7fe2f6399a84760a9af8896ac152728250f82adb.tar.xz
linux-dev-7fe2f6399a84760a9af8896ac152728250f82adb.zip
cpupowerutils - cpufrequtils extended with quite some features
CPU power consumption vs performance tuning is no longer limited to CPU frequency switching anymore: deep sleep states, traditional dynamic frequency scaling and hidden turbo/boost frequencies are tied close together and depend on each other. The first two exist on different architectures like PPC, Itanium and ARM, the latter (so far) only on X86. On X86 the APU (CPU+GPU) will only run most efficiently if CPU and GPU has proper power management in place. Users and Developers want to have *one* tool to get an overview what their system supports and to monitor and debug CPU power management in detail. The tool should compile and work on as many architectures as possible. Once this tool stabilizes a bit, it is intended to replace the Intel-specific tools in tools/power/x86 Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Diffstat (limited to '')
-rw-r--r--tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c340
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c185
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c446
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h68
-rw-r--r--tools/power/cpupower/utils/idle_monitor/idle_monitors.def7
-rw-r--r--tools/power/cpupower/utils/idle_monitor/idle_monitors.h18
-rw-r--r--tools/power/cpupower/utils/idle_monitor/mperf_monitor.c258
-rw-r--r--tools/power/cpupower/utils/idle_monitor/nhm_idle.c212
-rw-r--r--tools/power/cpupower/utils/idle_monitor/snb_idle.c189
9 files changed, 1723 insertions, 0 deletions
diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
new file mode 100644
index 000000000000..3de94322dfb8
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
@@ -0,0 +1,340 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * PCI initialization based on example code from:
+ * Andreas Herrmann <andreas.herrmann3@amd.com>
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <time.h>
+#include <string.h>
+
+#include <pci/pci.h>
+
+#include "idle_monitor/cpupower-monitor.h"
+#include "helpers/helpers.h"
+
+/******** PCI parts could go into own file and get shared ***************/
+
+#define PCI_NON_PC0_OFFSET 0xb0
+#define PCI_PC1_OFFSET 0xb4
+#define PCI_PC6_OFFSET 0xb8
+
+#define PCI_MONITOR_ENABLE_REG 0xe0
+
+#define PCI_NON_PC0_ENABLE_BIT 0
+#define PCI_PC1_ENABLE_BIT 1
+#define PCI_PC6_ENABLE_BIT 2
+
+#define PCI_NBP1_STAT_OFFSET 0x98
+#define PCI_NBP1_ACTIVE_BIT 2
+#define PCI_NBP1_ENTERED_BIT 1
+
+#define PCI_NBP1_CAP_OFFSET 0x90
+#define PCI_NBP1_CAPABLE_BIT 31
+
+#define OVERFLOW_MS 343597 /* 32 bit register filled at 12500 HZ
+ (1 tick per 80ns) */
+
+enum amd_fam14h_states {NON_PC0 = 0, PC1, PC6, NBP1,
+ AMD_FAM14H_STATE_NUM};
+
+static int fam14h_get_count_percent(unsigned int self_id, double *percent,
+ unsigned int cpu);
+static int fam14h_nbp1_count(unsigned int id, unsigned long long *count,
+ unsigned int cpu);
+
+static cstate_t amd_fam14h_cstates[AMD_FAM14H_STATE_NUM] = {
+ {
+ .name = "!PC0",
+ .desc = N_("Package in sleep state (PC1 or deeper)"),
+ .id = NON_PC0,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = fam14h_get_count_percent,
+ },
+ {
+ .name = "PC1",
+ .desc = N_("Processor Package C1"),
+ .id = PC1,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = fam14h_get_count_percent,
+ },
+ {
+ .name = "PC6",
+ .desc = N_("Processor Package C6"),
+ .id = PC6,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = fam14h_get_count_percent,
+ },
+ {
+ .name = "NBP1",
+ .desc = N_("North Bridge P1 boolean counter (returns 0 or 1)"),
+ .id = NBP1,
+ .range = RANGE_PACKAGE,
+ .get_count = fam14h_nbp1_count,
+ },
+};
+
+static struct pci_access *pci_acc;
+static int pci_vendor_id = 0x1022;
+static int pci_dev_ids[2] = {0x1716, 0};
+static struct pci_dev *amd_fam14h_pci_dev;
+
+static int nbp1_entered;
+
+struct timespec start_time;
+static unsigned long long timediff;
+
+#ifdef DEBUG
+struct timespec dbg_time;
+long dbg_timediff;
+#endif
+
+static unsigned long long *previous_count[AMD_FAM14H_STATE_NUM];
+static unsigned long long *current_count[AMD_FAM14H_STATE_NUM];
+
+static int amd_fam14h_get_pci_info(struct cstate *state,
+ unsigned int *pci_offset,
+ unsigned int *enable_bit,
+ unsigned int cpu)
+{
+ switch(state->id) {
+ case NON_PC0:
+ *enable_bit = PCI_NON_PC0_ENABLE_BIT;
+ *pci_offset = PCI_NON_PC0_OFFSET;
+ break;
+ case PC1:
+ *enable_bit = PCI_PC1_ENABLE_BIT;
+ *pci_offset = PCI_PC1_OFFSET;
+ break;
+ case PC6:
+ *enable_bit = PCI_PC6_ENABLE_BIT;
+ *pci_offset = PCI_PC6_OFFSET;
+ break;
+ case NBP1:
+ *enable_bit = PCI_NBP1_ENTERED_BIT;
+ *pci_offset = PCI_NBP1_STAT_OFFSET;
+ break;
+ default:
+ return -1;
+ };
+ return 0;
+}
+
+static int amd_fam14h_init(cstate_t *state, unsigned int cpu)
+{
+ int enable_bit, pci_offset, ret;
+ uint32_t val;
+
+ ret = amd_fam14h_get_pci_info(state, &pci_offset, &enable_bit, cpu);
+ if (ret)
+ return ret;
+
+ /* NBP1 needs extra treating -> write 1 to D18F6x98 bit 1 for init */
+ if (state->id == NBP1) {
+ val = pci_read_long(amd_fam14h_pci_dev, pci_offset);
+ val |= 1 << enable_bit;
+ val = pci_write_long(amd_fam14h_pci_dev, pci_offset, val);
+ return ret;
+ }
+
+ /* Enable monitor */
+ val = pci_read_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG);
+ dprint("Init %s: read at offset: 0x%x val: %u\n", state->name,
+ PCI_MONITOR_ENABLE_REG, (unsigned int) val);
+ val |= 1 << enable_bit;
+ pci_write_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG, val);
+
+ dprint("Init %s: offset: 0x%x enable_bit: %d - val: %u (%u)\n",
+ state->name, PCI_MONITOR_ENABLE_REG, enable_bit,
+ (unsigned int) val, cpu);
+
+ /* Set counter to zero */
+ pci_write_long(amd_fam14h_pci_dev, pci_offset, 0);
+ previous_count[state->id][cpu] = 0;
+
+ return 0;
+}
+
+static int amd_fam14h_disable(cstate_t *state, unsigned int cpu)
+{
+ int enable_bit, pci_offset, ret;
+ uint32_t val;
+
+ ret = amd_fam14h_get_pci_info(state, &pci_offset, &enable_bit, cpu);
+ if (ret)
+ return ret;
+
+ val = pci_read_long(amd_fam14h_pci_dev, pci_offset);
+ dprint("%s: offset: 0x%x %u\n", state->name, pci_offset, val);
+ if (state->id == NBP1) {
+ /* was the bit whether NBP1 got entered set? */
+ nbp1_entered = (val & (1 << PCI_NBP1_ACTIVE_BIT)) |
+ (val & (1 << PCI_NBP1_ENTERED_BIT));
+
+ dprint("NBP1 was %sentered - 0x%x - enable_bit: "
+ "%d - pci_offset: 0x%x\n",
+ nbp1_entered ? "" : "not ",
+ val, enable_bit, pci_offset);
+ return ret;
+ }
+ current_count[state->id][cpu] = val;
+
+ dprint("%s: Current - %llu (%u)\n", state->name,
+ current_count[state->id][cpu], cpu);
+ dprint("%s: Previous - %llu (%u)\n", state->name,
+ previous_count[state->id][cpu], cpu);
+
+ val = pci_read_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG);
+ val &= ~(1 << enable_bit);
+ pci_write_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG, val);
+
+ return 0;
+}
+
+static int fam14h_nbp1_count(unsigned int id, unsigned long long *count,
+ unsigned int cpu)
+{
+ if (id == NBP1) {
+ if (nbp1_entered)
+ *count = 1;
+ else
+ *count = 0;
+ return 0;
+ }
+ return -1;
+}
+static int fam14h_get_count_percent(unsigned int id, double *percent,
+ unsigned int cpu)
+{
+ unsigned long diff;
+
+ if (id >= AMD_FAM14H_STATE_NUM)
+ return -1;
+ /* residency count in 80ns -> divide through 12.5 to get us residency */
+ diff = current_count[id][cpu] - previous_count[id][cpu];
+
+ if (timediff == 0)
+ *percent = 0.0;
+ else
+ *percent = 100.0 * diff / timediff / 12.5;
+
+ dprint("Timediff: %llu - res~: %lu us - percent: %.2f %%\n",
+ timediff, diff * 10 / 125, *percent);
+
+ return 0;
+}
+
+static int amd_fam14h_start(void)
+{
+ int num, cpu;
+ clock_gettime(CLOCK_REALTIME, &start_time);
+ for (num = 0; num < AMD_FAM14H_STATE_NUM; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ amd_fam14h_init(&amd_fam14h_cstates[num], cpu);
+ }
+ }
+#ifdef DEBUG
+ clock_gettime(CLOCK_REALTIME, &dbg_time);
+ dbg_timediff = timespec_diff_us(start_time, dbg_time);
+ dprint("Enabling counters took: %lu us\n",
+ dbg_timediff);
+#endif
+ return 0;
+}
+
+static int amd_fam14h_stop(void)
+{
+ int num, cpu;
+ struct timespec end_time;
+
+ clock_gettime(CLOCK_REALTIME, &end_time);
+
+ for (num = 0; num < AMD_FAM14H_STATE_NUM; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ amd_fam14h_disable(&amd_fam14h_cstates[num], cpu);
+ }
+ }
+#ifdef DEBUG
+ clock_gettime(CLOCK_REALTIME, &dbg_time);
+ dbg_timediff = timespec_diff_us(end_time, dbg_time);
+ dprint("Disabling counters took: %lu ns\n", dbg_timediff);
+#endif
+ timediff = timespec_diff_us(start_time, end_time);
+ if (timediff / 1000 > OVERFLOW_MS)
+ print_overflow_err((unsigned int)timediff / 1000000,
+ OVERFLOW_MS / 1000);
+
+ return 0;
+}
+
+static int is_nbp1_capable(void)
+{
+ uint32_t val;
+ val = pci_read_long(amd_fam14h_pci_dev, PCI_NBP1_CAP_OFFSET);
+ return val & (1 << 31);
+}
+
+struct cpuidle_monitor* amd_fam14h_register(void) {
+
+ int num;
+
+ if (cpupower_cpu_info.vendor != X86_VENDOR_AMD)
+ return NULL;
+
+ if (cpupower_cpu_info.family == 0x14) {
+ if (cpu_count <= 0 || cpu_count > 2) {
+ fprintf(stderr, "AMD fam14h: Invalid cpu count: %d\n",
+ cpu_count);
+ return NULL;
+ }
+ } else
+ return NULL;
+
+ /* We do not alloc for nbp1 machine wide counter */
+ for (num = 0; num < AMD_FAM14H_STATE_NUM - 1; num++) {
+ previous_count[num] = calloc (cpu_count,
+ sizeof(unsigned long long));
+ current_count[num] = calloc (cpu_count,
+ sizeof(unsigned long long));
+ }
+
+ amd_fam14h_pci_dev = pci_acc_init(&pci_acc, pci_vendor_id, pci_dev_ids);
+ if (amd_fam14h_pci_dev == NULL || pci_acc == NULL)
+ return NULL;
+
+ if (!is_nbp1_capable())
+ amd_fam14h_monitor.hw_states_num = AMD_FAM14H_STATE_NUM - 1;
+
+ amd_fam14h_monitor.name_len = strlen(amd_fam14h_monitor.name);
+ return &amd_fam14h_monitor;
+}
+
+static void amd_fam14h_unregister(void)
+{
+ int num;
+ for (num = 0; num < AMD_FAM14H_STATE_NUM - 1; num++) {
+ free(previous_count[num]);
+ free(current_count[num]);
+ }
+ pci_cleanup(pci_acc);
+}
+
+struct cpuidle_monitor amd_fam14h_monitor = {
+ .name = "Ontario",
+ .hw_states = amd_fam14h_cstates,
+ .hw_states_num = AMD_FAM14H_STATE_NUM,
+ .start = amd_fam14h_start,
+ .stop = amd_fam14h_stop,
+ .do_register = amd_fam14h_register,
+ .unregister = amd_fam14h_unregister,
+ .needs_root = 1,
+ .overflow_s = OVERFLOW_MS / 1000,
+};
+#endif /* #if defined(__i386__) || defined(__x86_64__) */
diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
new file mode 100644
index 000000000000..63f6d670517b
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -0,0 +1,185 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+
+#include "helpers/sysfs.h"
+#include "helpers/helpers.h"
+#include "idle_monitor/cpupower-monitor.h"
+
+#define CPUIDLE_STATES_MAX 10
+static cstate_t cpuidle_cstates[CPUIDLE_STATES_MAX];
+struct cpuidle_monitor cpuidle_sysfs_monitor;
+
+static unsigned long long **previous_count;
+static unsigned long long **current_count;
+struct timespec start_time;
+static unsigned long long timediff;
+
+static int cpuidle_get_count_percent(unsigned int id, double *percent,
+ unsigned int cpu)
+{
+ unsigned long long statediff = current_count[cpu][id]
+ - previous_count[cpu][id];
+ dprint("%s: - diff: %llu - percent: %f (%u)\n",
+ cpuidle_cstates[id].name, timediff, *percent, cpu);
+
+ if (timediff == 0)
+ *percent = 0.0;
+ else
+ *percent = ((100.0 * statediff) / timediff);
+
+ dprint("%s: - timediff: %llu - statediff: %llu - percent: %f (%u)\n",
+ cpuidle_cstates[id].name, timediff, statediff, *percent, cpu);
+
+ return 0;
+}
+
+static int cpuidle_start(void)
+{
+ int cpu, state;
+ clock_gettime(CLOCK_REALTIME, &start_time);
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ for (state = 0; state < cpuidle_sysfs_monitor.hw_states_num;
+ state++) {
+ previous_count[cpu][state] =
+ sysfs_get_idlestate_time(cpu, state);
+ dprint("CPU %d - State: %d - Val: %llu\n",
+ cpu, state, previous_count[cpu][state]);
+ }
+
+ };
+ return 0;
+}
+
+static int cpuidle_stop(void)
+{
+ int cpu, state;
+ struct timespec end_time;
+ clock_gettime(CLOCK_REALTIME, &end_time);
+ timediff = timespec_diff_us(start_time, end_time);
+
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ for (state = 0; state < cpuidle_sysfs_monitor.hw_states_num;
+ state++) {
+ current_count[cpu][state] =
+ sysfs_get_idlestate_time(cpu, state);
+ dprint("CPU %d - State: %d - Val: %llu\n",
+ cpu, state, previous_count[cpu][state]);
+ }
+ };
+ return 0;
+}
+
+void fix_up_intel_idle_driver_name(char *tmp, int num)
+{
+ /* fix up cpuidle name for intel idle driver */
+ if (!strncmp(tmp, "NHM-", 4)) {
+ switch(num) {
+ case 1: strcpy(tmp, "C1");
+ break;
+ case 2: strcpy(tmp, "C3");
+ break;
+ case 3: strcpy(tmp, "C6");
+ break;
+ }
+ } else if (!strncmp(tmp, "SNB-", 4)) {
+ switch(num) {
+ case 1: strcpy(tmp, "C1");
+ break;
+ case 2: strcpy(tmp, "C3");
+ break;
+ case 3: strcpy(tmp, "C6");
+ break;
+ case 4: strcpy(tmp, "C7");
+ break;
+ }
+ } else if (!strncmp(tmp, "ATM-", 4)) {
+ switch(num) {
+ case 1: strcpy(tmp, "C1");
+ break;
+ case 2: strcpy(tmp, "C2");
+ break;
+ case 3: strcpy(tmp, "C4");
+ break;
+ case 4: strcpy(tmp, "C6");
+ break;
+ }
+ }
+}
+
+static struct cpuidle_monitor* cpuidle_register(void)
+{
+ int num;
+ char *tmp;
+
+ /* Assume idle state count is the same for all CPUs */
+ cpuidle_sysfs_monitor.hw_states_num = sysfs_get_idlestate_count(0);
+
+ if (cpuidle_sysfs_monitor.hw_states_num == 0)
+ return NULL;
+
+ for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num ++) {
+ tmp = sysfs_get_idlestate_name(0, num);
+ if (tmp == NULL)
+ continue;
+
+ fix_up_intel_idle_driver_name(tmp, num);
+ strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1);
+ free(tmp);
+
+ tmp = sysfs_get_idlestate_desc(0, num);
+ if (tmp == NULL)
+ continue;
+ strncpy(cpuidle_cstates[num].desc, tmp, CSTATE_DESC_LEN - 1);
+ free(tmp);
+
+ cpuidle_cstates[num].range = RANGE_THREAD;
+ cpuidle_cstates[num].id = num;
+ cpuidle_cstates[num].get_count_percent = cpuidle_get_count_percent;
+ };
+
+ /* Free this at program termination */
+ previous_count = malloc(sizeof (long long*) * cpu_count);
+ current_count = malloc(sizeof (long long*) * cpu_count);
+ for (num = 0; num < cpu_count; num++) {
+ previous_count[num] = malloc (sizeof(long long) *
+ cpuidle_sysfs_monitor.hw_states_num);
+ current_count[num] = malloc (sizeof(long long) *
+ cpuidle_sysfs_monitor.hw_states_num);
+ }
+
+ cpuidle_sysfs_monitor.name_len = strlen(cpuidle_sysfs_monitor.name);
+ return &cpuidle_sysfs_monitor;
+}
+
+void cpuidle_unregister(void)
+{
+ int num;
+
+ for (num = 0; num < cpu_count; num++) {
+ free(previous_count[num]);
+ free(current_count[num]);
+ }
+ free(previous_count);
+ free(current_count);
+}
+
+struct cpuidle_monitor cpuidle_sysfs_monitor = {
+ .name = "Idle_Stats",
+ .hw_states = cpuidle_cstates,
+ .start = cpuidle_start,
+ .stop = cpuidle_stop,
+ .do_register = cpuidle_register,
+ .unregister = cpuidle_unregister,
+ .needs_root = 0,
+ .overflow_s = UINT_MAX,
+};
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
new file mode 100644
index 000000000000..3e96e79de3c2
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
@@ -0,0 +1,446 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Output format inspired by Len Brown's <lenb@kernel.org> turbostat tool.
+ *
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <libgen.h>
+
+#include "idle_monitor/cpupower-monitor.h"
+#include "idle_monitor/idle_monitors.h"
+#include "helpers/helpers.h"
+
+/* Define pointers to all monitors. */
+#define DEF(x) & x ## _monitor ,
+struct cpuidle_monitor * all_monitors[] = {
+#include "idle_monitors.def"
+0
+};
+
+static struct cpuidle_monitor *monitors[MONITORS_MAX];
+static unsigned int avail_monitors;
+
+static char *progname;
+
+enum operation_mode_e { list = 1, show, show_all };
+static int mode;
+static int interval = 1;
+static char *show_monitors_param;
+static struct cpupower_topology cpu_top;
+
+/* ToDo: Document this in the manpage */
+static char range_abbr[RANGE_MAX] = { 'T', 'C', 'P', 'M', };
+
+long long timespec_diff_us(struct timespec start, struct timespec end)
+{
+ struct timespec temp;
+ if ((end.tv_nsec - start.tv_nsec) < 0) {
+ temp.tv_sec = end.tv_sec - start.tv_sec - 1;
+ temp.tv_nsec = 1000000000 + end.tv_nsec - start.tv_nsec;
+ } else {
+ temp.tv_sec = end.tv_sec - start.tv_sec;
+ temp.tv_nsec = end.tv_nsec - start.tv_nsec;
+ }
+ return (temp.tv_sec * 1000000) + (temp.tv_nsec / 1000);
+}
+
+void monitor_help(void)
+{
+ printf(_("cpupower monitor: [-m <mon1>,[<mon2>],.. ] command\n"));
+ printf(_("cpupower monitor: [-m <mon1>,[<mon2>],.. ] [ -i interval_sec ]\n"));
+ printf(_("cpupower monitor: -l\n"));
+ printf(_("\t command: pass an arbitrary command to measure specific workload\n"));
+ printf(_("\t -i: time intervall to measure for in seconds (default 1)\n"));
+ printf(_("\t -l: list available CPU sleep monitors (for use with -m)\n"));
+ printf(_("\t -m: show specific CPU sleep monitors only (in same order)\n"));
+ printf(_("\t -h: print this help\n"));
+ printf("\n");
+ printf(_("only one of: -l, -m are allowed\nIf none of them is passed,"));
+ printf(_(" all supported monitors are shown\n"));
+}
+
+void print_n_spaces(int n)
+{
+ int x;
+ for (x = 0; x < n; x++)
+ printf(" ");
+}
+
+/* size of s must be at least n + 1 */
+int fill_string_with_spaces(char *s, int n)
+{
+ int len = strlen(s);
+ if (len > n)
+ return -1;
+ for (; len < n; len++)
+ s[len] = ' ';
+ s[len] = '\0';
+ return 0;
+}
+
+void print_header(int topology_depth)
+{
+ int unsigned mon;
+ int state, need_len, pr_mon_len;
+ cstate_t s;
+ char buf[128] = "";
+ int percent_width = 4;
+
+ fill_string_with_spaces(buf, topology_depth * 5 - 1);
+ printf("%s|", buf);
+
+ for (mon = 0; mon < avail_monitors; mon++) {
+ pr_mon_len = 0;
+ need_len = monitors[mon]->hw_states_num * (percent_width + 3)
+ - 1;
+ if (mon != 0) {
+ printf("|| ");
+ need_len --;
+ }
+ sprintf(buf, "%s", monitors[mon]->name);
+ fill_string_with_spaces(buf, need_len);
+ printf("%s", buf);
+ }
+ printf("\n");
+
+ if (topology_depth > 2)
+ printf("PKG |");
+ if (topology_depth > 1)
+ printf("CORE|");
+ if (topology_depth > 0)
+ printf("CPU |");
+
+ for (mon = 0; mon < avail_monitors; mon++) {
+ if (mon != 0)
+ printf("|| ");
+ else
+ printf(" ");
+ for (state = 0; state < monitors[mon]->hw_states_num; state++) {
+ if (state != 0)
+ printf(" | ");
+ s = monitors[mon]->hw_states[state];
+ sprintf(buf, "%s", s.name);
+ fill_string_with_spaces(buf, percent_width);
+ printf("%s", buf);
+ }
+ printf(" ");
+ }
+ printf("\n");
+}
+
+
+void print_results(int topology_depth, int cpu)
+{
+ unsigned int mon;
+ int state, ret;
+ double percent;
+ unsigned long long result;
+ cstate_t s;
+
+ if (topology_depth > 2)
+ printf("%4d|", cpu_top.core_info[cpu].pkg);
+ if (topology_depth > 1)
+ printf("%4d|", cpu_top.core_info[cpu].core);
+ if (topology_depth > 0)
+ printf("%4d|", cpu_top.core_info[cpu].cpu);
+
+ for (mon = 0; mon < avail_monitors; mon++) {
+ if (mon != 0)
+ printf("||");
+
+ for (state = 0; state < monitors[mon]->hw_states_num; state++) {
+ if (state != 0)
+ printf("|");
+
+ s = monitors[mon]->hw_states[state];
+
+ if (s.get_count_percent) {
+ ret = s.get_count_percent(s.id, &percent,
+ cpu_top.core_info[cpu].cpu);
+ if (ret) {
+ printf("******");
+ } else if (percent >= 100.0)
+ printf("%6.1f", percent);
+ else
+ printf("%6.2f", percent);
+ }
+ else if (s.get_count) {
+ ret = s.get_count(s.id, &result,
+ cpu_top.core_info[cpu].cpu);
+ if (ret) {
+ printf("******");
+ } else
+ printf("%6llu", result);
+ }
+ else {
+ printf(_("Monitor %s, Counter %s has no count "
+ "function. Implementation error\n"),
+ monitors[mon]->name, s.name);
+ exit (EXIT_FAILURE);
+ }
+ }
+ }
+ /* cpu offline */
+ if (cpu_top.core_info[cpu].pkg == -1 ||
+ cpu_top.core_info[cpu].core == -1) {
+ printf(_(" *is offline\n"));
+ return;
+ } else
+ printf("\n");
+}
+
+
+/* param: string passed by -m param (The list of monitors to show)
+ *
+ * Monitors must have been registered already, matching monitors
+ * are picked out and available monitors array is overridden
+ * with matching ones
+ *
+ * Monitors get sorted in the same order the user passes them
+*/
+
+static void parse_monitor_param(char* param)
+{
+ unsigned int num;
+ int mon, hits = 0;
+ char *tmp = param, *token;
+ struct cpuidle_monitor *tmp_mons[MONITORS_MAX];
+
+
+ for (mon = 0; mon < MONITORS_MAX;mon++, tmp = NULL) {
+ token = strtok(tmp, ",");
+ if (token == NULL)
+ break;
+ if (strlen(token) >= MONITOR_NAME_LEN) {
+ printf(_("%s: max monitor name length"
+ " (%d) exceeded\n"), token, MONITOR_NAME_LEN);
+ continue;
+ }
+
+ for (num = 0; num < avail_monitors; num++) {
+ if (!strcmp(monitors[num]->name, token)) {
+ dprint("Found requested monitor: %s\n", token);
+ tmp_mons[hits] = monitors[num];
+ hits++;
+ }
+ }
+ }
+ if (hits == 0) {
+ printf(_("No matching monitor found in %s, "
+ "try -l option\n"), param);
+ monitor_help();
+ exit(EXIT_FAILURE);
+ }
+ /* Override detected/registerd monitors array with requested one */
+ memcpy(monitors, tmp_mons, sizeof(struct cpuidle_monitor*) * MONITORS_MAX);
+ avail_monitors = hits;
+}
+
+void list_monitors(void) {
+ unsigned int mon;
+ int state;
+ cstate_t s;
+
+ for (mon = 0; mon < avail_monitors; mon++) {
+ printf(_("Monitor \"%s\" (%d states) - Might overflow after %u "
+ "s\n"), monitors[mon]->name, monitors[mon]->hw_states_num,
+ monitors[mon]->overflow_s);
+
+ for (state = 0; state < monitors[mon]->hw_states_num; state++) {
+ s = monitors[mon]->hw_states[state];
+ /*
+ * ToDo show more state capabilities:
+ * percent, time (granlarity)
+ */
+ printf("%s\t[%c] -> %s\n", s.name, range_abbr[s.range],
+ gettext(s.desc));
+ }
+ }
+}
+
+int fork_it(char **argv)
+{
+ int status;
+ unsigned int num;
+ unsigned long long timediff;
+ pid_t child_pid;
+ struct timespec start, end;
+
+ child_pid = fork();
+ clock_gettime(CLOCK_REALTIME, &start);
+
+ for (num = 0; num < avail_monitors; num++)
+ monitors[num]->start();
+
+ if (!child_pid) {
+ /* child */
+ execvp(argv[0], argv);
+ } else {
+ /* parent */
+ if (child_pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+
+ signal(SIGINT, SIG_IGN);
+ signal(SIGQUIT, SIG_IGN);
+ if (waitpid(child_pid, &status, 0) == -1) {
+ perror("wait");
+ exit(1);
+ }
+ }
+ clock_gettime(CLOCK_REALTIME, &end);
+ for (num = 0; num < avail_monitors; num++)
+ monitors[num]->stop();
+
+ timediff = timespec_diff_us(start, end);
+ if (WIFEXITED(status))
+ printf(_("%s took %.5f seconds and exited with status %d\n"),
+ argv[0], timediff / (1000.0 * 1000), WEXITSTATUS(status));
+ return 0;
+}
+
+int do_interval_measure(int i)
+{
+ unsigned int num;
+
+ for (num = 0; num < avail_monitors; num++) {
+ dprint("HW C-state residency monitor: %s - States: %d\n",
+ monitors[num]->name, monitors[num]->hw_states_num);
+ monitors[num]->start();
+ }
+ sleep(i);
+ for (num = 0; num < avail_monitors; num++) {
+ monitors[num]->stop();
+ }
+ return 0;
+}
+
+static void cmdline(int argc, char *argv[])
+{
+ int opt;
+ progname = basename(argv[0]);
+
+ while ((opt = getopt(argc, argv, "+hli:m:")) != -1) {
+ switch (opt) {
+ case 'h':
+ monitor_help();
+ exit(EXIT_SUCCESS);
+ case 'l':
+ if (mode) {
+ monitor_help();
+ exit(EXIT_FAILURE);
+ }
+ mode = list;
+ break;
+ case 'i':
+ /* only allow -i with -m or no option */
+ if (mode && mode != show) {
+ monitor_help();
+ exit(EXIT_FAILURE);
+ }
+ interval = atoi(optarg);
+ break;
+ case 'm':
+ if (mode) {
+ monitor_help();
+ exit(EXIT_FAILURE);
+ }
+ mode = show;
+ show_monitors_param = optarg;
+ break;
+ default:
+ monitor_help();
+ exit(EXIT_FAILURE);
+ }
+ }
+ if (!mode)
+ mode = show_all;
+}
+
+int cmd_monitor(int argc, char **argv)
+{
+ unsigned int num;
+ struct cpuidle_monitor *test_mon;
+ int cpu;
+
+ cmdline(argc, argv);
+ cpu_count = get_cpu_topology(&cpu_top);
+ if (cpu_count < 0) {
+ printf(_("Cannot read number of available processors\n"));
+ return EXIT_FAILURE;
+ }
+
+ dprint("System has up to %d CPU cores\n", cpu_count);
+
+ for (num = 0; all_monitors[num]; num++) {
+ dprint("Try to register: %s\n", all_monitors[num]->name);
+ test_mon = all_monitors[num]->do_register();
+ if (test_mon) {
+ if (test_mon->needs_root && !run_as_root) {
+ fprintf(stderr, _("Available monitor %s needs "
+ "root access\n"), test_mon->name);
+ continue;
+ }
+ monitors[avail_monitors] = test_mon;
+ dprint("%s registered\n", all_monitors[num]->name);
+ avail_monitors++;
+ }
+ }
+
+ if (avail_monitors == 0) {
+ printf(_("No HW Cstate monitors found\n"));
+ return 1;
+ }
+
+ if (mode == list) {
+ list_monitors();
+ exit(EXIT_SUCCESS);
+ }
+
+ if (mode == show)
+ parse_monitor_param(show_monitors_param);
+
+ dprint("Packages: %d - Cores: %d - CPUs: %d\n",
+ cpu_top.pkgs, cpu_top.cores, cpu_count);
+
+ /*
+ * if any params left, it must be a command to fork
+ */
+ if (argc - optind)
+ fork_it(argv + optind);
+ else
+ do_interval_measure(interval);
+
+ /* ToDo: Topology parsing needs fixing first to do
+ this more generically */
+ if (cpu_top.pkgs > 1)
+ print_header(3);
+ else
+ print_header(1);
+
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ if (cpu_top.pkgs > 1)
+ print_results(3, cpu);
+ else
+ print_results(1, cpu);
+ }
+
+ for (num = 0; num < avail_monitors; num++) {
+ monitors[num]->unregister();
+ }
+ cpu_topology_release(cpu_top);
+ return 0;
+}
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
new file mode 100644
index 000000000000..9312ee1f2dbc
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
@@ -0,0 +1,68 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ */
+
+#ifndef __CPUIDLE_INFO_HW__
+#define __CPUIDLE_INFO_HW__
+
+#include <stdarg.h>
+#include <time.h>
+
+#include "idle_monitor/idle_monitors.h"
+
+#define MONITORS_MAX 20
+#define MONITOR_NAME_LEN 20
+#define CSTATE_NAME_LEN 5
+#define CSTATE_DESC_LEN 60
+
+int cpu_count;
+
+/* Hard to define the right names ...: */
+enum power_range_e {
+ RANGE_THREAD, /* Lowest in topology hierarcy, AMD: core, Intel: thread
+ kernel sysfs: cpu */
+ RANGE_CORE, /* AMD: unit, Intel: core, kernel_sysfs: core_id */
+ RANGE_PACKAGE, /* Package, processor socket */
+ RANGE_MACHINE, /* Machine, platform wide */
+ RANGE_MAX };
+
+typedef struct cstate {
+ int id;
+ enum power_range_e range;
+ char name[CSTATE_NAME_LEN];
+ char desc[CSTATE_DESC_LEN];
+
+ /* either provide a percentage or a general count */
+ int (*get_count_percent)(unsigned int self_id, double *percent,
+ unsigned int cpu);
+ int (*get_count)(unsigned int self_id, unsigned long long *count,
+ unsigned int cpu);
+} cstate_t;
+
+struct cpuidle_monitor {
+ /* Name must not contain whitespaces */
+ char name[MONITOR_NAME_LEN];
+ int name_len;
+ int hw_states_num;
+ cstate_t *hw_states;
+ int (*start) (void);
+ int (*stop) (void);
+ struct cpuidle_monitor* (*do_register) (void);
+ void (*unregister)(void);
+ unsigned int overflow_s;
+ int needs_root;
+};
+
+extern long long timespec_diff_us(struct timespec start, struct timespec end);
+
+#define print_overflow_err(mes, ov) \
+{ \
+ fprintf(stderr, gettext("Measure took %u seconds, but registers could " \
+ "overflow at %u seconds, results " \
+ "could be inaccurate\n"), mes, ov); \
+}
+
+#endif /* __CPUIDLE_INFO_HW__ */
diff --git a/tools/power/cpupower/utils/idle_monitor/idle_monitors.def b/tools/power/cpupower/utils/idle_monitor/idle_monitors.def
new file mode 100644
index 000000000000..e3f8d9b2b18f
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/idle_monitors.def
@@ -0,0 +1,7 @@
+#if defined(__i386__) || defined(__x86_64__)
+DEF(amd_fam14h)
+DEF(intel_nhm)
+DEF(intel_snb)
+DEF(mperf)
+#endif
+DEF(cpuidle_sysfs)
diff --git a/tools/power/cpupower/utils/idle_monitor/idle_monitors.h b/tools/power/cpupower/utils/idle_monitor/idle_monitors.h
new file mode 100644
index 000000000000..4fcdeb1e07e8
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/idle_monitors.h
@@ -0,0 +1,18 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Based on the idea from Michael Matz <matz@suse.de>
+ *
+ */
+
+#ifndef _CPUIDLE_IDLE_MONITORS_H_
+#define _CPUIDLE_IDLE_MONITORS_H_
+
+#define DEF(x) extern struct cpuidle_monitor x ##_monitor;
+#include "idle_monitors.def"
+#undef DEF
+extern struct cpuidle_monitor *all_monitors[];
+
+#endif /* _CPUIDLE_IDLE_MONITORS_H_ */
diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
new file mode 100644
index 000000000000..f8545e40e232
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
@@ -0,0 +1,258 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+#include <cpufreq.h>
+
+#include "helpers/helpers.h"
+#include "idle_monitor/cpupower-monitor.h"
+
+#define MSR_APERF 0xE8
+#define MSR_MPERF 0xE7
+
+#define MSR_TSC 0x10
+
+enum mperf_id { C0 = 0, Cx, AVG_FREQ, MPERF_CSTATE_COUNT };
+
+static int mperf_get_count_percent(unsigned int self_id, double *percent,
+ unsigned int cpu);
+static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
+ unsigned int cpu);
+
+static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = {
+ {
+ .name = "C0",
+ .desc = N_("Processor Core not idle"),
+ .id = C0,
+ .range = RANGE_THREAD,
+ .get_count_percent = mperf_get_count_percent,
+ },
+ {
+ .name = "Cx",
+ .desc = N_("Processor Core in an idle state"),
+ .id = Cx,
+ .range = RANGE_THREAD,
+ .get_count_percent = mperf_get_count_percent,
+ },
+
+ {
+ .name = "Freq",
+ .desc = N_("Average Frequency (including boost) in MHz"),
+ .id = AVG_FREQ,
+ .range = RANGE_THREAD,
+ .get_count = mperf_get_count_freq,
+ },
+};
+
+static unsigned long long tsc_at_measure_start;
+static unsigned long long tsc_at_measure_end;
+static unsigned long max_frequency;
+static unsigned long long *mperf_previous_count;
+static unsigned long long *aperf_previous_count;
+static unsigned long long *mperf_current_count;
+static unsigned long long *aperf_current_count;
+/* valid flag for all CPUs. If a MSR read failed it will be zero */
+static int *is_valid;
+
+static int mperf_get_tsc(unsigned long long *tsc)
+{
+ return read_msr(0, MSR_TSC, tsc);
+}
+
+static int mperf_init_stats(unsigned int cpu)
+{
+ unsigned long long val;
+ int ret;
+
+ ret = read_msr(cpu, MSR_APERF, &val);
+ aperf_previous_count[cpu] = val;
+ ret |= read_msr(cpu, MSR_MPERF, &val);
+ mperf_previous_count[cpu] = val;
+ is_valid[cpu] = !ret;
+
+ return 0;
+}
+
+static int mperf_measure_stats(unsigned int cpu)
+{
+ unsigned long long val;
+ int ret;
+
+ ret = read_msr(cpu, MSR_APERF, &val);
+ aperf_current_count[cpu] = val;
+ ret |= read_msr(cpu, MSR_MPERF, &val);
+ mperf_current_count[cpu] = val;
+ is_valid[cpu] = !ret;
+
+ return 0;
+}
+
+/*
+ * get_average_perf()
+ *
+ * Returns the average performance (also considers boosted frequencies)
+ *
+ * Input:
+ * aperf_diff: Difference of the aperf register over a time period
+ * mperf_diff: Difference of the mperf register over the same time period
+ * max_freq: Maximum frequency (P0)
+ *
+ * Returns:
+ * Average performance over the time period
+ */
+static unsigned long get_average_perf(unsigned long long aperf_diff,
+ unsigned long long mperf_diff)
+{
+ unsigned int perf_percent = 0;
+ if (((unsigned long)(-1) / 100) < aperf_diff) {
+ int shift_count = 7;
+ aperf_diff >>= shift_count;
+ mperf_diff >>= shift_count;
+ }
+ perf_percent = (aperf_diff * 100) / mperf_diff;
+ return (max_frequency * perf_percent) / 100;
+}
+
+static int mperf_get_count_percent(unsigned int id, double *percent,
+ unsigned int cpu)
+{
+ unsigned long long aperf_diff, mperf_diff, tsc_diff;
+
+ if (!is_valid[cpu])
+ return -1;
+
+ if (id != C0 && id != Cx)
+ return -1;
+
+ mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu];
+ aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];
+ tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
+
+ *percent = 100.0 * mperf_diff / tsc_diff;
+ dprint("%s: mperf_diff: %llu, tsc_diff: %llu\n",
+ mperf_cstates[id].name, mperf_diff, tsc_diff);
+
+ if (id == Cx)
+ *percent = 100.0 - *percent;
+
+ dprint("%s: previous: %llu - current: %llu - (%u)\n", mperf_cstates[id].name,
+ mperf_diff, aperf_diff, cpu);
+ dprint("%s: %f\n", mperf_cstates[id].name, *percent);
+ return 0;
+}
+
+static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
+ unsigned int cpu)
+{
+ unsigned long long aperf_diff, mperf_diff;
+
+ if (id != AVG_FREQ)
+ return 1;
+
+ if (!is_valid[cpu])
+ return -1;
+
+ mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu];
+ aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];
+
+ /* Return MHz for now, might want to return KHz if column width is more
+ generic */
+ *count = get_average_perf(aperf_diff, mperf_diff) / 1000;
+ dprint("%s: %llu\n", mperf_cstates[id].name, *count);
+
+ return 0;
+}
+
+static int mperf_start(void)
+{
+ int cpu;
+ unsigned long long dbg;
+
+ mperf_get_tsc(&tsc_at_measure_start);
+
+ for (cpu = 0; cpu < cpu_count; cpu++)
+ mperf_init_stats(cpu);
+
+ mperf_get_tsc(&dbg);
+ dprint("TSC diff: %llu\n", dbg - tsc_at_measure_start);
+ return 0;
+}
+
+static int mperf_stop(void)
+{
+ unsigned long long dbg;
+ int cpu;
+
+ mperf_get_tsc(&tsc_at_measure_end);
+
+ for (cpu = 0; cpu < cpu_count; cpu++)
+ mperf_measure_stats(cpu);
+
+ mperf_get_tsc(&dbg);
+ dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end);
+
+ return 0;
+}
+
+struct cpuidle_monitor mperf_monitor;
+
+struct cpuidle_monitor* mperf_register(void) {
+
+ unsigned long min;
+
+ if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF))
+ return NULL;
+
+ /* Assume min/max all the same on all cores */
+ if (cpufreq_get_hardware_limits(0, &min, &max_frequency)) {
+ dprint("Cannot retrieve max freq from cpufreq kernel "
+ "subsystem\n");
+ return NULL;
+ }
+
+ /* Free this at program termination */
+ is_valid = calloc(cpu_count, sizeof (int));
+ mperf_previous_count = calloc (cpu_count,
+ sizeof(unsigned long long));
+ aperf_previous_count = calloc (cpu_count,
+ sizeof(unsigned long long));
+ mperf_current_count = calloc (cpu_count,
+ sizeof(unsigned long long));
+ aperf_current_count = calloc (cpu_count,
+ sizeof(unsigned long long));
+
+ mperf_monitor.name_len = strlen(mperf_monitor.name);
+ return &mperf_monitor;
+}
+
+void mperf_unregister(void) {
+ free(mperf_previous_count);
+ free(aperf_previous_count);
+ free(mperf_current_count);
+ free(aperf_current_count);
+ free(is_valid);
+}
+
+struct cpuidle_monitor mperf_monitor = {
+ .name = "Mperf",
+ .hw_states_num = MPERF_CSTATE_COUNT,
+ .hw_states = mperf_cstates,
+ .start = mperf_start,
+ .stop = mperf_stop,
+ .do_register = mperf_register,
+ .unregister = mperf_unregister,
+ .needs_root = 1,
+ .overflow_s = 922000000 /* 922337203 seconds TSC overflow
+ at 20GHz */
+};
+#endif /* #if defined(__i386__) || defined(__x86_64__) */
diff --git a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
new file mode 100644
index 000000000000..6424b6dd3fa5
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
@@ -0,0 +1,212 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Based on Len Brown's <lenb@kernel.org> turbostat tool.
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "helpers/helpers.h"
+#include "idle_monitor/cpupower-monitor.h"
+
+#define MSR_PKG_C3_RESIDENCY 0x3F8
+#define MSR_PKG_C6_RESIDENCY 0x3F9
+#define MSR_CORE_C3_RESIDENCY 0x3FC
+#define MSR_CORE_C6_RESIDENCY 0x3FD
+
+#define MSR_TSC 0x10
+
+#define NHM_CSTATE_COUNT 4
+
+enum intel_nhm_id { C3 = 0, C6, PC3, PC6, TSC = 0xFFFF };
+
+static int nhm_get_count_percent(unsigned int self_id, double *percent,
+ unsigned int cpu);
+
+static cstate_t nhm_cstates[NHM_CSTATE_COUNT] = {
+ {
+ .name = "C3",
+ .desc = N_("Processor Core C3"),
+ .id = C3,
+ .range = RANGE_CORE,
+ .get_count_percent = nhm_get_count_percent,
+ },
+ {
+ .name = "C6",
+ .desc = N_("Processor Core C6"),
+ .id = C6,
+ .range = RANGE_CORE,
+ .get_count_percent = nhm_get_count_percent,
+ },
+
+ {
+ .name = "PC3",
+ .desc = N_("Processor Package C3"),
+ .id = PC3,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = nhm_get_count_percent,
+ },
+ {
+ .name = "PC6",
+ .desc = N_("Processor Package C6"),
+ .id = PC6,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = nhm_get_count_percent,
+ },
+};
+
+static unsigned long long tsc_at_measure_start;
+static unsigned long long tsc_at_measure_end;
+static unsigned long long *previous_count[NHM_CSTATE_COUNT];
+static unsigned long long *current_count[NHM_CSTATE_COUNT];
+/* valid flag for all CPUs. If a MSR read failed it will be zero */
+static int *is_valid;
+
+static int nhm_get_count(enum intel_nhm_id id, unsigned long long *val, unsigned int cpu)
+{
+ int msr;
+
+ switch(id) {
+ case C3:
+ msr = MSR_CORE_C3_RESIDENCY;
+ break;
+ case C6:
+ msr = MSR_CORE_C6_RESIDENCY;
+ break;
+ case PC3:
+ msr = MSR_PKG_C3_RESIDENCY;
+ break;
+ case PC6:
+ msr = MSR_PKG_C6_RESIDENCY;
+ break;
+ case TSC:
+ msr = MSR_TSC;
+ break;
+ default:
+ return -1;
+ };
+ if (read_msr(cpu, msr, val))
+ return -1;
+
+ return 0;
+}
+
+static int nhm_get_count_percent(unsigned int id, double *percent,
+ unsigned int cpu)
+{
+ *percent = 0.0;
+
+ if (!is_valid[cpu])
+ return -1;
+
+ *percent = (100.0 * (current_count[id][cpu] - previous_count[id][cpu])) /
+ (tsc_at_measure_end - tsc_at_measure_start);
+
+ dprint("%s: previous: %llu - current: %llu - (%u)\n", nhm_cstates[id].name,
+ previous_count[id][cpu], current_count[id][cpu],
+ cpu);
+
+ dprint("%s: tsc_diff: %llu - count_diff: %llu - percent: %2.f (%u)\n",
+ nhm_cstates[id].name,
+ (unsigned long long) tsc_at_measure_end - tsc_at_measure_start,
+ current_count[id][cpu] - previous_count[id][cpu],
+ *percent, cpu);
+
+ return 0;
+}
+
+static int nhm_start(void)
+{
+ int num, cpu;
+ unsigned long long dbg, val;
+
+ nhm_get_count(TSC, &tsc_at_measure_start, 0);
+
+ for (num = 0; num < NHM_CSTATE_COUNT; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ is_valid[cpu] = !nhm_get_count(num, &val, cpu);
+ previous_count[num][cpu] = val;
+ }
+ }
+ nhm_get_count(TSC, &dbg, 0);
+ dprint("TSC diff: %llu\n", dbg - tsc_at_measure_start);
+ return 0;
+}
+
+static int nhm_stop(void)
+{
+ unsigned long long val;
+ unsigned long long dbg;
+ int num, cpu;
+
+ nhm_get_count(TSC, &tsc_at_measure_end, 0);
+
+ for (num = 0; num < NHM_CSTATE_COUNT; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ is_valid[cpu] = !nhm_get_count(num, &val, cpu);
+ current_count[num][cpu] = val;
+ }
+ }
+ nhm_get_count(TSC, &dbg, 0);
+ dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end);
+
+ return 0;
+}
+
+struct cpuidle_monitor intel_nhm_monitor;
+
+struct cpuidle_monitor* intel_nhm_register(void) {
+ int num;
+
+ if (cpupower_cpu_info.vendor != X86_VENDOR_INTEL)
+ return NULL;
+
+ if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_INV_TSC))
+ return NULL;
+
+ if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF))
+ return NULL;
+
+ /* Free this at program termination */
+ is_valid = calloc(cpu_count, sizeof (int));
+ for (num = 0; num < NHM_CSTATE_COUNT; num++) {
+ previous_count[num] = calloc (cpu_count,
+ sizeof(unsigned long long));
+ current_count[num] = calloc (cpu_count,
+ sizeof(unsigned long long));
+ }
+
+ intel_nhm_monitor.name_len = strlen(intel_nhm_monitor.name);
+ return &intel_nhm_monitor;
+}
+
+void intel_nhm_unregister(void) {
+ int num;
+
+ for (num = 0; num < NHM_CSTATE_COUNT; num++) {
+ free(previous_count[num]);
+ free(current_count[num]);
+ }
+ free(is_valid);
+}
+
+struct cpuidle_monitor intel_nhm_monitor = {
+ .name = "Nehalem",
+ .hw_states_num = NHM_CSTATE_COUNT,
+ .hw_states = nhm_cstates,
+ .start = nhm_start,
+ .stop = nhm_stop,
+ .do_register = intel_nhm_register,
+ .unregister = intel_nhm_unregister,
+ .needs_root = 1,
+ .overflow_s = 922000000 /* 922337203 seconds TSC overflow
+ at 20GHz */
+};
+#endif
diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
new file mode 100644
index 000000000000..8cc80a5b530c
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
@@ -0,0 +1,189 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Based on Len Brown's <lenb@kernel.org> turbostat tool.
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "helpers/helpers.h"
+#include "idle_monitor/cpupower-monitor.h"
+
+#define MSR_PKG_C2_RESIDENCY 0x60D
+#define MSR_PKG_C7_RESIDENCY 0x3FA
+#define MSR_CORE_C7_RESIDENCY 0x3FE
+
+#define MSR_TSC 0x10
+
+enum intel_snb_id { C7 = 0, PC2, PC7, SNB_CSTATE_COUNT, TSC = 0xFFFF };
+
+static int snb_get_count_percent(unsigned int self_id, double *percent,
+ unsigned int cpu);
+
+static cstate_t snb_cstates[SNB_CSTATE_COUNT] = {
+ {
+ .name = "C7",
+ .desc = N_("Processor Core C7"),
+ .id = C7,
+ .range = RANGE_CORE,
+ .get_count_percent = snb_get_count_percent,
+ },
+ {
+ .name = "PC2",
+ .desc = N_("Processor Package C2"),
+ .id = PC2,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = snb_get_count_percent,
+ },
+ {
+ .name = "PC7",
+ .desc = N_("Processor Package C7"),
+ .id = PC7,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = snb_get_count_percent,
+ },
+};
+
+static unsigned long long tsc_at_measure_start;
+static unsigned long long tsc_at_measure_end;
+static unsigned long long *previous_count[SNB_CSTATE_COUNT];
+static unsigned long long *current_count[SNB_CSTATE_COUNT];
+/* valid flag for all CPUs. If a MSR read failed it will be zero */
+static int *is_valid;
+
+static int snb_get_count(enum intel_snb_id id, unsigned long long *val, unsigned int cpu)
+{
+ int msr;
+
+ switch(id) {
+ case C7:
+ msr = MSR_CORE_C7_RESIDENCY;
+ break;
+ case PC2:
+ msr = MSR_PKG_C2_RESIDENCY;
+ break;
+ case PC7:
+ msr = MSR_PKG_C7_RESIDENCY;
+ break;
+ case TSC:
+ msr = MSR_TSC;
+ break;
+ default:
+ return -1;
+ };
+ if (read_msr(cpu, msr, val))
+ return -1;
+ return 0;
+}
+
+static int snb_get_count_percent(unsigned int id, double *percent,
+ unsigned int cpu)
+{
+ *percent = 0.0;
+
+ if (!is_valid[cpu])
+ return -1;
+
+ *percent = (100.0 * (current_count[id][cpu] - previous_count[id][cpu])) /
+ (tsc_at_measure_end - tsc_at_measure_start);
+
+ dprint("%s: previous: %llu - current: %llu - (%u)\n", snb_cstates[id].name,
+ previous_count[id][cpu], current_count[id][cpu],
+ cpu);
+
+ dprint("%s: tsc_diff: %llu - count_diff: %llu - percent: %2.f (%u)\n",
+ snb_cstates[id].name,
+ (unsigned long long) tsc_at_measure_end - tsc_at_measure_start,
+ current_count[id][cpu]
+ - previous_count[id][cpu],
+ *percent, cpu);
+
+ return 0;
+}
+
+static int snb_start(void)
+{
+ int num, cpu;
+ unsigned long long val;
+
+ for (num = 0; num < SNB_CSTATE_COUNT; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ snb_get_count(num, &val, cpu);
+ previous_count[num][cpu] = val;
+ }
+ }
+ snb_get_count(TSC, &tsc_at_measure_start, 0);
+ return 0;
+}
+
+static int snb_stop(void)
+{
+ unsigned long long val;
+ int num, cpu;
+
+ snb_get_count(TSC, &tsc_at_measure_end, 0);
+
+ for (num = 0; num < SNB_CSTATE_COUNT; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ is_valid[cpu] = !snb_get_count(num, &val, cpu);
+ current_count[num][cpu] = val;
+ }
+ }
+ return 0;
+}
+
+struct cpuidle_monitor intel_snb_monitor;
+
+static struct cpuidle_monitor* snb_register(void) {
+
+ int num;
+
+ if (cpupower_cpu_info.vendor != X86_VENDOR_INTEL
+ || cpupower_cpu_info.family != 6)
+ return NULL;
+
+ if (cpupower_cpu_info.model != 0x2A
+ && cpupower_cpu_info.model != 0x2D)
+ return NULL;
+
+ is_valid = calloc(cpu_count, sizeof (int));
+ for (num = 0; num < SNB_CSTATE_COUNT; num++) {
+ previous_count[num] = calloc (cpu_count,
+ sizeof(unsigned long long));
+ current_count[num] = calloc (cpu_count,
+ sizeof(unsigned long long));
+ }
+ intel_snb_monitor.name_len = strlen(intel_snb_monitor.name);
+ return &intel_snb_monitor;
+}
+
+void snb_unregister(void)
+{
+ int num;
+ free(is_valid);
+ for (num = 0; num < SNB_CSTATE_COUNT; num++) {
+ free(previous_count[num]);
+ free(current_count[num]);
+ }
+}
+
+struct cpuidle_monitor intel_snb_monitor = {
+ .name = "SandyBridge",
+ .hw_states = snb_cstates,
+ .hw_states_num = SNB_CSTATE_COUNT,
+ .start = snb_start,
+ .stop = snb_stop,
+ .do_register = snb_register,
+ .unregister = snb_unregister,
+ .needs_root = 1,
+ .overflow_s = 922000000 /* 922337203 seconds TSC overflow
+ at 20GHz */
+};
+#endif /* defined(__i386__) || defined(__x86_64__) */