aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms/pseries
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/pseries')
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig22
-rw-r--r--arch/powerpc/platforms/pseries/Makefile1
-rw-r--r--arch/powerpc/platforms/pseries/eeh.c2
-rw-r--r--arch/powerpc/platforms/pseries/eeh_sysfs.c1
-rw-r--r--arch/powerpc/platforms/pseries/firmware.c1
-rw-r--r--arch/powerpc/platforms/pseries/hvCall.S1
-rw-r--r--arch/powerpc/platforms/pseries/hvCall_inst.c2
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c49
-rw-r--r--arch/powerpc/platforms/pseries/kexec.c10
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c12
-rw-r--r--arch/powerpc/platforms/pseries/nvram.c205
-rw-r--r--arch/powerpc/platforms/pseries/pci_dlpar.c2
-rw-r--r--arch/powerpc/platforms/pseries/pseries_energy.c326
-rw-r--r--arch/powerpc/platforms/pseries/ras.c102
-rw-r--r--arch/powerpc/platforms/pseries/suspend.c2
15 files changed, 657 insertions, 81 deletions
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index c667f0f02c34..5b3da4b4ea79 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -10,7 +10,7 @@ config PPC_PSERIES
select RTAS_ERROR_LOGGING
select PPC_UDBG_16550
select PPC_NATIVE
- select PPC_PCI_CHOICE if EMBEDDED
+ select PPC_PCI_CHOICE if EXPERT
default y
config PPC_SPLPAR
@@ -24,15 +24,25 @@ config PPC_SPLPAR
two or more partitions.
config EEH
- bool "PCI Extended Error Handling (EEH)" if EMBEDDED
+ bool "PCI Extended Error Handling (EEH)" if EXPERT
depends on PPC_PSERIES && PCI
- default y if !EMBEDDED
+ default y if !EXPERT
config PSERIES_MSI
bool
depends on PCI_MSI && EEH
default y
+config PSERIES_ENERGY
+ tristate "pSeries energy management capabilities driver"
+ depends on PPC_PSERIES
+ default y
+ help
+ Provides interface to platform energy management capabilities
+ on supported PSERIES platforms.
+ Provides: /sys/devices/system/cpu/pseries_(de)activation_hint_list
+ and /sys/devices/system/cpu/cpuN/pseries_(de)activation_hint
+
config SCANLOG
tristate "Scanlog dump interface"
depends on RTAS_PROC && PPC_PSERIES
@@ -47,6 +57,12 @@ config LPARCFG
config PPC_PSERIES_DEBUG
depends on PPC_PSERIES && PPC_EARLY_DEBUG
bool "Enable extra debug logging in platforms/pseries"
+ help
+ Say Y here if you want the pseries core to produce a bunch of
+ debug messages to the system log. Select this if you are having a
+ problem with the pseries core and want to see more of what is
+ going on. This does not enable debugging in lpar.c, which must
+ be manually done due to its verbosity.
default y
config PPC_SMLPAR
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 59eb8bdaa79d..fc5237810ece 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o
obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_PCI) += pci.o pci_dlpar.o
obj-$(CONFIG_PSERIES_MSI) += msi.o
+obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o
obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o
obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 34b7dc12e731..17a11c82e6f8 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -21,8 +21,6 @@
* Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
*/
-#undef DEBUG
-
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/list.h>
diff --git a/arch/powerpc/platforms/pseries/eeh_sysfs.c b/arch/powerpc/platforms/pseries/eeh_sysfs.c
index 15e13b568904..23982c7892d2 100644
--- a/arch/powerpc/platforms/pseries/eeh_sysfs.c
+++ b/arch/powerpc/platforms/pseries/eeh_sysfs.c
@@ -25,7 +25,6 @@
#include <linux/pci.h>
#include <asm/ppc-pci.h>
#include <asm/pci-bridge.h>
-#include <linux/kobject.h>
/**
* EEH_SHOW_ATTR -- create sysfs entry for eeh statistic
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index 0a14d8cd314f..0b0eff0cce35 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -55,6 +55,7 @@ firmware_features_table[FIRMWARE_MAX_FEATURES] = {
{FW_FEATURE_XDABR, "hcall-xdabr"},
{FW_FEATURE_MULTITCE, "hcall-multi-tce"},
{FW_FEATURE_SPLPAR, "hcall-splpar"},
+ {FW_FEATURE_VPHN, "hcall-vphn"},
};
/* Build up the firmware features bitmask using the contents of
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index 48d20573e4de..fd05fdee576a 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -11,6 +11,7 @@
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
#define STK_PARM(i) (48 + ((i)-3)*8)
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index e19ff021e711..f106662f4381 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -55,7 +55,7 @@ static void hc_stop(struct seq_file *m, void *p)
static int hc_show(struct seq_file *m, void *p)
{
unsigned long h_num = (unsigned long)p;
- struct hcall_stats *hs = (struct hcall_stats *)m->private;
+ struct hcall_stats *hs = m->private;
if (hs[h_num].num_calls) {
if (cpu_has_feature(CPU_FTR_PURR))
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index a77bcaed80af..edea60b7ee90 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -140,7 +140,7 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
return ret;
}
-static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
+static DEFINE_PER_CPU(u64 *, tce_page);
static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
long npages, unsigned long uaddr,
@@ -323,14 +323,13 @@ static void iommu_table_setparms(struct pci_controller *phb,
static void iommu_table_setparms_lpar(struct pci_controller *phb,
struct device_node *dn,
struct iommu_table *tbl,
- const void *dma_window,
- int bussubno)
+ const void *dma_window)
{
unsigned long offset, size;
- tbl->it_busno = bussubno;
of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size);
+ tbl->it_busno = phb->bus->number;
tbl->it_base = 0;
tbl->it_blocksize = 16;
tbl->it_type = TCE_PCI;
@@ -450,14 +449,10 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
if (!ppci->iommu_table) {
tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
ppci->phb->node);
- iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window,
- bus->number);
+ iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window);
ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node);
pr_debug(" created table: %p\n", ppci->iommu_table);
}
-
- if (pdn != dn)
- PCI_DN(dn)->iommu_table = ppci->iommu_table;
}
@@ -533,21 +528,11 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
}
pr_debug(" parent is %s\n", pdn->full_name);
- /* Check for parent == NULL so we don't try to setup the empty EADS
- * slots on POWER4 machines.
- */
- if (dma_window == NULL || pdn->parent == NULL) {
- pr_debug(" no dma window for device, linking to parent\n");
- set_iommu_table_base(&dev->dev, PCI_DN(pdn)->iommu_table);
- return;
- }
-
pci = PCI_DN(pdn);
if (!pci->iommu_table) {
tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
pci->phb->node);
- iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window,
- pci->phb->bus->number);
+ iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window);
pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
pr_debug(" created table: %p\n", pci->iommu_table);
} else {
@@ -571,8 +556,7 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti
switch (action) {
case PSERIES_RECONFIG_REMOVE:
- if (pci && pci->iommu_table &&
- of_get_property(np, "ibm,dma-window", NULL))
+ if (pci && pci->iommu_table)
iommu_free_table(pci->iommu_table, np->full_name);
break;
default:
@@ -589,13 +573,8 @@ static struct notifier_block iommu_reconfig_nb = {
/* These are called very early. */
void iommu_init_early_pSeries(void)
{
- if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL)) {
- /* Direct I/O, IOMMU off */
- ppc_md.pci_dma_dev_setup = NULL;
- ppc_md.pci_dma_bus_setup = NULL;
- set_pci_dma_ops(&dma_direct_ops);
+ if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL))
return;
- }
if (firmware_has_feature(FW_FEATURE_LPAR)) {
if (firmware_has_feature(FW_FEATURE_MULTITCE)) {
@@ -622,3 +601,17 @@ void iommu_init_early_pSeries(void)
set_pci_dma_ops(&dma_iommu_ops);
}
+static int __init disable_multitce(char *str)
+{
+ if (strcmp(str, "off") == 0 &&
+ firmware_has_feature(FW_FEATURE_LPAR) &&
+ firmware_has_feature(FW_FEATURE_MULTITCE)) {
+ printk(KERN_INFO "Disabling MULTITCE firmware feature\n");
+ ppc_md.tce_build = tce_build_pSeriesLP;
+ ppc_md.tce_free = tce_free_pSeriesLP;
+ powerpc_firmware_features &= ~FW_FEATURE_MULTITCE;
+ }
+ return 1;
+}
+
+__setup("multitce=", disable_multitce);
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index 53cbd53d8740..77d38a5e2ff9 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -61,13 +61,3 @@ void __init setup_kexec_cpu_down_xics(void)
{
ppc_md.kexec_cpu_down = pseries_kexec_cpu_down_xics;
}
-
-static int __init pseries_kexec_setup(void)
-{
- ppc_md.machine_kexec = default_machine_kexec;
- ppc_md.machine_kexec_prepare = default_machine_kexec_prepare;
- ppc_md.machine_crash_shutdown = default_machine_crash_shutdown;
-
- return 0;
-}
-machine_device_initcall(pseries, pseries_kexec_setup);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index f129040d974c..5d3ea9f60dd7 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -627,6 +627,18 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
}
+static int __init disable_bulk_remove(char *str)
+{
+ if (strcmp(str, "off") == 0 &&
+ firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
+ printk(KERN_INFO "Disabling BULK_REMOVE firmware feature");
+ powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE;
+ }
+ return 1;
+}
+
+__setup("bulk_remove=", disable_bulk_remove);
+
void __init hpte_init_lpar(void)
{
ppc_md.hpte_invalidate = pSeries_lpar_hpte_invalidate;
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index bc3c7f2abd79..7e828ba29bc3 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -22,11 +22,25 @@
#include <asm/prom.h>
#include <asm/machdep.h>
+/* Max bytes to read/write in one go */
+#define NVRW_CNT 0x20
+
static unsigned int nvram_size;
static int nvram_fetch, nvram_store;
static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */
static DEFINE_SPINLOCK(nvram_lock);
+static long nvram_error_log_index = -1;
+static long nvram_error_log_size = 0;
+
+struct err_log_info {
+ int error_type;
+ unsigned int seq_num;
+};
+#define NVRAM_MAX_REQ 2079
+#define NVRAM_MIN_REQ 1055
+
+#define NVRAM_LOG_PART_NAME "ibm,rtas-log"
static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
{
@@ -119,6 +133,197 @@ static ssize_t pSeries_nvram_get_size(void)
return nvram_size ? nvram_size : -ENODEV;
}
+
+/* nvram_write_error_log
+ *
+ * We need to buffer the error logs into nvram to ensure that we have
+ * the failure information to decode. If we have a severe error there
+ * is no way to guarantee that the OS or the machine is in a state to
+ * get back to user land and write the error to disk. For example if
+ * the SCSI device driver causes a Machine Check by writing to a bad
+ * IO address, there is no way of guaranteeing that the device driver
+ * is in any state that is would also be able to write the error data
+ * captured to disk, thus we buffer it in NVRAM for analysis on the
+ * next boot.
+ *
+ * In NVRAM the partition containing the error log buffer will looks like:
+ * Header (in bytes):
+ * +-----------+----------+--------+------------+------------------+
+ * | signature | checksum | length | name | data |
+ * |0 |1 |2 3|4 15|16 length-1|
+ * +-----------+----------+--------+------------+------------------+
+ *
+ * The 'data' section would look like (in bytes):
+ * +--------------+------------+-----------------------------------+
+ * | event_logged | sequence # | error log |
+ * |0 3|4 7|8 nvram_error_log_size-1|
+ * +--------------+------------+-----------------------------------+
+ *
+ * event_logged: 0 if event has not been logged to syslog, 1 if it has
+ * sequence #: The unique sequence # for each event. (until it wraps)
+ * error log: The error log from event_scan
+ */
+int nvram_write_error_log(char * buff, int length,
+ unsigned int err_type, unsigned int error_log_cnt)
+{
+ int rc;
+ loff_t tmp_index;
+ struct err_log_info info;
+
+ if (nvram_error_log_index == -1) {
+ return -ESPIPE;
+ }
+
+ if (length > nvram_error_log_size) {
+ length = nvram_error_log_size;
+ }
+
+ info.error_type = err_type;
+ info.seq_num = error_log_cnt;
+
+ tmp_index = nvram_error_log_index;
+
+ rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
+ return rc;
+ }
+
+ rc = ppc_md.nvram_write(buff, length, &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+/* nvram_read_error_log
+ *
+ * Reads nvram for error log for at most 'length'
+ */
+int nvram_read_error_log(char * buff, int length,
+ unsigned int * err_type, unsigned int * error_log_cnt)
+{
+ int rc;
+ loff_t tmp_index;
+ struct err_log_info info;
+
+ if (nvram_error_log_index == -1)
+ return -1;
+
+ if (length > nvram_error_log_size)
+ length = nvram_error_log_size;
+
+ tmp_index = nvram_error_log_index;
+
+ rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
+ return rc;
+ }
+
+ rc = ppc_md.nvram_read(buff, length, &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
+ return rc;
+ }
+
+ *error_log_cnt = info.seq_num;
+ *err_type = info.error_type;
+
+ return 0;
+}
+
+/* This doesn't actually zero anything, but it sets the event_logged
+ * word to tell that this event is safely in syslog.
+ */
+int nvram_clear_error_log(void)
+{
+ loff_t tmp_index;
+ int clear_word = ERR_FLAG_ALREADY_LOGGED;
+ int rc;
+
+ if (nvram_error_log_index == -1)
+ return -1;
+
+ tmp_index = nvram_error_log_index;
+
+ rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+/* pseries_nvram_init_log_partition
+ *
+ * This will setup the partition we need for buffering the
+ * error logs and cleanup partitions if needed.
+ *
+ * The general strategy is the following:
+ * 1.) If there is log partition large enough then use it.
+ * 2.) If there is none large enough, search
+ * for a free partition that is large enough.
+ * 3.) If there is not a free partition large enough remove
+ * _all_ OS partitions and consolidate the space.
+ * 4.) Will first try getting a chunk that will satisfy the maximum
+ * error log size (NVRAM_MAX_REQ).
+ * 5.) If the max chunk cannot be allocated then try finding a chunk
+ * that will satisfy the minum needed (NVRAM_MIN_REQ).
+ */
+static int __init pseries_nvram_init_log_partition(void)
+{
+ loff_t p;
+ int size;
+
+ /* Scan nvram for partitions */
+ nvram_scan_partitions();
+
+ /* Lookg for ours */
+ p = nvram_find_partition(NVRAM_LOG_PART_NAME, NVRAM_SIG_OS, &size);
+
+ /* Found one but too small, remove it */
+ if (p && size < NVRAM_MIN_REQ) {
+ pr_info("nvram: Found too small "NVRAM_LOG_PART_NAME" partition"
+ ",removing it...");
+ nvram_remove_partition(NVRAM_LOG_PART_NAME, NVRAM_SIG_OS);
+ p = 0;
+ }
+
+ /* Create one if we didn't find */
+ if (!p) {
+ p = nvram_create_partition(NVRAM_LOG_PART_NAME, NVRAM_SIG_OS,
+ NVRAM_MAX_REQ, NVRAM_MIN_REQ);
+ /* No room for it, try to get rid of any OS partition
+ * and try again
+ */
+ if (p == -ENOSPC) {
+ pr_info("nvram: No room to create "NVRAM_LOG_PART_NAME
+ " partition, deleting all OS partitions...");
+ nvram_remove_partition(NULL, NVRAM_SIG_OS);
+ p = nvram_create_partition(NVRAM_LOG_PART_NAME,
+ NVRAM_SIG_OS, NVRAM_MAX_REQ,
+ NVRAM_MIN_REQ);
+ }
+ }
+
+ if (p <= 0) {
+ pr_err("nvram: Failed to find or create "NVRAM_LOG_PART_NAME
+ " partition, err %d\n", (int)p);
+ return 0;
+ }
+
+ nvram_error_log_index = p;
+ nvram_error_log_size = nvram_get_partition_size(p) -
+ sizeof(struct err_log_info);
+
+ return 0;
+}
+machine_arch_initcall(pseries, pseries_nvram_init_log_partition);
+
int __init pSeries_nvram_init(void)
{
struct device_node *nvram;
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 4b7a062dee15..5fcc92a12d3e 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -25,8 +25,6 @@
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#undef DEBUG
-
#include <linux/pci.h>
#include <asm/pci-bridge.h>
#include <asm/ppc-pci.h>
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
new file mode 100644
index 000000000000..c8b3c69fe891
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -0,0 +1,326 @@
+/*
+ * POWER platform energy management driver
+ * Copyright (C) 2010 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This pseries platform device driver provides access to
+ * platform energy management capabilities.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <asm/cputhreads.h>
+#include <asm/page.h>
+#include <asm/hvcall.h>
+
+
+#define MODULE_VERS "1.0"
+#define MODULE_NAME "pseries_energy"
+
+/* Driver flags */
+
+static int sysfs_entries;
+
+/* Helper routines */
+
+/*
+ * Routine to detect firmware support for hcall
+ * return 1 if H_BEST_ENERGY is supported
+ * else return 0
+ */
+
+static int check_for_h_best_energy(void)
+{
+ struct device_node *rtas = NULL;
+ const char *hypertas, *s;
+ int length;
+ int rc = 0;
+
+ rtas = of_find_node_by_path("/rtas");
+ if (!rtas)
+ return 0;
+
+ hypertas = of_get_property(rtas, "ibm,hypertas-functions", &length);
+ if (!hypertas) {
+ of_node_put(rtas);
+ return 0;
+ }
+
+ /* hypertas will have list of strings with hcall names */
+ for (s = hypertas; s < hypertas + length; s += strlen(s) + 1) {
+ if (!strncmp("hcall-best-energy-1", s, 19)) {
+ rc = 1; /* Found the string */
+ break;
+ }
+ }
+ of_node_put(rtas);
+ return rc;
+}
+
+/* Helper Routines to convert between drc_index to cpu numbers */
+
+static u32 cpu_to_drc_index(int cpu)
+{
+ struct device_node *dn = NULL;
+ const int *indexes;
+ int i;
+ int rc = 1;
+ u32 ret = 0;
+
+ dn = of_find_node_by_path("/cpus");
+ if (dn == NULL)
+ goto err;
+ indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
+ if (indexes == NULL)
+ goto err_of_node_put;
+ /* Convert logical cpu number to core number */
+ i = cpu_core_index_of_thread(cpu);
+ /*
+ * The first element indexes[0] is the number of drc_indexes
+ * returned in the list. Hence i+1 will get the drc_index
+ * corresponding to core number i.
+ */
+ WARN_ON(i > indexes[0]);
+ ret = indexes[i + 1];
+ rc = 0;
+
+err_of_node_put:
+ of_node_put(dn);
+err:
+ if (rc)
+ printk(KERN_WARNING "cpu_to_drc_index(%d) failed", cpu);
+ return ret;
+}
+
+static int drc_index_to_cpu(u32 drc_index)
+{
+ struct device_node *dn = NULL;
+ const int *indexes;
+ int i, cpu = 0;
+ int rc = 1;
+
+ dn = of_find_node_by_path("/cpus");
+ if (dn == NULL)
+ goto err;
+ indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
+ if (indexes == NULL)
+ goto err_of_node_put;
+ /*
+ * First element in the array is the number of drc_indexes
+ * returned. Search through the list to find the matching
+ * drc_index and get the core number
+ */
+ for (i = 0; i < indexes[0]; i++) {
+ if (indexes[i + 1] == drc_index)
+ break;
+ }
+ /* Convert core number to logical cpu number */
+ cpu = cpu_first_thread_of_core(i);
+ rc = 0;
+
+err_of_node_put:
+ of_node_put(dn);
+err:
+ if (rc)
+ printk(KERN_WARNING "drc_index_to_cpu(%d) failed", drc_index);
+ return cpu;
+}
+
+/*
+ * pseries hypervisor call H_BEST_ENERGY provides hints to OS on
+ * preferred logical cpus to activate or deactivate for optimized
+ * energy consumption.
+ */
+
+#define FLAGS_MODE1 0x004E200000080E01
+#define FLAGS_MODE2 0x004E200000080401
+#define FLAGS_ACTIVATE 0x100
+
+static ssize_t get_best_energy_list(char *page, int activate)
+{
+ int rc, cnt, i, cpu;
+ unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+ unsigned long flags = 0;
+ u32 *buf_page;
+ char *s = page;
+
+ buf_page = (u32 *) get_zeroed_page(GFP_KERNEL);
+ if (!buf_page)
+ return -ENOMEM;
+
+ flags = FLAGS_MODE1;
+ if (activate)
+ flags |= FLAGS_ACTIVATE;
+
+ rc = plpar_hcall9(H_BEST_ENERGY, retbuf, flags, 0, __pa(buf_page),
+ 0, 0, 0, 0, 0, 0);
+ if (rc != H_SUCCESS) {
+ free_page((unsigned long) buf_page);
+ return -EINVAL;
+ }
+
+ cnt = retbuf[0];
+ for (i = 0; i < cnt; i++) {
+ cpu = drc_index_to_cpu(buf_page[2*i+1]);
+ if ((cpu_online(cpu) && !activate) ||
+ (!cpu_online(cpu) && activate))
+ s += sprintf(s, "%d,", cpu);
+ }
+ if (s > page) { /* Something to show */
+ s--; /* Suppress last comma */
+ s += sprintf(s, "\n");
+ }
+
+ free_page((unsigned long) buf_page);
+ return s-page;
+}
+
+static ssize_t get_best_energy_data(struct sys_device *dev,
+ char *page, int activate)
+{
+ int rc;
+ unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+ unsigned long flags = 0;
+
+ flags = FLAGS_MODE2;
+ if (activate)
+ flags |= FLAGS_ACTIVATE;
+
+ rc = plpar_hcall9(H_BEST_ENERGY, retbuf, flags,
+ cpu_to_drc_index(dev->id),
+ 0, 0, 0, 0, 0, 0, 0);
+
+ if (rc != H_SUCCESS)
+ return -EINVAL;
+
+ return sprintf(page, "%lu\n", retbuf[1] >> 32);
+}
+
+/* Wrapper functions */
+
+static ssize_t cpu_activate_hint_list_show(struct sysdev_class *class,
+ struct sysdev_class_attribute *attr, char *page)
+{
+ return get_best_energy_list(page, 1);
+}
+
+static ssize_t cpu_deactivate_hint_list_show(struct sysdev_class *class,
+ struct sysdev_class_attribute *attr, char *page)
+{
+ return get_best_energy_list(page, 0);
+}
+
+static ssize_t percpu_activate_hint_show(struct sys_device *dev,
+ struct sysdev_attribute *attr, char *page)
+{
+ return get_best_energy_data(dev, page, 1);
+}
+
+static ssize_t percpu_deactivate_hint_show(struct sys_device *dev,
+ struct sysdev_attribute *attr, char *page)
+{
+ return get_best_energy_data(dev, page, 0);
+}
+
+/*
+ * Create sysfs interface:
+ * /sys/devices/system/cpu/pseries_activate_hint_list
+ * /sys/devices/system/cpu/pseries_deactivate_hint_list
+ * Comma separated list of cpus to activate or deactivate
+ * /sys/devices/system/cpu/cpuN/pseries_activate_hint
+ * /sys/devices/system/cpu/cpuN/pseries_deactivate_hint
+ * Per-cpu value of the hint
+ */
+
+struct sysdev_class_attribute attr_cpu_activate_hint_list =
+ _SYSDEV_CLASS_ATTR(pseries_activate_hint_list, 0444,
+ cpu_activate_hint_list_show, NULL);
+
+struct sysdev_class_attribute attr_cpu_deactivate_hint_list =
+ _SYSDEV_CLASS_ATTR(pseries_deactivate_hint_list, 0444,
+ cpu_deactivate_hint_list_show, NULL);
+
+struct sysdev_attribute attr_percpu_activate_hint =
+ _SYSDEV_ATTR(pseries_activate_hint, 0444,
+ percpu_activate_hint_show, NULL);
+
+struct sysdev_attribute attr_percpu_deactivate_hint =
+ _SYSDEV_ATTR(pseries_deactivate_hint, 0444,
+ percpu_deactivate_hint_show, NULL);
+
+static int __init pseries_energy_init(void)
+{
+ int cpu, err;
+ struct sys_device *cpu_sys_dev;
+
+ if (!check_for_h_best_energy()) {
+ printk(KERN_INFO "Hypercall H_BEST_ENERGY not supported\n");
+ return 0;
+ }
+ /* Create the sysfs files */
+ err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+ &attr_cpu_activate_hint_list.attr);
+ if (!err)
+ err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+ &attr_cpu_deactivate_hint_list.attr);
+
+ if (err)
+ return err;
+ for_each_possible_cpu(cpu) {
+ cpu_sys_dev = get_cpu_sysdev(cpu);
+ err = sysfs_create_file(&cpu_sys_dev->kobj,
+ &attr_percpu_activate_hint.attr);
+ if (err)
+ break;
+ err = sysfs_create_file(&cpu_sys_dev->kobj,
+ &attr_percpu_deactivate_hint.attr);
+ if (err)
+ break;
+ }
+
+ if (err)
+ return err;
+
+ sysfs_entries = 1; /* Removed entries on cleanup */
+ return 0;
+
+}
+
+static void __exit pseries_energy_cleanup(void)
+{
+ int cpu;
+ struct sys_device *cpu_sys_dev;
+
+ if (!sysfs_entries)
+ return;
+
+ /* Remove the sysfs files */
+ sysfs_remove_file(&cpu_sysdev_class.kset.kobj,
+ &attr_cpu_activate_hint_list.attr);
+
+ sysfs_remove_file(&cpu_sysdev_class.kset.kobj,
+ &attr_cpu_deactivate_hint_list.attr);
+
+ for_each_possible_cpu(cpu) {
+ cpu_sys_dev = get_cpu_sysdev(cpu);
+ sysfs_remove_file(&cpu_sys_dev->kobj,
+ &attr_percpu_activate_hint.attr);
+ sysfs_remove_file(&cpu_sys_dev->kobj,
+ &attr_percpu_deactivate_hint.attr);
+ }
+}
+
+module_init(pseries_energy_init);
+module_exit(pseries_energy_cleanup);
+MODULE_DESCRIPTION("Driver for pSeries platform energy management");
+MODULE_AUTHOR("Vaidyanathan Srinivasan");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index a4fc6da87c2e..c55d7ad9c648 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -54,7 +54,8 @@
static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
static DEFINE_SPINLOCK(ras_log_buf_lock);
-static char mce_data_buf[RTAS_ERROR_LOG_MAX];
+static char global_mce_data_buf[RTAS_ERROR_LOG_MAX];
+static DEFINE_PER_CPU(__u64, mce_data_buf);
static int ras_get_sensor_state_token;
static int ras_check_exception_token;
@@ -196,12 +197,24 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-/* Get the error information for errors coming through the
+/*
+ * Some versions of FWNMI place the buffer inside the 4kB page starting at
+ * 0x7000. Other versions place it inside the rtas buffer. We check both.
+ */
+#define VALID_FWNMI_BUFFER(A) \
+ ((((A) >= 0x7000) && ((A) < 0x7ff0)) || \
+ (((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16))))
+
+/*
+ * Get the error information for errors coming through the
* FWNMI vectors. The pt_regs' r3 will be updated to reflect
* the actual r3 if possible, and a ptr to the error log entry
* will be returned if found.
*
- * The mce_data_buf does not have any locks or protection around it,
+ * If the RTAS error is not of the extended type, then we put it in a per
+ * cpu 64bit buffer. If it is the extended type we use global_mce_data_buf.
+ *
+ * The global_mce_data_buf does not have any locks or protection around it,
* if a second machine check comes in, or a system reset is done
* before we have logged the error, then we will get corruption in the
* error log. This is preferable over holding off on calling
@@ -210,20 +223,31 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
*/
static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
{
- unsigned long errdata = regs->gpr[3];
- struct rtas_error_log *errhdr = NULL;
unsigned long *savep;
+ struct rtas_error_log *h, *errhdr = NULL;
+
+ if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
+ printk(KERN_ERR "FWNMI: corrupt r3\n");
+ return NULL;
+ }
- if ((errdata >= 0x7000 && errdata < 0x7fff0) ||
- (errdata >= rtas.base && errdata < rtas.base + rtas.size - 16)) {
- savep = __va(errdata);
- regs->gpr[3] = savep[0]; /* restore original r3 */
- memset(mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
- memcpy(mce_data_buf, (char *)(savep + 1), RTAS_ERROR_LOG_MAX);
- errhdr = (struct rtas_error_log *)mce_data_buf;
+ savep = __va(regs->gpr[3]);
+ regs->gpr[3] = savep[0]; /* restore original r3 */
+
+ /* If it isn't an extended log we can use the per cpu 64bit buffer */
+ h = (struct rtas_error_log *)&savep[1];
+ if (!h->extended) {
+ memcpy(&__get_cpu_var(mce_data_buf), h, sizeof(__u64));
+ errhdr = (struct rtas_error_log *)&__get_cpu_var(mce_data_buf);
} else {
- printk("FWNMI: corrupt r3\n");
+ int len;
+
+ len = max_t(int, 8+h->extended_log_length, RTAS_ERROR_LOG_MAX);
+ memset(global_mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
+ memcpy(global_mce_data_buf, h, len);
+ errhdr = (struct rtas_error_log *)global_mce_data_buf;
}
+
return errhdr;
}
@@ -235,7 +259,7 @@ static void fwnmi_release_errinfo(void)
{
int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL);
if (ret != 0)
- printk("FWNMI: nmi-interlock failed: %d\n", ret);
+ printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret);
}
int pSeries_system_reset_exception(struct pt_regs *regs)
@@ -259,31 +283,43 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
* Return 1 if corrected (or delivered a signal).
* Return 0 if there is nothing we can do.
*/
-static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err)
+static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
{
- int nonfatal = 0;
+ int recovered = 0;
- if (err->disposition == RTAS_DISP_FULLY_RECOVERED) {
+ if (!(regs->msr & MSR_RI)) {
+ /* If MSR_RI isn't set, we cannot recover */
+ recovered = 0;
+
+ } else if (err->disposition == RTAS_DISP_FULLY_RECOVERED) {
/* Platform corrected itself */
- nonfatal = 1;
- } else if ((regs->msr & MSR_RI) &&
- user_mode(regs) &&
- err->severity == RTAS_SEVERITY_ERROR_SYNC &&
- err->disposition == RTAS_DISP_NOT_RECOVERED &&
- err->target == RTAS_TARGET_MEMORY &&
- err->type == RTAS_TYPE_ECC_UNCORR &&
- !(current->pid == 0 || is_global_init(current))) {
- /* Kill off a user process with an ECC error */
- printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n",
- current->pid);
- /* XXX something better for ECC error? */
- _exception(SIGBUS, regs, BUS_ADRERR, regs->nip);
- nonfatal = 1;
+ recovered = 1;
+
+ } else if (err->disposition == RTAS_DISP_LIMITED_RECOVERY) {
+ /* Platform corrected itself but could be degraded */
+ printk(KERN_ERR "MCE: limited recovery, system may "
+ "be degraded\n");
+ recovered = 1;
+
+ } else if (user_mode(regs) && !is_global_init(current) &&
+ err->severity == RTAS_SEVERITY_ERROR_SYNC) {
+
+ /*
+ * If we received a synchronous error when in userspace
+ * kill the task. Firmware may report details of the fail
+ * asynchronously, so we can't rely on the target and type
+ * fields being valid here.
+ */
+ printk(KERN_ERR "MCE: uncorrectable error, killing task "
+ "%s:%d\n", current->comm, current->pid);
+
+ _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+ recovered = 1;
}
- log_error((char *)err, ERR_TYPE_RTAS_LOG, !nonfatal);
+ log_error((char *)err, ERR_TYPE_RTAS_LOG, 0);
- return nonfatal;
+ return recovered;
}
/*
diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c
index ed72098bb4e3..a8ca289ff267 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -153,7 +153,7 @@ static struct sysdev_class suspend_sysdev_class = {
.name = "power",
};
-static struct platform_suspend_ops pseries_suspend_ops = {
+static const struct platform_suspend_ops pseries_suspend_ops = {
.valid = suspend_valid_only_mem,
.begin = pseries_suspend_begin,
.prepare_late = pseries_prepare_late,