aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms/powernv
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/powernv')
-rw-r--r--arch/powerpc/platforms/powernv/eeh-ioda.c16
-rw-r--r--arch/powerpc/platforms/powernv/opal-async.c3
-rw-r--r--arch/powerpc/platforms/powernv/opal-rtc.c65
-rw-r--r--arch/powerpc/platforms/powernv/opal-sensor.c20
-rw-r--r--arch/powerpc/platforms/powernv/opal-tracepoints.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S45
-rw-r--r--arch/powerpc/platforms/powernv/opal.c71
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c217
-rw-r--r--arch/powerpc/platforms/powernv/pci-p5ioc2.c44
-rw-r--r--arch/powerpc/platforms/powernv/pci.c3
-rw-r--r--arch/powerpc/platforms/powernv/pci.h2
-rw-r--r--arch/powerpc/platforms/powernv/powernv.h2
-rw-r--r--arch/powerpc/platforms/powernv/setup.c172
-rw-r--r--arch/powerpc/platforms/powernv/smp.c50
-rw-r--r--arch/powerpc/platforms/powernv/subcore.c34
-rw-r--r--arch/powerpc/platforms/powernv/subcore.h9
16 files changed, 606 insertions, 151 deletions
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index eba9cb10619c..2809c9895288 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -11,7 +11,6 @@
* (at your option) any later version.
*/
-#include <linux/bootmem.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/io.h>
@@ -354,6 +353,9 @@ static int ioda_eeh_get_phb_state(struct eeh_pe *pe)
} else if (!(pe->state & EEH_PE_ISOLATED)) {
eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
ioda_eeh_phb_diag(pe);
+
+ if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+ pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
}
return result;
@@ -373,7 +375,7 @@ static int ioda_eeh_get_pe_state(struct eeh_pe *pe)
* moving forward, we have to return operational
* state during PE reset.
*/
- if (pe->state & EEH_PE_CFG_BLOCKED) {
+ if (pe->state & EEH_PE_RESET) {
result = (EEH_STATE_MMIO_ACTIVE |
EEH_STATE_DMA_ACTIVE |
EEH_STATE_MMIO_ENABLED |
@@ -452,6 +454,9 @@ static int ioda_eeh_get_pe_state(struct eeh_pe *pe)
eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
ioda_eeh_phb_diag(pe);
+
+ if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+ pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
}
return result;
@@ -731,7 +736,8 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option)
static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
char *drv_log, unsigned long len)
{
- pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
+ if (!eeh_has_flag(EEH_EARLY_DUMP_LOG))
+ pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
return 0;
}
@@ -1087,6 +1093,10 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
!((*pe)->state & EEH_PE_ISOLATED)) {
eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
ioda_eeh_phb_diag(*pe);
+
+ if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+ pnv_pci_dump_phb_diag_data((*pe)->phb,
+ (*pe)->data);
}
/*
diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c
index e462ab947d16..693b6cdac691 100644
--- a/arch/powerpc/platforms/powernv/opal-async.c
+++ b/arch/powerpc/platforms/powernv/opal-async.c
@@ -71,6 +71,7 @@ int opal_async_get_token_interruptible(void)
return token;
}
+EXPORT_SYMBOL_GPL(opal_async_get_token_interruptible);
int __opal_async_release_token(int token)
{
@@ -102,6 +103,7 @@ int opal_async_release_token(int token)
return 0;
}
+EXPORT_SYMBOL_GPL(opal_async_release_token);
int opal_async_wait_response(uint64_t token, struct opal_msg *msg)
{
@@ -120,6 +122,7 @@ int opal_async_wait_response(uint64_t token, struct opal_msg *msg)
return 0;
}
+EXPORT_SYMBOL_GPL(opal_async_wait_response);
static int opal_async_comp_event(struct notifier_block *nb,
unsigned long msg_type, void *msg)
diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c
index 499707ddaa9c..37dbee15769f 100644
--- a/arch/powerpc/platforms/powernv/opal-rtc.c
+++ b/arch/powerpc/platforms/powernv/opal-rtc.c
@@ -15,6 +15,8 @@
#include <linux/bcd.h>
#include <linux/rtc.h>
#include <linux/delay.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
#include <asm/opal.h>
#include <asm/firmware.h>
@@ -43,7 +45,7 @@ unsigned long __init opal_get_boot_time(void)
long rc = OPAL_BUSY;
if (!opal_check_token(OPAL_RTC_READ))
- goto out;
+ return 0;
while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
@@ -53,62 +55,33 @@ unsigned long __init opal_get_boot_time(void)
mdelay(10);
}
if (rc != OPAL_SUCCESS)
- goto out;
+ return 0;
y_m_d = be32_to_cpu(__y_m_d);
h_m_s_ms = be64_to_cpu(__h_m_s_ms);
opal_to_tm(y_m_d, h_m_s_ms, &tm);
return mktime(tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
tm.tm_hour, tm.tm_min, tm.tm_sec);
-out:
- ppc_md.get_rtc_time = NULL;
- ppc_md.set_rtc_time = NULL;
- return 0;
}
-void opal_get_rtc_time(struct rtc_time *tm)
+static __init int opal_time_init(void)
{
- long rc = OPAL_BUSY;
- u32 y_m_d;
- u64 h_m_s_ms;
- __be32 __y_m_d;
- __be64 __h_m_s_ms;
+ struct platform_device *pdev;
+ struct device_node *rtc;
- while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
- rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
- if (rc == OPAL_BUSY_EVENT)
- opal_poll_events(NULL);
+ rtc = of_find_node_by_path("/ibm,opal/rtc");
+ if (rtc) {
+ pdev = of_platform_device_create(rtc, "opal-rtc", NULL);
+ of_node_put(rtc);
+ } else {
+ if (opal_check_token(OPAL_RTC_READ) ||
+ opal_check_token(OPAL_READ_TPO))
+ pdev = platform_device_register_simple("opal-rtc", -1,
+ NULL, 0);
else
- mdelay(10);
+ return -ENODEV;
}
- if (rc != OPAL_SUCCESS)
- return;
- y_m_d = be32_to_cpu(__y_m_d);
- h_m_s_ms = be64_to_cpu(__h_m_s_ms);
- opal_to_tm(y_m_d, h_m_s_ms, tm);
-}
-
-int opal_set_rtc_time(struct rtc_time *tm)
-{
- long rc = OPAL_BUSY;
- u32 y_m_d = 0;
- u64 h_m_s_ms = 0;
-
- y_m_d |= ((u32)bin2bcd((tm->tm_year + 1900) / 100)) << 24;
- y_m_d |= ((u32)bin2bcd((tm->tm_year + 1900) % 100)) << 16;
- y_m_d |= ((u32)bin2bcd((tm->tm_mon + 1))) << 8;
- y_m_d |= ((u32)bin2bcd(tm->tm_mday));
-
- h_m_s_ms |= ((u64)bin2bcd(tm->tm_hour)) << 56;
- h_m_s_ms |= ((u64)bin2bcd(tm->tm_min)) << 48;
- h_m_s_ms |= ((u64)bin2bcd(tm->tm_sec)) << 40;
- while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
- rc = opal_rtc_write(y_m_d, h_m_s_ms);
- if (rc == OPAL_BUSY_EVENT)
- opal_poll_events(NULL);
- else
- mdelay(10);
- }
- return rc == OPAL_SUCCESS ? 0 : -EIO;
+ return PTR_ERR_OR_ZERO(pdev);
}
+machine_subsys_initcall(powernv, opal_time_init);
diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c
index 10271ad1fac4..4ab67ef7abc9 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor.c
@@ -20,7 +20,9 @@
#include <linux/delay.h>
#include <linux/mutex.h>
+#include <linux/of_platform.h>
#include <asm/opal.h>
+#include <asm/machdep.h>
static DEFINE_MUTEX(opal_sensor_mutex);
@@ -64,3 +66,21 @@ out:
return ret;
}
EXPORT_SYMBOL_GPL(opal_get_sensor_data);
+
+static __init int opal_sensor_init(void)
+{
+ struct platform_device *pdev;
+ struct device_node *sensor;
+
+ sensor = of_find_node_by_path("/ibm,opal/sensors");
+ if (!sensor) {
+ pr_err("Opal node 'sensors' not found\n");
+ return -ENODEV;
+ }
+
+ pdev = of_platform_device_create(sensor, "opal-sensor", NULL);
+ of_node_put(sensor);
+
+ return PTR_ERR_OR_ZERO(pdev);
+}
+machine_subsys_initcall(powernv, opal_sensor_init);
diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c
index ae14c40b4b1c..e11273b2386d 100644
--- a/arch/powerpc/platforms/powernv/opal-tracepoints.c
+++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c
@@ -48,7 +48,7 @@ void __trace_opal_entry(unsigned long opcode, unsigned long *args)
local_irq_save(flags);
- depth = &__get_cpu_var(opal_trace_depth);
+ depth = this_cpu_ptr(&opal_trace_depth);
if (*depth)
goto out;
@@ -69,7 +69,7 @@ void __trace_opal_exit(long opcode, unsigned long retval)
local_irq_save(flags);
- depth = &__get_cpu_var(opal_trace_depth);
+ depth = this_cpu_ptr(&opal_trace_depth);
if (*depth)
goto out;
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index feb549aa3eea..54eca8b3b288 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -18,7 +18,7 @@
.section ".text"
#ifdef CONFIG_TRACEPOINTS
-#ifdef CONFIG_JUMP_LABEL
+#ifdef HAVE_JUMP_LABEL
#define OPAL_BRANCH(LABEL) \
ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key)
#else
@@ -158,6 +158,43 @@ opal_tracepoint_return:
blr
#endif
+/*
+ * Make opal call in realmode. This is a generic function to be called
+ * from realmode. It handles endianness.
+ *
+ * r13 - paca pointer
+ * r1 - stack pointer
+ * r0 - opal token
+ */
+_GLOBAL(opal_call_realmode)
+ mflr r12
+ std r12,PPC_LR_STKOFF(r1)
+ ld r2,PACATOC(r13)
+ /* Set opal return address */
+ LOAD_REG_ADDR(r12,return_from_opal_call)
+ mtlr r12
+
+ mfmsr r12
+#ifdef __LITTLE_ENDIAN__
+ /* Handle endian-ness */
+ li r11,MSR_LE
+ andc r12,r12,r11
+#endif
+ mtspr SPRN_HSRR1,r12
+ LOAD_REG_ADDR(r11,opal)
+ ld r12,8(r11)
+ ld r2,0(r11)
+ mtspr SPRN_HSRR0,r12
+ hrfid
+
+return_from_opal_call:
+#ifdef __LITTLE_ENDIAN__
+ FIXUP_ENDIAN
+#endif
+ ld r12,PPC_LR_STKOFF(r1)
+ mtlr r12
+ blr
+
OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL);
OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE);
OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ);
@@ -247,6 +284,12 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
+OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG);
OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION);
OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CXL_MODE);
+OPAL_CALL(opal_tpo_write, OPAL_WRITE_TPO);
+OPAL_CALL(opal_tpo_read, OPAL_READ_TPO);
+OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND);
+OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV);
+OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index d019b081df9d..f10b9ec8c1f5 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -9,8 +9,9 @@
* 2 of the License, or (at your option) any later version.
*/
-#undef DEBUG
+#define pr_fmt(fmt) "opal: " fmt
+#include <linux/printk.h>
#include <linux/types.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
@@ -50,7 +51,6 @@ static int mc_recoverable_range_len;
struct device_node *opal_node;
static DEFINE_SPINLOCK(opal_write_lock);
-extern u64 opal_mc_secondary_handler[];
static unsigned int *opal_irqs;
static unsigned int opal_irq_count;
static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
@@ -626,6 +626,39 @@ static int opal_sysfs_init(void)
return 0;
}
+static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t count)
+{
+ return memory_read_from_buffer(buf, count, &off, bin_attr->private,
+ bin_attr->size);
+}
+
+static BIN_ATTR_RO(symbol_map, 0);
+
+static void opal_export_symmap(void)
+{
+ const __be64 *syms;
+ unsigned int size;
+ struct device_node *fw;
+ int rc;
+
+ fw = of_find_node_by_path("/ibm,opal/firmware");
+ if (!fw)
+ return;
+ syms = of_get_property(fw, "symbol-map", &size);
+ if (!syms || size != 2 * sizeof(__be64))
+ return;
+
+ /* Setup attributes */
+ bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
+ bin_attr_symbol_map.size = be64_to_cpu(syms[1]);
+
+ rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
+ if (rc)
+ pr_warn("Error %d creating OPAL symbols file\n", rc);
+}
+
static void __init opal_dump_region_init(void)
{
void *addr;
@@ -644,6 +677,24 @@ static void __init opal_dump_region_init(void)
pr_warn("DUMP: Failed to register kernel log buffer. "
"rc = %d\n", rc);
}
+
+static void opal_ipmi_init(struct device_node *opal_node)
+{
+ struct device_node *np;
+
+ for_each_child_of_node(opal_node, np)
+ if (of_device_is_compatible(np, "ibm,opal-ipmi"))
+ of_platform_device_create(np, NULL, NULL);
+}
+
+static void opal_i2c_create_devs(void)
+{
+ struct device_node *np;
+
+ for_each_compatible_node(np, NULL, "ibm,opal-i2c")
+ of_platform_device_create(np, NULL, NULL);
+}
+
static int __init opal_init(void)
{
struct device_node *np, *consoles;
@@ -670,6 +721,9 @@ static int __init opal_init(void)
of_node_put(consoles);
}
+ /* Create i2c platform devices */
+ opal_i2c_create_devs();
+
/* Find all OPAL interrupts and request them */
irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
pr_debug("opal: Found %d interrupts reserved for OPAL\n",
@@ -693,6 +747,8 @@ static int __init opal_init(void)
/* Create "opal" kobject under /sys/firmware */
rc = opal_sysfs_init();
if (rc == 0) {
+ /* Export symbol map to userspace */
+ opal_export_symmap();
/* Setup dump region interface */
opal_dump_region_init();
/* Setup error log interface */
@@ -707,6 +763,8 @@ static int __init opal_init(void)
opal_msglog_init();
}
+ opal_ipmi_init(opal_node);
+
return 0;
}
machine_subsys_initcall(powernv, opal_init);
@@ -742,6 +800,8 @@ void opal_shutdown(void)
/* Export this so that test modules can use it */
EXPORT_SYMBOL_GPL(opal_invalid_call);
+EXPORT_SYMBOL_GPL(opal_ipmi_send);
+EXPORT_SYMBOL_GPL(opal_ipmi_recv);
/* Convert a region of vmalloc memory to an opal sg list */
struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
@@ -805,3 +865,10 @@ void opal_free_sg_list(struct opal_sg_list *sg)
sg = NULL;
}
}
+
+EXPORT_SYMBOL_GPL(opal_poll_events);
+EXPORT_SYMBOL_GPL(opal_rtc_read);
+EXPORT_SYMBOL_GPL(opal_rtc_write);
+EXPORT_SYMBOL_GPL(opal_tpo_read);
+EXPORT_SYMBOL_GPL(opal_tpo_write);
+EXPORT_SYMBOL_GPL(opal_i2c_request);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3ba435ec3dcd..fac88ed8a915 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -91,6 +91,24 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
(IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
}
+static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
+{
+ if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe)) {
+ pr_warn("%s: Invalid PE %d on PHB#%x\n",
+ __func__, pe_no, phb->hose->global_number);
+ return;
+ }
+
+ if (test_and_set_bit(pe_no, phb->ioda.pe_alloc)) {
+ pr_warn("%s: PE %d was assigned on PHB#%x\n",
+ __func__, pe_no, phb->hose->global_number);
+ return;
+ }
+
+ phb->ioda.pe_array[pe_no].phb = phb;
+ phb->ioda.pe_array[pe_no].pe_number = pe_no;
+}
+
static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
{
unsigned long pe;
@@ -172,7 +190,7 @@ fail:
return -EIO;
}
-static void pnv_ioda2_alloc_m64_pe(struct pnv_phb *phb)
+static void pnv_ioda2_reserve_m64_pe(struct pnv_phb *phb)
{
resource_size_t sgsz = phb->ioda.m64_segsize;
struct pci_dev *pdev;
@@ -185,16 +203,15 @@ static void pnv_ioda2_alloc_m64_pe(struct pnv_phb *phb)
* instead of root bus.
*/
list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) {
- for (i = PCI_BRIDGE_RESOURCES;
- i <= PCI_BRIDGE_RESOURCE_END; i++) {
- r = &pdev->resource[i];
+ for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
+ r = &pdev->resource[PCI_BRIDGE_RESOURCES + i];
if (!r->parent ||
!pnv_pci_is_mem_pref_64(r->flags))
continue;
base = (r->start - phb->ioda.m64_base) / sgsz;
for (step = 0; step < resource_size(r) / sgsz; step++)
- set_bit(base + step, phb->ioda.pe_alloc);
+ pnv_ioda_reserve_pe(phb, base + step);
}
}
}
@@ -287,8 +304,6 @@ done:
while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) <
phb->ioda.total_pe) {
pe = &phb->ioda.pe_array[i];
- pe->phb = phb;
- pe->pe_number = i;
if (!master_pe) {
pe->flags |= PNV_IODA_PE_MASTER;
@@ -313,6 +328,12 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
const u32 *r;
u64 pci_addr;
+ /* FIXME: Support M64 for P7IOC */
+ if (phb->type != PNV_PHB_IODA2) {
+ pr_info(" Not support M64 window\n");
+ return;
+ }
+
if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
pr_info(" Firmware too old to support M64 window\n");
return;
@@ -325,12 +346,6 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
return;
}
- /* FIXME: Support M64 for P7IOC */
- if (phb->type != PNV_PHB_IODA2) {
- pr_info(" Not support M64 window\n");
- return;
- }
-
res = &hose->mem_resources[1];
res->start = of_translate_address(dn, r + 2);
res->end = res->start + of_read_number(r + 4, 2) - 1;
@@ -345,7 +360,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
/* Use last M64 BAR to cover M64 window */
phb->ioda.m64_bar_idx = 15;
phb->init_m64 = pnv_ioda2_init_m64;
- phb->alloc_m64_pe = pnv_ioda2_alloc_m64_pe;
+ phb->reserve_m64_pe = pnv_ioda2_reserve_m64_pe;
phb->pick_m64_pe = pnv_ioda2_pick_m64_pe;
}
@@ -358,7 +373,9 @@ static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no)
/* Fetch master PE */
if (pe->flags & PNV_IODA_PE_SLAVE) {
pe = pe->master;
- WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER));
+ if (WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)))
+ return;
+
pe_no = pe->pe_number;
}
@@ -507,6 +524,106 @@ static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
}
#endif /* CONFIG_PCI_MSI */
+static int pnv_ioda_set_one_peltv(struct pnv_phb *phb,
+ struct pnv_ioda_pe *parent,
+ struct pnv_ioda_pe *child,
+ bool is_add)
+{
+ const char *desc = is_add ? "adding" : "removing";
+ uint8_t op = is_add ? OPAL_ADD_PE_TO_DOMAIN :
+ OPAL_REMOVE_PE_FROM_DOMAIN;
+ struct pnv_ioda_pe *slave;
+ long rc;
+
+ /* Parent PE affects child PE */
+ rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number,
+ child->pe_number, op);
+ if (rc != OPAL_SUCCESS) {
+ pe_warn(child, "OPAL error %ld %s to parent PELTV\n",
+ rc, desc);
+ return -ENXIO;
+ }
+
+ if (!(child->flags & PNV_IODA_PE_MASTER))
+ return 0;
+
+ /* Compound case: parent PE affects slave PEs */
+ list_for_each_entry(slave, &child->slaves, list) {
+ rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number,
+ slave->pe_number, op);
+ if (rc != OPAL_SUCCESS) {
+ pe_warn(slave, "OPAL error %ld %s to parent PELTV\n",
+ rc, desc);
+ return -ENXIO;
+ }
+ }
+
+ return 0;
+}
+
+static int pnv_ioda_set_peltv(struct pnv_phb *phb,
+ struct pnv_ioda_pe *pe,
+ bool is_add)
+{
+ struct pnv_ioda_pe *slave;
+ struct pci_dev *pdev;
+ int ret;
+
+ /*
+ * Clear PE frozen state. If it's master PE, we need
+ * clear slave PE frozen state as well.
+ */
+ if (is_add) {
+ opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
+ OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+ if (pe->flags & PNV_IODA_PE_MASTER) {
+ list_for_each_entry(slave, &pe->slaves, list)
+ opal_pci_eeh_freeze_clear(phb->opal_id,
+ slave->pe_number,
+ OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+ }
+ }
+
+ /*
+ * Associate PE in PELT. We need add the PE into the
+ * corresponding PELT-V as well. Otherwise, the error
+ * originated from the PE might contribute to other
+ * PEs.
+ */
+ ret = pnv_ioda_set_one_peltv(phb, pe, pe, is_add);
+ if (ret)
+ return ret;
+
+ /* For compound PEs, any one affects all of them */
+ if (pe->flags & PNV_IODA_PE_MASTER) {
+ list_for_each_entry(slave, &pe->slaves, list) {
+ ret = pnv_ioda_set_one_peltv(phb, slave, pe, is_add);
+ if (ret)
+ return ret;
+ }
+ }
+
+ if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS))
+ pdev = pe->pbus->self;
+ else
+ pdev = pe->pdev->bus->self;
+ while (pdev) {
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+ struct pnv_ioda_pe *parent;
+
+ if (pdn && pdn->pe_number != IODA_INVALID_PE) {
+ parent = &phb->ioda.pe_array[pdn->pe_number];
+ ret = pnv_ioda_set_one_peltv(phb, parent, pe, is_add);
+ if (ret)
+ return ret;
+ }
+
+ pdev = pdev->bus->self;
+ }
+
+ return 0;
+}
+
static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
struct pci_dev *parent;
@@ -561,48 +678,36 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
return -ENXIO;
}
- rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
- pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
- if (rc)
- pe_warn(pe, "OPAL error %d adding self to PELTV\n", rc);
- opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
- OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+ /* Configure PELTV */
+ pnv_ioda_set_peltv(phb, pe, true);
- /* Add to all parents PELT-V */
- while (parent) {
- struct pci_dn *pdn = pci_get_pdn(parent);
- if (pdn && pdn->pe_number != IODA_INVALID_PE) {
- rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
- pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
- /* XXX What to do in case of error ? */
- }
- parent = parent->bus->self;
- }
/* Setup reverse map */
for (rid = pe->rid; rid < rid_end; rid++)
phb->ioda.pe_rmap[rid] = pe->pe_number;
/* Setup one MVTs on IODA1 */
- if (phb->type == PNV_PHB_IODA1) {
- pe->mve_number = pe->pe_number;
- rc = opal_pci_set_mve(phb->opal_id, pe->mve_number,
- pe->pe_number);
+ if (phb->type != PNV_PHB_IODA1) {
+ pe->mve_number = 0;
+ goto out;
+ }
+
+ pe->mve_number = pe->pe_number;
+ rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, pe->pe_number);
+ if (rc != OPAL_SUCCESS) {
+ pe_err(pe, "OPAL error %ld setting up MVE %d\n",
+ rc, pe->mve_number);
+ pe->mve_number = -1;
+ } else {
+ rc = opal_pci_set_mve_enable(phb->opal_id,
+ pe->mve_number, OPAL_ENABLE_MVE);
if (rc) {
- pe_err(pe, "OPAL error %ld setting up MVE %d\n",
+ pe_err(pe, "OPAL error %ld enabling MVE %d\n",
rc, pe->mve_number);
pe->mve_number = -1;
- } else {
- rc = opal_pci_set_mve_enable(phb->opal_id,
- pe->mve_number, OPAL_ENABLE_MVE);
- if (rc) {
- pe_err(pe, "OPAL error %ld enabling MVE %d\n",
- rc, pe->mve_number);
- pe->mve_number = -1;
- }
}
- } else if (phb->type == PNV_PHB_IODA2)
- pe->mve_number = 0;
+ }
+out:
return 0;
}
@@ -837,8 +942,8 @@ static void pnv_pci_ioda_setup_PEs(void)
phb = hose->private_data;
/* M64 layout might affect PE allocation */
- if (phb->alloc_m64_pe)
- phb->alloc_m64_pe(phb);
+ if (phb->reserve_m64_pe)
+ phb->reserve_m64_pe(phb);
pnv_ioda_setup_PEs(hose->bus);
}
@@ -1834,19 +1939,14 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
phb_id = be64_to_cpup(prop64);
pr_debug(" PHB-ID : 0x%016llx\n", phb_id);
- phb = alloc_bootmem(sizeof(struct pnv_phb));
- if (!phb) {
- pr_err(" Out of memory !\n");
- return;
- }
+ phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0);
/* Allocate PCI controller */
- memset(phb, 0, sizeof(struct pnv_phb));
phb->hose = hose = pcibios_alloc_controller(np);
if (!phb->hose) {
pr_err(" Can't allocate PCI controller for %s\n",
np->full_name);
- free_bootmem((unsigned long)phb, sizeof(struct pnv_phb));
+ memblock_free(__pa(phb), sizeof(struct pnv_phb));
return;
}
@@ -1913,8 +2013,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
}
pemap_off = size;
size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
- aux = alloc_bootmem(size);
- memset(aux, 0, size);
+ aux = memblock_virt_alloc(size, 0);
phb->ioda.pe_alloc = aux;
phb->ioda.m32_segmap = aux + m32map_off;
if (phb->type == PNV_PHB_IODA1)
@@ -1999,8 +2098,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
ioda_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE);
}
- /* Configure M64 window */
- if (phb->init_m64 && phb->init_m64(phb))
+ /* Remove M64 resource if we can't configure it successfully */
+ if (!phb->init_m64 || phb->init_m64(phb))
hose->mem_resources[1].flags = 0;
}
diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
index 94ce3481490b..6ef6d4d8e7e2 100644
--- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
@@ -122,12 +122,9 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
return;
}
- phb = alloc_bootmem(sizeof(struct pnv_phb));
- if (phb) {
- memset(phb, 0, sizeof(struct pnv_phb));
- phb->hose = pcibios_alloc_controller(np);
- }
- if (!phb || !phb->hose) {
+ phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0);
+ phb->hose = pcibios_alloc_controller(np);
+ if (!phb->hose) {
pr_err(" Failed to allocate PCI controller\n");
return;
}
@@ -196,16 +193,27 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np)
hub_id = be64_to_cpup(prop64);
pr_info(" HUB-ID : 0x%016llx\n", hub_id);
+ /* Count child PHBs and calculate TCE space per PHB */
+ for_each_child_of_node(np, phbn) {
+ if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") ||
+ of_device_is_compatible(phbn, "ibm,p5ioc2-pciex"))
+ phb_count++;
+ }
+
+ if (phb_count <= 0) {
+ pr_info(" No PHBs for Hub %s\n", np->full_name);
+ return;
+ }
+
+ tce_per_phb = __rounddown_pow_of_two(P5IOC2_TCE_MEMORY / phb_count);
+ pr_info(" Allocating %lld MB of TCE memory per PHB\n",
+ tce_per_phb >> 20);
+
/* Currently allocate 16M of TCE memory for every Hub
*
* XXX TODO: Make it chip local if possible
*/
- tce_mem = __alloc_bootmem(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY,
- __pa(MAX_DMA_ADDRESS));
- if (!tce_mem) {
- pr_err(" Failed to allocate TCE Memory !\n");
- return;
- }
+ tce_mem = memblock_virt_alloc(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY);
pr_debug(" TCE : 0x%016lx..0x%016lx\n",
__pa(tce_mem), __pa(tce_mem) + P5IOC2_TCE_MEMORY - 1);
rc = opal_pci_set_hub_tce_memory(hub_id, __pa(tce_mem),
@@ -215,18 +223,6 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np)
return;
}
- /* Count child PHBs */
- for_each_child_of_node(np, phbn) {
- if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") ||
- of_device_is_compatible(phbn, "ibm,p5ioc2-pciex"))
- phb_count++;
- }
-
- /* Calculate how much TCE space we can give per PHB */
- tce_per_phb = __rounddown_pow_of_two(P5IOC2_TCE_MEMORY / phb_count);
- pr_info(" Allocating %lld MB of TCE memory per PHB\n",
- tce_per_phb >> 20);
-
/* Initialize PHBs */
for_each_child_of_node(np, phbn) {
if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") ||
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 4b20f2c6b3b2..4945e87f12dc 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -16,7 +16,6 @@
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
-#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/msi.h>
@@ -90,7 +89,7 @@ static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
return rc;
}
irq_set_msi_desc(virq, entry);
- write_msi_msg(virq, &msg);
+ pci_write_msi_msg(virq, &msg);
}
return 0;
}
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 34d29eb2a4de..6c02ff8dd69f 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -130,7 +130,7 @@ struct pnv_phb {
u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
void (*shutdown)(struct pnv_phb *phb);
int (*init_m64)(struct pnv_phb *phb);
- void (*alloc_m64_pe)(struct pnv_phb *phb);
+ void (*reserve_m64_pe)(struct pnv_phb *phb);
int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all);
int (*get_pe_state)(struct pnv_phb *phb, int pe_no);
void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 6c8e2d188cd0..604c48e7879a 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -29,6 +29,8 @@ static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev)
}
#endif
+extern u32 pnv_get_supported_cpuidle_states(void);
+
extern void pnv_lpc_init(void);
bool cpu_core_split_required(void);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 3f9546d8a51f..b700a329c31d 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -36,8 +36,12 @@
#include <asm/opal.h>
#include <asm/kexec.h>
#include <asm/smp.h>
+#include <asm/cputhreads.h>
+#include <asm/cpuidle.h>
+#include <asm/code-patching.h>
#include "powernv.h"
+#include "subcore.h"
static void __init pnv_setup_arch(void)
{
@@ -265,10 +269,8 @@ static unsigned long pnv_memory_block_size(void)
static void __init pnv_setup_machdep_opal(void)
{
ppc_md.get_boot_time = opal_get_boot_time;
- ppc_md.get_rtc_time = opal_get_rtc_time;
- ppc_md.set_rtc_time = opal_set_rtc_time;
ppc_md.restart = pnv_restart;
- ppc_md.power_off = pnv_power_off;
+ pm_power_off = pnv_power_off;
ppc_md.halt = pnv_halt;
ppc_md.machine_check_exception = opal_machine_check;
ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
@@ -285,11 +287,173 @@ static void __init pnv_setup_machdep_rtas(void)
ppc_md.set_rtc_time = rtas_set_rtc_time;
}
ppc_md.restart = rtas_restart;
- ppc_md.power_off = rtas_power_off;
+ pm_power_off = rtas_power_off;
ppc_md.halt = rtas_halt;
}
#endif /* CONFIG_PPC_POWERNV_RTAS */
+static u32 supported_cpuidle_states;
+
+int pnv_save_sprs_for_winkle(void)
+{
+ int cpu;
+ int rc;
+
+ /*
+ * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross
+ * all cpus at boot. Get these reg values of current cpu and use the
+ * same accross all cpus.
+ */
+ uint64_t lpcr_val = mfspr(SPRN_LPCR);
+ uint64_t hid0_val = mfspr(SPRN_HID0);
+ uint64_t hid1_val = mfspr(SPRN_HID1);
+ uint64_t hid4_val = mfspr(SPRN_HID4);
+ uint64_t hid5_val = mfspr(SPRN_HID5);
+ uint64_t hmeer_val = mfspr(SPRN_HMEER);
+
+ for_each_possible_cpu(cpu) {
+ uint64_t pir = get_hard_smp_processor_id(cpu);
+ uint64_t hsprg0_val = (uint64_t)&paca[cpu];
+
+ /*
+ * HSPRG0 is used to store the cpu's pointer to paca. Hence last
+ * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0
+ * with 63rd bit set, so that when a thread wakes up at 0x100 we
+ * can use this bit to distinguish between fastsleep and
+ * deep winkle.
+ */
+ hsprg0_val |= 1;
+
+ rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
+ if (rc != 0)
+ return rc;
+
+ /* HIDs are per core registers */
+ if (cpu_thread_in_core(cpu) == 0) {
+
+ rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
+ if (rc != 0)
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+static void pnv_alloc_idle_core_states(void)
+{
+ int i, j;
+ int nr_cores = cpu_nr_cores();
+ u32 *core_idle_state;
+
+ /*
+ * core_idle_state - First 8 bits track the idle state of each thread
+ * of the core. The 8th bit is the lock bit. Initially all thread bits
+ * are set. They are cleared when the thread enters deep idle state
+ * like sleep and winkle. Initially the lock bit is cleared.
+ * The lock bit has 2 purposes
+ * a. While the first thread is restoring core state, it prevents
+ * other threads in the core from switching to process context.
+ * b. While the last thread in the core is saving the core state, it
+ * prevents a different thread from waking up.
+ */
+ for (i = 0; i < nr_cores; i++) {
+ int first_cpu = i * threads_per_core;
+ int node = cpu_to_node(first_cpu);
+
+ core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
+ *core_idle_state = PNV_CORE_IDLE_THREAD_BITS;
+
+ for (j = 0; j < threads_per_core; j++) {
+ int cpu = first_cpu + j;
+
+ paca[cpu].core_idle_state_ptr = core_idle_state;
+ paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
+ paca[cpu].thread_mask = 1 << j;
+ }
+ }
+
+ update_subcore_sibling_mask();
+
+ if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED)
+ pnv_save_sprs_for_winkle();
+}
+
+u32 pnv_get_supported_cpuidle_states(void)
+{
+ return supported_cpuidle_states;
+}
+EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
+
+static int __init pnv_init_idle_states(void)
+{
+ struct device_node *power_mgt;
+ int dt_idle_states;
+ const __be32 *idle_state_flags;
+ u32 len_flags, flags;
+ int i;
+
+ supported_cpuidle_states = 0;
+
+ if (cpuidle_disable != IDLE_NO_OVERRIDE)
+ return 0;
+
+ if (!firmware_has_feature(FW_FEATURE_OPALv3))
+ return 0;
+
+ power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
+ if (!power_mgt) {
+ pr_warn("opal: PowerMgmt Node not found\n");
+ return 0;
+ }
+
+ idle_state_flags = of_get_property(power_mgt,
+ "ibm,cpu-idle-state-flags", &len_flags);
+ if (!idle_state_flags) {
+ pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n");
+ return 0;
+ }
+
+ dt_idle_states = len_flags / sizeof(u32);
+
+ for (i = 0; i < dt_idle_states; i++) {
+ flags = be32_to_cpu(idle_state_flags[i]);
+ supported_cpuidle_states |= flags;
+ }
+ if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
+ patch_instruction(
+ (unsigned int *)pnv_fastsleep_workaround_at_entry,
+ PPC_INST_NOP);
+ patch_instruction(
+ (unsigned int *)pnv_fastsleep_workaround_at_exit,
+ PPC_INST_NOP);
+ }
+ pnv_alloc_idle_core_states();
+ return 0;
+}
+
+subsys_initcall(pnv_init_idle_states);
+
static int __init pnv_probe(void)
{
unsigned long root = of_get_flat_dt_root();
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 4753958cd509..fc34025ef822 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -149,6 +149,8 @@ static int pnv_smp_cpu_disable(void)
static void pnv_smp_cpu_kill_self(void)
{
unsigned int cpu;
+ unsigned long srr1;
+ u32 idle_states;
/* Standard hot unplug procedure */
local_irq_disable();
@@ -159,19 +161,41 @@ static void pnv_smp_cpu_kill_self(void)
generic_set_cpu_dead(cpu);
smp_wmb();
+ idle_states = pnv_get_supported_cpuidle_states();
/* We don't want to take decrementer interrupts while we are offline,
* so clear LPCR:PECE1. We keep PECE2 enabled.
*/
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
while (!generic_check_cpu_restart(cpu)) {
+
ppc64_runlatch_off();
- power7_nap(1);
+
+ if (idle_states & OPAL_PM_WINKLE_ENABLED)
+ srr1 = power7_winkle();
+ else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
+ (idle_states & OPAL_PM_SLEEP_ENABLED_ER1))
+ srr1 = power7_sleep();
+ else
+ srr1 = power7_nap(1);
+
ppc64_runlatch_on();
- /* Clear the IPI that woke us up */
- icp_native_flush_interrupt();
- local_paca->irq_happened &= PACA_IRQ_HARD_DIS;
- mb();
+ /*
+ * If the SRR1 value indicates that we woke up due to
+ * an external interrupt, then clear the interrupt.
+ * We clear the interrupt before checking for the
+ * reason, so as to avoid a race where we wake up for
+ * some other reason, find nothing and clear the interrupt
+ * just as some other cpu is sending us an interrupt.
+ * If we returned from power7_nap as a result of
+ * having finished executing in a KVM guest, then srr1
+ * contains 0.
+ */
+ if ((srr1 & SRR1_WAKEMASK) == SRR1_WAKEEE) {
+ icp_native_flush_interrupt();
+ local_paca->irq_happened &= PACA_IRQ_HARD_DIS;
+ smp_mb();
+ }
if (cpu_core_split_required())
continue;
@@ -185,13 +209,27 @@ static void pnv_smp_cpu_kill_self(void)
#endif /* CONFIG_HOTPLUG_CPU */
+static int pnv_cpu_bootable(unsigned int nr)
+{
+ /*
+ * Starting with POWER8, the subcore logic relies on all threads of a
+ * core being booted so that they can participate in split mode
+ * switches. So on those machines we ignore the smt_enabled_at_boot
+ * setting (smt-enabled on the kernel command line).
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ return 1;
+
+ return smp_generic_cpu_bootable(nr);
+}
+
static struct smp_ops_t pnv_smp_ops = {
.message_pass = smp_muxed_ipi_message_pass,
.cause_ipi = NULL, /* Filled at runtime by xics_smp_probe() */
.probe = xics_smp_probe,
.kick_cpu = pnv_smp_kick_cpu,
.setup_cpu = pnv_smp_setup_cpu,
- .cpu_bootable = smp_generic_cpu_bootable,
+ .cpu_bootable = pnv_cpu_bootable,
#ifdef CONFIG_HOTPLUG_CPU
.cpu_disable = pnv_smp_cpu_disable,
.cpu_die = generic_cpu_die,
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index c87f96b79d1a..f60f80ada903 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -160,6 +160,18 @@ static void wait_for_sync_step(int step)
mb();
}
+static void update_hid_in_slw(u64 hid0)
+{
+ u64 idle_states = pnv_get_supported_cpuidle_states();
+
+ if (idle_states & OPAL_PM_WINKLE_ENABLED) {
+ /* OPAL call to patch slw with the new HID0 value */
+ u64 cpu_pir = hard_smp_processor_id();
+
+ opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0);
+ }
+}
+
static void unsplit_core(void)
{
u64 hid0, mask;
@@ -179,6 +191,7 @@ static void unsplit_core(void)
hid0 = mfspr(SPRN_HID0);
hid0 &= ~HID0_POWER8_DYNLPARDIS;
mtspr(SPRN_HID0, hid0);
+ update_hid_in_slw(hid0);
while (mfspr(SPRN_HID0) & mask)
cpu_relax();
@@ -215,6 +228,7 @@ static void split_core(int new_mode)
hid0 = mfspr(SPRN_HID0);
hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
mtspr(SPRN_HID0, hid0);
+ update_hid_in_slw(hid0);
/* Wait for it to happen */
while (!(mfspr(SPRN_HID0) & split_parms[i].mask))
@@ -251,6 +265,25 @@ bool cpu_core_split_required(void)
return true;
}
+void update_subcore_sibling_mask(void)
+{
+ int cpu;
+ /*
+ * sibling mask for the first cpu. Left shift this by required bits
+ * to get sibling mask for the rest of the cpus.
+ */
+ int sibling_mask_first_cpu = (1 << threads_per_subcore) - 1;
+
+ for_each_possible_cpu(cpu) {
+ int tid = cpu_thread_in_core(cpu);
+ int offset = (tid / threads_per_subcore) * threads_per_subcore;
+ int mask = sibling_mask_first_cpu << offset;
+
+ paca[cpu].subcore_sibling_mask = mask;
+
+ }
+}
+
static int cpu_update_split_mode(void *data)
{
int cpu, new_mode = *(int *)data;
@@ -284,6 +317,7 @@ static int cpu_update_split_mode(void *data)
/* Make the new mode public */
subcores_per_core = new_mode;
threads_per_subcore = threads_per_core / subcores_per_core;
+ update_subcore_sibling_mask();
/* Make sure the new mode is written before we exit */
mb();
diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h
index 148abc91debf..84e02ae52895 100644
--- a/arch/powerpc/platforms/powernv/subcore.h
+++ b/arch/powerpc/platforms/powernv/subcore.h
@@ -14,5 +14,12 @@
#define SYNC_STEP_FINISHED 3 /* Set by secondary when split/unsplit is done */
#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_SMP
void split_core_secondary_loop(u8 *state);
-#endif
+extern void update_subcore_sibling_mask(void);
+#else
+static inline void update_subcore_sibling_mask(void) { };
+#endif /* CONFIG_SMP */
+
+#endif /* __ASSEMBLY__ */