diff options
Diffstat (limited to 'arch/powerpc/platforms/powernv')
-rw-r--r-- | arch/powerpc/platforms/powernv/eeh-ioda.c | 14 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/eeh-powernv.c | 57 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-async.c | 3 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-hmi.c | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-lpc.c | 63 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-rtc.c | 65 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-sensor.c | 20 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-tracepoints.c | 4 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-wrappers.S | 8 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal.c | 42 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci-ioda.c | 222 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci-p5ioc2.c | 44 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci.c | 8 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci.h | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/setup.c | 6 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/smp.c | 23 |
16 files changed, 423 insertions, 160 deletions
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 426814a2ede3..2809c9895288 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -11,7 +11,6 @@ * (at your option) any later version. */ -#include <linux/bootmem.h> #include <linux/debugfs.h> #include <linux/delay.h> #include <linux/io.h> @@ -354,6 +353,9 @@ static int ioda_eeh_get_phb_state(struct eeh_pe *pe) } else if (!(pe->state & EEH_PE_ISOLATED)) { eeh_pe_state_mark(pe, EEH_PE_ISOLATED); ioda_eeh_phb_diag(pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); } return result; @@ -452,6 +454,9 @@ static int ioda_eeh_get_pe_state(struct eeh_pe *pe) eeh_pe_state_mark(pe, EEH_PE_ISOLATED); ioda_eeh_phb_diag(pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); } return result; @@ -731,7 +736,8 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option) static int ioda_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len) { - pnv_pci_dump_phb_diag_data(pe->phb, pe->data); + if (!eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); return 0; } @@ -1087,6 +1093,10 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) !((*pe)->state & EEH_PE_ISOLATED)) { eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); ioda_eeh_phb_diag(*pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data((*pe)->phb, + (*pe)->data); } /* diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 3e89cbf55885..1d19e7917d7f 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -169,6 +169,26 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) } /* + * If the PE contains any one of following adapters, the + * PCI config space can't be accessed when dumping EEH log. + * Otherwise, we will run into fenced PHB caused by shortage + * of outbound credits in the adapter. The PCI config access + * should be blocked until PE reset. MMIO access is dropped + * by hardware certainly. In order to drop PCI config requests, + * one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which + * will be checked in the backend for PE state retrival. If + * the PE becomes frozen for the first time and the flag has + * been set for the PE, we will set EEH_PE_CFG_BLOCKED for + * that PE to block its config space. + * + * Broadcom Austin 4-ports NICs (14e4:1657) + * Broadcom Shiner 2-ports 10G NICs (14e4:168e) + */ + if ((dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x1657) || + (dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x168e)) + edev->pe->state |= EEH_PE_CFG_RESTRICTED; + + /* * Cache the PE primary bus, which can't be fetched when * full hotplug is in progress. In that case, all child * PCI devices of the PE are expected to be removed prior @@ -383,6 +403,39 @@ static int powernv_eeh_err_inject(struct eeh_pe *pe, int type, int func, return ret; } +static inline bool powernv_eeh_cfg_blocked(struct device_node *dn) +{ + struct eeh_dev *edev = of_node_to_eeh_dev(dn); + + if (!edev || !edev->pe) + return false; + + if (edev->pe->state & EEH_PE_CFG_BLOCKED) + return true; + + return false; +} + +static int powernv_eeh_read_config(struct device_node *dn, + int where, int size, u32 *val) +{ + if (powernv_eeh_cfg_blocked(dn)) { + *val = 0xFFFFFFFF; + return PCIBIOS_SET_FAILED; + } + + return pnv_pci_cfg_read(dn, where, size, val); +} + +static int powernv_eeh_write_config(struct device_node *dn, + int where, int size, u32 val) +{ + if (powernv_eeh_cfg_blocked(dn)) + return PCIBIOS_SET_FAILED; + + return pnv_pci_cfg_write(dn, where, size, val); +} + /** * powernv_eeh_next_error - Retrieve next EEH error to handle * @pe: Affected PE @@ -440,8 +493,8 @@ static struct eeh_ops powernv_eeh_ops = { .get_log = powernv_eeh_get_log, .configure_bridge = powernv_eeh_configure_bridge, .err_inject = powernv_eeh_err_inject, - .read_config = pnv_pci_cfg_read, - .write_config = pnv_pci_cfg_write, + .read_config = powernv_eeh_read_config, + .write_config = powernv_eeh_write_config, .next_error = powernv_eeh_next_error, .restore_config = powernv_eeh_restore_config }; diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c index e462ab947d16..693b6cdac691 100644 --- a/arch/powerpc/platforms/powernv/opal-async.c +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -71,6 +71,7 @@ int opal_async_get_token_interruptible(void) return token; } +EXPORT_SYMBOL_GPL(opal_async_get_token_interruptible); int __opal_async_release_token(int token) { @@ -102,6 +103,7 @@ int opal_async_release_token(int token) return 0; } +EXPORT_SYMBOL_GPL(opal_async_release_token); int opal_async_wait_response(uint64_t token, struct opal_msg *msg) { @@ -120,6 +122,7 @@ int opal_async_wait_response(uint64_t token, struct opal_msg *msg) return 0; } +EXPORT_SYMBOL_GPL(opal_async_wait_response); static int opal_async_comp_event(struct notifier_block *nb, unsigned long msg_type, void *msg) diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c index 5e1ed1575aab..b322bfb51343 100644 --- a/arch/powerpc/platforms/powernv/opal-hmi.c +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -57,7 +57,7 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt) }; /* Print things out */ - if (hmi_evt->version != OpalHMIEvt_V1) { + if (hmi_evt->version < OpalHMIEvt_V1) { pr_err("HMI Interrupt, Unknown event version %d !\n", hmi_evt->version); return; diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c index dd2c285ad170..e4169d68cb32 100644 --- a/arch/powerpc/platforms/powernv/opal-lpc.c +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -191,7 +191,6 @@ static ssize_t lpc_debug_read(struct file *filp, char __user *ubuf, { struct lpc_debugfs_entry *lpc = filp->private_data; u32 data, pos, len, todo; - __be32 bedata; int rc; if (!access_ok(VERIFY_WRITE, ubuf, count)) @@ -214,18 +213,57 @@ static ssize_t lpc_debug_read(struct file *filp, char __user *ubuf, len = 2; } rc = opal_lpc_read(opal_lpc_chip_id, lpc->lpc_type, pos, - &bedata, len); + &data, len); if (rc) return -ENXIO; - data = be32_to_cpu(bedata); + + /* + * Now there is some trickery with the data returned by OPAL + * as it's the desired data right justified in a 32-bit BE + * word. + * + * This is a very bad interface and I'm to blame for it :-( + * + * So we can't just apply a 32-bit swap to what comes from OPAL, + * because user space expects the *bytes* to be in their proper + * respective positions (ie, LPC position). + * + * So what we really want to do here is to shift data right + * appropriately on a LE kernel. + * + * IE. If the LPC transaction has bytes B0, B1, B2 and B3 in that + * order, we have in memory written to by OPAL at the "data" + * pointer: + * + * Bytes: OPAL "data" LE "data" + * 32-bit: B0 B1 B2 B3 B0B1B2B3 B3B2B1B0 + * 16-bit: B0 B1 0000B0B1 B1B00000 + * 8-bit: B0 000000B0 B0000000 + * + * So a BE kernel will have the leftmost of the above in the MSB + * and rightmost in the LSB and can just then "cast" the u32 "data" + * down to the appropriate quantity and write it. + * + * However, an LE kernel can't. It doesn't need to swap because a + * load from data followed by a store to user are going to preserve + * the byte ordering which is the wire byte order which is what the + * user wants, but in order to "crop" to the right size, we need to + * shift right first. + */ switch(len) { case 4: rc = __put_user((u32)data, (u32 __user *)ubuf); break; case 2: +#ifdef __LITTLE_ENDIAN__ + data >>= 16; +#endif rc = __put_user((u16)data, (u16 __user *)ubuf); break; default: +#ifdef __LITTLE_ENDIAN__ + data >>= 24; +#endif rc = __put_user((u8)data, (u8 __user *)ubuf); break; } @@ -265,12 +303,31 @@ static ssize_t lpc_debug_write(struct file *filp, const char __user *ubuf, else if (todo > 1 && (pos & 1) == 0) len = 2; } + + /* + * Similarly to the read case, we have some trickery here but + * it's different to handle. We need to pass the value to OPAL in + * a register whose layout depends on the access size. We want + * to reproduce the memory layout of the user, however we aren't + * doing a load from user and a store to another memory location + * which would achieve that. Here we pass the value to OPAL via + * a register which is expected to contain the "BE" interpretation + * of the byte sequence. IE: for a 32-bit access, byte 0 should be + * in the MSB. So here we *do* need to byteswap on LE. + * + * User bytes: LE "data" OPAL "data" + * 32-bit: B0 B1 B2 B3 B3B2B1B0 B0B1B2B3 + * 16-bit: B0 B1 0000B1B0 0000B0B1 + * 8-bit: B0 000000B0 000000B0 + */ switch(len) { case 4: rc = __get_user(data, (u32 __user *)ubuf); + data = cpu_to_be32(data); break; case 2: rc = __get_user(data, (u16 __user *)ubuf); + data = cpu_to_be16(data); break; default: rc = __get_user(data, (u8 __user *)ubuf); diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c index 499707ddaa9c..37dbee15769f 100644 --- a/arch/powerpc/platforms/powernv/opal-rtc.c +++ b/arch/powerpc/platforms/powernv/opal-rtc.c @@ -15,6 +15,8 @@ #include <linux/bcd.h> #include <linux/rtc.h> #include <linux/delay.h> +#include <linux/platform_device.h> +#include <linux/of_platform.h> #include <asm/opal.h> #include <asm/firmware.h> @@ -43,7 +45,7 @@ unsigned long __init opal_get_boot_time(void) long rc = OPAL_BUSY; if (!opal_check_token(OPAL_RTC_READ)) - goto out; + return 0; while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); @@ -53,62 +55,33 @@ unsigned long __init opal_get_boot_time(void) mdelay(10); } if (rc != OPAL_SUCCESS) - goto out; + return 0; y_m_d = be32_to_cpu(__y_m_d); h_m_s_ms = be64_to_cpu(__h_m_s_ms); opal_to_tm(y_m_d, h_m_s_ms, &tm); return mktime(tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); -out: - ppc_md.get_rtc_time = NULL; - ppc_md.set_rtc_time = NULL; - return 0; } -void opal_get_rtc_time(struct rtc_time *tm) +static __init int opal_time_init(void) { - long rc = OPAL_BUSY; - u32 y_m_d; - u64 h_m_s_ms; - __be32 __y_m_d; - __be64 __h_m_s_ms; + struct platform_device *pdev; + struct device_node *rtc; - while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { - rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); - if (rc == OPAL_BUSY_EVENT) - opal_poll_events(NULL); + rtc = of_find_node_by_path("/ibm,opal/rtc"); + if (rtc) { + pdev = of_platform_device_create(rtc, "opal-rtc", NULL); + of_node_put(rtc); + } else { + if (opal_check_token(OPAL_RTC_READ) || + opal_check_token(OPAL_READ_TPO)) + pdev = platform_device_register_simple("opal-rtc", -1, + NULL, 0); else - mdelay(10); + return -ENODEV; } - if (rc != OPAL_SUCCESS) - return; - y_m_d = be32_to_cpu(__y_m_d); - h_m_s_ms = be64_to_cpu(__h_m_s_ms); - opal_to_tm(y_m_d, h_m_s_ms, tm); -} - -int opal_set_rtc_time(struct rtc_time *tm) -{ - long rc = OPAL_BUSY; - u32 y_m_d = 0; - u64 h_m_s_ms = 0; - - y_m_d |= ((u32)bin2bcd((tm->tm_year + 1900) / 100)) << 24; - y_m_d |= ((u32)bin2bcd((tm->tm_year + 1900) % 100)) << 16; - y_m_d |= ((u32)bin2bcd((tm->tm_mon + 1))) << 8; - y_m_d |= ((u32)bin2bcd(tm->tm_mday)); - - h_m_s_ms |= ((u64)bin2bcd(tm->tm_hour)) << 56; - h_m_s_ms |= ((u64)bin2bcd(tm->tm_min)) << 48; - h_m_s_ms |= ((u64)bin2bcd(tm->tm_sec)) << 40; - while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { - rc = opal_rtc_write(y_m_d, h_m_s_ms); - if (rc == OPAL_BUSY_EVENT) - opal_poll_events(NULL); - else - mdelay(10); - } - return rc == OPAL_SUCCESS ? 0 : -EIO; + return PTR_ERR_OR_ZERO(pdev); } +machine_subsys_initcall(powernv, opal_time_init); diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c index 10271ad1fac4..4ab67ef7abc9 100644 --- a/arch/powerpc/platforms/powernv/opal-sensor.c +++ b/arch/powerpc/platforms/powernv/opal-sensor.c @@ -20,7 +20,9 @@ #include <linux/delay.h> #include <linux/mutex.h> +#include <linux/of_platform.h> #include <asm/opal.h> +#include <asm/machdep.h> static DEFINE_MUTEX(opal_sensor_mutex); @@ -64,3 +66,21 @@ out: return ret; } EXPORT_SYMBOL_GPL(opal_get_sensor_data); + +static __init int opal_sensor_init(void) +{ + struct platform_device *pdev; + struct device_node *sensor; + + sensor = of_find_node_by_path("/ibm,opal/sensors"); + if (!sensor) { + pr_err("Opal node 'sensors' not found\n"); + return -ENODEV; + } + + pdev = of_platform_device_create(sensor, "opal-sensor", NULL); + of_node_put(sensor); + + return PTR_ERR_OR_ZERO(pdev); +} +machine_subsys_initcall(powernv, opal_sensor_init); diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c index ae14c40b4b1c..e11273b2386d 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -48,7 +48,7 @@ void __trace_opal_entry(unsigned long opcode, unsigned long *args) local_irq_save(flags); - depth = &__get_cpu_var(opal_trace_depth); + depth = this_cpu_ptr(&opal_trace_depth); if (*depth) goto out; @@ -69,7 +69,7 @@ void __trace_opal_exit(long opcode, unsigned long retval) local_irq_save(flags); - depth = &__get_cpu_var(opal_trace_depth); + depth = this_cpu_ptr(&opal_trace_depth); if (*depth) goto out; diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index e9e2450c1fdd..0a299be588af 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -18,7 +18,7 @@ .section ".text" #ifdef CONFIG_TRACEPOINTS -#ifdef CONFIG_JUMP_LABEL +#ifdef HAVE_JUMP_LABEL #define OPAL_BRANCH(LABEL) \ ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key) #else @@ -58,7 +58,7 @@ END_FTR_SECTION(0, 1); \ */ #define OPAL_CALL(name, token) \ - _GLOBAL(name); \ + _GLOBAL_TOC(name); \ mflr r0; \ std r0,16(r1); \ li r0,token; \ @@ -250,3 +250,7 @@ OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CXL_MODE); +OPAL_CALL(opal_tpo_write, OPAL_WRITE_TPO); +OPAL_CALL(opal_tpo_read, OPAL_READ_TPO); +OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND); +OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index b642b0562f5a..cb0b6de79cd4 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -50,7 +50,6 @@ static int mc_recoverable_range_len; struct device_node *opal_node; static DEFINE_SPINLOCK(opal_write_lock); -extern u64 opal_mc_secondary_handler[]; static unsigned int *opal_irqs; static unsigned int opal_irq_count; static ATOMIC_NOTIFIER_HEAD(opal_notifier_head); @@ -194,6 +193,27 @@ static int __init opal_register_exception_handlers(void) * fwnmi area at 0x7000 to provide the glue space to OPAL */ glue = 0x7000; + + /* + * Check if we are running on newer firmware that exports + * OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch + * the HMI interrupt and we catch it directly in Linux. + * + * For older firmware (i.e currently released POWER8 System Firmware + * as of today <= SV810_087), we fallback to old behavior and let OPAL + * patch the HMI vector and handle it inside OPAL firmware. + * + * For newer firmware (in development/yet to be released) we will + * start catching/handling HMI directly in Linux. + */ + if (!opal_check_token(OPAL_HANDLE_HMI)) { + pr_info("opal: Old firmware detected, OPAL handles HMIs.\n"); + opal_register_exception_handler( + OPAL_HYPERVISOR_MAINTENANCE_HANDLER, + 0, glue); + glue += 128; + } + opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue); #endif @@ -623,6 +643,16 @@ static void __init opal_dump_region_init(void) pr_warn("DUMP: Failed to register kernel log buffer. " "rc = %d\n", rc); } + +static void opal_ipmi_init(struct device_node *opal_node) +{ + struct device_node *np; + + for_each_child_of_node(opal_node, np) + if (of_device_is_compatible(np, "ibm,opal-ipmi")) + of_platform_device_create(np, NULL, NULL); +} + static int __init opal_init(void) { struct device_node *np, *consoles; @@ -686,6 +716,8 @@ static int __init opal_init(void) opal_msglog_init(); } + opal_ipmi_init(opal_node); + return 0; } machine_subsys_initcall(powernv, opal_init); @@ -721,6 +753,8 @@ void opal_shutdown(void) /* Export this so that test modules can use it */ EXPORT_SYMBOL_GPL(opal_invalid_call); +EXPORT_SYMBOL_GPL(opal_ipmi_send); +EXPORT_SYMBOL_GPL(opal_ipmi_recv); /* Convert a region of vmalloc memory to an opal sg list */ struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, @@ -784,3 +818,9 @@ void opal_free_sg_list(struct opal_sg_list *sg) sg = NULL; } } + +EXPORT_SYMBOL_GPL(opal_poll_events); +EXPORT_SYMBOL_GPL(opal_rtc_read); +EXPORT_SYMBOL_GPL(opal_rtc_write); +EXPORT_SYMBOL_GPL(opal_tpo_read); +EXPORT_SYMBOL_GPL(opal_tpo_write); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 468a0f23c7f2..fac88ed8a915 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -91,6 +91,24 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)); } +static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) +{ + if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe)) { + pr_warn("%s: Invalid PE %d on PHB#%x\n", + __func__, pe_no, phb->hose->global_number); + return; + } + + if (test_and_set_bit(pe_no, phb->ioda.pe_alloc)) { + pr_warn("%s: PE %d was assigned on PHB#%x\n", + __func__, pe_no, phb->hose->global_number); + return; + } + + phb->ioda.pe_array[pe_no].phb = phb; + phb->ioda.pe_array[pe_no].pe_number = pe_no; +} + static int pnv_ioda_alloc_pe(struct pnv_phb *phb) { unsigned long pe; @@ -172,7 +190,7 @@ fail: return -EIO; } -static void pnv_ioda2_alloc_m64_pe(struct pnv_phb *phb) +static void pnv_ioda2_reserve_m64_pe(struct pnv_phb *phb) { resource_size_t sgsz = phb->ioda.m64_segsize; struct pci_dev *pdev; @@ -185,16 +203,15 @@ static void pnv_ioda2_alloc_m64_pe(struct pnv_phb *phb) * instead of root bus. */ list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) { - for (i = PCI_BRIDGE_RESOURCES; - i <= PCI_BRIDGE_RESOURCE_END; i++) { - r = &pdev->resource[i]; + for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) { + r = &pdev->resource[PCI_BRIDGE_RESOURCES + i]; if (!r->parent || !pnv_pci_is_mem_pref_64(r->flags)) continue; base = (r->start - phb->ioda.m64_base) / sgsz; for (step = 0; step < resource_size(r) / sgsz; step++) - set_bit(base + step, phb->ioda.pe_alloc); + pnv_ioda_reserve_pe(phb, base + step); } } } @@ -287,8 +304,6 @@ done: while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) < phb->ioda.total_pe) { pe = &phb->ioda.pe_array[i]; - pe->phb = phb; - pe->pe_number = i; if (!master_pe) { pe->flags |= PNV_IODA_PE_MASTER; @@ -313,6 +328,12 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) const u32 *r; u64 pci_addr; + /* FIXME: Support M64 for P7IOC */ + if (phb->type != PNV_PHB_IODA2) { + pr_info(" Not support M64 window\n"); + return; + } + if (!firmware_has_feature(FW_FEATURE_OPALv3)) { pr_info(" Firmware too old to support M64 window\n"); return; @@ -325,12 +346,6 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) return; } - /* FIXME: Support M64 for P7IOC */ - if (phb->type != PNV_PHB_IODA2) { - pr_info(" Not support M64 window\n"); - return; - } - res = &hose->mem_resources[1]; res->start = of_translate_address(dn, r + 2); res->end = res->start + of_read_number(r + 4, 2) - 1; @@ -345,7 +360,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) /* Use last M64 BAR to cover M64 window */ phb->ioda.m64_bar_idx = 15; phb->init_m64 = pnv_ioda2_init_m64; - phb->alloc_m64_pe = pnv_ioda2_alloc_m64_pe; + phb->reserve_m64_pe = pnv_ioda2_reserve_m64_pe; phb->pick_m64_pe = pnv_ioda2_pick_m64_pe; } @@ -358,7 +373,9 @@ static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no) /* Fetch master PE */ if (pe->flags & PNV_IODA_PE_SLAVE) { pe = pe->master; - WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); + if (WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER))) + return; + pe_no = pe->pe_number; } @@ -507,6 +524,106 @@ static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) } #endif /* CONFIG_PCI_MSI */ +static int pnv_ioda_set_one_peltv(struct pnv_phb *phb, + struct pnv_ioda_pe *parent, + struct pnv_ioda_pe *child, + bool is_add) +{ + const char *desc = is_add ? "adding" : "removing"; + uint8_t op = is_add ? OPAL_ADD_PE_TO_DOMAIN : + OPAL_REMOVE_PE_FROM_DOMAIN; + struct pnv_ioda_pe *slave; + long rc; + + /* Parent PE affects child PE */ + rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number, + child->pe_number, op); + if (rc != OPAL_SUCCESS) { + pe_warn(child, "OPAL error %ld %s to parent PELTV\n", + rc, desc); + return -ENXIO; + } + + if (!(child->flags & PNV_IODA_PE_MASTER)) + return 0; + + /* Compound case: parent PE affects slave PEs */ + list_for_each_entry(slave, &child->slaves, list) { + rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number, + slave->pe_number, op); + if (rc != OPAL_SUCCESS) { + pe_warn(slave, "OPAL error %ld %s to parent PELTV\n", + rc, desc); + return -ENXIO; + } + } + + return 0; +} + +static int pnv_ioda_set_peltv(struct pnv_phb *phb, + struct pnv_ioda_pe *pe, + bool is_add) +{ + struct pnv_ioda_pe *slave; + struct pci_dev *pdev; + int ret; + + /* + * Clear PE frozen state. If it's master PE, we need + * clear slave PE frozen state as well. + */ + if (is_add) { + opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + if (pe->flags & PNV_IODA_PE_MASTER) { + list_for_each_entry(slave, &pe->slaves, list) + opal_pci_eeh_freeze_clear(phb->opal_id, + slave->pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + } + } + + /* + * Associate PE in PELT. We need add the PE into the + * corresponding PELT-V as well. Otherwise, the error + * originated from the PE might contribute to other + * PEs. + */ + ret = pnv_ioda_set_one_peltv(phb, pe, pe, is_add); + if (ret) + return ret; + + /* For compound PEs, any one affects all of them */ + if (pe->flags & PNV_IODA_PE_MASTER) { + list_for_each_entry(slave, &pe->slaves, list) { + ret = pnv_ioda_set_one_peltv(phb, slave, pe, is_add); + if (ret) + return ret; + } + } + + if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS)) + pdev = pe->pbus->self; + else + pdev = pe->pdev->bus->self; + while (pdev) { + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pnv_ioda_pe *parent; + + if (pdn && pdn->pe_number != IODA_INVALID_PE) { + parent = &phb->ioda.pe_array[pdn->pe_number]; + ret = pnv_ioda_set_one_peltv(phb, parent, pe, is_add); + if (ret) + return ret; + } + + pdev = pdev->bus->self; + } + + return 0; +} + static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct pci_dev *parent; @@ -561,48 +678,36 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) return -ENXIO; } - rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, - pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); - if (rc) - pe_warn(pe, "OPAL error %d adding self to PELTV\n", rc); - opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, - OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + /* Configure PELTV */ + pnv_ioda_set_peltv(phb, pe, true); - /* Add to all parents PELT-V */ - while (parent) { - struct pci_dn *pdn = pci_get_pdn(parent); - if (pdn && pdn->pe_number != IODA_INVALID_PE) { - rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, - pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); - /* XXX What to do in case of error ? */ - } - parent = parent->bus->self; - } /* Setup reverse map */ for (rid = pe->rid; rid < rid_end; rid++) phb->ioda.pe_rmap[rid] = pe->pe_number; /* Setup one MVTs on IODA1 */ - if (phb->type == PNV_PHB_IODA1) { - pe->mve_number = pe->pe_number; - rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, - pe->pe_number); + if (phb->type != PNV_PHB_IODA1) { + pe->mve_number = 0; + goto out; + } + + pe->mve_number = pe->pe_number; + rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, pe->pe_number); + if (rc != OPAL_SUCCESS) { + pe_err(pe, "OPAL error %ld setting up MVE %d\n", + rc, pe->mve_number); + pe->mve_number = -1; + } else { + rc = opal_pci_set_mve_enable(phb->opal_id, + pe->mve_number, OPAL_ENABLE_MVE); if (rc) { - pe_err(pe, "OPAL error %ld setting up MVE %d\n", + pe_err(pe, "OPAL error %ld enabling MVE %d\n", rc, pe->mve_number); pe->mve_number = -1; - } else { - rc = opal_pci_set_mve_enable(phb->opal_id, - pe->mve_number, OPAL_ENABLE_MVE); - if (rc) { - pe_err(pe, "OPAL error %ld enabling MVE %d\n", - rc, pe->mve_number); - pe->mve_number = -1; - } } - } else if (phb->type == PNV_PHB_IODA2) - pe->mve_number = 0; + } +out: return 0; } @@ -837,8 +942,8 @@ static void pnv_pci_ioda_setup_PEs(void) phb = hose->private_data; /* M64 layout might affect PE allocation */ - if (phb->alloc_m64_pe) - phb->alloc_m64_pe(phb); + if (phb->reserve_m64_pe) + phb->reserve_m64_pe(phb); pnv_ioda_setup_PEs(hose->bus); } @@ -1509,7 +1614,6 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, unsigned int is_64, struct msi_msg *msg) { struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); - struct pci_dn *pdn = pci_get_pdn(dev); unsigned int xive_num = hwirq - phb->msi_base; __be32 data; int rc; @@ -1523,7 +1627,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, return -ENXIO; /* Force 32-bit MSI on some broken devices */ - if (pdn && pdn->force_32bit_msi) + if (dev->no_64bit_msi) is_64 = 0; /* Assign XIVE to PE */ @@ -1835,19 +1939,14 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb_id = be64_to_cpup(prop64); pr_debug(" PHB-ID : 0x%016llx\n", phb_id); - phb = alloc_bootmem(sizeof(struct pnv_phb)); - if (!phb) { - pr_err(" Out of memory !\n"); - return; - } + phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0); /* Allocate PCI controller */ - memset(phb, 0, sizeof(struct pnv_phb)); phb->hose = hose = pcibios_alloc_controller(np); if (!phb->hose) { pr_err(" Can't allocate PCI controller for %s\n", np->full_name); - free_bootmem((unsigned long)phb, sizeof(struct pnv_phb)); + memblock_free(__pa(phb), sizeof(struct pnv_phb)); return; } @@ -1914,8 +2013,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, } pemap_off = size; size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe); - aux = alloc_bootmem(size); - memset(aux, 0, size); + aux = memblock_virt_alloc(size, 0); phb->ioda.pe_alloc = aux; phb->ioda.m32_segmap = aux + m32map_off; if (phb->type == PNV_PHB_IODA1) @@ -1997,11 +2095,11 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, if (is_kdump_kernel()) { pr_info(" Issue PHB reset ...\n"); ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL); - ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET); + ioda_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE); } - /* Configure M64 window */ - if (phb->init_m64 && phb->init_m64(phb)) + /* Remove M64 resource if we can't configure it successfully */ + if (!phb->init_m64 || phb->init_m64(phb)) hose->mem_resources[1].flags = 0; } diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 94ce3481490b..6ef6d4d8e7e2 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -122,12 +122,9 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, return; } - phb = alloc_bootmem(sizeof(struct pnv_phb)); - if (phb) { - memset(phb, 0, sizeof(struct pnv_phb)); - phb->hose = pcibios_alloc_controller(np); - } - if (!phb || !phb->hose) { + phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0); + phb->hose = pcibios_alloc_controller(np); + if (!phb->hose) { pr_err(" Failed to allocate PCI controller\n"); return; } @@ -196,16 +193,27 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) hub_id = be64_to_cpup(prop64); pr_info(" HUB-ID : 0x%016llx\n", hub_id); + /* Count child PHBs and calculate TCE space per PHB */ + for_each_child_of_node(np, phbn) { + if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || + of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) + phb_count++; + } + + if (phb_count <= 0) { + pr_info(" No PHBs for Hub %s\n", np->full_name); + return; + } + + tce_per_phb = __rounddown_pow_of_two(P5IOC2_TCE_MEMORY / phb_count); + pr_info(" Allocating %lld MB of TCE memory per PHB\n", + tce_per_phb >> 20); + /* Currently allocate 16M of TCE memory for every Hub * * XXX TODO: Make it chip local if possible */ - tce_mem = __alloc_bootmem(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY, - __pa(MAX_DMA_ADDRESS)); - if (!tce_mem) { - pr_err(" Failed to allocate TCE Memory !\n"); - return; - } + tce_mem = memblock_virt_alloc(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY); pr_debug(" TCE : 0x%016lx..0x%016lx\n", __pa(tce_mem), __pa(tce_mem) + P5IOC2_TCE_MEMORY - 1); rc = opal_pci_set_hub_tce_memory(hub_id, __pa(tce_mem), @@ -215,18 +223,6 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) return; } - /* Count child PHBs */ - for_each_child_of_node(np, phbn) { - if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || - of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) - phb_count++; - } - - /* Calculate how much TCE space we can give per PHB */ - tce_per_phb = __rounddown_pow_of_two(P5IOC2_TCE_MEMORY / phb_count); - pr_info(" Allocating %lld MB of TCE memory per PHB\n", - tce_per_phb >> 20); - /* Initialize PHBs */ for_each_child_of_node(np, phbn) { if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index b3ca77ddf36d..4945e87f12dc 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -16,7 +16,6 @@ #include <linux/delay.h> #include <linux/string.h> #include <linux/init.h> -#include <linux/bootmem.h> #include <linux/irq.h> #include <linux/io.h> #include <linux/msi.h> @@ -50,7 +49,6 @@ static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; - struct pci_dn *pdn = pci_get_pdn(pdev); struct msi_desc *entry; struct msi_msg msg; int hwirq; @@ -60,7 +58,7 @@ static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) if (WARN_ON(!phb) || !phb->msi_bmp.bitmap) return -ENODEV; - if (pdn && pdn->force_32bit_msi && !phb->msi32_support) + if (pdev->no_64bit_msi && !phb->msi32_support) return -ENODEV; list_for_each_entry(entry, &pdev->msi_list, list) { @@ -91,7 +89,7 @@ static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return rc; } irq_set_msi_desc(virq, entry); - write_msi_msg(virq, &msg); + pci_write_msi_msg(virq, &msg); } return 0; } @@ -505,7 +503,7 @@ static bool pnv_pci_cfg_check(struct pci_controller *hose, edev = of_node_to_eeh_dev(dn); if (edev) { if (edev->pe && - (edev->pe->state & EEH_PE_RESET)) + (edev->pe->state & EEH_PE_CFG_BLOCKED)) return false; if (edev->mode & EEH_DEV_REMOVED) diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 34d29eb2a4de..6c02ff8dd69f 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -130,7 +130,7 @@ struct pnv_phb { u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); void (*shutdown)(struct pnv_phb *phb); int (*init_m64)(struct pnv_phb *phb); - void (*alloc_m64_pe)(struct pnv_phb *phb); + void (*reserve_m64_pe)(struct pnv_phb *phb); int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all); int (*get_pe_state)(struct pnv_phb *phb, int pe_no); void (*freeze_pe)(struct pnv_phb *phb, int pe_no); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 3f9546d8a51f..30b1c3e298a6 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -265,10 +265,8 @@ static unsigned long pnv_memory_block_size(void) static void __init pnv_setup_machdep_opal(void) { ppc_md.get_boot_time = opal_get_boot_time; - ppc_md.get_rtc_time = opal_get_rtc_time; - ppc_md.set_rtc_time = opal_set_rtc_time; ppc_md.restart = pnv_restart; - ppc_md.power_off = pnv_power_off; + pm_power_off = pnv_power_off; ppc_md.halt = pnv_halt; ppc_md.machine_check_exception = opal_machine_check; ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; @@ -285,7 +283,7 @@ static void __init pnv_setup_machdep_rtas(void) ppc_md.set_rtc_time = rtas_set_rtc_time; } ppc_md.restart = rtas_restart; - ppc_md.power_off = rtas_power_off; + pm_power_off = rtas_power_off; ppc_md.halt = rtas_halt; } #endif /* CONFIG_PPC_POWERNV_RTAS */ diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 4753958cd509..b716f666e48a 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -149,6 +149,7 @@ static int pnv_smp_cpu_disable(void) static void pnv_smp_cpu_kill_self(void) { unsigned int cpu; + unsigned long srr1; /* Standard hot unplug procedure */ local_irq_disable(); @@ -165,13 +166,25 @@ static void pnv_smp_cpu_kill_self(void) mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1); while (!generic_check_cpu_restart(cpu)) { ppc64_runlatch_off(); - power7_nap(1); + srr1 = power7_nap(1); ppc64_runlatch_on(); - /* Clear the IPI that woke us up */ - icp_native_flush_interrupt(); - local_paca->irq_happened &= PACA_IRQ_HARD_DIS; - mb(); + /* + * If the SRR1 value indicates that we woke up due to + * an external interrupt, then clear the interrupt. + * We clear the interrupt before checking for the + * reason, so as to avoid a race where we wake up for + * some other reason, find nothing and clear the interrupt + * just as some other cpu is sending us an interrupt. + * If we returned from power7_nap as a result of + * having finished executing in a KVM guest, then srr1 + * contains 0. + */ + if ((srr1 & SRR1_WAKEMASK) == SRR1_WAKEEE) { + icp_native_flush_interrupt(); + local_paca->irq_happened &= PACA_IRQ_HARD_DIS; + smp_mb(); + } if (cpu_core_split_required()) continue; |