diff options
Diffstat (limited to '')
-rw-r--r-- | arch/powerpc/platforms/pseries/Makefile | 5 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh_cache.c | 4 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh_driver.c | 57 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh_event.c | 52 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/iommu.c | 216 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/lpar.c | 5 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/ras.c | 82 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/rtasd.c | 6 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/scanlog.c | 6 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/setup.c | 269 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/smp.c | 33 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/vio.c | 274 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/xics.c | 747 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/xics.h | 17 |
14 files changed, 863 insertions, 910 deletions
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 930898635c9f..e5e0ff466904 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -1,8 +1,11 @@ +ifeq ($(CONFIG_PPC64),y) +EXTRA_CFLAGS += -mno-minimal-toc +endif + obj-y := pci.o lpar.o hvCall.o nvram.o reconfig.o \ setup.o iommu.o ras.o rtasd.o pci_dlpar.o \ firmware.o obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_XICS) += xics.o obj-$(CONFIG_SCANLOG) += scanlog.o obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c index d4a402c5866c..c37a8497c60f 100644 --- a/arch/powerpc/platforms/pseries/eeh_cache.c +++ b/arch/powerpc/platforms/pseries/eeh_cache.c @@ -287,7 +287,7 @@ void pci_addr_cache_remove_device(struct pci_dev *dev) * find the pci device that corresponds to a given address. * This routine scans all pci busses to build the cache. * Must be run late in boot process, after the pci controllers - * have been scaned for devices (after all device resources are known). + * have been scanned for devices (after all device resources are known). */ void __init pci_addr_cache_build(void) { @@ -304,6 +304,8 @@ void __init pci_addr_cache_build(void) pci_addr_cache_insert_device(dev); dn = pci_device_to_OF_node(dev); + if (!dn) + continue; pci_dev_get (dev); /* matching put is in eeh_remove_device() */ PCI_DN(dn)->pcidev = dev; } diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 1fba695e32e8..aaad2c0afcbf 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -23,9 +23,8 @@ * */ #include <linux/delay.h> -#include <linux/irq.h> #include <linux/interrupt.h> -#include <linux/notifier.h> +#include <linux/irq.h> #include <linux/pci.h> #include <asm/eeh.h> #include <asm/eeh_event.h> @@ -176,7 +175,7 @@ static void eeh_report_failure(struct pci_dev *dev, void *userdata) * * pSeries systems will isolate a PCI slot if the PCI-Host * bridge detects address or data parity errors, DMA's - * occuring to wild addresses (which usually happen due to + * occurring to wild addresses (which usually happen due to * bugs in device drivers or in PCI adapter firmware). * Slot isolations also occur if #SERR, #PERR or other misc * PCI-related errors are detected. @@ -202,7 +201,11 @@ static void eeh_report_failure(struct pci_dev *dev, void *userdata) static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) { - int rc; + int cnt, rc; + + /* pcibios will clear the counter; save the value */ + cnt = pe_dn->eeh_freeze_count; + if (bus) pcibios_remove_pci_devices(bus); @@ -241,6 +244,7 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) ssleep (5); pcibios_add_pci_devices(bus); } + pe_dn->eeh_freeze_count = cnt; return 0; } @@ -250,23 +254,29 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) */ #define MAX_WAIT_FOR_RECOVERY 15 -void handle_eeh_events (struct eeh_event *event) +struct pci_dn * handle_eeh_events (struct eeh_event *event) { struct device_node *frozen_dn; struct pci_dn *frozen_pdn; struct pci_bus *frozen_bus; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; - const char *pci_str, *drv_str; + const char *location, *pci_str, *drv_str; frozen_dn = find_device_pe(event->dn); frozen_bus = pcibios_find_pci_bus(frozen_dn); if (!frozen_dn) { - printk(KERN_ERR "EEH: Error: Cannot find partition endpoint for %s\n", - pci_name(event->dev)); - return; + + location = (char *) get_property(event->dn, "ibm,loc-code", NULL); + location = location ? location : "unknown"; + printk(KERN_ERR "EEH: Error: Cannot find partition endpoint " + "for location=%s pci addr=%s\n", + location, pci_name(event->dev)); + return NULL; } + location = (char *) get_property(frozen_dn, "ibm,loc-code", NULL); + location = location ? location : "unknown"; /* There are two different styles for coming up with the PE. * In the old style, it was the highest EEH-capable device @@ -278,9 +288,10 @@ void handle_eeh_events (struct eeh_event *event) frozen_bus = pcibios_find_pci_bus (frozen_dn->parent); if (!frozen_bus) { - printk(KERN_ERR "EEH: Cannot find PCI bus for %s\n", - frozen_dn->full_name); - return; + printk(KERN_ERR "EEH: Cannot find PCI bus " + "for location=%s dn=%s\n", + location, frozen_dn->full_name); + return NULL; } #if 0 @@ -314,8 +325,9 @@ void handle_eeh_events (struct eeh_event *event) eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */); printk(KERN_WARNING - "EEH: This PCI device has failed %d times since last reboot: %s - %s\n", - frozen_pdn->eeh_freeze_count, drv_str, pci_str); + "EEH: This PCI device has failed %d times since last reboot: " + "location=%s driver=%s pci addr=%s\n", + frozen_pdn->eeh_freeze_count, location, drv_str, pci_str); /* Walk the various device drivers attached to this slot through * a reset sequence, giving each an opportunity to do what it needs @@ -355,7 +367,7 @@ void handle_eeh_events (struct eeh_event *event) /* Tell all device drivers that they can resume operations */ pci_walk_bus(frozen_bus, eeh_report_resume, NULL); - return; + return frozen_pdn; excess_failures: /* @@ -364,17 +376,18 @@ excess_failures: * due to actual, failed cards. */ printk(KERN_ERR - "EEH: PCI device %s - %s has failed %d times \n" - "and has been permanently disabled. Please try reseating\n" - "this device or replacing it.\n", - drv_str, pci_str, frozen_pdn->eeh_freeze_count); + "EEH: PCI device at location=%s driver=%s pci addr=%s \n" + "has failed %d times and has been permanently disabled. \n" + "Please try reseating this device or replacing it.\n", + location, drv_str, pci_str, frozen_pdn->eeh_freeze_count); goto perm_error; hard_fail: printk(KERN_ERR - "EEH: Unable to recover from failure of PCI device %s - %s\n" + "EEH: Unable to recover from failure of PCI device " + "at location=%s driver=%s pci addr=%s \n" "Please try reseating this device or replacing it.\n", - drv_str, pci_str); + location, drv_str, pci_str); perm_error: eeh_slot_error_detail(frozen_pdn, 2 /* Permanent Error */); @@ -384,6 +397,8 @@ perm_error: /* Shut down the device drivers for good. */ pcibios_remove_pci_devices(frozen_bus); + + return NULL; } /* ---------- end of file ---------- */ diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c index 40020c65c89e..45ccc687e57c 100644 --- a/arch/powerpc/platforms/pseries/eeh_event.c +++ b/arch/powerpc/platforms/pseries/eeh_event.c @@ -18,6 +18,7 @@ * Copyright (c) 2005 Linas Vepstas <linas@linas.org> */ +#include <linux/delay.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/pci.h> @@ -34,7 +35,7 @@ */ /* EEH event workqueue setup. */ -static spinlock_t eeh_eventlist_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(eeh_eventlist_lock); LIST_HEAD(eeh_eventlist); static void eeh_thread_launcher(void *); DECLARE_WORK(eeh_event_wq, eeh_thread_launcher, NULL); @@ -56,38 +57,43 @@ static int eeh_event_handler(void * dummy) { unsigned long flags; struct eeh_event *event; + struct pci_dn *pdn; daemonize ("eehd"); + set_current_state(TASK_INTERRUPTIBLE); - while (1) { - set_current_state(TASK_INTERRUPTIBLE); + spin_lock_irqsave(&eeh_eventlist_lock, flags); + event = NULL; - spin_lock_irqsave(&eeh_eventlist_lock, flags); - event = NULL; + /* Unqueue the event, get ready to process. */ + if (!list_empty(&eeh_eventlist)) { + event = list_entry(eeh_eventlist.next, struct eeh_event, list); + list_del(&event->list); + } + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); - /* Unqueue the event, get ready to process. */ - if (!list_empty(&eeh_eventlist)) { - event = list_entry(eeh_eventlist.next, struct eeh_event, list); - list_del(&event->list); - } - spin_unlock_irqrestore(&eeh_eventlist_lock, flags); + if (event == NULL) + return 0; - if (event == NULL) - break; + /* Serialize processing of EEH events */ + mutex_lock(&eeh_event_mutex); + eeh_mark_slot(event->dn, EEH_MODE_RECOVERING); - /* Serialize processing of EEH events */ - mutex_lock(&eeh_event_mutex); - eeh_mark_slot(event->dn, EEH_MODE_RECOVERING); + printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n", + pci_name(event->dev)); - printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n", - pci_name(event->dev)); + pdn = handle_eeh_events(event); - handle_eeh_events(event); + eeh_clear_slot(event->dn, EEH_MODE_RECOVERING); + pci_dev_put(event->dev); + kfree(event); + mutex_unlock(&eeh_event_mutex); - eeh_clear_slot(event->dn, EEH_MODE_RECOVERING); - pci_dev_put(event->dev); - kfree(event); - mutex_unlock(&eeh_event_mutex); + /* If there are no new errors after an hour, clear the counter. */ + if (pdn && pdn->eeh_freeze_count>0) { + msleep_interruptible (3600*1000); + if (pdn->eeh_freeze_count>0) + pdn->eeh_freeze_count--; } return 0; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 2643078433f0..d67af2c65754 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1,29 +1,29 @@ /* * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation * - * Rewrite, cleanup: + * Rewrite, cleanup: * * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation + * Copyright (C) 2006 Olof Johansson <olof@lixom.net> * * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR. * - * + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include <linux/config.h> #include <linux/init.h> #include <linux/types.h> #include <linux/slab.h> @@ -49,97 +49,101 @@ #define DBG(fmt...) -static void tce_build_pSeries(struct iommu_table *tbl, long index, - long npages, unsigned long uaddr, +static void tce_build_pSeries(struct iommu_table *tbl, long index, + long npages, unsigned long uaddr, enum dma_data_direction direction) { - union tce_entry t; - union tce_entry *tp; + u64 proto_tce; + u64 *tcep; + u64 rpn; index <<= TCE_PAGE_FACTOR; npages <<= TCE_PAGE_FACTOR; - t.te_word = 0; - t.te_rdwr = 1; // Read allowed + proto_tce = TCE_PCI_READ; // Read allowed if (direction != DMA_TO_DEVICE) - t.te_pciwr = 1; + proto_tce |= TCE_PCI_WRITE; - tp = ((union tce_entry *)tbl->it_base) + index; + tcep = ((u64 *)tbl->it_base) + index; while (npages--) { /* can't move this out since we might cross LMB boundary */ - t.te_rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; - - tp->te_word = t.te_word; + rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; + *tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; uaddr += TCE_PAGE_SIZE; - tp++; + tcep++; } } static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) { - union tce_entry t; - union tce_entry *tp; + u64 *tcep; npages <<= TCE_PAGE_FACTOR; index <<= TCE_PAGE_FACTOR; - t.te_word = 0; - tp = ((union tce_entry *)tbl->it_base) + index; - - while (npages--) { - tp->te_word = t.te_word; - - tp++; - } + tcep = ((u64 *)tbl->it_base) + index; + + while (npages--) + *(tcep++) = 0; } +static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) +{ + u64 *tcep; + + index <<= TCE_PAGE_FACTOR; + tcep = ((u64 *)tbl->it_base) + index; + + return *tcep; +} static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, enum dma_data_direction direction) { u64 rc; - union tce_entry tce; + u64 proto_tce, tce; + u64 rpn; tcenum <<= TCE_PAGE_FACTOR; npages <<= TCE_PAGE_FACTOR; - tce.te_word = 0; - tce.te_rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; - tce.te_rdwr = 1; + rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; + proto_tce = TCE_PCI_READ; if (direction != DMA_TO_DEVICE) - tce.te_pciwr = 1; + proto_tce |= TCE_PCI_WRITE; while (npages--) { - rc = plpar_tce_put((u64)tbl->it_index, - (u64)tcenum << 12, - tce.te_word ); - + tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; + rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce); + if (rc && printk_ratelimit()) { printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); printk("\tindex = 0x%lx\n", (u64)tbl->it_index); printk("\ttcenum = 0x%lx\n", (u64)tcenum); - printk("\ttce val = 0x%lx\n", tce.te_word ); + printk("\ttce val = 0x%lx\n", tce ); show_stack(current, (unsigned long *)__get_SP()); } - + tcenum++; - tce.te_rpn++; + rpn++; } } -static DEFINE_PER_CPU(void *, tce_page) = NULL; +static DEFINE_PER_CPU(u64 *, tce_page) = NULL; static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, enum dma_data_direction direction) { u64 rc; - union tce_entry tce, *tcep; + u64 proto_tce; + u64 *tcep; + u64 rpn; long l, limit; if (TCE_PAGE_FACTOR == 0 && npages == 1) @@ -152,7 +156,7 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, * from iommu_alloc{,_sg}() */ if (!tcep) { - tcep = (void *)__get_free_page(GFP_ATOMIC); + tcep = (u64 *)__get_free_page(GFP_ATOMIC); /* If allocation fails, fall back to the loop implementation */ if (!tcep) return tce_build_pSeriesLP(tbl, tcenum, npages, @@ -163,11 +167,10 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, tcenum <<= TCE_PAGE_FACTOR; npages <<= TCE_PAGE_FACTOR; - tce.te_word = 0; - tce.te_rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; - tce.te_rdwr = 1; + rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; + proto_tce = TCE_PCI_READ; if (direction != DMA_TO_DEVICE) - tce.te_pciwr = 1; + proto_tce |= TCE_PCI_WRITE; /* We can map max one pageful of TCEs at a time */ do { @@ -175,11 +178,11 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, * Set up the page with TCE data, looping through and setting * the values. */ - limit = min_t(long, npages, 4096/sizeof(union tce_entry)); + limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE); for (l = 0; l < limit; l++) { - tcep[l] = tce; - tce.te_rpn++; + tcep[l] = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; + rpn++; } rc = plpar_tce_put_indirect((u64)tbl->it_index, @@ -195,7 +198,7 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); printk("\tindex = 0x%lx\n", (u64)tbl->it_index); printk("\tnpages = 0x%lx\n", (u64)npages); - printk("\ttce[0] val = 0x%lx\n", tcep[0].te_word); + printk("\ttce[0] val = 0x%lx\n", tcep[0]); show_stack(current, (unsigned long *)__get_SP()); } } @@ -203,23 +206,17 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) { u64 rc; - union tce_entry tce; tcenum <<= TCE_PAGE_FACTOR; npages <<= TCE_PAGE_FACTOR; - tce.te_word = 0; - while (npages--) { - rc = plpar_tce_put((u64)tbl->it_index, - (u64)tcenum << 12, - tce.te_word); + rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0); if (rc && printk_ratelimit()) { printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); printk("\tindex = 0x%lx\n", (u64)tbl->it_index); printk("\ttcenum = 0x%lx\n", (u64)tcenum); - printk("\ttce val = 0x%lx\n", tce.te_word ); show_stack(current, (unsigned long *)__get_SP()); } @@ -231,31 +228,43 @@ static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) { u64 rc; - union tce_entry tce; tcenum <<= TCE_PAGE_FACTOR; npages <<= TCE_PAGE_FACTOR; - tce.te_word = 0; - - rc = plpar_tce_stuff((u64)tbl->it_index, - (u64)tcenum << 12, - tce.te_word, - npages); + rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages); if (rc && printk_ratelimit()) { printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n"); printk("\trc = %ld\n", rc); printk("\tindex = 0x%lx\n", (u64)tbl->it_index); printk("\tnpages = 0x%lx\n", (u64)npages); - printk("\ttce val = 0x%lx\n", tce.te_word ); show_stack(current, (unsigned long *)__get_SP()); } } +static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum) +{ + u64 rc; + unsigned long tce_ret; + + tcenum <<= TCE_PAGE_FACTOR; + rc = plpar_tce_get((u64)tbl->it_index, (u64)tcenum << 12, &tce_ret); + + if (rc && printk_ratelimit()) { + printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%ld\n", + rc); + printk("\tindex = 0x%lx\n", (u64)tbl->it_index); + printk("\ttcenum = 0x%lx\n", (u64)tcenum); + show_stack(current, (unsigned long *)__get_SP()); + } + + return tce_ret; +} + static void iommu_table_setparms(struct pci_controller *phb, struct device_node *dn, - struct iommu_table *tbl) + struct iommu_table *tbl) { struct device_node *node; unsigned long *basep; @@ -272,19 +281,22 @@ static void iommu_table_setparms(struct pci_controller *phb, } tbl->it_base = (unsigned long)__va(*basep); + +#ifndef CONFIG_CRASH_DUMP memset((void *)tbl->it_base, 0, *sizep); +#endif tbl->it_busno = phb->bus->number; - + /* Units of tce entries */ tbl->it_offset = phb->dma_window_base_cur >> PAGE_SHIFT; - + /* Test if we are going over 2GB of DMA space */ if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) { udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); - panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); + panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); } - + phb->dma_window_base_cur += phb->dma_window_size; /* Set the tce table size - measured in entries */ @@ -299,30 +311,22 @@ static void iommu_table_setparms(struct pci_controller *phb, * iommu_table_setparms_lpar * * Function: On pSeries LPAR systems, return TCE table info, given a pci bus. - * - * ToDo: properly interpret the ibm,dma-window property. The definition is: - * logical-bus-number (1 word) - * phys-address (#address-cells words) - * size (#cell-size words) - * - * Currently we hard code these sizes (more or less). */ static void iommu_table_setparms_lpar(struct pci_controller *phb, struct device_node *dn, struct iommu_table *tbl, - unsigned int *dma_window) + unsigned char *dma_window) { + unsigned long offset, size; + tbl->it_busno = PCI_DN(dn)->bussubno; + of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size); - /* TODO: Parse field size properties properly. */ - tbl->it_size = (((unsigned long)dma_window[4] << 32) | - (unsigned long)dma_window[5]) >> PAGE_SHIFT; - tbl->it_offset = (((unsigned long)dma_window[2] << 32) | - (unsigned long)dma_window[3]) >> PAGE_SHIFT; tbl->it_base = 0; - tbl->it_index = dma_window[0]; tbl->it_blocksize = 16; tbl->it_type = TCE_PCI; + tbl->it_offset = offset >> PAGE_SHIFT; + tbl->it_size = size >> PAGE_SHIFT; } static void iommu_bus_setup_pSeries(struct pci_bus *bus) @@ -357,13 +361,9 @@ static void iommu_bus_setup_pSeries(struct pci_bus *bus) if (isa_dn_orig) of_node_put(isa_dn_orig); - /* Count number of direct PCI children of the PHB. - * All PCI device nodes have class-code property, so it's - * an easy way to find them. - */ + /* Count number of direct PCI children of the PHB. */ for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) - if (get_property(tmp, "class-code", NULL)) - children++; + children++; DBG("Children: %d\n", children); @@ -394,10 +394,11 @@ static void iommu_bus_setup_pSeries(struct pci_bus *bus) pci->phb->dma_window_size = 0x8000000ul; pci->phb->dma_window_base_cur = 0x8000000ul; - tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + tbl = kmalloc_node(sizeof(struct iommu_table), GFP_KERNEL, + pci->phb->node); iommu_table_setparms(pci->phb, dn, tbl); - pci->iommu_table = iommu_init_table(tbl); + pci->iommu_table = iommu_init_table(tbl, pci->phb->node); /* Divide the rest (1.75GB) among the children */ pci->phb->dma_window_size = 0x80000000ul; @@ -414,7 +415,7 @@ static void iommu_bus_setup_pSeriesLP(struct pci_bus *bus) struct iommu_table *tbl; struct device_node *dn, *pdn; struct pci_dn *ppci; - unsigned int *dma_window = NULL; + unsigned char *dma_window = NULL; DBG("iommu_bus_setup_pSeriesLP, bus %p, bus->self %p\n", bus, bus->self); @@ -422,7 +423,7 @@ static void iommu_bus_setup_pSeriesLP(struct pci_bus *bus) /* Find nearest ibm,dma-window, walking up the device tree */ for (pdn = dn; pdn != NULL; pdn = pdn->parent) { - dma_window = (unsigned int *)get_property(pdn, "ibm,dma-window", NULL); + dma_window = get_property(pdn, "ibm,dma-window", NULL); if (dma_window != NULL) break; } @@ -440,12 +441,12 @@ static void iommu_bus_setup_pSeriesLP(struct pci_bus *bus) ppci->bussubno = bus->number; - tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table), - GFP_KERNEL); - + tbl = kmalloc_node(sizeof(struct iommu_table), GFP_KERNEL, + ppci->phb->node); + iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); - ppci->iommu_table = iommu_init_table(tbl); + ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); } if (pdn != dn) @@ -468,9 +469,11 @@ static void iommu_dev_setup_pSeries(struct pci_dev *dev) */ if (!dev->bus->self) { DBG(" --> first child, no bridge. Allocating iommu table.\n"); - tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + tbl = kmalloc_node(sizeof(struct iommu_table), GFP_KERNEL, + PCI_DN(dn)->phb->node); iommu_table_setparms(PCI_DN(dn)->phb, dn, tbl); - PCI_DN(mydn)->iommu_table = iommu_init_table(tbl); + PCI_DN(dn)->iommu_table = iommu_init_table(tbl, + PCI_DN(dn)->phb->node); return; } @@ -516,7 +519,7 @@ static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev) { struct device_node *pdn, *dn; struct iommu_table *tbl; - int *dma_window = NULL; + unsigned char *dma_window = NULL; struct pci_dn *pci; DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, pci_name(dev)); @@ -531,8 +534,7 @@ static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev) for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table; pdn = pdn->parent) { - dma_window = (unsigned int *) - get_property(pdn, "ibm,dma-window", NULL); + dma_window = get_property(pdn, "ibm,dma-window", NULL); if (dma_window) break; } @@ -553,12 +555,12 @@ static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev) /* iommu_table_setparms_lpar needs bussubno. */ pci->bussubno = pci->phb->bus->number; - tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table), - GFP_KERNEL); + tbl = kmalloc_node(sizeof(struct iommu_table), GFP_KERNEL, + pci->phb->node); iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); - pci->iommu_table = iommu_init_table(tbl); + pci->iommu_table = iommu_init_table(tbl, pci->phb->node); } if (pdn != dn) @@ -588,11 +590,13 @@ void iommu_init_early_pSeries(void) ppc_md.tce_build = tce_build_pSeriesLP; ppc_md.tce_free = tce_free_pSeriesLP; } + ppc_md.tce_get = tce_get_pSeriesLP; ppc_md.iommu_bus_setup = iommu_bus_setup_pSeriesLP; ppc_md.iommu_dev_setup = iommu_dev_setup_pSeriesLP; } else { ppc_md.tce_build = tce_build_pSeries; ppc_md.tce_free = tce_free_pSeries; + ppc_md.tce_get = tce_get_pseries; ppc_md.iommu_bus_setup = iommu_bus_setup_pSeries; ppc_md.iommu_dev_setup = iommu_dev_setup_pSeries; } diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 634b7d06d3cc..3aeb40699042 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -21,7 +21,6 @@ #undef DEBUG_LOW -#include <linux/config.h> #include <linux/kernel.h> #include <linux/dma-mapping.h> #include <linux/console.h> @@ -513,7 +512,7 @@ void pSeries_lpar_flush_hash_range(unsigned long number, int local) spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); } -void hpte_init_lpar(void) +void __init hpte_init_lpar(void) { ppc_md.hpte_invalidate = pSeries_lpar_hpte_invalidate; ppc_md.hpte_updatepp = pSeries_lpar_hpte_updatepp; @@ -522,6 +521,4 @@ void hpte_init_lpar(void) ppc_md.hpte_remove = pSeries_lpar_hpte_remove; ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range; ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear; - - htab_finish_init(); } diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 9639c66b453d..9df783088b61 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -72,32 +72,62 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id, /* #define DEBUG */ -static void request_ras_irqs(struct device_node *np, char *propname, + +static void request_ras_irqs(struct device_node *np, irqreturn_t (*handler)(int, void *, struct pt_regs *), const char *name) { - unsigned int *ireg, len, i; - int virq, n_intr; - - ireg = (unsigned int *)get_property(np, propname, &len); - if (ireg == NULL) - return; - n_intr = prom_n_intr_cells(np); - len /= n_intr * sizeof(*ireg); - - for (i = 0; i < len; i++) { - virq = virt_irq_create_mapping(*ireg); - if (virq == NO_IRQ) { - printk(KERN_ERR "Unable to allocate interrupt " - "number for %s\n", np->full_name); - return; + int i, index, count = 0; + struct of_irq oirq; + u32 *opicprop; + unsigned int opicplen; + unsigned int virqs[16]; + + /* Check for obsolete "open-pic-interrupt" property. If present, then + * map those interrupts using the default interrupt host and default + * trigger + */ + opicprop = (u32 *)get_property(np, "open-pic-interrupt", &opicplen); + if (opicprop) { + opicplen /= sizeof(u32); + for (i = 0; i < opicplen; i++) { + if (count > 15) + break; + virqs[count] = irq_create_mapping(NULL, *(opicprop++), + IRQ_TYPE_NONE); + if (virqs[count] == NO_IRQ) + printk(KERN_ERR "Unable to allocate interrupt " + "number for %s\n", np->full_name); + else + count++; + } - if (request_irq(irq_offset_up(virq), handler, 0, name, NULL)) { + } + /* Else use normal interrupt tree parsing */ + else { + /* First try to do a proper OF tree parsing */ + for (index = 0; of_irq_map_one(np, index, &oirq) == 0; + index++) { + if (count > 15) + break; + virqs[count] = irq_create_of_mapping(oirq.controller, + oirq.specifier, + oirq.size); + if (virqs[count] == NO_IRQ) + printk(KERN_ERR "Unable to allocate interrupt " + "number for %s\n", np->full_name); + else + count++; + } + } + + /* Now request them */ + for (i = 0; i < count; i++) { + if (request_irq(virqs[i], handler, 0, name, NULL)) { printk(KERN_ERR "Unable to request interrupt %d for " - "%s\n", irq_offset_up(virq), np->full_name); + "%s\n", virqs[i], np->full_name); return; } - ireg += n_intr; } } @@ -115,20 +145,14 @@ static int __init init_ras_IRQ(void) /* Internal Errors */ np = of_find_node_by_path("/event-sources/internal-errors"); if (np != NULL) { - request_ras_irqs(np, "open-pic-interrupt", ras_error_interrupt, - "RAS_ERROR"); - request_ras_irqs(np, "interrupts", ras_error_interrupt, - "RAS_ERROR"); + request_ras_irqs(np, ras_error_interrupt, "RAS_ERROR"); of_node_put(np); } /* EPOW Events */ np = of_find_node_by_path("/event-sources/epow-events"); if (np != NULL) { - request_ras_irqs(np, "open-pic-interrupt", ras_epow_interrupt, - "RAS_EPOW"); - request_ras_irqs(np, "interrupts", ras_epow_interrupt, - "RAS_EPOW"); + request_ras_irqs(np, ras_epow_interrupt, "RAS_EPOW"); of_node_put(np); } @@ -162,7 +186,7 @@ ras_epow_interrupt(int irq, void *dev_id, struct pt_regs * regs) status = rtas_call(ras_check_exception_token, 6, 1, NULL, RAS_VECTOR_OFFSET, - virt_irq_to_real(irq_offset_down(irq)), + irq_map[irq].hwirq, RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS, critical, __pa(&ras_log_buf), rtas_get_error_log_max()); @@ -198,7 +222,7 @@ ras_error_interrupt(int irq, void *dev_id, struct pt_regs * regs) status = rtas_call(ras_check_exception_token, 6, 1, NULL, RAS_VECTOR_OFFSET, - virt_irq_to_real(irq_offset_down(irq)), + irq_map[irq].hwirq, RTAS_INTERNAL_ERROR, 1 /*Time Critical */, __pa(&ras_log_buf), rtas_get_error_log_max()); diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c index e0000ce769e5..2e4e04042d85 100644 --- a/arch/powerpc/platforms/pseries/rtasd.c +++ b/arch/powerpc/platforms/pseries/rtasd.c @@ -348,7 +348,7 @@ static int enable_surveillance(int timeout) return 0; if (error == -EINVAL) { - printk(KERN_INFO "rtasd: surveillance not supported\n"); + printk(KERN_DEBUG "rtasd: surveillance not supported\n"); return 0; } @@ -440,7 +440,7 @@ static int rtasd(void *unused) goto error; } - printk(KERN_INFO "RTAS daemon started\n"); + printk(KERN_DEBUG "RTAS daemon started\n"); DEBUG("will sleep for %d milliseconds\n", (30000/rtas_event_scan_rate)); @@ -487,7 +487,7 @@ static int __init rtas_init(void) /* No RTAS */ if (rtas_token("event-scan") == RTAS_UNKNOWN_SERVICE) { - printk(KERN_INFO "rtasd: no event-scan on system\n"); + printk(KERN_DEBUG "rtasd: no event-scan on system\n"); return -ENODEV; } diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c index 50643496eb63..77a5bb1d9c30 100644 --- a/arch/powerpc/platforms/pseries/scanlog.c +++ b/arch/powerpc/platforms/pseries/scanlog.c @@ -107,9 +107,9 @@ static ssize_t scanlog_read(struct file *file, char __user *buf, /* Break to sleep default time */ break; default: - if (status > 9900 && status <= 9905) { - wait_time = rtas_extended_busy_delay_time(status); - } else { + /* Assume extended busy */ + wait_time = rtas_busy_delay_time(status); + if (!wait_time) { printk(KERN_ERR "scanlog: unknown error from rtas: %d\n", status); return -EIO; } diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 5f79f01c44f2..54a52437265c 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -18,7 +18,6 @@ #undef DEBUG -#include <linux/config.h> #include <linux/cpu.h> #include <linux/errno.h> #include <linux/sched.h> @@ -77,6 +76,9 @@ #define DBG(fmt...) #endif +/* move those away to a .h */ +extern void smp_init_pseries_mpic(void); +extern void smp_init_pseries_xics(void); extern void find_udbg_vterm(void); int fwnmi_active; /* TRUE if an FWNMI handler is present */ @@ -84,7 +86,7 @@ int fwnmi_active; /* TRUE if an FWNMI handler is present */ static void pseries_shared_idle_sleep(void); static void pseries_dedicated_idle_sleep(void); -struct mpic *pSeries_mpic; +static struct device_node *pSeries_mpic_node; static void pSeries_show_cpuinfo(struct seq_file *m) { @@ -119,63 +121,92 @@ static void __init fwnmi_init(void) fwnmi_active = 1; } -static void __init pSeries_init_mpic(void) +void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc, + struct pt_regs *regs) { - unsigned int *addrp; - struct device_node *np; - unsigned long intack = 0; - - /* All ISUs are setup, complete initialization */ - mpic_init(pSeries_mpic); - - /* Check what kind of cascade ACK we have */ - if (!(np = of_find_node_by_name(NULL, "pci")) - || !(addrp = (unsigned int *) - get_property(np, "8259-interrupt-acknowledge", NULL))) - printk(KERN_ERR "Cannot find pci to get ack address\n"); - else - intack = addrp[prom_n_addr_cells(np)-1]; - of_node_put(np); - - /* Setup the legacy interrupts & controller */ - i8259_init(intack, 0); - - /* Hook cascade to mpic */ - mpic_setup_cascade(NUM_ISA_INTERRUPTS, i8259_irq_cascade, NULL); + unsigned int cascade_irq = i8259_irq(regs); + if (cascade_irq != NO_IRQ) + generic_handle_irq(cascade_irq, regs); + desc->chip->eoi(irq); } -static void __init pSeries_setup_mpic(void) +static void __init pseries_mpic_init_IRQ(void) { + struct device_node *np, *old, *cascade = NULL; + unsigned int *addrp; + unsigned long intack = 0; unsigned int *opprop; unsigned long openpic_addr = 0; - unsigned char senses[NR_IRQS - NUM_ISA_INTERRUPTS]; - struct device_node *root; - int irq_count; + unsigned int cascade_irq; + int naddr, n, i, opplen; + struct mpic *mpic; - /* Find the Open PIC if present */ - root = of_find_node_by_path("/"); - opprop = (unsigned int *) get_property(root, "platform-open-pic", NULL); + np = of_find_node_by_path("/"); + naddr = prom_n_addr_cells(np); + opprop = (unsigned int *) get_property(np, "platform-open-pic", &opplen); if (opprop != 0) { - int n = prom_n_addr_cells(root); - - for (openpic_addr = 0; n > 0; --n) - openpic_addr = (openpic_addr << 32) + *opprop++; + openpic_addr = of_read_number(opprop, naddr); printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr); } - of_node_put(root); + of_node_put(np); BUG_ON(openpic_addr == 0); - /* Get the sense values from OF */ - prom_get_irq_senses(senses, NUM_ISA_INTERRUPTS, NR_IRQS); - /* Setup the openpic driver */ - irq_count = NR_IRQS - NUM_ISA_INTERRUPTS - 4; /* leave room for IPIs */ - pSeries_mpic = mpic_alloc(openpic_addr, MPIC_PRIMARY, - 16, 16, irq_count, /* isu size, irq offset, irq count */ - NR_IRQS - 4, /* ipi offset */ - senses, irq_count, /* sense & sense size */ - " MPIC "); + mpic = mpic_alloc(pSeries_mpic_node, openpic_addr, + MPIC_PRIMARY, + 16, 250, /* isu size, irq count */ + " MPIC "); + BUG_ON(mpic == NULL); + + /* Add ISUs */ + opplen /= sizeof(u32); + for (n = 0, i = naddr; i < opplen; i += naddr, n++) { + unsigned long isuaddr = of_read_number(opprop + i, naddr); + mpic_assign_isu(mpic, n, isuaddr); + } + + /* All ISUs are setup, complete initialization */ + mpic_init(mpic); + + /* Look for cascade */ + for_each_node_by_type(np, "interrupt-controller") + if (device_is_compatible(np, "chrp,iic")) { + cascade = np; + break; + } + if (cascade == NULL) + return; + + cascade_irq = irq_of_parse_and_map(cascade, 0); + if (cascade == NO_IRQ) { + printk(KERN_ERR "xics: failed to map cascade interrupt"); + return; + } + + /* Check ACK type */ + for (old = of_node_get(cascade); old != NULL ; old = np) { + np = of_get_parent(old); + of_node_put(old); + if (np == NULL) + break; + if (strcmp(np->name, "pci") != 0) + continue; + addrp = (u32 *)get_property(np, "8259-interrupt-acknowledge", + NULL); + if (addrp == NULL) + continue; + naddr = prom_n_addr_cells(np); + intack = addrp[naddr-1]; + if (naddr > 1) + intack |= ((unsigned long)addrp[naddr-2]) << 32; + } + if (intack) + printk(KERN_DEBUG "mpic: PCI 8259 intack at 0x%016lx\n", + intack); + i8259_init(cascade, intack); + of_node_put(cascade); + set_irq_chained_handler(cascade_irq, pseries_8259_cascade); } static void pseries_lpar_enable_pmcs(void) @@ -193,23 +224,67 @@ static void pseries_lpar_enable_pmcs(void) get_lppaca()->pmcregs_in_use = 1; } -static void __init pSeries_setup_arch(void) +#ifdef CONFIG_KEXEC +static void pseries_kexec_cpu_down_mpic(int crash_shutdown, int secondary) { - /* Fixup ppc_md depending on the type of interrupt controller */ - if (ppc64_interrupt_controller == IC_OPEN_PIC) { - ppc_md.init_IRQ = pSeries_init_mpic; - ppc_md.get_irq = mpic_get_irq; - /* Allocate the mpic now, so that find_and_init_phbs() can - * fill the ISUs */ - pSeries_setup_mpic(); - } else { - ppc_md.init_IRQ = xics_init_IRQ; - ppc_md.get_irq = xics_get_irq; + mpic_teardown_this_cpu(secondary); +} + +static void pseries_kexec_cpu_down_xics(int crash_shutdown, int secondary) +{ + /* Don't risk a hypervisor call if we're crashing */ + if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) { + unsigned long vpa = __pa(get_lppaca()); + + if (unregister_vpa(hard_smp_processor_id(), vpa)) { + printk("VPA deregistration of cpu %u (hw_cpu_id %d) " + "failed\n", smp_processor_id(), + hard_smp_processor_id()); + } } + xics_teardown_cpu(secondary); +} +#endif /* CONFIG_KEXEC */ +static void __init pseries_discover_pic(void) +{ + struct device_node *np; + char *typep; + + for (np = NULL; (np = of_find_node_by_name(np, + "interrupt-controller"));) { + typep = (char *)get_property(np, "compatible", NULL); + if (strstr(typep, "open-pic")) { + pSeries_mpic_node = of_node_get(np); + ppc_md.init_IRQ = pseries_mpic_init_IRQ; + ppc_md.get_irq = mpic_get_irq; +#ifdef CONFIG_KEXEC + ppc_md.kexec_cpu_down = pseries_kexec_cpu_down_mpic; +#endif #ifdef CONFIG_SMP - smp_init_pSeries(); + smp_init_pseries_mpic(); #endif + return; + } else if (strstr(typep, "ppc-xicp")) { + ppc_md.init_IRQ = xics_init_IRQ; +#ifdef CONFIG_KEXEC + ppc_md.kexec_cpu_down = pseries_kexec_cpu_down_xics; +#endif +#ifdef CONFIG_SMP + smp_init_pseries_xics(); +#endif + return; + } + } + printk(KERN_ERR "pSeries_discover_pic: failed to recognize" + " interrupt-controller\n"); +} + +static void __init pSeries_setup_arch(void) +{ + /* Discover PIC type and setup ppc_md accordingly */ + pseries_discover_pic(); + /* openpic global configuration register (64-bit format). */ /* openpic Interrupt Source Unit pointer (64-bit format). */ /* python0 facility area (mmio) (64-bit format) REAL address. */ @@ -235,14 +310,14 @@ static void __init pSeries_setup_arch(void) if (firmware_has_feature(FW_FEATURE_SPLPAR)) { vpa_init(boot_cpuid); if (get_lppaca()->shared_proc) { - printk(KERN_INFO "Using shared processor idle loop\n"); + printk(KERN_DEBUG "Using shared processor idle loop\n"); ppc_md.power_save = pseries_shared_idle_sleep; } else { - printk(KERN_INFO "Using dedicated idle loop\n"); + printk(KERN_DEBUG "Using dedicated idle loop\n"); ppc_md.power_save = pseries_dedicated_idle_sleep; } } else { - printk(KERN_INFO "Using default idle loop\n"); + printk(KERN_DEBUG "Using default idle loop\n"); } if (firmware_has_feature(FW_FEATURE_LPAR)) @@ -261,41 +336,11 @@ static int __init pSeries_init_panel(void) } arch_initcall(pSeries_init_panel); -static void __init pSeries_discover_pic(void) -{ - struct device_node *np; - char *typep; - - /* - * Setup interrupt mapping options that are needed for finish_device_tree - * to properly parse the OF interrupt tree & do the virtual irq mapping - */ - __irq_offset_value = NUM_ISA_INTERRUPTS; - ppc64_interrupt_controller = IC_INVALID; - for (np = NULL; (np = of_find_node_by_name(np, "interrupt-controller"));) { - typep = (char *)get_property(np, "compatible", NULL); - if (strstr(typep, "open-pic")) { - ppc64_interrupt_controller = IC_OPEN_PIC; - break; - } else if (strstr(typep, "ppc-xicp")) { - ppc64_interrupt_controller = IC_PPC_XIC; - break; - } - } - if (ppc64_interrupt_controller == IC_INVALID) - printk("pSeries_discover_pic: failed to recognize" - " interrupt-controller\n"); - -} - static void pSeries_mach_cpu_die(void) { local_irq_disable(); idle_task_exit(); - /* Some hardware requires clearing the CPPR, while other hardware does not - * it is safe either way - */ - pSeriesLP_cppr_info(0, 0); + xics_teardown_cpu(0); rtas_stop_self(); /* Should never get here... */ BUG(); @@ -322,11 +367,6 @@ static void __init pSeries_init_early(void) DBG(" -> pSeries_init_early()\n"); fw_feature_init(); - - if (firmware_has_feature(FW_FEATURE_LPAR)) - hpte_init_lpar(); - else - hpte_init_native(); if (firmware_has_feature(FW_FEATURE_LPAR)) find_udbg_vterm(); @@ -338,8 +378,6 @@ static void __init pSeries_init_early(void) iommu_init_early_pSeries(); - pSeries_discover_pic(); - DBG(" <- pSeries_init_early()\n"); } @@ -384,11 +422,17 @@ static int __init pSeries_probe_hypertas(unsigned long node, if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL) != NULL) powerpc_firmware_features |= FW_FEATURE_LPAR; + if (firmware_has_feature(FW_FEATURE_LPAR)) + hpte_init_lpar(); + else + hpte_init_native(); + return 1; } static int __init pSeries_probe(void) { + unsigned long root = of_get_flat_dt_root(); char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(), "device_type", NULL); if (dtype == NULL) @@ -396,6 +440,13 @@ static int __init pSeries_probe(void) if (strcmp(dtype, "chrp")) return 0; + /* Cell blades firmware claims to be chrp while it's not. Until this + * is fixed, we need to avoid those here. + */ + if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0") || + of_flat_dt_is_compatible(root, "IBM,CBEA")) + return 0; + DBG("pSeries detected, looking for LPAR capability...\n"); /* Now try to figure out if we are running on LPAR */ @@ -498,27 +549,6 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus) return PCI_PROBE_NORMAL; } -#ifdef CONFIG_KEXEC -static void pseries_kexec_cpu_down(int crash_shutdown, int secondary) -{ - /* Don't risk a hypervisor call if we're crashing */ - if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) { - unsigned long vpa = __pa(get_lppaca()); - - if (unregister_vpa(hard_smp_processor_id(), vpa)) { - printk("VPA deregistration of cpu %u (hw_cpu_id %d) " - "failed\n", smp_processor_id(), - hard_smp_processor_id()); - } - } - - if (ppc64_interrupt_controller == IC_OPEN_PIC) - mpic_teardown_this_cpu(secondary); - else - xics_teardown_cpu(secondary); -} -#endif - define_machine(pseries) { .name = "pSeries", .probe = pSeries_probe, @@ -543,7 +573,6 @@ define_machine(pseries) { .system_reset_exception = pSeries_system_reset_exception, .machine_check_exception = pSeries_machine_check_exception, #ifdef CONFIG_KEXEC - .kexec_cpu_down = pseries_kexec_cpu_down, .machine_kexec = default_machine_kexec, .machine_kexec_prepare = default_machine_kexec_prepare, .machine_crash_shutdown = default_machine_crash_shutdown, diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 3cf78a6cd27c..ac61098ff401 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -14,7 +14,6 @@ #undef DEBUG -#include <linux/config.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/sched.h> @@ -417,27 +416,12 @@ static struct smp_ops_t pSeries_xics_smp_ops = { #endif /* This is called very early */ -void __init smp_init_pSeries(void) +static void __init smp_init_pseries(void) { int i; DBG(" -> smp_init_pSeries()\n"); - switch (ppc64_interrupt_controller) { -#ifdef CONFIG_MPIC - case IC_OPEN_PIC: - smp_ops = &pSeries_mpic_smp_ops; - break; -#endif -#ifdef CONFIG_XICS - case IC_PPC_XIC: - smp_ops = &pSeries_xics_smp_ops; - break; -#endif - default: - panic("Invalid interrupt controller"); - } - #ifdef CONFIG_HOTPLUG_CPU smp_ops->cpu_disable = pSeries_cpu_disable; smp_ops->cpu_die = pSeries_cpu_die; @@ -472,3 +456,18 @@ void __init smp_init_pSeries(void) DBG(" <- smp_init_pSeries()\n"); } +#ifdef CONFIG_MPIC +void __init smp_init_pseries_mpic(void) +{ + smp_ops = &pSeries_mpic_smp_ops; + + smp_init_pseries(); +} +#endif + +void __init smp_init_pseries_xics(void) +{ + smp_ops = &pSeries_xics_smp_ops; + + smp_init_pseries(); +} diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c deleted file mode 100644 index 8e53e04ada8b..000000000000 --- a/arch/powerpc/platforms/pseries/vio.c +++ /dev/null @@ -1,274 +0,0 @@ -/* - * IBM PowerPC pSeries Virtual I/O Infrastructure Support. - * - * Copyright (c) 2003-2005 IBM Corp. - * Dave Engebretsen engebret@us.ibm.com - * Santiago Leon santil@us.ibm.com - * Hollis Blanchard <hollisb@us.ibm.com> - * Stephen Rothwell - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/kobject.h> -#include <asm/iommu.h> -#include <asm/dma.h> -#include <asm/prom.h> -#include <asm/vio.h> -#include <asm/hvcall.h> -#include <asm/tce.h> - -extern struct subsystem devices_subsys; /* needed for vio_find_name() */ - -static void probe_bus_pseries(void) -{ - struct device_node *node_vroot, *of_node; - - node_vroot = find_devices("vdevice"); - if ((node_vroot == NULL) || (node_vroot->child == NULL)) - /* this machine doesn't do virtual IO, and that's ok */ - return; - - /* - * Create struct vio_devices for each virtual device in the device tree. - * Drivers will associate with them later. - */ - for (of_node = node_vroot->child; of_node != NULL; - of_node = of_node->sibling) { - printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node); - vio_register_device_node(of_node); - } -} - -/** - * vio_match_device_pseries: - Tell if a pSeries VIO device matches a - * vio_device_id - */ -static int vio_match_device_pseries(const struct vio_device_id *id, - const struct vio_dev *dev) -{ - return (strncmp(dev->type, id->type, strlen(id->type)) == 0) && - device_is_compatible(dev->dev.platform_data, id->compat); -} - -static void vio_release_device_pseries(struct device *dev) -{ - /* XXX free TCE table */ - of_node_put(dev->platform_data); -} - -static ssize_t viodev_show_devspec(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct device_node *of_node = dev->platform_data; - - return sprintf(buf, "%s\n", of_node->full_name); -} -DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL); - -static void vio_unregister_device_pseries(struct vio_dev *viodev) -{ - device_remove_file(&viodev->dev, &dev_attr_devspec); -} - -static struct vio_bus_ops vio_bus_ops_pseries = { - .match = vio_match_device_pseries, - .unregister_device = vio_unregister_device_pseries, - .release_device = vio_release_device_pseries, -}; - -/** - * vio_bus_init_pseries: - Initialize the pSeries virtual IO bus - */ -static int __init vio_bus_init_pseries(void) -{ - int err; - - err = vio_bus_init(&vio_bus_ops_pseries); - if (err == 0) - probe_bus_pseries(); - return err; -} - -__initcall(vio_bus_init_pseries); - -/** - * vio_build_iommu_table: - gets the dma information from OF and - * builds the TCE tree. - * @dev: the virtual device. - * - * Returns a pointer to the built tce tree, or NULL if it can't - * find property. -*/ -static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) -{ - unsigned int *dma_window; - struct iommu_table *newTceTable; - unsigned long offset; - int dma_window_property_size; - - dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size); - if(!dma_window) { - return NULL; - } - - newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL); - - /* There should be some code to extract the phys-encoded offset - using prom_n_addr_cells(). However, according to a comment - on earlier versions, it's always zero, so we don't bother */ - offset = dma_window[1] >> PAGE_SHIFT; - - /* TCE table size - measured in tce entries */ - newTceTable->it_size = dma_window[4] >> PAGE_SHIFT; - /* offset for VIO should always be 0 */ - newTceTable->it_offset = offset; - newTceTable->it_busno = 0; - newTceTable->it_index = (unsigned long)dma_window[0]; - newTceTable->it_type = TCE_VB; - - return iommu_init_table(newTceTable); -} - -/** - * vio_register_device_node: - Register a new vio device. - * @of_node: The OF node for this device. - * - * Creates and initializes a vio_dev structure from the data in - * of_node (dev.platform_data) and adds it to the list of virtual devices. - * Returns a pointer to the created vio_dev or NULL if node has - * NULL device_type or compatible fields. - */ -struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) -{ - struct vio_dev *viodev; - unsigned int *unit_address; - unsigned int *irq_p; - - /* we need the 'device_type' property, in order to match with drivers */ - if ((NULL == of_node->type)) { - printk(KERN_WARNING - "%s: node %s missing 'device_type'\n", __FUNCTION__, - of_node->name ? of_node->name : "<unknown>"); - return NULL; - } - - unit_address = (unsigned int *)get_property(of_node, "reg", NULL); - if (!unit_address) { - printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__, - of_node->name ? of_node->name : "<unknown>"); - return NULL; - } - - /* allocate a vio_dev for this node */ - viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); - if (!viodev) { - return NULL; - } - memset(viodev, 0, sizeof(struct vio_dev)); - - viodev->dev.platform_data = of_node_get(of_node); - - viodev->irq = NO_IRQ; - irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL); - if (irq_p) { - int virq = virt_irq_create_mapping(*irq_p); - if (virq == NO_IRQ) { - printk(KERN_ERR "Unable to allocate interrupt " - "number for %s\n", of_node->full_name); - } else - viodev->irq = irq_offset_up(virq); - } - - snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address); - viodev->name = of_node->name; - viodev->type = of_node->type; - viodev->unit_address = *unit_address; - viodev->iommu_table = vio_build_iommu_table(viodev); - - /* register with generic device framework */ - if (vio_register_device(viodev) == NULL) { - /* XXX free TCE table */ - kfree(viodev); - return NULL; - } - device_create_file(&viodev->dev, &dev_attr_devspec); - - return viodev; -} -EXPORT_SYMBOL(vio_register_device_node); - -/** - * vio_get_attribute: - get attribute for virtual device - * @vdev: The vio device to get property. - * @which: The property/attribute to be extracted. - * @length: Pointer to length of returned data size (unused if NULL). - * - * Calls prom.c's get_property() to return the value of the - * attribute specified by the preprocessor constant @which -*/ -const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length) -{ - return get_property(vdev->dev.platform_data, (char*)which, length); -} -EXPORT_SYMBOL(vio_get_attribute); - -/* vio_find_name() - internal because only vio.c knows how we formatted the - * kobject name - * XXX once vio_bus_type.devices is actually used as a kset in - * drivers/base/bus.c, this function should be removed in favor of - * "device_find(kobj_name, &vio_bus_type)" - */ -static struct vio_dev *vio_find_name(const char *kobj_name) -{ - struct kobject *found; - - found = kset_find_obj(&devices_subsys.kset, kobj_name); - if (!found) - return NULL; - - return to_vio_dev(container_of(found, struct device, kobj)); -} - -/** - * vio_find_node - find an already-registered vio_dev - * @vnode: device_node of the virtual device we're looking for - */ -struct vio_dev *vio_find_node(struct device_node *vnode) -{ - uint32_t *unit_address; - char kobj_name[BUS_ID_SIZE]; - - /* construct the kobject name from the device node */ - unit_address = (uint32_t *)get_property(vnode, "reg", NULL); - if (!unit_address) - return NULL; - snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address); - - return vio_find_name(kobj_name); -} -EXPORT_SYMBOL(vio_find_node); - -int vio_enable_interrupts(struct vio_dev *dev) -{ - int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE); - if (rc != H_SUCCESS) - printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc); - return rc; -} -EXPORT_SYMBOL(vio_enable_interrupts); - -int vio_disable_interrupts(struct vio_dev *dev) -{ - int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE); - if (rc != H_SUCCESS) - printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc); - return rc; -} -EXPORT_SYMBOL(vio_disable_interrupts); diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 2d60ea30fed6..716972aa9777 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -8,7 +8,9 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> + +#undef DEBUG + #include <linux/types.h> #include <linux/threads.h> #include <linux/kernel.h> @@ -20,6 +22,7 @@ #include <linux/gfp.h> #include <linux/radix-tree.h> #include <linux/cpu.h> + #include <asm/firmware.h> #include <asm/prom.h> #include <asm/io.h> @@ -32,26 +35,6 @@ #include "xics.h" -static unsigned int xics_startup(unsigned int irq); -static void xics_enable_irq(unsigned int irq); -static void xics_disable_irq(unsigned int irq); -static void xics_mask_and_ack_irq(unsigned int irq); -static void xics_end_irq(unsigned int irq); -static void xics_set_affinity(unsigned int irq_nr, cpumask_t cpumask); - -static struct hw_interrupt_type xics_pic = { - .typename = " XICS ", - .startup = xics_startup, - .enable = xics_enable_irq, - .disable = xics_disable_irq, - .ack = xics_mask_and_ack_irq, - .end = xics_end_irq, - .set_affinity = xics_set_affinity -}; - -/* This is used to map real irq numbers to virtual */ -static struct radix_tree_root irq_map = RADIX_TREE_INIT(GFP_ATOMIC); - #define XICS_IPI 2 #define XICS_IRQ_SPURIOUS 0 @@ -60,7 +43,7 @@ static struct radix_tree_root irq_map = RADIX_TREE_INIT(GFP_ATOMIC); /* * Mark IPIs as higher priority so we can take them inside interrupts that - * arent marked SA_INTERRUPT + * arent marked IRQF_DISABLED */ #define IPI_PRIORITY 4 @@ -82,12 +65,12 @@ struct xics_ipl { static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS]; -static int xics_irq_8259_cascade = 0; -static int xics_irq_8259_cascade_real = 0; static unsigned int default_server = 0xFF; static unsigned int default_distrib_server = 0; static unsigned int interrupt_server_size = 8; +static struct irq_host *xics_host; + /* * XICS only has a single IPI, so encode the messages per CPU */ @@ -99,47 +82,33 @@ static int ibm_set_xive; static int ibm_int_on; static int ibm_int_off; -typedef struct { - int (*xirr_info_get)(int cpu); - void (*xirr_info_set)(int cpu, int val); - void (*cppr_info)(int cpu, u8 val); - void (*qirr_info)(int cpu, u8 val); -} xics_ops; +/* Direct HW low level accessors */ -/* SMP */ -static int pSeries_xirr_info_get(int n_cpu) +static inline unsigned int direct_xirr_info_get(int n_cpu) { return in_be32(&xics_per_cpu[n_cpu]->xirr.word); } -static void pSeries_xirr_info_set(int n_cpu, int value) +static inline void direct_xirr_info_set(int n_cpu, int value) { out_be32(&xics_per_cpu[n_cpu]->xirr.word, value); } -static void pSeries_cppr_info(int n_cpu, u8 value) +static inline void direct_cppr_info(int n_cpu, u8 value) { out_8(&xics_per_cpu[n_cpu]->xirr.bytes[0], value); } -static void pSeries_qirr_info(int n_cpu, u8 value) +static inline void direct_qirr_info(int n_cpu, u8 value) { out_8(&xics_per_cpu[n_cpu]->qirr.bytes[0], value); } -static xics_ops pSeries_ops = { - pSeries_xirr_info_get, - pSeries_xirr_info_set, - pSeries_cppr_info, - pSeries_qirr_info -}; - -static xics_ops *ops = &pSeries_ops; +/* LPAR low level accessors */ -/* LPAR */ static inline long plpar_eoi(unsigned long xirr) { @@ -162,7 +131,7 @@ static inline long plpar_xirr(unsigned long *xirr_ret) return plpar_hcall(H_XIRR, 0, 0, 0, 0, xirr_ret, &dummy, &dummy); } -static int pSeriesLP_xirr_info_get(int n_cpu) +static inline unsigned int lpar_xirr_info_get(int n_cpu) { unsigned long lpar_rc; unsigned long return_value; @@ -170,10 +139,10 @@ static int pSeriesLP_xirr_info_get(int n_cpu) lpar_rc = plpar_xirr(&return_value); if (lpar_rc != H_SUCCESS) panic(" bad return code xirr - rc = %lx \n", lpar_rc); - return (int)return_value; + return (unsigned int)return_value; } -static void pSeriesLP_xirr_info_set(int n_cpu, int value) +static inline void lpar_xirr_info_set(int n_cpu, int value) { unsigned long lpar_rc; unsigned long val64 = value & 0xffffffff; @@ -184,7 +153,7 @@ static void pSeriesLP_xirr_info_set(int n_cpu, int value) val64); } -void pSeriesLP_cppr_info(int n_cpu, u8 value) +static inline void lpar_cppr_info(int n_cpu, u8 value) { unsigned long lpar_rc; @@ -193,7 +162,7 @@ void pSeriesLP_cppr_info(int n_cpu, u8 value) panic("bad return code cppr - rc = %lx\n", lpar_rc); } -static void pSeriesLP_qirr_info(int n_cpu , u8 value) +static inline void lpar_qirr_info(int n_cpu , u8 value) { unsigned long lpar_rc; @@ -202,43 +171,16 @@ static void pSeriesLP_qirr_info(int n_cpu , u8 value) panic("bad return code qirr - rc = %lx\n", lpar_rc); } -xics_ops pSeriesLP_ops = { - pSeriesLP_xirr_info_get, - pSeriesLP_xirr_info_set, - pSeriesLP_cppr_info, - pSeriesLP_qirr_info -}; -static unsigned int xics_startup(unsigned int virq) -{ - unsigned int irq; +/* High level handlers and init code */ - irq = irq_offset_down(virq); - if (radix_tree_insert(&irq_map, virt_irq_to_real(irq), - &virt_irq_to_real_map[irq]) == -ENOMEM) - printk(KERN_CRIT "Out of memory creating real -> virtual" - " IRQ mapping for irq %u (real 0x%x)\n", - virq, virt_irq_to_real(irq)); - xics_enable_irq(virq); - return 0; /* return value is ignored */ -} - -static unsigned int real_irq_to_virt(unsigned int real_irq) -{ - unsigned int *ptr; - - ptr = radix_tree_lookup(&irq_map, real_irq); - if (ptr == NULL) - return NO_IRQ; - return ptr - virt_irq_to_real_map; -} #ifdef CONFIG_SMP -static int get_irq_server(unsigned int irq) +static int get_irq_server(unsigned int virq) { unsigned int server; /* For the moment only implement delivery to all cpus or one cpu */ - cpumask_t cpumask = irq_affinity[irq]; + cpumask_t cpumask = irq_desc[virq].affinity; cpumask_t tmp = CPU_MASK_NONE; if (!distribute_irqs) @@ -259,23 +201,28 @@ static int get_irq_server(unsigned int irq) } #else -static int get_irq_server(unsigned int irq) +static int get_irq_server(unsigned int virq) { return default_server; } #endif -static void xics_enable_irq(unsigned int virq) + +static void xics_unmask_irq(unsigned int virq) { unsigned int irq; int call_status; unsigned int server; - irq = virt_irq_to_real(irq_offset_down(virq)); - if (irq == XICS_IPI) + pr_debug("xics: unmask virq %d\n", virq); + + irq = (unsigned int)irq_map[virq].hwirq; + pr_debug(" -> map to hwirq 0x%x\n", irq); + if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) return; server = get_irq_server(virq); + call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, DEFAULT_PRIORITY); if (call_status != 0) { @@ -294,7 +241,7 @@ static void xics_enable_irq(unsigned int virq) } } -static void xics_disable_real_irq(unsigned int irq) +static void xics_mask_real_irq(unsigned int irq) { int call_status; unsigned int server; @@ -319,75 +266,86 @@ static void xics_disable_real_irq(unsigned int irq) } } -static void xics_disable_irq(unsigned int virq) +static void xics_mask_irq(unsigned int virq) +{ + unsigned int irq; + + pr_debug("xics: mask virq %d\n", virq); + + irq = (unsigned int)irq_map[virq].hwirq; + if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) + return; + xics_mask_real_irq(irq); +} + +static unsigned int xics_startup(unsigned int virq) { unsigned int irq; - irq = virt_irq_to_real(irq_offset_down(virq)); - xics_disable_real_irq(irq); + /* force a reverse mapping of the interrupt so it gets in the cache */ + irq = (unsigned int)irq_map[virq].hwirq; + irq_radix_revmap(xics_host, irq); + + /* unmask it */ + xics_unmask_irq(virq); + return 0; } -static void xics_end_irq(unsigned int irq) +static void xics_eoi_direct(unsigned int virq) { int cpu = smp_processor_id(); + unsigned int irq = (unsigned int)irq_map[virq].hwirq; iosync(); - ops->xirr_info_set(cpu, ((0xff << 24) | - (virt_irq_to_real(irq_offset_down(irq))))); - + direct_xirr_info_set(cpu, (0xff << 24) | irq); } -static void xics_mask_and_ack_irq(unsigned int irq) + +static void xics_eoi_lpar(unsigned int virq) { int cpu = smp_processor_id(); + unsigned int irq = (unsigned int)irq_map[virq].hwirq; - if (irq < irq_offset_value()) { - i8259_pic.ack(irq); - iosync(); - ops->xirr_info_set(cpu, ((0xff<<24) | - xics_irq_8259_cascade_real)); - iosync(); - } + iosync(); + lpar_xirr_info_set(cpu, (0xff << 24) | irq); } -int xics_get_irq(struct pt_regs *regs) +static inline unsigned int xics_remap_irq(unsigned int vec) { - unsigned int cpu = smp_processor_id(); - unsigned int vec; - int irq; + unsigned int irq; - vec = ops->xirr_info_get(cpu); - /* (vec >> 24) == old priority */ vec &= 0x00ffffff; - /* for sanity, this had better be < NR_IRQS - 16 */ - if (vec == xics_irq_8259_cascade_real) { - irq = i8259_irq(regs); - xics_end_irq(irq_offset_up(xics_irq_8259_cascade)); - } else if (vec == XICS_IRQ_SPURIOUS) { - irq = -1; - } else { - irq = real_irq_to_virt(vec); - if (irq == NO_IRQ) - irq = real_irq_to_virt_slowpath(vec); - if (irq == NO_IRQ) { - printk(KERN_ERR "Interrupt %u (real) is invalid," - " disabling it.\n", vec); - xics_disable_real_irq(vec); - } else - irq = irq_offset_up(irq); - } - return irq; + if (vec == XICS_IRQ_SPURIOUS) + return NO_IRQ; + irq = irq_radix_revmap(xics_host, vec); + if (likely(irq != NO_IRQ)) + return irq; + + printk(KERN_ERR "Interrupt %u (real) is invalid," + " disabling it.\n", vec); + xics_mask_real_irq(vec); + return NO_IRQ; } -#ifdef CONFIG_SMP +static unsigned int xics_get_irq_direct(struct pt_regs *regs) +{ + unsigned int cpu = smp_processor_id(); + + return xics_remap_irq(direct_xirr_info_get(cpu)); +} -static irqreturn_t xics_ipi_action(int irq, void *dev_id, struct pt_regs *regs) +static unsigned int xics_get_irq_lpar(struct pt_regs *regs) { - int cpu = smp_processor_id(); + unsigned int cpu = smp_processor_id(); + + return xics_remap_irq(lpar_xirr_info_get(cpu)); +} - ops->qirr_info(cpu, 0xff); +#ifdef CONFIG_SMP +static irqreturn_t xics_ipi_dispatch(int cpu, struct pt_regs *regs) +{ WARN_ON(cpu_is_offline(cpu)); while (xics_ipi_message[cpu].value) { @@ -419,18 +377,88 @@ static irqreturn_t xics_ipi_action(int irq, void *dev_id, struct pt_regs *regs) return IRQ_HANDLED; } +static irqreturn_t xics_ipi_action_direct(int irq, void *dev_id, struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + direct_qirr_info(cpu, 0xff); + + return xics_ipi_dispatch(cpu, regs); +} + +static irqreturn_t xics_ipi_action_lpar(int irq, void *dev_id, struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + lpar_qirr_info(cpu, 0xff); + + return xics_ipi_dispatch(cpu, regs); +} + void xics_cause_IPI(int cpu) { - ops->qirr_info(cpu, IPI_PRIORITY); + if (firmware_has_feature(FW_FEATURE_LPAR)) + lpar_qirr_info(cpu, IPI_PRIORITY); + else + direct_qirr_info(cpu, IPI_PRIORITY); } + #endif /* CONFIG_SMP */ +static void xics_set_cpu_priority(int cpu, unsigned char cppr) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + lpar_cppr_info(cpu, cppr); + else + direct_cppr_info(cpu, cppr); + iosync(); +} + +static void xics_set_affinity(unsigned int virq, cpumask_t cpumask) +{ + unsigned int irq; + int status; + int xics_status[2]; + unsigned long newmask; + cpumask_t tmp = CPU_MASK_NONE; + + irq = (unsigned int)irq_map[virq].hwirq; + if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) + return; + + status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); + + if (status) { + printk(KERN_ERR "xics_set_affinity: irq=%u ibm,get-xive " + "returns %d\n", irq, status); + return; + } + + /* For the moment only implement delivery to all cpus or one cpu */ + if (cpus_equal(cpumask, CPU_MASK_ALL)) { + newmask = default_distrib_server; + } else { + cpus_and(tmp, cpu_online_map, cpumask); + if (cpus_empty(tmp)) + return; + newmask = get_hard_smp_processor_id(first_cpu(tmp)); + } + + status = rtas_call(ibm_set_xive, 3, 1, NULL, + irq, newmask, xics_status[1]); + + if (status) { + printk(KERN_ERR "xics_set_affinity: irq=%u ibm,set-xive " + "returns %d\n", irq, status); + return; + } +} + void xics_setup_cpu(void) { int cpu = smp_processor_id(); - ops->cppr_info(cpu, 0xff); - iosync(); + xics_set_cpu_priority(cpu, 0xff); /* * Put the calling processor into the GIQ. This is really only @@ -443,72 +471,266 @@ void xics_setup_cpu(void) (1UL << interrupt_server_size) - 1 - default_distrib_server, 1); } -void xics_init_IRQ(void) + +static struct irq_chip xics_pic_direct = { + .typename = " XICS ", + .startup = xics_startup, + .mask = xics_mask_irq, + .unmask = xics_unmask_irq, + .eoi = xics_eoi_direct, + .set_affinity = xics_set_affinity +}; + + +static struct irq_chip xics_pic_lpar = { + .typename = " XICS ", + .startup = xics_startup, + .mask = xics_mask_irq, + .unmask = xics_unmask_irq, + .eoi = xics_eoi_lpar, + .set_affinity = xics_set_affinity +}; + + +static int xics_host_match(struct irq_host *h, struct device_node *node) +{ + /* IBM machines have interrupt parents of various funky types for things + * like vdevices, events, etc... The trick we use here is to match + * everything here except the legacy 8259 which is compatible "chrp,iic" + */ + return !device_is_compatible(node, "chrp,iic"); +} + +static int xics_host_map_direct(struct irq_host *h, unsigned int virq, + irq_hw_number_t hw, unsigned int flags) +{ + unsigned int sense = flags & IRQ_TYPE_SENSE_MASK; + + pr_debug("xics: map_direct virq %d, hwirq 0x%lx, flags: 0x%x\n", + virq, hw, flags); + + if (sense && sense != IRQ_TYPE_LEVEL_LOW) + printk(KERN_WARNING "xics: using unsupported sense 0x%x" + " for irq %d (h: 0x%lx)\n", flags, virq, hw); + + get_irq_desc(virq)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(virq, &xics_pic_direct, handle_fasteoi_irq); + return 0; +} + +static int xics_host_map_lpar(struct irq_host *h, unsigned int virq, + irq_hw_number_t hw, unsigned int flags) +{ + unsigned int sense = flags & IRQ_TYPE_SENSE_MASK; + + pr_debug("xics: map_lpar virq %d, hwirq 0x%lx, flags: 0x%x\n", + virq, hw, flags); + + if (sense && sense != IRQ_TYPE_LEVEL_LOW) + printk(KERN_WARNING "xics: using unsupported sense 0x%x" + " for irq %d (h: 0x%lx)\n", flags, virq, hw); + + get_irq_desc(virq)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(virq, &xics_pic_lpar, handle_fasteoi_irq); + return 0; +} + +static int xics_host_xlate(struct irq_host *h, struct device_node *ct, + u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) + { + /* Current xics implementation translates everything + * to level. It is not technically right for MSIs but this + * is irrelevant at this point. We might get smarter in the future + */ + *out_hwirq = intspec[0]; + *out_flags = IRQ_TYPE_LEVEL_LOW; + + return 0; +} + +static struct irq_host_ops xics_host_direct_ops = { + .match = xics_host_match, + .map = xics_host_map_direct, + .xlate = xics_host_xlate, +}; + +static struct irq_host_ops xics_host_lpar_ops = { + .match = xics_host_match, + .map = xics_host_map_lpar, + .xlate = xics_host_xlate, +}; + +static void __init xics_init_host(void) +{ + struct irq_host_ops *ops; + + if (firmware_has_feature(FW_FEATURE_LPAR)) + ops = &xics_host_lpar_ops; + else + ops = &xics_host_direct_ops; + xics_host = irq_alloc_host(IRQ_HOST_MAP_TREE, 0, ops, + XICS_IRQ_SPURIOUS); + BUG_ON(xics_host == NULL); + irq_set_default_host(xics_host); +} + +static void __init xics_map_one_cpu(int hw_id, unsigned long addr, + unsigned long size) +{ +#ifdef CONFIG_SMP int i; - unsigned long intr_size = 0; - struct device_node *np; - uint *ireg, ilen, indx = 0; - unsigned long intr_base = 0; - struct xics_interrupt_node { - unsigned long addr; - unsigned long size; - } intnodes[NR_CPUS]; - ppc64_boot_msg(0x20, "XICS Init"); + /* This may look gross but it's good enough for now, we don't quite + * have a hard -> linux processor id matching. + */ + for_each_possible_cpu(i) { + if (!cpu_present(i)) + continue; + if (hw_id == get_hard_smp_processor_id(i)) { + xics_per_cpu[i] = ioremap(addr, size); + return; + } + } +#else + if (hw_id != 0) + return; + xics_per_cpu[0] = ioremap(addr, size); +#endif /* CONFIG_SMP */ +} - ibm_get_xive = rtas_token("ibm,get-xive"); - ibm_set_xive = rtas_token("ibm,set-xive"); - ibm_int_on = rtas_token("ibm,int-on"); - ibm_int_off = rtas_token("ibm,int-off"); +static void __init xics_init_one_node(struct device_node *np, + unsigned int *indx) +{ + unsigned int ilen; + u32 *ireg; - np = of_find_node_by_type(NULL, "PowerPC-External-Interrupt-Presentation"); - if (!np) - panic("xics_init_IRQ: can't find interrupt presentation"); + /* This code does the theorically broken assumption that the interrupt + * server numbers are the same as the hard CPU numbers. + * This happens to be the case so far but we are playing with fire... + * should be fixed one of these days. -BenH. + */ + ireg = (u32 *)get_property(np, "ibm,interrupt-server-ranges", NULL); -nextnode: - ireg = (uint *)get_property(np, "ibm,interrupt-server-ranges", NULL); + /* Do that ever happen ? we'll know soon enough... but even good'old + * f80 does have that property .. + */ + WARN_ON(ireg == NULL); if (ireg) { /* * set node starting index for this node */ - indx = *ireg; + *indx = *ireg; } - - ireg = (uint *)get_property(np, "reg", &ilen); + ireg = (u32 *)get_property(np, "reg", &ilen); if (!ireg) panic("xics_init_IRQ: can't find interrupt reg property"); - while (ilen) { - intnodes[indx].addr = (unsigned long)*ireg++ << 32; - ilen -= sizeof(uint); - intnodes[indx].addr |= *ireg++; - ilen -= sizeof(uint); - intnodes[indx].size = (unsigned long)*ireg++ << 32; - ilen -= sizeof(uint); - intnodes[indx].size |= *ireg++; - ilen -= sizeof(uint); - indx++; - if (indx >= NR_CPUS) break; + while (ilen >= (4 * sizeof(u32))) { + unsigned long addr, size; + + /* XXX Use proper OF parsing code here !!! */ + addr = (unsigned long)*ireg++ << 32; + ilen -= sizeof(u32); + addr |= *ireg++; + ilen -= sizeof(u32); + size = (unsigned long)*ireg++ << 32; + ilen -= sizeof(u32); + size |= *ireg++; + ilen -= sizeof(u32); + xics_map_one_cpu(*indx, addr, size); + (*indx)++; } +} + + +static void __init xics_setup_8259_cascade(void) +{ + struct device_node *np, *old, *found = NULL; + int cascade, naddr; + u32 *addrp; + unsigned long intack = 0; + + for_each_node_by_type(np, "interrupt-controller") + if (device_is_compatible(np, "chrp,iic")) { + found = np; + break; + } + if (found == NULL) { + printk(KERN_DEBUG "xics: no ISA interrupt controller\n"); + return; + } + cascade = irq_of_parse_and_map(found, 0); + if (cascade == NO_IRQ) { + printk(KERN_ERR "xics: failed to map cascade interrupt"); + return; + } + pr_debug("xics: cascade mapped to irq %d\n", cascade); + + for (old = of_node_get(found); old != NULL ; old = np) { + np = of_get_parent(old); + of_node_put(old); + if (np == NULL) + break; + if (strcmp(np->name, "pci") != 0) + continue; + addrp = (u32 *)get_property(np, "8259-interrupt-acknowledge", NULL); + if (addrp == NULL) + continue; + naddr = prom_n_addr_cells(np); + intack = addrp[naddr-1]; + if (naddr > 1) + intack |= ((unsigned long)addrp[naddr-2]) << 32; + } + if (intack) + printk(KERN_DEBUG "xics: PCI 8259 intack at 0x%016lx\n", intack); + i8259_init(found, intack); + of_node_put(found); + set_irq_chained_handler(cascade, pseries_8259_cascade); +} + +void __init xics_init_IRQ(void) +{ + int i; + struct device_node *np; + u32 *ireg, ilen, indx = 0; + int found = 0; + + ppc64_boot_msg(0x20, "XICS Init"); + + ibm_get_xive = rtas_token("ibm,get-xive"); + ibm_set_xive = rtas_token("ibm,set-xive"); + ibm_int_on = rtas_token("ibm,int-on"); + ibm_int_off = rtas_token("ibm,int-off"); - np = of_find_node_by_type(np, "PowerPC-External-Interrupt-Presentation"); - if ((indx < NR_CPUS) && np) goto nextnode; + for_each_node_by_type(np, "PowerPC-External-Interrupt-Presentation") { + found = 1; + if (firmware_has_feature(FW_FEATURE_LPAR)) + break; + xics_init_one_node(np, &indx); + } + if (found == 0) + return; + + xics_init_host(); /* Find the server numbers for the boot cpu. */ for (np = of_find_node_by_type(NULL, "cpu"); np; np = of_find_node_by_type(np, "cpu")) { - ireg = (uint *)get_property(np, "reg", &ilen); + ireg = (u32 *)get_property(np, "reg", &ilen); if (ireg && ireg[0] == get_hard_smp_processor_id(boot_cpuid)) { - ireg = (uint *)get_property(np, "ibm,ppc-interrupt-gserver#s", - &ilen); + ireg = (u32 *)get_property(np, + "ibm,ppc-interrupt-gserver#s", + &ilen); i = ilen / sizeof(int); if (ireg && i > 0) { default_server = ireg[0]; - default_distrib_server = ireg[i-1]; /* take last element */ + /* take last element */ + default_distrib_server = ireg[i-1]; } - ireg = (uint *)get_property(np, + ireg = (u32 *)get_property(np, "ibm,interrupt-server#-size", NULL); if (ireg) interrupt_server_size = *ireg; @@ -517,147 +739,72 @@ nextnode: } of_node_put(np); - intr_base = intnodes[0].addr; - intr_size = intnodes[0].size; - - np = of_find_node_by_type(NULL, "interrupt-controller"); - if (!np) { - printk(KERN_WARNING "xics: no ISA interrupt controller\n"); - xics_irq_8259_cascade_real = -1; - xics_irq_8259_cascade = -1; - } else { - ireg = (uint *) get_property(np, "interrupts", NULL); - if (!ireg) - panic("xics_init_IRQ: can't find ISA interrupts property"); - - xics_irq_8259_cascade_real = *ireg; - xics_irq_8259_cascade - = virt_irq_create_mapping(xics_irq_8259_cascade_real); - i8259_init(0, 0); - of_node_put(np); - } - if (firmware_has_feature(FW_FEATURE_LPAR)) - ops = &pSeriesLP_ops; - else { -#ifdef CONFIG_SMP - for_each_possible_cpu(i) { - int hard_id; - - /* FIXME: Do this dynamically! --RR */ - if (!cpu_present(i)) - continue; - - hard_id = get_hard_smp_processor_id(i); - xics_per_cpu[i] = ioremap(intnodes[hard_id].addr, - intnodes[hard_id].size); - } -#else - xics_per_cpu[0] = ioremap(intr_base, intr_size); -#endif /* CONFIG_SMP */ - } - - for (i = irq_offset_value(); i < NR_IRQS; ++i) - get_irq_desc(i)->handler = &xics_pic; + ppc_md.get_irq = xics_get_irq_lpar; + else + ppc_md.get_irq = xics_get_irq_direct; xics_setup_cpu(); + xics_setup_8259_cascade(); + ppc64_boot_msg(0x21, "XICS Done"); } -/* - * We cant do this in init_IRQ because we need the memory subsystem up for - * request_irq() - */ -static int __init xics_setup_i8259(void) -{ - if (ppc64_interrupt_controller == IC_PPC_XIC && - xics_irq_8259_cascade != -1) { - if (request_irq(irq_offset_up(xics_irq_8259_cascade), - no_action, 0, "8259 cascade", NULL)) - printk(KERN_ERR "xics_setup_i8259: couldn't get 8259 " - "cascade\n"); - } - return 0; -} -arch_initcall(xics_setup_i8259); #ifdef CONFIG_SMP void xics_request_IPIs(void) { - virt_irq_to_real_map[XICS_IPI] = XICS_IPI; - - /* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */ - request_irq(irq_offset_up(XICS_IPI), xics_ipi_action, SA_INTERRUPT, - "IPI", NULL); - get_irq_desc(irq_offset_up(XICS_IPI))->status |= IRQ_PER_CPU; -} -#endif - -static void xics_set_affinity(unsigned int virq, cpumask_t cpumask) -{ - unsigned int irq; - int status; - int xics_status[2]; - unsigned long newmask; - cpumask_t tmp = CPU_MASK_NONE; - - irq = virt_irq_to_real(irq_offset_down(virq)); - if (irq == XICS_IPI || irq == NO_IRQ) - return; - - status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); - - if (status) { - printk(KERN_ERR "xics_set_affinity: irq=%u ibm,get-xive " - "returns %d\n", irq, status); - return; - } - - /* For the moment only implement delivery to all cpus or one cpu */ - if (cpus_equal(cpumask, CPU_MASK_ALL)) { - newmask = default_distrib_server; - } else { - cpus_and(tmp, cpu_online_map, cpumask); - if (cpus_empty(tmp)) - return; - newmask = get_hard_smp_processor_id(first_cpu(tmp)); - } + unsigned int ipi; - status = rtas_call(ibm_set_xive, 3, 1, NULL, - irq, newmask, xics_status[1]); + ipi = irq_create_mapping(xics_host, XICS_IPI, 0); + BUG_ON(ipi == NO_IRQ); - if (status) { - printk(KERN_ERR "xics_set_affinity: irq=%u ibm,set-xive " - "returns %d\n", irq, status); - return; - } + /* + * IPIs are marked IRQF_DISABLED as they must run with irqs + * disabled + */ + set_irq_handler(ipi, handle_percpu_irq); + if (firmware_has_feature(FW_FEATURE_LPAR)) + request_irq(ipi, xics_ipi_action_lpar, IRQF_DISABLED, + "IPI", NULL); + else + request_irq(ipi, xics_ipi_action_direct, IRQF_DISABLED, + "IPI", NULL); } +#endif /* CONFIG_SMP */ void xics_teardown_cpu(int secondary) { int cpu = smp_processor_id(); + unsigned int ipi; + struct irq_desc *desc; - ops->cppr_info(cpu, 0x00); - iosync(); + xics_set_cpu_priority(cpu, 0); + + /* + * we need to EOI the IPI if we got here from kexec down IPI + * + * probably need to check all the other interrupts too + * should we be flagging idle loop instead? + * or creating some task to be scheduled? + */ + + ipi = irq_find_mapping(xics_host, XICS_IPI); + if (ipi == XICS_IRQ_SPURIOUS) + return; + desc = get_irq_desc(ipi); + if (desc->chip && desc->chip->eoi) + desc->chip->eoi(XICS_IPI); /* * Some machines need to have at least one cpu in the GIQ, * so leave the master cpu in the group. */ - if (secondary) { - /* - * we need to EOI the IPI if we got here from kexec down IPI - * - * probably need to check all the other interrupts too - * should we be flagging idle loop instead? - * or creating some task to be scheduled? - */ - ops->xirr_info_set(cpu, XICS_IPI); + if (secondary) rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, - (1UL << interrupt_server_size) - 1 - - default_distrib_server, 0); - } + (1UL << interrupt_server_size) - 1 - + default_distrib_server, 0); } #ifdef CONFIG_HOTPLUG_CPU @@ -669,8 +816,7 @@ void xics_migrate_irqs_away(void) unsigned int irq, virq, cpu = smp_processor_id(); /* Reject any interrupt that was queued to us... */ - ops->cppr_info(cpu, 0); - iosync(); + xics_set_cpu_priority(cpu, 0); /* remove ourselves from the global interrupt queue */ status = rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, @@ -678,29 +824,28 @@ void xics_migrate_irqs_away(void) WARN_ON(status < 0); /* Allow IPIs again... */ - ops->cppr_info(cpu, DEFAULT_PRIORITY); - iosync(); + xics_set_cpu_priority(cpu, DEFAULT_PRIORITY); for_each_irq(virq) { - irq_desc_t *desc; + struct irq_desc *desc; int xics_status[2]; unsigned long flags; /* We cant set affinity on ISA interrupts */ - if (virq < irq_offset_value()) + if (virq < NUM_ISA_INTERRUPTS) continue; - - desc = get_irq_desc(virq); - irq = virt_irq_to_real(irq_offset_down(virq)); - + if (irq_map[virq].host != xics_host) + continue; + irq = (unsigned int)irq_map[virq].hwirq; /* We need to get IPIs still. */ - if (irq == XICS_IPI || irq == NO_IRQ) + if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) continue; + desc = get_irq_desc(virq); /* We only need to migrate enabled IRQS */ - if (desc == NULL || desc->handler == NULL + if (desc == NULL || desc->chip == NULL || desc->action == NULL - || desc->handler->set_affinity == NULL) + || desc->chip->set_affinity == NULL) continue; spin_lock_irqsave(&desc->lock, flags); @@ -725,8 +870,8 @@ void xics_migrate_irqs_away(void) virq, cpu); /* Reset affinity to all cpus */ - desc->handler->set_affinity(virq, CPU_MASK_ALL); - irq_affinity[virq] = CPU_MASK_ALL; + desc->chip->set_affinity(virq, CPU_MASK_ALL); + irq_desc[irq].affinity = CPU_MASK_ALL; unlock: spin_unlock_irqrestore(&desc->lock, flags); } diff --git a/arch/powerpc/platforms/pseries/xics.h b/arch/powerpc/platforms/pseries/xics.h index e14c70868f1d..6ee1055b0ffb 100644 --- a/arch/powerpc/platforms/pseries/xics.h +++ b/arch/powerpc/platforms/pseries/xics.h @@ -14,13 +14,12 @@ #include <linux/cache.h> -void xics_init_IRQ(void); -int xics_get_irq(struct pt_regs *); -void xics_setup_cpu(void); -void xics_teardown_cpu(int secondary); -void xics_cause_IPI(int cpu); -void xics_request_IPIs(void); -void xics_migrate_irqs_away(void); +extern void xics_init_IRQ(void); +extern void xics_setup_cpu(void); +extern void xics_teardown_cpu(int secondary); +extern void xics_cause_IPI(int cpu); +extern void xics_request_IPIs(void); +extern void xics_migrate_irqs_away(void); /* first argument is ignored for now*/ void pSeriesLP_cppr_info(int n_cpu, u8 value); @@ -31,4 +30,8 @@ struct xics_ipi_struct { extern struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned; +struct irq_desc; +extern void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc, + struct pt_regs *regs); + #endif /* _POWERPC_KERNEL_XICS_H */ |