15 files changed, 1352 insertions, 1257 deletions
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 31f22c1f657d..aadbe4f6d537 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -3,6 +3,7 @@ config PPC_PSERIES
 	bool "IBM pSeries & new (POWER5-based) iSeries"
 	select HAVE_PCSPKR_PLATFORM
 	select MPIC
+	select OF_DYNAMIC
 	select PCI_MSI
 	select PPC_XICS
 	select PPC_ICP_NATIVE
@@ -72,7 +73,7 @@ config IO_EVENT_IRQ
 
 config LPARCFG
 	bool "LPAR Configuration Data"
-	depends on PPC_PSERIES || PPC_ISERIES
+	depends on PPC_PSERIES
 	help
 	Provide system capacity information via human readable
 	<key word>=<value> pairs through a /proc/ppc64/lparcfg interface.
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 236db46b4078..c222189f5bb2 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -6,7 +6,8 @@ obj-y			:= lpar.o hvCall.o nvram.o reconfig.o \
 			   firmware.o power.o dlpar.o mobility.o
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_SCANLOG)	+= scanlog.o
-obj-$(CONFIG_EEH)	+= eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o
+obj-$(CONFIG_EEH)	+= eeh.o eeh_dev.o eeh_cache.o eeh_driver.o \
+			   eeh_event.o eeh_sysfs.o eeh_pseries.o
 obj-$(CONFIG_KEXEC)	+= kexec.o
 obj-$(CONFIG_PCI)	+= pci.o pci_dlpar.o
 obj-$(CONFIG_PSERIES_MSI)	+= msi.o
@@ -18,7 +19,6 @@ obj-$(CONFIG_MEMORY_HOTPLUG)	+= hotplug-memory.o
 obj-$(CONFIG_HVC_CONSOLE)	+= hvconsole.o
 obj-$(CONFIG_HVCS)		+= hvcserver.o
 obj-$(CONFIG_HCALL_STATS)	+= hvCall_inst.o
-obj-$(CONFIG_PHYP_DUMP)		+= phyp_dump.o
 obj-$(CONFIG_CMM)		+= cmm.o
 obj-$(CONFIG_DTL)		+= dtl.o
 obj-$(CONFIG_IO_EVENT_IRQ)	+= io_event_irq.o
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index c0b40af4ce4f..8011088392d3 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -1,8 +1,8 @@
 /*
- * eeh.c
  * Copyright IBM Corporation 2001, 2005, 2006
  * Copyright Dave Engebretsen & Todd Inglett 2001
  * Copyright Linas Vepstas 2005, 2006
+ * Copyright 2001-2012 IBM Corporation.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -22,7 +22,7 @@
  */
 
 #include <linux/delay.h>
-#include <linux/sched.h>	/* for init_mm */
+#include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/pci.h>
@@ -86,16 +86,8 @@
 /* Time to wait for a PCI slot to report status, in milliseconds */
 #define PCI_BUS_RESET_WAIT_MSEC (60*1000)
 
-/* RTAS tokens */
-static int ibm_set_eeh_option;
-static int ibm_set_slot_reset;
-static int ibm_read_slot_reset_state;
-static int ibm_read_slot_reset_state2;
-static int ibm_slot_error_detail;
-static int ibm_get_config_addr_info;
-static int ibm_get_config_addr_info2;
-static int ibm_configure_bridge;
-static int ibm_configure_pe;
+/* Platform dependent EEH operations */
+struct eeh_ops *eeh_ops = NULL;
 
 int eeh_subsystem_enabled;
 EXPORT_SYMBOL(eeh_subsystem_enabled);
@@ -103,14 +95,6 @@ EXPORT_SYMBOL(eeh_subsystem_enabled);
 /* Lock to avoid races due to multiple reports of an error */
 static DEFINE_RAW_SPINLOCK(confirm_error_lock);
 
-/* Buffer for reporting slot-error-detail rtas calls. Its here
- * in BSS, and not dynamically alloced, so that it ends up in
- * RMO where RTAS can access it.
- */
-static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
-static DEFINE_SPINLOCK(slot_errbuf_lock);
-static int eeh_error_buf_size;
-
 /* Buffer for reporting pci register dumps. Its here in BSS, and
  * not dynamically alloced, so that it ends up in RMO where RTAS
  * can access it.
@@ -118,74 +102,50 @@ static int eeh_error_buf_size;
 #define EEH_PCI_REGS_LOG_LEN 4096
 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
 
-/* System monitoring statistics */
-static unsigned long no_device;
-static unsigned long no_dn;
-static unsigned long no_cfg_addr;
-static unsigned long ignored_check;
-static unsigned long total_mmio_ffs;
-static unsigned long false_positives;
-static unsigned long slot_resets;
-
-#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
-
-/* --------------------------------------------------------------- */
-/* Below lies the EEH event infrastructure */
+/*
+ * The struct is used to maintain the EEH global statistic
+ * information. Besides, the EEH global statistics will be
+ * exported to user space through procfs
+ */
+struct eeh_stats {
+	u64 no_device;		/* PCI device not found		*/
+	u64 no_dn;		/* OF node not found		*/
+	u64 no_cfg_addr;	/* Config address not found	*/
+	u64 ignored_check;	/* EEH check skipped		*/
+	u64 total_mmio_ffs;	/* Total EEH checks		*/
+	u64 false_positives;	/* Unnecessary EEH checks	*/
+	u64 slot_resets;	/* PE reset			*/
+};
 
-static void rtas_slot_error_detail(struct pci_dn *pdn, int severity,
-                                   char *driver_log, size_t loglen)
-{
-	int config_addr;
-	unsigned long flags;
-	int rc;
+static struct eeh_stats eeh_stats;
 
-	/* Log the error with the rtas logger */
-	spin_lock_irqsave(&slot_errbuf_lock, flags);
-	memset(slot_errbuf, 0, eeh_error_buf_size);
-
-	/* Use PE configuration address, if present */
-	config_addr = pdn->eeh_config_addr;
-	if (pdn->eeh_pe_config_addr)
-		config_addr = pdn->eeh_pe_config_addr;
-
-	rc = rtas_call(ibm_slot_error_detail,
-	               8, 1, NULL, config_addr,
-	               BUID_HI(pdn->phb->buid),
-	               BUID_LO(pdn->phb->buid),
-	               virt_to_phys(driver_log), loglen,
-	               virt_to_phys(slot_errbuf),
-	               eeh_error_buf_size,
-	               severity);
-
-	if (rc == 0)
-		log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
-	spin_unlock_irqrestore(&slot_errbuf_lock, flags);
-}
+#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
 
 /**
- * gather_pci_data - copy assorted PCI config space registers to buff
- * @pdn: device to report data for
+ * eeh_gather_pci_data - Copy assorted PCI config space registers to buff
+ * @edev: device to report data for
  * @buf: point to buffer in which to log
  * @len: amount of room in buffer
  *
  * This routine captures assorted PCI configuration space data,
  * and puts them into a buffer for RTAS error logging.
  */
-static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
+static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
 {
-	struct pci_dev *dev = pdn->pcidev;
+	struct device_node *dn = eeh_dev_to_of_node(edev);
+	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	u32 cfg;
 	int cap, i;
 	int n = 0;
 
-	n += scnprintf(buf+n, len-n, "%s\n", pdn->node->full_name);
-	printk(KERN_WARNING "EEH: of node=%s\n", pdn->node->full_name);
+	n += scnprintf(buf+n, len-n, "%s\n", dn->full_name);
+	printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name);
 
-	rtas_read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
+	eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg);
 	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
 	printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);
 
-	rtas_read_config(pdn, PCI_COMMAND, 4, &cfg);
+	eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg);
 	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
 	printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);
 
@@ -196,11 +156,11 @@ static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
 
 	/* Gather bridge-specific registers */
 	if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
-		rtas_read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
+		eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg);
 		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
 		printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg);
 
-		rtas_read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
+		eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg);
 		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
 		printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg);
 	}
@@ -208,11 +168,11 @@ static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
 	/* Dump out the PCI-X command and status regs */
 	cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
 	if (cap) {
-		rtas_read_config(pdn, cap, 4, &cfg);
+		eeh_ops->read_config(dn, cap, 4, &cfg);
 		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
 		printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);
 
-		rtas_read_config(pdn, cap+4, 4, &cfg);
+		eeh_ops->read_config(dn, cap+4, 4, &cfg);
 		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
 		printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
 	}
@@ -225,7 +185,7 @@ static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
 		       "EEH: PCI-E capabilities and status follow:\n");
 
 		for (i=0; i<=8; i++) {
-			rtas_read_config(pdn, cap+4*i, 4, &cfg);
+			eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
 			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
 			printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
 		}
@@ -237,7 +197,7 @@ static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
 			       "EEH: PCI-E AER capability register set follows:\n");
 
 			for (i=0; i<14; i++) {
-				rtas_read_config(pdn, cap+4*i, 4, &cfg);
+				eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
 				n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
 				printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
 			}
@@ -246,111 +206,46 @@ static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
 
 	/* Gather status on devices under the bridge */
 	if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
-		struct device_node *dn;
+		struct device_node *child;
 
-		for_each_child_of_node(pdn->node, dn) {
-			pdn = PCI_DN(dn);
-			if (pdn)
-				n += gather_pci_data(pdn, buf+n, len-n);
+		for_each_child_of_node(dn, child) {
+			if (of_node_to_eeh_dev(child))
+				n += eeh_gather_pci_data(of_node_to_eeh_dev(child), buf+n, len-n);
 		}
 	}
 
 	return n;
 }
 
-void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
-{
-	size_t loglen = 0;
-	pci_regs_buf[0] = 0;
-
-	rtas_pci_enable(pdn, EEH_THAW_MMIO);
-	rtas_configure_bridge(pdn);
-	eeh_restore_bars(pdn);
-	loglen = gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
-
-	rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen);
-}
-
 /**
- * read_slot_reset_state - Read the reset state of a device node's slot
- * @dn: device node to read
- * @rets: array to return results in
- */
-static int read_slot_reset_state(struct pci_dn *pdn, int rets[])
-{
-	int token, outputs;
-	int config_addr;
-
-	if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
-		token = ibm_read_slot_reset_state2;
-		outputs = 4;
-	} else {
-		token = ibm_read_slot_reset_state;
-		rets[2] = 0; /* fake PE Unavailable info */
-		outputs = 3;
-	}
-
-	/* Use PE configuration address, if present */
-	config_addr = pdn->eeh_config_addr;
-	if (pdn->eeh_pe_config_addr)
-		config_addr = pdn->eeh_pe_config_addr;
-
-	return rtas_call(token, 3, outputs, rets, config_addr,
-			 BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid));
-}
-
-/**
- * eeh_wait_for_slot_status - returns error status of slot
- * @pdn pci device node
- * @max_wait_msecs maximum number to millisecs to wait
- *
- * Return negative value if a permanent error, else return
- * Partition Endpoint (PE) status value.
+ * eeh_slot_error_detail - Generate combined log including driver log and error log
+ * @edev: device to report error log for
+ * @severity: temporary or permanent error log
  *
- * If @max_wait_msecs is positive, then this routine will
- * sleep until a valid status can be obtained, or until
- * the max allowed wait time is exceeded, in which case
- * a -2 is returned.
+ * This routine should be called to generate the combined log, which
+ * is comprised of driver log and error log. The driver log is figured
+ * out from the config space of the corresponding PCI device, while
+ * the error log is fetched through platform dependent function call.
  */
-int
-eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs)
+void eeh_slot_error_detail(struct eeh_dev *edev, int severity)
 {
-	int rc;
-	int rets[3];
-	int mwait;
-
-	while (1) {
-		rc = read_slot_reset_state(pdn, rets);
-		if (rc) return rc;
-		if (rets[1] == 0) return -1;  /* EEH is not supported */
-
-		if (rets[0] != 5) return rets[0]; /* return actual status */
-
-		if (rets[2] == 0) return -1; /* permanently unavailable */
+	size_t loglen = 0;
+	pci_regs_buf[0] = 0;
 
-		if (max_wait_msecs <= 0) break;
+	eeh_pci_enable(edev, EEH_OPT_THAW_MMIO);
+	eeh_ops->configure_bridge(eeh_dev_to_of_node(edev));
+	eeh_restore_bars(edev);
+	loglen = eeh_gather_pci_data(edev, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
 
-		mwait = rets[2];
-		if (mwait <= 0) {
-			printk (KERN_WARNING
-			        "EEH: Firmware returned bad wait value=%d\n", mwait);
-			mwait = 1000;
-		} else if (mwait > 300*1000) {
-			printk (KERN_WARNING
-			        "EEH: Firmware is taking too long, time=%d\n", mwait);
-			mwait = 300*1000;
-		}
-		max_wait_msecs -= mwait;
-		msleep (mwait);
-	}
-
-	printk(KERN_WARNING "EEH: Timed out waiting for slot status\n");
-	return -2;
+	eeh_ops->get_log(eeh_dev_to_of_node(edev), severity, pci_regs_buf, loglen);
 }
 
 /**
- * eeh_token_to_phys - convert EEH address token to phys address
- * @token i/o token, should be address in the form 0xA....
+ * eeh_token_to_phys - Convert EEH address token to phys address
+ * @token: I/O token, should be address in the form 0xA....
+ *
+ * This routine should be called to convert virtual I/O address
+ * to physical one.
  */
 static inline unsigned long eeh_token_to_phys(unsigned long token)
 {
@@ -365,36 +260,43 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
 	return pa | (token & (PAGE_SIZE-1));
 }
 
-/** 
- * Return the "partitionable endpoint" (pe) under which this device lies
+/**
+ * eeh_find_device_pe - Retrieve the PE for the given device
+ * @dn: device node
+ *
+ * Return the PE under which this device lies
  */
-struct device_node * find_device_pe(struct device_node *dn)
+struct device_node *eeh_find_device_pe(struct device_node *dn)
 {
-	while ((dn->parent) && PCI_DN(dn->parent) &&
-	      (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
+	while (dn->parent && of_node_to_eeh_dev(dn->parent) &&
+	       (of_node_to_eeh_dev(dn->parent)->mode & EEH_MODE_SUPPORTED)) {
 		dn = dn->parent;
 	}
 	return dn;
 }
 
-/** Mark all devices that are children of this device as failed.
- *  Mark the device driver too, so that it can see the failure
- *  immediately; this is critical, since some drivers poll
- *  status registers in interrupts ... If a driver is polling,
- *  and the slot is frozen, then the driver can deadlock in
- *  an interrupt context, which is bad.
+/**
+ * __eeh_mark_slot - Mark all child devices as failed
+ * @parent: parent device
+ * @mode_flag: failure flag
+ *
+ * Mark all devices that are children of this device as failed.
+ * Mark the device driver too, so that it can see the failure
+ * immediately; this is critical, since some drivers poll
+ * status registers in interrupts ... If a driver is polling,
+ * and the slot is frozen, then the driver can deadlock in
+ * an interrupt context, which is bad.
  */
-
 static void __eeh_mark_slot(struct device_node *parent, int mode_flag)
 {
 	struct device_node *dn;
 
 	for_each_child_of_node(parent, dn) {
-		if (PCI_DN(dn)) {
+		if (of_node_to_eeh_dev(dn)) {
 			/* Mark the pci device driver too */
-			struct pci_dev *dev = PCI_DN(dn)->pcidev;
+			struct pci_dev *dev = of_node_to_eeh_dev(dn)->pdev;
 
-			PCI_DN(dn)->eeh_mode |= mode_flag;
+			of_node_to_eeh_dev(dn)->mode |= mode_flag;
 
 			if (dev && dev->driver)
 				dev->error_state = pci_channel_io_frozen;
@@ -404,92 +306,81 @@ static void __eeh_mark_slot(struct device_node *parent, int mode_flag)
 	}
 }
 
-void eeh_mark_slot (struct device_node *dn, int mode_flag)
+/**
+ * eeh_mark_slot - Mark the indicated device and its children as failed
+ * @dn: parent device
+ * @mode_flag: failure flag
+ *
+ * Mark the indicated device and its child devices as failed.
+ * The device drivers are marked as failed as well.
+ */
+void eeh_mark_slot(struct device_node *dn, int mode_flag)
 {
 	struct pci_dev *dev;
-	dn = find_device_pe (dn);
+	dn = eeh_find_device_pe(dn);
 
 	/* Back up one, since config addrs might be shared */
-	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
+	if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
 		dn = dn->parent;
 
-	PCI_DN(dn)->eeh_mode |= mode_flag;
+	of_node_to_eeh_dev(dn)->mode |= mode_flag;
 
 	/* Mark the pci device too */
-	dev = PCI_DN(dn)->pcidev;
+	dev = of_node_to_eeh_dev(dn)->pdev;
 	if (dev)
 		dev->error_state = pci_channel_io_frozen;
 
 	__eeh_mark_slot(dn, mode_flag);
 }
 
+/**
+ * __eeh_clear_slot - Clear failure flag for the child devices
+ * @parent: parent device
+ * @mode_flag: flag to be cleared
+ *
+ * Clear failure flag for the child devices.
+ */
 static void __eeh_clear_slot(struct device_node *parent, int mode_flag)
 {
 	struct device_node *dn;
 
 	for_each_child_of_node(parent, dn) {
-		if (PCI_DN(dn)) {
-			PCI_DN(dn)->eeh_mode &= ~mode_flag;
-			PCI_DN(dn)->eeh_check_count = 0;
+		if (of_node_to_eeh_dev(dn)) {
+			of_node_to_eeh_dev(dn)->mode &= ~mode_flag;
+			of_node_to_eeh_dev(dn)->check_count = 0;
 			__eeh_clear_slot(dn, mode_flag);
 		}
 	}
 }
 
-void eeh_clear_slot (struct device_node *dn, int mode_flag)
+/**
+ * eeh_clear_slot - Clear failure flag for the indicated device and its children
+ * @dn: parent device
+ * @mode_flag: flag to be cleared
+ *
+ * Clear failure flag for the indicated device and its children.
+ */
+void eeh_clear_slot(struct device_node *dn, int mode_flag)
 {
 	unsigned long flags;
 	raw_spin_lock_irqsave(&confirm_error_lock, flags);
 	
-	dn = find_device_pe (dn);
+	dn = eeh_find_device_pe(dn);
 	
 	/* Back up one, since config addrs might be shared */
-	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
+	if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
 		dn = dn->parent;
 
-	PCI_DN(dn)->eeh_mode &= ~mode_flag;
-	PCI_DN(dn)->eeh_check_count = 0;
+	of_node_to_eeh_dev(dn)->mode &= ~mode_flag;
+	of_node_to_eeh_dev(dn)->check_count = 0;
 	__eeh_clear_slot(dn, mode_flag);
 	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
 }
 
-void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
-{
-	struct device_node *dn;
-
-	for_each_child_of_node(parent, dn) {
-		if (PCI_DN(dn)) {
-
-			struct pci_dev *dev = PCI_DN(dn)->pcidev;
-
-			if (dev && dev->driver)
-				*freset |= dev->needs_freset;
-
-			__eeh_set_pe_freset(dn, freset);
-		}
-	}
-}
-
-void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
-{
-	struct pci_dev *dev;
-	dn = find_device_pe(dn);
-
-	/* Back up one, since config addrs might be shared */
-	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
-		dn = dn->parent;
-
-	dev = PCI_DN(dn)->pcidev;
-	if (dev)
-		*freset |= dev->needs_freset;
-
-	__eeh_set_pe_freset(dn, freset);
-}
-
 /**
- * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
- * @dn device node
- * @dev pci device, if known
+ * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze
+ * @dn: device node
+ * @dev: pci device, if known
  *
  * Check for an EEH failure for the given device node.  Call this
  * routine if the result of a read was all 0xff's and you want to
@@ -504,35 +395,34 @@ void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
 int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 {
 	int ret;
-	int rets[3];
 	unsigned long flags;
-	struct pci_dn *pdn;
+	struct eeh_dev *edev;
 	int rc = 0;
 	const char *location;
 
-	total_mmio_ffs++;
+	eeh_stats.total_mmio_ffs++;
 
 	if (!eeh_subsystem_enabled)
 		return 0;
 
 	if (!dn) {
-		no_dn++;
+		eeh_stats.no_dn++;
 		return 0;
 	}
-	dn = find_device_pe(dn);
-	pdn = PCI_DN(dn);
+	dn = eeh_find_device_pe(dn);
+	edev = of_node_to_eeh_dev(dn);
 
 	/* Access to IO BARs might get this far and still not want checking. */
-	if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
-	    pdn->eeh_mode & EEH_MODE_NOCHECK) {
-		ignored_check++;
+	if (!(edev->mode & EEH_MODE_SUPPORTED) ||
+	    edev->mode & EEH_MODE_NOCHECK) {
+		eeh_stats.ignored_check++;
 		pr_debug("EEH: Ignored check (%x) for %s %s\n",
-			 pdn->eeh_mode, eeh_pci_name(dev), dn->full_name);
+			edev->mode, eeh_pci_name(dev), dn->full_name);
 		return 0;
 	}
 
-	if (!pdn->eeh_config_addr && !pdn->eeh_pe_config_addr) {
-		no_cfg_addr++;
+	if (!edev->config_addr && !edev->pe_config_addr) {
+		eeh_stats.no_cfg_addr++;
 		return 0;
 	}
 
@@ -544,15 +434,15 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	 */
 	raw_spin_lock_irqsave(&confirm_error_lock, flags);
 	rc = 1;
-	if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
-		pdn->eeh_check_count ++;
-		if (pdn->eeh_check_count % EEH_MAX_FAILS == 0) {
+	if (edev->mode & EEH_MODE_ISOLATED) {
+		edev->check_count++;
+		if (edev->check_count % EEH_MAX_FAILS == 0) {
 			location = of_get_property(dn, "ibm,loc-code", NULL);
-			printk (KERN_ERR "EEH: %d reads ignored for recovering device at "
+			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
 				"location=%s driver=%s pci addr=%s\n",
-				pdn->eeh_check_count, location,
+				edev->check_count, location,
 				eeh_driver_name(dev), eeh_pci_name(dev));
-			printk (KERN_ERR "EEH: Might be infinite loop in %s driver\n",
+			printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
 				eeh_driver_name(dev));
 			dump_stack();
 		}
@@ -566,58 +456,39 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	 * function zero of a multi-function device.
 	 * In any case they must share a common PHB.
 	 */
-	ret = read_slot_reset_state(pdn, rets);
-
-	/* If the call to firmware failed, punt */
-	if (ret != 0) {
-		printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n",
-		       ret, dn->full_name);
-		false_positives++;
-		pdn->eeh_false_positives ++;
-		rc = 0;
-		goto dn_unlock;
-	}
+	ret = eeh_ops->get_state(dn, NULL);
 
 	/* Note that config-io to empty slots may fail;
-	 * they are empty when they don't have children. */
-	if ((rets[0] == 5) && (rets[2] == 0) && (dn->child == NULL)) {
-		false_positives++;
-		pdn->eeh_false_positives ++;
-		rc = 0;
-		goto dn_unlock;
-	}
-
-	/* If EEH is not supported on this device, punt. */
-	if (rets[1] != 1) {
-		printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
-		       ret, dn->full_name);
-		false_positives++;
-		pdn->eeh_false_positives ++;
-		rc = 0;
-		goto dn_unlock;
-	}
-
-	/* If not the kind of error we know about, punt. */
-	if (rets[0] != 1 && rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
-		false_positives++;
-		pdn->eeh_false_positives ++;
+	 * they are empty when they don't have children.
+	 * We will punt with the following conditions: Failure to get
+	 * PE's state, EEH not support and Permanently unavailable
+	 * state, PE is in good state.
+	 */
+	if ((ret < 0) ||
+	    (ret == EEH_STATE_NOT_SUPPORT) ||
+	    (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
+	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
+		eeh_stats.false_positives++;
+		edev->false_positives ++;
 		rc = 0;
 		goto dn_unlock;
 	}
 
-	slot_resets++;
+	eeh_stats.slot_resets++;
  
 	/* Avoid repeated reports of this failure, including problems
 	 * with other functions on this device, and functions under
-	 * bridges. */
-	eeh_mark_slot (dn, EEH_MODE_ISOLATED);
+	 * bridges.
+	 */
+	eeh_mark_slot(dn, EEH_MODE_ISOLATED);
 	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
 
-	eeh_send_failure_event (dn, dev);
+	eeh_send_failure_event(edev);
 
 	/* Most EEH events are due to device driver bugs.  Having
 	 * a stack trace will help the device-driver authors figure
-	 * out what happened.  So print that out. */
+	 * out what happened.  So print that out.
+	 */
 	dump_stack();
 	return 1;
 
@@ -629,9 +500,9 @@ dn_unlock:
 EXPORT_SYMBOL_GPL(eeh_dn_check_failure);
 
 /**
- * eeh_check_failure - check if all 1's data is due to EEH slot freeze
- * @token i/o token, should be address in the form 0xA....
- * @val value, should be all 1's (XXX why do we need this arg??)
+ * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
+ * @token: I/O token, should be address in the form 0xA....
+ * @val: value, should be all 1's (XXX why do we need this arg??)
  *
  * Check for an EEH failure at the given token address.  Call this
  * routine if the result of a read was all 0xff's and you want to
@@ -648,14 +519,14 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
 
 	/* Finding the phys addr + pci device; this is pretty quick. */
 	addr = eeh_token_to_phys((unsigned long __force) token);
-	dev = pci_get_device_by_addr(addr);
+	dev = pci_addr_cache_get_device(addr);
 	if (!dev) {
-		no_device++;
+		eeh_stats.no_device++;
 		return val;
 	}
 
 	dn = pci_device_to_OF_node(dev);
-	eeh_dn_check_failure (dn, dev);
+	eeh_dn_check_failure(dn, dev);
 
 	pci_dev_put(dev);
 	return val;
@@ -663,115 +534,54 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
 
 EXPORT_SYMBOL(eeh_check_failure);
 
-/* ------------------------------------------------------------- */
-/* The code below deals with error recovery */
 
 /**
- * rtas_pci_enable - enable MMIO or DMA transfers for this slot
- * @pdn pci device node
+ * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
+ * @edev: pci device node
+ *
+ * This routine should be called to reenable frozen MMIO or DMA
+ * so that it would work correctly again. It's useful while doing
+ * recovery or log collection on the indicated device.
  */
-
-int
-rtas_pci_enable(struct pci_dn *pdn, int function)
+int eeh_pci_enable(struct eeh_dev *edev, int function)
 {
-	int config_addr;
 	int rc;
+	struct device_node *dn = eeh_dev_to_of_node(edev);
 
-	/* Use PE configuration address, if present */
-	config_addr = pdn->eeh_config_addr;
-	if (pdn->eeh_pe_config_addr)
-		config_addr = pdn->eeh_pe_config_addr;
-
-	rc = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
-	               config_addr,
-	               BUID_HI(pdn->phb->buid),
-	               BUID_LO(pdn->phb->buid),
-		            function);
-
+	rc = eeh_ops->set_option(dn, function);
 	if (rc)
 		printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n",
-		        function, rc, pdn->node->full_name);
+		        function, rc, dn->full_name);
 
-	rc = eeh_wait_for_slot_status (pdn, PCI_BUS_RESET_WAIT_MSEC);
-	if ((rc == 4) && (function == EEH_THAW_MMIO))
+	rc = eeh_ops->wait_state(dn, PCI_BUS_RESET_WAIT_MSEC);
+	if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) &&
+	   (function == EEH_OPT_THAW_MMIO))
 		return 0;
 
 	return rc;
 }
 
 /**
- * rtas_pci_slot_reset - raises/lowers the pci #RST line
- * @pdn pci device node
- * @state: 1/0 to raise/lower the #RST
- *
- * Clear the EEH-frozen condition on a slot.  This routine
- * asserts the PCI #RST line if the 'state' argument is '1',
- * and drops the #RST line if 'state is '0'.  This routine is
- * safe to call in an interrupt context.
- *
- */
-
-static void
-rtas_pci_slot_reset(struct pci_dn *pdn, int state)
-{
-	int config_addr;
-	int rc;
-
-	BUG_ON (pdn==NULL); 
-
-	if (!pdn->phb) {
-		printk (KERN_WARNING "EEH: in slot reset, device node %s has no phb\n",
-		        pdn->node->full_name);
-		return;
-	}
-
-	/* Use PE configuration address, if present */
-	config_addr = pdn->eeh_config_addr;
-	if (pdn->eeh_pe_config_addr)
-		config_addr = pdn->eeh_pe_config_addr;
-
-	rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
-	               config_addr,
-	               BUID_HI(pdn->phb->buid),
-	               BUID_LO(pdn->phb->buid),
-	               state);
-
-	/* Fundamental-reset not supported on this PE, try hot-reset */
-	if (rc == -8 && state == 3) {
-		rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
-			       config_addr,
-			       BUID_HI(pdn->phb->buid),
-			       BUID_LO(pdn->phb->buid), 1);
-		if (rc)
-			printk(KERN_WARNING
-				"EEH: Unable to reset the failed slot,"
-				" #RST=%d dn=%s\n",
-				rc, pdn->node->full_name);
-	}
-}
-
-/**
  * pcibios_set_pcie_slot_reset - Set PCI-E reset state
- * @dev:	pci device struct
- * @state:	reset state to enter
+ * @dev: pci device struct
+ * @state: reset state to enter
  *
  * Return value:
  * 	0 if success
- **/
+ */
 int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
 {
 	struct device_node *dn = pci_device_to_OF_node(dev);
-	struct pci_dn *pdn = PCI_DN(dn);
 
 	switch (state) {
 	case pcie_deassert_reset:
-		rtas_pci_slot_reset(pdn, 0);
+		eeh_ops->reset(dn, EEH_RESET_DEACTIVATE);
 		break;
 	case pcie_hot_reset:
-		rtas_pci_slot_reset(pdn, 1);
+		eeh_ops->reset(dn, EEH_RESET_HOT);
 		break;
 	case pcie_warm_reset:
-		rtas_pci_slot_reset(pdn, 3);
+		eeh_ops->reset(dn, EEH_RESET_FUNDAMENTAL);
 		break;
 	default:
 		return -EINVAL;
@@ -781,13 +591,66 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
 }
 
 /**
- * rtas_set_slot_reset -- assert the pci #RST line for 1/4 second
- * @pdn: pci device node to be reset.
+ * __eeh_set_pe_freset - Check the required reset for child devices
+ * @parent: parent device
+ * @freset: return value
+ *
+ * Each device might have its preferred reset type: fundamental or
+ * hot reset. The routine is used to collect the information from
+ * the child devices so that they could be reset accordingly.
+ */
+void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
+{
+	struct device_node *dn;
+
+	for_each_child_of_node(parent, dn) {
+		if (of_node_to_eeh_dev(dn)) {
+			struct pci_dev *dev = of_node_to_eeh_dev(dn)->pdev;
+
+			if (dev && dev->driver)
+				*freset |= dev->needs_freset;
+
+			__eeh_set_pe_freset(dn, freset);
+		}
+	}
+}
+
+/**
+ * eeh_set_pe_freset - Check the required reset for the indicated device and its children
+ * @dn: parent device
+ * @freset: return value
+ *
+ * Each device might have its preferred reset type: fundamental or
+ * hot reset. The routine is used to collected the information for
+ * the indicated device and its children so that the bunch of the
+ * devices could be reset properly.
  */
+void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
+{
+	struct pci_dev *dev;
+	dn = eeh_find_device_pe(dn);
+
+	/* Back up one, since config addrs might be shared */
+	if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
+		dn = dn->parent;
+
+	dev = of_node_to_eeh_dev(dn)->pdev;
+	if (dev)
+		*freset |= dev->needs_freset;
 
-static void __rtas_set_slot_reset(struct pci_dn *pdn)
+	__eeh_set_pe_freset(dn, freset);
+}
+
+/**
+ * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
+ * @edev: pci device node to be reset.
+ *
+ * Assert the PCI #RST line for 1/4 second.
+ */
+static void eeh_reset_pe_once(struct eeh_dev *edev)
 {
 	unsigned int freset = 0;
+	struct device_node *dn = eeh_dev_to_of_node(edev);
 
 	/* Determine type of EEH reset required for
 	 * Partitionable Endpoint, a hot-reset (1)
@@ -795,58 +658,68 @@ static void __rtas_set_slot_reset(struct pci_dn *pdn)
 	 * A fundamental reset required by any device under
 	 * Partitionable Endpoint trumps hot-reset.
   	 */
-	eeh_set_pe_freset(pdn->node, &freset);
+	eeh_set_pe_freset(dn, &freset);
 
 	if (freset)
-		rtas_pci_slot_reset(pdn, 3);
+		eeh_ops->reset(dn, EEH_RESET_FUNDAMENTAL);
 	else
-		rtas_pci_slot_reset(pdn, 1);
+		eeh_ops->reset(dn, EEH_RESET_HOT);
 
 	/* The PCI bus requires that the reset be held high for at least
-	 * a 100 milliseconds. We wait a bit longer 'just in case'.  */
-
+	 * a 100 milliseconds. We wait a bit longer 'just in case'.
+	 */
 #define PCI_BUS_RST_HOLD_TIME_MSEC 250
-	msleep (PCI_BUS_RST_HOLD_TIME_MSEC);
+	msleep(PCI_BUS_RST_HOLD_TIME_MSEC);
 	
 	/* We might get hit with another EEH freeze as soon as the 
 	 * pci slot reset line is dropped. Make sure we don't miss
-	 * these, and clear the flag now. */
-	eeh_clear_slot (pdn->node, EEH_MODE_ISOLATED);
+	 * these, and clear the flag now.
+	 */
+	eeh_clear_slot(dn, EEH_MODE_ISOLATED);
 
-	rtas_pci_slot_reset (pdn, 0);
+	eeh_ops->reset(dn, EEH_RESET_DEACTIVATE);
 
 	/* After a PCI slot has been reset, the PCI Express spec requires
 	 * a 1.5 second idle time for the bus to stabilize, before starting
-	 * up traffic. */
+	 * up traffic.
+	 */
 #define PCI_BUS_SETTLE_TIME_MSEC 1800
-	msleep (PCI_BUS_SETTLE_TIME_MSEC);
+	msleep(PCI_BUS_SETTLE_TIME_MSEC);
 }
 
-int rtas_set_slot_reset(struct pci_dn *pdn)
+/**
+ * eeh_reset_pe - Reset the indicated PE
+ * @edev: PCI device associated EEH device
+ *
+ * This routine should be called to reset indicated device, including
+ * PE. A PE might include multiple PCI devices and sometimes PCI bridges
+ * might be involved as well.
+ */
+int eeh_reset_pe(struct eeh_dev *edev)
 {
 	int i, rc;
+	struct device_node *dn = eeh_dev_to_of_node(edev);
 
 	/* Take three shots at resetting the bus */
 	for (i=0; i<3; i++) {
-		__rtas_set_slot_reset(pdn);
+		eeh_reset_pe_once(edev);
 
-		rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
-		if (rc == 0)
+		rc = eeh_ops->wait_state(dn, PCI_BUS_RESET_WAIT_MSEC);
+		if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
 			return 0;
 
 		if (rc < 0) {
 			printk(KERN_ERR "EEH: unrecoverable slot failure %s\n",
-			       pdn->node->full_name);
+			       dn->full_name);
 			return -1;
 		}
 		printk(KERN_ERR "EEH: bus reset %d failed on slot %s, rc=%d\n",
-		       i+1, pdn->node->full_name, rc);
+		       i+1, dn->full_name, rc);
 	}
 
 	return -1;
 }
 
-/* ------------------------------------------------------- */
 /** Save and restore of PCI BARs
  *
  * Although firmware will set up BARs during boot, it doesn't
@@ -856,181 +729,122 @@ int rtas_set_slot_reset(struct pci_dn *pdn)
  */
 
 /**
- * __restore_bars - Restore the Base Address Registers
- * @pdn: pci device node
+ * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
+ * @edev: PCI device associated EEH device
  *
  * Loads the PCI configuration space base address registers,
  * the expansion ROM base address, the latency timer, and etc.
  * from the saved values in the device node.
  */
-static inline void __restore_bars (struct pci_dn *pdn)
+static inline void eeh_restore_one_device_bars(struct eeh_dev *edev)
 {
 	int i;
 	u32 cmd;
+	struct device_node *dn = eeh_dev_to_of_node(edev);
+
+	if (!edev->phb)
+		return;
 
-	if (NULL==pdn->phb) return;
 	for (i=4; i<10; i++) {
-		rtas_write_config(pdn, i*4, 4, pdn->config_space[i]);
+		eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
 	}
 
 	/* 12 == Expansion ROM Address */
-	rtas_write_config(pdn, 12*4, 4, pdn->config_space[12]);
+	eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]);
 
 #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
-#define SAVED_BYTE(OFF) (((u8 *)(pdn->config_space))[BYTE_SWAP(OFF)])
+#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
 
-	rtas_write_config (pdn, PCI_CACHE_LINE_SIZE, 1,
+	eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
 	            SAVED_BYTE(PCI_CACHE_LINE_SIZE));
 
-	rtas_write_config (pdn, PCI_LATENCY_TIMER, 1,
+	eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
 	            SAVED_BYTE(PCI_LATENCY_TIMER));
 
 	/* max latency, min grant, interrupt pin and line */
-	rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]);
+	eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
 
 	/* Restore PERR & SERR bits, some devices require it,
-	   don't touch the other command bits */
-	rtas_read_config(pdn, PCI_COMMAND, 4, &cmd);
-	if (pdn->config_space[1] & PCI_COMMAND_PARITY)
+	 * don't touch the other command bits
+	 */
+	eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd);
+	if (edev->config_space[1] & PCI_COMMAND_PARITY)
 		cmd |= PCI_COMMAND_PARITY;
 	else
 		cmd &= ~PCI_COMMAND_PARITY;
-	if (pdn->config_space[1] & PCI_COMMAND_SERR)
+	if (edev->config_space[1] & PCI_COMMAND_SERR)
 		cmd |= PCI_COMMAND_SERR;
 	else
 		cmd &= ~PCI_COMMAND_SERR;
-	rtas_write_config(pdn, PCI_COMMAND, 4, cmd);
+	eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd);
 }
 
 /**
- * eeh_restore_bars - restore the PCI config space info
+ * eeh_restore_bars - Restore the PCI config space info
+ * @edev: EEH device
  *
  * This routine performs a recursive walk to the children
  * of this device as well.
  */
-void eeh_restore_bars(struct pci_dn *pdn)
+void eeh_restore_bars(struct eeh_dev *edev)
 {
 	struct device_node *dn;
-	if (!pdn) 
+	if (!edev)
 		return;
 	
-	if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code))
-		__restore_bars (pdn);
+	if ((edev->mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(edev->class_code))
+		eeh_restore_one_device_bars(edev);
 
-	for_each_child_of_node(pdn->node, dn)
-		eeh_restore_bars (PCI_DN(dn));
+	for_each_child_of_node(eeh_dev_to_of_node(edev), dn)
+		eeh_restore_bars(of_node_to_eeh_dev(dn));
 }
 
 /**
- * eeh_save_bars - save device bars
+ * eeh_save_bars - Save device bars
+ * @edev: PCI device associated EEH device
  *
  * Save the values of the device bars. Unlike the restore
  * routine, this routine is *not* recursive. This is because
  * PCI devices are added individually; but, for the restore,
  * an entire slot is reset at a time.
  */
-static void eeh_save_bars(struct pci_dn *pdn)
+static void eeh_save_bars(struct eeh_dev *edev)
 {
 	int i;
+	struct device_node *dn;
 
-	if (!pdn )
+	if (!edev)
 		return;
+	dn = eeh_dev_to_of_node(edev);
 	
 	for (i = 0; i < 16; i++)
-		rtas_read_config(pdn, i * 4, 4, &pdn->config_space[i]);
-}
-
-void
-rtas_configure_bridge(struct pci_dn *pdn)
-{
-	int config_addr;
-	int rc;
-	int token;
-
-	/* Use PE configuration address, if present */
-	config_addr = pdn->eeh_config_addr;
-	if (pdn->eeh_pe_config_addr)
-		config_addr = pdn->eeh_pe_config_addr;
-
-	/* Use new configure-pe function, if supported */
-	if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE)
-		token = ibm_configure_pe;
-	else
-		token = ibm_configure_bridge;
-
-	rc = rtas_call(token, 3, 1, NULL,
-	               config_addr,
-	               BUID_HI(pdn->phb->buid),
-	               BUID_LO(pdn->phb->buid));
-	if (rc) {
-		printk (KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n",
-		        rc, pdn->node->full_name);
-	}
+		eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]);
 }
 
-/* ------------------------------------------------------------- */
-/* The code below deals with enabling EEH for devices during  the
- * early boot sequence.  EEH must be enabled before any PCI probing
- * can be done.
+/**
+ * eeh_early_enable - Early enable EEH on the indicated device
+ * @dn: device node
+ * @data: BUID
+ *
+ * Enable EEH functionality on the specified PCI device. The function
+ * is expected to be called before real PCI probing is done. However,
+ * the PHBs have been initialized at this point.
  */
-
-#define EEH_ENABLE 1
-
-struct eeh_early_enable_info {
-	unsigned int buid_hi;
-	unsigned int buid_lo;
-};
-
-static int get_pe_addr (int config_addr,
-                        struct eeh_early_enable_info *info)
+static void *eeh_early_enable(struct device_node *dn, void *data)
 {
-	unsigned int rets[3];
-	int ret;
-
-	/* Use latest config-addr token on power6 */
-	if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
-		/* Make sure we have a PE in hand */
-		ret = rtas_call (ibm_get_config_addr_info2, 4, 2, rets,
-			config_addr, info->buid_hi, info->buid_lo, 1);
-		if (ret || (rets[0]==0))
-			return 0;
-
-		ret = rtas_call (ibm_get_config_addr_info2, 4, 2, rets,
-			config_addr, info->buid_hi, info->buid_lo, 0);
-		if (ret)
-			return 0;
-		return rets[0];
-	}
-
-	/* Use older config-addr token on power5 */
-	if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
-		ret = rtas_call (ibm_get_config_addr_info, 4, 2, rets,
-			config_addr, info->buid_hi, info->buid_lo, 0);
-		if (ret)
-			return 0;
-		return rets[0];
-	}
-	return 0;
-}
-
-/* Enable eeh for the given device node. */
-static void *early_enable_eeh(struct device_node *dn, void *data)
-{
-	unsigned int rets[3];
-	struct eeh_early_enable_info *info = data;
 	int ret;
 	const u32 *class_code = of_get_property(dn, "class-code", NULL);
 	const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL);
 	const u32 *device_id = of_get_property(dn, "device-id", NULL);
 	const u32 *regs;
 	int enable;
-	struct pci_dn *pdn = PCI_DN(dn);
+	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
 
-	pdn->class_code = 0;
-	pdn->eeh_mode = 0;
-	pdn->eeh_check_count = 0;
-	pdn->eeh_freeze_count = 0;
-	pdn->eeh_false_positives = 0;
+	edev->class_code = 0;
+	edev->mode = 0;
+	edev->check_count = 0;
+	edev->freeze_count = 0;
+	edev->false_positives = 0;
 
 	if (!of_device_is_available(dn))
 		return NULL;
@@ -1041,54 +855,56 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
 
 	/* There is nothing to check on PCI to ISA bridges */
 	if (dn->type && !strcmp(dn->type, "isa")) {
-		pdn->eeh_mode |= EEH_MODE_NOCHECK;
+		edev->mode |= EEH_MODE_NOCHECK;
 		return NULL;
 	}
-	pdn->class_code = *class_code;
+	edev->class_code = *class_code;
 
 	/* Ok... see if this device supports EEH.  Some do, some don't,
-	 * and the only way to find out is to check each and every one. */
+	 * and the only way to find out is to check each and every one.
+	 */
 	regs = of_get_property(dn, "reg", NULL);
 	if (regs) {
 		/* First register entry is addr (00BBSS00)  */
 		/* Try to enable eeh */
-		ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
-		                regs[0], info->buid_hi, info->buid_lo,
-		                EEH_ENABLE);
+		ret = eeh_ops->set_option(dn, EEH_OPT_ENABLE);
 
 		enable = 0;
 		if (ret == 0) {
-			pdn->eeh_config_addr = regs[0];
+			edev->config_addr = regs[0];
 
 			/* If the newer, better, ibm,get-config-addr-info is supported, 
-			 * then use that instead. */
-			pdn->eeh_pe_config_addr = get_pe_addr(pdn->eeh_config_addr, info);
+			 * then use that instead.
+			 */
+			edev->pe_config_addr = eeh_ops->get_pe_addr(dn);
 
 			/* Some older systems (Power4) allow the
 			 * ibm,set-eeh-option call to succeed even on nodes
 			 * where EEH is not supported. Verify support
-			 * explicitly. */
-			ret = read_slot_reset_state(pdn, rets);
-			if ((ret == 0) && (rets[1] == 1))
+			 * explicitly.
+			 */
+			ret = eeh_ops->get_state(dn, NULL);
+			if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT)
 				enable = 1;
 		}
 
 		if (enable) {
 			eeh_subsystem_enabled = 1;
-			pdn->eeh_mode |= EEH_MODE_SUPPORTED;
+			edev->mode |= EEH_MODE_SUPPORTED;
 
 			pr_debug("EEH: %s: eeh enabled, config=%x pe_config=%x\n",
-				 dn->full_name, pdn->eeh_config_addr,
-				 pdn->eeh_pe_config_addr);
+				 dn->full_name, edev->config_addr,
+				 edev->pe_config_addr);
 		} else {
 
 			/* This device doesn't support EEH, but it may have an
-			 * EEH parent, in which case we mark it as supported. */
-			if (dn->parent && PCI_DN(dn->parent)
-			    && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
+			 * EEH parent, in which case we mark it as supported.
+			 */
+			if (dn->parent && of_node_to_eeh_dev(dn->parent) &&
+			    (of_node_to_eeh_dev(dn->parent)->mode & EEH_MODE_SUPPORTED)) {
 				/* Parent supports EEH. */
-				pdn->eeh_mode |= EEH_MODE_SUPPORTED;
-				pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr;
+				edev->mode |= EEH_MODE_SUPPORTED;
+				edev->config_addr = of_node_to_eeh_dev(dn->parent)->config_addr;
 				return NULL;
 			}
 		}
@@ -1097,11 +913,63 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
 		       dn->full_name);
 	}
 
-	eeh_save_bars(pdn);
+	eeh_save_bars(edev);
 	return NULL;
 }
 
-/*
+/**
+ * eeh_ops_register - Register platform dependent EEH operations
+ * @ops: platform dependent EEH operations
+ *
+ * Register the platform dependent EEH operation callback
+ * functions. The platform should call this function before
+ * any other EEH operations.
+ */
+int __init eeh_ops_register(struct eeh_ops *ops)
+{
+	if (!ops->name) {
+		pr_warning("%s: Invalid EEH ops name for %p\n",
+			__func__, ops);
+		return -EINVAL;
+	}
+
+	if (eeh_ops && eeh_ops != ops) {
+		pr_warning("%s: EEH ops of platform %s already existing (%s)\n",
+			__func__, eeh_ops->name, ops->name);
+		return -EEXIST;
+	}
+
+	eeh_ops = ops;
+
+	return 0;
+}
+
+/**
+ * eeh_ops_unregister - Unreigster platform dependent EEH operations
+ * @name: name of EEH platform operations
+ *
+ * Unregister the platform dependent EEH operation callback
+ * functions.
+ */
+int __exit eeh_ops_unregister(const char *name)
+{
+	if (!name || !strlen(name)) {
+		pr_warning("%s: Invalid EEH ops name\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (eeh_ops && !strcmp(eeh_ops->name, name)) {
+		eeh_ops = NULL;
+		return 0;
+	}
+
+	return -EEXIST;
+}
+
+/**
+ * eeh_init - EEH initialization
+ *
  * Initialize EEH by trying to enable it for all of the adapters in the system.
  * As a side effect we can determine here if eeh is supported at all.
  * Note that we leave EEH on so failed config cycles won't cause a machine
@@ -1117,50 +985,35 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
 void __init eeh_init(void)
 {
 	struct device_node *phb, *np;
-	struct eeh_early_enable_info info;
+	int ret;
+
+	/* call platform initialization function */
+	if (!eeh_ops) {
+		pr_warning("%s: Platform EEH operation not found\n",
+			__func__);
+		return;
+	} else if ((ret = eeh_ops->init())) {
+		pr_warning("%s: Failed to call platform init function (%d)\n",
+			__func__, ret);
+		return;
+	}
 
 	raw_spin_lock_init(&confirm_error_lock);
-	spin_lock_init(&slot_errbuf_lock);
 
 	np = of_find_node_by_path("/rtas");
 	if (np == NULL)
 		return;
 
-	ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
-	ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
-	ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
-	ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
-	ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
-	ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
-	ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
-	ibm_configure_bridge = rtas_token ("ibm,configure-bridge");
-	ibm_configure_pe = rtas_token("ibm,configure-pe");
-
-	if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
-		return;
-
-	eeh_error_buf_size = rtas_token("rtas-error-log-max");
-	if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
-		eeh_error_buf_size = 1024;
-	}
-	if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
-		printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated "
-		      "buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
-		eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
-	}
-
 	/* Enable EEH for all adapters.  Note that eeh requires buid's */
 	for (phb = of_find_node_by_name(NULL, "pci"); phb;
 	     phb = of_find_node_by_name(phb, "pci")) {
 		unsigned long buid;
 
 		buid = get_phb_buid(phb);
-		if (buid == 0 || PCI_DN(phb) == NULL)
+		if (buid == 0 || !of_node_to_eeh_dev(phb))
 			continue;
 
-		info.buid_lo = BUID_LO(buid);
-		info.buid_hi = BUID_HI(buid);
-		traverse_pci_devices(phb, early_enable_eeh, &info);
+		traverse_pci_devices(phb, eeh_early_enable, NULL);
 	}
 
 	if (eeh_subsystem_enabled)
@@ -1170,7 +1023,7 @@ void __init eeh_init(void)
 }
 
 /**
- * eeh_add_device_early - enable EEH for the indicated device_node
+ * eeh_add_device_early - Enable EEH for the indicated device_node
  * @dn: device node for which to set up EEH
  *
  * This routine must be used to perform EEH initialization for PCI
@@ -1184,21 +1037,26 @@ void __init eeh_init(void)
 static void eeh_add_device_early(struct device_node *dn)
 {
 	struct pci_controller *phb;
-	struct eeh_early_enable_info info;
 
-	if (!dn || !PCI_DN(dn))
+	if (!dn || !of_node_to_eeh_dev(dn))
 		return;
-	phb = PCI_DN(dn)->phb;
+	phb = of_node_to_eeh_dev(dn)->phb;
 
 	/* USB Bus children of PCI devices will not have BUID's */
 	if (NULL == phb || 0 == phb->buid)
 		return;
 
-	info.buid_hi = BUID_HI(phb->buid);
-	info.buid_lo = BUID_LO(phb->buid);
-	early_enable_eeh(dn, &info);
+	eeh_early_enable(dn, NULL);
 }
 
+/**
+ * eeh_add_device_tree_early - Enable EEH for the indicated device
+ * @dn: device node
+ *
+ * This routine must be used to perform EEH initialization for the
+ * indicated PCI device that was added after system boot (e.g.
+ * hotplug, dlpar).
+ */
 void eeh_add_device_tree_early(struct device_node *dn)
 {
 	struct device_node *sib;
@@ -1210,7 +1068,7 @@ void eeh_add_device_tree_early(struct device_node *dn)
 EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
 
 /**
- * eeh_add_device_late - perform EEH initialization for the indicated pci device
+ * eeh_add_device_late - Perform EEH initialization for the indicated pci device
  * @dev: pci device for which to set up EEH
  *
  * This routine must be used to complete EEH initialization for PCI
@@ -1219,7 +1077,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
 static void eeh_add_device_late(struct pci_dev *dev)
 {
 	struct device_node *dn;
-	struct pci_dn *pdn;
+	struct eeh_dev *edev;
 
 	if (!dev || !eeh_subsystem_enabled)
 		return;
@@ -1227,20 +1085,29 @@ static void eeh_add_device_late(struct pci_dev *dev)
 	pr_debug("EEH: Adding device %s\n", pci_name(dev));
 
 	dn = pci_device_to_OF_node(dev);
-	pdn = PCI_DN(dn);
-	if (pdn->pcidev == dev) {
+	edev = pci_dev_to_eeh_dev(dev);
+	if (edev->pdev == dev) {
 		pr_debug("EEH: Already referenced !\n");
 		return;
 	}
-	WARN_ON(pdn->pcidev);
+	WARN_ON(edev->pdev);
 
-	pci_dev_get (dev);
-	pdn->pcidev = dev;
+	pci_dev_get(dev);
+	edev->pdev = dev;
+	dev->dev.archdata.edev = edev;
 
 	pci_addr_cache_insert_device(dev);
 	eeh_sysfs_add_device(dev);
 }
 
+/**
+ * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
+ * @bus: PCI bus
+ *
+ * This routine must be used to perform EEH initialization for PCI
+ * devices which are attached to the indicated PCI bus. The PCI bus
+ * is added after system boot through hotplug or dlpar.
+ */
 void eeh_add_device_tree_late(struct pci_bus *bus)
 {
 	struct pci_dev *dev;
@@ -1257,7 +1124,7 @@ void eeh_add_device_tree_late(struct pci_bus *bus)
 EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
 
 /**
- * eeh_remove_device - undo EEH setup for the indicated pci device
+ * eeh_remove_device - Undo EEH setup for the indicated pci device
  * @dev: pci device to be removed
  *
  * This routine should be called when a device is removed from
@@ -1268,25 +1135,35 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
  */
 static void eeh_remove_device(struct pci_dev *dev)
 {
-	struct device_node *dn;
+	struct eeh_dev *edev;
+
 	if (!dev || !eeh_subsystem_enabled)
 		return;
+	edev = pci_dev_to_eeh_dev(dev);
 
 	/* Unregister the device with the EEH/PCI address search system */
 	pr_debug("EEH: Removing device %s\n", pci_name(dev));
 
-	dn = pci_device_to_OF_node(dev);
-	if (PCI_DN(dn)->pcidev == NULL) {
+	if (!edev || !edev->pdev) {
 		pr_debug("EEH: Not referenced !\n");
 		return;
 	}
-	PCI_DN(dn)->pcidev = NULL;
-	pci_dev_put (dev);
+	edev->pdev = NULL;
+	dev->dev.archdata.edev = NULL;
+	pci_dev_put(dev);
 
 	pci_addr_cache_remove_device(dev);
 	eeh_sysfs_remove_device(dev);
 }
 
+/**
+ * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device
+ * @dev: PCI device
+ *
+ * This routine must be called when a device is removed from the
+ * running system through hotplug or dlpar. The corresponding
+ * PCI address cache will be removed.
+ */
 void eeh_remove_bus_device(struct pci_dev *dev)
 {
 	struct pci_bus *bus = dev->subordinate;
@@ -1305,21 +1182,24 @@ static int proc_eeh_show(struct seq_file *m, void *v)
 {
 	if (0 == eeh_subsystem_enabled) {
 		seq_printf(m, "EEH Subsystem is globally disabled\n");
-		seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
+		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
 	} else {
 		seq_printf(m, "EEH Subsystem is enabled\n");
 		seq_printf(m,
-				"no device=%ld\n"
-				"no device node=%ld\n"
-				"no config address=%ld\n"
-				"check not wanted=%ld\n"
-				"eeh_total_mmio_ffs=%ld\n"
-				"eeh_false_positives=%ld\n"
-				"eeh_slot_resets=%ld\n",
-				no_device, no_dn, no_cfg_addr, 
-				ignored_check, total_mmio_ffs, 
-				false_positives,
-				slot_resets);
+				"no device=%llu\n"
+				"no device node=%llu\n"
+				"no config address=%llu\n"
+				"check not wanted=%llu\n"
+				"eeh_total_mmio_ffs=%llu\n"
+				"eeh_false_positives=%llu\n"
+				"eeh_slot_resets=%llu\n",
+				eeh_stats.no_device,
+				eeh_stats.no_dn,
+				eeh_stats.no_cfg_addr,
+				eeh_stats.ignored_check,
+				eeh_stats.total_mmio_ffs,
+				eeh_stats.false_positives,
+				eeh_stats.slot_resets);
 	}
 
 	return 0;
diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c
index fc5ae767989e..e5ae1c687c66 100644
--- a/arch/powerpc/platforms/pseries/eeh_cache.c
+++ b/arch/powerpc/platforms/pseries/eeh_cache.c
@@ -1,5 +1,4 @@
 /*
- * eeh_cache.c
  * PCI address cache; allows the lookup of PCI devices based on I/O address
  *
  * Copyright IBM Corporation 2004
@@ -47,8 +46,7 @@
  * than any hash algo I could think of for this problem, even
  * with the penalty of slow pointer chases for d-cache misses).
  */
-struct pci_io_addr_range
-{
+struct pci_io_addr_range {
 	struct rb_node rb_node;
 	unsigned long addr_lo;
 	unsigned long addr_hi;
@@ -56,13 +54,12 @@ struct pci_io_addr_range
 	unsigned int flags;
 };
 
-static struct pci_io_addr_cache
-{
+static struct pci_io_addr_cache {
 	struct rb_root rb_root;
 	spinlock_t piar_lock;
 } pci_io_addr_cache_root;
 
-static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)
+static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr)
 {
 	struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
 
@@ -86,7 +83,7 @@ static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)
 }
 
 /**
- * pci_get_device_by_addr - Get device, given only address
+ * pci_addr_cache_get_device - Get device, given only address
  * @addr: mmio (PIO) phys address or i/o port number
  *
  * Given an mmio phys address, or a port number, find a pci device
@@ -95,13 +92,13 @@ static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)
  * from zero (that is, they do *not* have pci_io_addr added in).
  * It is safe to call this function within an interrupt.
  */
-struct pci_dev *pci_get_device_by_addr(unsigned long addr)
+struct pci_dev *pci_addr_cache_get_device(unsigned long addr)
 {
 	struct pci_dev *dev;
 	unsigned long flags;
 
 	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
-	dev = __pci_get_device_by_addr(addr);
+	dev = __pci_addr_cache_get_device(addr);
 	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
 	return dev;
 }
@@ -166,7 +163,7 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
 
 #ifdef DEBUG
 	printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n",
-	                  alo, ahi, pci_name (dev));
+	                  alo, ahi, pci_name(dev));
 #endif
 
 	rb_link_node(&piar->rb_node, parent, p);
@@ -178,7 +175,7 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
 static void __pci_addr_cache_insert_device(struct pci_dev *dev)
 {
 	struct device_node *dn;
-	struct pci_dn *pdn;
+	struct eeh_dev *edev;
 	int i;
 
 	dn = pci_device_to_OF_node(dev);
@@ -187,13 +184,19 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev)
 		return;
 	}
 
+	edev = of_node_to_eeh_dev(dn);
+	if (!edev) {
+		pr_warning("PCI: no EEH dev found for dn=%s\n",
+			dn->full_name);
+		return;
+	}
+
 	/* Skip any devices for which EEH is not enabled. */
-	pdn = PCI_DN(dn);
-	if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
-	    pdn->eeh_mode & EEH_MODE_NOCHECK) {
+	if (!(edev->mode & EEH_MODE_SUPPORTED) ||
+	    edev->mode & EEH_MODE_NOCHECK) {
 #ifdef DEBUG
-		printk(KERN_INFO "PCI: skip building address cache for=%s - %s\n",
-		       pci_name(dev), pdn->node->full_name);
+		pr_info("PCI: skip building address cache for=%s - %s\n",
+			pci_name(dev), dn->full_name);
 #endif
 		return;
 	}
@@ -284,6 +287,7 @@ void pci_addr_cache_remove_device(struct pci_dev *dev)
 void __init pci_addr_cache_build(void)
 {
 	struct device_node *dn;
+	struct eeh_dev *edev;
 	struct pci_dev *dev = NULL;
 
 	spin_lock_init(&pci_io_addr_cache_root.piar_lock);
@@ -294,8 +298,14 @@ void __init pci_addr_cache_build(void)
 		dn = pci_device_to_OF_node(dev);
 		if (!dn)
 			continue;
+
+		edev = of_node_to_eeh_dev(dn);
+		if (!edev)
+			continue;
+
 		pci_dev_get(dev);  /* matching put is in eeh_remove_device() */
-		PCI_DN(dn)->pcidev = dev;
+		dev->dev.archdata.edev = edev;
+		edev->pdev = dev;
 
 		eeh_sysfs_add_device(dev);
 	}
diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c
new file mode 100644
index 000000000000..f3aed7dcae95
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/eeh_dev.c
@@ -0,0 +1,102 @@
+/*
+ * The file intends to implement dynamic creation of EEH device, which will
+ * be bound with OF node and PCI device simutaneously. The EEH devices would
+ * be foundamental information for EEH core components to work proerly. Besides,
+ * We have to support multiple situations where dynamic creation of EEH device
+ * is required:
+ *
+ * 1) Before PCI emunation starts, we need create EEH devices according to the
+ *    PCI sensitive OF nodes.
+ * 2) When PCI emunation is done, we need do the binding between PCI device and
+ *    the associated EEH device.
+ * 3) DR (Dynamic Reconfiguration) would create PCI sensitive OF node. EEH device
+ *    will be created while PCI sensitive OF node is detected from DR.
+ * 4) PCI hotplug needs redoing the binding between PCI device and EEH device. If
+ *    PHB is newly inserted, we also need create EEH devices accordingly.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/export.h>
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+/**
+ * eeh_dev_init - Create EEH device according to OF node
+ * @dn: device node
+ * @data: PHB
+ *
+ * It will create EEH device according to the given OF node. The function
+ * might be called by PCI emunation, DR, PHB hotplug.
+ */
+void * __devinit eeh_dev_init(struct device_node *dn, void *data)
+{
+	struct pci_controller *phb = data;
+	struct eeh_dev *edev;
+
+	/* Allocate EEH device */
+	edev = zalloc_maybe_bootmem(sizeof(*edev), GFP_KERNEL);
+	if (!edev) {
+		pr_warning("%s: out of memory\n", __func__);
+		return NULL;
+	}
+
+	/* Associate EEH device with OF node */
+	dn->edev  = edev;
+	edev->dn  = dn;
+	edev->phb = phb;
+
+	return NULL;
+}
+
+/**
+ * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB
+ * @phb: PHB
+ *
+ * Scan the PHB OF node and its child association, then create the
+ * EEH devices accordingly
+ */
+void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb)
+{
+	struct device_node *dn = phb->dn;
+
+	/* EEH device for PHB */
+	eeh_dev_init(dn, phb);
+
+	/* EEH devices for children OF nodes */
+	traverse_pci_devices(dn, eeh_dev_init, phb);
+}
+
+/**
+ * eeh_dev_phb_init - Create EEH devices for devices included in existing PHBs
+ *
+ * Scan all the existing PHBs and create EEH devices for their OF
+ * nodes and their children OF nodes
+ */
+void __init eeh_dev_phb_init(void)
+{
+	struct pci_controller *phb, *tmp;
+
+	list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
+		eeh_dev_phb_init_dynamic(phb);
+}
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 1b6cb10589e0..baf92cd9dfab 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -33,8 +33,14 @@
 #include <asm/prom.h>
 #include <asm/rtas.h>
 
-
-static inline const char * pcid_name (struct pci_dev *pdev)
+/**
+ * eeh_pcid_name - Retrieve name of PCI device driver
+ * @pdev: PCI device
+ *
+ * This routine is used to retrieve the name of PCI device driver
+ * if that's valid.
+ */
+static inline const char *eeh_pcid_name(struct pci_dev *pdev)
 {
 	if (pdev && pdev->dev.driver)
 		return pdev->dev.driver->name;
@@ -64,48 +70,59 @@ static void print_device_node_tree(struct pci_dn *pdn, int dent)
 #endif
 
 /**
- * eeh_disable_irq - disable interrupt for the recovering device
+ * eeh_disable_irq - Disable interrupt for the recovering device
+ * @dev: PCI device
+ *
+ * This routine must be called when reporting temporary or permanent
+ * error to the particular PCI device to disable interrupt of that
+ * device. If the device has enabled MSI or MSI-X interrupt, we needn't
+ * do real work because EEH should freeze DMA transfers for those PCI
+ * devices encountering EEH errors, which includes MSI or MSI-X.
  */
 static void eeh_disable_irq(struct pci_dev *dev)
 {
-	struct device_node *dn = pci_device_to_OF_node(dev);
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
 
 	/* Don't disable MSI and MSI-X interrupts. They are
 	 * effectively disabled by the DMA Stopped state
 	 * when an EEH error occurs.
-	*/
+	 */
 	if (dev->msi_enabled || dev->msix_enabled)
 		return;
 
 	if (!irq_has_action(dev->irq))
 		return;
 
-	PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED;
+	edev->mode |= EEH_MODE_IRQ_DISABLED;
 	disable_irq_nosync(dev->irq);
 }
 
 /**
- * eeh_enable_irq - enable interrupt for the recovering device
+ * eeh_enable_irq - Enable interrupt for the recovering device
+ * @dev: PCI device
+ *
+ * This routine must be called to enable interrupt while failed
+ * device could be resumed.
  */
 static void eeh_enable_irq(struct pci_dev *dev)
 {
-	struct device_node *dn = pci_device_to_OF_node(dev);
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
 
-	if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) {
-		PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED;
+	if ((edev->mode) & EEH_MODE_IRQ_DISABLED) {
+		edev->mode &= ~EEH_MODE_IRQ_DISABLED;
 		enable_irq(dev->irq);
 	}
 }
 
-/* ------------------------------------------------------- */
 /**
- * eeh_report_error - report pci error to each device driver
+ * eeh_report_error - Report pci error to each device driver
+ * @dev: PCI device
+ * @userdata: return value
  * 
  * Report an EEH error to each device driver, collect up and 
  * merge the device driver responses. Cumulative response 
  * passed back in "userdata".
  */
-
 static int eeh_report_error(struct pci_dev *dev, void *userdata)
 {
 	enum pci_ers_result rc, *res = userdata;
@@ -122,7 +139,7 @@ static int eeh_report_error(struct pci_dev *dev, void *userdata)
 	    !driver->err_handler->error_detected)
 		return 0;
 
-	rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen);
+	rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
 
 	/* A driver that needs a reset trumps all others */
 	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
@@ -132,13 +149,14 @@ static int eeh_report_error(struct pci_dev *dev, void *userdata)
 }
 
 /**
- * eeh_report_mmio_enabled - tell drivers that MMIO has been enabled
+ * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
+ * @dev: PCI device
+ * @userdata: return value
  *
  * Tells each device driver that IO ports, MMIO and config space I/O
  * are now enabled. Collects up and merges the device driver responses.
  * Cumulative response passed back in "userdata".
  */
-
 static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
 {
 	enum pci_ers_result rc, *res = userdata;
@@ -149,7 +167,7 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
 	    !driver->err_handler->mmio_enabled)
 		return 0;
 
-	rc = driver->err_handler->mmio_enabled (dev);
+	rc = driver->err_handler->mmio_enabled(dev);
 
 	/* A driver that needs a reset trumps all others */
 	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
@@ -159,9 +177,15 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
 }
 
 /**
- * eeh_report_reset - tell device that slot has been reset
+ * eeh_report_reset - Tell device that slot has been reset
+ * @dev: PCI device
+ * @userdata: return value
+ *
+ * This routine must be called while EEH tries to reset particular
+ * PCI device so that the associated PCI device driver could take
+ * some actions, usually to save data the driver needs so that the
+ * driver can work again while the device is recovered.
  */
-
 static int eeh_report_reset(struct pci_dev *dev, void *userdata)
 {
 	enum pci_ers_result rc, *res = userdata;
@@ -188,9 +212,14 @@ static int eeh_report_reset(struct pci_dev *dev, void *userdata)
 }
 
 /**
- * eeh_report_resume - tell device to resume normal operations
+ * eeh_report_resume - Tell device to resume normal operations
+ * @dev: PCI device
+ * @userdata: return value
+ *
+ * This routine must be called to notify the device driver that it
+ * could resume so that the device driver can do some initialization
+ * to make the recovered device work again.
  */
-
 static int eeh_report_resume(struct pci_dev *dev, void *userdata)
 {
 	struct pci_driver *driver = dev->driver;
@@ -212,12 +241,13 @@ static int eeh_report_resume(struct pci_dev *dev, void *userdata)
 }
 
 /**
- * eeh_report_failure - tell device driver that device is dead.
+ * eeh_report_failure - Tell device driver that device is dead.
+ * @dev: PCI device
+ * @userdata: return value
  *
  * This informs the device driver that the device is permanently
  * dead, and that no further recovery attempts will be made on it.
  */
-
 static int eeh_report_failure(struct pci_dev *dev, void *userdata)
 {
 	struct pci_driver *driver = dev->driver;
@@ -238,65 +268,46 @@ static int eeh_report_failure(struct pci_dev *dev, void *userdata)
 	return 0;
 }
 
-/* ------------------------------------------------------- */
 /**
- * handle_eeh_events -- reset a PCI device after hard lockup.
- *
- * pSeries systems will isolate a PCI slot if the PCI-Host
- * bridge detects address or data parity errors, DMA's
- * occurring to wild addresses (which usually happen due to
- * bugs in device drivers or in PCI adapter firmware).
- * Slot isolations also occur if #SERR, #PERR or other misc
- * PCI-related errors are detected.
+ * eeh_reset_device - Perform actual reset of a pci slot
+ * @edev: PE associated EEH device
+ * @bus: PCI bus corresponding to the isolcated slot
  *
- * Recovery process consists of unplugging the device driver
- * (which generated hotplug events to userspace), then issuing
- * a PCI #RST to the device, then reconfiguring the PCI config
- * space for all bridges & devices under this slot, and then
- * finally restarting the device drivers (which cause a second
- * set of hotplug events to go out to userspace).
+ * This routine must be called to do reset on the indicated PE.
+ * During the reset, udev might be invoked because those affected
+ * PCI devices will be removed and then added.
  */
-
-/**
- * eeh_reset_device() -- perform actual reset of a pci slot
- * @bus: pointer to the pci bus structure corresponding
- *            to the isolated slot. A non-null value will
- *            cause all devices under the bus to be removed
- *            and then re-added.
- * @pe_dn: pointer to a "Partionable Endpoint" device node.
- *            This is the top-level structure on which pci
- *            bus resets can be performed.
- */
-
-static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
+static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
 {
 	struct device_node *dn;
 	int cnt, rc;
 
 	/* pcibios will clear the counter; save the value */
-	cnt = pe_dn->eeh_freeze_count;
+	cnt = edev->freeze_count;
 
 	if (bus)
 		pcibios_remove_pci_devices(bus);
 
 	/* Reset the pci controller. (Asserts RST#; resets config space).
 	 * Reconfigure bridges and devices. Don't try to bring the system
-	 * up if the reset failed for some reason. */
-	rc = rtas_set_slot_reset(pe_dn);
+	 * up if the reset failed for some reason.
+	 */
+	rc = eeh_reset_pe(edev);
 	if (rc)
 		return rc;
 
-	/* Walk over all functions on this device.  */
-	dn = pe_dn->node;
-	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
+	/* Walk over all functions on this device. */
+	dn = eeh_dev_to_of_node(edev);
+	if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
 		dn = dn->parent->child;
 
 	while (dn) {
-		struct pci_dn *ppe = PCI_DN(dn);
+		struct eeh_dev *pedev = of_node_to_eeh_dev(dn);
+
 		/* On Power4, always true because eeh_pe_config_addr=0 */
-		if (pe_dn->eeh_pe_config_addr == ppe->eeh_pe_config_addr) {
-			rtas_configure_bridge(ppe);
-			eeh_restore_bars(ppe);
+		if (edev->pe_config_addr == pedev->pe_config_addr) {
+			eeh_ops->configure_bridge(dn);
+			eeh_restore_bars(pedev);
  		}
 		dn = dn->sibling;
 	}
@@ -308,10 +319,10 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
 	 * potentially weird things happen.
 	 */
 	if (bus) {
-		ssleep (5);
+		ssleep(5);
 		pcibios_add_pci_devices(bus);
 	}
-	pe_dn->eeh_freeze_count = cnt;
+	edev->freeze_count = cnt;
 
 	return 0;
 }
@@ -321,23 +332,39 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
  */
 #define MAX_WAIT_FOR_RECOVERY 150
 
-struct pci_dn * handle_eeh_events (struct eeh_event *event)
+/**
+ * eeh_handle_event - Reset a PCI device after hard lockup.
+ * @event: EEH event
+ *
+ * While PHB detects address or data parity errors on particular PCI
+ * slot, the associated PE will be frozen. Besides, DMA's occurring
+ * to wild addresses (which usually happen due to bugs in device
+ * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
+ * #PERR or other misc PCI-related errors also can trigger EEH errors.
+ *
+ * Recovery process consists of unplugging the device driver (which
+ * generated hotplug events to userspace), then issuing a PCI #RST to
+ * the device, then reconfiguring the PCI config space for all bridges
+ * & devices under this slot, and then finally restarting the device
+ * drivers (which cause a second set of hotplug events to go out to
+ * userspace).
+ */
+struct eeh_dev *handle_eeh_events(struct eeh_event *event)
 {
 	struct device_node *frozen_dn;
-	struct pci_dn *frozen_pdn;
+	struct eeh_dev *frozen_edev;
 	struct pci_bus *frozen_bus;
 	int rc = 0;
 	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
 	const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str;
 
-	frozen_dn = find_device_pe(event->dn);
+	frozen_dn = eeh_find_device_pe(eeh_dev_to_of_node(event->edev));
 	if (!frozen_dn) {
-
-		location = of_get_property(event->dn, "ibm,loc-code", NULL);
+		location = of_get_property(eeh_dev_to_of_node(event->edev), "ibm,loc-code", NULL);
 		location = location ? location : "unknown";
 		printk(KERN_ERR "EEH: Error: Cannot find partition endpoint "
 		                "for location=%s pci addr=%s\n",
-		        location, eeh_pci_name(event->dev));
+			location, eeh_pci_name(eeh_dev_to_pci_dev(event->edev)));
 		return NULL;
 	}
 
@@ -350,9 +377,10 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 	 * which was always an EADS pci bridge.  In the new style,
 	 * there might not be any EADS bridges, and even when there are,
 	 * the firmware marks them as "EEH incapable". So another
-	 * two-step is needed to find the pci bus.. */
+	 * two-step is needed to find the pci bus..
+	 */
 	if (!frozen_bus)
-		frozen_bus = pcibios_find_pci_bus (frozen_dn->parent);
+		frozen_bus = pcibios_find_pci_bus(frozen_dn->parent);
 
 	if (!frozen_bus) {
 		printk(KERN_ERR "EEH: Cannot find PCI bus "
@@ -361,22 +389,21 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 		return NULL;
 	}
 
-	frozen_pdn = PCI_DN(frozen_dn);
-	frozen_pdn->eeh_freeze_count++;
+	frozen_edev = of_node_to_eeh_dev(frozen_dn);
+	frozen_edev->freeze_count++;
+	pci_str = eeh_pci_name(eeh_dev_to_pci_dev(event->edev));
+	drv_str = eeh_pcid_name(eeh_dev_to_pci_dev(event->edev));
 
-	pci_str = eeh_pci_name(event->dev);
-	drv_str = pcid_name(event->dev);
-	
-	if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES)
+	if (frozen_edev->freeze_count > EEH_MAX_ALLOWED_FREEZES)
 		goto excess_failures;
 
 	printk(KERN_WARNING
 	   "EEH: This PCI device has failed %d times in the last hour:\n",
-		frozen_pdn->eeh_freeze_count);
+		frozen_edev->freeze_count);
 
-	if (frozen_pdn->pcidev) {
-		bus_pci_str = pci_name(frozen_pdn->pcidev);
-		bus_drv_str = pcid_name(frozen_pdn->pcidev);
+	if (frozen_edev->pdev) {
+		bus_pci_str = pci_name(frozen_edev->pdev);
+		bus_drv_str = eeh_pcid_name(frozen_edev->pdev);
 		printk(KERN_WARNING
 			"EEH: Bus location=%s driver=%s pci addr=%s\n",
 			location, bus_drv_str, bus_pci_str);
@@ -395,9 +422,10 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 	pci_walk_bus(frozen_bus, eeh_report_error, &result);
 
 	/* Get the current PCI slot state. This can take a long time,
-	 * sometimes over 3 seconds for certain systems. */
-	rc = eeh_wait_for_slot_status (frozen_pdn, MAX_WAIT_FOR_RECOVERY*1000);
-	if (rc < 0) {
+	 * sometimes over 3 seconds for certain systems.
+	 */
+	rc = eeh_ops->wait_state(eeh_dev_to_of_node(frozen_edev), MAX_WAIT_FOR_RECOVERY*1000);
+	if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
 		printk(KERN_WARNING "EEH: Permanent failure\n");
 		goto hard_fail;
 	}
@@ -406,14 +434,14 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 	 * don't post the error log until after all dev drivers
 	 * have been informed.
 	 */
-	eeh_slot_error_detail(frozen_pdn, EEH_LOG_TEMP_FAILURE);
+	eeh_slot_error_detail(frozen_edev, EEH_LOG_TEMP);
 
 	/* If all device drivers were EEH-unaware, then shut
 	 * down all of the device drivers, and hope they
 	 * go down willingly, without panicing the system.
 	 */
 	if (result == PCI_ERS_RESULT_NONE) {
-		rc = eeh_reset_device(frozen_pdn, frozen_bus);
+		rc = eeh_reset_device(frozen_edev, frozen_bus);
 		if (rc) {
 			printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
 			goto hard_fail;
@@ -422,7 +450,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 
 	/* If all devices reported they can proceed, then re-enable MMIO */
 	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
-		rc = rtas_pci_enable(frozen_pdn, EEH_THAW_MMIO);
+		rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_MMIO);
 
 		if (rc < 0)
 			goto hard_fail;
@@ -436,7 +464,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 
 	/* If all devices reported they can proceed, then re-enable DMA */
 	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
-		rc = rtas_pci_enable(frozen_pdn, EEH_THAW_DMA);
+		rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_DMA);
 
 		if (rc < 0)
 			goto hard_fail;
@@ -454,7 +482,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 
 	/* If any device called out for a reset, then reset the slot */
 	if (result == PCI_ERS_RESULT_NEED_RESET) {
-		rc = eeh_reset_device(frozen_pdn, NULL);
+		rc = eeh_reset_device(frozen_edev, NULL);
 		if (rc) {
 			printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
 			goto hard_fail;
@@ -473,7 +501,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 	/* Tell all device drivers that they can resume operations */
 	pci_walk_bus(frozen_bus, eeh_report_resume, NULL);
 
-	return frozen_pdn;
+	return frozen_edev;
 	
 excess_failures:
 	/*
@@ -486,7 +514,7 @@ excess_failures:
 		"has failed %d times in the last hour "
 		"and has been permanently disabled.\n"
 		"Please try reseating this device or replacing it.\n",
-		location, drv_str, pci_str, frozen_pdn->eeh_freeze_count);
+		location, drv_str, pci_str, frozen_edev->freeze_count);
 	goto perm_error;
 
 hard_fail:
@@ -497,7 +525,7 @@ hard_fail:
 		location, drv_str, pci_str);
 
 perm_error:
-	eeh_slot_error_detail(frozen_pdn, EEH_LOG_PERM_FAILURE);
+	eeh_slot_error_detail(frozen_edev, EEH_LOG_PERM);
 
 	/* Notify all devices that they're about to go down. */
 	pci_walk_bus(frozen_bus, eeh_report_failure, NULL);
@@ -508,4 +536,3 @@ perm_error:
 	return NULL;
 }
 
-/* ---------- end of file ---------- */
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
index d2383cfb6dfd..4a4752565856 100644
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ b/arch/powerpc/platforms/pseries/eeh_event.c
@@ -1,6 +1,4 @@
 /*
- * eeh_event.c
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -46,7 +44,7 @@ DECLARE_WORK(eeh_event_wq, eeh_thread_launcher);
 DEFINE_MUTEX(eeh_event_mutex);
 
 /**
- * eeh_event_handler - dispatch EEH events.
+ * eeh_event_handler - Dispatch EEH events.
  * @dummy - unused
  *
  * The detection of a frozen slot can occur inside an interrupt,
@@ -58,10 +56,10 @@ DEFINE_MUTEX(eeh_event_mutex);
 static int eeh_event_handler(void * dummy)
 {
 	unsigned long flags;
-	struct eeh_event	*event;
-	struct pci_dn *pdn;
+	struct eeh_event *event;
+	struct eeh_dev *edev;
 
-	daemonize ("eehd");
+	daemonize("eehd");
 	set_current_state(TASK_INTERRUPTIBLE);
 
 	spin_lock_irqsave(&eeh_eventlist_lock, flags);
@@ -79,31 +77,37 @@ static int eeh_event_handler(void * dummy)
 
 	/* Serialize processing of EEH events */
 	mutex_lock(&eeh_event_mutex);
-	eeh_mark_slot(event->dn, EEH_MODE_RECOVERING);
+	edev = event->edev;
+	eeh_mark_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING);
 
 	printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n",
-	       eeh_pci_name(event->dev));
+	       eeh_pci_name(edev->pdev));
+
+	edev = handle_eeh_events(event);
 
-	pdn = handle_eeh_events(event);
+	eeh_clear_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING);
+	pci_dev_put(edev->pdev);
 
-	eeh_clear_slot(event->dn, EEH_MODE_RECOVERING);
-	pci_dev_put(event->dev);
 	kfree(event);
 	mutex_unlock(&eeh_event_mutex);
 
 	/* If there are no new errors after an hour, clear the counter. */
-	if (pdn && pdn->eeh_freeze_count>0) {
-		msleep_interruptible (3600*1000);
-		if (pdn->eeh_freeze_count>0)
-			pdn->eeh_freeze_count--;
+	if (edev && edev->freeze_count>0) {
+		msleep_interruptible(3600*1000);
+		if (edev->freeze_count>0)
+			edev->freeze_count--;
+
 	}
 
 	return 0;
 }
 
 /**
- * eeh_thread_launcher
+ * eeh_thread_launcher - Start kernel thread to handle EEH events
  * @dummy - unused
+ *
+ * This routine is called to start the kernel thread for processing
+ * EEH event.
  */
 static void eeh_thread_launcher(struct work_struct *dummy)
 {
@@ -112,18 +116,18 @@ static void eeh_thread_launcher(struct work_struct *dummy)
 }
 
 /**
- * eeh_send_failure_event - generate a PCI error event
- * @dev pci device
+ * eeh_send_failure_event - Generate a PCI error event
+ * @edev: EEH device
  *
  * This routine can be called within an interrupt context;
  * the actual event will be delivered in a normal context
  * (from a workqueue).
  */
-int eeh_send_failure_event (struct device_node *dn,
-                            struct pci_dev *dev)
+int eeh_send_failure_event(struct eeh_dev *edev)
 {
 	unsigned long flags;
 	struct eeh_event *event;
+	struct device_node *dn = eeh_dev_to_of_node(edev);
 	const char *location;
 
 	if (!mem_init_done) {
@@ -135,15 +139,14 @@ int eeh_send_failure_event (struct device_node *dn,
 	}
 	event = kmalloc(sizeof(*event), GFP_ATOMIC);
 	if (event == NULL) {
-		printk (KERN_ERR "EEH: out of memory, event not handled\n");
+		printk(KERN_ERR "EEH: out of memory, event not handled\n");
 		return 1;
  	}
 
-	if (dev)
-		pci_dev_get(dev);
+	if (edev->pdev)
+		pci_dev_get(edev->pdev);
 
-	event->dn = dn;
-	event->dev = dev;
+	event->edev = edev;
 
 	/* We may or may not be called in an interrupt context */
 	spin_lock_irqsave(&eeh_eventlist_lock, flags);
@@ -154,5 +157,3 @@ int eeh_send_failure_event (struct device_node *dn,
 
 	return 0;
 }
-
-/********************** END OF FILE ******************************/
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
new file mode 100644
index 000000000000..8752f79a6af8
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -0,0 +1,565 @@
+/*
+ * The file intends to implement the platform dependent EEH operations on pseries.
+ * Actually, the pseries platform is built based on RTAS heavily. That means the
+ * pseries platform dependent EEH operations will be built on RTAS calls. The functions
+ * are devired from arch/powerpc/platforms/pseries/eeh.c and necessary cleanup has
+ * been done.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2011.
+ * Copyright IBM Corporation 2001, 2005, 2006
+ * Copyright Dave Engebretsen & Todd Inglett 2001
+ * Copyright Linas Vepstas 2005, 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+#include <asm/rtas.h>
+
+/* RTAS tokens */
+static int ibm_set_eeh_option;
+static int ibm_set_slot_reset;
+static int ibm_read_slot_reset_state;
+static int ibm_read_slot_reset_state2;
+static int ibm_slot_error_detail;
+static int ibm_get_config_addr_info;
+static int ibm_get_config_addr_info2;
+static int ibm_configure_bridge;
+static int ibm_configure_pe;
+
+/*
+ * Buffer for reporting slot-error-detail rtas calls. Its here
+ * in BSS, and not dynamically alloced, so that it ends up in
+ * RMO where RTAS can access it.
+ */
+static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
+static DEFINE_SPINLOCK(slot_errbuf_lock);
+static int eeh_error_buf_size;
+
+/**
+ * pseries_eeh_init - EEH platform dependent initialization
+ *
+ * EEH platform dependent initialization on pseries.
+ */
+static int pseries_eeh_init(void)
+{
+	/* figure out EEH RTAS function call tokens */
+	ibm_set_eeh_option		= rtas_token("ibm,set-eeh-option");
+	ibm_set_slot_reset		= rtas_token("ibm,set-slot-reset");
+	ibm_read_slot_reset_state2	= rtas_token("ibm,read-slot-reset-state2");
+	ibm_read_slot_reset_state	= rtas_token("ibm,read-slot-reset-state");
+	ibm_slot_error_detail		= rtas_token("ibm,slot-error-detail");
+	ibm_get_config_addr_info2	= rtas_token("ibm,get-config-addr-info2");
+	ibm_get_config_addr_info	= rtas_token("ibm,get-config-addr-info");
+	ibm_configure_pe		= rtas_token("ibm,configure-pe");
+	ibm_configure_bridge		= rtas_token ("ibm,configure-bridge");
+
+	/* necessary sanity check */
+	if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) {
+		pr_warning("%s: RTAS service <ibm,set-eeh-option> invalid\n",
+			__func__);
+		return -EINVAL;
+	} else if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE) {
+		pr_warning("%s: RTAS service <ibm, set-slot-reset> invalid\n",
+			__func__);
+		return -EINVAL;
+	} else if (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE &&
+		   ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) {
+		pr_warning("%s: RTAS service <ibm,read-slot-reset-state2> and "
+			"<ibm,read-slot-reset-state> invalid\n",
+			__func__);
+		return -EINVAL;
+	} else if (ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE) {
+		pr_warning("%s: RTAS service <ibm,slot-error-detail> invalid\n",
+			__func__);
+		return -EINVAL;
+	} else if (ibm_get_config_addr_info2 == RTAS_UNKNOWN_SERVICE &&
+		   ibm_get_config_addr_info == RTAS_UNKNOWN_SERVICE) {
+		pr_warning("%s: RTAS service <ibm,get-config-addr-info2> and "
+			"<ibm,get-config-addr-info> invalid\n",
+			__func__);
+		return -EINVAL;
+	} else if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE &&
+		   ibm_configure_bridge == RTAS_UNKNOWN_SERVICE) {
+		pr_warning("%s: RTAS service <ibm,configure-pe> and "
+			"<ibm,configure-bridge> invalid\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	/* Initialize error log lock and size */
+	spin_lock_init(&slot_errbuf_lock);
+	eeh_error_buf_size = rtas_token("rtas-error-log-max");
+	if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
+		pr_warning("%s: unknown EEH error log size\n",
+			__func__);
+		eeh_error_buf_size = 1024;
+	} else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
+		pr_warning("%s: EEH error log size %d exceeds the maximal %d\n",
+			__func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
+		eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
+	}
+
+	return 0;
+}
+
+/**
+ * pseries_eeh_set_option - Initialize EEH or MMIO/DMA reenable
+ * @dn: device node
+ * @option: operation to be issued
+ *
+ * The function is used to control the EEH functionality globally.
+ * Currently, following options are support according to PAPR:
+ * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
+ */
+static int pseries_eeh_set_option(struct device_node *dn, int option)
+{
+	int ret = 0;
+	struct eeh_dev *edev;
+	const u32 *reg;
+	int config_addr;
+
+	edev = of_node_to_eeh_dev(dn);
+
+	/*
+	 * When we're enabling or disabling EEH functioality on
+	 * the particular PE, the PE config address is possibly
+	 * unavailable. Therefore, we have to figure it out from
+	 * the FDT node.
+	 */
+	switch (option) {
+	case EEH_OPT_DISABLE:
+	case EEH_OPT_ENABLE:
+		reg = of_get_property(dn, "reg", NULL);
+		config_addr = reg[0];
+		break;
+
+	case EEH_OPT_THAW_MMIO:
+	case EEH_OPT_THAW_DMA:
+		config_addr = edev->config_addr;
+		if (edev->pe_config_addr)
+			config_addr = edev->pe_config_addr;
+		break;
+
+	default:
+		pr_err("%s: Invalid option %d\n",
+			__func__, option);
+		return -EINVAL;
+	}
+
+	ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
+			config_addr, BUID_HI(edev->phb->buid),
+			BUID_LO(edev->phb->buid), option);
+
+	return ret;
+}
+
+/**
+ * pseries_eeh_get_pe_addr - Retrieve PE address
+ * @dn: device node
+ *
+ * Retrieve the assocated PE address. Actually, there're 2 RTAS
+ * function calls dedicated for the purpose. We need implement
+ * it through the new function and then the old one. Besides,
+ * you should make sure the config address is figured out from
+ * FDT node before calling the function.
+ *
+ * It's notable that zero'ed return value means invalid PE config
+ * address.
+ */
+static int pseries_eeh_get_pe_addr(struct device_node *dn)
+{
+	struct eeh_dev *edev;
+	int ret = 0;
+	int rets[3];
+
+	edev = of_node_to_eeh_dev(dn);
+
+	if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
+		/*
+		 * First of all, we need to make sure there has one PE
+		 * associated with the device. Otherwise, PE address is
+		 * meaningless.
+		 */
+		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+				edev->config_addr, BUID_HI(edev->phb->buid),
+				BUID_LO(edev->phb->buid), 1);
+		if (ret || (rets[0] == 0))
+			return 0;
+
+		/* Retrieve the associated PE config address */
+		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+				edev->config_addr, BUID_HI(edev->phb->buid),
+				BUID_LO(edev->phb->buid), 0);
+		if (ret) {
+			pr_warning("%s: Failed to get PE address for %s\n",
+				__func__, dn->full_name);
+			return 0;
+		}
+
+		return rets[0];
+	}
+
+	if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
+		ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
+				edev->config_addr, BUID_HI(edev->phb->buid),
+				BUID_LO(edev->phb->buid), 0);
+		if (ret) {
+			pr_warning("%s: Failed to get PE address for %s\n",
+				__func__, dn->full_name);
+			return 0;
+		}
+
+		return rets[0];
+	}
+
+	return ret;
+}
+
+/**
+ * pseries_eeh_get_state - Retrieve PE state
+ * @dn: PE associated device node
+ * @state: return value
+ *
+ * Retrieve the state of the specified PE. On RTAS compliant
+ * pseries platform, there already has one dedicated RTAS function
+ * for the purpose. It's notable that the associated PE config address
+ * might be ready when calling the function. Therefore, endeavour to
+ * use the PE config address if possible. Further more, there're 2
+ * RTAS calls for the purpose, we need to try the new one and back
+ * to the old one if the new one couldn't work properly.
+ */
+static int pseries_eeh_get_state(struct device_node *dn, int *state)
+{
+	struct eeh_dev *edev;
+	int config_addr;
+	int ret;
+	int rets[4];
+	int result;
+
+	/* Figure out PE config address if possible */
+	edev = of_node_to_eeh_dev(dn);
+	config_addr = edev->config_addr;
+	if (edev->pe_config_addr)
+		config_addr = edev->pe_config_addr;
+
+	if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
+		ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets,
+				config_addr, BUID_HI(edev->phb->buid),
+				BUID_LO(edev->phb->buid));
+	} else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) {
+		/* Fake PE unavailable info */
+		rets[2] = 0;
+		ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets,
+				config_addr, BUID_HI(edev->phb->buid),
+				BUID_LO(edev->phb->buid));
+	} else {
+		return EEH_STATE_NOT_SUPPORT;
+	}
+
+	if (ret)
+		return ret;
+
+	/* Parse the result out */
+	result = 0;
+	if (rets[1]) {
+		switch(rets[0]) {
+		case 0:
+			result &= ~EEH_STATE_RESET_ACTIVE;
+			result |= EEH_STATE_MMIO_ACTIVE;
+			result |= EEH_STATE_DMA_ACTIVE;
+			break;
+		case 1:
+			result |= EEH_STATE_RESET_ACTIVE;
+			result |= EEH_STATE_MMIO_ACTIVE;
+			result |= EEH_STATE_DMA_ACTIVE;
+			break;
+		case 2:
+			result &= ~EEH_STATE_RESET_ACTIVE;
+			result &= ~EEH_STATE_MMIO_ACTIVE;
+			result &= ~EEH_STATE_DMA_ACTIVE;
+			break;
+		case 4:
+			result &= ~EEH_STATE_RESET_ACTIVE;
+			result &= ~EEH_STATE_MMIO_ACTIVE;
+			result &= ~EEH_STATE_DMA_ACTIVE;
+			result |= EEH_STATE_MMIO_ENABLED;
+			break;
+		case 5:
+			if (rets[2]) {
+				if (state) *state = rets[2];
+				result = EEH_STATE_UNAVAILABLE;
+			} else {
+				result = EEH_STATE_NOT_SUPPORT;
+			}
+		default:
+			result = EEH_STATE_NOT_SUPPORT;
+		}
+	} else {
+		result = EEH_STATE_NOT_SUPPORT;
+	}
+
+	return result;
+}
+
+/**
+ * pseries_eeh_reset - Reset the specified PE
+ * @dn: PE associated device node
+ * @option: reset option
+ *
+ * Reset the specified PE
+ */
+static int pseries_eeh_reset(struct device_node *dn, int option)
+{
+	struct eeh_dev *edev;
+	int config_addr;
+	int ret;
+
+	/* Figure out PE address */
+	edev = of_node_to_eeh_dev(dn);
+	config_addr = edev->config_addr;
+	if (edev->pe_config_addr)
+		config_addr = edev->pe_config_addr;
+
+	/* Reset PE through RTAS call */
+	ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+			config_addr, BUID_HI(edev->phb->buid),
+			BUID_LO(edev->phb->buid), option);
+
+	/* If fundamental-reset not supported, try hot-reset */
+	if (option == EEH_RESET_FUNDAMENTAL &&
+	    ret == -8) {
+		ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+				config_addr, BUID_HI(edev->phb->buid),
+				BUID_LO(edev->phb->buid), EEH_RESET_HOT);
+	}
+
+	return ret;
+}
+
+/**
+ * pseries_eeh_wait_state - Wait for PE state
+ * @dn: PE associated device node
+ * @max_wait: maximal period in microsecond
+ *
+ * Wait for the state of associated PE. It might take some time
+ * to retrieve the PE's state.
+ */
+static int pseries_eeh_wait_state(struct device_node *dn, int max_wait)
+{
+	int ret;
+	int mwait;
+
+	/*
+	 * According to PAPR, the state of PE might be temporarily
+	 * unavailable. Under the circumstance, we have to wait
+	 * for indicated time determined by firmware. The maximal
+	 * wait time is 5 minutes, which is acquired from the original
+	 * EEH implementation. Also, the original implementation
+	 * also defined the minimal wait time as 1 second.
+	 */
+#define EEH_STATE_MIN_WAIT_TIME	(1000)
+#define EEH_STATE_MAX_WAIT_TIME	(300 * 1000)
+
+	while (1) {
+		ret = pseries_eeh_get_state(dn, &mwait);
+
+		/*
+		 * If the PE's state is temporarily unavailable,
+		 * we have to wait for the specified time. Otherwise,
+		 * the PE's state will be returned immediately.
+		 */
+		if (ret != EEH_STATE_UNAVAILABLE)
+			return ret;
+
+		if (max_wait <= 0) {
+			pr_warning("%s: Timeout when getting PE's state (%d)\n",
+				__func__, max_wait);
+			return EEH_STATE_NOT_SUPPORT;
+		}
+
+		if (mwait <= 0) {
+			pr_warning("%s: Firmware returned bad wait value %d\n",
+				__func__, mwait);
+			mwait = EEH_STATE_MIN_WAIT_TIME;
+		} else if (mwait > EEH_STATE_MAX_WAIT_TIME) {
+			pr_warning("%s: Firmware returned too long wait value %d\n",
+				__func__, mwait);
+			mwait = EEH_STATE_MAX_WAIT_TIME;
+		}
+
+		max_wait -= mwait;
+		msleep(mwait);
+	}
+
+	return EEH_STATE_NOT_SUPPORT;
+}
+
+/**
+ * pseries_eeh_get_log - Retrieve error log
+ * @dn: device node
+ * @severity: temporary or permanent error log
+ * @drv_log: driver log to be combined with retrieved error log
+ * @len: length of driver log
+ *
+ * Retrieve the temporary or permanent error from the PE.
+ * Actually, the error will be retrieved through the dedicated
+ * RTAS call.
+ */
+static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_log, unsigned long len)
+{
+	struct eeh_dev *edev;
+	int config_addr;
+	unsigned long flags;
+	int ret;
+
+	edev = of_node_to_eeh_dev(dn);
+	spin_lock_irqsave(&slot_errbuf_lock, flags);
+	memset(slot_errbuf, 0, eeh_error_buf_size);
+
+	/* Figure out the PE address */
+	config_addr = edev->config_addr;
+	if (edev->pe_config_addr)
+		config_addr = edev->pe_config_addr;
+
+	ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr,
+			BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid),
+			virt_to_phys(drv_log), len,
+			virt_to_phys(slot_errbuf), eeh_error_buf_size,
+			severity);
+	if (!ret)
+		log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
+	spin_unlock_irqrestore(&slot_errbuf_lock, flags);
+
+	return ret;
+}
+
+/**
+ * pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE
+ * @dn: PE associated device node
+ *
+ * The function will be called to reconfigure the bridges included
+ * in the specified PE so that the mulfunctional PE would be recovered
+ * again.
+ */
+static int pseries_eeh_configure_bridge(struct device_node *dn)
+{
+	struct eeh_dev *edev;
+	int config_addr;
+	int ret;
+
+	/* Figure out the PE address */
+	edev = of_node_to_eeh_dev(dn);
+	config_addr = edev->config_addr;
+	if (edev->pe_config_addr)
+		config_addr = edev->pe_config_addr;
+
+	/* Use new configure-pe function, if supported */
+	if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
+		ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
+				config_addr, BUID_HI(edev->phb->buid),
+				BUID_LO(edev->phb->buid));
+	} else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
+		ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
+				config_addr, BUID_HI(edev->phb->buid),
+				BUID_LO(edev->phb->buid));
+	} else {
+		return -EFAULT;
+	}
+
+	if (ret)
+		pr_warning("%s: Unable to configure bridge %d for %s\n",
+			__func__, ret, dn->full_name);
+
+	return ret;
+}
+
+/**
+ * pseries_eeh_read_config - Read PCI config space
+ * @dn: device node
+ * @where: PCI address
+ * @size: size to read
+ * @val: return value
+ *
+ * Read config space from the speicifed device
+ */
+static int pseries_eeh_read_config(struct device_node *dn, int where, int size, u32 *val)
+{
+	struct pci_dn *pdn;
+
+	pdn = PCI_DN(dn);
+
+	return rtas_read_config(pdn, where, size, val);
+}
+
+/**
+ * pseries_eeh_write_config - Write PCI config space
+ * @dn: device node
+ * @where: PCI address
+ * @size: size to write
+ * @val: value to be written
+ *
+ * Write config space to the specified device
+ */
+static int pseries_eeh_write_config(struct device_node *dn, int where, int size, u32 val)
+{
+	struct pci_dn *pdn;
+
+	pdn = PCI_DN(dn);
+
+	return rtas_write_config(pdn, where, size, val);
+}
+
+static struct eeh_ops pseries_eeh_ops = {
+	.name			= "pseries",
+	.init			= pseries_eeh_init,
+	.set_option		= pseries_eeh_set_option,
+	.get_pe_addr		= pseries_eeh_get_pe_addr,
+	.get_state		= pseries_eeh_get_state,
+	.reset			= pseries_eeh_reset,
+	.wait_state		= pseries_eeh_wait_state,
+	.get_log		= pseries_eeh_get_log,
+	.configure_bridge       = pseries_eeh_configure_bridge,
+	.read_config		= pseries_eeh_read_config,
+	.write_config		= pseries_eeh_write_config
+};
+
+/**
+ * eeh_pseries_init - Register platform dependent EEH operations
+ *
+ * EEH initialization on pseries platform. This function should be
+ * called before any EEH related functions.
+ */
+int __init eeh_pseries_init(void)
+{
+	return eeh_ops_register(&pseries_eeh_ops);
+}
diff --git a/arch/powerpc/platforms/pseries/eeh_sysfs.c b/arch/powerpc/platforms/pseries/eeh_sysfs.c
index eb744ee234da..243b3510d70f 100644
--- a/arch/powerpc/platforms/pseries/eeh_sysfs.c
+++ b/arch/powerpc/platforms/pseries/eeh_sysfs.c
@@ -28,7 +28,7 @@
 #include <asm/pci-bridge.h>
 
 /**
- * EEH_SHOW_ATTR -- create sysfs entry for eeh statistic
+ * EEH_SHOW_ATTR -- Create sysfs entry for eeh statistic
  * @_name: name of file in sysfs directory
  * @_memb: name of member in struct pci_dn to access
  * @_format: printf format for display
@@ -41,24 +41,21 @@ static ssize_t eeh_show_##_name(struct device *dev,      \
 		struct device_attribute *attr, char *buf)          \
 {                                                        \
 	struct pci_dev *pdev = to_pci_dev(dev);               \
-	struct device_node *dn = pci_device_to_OF_node(pdev); \
-	struct pci_dn *pdn;                                   \
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);      \
 	                                                      \
-	if (!dn || PCI_DN(dn) == NULL)                        \
-		return 0;                                          \
+	if (!edev)                                            \
+		return 0;                                     \
 	                                                      \
-	pdn = PCI_DN(dn);                                     \
-	return sprintf(buf, _format "\n", pdn->_memb);        \
+	return sprintf(buf, _format "\n", edev->_memb);       \
 }                                                        \
 static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
 
-
-EEH_SHOW_ATTR(eeh_mode, eeh_mode, "0x%x");
-EEH_SHOW_ATTR(eeh_config_addr, eeh_config_addr, "0x%x");
-EEH_SHOW_ATTR(eeh_pe_config_addr, eeh_pe_config_addr, "0x%x");
-EEH_SHOW_ATTR(eeh_check_count, eeh_check_count, "%d");
-EEH_SHOW_ATTR(eeh_freeze_count, eeh_freeze_count, "%d");
-EEH_SHOW_ATTR(eeh_false_positives, eeh_false_positives, "%d");
+EEH_SHOW_ATTR(eeh_mode,            mode,            "0x%x");
+EEH_SHOW_ATTR(eeh_config_addr,     config_addr,     "0x%x");
+EEH_SHOW_ATTR(eeh_pe_config_addr,  pe_config_addr,  "0x%x");
+EEH_SHOW_ATTR(eeh_check_count,     check_count,     "%d"  );
+EEH_SHOW_ATTR(eeh_freeze_count,    freeze_count,    "%d"  );
+EEH_SHOW_ATTR(eeh_false_positives, false_positives, "%d"  );
 
 void eeh_sysfs_add_device(struct pci_dev *pdev)
 {
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 7bc73af6c7b9..5f3ef876ded2 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -41,6 +41,7 @@
 #include <asm/udbg.h>
 #include <asm/smp.h>
 #include <asm/trace.h>
+#include <asm/firmware.h>
 
 #include "plpar_wrappers.h"
 #include "pseries.h"
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 38d24e7e7bb1..109fdb75578d 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -217,7 +217,7 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
 	if (!dn)
 		return NULL;
 
-	dn = find_device_pe(dn);
+	dn = eeh_find_device_pe(dn);
 	if (!dn)
 		return NULL;
 
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 55d4ec1bd1ac..8b7bafa489c2 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -84,7 +84,7 @@ void pcibios_remove_pci_devices(struct pci_bus *bus)
 	list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
 		pr_debug("     * Removing %s...\n", pci_name(dev));
 		eeh_remove_bus_device(dev);
- 		pci_remove_bus_device(dev);
+ 		pci_stop_and_remove_bus_device(dev);
  	}
 }
 EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
@@ -147,6 +147,9 @@ struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn)
 
 	pci_devs_phb_init_dynamic(phb);
 
+	/* Create EEH devices for the PHB */
+	eeh_dev_phb_init_dynamic(phb);
+
 	if (dn->child)
 		eeh_add_device_tree_early(dn);
 
diff --git a/arch/powerpc/platforms/pseries/phyp_dump.c b/arch/powerpc/platforms/pseries/phyp_dump.c
deleted file mode 100644
index 6e7742da0072..000000000000
--- a/arch/powerpc/platforms/pseries/phyp_dump.c
+++ /dev/null
@@ -1,513 +0,0 @@
-/*
- * Hypervisor-assisted dump
- *
- * Linas Vepstas, Manish Ahuja 2008
- * Copyright 2008 IBM Corp.
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/kobject.h>
-#include <linux/mm.h>
-#include <linux/of.h>
-#include <linux/pfn.h>
-#include <linux/swap.h>
-#include <linux/sysfs.h>
-
-#include <asm/page.h>
-#include <asm/phyp_dump.h>
-#include <asm/machdep.h>
-#include <asm/prom.h>
-#include <asm/rtas.h>
-
-/* Variables, used to communicate data between early boot and late boot */
-static struct phyp_dump phyp_dump_vars;
-struct phyp_dump *phyp_dump_info = &phyp_dump_vars;
-
-static int ibm_configure_kernel_dump;
-/* ------------------------------------------------- */
-/* RTAS interfaces to declare the dump regions */
-
-struct dump_section {
-	u32 dump_flags;
-	u16 source_type;
-	u16 error_flags;
-	u64 source_address;
-	u64 source_length;
-	u64 length_copied;
-	u64 destination_address;
-};
-
-struct phyp_dump_header {
-	u32 version;
-	u16 num_of_sections;
-	u16 status;
-
-	u32 first_offset_section;
-	u32 dump_disk_section;
-	u64 block_num_dd;
-	u64 num_of_blocks_dd;
-	u32 offset_dd;
-	u32 maxtime_to_auto;
-	/* No dump disk path string used */
-
-	struct dump_section cpu_data;
-	struct dump_section hpte_data;
-	struct dump_section kernel_data;
-};
-
-/* The dump header *must be* in low memory, so .bss it */
-static struct phyp_dump_header phdr;
-
-#define NUM_DUMP_SECTIONS	3
-#define DUMP_HEADER_VERSION	0x1
-#define DUMP_REQUEST_FLAG	0x1
-#define DUMP_SOURCE_CPU		0x0001
-#define DUMP_SOURCE_HPTE	0x0002
-#define DUMP_SOURCE_RMO		0x0011
-#define DUMP_ERROR_FLAG		0x2000
-#define DUMP_TRIGGERED		0x4000
-#define DUMP_PERFORMED		0x8000
-
-
-/**
- * init_dump_header() - initialize the header declaring a dump
- * Returns: length of dump save area.
- *
- * When the hypervisor saves crashed state, it needs to put
- * it somewhere. The dump header tells the hypervisor where
- * the data can be saved.
- */
-static unsigned long init_dump_header(struct phyp_dump_header *ph)
-{
-	unsigned long addr_offset = 0;
-
-	/* Set up the dump header */
-	ph->version = DUMP_HEADER_VERSION;
-	ph->num_of_sections = NUM_DUMP_SECTIONS;
-	ph->status = 0;
-
-	ph->first_offset_section =
-		(u32)offsetof(struct phyp_dump_header, cpu_data);
-	ph->dump_disk_section = 0;
-	ph->block_num_dd = 0;
-	ph->num_of_blocks_dd = 0;
-	ph->offset_dd = 0;
-
-	ph->maxtime_to_auto = 0; /* disabled */
-
-	/* The first two sections are mandatory */
-	ph->cpu_data.dump_flags = DUMP_REQUEST_FLAG;
-	ph->cpu_data.source_type = DUMP_SOURCE_CPU;
-	ph->cpu_data.source_address = 0;
-	ph->cpu_data.source_length = phyp_dump_info->cpu_state_size;
-	ph->cpu_data.destination_address = addr_offset;
-	addr_offset += phyp_dump_info->cpu_state_size;
-
-	ph->hpte_data.dump_flags = DUMP_REQUEST_FLAG;
-	ph->hpte_data.source_type = DUMP_SOURCE_HPTE;
-	ph->hpte_data.source_address = 0;
-	ph->hpte_data.source_length = phyp_dump_info->hpte_region_size;
-	ph->hpte_data.destination_address = addr_offset;
-	addr_offset += phyp_dump_info->hpte_region_size;
-
-	/* This section describes the low kernel region */
-	ph->kernel_data.dump_flags = DUMP_REQUEST_FLAG;
-	ph->kernel_data.source_type = DUMP_SOURCE_RMO;
-	ph->kernel_data.source_address = PHYP_DUMP_RMR_START;
-	ph->kernel_data.source_length = PHYP_DUMP_RMR_END;
-	ph->kernel_data.destination_address = addr_offset;
-	addr_offset += ph->kernel_data.source_length;
-
-	return addr_offset;
-}
-
-static void print_dump_header(const struct phyp_dump_header *ph)
-{
-#ifdef DEBUG
-	if (ph == NULL)
-		return;
-
-	printk(KERN_INFO "dump header:\n");
-	/* setup some ph->sections required */
-	printk(KERN_INFO "version = %d\n", ph->version);
-	printk(KERN_INFO "Sections = %d\n", ph->num_of_sections);
-	printk(KERN_INFO "Status = 0x%x\n", ph->status);
-
-	/* No ph->disk, so all should be set to 0 */
-	printk(KERN_INFO "Offset to first section 0x%x\n",
-		ph->first_offset_section);
-	printk(KERN_INFO "dump disk sections should be zero\n");
-	printk(KERN_INFO "dump disk section = %d\n", ph->dump_disk_section);
-	printk(KERN_INFO "block num = %lld\n", ph->block_num_dd);
-	printk(KERN_INFO "number of blocks = %lld\n", ph->num_of_blocks_dd);
-	printk(KERN_INFO "dump disk offset = %d\n", ph->offset_dd);
-	printk(KERN_INFO "Max auto time= %d\n", ph->maxtime_to_auto);
-
-	/*set cpu state and hpte states as well scratch pad area */
-	printk(KERN_INFO " CPU AREA\n");
-	printk(KERN_INFO "cpu dump_flags =%d\n", ph->cpu_data.dump_flags);
-	printk(KERN_INFO "cpu source_type =%d\n", ph->cpu_data.source_type);
-	printk(KERN_INFO "cpu error_flags =%d\n", ph->cpu_data.error_flags);
-	printk(KERN_INFO "cpu source_address =%llx\n",
-		ph->cpu_data.source_address);
-	printk(KERN_INFO "cpu source_length =%llx\n",
-		ph->cpu_data.source_length);
-	printk(KERN_INFO "cpu length_copied =%llx\n",
-		ph->cpu_data.length_copied);
-
-	printk(KERN_INFO " HPTE AREA\n");
-	printk(KERN_INFO "HPTE dump_flags =%d\n", ph->hpte_data.dump_flags);
-	printk(KERN_INFO "HPTE source_type =%d\n", ph->hpte_data.source_type);
-	printk(KERN_INFO "HPTE error_flags =%d\n", ph->hpte_data.error_flags);
-	printk(KERN_INFO "HPTE source_address =%llx\n",
-		ph->hpte_data.source_address);
-	printk(KERN_INFO "HPTE source_length =%llx\n",
-		ph->hpte_data.source_length);
-	printk(KERN_INFO "HPTE length_copied =%llx\n",
-		ph->hpte_data.length_copied);
-
-	printk(KERN_INFO " SRSD AREA\n");
-	printk(KERN_INFO "SRSD dump_flags =%d\n", ph->kernel_data.dump_flags);
-	printk(KERN_INFO "SRSD source_type =%d\n", ph->kernel_data.source_type);
-	printk(KERN_INFO "SRSD error_flags =%d\n", ph->kernel_data.error_flags);
-	printk(KERN_INFO "SRSD source_address =%llx\n",
-		ph->kernel_data.source_address);
-	printk(KERN_INFO "SRSD source_length =%llx\n",
-		ph->kernel_data.source_length);
-	printk(KERN_INFO "SRSD length_copied =%llx\n",
-		ph->kernel_data.length_copied);
-#endif
-}
-
-static ssize_t show_phyp_dump_active(struct kobject *kobj,
-			struct kobj_attribute *attr, char *buf)
-{
-
-	/* create filesystem entry so kdump is phyp-dump aware */
-	return sprintf(buf, "%lx\n", phyp_dump_info->phyp_dump_at_boot);
-}
-
-static struct kobj_attribute pdl = __ATTR(phyp_dump_active, 0600,
-					show_phyp_dump_active,
-					NULL);
-
-static void register_dump_area(struct phyp_dump_header *ph, unsigned long addr)
-{
-	int rc;
-
-	/* Add addr value if not initialized before */
-	if (ph->cpu_data.destination_address == 0) {
-		ph->cpu_data.destination_address += addr;
-		ph->hpte_data.destination_address += addr;
-		ph->kernel_data.destination_address += addr;
-	}
-
-	/* ToDo Invalidate kdump and free memory range. */
-
-	do {
-		rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
-				1, ph, sizeof(struct phyp_dump_header));
-	} while (rtas_busy_delay(rc));
-
-	if (rc) {
-		printk(KERN_ERR "phyp-dump: unexpected error (%d) on "
-						"register\n", rc);
-		print_dump_header(ph);
-		return;
-	}
-
-	rc = sysfs_create_file(kernel_kobj, &pdl.attr);
-	if (rc)
-		printk(KERN_ERR "phyp-dump: unable to create sysfs"
-				" file (%d)\n", rc);
-}
-
-static
-void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr)
-{
-	int rc;
-
-	/* Add addr value if not initialized before */
-	if (ph->cpu_data.destination_address == 0) {
-		ph->cpu_data.destination_address += addr;
-		ph->hpte_data.destination_address += addr;
-		ph->kernel_data.destination_address += addr;
-	}
-
-	do {
-		rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
-				2, ph, sizeof(struct phyp_dump_header));
-	} while (rtas_busy_delay(rc));
-
-	if (rc) {
-		printk(KERN_ERR "phyp-dump: unexpected error (%d) "
-						"on invalidate\n", rc);
-		print_dump_header(ph);
-	}
-}
-
-/* ------------------------------------------------- */
-/**
- * release_memory_range -- release memory previously memblock_reserved
- * @start_pfn: starting physical frame number
- * @nr_pages: number of pages to free.
- *
- * This routine will release memory that had been previously
- * memblock_reserved in early boot. The released memory becomes
- * available for genreal use.
- */
-static void release_memory_range(unsigned long start_pfn,
-			unsigned long nr_pages)
-{
-	struct page *rpage;
-	unsigned long end_pfn;
-	long i;
-
-	end_pfn = start_pfn + nr_pages;
-
-	for (i = start_pfn; i <= end_pfn; i++) {
-		rpage = pfn_to_page(i);
-		if (PageReserved(rpage)) {
-			ClearPageReserved(rpage);
-			init_page_count(rpage);
-			__free_page(rpage);
-			totalram_pages++;
-		}
-	}
-}
-
-/**
- * track_freed_range -- Counts the range being freed.
- * Once the counter goes to zero, it re-registers dump for
- * future use.
- */
-static void
-track_freed_range(unsigned long addr, unsigned long length)
-{
-	static unsigned long scratch_area_size, reserved_area_size;
-
-	if (addr < phyp_dump_info->init_reserve_start)
-		return;
-
-	if ((addr >= phyp_dump_info->init_reserve_start) &&
-	    (addr <= phyp_dump_info->init_reserve_start +
-	     phyp_dump_info->init_reserve_size))
-		reserved_area_size += length;
-
-	if ((addr >= phyp_dump_info->reserved_scratch_addr) &&
-	    (addr <= phyp_dump_info->reserved_scratch_addr +
-	     phyp_dump_info->reserved_scratch_size))
-		scratch_area_size += length;
-
-	if ((reserved_area_size == phyp_dump_info->init_reserve_size) &&
-	    (scratch_area_size == phyp_dump_info->reserved_scratch_size)) {
-
-		invalidate_last_dump(&phdr,
-				phyp_dump_info->reserved_scratch_addr);
-		register_dump_area(&phdr,
-				phyp_dump_info->reserved_scratch_addr);
-	}
-}
-
-/* ------------------------------------------------- */
-/**
- * sysfs_release_region -- sysfs interface to release memory range.
- *
- * Usage:
- *   "echo <start addr> <length> > /sys/kernel/release_region"
- *
- * Example:
- *   "echo 0x40000000 0x10000000 > /sys/kernel/release_region"
- *
- * will release 256MB starting at 1GB.
- */
-static ssize_t store_release_region(struct kobject *kobj,
-				struct kobj_attribute *attr,
-				const char *buf, size_t count)
-{
-	unsigned long start_addr, length, end_addr;
-	unsigned long start_pfn, nr_pages;
-	ssize_t ret;
-
-	ret = sscanf(buf, "%lx %lx", &start_addr, &length);
-	if (ret != 2)
-		return -EINVAL;
-
-	track_freed_range(start_addr, length);
-
-	/* Range-check - don't free any reserved memory that
-	 * wasn't reserved for phyp-dump */
-	if (start_addr < phyp_dump_info->init_reserve_start)
-		start_addr = phyp_dump_info->init_reserve_start;
-
-	end_addr = phyp_dump_info->init_reserve_start +
-			phyp_dump_info->init_reserve_size;
-	if (start_addr+length > end_addr)
-		length = end_addr - start_addr;
-
-	/* Release the region of memory assed in by user */
-	start_pfn = PFN_DOWN(start_addr);
-	nr_pages = PFN_DOWN(length);
-	release_memory_range(start_pfn, nr_pages);
-
-	return count;
-}
-
-static ssize_t show_release_region(struct kobject *kobj,
-			struct kobj_attribute *attr, char *buf)
-{
-	u64 second_addr_range;
-
-	/* total reserved size - start of scratch area */
-	second_addr_range = phyp_dump_info->init_reserve_size -
-				phyp_dump_info->reserved_scratch_size;
-	return sprintf(buf, "CPU:0x%llx-0x%llx: HPTE:0x%llx-0x%llx:"
-			    " DUMP:0x%llx-0x%llx, 0x%lx-0x%llx:\n",
-		phdr.cpu_data.destination_address,
-		phdr.cpu_data.length_copied,
-		phdr.hpte_data.destination_address,
-		phdr.hpte_data.length_copied,
-		phdr.kernel_data.destination_address,
-		phdr.kernel_data.length_copied,
-		phyp_dump_info->init_reserve_start,
-		second_addr_range);
-}
-
-static struct kobj_attribute rr = __ATTR(release_region, 0600,
-					show_release_region,
-					store_release_region);
-
-static int __init phyp_dump_setup(void)
-{
-	struct device_node *rtas;
-	const struct phyp_dump_header *dump_header = NULL;
-	unsigned long dump_area_start;
-	unsigned long dump_area_length;
-	int header_len = 0;
-	int rc;
-
-	/* If no memory was reserved in early boot, there is nothing to do */
-	if (phyp_dump_info->init_reserve_size == 0)
-		return 0;
-
-	/* Return if phyp dump not supported */
-	if (!phyp_dump_info->phyp_dump_configured)
-		return -ENOSYS;
-
-	/* Is there dump data waiting for us? If there isn't,
-	 * then register a new dump area, and release all of
-	 * the rest of the reserved ram.
-	 *
-	 * The /rtas/ibm,kernel-dump rtas node is present only
-	 * if there is dump data waiting for us.
-	 */
-	rtas = of_find_node_by_path("/rtas");
-	if (rtas) {
-		dump_header = of_get_property(rtas, "ibm,kernel-dump",
-						&header_len);
-		of_node_put(rtas);
-	}
-
-	ibm_configure_kernel_dump = rtas_token("ibm,configure-kernel-dump");
-
-	print_dump_header(dump_header);
-	dump_area_length = init_dump_header(&phdr);
-	/* align down */
-	dump_area_start = phyp_dump_info->init_reserve_start & PAGE_MASK;
-
-	if (dump_header == NULL) {
-		register_dump_area(&phdr, dump_area_start);
-		return 0;
-	}
-
-	/* re-register the dump area, if old dump was invalid */
-	if ((dump_header) && (dump_header->status & DUMP_ERROR_FLAG)) {
-		invalidate_last_dump(&phdr, dump_area_start);
-		register_dump_area(&phdr, dump_area_start);
-		return 0;
-	}
-
-	if (dump_header) {
-		phyp_dump_info->reserved_scratch_addr =
-				dump_header->cpu_data.destination_address;
-		phyp_dump_info->reserved_scratch_size =
-				dump_header->cpu_data.source_length +
-				dump_header->hpte_data.source_length +
-				dump_header->kernel_data.source_length;
-	}
-
-	/* Should we create a dump_subsys, analogous to s390/ipl.c ? */
-	rc = sysfs_create_file(kernel_kobj, &rr.attr);
-	if (rc)
-		printk(KERN_ERR "phyp-dump: unable to create sysfs file (%d)\n",
-									rc);
-
-	/* ToDo: re-register the dump area, for next time. */
-	return 0;
-}
-machine_subsys_initcall(pseries, phyp_dump_setup);
-
-int __init early_init_dt_scan_phyp_dump(unsigned long node,
-		const char *uname, int depth, void *data)
-{
-	const unsigned int *sizes;
-
-	phyp_dump_info->phyp_dump_configured = 0;
-	phyp_dump_info->phyp_dump_is_active = 0;
-
-	if (depth != 1 || strcmp(uname, "rtas") != 0)
-		return 0;
-
-	if (of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL))
-		phyp_dump_info->phyp_dump_configured++;
-
-	if (of_get_flat_dt_prop(node, "ibm,dump-kernel", NULL))
-		phyp_dump_info->phyp_dump_is_active++;
-
-	sizes = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
-				    NULL);
-	if (!sizes)
-		return 0;
-
-	if (sizes[0] == 1)
-		phyp_dump_info->cpu_state_size = *((unsigned long *)&sizes[1]);
-
-	if (sizes[3] == 2)
-		phyp_dump_info->hpte_region_size =
-						*((unsigned long *)&sizes[4]);
-	return 1;
-}
-
-/* Look for phyp_dump= cmdline option */
-static int __init early_phyp_dump_enabled(char *p)
-{
-	phyp_dump_info->phyp_dump_at_boot = 1;
-
-        if (!p)
-                return 0;
-
-        if (strncmp(p, "1", 1) == 0)
-		phyp_dump_info->phyp_dump_at_boot = 1;
-        else if (strncmp(p, "0", 1) == 0)
-		phyp_dump_info->phyp_dump_at_boot = 0;
-
-        return 0;
-}
-early_param("phyp_dump", early_phyp_dump_enabled);
-
-/* Look for phyp_dump_reserve_size= cmdline option */
-static int __init early_phyp_dump_reserve_size(char *p)
-{
-        if (p)
-		phyp_dump_info->reserve_bootvar = memparse(p, &p);
-
-        return 0;
-}
-early_param("phyp_dump_reserve_size", early_phyp_dump_reserve_size);
diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c
index 085fd3f45ad2..a12e95af6933 100644
--- a/arch/powerpc/platforms/pseries/processor_idle.c
+++ b/arch/powerpc/platforms/pseries/processor_idle.c
@@ -96,6 +96,20 @@ out:
 	return index;
 }
 
+static void check_and_cede_processor(void)
+{
+	/*
+	 * Interrupts are soft-disabled at this point,
+	 * but not hard disabled. So an interrupt might have
+	 * occurred before entering NAP, and would be potentially
+	 * lost (edge events, decrementer events, etc...) unless
+	 * we first hard disable then check.
+	 */
+	hard_irq_disable();
+	if (get_paca()->irq_happened == 0)
+		cede_processor();
+}
+
 static int dedicated_cede_loop(struct cpuidle_device *dev,
 				struct cpuidle_driver *drv,
 				int index)
@@ -108,7 +122,7 @@ static int dedicated_cede_loop(struct cpuidle_device *dev,
 
 	ppc64_runlatch_off();
 	HMT_medium();
-	cede_processor();
+	check_and_cede_processor();
 
 	get_lppaca()->donate_dedicated_cpu = 0;
 	dev->last_residency =
@@ -132,7 +146,7 @@ static int shared_cede_loop(struct cpuidle_device *dev,
 	 * processor. When returning here, external interrupts
 	 * are enabled.
 	 */
-	cede_processor();
+	check_and_cede_processor();
 
 	dev->last_residency =
 		(int)idle_loop_epilog(in_purr, kt_before);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index f79f1278dfca..51ecac920dd8 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -190,9 +190,8 @@ static void __init pseries_mpic_init_IRQ(void)
 	BUG_ON(openpic_addr == 0);
 
 	/* Setup the openpic driver */
-	mpic = mpic_alloc(pSeries_mpic_node, openpic_addr, 0,
-			  16, 250, /* isu size, irq count */
-			  " MPIC     ");
+	mpic = mpic_alloc(pSeries_mpic_node, openpic_addr,
+			MPIC_NO_RESET, 16, 0, " MPIC     ");
 	BUG_ON(mpic == NULL);
 
 	/* Add ISUs */
@@ -261,8 +260,12 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act
 	switch (action) {
 	case PSERIES_RECONFIG_ADD:
 		pci = np->parent->data;
-		if (pci)
+		if (pci) {
 			update_dn_pci_info(np, pci->phb);
+
+			/* Create EEH device for the OF node */
+			eeh_dev_init(np, pci->phb);
+		}
 		break;
 	default:
 		err = NOTIFY_DONE;
@@ -380,8 +383,12 @@ static void __init pSeries_setup_arch(void)
 
 	fwnmi_init();
 
+	/* By default, only probe PCI (can be overriden by rtas_pci) */
+	pci_add_flags(PCI_PROBE_ONLY);
+
 	/* Find and initialize PCI host bridges */
 	init_pci_config_tokens();
+	eeh_pseries_init();
 	find_and_init_phbs();
 	pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
 	eeh_init();