26 files changed, 1280 insertions, 580 deletions
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 06a85b704331..82551770917c 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -1,3 +1,26 @@
+config PPC_CELL
+	bool
+	default n
+
+config PPC_CELL_NATIVE
+	bool
+	select PPC_CELL
+	select PPC_DCR_MMIO
+	select PPC_OF_PLATFORM_PCI
+	select PPC_INDIRECT_IO
+	select PPC_NATIVE
+	select MPIC
+	default n
+
+config PPC_IBM_CELL_BLADE
+	bool "IBM Cell Blade"
+	depends on PPC_MULTIPLATFORM && PPC64
+	select PPC_CELL_NATIVE
+	select PPC_RTAS
+	select MMIO_NVRAM
+	select PPC_UDBG_16550
+	select UDBG_RTAS_CONSOLE
+
 menu "Cell Broadband Engine options"
 	depends on PPC_CELL
 
@@ -18,6 +41,7 @@ config SPU_BASE
 
 config CBE_RAS
 	bool "RAS features for bare metal Cell BE"
+	depends on PPC_CELL_NATIVE
 	default y
 
 config CBE_THERM
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.c b/arch/powerpc/platforms/cell/cbe_cpufreq.c
index a3850fd1e94c..f9ac3fe3be97 100644
--- a/arch/powerpc/platforms/cell/cbe_cpufreq.c
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq.c
@@ -25,9 +25,12 @@
 
 #include <asm/hw_irq.h>
 #include <asm/io.h>
+#include <asm/machdep.h>
 #include <asm/processor.h>
 #include <asm/prom.h>
 #include <asm/time.h>
+#include <asm/pmi.h>
+#include <asm/of_platform.h>
 
 #include "cbe_regs.h"
 
@@ -68,6 +71,38 @@ static u64 MIC_Slow_Next_Timer_table[] = {
  * hardware specific functions
  */
 
+static struct of_device *pmi_dev;
+
+static int set_pmode_pmi(int cpu, unsigned int pmode)
+{
+	int ret;
+	pmi_message_t pmi_msg;
+#ifdef DEBUG
+	u64 time;
+#endif
+
+	pmi_msg.type = PMI_TYPE_FREQ_CHANGE;
+	pmi_msg.data1 =	cbe_cpu_to_node(cpu);
+	pmi_msg.data2 = pmode;
+
+#ifdef DEBUG
+	time = (u64) get_cycles();
+#endif
+
+	pmi_send_message(pmi_dev, pmi_msg);
+	ret = pmi_msg.data2;
+
+	pr_debug("PMI returned slow mode %d\n", ret);
+
+#ifdef DEBUG
+	time = (u64) get_cycles() - time; /* actual cycles (not cpu cycles!) */
+	time = 1000000000 * time / CLOCK_TICK_RATE; /* time in ns (10^-9) */
+	pr_debug("had to wait %lu ns for a transition\n", time);
+#endif
+	return ret;
+}
+
+
 static int get_pmode(int cpu)
 {
 	int ret;
@@ -79,7 +114,7 @@ static int get_pmode(int cpu)
 	return ret;
 }
 
-static int set_pmode(int cpu, unsigned int pmode)
+static int set_pmode_reg(int cpu, unsigned int pmode)
 {
 	struct cbe_pmd_regs __iomem *pmd_regs;
 	struct cbe_mic_tm_regs __iomem *mic_tm_regs;
@@ -120,37 +155,71 @@ static int set_pmode(int cpu, unsigned int pmode)
 	return 0;
 }
 
+static int set_pmode(int cpu, unsigned int slow_mode) {
+	if (pmi_dev)
+		return set_pmode_pmi(cpu, slow_mode);
+	else
+		return set_pmode_reg(cpu, slow_mode);
+}
+
+static void cbe_cpufreq_handle_pmi(struct of_device *dev, pmi_message_t pmi_msg)
+{
+	struct cpufreq_policy policy;
+	u8 cpu;
+	u8 cbe_pmode_new;
+
+	BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE);
+
+	cpu = cbe_node_to_cpu(pmi_msg.data1);
+	cbe_pmode_new = pmi_msg.data2;
+
+	cpufreq_get_policy(&policy, cpu);
+
+	policy.max = min(policy.max, cbe_freqs[cbe_pmode_new].frequency);
+	policy.min = min(policy.min, policy.max);
+
+	pr_debug("cbe_handle_pmi: new policy.min=%d policy.max=%d\n", policy.min, policy.max);
+	cpufreq_set_policy(&policy);
+}
+
+static struct pmi_handler cbe_pmi_handler = {
+	.type			= PMI_TYPE_FREQ_CHANGE,
+	.handle_pmi_message	= cbe_cpufreq_handle_pmi,
+};
+
+
 /*
  * cpufreq functions
  */
 
-static int cbe_cpufreq_cpu_init (struct cpufreq_policy *policy)
+static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
 {
-	u32 *max_freq;
+	const u32 *max_freqp;
+	u32 max_freq;
 	int i, cur_pmode;
 	struct device_node *cpu;
 
 	cpu = of_get_cpu_node(policy->cpu, NULL);
 
-	if(!cpu)
+	if (!cpu)
 		return -ENODEV;
 
 	pr_debug("init cpufreq on CPU %d\n", policy->cpu);
 
-	max_freq = (u32*) get_property(cpu, "clock-frequency", NULL);
+	max_freqp = of_get_property(cpu, "clock-frequency", NULL);
 
-	if(!max_freq)
+	if (!max_freqp)
 		return -EINVAL;
 
-	// we need the freq in kHz
-	*max_freq /= 1000;
+	/* we need the freq in kHz */
+	max_freq = *max_freqp / 1000;
 
-	pr_debug("max clock-frequency is at %u kHz\n", *max_freq);
+	pr_debug("max clock-frequency is at %u kHz\n", max_freq);
 	pr_debug("initializing frequency table\n");
 
-	// initialize frequency table
+	/* initialize frequency table */
 	for (i=0; cbe_freqs[i].frequency!=CPUFREQ_TABLE_END; i++) {
-		cbe_freqs[i].frequency = *max_freq / cbe_freqs[i].index;
+		cbe_freqs[i].frequency = max_freq / cbe_freqs[i].index;
 		pr_debug("%d: %d\n", i, cbe_freqs[i].frequency);
 	}
 
@@ -167,10 +236,10 @@ static int cbe_cpufreq_cpu_init (struct cpufreq_policy *policy)
 	policy->cpus = cpu_sibling_map[policy->cpu];
 #endif
 
-	cpufreq_frequency_table_get_attr (cbe_freqs, policy->cpu);
+	cpufreq_frequency_table_get_attr(cbe_freqs, policy->cpu);
 
 	/* this ensures that policy->cpuinfo_min and policy->cpuinfo_max are set correctly */
-	return cpufreq_frequency_table_cpuinfo (policy, cbe_freqs);
+	return cpufreq_frequency_table_cpuinfo(policy, cbe_freqs);
 }
 
 static int cbe_cpufreq_cpu_exit(struct cpufreq_policy *policy)
@@ -202,7 +271,7 @@ static int cbe_cpufreq_target(struct cpufreq_policy *policy, unsigned int target
 	freqs.new = cbe_freqs[cbe_pmode_new].frequency;
 	freqs.cpu = policy->cpu;
 
-	mutex_lock (&cbe_switch_mutex);
+	mutex_lock(&cbe_switch_mutex);
 	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 
 	pr_debug("setting frequency for cpu %d to %d kHz, 1/%d of max frequency\n",
@@ -233,11 +302,26 @@ static struct cpufreq_driver cbe_cpufreq_driver = {
 
 static int __init cbe_cpufreq_init(void)
 {
+	struct device_node *np;
+
+	if (!machine_is(cell))
+		return -ENODEV;
+
+	np = of_find_node_by_type(NULL, "ibm,pmi");
+
+	pmi_dev = of_find_device_by_node(np);
+
+	if (pmi_dev)
+		pmi_register_handler(pmi_dev, &cbe_pmi_handler);
+
 	return cpufreq_register_driver(&cbe_cpufreq_driver);
 }
 
 static void __exit cbe_cpufreq_exit(void)
 {
+	if (pmi_dev)
+		pmi_unregister_handler(pmi_dev, &cbe_pmi_handler);
+
 	cpufreq_unregister_driver(&cbe_cpufreq_driver);
 }
 
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
index 9a0ee62691d5..12c9674b4b1f 100644
--- a/arch/powerpc/platforms/cell/cbe_regs.c
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
@@ -14,6 +14,8 @@
 #include <asm/pgtable.h>
 #include <asm/prom.h>
 #include <asm/ptrace.h>
+#include <asm/of_device.h>
+#include <asm/of_platform.h>
 
 #include "cbe_regs.h"
 
@@ -27,6 +29,7 @@
 static struct cbe_regs_map
 {
 	struct device_node *cpu_node;
+	struct device_node *be_node;
 	struct cbe_pmd_regs __iomem *pmd_regs;
 	struct cbe_iic_regs __iomem *iic_regs;
 	struct cbe_mic_tm_regs __iomem *mic_tm_regs;
@@ -37,30 +40,43 @@ static int cbe_regs_map_count;
 static struct cbe_thread_map
 {
 	struct device_node *cpu_node;
+	struct device_node *be_node;
 	struct cbe_regs_map *regs;
+	unsigned int thread_id;
+	unsigned int cbe_id;
 } cbe_thread_map[NR_CPUS];
 
+static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = CPU_MASK_NONE };
+static cpumask_t cbe_first_online_cpu = CPU_MASK_NONE;
+
 static struct cbe_regs_map *cbe_find_map(struct device_node *np)
 {
 	int i;
 	struct device_node *tmp_np;
 
-	if (strcasecmp(np->type, "spe") == 0) {
-		if (np->data == NULL) {
-			/* walk up path until cpu node was found */
-			tmp_np = np->parent;
-			while (tmp_np != NULL && strcasecmp(tmp_np->type, "cpu") != 0)
-				tmp_np = tmp_np->parent;
+	if (strcasecmp(np->type, "spe")) {
+		for (i = 0; i < cbe_regs_map_count; i++)
+			if (cbe_regs_maps[i].cpu_node == np ||
+			    cbe_regs_maps[i].be_node == np)
+				return &cbe_regs_maps[i];
+		return NULL;
+	}
 
-			np->data = cbe_find_map(tmp_np);
-		}
+	if (np->data)
 		return np->data;
-	}
 
-	for (i = 0; i < cbe_regs_map_count; i++)
-		if (cbe_regs_maps[i].cpu_node == np)
-			return &cbe_regs_maps[i];
-	return NULL;
+	/* walk up path until cpu or be node was found */
+	tmp_np = np;
+	do {
+		tmp_np = tmp_np->parent;
+		/* on a correct devicetree we wont get up to root */
+		BUG_ON(!tmp_np);
+	} while (strcasecmp(tmp_np->type, "cpu") &&
+		 strcasecmp(tmp_np->type, "be"));
+
+	np->data = cbe_find_map(tmp_np);
+
+	return np->data;
 }
 
 struct cbe_pmd_regs __iomem *cbe_get_pmd_regs(struct device_node *np)
@@ -130,38 +146,105 @@ struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu)
 }
 EXPORT_SYMBOL_GPL(cbe_get_cpu_mic_tm_regs);
 
-/* FIXME
- * This is little more than a stub at the moment.  It should be
- * fleshed out so that it works for both SMT and non-SMT, no
- * matter if the passed cpu is odd or even.
- * For SMT enabled, returns 0 for even-numbered cpu; otherwise 1.
- * For SMT disabled, returns 0 for all cpus.
- */
 u32 cbe_get_hw_thread_id(int cpu)
 {
-	return (cpu & 1);
+	return cbe_thread_map[cpu].thread_id;
 }
 EXPORT_SYMBOL_GPL(cbe_get_hw_thread_id);
 
-void __init cbe_regs_init(void)
+u32 cbe_cpu_to_node(int cpu)
 {
-	int i;
-	struct device_node *cpu;
+	return cbe_thread_map[cpu].cbe_id;
+}
+EXPORT_SYMBOL_GPL(cbe_cpu_to_node);
 
-	/* Build local fast map of CPUs */
-	for_each_possible_cpu(i)
-		cbe_thread_map[i].cpu_node = of_get_cpu_node(i, NULL);
+u32 cbe_node_to_cpu(int node)
+{
+	return find_first_bit( (unsigned long *) &cbe_local_mask[node], sizeof(cpumask_t));
+}
+EXPORT_SYMBOL_GPL(cbe_node_to_cpu);
 
-	/* Find maps for each device tree CPU */
-	for_each_node_by_type(cpu, "cpu") {
-		struct cbe_regs_map *map = &cbe_regs_maps[cbe_regs_map_count++];
+static struct device_node *cbe_get_be_node(int cpu_id)
+{
+	struct device_node *np;
+
+	for_each_node_by_type (np, "be") {
+		int len,i;
+		const phandle *cpu_handle;
+
+		cpu_handle = of_get_property(np, "cpus", &len);
+
+		for (i=0; i<len; i++)
+			if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL))
+				return np;
+	}
+
+	return NULL;
+}
+
+void __init cbe_fill_regs_map(struct cbe_regs_map *map)
+{
+	if(map->be_node) {
+		struct device_node *be, *np;
+
+		be = map->be_node;
+
+		for_each_node_by_type(np, "pervasive")
+			if (of_get_parent(np) == be)
+				map->pmd_regs = of_iomap(np, 0);
+
+		for_each_node_by_type(np, "CBEA-Internal-Interrupt-Controller")
+			if (of_get_parent(np) == be)
+				map->iic_regs = of_iomap(np, 2);
 
+		for_each_node_by_type(np, "mic-tm")
+			if (of_get_parent(np) == be)
+				map->mic_tm_regs = of_iomap(np, 0);
+	} else {
+		struct device_node *cpu;
 		/* That hack must die die die ! */
 		const struct address_prop {
 			unsigned long address;
 			unsigned int len;
 		} __attribute__((packed)) *prop;
 
+		cpu = map->cpu_node;
+
+		prop = of_get_property(cpu, "pervasive", NULL);
+		if (prop != NULL)
+			map->pmd_regs = ioremap(prop->address, prop->len);
+
+		prop = of_get_property(cpu, "iic", NULL);
+		if (prop != NULL)
+			map->iic_regs = ioremap(prop->address, prop->len);
+
+		prop = of_get_property(cpu, "mic-tm", NULL);
+		if (prop != NULL)
+			map->mic_tm_regs = ioremap(prop->address, prop->len);
+	}
+}
+
+
+void __init cbe_regs_init(void)
+{
+	int i;
+	unsigned int thread_id;
+	struct device_node *cpu;
+
+	/* Build local fast map of CPUs */
+	for_each_possible_cpu(i) {
+		cbe_thread_map[i].cpu_node = of_get_cpu_node(i, &thread_id);
+		cbe_thread_map[i].be_node = cbe_get_be_node(i);
+		cbe_thread_map[i].thread_id = thread_id;
+	}
+
+	/* Find maps for each device tree CPU */
+	for_each_node_by_type(cpu, "cpu") {
+		struct cbe_regs_map *map;
+		unsigned int cbe_id;
+
+		cbe_id = cbe_regs_map_count++;
+		map = &cbe_regs_maps[cbe_id];
 
 		if (cbe_regs_map_count > MAX_CBE) {
 			printk(KERN_ERR "cbe_regs: More BE chips than supported"
@@ -170,22 +253,21 @@ void __init cbe_regs_init(void)
 			return;
 		}
 		map->cpu_node = cpu;
-		for_each_possible_cpu(i)
-			if (cbe_thread_map[i].cpu_node == cpu)
-				cbe_thread_map[i].regs = map;
 
-		prop = get_property(cpu, "pervasive", NULL);
-		if (prop != NULL)
-			map->pmd_regs = ioremap(prop->address, prop->len);
+		for_each_possible_cpu(i) {
+			struct cbe_thread_map *thread = &cbe_thread_map[i];
 
-		prop = get_property(cpu, "iic", NULL);
-		if (prop != NULL)
-			map->iic_regs = ioremap(prop->address, prop->len);
+			if (thread->cpu_node == cpu) {
+				thread->regs = map;
+				thread->cbe_id = cbe_id;
+				map->be_node = thread->be_node;
+				cpu_set(i, cbe_local_mask[cbe_id]);
+				if(thread->thread_id == 0)
+					cpu_set(i, cbe_first_online_cpu);
+			}
+		}
 
-		prop = (struct address_prop *)get_property(cpu, "mic-tm",
-							   NULL);
-		if (prop != NULL)
-			map->mic_tm_regs = ioremap(prop->address, prop->len);
+		cbe_fill_regs_map(map);
 	}
 }
 
diff --git a/arch/powerpc/platforms/cell/cbe_regs.h b/arch/powerpc/platforms/cell/cbe_regs.h
index 440a7ecc66ea..17d597144877 100644
--- a/arch/powerpc/platforms/cell/cbe_regs.h
+++ b/arch/powerpc/platforms/cell/cbe_regs.h
@@ -255,6 +255,11 @@ struct cbe_mic_tm_regs {
 extern struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np);
 extern struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu);
 
+/* some utility functions to deal with SMT */
+extern u32 cbe_get_hw_thread_id(int cpu);
+extern u32 cbe_cpu_to_node(int cpu);
+extern u32 cbe_node_to_cpu(int node);
+
 /* Init this module early */
 extern void cbe_regs_init(void);
 
diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c
index 70e0d968d30f..f370f0fa6f4c 100644
--- a/arch/powerpc/platforms/cell/cbe_thermal.c
+++ b/arch/powerpc/platforms/cell/cbe_thermal.c
@@ -1,6 +1,31 @@
 /*
  * thermal support for the cell processor
  *
+ * This module adds some sysfs attributes to cpu and spu nodes.
+ * Base for measurements are the digital thermal sensors (DTS)
+ * located on the chip.
+ * The accuracy is 2 degrees, starting from 65 up to 125 degrees celsius
+ * The attributes can be found under
+ * /sys/devices/system/cpu/cpuX/thermal
+ * /sys/devices/system/spu/spuX/thermal
+ *
+ * The following attributes are added for each node:
+ * temperature:
+ *	contains the current temperature measured by the DTS
+ * throttle_begin:
+ *	throttling begins when temperature is greater or equal to
+ *	throttle_begin. Setting this value to 125 prevents throttling.
+ * throttle_end:
+ *	throttling is being ceased, if the temperature is lower than
+ *	throttle_end. Due to a delay between applying throttling and
+ *	a reduced temperature this value should be less than throttle_begin.
+ *	A value equal to throttle_begin provides only a very little hysteresis.
+ * throttle_full_stop:
+ *	If the temperatrue is greater or equal to throttle_full_stop,
+ *	full throttling is applied to the cpu or spu. This value should be
+ *	greater than throttle_begin and throttle_end. Setting this value to
+ *	65 prevents the unit from running code at all.
+ *
  * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
  *
  * Author: Christian Krafft <krafft@de.ibm.com>
@@ -31,6 +56,26 @@
 #include "cbe_regs.h"
 #include "spu_priv1_mmio.h"
 
+#define TEMP_MIN 65
+#define TEMP_MAX 125
+
+#define SYSDEV_PREFIX_ATTR(_prefix,_name,_mode)			\
+struct sysdev_attribute attr_ ## _prefix ## _ ## _name = {	\
+	.attr = { .name = __stringify(_name), .mode = _mode },	\
+	.show	= _prefix ## _show_ ## _name,			\
+	.store	= _prefix ## _store_ ## _name,			\
+};
+
+static inline u8 reg_to_temp(u8 reg_value)
+{
+	return ((reg_value & 0x3f) << 1) + TEMP_MIN;
+}
+
+static inline u8 temp_to_reg(u8 temp)
+{
+	return ((temp - TEMP_MIN) >> 1) & 0x3f;
+}
+
 static struct cbe_pmd_regs __iomem *get_pmd_regs(struct sys_device *sysdev)
 {
 	struct spu *spu;
@@ -43,14 +88,14 @@ static struct cbe_pmd_regs __iomem *get_pmd_regs(struct sys_device *sysdev)
 /* returns the value for a given spu in a given register */
 static u8 spu_read_register_value(struct sys_device *sysdev, union spe_reg __iomem *reg)
 {
-	unsigned int *id;
+	const unsigned int *id;
 	union spe_reg value;
 	struct spu *spu;
 
 	/* getting the id from the reg attribute will not work on future device-tree layouts
 	 * in future we should store the id to the spu struct and use it here */
 	spu = container_of(sysdev, struct spu, sysdev);
-	id = (unsigned int *)get_property(spu_devnode(spu), "reg", NULL);
+	id = of_get_property(spu_devnode(spu), "reg", NULL);
 	value.val = in_be64(&reg->val);
 
 	return value.spe[*id];
@@ -58,20 +103,81 @@ static u8 spu_read_register_value(struct sys_device *sysdev, union spe_reg __iom
 
 static ssize_t spu_show_temp(struct sys_device *sysdev, char *buf)
 {
-	int value;
+	u8 value;
 	struct cbe_pmd_regs __iomem *pmd_regs;
 
 	pmd_regs = get_pmd_regs(sysdev);
 
 	value = spu_read_register_value(sysdev, &pmd_regs->ts_ctsr1);
-	/* clear all other bits */
+
+	return sprintf(buf, "%d\n", reg_to_temp(value));
+}
+
+static ssize_t show_throttle(struct cbe_pmd_regs __iomem *pmd_regs, char *buf, int pos)
+{
+	u64 value;
+
+	value = in_be64(&pmd_regs->tm_tpr.val);
+	/* access the corresponding byte */
+	value >>= pos;
 	value &= 0x3F;
-	/* temp is stored in steps of 2 degrees */
-	value *= 2;
-	/* base temp is 65 degrees */
-	value += 65;
 
-	return sprintf(buf, "%d\n", (int) value);
+	return sprintf(buf, "%d\n", reg_to_temp(value));
+}
+
+static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char *buf, size_t size, int pos)
+{
+	u64 reg_value;
+	int temp;
+	u64 new_value;
+	int ret;
+
+	ret = sscanf(buf, "%u", &temp);
+
+	if (ret != 1 || temp < TEMP_MIN || temp > TEMP_MAX)
+		return -EINVAL;
+
+	new_value = temp_to_reg(temp);
+
+	reg_value = in_be64(&pmd_regs->tm_tpr.val);
+
+	/* zero out bits for new value */
+	reg_value &= ~(0xffull << pos);
+	/* set bits to new value */
+	reg_value |= new_value << pos;
+
+	out_be64(&pmd_regs->tm_tpr.val, reg_value);
+	return size;
+}
+
+static ssize_t spu_show_throttle_end(struct sys_device *sysdev, char *buf)
+{
+	return show_throttle(get_pmd_regs(sysdev), buf, 0);
+}
+
+static ssize_t spu_show_throttle_begin(struct sys_device *sysdev, char *buf)
+{
+	return show_throttle(get_pmd_regs(sysdev), buf, 8);
+}
+
+static ssize_t spu_show_throttle_full_stop(struct sys_device *sysdev, char *buf)
+{
+	return show_throttle(get_pmd_regs(sysdev), buf, 16);
+}
+
+static ssize_t spu_store_throttle_end(struct sys_device *sysdev, const char *buf, size_t size)
+{
+	return store_throttle(get_pmd_regs(sysdev), buf, size, 0);
+}
+
+static ssize_t spu_store_throttle_begin(struct sys_device *sysdev, const char *buf, size_t size)
+{
+	return store_throttle(get_pmd_regs(sysdev), buf, size, 8);
+}
+
+static ssize_t spu_store_throttle_full_stop(struct sys_device *sysdev, const char *buf, size_t size)
+{
+	return store_throttle(get_pmd_regs(sysdev), buf, size, 16);
 }
 
 static ssize_t ppe_show_temp(struct sys_device *sysdev, char *buf, int pos)
@@ -82,16 +188,9 @@ static ssize_t ppe_show_temp(struct sys_device *sysdev, char *buf, int pos)
 	pmd_regs = cbe_get_cpu_pmd_regs(sysdev->id);
 	value = in_be64(&pmd_regs->ts_ctsr2);
 
-	/* access the corresponding byte */
-	value >>= pos;
-	/* clear all other bits */
-	value &= 0x3F;
-	/* temp is stored in steps of 2 degrees */
-	value *= 2;
-	/* base temp is 65 degrees */
-	value += 65;
+	value = (value >> pos) & 0x3f;
 
-	return sprintf(buf, "%d\n", (int) value);
+	return sprintf(buf, "%d\n", reg_to_temp(value));
 }
 
 
@@ -108,13 +207,52 @@ static ssize_t ppe_show_temp1(struct sys_device *sysdev, char *buf)
 	return ppe_show_temp(sysdev, buf, 0);
 }
 
+static ssize_t ppe_show_throttle_end(struct sys_device *sysdev, char *buf)
+{
+	return show_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, 32);
+}
+
+static ssize_t ppe_show_throttle_begin(struct sys_device *sysdev, char *buf)
+{
+	return show_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, 40);
+}
+
+static ssize_t ppe_show_throttle_full_stop(struct sys_device *sysdev, char *buf)
+{
+	return show_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, 48);
+}
+
+static ssize_t ppe_store_throttle_end(struct sys_device *sysdev, const char *buf, size_t size)
+{
+	return store_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, size, 32);
+}
+
+static ssize_t ppe_store_throttle_begin(struct sys_device *sysdev, const char *buf, size_t size)
+{
+	return store_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, size, 40);
+}
+
+static ssize_t ppe_store_throttle_full_stop(struct sys_device *sysdev, const char *buf, size_t size)
+{
+	return store_throttle(cbe_get_cpu_pmd_regs(sysdev->id), buf, size, 48);
+}
+
+
 static struct sysdev_attribute attr_spu_temperature = {
 	.attr = {.name = "temperature", .mode = 0400 },
 	.show = spu_show_temp,
 };
 
+static SYSDEV_PREFIX_ATTR(spu, throttle_end, 0600);
+static SYSDEV_PREFIX_ATTR(spu, throttle_begin, 0600);
+static SYSDEV_PREFIX_ATTR(spu, throttle_full_stop, 0600);
+
+
 static struct attribute *spu_attributes[] = {
 	&attr_spu_temperature.attr,
+	&attr_spu_throttle_end.attr,
+	&attr_spu_throttle_begin.attr,
+	&attr_spu_throttle_full_stop.attr,
 	NULL,
 };
 
@@ -133,9 +271,16 @@ static struct sysdev_attribute attr_ppe_temperature1 = {
 	.show = ppe_show_temp1,
 };
 
+static SYSDEV_PREFIX_ATTR(ppe, throttle_end, 0600);
+static SYSDEV_PREFIX_ATTR(ppe, throttle_begin, 0600);
+static SYSDEV_PREFIX_ATTR(ppe, throttle_full_stop, 0600);
+
 static struct attribute *ppe_attributes[] = {
 	&attr_ppe_temperature0.attr,
 	&attr_ppe_temperature1.attr,
+	&attr_ppe_throttle_end.attr,
+	&attr_ppe_throttle_begin.attr,
+	&attr_ppe_throttle_full_stop.attr,
 	NULL,
 };
 
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 6666d037eb44..4fc4e92775d0 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -261,7 +261,7 @@ static int iic_host_xlate(struct irq_host *h, struct device_node *ct,
 		return -ENODEV;
 	if (intsize != 1)
 		return -ENODEV;
-	val = get_property(ct, "#interrupt-cells", NULL);
+	val = of_get_property(ct, "#interrupt-cells", NULL);
 	if (val == NULL || *val != 1)
 		return -ENODEV;
 
@@ -327,7 +327,7 @@ static int __init setup_iic(void)
 		if (!device_is_compatible(dn,
 				     "IBM,CBEA-Internal-Interrupt-Controller"))
 			continue;
-		np = get_property(dn, "ibm,interrupt-server-ranges", NULL);
+		np = of_get_property(dn, "ibm,interrupt-server-ranges", NULL);
 		if (np == NULL) {
 			printk(KERN_WARNING "IIC: CPU association not found\n");
 			of_node_put(dn);
diff --git a/arch/powerpc/platforms/cell/io-workarounds.c b/arch/powerpc/platforms/cell/io-workarounds.c
index 7c73128305ec..d68d920eb2c4 100644
--- a/arch/powerpc/platforms/cell/io-workarounds.c
+++ b/arch/powerpc/platforms/cell/io-workarounds.c
@@ -318,7 +318,7 @@ static int __init spider_pci_workaround_init(void)
 	 */
 	list_for_each_entry(phb, &hose_list, list_node) {
 		struct device_node *np = phb->arch_data;
-		const char *model = get_property(np, "model", NULL);
+		const char *model = of_get_property(np, "model", NULL);
 
 		/* If no model property or name isn't exactly "pci", skip */
 		if (model == NULL || strcmp(np->name, "pci"))
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 67d617b60a23..760caa76841a 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -291,9 +291,9 @@ static int cell_iommu_find_ioc(int nid, unsigned long *base)
 		const unsigned int *nidp;
 		const unsigned long *tmp;
 
-		nidp = get_property(np, "node-id", NULL);
+		nidp = of_get_property(np, "node-id", NULL);
 		if (nidp && *nidp == nid) {
-			tmp = get_property(np, "ioc-translation", NULL);
+			tmp = of_get_property(np, "ioc-translation", NULL);
 			if (tmp) {
 				*base = *tmp;
 				of_node_put(np);
@@ -430,7 +430,7 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
 	struct iommu_window *window;
 	const unsigned int *ioid;
 
-	ioid = get_property(np, "ioid", NULL);
+	ioid = of_get_property(np, "ioid", NULL);
 	if (ioid == NULL)
 		printk(KERN_WARNING "iommu: missing ioid for %s using 0\n",
 		       np->full_name);
@@ -496,7 +496,7 @@ static void cell_dma_dev_setup(struct device *dev)
 	struct dev_archdata *archdata = &dev->archdata;
 
 	/* If we run without iommu, no need to do anything */
-	if (pci_dma_ops == &dma_direct_ops)
+	if (get_pci_dma_ops() == &dma_direct_ops)
 		return;
 
 	/* Current implementation uses the first window available in that
@@ -530,7 +530,7 @@ static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action,
 		return 0;
 
 	/* We use the PCI DMA ops */
-	dev->archdata.dma_ops = pci_dma_ops;
+	dev->archdata.dma_ops = get_pci_dma_ops();
 
 	cell_dma_dev_setup(dev);
 
@@ -549,7 +549,7 @@ static int __init cell_iommu_get_window(struct device_node *np,
 	unsigned long index;
 
 	/* Use ibm,dma-window if available, else, hard code ! */
-	dma_window = get_property(np, "ibm,dma-window", NULL);
+	dma_window = of_get_property(np, "ibm,dma-window", NULL);
 	if (dma_window == NULL) {
 		*base = 0;
 		*size = 0x80000000u;
@@ -646,7 +646,7 @@ static int __init cell_iommu_init_disabled(void)
 	unsigned long base = 0, size;
 
 	/* When no iommu is present, we use direct DMA ops */
-	pci_dma_ops = &dma_direct_ops;
+	set_pci_dma_ops(&dma_direct_ops);
 
 	/* First make sure all IOC translation is turned off */
 	cell_disable_iommus();
@@ -734,7 +734,7 @@ static int __init cell_iommu_init(void)
 	}
 
 	/* Setup default PCI iommu ops */
-	pci_dma_ops = &dma_iommu_ops;
+	set_pci_dma_ops(&dma_iommu_ops);
 
  bail:
 	/* Register callbacks on OF platform device addition/removal
diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c
index 0984c7071695..3961a085b432 100644
--- a/arch/powerpc/platforms/cell/ras.c
+++ b/arch/powerpc/platforms/cell/ras.c
@@ -3,11 +3,13 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/smp.h>
+#include <linux/reboot.h>
 
 #include <asm/reg.h>
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/machdep.h>
+#include <asm/rtas.h>
 
 #include "ras.h"
 #include "cbe_regs.h"
@@ -82,6 +84,164 @@ static int cbe_machine_check_handler(struct pt_regs *regs)
 	return 0;
 }
 
+struct ptcal_area {
+	struct list_head list;
+	int nid;
+	int order;
+	struct page *pages;
+};
+
+static LIST_HEAD(ptcal_list);
+
+static int ptcal_start_tok, ptcal_stop_tok;
+
+static int __init cbe_ptcal_enable_on_node(int nid, int order)
+{
+	struct ptcal_area *area;
+	int ret = -ENOMEM;
+	unsigned long addr;
+
+#ifdef CONFIG_CRASH_DUMP
+	rtas_call(ptcal_stop_tok, 1, 1, NULL, nid);
+#endif
+
+	area = kmalloc(sizeof(*area), GFP_KERNEL);
+	if (!area)
+		goto out_err;
+
+	area->nid = nid;
+	area->order = order;
+	area->pages = alloc_pages_node(area->nid, GFP_KERNEL, area->order);
+
+	if (!area->pages)
+		goto out_free_area;
+
+	addr = __pa(page_address(area->pages));
+
+	ret = -EIO;
+	if (rtas_call(ptcal_start_tok, 3, 1, NULL, area->nid,
+				(unsigned int)(addr >> 32),
+				(unsigned int)(addr & 0xffffffff))) {
+		printk(KERN_ERR "%s: error enabling PTCAL on node %d!\n",
+				__FUNCTION__, nid);
+		goto out_free_pages;
+	}
+
+	list_add(&area->list, &ptcal_list);
+
+	return 0;
+
+out_free_pages:
+	__free_pages(area->pages, area->order);
+out_free_area:
+	kfree(area);
+out_err:
+	return ret;
+}
+
+static int __init cbe_ptcal_enable(void)
+{
+	const u32 *size;
+	struct device_node *np;
+	int order, found_mic = 0;
+
+	np = of_find_node_by_path("/rtas");
+	if (!np)
+		return -ENODEV;
+
+	size = of_get_property(np, "ibm,cbe-ptcal-size", NULL);
+	if (!size)
+		return -ENODEV;
+
+	pr_debug("%s: enabling PTCAL, size = 0x%x\n", __FUNCTION__, *size);
+	order = get_order(*size);
+	of_node_put(np);
+
+	/* support for malta device trees, with be@/mic@ nodes */
+	for_each_node_by_type(np, "mic-tm") {
+		cbe_ptcal_enable_on_node(of_node_to_nid(np), order);
+		found_mic = 1;
+	}
+
+	if (found_mic)
+		return 0;
+
+	/* support for older device tree - use cpu nodes */
+	for_each_node_by_type(np, "cpu") {
+		const u32 *nid = of_get_property(np, "node-id", NULL);
+		if (!nid) {
+			printk(KERN_ERR "%s: node %s is missing node-id?\n",
+					__FUNCTION__, np->full_name);
+			continue;
+		}
+		cbe_ptcal_enable_on_node(*nid, order);
+		found_mic = 1;
+	}
+
+	return found_mic ? 0 : -ENODEV;
+}
+
+static int cbe_ptcal_disable(void)
+{
+	struct ptcal_area *area, *tmp;
+	int ret = 0;
+
+	pr_debug("%s: disabling PTCAL\n", __FUNCTION__);
+
+	list_for_each_entry_safe(area, tmp, &ptcal_list, list) {
+		/* disable ptcal on this node */
+		if (rtas_call(ptcal_stop_tok, 1, 1, NULL, area->nid)) {
+			printk(KERN_ERR "%s: error disabling PTCAL "
+					"on node %d!\n", __FUNCTION__,
+					area->nid);
+			ret = -EIO;
+			continue;
+		}
+
+		/* ensure we can access the PTCAL area */
+		memset(page_address(area->pages), 0,
+				1 << (area->order + PAGE_SHIFT));
+
+		/* clean up */
+		list_del(&area->list);
+		__free_pages(area->pages, area->order);
+		kfree(area);
+	}
+
+	return ret;
+}
+
+static int cbe_ptcal_notify_reboot(struct notifier_block *nb,
+		unsigned long code, void *data)
+{
+	return cbe_ptcal_disable();
+}
+
+static struct notifier_block cbe_ptcal_reboot_notifier = {
+	.notifier_call = cbe_ptcal_notify_reboot
+};
+
+int __init cbe_ptcal_init(void)
+{
+	int ret;
+	ptcal_start_tok = rtas_token("ibm,cbe-start-ptcal");
+	ptcal_stop_tok = rtas_token("ibm,cbe-stop-ptcal");
+
+	if (ptcal_start_tok == RTAS_UNKNOWN_SERVICE
+			|| ptcal_stop_tok == RTAS_UNKNOWN_SERVICE)
+		return -ENODEV;
+
+	ret = register_reboot_notifier(&cbe_ptcal_reboot_notifier);
+	if (ret) {
+		printk(KERN_ERR "Can't disable PTCAL, so not enabling\n");
+		return ret;
+	}
+
+	return cbe_ptcal_enable();
+}
+
+arch_initcall(cbe_ptcal_init);
+
 void __init cbe_ras_init(void)
 {
 	unsigned long hid0;
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index 36989c2eee66..54b96183cb64 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -71,7 +71,7 @@ static void cell_show_cpuinfo(struct seq_file *m)
 
 	root = of_find_node_by_path("/");
 	if (root)
-		model = get_property(root, "model", NULL);
+		model = of_get_property(root, "model", NULL);
 	seq_printf(m, "machine\t\t: CHRP %s\n", model);
 	of_node_put(root);
 }
@@ -190,15 +190,6 @@ static int __init cell_probe(void)
 	return 1;
 }
 
-/*
- * Cell has no legacy IO; anything calling this function has to
- * fail or bad things will happen
- */
-static int cell_check_legacy_ioport(unsigned int baseport)
-{
-	return -ENODEV;
-}
-
 define_machine(cell) {
 	.name			= "Cell",
 	.probe			= cell_probe,
@@ -211,7 +202,6 @@ define_machine(cell) {
 	.get_rtc_time		= rtas_get_rtc_time,
 	.set_rtc_time		= rtas_set_rtc_time,
 	.calibrate_decr		= generic_calibrate_decr,
-	.check_legacy_ioport	= cell_check_legacy_ioport,
 	.progress		= cell_progress,
 	.init_IRQ       	= cell_init_irq,
 	.pci_setup_phb		= rtas_setup_phb,
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index 21a9ebd4978e..fb1f15797bbb 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -254,25 +254,25 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
 	}
 
 	/* Now do the horrible hacks */
-	tmp = get_property(pic->of_node, "#interrupt-cells", NULL);
+	tmp = of_get_property(pic->of_node, "#interrupt-cells", NULL);
 	if (tmp == NULL)
 		return NO_IRQ;
 	intsize = *tmp;
-	imap = get_property(pic->of_node, "interrupt-map", &imaplen);
+	imap = of_get_property(pic->of_node, "interrupt-map", &imaplen);
 	if (imap == NULL || imaplen < (intsize + 1))
 		return NO_IRQ;
 	iic = of_find_node_by_phandle(imap[intsize]);
 	if (iic == NULL)
 		return NO_IRQ;
 	imap += intsize + 1;
-	tmp = get_property(iic, "#interrupt-cells", NULL);
+	tmp = of_get_property(iic, "#interrupt-cells", NULL);
 	if (tmp == NULL)
 		return NO_IRQ;
 	intsize = *tmp;
 	/* Assume unit is last entry of interrupt specifier */
 	unit = imap[intsize - 1];
 	/* Ok, we have a unit, now let's try to get the node */
-	tmp = get_property(iic, "ibm,interrupt-server-ranges", NULL);
+	tmp = of_get_property(iic, "ibm,interrupt-server-ranges", NULL);
 	if (tmp == NULL) {
 		of_node_put(iic);
 		return NO_IRQ;
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index c43999a10deb..fec51525252e 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -36,10 +36,65 @@
 #include <asm/xmon.h>
 
 const struct spu_management_ops *spu_management_ops;
+EXPORT_SYMBOL_GPL(spu_management_ops);
+
 const struct spu_priv1_ops *spu_priv1_ops;
 
+static struct list_head spu_list[MAX_NUMNODES];
+static LIST_HEAD(spu_full_list);
+static DEFINE_MUTEX(spu_mutex);
+static DEFINE_SPINLOCK(spu_list_lock);
+
 EXPORT_SYMBOL_GPL(spu_priv1_ops);
 
+void spu_invalidate_slbs(struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK)
+		out_be64(&priv2->slb_invalidate_all_W, 0UL);
+}
+EXPORT_SYMBOL_GPL(spu_invalidate_slbs);
+
+/* This is called by the MM core when a segment size is changed, to
+ * request a flush of all the SPEs using a given mm
+ */
+void spu_flush_all_slbs(struct mm_struct *mm)
+{
+	struct spu *spu;
+	unsigned long flags;
+
+	spin_lock_irqsave(&spu_list_lock, flags);
+	list_for_each_entry(spu, &spu_full_list, full_list) {
+		if (spu->mm == mm)
+			spu_invalidate_slbs(spu);
+	}
+	spin_unlock_irqrestore(&spu_list_lock, flags);
+}
+
+/* The hack below stinks... try to do something better one of
+ * these days... Does it even work properly with NR_CPUS == 1 ?
+ */
+static inline void mm_needs_global_tlbie(struct mm_struct *mm)
+{
+	int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;
+
+	/* Global TLBIE broadcast required with SPEs. */
+	__cpus_setall(&mm->cpu_vm_mask, nr);
+}
+
+void spu_associate_mm(struct spu *spu, struct mm_struct *mm)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&spu_list_lock, flags);
+	spu->mm = mm;
+	spin_unlock_irqrestore(&spu_list_lock, flags);
+	if (mm)
+		mm_needs_global_tlbie(mm);
+}
+EXPORT_SYMBOL_GPL(spu_associate_mm);
+
 static int __spu_trap_invalid_dma(struct spu *spu)
 {
 	pr_debug("%s\n", __FUNCTION__);
@@ -74,6 +129,7 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
 	struct spu_priv2 __iomem *priv2 = spu->priv2;
 	struct mm_struct *mm = spu->mm;
 	u64 esid, vsid, llp;
+	int psize;
 
 	pr_debug("%s\n", __FUNCTION__);
 
@@ -90,22 +146,25 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
 	case USER_REGION_ID:
 #ifdef CONFIG_HUGETLB_PAGE
 		if (in_hugepage_area(mm->context, ea))
-			llp = mmu_psize_defs[mmu_huge_psize].sllp;
+			psize = mmu_huge_psize;
 		else
 #endif
-			llp = mmu_psize_defs[mmu_virtual_psize].sllp;
+			psize = mm->context.user_psize;
 		vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) |
-				SLB_VSID_USER | llp;
+				SLB_VSID_USER;
 		break;
 	case VMALLOC_REGION_ID:
-		llp = mmu_psize_defs[mmu_virtual_psize].sllp;
+		if (ea < VMALLOC_END)
+			psize = mmu_vmalloc_psize;
+		else
+			psize = mmu_io_psize;
 		vsid = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) |
-			SLB_VSID_KERNEL | llp;
+			SLB_VSID_KERNEL;
 		break;
 	case KERNEL_REGION_ID:
-		llp = mmu_psize_defs[mmu_linear_psize].sllp;
+		psize = mmu_linear_psize;
 		vsid = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) |
-			SLB_VSID_KERNEL | llp;
+			SLB_VSID_KERNEL;
 		break;
 	default:
 		/* Future: support kernel segments so that drivers
@@ -114,9 +173,10 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
 		pr_debug("invalid region access at %016lx\n", ea);
 		return 1;
 	}
+	llp = mmu_psize_defs[psize].sllp;
 
 	out_be64(&priv2->slb_index_W, spu->slb_replace);
-	out_be64(&priv2->slb_vsid_RW, vsid);
+	out_be64(&priv2->slb_vsid_RW, vsid | llp);
 	out_be64(&priv2->slb_esid_RW, esid);
 
 	spu->slb_replace++;
@@ -232,7 +292,6 @@ spu_irq_class_1(int irq, void *data)
 
 	return stat ? IRQ_HANDLED : IRQ_NONE;
 }
-EXPORT_SYMBOL_GPL(spu_irq_class_1_bottom);
 
 static irqreturn_t
 spu_irq_class_2(int irq, void *data)
@@ -330,10 +389,6 @@ static void spu_free_irqs(struct spu *spu)
 		free_irq(spu->irqs[2], spu);
 }
 
-static struct list_head spu_list[MAX_NUMNODES];
-static LIST_HEAD(spu_full_list);
-static DEFINE_MUTEX(spu_mutex);
-
 static void spu_init_channels(struct spu *spu)
 {
 	static const struct {
@@ -377,10 +432,11 @@ struct spu *spu_alloc_node(int node)
 		spu = list_entry(spu_list[node].next, struct spu, list);
 		list_del_init(&spu->list);
 		pr_debug("Got SPU %d %d\n", spu->number, spu->node);
-		spu_init_channels(spu);
 	}
 	mutex_unlock(&spu_mutex);
 
+	if (spu)
+		spu_init_channels(spu);
 	return spu;
 }
 EXPORT_SYMBOL_GPL(spu_alloc_node);
@@ -407,108 +463,6 @@ void spu_free(struct spu *spu)
 }
 EXPORT_SYMBOL_GPL(spu_free);
 
-static int spu_handle_mm_fault(struct spu *spu)
-{
-	struct mm_struct *mm = spu->mm;
-	struct vm_area_struct *vma;
-	u64 ea, dsisr, is_write;
-	int ret;
-
-	ea = spu->dar;
-	dsisr = spu->dsisr;
-#if 0
-	if (!IS_VALID_EA(ea)) {
-		return -EFAULT;
-	}
-#endif /* XXX */
-	if (mm == NULL) {
-		return -EFAULT;
-	}
-	if (mm->pgd == NULL) {
-		return -EFAULT;
-	}
-
-	down_read(&mm->mmap_sem);
-	vma = find_vma(mm, ea);
-	if (!vma)
-		goto bad_area;
-	if (vma->vm_start <= ea)
-		goto good_area;
-	if (!(vma->vm_flags & VM_GROWSDOWN))
-		goto bad_area;
-#if 0
-	if (expand_stack(vma, ea))
-		goto bad_area;
-#endif /* XXX */
-good_area:
-	is_write = dsisr & MFC_DSISR_ACCESS_PUT;
-	if (is_write) {
-		if (!(vma->vm_flags & VM_WRITE))
-			goto bad_area;
-	} else {
-		if (dsisr & MFC_DSISR_ACCESS_DENIED)
-			goto bad_area;
-		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
-			goto bad_area;
-	}
-	ret = 0;
-	switch (handle_mm_fault(mm, vma, ea, is_write)) {
-	case VM_FAULT_MINOR:
-		current->min_flt++;
-		break;
-	case VM_FAULT_MAJOR:
-		current->maj_flt++;
-		break;
-	case VM_FAULT_SIGBUS:
-		ret = -EFAULT;
-		goto bad_area;
-	case VM_FAULT_OOM:
-		ret = -ENOMEM;
-		goto bad_area;
-	default:
-		BUG();
-	}
-	up_read(&mm->mmap_sem);
-	return ret;
-
-bad_area:
-	up_read(&mm->mmap_sem);
-	return -EFAULT;
-}
-
-int spu_irq_class_1_bottom(struct spu *spu)
-{
-	u64 ea, dsisr, access, error = 0UL;
-	int ret = 0;
-
-	ea = spu->dar;
-	dsisr = spu->dsisr;
-	if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)) {
-		u64 flags;
-
-		access = (_PAGE_PRESENT | _PAGE_USER);
-		access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
-		local_irq_save(flags);
-		if (hash_page(ea, access, 0x300) != 0)
-			error |= CLASS1_ENABLE_STORAGE_FAULT_INTR;
-		local_irq_restore(flags);
-	}
-	if (error & CLASS1_ENABLE_STORAGE_FAULT_INTR) {
-		if ((ret = spu_handle_mm_fault(spu)) != 0)
-			error |= CLASS1_ENABLE_STORAGE_FAULT_INTR;
-		else
-			error &= ~CLASS1_ENABLE_STORAGE_FAULT_INTR;
-	}
-	spu->dar = 0UL;
-	spu->dsisr = 0UL;
-	if (!error) {
-		spu_restart_dma(spu);
-	} else {
-		spu->dma_callback(spu, SPE_EVENT_SPE_DATA_STORAGE);
-	}
-	return ret;
-}
-
 struct sysdev_class spu_sysdev_class = {
 	set_kset_name("spu")
 };
@@ -582,17 +536,12 @@ static int spu_create_sysdev(struct spu *spu)
 	return 0;
 }
 
-static void spu_destroy_sysdev(struct spu *spu)
-{
-	sysfs_remove_device_from_node(&spu->sysdev, spu->node);
-	sysdev_unregister(&spu->sysdev);
-}
-
 static int __init create_spu(void *data)
 {
 	struct spu *spu;
 	int ret;
 	static int number;
+	unsigned long flags;
 
 	ret = -ENOMEM;
 	spu = kzalloc(sizeof (*spu), GFP_KERNEL);
@@ -620,8 +569,10 @@ static int __init create_spu(void *data)
 		goto out_free_irqs;
 
 	mutex_lock(&spu_mutex);
+	spin_lock_irqsave(&spu_list_lock, flags);
 	list_add(&spu->list, &spu_list[spu->node]);
 	list_add(&spu->full_list, &spu_full_list);
+	spin_unlock_irqrestore(&spu_list_lock, flags);
 	mutex_unlock(&spu_mutex);
 
 	goto out;
@@ -636,58 +587,37 @@ out:
 	return ret;
 }
 
-static void destroy_spu(struct spu *spu)
-{
-	list_del_init(&spu->list);
-	list_del_init(&spu->full_list);
-
-	spu_destroy_sysdev(spu);
-	spu_free_irqs(spu);
-	spu_destroy_spu(spu);
-	kfree(spu);
-}
-
-static void cleanup_spu_base(void)
-{
-	struct spu *spu, *tmp;
-	int node;
-
-	mutex_lock(&spu_mutex);
-	for (node = 0; node < MAX_NUMNODES; node++) {
-		list_for_each_entry_safe(spu, tmp, &spu_list[node], list)
-			destroy_spu(spu);
-	}
-	mutex_unlock(&spu_mutex);
-	sysdev_class_unregister(&spu_sysdev_class);
-}
-module_exit(cleanup_spu_base);
-
 static int __init init_spu_base(void)
 {
-	int i, ret;
+	int i, ret = 0;
+
+	for (i = 0; i < MAX_NUMNODES; i++)
+		INIT_LIST_HEAD(&spu_list[i]);
 
 	if (!spu_management_ops)
-		return 0;
+		goto out;
 
 	/* create sysdev class for spus */
 	ret = sysdev_class_register(&spu_sysdev_class);
 	if (ret)
-		return ret;
-
-	for (i = 0; i < MAX_NUMNODES; i++)
-		INIT_LIST_HEAD(&spu_list[i]);
+		goto out;
 
 	ret = spu_enumerate_spus(create_spu);
 
 	if (ret) {
 		printk(KERN_WARNING "%s: Error initializing spus\n",
 			__FUNCTION__);
-		cleanup_spu_base();
-		return ret;
+		goto out_unregister_sysdev_class;
 	}
 
 	xmon_register_spus(&spu_full_list);
 
+	return 0;
+
+ out_unregister_sysdev_class:
+	sysdev_class_unregister(&spu_sysdev_class);
+ out:
+
 	return ret;
 }
 module_init(init_spu_base);
diff --git a/arch/powerpc/platforms/cell/spu_coredump.c b/arch/powerpc/platforms/cell/spu_coredump.c
index 6915b418ee73..4fd37ff1e210 100644
--- a/arch/powerpc/platforms/cell/spu_coredump.c
+++ b/arch/powerpc/platforms/cell/spu_coredump.c
@@ -26,19 +26,18 @@
 
 #include <asm/spu.h>
 
-static struct spu_coredump_calls spu_coredump_calls;
+static struct spu_coredump_calls *spu_coredump_calls;
 static DEFINE_MUTEX(spu_coredump_mutex);
 
 int arch_notes_size(void)
 {
 	long ret;
-	struct module *owner = spu_coredump_calls.owner;
 
 	ret = -ENOSYS;
 	mutex_lock(&spu_coredump_mutex);
-	if (owner && try_module_get(owner)) {
-		ret = spu_coredump_calls.arch_notes_size();
-		module_put(owner);
+	if (spu_coredump_calls && try_module_get(spu_coredump_calls->owner)) {
+		ret = spu_coredump_calls->arch_notes_size();
+		module_put(spu_coredump_calls->owner);
 	}
 	mutex_unlock(&spu_coredump_mutex);
 	return ret;
@@ -46,36 +45,35 @@ int arch_notes_size(void)
 
 void arch_write_notes(struct file *file)
 {
-	struct module *owner = spu_coredump_calls.owner;
-
 	mutex_lock(&spu_coredump_mutex);
-	if (owner && try_module_get(owner)) {
-		spu_coredump_calls.arch_write_notes(file);
-		module_put(owner);
+	if (spu_coredump_calls && try_module_get(spu_coredump_calls->owner)) {
+		spu_coredump_calls->arch_write_notes(file);
+		module_put(spu_coredump_calls->owner);
 	}
 	mutex_unlock(&spu_coredump_mutex);
 }
 
 int register_arch_coredump_calls(struct spu_coredump_calls *calls)
 {
-	if (spu_coredump_calls.owner)
-		return -EBUSY;
+	int ret = 0;
+
 
 	mutex_lock(&spu_coredump_mutex);
-	spu_coredump_calls.arch_notes_size = calls->arch_notes_size;
-	spu_coredump_calls.arch_write_notes = calls->arch_write_notes;
-	spu_coredump_calls.owner = calls->owner;
+	if (spu_coredump_calls)
+		ret = -EBUSY;
+	else
+		spu_coredump_calls = calls;
 	mutex_unlock(&spu_coredump_mutex);
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(register_arch_coredump_calls);
 
 void unregister_arch_coredump_calls(struct spu_coredump_calls *calls)
 {
-	BUG_ON(spu_coredump_calls.owner != calls->owner);
+	BUG_ON(spu_coredump_calls != calls);
 
 	mutex_lock(&spu_coredump_mutex);
-	spu_coredump_calls.owner = NULL;
+	spu_coredump_calls = NULL;
 	mutex_unlock(&spu_coredump_mutex);
 }
 EXPORT_SYMBOL_GPL(unregister_arch_coredump_calls);
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
index e34599f53d28..1d4562ae463d 100644
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -48,11 +48,11 @@ static u64 __init find_spu_unit_number(struct device_node *spe)
 {
 	const unsigned int *prop;
 	int proplen;
-	prop = get_property(spe, "unit-id", &proplen);
+	prop = of_get_property(spe, "unit-id", &proplen);
 	if (proplen == 4)
 		return (u64)*prop;
 
-	prop = get_property(spe, "reg", &proplen);
+	prop = of_get_property(spe, "reg", &proplen);
 	if (proplen == 4)
 		return (u64)*prop;
 
@@ -76,12 +76,12 @@ static int __init spu_map_interrupts_old(struct spu *spu,
 	int nid;
 
 	/* Get the interrupt source unit from the device-tree */
-	tmp = get_property(np, "isrc", NULL);
+	tmp = of_get_property(np, "isrc", NULL);
 	if (!tmp)
 		return -ENODEV;
 	isrc = tmp[0];
 
-	tmp = get_property(np->parent->parent, "node-id", NULL);
+	tmp = of_get_property(np->parent->parent, "node-id", NULL);
 	if (!tmp) {
 		printk(KERN_WARNING "%s: can't find node-id\n", __FUNCTION__);
 		nid = spu->node;
@@ -110,7 +110,7 @@ static void __iomem * __init spu_map_prop_old(struct spu *spu,
 	} __attribute__((packed)) *prop;
 	int proplen;
 
-	prop = get_property(n, name, &proplen);
+	prop = of_get_property(n, name, &proplen);
 	if (prop == NULL || proplen != sizeof (struct address_prop))
 		return NULL;
 
@@ -124,11 +124,11 @@ static int __init spu_map_device_old(struct spu *spu)
 	int ret;
 
 	ret = -ENODEV;
-	spu->name = get_property(node, "name", NULL);
+	spu->name = of_get_property(node, "name", NULL);
 	if (!spu->name)
 		goto out;
 
-	prop = get_property(node, "local-store", NULL);
+	prop = of_get_property(node, "local-store", NULL);
 	if (!prop)
 		goto out;
 	spu->local_store_phys = *(unsigned long *)prop;
@@ -139,7 +139,7 @@ static int __init spu_map_device_old(struct spu *spu)
 	if (!spu->local_store)
 		goto out;
 
-	prop = get_property(node, "problem", NULL);
+	prop = of_get_property(node, "problem", NULL);
 	if (!prop)
 		goto out_unmap;
 	spu->problem_phys = *(unsigned long *)prop;
@@ -226,7 +226,7 @@ static int __init spu_map_device(struct spu *spu)
 	struct device_node *np = spu->devnode;
 	int ret = -ENODEV;
 
-	spu->name = get_property(np, "name", NULL);
+	spu->name = of_get_property(np, "name", NULL);
 	if (!spu->name)
 		goto out;
 
diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile
index 472217d19faf..2cd89c11af5a 100644
--- a/arch/powerpc/platforms/cell/spufs/Makefile
+++ b/arch/powerpc/platforms/cell/spufs/Makefile
@@ -1,4 +1,4 @@
-obj-y += switch.o
+obj-y += switch.o fault.o
 
 obj-$(CONFIG_SPU_FS) += spufs.o
 spufs-y += inode.o file.o context.o syscalls.o coredump.o
diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c
index 1898f0d3a8b8..3322528fa6eb 100644
--- a/arch/powerpc/platforms/cell/spufs/backing_ops.c
+++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c
@@ -350,6 +350,11 @@ static int spu_backing_send_mfc_command(struct spu_context *ctx,
 	return ret;
 }
 
+static void spu_backing_restart_dma(struct spu_context *ctx)
+{
+	/* nothing to do here */
+}
+
 struct spu_context_ops spu_backing_ops = {
 	.mbox_read = spu_backing_mbox_read,
 	.mbox_stat_read = spu_backing_mbox_stat_read,
@@ -376,4 +381,5 @@ struct spu_context_ops spu_backing_ops = {
 	.read_mfc_tagstatus = spu_backing_read_mfc_tagstatus,
 	.get_mfc_free_elements = spu_backing_get_mfc_free_elements,
 	.send_mfc_command = spu_backing_send_mfc_command,
+	.restart_dma = spu_backing_restart_dma,
 };
diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c
index 04ad2e364e97..a87d9ca3dba2 100644
--- a/arch/powerpc/platforms/cell/spufs/context.c
+++ b/arch/powerpc/platforms/cell/spufs/context.c
@@ -41,9 +41,10 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang)
 		goto out_free;
 	}
 	spin_lock_init(&ctx->mmio_lock);
+	spin_lock_init(&ctx->mapping_lock);
 	kref_init(&ctx->kref);
 	mutex_init(&ctx->state_mutex);
-	init_MUTEX(&ctx->run_sema);
+	mutex_init(&ctx->run_mutex);
 	init_waitqueue_head(&ctx->ibox_wq);
 	init_waitqueue_head(&ctx->wbox_wq);
 	init_waitqueue_head(&ctx->stop_wq);
@@ -51,6 +52,7 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang)
 	ctx->state = SPU_STATE_SAVED;
 	ctx->ops = &spu_backing_ops;
 	ctx->owner = get_task_mm(current);
+	INIT_LIST_HEAD(&ctx->rq);
 	if (gang)
 		spu_gang_add_ctx(gang, ctx);
 	ctx->rt_priority = current->rt_priority;
@@ -75,6 +77,7 @@ void destroy_spu_context(struct kref *kref)
 	spu_fini_csa(&ctx->csa);
 	if (ctx->gang)
 		spu_gang_remove_ctx(ctx->gang, ctx);
+	BUG_ON(!list_empty(&ctx->rq));
 	kfree(ctx);
 }
 
@@ -119,46 +122,6 @@ void spu_unmap_mappings(struct spu_context *ctx)
 }
 
 /**
- * spu_acquire_exclusive - lock spu contex and protect against userspace access
- * @ctx:	spu contex to lock
- *
- * Note:
- *	Returns 0 and with the context locked on success
- *	Returns negative error and with the context _unlocked_ on failure.
- */
-int spu_acquire_exclusive(struct spu_context *ctx)
-{
-	int ret = -EINVAL;
-
-	spu_acquire(ctx);
-	/*
-	 * Context is about to be freed, so we can't acquire it anymore.
-	 */
-	if (!ctx->owner)
-		goto out_unlock;
-
-	if (ctx->state == SPU_STATE_SAVED) {
-		ret = spu_activate(ctx, 0);
-		if (ret)
-			goto out_unlock;
-	} else {
-		/*
-		 * We need to exclude userspace access to the context.
-		 *
-		 * To protect against memory access we invalidate all ptes
-		 * and make sure the pagefault handlers block on the mutex.
-		 */
-		spu_unmap_mappings(ctx);
-	}
-
-	return 0;
-
- out_unlock:
-	spu_release(ctx);
-	return ret;
-}
-
-/**
  * spu_acquire_runnable - lock spu contex and make sure it is in runnable state
  * @ctx:	spu contex to lock
  *
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index 725e19561159..5d9ad5a0307b 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -169,12 +169,12 @@ static void spufs_arch_write_note(struct spufs_ctx_info *ctx_info, int i,
 	struct spu_context *ctx;
 	loff_t pos = 0;
 	int sz, dfd, rc, total = 0;
-	const int bufsz = 4096;
+	const int bufsz = PAGE_SIZE;
 	char *name;
 	char fullname[80], *buf;
 	struct elf_note en;
 
-	buf = kmalloc(bufsz, GFP_KERNEL);
+	buf = (void *)get_zeroed_page(GFP_KERNEL);
 	if (!buf)
 		return;
 
@@ -187,9 +187,8 @@ static void spufs_arch_write_note(struct spufs_ctx_info *ctx_info, int i,
 		sz = spufs_coredump_read[i].size;
 
 	ctx = ctx_info->ctx;
-	if (!ctx) {
-		return;
-	}
+	if (!ctx)
+		goto out;
 
 	sprintf(fullname, "SPU/%d/%s", dfd, name);
 	en.n_namesz = strlen(fullname) + 1;
@@ -197,23 +196,25 @@ static void spufs_arch_write_note(struct spufs_ctx_info *ctx_info, int i,
 	en.n_type = NT_SPU;
 
 	if (!spufs_dump_write(file, &en, sizeof(en)))
-		return;
+		goto out;
 	if (!spufs_dump_write(file, fullname, en.n_namesz))
-		return;
+		goto out;
 	if (!spufs_dump_seek(file, roundup((unsigned long)file->f_pos, 4)))
-		return;
+		goto out;
 
 	do {
 		rc = do_coredump_read(i, ctx, buf, bufsz, &pos);
 		if (rc > 0) {
 			if (!spufs_dump_write(file, buf, rc))
-				return;
+				goto out;
 			total += rc;
 		}
 	} while (rc == bufsz && total < sz);
 
 	spufs_dump_seek(file, roundup((unsigned long)file->f_pos
 						- total + sz, 4));
+out:
+	free_page((unsigned long)buf);
 }
 
 static void spufs_arch_write_notes(struct file *file)
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
new file mode 100644
index 000000000000..0f75c07e29d8
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -0,0 +1,211 @@
+/*
+ * Low-level SPU handling
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+
+#include "spufs.h"
+
+/*
+ * This ought to be kept in sync with the powerpc specific do_page_fault
+ * function. Currently, there are a few corner cases that we haven't had
+ * to handle fortunately.
+ */
+static int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr)
+{
+	struct vm_area_struct *vma;
+	unsigned long is_write;
+	int ret;
+
+#if 0
+	if (!IS_VALID_EA(ea)) {
+		return -EFAULT;
+	}
+#endif /* XXX */
+	if (mm == NULL) {
+		return -EFAULT;
+	}
+	if (mm->pgd == NULL) {
+		return -EFAULT;
+	}
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, ea);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start <= ea)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+	if (expand_stack(vma, ea))
+		goto bad_area;
+good_area:
+	is_write = dsisr & MFC_DSISR_ACCESS_PUT;
+	if (is_write) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+	} else {
+		if (dsisr & MFC_DSISR_ACCESS_DENIED)
+			goto bad_area;
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto bad_area;
+	}
+	ret = 0;
+	switch (handle_mm_fault(mm, vma, ea, is_write)) {
+	case VM_FAULT_MINOR:
+		current->min_flt++;
+		break;
+	case VM_FAULT_MAJOR:
+		current->maj_flt++;
+		break;
+	case VM_FAULT_SIGBUS:
+		ret = -EFAULT;
+		goto bad_area;
+	case VM_FAULT_OOM:
+		ret = -ENOMEM;
+		goto bad_area;
+	default:
+		BUG();
+	}
+	up_read(&mm->mmap_sem);
+	return ret;
+
+bad_area:
+	up_read(&mm->mmap_sem);
+	return -EFAULT;
+}
+
+static void spufs_handle_dma_error(struct spu_context *ctx,
+				unsigned long ea, int type)
+{
+	if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
+		ctx->event_return |= type;
+		wake_up_all(&ctx->stop_wq);
+	} else {
+		siginfo_t info;
+		memset(&info, 0, sizeof(info));
+
+		switch (type) {
+		case SPE_EVENT_INVALID_DMA:
+			info.si_signo = SIGBUS;
+			info.si_code = BUS_OBJERR;
+			break;
+		case SPE_EVENT_SPE_DATA_STORAGE:
+			info.si_signo = SIGBUS;
+			info.si_addr = (void __user *)ea;
+			info.si_code = BUS_ADRERR;
+			break;
+		case SPE_EVENT_DMA_ALIGNMENT:
+			info.si_signo = SIGBUS;
+			/* DAR isn't set for an alignment fault :( */
+			info.si_code = BUS_ADRALN;
+			break;
+		case SPE_EVENT_SPE_ERROR:
+			info.si_signo = SIGILL;
+			info.si_addr = (void __user *)(unsigned long)
+				ctx->ops->npc_read(ctx) - 4;
+			info.si_code = ILL_ILLOPC;
+			break;
+		}
+		if (info.si_signo)
+			force_sig_info(info.si_signo, &info, current);
+	}
+}
+
+void spufs_dma_callback(struct spu *spu, int type)
+{
+	spufs_handle_dma_error(spu->ctx, spu->dar, type);
+}
+EXPORT_SYMBOL_GPL(spufs_dma_callback);
+
+/*
+ * bottom half handler for page faults, we can't do this from
+ * interrupt context, since we might need to sleep.
+ * we also need to give up the mutex so we can get scheduled
+ * out while waiting for the backing store.
+ *
+ * TODO: try calling hash_page from the interrupt handler first
+ *       in order to speed up the easy case.
+ */
+int spufs_handle_class1(struct spu_context *ctx)
+{
+	u64 ea, dsisr, access;
+	unsigned long flags;
+	int ret;
+
+	/*
+	 * dar and dsisr get passed from the registers
+	 * to the spu_context, to this function, but not
+	 * back to the spu if it gets scheduled again.
+	 *
+	 * if we don't handle the fault for a saved context
+	 * in time, we can still expect to get the same fault
+	 * the immediately after the context restore.
+	 */
+	if (ctx->state == SPU_STATE_RUNNABLE) {
+		ea = ctx->spu->dar;
+		dsisr = ctx->spu->dsisr;
+		ctx->spu->dar= ctx->spu->dsisr = 0;
+	} else {
+		ea = ctx->csa.priv1.mfc_dar_RW;
+		dsisr = ctx->csa.priv1.mfc_dsisr_RW;
+		ctx->csa.priv1.mfc_dar_RW = 0;
+		ctx->csa.priv1.mfc_dsisr_RW = 0;
+	}
+
+	if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
+		return 0;
+
+	pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea,
+		dsisr, ctx->state);
+
+	/* we must not hold the lock when entering spu_handle_mm_fault */
+	spu_release(ctx);
+
+	access = (_PAGE_PRESENT | _PAGE_USER);
+	access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
+	local_irq_save(flags);
+	ret = hash_page(ea, access, 0x300);
+	local_irq_restore(flags);
+
+	/* hashing failed, so try the actual fault handler */
+	if (ret)
+		ret = spu_handle_mm_fault(current->mm, ea, dsisr);
+
+	spu_acquire(ctx);
+	/*
+	 * If we handled the fault successfully and are in runnable
+	 * state, restart the DMA.
+	 * In case of unhandled error report the problem to user space.
+	 */
+	if (!ret) {
+		if (ctx->spu)
+			ctx->ops->restart_dma(ctx);
+	} else
+		spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(spufs_handle_class1);
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index b00653d69c01..d010b2464a98 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -44,9 +44,25 @@ spufs_mem_open(struct inode *inode, struct file *file)
 {
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
 	file->private_data = ctx;
-	ctx->local_store = inode->i_mapping;
-	smp_wmb();
+	if (!i->i_openers++)
+		ctx->local_store = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+static int
+spufs_mem_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->local_store = NULL;
+	spin_unlock(&ctx->mapping_lock);
 	return 0;
 }
 
@@ -63,8 +79,8 @@ static ssize_t
 spufs_mem_read(struct file *file, char __user *buffer,
 				size_t size, loff_t *pos)
 {
-	int ret;
 	struct spu_context *ctx = file->private_data;
+	ssize_t ret;
 
 	spu_acquire(ctx);
 	ret = __spufs_mem_read(ctx, buffer, size, pos);
@@ -74,25 +90,29 @@ spufs_mem_read(struct file *file, char __user *buffer,
 
 static ssize_t
 spufs_mem_write(struct file *file, const char __user *buffer,
-					size_t size, loff_t *pos)
+					size_t size, loff_t *ppos)
 {
 	struct spu_context *ctx = file->private_data;
 	char *local_store;
+	loff_t pos = *ppos;
 	int ret;
 
-	size = min_t(ssize_t, LS_SIZE - *pos, size);
-	if (size <= 0)
+	if (pos < 0)
+		return -EINVAL;
+	if (pos > LS_SIZE)
 		return -EFBIG;
-	*pos += size;
+	if (size > LS_SIZE - pos)
+		size = LS_SIZE - pos;
 
 	spu_acquire(ctx);
-
 	local_store = ctx->ops->get_ls(ctx);
-	ret = copy_from_user(local_store + *pos - size,
-			     buffer, size) ? -EFAULT : size;
-
+	ret = copy_from_user(local_store + pos, buffer, size);
 	spu_release(ctx);
-	return ret;
+
+	if (ret)
+		return -EFAULT;
+	*ppos = pos + size;
+	return size;
 }
 
 static unsigned long spufs_mem_mmap_nopfn(struct vm_area_struct *vma,
@@ -145,6 +165,7 @@ spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
 
 static const struct file_operations spufs_mem_fops = {
 	.open	 = spufs_mem_open,
+	.release = spufs_mem_release,
 	.read    = spufs_mem_read,
 	.write   = spufs_mem_write,
 	.llseek  = generic_file_llseek,
@@ -234,16 +255,33 @@ static int spufs_cntl_open(struct inode *inode, struct file *file)
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
 
+	spin_lock(&ctx->mapping_lock);
 	file->private_data = ctx;
-	ctx->cntl = inode->i_mapping;
-	smp_wmb();
+	if (!i->i_openers++)
+		ctx->cntl = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
 	return simple_attr_open(inode, file, spufs_cntl_get,
 					spufs_cntl_set, "0x%08lx");
 }
 
+static int
+spufs_cntl_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	simple_attr_close(inode, file);
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->cntl = NULL;
+	spin_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
 static const struct file_operations spufs_cntl_fops = {
 	.open = spufs_cntl_open,
-	.release = simple_attr_close,
+	.release = spufs_cntl_release,
 	.read = simple_attr_read,
 	.write = simple_attr_write,
 	.mmap = spufs_cntl_mmap,
@@ -719,12 +757,28 @@ static int spufs_signal1_open(struct inode *inode, struct file *file)
 {
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
 	file->private_data = ctx;
-	ctx->signal1 = inode->i_mapping;
-	smp_wmb();
+	if (!i->i_openers++)
+		ctx->signal1 = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
 	return nonseekable_open(inode, file);
 }
 
+static int
+spufs_signal1_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->signal1 = NULL;
+	spin_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
 static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf,
 			size_t len, loff_t *pos)
 {
@@ -817,6 +871,7 @@ static int spufs_signal1_mmap(struct file *file, struct vm_area_struct *vma)
 
 static const struct file_operations spufs_signal1_fops = {
 	.open = spufs_signal1_open,
+	.release = spufs_signal1_release,
 	.read = spufs_signal1_read,
 	.write = spufs_signal1_write,
 	.mmap = spufs_signal1_mmap,
@@ -826,12 +881,28 @@ static int spufs_signal2_open(struct inode *inode, struct file *file)
 {
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
 	file->private_data = ctx;
-	ctx->signal2 = inode->i_mapping;
-	smp_wmb();
+	if (!i->i_openers++)
+		ctx->signal2 = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
 	return nonseekable_open(inode, file);
 }
 
+static int
+spufs_signal2_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->signal2 = NULL;
+	spin_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
 static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf,
 			size_t len, loff_t *pos)
 {
@@ -928,6 +999,7 @@ static int spufs_signal2_mmap(struct file *file, struct vm_area_struct *vma)
 
 static const struct file_operations spufs_signal2_fops = {
 	.open = spufs_signal2_open,
+	.release = spufs_signal2_release,
 	.read = spufs_signal2_read,
 	.write = spufs_signal2_write,
 	.mmap = spufs_signal2_mmap,
@@ -1027,13 +1099,30 @@ static int spufs_mss_open(struct inode *inode, struct file *file)
 	struct spu_context *ctx = i->i_ctx;
 
 	file->private_data = i->i_ctx;
-	ctx->mss = inode->i_mapping;
-	smp_wmb();
+
+	spin_lock(&ctx->mapping_lock);
+	if (!i->i_openers++)
+		ctx->mss = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
 	return nonseekable_open(inode, file);
 }
 
+static int
+spufs_mss_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->mss = NULL;
+	spin_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
 static const struct file_operations spufs_mss_fops = {
 	.open	 = spufs_mss_open,
+	.release = spufs_mss_release,
 	.mmap	 = spufs_mss_mmap,
 };
 
@@ -1068,14 +1157,30 @@ static int spufs_psmap_open(struct inode *inode, struct file *file)
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
 
+	spin_lock(&ctx->mapping_lock);
 	file->private_data = i->i_ctx;
-	ctx->psmap = inode->i_mapping;
-	smp_wmb();
+	if (!i->i_openers++)
+		ctx->psmap = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
 	return nonseekable_open(inode, file);
 }
 
+static int
+spufs_psmap_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->psmap = NULL;
+	spin_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
 static const struct file_operations spufs_psmap_fops = {
 	.open	 = spufs_psmap_open,
+	.release = spufs_psmap_release,
 	.mmap	 = spufs_psmap_mmap,
 };
 
@@ -1122,12 +1227,27 @@ static int spufs_mfc_open(struct inode *inode, struct file *file)
 	if (atomic_read(&inode->i_count) != 1)
 		return -EBUSY;
 
+	spin_lock(&ctx->mapping_lock);
 	file->private_data = ctx;
-	ctx->mfc = inode->i_mapping;
-	smp_wmb();
+	if (!i->i_openers++)
+		ctx->mfc = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
 	return nonseekable_open(inode, file);
 }
 
+static int
+spufs_mfc_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->mfc = NULL;
+	spin_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
 /* interrupt-level mfc callback function. */
 void spufs_mfc_callback(struct spu *spu)
 {
@@ -1309,7 +1429,10 @@ static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer,
 	if (ret)
 		goto out;
 
-	spu_acquire_runnable(ctx, 0);
+	ret = spu_acquire_runnable(ctx, 0);
+	if (ret)
+		goto out;
+
 	if (file->f_flags & O_NONBLOCK) {
 		ret = ctx->ops->send_mfc_command(ctx, &cmd);
 	} else {
@@ -1395,6 +1518,7 @@ static int spufs_mfc_fasync(int fd, struct file *file, int on)
 
 static const struct file_operations spufs_mfc_fops = {
 	.open	 = spufs_mfc_open,
+	.release = spufs_mfc_release,
 	.read	 = spufs_mfc_read,
 	.write	 = spufs_mfc_write,
 	.poll	 = spufs_mfc_poll,
diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c
index ae42e03b8c86..428875c5e4ec 100644
--- a/arch/powerpc/platforms/cell/spufs/hw_ops.c
+++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c
@@ -296,6 +296,14 @@ static int spu_hw_send_mfc_command(struct spu_context *ctx,
 	}
 }
 
+static void spu_hw_restart_dma(struct spu_context *ctx)
+{
+	struct spu_priv2 __iomem *priv2 = ctx->spu->priv2;
+
+	if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &ctx->spu->flags))
+		out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+}
+
 struct spu_context_ops spu_hw_ops = {
 	.mbox_read = spu_hw_mbox_read,
 	.mbox_stat_read = spu_hw_mbox_stat_read,
@@ -320,4 +328,5 @@ struct spu_context_ops spu_hw_ops = {
 	.read_mfc_tagstatus = spu_hw_read_mfc_tagstatus,
 	.get_mfc_free_elements = spu_hw_get_mfc_free_elements,
 	.send_mfc_command = spu_hw_send_mfc_command,
+	.restart_dma = spu_hw_restart_dma,
 };
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 8079983ef94f..13e4f70ec8c0 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -36,6 +36,7 @@
 #include <asm/prom.h>
 #include <asm/semaphore.h>
 #include <asm/spu.h>
+#include <asm/spu_priv1.h>
 #include <asm/uaccess.h>
 
 #include "spufs.h"
@@ -54,6 +55,7 @@ spufs_alloc_inode(struct super_block *sb)
 
 	ei->i_gang = NULL;
 	ei->i_ctx = NULL;
+	ei->i_openers = 0;
 
 	return &ei->vfs_inode;
 }
@@ -520,13 +522,14 @@ out:
 
 /* File system initialization */
 enum {
-	Opt_uid, Opt_gid, Opt_err,
+	Opt_uid, Opt_gid, Opt_mode, Opt_err,
 };
 
 static match_table_t spufs_tokens = {
-	{ Opt_uid, "uid=%d" },
-	{ Opt_gid, "gid=%d" },
-	{ Opt_err, NULL  },
+	{ Opt_uid,  "uid=%d" },
+	{ Opt_gid,  "gid=%d" },
+	{ Opt_mode, "mode=%o" },
+	{ Opt_err,   NULL  },
 };
 
 static int
@@ -553,6 +556,11 @@ spufs_parse_options(char *options, struct inode *root)
 				return 0;
 			root->i_gid = option;
 			break;
+		case Opt_mode:
+			if (match_octal(&args[0], &option))
+				return 0;
+			root->i_mode = option | S_IFDIR;
+			break;
 		default:
 			return 0;
 		}
@@ -560,6 +568,11 @@ spufs_parse_options(char *options, struct inode *root)
 	return 1;
 }
 
+static void spufs_exit_isolated_loader(void)
+{
+	kfree(isolated_loader);
+}
+
 static void
 spufs_init_isolated_loader(void)
 {
@@ -571,7 +584,7 @@ spufs_init_isolated_loader(void)
 	if (!dn)
 		return;
 
-	loader = get_property(dn, "loader", &size);
+	loader = of_get_property(dn, "loader", &size);
 	if (!loader)
 		return;
 
@@ -653,6 +666,10 @@ static int __init spufs_init(void)
 {
 	int ret;
 
+	ret = -ENODEV;
+	if (!spu_management_ops)
+		goto out;
+
 	ret = -ENOMEM;
 	spufs_inode_cache = kmem_cache_create("spufs_inode_cache",
 			sizeof(struct spufs_inode_info), 0,
@@ -660,25 +677,29 @@ static int __init spufs_init(void)
 
 	if (!spufs_inode_cache)
 		goto out;
-	if (spu_sched_init() != 0) {
-		kmem_cache_destroy(spufs_inode_cache);
-		goto out;
-	}
-	ret = register_filesystem(&spufs_type);
+	ret = spu_sched_init();
 	if (ret)
 		goto out_cache;
+	ret = register_filesystem(&spufs_type);
+	if (ret)
+		goto out_sched;
 	ret = register_spu_syscalls(&spufs_calls);
 	if (ret)
 		goto out_fs;
 	ret = register_arch_coredump_calls(&spufs_coredump_calls);
 	if (ret)
-		goto out_fs;
+		goto out_syscalls;
 
 	spufs_init_isolated_loader();
 
 	return 0;
+
+out_syscalls:
+	unregister_spu_syscalls(&spufs_calls);
 out_fs:
 	unregister_filesystem(&spufs_type);
+out_sched:
+	spu_sched_exit();
 out_cache:
 	kmem_cache_destroy(spufs_inode_cache);
 out:
@@ -689,6 +710,7 @@ module_init(spufs_init);
 static void __exit spufs_exit(void)
 {
 	spu_sched_exit();
+	spufs_exit_isolated_loader();
 	unregister_arch_coredump_calls(&spufs_coredump_calls);
 	unregister_spu_syscalls(&spufs_calls);
 	unregister_filesystem(&spufs_type);
diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c
index 353a8fa07ab8..57626600b1a4 100644
--- a/arch/powerpc/platforms/cell/spufs/run.c
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -18,27 +18,6 @@ void spufs_stop_callback(struct spu *spu)
 	wake_up_all(&ctx->stop_wq);
 }
 
-void spufs_dma_callback(struct spu *spu, int type)
-{
-	struct spu_context *ctx = spu->ctx;
-
-	if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
-		ctx->event_return |= type;
-		wake_up_all(&ctx->stop_wq);
-	} else {
-		switch (type) {
-		case SPE_EVENT_DMA_ALIGNMENT:
-		case SPE_EVENT_SPE_DATA_STORAGE:
-		case SPE_EVENT_INVALID_DMA:
-			force_sig(SIGBUS, /* info, */ current);
-			break;
-		case SPE_EVENT_SPE_ERROR:
-			force_sig(SIGILL, /* info */ current);
-			break;
-		}
-	}
-}
-
 static inline int spu_stopped(struct spu_context *ctx, u32 * stat)
 {
 	struct spu *spu;
@@ -63,13 +42,18 @@ static int spu_setup_isolated(struct spu_context *ctx)
 	const u32 status_loading = SPU_STATUS_RUNNING
 		| SPU_STATUS_ISOLATED_STATE | SPU_STATUS_ISOLATED_LOAD_STATUS;
 
+	ret = -ENODEV;
 	if (!isolated_loader)
-		return -ENODEV;
-
-	ret = spu_acquire_exclusive(ctx);
-	if (ret)
 		goto out;
 
+	/*
+	 * We need to exclude userspace access to the context.
+	 *
+	 * To protect against memory access we invalidate all ptes
+	 * and make sure the pagefault handlers block on the mutex.
+	 */
+	spu_unmap_mappings(ctx);
+
 	mfc_cntl = &ctx->spu->priv2->mfc_control_RW;
 
 	/* purge the MFC DMA queue to ensure no spurious accesses before we
@@ -82,7 +66,7 @@ static int spu_setup_isolated(struct spu_context *ctx)
 			printk(KERN_ERR "%s: timeout flushing MFC DMA queue\n",
 					__FUNCTION__);
 			ret = -EIO;
-			goto out_unlock;
+			goto out;
 		}
 		cond_resched();
 	}
@@ -119,12 +103,15 @@ static int spu_setup_isolated(struct spu_context *ctx)
 		pr_debug("%s: isolated LOAD failed\n", __FUNCTION__);
 		ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
 		ret = -EACCES;
+		goto out_drop_priv;
+	}
 
-	} else if (!(status & SPU_STATUS_ISOLATED_STATE)) {
+	if (!(status & SPU_STATUS_ISOLATED_STATE)) {
 		/* This isn't allowed by the CBEA, but check anyway */
 		pr_debug("%s: SPU fell out of isolated mode?\n", __FUNCTION__);
 		ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_STOP);
 		ret = -EINVAL;
+		goto out_drop_priv;
 	}
 
 out_drop_priv:
@@ -132,30 +119,19 @@ out_drop_priv:
 	sr1 |= MFC_STATE1_PROBLEM_STATE_MASK;
 	spu_mfc_sr1_set(ctx->spu, sr1);
 
-out_unlock:
-	spu_release(ctx);
 out:
 	return ret;
 }
 
-static inline int spu_run_init(struct spu_context *ctx, u32 * npc)
+static int spu_run_init(struct spu_context *ctx, u32 * npc)
 {
-	int ret;
-	unsigned long runcntl = SPU_RUNCNTL_RUNNABLE;
-
-	ret = spu_acquire_runnable(ctx, SPU_ACTIVATE_NOWAKE);
-	if (ret)
-		return ret;
-
 	if (ctx->flags & SPU_CREATE_ISOLATE) {
+		unsigned long runcntl;
+
 		if (!(ctx->ops->status_read(ctx) & SPU_STATUS_ISOLATED_STATE)) {
-			/* Need to release ctx, because spu_setup_isolated will
-			 * acquire it exclusively.
-			 */
-			spu_release(ctx);
-			ret = spu_setup_isolated(ctx);
-			if (!ret)
-				ret = spu_acquire_runnable(ctx, SPU_ACTIVATE_NOWAKE);
+			int ret = spu_setup_isolated(ctx);
+			if (ret)
+				return ret;
 		}
 
 		/* if userspace has set the runcntrl register (eg, to issue an
@@ -164,16 +140,17 @@ static inline int spu_run_init(struct spu_context *ctx, u32 * npc)
 			(SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE);
 		if (runcntl == 0)
 			runcntl = SPU_RUNCNTL_RUNNABLE;
+		ctx->ops->runcntl_write(ctx, runcntl);
 	} else {
 		spu_start_tick(ctx);
 		ctx->ops->npc_write(ctx, *npc);
+		ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
 	}
 
-	ctx->ops->runcntl_write(ctx, runcntl);
-	return ret;
+	return 0;
 }
 
-static inline int spu_run_fini(struct spu_context *ctx, u32 * npc,
+static int spu_run_fini(struct spu_context *ctx, u32 * npc,
 			       u32 * status)
 {
 	int ret = 0;
@@ -189,19 +166,27 @@ static inline int spu_run_fini(struct spu_context *ctx, u32 * npc,
 	return ret;
 }
 
-static inline int spu_reacquire_runnable(struct spu_context *ctx, u32 *npc,
+static int spu_reacquire_runnable(struct spu_context *ctx, u32 *npc,
 				         u32 *status)
 {
 	int ret;
 
-	if ((ret = spu_run_fini(ctx, npc, status)) != 0)
+	ret = spu_run_fini(ctx, npc, status);
+	if (ret)
 		return ret;
-	if (*status & (SPU_STATUS_STOPPED_BY_STOP |
-		       SPU_STATUS_STOPPED_BY_HALT)) {
+
+	if (*status & (SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_STOPPED_BY_HALT))
 		return *status;
-	}
-	if ((ret = spu_run_init(ctx, npc)) != 0)
+
+	ret = spu_acquire_runnable(ctx, 0);
+	if (ret)
+		return ret;
+
+	ret = spu_run_init(ctx, npc);
+	if (ret) {
+		spu_release(ctx);
 		return ret;
+	}
 	return 0;
 }
 
@@ -253,17 +238,17 @@ int spu_process_callback(struct spu_context *ctx)
 {
 	struct spu_syscall_block s;
 	u32 ls_pointer, npc;
-	char *ls;
+	void __iomem *ls;
 	long spu_ret;
 	int ret;
 
 	/* get syscall block from local store */
-	npc = ctx->ops->npc_read(ctx);
-	ls = ctx->ops->get_ls(ctx);
-	ls_pointer = *(u32*)(ls + npc);
+	npc = ctx->ops->npc_read(ctx) & ~3;
+	ls = (void __iomem *)ctx->ops->get_ls(ctx);
+	ls_pointer = in_be32(ls + npc);
 	if (ls_pointer > (LS_SIZE - sizeof(s)))
 		return -EFAULT;
-	memcpy(&s, ls + ls_pointer, sizeof (s));
+	memcpy_fromio(&s, ls + ls_pointer, sizeof(s));
 
 	/* do actual syscall without pinning the spu */
 	ret = 0;
@@ -283,7 +268,7 @@ int spu_process_callback(struct spu_context *ctx)
 	}
 
 	/* write result, jump over indirect pointer */
-	memcpy(ls + ls_pointer, &spu_ret, sizeof (spu_ret));
+	memcpy_toio(ls + ls_pointer, &spu_ret, sizeof(spu_ret));
 	ctx->ops->npc_write(ctx, npc);
 	ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
 	return ret;
@@ -292,11 +277,8 @@ int spu_process_callback(struct spu_context *ctx)
 static inline int spu_process_events(struct spu_context *ctx)
 {
 	struct spu *spu = ctx->spu;
-	u64 pte_fault = MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED;
 	int ret = 0;
 
-	if (spu->dsisr & pte_fault)
-		ret = spu_irq_class_1_bottom(spu);
 	if (spu->class_0_pending)
 		ret = spu_irq_class_0_bottom(spu);
 	if (!ret && signal_pending(current))
@@ -310,14 +292,21 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx,
 	int ret;
 	u32 status;
 
-	if (down_interruptible(&ctx->run_sema))
+	if (mutex_lock_interruptible(&ctx->run_mutex))
 		return -ERESTARTSYS;
 
 	ctx->ops->master_start(ctx);
 	ctx->event_return = 0;
-	ret = spu_run_init(ctx, npc);
+
+	ret = spu_acquire_runnable(ctx, 0);
 	if (ret)
+		return ret;
+
+	ret = spu_run_init(ctx, npc);
+	if (ret) {
+		spu_release(ctx);
 		goto out;
+	}
 
 	do {
 		ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status));
@@ -330,6 +319,10 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx,
 				break;
 			status &= ~SPU_STATUS_STOPPED_BY_STOP;
 		}
+		ret = spufs_handle_class1(ctx);
+		if (ret)
+			break;
+
 		if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) {
 			ret = spu_reacquire_runnable(ctx, npc, &status);
 			if (ret) {
@@ -363,6 +356,6 @@ out2:
 
 out:
 	*event = ctx->event_return;
-	up(&ctx->run_sema);
+	mutex_unlock(&ctx->run_mutex);
 	return ret;
 }
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 2f25e68b4bac..91030b8abdca 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -71,14 +71,27 @@ static inline int node_allowed(int node)
 
 void spu_start_tick(struct spu_context *ctx)
 {
-	if (ctx->policy == SCHED_RR)
+	if (ctx->policy == SCHED_RR) {
+		/*
+		 * Make sure the exiting bit is cleared.
+		 */
+		clear_bit(SPU_SCHED_EXITING, &ctx->sched_flags);
+		mb();
 		queue_delayed_work(spu_sched_wq, &ctx->sched_work, SPU_TIMESLICE);
+	}
 }
 
 void spu_stop_tick(struct spu_context *ctx)
 {
-	if (ctx->policy == SCHED_RR)
+	if (ctx->policy == SCHED_RR) {
+		/*
+		 * While the work can be rearming normally setting this flag
+		 * makes sure it does not rearm itself anymore.
+		 */
+		set_bit(SPU_SCHED_EXITING, &ctx->sched_flags);
+		mb();
 		cancel_delayed_work(&ctx->sched_work);
+	}
 }
 
 void spu_sched_tick(struct work_struct *work)
@@ -86,7 +99,15 @@ void spu_sched_tick(struct work_struct *work)
 	struct spu_context *ctx =
 		container_of(work, struct spu_context, sched_work.work);
 	struct spu *spu;
-	int rearm = 1;
+	int preempted = 0;
+
+	/*
+	 * If this context is being stopped avoid rescheduling from the
+	 * scheduler tick because we would block on the state_mutex.
+	 * The caller will yield the spu later on anyway.
+	 */
+	if (test_bit(SPU_SCHED_EXITING, &ctx->sched_flags))
+		return;
 
 	mutex_lock(&ctx->state_mutex);
 	spu = ctx->spu;
@@ -94,12 +115,19 @@ void spu_sched_tick(struct work_struct *work)
 		int best = sched_find_first_bit(spu_prio->bitmap);
 		if (best <= ctx->prio) {
 			spu_deactivate(ctx);
-			rearm = 0;
+			preempted = 1;
 		}
 	}
 	mutex_unlock(&ctx->state_mutex);
 
-	if (rearm)
+	if (preempted) {
+		/*
+		 * We need to break out of the wait loop in spu_run manually
+		 * to ensure this context gets put on the runqueue again
+		 * ASAP.
+		 */
+		wake_up(&ctx->stop_wq);
+	} else
 		spu_start_tick(ctx);
 }
 
@@ -127,14 +155,6 @@ static void spu_remove_from_active_list(struct spu *spu)
 	mutex_unlock(&spu_prio->active_mutex[node]);
 }
 
-static inline void mm_needs_global_tlbie(struct mm_struct *mm)
-{
-	int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;
-
-	/* Global TLBIE broadcast required with SPEs. */
-	__cpus_setall(&mm->cpu_vm_mask, nr);
-}
-
 static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);
 
 static void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
@@ -167,8 +187,7 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
 	ctx->spu = spu;
 	ctx->ops = &spu_hw_ops;
 	spu->pid = current->pid;
-	spu->mm = ctx->owner;
-	mm_needs_global_tlbie(spu->mm);
+	spu_associate_mm(spu, ctx->owner);
 	spu->ibox_callback = spufs_ibox_callback;
 	spu->wbox_callback = spufs_wbox_callback;
 	spu->stop_callback = spufs_stop_callback;
@@ -205,7 +224,7 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
 	spu->stop_callback = NULL;
 	spu->mfc_callback = NULL;
 	spu->dma_callback = NULL;
-	spu->mm = NULL;
+	spu_associate_mm(spu, NULL);
 	spu->pid = 0;
 	ctx->ops = &spu_backing_ops;
 	ctx->spu = NULL;
@@ -217,62 +236,42 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
  * spu_add_to_rq - add a context to the runqueue
  * @ctx:       context to add
  */
-static void spu_add_to_rq(struct spu_context *ctx)
+static void __spu_add_to_rq(struct spu_context *ctx)
 {
-	spin_lock(&spu_prio->runq_lock);
-	list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
-	set_bit(ctx->prio, spu_prio->bitmap);
-	spin_unlock(&spu_prio->runq_lock);
-}
+	int prio = ctx->prio;
 
-/**
- * spu_del_from_rq - remove a context from the runqueue
- * @ctx:       context to remove
- */
-static void spu_del_from_rq(struct spu_context *ctx)
-{
-	spin_lock(&spu_prio->runq_lock);
-	list_del_init(&ctx->rq);
-	if (list_empty(&spu_prio->runq[ctx->prio]))
-		clear_bit(ctx->prio, spu_prio->bitmap);
-	spin_unlock(&spu_prio->runq_lock);
+	list_add_tail(&ctx->rq, &spu_prio->runq[prio]);
+	set_bit(prio, spu_prio->bitmap);
 }
 
-/**
- * spu_grab_context - remove one context from the runqueue
- * @prio:      priority of the context to be removed
- *
- * This function removes one context from the runqueue for priority @prio.
- * If there is more than one context with the given priority the first
- * task on the runqueue will be taken.
- *
- * Returns the spu_context it just removed.
- *
- * Must be called with spu_prio->runq_lock held.
- */
-static struct spu_context *spu_grab_context(int prio)
+static void __spu_del_from_rq(struct spu_context *ctx)
 {
-	struct list_head *rq = &spu_prio->runq[prio];
+	int prio = ctx->prio;
 
-	if (list_empty(rq))
-		return NULL;
-	return list_entry(rq->next, struct spu_context, rq);
+	if (!list_empty(&ctx->rq))
+		list_del_init(&ctx->rq);
+	if (list_empty(&spu_prio->runq[prio]))
+		clear_bit(prio, spu_prio->bitmap);
 }
 
 static void spu_prio_wait(struct spu_context *ctx)
 {
 	DEFINE_WAIT(wait);
 
-	set_bit(SPU_SCHED_WAKE, &ctx->sched_flags);
+	spin_lock(&spu_prio->runq_lock);
 	prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
 	if (!signal_pending(current)) {
+		__spu_add_to_rq(ctx);
+		spin_unlock(&spu_prio->runq_lock);
 		mutex_unlock(&ctx->state_mutex);
 		schedule();
 		mutex_lock(&ctx->state_mutex);
+		spin_lock(&spu_prio->runq_lock);
+		__spu_del_from_rq(ctx);
 	}
+	spin_unlock(&spu_prio->runq_lock);
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&ctx->stop_wq, &wait);
-	clear_bit(SPU_SCHED_WAKE, &ctx->sched_flags);
 }
 
 /**
@@ -291,9 +290,14 @@ static void spu_reschedule(struct spu *spu)
 	spin_lock(&spu_prio->runq_lock);
 	best = sched_find_first_bit(spu_prio->bitmap);
 	if (best < MAX_PRIO) {
-		struct spu_context *ctx = spu_grab_context(best);
-		if (ctx && test_bit(SPU_SCHED_WAKE, &ctx->sched_flags))
-			wake_up(&ctx->stop_wq);
+		struct list_head *rq = &spu_prio->runq[best];
+		struct spu_context *ctx;
+
+		BUG_ON(list_empty(rq));
+
+		ctx = list_entry(rq->next, struct spu_context, rq);
+		__spu_del_from_rq(ctx);
+		wake_up(&ctx->stop_wq);
 	}
 	spin_unlock(&spu_prio->runq_lock);
 }
@@ -376,6 +380,12 @@ static struct spu *find_victim(struct spu_context *ctx)
 			}
 			spu_unbind_context(spu, victim);
 			mutex_unlock(&victim->state_mutex);
+			/*
+			 * We need to break out of the wait loop in spu_run
+			 * manually to ensure this context gets put on the
+			 * runqueue again ASAP.
+			 */
+			wake_up(&victim->stop_wq);
 			return spu;
 		}
 	}
@@ -388,7 +398,7 @@ static struct spu *find_victim(struct spu_context *ctx)
  * @ctx:	spu context to schedule
  * @flags:	flags (currently ignored)
  *
- * Tries to find a free spu to run @ctx.  If no free spu is availble
+ * Tries to find a free spu to run @ctx.  If no free spu is available
  * add the context to the runqueue so it gets woken up once an spu
  * is available.
  */
@@ -413,10 +423,7 @@ int spu_activate(struct spu_context *ctx, unsigned long flags)
 			return 0;
 		}
 
-		spu_add_to_rq(ctx);
-		if (!(flags & SPU_ACTIVATE_NOWAKE))
-			spu_prio_wait(ctx);
-		spu_del_from_rq(ctx);
+		spu_prio_wait(ctx);
 	} while (!signal_pending(current));
 
 	return -ERESTARTSYS;
@@ -450,7 +457,6 @@ void spu_deactivate(struct spu_context *ctx)
 void spu_yield(struct spu_context *ctx)
 {
 	struct spu *spu;
-	int need_yield = 0;
 
 	if (mutex_trylock(&ctx->state_mutex)) {
 		if ((spu = ctx->spu) != NULL) {
@@ -459,13 +465,10 @@ void spu_yield(struct spu_context *ctx)
 				pr_debug("%s: yielding SPU %d NODE %d\n",
 					 __FUNCTION__, spu->number, spu->node);
 				spu_deactivate(ctx);
-				need_yield = 1;
 			}
 		}
 		mutex_unlock(&ctx->state_mutex);
 	}
-	if (unlikely(need_yield))
-		yield();
 }
 
 int __init spu_sched_init(void)
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 0c437891dfd5..0a947fd7de57 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -41,7 +41,7 @@ struct spu_gang;
 
 /* ctx->sched_flags */
 enum {
-	SPU_SCHED_WAKE = 0,
+	SPU_SCHED_EXITING = 0,
 };
 
 struct spu_context {
@@ -50,16 +50,17 @@ struct spu_context {
 	spinlock_t mmio_lock;		  /* protects mmio access */
 	struct address_space *local_store; /* local store mapping.  */
 	struct address_space *mfc;	   /* 'mfc' area mappings. */
-	struct address_space *cntl; 	   /* 'control' area mappings. */
-	struct address_space *signal1; 	   /* 'signal1' area mappings. */
-	struct address_space *signal2; 	   /* 'signal2' area mappings. */
-	struct address_space *mss; 	   /* 'mss' area mappings. */
-	struct address_space *psmap; 	   /* 'psmap' area mappings. */
+	struct address_space *cntl;	   /* 'control' area mappings. */
+	struct address_space *signal1;	   /* 'signal1' area mappings. */
+	struct address_space *signal2;	   /* 'signal2' area mappings. */
+	struct address_space *mss;	   /* 'mss' area mappings. */
+	struct address_space *psmap;	   /* 'psmap' area mappings. */
+	spinlock_t mapping_lock;
 	u64 object_id;		   /* user space pointer for oprofile */
 
 	enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state;
 	struct mutex state_mutex;
-	struct semaphore run_sema;
+	struct mutex run_mutex;
 
 	struct mm_struct *owner;
 
@@ -140,6 +141,7 @@ struct spu_context_ops {
 			       struct spu_dma_info * info);
 	void (*proxydma_info_read) (struct spu_context * ctx,
 				    struct spu_proxydma_info * info);
+	void (*restart_dma)(struct spu_context *ctx);
 };
 
 extern struct spu_context_ops spu_hw_ops;
@@ -149,6 +151,7 @@ struct spufs_inode_info {
 	struct spu_context *i_ctx;
 	struct spu_gang *i_gang;
 	struct inode vfs_inode;
+	int i_openers;
 };
 #define SPUFS_I(inode) \
 	container_of(inode, struct spufs_inode_info, vfs_inode)
@@ -170,6 +173,9 @@ int put_spu_gang(struct spu_gang *gang);
 void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx);
 void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
 
+/* fault handling */
+int spufs_handle_class1(struct spu_context *ctx);
+
 /* context management */
 static inline void spu_acquire(struct spu_context *ctx)
 {
@@ -190,10 +196,7 @@ void spu_unmap_mappings(struct spu_context *ctx);
 void spu_forget(struct spu_context *ctx);
 int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags);
 void spu_acquire_saved(struct spu_context *ctx);
-int spu_acquire_exclusive(struct spu_context *ctx);
-enum {
-	SPU_ACTIVATE_NOWAKE = 1,
-};
+
 int spu_activate(struct spu_context *ctx, unsigned long flags);
 void spu_deactivate(struct spu_context *ctx);
 void spu_yield(struct spu_context *ctx);
@@ -220,14 +223,13 @@ extern char *isolated_loader;
 		prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE);	\
 		if (condition)						\
 			break;						\
-		if (!signal_pending(current)) {				\
-			spu_release(ctx);				\
-			schedule();					\
-			spu_acquire(ctx);				\
-			continue;					\
+		if (signal_pending(current)) {				\
+			__ret = -ERESTARTSYS;				\
+			break;						\
 		}							\
-		__ret = -ERESTARTSYS;					\
-		break;							\
+		spu_release(ctx);					\
+		schedule();						\
+		spu_acquire(ctx);					\
 	}								\
 	finish_wait(&(wq), &__wait);					\
 	__ret;								\
diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c
index c08981ff7fc6..8347c4a3f894 100644
--- a/arch/powerpc/platforms/cell/spufs/switch.c
+++ b/arch/powerpc/platforms/cell/spufs/switch.c
@@ -468,26 +468,6 @@ static inline void wait_purge_complete(struct spu_state *csa, struct spu *spu)
 			 MFC_CNTL_PURGE_DMA_COMPLETE);
 }
 
-static inline void save_mfc_slbs(struct spu_state *csa, struct spu *spu)
-{
-	struct spu_priv2 __iomem *priv2 = spu->priv2;
-	int i;
-
-	/* Save, Step 29:
-	 *     If MFC_SR1[R]='1', save SLBs in CSA.
-	 */
-	if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK) {
-		csa->priv2.slb_index_W = in_be64(&priv2->slb_index_W);
-		for (i = 0; i < 8; i++) {
-			out_be64(&priv2->slb_index_W, i);
-			eieio();
-			csa->slb_esid_RW[i] = in_be64(&priv2->slb_esid_RW);
-			csa->slb_vsid_RW[i] = in_be64(&priv2->slb_vsid_RW);
-			eieio();
-		}
-	}
-}
-
 static inline void setup_mfc_sr1(struct spu_state *csa, struct spu *spu)
 {
 	/* Save, Step 30:
@@ -708,20 +688,6 @@ static inline void resume_mfc_queue(struct spu_state *csa, struct spu *spu)
 	out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESUME_DMA_QUEUE);
 }
 
-static inline void invalidate_slbs(struct spu_state *csa, struct spu *spu)
-{
-	struct spu_priv2 __iomem *priv2 = spu->priv2;
-
-	/* Save, Step 45:
-	 * Restore, Step 19:
-	 *     If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All.
-	 */
-	if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK) {
-		out_be64(&priv2->slb_invalidate_all_W, 0UL);
-		eieio();
-	}
-}
-
 static inline void get_kernel_slb(u64 ea, u64 slb[2])
 {
 	u64 llp;
@@ -765,7 +731,7 @@ static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu)
 	 *     MFC_SR1[R]=1 (in other words, assume that
 	 *     translation is desired by OS environment).
 	 */
-	invalidate_slbs(csa, spu);
+	spu_invalidate_slbs(spu);
 	get_kernel_slb((unsigned long)&spu_save_code[0], code_slb);
 	get_kernel_slb((unsigned long)csa->lscsa, lscsa_slb);
 	load_mfc_slb(spu, code_slb, 0);
@@ -1718,27 +1684,6 @@ static inline void check_ppuint_mb_stat(struct spu_state *csa, struct spu *spu)
 	}
 }
 
-static inline void restore_mfc_slbs(struct spu_state *csa, struct spu *spu)
-{
-	struct spu_priv2 __iomem *priv2 = spu->priv2;
-	int i;
-
-	/* Restore, Step 68:
-	 *     If MFC_SR1[R]='1', restore SLBs from CSA.
-	 */
-	if (csa->priv1.mfc_sr1_RW & MFC_STATE1_RELOCATE_MASK) {
-		for (i = 0; i < 8; i++) {
-			out_be64(&priv2->slb_index_W, i);
-			eieio();
-			out_be64(&priv2->slb_esid_RW, csa->slb_esid_RW[i]);
-			out_be64(&priv2->slb_vsid_RW, csa->slb_vsid_RW[i]);
-			eieio();
-		}
-		out_be64(&priv2->slb_index_W, csa->priv2.slb_index_W);
-		eieio();
-	}
-}
-
 static inline void restore_mfc_sr1(struct spu_state *csa, struct spu *spu)
 {
 	/* Restore, Step 69:
@@ -1875,7 +1820,6 @@ static void save_csa(struct spu_state *prev, struct spu *spu)
 	set_mfc_tclass_id(prev, spu);	/* Step 26. */
 	purge_mfc_queue(prev, spu);	/* Step 27. */
 	wait_purge_complete(prev, spu);	/* Step 28. */
-	save_mfc_slbs(prev, spu);	/* Step 29. */
 	setup_mfc_sr1(prev, spu);	/* Step 30. */
 	save_spu_npc(prev, spu);	/* Step 31. */
 	save_spu_privcntl(prev, spu);	/* Step 32. */
@@ -1987,7 +1931,7 @@ static void harvest(struct spu_state *prev, struct spu *spu)
 	reset_spu_privcntl(prev, spu);	        /* Step 16. */
 	reset_spu_lslr(prev, spu);              /* Step 17. */
 	setup_mfc_sr1(prev, spu);	        /* Step 18. */
-	invalidate_slbs(prev, spu);	        /* Step 19. */
+	spu_invalidate_slbs(spu);        	/* Step 19. */
 	reset_ch_part1(prev, spu);	        /* Step 20. */
 	reset_ch_part2(prev, spu);	        /* Step 21. */
 	enable_interrupts(prev, spu);	        /* Step 22. */
@@ -2055,7 +1999,7 @@ static void restore_csa(struct spu_state *next, struct spu *spu)
 	restore_spu_mb(next, spu);	        /* Step 65. */
 	check_ppu_mb_stat(next, spu);	        /* Step 66. */
 	check_ppuint_mb_stat(next, spu);	/* Step 67. */
-	restore_mfc_slbs(next, spu);	        /* Step 68. */
+	spu_invalidate_slbs(spu);		/* Modified Step 68. */
 	restore_mfc_sr1(next, spu);	        /* Step 69. */
 	restore_other_spu_access(next, spu);	/* Step 70. */
 	restore_spu_runcntl(next, spu);	        /* Step 71. */
@@ -2140,6 +2084,10 @@ int spu_save(struct spu_state *prev, struct spu *spu)
 	int rc;
 
 	acquire_spu_lock(spu);	        /* Step 1.     */
+	prev->dar = spu->dar;
+	prev->dsisr = spu->dsisr;
+	spu->dar = 0;
+	spu->dsisr = 0;
 	rc = __do_spu_save(prev, spu);	/* Steps 2-53. */
 	release_spu_lock(spu);
 	if (rc != 0 && rc != 2 && rc != 6) {
@@ -2165,9 +2113,9 @@ int spu_restore(struct spu_state *new, struct spu *spu)
 
 	acquire_spu_lock(spu);
 	harvest(NULL, spu);
-	spu->dar = 0;
-	spu->dsisr = 0;
 	spu->slb_replace = 0;
+	new->dar = 0;
+	new->dsisr = 0;
 	spu->class_0_pending = 0;
 	rc = __do_spu_restore(new, spu);
 	release_spu_lock(spu);