53 files changed, 1852 insertions, 759 deletions
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 8c0d324f657e..3823df235f25 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -582,6 +582,7 @@ static long mpc52xx_wdt_ioctl(struct file *file, unsigned int cmd,
 		if (ret)
 			break;
 		/* fall through and return the timeout */
+		fallthrough;
 
 	case WDIOC_GETTIMEOUT:
 		/* we need to round here as to avoid e.g. the following
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
index 87f524e4b09c..8a7e55acf090 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
@@ -73,7 +73,7 @@ smp_86xx_kick_cpu(int nr)
 
 	/* Setup fake reset vector to call __secondary_start_mpc86xx. */
 	target = (unsigned long) __secondary_start_mpc86xx;
-	patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK);
+	patch_branch(vector, target, BRANCH_SET_LINK);
 
 	/* Kick that CPU */
 	smp_86xx_release_core(nr);
@@ -83,7 +83,7 @@ smp_86xx_kick_cpu(int nr)
 		mdelay(1);
 
 	/* Restore the exception vector */
-	patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector));
+	patch_instruction(vector, ppc_inst(save_vector));
 
 	local_irq_restore(flags);
 
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 7a5e8f4541e3..e02d29a9d12f 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -20,6 +20,8 @@ source "arch/powerpc/platforms/embedded6xx/Kconfig"
 source "arch/powerpc/platforms/44x/Kconfig"
 source "arch/powerpc/platforms/40x/Kconfig"
 source "arch/powerpc/platforms/amigaone/Kconfig"
+source "arch/powerpc/platforms/book3s/Kconfig"
+source "arch/powerpc/platforms/microwatt/Kconfig"
 
 config KVM_GUEST
 	bool "KVM Guest support"
@@ -49,6 +51,7 @@ config PPC_NATIVE
 config PPC_OF_BOOT_TRAMPOLINE
 	bool "Support booting from Open Firmware or yaboot"
 	depends on PPC_BOOK3S_32 || PPC64
+	select RELOCATABLE if PPC64
 	default y
 	help
 	  Support from booting from Open Firmware or yaboot using an
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index f998e655b570..6794145603de 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -61,6 +61,7 @@ config 44x
 	select 4xx_SOC
 	select HAVE_PCI
 	select PHYS_64BIT
+	select PPC_HAVE_KUEP
 
 endchoice
 
@@ -101,6 +102,8 @@ config PPC_BOOK3S_64
 	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
 	select ARCH_SUPPORTS_HUGETLBFS
 	select ARCH_SUPPORTS_NUMA_BALANCING
+	select HAVE_MOVE_PMD
+	select HAVE_MOVE_PUD
 	select IRQ_WORK
 	select PPC_MM_SLICES
 	select PPC_HAVE_KUEP
@@ -111,6 +114,7 @@ config PPC_BOOK3E_64
 	select PPC_FPU # Make it a choice ?
 	select PPC_SMP_MUXED_IPI
 	select PPC_DOORBELL
+	select ZONE_DMA
 
 endchoice
 
@@ -389,7 +393,7 @@ config PPC_HAVE_KUEP
 config PPC_KUEP
 	bool "Kernel Userspace Execution Prevention"
 	depends on PPC_HAVE_KUEP
-	default y if !PPC_BOOK3S_32
+	default y
 	help
 	  Enable support for Kernel Userspace Execution Prevention (KUEP)
 
@@ -401,7 +405,7 @@ config PPC_HAVE_KUAP
 config PPC_KUAP
 	bool "Kernel Userspace Access Protection"
 	depends on PPC_HAVE_KUAP
-	default y if !PPC_BOOK3S_32
+	default y
 	help
 	  Enable support for Kernel Userspace Access Protection (KUAP)
 
@@ -424,10 +428,6 @@ config PPC_MMU_NOHASH
 	def_bool y
 	depends on !PPC_BOOK3S
 
-config PPC_MMU_NOHASH_32
-	def_bool y
-	depends on PPC_MMU_NOHASH && PPC32
-
 config PPC_BOOK3E_MMU
 	def_bool y
 	depends on FSL_BOOKE || PPC_BOOK3E
@@ -476,9 +476,9 @@ config SMP
 	  If you don't know what to do here, say N.
 
 config NR_CPUS
-	int "Maximum number of CPUs (2-8192)"
-	range 2 8192
-	depends on SMP
+	int "Maximum number of CPUs (2-8192)" if SMP
+	range 2 8192 if SMP
+	default "1" if !SMP
 	default "32" if PPC64
 	default "4"
 
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
index 143d4417f6cc..94470fb27c99 100644
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile
@@ -22,3 +22,5 @@ obj-$(CONFIG_PPC_CELL)		+= cell/
 obj-$(CONFIG_PPC_PS3)		+= ps3/
 obj-$(CONFIG_EMBEDDED6xx)	+= embedded6xx/
 obj-$(CONFIG_AMIGAONE)		+= amigaone/
+obj-$(CONFIG_PPC_BOOK3S)	+= book3s/
+obj-$(CONFIG_PPC_MICROWATT)	+= microwatt/
diff --git a/arch/powerpc/platforms/book3s/Kconfig b/arch/powerpc/platforms/book3s/Kconfig
new file mode 100644
index 000000000000..34c931592ef0
--- /dev/null
+++ b/arch/powerpc/platforms/book3s/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_VAS
+	bool "IBM Virtual Accelerator Switchboard (VAS)"
+	depends on (PPC_POWERNV || PPC_PSERIES) && PPC_64K_PAGES
+	default y
+	help
+	  This enables support for IBM Virtual Accelerator Switchboard (VAS).
+
+	  VAS devices are found in POWER9-based and later systems, they
+	  provide access to accelerator coprocessors such as NX-GZIP and
+	  NX-842. This config allows the kernel to use NX-842 accelerators,
+	  and user-mode APIs for the NX-GZIP accelerator on POWER9 PowerNV
+	  and POWER10 PowerVM platforms.
+
+	  If unsure, say "N".
diff --git a/arch/powerpc/platforms/book3s/Makefile b/arch/powerpc/platforms/book3s/Makefile
new file mode 100644
index 000000000000..e790f1910f61
--- /dev/null
+++ b/arch/powerpc/platforms/book3s/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_PPC_VAS)	+= vas-api.o
diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
new file mode 100644
index 000000000000..30172e52e16b
--- /dev/null
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -0,0 +1,493 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * VAS user space API for its accelerators (Only NX-GZIP is supported now)
+ * Copyright (C) 2019 Haren Myneni, IBM Corp
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/kthread.h>
+#include <linux/sched/signal.h>
+#include <linux/mmu_context.h>
+#include <linux/io.h>
+#include <asm/vas.h>
+#include <uapi/asm/vas-api.h>
+
+/*
+ * The driver creates the device node that can be used as follows:
+ * For NX-GZIP
+ *
+ *	fd = open("/dev/crypto/nx-gzip", O_RDWR);
+ *	rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr);
+ *	paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL).
+ *	vas_copy(&crb, 0, 1);
+ *	vas_paste(paste_addr, 0, 1);
+ *	close(fd) or exit process to close window.
+ *
+ * where "vas_copy" and "vas_paste" are defined in copy-paste.h.
+ * copy/paste returns to the user space directly. So refer NX hardware
+ * documententation for exact copy/paste usage and completion / error
+ * conditions.
+ */
+
+/*
+ * Wrapper object for the nx-gzip device - there is just one instance of
+ * this node for the whole system.
+ */
+static struct coproc_dev {
+	struct cdev cdev;
+	struct device *device;
+	char *name;
+	dev_t devt;
+	struct class *class;
+	enum vas_cop_type cop_type;
+	const struct vas_user_win_ops *vops;
+} coproc_device;
+
+struct coproc_instance {
+	struct coproc_dev *coproc;
+	struct vas_window *txwin;
+};
+
+static char *coproc_devnode(struct device *dev, umode_t *mode)
+{
+	return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev));
+}
+
+/*
+ * Take reference to pid and mm
+ */
+int get_vas_user_win_ref(struct vas_user_win_ref *task_ref)
+{
+	/*
+	 * Window opened by a child thread may not be closed when
+	 * it exits. So take reference to its pid and release it
+	 * when the window is free by parent thread.
+	 * Acquire a reference to the task's pid to make sure
+	 * pid will not be re-used - needed only for multithread
+	 * applications.
+	 */
+	task_ref->pid = get_task_pid(current, PIDTYPE_PID);
+	/*
+	 * Acquire a reference to the task's mm.
+	 */
+	task_ref->mm = get_task_mm(current);
+	if (!task_ref->mm) {
+		put_pid(task_ref->pid);
+		pr_err("VAS: pid(%d): mm_struct is not found\n",
+				current->pid);
+		return -EPERM;
+	}
+
+	mmgrab(task_ref->mm);
+	mmput(task_ref->mm);
+	/*
+	 * Process closes window during exit. In the case of
+	 * multithread application, the child thread can open
+	 * window and can exit without closing it. So takes tgid
+	 * reference until window closed to make sure tgid is not
+	 * reused.
+	 */
+	task_ref->tgid = find_get_pid(task_tgid_vnr(current));
+
+	return 0;
+}
+
+/*
+ * Successful return must release the task reference with
+ * put_task_struct
+ */
+static bool ref_get_pid_and_task(struct vas_user_win_ref *task_ref,
+			  struct task_struct **tskp, struct pid **pidp)
+{
+	struct task_struct *tsk;
+	struct pid *pid;
+
+	pid = task_ref->pid;
+	tsk = get_pid_task(pid, PIDTYPE_PID);
+	if (!tsk) {
+		pid = task_ref->tgid;
+		tsk = get_pid_task(pid, PIDTYPE_PID);
+		/*
+		 * Parent thread (tgid) will be closing window when it
+		 * exits. So should not get here.
+		 */
+		if (WARN_ON_ONCE(!tsk))
+			return false;
+	}
+
+	/* Return if the task is exiting. */
+	if (tsk->flags & PF_EXITING) {
+		put_task_struct(tsk);
+		return false;
+	}
+
+	*tskp = tsk;
+	*pidp = pid;
+
+	return true;
+}
+
+/*
+ * Update the CSB to indicate a translation error.
+ *
+ * User space will be polling on CSB after the request is issued.
+ * If NX can handle the request without any issues, it updates CSB.
+ * Whereas if NX encounters page fault, the kernel will handle the
+ * fault and update CSB with translation error.
+ *
+ * If we are unable to update the CSB means copy_to_user failed due to
+ * invalid csb_addr, send a signal to the process.
+ */
+void vas_update_csb(struct coprocessor_request_block *crb,
+		    struct vas_user_win_ref *task_ref)
+{
+	struct coprocessor_status_block csb;
+	struct kernel_siginfo info;
+	struct task_struct *tsk;
+	void __user *csb_addr;
+	struct pid *pid;
+	int rc;
+
+	/*
+	 * NX user space windows can not be opened for task->mm=NULL
+	 * and faults will not be generated for kernel requests.
+	 */
+	if (WARN_ON_ONCE(!task_ref->mm))
+		return;
+
+	csb_addr = (void __user *)be64_to_cpu(crb->csb_addr);
+
+	memset(&csb, 0, sizeof(csb));
+	csb.cc = CSB_CC_FAULT_ADDRESS;
+	csb.ce = CSB_CE_TERMINATION;
+	csb.cs = 0;
+	csb.count = 0;
+
+	/*
+	 * NX operates and returns in BE format as defined CRB struct.
+	 * So saves fault_storage_addr in BE as NX pastes in FIFO and
+	 * expects user space to convert to CPU format.
+	 */
+	csb.address = crb->stamp.nx.fault_storage_addr;
+	csb.flags = 0;
+
+	/*
+	 * Process closes send window after all pending NX requests are
+	 * completed. In multi-thread applications, a child thread can
+	 * open a window and can exit without closing it. May be some
+	 * requests are pending or this window can be used by other
+	 * threads later. We should handle faults if NX encounters
+	 * pages faults on these requests. Update CSB with translation
+	 * error and fault address. If csb_addr passed by user space is
+	 * invalid, send SEGV signal to pid saved in window. If the
+	 * child thread is not running, send the signal to tgid.
+	 * Parent thread (tgid) will close this window upon its exit.
+	 *
+	 * pid and mm references are taken when window is opened by
+	 * process (pid). So tgid is used only when child thread opens
+	 * a window and exits without closing it.
+	 */
+
+	if (!ref_get_pid_and_task(task_ref, &tsk, &pid))
+		return;
+
+	kthread_use_mm(task_ref->mm);
+	rc = copy_to_user(csb_addr, &csb, sizeof(csb));
+	/*
+	 * User space polls on csb.flags (first byte). So add barrier
+	 * then copy first byte with csb flags update.
+	 */
+	if (!rc) {
+		csb.flags = CSB_V;
+		/* Make sure update to csb.flags is visible now */
+		smp_mb();
+		rc = copy_to_user(csb_addr, &csb, sizeof(u8));
+	}
+	kthread_unuse_mm(task_ref->mm);
+	put_task_struct(tsk);
+
+	/* Success */
+	if (!rc)
+		return;
+
+
+	pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n",
+			csb_addr, pid_vnr(pid));
+
+	clear_siginfo(&info);
+	info.si_signo = SIGSEGV;
+	info.si_errno = EFAULT;
+	info.si_code = SEGV_MAPERR;
+	info.si_addr = csb_addr;
+	/*
+	 * process will be polling on csb.flags after request is sent to
+	 * NX. So generally CSB update should not fail except when an
+	 * application passes invalid csb_addr. So an error message will
+	 * be displayed and leave it to user space whether to ignore or
+	 * handle this signal.
+	 */
+	rcu_read_lock();
+	rc = kill_pid_info(SIGSEGV, &info, pid);
+	rcu_read_unlock();
+
+	pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__,
+			pid_vnr(pid), rc);
+}
+
+void vas_dump_crb(struct coprocessor_request_block *crb)
+{
+	struct data_descriptor_entry *dde;
+	struct nx_fault_stamp *nx;
+
+	dde = &crb->source;
+	pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
+		be64_to_cpu(dde->address), be32_to_cpu(dde->length),
+		dde->count, dde->index, dde->flags);
+
+	dde = &crb->target;
+	pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
+		be64_to_cpu(dde->address), be32_to_cpu(dde->length),
+		dde->count, dde->index, dde->flags);
+
+	nx = &crb->stamp.nx;
+	pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n",
+		be32_to_cpu(nx->pswid),
+		be64_to_cpu(crb->stamp.nx.fault_storage_addr),
+		nx->flags, nx->fault_status);
+}
+
+static int coproc_open(struct inode *inode, struct file *fp)
+{
+	struct coproc_instance *cp_inst;
+
+	cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL);
+	if (!cp_inst)
+		return -ENOMEM;
+
+	cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev,
+					cdev);
+	fp->private_data = cp_inst;
+
+	return 0;
+}
+
+static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg)
+{
+	void __user *uptr = (void __user *)arg;
+	struct vas_tx_win_open_attr uattr;
+	struct coproc_instance *cp_inst;
+	struct vas_window *txwin;
+	int rc;
+
+	cp_inst = fp->private_data;
+
+	/*
+	 * One window for file descriptor
+	 */
+	if (cp_inst->txwin)
+		return -EEXIST;
+
+	rc = copy_from_user(&uattr, uptr, sizeof(uattr));
+	if (rc) {
+		pr_err("%s(): copy_from_user() returns %d\n", __func__, rc);
+		return -EFAULT;
+	}
+
+	if (uattr.version != 1) {
+		pr_err("Invalid window open API version\n");
+		return -EINVAL;
+	}
+
+	if (!cp_inst->coproc->vops && !cp_inst->coproc->vops->open_win) {
+		pr_err("VAS API is not registered\n");
+		return -EACCES;
+	}
+
+	txwin = cp_inst->coproc->vops->open_win(uattr.vas_id, uattr.flags,
+						cp_inst->coproc->cop_type);
+	if (IS_ERR(txwin)) {
+		pr_err("%s() VAS window open failed, %ld\n", __func__,
+				PTR_ERR(txwin));
+		return PTR_ERR(txwin);
+	}
+
+	cp_inst->txwin = txwin;
+
+	return 0;
+}
+
+static int coproc_release(struct inode *inode, struct file *fp)
+{
+	struct coproc_instance *cp_inst = fp->private_data;
+	int rc;
+
+	if (cp_inst->txwin) {
+		if (cp_inst->coproc->vops &&
+			cp_inst->coproc->vops->close_win) {
+			rc = cp_inst->coproc->vops->close_win(cp_inst->txwin);
+			if (rc)
+				return rc;
+		}
+		cp_inst->txwin = NULL;
+	}
+
+	kfree(cp_inst);
+	fp->private_data = NULL;
+
+	/*
+	 * We don't know here if user has other receive windows
+	 * open, so we can't really call clear_thread_tidr().
+	 * So, once the process calls set_thread_tidr(), the
+	 * TIDR value sticks around until process exits, resulting
+	 * in an extra copy in restore_sprs().
+	 */
+
+	return 0;
+}
+
+static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
+{
+	struct coproc_instance *cp_inst = fp->private_data;
+	struct vas_window *txwin;
+	unsigned long pfn;
+	u64 paste_addr;
+	pgprot_t prot;
+	int rc;
+
+	txwin = cp_inst->txwin;
+
+	if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
+		pr_debug("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__,
+				(vma->vm_end - vma->vm_start), PAGE_SIZE);
+		return -EINVAL;
+	}
+
+	/* Ensure instance has an open send window */
+	if (!txwin) {
+		pr_err("%s(): No send window open?\n", __func__);
+		return -EINVAL;
+	}
+
+	if (!cp_inst->coproc->vops && !cp_inst->coproc->vops->paste_addr) {
+		pr_err("%s(): VAS API is not registered\n", __func__);
+		return -EACCES;
+	}
+
+	paste_addr = cp_inst->coproc->vops->paste_addr(txwin);
+	if (!paste_addr) {
+		pr_err("%s(): Window paste address failed\n", __func__);
+		return -EINVAL;
+	}
+
+	pfn = paste_addr >> PAGE_SHIFT;
+
+	/* flags, page_prot from cxl_mmap(), except we want cachable */
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
+
+	prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY);
+
+	rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
+			vma->vm_end - vma->vm_start, prot);
+
+	pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__,
+			paste_addr, vma->vm_start, rc);
+
+	return rc;
+}
+
+static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case VAS_TX_WIN_OPEN:
+		return coproc_ioc_tx_win_open(fp, arg);
+	default:
+		return -EINVAL;
+	}
+}
+
+static struct file_operations coproc_fops = {
+	.open = coproc_open,
+	.release = coproc_release,
+	.mmap = coproc_mmap,
+	.unlocked_ioctl = coproc_ioctl,
+};
+
+/*
+ * Supporting only nx-gzip coprocessor type now, but this API code
+ * extended to other coprocessor types later.
+ */
+int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,
+			    const char *name,
+			    const struct vas_user_win_ops *vops)
+{
+	int rc = -EINVAL;
+	dev_t devno;
+
+	rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name);
+	if (rc) {
+		pr_err("Unable to allocate coproc major number: %i\n", rc);
+		return rc;
+	}
+
+	pr_devel("%s device allocated, dev [%i,%i]\n", name,
+			MAJOR(coproc_device.devt), MINOR(coproc_device.devt));
+
+	coproc_device.class = class_create(mod, name);
+	if (IS_ERR(coproc_device.class)) {
+		rc = PTR_ERR(coproc_device.class);
+		pr_err("Unable to create %s class %d\n", name, rc);
+		goto err_class;
+	}
+	coproc_device.class->devnode = coproc_devnode;
+	coproc_device.cop_type = cop_type;
+	coproc_device.vops = vops;
+
+	coproc_fops.owner = mod;
+	cdev_init(&coproc_device.cdev, &coproc_fops);
+
+	devno = MKDEV(MAJOR(coproc_device.devt), 0);
+	rc = cdev_add(&coproc_device.cdev, devno, 1);
+	if (rc) {
+		pr_err("cdev_add() failed %d\n", rc);
+		goto err_cdev;
+	}
+
+	coproc_device.device = device_create(coproc_device.class, NULL,
+			devno, NULL, name, MINOR(devno));
+	if (IS_ERR(coproc_device.device)) {
+		rc = PTR_ERR(coproc_device.device);
+		pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc);
+		goto err;
+	}
+
+	pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno),
+			MINOR(devno));
+
+	return 0;
+
+err:
+	cdev_del(&coproc_device.cdev);
+err_cdev:
+	class_destroy(coproc_device.class);
+err_class:
+	unregister_chrdev_region(coproc_device.devt, 1);
+	return rc;
+}
+
+void vas_unregister_coproc_api(void)
+{
+	dev_t devno;
+
+	cdev_del(&coproc_device.cdev);
+	devno = MKDEV(MAJOR(coproc_device.devt), 0);
+	device_destroy(coproc_device.class, devno);
+
+	class_destroy(coproc_device.class);
+	unregister_chrdev_region(coproc_device.devt, 1);
+}
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index e7c976bcadff..cb70c5f25bc6 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -35,6 +35,7 @@ config PPC_IBM_CELL_BLADE
 config AXON_MSI
 	bool
 	depends on PPC_IBM_CELL_BLADE && PCI_MSI
+	select IRQ_DOMAIN_NOMAP
 	default y
 
 menu "Cell Broadband Engine options"
diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c
index 35bbd15582af..b207a7f99be5 100644
--- a/arch/powerpc/platforms/cell/pmu.c
+++ b/arch/powerpc/platforms/cell/pmu.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/types.h>
 #include <linux/export.h>
 #include <asm/io.h>
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index c855a0aeb49c..d7ab868aab54 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -78,9 +78,6 @@ static inline int smp_startup_cpu(unsigned int lcpu)
 
 	pcpu = get_hard_smp_processor_id(lcpu);
 
-	/* Fixup atomic count: it exited inside IRQ handler. */
-	task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count	= 0;
-
 	/*
 	 * If the RTAS start-cpu token does not exist then presume the
 	 * cpu is already spinning.
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
index 93ea41680f54..a1c293f42a1f 100644
--- a/arch/powerpc/platforms/cell/spider-pci.c
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -25,10 +25,9 @@ struct spiderpci_iowa_private {
 static void spiderpci_io_flush(struct iowa_bus *bus)
 {
 	struct spiderpci_iowa_private *priv;
-	u32 val;
 
 	priv = bus->private;
-	val = in_be32(priv->regs + SPIDER_PCI_DUMMY_READ);
+	in_be32(priv->regs + SPIDER_PCI_DUMMY_READ);
 	iosync();
 }
 
diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c
index d56b4e3241cd..b41e81b22fdc 100644
--- a/arch/powerpc/platforms/cell/spufs/switch.c
+++ b/arch/powerpc/platforms/cell/spufs/switch.c
@@ -1657,14 +1657,13 @@ static inline void restore_spu_mb(struct spu_state *csa, struct spu *spu)
 static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu)
 {
 	struct spu_problem __iomem *prob = spu->problem;
-	u32 dummy = 0;
 
 	/* Restore, Step 66:
 	 *     If CSA.MB_Stat[P]=0 (mailbox empty) then
 	 *     read from the PPU_MB register.
 	 */
 	if ((csa->prob.mb_stat_R & 0xFF) == 0) {
-		dummy = in_be32(&prob->pu_mb_R);
+		in_be32(&prob->pu_mb_R);
 		eieio();
 	}
 }
@@ -1672,14 +1671,13 @@ static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu)
 static inline void check_ppuint_mb_stat(struct spu_state *csa, struct spu *spu)
 {
 	struct spu_priv2 __iomem *priv2 = spu->priv2;
-	u64 dummy = 0UL;
 
 	/* Restore, Step 66:
 	 *     If CSA.MB_Stat[I]=0 (mailbox empty) then
 	 *     read from the PPUINT_MB register.
 	 */
 	if ((csa->prob.mb_stat_R & 0xFF0000) == 0) {
-		dummy = in_be64(&priv2->puint_mb_R);
+		in_be64(&priv2->puint_mb_R);
 		eieio();
 		spu_int_stat_clear(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
 		eieio();
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
index d39a9213a3e6..609bda2ad5dd 100644
--- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -12,6 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <asm/io.h>
diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c
index 53065d564161..85521b3e7098 100644
--- a/arch/powerpc/platforms/embedded6xx/holly.c
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -251,8 +251,8 @@ static int ppc750_machine_check_exception(struct pt_regs *regs)
 	/* Are we prepared to handle this fault */
 	if ((entry = search_exception_tables(regs->nip)) != NULL) {
 		tsi108_clear_pci_cfg_error();
-		regs->msr |= MSR_RI;
-		regs->nip = extable_fixup(entry);
+		regs_set_return_msr(regs, regs->msr | MSR_RI);
+		regs_set_return_ip(regs, extable_fixup(entry));
 		return 1;
 	}
 	return 0;
diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
index 5565647dc879..d8da6a483e59 100644
--- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
+++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
@@ -173,8 +173,8 @@ static int mpc7448_machine_check_exception(struct pt_regs *regs)
 	/* Are we prepared to handle this fault */
 	if ((entry = search_exception_tables(regs->nip)) != NULL) {
 		tsi108_clear_pci_cfg_error();
-		regs->msr |= MSR_RI;
-		regs->nip = extable_fixup(entry);
+		regs_set_return_msr(regs, regs->msr | MSR_RI);
+		regs_set_return_ip(regs, extable_fixup(entry));
 		return 1;
 	}
 	return 0;
diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig
new file mode 100644
index 000000000000..8f6a81978461
--- /dev/null
+++ b/arch/powerpc/platforms/microwatt/Kconfig
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_MICROWATT
+	depends on PPC_BOOK3S_64 && !SMP
+	bool "Microwatt SoC platform"
+	select PPC_XICS
+	select PPC_ICS_NATIVE
+	select PPC_ICP_NATIVE
+	select PPC_NATIVE
+	select PPC_UDBG_16550
+	select ARCH_RANDOM
+	help
+          This option enables support for FPGA-based Microwatt implementations.
+
diff --git a/arch/powerpc/platforms/microwatt/Makefile b/arch/powerpc/platforms/microwatt/Makefile
new file mode 100644
index 000000000000..116d6d3ad3f0
--- /dev/null
+++ b/arch/powerpc/platforms/microwatt/Makefile
@@ -0,0 +1 @@
+obj-y	+= setup.o rng.o
diff --git a/arch/powerpc/platforms/microwatt/rng.c b/arch/powerpc/platforms/microwatt/rng.c
new file mode 100644
index 000000000000..3d8ee6eb7dad
--- /dev/null
+++ b/arch/powerpc/platforms/microwatt/rng.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Derived from arch/powerpc/platforms/powernv/rng.c, which is:
+ * Copyright 2013, Michael Ellerman, IBM Corporation.
+ */
+
+#define pr_fmt(fmt)	"microwatt-rng: " fmt
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <asm/archrandom.h>
+#include <asm/cputable.h>
+#include <asm/machdep.h>
+
+#define DARN_ERR 0xFFFFFFFFFFFFFFFFul
+
+int microwatt_get_random_darn(unsigned long *v)
+{
+	unsigned long val;
+
+	/* Using DARN with L=1 - 64-bit conditioned random number */
+	asm volatile(PPC_DARN(%0, 1) : "=r"(val));
+
+	if (val == DARN_ERR)
+		return 0;
+
+	*v = val;
+
+	return 1;
+}
+
+static __init int rng_init(void)
+{
+	unsigned long val;
+	int i;
+
+	for (i = 0; i < 10; i++) {
+		if (microwatt_get_random_darn(&val)) {
+			ppc_md.get_random_seed = microwatt_get_random_darn;
+			return 0;
+		}
+	}
+
+	pr_warn("Unable to use DARN for get_random_seed()\n");
+
+	return -EIO;
+}
+machine_subsys_initcall(, rng_init);
diff --git a/arch/powerpc/platforms/microwatt/setup.c b/arch/powerpc/platforms/microwatt/setup.c
new file mode 100644
index 000000000000..0b02603bdb74
--- /dev/null
+++ b/arch/powerpc/platforms/microwatt/setup.c
@@ -0,0 +1,41 @@
+/*
+ * Microwatt FPGA-based SoC platform setup code.
+ *
+ * Copyright 2020 Paul Mackerras (paulus@ozlabs.org), IBM Corp.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/xics.h>
+#include <asm/udbg.h>
+
+static void __init microwatt_init_IRQ(void)
+{
+	xics_init();
+}
+
+static int __init microwatt_probe(void)
+{
+	return of_machine_is_compatible("microwatt-soc");
+}
+
+static int __init microwatt_populate(void)
+{
+	return of_platform_default_populate(NULL, NULL, NULL);
+}
+machine_arch_initcall(microwatt, microwatt_populate);
+
+define_machine(microwatt) {
+	.name			= "microwatt",
+	.probe			= microwatt_probe,
+	.init_IRQ		= microwatt_init_IRQ,
+	.progress		= udbg_progress,
+	.calibrate_decr		= generic_calibrate_decr,
+};
diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c
index 1c954c90b0f4..9b88e3cded7d 100644
--- a/arch/powerpc/platforms/pasemi/idle.c
+++ b/arch/powerpc/platforms/pasemi/idle.c
@@ -37,7 +37,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs)
 	 */
 
 	if (regs->msr & SRR1_WAKEMASK)
-		regs->nip = regs->link;
+		regs_set_return_ip(regs, regs->link);
 
 	switch (regs->msr & SRR1_WAKEMASK) {
 	case SRR1_WAKEDEC:
@@ -58,7 +58,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs)
 	restore_astate(hard_smp_processor_id());
 
 	/* everything handled */
-	regs->msr |= MSR_RI;
+	regs_set_return_msr(regs, regs->msr | MSR_RI);
 	return 1;
 }
 
diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig
index c02d8c503b29..b97bf12801eb 100644
--- a/arch/powerpc/platforms/powermac/Kconfig
+++ b/arch/powerpc/platforms/powermac/Kconfig
@@ -24,6 +24,7 @@ config PPC_PMAC32_PSURGE
 	bool "Support for powersurge upgrade cards" if EXPERT
 	depends on SMP && PPC32 && PPC_PMAC
 	select PPC_SMP_MUXED_IPI
+	select IRQ_DOMAIN_NOMAP
 	default y
 	help
 	  The powersurge cpu boards can be used in the generation
diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index 9d4ecd292255..d20ef35e6d9d 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -433,7 +433,7 @@ static void __init btext_welcome(boot_infos_t *bi)
 	bootx_printf("\nframe buffer at  : 0x%x", bi->dispDeviceBase);
 	bootx_printf(" (phys), 0x%x", bi->logicalDisplayBase);
 	bootx_printf(" (log)");
-	bootx_printf("\nklimit           : 0x%x",(unsigned long)klimit);
+	bootx_printf("\nklimit           : 0x%x",(unsigned long)_end);
 	bootx_printf("\nboot_info at     : 0x%x", bi);
 	__asm__ __volatile__ ("mfmsr %0" : "=r" (flags));
 	bootx_printf("\nMSR              : 0x%x", flags);
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index adae2a6712e1..bdfea6d6ab69 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -810,7 +810,7 @@ static int smp_core99_kick_cpu(int nr)
 	 *   b __secondary_start_pmac_0 + nr*8
 	 */
 	target = (unsigned long) __secondary_start_pmac_0 + nr * 8;
-	patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK);
+	patch_branch(vector, target, BRANCH_SET_LINK);
 
 	/* Put some life in our friend */
 	pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0);
@@ -823,7 +823,7 @@ static int smp_core99_kick_cpu(int nr)
 	mdelay(1);
 
 	/* Restore our exception vector */
-	patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector));
+	patch_instruction(vector, ppc_inst(save_vector));
 
 	local_irq_restore(flags);
 	if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347);
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 619b093a0657..043eefbbdd28 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -33,20 +33,6 @@ config PPC_MEMTRACE
 	  Enabling this option allows for runtime allocation of memory (RAM)
 	  for hardware tracing.
 
-config PPC_VAS
-	bool "IBM Virtual Accelerator Switchboard (VAS)"
-	depends on PPC_POWERNV && PPC_64K_PAGES
-	default y
-	help
-	  This enables support for IBM Virtual Accelerator Switchboard (VAS).
-
-	  VAS allows accelerators in co-processors like NX-GZIP and NX-842
-	  to be accessible to kernel subsystems and user processes.
-
-	  VAS adapters are found in POWER9 based systems.
-
-	  If unsure, say N.
-
 config SCOM_DEBUGFS
 	bool "Expose SCOM controllers via debugfs"
 	depends on DEBUG_FS
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index be2546b96816..dc7b37c23b60 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_MEMORY_FAILURE)	+= opal-memory-errors.o
 obj-$(CONFIG_OPAL_PRD)	+= opal-prd.o
 obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
 obj-$(CONFIG_PPC_MEMTRACE)	+= memtrace.o
-obj-$(CONFIG_PPC_VAS)	+= vas.o vas-window.o vas-debug.o vas-fault.o vas-api.o
+obj-$(CONFIG_PPC_VAS)	+= vas.o vas-window.o vas-debug.o vas-fault.o
 obj-$(CONFIG_OCXL_BASE)	+= ocxl.o
 obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o
 obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 999997d9e9a9..528a7e0cf83a 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -604,7 +604,7 @@ struct p9_sprs {
 	u64 uamor;
 };
 
-static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
+static unsigned long power9_idle_stop(unsigned long psscr)
 {
 	int cpu = raw_smp_processor_id();
 	int first = cpu_first_thread_sibling(cpu);
@@ -620,8 +620,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
 	if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
 		/* EC=ESL=0 case */
 
-		BUG_ON(!mmu_on);
-
 		/*
 		 * Wake synchronously. SRESET via xscom may still cause
 		 * a 0x100 powersave wakeup with SRR1 reason!
@@ -803,8 +801,7 @@ core_woken:
 		__slb_restore_bolted_realmode();
 
 out:
-	if (mmu_on)
-		mtmsr(MSR_KERNEL);
+	mtmsr(MSR_KERNEL);
 
 	return srr1;
 }
@@ -895,7 +892,7 @@ struct p10_sprs {
 	 */
 };
 
-static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
+static unsigned long power10_idle_stop(unsigned long psscr)
 {
 	int cpu = raw_smp_processor_id();
 	int first = cpu_first_thread_sibling(cpu);
@@ -909,8 +906,6 @@ static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
 	if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
 		/* EC=ESL=0 case */
 
-		BUG_ON(!mmu_on);
-
 		/*
 		 * Wake synchronously. SRESET via xscom may still cause
 		 * a 0x100 powersave wakeup with SRR1 reason!
@@ -991,8 +986,7 @@ core_woken:
 		__slb_restore_bolted_realmode();
 
 out:
-	if (mmu_on)
-		mtmsr(MSR_KERNEL);
+	mtmsr(MSR_KERNEL);
 
 	return srr1;
 }
@@ -1002,40 +996,10 @@ static unsigned long arch300_offline_stop(unsigned long psscr)
 {
 	unsigned long srr1;
 
-#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	__ppc64_runlatch_off();
 	if (cpu_has_feature(CPU_FTR_ARCH_31))
-		srr1 = power10_idle_stop(psscr, true);
+		srr1 = power10_idle_stop(psscr);
 	else
-		srr1 = power9_idle_stop(psscr, true);
-	__ppc64_runlatch_on();
-#else
-	/*
-	 * Tell KVM we're entering idle.
-	 * This does not have to be done in real mode because the P9 MMU
-	 * is independent per-thread. Some steppings share radix/hash mode
-	 * between threads, but in that case KVM has a barrier sync in real
-	 * mode before and after switching between radix and hash.
-	 *
-	 * kvm_start_guest must still be called in real mode though, hence
-	 * the false argument.
-	 */
-	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
-
-	__ppc64_runlatch_off();
-	if (cpu_has_feature(CPU_FTR_ARCH_31))
-		srr1 = power10_idle_stop(psscr, false);
-	else
-		srr1 = power9_idle_stop(psscr, false);
-	__ppc64_runlatch_on();
-
-	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
-	/* Order setting hwthread_state vs. testing hwthread_req */
-	smp_mb();
-	if (local_paca->kvm_hstate.hwthread_req)
-		srr1 = idle_kvm_start_guest(srr1);
-	mtmsr(MSR_KERNEL);
-#endif
+		srr1 = power9_idle_stop(psscr);
 
 	return srr1;
 }
@@ -1055,9 +1019,9 @@ void arch300_idle_type(unsigned long stop_psscr_val,
 
 	__ppc64_runlatch_off();
 	if (cpu_has_feature(CPU_FTR_ARCH_31))
-		srr1 = power10_idle_stop(psscr, true);
+		srr1 = power10_idle_stop(psscr);
 	else
-		srr1 = power9_idle_stop(psscr, true);
+		srr1 = power9_idle_stop(psscr);
 	__ppc64_runlatch_on();
 
 	fini_irq_for_idle_irqsoff();
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index 01401e3da7ca..f812c74c61e5 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/percpu.h>
 #include <linux/jump_label.h>
+#include <asm/interrupt.h>
 #include <asm/opal-api.h>
 #include <asm/trace.h>
 #include <asm/asm-prototypes.h>
@@ -100,6 +101,9 @@ static int64_t opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
 	bool mmu = (msr & (MSR_IR|MSR_DR));
 	int64_t ret;
 
+	/* OPAL call / firmware may use SRR and/or HSRR */
+	srr_regs_clobbered();
+
 	msr &= ~MSR_EE;
 
 	if (unlikely(!mmu))
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 303d7c775740..2835376e61a4 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -773,7 +773,7 @@ bool opal_mce_check_early_recovery(struct pt_regs *regs)
 	 * Setup regs->nip to rfi into fixup address.
 	 */
 	if (recover_addr)
-		regs->nip = recover_addr;
+		regs_set_return_ip(regs, recover_addr);
 
 out:
 	return !!recover_addr;
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index b18468dc31ff..6bb3c52633fb 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -711,7 +711,7 @@ int pnv_pci_cfg_write(struct pci_dn *pdn,
 	return PCIBIOS_SUCCESSFUL;
 }
 
-#if CONFIG_EEH
+#ifdef CONFIG_EEH
 static bool pnv_pci_cfg_check(struct pci_dn *pdn)
 {
 	struct eeh_dev *edev = NULL;
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 73207b53dc2b..7e98b00ea2e8 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -169,6 +169,16 @@ static void update_hid_in_slw(u64 hid0)
 	}
 }
 
+static inline void update_power8_hid0(unsigned long hid0)
+{
+	/*
+	 *  The HID0 update on Power8 should at the very least be
+	 *  preceded by a SYNC instruction followed by an ISYNC
+	 *  instruction
+	 */
+	asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0));
+}
+
 static void unsplit_core(void)
 {
 	u64 hid0, mask;
diff --git a/arch/powerpc/platforms/powernv/vas-api.c b/arch/powerpc/platforms/powernv/vas-api.c
deleted file mode 100644
index 98ed5d8c5441..000000000000
--- a/arch/powerpc/platforms/powernv/vas-api.c
+++ /dev/null
@@ -1,278 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * VAS user space API for its accelerators (Only NX-GZIP is supported now)
- * Copyright (C) 2019 Haren Myneni, IBM Corp
- */
-
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/cdev.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include <asm/vas.h>
-#include <uapi/asm/vas-api.h>
-#include "vas.h"
-
-/*
- * The driver creates the device node that can be used as follows:
- * For NX-GZIP
- *
- *	fd = open("/dev/crypto/nx-gzip", O_RDWR);
- *	rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr);
- *	paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL).
- *	vas_copy(&crb, 0, 1);
- *	vas_paste(paste_addr, 0, 1);
- *	close(fd) or exit process to close window.
- *
- * where "vas_copy" and "vas_paste" are defined in copy-paste.h.
- * copy/paste returns to the user space directly. So refer NX hardware
- * documententation for exact copy/paste usage and completion / error
- * conditions.
- */
-
-/*
- * Wrapper object for the nx-gzip device - there is just one instance of
- * this node for the whole system.
- */
-static struct coproc_dev {
-	struct cdev cdev;
-	struct device *device;
-	char *name;
-	dev_t devt;
-	struct class *class;
-	enum vas_cop_type cop_type;
-} coproc_device;
-
-struct coproc_instance {
-	struct coproc_dev *coproc;
-	struct vas_window *txwin;
-};
-
-static char *coproc_devnode(struct device *dev, umode_t *mode)
-{
-	return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev));
-}
-
-static int coproc_open(struct inode *inode, struct file *fp)
-{
-	struct coproc_instance *cp_inst;
-
-	cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL);
-	if (!cp_inst)
-		return -ENOMEM;
-
-	cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev,
-					cdev);
-	fp->private_data = cp_inst;
-
-	return 0;
-}
-
-static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg)
-{
-	void __user *uptr = (void __user *)arg;
-	struct vas_tx_win_attr txattr = {};
-	struct vas_tx_win_open_attr uattr;
-	struct coproc_instance *cp_inst;
-	struct vas_window *txwin;
-	int rc, vasid;
-
-	cp_inst = fp->private_data;
-
-	/*
-	 * One window for file descriptor
-	 */
-	if (cp_inst->txwin)
-		return -EEXIST;
-
-	rc = copy_from_user(&uattr, uptr, sizeof(uattr));
-	if (rc) {
-		pr_err("%s(): copy_from_user() returns %d\n", __func__, rc);
-		return -EFAULT;
-	}
-
-	if (uattr.version != 1) {
-		pr_err("Invalid version\n");
-		return -EINVAL;
-	}
-
-	vasid = uattr.vas_id;
-
-	vas_init_tx_win_attr(&txattr, cp_inst->coproc->cop_type);
-
-	txattr.lpid = mfspr(SPRN_LPID);
-	txattr.pidr = mfspr(SPRN_PID);
-	txattr.user_win = true;
-	txattr.rsvd_txbuf_count = false;
-	txattr.pswid = false;
-
-	pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr,
-				mfspr(SPRN_PID));
-
-	txwin = vas_tx_win_open(vasid, cp_inst->coproc->cop_type, &txattr);
-	if (IS_ERR(txwin)) {
-		pr_err("%s() vas_tx_win_open() failed, %ld\n", __func__,
-					PTR_ERR(txwin));
-		return PTR_ERR(txwin);
-	}
-
-	cp_inst->txwin = txwin;
-
-	return 0;
-}
-
-static int coproc_release(struct inode *inode, struct file *fp)
-{
-	struct coproc_instance *cp_inst = fp->private_data;
-
-	if (cp_inst->txwin) {
-		vas_win_close(cp_inst->txwin);
-		cp_inst->txwin = NULL;
-	}
-
-	kfree(cp_inst);
-	fp->private_data = NULL;
-
-	/*
-	 * We don't know here if user has other receive windows
-	 * open, so we can't really call clear_thread_tidr().
-	 * So, once the process calls set_thread_tidr(), the
-	 * TIDR value sticks around until process exits, resulting
-	 * in an extra copy in restore_sprs().
-	 */
-
-	return 0;
-}
-
-static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
-{
-	struct coproc_instance *cp_inst = fp->private_data;
-	struct vas_window *txwin;
-	unsigned long pfn;
-	u64 paste_addr;
-	pgprot_t prot;
-	int rc;
-
-	txwin = cp_inst->txwin;
-
-	if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
-		pr_debug("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__,
-				(vma->vm_end - vma->vm_start), PAGE_SIZE);
-		return -EINVAL;
-	}
-
-	/* Ensure instance has an open send window */
-	if (!txwin) {
-		pr_err("%s(): No send window open?\n", __func__);
-		return -EINVAL;
-	}
-
-	vas_win_paste_addr(txwin, &paste_addr, NULL);
-	pfn = paste_addr >> PAGE_SHIFT;
-
-	/* flags, page_prot from cxl_mmap(), except we want cachable */
-	vma->vm_flags |= VM_IO | VM_PFNMAP;
-	vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
-
-	prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY);
-
-	rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
-			vma->vm_end - vma->vm_start, prot);
-
-	pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__,
-			paste_addr, vma->vm_start, rc);
-
-	return rc;
-}
-
-static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
-{
-	switch (cmd) {
-	case VAS_TX_WIN_OPEN:
-		return coproc_ioc_tx_win_open(fp, arg);
-	default:
-		return -EINVAL;
-	}
-}
-
-static struct file_operations coproc_fops = {
-	.open = coproc_open,
-	.release = coproc_release,
-	.mmap = coproc_mmap,
-	.unlocked_ioctl = coproc_ioctl,
-};
-
-/*
- * Supporting only nx-gzip coprocessor type now, but this API code
- * extended to other coprocessor types later.
- */
-int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,
-				const char *name)
-{
-	int rc = -EINVAL;
-	dev_t devno;
-
-	rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name);
-	if (rc) {
-		pr_err("Unable to allocate coproc major number: %i\n", rc);
-		return rc;
-	}
-
-	pr_devel("%s device allocated, dev [%i,%i]\n", name,
-			MAJOR(coproc_device.devt), MINOR(coproc_device.devt));
-
-	coproc_device.class = class_create(mod, name);
-	if (IS_ERR(coproc_device.class)) {
-		rc = PTR_ERR(coproc_device.class);
-		pr_err("Unable to create %s class %d\n", name, rc);
-		goto err_class;
-	}
-	coproc_device.class->devnode = coproc_devnode;
-	coproc_device.cop_type = cop_type;
-
-	coproc_fops.owner = mod;
-	cdev_init(&coproc_device.cdev, &coproc_fops);
-
-	devno = MKDEV(MAJOR(coproc_device.devt), 0);
-	rc = cdev_add(&coproc_device.cdev, devno, 1);
-	if (rc) {
-		pr_err("cdev_add() failed %d\n", rc);
-		goto err_cdev;
-	}
-
-	coproc_device.device = device_create(coproc_device.class, NULL,
-			devno, NULL, name, MINOR(devno));
-	if (IS_ERR(coproc_device.device)) {
-		rc = PTR_ERR(coproc_device.device);
-		pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc);
-		goto err;
-	}
-
-	pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno),
-			MINOR(devno));
-
-	return 0;
-
-err:
-	cdev_del(&coproc_device.cdev);
-err_cdev:
-	class_destroy(coproc_device.class);
-err_class:
-	unregister_chrdev_region(coproc_device.devt, 1);
-	return rc;
-}
-EXPORT_SYMBOL_GPL(vas_register_coproc_api);
-
-void vas_unregister_coproc_api(void)
-{
-	dev_t devno;
-
-	cdev_del(&coproc_device.cdev);
-	devno = MKDEV(MAJOR(coproc_device.devt), 0);
-	device_destroy(coproc_device.class, devno);
-
-	class_destroy(coproc_device.class);
-	unregister_chrdev_region(coproc_device.devt, 1);
-}
-EXPORT_SYMBOL_GPL(vas_unregister_coproc_api);
diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c
index 41fa90d2f4ab..3ce89a4b54be 100644
--- a/arch/powerpc/platforms/powernv/vas-debug.c
+++ b/arch/powerpc/platforms/powernv/vas-debug.c
@@ -9,6 +9,7 @@
 #include <linux/slab.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <asm/vas.h>
 #include "vas.h"
 
 static struct dentry *vas_debugfs;
@@ -28,7 +29,7 @@ static char *cop_to_str(int cop)
 
 static int info_show(struct seq_file *s, void *private)
 {
-	struct vas_window *window = s->private;
+	struct pnv_vas_window *window = s->private;
 
 	mutex_lock(&vas_mutex);
 
@@ -36,9 +37,9 @@ static int info_show(struct seq_file *s, void *private)
 	if (!window->hvwc_map)
 		goto unlock;
 
-	seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop),
+	seq_printf(s, "Type: %s, %s\n", cop_to_str(window->vas_win.cop),
 					window->tx_win ? "Send" : "Receive");
-	seq_printf(s, "Pid : %d\n", vas_window_pid(window));
+	seq_printf(s, "Pid : %d\n", vas_window_pid(&window->vas_win));
 
 unlock:
 	mutex_unlock(&vas_mutex);
@@ -47,7 +48,7 @@ unlock:
 
 DEFINE_SHOW_ATTRIBUTE(info);
 
-static inline void print_reg(struct seq_file *s, struct vas_window *win,
+static inline void print_reg(struct seq_file *s, struct pnv_vas_window *win,
 			char *name, u32 reg)
 {
 	seq_printf(s, "0x%016llx %s\n", read_hvwc_reg(win, name, reg), name);
@@ -55,7 +56,7 @@ static inline void print_reg(struct seq_file *s, struct vas_window *win,
 
 static int hvwc_show(struct seq_file *s, void *private)
 {
-	struct vas_window *window = s->private;
+	struct pnv_vas_window *window = s->private;
 
 	mutex_lock(&vas_mutex);
 
@@ -103,8 +104,10 @@ unlock:
 
 DEFINE_SHOW_ATTRIBUTE(hvwc);
 
-void vas_window_free_dbgdir(struct vas_window *window)
+void vas_window_free_dbgdir(struct pnv_vas_window *pnv_win)
 {
+	struct vas_window *window =  &pnv_win->vas_win;
+
 	if (window->dbgdir) {
 		debugfs_remove_recursive(window->dbgdir);
 		kfree(window->dbgname);
@@ -113,21 +116,21 @@ void vas_window_free_dbgdir(struct vas_window *window)
 	}
 }
 
-void vas_window_init_dbgdir(struct vas_window *window)
+void vas_window_init_dbgdir(struct pnv_vas_window *window)
 {
 	struct dentry *d;
 
 	if (!window->vinst->dbgdir)
 		return;
 
-	window->dbgname = kzalloc(16, GFP_KERNEL);
-	if (!window->dbgname)
+	window->vas_win.dbgname = kzalloc(16, GFP_KERNEL);
+	if (!window->vas_win.dbgname)
 		return;
 
-	snprintf(window->dbgname, 16, "w%d", window->winid);
+	snprintf(window->vas_win.dbgname, 16, "w%d", window->vas_win.winid);
 
-	d = debugfs_create_dir(window->dbgname, window->vinst->dbgdir);
-	window->dbgdir = d;
+	d = debugfs_create_dir(window->vas_win.dbgname, window->vinst->dbgdir);
+	window->vas_win.dbgdir = d;
 
 	debugfs_create_file("info", 0444, d, window, &info_fops);
 	debugfs_create_file("hvwc", 0444, d, window, &hvwc_fops);
diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c
index 3d21fce254b7..a7aabc18039e 100644
--- a/arch/powerpc/platforms/powernv/vas-fault.c
+++ b/arch/powerpc/platforms/powernv/vas-fault.c
@@ -26,150 +26,6 @@
  */
 #define VAS_FAULT_WIN_FIFO_SIZE	(4 << 20)
 
-static void dump_crb(struct coprocessor_request_block *crb)
-{
-	struct data_descriptor_entry *dde;
-	struct nx_fault_stamp *nx;
-
-	dde = &crb->source;
-	pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
-		be64_to_cpu(dde->address), be32_to_cpu(dde->length),
-		dde->count, dde->index, dde->flags);
-
-	dde = &crb->target;
-	pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
-		be64_to_cpu(dde->address), be32_to_cpu(dde->length),
-		dde->count, dde->index, dde->flags);
-
-	nx = &crb->stamp.nx;
-	pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n",
-		be32_to_cpu(nx->pswid),
-		be64_to_cpu(crb->stamp.nx.fault_storage_addr),
-		nx->flags, nx->fault_status);
-}
-
-/*
- * Update the CSB to indicate a translation error.
- *
- * User space will be polling on CSB after the request is issued.
- * If NX can handle the request without any issues, it updates CSB.
- * Whereas if NX encounters page fault, the kernel will handle the
- * fault and update CSB with translation error.
- *
- * If we are unable to update the CSB means copy_to_user failed due to
- * invalid csb_addr, send a signal to the process.
- */
-static void update_csb(struct vas_window *window,
-			struct coprocessor_request_block *crb)
-{
-	struct coprocessor_status_block csb;
-	struct kernel_siginfo info;
-	struct task_struct *tsk;
-	void __user *csb_addr;
-	struct pid *pid;
-	int rc;
-
-	/*
-	 * NX user space windows can not be opened for task->mm=NULL
-	 * and faults will not be generated for kernel requests.
-	 */
-	if (WARN_ON_ONCE(!window->mm || !window->user_win))
-		return;
-
-	csb_addr = (void __user *)be64_to_cpu(crb->csb_addr);
-
-	memset(&csb, 0, sizeof(csb));
-	csb.cc = CSB_CC_FAULT_ADDRESS;
-	csb.ce = CSB_CE_TERMINATION;
-	csb.cs = 0;
-	csb.count = 0;
-
-	/*
-	 * NX operates and returns in BE format as defined CRB struct.
-	 * So saves fault_storage_addr in BE as NX pastes in FIFO and
-	 * expects user space to convert to CPU format.
-	 */
-	csb.address = crb->stamp.nx.fault_storage_addr;
-	csb.flags = 0;
-
-	pid = window->pid;
-	tsk = get_pid_task(pid, PIDTYPE_PID);
-	/*
-	 * Process closes send window after all pending NX requests are
-	 * completed. In multi-thread applications, a child thread can
-	 * open a window and can exit without closing it. May be some
-	 * requests are pending or this window can be used by other
-	 * threads later. We should handle faults if NX encounters
-	 * pages faults on these requests. Update CSB with translation
-	 * error and fault address. If csb_addr passed by user space is
-	 * invalid, send SEGV signal to pid saved in window. If the
-	 * child thread is not running, send the signal to tgid.
-	 * Parent thread (tgid) will close this window upon its exit.
-	 *
-	 * pid and mm references are taken when window is opened by
-	 * process (pid). So tgid is used only when child thread opens
-	 * a window and exits without closing it.
-	 */
-	if (!tsk) {
-		pid = window->tgid;
-		tsk = get_pid_task(pid, PIDTYPE_PID);
-		/*
-		 * Parent thread (tgid) will be closing window when it
-		 * exits. So should not get here.
-		 */
-		if (WARN_ON_ONCE(!tsk))
-			return;
-	}
-
-	/* Return if the task is exiting. */
-	if (tsk->flags & PF_EXITING) {
-		put_task_struct(tsk);
-		return;
-	}
-
-	kthread_use_mm(window->mm);
-	rc = copy_to_user(csb_addr, &csb, sizeof(csb));
-	/*
-	 * User space polls on csb.flags (first byte). So add barrier
-	 * then copy first byte with csb flags update.
-	 */
-	if (!rc) {
-		csb.flags = CSB_V;
-		/* Make sure update to csb.flags is visible now */
-		smp_mb();
-		rc = copy_to_user(csb_addr, &csb, sizeof(u8));
-	}
-	kthread_unuse_mm(window->mm);
-	put_task_struct(tsk);
-
-	/* Success */
-	if (!rc)
-		return;
-
-	pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n",
-			csb_addr, pid_vnr(pid));
-
-	clear_siginfo(&info);
-	info.si_signo = SIGSEGV;
-	info.si_errno = EFAULT;
-	info.si_code = SEGV_MAPERR;
-	info.si_addr = csb_addr;
-
-	/*
-	 * process will be polling on csb.flags after request is sent to
-	 * NX. So generally CSB update should not fail except when an
-	 * application passes invalid csb_addr. So an error message will
-	 * be displayed and leave it to user space whether to ignore or
-	 * handle this signal.
-	 */
-	rcu_read_lock();
-	rc = kill_pid_info(SIGSEGV, &info, pid);
-	rcu_read_unlock();
-
-	pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__,
-			pid_vnr(pid), rc);
-}
-
 static void dump_fifo(struct vas_instance *vinst, void *entry)
 {
 	unsigned long *end = vinst->fault_fifo + vinst->fault_fifo_size;
@@ -212,7 +68,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data)
 	struct vas_instance *vinst = data;
 	struct coprocessor_request_block *crb, *entry;
 	struct coprocessor_request_block buf;
-	struct vas_window *window;
+	struct pnv_vas_window *window;
 	unsigned long flags;
 	void *fifo;
 
@@ -272,7 +128,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data)
 				vinst->vas_id, vinst->fault_fifo, fifo,
 				vinst->fault_crbs);
 
-		dump_crb(crb);
+		vas_dump_crb(crb);
 		window = vas_pswid_to_window(vinst,
 				be32_to_cpu(crb->stamp.nx.pswid));
 
@@ -293,7 +149,14 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data)
 
 			WARN_ON_ONCE(1);
 		} else {
-			update_csb(window, crb);
+			/*
+			 * NX sees faults only with user space windows.
+			 */
+			if (window->user_win)
+				vas_update_csb(crb, &window->vas_win.task_ref);
+			else
+				WARN_ON_ONCE(!window->user_win);
+
 			/*
 			 * Return credit for send window after processing
 			 * fault CRB.
@@ -336,6 +199,7 @@ irqreturn_t vas_fault_handler(int irq, void *dev_id)
 int vas_setup_fault_window(struct vas_instance *vinst)
 {
 	struct vas_rx_win_attr attr;
+	struct vas_window *win;
 
 	vinst->fault_fifo_size = VAS_FAULT_WIN_FIFO_SIZE;
 	vinst->fault_fifo = kzalloc(vinst->fault_fifo_size, GFP_KERNEL);
@@ -364,18 +228,17 @@ int vas_setup_fault_window(struct vas_instance *vinst)
 	attr.lnotify_pid = mfspr(SPRN_PID);
 	attr.lnotify_tid = mfspr(SPRN_PID);
 
-	vinst->fault_win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT,
-					&attr);
-
-	if (IS_ERR(vinst->fault_win)) {
-		pr_err("VAS: Error %ld opening FaultWin\n",
-			PTR_ERR(vinst->fault_win));
+	win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, &attr);
+	if (IS_ERR(win)) {
+		pr_err("VAS: Error %ld opening FaultWin\n", PTR_ERR(win));
 		kfree(vinst->fault_fifo);
-		return PTR_ERR(vinst->fault_win);
+		return PTR_ERR(win);
 	}
 
+	vinst->fault_win = container_of(win, struct pnv_vas_window, vas_win);
+
 	pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n",
-			vinst->fault_win->winid, attr.lnotify_lpid,
+			vinst->fault_win->vas_win.winid, attr.lnotify_lpid,
 			attr.lnotify_pid, attr.lnotify_tid);
 
 	return 0;
diff --git a/arch/powerpc/platforms/powernv/vas-trace.h b/arch/powerpc/platforms/powernv/vas-trace.h
index a449b9f0c12e..ca2e08f2ddc0 100644
--- a/arch/powerpc/platforms/powernv/vas-trace.h
+++ b/arch/powerpc/platforms/powernv/vas-trace.h
@@ -80,7 +80,7 @@ TRACE_EVENT(	vas_tx_win_open,
 TRACE_EVENT(	vas_paste_crb,
 
 		TP_PROTO(struct task_struct *tsk,
-			struct vas_window *win),
+			struct pnv_vas_window *win),
 
 		TP_ARGS(tsk, win),
 
@@ -96,7 +96,7 @@ TRACE_EVENT(	vas_paste_crb,
 		TP_fast_assign(
 			__entry->pid = tsk->pid;
 			__entry->vasid = win->vinst->vas_id;
-			__entry->winid = win->winid;
+			__entry->winid = win->vas_win.winid;
 			__entry->paste_kaddr = (unsigned long)win->paste_kaddr
 		),
 
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index 5f5fe63a3d1c..0f8d39fbf2b2 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -16,6 +16,7 @@
 #include <linux/mmu_context.h>
 #include <asm/switch_to.h>
 #include <asm/ppc-opcode.h>
+#include <asm/vas.h>
 #include "vas.h"
 #include "copy-paste.h"
 
@@ -26,14 +27,14 @@
  * Compute the paste address region for the window @window using the
  * ->paste_base_addr and ->paste_win_id_shift we got from device tree.
  */
-void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len)
+void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr, int *len)
 {
 	int winid;
 	u64 base, shift;
 
 	base = window->vinst->paste_base_addr;
 	shift = window->vinst->paste_win_id_shift;
-	winid = window->winid;
+	winid = window->vas_win.winid;
 
 	*addr  = base + (winid << shift);
 	if (len)
@@ -42,23 +43,23 @@ void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len)
 	pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr);
 }
 
-static inline void get_hvwc_mmio_bar(struct vas_window *window,
+static inline void get_hvwc_mmio_bar(struct pnv_vas_window *window,
 			u64 *start, int *len)
 {
 	u64 pbaddr;
 
 	pbaddr = window->vinst->hvwc_bar_start;
-	*start = pbaddr + window->winid * VAS_HVWC_SIZE;
+	*start = pbaddr + window->vas_win.winid * VAS_HVWC_SIZE;
 	*len = VAS_HVWC_SIZE;
 }
 
-static inline void get_uwc_mmio_bar(struct vas_window *window,
+static inline void get_uwc_mmio_bar(struct pnv_vas_window *window,
 			u64 *start, int *len)
 {
 	u64 pbaddr;
 
 	pbaddr = window->vinst->uwc_bar_start;
-	*start = pbaddr + window->winid * VAS_UWC_SIZE;
+	*start = pbaddr + window->vas_win.winid * VAS_UWC_SIZE;
 	*len = VAS_UWC_SIZE;
 }
 
@@ -67,7 +68,7 @@ static inline void get_uwc_mmio_bar(struct vas_window *window,
  * space. Unlike MMIO regions (map_mmio_region() below), paste region must
  * be mapped cache-able and is only applicable to send windows.
  */
-static void *map_paste_region(struct vas_window *txwin)
+static void *map_paste_region(struct pnv_vas_window *txwin)
 {
 	int len;
 	void *map;
@@ -75,7 +76,7 @@ static void *map_paste_region(struct vas_window *txwin)
 	u64 start;
 
 	name = kasprintf(GFP_KERNEL, "window-v%d-w%d", txwin->vinst->vas_id,
-				txwin->winid);
+				txwin->vas_win.winid);
 	if (!name)
 		goto free_name;
 
@@ -132,7 +133,7 @@ static void unmap_region(void *addr, u64 start, int len)
 /*
  * Unmap the paste address region for a window.
  */
-static void unmap_paste_region(struct vas_window *window)
+static void unmap_paste_region(struct pnv_vas_window *window)
 {
 	int len;
 	u64 busaddr_start;
@@ -153,7 +154,7 @@ static void unmap_paste_region(struct vas_window *window)
  * path, just minimize the time we hold the mutex for now. We can add
  * a per-instance mutex later if necessary.
  */
-static void unmap_winctx_mmio_bars(struct vas_window *window)
+static void unmap_winctx_mmio_bars(struct pnv_vas_window *window)
 {
 	int len;
 	void *uwc_map;
@@ -186,7 +187,7 @@ static void unmap_winctx_mmio_bars(struct vas_window *window)
  * OS/User Window Context (UWC) MMIO Base Address Region for the given window.
  * Map these bus addresses and save the mapped kernel addresses in @window.
  */
-static int map_winctx_mmio_bars(struct vas_window *window)
+static int map_winctx_mmio_bars(struct pnv_vas_window *window)
 {
 	int len;
 	u64 start;
@@ -214,7 +215,7 @@ static int map_winctx_mmio_bars(struct vas_window *window)
  *	 registers are not sequential. And, we can only write to offsets
  *	 with valid registers.
  */
-static void reset_window_regs(struct vas_window *window)
+static void reset_window_regs(struct pnv_vas_window *window)
 {
 	write_hvwc_reg(window, VREG(LPID), 0ULL);
 	write_hvwc_reg(window, VREG(PID), 0ULL);
@@ -270,7 +271,7 @@ static void reset_window_regs(struct vas_window *window)
  * want to add fields to vas_winctx and move the initialization to
  * init_vas_winctx_regs().
  */
-static void init_xlate_regs(struct vas_window *window, bool user_win)
+static void init_xlate_regs(struct pnv_vas_window *window, bool user_win)
 {
 	u64 lpcr, val;
 
@@ -335,7 +336,7 @@ static void init_xlate_regs(struct vas_window *window, bool user_win)
  *
  * TODO: Reserved (aka dedicated) send buffers are not supported yet.
  */
-static void init_rsvd_tx_buf_count(struct vas_window *txwin,
+static void init_rsvd_tx_buf_count(struct pnv_vas_window *txwin,
 				struct vas_winctx *winctx)
 {
 	write_hvwc_reg(txwin, VREG(TX_RSVD_BUF_COUNT), 0ULL);
@@ -357,7 +358,7 @@ static void init_rsvd_tx_buf_count(struct vas_window *txwin,
  *	as a one-time task? That could work for NX but what about other
  *	receivers?  Let the receivers tell us the rx-fifo buffers for now.
  */
-static void init_winctx_regs(struct vas_window *window,
+static void init_winctx_regs(struct pnv_vas_window *window,
 			     struct vas_winctx *winctx)
 {
 	u64 val;
@@ -519,10 +520,10 @@ static int vas_assign_window_id(struct ida *ida)
 	return winid;
 }
 
-static void vas_window_free(struct vas_window *window)
+static void vas_window_free(struct pnv_vas_window *window)
 {
-	int winid = window->winid;
 	struct vas_instance *vinst = window->vinst;
+	int winid = window->vas_win.winid;
 
 	unmap_winctx_mmio_bars(window);
 
@@ -533,10 +534,10 @@ static void vas_window_free(struct vas_window *window)
 	vas_release_window_id(&vinst->ida, winid);
 }
 
-static struct vas_window *vas_window_alloc(struct vas_instance *vinst)
+static struct pnv_vas_window *vas_window_alloc(struct vas_instance *vinst)
 {
 	int winid;
-	struct vas_window *window;
+	struct pnv_vas_window *window;
 
 	winid = vas_assign_window_id(&vinst->ida);
 	if (winid < 0)
@@ -547,7 +548,7 @@ static struct vas_window *vas_window_alloc(struct vas_instance *vinst)
 		goto out_free;
 
 	window->vinst = vinst;
-	window->winid = winid;
+	window->vas_win.winid = winid;
 
 	if (map_winctx_mmio_bars(window))
 		goto out_free;
@@ -562,7 +563,7 @@ out_free:
 	return ERR_PTR(-ENOMEM);
 }
 
-static void put_rx_win(struct vas_window *rxwin)
+static void put_rx_win(struct pnv_vas_window *rxwin)
 {
 	/* Better not be a send window! */
 	WARN_ON_ONCE(rxwin->tx_win);
@@ -578,10 +579,11 @@ static void put_rx_win(struct vas_window *rxwin)
  *
  * NOTE: We access ->windows[] table and assume that vinst->mutex is held.
  */
-static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid)
+static struct pnv_vas_window *get_user_rxwin(struct vas_instance *vinst,
+					     u32 pswid)
 {
 	int vasid, winid;
-	struct vas_window *rxwin;
+	struct pnv_vas_window *rxwin;
 
 	decode_pswid(pswid, &vasid, &winid);
 
@@ -590,7 +592,7 @@ static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid)
 
 	rxwin = vinst->windows[winid];
 
-	if (!rxwin || rxwin->tx_win || rxwin->cop != VAS_COP_TYPE_FTW)
+	if (!rxwin || rxwin->tx_win || rxwin->vas_win.cop != VAS_COP_TYPE_FTW)
 		return ERR_PTR(-EINVAL);
 
 	return rxwin;
@@ -602,10 +604,10 @@ static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid)
  *
  * See also function header of set_vinst_win().
  */
-static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst,
+static struct pnv_vas_window *get_vinst_rxwin(struct vas_instance *vinst,
 			enum vas_cop_type cop, u32 pswid)
 {
-	struct vas_window *rxwin;
+	struct pnv_vas_window *rxwin;
 
 	mutex_lock(&vinst->mutex);
 
@@ -638,9 +640,9 @@ static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst,
  * window, we also save the window in the ->rxwin[] table.
  */
 static void set_vinst_win(struct vas_instance *vinst,
-			struct vas_window *window)
+			struct pnv_vas_window *window)
 {
-	int id = window->winid;
+	int id = window->vas_win.winid;
 
 	mutex_lock(&vinst->mutex);
 
@@ -649,8 +651,8 @@ static void set_vinst_win(struct vas_instance *vinst,
 	 * unless its a user (FTW) window.
 	 */
 	if (!window->user_win && !window->tx_win) {
-		WARN_ON_ONCE(vinst->rxwin[window->cop]);
-		vinst->rxwin[window->cop] = window;
+		WARN_ON_ONCE(vinst->rxwin[window->vas_win.cop]);
+		vinst->rxwin[window->vas_win.cop] = window;
 	}
 
 	WARN_ON_ONCE(vinst->windows[id] != NULL);
@@ -663,16 +665,16 @@ static void set_vinst_win(struct vas_instance *vinst,
  * Clear this window from the table(s) of windows for this VAS instance.
  * See also function header of set_vinst_win().
  */
-static void clear_vinst_win(struct vas_window *window)
+static void clear_vinst_win(struct pnv_vas_window *window)
 {
-	int id = window->winid;
+	int id = window->vas_win.winid;
 	struct vas_instance *vinst = window->vinst;
 
 	mutex_lock(&vinst->mutex);
 
 	if (!window->user_win && !window->tx_win) {
-		WARN_ON_ONCE(!vinst->rxwin[window->cop]);
-		vinst->rxwin[window->cop] = NULL;
+		WARN_ON_ONCE(!vinst->rxwin[window->vas_win.cop]);
+		vinst->rxwin[window->vas_win.cop] = NULL;
 	}
 
 	WARN_ON_ONCE(vinst->windows[id] != window);
@@ -681,7 +683,7 @@ static void clear_vinst_win(struct vas_window *window)
 	mutex_unlock(&vinst->mutex);
 }
 
-static void init_winctx_for_rxwin(struct vas_window *rxwin,
+static void init_winctx_for_rxwin(struct pnv_vas_window *rxwin,
 			struct vas_rx_win_attr *rxattr,
 			struct vas_winctx *winctx)
 {
@@ -702,7 +704,7 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin,
 
 	winctx->rx_fifo = rxattr->rx_fifo;
 	winctx->rx_fifo_size = rxattr->rx_fifo_size;
-	winctx->wcreds_max = rxwin->wcreds_max;
+	winctx->wcreds_max = rxwin->vas_win.wcreds_max;
 	winctx->pin_win = rxattr->pin_win;
 
 	winctx->nx_win = rxattr->nx_win;
@@ -851,7 +853,7 @@ EXPORT_SYMBOL_GPL(vas_init_rx_win_attr);
 struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
 			struct vas_rx_win_attr *rxattr)
 {
-	struct vas_window *rxwin;
+	struct pnv_vas_window *rxwin;
 	struct vas_winctx winctx;
 	struct vas_instance *vinst;
 
@@ -870,21 +872,21 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
 	rxwin = vas_window_alloc(vinst);
 	if (IS_ERR(rxwin)) {
 		pr_devel("Unable to allocate memory for Rx window\n");
-		return rxwin;
+		return (struct vas_window *)rxwin;
 	}
 
 	rxwin->tx_win = false;
 	rxwin->nx_win = rxattr->nx_win;
 	rxwin->user_win = rxattr->user_win;
-	rxwin->cop = cop;
-	rxwin->wcreds_max = rxattr->wcreds_max;
+	rxwin->vas_win.cop = cop;
+	rxwin->vas_win.wcreds_max = rxattr->wcreds_max;
 
 	init_winctx_for_rxwin(rxwin, rxattr, &winctx);
 	init_winctx_regs(rxwin, &winctx);
 
 	set_vinst_win(vinst, rxwin);
 
-	return rxwin;
+	return &rxwin->vas_win;
 }
 EXPORT_SYMBOL_GPL(vas_rx_win_open);
 
@@ -905,7 +907,7 @@ void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop)
 }
 EXPORT_SYMBOL_GPL(vas_init_tx_win_attr);
 
-static void init_winctx_for_txwin(struct vas_window *txwin,
+static void init_winctx_for_txwin(struct pnv_vas_window *txwin,
 			struct vas_tx_win_attr *txattr,
 			struct vas_winctx *winctx)
 {
@@ -926,7 +928,7 @@ static void init_winctx_for_txwin(struct vas_window *txwin,
 	 */
 	memset(winctx, 0, sizeof(struct vas_winctx));
 
-	winctx->wcreds_max = txwin->wcreds_max;
+	winctx->wcreds_max = txwin->vas_win.wcreds_max;
 
 	winctx->user_win = txattr->user_win;
 	winctx->nx_win = txwin->rxwin->nx_win;
@@ -946,13 +948,13 @@ static void init_winctx_for_txwin(struct vas_window *txwin,
 
 	winctx->lpid = txattr->lpid;
 	winctx->pidr = txattr->pidr;
-	winctx->rx_win_id = txwin->rxwin->winid;
+	winctx->rx_win_id = txwin->rxwin->vas_win.winid;
 	/*
 	 * IRQ and fault window setup is successful. Set fault window
 	 * for the send window so that ready to handle faults.
 	 */
 	if (txwin->vinst->virq)
-		winctx->fault_win_id = txwin->vinst->fault_win->winid;
+		winctx->fault_win_id = txwin->vinst->fault_win->vas_win.winid;
 
 	winctx->dma_type = VAS_DMA_TYPE_INJECT;
 	winctx->tc_mode = txattr->tc_mode;
@@ -962,7 +964,8 @@ static void init_winctx_for_txwin(struct vas_window *txwin,
 		winctx->irq_port = txwin->vinst->irq_port;
 
 	winctx->pswid = txattr->pswid ? txattr->pswid :
-			encode_pswid(txwin->vinst->vas_id, txwin->winid);
+			encode_pswid(txwin->vinst->vas_id,
+			txwin->vas_win.winid);
 }
 
 static bool tx_win_args_valid(enum vas_cop_type cop,
@@ -993,8 +996,8 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
 			struct vas_tx_win_attr *attr)
 {
 	int rc;
-	struct vas_window *txwin;
-	struct vas_window *rxwin;
+	struct pnv_vas_window *txwin;
+	struct pnv_vas_window *rxwin;
 	struct vas_winctx winctx;
 	struct vas_instance *vinst;
 
@@ -1020,7 +1023,7 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
 	rxwin = get_vinst_rxwin(vinst, cop, attr->pswid);
 	if (IS_ERR(rxwin)) {
 		pr_devel("No RxWin for vasid %d, cop %d\n", vasid, cop);
-		return rxwin;
+		return (struct vas_window *)rxwin;
 	}
 
 	txwin = vas_window_alloc(vinst);
@@ -1029,12 +1032,12 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
 		goto put_rxwin;
 	}
 
-	txwin->cop = cop;
+	txwin->vas_win.cop = cop;
 	txwin->tx_win = 1;
 	txwin->rxwin = rxwin;
 	txwin->nx_win = txwin->rxwin->nx_win;
 	txwin->user_win = attr->user_win;
-	txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+	txwin->vas_win.wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;
 
 	init_winctx_for_txwin(txwin, attr, &winctx);
 
@@ -1064,56 +1067,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
 			rc = -ENODEV;
 			goto free_window;
 		}
-
-		/*
-		 * Window opened by a child thread may not be closed when
-		 * it exits. So take reference to its pid and release it
-		 * when the window is free by parent thread.
-		 * Acquire a reference to the task's pid to make sure
-		 * pid will not be re-used - needed only for multithread
-		 * applications.
-		 */
-		txwin->pid = get_task_pid(current, PIDTYPE_PID);
-		/*
-		 * Acquire a reference to the task's mm.
-		 */
-		txwin->mm = get_task_mm(current);
-
-		if (!txwin->mm) {
-			put_pid(txwin->pid);
-			pr_err("VAS: pid(%d): mm_struct is not found\n",
-					current->pid);
-			rc = -EPERM;
+		rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
+		if (rc)
 			goto free_window;
-		}
 
-		mmgrab(txwin->mm);
-		mmput(txwin->mm);
-		mm_context_add_vas_window(txwin->mm);
-		/*
-		 * Process closes window during exit. In the case of
-		 * multithread application, the child thread can open
-		 * window and can exit without closing it. Expects parent
-		 * thread to use and close the window. So do not need
-		 * to take pid reference for parent thread.
-		 */
-		txwin->tgid = find_get_pid(task_tgid_vnr(current));
-		/*
-		 * Even a process that has no foreign real address mapping can
-		 * use an unpaired COPY instruction (to no real effect). Issue
-		 * CP_ABORT to clear any pending COPY and prevent a covert
-		 * channel.
-		 *
-		 * __switch_to() will issue CP_ABORT on future context switches
-		 * if process / thread has any open VAS window (Use
-		 * current->mm->context.vas_windows).
-		 */
-		asm volatile(PPC_CP_ABORT);
+		vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
 	}
 
 	set_vinst_win(vinst, txwin);
 
-	return txwin;
+	return &txwin->vas_win;
 
 free_window:
 	vas_window_free(txwin);
@@ -1132,12 +1095,14 @@ int vas_copy_crb(void *crb, int offset)
 EXPORT_SYMBOL_GPL(vas_copy_crb);
 
 #define RMA_LSMP_REPORT_ENABLE PPC_BIT(53)
-int vas_paste_crb(struct vas_window *txwin, int offset, bool re)
+int vas_paste_crb(struct vas_window *vwin, int offset, bool re)
 {
+	struct pnv_vas_window *txwin;
 	int rc;
 	void *addr;
 	uint64_t val;
 
+	txwin = container_of(vwin, struct pnv_vas_window, vas_win);
 	trace_vas_paste_crb(current, txwin);
 
 	/*
@@ -1167,7 +1132,7 @@ int vas_paste_crb(struct vas_window *txwin, int offset, bool re)
 	else
 		rc = -EINVAL;
 
-	pr_debug("Txwin #%d: Msg count %llu\n", txwin->winid,
+	pr_debug("Txwin #%d: Msg count %llu\n", txwin->vas_win.winid,
 			read_hvwc_reg(txwin, VREG(LRFIFO_PUSH)));
 
 	return rc;
@@ -1187,7 +1152,7 @@ EXPORT_SYMBOL_GPL(vas_paste_crb);
  *	user space. (NX-842 driver waits for CSB and Fast thread-wakeup
  *	doesn't use credit checking).
  */
-static void poll_window_credits(struct vas_window *window)
+static void poll_window_credits(struct pnv_vas_window *window)
 {
 	u64 val;
 	int creds, mode;
@@ -1217,7 +1182,7 @@ retry:
 	 *       and issue CRB Kill to stop all pending requests. Need only
 	 *       if there is a bug in NX or fault handling in kernel.
 	 */
-	if (creds < window->wcreds_max) {
+	if (creds < window->vas_win.wcreds_max) {
 		val = 0;
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule_timeout(msecs_to_jiffies(10));
@@ -1228,7 +1193,8 @@ retry:
 		 */
 		if (!(count % 1000))
 			pr_warn_ratelimited("VAS: pid %d stuck. Waiting for credits returned for Window(%d). creds %d, Retries %d\n",
-				vas_window_pid(window), window->winid,
+				vas_window_pid(&window->vas_win),
+				window->vas_win.winid,
 				creds, count);
 
 		goto retry;
@@ -1240,7 +1206,7 @@ retry:
  * short time to queue a CRB, so window should not be busy for too long.
  * Trying 5ms intervals.
  */
-static void poll_window_busy_state(struct vas_window *window)
+static void poll_window_busy_state(struct pnv_vas_window *window)
 {
 	int busy;
 	u64 val;
@@ -1260,7 +1226,8 @@ retry:
 		 */
 		if (!(count % 1000))
 			pr_warn_ratelimited("VAS: pid %d stuck. Window (ID=%d) is in busy state. Retries %d\n",
-				vas_window_pid(window), window->winid, count);
+				vas_window_pid(&window->vas_win),
+				window->vas_win.winid, count);
 
 		goto retry;
 	}
@@ -1282,7 +1249,7 @@ retry:
  *	casting out becomes necessary we should consider offloading the
  *	job to a worker thread, so the window close can proceed quickly.
  */
-static void poll_window_castout(struct vas_window *window)
+static void poll_window_castout(struct pnv_vas_window *window)
 {
 	/* stub for now */
 }
@@ -1291,7 +1258,7 @@ static void poll_window_castout(struct vas_window *window)
  * Unpin and close a window so no new requests are accepted and the
  * hardware can evict this window from cache if necessary.
  */
-static void unpin_close_window(struct vas_window *window)
+static void unpin_close_window(struct pnv_vas_window *window)
 {
 	u64 val;
 
@@ -1313,11 +1280,15 @@ static void unpin_close_window(struct vas_window *window)
  *
  * Besides the hardware, kernel has some bookkeeping of course.
  */
-int vas_win_close(struct vas_window *window)
+int vas_win_close(struct vas_window *vwin)
 {
-	if (!window)
+	struct pnv_vas_window *window;
+
+	if (!vwin)
 		return 0;
 
+	window = container_of(vwin, struct pnv_vas_window, vas_win);
+
 	if (!window->tx_win && atomic_read(&window->num_txwins) != 0) {
 		pr_devel("Attempting to close an active Rx window!\n");
 		WARN_ON_ONCE(1);
@@ -1339,12 +1310,8 @@ int vas_win_close(struct vas_window *window)
 	/* if send window, drop reference to matching receive window */
 	if (window->tx_win) {
 		if (window->user_win) {
-			/* Drop references to pid and mm */
-			put_pid(window->pid);
-			if (window->mm) {
-				mm_context_remove_vas_window(window->mm);
-				mmdrop(window->mm);
-			}
+			put_vas_user_win_ref(&vwin->task_ref);
+			mm_context_remove_vas_window(vwin->task_ref.mm);
 		}
 		put_rx_win(window->rxwin);
 	}
@@ -1377,7 +1344,7 @@ EXPORT_SYMBOL_GPL(vas_win_close);
  * - The kernel with return credit on fault window after reading entry
  *   from fault FIFO.
  */
-void vas_return_credit(struct vas_window *window, bool tx)
+void vas_return_credit(struct pnv_vas_window *window, bool tx)
 {
 	uint64_t val;
 
@@ -1391,10 +1358,10 @@ void vas_return_credit(struct vas_window *window, bool tx)
 	}
 }
 
-struct vas_window *vas_pswid_to_window(struct vas_instance *vinst,
+struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst,
 		uint32_t pswid)
 {
-	struct vas_window *window;
+	struct pnv_vas_window *window;
 	int winid;
 
 	if (!pswid) {
@@ -1431,13 +1398,74 @@ struct vas_window *vas_pswid_to_window(struct vas_instance *vinst,
 	 * by NX).
 	 */
 	if (!window->tx_win || !window->user_win || !window->nx_win ||
-			window->cop == VAS_COP_TYPE_FAULT ||
-			window->cop == VAS_COP_TYPE_FTW) {
+			window->vas_win.cop == VAS_COP_TYPE_FAULT ||
+			window->vas_win.cop == VAS_COP_TYPE_FTW) {
 		pr_err("PSWID decode: id %d, tx %d, user %d, nx %d, cop %d\n",
 			winid, window->tx_win, window->user_win,
-			window->nx_win, window->cop);
+			window->nx_win, window->vas_win.cop);
 		WARN_ON(1);
 	}
 
 	return window;
 }
+
+static struct vas_window *vas_user_win_open(int vas_id, u64 flags,
+				enum vas_cop_type cop_type)
+{
+	struct vas_tx_win_attr txattr = {};
+
+	vas_init_tx_win_attr(&txattr, cop_type);
+
+	txattr.lpid = mfspr(SPRN_LPID);
+	txattr.pidr = mfspr(SPRN_PID);
+	txattr.user_win = true;
+	txattr.rsvd_txbuf_count = false;
+	txattr.pswid = false;
+
+	pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr,
+				mfspr(SPRN_PID));
+
+	return vas_tx_win_open(vas_id, cop_type, &txattr);
+}
+
+static u64 vas_user_win_paste_addr(struct vas_window *txwin)
+{
+	struct pnv_vas_window *win;
+	u64 paste_addr;
+
+	win = container_of(txwin, struct pnv_vas_window, vas_win);
+	vas_win_paste_addr(win, &paste_addr, NULL);
+
+	return paste_addr;
+}
+
+static int vas_user_win_close(struct vas_window *txwin)
+{
+	vas_win_close(txwin);
+
+	return 0;
+}
+
+static const struct vas_user_win_ops vops =  {
+	.open_win	=	vas_user_win_open,
+	.paste_addr	=	vas_user_win_paste_addr,
+	.close_win	=	vas_user_win_close,
+};
+
+/*
+ * Supporting only nx-gzip coprocessor type now, but this API code
+ * extended to other coprocessor types later.
+ */
+int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type,
+			     const char *name)
+{
+
+	return vas_register_coproc_api(mod, cop_type, name, &vops);
+}
+EXPORT_SYMBOL_GPL(vas_register_api_powernv);
+
+void vas_unregister_api_powernv(void)
+{
+	vas_unregister_coproc_api();
+}
+EXPORT_SYMBOL_GPL(vas_unregister_api_powernv);
diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h
index c7db3190baca..8bb08e395de0 100644
--- a/arch/powerpc/platforms/powernv/vas.h
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -334,11 +334,11 @@ struct vas_instance {
 	int fifo_in_progress;	/* To wake up thread or return IRQ_HANDLED */
 	spinlock_t fault_lock;	/* Protects fifo_in_progress update */
 	void *fault_fifo;
-	struct vas_window *fault_win; /* Fault window */
+	struct pnv_vas_window *fault_win; /* Fault window */
 
 	struct mutex mutex;
-	struct vas_window *rxwin[VAS_COP_TYPE_MAX];
-	struct vas_window *windows[VAS_WINDOWS_PER_CHIP];
+	struct pnv_vas_window *rxwin[VAS_COP_TYPE_MAX];
+	struct pnv_vas_window *windows[VAS_WINDOWS_PER_CHIP];
 
 	char *name;
 	char *dbgname;
@@ -346,32 +346,24 @@ struct vas_instance {
 };
 
 /*
- * In-kernel state a VAS window. One per window.
+ * In-kernel state a VAS window on PowerNV. One per window.
  */
-struct vas_window {
+struct pnv_vas_window {
+	struct vas_window vas_win;
 	/* Fields common to send and receive windows */
 	struct vas_instance *vinst;
-	int winid;
 	bool tx_win;		/* True if send window */
 	bool nx_win;		/* True if NX window */
 	bool user_win;		/* True if user space window */
 	void *hvwc_map;		/* HV window context */
 	void *uwc_map;		/* OS/User window context */
-	struct pid *pid;	/* Linux process id of owner */
-	struct pid *tgid;	/* Thread group ID of owner */
-	struct mm_struct *mm;	/* Linux process mm_struct */
-	int wcreds_max;		/* Window credits */
-
-	char *dbgname;
-	struct dentry *dbgdir;
 
 	/* Fields applicable only to send windows */
 	void *paste_kaddr;
 	char *paste_addr_name;
-	struct vas_window *rxwin;
+	struct pnv_vas_window *rxwin;
 
-	/* Feilds applicable only to receive windows */
-	enum vas_cop_type cop;
+	/* Fields applicable only to receive windows */
 	atomic_t num_txwins;
 };
 
@@ -430,32 +422,32 @@ extern struct mutex vas_mutex;
 extern struct vas_instance *find_vas_instance(int vasid);
 extern void vas_init_dbgdir(void);
 extern void vas_instance_init_dbgdir(struct vas_instance *vinst);
-extern void vas_window_init_dbgdir(struct vas_window *win);
-extern void vas_window_free_dbgdir(struct vas_window *win);
+extern void vas_window_init_dbgdir(struct pnv_vas_window *win);
+extern void vas_window_free_dbgdir(struct pnv_vas_window *win);
 extern int vas_setup_fault_window(struct vas_instance *vinst);
 extern irqreturn_t vas_fault_thread_fn(int irq, void *data);
 extern irqreturn_t vas_fault_handler(int irq, void *dev_id);
-extern void vas_return_credit(struct vas_window *window, bool tx);
-extern struct vas_window *vas_pswid_to_window(struct vas_instance *vinst,
+extern void vas_return_credit(struct pnv_vas_window *window, bool tx);
+extern struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst,
 						uint32_t pswid);
-extern void vas_win_paste_addr(struct vas_window *window, u64 *addr,
-					int *len);
+extern void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr,
+				int *len);
 
 static inline int vas_window_pid(struct vas_window *window)
 {
-	return pid_vnr(window->pid);
+	return pid_vnr(window->task_ref.pid);
 }
 
-static inline void vas_log_write(struct vas_window *win, char *name,
+static inline void vas_log_write(struct pnv_vas_window *win, char *name,
 			void *regptr, u64 val)
 {
 	if (val)
 		pr_debug("%swin #%d: %s reg %p, val 0x%016llx\n",
-				win->tx_win ? "Tx" : "Rx", win->winid, name,
-				regptr, val);
+				win->tx_win ? "Tx" : "Rx", win->vas_win.winid,
+				name, regptr, val);
 }
 
-static inline void write_uwc_reg(struct vas_window *win, char *name,
+static inline void write_uwc_reg(struct pnv_vas_window *win, char *name,
 			s32 reg, u64 val)
 {
 	void *regptr;
@@ -466,7 +458,7 @@ static inline void write_uwc_reg(struct vas_window *win, char *name,
 	out_be64(regptr, val);
 }
 
-static inline void write_hvwc_reg(struct vas_window *win, char *name,
+static inline void write_hvwc_reg(struct pnv_vas_window *win, char *name,
 			s32 reg, u64 val)
 {
 	void *regptr;
@@ -477,7 +469,7 @@ static inline void write_hvwc_reg(struct vas_window *win, char *name,
 	out_be64(regptr, val);
 }
 
-static inline u64 read_hvwc_reg(struct vas_window *win,
+static inline u64 read_hvwc_reg(struct pnv_vas_window *win,
 			char *name __maybe_unused, s32 reg)
 {
 	return in_be64(win->hvwc_map+reg);
diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig
index e32406e918d0..a4048b8c8c50 100644
--- a/arch/powerpc/platforms/ps3/Kconfig
+++ b/arch/powerpc/platforms/ps3/Kconfig
@@ -7,6 +7,7 @@ config PPC_PS3
 	select USB_OHCI_BIG_ENDIAN_MMIO
 	select USB_EHCI_BIG_ENDIAN_MMIO
 	select HAVE_PCI
+	select IRQ_DOMAIN_NOMAP
 	help
 	  This option enables support for the Sony PS3 game console
 	  and other platforms using the PS3 hypervisor.  Enabling this
@@ -85,6 +86,15 @@ config PS3_SYS_MANAGER
 	  This support is required for PS3 system control.  In
 	  general, all users will say Y or M.
 
+config PS3_VERBOSE_RESULT
+	bool "PS3 Verbose LV1 hypercall results" if PS3_ADVANCED
+	depends on PPC_PS3
+	help
+	  Enables more verbose log mesages for LV1 hypercall results.
+
+	  If in doubt, say N here and reduce the size of the kernel by a
+	  small amount.
+
 config PS3_REPOSITORY_WRITE
 	bool "PS3 Repository write support" if PS3_ADVANCED
 	depends on PPC_PS3
diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c
index 78f2339ed5cb..49871427f599 100644
--- a/arch/powerpc/platforms/ps3/interrupt.c
+++ b/arch/powerpc/platforms/ps3/interrupt.c
@@ -9,6 +9,7 @@
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 
 #include <asm/machdep.h>
 #include <asm/udbg.h>
@@ -45,7 +46,7 @@
  * implementation equates HV plug value to Linux virq value, constrains each
  * interrupt to have a system wide unique plug number, and limits the range
  * of the plug values to map into the first dword of the bitmaps.  This
- * gives a usable range of plug values of  {NUM_ISA_INTERRUPTS..63}.  Note
+ * gives a usable range of plug values of  {NR_IRQS_LEGACY..63}.  Note
  * that there is no constraint on how many in this set an individual thread
  * can acquire.
  *
@@ -721,7 +722,7 @@ static unsigned int ps3_get_irq(void)
 	}
 
 #if defined(DEBUG)
-	if (unlikely(plug < NUM_ISA_INTERRUPTS || plug > PS3_PLUG_MAX)) {
+	if (unlikely(plug < NR_IRQS_LEGACY || plug > PS3_PLUG_MAX)) {
 		dump_bmp(&per_cpu(ps3_private, 0));
 		dump_bmp(&per_cpu(ps3_private, 1));
 		BUG();
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index d094321964fb..a81eac35d900 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -6,6 +6,7 @@
  *  Copyright 2006 Sony Corp.
  */
 
+#include <linux/dma-mapping.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/memblock.h>
@@ -1118,6 +1119,7 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev,
 	enum ps3_dma_region_type region_type, void *addr, unsigned long len)
 {
 	unsigned long lpar_addr;
+	int result;
 
 	lpar_addr = addr ? ps3_mm_phys_to_lpar(__pa(addr)) : 0;
 
@@ -1129,6 +1131,16 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev,
 		r->offset -= map.r1.offset;
 	r->len = len ? len : ALIGN(map.total, 1 << r->page_size);
 
+	dev->core.dma_mask = &r->dma_mask;
+
+	result = dma_set_mask_and_coherent(&dev->core, DMA_BIT_MASK(32));
+
+	if (result < 0) {
+		dev_err(&dev->core, "%s:%d: dma_set_mask_and_coherent failed: %d\n",
+			__func__, __LINE__, result);
+		return result;
+	}
+
 	switch (dev->dev_type) {
 	case PS3_DEVICE_TYPE_SB:
 		r->region_ops =  (USE_DYNAMIC_DMA)
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index e9ae5dd03593..3de9145c20bc 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -36,6 +36,7 @@ DEFINE_MUTEX(ps3_gpu_mutex);
 EXPORT_SYMBOL_GPL(ps3_gpu_mutex);
 
 static union ps3_firmware_version ps3_firmware_version;
+static char ps3_firmware_version_str[16];
 
 void ps3_get_firmware_version(union ps3_firmware_version *v)
 {
@@ -182,6 +183,40 @@ static int ps3_set_dabr(unsigned long dabr, unsigned long dabrx)
 	return lv1_set_dabr(dabr, dabrx) ? -1 : 0;
 }
 
+static ssize_t ps3_fw_version_show(struct kobject *kobj,
+	struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s", ps3_firmware_version_str);
+}
+
+static int __init ps3_setup_sysfs(void)
+{
+	static struct kobj_attribute attr = __ATTR(fw-version, S_IRUGO,
+		ps3_fw_version_show, NULL);
+	static struct kobject *kobj;
+	int result;
+
+	kobj = kobject_create_and_add("ps3", firmware_kobj);
+
+	if (!kobj) {
+		pr_warn("%s:%d: kobject_create_and_add failed.\n", __func__,
+			__LINE__);
+		return -ENOMEM;
+	}
+
+	result = sysfs_create_file(kobj, &attr.attr);
+
+	if (result) {
+		pr_warn("%s:%d: sysfs_create_file failed.\n", __func__,
+			__LINE__);
+		kobject_put(kobj);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+core_initcall(ps3_setup_sysfs);
+
 static void __init ps3_setup_arch(void)
 {
 	u64 tmp;
@@ -190,9 +225,11 @@ static void __init ps3_setup_arch(void)
 
 	lv1_get_version_info(&ps3_firmware_version.raw, &tmp);
 
-	printk(KERN_INFO "PS3 firmware version %u.%u.%u\n",
-	       ps3_firmware_version.major, ps3_firmware_version.minor,
-	       ps3_firmware_version.rev);
+	snprintf(ps3_firmware_version_str, sizeof(ps3_firmware_version_str),
+		"%u.%u.%u", ps3_firmware_version.major,
+		ps3_firmware_version.minor, ps3_firmware_version.rev);
+
+	printk(KERN_INFO "PS3 firmware version %s\n", ps3_firmware_version_str);
 
 	ps3_spu_set_platform();
 
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index b431f41c6cb5..1a5665875165 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -64,9 +64,10 @@ static int ps3_open_hv_device_sb(struct ps3_system_bus_device *dev)
 	result = lv1_open_device(dev->bus_id, dev->dev_id, 0);
 
 	if (result) {
-		pr_debug("%s:%d: lv1_open_device failed: %s\n", __func__,
-			__LINE__, ps3_result(result));
-			result = -EPERM;
+		pr_warn("%s:%d: lv1_open_device dev=%u.%u(%s) failed: %s\n",
+			__func__, __LINE__, dev->match_id, dev->match_sub_id,
+			dev_name(&dev->core), ps3_result(result));
+		result = -EPERM;
 	}
 
 done:
@@ -120,7 +121,7 @@ static int ps3_open_hv_device_gpu(struct ps3_system_bus_device *dev)
 	result = lv1_gpu_open(0);
 
 	if (result) {
-		pr_debug("%s:%d: lv1_gpu_open failed: %s\n", __func__,
+		pr_warn("%s:%d: lv1_gpu_open failed: %s\n", __func__,
 			__LINE__, ps3_result(result));
 			result = -EPERM;
 	}
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index c8a2b0b05ac0..4cda0ef87be0 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -30,3 +30,4 @@ obj-$(CONFIG_PPC_SVM)		+= svm.o
 obj-$(CONFIG_FA_DUMP)		+= rtas-fadump.o
 
 obj-$(CONFIG_SUSPEND)		+= suspend.o
+obj-$(CONFIG_PPC_VAS)		+= vas.o
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 3ac70790ec7a..b1f01ac0c29e 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -289,8 +289,7 @@ int dlpar_acquire_drc(u32 drc_index)
 {
 	int dr_status, rc;
 
-	rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
-		       DR_ENTITY_SENSE, drc_index);
+	rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status);
 	if (rc || dr_status != DR_ENTITY_UNUSABLE)
 		return -1;
 
@@ -311,8 +310,7 @@ int dlpar_release_drc(u32 drc_index)
 {
 	int dr_status, rc;
 
-	rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
-		       DR_ENTITY_SENSE, drc_index);
+	rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status);
 	if (rc || dr_status != DR_ENTITY_PRESENT)
 		return -1;
 
@@ -333,8 +331,7 @@ int dlpar_unisolate_drc(u32 drc_index)
 {
 	int dr_status, rc;
 
-	rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
-				DR_ENTITY_SENSE, drc_index);
+	rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status);
 	if (rc || dr_status != DR_ENTITY_PRESENT)
 		return -1;
 
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 8377f1f7c78e..377d852f5a9a 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -348,7 +348,8 @@ static int pseries_remove_mem_node(struct device_node *np)
 
 static bool lmb_is_removable(struct drmem_lmb *lmb)
 {
-	if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
+	if ((lmb->flags & DRCONF_MEM_RESERVED) ||
+		!(lmb->flags & DRCONF_MEM_ASSIGNED))
 		return false;
 
 #ifdef CONFIG_FA_DUMP
@@ -401,7 +402,7 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
 static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
 {
 	struct drmem_lmb *lmb;
-	int lmbs_removed = 0;
+	int lmbs_reserved = 0;
 	int lmbs_available = 0;
 	int rc;
 
@@ -435,12 +436,12 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
 		 */
 		drmem_mark_lmb_reserved(lmb);
 
-		lmbs_removed++;
-		if (lmbs_removed == lmbs_to_remove)
+		lmbs_reserved++;
+		if (lmbs_reserved == lmbs_to_remove)
 			break;
 	}
 
-	if (lmbs_removed != lmbs_to_remove) {
+	if (lmbs_reserved != lmbs_to_remove) {
 		pr_err("Memory hot-remove failed, adding LMB's back\n");
 
 		for_each_drmem_lmb(lmb) {
@@ -453,6 +454,10 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
 				       lmb->drc_index);
 
 			drmem_remove_lmb_reservation(lmb);
+
+			lmbs_reserved--;
+			if (lmbs_reserved == 0)
+				break;
 		}
 
 		rc = -EINVAL;
@@ -466,6 +471,10 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
 				lmb->base_addr);
 
 			drmem_remove_lmb_reservation(lmb);
+
+			lmbs_reserved--;
+			if (lmbs_reserved == 0)
+				break;
 		}
 		rc = 0;
 	}
@@ -508,7 +517,6 @@ static int dlpar_memory_remove_by_index(u32 drc_index)
 static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
 {
 	struct drmem_lmb *lmb, *start_lmb, *end_lmb;
-	int lmbs_available = 0;
 	int rc;
 
 	pr_info("Attempting to hot-remove %u LMB(s) at %x\n",
@@ -521,18 +529,29 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
 	if (rc)
 		return -EINVAL;
 
-	/* Validate that there are enough LMBs to satisfy the request */
+	/*
+	 * Validate that all LMBs in range are not reserved. Note that it
+	 * is ok if they are !ASSIGNED since our goal here is to remove the
+	 * LMB range, regardless of whether some LMBs were already removed
+	 * by any other reason.
+	 *
+	 * This is a contrast to what is done in remove_by_count() where we
+	 * check for both RESERVED and !ASSIGNED (via lmb_is_removable()),
+	 * because we want to remove a fixed amount of LMBs in that function.
+	 */
 	for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
-		if (lmb->flags & DRCONF_MEM_RESERVED)
-			break;
-
-		lmbs_available++;
+		if (lmb->flags & DRCONF_MEM_RESERVED) {
+			pr_err("Memory at %llx (drc index %x) is reserved\n",
+				lmb->base_addr, lmb->drc_index);
+			return -EINVAL;
+		}
 	}
 
-	if (lmbs_available < lmbs_to_remove)
-		return -EINVAL;
-
 	for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+		/*
+		 * dlpar_remove_lmb() will error out if the LMB is already
+		 * !ASSIGNED, but this case is a no-op for us.
+		 */
 		if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
 			continue;
 
@@ -551,6 +570,13 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
 			if (!drmem_lmb_reserved(lmb))
 				continue;
 
+			/*
+			 * Setting the isolation state of an UNISOLATED/CONFIGURED
+			 * device to UNISOLATE is a no-op, but the hypervisor can
+			 * use it as a hint that the LMB removal failed.
+			 */
+			dlpar_unisolate_drc(lmb->drc_index);
+
 			rc = dlpar_add_lmb(lmb);
 			if (rc)
 				pr_err("Failed to add LMB, drc index %x\n",
@@ -585,10 +611,6 @@ static inline int pseries_remove_mem_node(struct device_node *np)
 {
 	return 0;
 }
-static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog)
-{
-	return -EOPNOTSUPP;
-}
 static int dlpar_remove_lmb(struct drmem_lmb *lmb)
 {
 	return -EOPNOTSUPP;
@@ -651,7 +673,7 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
 {
 	struct drmem_lmb *lmb;
 	int lmbs_available = 0;
-	int lmbs_added = 0;
+	int lmbs_reserved = 0;
 	int rc;
 
 	pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add);
@@ -661,6 +683,9 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
 
 	/* Validate that there are enough LMBs to satisfy the request */
 	for_each_drmem_lmb(lmb) {
+		if (lmb->flags & DRCONF_MEM_RESERVED)
+			continue;
+
 		if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
 			lmbs_available++;
 
@@ -689,13 +714,12 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
 		 * requested LMBs cannot be added.
 		 */
 		drmem_mark_lmb_reserved(lmb);
-
-		lmbs_added++;
-		if (lmbs_added == lmbs_to_add)
+		lmbs_reserved++;
+		if (lmbs_reserved == lmbs_to_add)
 			break;
 	}
 
-	if (lmbs_added != lmbs_to_add) {
+	if (lmbs_reserved != lmbs_to_add) {
 		pr_err("Memory hot-add failed, removing any added LMBs\n");
 
 		for_each_drmem_lmb(lmb) {
@@ -710,6 +734,10 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
 				dlpar_release_drc(lmb->drc_index);
 
 			drmem_remove_lmb_reservation(lmb);
+			lmbs_reserved--;
+
+			if (lmbs_reserved == 0)
+				break;
 		}
 		rc = -EINVAL;
 	} else {
@@ -720,6 +748,10 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
 			pr_debug("Memory at %llx (drc index %x) was hot-added\n",
 				 lmb->base_addr, lmb->drc_index);
 			drmem_remove_lmb_reservation(lmb);
+			lmbs_reserved--;
+
+			if (lmbs_reserved == 0)
+				break;
 		}
 		rc = 0;
 	}
@@ -764,7 +796,6 @@ static int dlpar_memory_add_by_index(u32 drc_index)
 static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index)
 {
 	struct drmem_lmb *lmb, *start_lmb, *end_lmb;
-	int lmbs_available = 0;
 	int rc;
 
 	pr_info("Attempting to hot-add %u LMB(s) at index %x\n",
@@ -779,15 +810,14 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index)
 
 	/* Validate that the LMBs in this range are not reserved */
 	for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
-		if (lmb->flags & DRCONF_MEM_RESERVED)
-			break;
-
-		lmbs_available++;
+		/* Fail immediately if the whole range can't be hot-added */
+		if (lmb->flags & DRCONF_MEM_RESERVED) {
+			pr_err("Memory at %llx (drc index %x) is reserved\n",
+					lmb->base_addr, lmb->drc_index);
+			return -EINVAL;
+		}
 	}
 
-	if (lmbs_available < lmbs_to_add)
-		return -EINVAL;
-
 	for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
 		if (lmb->flags & DRCONF_MEM_ASSIGNED)
 			continue;
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index 8a2b8d64265b..ab9fc6506861 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -108,6 +108,10 @@ _GLOBAL_TOC(plpar_hcall_norets_notrace)
 	mfcr	r0
 	stw	r0,8(r1)
 	HVSC				/* invoke the hypervisor */
+
+	li	r4,0
+	stb	r4,PACASRR_VALID(r13)
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 	blr				/* return r3 = status */
@@ -120,6 +124,9 @@ _GLOBAL_TOC(plpar_hcall_norets)
 	HCALL_BRANCH(plpar_hcall_norets_trace)
 	HVSC				/* invoke the hypervisor */
 
+	li	r4,0
+	stb	r4,PACASRR_VALID(r13)
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 	blr				/* return r3 = status */
@@ -129,6 +136,10 @@ plpar_hcall_norets_trace:
 	HCALL_INST_PRECALL(R4)
 	HVSC
 	HCALL_INST_POSTCALL_NORETS
+
+	li	r4,0
+	stb	r4,PACASRR_VALID(r13)
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 	blr
@@ -159,6 +170,9 @@ _GLOBAL_TOC(plpar_hcall)
 	std	r6, 16(r12)
 	std	r7, 24(r12)
 
+	li	r4,0
+	stb	r4,PACASRR_VALID(r13)
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 
@@ -188,6 +202,9 @@ plpar_hcall_trace:
 
 	HCALL_INST_POSTCALL(r12)
 
+	li	r4,0
+	stb	r4,PACASRR_VALID(r13)
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 
@@ -223,6 +240,9 @@ _GLOBAL(plpar_hcall_raw)
 	std	r6, 16(r12)
 	std	r7, 24(r12)
 
+	li	r4,0
+	stb	r4,PACASRR_VALID(r13)
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 
@@ -262,6 +282,9 @@ _GLOBAL_TOC(plpar_hcall9)
 	std	r11,56(r12)
 	std	r0, 64(r12)
 
+	li	r4,0
+	stb	r4,PACASRR_VALID(r13)
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 
@@ -300,6 +323,9 @@ plpar_hcall9_trace:
 
 	HCALL_INST_POSTCALL(r12)
 
+	li	r4,0
+	stb	r4,PACASRR_VALID(r13)
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 
@@ -339,6 +365,9 @@ _GLOBAL(plpar_hcall9_raw)
 	std	r11,56(r12)
 	std	r0, 64(r12)
 
+	li	r4,0
+	stb	r4,PACASRR_VALID(r13)
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index a15ab33646b3..c6c79ef55e13 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -42,6 +42,7 @@
 #include <linux/kobject.h>
 #include <linux/dma-map-ops.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/of.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index ef26fe40efb0..f48e87ac89c9 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -18,6 +18,7 @@
 #include <asm/plpar_wrappers.h>
 #include <asm/papr_pdsm.h>
 #include <asm/mce.h>
+#include <asm/unaligned.h>
 
 #define BIND_ANY_ADDR (~0ul)
 
@@ -114,6 +115,9 @@ struct papr_scm_priv {
 	/* Health information for the dimm */
 	u64 health_bitmap;
 
+	/* Holds the last known dirty shutdown counter value */
+	u64 dirty_shutdown_counter;
+
 	/* length of the stat buffer as expected by phyp */
 	size_t stat_buffer_len;
 };
@@ -260,7 +264,7 @@ err_out:
  * Query the Dimm performance stats from PHYP and copy them (if returned) to
  * provided struct papr_scm_perf_stats instance 'stats' that can hold atleast
  * (num_stats + header) bytes.
- * - If buff_stats == NULL the return value is the size in byes of the buffer
+ * - If buff_stats == NULL the return value is the size in bytes of the buffer
  * needed to hold all supported performance-statistics.
  * - If buff_stats != NULL and num_stats == 0 then we copy all known
  * performance-statistics to 'buff_stat' and expect to be large enough to
@@ -310,6 +314,13 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
 		dev_err(&p->pdev->dev,
 			"Unknown performance stats, Err:0x%016lX\n", ret[0]);
 		return -ENOENT;
+	} else if (rc == H_AUTHORITY) {
+		dev_info(&p->pdev->dev,
+			 "Permission denied while accessing performance stats");
+		return -EPERM;
+	} else if (rc == H_UNSUPPORTED) {
+		dev_dbg(&p->pdev->dev, "Performance stats unsupported\n");
+		return -EOPNOTSUPP;
 	} else if (rc != H_SUCCESS) {
 		dev_err(&p->pdev->dev,
 			"Failed to query performance stats, Err:%lld\n", rc);
@@ -596,6 +607,16 @@ free_stats:
 	return rc;
 }
 
+/* Add the dirty-shutdown-counter value to the pdsm */
+static int papr_pdsm_dsc(struct papr_scm_priv *p,
+			 union nd_pdsm_payload *payload)
+{
+	payload->health.extension_flags |= PDSM_DIMM_DSC_VALID;
+	payload->health.dimm_dsc = p->dirty_shutdown_counter;
+
+	return sizeof(struct nd_papr_pdsm_health);
+}
+
 /* Fetch the DIMM health info and populate it in provided package. */
 static int papr_pdsm_health(struct papr_scm_priv *p,
 			    union nd_pdsm_payload *payload)
@@ -639,6 +660,8 @@ static int papr_pdsm_health(struct papr_scm_priv *p,
 
 	/* Populate the fuel gauge meter in the payload */
 	papr_pdsm_fuel_gauge(p, payload);
+	/* Populate the dirty-shutdown-counter field */
+	papr_pdsm_dsc(p, payload);
 
 	rc = sizeof(struct nd_papr_pdsm_health);
 
@@ -900,15 +923,41 @@ static ssize_t flags_show(struct device *dev,
 }
 DEVICE_ATTR_RO(flags);
 
+static ssize_t dirty_shutdown_show(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	struct nvdimm *dimm = to_nvdimm(dev);
+	struct papr_scm_priv *p = nvdimm_provider_data(dimm);
+
+	return sysfs_emit(buf, "%llu\n", p->dirty_shutdown_counter);
+}
+DEVICE_ATTR_RO(dirty_shutdown);
+
+static umode_t papr_nd_attribute_visible(struct kobject *kobj,
+					 struct attribute *attr, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct nvdimm *nvdimm = to_nvdimm(dev);
+	struct papr_scm_priv *p = nvdimm_provider_data(nvdimm);
+
+	/* For if perf-stats not available remove perf_stats sysfs */
+	if (attr == &dev_attr_perf_stats.attr && p->stat_buffer_len == 0)
+		return 0;
+
+	return attr->mode;
+}
+
 /* papr_scm specific dimm attributes */
 static struct attribute *papr_nd_attributes[] = {
 	&dev_attr_flags.attr,
 	&dev_attr_perf_stats.attr,
+	&dev_attr_dirty_shutdown.attr,
 	NULL,
 };
 
 static struct attribute_group papr_nd_attribute_group = {
 	.name = "papr",
+	.is_visible = papr_nd_attribute_visible,
 	.attrs = papr_nd_attributes,
 };
 
@@ -924,7 +973,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 	struct nd_region_desc ndr_desc;
 	unsigned long dimm_flags;
 	int target_nid, online_nid;
-	ssize_t stat_size;
 
 	p->bus_desc.ndctl = papr_scm_ndctl;
 	p->bus_desc.module = THIS_MODULE;
@@ -1009,16 +1057,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 	list_add_tail(&p->region_list, &papr_nd_regions);
 	mutex_unlock(&papr_ndr_lock);
 
-	/* Try retriving the stat buffer and see if its supported */
-	stat_size = drc_pmem_query_stats(p, NULL, 0);
-	if (stat_size > 0) {
-		p->stat_buffer_len = stat_size;
-		dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n",
-			p->stat_buffer_len);
-	} else {
-		dev_info(&p->pdev->dev, "Dimm performance stats unavailable\n");
-	}
-
 	return 0;
 
 err:	nvdimm_bus_unregister(p->bus);
@@ -1094,8 +1132,10 @@ static int papr_scm_probe(struct platform_device *pdev)
 	u32 drc_index, metadata_size;
 	u64 blocks, block_size;
 	struct papr_scm_priv *p;
+	u8 uuid_raw[UUID_SIZE];
 	const char *uuid_str;
-	u64 uuid[2];
+	ssize_t stat_size;
+	uuid_t uuid;
 	int rc;
 
 	/* check we have all the required DT properties */
@@ -1137,17 +1177,28 @@ static int papr_scm_probe(struct platform_device *pdev)
 	p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required");
 	p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required");
 
+	if (of_property_read_u64(dn, "ibm,persistence-failed-count",
+				 &p->dirty_shutdown_counter))
+		p->dirty_shutdown_counter = 0;
+
 	/* We just need to ensure that set cookies are unique across */
-	uuid_parse(uuid_str, (uuid_t *) uuid);
+	uuid_parse(uuid_str, &uuid);
+
 	/*
-	 * cookie1 and cookie2 are not really little endian
-	 * we store a little endian representation of the
-	 * uuid str so that we can compare this with the label
-	 * area cookie irrespective of the endian config with which
-	 * the kernel is built.
+	 * The cookie1 and cookie2 are not really little endian.
+	 * We store a raw buffer representation of the
+	 * uuid string so that we can compare this with the label
+	 * area cookie irrespective of the endian configuration
+	 * with which the kernel is built.
+	 *
+	 * Historically we stored the cookie in the below format.
+	 * for a uuid string 72511b67-0b3b-42fd-8d1d-5be3cae8bcaa
+	 *	cookie1 was 0xfd423b0b671b5172
+	 *	cookie2 was 0xaabce8cae35b1d8d
 	 */
-	p->nd_set.cookie1 = cpu_to_le64(uuid[0]);
-	p->nd_set.cookie2 = cpu_to_le64(uuid[1]);
+	export_uuid(uuid_raw, &uuid);
+	p->nd_set.cookie1 = get_unaligned_le64(&uuid_raw[0]);
+	p->nd_set.cookie2 = get_unaligned_le64(&uuid_raw[8]);
 
 	/* might be zero */
 	p->metadata_size = metadata_size;
@@ -1172,6 +1223,14 @@ static int papr_scm_probe(struct platform_device *pdev)
 	p->res.name  = pdev->name;
 	p->res.flags = IORESOURCE_MEM;
 
+	/* Try retrieving the stat buffer and see if its supported */
+	stat_size = drc_pmem_query_stats(p, NULL, 0);
+	if (stat_size > 0) {
+		p->stat_buffer_len = stat_size;
+		dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n",
+			p->stat_buffer_len);
+	}
+
 	rc = papr_scm_nvdimm_init(p);
 	if (rc)
 		goto err2;
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 9d4ef65da7f3..167f2e1b8d39 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -487,8 +487,8 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
 	if ((be64_to_cpu(regs->msr) &
 			(MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
 			 MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
-		regs->nip = be64_to_cpu((__be64)regs->nip);
-		regs->msr = 0;
+		regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip));
+		regs_set_return_msr(regs, 0);
 	}
 #endif
 
@@ -593,8 +593,6 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
 		mce_err.severity = MCE_SEV_SEVERE;
 	else if (severity == RTAS_SEVERITY_ERROR)
 		mce_err.severity = MCE_SEV_SEVERE;
-	else if (severity == RTAS_SEVERITY_FATAL)
-		mce_err.severity = MCE_SEV_FATAL;
 	else
 		mce_err.severity = MCE_SEV_FATAL;
 
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 754e493b7c05..631a0d57b6cd 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -549,6 +549,15 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
 	if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
 		security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
 
+	if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY)
+		security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
+
+	if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS)
+		security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
+
+	if (result->behaviour & H_CPU_BEHAV_NO_STF_BARRIER)
+		security_ftr_clear(SEC_FTR_STF_BARRIER);
+
 	if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
 		security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
 }
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index c70b4be9f0a5..f47429323eee 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -105,9 +105,6 @@ static inline int smp_startup_cpu(unsigned int lcpu)
 		return 1;
 	}
 
-	/* Fixup atomic count: it exited inside IRQ handler. */
-	task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count	= 0;
-
 	/* 
 	 * If the RTAS start-cpu token does not exist then presume the
 	 * cpu is already spinning.
@@ -211,7 +208,9 @@ static __init void pSeries_smp_probe(void)
 	if (!cpu_has_feature(CPU_FTR_SMT))
 		return;
 
-	if (check_kvm_guest()) {
+	check_kvm_guest();
+
+	if (is_kvm_guest()) {
 		/*
 		 * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp
 		 * faults to the hypervisor which then reads the instruction
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
new file mode 100644
index 000000000000..b5c1cf1bc64d
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -0,0 +1,595 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2020-21 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <asm/machdep.h>
+#include <asm/hvcall.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/vas.h>
+#include "vas.h"
+
+#define VAS_INVALID_WIN_ADDRESS	0xFFFFFFFFFFFFFFFFul
+#define VAS_DEFAULT_DOMAIN_ID	0xFFFFFFFFFFFFFFFFul
+/* The hypervisor allows one credit per window right now */
+#define DEF_WIN_CREDS		1
+
+static struct vas_all_caps caps_all;
+static bool copypaste_feat;
+
+static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
+static DEFINE_MUTEX(vas_pseries_mutex);
+
+static long hcall_return_busy_check(long rc)
+{
+	/* Check if we are stalled for some time */
+	if (H_IS_LONG_BUSY(rc)) {
+		msleep(get_longbusy_msecs(rc));
+		rc = H_BUSY;
+	} else if (rc == H_BUSY) {
+		cond_resched();
+	}
+
+	return rc;
+}
+
+/*
+ * Allocate VAS window hcall
+ */
+static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
+				     u8 wintype, u16 credits)
+{
+	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+	long rc;
+
+	do {
+		rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
+				  credits, domain[0], domain[1], domain[2],
+				  domain[3], domain[4], domain[5]);
+
+		rc = hcall_return_busy_check(rc);
+	} while (rc == H_BUSY);
+
+	if (rc == H_SUCCESS) {
+		if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
+			pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
+			return -ENOTSUPP;
+		}
+		win->vas_win.winid = retbuf[0];
+		win->win_addr = retbuf[1];
+		win->complete_irq = retbuf[2];
+		win->fault_irq = retbuf[3];
+		return 0;
+	}
+
+	pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
+		rc, wintype, credits);
+
+	return -EIO;
+}
+
+/*
+ * Deallocate VAS window hcall.
+ */
+static int h_deallocate_vas_window(u64 winid)
+{
+	long rc;
+
+	do {
+		rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
+
+		rc = hcall_return_busy_check(rc);
+	} while (rc == H_BUSY);
+
+	if (rc == H_SUCCESS)
+		return 0;
+
+	pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
+		rc, winid);
+	return -EIO;
+}
+
+/*
+ * Modify VAS window.
+ * After the window is opened with allocate window hcall, configure it
+ * with flags and LPAR PID before using.
+ */
+static int h_modify_vas_window(struct pseries_vas_window *win)
+{
+	long rc;
+	u32 lpid = mfspr(SPRN_PID);
+
+	/*
+	 * AMR value is not supported in Linux VAS implementation.
+	 * The hypervisor ignores it if 0 is passed.
+	 */
+	do {
+		rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
+					win->vas_win.winid, lpid, 0,
+					VAS_MOD_WIN_FLAGS, 0);
+
+		rc = hcall_return_busy_check(rc);
+	} while (rc == H_BUSY);
+
+	if (rc == H_SUCCESS)
+		return 0;
+
+	pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n",
+			rc, win->vas_win.winid, lpid);
+	return -EIO;
+}
+
+/*
+ * This hcall is used to determine the capabilities from the hypervisor.
+ * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
+ * @query_type: If 0 is passed, the hypervisor returns the overall
+ *		capabilities which provides all feature(s) that are
+ *		available. Then query the hypervisor to get the
+ *		corresponding capabilities for the specific feature.
+ *		Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
+ *			and VAS GZIP Default capabilities.
+ *			H_QUERY_NX_CAPABILITIES provides NX GZIP
+ *			capabilities.
+ * @result: Return buffer to save capabilities.
+ */
+int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
+{
+	long rc;
+
+	rc = plpar_hcall_norets(hcall, query_type, result);
+
+	if (rc == H_SUCCESS)
+		return 0;
+
+	pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n",
+			hcall, rc, query_type, result);
+	return -EIO;
+}
+EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
+
+/*
+ * hcall to get fault CRB from the hypervisor.
+ */
+static int h_get_nx_fault(u32 winid, u64 buffer)
+{
+	long rc;
+
+	rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
+
+	if (rc == H_SUCCESS)
+		return 0;
+
+	pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
+		rc, winid, buffer);
+	return -EIO;
+
+}
+
+/*
+ * Handle the fault interrupt.
+ * When the fault interrupt is received for each window, query the
+ * hypervisor to get the fault CRB on the specific fault. Then
+ * process the CRB by updating CSB or send signal if the user space
+ * CSB is invalid.
+ * Note: The hypervisor forwards an interrupt for each fault request.
+ *	So one fault CRB to process for each H_GET_NX_FAULT hcall.
+ */
+irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
+{
+	struct pseries_vas_window *txwin = data;
+	struct coprocessor_request_block crb;
+	struct vas_user_win_ref *tsk_ref;
+	int rc;
+
+	rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
+	if (!rc) {
+		tsk_ref = &txwin->vas_win.task_ref;
+		vas_dump_crb(&crb);
+		vas_update_csb(&crb, tsk_ref);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Allocate window and setup IRQ mapping.
+ */
+static int allocate_setup_window(struct pseries_vas_window *txwin,
+				 u64 *domain, u8 wintype)
+{
+	int rc;
+
+	rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
+	if (rc)
+		return rc;
+	/*
+	 * On PowerVM, the hypervisor setup and forwards the fault
+	 * interrupt per window. So the IRQ setup and fault handling
+	 * will be done for each open window separately.
+	 */
+	txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
+	if (!txwin->fault_virq) {
+		pr_err("Failed irq mapping %d\n", txwin->fault_irq);
+		rc = -EINVAL;
+		goto out_win;
+	}
+
+	txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
+				txwin->vas_win.winid);
+	if (!txwin->name) {
+		rc = -ENOMEM;
+		goto out_irq;
+	}
+
+	rc = request_threaded_irq(txwin->fault_virq, NULL,
+				  pseries_vas_fault_thread_fn, IRQF_ONESHOT,
+				  txwin->name, txwin);
+	if (rc) {
+		pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
+		       txwin->vas_win.winid, txwin->fault_virq, rc);
+		goto out_free;
+	}
+
+	txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
+
+	return 0;
+out_free:
+	kfree(txwin->name);
+out_irq:
+	irq_dispose_mapping(txwin->fault_virq);
+out_win:
+	h_deallocate_vas_window(txwin->vas_win.winid);
+	return rc;
+}
+
+static inline void free_irq_setup(struct pseries_vas_window *txwin)
+{
+	free_irq(txwin->fault_virq, txwin);
+	kfree(txwin->name);
+	irq_dispose_mapping(txwin->fault_virq);
+}
+
+static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
+					      enum vas_cop_type cop_type)
+{
+	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
+	struct vas_cop_feat_caps *cop_feat_caps;
+	struct vas_caps *caps;
+	struct pseries_vas_window *txwin;
+	int rc;
+
+	txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
+	if (!txwin)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * A VAS window can have many credits which means that many
+	 * requests can be issued simultaneously. But the hypervisor
+	 * restricts one credit per window.
+	 * The hypervisor introduces 2 different types of credits:
+	 * Default credit type (Uses normal priority FIFO):
+	 *	A limited number of credits are assigned to partitions
+	 *	based on processor entitlement. But these credits may be
+	 *	over-committed on a system depends on whether the CPUs
+	 *	are in shared or dedicated modes - that is, more requests
+	 *	may be issued across the system than NX can service at
+	 *	once which can result in paste command failure (RMA_busy).
+	 *	Then the process has to resend requests or fall-back to
+	 *	SW compression.
+	 * Quality of Service (QoS) credit type (Uses high priority FIFO):
+	 *	To avoid NX HW contention, the system admins can assign
+	 *	QoS credits for each LPAR so that this partition is
+	 *	guaranteed access to NX resources. These credits are
+	 *	assigned to partitions via the HMC.
+	 *	Refer PAPR for more information.
+	 *
+	 * Allocate window with QoS credits if user requested. Otherwise
+	 * default credits are used.
+	 */
+	if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
+		caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
+	else
+		caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];
+
+	cop_feat_caps = &caps->caps;
+
+	if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) >
+			atomic_read(&cop_feat_caps->target_lpar_creds)) {
+		pr_err("Credits are not available to allocate window\n");
+		rc = -EINVAL;
+		goto out;
+	}
+
+	if (vas_id == -1) {
+		/*
+		 * The user space is requesting to allocate a window on
+		 * a VAS instance where the process is executing.
+		 * On PowerVM, domain values are passed to the hypervisor
+		 * to select VAS instance. Useful if the process is
+		 * affinity to NUMA node.
+		 * The hypervisor selects VAS instance if
+		 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
+		 * The h_allocate_vas_window hcall is defined to take a
+		 * domain values as specified by h_home_node_associativity,
+		 * So no unpacking needs to be done.
+		 */
+		rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
+				  VPHN_FLAG_VCPU, smp_processor_id());
+		if (rc != H_SUCCESS) {
+			pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
+			goto out;
+		}
+	}
+
+	/*
+	 * Allocate / Deallocate window hcalls and setup / free IRQs
+	 * have to be protected with mutex.
+	 * Open VAS window: Allocate window hcall and setup IRQ
+	 * Close VAS window: Deallocate window hcall and free IRQ
+	 *	The hypervisor waits until all NX requests are
+	 *	completed before closing the window. So expects OS
+	 *	to handle NX faults, means IRQ can be freed only
+	 *	after the deallocate window hcall is returned.
+	 * So once the window is closed with deallocate hcall before
+	 * the IRQ is freed, it can be assigned to new allocate
+	 * hcall with the same fault IRQ by the hypervisor. It can
+	 * result in setup IRQ fail for the new window since the
+	 * same fault IRQ is not freed by the OS before.
+	 */
+	mutex_lock(&vas_pseries_mutex);
+	rc = allocate_setup_window(txwin, (u64 *)&domain[0],
+				   cop_feat_caps->win_type);
+	mutex_unlock(&vas_pseries_mutex);
+	if (rc)
+		goto out;
+
+	/*
+	 * Modify window and it is ready to use.
+	 */
+	rc = h_modify_vas_window(txwin);
+	if (!rc)
+		rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
+	if (rc)
+		goto out_free;
+
+	vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
+	txwin->win_type = cop_feat_caps->win_type;
+	mutex_lock(&vas_pseries_mutex);
+	list_add(&txwin->win_list, &caps->list);
+	mutex_unlock(&vas_pseries_mutex);
+
+	return &txwin->vas_win;
+
+out_free:
+	/*
+	 * Window is not operational. Free IRQ before closing
+	 * window so that do not have to hold mutex.
+	 */
+	free_irq_setup(txwin);
+	h_deallocate_vas_window(txwin->vas_win.winid);
+out:
+	atomic_dec(&cop_feat_caps->used_lpar_creds);
+	kfree(txwin);
+	return ERR_PTR(rc);
+}
+
+static u64 vas_paste_address(struct vas_window *vwin)
+{
+	struct pseries_vas_window *win;
+
+	win = container_of(vwin, struct pseries_vas_window, vas_win);
+	return win->win_addr;
+}
+
+static int deallocate_free_window(struct pseries_vas_window *win)
+{
+	int rc = 0;
+
+	/*
+	 * The hypervisor waits for all requests including faults
+	 * are processed before closing the window - Means all
+	 * credits have to be returned. In the case of fault
+	 * request, a credit is returned after OS issues
+	 * H_GET_NX_FAULT hcall.
+	 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
+	 * hcall.
+	 */
+	rc = h_deallocate_vas_window(win->vas_win.winid);
+	if (!rc)
+		free_irq_setup(win);
+
+	return rc;
+}
+
+static int vas_deallocate_window(struct vas_window *vwin)
+{
+	struct pseries_vas_window *win;
+	struct vas_cop_feat_caps *caps;
+	int rc = 0;
+
+	if (!vwin)
+		return -EINVAL;
+
+	win = container_of(vwin, struct pseries_vas_window, vas_win);
+
+	/* Should not happen */
+	if (win->win_type >= VAS_MAX_FEAT_TYPE) {
+		pr_err("Window (%u): Invalid window type %u\n",
+				vwin->winid, win->win_type);
+		return -EINVAL;
+	}
+
+	caps = &vascaps[win->win_type].caps;
+	mutex_lock(&vas_pseries_mutex);
+	rc = deallocate_free_window(win);
+	if (rc) {
+		mutex_unlock(&vas_pseries_mutex);
+		return rc;
+	}
+
+	list_del(&win->win_list);
+	atomic_dec(&caps->used_lpar_creds);
+	mutex_unlock(&vas_pseries_mutex);
+
+	put_vas_user_win_ref(&vwin->task_ref);
+	mm_context_remove_vas_window(vwin->task_ref.mm);
+
+	kfree(win);
+	return 0;
+}
+
+static const struct vas_user_win_ops vops_pseries = {
+	.open_win	= vas_allocate_window,	/* Open and configure window */
+	.paste_addr	= vas_paste_address,	/* To do copy/paste */
+	.close_win	= vas_deallocate_window, /* Close window */
+};
+
+/*
+ * Supporting only nx-gzip coprocessor type now, but this API code
+ * extended to other coprocessor types later.
+ */
+int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
+			     const char *name)
+{
+	int rc;
+
+	if (!copypaste_feat)
+		return -ENOTSUPP;
+
+	rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(vas_register_api_pseries);
+
+void vas_unregister_api_pseries(void)
+{
+	vas_unregister_coproc_api();
+}
+EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
+
+/*
+ * Get the specific capabilities based on the feature type.
+ * Right now supports GZIP default and GZIP QoS capabilities.
+ */
+static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
+				struct hv_vas_cop_feat_caps *hv_caps)
+{
+	struct vas_cop_feat_caps *caps;
+	struct vas_caps *vcaps;
+	int rc = 0;
+
+	vcaps = &vascaps[type];
+	memset(vcaps, 0, sizeof(*vcaps));
+	INIT_LIST_HEAD(&vcaps->list);
+
+	caps = &vcaps->caps;
+
+	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
+					  (u64)virt_to_phys(hv_caps));
+	if (rc)
+		return rc;
+
+	caps->user_mode = hv_caps->user_mode;
+	if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
+		pr_err("User space COPY/PASTE is not supported\n");
+		return -ENOTSUPP;
+	}
+
+	caps->descriptor = be64_to_cpu(hv_caps->descriptor);
+	caps->win_type = hv_caps->win_type;
+	if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
+		pr_err("Unsupported window type %u\n", caps->win_type);
+		return -EINVAL;
+	}
+	caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
+	caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
+	atomic_set(&caps->target_lpar_creds,
+		   be16_to_cpu(hv_caps->target_lpar_creds));
+	if (feat == VAS_GZIP_DEF_FEAT) {
+		caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
+
+		if (caps->max_win_creds < DEF_WIN_CREDS) {
+			pr_err("Window creds(%u) > max allowed window creds(%u)\n",
+			       DEF_WIN_CREDS, caps->max_win_creds);
+			return -EINVAL;
+		}
+	}
+
+	copypaste_feat = true;
+
+	return 0;
+}
+
+static int __init pseries_vas_init(void)
+{
+	struct hv_vas_cop_feat_caps *hv_cop_caps;
+	struct hv_vas_all_caps *hv_caps;
+	int rc;
+
+	/*
+	 * Linux supports user space COPY/PASTE only with Radix
+	 */
+	if (!radix_enabled()) {
+		pr_err("API is supported only with radix page tables\n");
+		return -ENOTSUPP;
+	}
+
+	hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
+	if (!hv_caps)
+		return -ENOMEM;
+	/*
+	 * Get VAS overall capabilities by passing 0 to feature type.
+	 */
+	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
+					  (u64)virt_to_phys(hv_caps));
+	if (rc)
+		goto out;
+
+	caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
+	caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
+
+	hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL);
+	if (!hv_cop_caps) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	/*
+	 * QOS capabilities available
+	 */
+	if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
+		rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
+					  VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps);
+
+		if (rc)
+			goto out_cop;
+	}
+	/*
+	 * Default capabilities available
+	 */
+	if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) {
+		rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
+					  VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps);
+		if (rc)
+			goto out_cop;
+	}
+
+	pr_info("GZIP feature is available\n");
+
+out_cop:
+	kfree(hv_cop_caps);
+out:
+	kfree(hv_caps);
+	return rc;
+}
+machine_device_initcall(pseries, pseries_vas_init);
diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h
new file mode 100644
index 000000000000..4ecb3fcabd10
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vas.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2020-21 IBM Corp.
+ */
+
+#ifndef _VAS_H
+#define _VAS_H
+#include <asm/vas.h>
+#include <linux/mutex.h>
+#include <linux/stringify.h>
+
+/*
+ * VAS window modify flags
+ */
+#define VAS_MOD_WIN_CLOSE	PPC_BIT(0)
+#define VAS_MOD_WIN_JOBS_KILL	PPC_BIT(1)
+#define VAS_MOD_WIN_DR		PPC_BIT(3)
+#define VAS_MOD_WIN_PR		PPC_BIT(4)
+#define VAS_MOD_WIN_SF		PPC_BIT(5)
+#define VAS_MOD_WIN_TA		PPC_BIT(6)
+#define VAS_MOD_WIN_FLAGS	(VAS_MOD_WIN_JOBS_KILL | VAS_MOD_WIN_DR | \
+				VAS_MOD_WIN_PR | VAS_MOD_WIN_SF)
+
+#define VAS_WIN_ACTIVE		0x0
+#define VAS_WIN_CLOSED		0x1
+#define VAS_WIN_INACTIVE	0x2	/* Inactive due to HW failure */
+/* Process of being modified, deallocated, or quiesced */
+#define VAS_WIN_MOD_IN_PROCESS	0x3
+
+#define VAS_COPY_PASTE_USER_MODE	0x00000001
+#define VAS_COP_OP_USER_MODE		0x00000010
+
+/*
+ * Co-processor feature - GZIP QoS windows or GZIP default windows
+ */
+enum vas_cop_feat_type {
+	VAS_GZIP_QOS_FEAT_TYPE,
+	VAS_GZIP_DEF_FEAT_TYPE,
+	VAS_MAX_FEAT_TYPE,
+};
+
+/*
+ * Use to get feature specific capabilities from the
+ * hypervisor.
+ */
+struct hv_vas_cop_feat_caps {
+	__be64	descriptor;
+	u8	win_type;		/* Default or QoS type */
+	u8	user_mode;
+	__be16	max_lpar_creds;
+	__be16	max_win_creds;
+	union {
+		__be16	reserved;
+		__be16	def_lpar_creds; /* Used for default capabilities */
+	};
+	__be16	target_lpar_creds;
+} __packed __aligned(0x1000);
+
+/*
+ * Feature specific (QoS or default) capabilities.
+ */
+struct vas_cop_feat_caps {
+	u64		descriptor;
+	u8		win_type;	/* Default or QoS type */
+	u8		user_mode;	/* User mode copy/paste or COP HCALL */
+	u16		max_lpar_creds;	/* Max credits available in LPAR */
+	/* Max credits can be assigned per window */
+	u16		max_win_creds;
+	union {
+		u16	reserved;	/* Used for QoS credit type */
+		u16	def_lpar_creds; /* Used for default credit type */
+	};
+	/* Total LPAR available credits. Can be different from max LPAR */
+	/* credits due to DLPAR operation */
+	atomic_t	target_lpar_creds;
+	atomic_t	used_lpar_creds; /* Used credits so far */
+	u16		avail_lpar_creds; /* Remaining available credits */
+};
+
+/*
+ * Feature (QoS or Default) specific to store capabilities and
+ * the list of open windows.
+ */
+struct vas_caps {
+	struct vas_cop_feat_caps caps;
+	struct list_head list;	/* List of open windows */
+};
+
+/*
+ * To get window information from the hypervisor.
+ */
+struct hv_vas_win_lpar {
+	__be16	version;
+	u8	win_type;
+	u8	status;
+	__be16	credits;	/* No of credits assigned to this window */
+	__be16	reserved;
+	__be32	pid;		/* LPAR Process ID */
+	__be32	tid;		/* LPAR Thread ID */
+	__be64	win_addr;	/* Paste address */
+	__be32	interrupt;	/* Interrupt when NX request completes */
+	__be32	fault;		/* Interrupt when NX sees fault */
+	/* Associativity Domain Identifiers as returned in */
+	/* H_HOME_NODE_ASSOCIATIVITY */
+	__be64	domain[6];
+	__be64	win_util;	/* Number of bytes processed */
+} __packed __aligned(0x1000);
+
+struct pseries_vas_window {
+	struct vas_window vas_win;
+	u64 win_addr;		/* Physical paste address */
+	u8 win_type;		/* QoS or Default window */
+	u32 complete_irq;	/* Completion interrupt */
+	u32 fault_irq;		/* Fault interrupt */
+	u64 domain[6];		/* Associativity domain Ids */
+				/* this window is allocated */
+	u64 util;
+
+	/* List of windows opened which is used for LPM */
+	struct list_head win_list;
+	u64 flags;
+	char *name;
+	int fault_virq;
+};
+#endif /* _VAS_H */