From 7e91c7df29b5e196de3dc6f086c8937973bd0b88 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Tue, 7 Feb 2017 19:58:02 +0200
Subject: swiotlb-xen: implement xen_swiotlb_dma_mmap callback

This function creates userspace mapping for the DMA-coherent memory.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Oleksandr Dmytryshyn <oleksandr.dmytryshyn@globallogic.com>
Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad@kernel.org>
---
 arch/arm/xen/mm.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index bd62d94f8ac5..cd1684e4e864 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -198,6 +198,7 @@ static struct dma_map_ops xen_swiotlb_dma_ops = {
 	.unmap_page = xen_swiotlb_unmap_page,
 	.dma_supported = xen_swiotlb_dma_supported,
 	.set_dma_mask = xen_swiotlb_set_dma_mask,
+	.mmap = xen_swiotlb_dma_mmap,
 };
 
 int __init xen_mm_init(void)
-- 
cgit v1.2.3-59-g8ed1b


From 69369f52d28a34c84acb6f2a8a585e743441566a Mon Sep 17 00:00:00 2001
From: Andrii Anisov <andrii_anisov@epam.com>
Date: Tue, 7 Feb 2017 19:58:03 +0200
Subject: swiotlb-xen: implement xen_swiotlb_get_sgtable callback

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Konrad Rzeszutek Wilk <konrad@kernel.org>
---
 arch/arm/xen/mm.c         |  1 +
 drivers/xen/swiotlb-xen.c | 28 ++++++++++++++++++++++++++++
 include/xen/swiotlb-xen.h |  6 ++++++
 3 files changed, 35 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index cd1684e4e864..76ea48a614e1 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -199,6 +199,7 @@ static struct dma_map_ops xen_swiotlb_dma_ops = {
 	.dma_supported = xen_swiotlb_dma_supported,
 	.set_dma_mask = xen_swiotlb_set_dma_mask,
 	.mmap = xen_swiotlb_dma_mmap,
+	.get_sgtable = xen_swiotlb_get_sgtable,
 };
 
 int __init xen_mm_init(void)
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index c7f61fc0572a..23e30b4e1fb6 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -699,3 +699,31 @@ xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 	return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_dma_mmap);
+
+/*
+ * This function should be called with the pages from the current domain only,
+ * passing pages mapped from other domains would lead to memory corruption.
+ */
+int
+xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
+			void *cpu_addr, dma_addr_t handle, size_t size,
+			unsigned long attrs)
+{
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+	if (__generic_dma_ops(dev)->get_sgtable) {
+#if 0
+	/*
+	 * This check verifies that the page belongs to the current domain and
+	 * is not one mapped from another domain.
+	 * This check is for debug only, and should not go to production build
+	 */
+		unsigned long bfn = PHYS_PFN(dma_to_phys(dev, handle));
+		BUG_ON (!page_is_ram(bfn));
+#endif
+		return __generic_dma_ops(dev)->get_sgtable(dev, sgt, cpu_addr,
+							   handle, size, attrs);
+	}
+#endif
+	return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size);
+}
+EXPORT_SYMBOL_GPL(xen_swiotlb_get_sgtable);
diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h
index a315c876aaa9..1f6d78f044b6 100644
--- a/include/xen/swiotlb-xen.h
+++ b/include/xen/swiotlb-xen.h
@@ -2,6 +2,7 @@
 #define __LINUX_SWIOTLB_XEN_H
 
 #include <linux/dma-direction.h>
+#include <linux/scatterlist.h>
 #include <linux/swiotlb.h>
 
 extern int xen_swiotlb_init(int verbose, bool early);
@@ -60,4 +61,9 @@ extern int
 xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 		     void *cpu_addr, dma_addr_t dma_addr, size_t size,
 		     unsigned long attrs);
+
+extern int
+xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
+			void *cpu_addr, dma_addr_t handle, size_t size,
+			unsigned long attrs);
 #endif /* __LINUX_SWIOTLB_XEN_H */
-- 
cgit v1.2.3-59-g8ed1b


From c74fd80f2f41d05f350bb478151021f88551afe8 Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Fri, 13 Jan 2017 15:07:51 -0500
Subject: xen: do not re-use pirq number cached in pci device msi msg data

Revert the main part of commit:
af42b8d12f8a ("xen: fix MSI setup and teardown for PV on HVM guests")

That commit introduced reading the pci device's msi message data to see
if a pirq was previously configured for the device's msi/msix, and re-use
that pirq.  At the time, that was the correct behavior.  However, a
later change to Qemu caused it to call into the Xen hypervisor to unmap
all pirqs for a pci device, when the pci device disables its MSI/MSIX
vectors; specifically the Qemu commit:
c976437c7dba9c7444fb41df45468968aaa326ad
("qemu-xen: free all the pirqs for msi/msix when driver unload")

Once Qemu added this pirq unmapping, it was no longer correct for the
kernel to re-use the pirq number cached in the pci device msi message
data.  All Qemu releases since 2.1.0 contain the patch that unmaps the
pirqs when the pci device disables its MSI/MSIX vectors.

This bug is causing failures to initialize multiple NVMe controllers
under Xen, because the NVMe driver sets up a single MSIX vector for
each controller (concurrently), and then after using that to talk to
the controller for some configuration data, it disables the single MSIX
vector and re-configures all the MSIX vectors it needs.  So the MSIX
setup code tries to re-use the cached pirq from the first vector
for each controller, but the hypervisor has already given away that
pirq to another controller, and its initialization fails.

This is discussed in more detail at:
https://lists.xen.org/archives/html/xen-devel/2017-01/msg00447.html

Fixes: af42b8d12f8a ("xen: fix MSI setup and teardown for PV on HVM guests")
Signed-off-by: Dan Streetman <dan.streetman@canonical.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 arch/x86/pci/xen.c | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index e1fb269c87af..292ab0364a89 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -234,23 +234,14 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 		return 1;
 
 	for_each_pci_msi_entry(msidesc, dev) {
-		__pci_read_msi_msg(msidesc, &msg);
-		pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
-			((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
-		if (msg.data != XEN_PIRQ_MSI_DATA ||
-		    xen_irq_from_pirq(pirq) < 0) {
-			pirq = xen_allocate_pirq_msi(dev, msidesc);
-			if (pirq < 0) {
-				irq = -ENODEV;
-				goto error;
-			}
-			xen_msi_compose_msg(dev, pirq, &msg);
-			__pci_write_msi_msg(msidesc, &msg);
-			dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
-		} else {
-			dev_dbg(&dev->dev,
-				"xen: msi already bound to pirq=%d\n", pirq);
+		pirq = xen_allocate_pirq_msi(dev, msidesc);
+		if (pirq < 0) {
+			irq = -ENODEV;
+			goto error;
 		}
+		xen_msi_compose_msg(dev, pirq, &msg);
+		__pci_write_msi_msg(msidesc, &msg);
+		dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
 		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq,
 					       (type == PCI_CAP_ID_MSI) ? nvec : 1,
 					       (type == PCI_CAP_ID_MSIX) ?
-- 
cgit v1.2.3-59-g8ed1b


From 75013fb16f8484898eaa8d0b08fed942d790f029 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 1 Mar 2017 01:23:24 +0900
Subject: kprobes/x86: Fix kernel panic when certain exception-handling
 addresses are probed

Fix to the exception table entry check by using probed address
instead of the address of copied instruction.

This bug may cause unexpected kernel panic if user probe an address
where an exception can happen which should be fixup by __ex_table
(e.g. copy_from_user.)

Unless user puts a kprobe on such address, this doesn't
cause any problem.

This bug has been introduced years ago, by commit:

  464846888d9a ("x86/kprobes: Fix a bug which can modify kernel code permanently").

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 464846888d9a ("x86/kprobes: Fix a bug which can modify kernel code permanently")
Link: http://lkml.kernel.org/r/148829899399.28855.12581062400757221722.stgit@devbox
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/kprobes/common.h | 2 +-
 arch/x86/kernel/kprobes/core.c   | 6 +++---
 arch/x86/kernel/kprobes/opt.c    | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index c6ee63f927ab..d688826e5736 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -67,7 +67,7 @@
 #endif
 
 /* Ensure if the instruction can be boostable */
-extern int can_boost(kprobe_opcode_t *instruction);
+extern int can_boost(kprobe_opcode_t *instruction, void *addr);
 /* Recover instruction if given address is probed */
 extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
 					 unsigned long addr);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 520b8dfe1640..88b3c942473d 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -166,12 +166,12 @@ NOKPROBE_SYMBOL(skip_prefixes);
  * Returns non-zero if opcode is boostable.
  * RIP relative instructions are adjusted at copying time in 64 bits mode
  */
-int can_boost(kprobe_opcode_t *opcodes)
+int can_boost(kprobe_opcode_t *opcodes, void *addr)
 {
 	kprobe_opcode_t opcode;
 	kprobe_opcode_t *orig_opcodes = opcodes;
 
-	if (search_exception_tables((unsigned long)opcodes))
+	if (search_exception_tables((unsigned long)addr))
 		return 0;	/* Page fault may occur on this address. */
 
 retry:
@@ -416,7 +416,7 @@ static int arch_copy_kprobe(struct kprobe *p)
 	 * __copy_instruction can modify the displacement of the instruction,
 	 * but it doesn't affect boostable check.
 	 */
-	if (can_boost(p->ainsn.insn))
+	if (can_boost(p->ainsn.insn, p->addr))
 		p->ainsn.boostable = 0;
 	else
 		p->ainsn.boostable = -1;
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 3d1bee9d6a72..3e7c6e5a08ff 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -178,7 +178,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src)
 
 	while (len < RELATIVEJUMP_SIZE) {
 		ret = __copy_instruction(dest + len, src + len);
-		if (!ret || !can_boost(dest + len))
+		if (!ret || !can_boost(dest + len, src + len))
 			return -EINVAL;
 		len += ret;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 10bce8410607a18eb3adf5d2739db8c8593e110d Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 27 Feb 2017 23:50:58 +0100
Subject: x86/kdebugfs: Move boot params hierarchy under (debugfs)/x86/

... since this is all x86-specific data and it makes sense to have it
under x86/ logically instead in the toplevel debugfs dir.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170227225058.27289-1-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/kdebugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index bdb83e431d89..38b64587b31b 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -167,7 +167,7 @@ static int __init boot_params_kdebugfs_init(void)
 	struct dentry *dbp, *version, *data;
 	int error = -ENOMEM;
 
-	dbp = debugfs_create_dir("boot_params", NULL);
+	dbp = debugfs_create_dir("boot_params", arch_debugfs_dir);
 	if (!dbp)
 		return -ENOMEM;
 
-- 
cgit v1.2.3-59-g8ed1b


From e7c95effcd3a7a0c9535c809141ca499fede2c31 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 28 Feb 2017 07:05:59 +0100
Subject: s390/crypt: fix missing unlock in ctr_paes_crypt on error path

The ctr mode of protected key aes uses the ctrblk page if the
ctrblk_lock could be acquired. If the protected key has to be
reestablished and this operation fails the unlock for the
ctrblk_lock is missing. Add it.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/crypto/paes_s390.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index d69ea495c4d7..716b17238599 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -474,8 +474,11 @@ static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
 			ret = blkcipher_walk_done(desc, walk, nbytes - n);
 		}
 		if (k < n) {
-			if (__ctr_paes_set_key(ctx) != 0)
+			if (__ctr_paes_set_key(ctx) != 0) {
+				if (locked)
+					spin_unlock(&ctrblk_lock);
 				return blkcipher_walk_done(desc, walk, -EIO);
+			}
 		}
 	}
 	if (locked)
-- 
cgit v1.2.3-59-g8ed1b


From d9fcf2a1cbd1ff65d0109b1b400938808007fcd5 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 28 Feb 2017 07:42:01 +0100
Subject: s390: fix in-kernel program checks

A program check inside the kernel takes a slightly different path in
entry.S compare to a normal user fault. A recent change moved the store
of the breaking event address into the path taken for in-kernel program
checks as well, but %r14 has not been setup to point to the correct
location. A wild store is the consequence.

Move the store of the breaking event address to the code path for
user space faults.

Fixes: 34525e1f7e8d ("s390: store breaking event address only for program checks")
Reported-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/entry.S | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index dff2152350a7..6a7d737d514c 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -490,7 +490,7 @@ ENTRY(pgm_check_handler)
 	jnz	.Lpgm_svcper		# -> single stepped svc
 1:	CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	j	3f
+	j	4f
 2:	UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
 	lg	%r15,__LC_KERNEL_STACK
 	lgr	%r14,%r12
@@ -499,8 +499,8 @@ ENTRY(pgm_check_handler)
 	tm	__LC_PGM_ILC+2,0x02	# check for transaction abort
 	jz	3f
 	mvc	__THREAD_trap_tdb(256,%r14),0(%r13)
-3:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
-	stg	%r10,__THREAD_last_break(%r14)
+3:	stg	%r10,__THREAD_last_break(%r14)
+4:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
@@ -509,14 +509,14 @@ ENTRY(pgm_check_handler)
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	stg	%r10,__PT_ARGS(%r11)
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
-	jz	4f
+	jz	5f
 	tmhh	%r8,0x0001		# kernel per event ?
 	jz	.Lpgm_kprobe
 	oi	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
 	mvc	__THREAD_per_address(8,%r14),__LC_PER_ADDRESS
 	mvc	__THREAD_per_cause(2,%r14),__LC_PER_CODE
 	mvc	__THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-4:	REENABLE_IRQS
+5:	REENABLE_IRQS
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	larl	%r1,pgm_check_table
 	llgh	%r10,__PT_INT_CODE+2(%r11)
-- 
cgit v1.2.3-59-g8ed1b


From e69ca822ce0ed3ba006ce384d7d205c81d92373f Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 1 Mar 2017 09:16:03 +0100
Subject: s390/cputime: remove last traces of cputime_t

The cputime_t type is a thing of the past, replace the last occurences
of the type in the s390 code with a simple u64.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/cputime.h | 14 ++------------
 arch/s390/kernel/vtime.c        |  2 +-
 2 files changed, 3 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index d1c407ddf703..e5672d6276a6 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -8,32 +8,22 @@
 #define _S390_CPUTIME_H
 
 #include <linux/types.h>
-#include <asm/div64.h>
 
 #define CPUTIME_PER_USEC 4096ULL
 #define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC)
 
 /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
 
-typedef unsigned long long __nocast cputime_t;
-typedef unsigned long long __nocast cputime64_t;
-
 #define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new)
 
-static inline unsigned long __div(unsigned long long n, unsigned long base)
-{
-	return n / base;
-}
-
 /*
  * Convert cputime to microseconds and back.
  */
-static inline unsigned int cputime_to_usecs(const cputime_t cputime)
+static inline u64 cputime_to_usecs(const u64 cputime)
 {
-	return (__force unsigned long long) cputime >> 12;
+	return cputime >> 12;
 }
 
-
 u64 arch_cpu_idle_time(int cpu);
 
 #define arch_idle_time(cpu) arch_cpu_idle_time(cpu)
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 31bd96e81167..8f5f59a151b4 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -111,7 +111,7 @@ static inline u64 scale_vtime(u64 vtime)
 }
 
 static void account_system_index_scaled(struct task_struct *p,
-					cputime_t cputime, cputime_t scaled,
+					u64 cputime, u64 scaled,
 					enum cpu_usage_stat index)
 {
 	p->stimescaled += cputime_to_nsecs(scaled);
-- 
cgit v1.2.3-59-g8ed1b


From 3c915bdc1775acfa214195da1ffb39dabdd1a389 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 1 Mar 2017 09:18:34 +0100
Subject: s390/cputime: reset all accounting fields on fork

copy_thread has to reset all cputime related field in the task struct,
not only user_timer and system_timer.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/process.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 54281660582c..249deafaa6ee 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -121,7 +121,10 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
 	clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
 	/* Initialize per thread user and system timer values */
 	p->thread.user_timer = 0;
+	p->thread.guest_timer = 0;
 	p->thread.system_timer = 0;
+	p->thread.hardirq_timer = 0;
+	p->thread.softirq_timer = 0;
 
 	frame->sf.back_chain = 0;
 	/* new return point is ret_from_fork */
-- 
cgit v1.2.3-59-g8ed1b


From e53051e757d6cd66741955b93581e54415e48a70 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 1 Mar 2017 09:21:10 +0100
Subject: s390/cputime: provide archicture specific cputime_to_nsecs

The generic cputime_to_nsecs function first converts the cputime
to micro-seconds and then multiplies the result with 1000. This
looses some bits of accuracy, provide our own version of
cputime_to_nsecs that does not loose precision.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/cputime.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index e5672d6276a6..9072bf63a846 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -8,6 +8,7 @@
 #define _S390_CPUTIME_H
 
 #include <linux/types.h>
+#include <asm/timex.h>
 
 #define CPUTIME_PER_USEC 4096ULL
 #define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC)
@@ -17,13 +18,18 @@
 #define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new)
 
 /*
- * Convert cputime to microseconds and back.
+ * Convert cputime to microseconds.
  */
 static inline u64 cputime_to_usecs(const u64 cputime)
 {
 	return cputime >> 12;
 }
 
+/*
+ * Convert cputime to nanoseconds.
+ */
+#define cputime_to_nsecs(cputime) tod_to_ns(cputime)
+
 u64 arch_cpu_idle_time(int cpu);
 
 #define arch_idle_time(cpu) arch_cpu_idle_time(cpu)
-- 
cgit v1.2.3-59-g8ed1b


From d03bd0454b101adb94d0b5a9cc11396182943cb4 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 1 Mar 2017 09:47:57 +0100
Subject: s390/timex: micro optimization for tod_to_ns

The conversion of a TOD value to nano-seconds currently uses a 32/32 bit
split with the calculation for "nsecs = (TOD * 125) >> 9". Using a
55/9 bit split saves an instruction.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/timex.h | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 354344dcc198..118535123f34 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -206,20 +206,16 @@ static inline unsigned long long get_tod_clock_monotonic(void)
  *    ns = (todval * 125) >> 9;
  *
  * In order to avoid an overflow with the multiplication we can rewrite this.
- * With a split todval == 2^32 * th + tl (th upper 32 bits, tl lower 32 bits)
+ * With a split todval == 2^9 * th + tl (th upper 55 bits, tl lower 9 bits)
  * we end up with
  *
- *    ns = ((2^32 * th + tl) * 125 ) >> 9;
- * -> ns = (2^23 * th * 125) + ((tl * 125) >> 9);
+ *    ns = ((2^9 * th + tl) * 125 ) >> 9;
+ * -> ns = (th * 125) + ((tl * 125) >> 9);
  *
  */
 static inline unsigned long long tod_to_ns(unsigned long long todval)
 {
-	unsigned long long ns;
-
-	ns = ((todval >> 32) << 23) * 125;
-	ns += ((todval & 0xffffffff) * 125) >> 9;
-	return ns;
+	return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
 }
 
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From bb3f0a52630c84807fca9bdd76ac2f5dcec82689 Mon Sep 17 00:00:00 2001
From: Dou Liyang <douly.fnst@cn.fujitsu.com>
Date: Tue, 28 Feb 2017 13:50:52 +0800
Subject: x86/apic: Fix a warning message in logical CPU IDs allocation

The current warning message in allocate_logical_cpuid() is somewhat confusing:

  Only 1 processors supported.Processor 2/0x2 and the rest are ignored.

As it might imply that there's only one CPU in the system - while what we ran
into here is a kernel limitation.

Fix the warning message to clarify all that:

  APIC: NR_CPUS/possible_cpus limit of 2 reached. Processor 2/0x2 and the rest are ignored.

( Also update the error return from -1 to -EINVAL, which is the more
  canonical return value. )

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bp@alien8.de
Cc: nicstange@gmail.com
Cc: wanpeng.li@hotmail.com
Link: http://lkml.kernel.org/r/1488261052-25753-1-git-send-email-douly.fnst@cn.fujitsu.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/apic.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 4261b3282ad9..11088b86e5c7 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2062,10 +2062,10 @@ static int allocate_logical_cpuid(int apicid)
 
 	/* Allocate a new cpuid. */
 	if (nr_logical_cpuids >= nr_cpu_ids) {
-		WARN_ONCE(1, "Only %d processors supported."
+		WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %i reached. "
 			     "Processor %d/0x%x and the rest are ignored.\n",
-			     nr_cpu_ids - 1, nr_logical_cpuids, apicid);
-		return -1;
+			     nr_cpu_ids, nr_logical_cpuids, apicid);
+		return -EINVAL;
 	}
 
 	cpuid_to_apicid[nr_logical_cpuids] = apicid;
-- 
cgit v1.2.3-59-g8ed1b


From 11277aabcbbe13916151af897d29a5e9f71ca73f Mon Sep 17 00:00:00 2001
From: Dou Liyang <douly.fnst@cn.fujitsu.com>
Date: Thu, 23 Feb 2017 17:16:41 +0800
Subject: x86/apic: Simplify enable_IR_x2apic(), remove try_to_enable_IR()

The following commit:

  2e63ad4bd5dd ("x86/apic: Do not init irq remapping if ioapic is disabled")

... added a check for skipped IO-APIC setup to enable_IR_x2apic(), but this
check is also duplicated in try_to_enable_IR() - and it will never succeed in
calling irq_remapping_enable().

Remove the whole irq_remapping_enable() complication: if the IO-APIC is
disabled we cannot enable IRQ remapping.

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bp@alien8.de
Cc: nicstange@gmail.com
Cc: wanpeng.li@hotmail.com
Link: http://lkml.kernel.org/r/1487841401-1543-1-git-send-email-douly.fnst@cn.fujitsu.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/apic.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 11088b86e5c7..aee7deddabd0 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1610,24 +1610,15 @@ static inline void try_to_enable_x2apic(int remap_mode) { }
 static inline void __x2apic_enable(void) { }
 #endif /* !CONFIG_X86_X2APIC */
 
-static int __init try_to_enable_IR(void)
-{
-#ifdef CONFIG_X86_IO_APIC
-	if (!x2apic_enabled() && skip_ioapic_setup) {
-		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
-		return -1;
-	}
-#endif
-	return irq_remapping_enable();
-}
-
 void __init enable_IR_x2apic(void)
 {
 	unsigned long flags;
 	int ret, ir_stat;
 
-	if (skip_ioapic_setup)
+	if (skip_ioapic_setup) {
+		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
 		return;
+	}
 
 	ir_stat = irq_remapping_prepare();
 	if (ir_stat < 0 && !x2apic_supported())
@@ -1645,7 +1636,7 @@ void __init enable_IR_x2apic(void)
 
 	/* If irq_remapping_prepare() succeeded, try to enable it */
 	if (ir_stat >= 0)
-		ir_stat = try_to_enable_IR();
+		ir_stat = irq_remapping_enable();
 	/* ir_stat contains the remap mode or an error code */
 	try_to_enable_x2apic(ir_stat);
 
-- 
cgit v1.2.3-59-g8ed1b


From 6b0b7551428e4caae1e2c023a529465a9a9ae2d4 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 16 Feb 2017 17:00:50 +1100
Subject: perf/core: Rename CONFIG_[UK]PROBE_EVENT to CONFIG_[UK]PROBE_EVENTS

We have uses of CONFIG_UPROBE_EVENT and CONFIG_KPROBE_EVENT as
well as CONFIG_UPROBE_EVENTS and CONFIG_KPROBE_EVENTS.

Consistently use the plurals.

Signed-off-by: Anton Blanchard <anton@samba.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@kernel.org
Cc: alexander.shishkin@linux.intel.com
Cc: davem@davemloft.net
Cc: sparclinux@vger.kernel.org
Link: http://lkml.kernel.org/r/20170216060050.20866-1-anton@ozlabs.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/trace/kprobetrace.txt         |  2 +-
 Documentation/trace/uprobetracer.txt        |  2 +-
 arch/powerpc/configs/85xx/kmp204x_defconfig |  2 +-
 arch/s390/configs/default_defconfig         |  2 +-
 arch/s390/configs/gcov_defconfig            |  2 +-
 arch/s390/configs/performance_defconfig     |  2 +-
 arch/s390/defconfig                         |  2 +-
 kernel/trace/Kconfig                        |  6 +++---
 kernel/trace/Makefile                       |  4 ++--
 kernel/trace/trace.c                        | 10 +++++-----
 kernel/trace/trace_probe.h                  |  4 ++--
 11 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'arch')

diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index e4991fb1eedc..41ef9d8efe95 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -12,7 +12,7 @@ kprobes can probe (this means, all functions body except for __kprobes
 functions). Unlike the Tracepoint based event, this can be added and removed
 dynamically, on the fly.
 
-To enable this feature, build your kernel with CONFIG_KPROBE_EVENT=y.
+To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y.
 
 Similar to the events tracer, this doesn't need to be activated via
 current_tracer. Instead of that, add probe points via
diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt
index fa7b680ee8a0..bf526a7c5559 100644
--- a/Documentation/trace/uprobetracer.txt
+++ b/Documentation/trace/uprobetracer.txt
@@ -7,7 +7,7 @@
 Overview
 --------
 Uprobe based trace events are similar to kprobe based trace events.
-To enable this feature, build your kernel with CONFIG_UPROBE_EVENT=y.
+To enable this feature, build your kernel with CONFIG_UPROBE_EVENTS=y.
 
 Similar to the kprobe-event tracer, this doesn't need to be activated via
 current_tracer. Instead of that, add probe points via
diff --git a/arch/powerpc/configs/85xx/kmp204x_defconfig b/arch/powerpc/configs/85xx/kmp204x_defconfig
index aaaaa609cd24..34a4da23f000 100644
--- a/arch/powerpc/configs/85xx/kmp204x_defconfig
+++ b/arch/powerpc/configs/85xx/kmp204x_defconfig
@@ -210,7 +210,7 @@ CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_SCHEDSTATS=y
 CONFIG_RCU_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
 CONFIG_CRYPTO_NULL=y
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_MD4=y
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index 143b1e00b818..4b176fe83da4 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -609,7 +609,7 @@ CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_HIST_TRIGGERS=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index f05d2d6e1087..0de46cc397f6 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -560,7 +560,7 @@ CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_HIST_TRIGGERS=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index 2358bf33c5ef..e167557b434c 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -558,7 +558,7 @@ CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_HIST_TRIGGERS=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 68bfd09f1b02..97189dbaf34b 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -179,7 +179,7 @@ CONFIG_FTRACE_SYSCALLS=y
 CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
 CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
 CONFIG_KPROBES_SANITY_TEST=y
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d5038005eb5d..d4a06e714645 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -429,7 +429,7 @@ config BLK_DEV_IO_TRACE
 
 	  If unsure, say N.
 
-config KPROBE_EVENT
+config KPROBE_EVENTS
 	depends on KPROBES
 	depends on HAVE_REGS_AND_STACK_ACCESS_API
 	bool "Enable kprobes-based dynamic events"
@@ -447,7 +447,7 @@ config KPROBE_EVENT
 	  This option is also required by perf-probe subcommand of perf tools.
 	  If you want to use perf tools, this option is strongly recommended.
 
-config UPROBE_EVENT
+config UPROBE_EVENTS
 	bool "Enable uprobes-based dynamic events"
 	depends on ARCH_SUPPORTS_UPROBES
 	depends on MMU
@@ -466,7 +466,7 @@ config UPROBE_EVENT
 
 config BPF_EVENTS
 	depends on BPF_SYSCALL
-	depends on (KPROBE_EVENT || UPROBE_EVENT) && PERF_EVENTS
+	depends on (KPROBE_EVENTS || UPROBE_EVENTS) && PERF_EVENTS
 	bool
 	default y
 	help
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index e57980845549..90f2701d92a7 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -57,7 +57,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
 obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o
 obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o
-obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
+obj-$(CONFIG_KPROBE_EVENTS) += trace_kprobe.o
 obj-$(CONFIG_TRACEPOINTS) += power-traces.o
 ifeq ($(CONFIG_PM),y)
 obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o
@@ -66,7 +66,7 @@ ifeq ($(CONFIG_TRACING),y)
 obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
 endif
 obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
-obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
+obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
 
 obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 707445ceb7ef..f35109514a01 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4341,22 +4341,22 @@ static const char readme_msg[] =
 	"\t\t\t  traces\n"
 #endif
 #endif /* CONFIG_STACK_TRACER */
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
 #endif
-#ifdef CONFIG_UPROBE_EVENT
+#ifdef CONFIG_UPROBE_EVENTS
 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
 #endif
-#if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
+#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
 	"\t  accepts: event-definitions (one definition per line)\n"
 	"\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
 	"\t           -:[<group>/]<event>\n"
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
 #endif
-#ifdef CONFIG_UPROBE_EVENT
+#ifdef CONFIG_UPROBE_EVENTS
 	"\t    place: <path>:<offset>\n"
 #endif
 	"\t     args: <name>=fetcharg[:type]\n"
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 0c0ae54d44c6..903273c93e61 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -248,7 +248,7 @@ ASSIGN_FETCH_FUNC(file_offset, ftype),			\
 #define FETCH_TYPE_STRING	0
 #define FETCH_TYPE_STRSIZE	1
 
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
 struct symbol_cache;
 unsigned long update_symbol_cache(struct symbol_cache *sc);
 void free_symbol_cache(struct symbol_cache *sc);
@@ -278,7 +278,7 @@ alloc_symbol_cache(const char *sym, long offset)
 {
 	return NULL;
 }
-#endif /* CONFIG_KPROBE_EVENT */
+#endif /* CONFIG_KPROBE_EVENTS */
 
 struct probe_arg {
 	struct fetch_param	fetch;
-- 
cgit v1.2.3-59-g8ed1b


From 1b17c6df852851b40c3c27c66b8fa2fd99cf25d8 Mon Sep 17 00:00:00 2001
From: Andrew Banman <abanman@hpe.com>
Date: Fri, 17 Feb 2017 11:07:49 -0600
Subject: x86/platform/uv/BAU: Fix HUB errors by remove initial write to sw-ack
 register

Writing to the software acknowledge clear register when there are no
pending messages causes a HUB error to assert. The original intent of this
write was to clear the pending bits before start of operation, but this is
an incorrect method and has been determined to be unnecessary.

Signed-off-by: Andrew Banman <abanman@hpe.com>
Acked-by: Mike Travis <mike.travis@hpe.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: akpm@linux-foundation.org
Cc: rja@hpe.com
Cc: sivanich@hpe.com
Link: http://lkml.kernel.org/r/1487351269-181133-1-git-send-email-abanman@hpe.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/uv/tlb_uv.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 766d4d3529a1..f25982cdff90 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1847,7 +1847,6 @@ static void pq_init(int node, int pnode)
 
 	ops.write_payload_first(pnode, first);
 	ops.write_payload_last(pnode, last);
-	ops.write_g_sw_ack(pnode, 0xffffUL);
 
 	/* in effect, all msg_type's are set to MSG_NOOP */
 	memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
-- 
cgit v1.2.3-59-g8ed1b


From 90b28ded88dda8bea82b4a86923e73ba0746d884 Mon Sep 17 00:00:00 2001
From: Matjaz Hegedic <matjaz.hegedic@gmail.com>
Date: Sun, 19 Feb 2017 19:32:48 +0100
Subject: x86/reboot/quirks: Add ASUS EeeBook X205TA reboot quirk

Without the parameter reboot=a, ASUS EeeBook X205TA will hang when it should reboot.

This adds the appropriate quirk, thus fixing the problem.

Signed-off-by: Matjaz Hegedic <matjaz.hegedic@gmail.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/reboot.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index e244c19a2451..01c9cda3e1b7 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -223,6 +223,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
 		},
 	},
+	{	/* Handle problems with rebooting on ASUS EeeBook X205TA */
+		.callback = set_acpi_reboot,
+		.ident = "ASUS EeeBook X205TA",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"),
+		},
+	},
 
 	/* Certec */
 	{       /* Handle problems with rebooting on Certec BPC600 */
-- 
cgit v1.2.3-59-g8ed1b


From 73667e31a153a66da97feb1584726222504924f8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 14 Feb 2017 22:17:17 +0100
Subject: x86/hyperv: Hide unused label

This new 32-bit warning just showed up:

arch/x86/hyperv/hv_init.c: In function 'hyperv_init':
arch/x86/hyperv/hv_init.c:167:1: error: label 'register_msr_cs' defined but not used [-Werror=unused-label]

The easiest solution is to move the label up into the existing #ifdef that
has the goto.

Fixes: dee863b571b0 ("hv: export current Hyper-V clocksource")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: devel@linuxdriverproject.org
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Link: http://lkml.kernel.org/r/20170214211736.2641241-1-arnd@arndb.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/hyperv/hv_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index db64baf0e500..8bef70e7f3cc 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -158,13 +158,13 @@ void hyperv_init(void)
 		clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
 		return;
 	}
+register_msr_cs:
 #endif
 	/*
 	 * For 32 bit guests just use the MSR based mechanism for reading
 	 * the partition counter.
 	 */
 
-register_msr_cs:
 	hyperv_cs = &hyperv_cs_msr;
 	if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
 		clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
-- 
cgit v1.2.3-59-g8ed1b


From aa5ec3f715d576353c7d8f4f8085634bd845b73f Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Mon, 27 Feb 2017 21:29:22 +0900
Subject: x86/vmware: Remove duplicate inclusion of asm/timer.h

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Cc: akataria@vmware.com
Cc: virtualization@lists.linux-foundation.org
Link: http://lkml.kernel.org/r/20170227122922.26230-1-standby24x7@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/vmware.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 891f4dad7b2c..22403a28caf5 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -30,7 +30,6 @@
 #include <asm/hypervisor.h>
 #include <asm/timer.h>
 #include <asm/apic.h>
-#include <asm/timer.h>
 
 #undef pr_fmt
 #define pr_fmt(fmt)	"vmware: " fmt
-- 
cgit v1.2.3-59-g8ed1b


From e86a2d2d34e20289ae7d46692e16d43c3a785db9 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Mon, 27 Feb 2017 22:07:03 +0900
Subject: x86/intel_rdt: Remove duplicate inclusion of linux/cpu.h

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Cc: fenghua.yu@intel.com
Link: http://lkml.kernel.org/r/20170227130703.26968-1-standby24x7@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 8af04afdfcb9..759577d9d166 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -27,7 +27,6 @@
 #include <linux/seq_file.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/cpu.h>
 #include <linux/task_work.h>
 
 #include <uapi/linux/magic.h>
-- 
cgit v1.2.3-59-g8ed1b


From 153654dbe595a68845ba14d5b0bfe299fa6a7e99 Mon Sep 17 00:00:00 2001
From: Rui Wang <rui.y.wang@intel.com>
Date: Tue, 28 Feb 2017 21:34:28 +0800
Subject: x86/PCI: Implement pcibios_release_device to release IRQ from IOAPIC

The revert of 991de2e59090 ("PCI, x86: Implement pcibios_alloc_irq()
and pcibios_free_irq()") causes a problem for IOAPIC hotplug. The
problem is that IRQs are allocated and freed in pci_enable_device()
and pci_disable_device(). But there are some drivers which don't call
pci_disable_device(), and they have good reasons not calling it, so
if they're using IOAPIC their IRQs won't have a chance to be released
from the IOAPIC. When this happens IOAPIC hot-removal fails with a
kernel stack dump and an error message like this:

  [149335.697989] pin16 on IOAPIC2 is still in use.

It turns out that we can fix it in a different way without moving IRQ
allocation into pcibios_alloc_irq(), thus avoiding the regression of
991de2e59090. We can keep the allocation and freeing of IRQs as is
within pci_enable_device()/pci_disable_device(), without breaking any
previous assumption of the rest of the system, keeping compatibility
with both the legacy and the modern drivers. We can accomplish this by
implementing the existing __weak hook of pcibios_release_device() thus
when a pci device is about to be deleted we get notified in the hook
and take the chance to release its IRQ, if any, from the IOAPIC.

Implement pcibios_release_device() for x86 to release any IRQ not released
by the driver.

Signed-off-by: Rui Wang <rui.y.wang@intel.com>
Cc: tony.luck@intel.com
Cc: linux-pci@vger.kernel.org
Cc: rjw@rjwysocki.net
Cc: linux-acpi@vger.kernel.org
Cc: fengguang.wu@intel.com
Cc: helgaas@kernel.org
Cc: kbuild-all@01.org
Cc: bhelgaas@google.com
Link: http://lkml.kernel.org/r/1488288869-31290-2-git-send-email-rui.y.wang@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/pci/common.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 0cb52ae0a8f0..190e718694b1 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -735,6 +735,15 @@ void pcibios_disable_device (struct pci_dev *dev)
 		pcibios_disable_irq(dev);
 }
 
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+void pcibios_release_device(struct pci_dev *dev)
+{
+	if (atomic_dec_return(&dev->enable_cnt) >= 0)
+		pcibios_disable_device(dev);
+
+}
+#endif
+
 int pci_ext_cfg_avail(void)
 {
 	if (raw_pci_ext_ops)
-- 
cgit v1.2.3-59-g8ed1b


From 58ab9a088ddac4efe823471275859d64f735577e Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Thu, 23 Feb 2017 14:26:03 -0800
Subject: x86/pkeys: Check against max pkey to avoid overflows

Kirill reported a warning from UBSAN about undefined behavior when using
protection keys.  He is running on hardware that actually has support for
it, which is not widely available.

The warning triggers because of very large shifts of integers when doing a
pkey_free() of a large, invalid value. This happens because we never check
that the pkey "fits" into the mm_pkey_allocation_map().

I do not believe there is any danger here of anything bad happening
other than some aliasing issues where somebody could do:

	pkey_free(35);

and the kernel would effectively execute:

	pkey_free(8);

While this might be confusing to an app that was doing something stupid, it
has to do something stupid and the effects are limited to the app shooting
itself in the foot.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: stable@vger.kernel.org
Cc: linux-kselftest@vger.kernel.org
Cc: shuah@kernel.org
Cc: kirill.shutemov@linux.intel.com
Link: http://lkml.kernel.org/r/20170223222603.A022ED65@viggo.jf.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/pkeys.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index 34684adb6899..b3b09b98896d 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -46,6 +46,15 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 static inline
 bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
 {
+	/*
+	 * "Allocated" pkeys are those that have been returned
+	 * from pkey_alloc().  pkey 0 is special, and never
+	 * returned from pkey_alloc().
+	 */
+	if (pkey <= 0)
+		return false;
+	if (pkey >= arch_max_pkey())
+		return false;
 	return mm_pkey_allocation_map(mm) & (1U << pkey);
 }
 
@@ -82,12 +91,6 @@ int mm_pkey_alloc(struct mm_struct *mm)
 static inline
 int mm_pkey_free(struct mm_struct *mm, int pkey)
 {
-	/*
-	 * pkey 0 is special, always allocated and can never
-	 * be freed.
-	 */
-	if (!pkey)
-		return -EINVAL;
 	if (!mm_pkey_is_allocated(mm, pkey))
 		return -EINVAL;
 
-- 
cgit v1.2.3-59-g8ed1b


From 940b2f2fd963c043418ce8af64605783b2b19140 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Sat, 18 Feb 2017 12:31:40 +0100
Subject: x86/events: Remove last remnants of old filenames

Update to the new file paths, remove them from introductory comments.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20170218113140.8051-1-bp@alien8.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/events/amd/core.c                               | 2 +-
 arch/x86/events/intel/cstate.c                           | 2 +-
 arch/x86/events/intel/rapl.c                             | 2 +-
 arch/x86/events/intel/uncore.h                           | 6 +++---
 tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index afb222b63cae..c84584bb9402 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -604,7 +604,7 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
 			return &amd_f15_PMC20;
 		}
 	case AMD_EVENT_NB:
-		/* moved to perf_event_amd_uncore.c */
+		/* moved to uncore.c */
 		return &emptyconstraint;
 	default:
 		return &emptyconstraint;
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index aff4b5b69d40..238ae3248ba5 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -1,5 +1,5 @@
 /*
- * perf_event_intel_cstate.c: support cstate residency counters
+ * Support cstate residency counters
  *
  * Copyright (C) 2015, Intel Corp.
  * Author: Kan Liang (kan.liang@intel.com)
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 22054ca49026..9d05c7e67f60 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -1,5 +1,5 @@
 /*
- * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters
+ * Support Intel RAPL energy consumption counters
  * Copyright (C) 2013 Google, Inc., Stephane Eranian
  *
  * Intel RAPL interface is specified in the IA-32 Manual Vol3b
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index ad986c1e29bc..df5989f27b1b 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -360,7 +360,7 @@ extern struct list_head pci2phy_map_head;
 extern struct pci_extra_dev *uncore_extra_pci_dev;
 extern struct event_constraint uncore_constraint_empty;
 
-/* perf_event_intel_uncore_snb.c */
+/* uncore_snb.c */
 int snb_uncore_pci_init(void);
 int ivb_uncore_pci_init(void);
 int hsw_uncore_pci_init(void);
@@ -371,7 +371,7 @@ void nhm_uncore_cpu_init(void);
 void skl_uncore_cpu_init(void);
 int snb_pci2phy_map_init(int devid);
 
-/* perf_event_intel_uncore_snbep.c */
+/* uncore_snbep.c */
 int snbep_uncore_pci_init(void);
 void snbep_uncore_cpu_init(void);
 int ivbep_uncore_pci_init(void);
@@ -385,5 +385,5 @@ void knl_uncore_cpu_init(void);
 int skx_uncore_pci_init(void);
 void skx_uncore_cpu_init(void);
 
-/* perf_event_intel_uncore_nhmex.c */
+/* uncore_nhmex.c */
 void nhmex_uncore_cpu_init(void);
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 7913363bde5c..4f3c758d875d 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -31,7 +31,7 @@
 #error Instruction buffer size too small
 #endif
 
-/* Based on branch_type() from perf_event_intel_lbr.c */
+/* Based on branch_type() from arch/x86/events/intel/lbr.c */
 static void intel_pt_insn_decoder(struct insn *insn,
 				  struct intel_pt_insn *intel_pt_insn)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 72042a8c7b01048a36ece216aaf206b7d60ca661 Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <me@tobin.cc>
Date: Mon, 20 Feb 2017 10:12:35 +1100
Subject: x86/purgatory: Make functions and variables static

Sparse emits several 'symbol not declared' warnings for various
functions and variables.

Add static keyword to functions and variables which have file scope
only.

Signed-off-by: Tobin C. Harding <me@tobin.cc>
Link: http://lkml.kernel.org/r/1487545956-2547-2-git-send-email-me@tobin.cc
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/purgatory/purgatory.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index 25e068ba3382..2a5f4373c797 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -18,11 +18,11 @@ struct sha_region {
 	unsigned long len;
 };
 
-unsigned long backup_dest = 0;
-unsigned long backup_src = 0;
-unsigned long backup_sz = 0;
+static unsigned long backup_dest;
+static unsigned long backup_src;
+static unsigned long backup_sz;
 
-u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 };
+static u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 };
 
 struct sha_region sha_regions[16] = {};
 
@@ -39,7 +39,7 @@ static int copy_backup_region(void)
 	return 0;
 }
 
-int verify_sha256_digest(void)
+static int verify_sha256_digest(void)
 {
 	struct sha_region *ptr, *end;
 	u8 digest[SHA256_DIGEST_SIZE];
-- 
cgit v1.2.3-59-g8ed1b


From e98fe5127b9cc8ab33d1094b81c19deb1f9082bf Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <me@tobin.cc>
Date: Mon, 20 Feb 2017 10:12:36 +1100
Subject: x86/purgatory: Fix sparse warning, symbol not declared

Sparse emits warning, 'symbol not declared' for a function that has
neither file scope nor a forward declaration. The functions only call
site is an ASM file.

Add a header file with the function declaration. Include the header file in
the C source file defining the function in order to fix the sparse
warning. Include the header file in ASM file containing the call site to
document the usage.

Signed-off-by: Tobin C. Harding <me@tobin.cc>
Link: http://lkml.kernel.org/r/1487545956-2547-3-git-send-email-me@tobin.cc
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/purgatory/purgatory.c    | 1 +
 arch/x86/purgatory/purgatory.h    | 8 ++++++++
 arch/x86/purgatory/setup-x86_64.S | 1 +
 3 files changed, 10 insertions(+)
 create mode 100644 arch/x86/purgatory/purgatory.h

(limited to 'arch')

diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index 2a5f4373c797..b6d5c8946e66 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -11,6 +11,7 @@
  */
 
 #include "sha256.h"
+#include "purgatory.h"
 #include "../boot/string.h"
 
 struct sha_region {
diff --git a/arch/x86/purgatory/purgatory.h b/arch/x86/purgatory/purgatory.h
new file mode 100644
index 000000000000..e2e365a6c192
--- /dev/null
+++ b/arch/x86/purgatory/purgatory.h
@@ -0,0 +1,8 @@
+#ifndef PURGATORY_H
+#define PURGATORY_H
+
+#ifndef __ASSEMBLY__
+extern void purgatory(void);
+#endif	/* __ASSEMBLY__ */
+
+#endif /* PURGATORY_H */
diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S
index fe3c91ba1bd0..f90e9dfa90bb 100644
--- a/arch/x86/purgatory/setup-x86_64.S
+++ b/arch/x86/purgatory/setup-x86_64.S
@@ -9,6 +9,7 @@
  * This source code is licensed under the GNU General Public License,
  * Version 2.  See the file COPYING for more details.
  */
+#include "purgatory.h"
 
 	.text
 	.globl purgatory_start
-- 
cgit v1.2.3-59-g8ed1b


From 8392f16d38bb5222c03073a3906b7fd272386faf Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Tue, 21 Feb 2017 19:36:39 +0100
Subject: x86/boot: Correct setup_header.start_sys name

It is called start_sys_seg elsewhere so rename it to that. It is an
obsolete field so we could just as well directly call it __u16 __pad...

No functional change.

Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/20170221183639.16554-1-bp@alien8.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/uapi/asm/bootparam.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 5138dacf8bb8..07244ea16765 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -58,7 +58,7 @@ struct setup_header {
 	__u32	header;
 	__u16	version;
 	__u32	realmode_swtch;
-	__u16	start_sys;
+	__u16	start_sys_seg;
 	__u16	kernel_version;
 	__u8	type_of_loader;
 	__u8	loadflags;
-- 
cgit v1.2.3-59-g8ed1b


From f94c8d116997597fc00f0812b0ab9256e7b0c58f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 1 Mar 2017 15:53:38 +0100
Subject: sched/clock, x86/tsc: Rework the x86 'unstable' sched_clock()
 interface

Wanpeng Li reported that since the following commit:

  acb04058de49 ("sched/clock: Fix hotplug crash")

... KVM always runs with unstable sched-clock even though KVM's
kvm_clock _is_ stable.

The problem is that we've tied clear_sched_clock_stable() to the TSC
state, and overlooked that sched_clock() is a paravirt function.

Solve this by doing two things:

 - tie the sched_clock() stable state more clearly to the TSC stable
   state for the normal (!paravirt) case.

 - only call clear_sched_clock_stable() when we mark TSC unstable
   when we use native_sched_clock().

The first means we can actually run with stable sched_clock in more
situations then before, which is good. And since commit:

  12907fbb1a69 ("sched/clock, clocksource: Add optional cs::mark_unstable() method")

... this should be reliable. Since any detection of TSC fail now results
in marking the TSC unstable.

Reported-by: Wanpeng Li <kernellwp@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Fixes: acb04058de49 ("sched/clock: Fix hotplug crash")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/amd.c       |  4 ----
 arch/x86/kernel/cpu/centaur.c   |  2 --
 arch/x86/kernel/cpu/common.c    |  3 ---
 arch/x86/kernel/cpu/cyrix.c     |  1 -
 arch/x86/kernel/cpu/intel.c     |  4 ----
 arch/x86/kernel/cpu/transmeta.c |  2 --
 arch/x86/kernel/tsc.c           | 35 +++++++++++++++++++++++------------
 7 files changed, 23 insertions(+), 28 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 4e95b2e0d95f..30d924ae5c34 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -555,10 +555,6 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 	if (c->x86_power & (1 << 8)) {
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
-		if (check_tsc_unstable())
-			clear_sched_clock_stable();
-	} else {
-		clear_sched_clock_stable();
 	}
 
 	/* Bit 12 of 8000_0007 edx is accumulated power mechanism. */
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 2c234a6d94c4..bad8ff078a21 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -104,8 +104,6 @@ static void early_init_centaur(struct cpuinfo_x86 *c)
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
 #endif
-
-	clear_sched_clock_stable();
 }
 
 static void init_centaur(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c64ca5929cb5..9d98e2e15d54 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -86,7 +86,6 @@ static void default_init(struct cpuinfo_x86 *c)
 			strcpy(c->x86_model_id, "386");
 	}
 #endif
-	clear_sched_clock_stable();
 }
 
 static const struct cpu_dev default_cpu = {
@@ -1075,8 +1074,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	 */
 	if (this_cpu->c_init)
 		this_cpu->c_init(c);
-	else
-		clear_sched_clock_stable();
 
 	/* Disable the PN if appropriate */
 	squash_the_stupid_serial_number(c);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 47416f959a48..31e679238e8d 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -184,7 +184,6 @@ static void early_init_cyrix(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
 		break;
 	}
-	clear_sched_clock_stable();
 }
 
 static void init_cyrix(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 017ecd3bb553..2388bafe5c37 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -161,10 +161,6 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 	if (c->x86_power & (1 << 8)) {
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
-		if (check_tsc_unstable())
-			clear_sched_clock_stable();
-	} else {
-		clear_sched_clock_stable();
 	}
 
 	/* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index c1ea5b999839..6a9ad73a2c54 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -15,8 +15,6 @@ static void early_init_transmeta(struct cpuinfo_x86 *c)
 		if (xlvl >= 0x80860001)
 			c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001);
 	}
-
-	clear_sched_clock_stable();
 }
 
 static void init_transmeta(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 2724dc82f992..911129fda2f9 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -326,9 +326,16 @@ unsigned long long sched_clock(void)
 {
 	return paravirt_sched_clock();
 }
+
+static inline bool using_native_sched_clock(void)
+{
+	return pv_time_ops.sched_clock == native_sched_clock;
+}
 #else
 unsigned long long
 sched_clock(void) __attribute__((alias("native_sched_clock")));
+
+static inline bool using_native_sched_clock(void) { return true; }
 #endif
 
 int check_tsc_unstable(void)
@@ -1111,8 +1118,10 @@ static void tsc_cs_mark_unstable(struct clocksource *cs)
 {
 	if (tsc_unstable)
 		return;
+
 	tsc_unstable = 1;
-	clear_sched_clock_stable();
+	if (using_native_sched_clock())
+		clear_sched_clock_stable();
 	disable_sched_clock_irqtime();
 	pr_info("Marking TSC unstable due to clocksource watchdog\n");
 }
@@ -1134,18 +1143,20 @@ static struct clocksource clocksource_tsc = {
 
 void mark_tsc_unstable(char *reason)
 {
-	if (!tsc_unstable) {
-		tsc_unstable = 1;
+	if (tsc_unstable)
+		return;
+
+	tsc_unstable = 1;
+	if (using_native_sched_clock())
 		clear_sched_clock_stable();
-		disable_sched_clock_irqtime();
-		pr_info("Marking TSC unstable due to %s\n", reason);
-		/* Change only the rating, when not registered */
-		if (clocksource_tsc.mult)
-			clocksource_mark_unstable(&clocksource_tsc);
-		else {
-			clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
-			clocksource_tsc.rating = 0;
-		}
+	disable_sched_clock_irqtime();
+	pr_info("Marking TSC unstable due to %s\n", reason);
+	/* Change only the rating, when not registered */
+	if (clocksource_tsc.mult) {
+		clocksource_mark_unstable(&clocksource_tsc);
+	} else {
+		clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
+		clocksource_tsc.rating = 0;
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From bb1a2c26165640ba2cbcfe06c81e9f9d6db4e643 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 1 Mar 2017 21:10:17 +0100
Subject: x86/hpet: Prevent might sleep splat on resume

Sergey reported a might sleep warning triggered from the hpet resume
path. It's caused by the call to disable_irq() from interrupt disabled
context.

The problem with the low level resume code is that it is not accounted as a
special system_state like we do during the boot process. Calling the same
code during system boot would not trigger the warning. That's inconsistent
at best.

In this particular case it's trivial to replace the disable_irq() with
disable_hardirq() because this particular code path is solely used from
system resume and the involved hpet interrupts can never be force threaded.


Reported-and-tested-by: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1703012108460.3684@nanos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/hpet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index dc6ba5bda9fc..89ff7af2de50 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -354,7 +354,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer)
 
 		irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq));
 		irq_domain_activate_irq(irq_get_irq_data(hdev->irq));
-		disable_irq(hdev->irq);
+		disable_hardirq(hdev->irq);
 		irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
 		enable_irq(hdev->irq);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 7afbeb6df2aa5f9e3a0fc228817a85c16dea0faa Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 24 Feb 2017 12:56:12 +0100
Subject: s390/ipl: always use load normal for CCW-type re-IPL

commit 14890678687c ("s390/ipl: use load normal for LPAR re-ipl")
missed to convert one code path to use load normal semantics for
re-IPL. Convert the missing code path as well.

Fixes: 14890678687c ("s390/ipl: use load normal for LPAR re-ipl")
Reported-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Acked-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/ipl.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index b67dafb7b7cf..e545ffe5155a 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -564,6 +564,8 @@ static struct kset *ipl_kset;
 
 static void __ipl_run(void *unused)
 {
+	if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW)
+		diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
 	diag308(DIAG308_LOAD_CLEAR, NULL);
 	if (MACHINE_IS_VM)
 		__cpcmd("IPL", NULL, 0, NULL);
-- 
cgit v1.2.3-59-g8ed1b


From 2e4d88009f57057df7672fa69a32b5224af54d37 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.vnet.ibm.com>
Date: Thu, 2 Mar 2017 15:23:42 +0100
Subject: KVM: s390: Fix guest migration for huge guests resulting in panic

While we can technically not run huge page guests right now, we can
setup a guest with huge pages. Trying to migrate it will trigger a
VM_BUG_ON and, if the kernel is not configured to panic on a BUG, it
will happily try to work on non-existing page table entries.

With this patch, we always return "dirty" if we encounter a large page
when migrating. This at least fixes the immediate problem until we
have proper handling for both kind of pages.

Fixes: 15f36eb ("KVM: s390: Add proper dirty bitmap support to S390 kvm.")
Cc: <stable@vger.kernel.org> # 3.16+

Signed-off-by: Janosch Frank <frankja@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/mm/pgtable.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index b48dc5f1900b..463e5ef02304 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -608,12 +608,29 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
 {
 	spinlock_t *ptl;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
 	pgste_t pgste;
 	pte_t *ptep;
 	pte_t pte;
 	bool dirty;
 
-	ptep = get_locked_pte(mm, addr, &ptl);
+	pgd = pgd_offset(mm, addr);
+	pud = pud_alloc(mm, pgd, addr);
+	if (!pud)
+		return false;
+	pmd = pmd_alloc(mm, pud, addr);
+	if (!pmd)
+		return false;
+	/* We can't run guests backed by huge pages, but userspace can
+	 * still set them up and then try to migrate them without any
+	 * migration support.
+	 */
+	if (pmd_large(*pmd))
+		return true;
+
+	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
 	if (unlikely(!ptep))
 		return false;
 
-- 
cgit v1.2.3-59-g8ed1b


From e148bd17f48bd17fca2f4f089ec879fa6e47e34c Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Date: Tue, 14 Feb 2017 14:46:42 +0530
Subject: powerpc: Emulation support for load/store instructions on LE

emulate_step() uses a number of underlying kernel functions that were
initially not enabled for LE. This has been rectified since. So, fix
emulate_step() for LE for the corresponding instructions.

Cc: stable@vger.kernel.org # v3.18+
Reported-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/sstep.c | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 846dba2c6360..9c542ec70c5b 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1799,8 +1799,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		goto instr_done;
 
 	case LARX:
-		if (regs->msr & MSR_LE)
-			return 0;
 		if (op.ea & (size - 1))
 			break;		/* can't handle misaligned */
 		if (!address_ok(regs, op.ea, size))
@@ -1823,8 +1821,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		goto ldst_done;
 
 	case STCX:
-		if (regs->msr & MSR_LE)
-			return 0;
 		if (op.ea & (size - 1))
 			break;		/* can't handle misaligned */
 		if (!address_ok(regs, op.ea, size))
@@ -1849,8 +1845,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		goto ldst_done;
 
 	case LOAD:
-		if (regs->msr & MSR_LE)
-			return 0;
 		err = read_mem(&regs->gpr[op.reg], op.ea, size, regs);
 		if (!err) {
 			if (op.type & SIGNEXT)
@@ -1862,8 +1856,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 
 #ifdef CONFIG_PPC_FPU
 	case LOAD_FP:
-		if (regs->msr & MSR_LE)
-			return 0;
 		if (size == 4)
 			err = do_fp_load(op.reg, do_lfs, op.ea, size, regs);
 		else
@@ -1872,15 +1864,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 #endif
 #ifdef CONFIG_ALTIVEC
 	case LOAD_VMX:
-		if (regs->msr & MSR_LE)
-			return 0;
 		err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs);
 		goto ldst_done;
 #endif
 #ifdef CONFIG_VSX
 	case LOAD_VSX:
-		if (regs->msr & MSR_LE)
-			return 0;
 		err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs);
 		goto ldst_done;
 #endif
@@ -1903,8 +1891,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		goto instr_done;
 
 	case STORE:
-		if (regs->msr & MSR_LE)
-			return 0;
 		if ((op.type & UPDATE) && size == sizeof(long) &&
 		    op.reg == 1 && op.update_reg == 1 &&
 		    !(regs->msr & MSR_PR) &&
@@ -1917,8 +1903,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 
 #ifdef CONFIG_PPC_FPU
 	case STORE_FP:
-		if (regs->msr & MSR_LE)
-			return 0;
 		if (size == 4)
 			err = do_fp_store(op.reg, do_stfs, op.ea, size, regs);
 		else
@@ -1927,15 +1911,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 #endif
 #ifdef CONFIG_ALTIVEC
 	case STORE_VMX:
-		if (regs->msr & MSR_LE)
-			return 0;
 		err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs);
 		goto ldst_done;
 #endif
 #ifdef CONFIG_VSX
 	case STORE_VSX:
-		if (regs->msr & MSR_LE)
-			return 0;
 		err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs);
 		goto ldst_done;
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 4ceae137bdab2232e57b2e6fd5631a22a0160938 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Date: Tue, 14 Feb 2017 14:46:43 +0530
Subject: powerpc: emulate_step() tests for load/store instructions

Add new selftest that test emulate_step for Normal, Floating Point,
Vector and Vector Scalar - load/store instructions. Test should run
at boot time if CONFIG_KPROBES_SANITY_TEST and CONFIG_PPC64 is set.

Sample log:

  emulate_step_test: ld             : PASS
  emulate_step_test: lwz            : PASS
  emulate_step_test: lwzx           : PASS
  emulate_step_test: std            : PASS
  emulate_step_test: ldarx / stdcx. : PASS
  emulate_step_test: lfsx           : PASS
  emulate_step_test: stfsx          : PASS
  emulate_step_test: lfdx           : PASS
  emulate_step_test: stfdx          : PASS
  emulate_step_test: lvx            : PASS
  emulate_step_test: stvx           : PASS
  emulate_step_test: lxvd2x         : PASS
  emulate_step_test: stxvd2x        : PASS

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
[mpe: Drop start/complete lines, make it all __init]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/ppc-opcode.h |   7 +
 arch/powerpc/lib/Makefile             |   1 +
 arch/powerpc/lib/test_emulate_step.c  | 434 ++++++++++++++++++++++++++++++++++
 3 files changed, 442 insertions(+)
 create mode 100644 arch/powerpc/lib/test_emulate_step.c

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index d99bd442aacb..e7d6d86563ee 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -284,6 +284,13 @@
 #define PPC_INST_BRANCH_COND		0x40800000
 #define PPC_INST_LBZCIX			0x7c0006aa
 #define PPC_INST_STBCIX			0x7c0007aa
+#define PPC_INST_LWZX			0x7c00002e
+#define PPC_INST_LFSX			0x7c00042e
+#define PPC_INST_STFSX			0x7c00052e
+#define PPC_INST_LFDX			0x7c0004ae
+#define PPC_INST_STFDX			0x7c0005ae
+#define PPC_INST_LVX			0x7c0000ce
+#define PPC_INST_STVX			0x7c0001ce
 
 /* macros to insert fields into opcodes */
 #define ___PPC_RA(a)	(((a) & 0x1f) << 16)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 0e649d72fe8d..2b5e09020cfe 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -20,6 +20,7 @@ obj64-y	+= copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \
 
 obj64-$(CONFIG_SMP)	+= locks.o
 obj64-$(CONFIG_ALTIVEC)	+= vmx-helper.o
+obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o
 
 obj-y			+= checksum_$(BITS).o checksum_wrappers.o
 
diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c
new file mode 100644
index 000000000000..2534c1447554
--- /dev/null
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -0,0 +1,434 @@
+/*
+ * Simple sanity test for emulate_step load/store instructions.
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "emulate_step_test: " fmt
+
+#include <linux/ptrace.h>
+#include <asm/sstep.h>
+#include <asm/ppc-opcode.h>
+
+#define IMM_L(i)		((uintptr_t)(i) & 0xffff)
+
+/*
+ * Defined with TEST_ prefix so it does not conflict with other
+ * definitions.
+ */
+#define TEST_LD(r, base, i)	(PPC_INST_LD | ___PPC_RT(r) |		\
+					___PPC_RA(base) | IMM_L(i))
+#define TEST_LWZ(r, base, i)	(PPC_INST_LWZ | ___PPC_RT(r) |		\
+					___PPC_RA(base) | IMM_L(i))
+#define TEST_LWZX(t, a, b)	(PPC_INST_LWZX | ___PPC_RT(t) |		\
+					___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_STD(r, base, i)	(PPC_INST_STD | ___PPC_RS(r) |		\
+					___PPC_RA(base) | ((i) & 0xfffc))
+#define TEST_LDARX(t, a, b, eh)	(PPC_INST_LDARX | ___PPC_RT(t) |	\
+					___PPC_RA(a) | ___PPC_RB(b) |	\
+					__PPC_EH(eh))
+#define TEST_STDCX(s, a, b)	(PPC_INST_STDCX | ___PPC_RS(s) |	\
+					___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_LFSX(t, a, b)	(PPC_INST_LFSX | ___PPC_RT(t) |		\
+					___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_STFSX(s, a, b)	(PPC_INST_STFSX | ___PPC_RS(s) |	\
+					___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_LFDX(t, a, b)	(PPC_INST_LFDX | ___PPC_RT(t) |		\
+					___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_STFDX(s, a, b)	(PPC_INST_STFDX | ___PPC_RS(s) |	\
+					___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_LVX(t, a, b)	(PPC_INST_LVX | ___PPC_RT(t) |		\
+					___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_STVX(s, a, b)	(PPC_INST_STVX | ___PPC_RS(s) |		\
+					___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_LXVD2X(s, a, b)	(PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b))
+#define TEST_STXVD2X(s, a, b)	(PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b))
+
+
+static void __init init_pt_regs(struct pt_regs *regs)
+{
+	static unsigned long msr;
+	static bool msr_cached;
+
+	memset(regs, 0, sizeof(struct pt_regs));
+
+	if (likely(msr_cached)) {
+		regs->msr = msr;
+		return;
+	}
+
+	asm volatile("mfmsr %0" : "=r"(regs->msr));
+
+	regs->msr |= MSR_FP;
+	regs->msr |= MSR_VEC;
+	regs->msr |= MSR_VSX;
+
+	msr = regs->msr;
+	msr_cached = true;
+}
+
+static void __init show_result(char *ins, char *result)
+{
+	pr_info("%-14s : %s\n", ins, result);
+}
+
+static void __init test_ld(void)
+{
+	struct pt_regs regs;
+	unsigned long a = 0x23;
+	int stepped = -1;
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long) &a;
+
+	/* ld r5, 0(r3) */
+	stepped = emulate_step(&regs, TEST_LD(5, 3, 0));
+
+	if (stepped == 1 && regs.gpr[5] == a)
+		show_result("ld", "PASS");
+	else
+		show_result("ld", "FAIL");
+}
+
+static void __init test_lwz(void)
+{
+	struct pt_regs regs;
+	unsigned int a = 0x4545;
+	int stepped = -1;
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long) &a;
+
+	/* lwz r5, 0(r3) */
+	stepped = emulate_step(&regs, TEST_LWZ(5, 3, 0));
+
+	if (stepped == 1 && regs.gpr[5] == a)
+		show_result("lwz", "PASS");
+	else
+		show_result("lwz", "FAIL");
+}
+
+static void __init test_lwzx(void)
+{
+	struct pt_regs regs;
+	unsigned int a[3] = {0x0, 0x0, 0x1234};
+	int stepped = -1;
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long) a;
+	regs.gpr[4] = 8;
+	regs.gpr[5] = 0x8765;
+
+	/* lwzx r5, r3, r4 */
+	stepped = emulate_step(&regs, TEST_LWZX(5, 3, 4));
+	if (stepped == 1 && regs.gpr[5] == a[2])
+		show_result("lwzx", "PASS");
+	else
+		show_result("lwzx", "FAIL");
+}
+
+static void __init test_std(void)
+{
+	struct pt_regs regs;
+	unsigned long a = 0x1234;
+	int stepped = -1;
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long) &a;
+	regs.gpr[5] = 0x5678;
+
+	/* std r5, 0(r3) */
+	stepped = emulate_step(&regs, TEST_STD(5, 3, 0));
+	if (stepped == 1 || regs.gpr[5] == a)
+		show_result("std", "PASS");
+	else
+		show_result("std", "FAIL");
+}
+
+static void __init test_ldarx_stdcx(void)
+{
+	struct pt_regs regs;
+	unsigned long a = 0x1234;
+	int stepped = -1;
+	unsigned long cr0_eq = 0x1 << 29; /* eq bit of CR0 */
+
+	init_pt_regs(&regs);
+	asm volatile("mfcr %0" : "=r"(regs.ccr));
+
+
+	/*** ldarx ***/
+
+	regs.gpr[3] = (unsigned long) &a;
+	regs.gpr[4] = 0;
+	regs.gpr[5] = 0x5678;
+
+	/* ldarx r5, r3, r4, 0 */
+	stepped = emulate_step(&regs, TEST_LDARX(5, 3, 4, 0));
+
+	/*
+	 * Don't touch 'a' here. Touching 'a' can do Load/store
+	 * of 'a' which result in failure of subsequent stdcx.
+	 * Instead, use hardcoded value for comparison.
+	 */
+	if (stepped <= 0 || regs.gpr[5] != 0x1234) {
+		show_result("ldarx / stdcx.", "FAIL (ldarx)");
+		return;
+	}
+
+
+	/*** stdcx. ***/
+
+	regs.gpr[5] = 0x9ABC;
+
+	/* stdcx. r5, r3, r4 */
+	stepped = emulate_step(&regs, TEST_STDCX(5, 3, 4));
+
+	/*
+	 * Two possible scenarios that indicates successful emulation
+	 * of stdcx. :
+	 *  1. Reservation is active and store is performed. In this
+	 *     case cr0.eq bit will be set to 1.
+	 *  2. Reservation is not active and store is not performed.
+	 *     In this case cr0.eq bit will be set to 0.
+	 */
+	if (stepped == 1 && ((regs.gpr[5] == a && (regs.ccr & cr0_eq))
+			|| (regs.gpr[5] != a && !(regs.ccr & cr0_eq))))
+		show_result("ldarx / stdcx.", "PASS");
+	else
+		show_result("ldarx / stdcx.", "FAIL (stdcx.)");
+}
+
+#ifdef CONFIG_PPC_FPU
+static void __init test_lfsx_stfsx(void)
+{
+	struct pt_regs regs;
+	union {
+		float a;
+		int b;
+	} c;
+	int cached_b;
+	int stepped = -1;
+
+	init_pt_regs(&regs);
+
+
+	/*** lfsx ***/
+
+	c.a = 123.45;
+	cached_b = c.b;
+
+	regs.gpr[3] = (unsigned long) &c.a;
+	regs.gpr[4] = 0;
+
+	/* lfsx frt10, r3, r4 */
+	stepped = emulate_step(&regs, TEST_LFSX(10, 3, 4));
+
+	if (stepped == 1)
+		show_result("lfsx", "PASS");
+	else
+		show_result("lfsx", "FAIL");
+
+
+	/*** stfsx ***/
+
+	c.a = 678.91;
+
+	/* stfsx frs10, r3, r4 */
+	stepped = emulate_step(&regs, TEST_STFSX(10, 3, 4));
+
+	if (stepped == 1 && c.b == cached_b)
+		show_result("stfsx", "PASS");
+	else
+		show_result("stfsx", "FAIL");
+}
+
+static void __init test_lfdx_stfdx(void)
+{
+	struct pt_regs regs;
+	union {
+		double a;
+		long b;
+	} c;
+	long cached_b;
+	int stepped = -1;
+
+	init_pt_regs(&regs);
+
+
+	/*** lfdx ***/
+
+	c.a = 123456.78;
+	cached_b = c.b;
+
+	regs.gpr[3] = (unsigned long) &c.a;
+	regs.gpr[4] = 0;
+
+	/* lfdx frt10, r3, r4 */
+	stepped = emulate_step(&regs, TEST_LFDX(10, 3, 4));
+
+	if (stepped == 1)
+		show_result("lfdx", "PASS");
+	else
+		show_result("lfdx", "FAIL");
+
+
+	/*** stfdx ***/
+
+	c.a = 987654.32;
+
+	/* stfdx frs10, r3, r4 */
+	stepped = emulate_step(&regs, TEST_STFDX(10, 3, 4));
+
+	if (stepped == 1 && c.b == cached_b)
+		show_result("stfdx", "PASS");
+	else
+		show_result("stfdx", "FAIL");
+}
+#else
+static void __init test_lfsx_stfsx(void)
+{
+	show_result("lfsx", "SKIP (CONFIG_PPC_FPU is not set)");
+	show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)");
+}
+
+static void __init test_lfdx_stfdx(void)
+{
+	show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)");
+	show_result("stfdx", "SKIP (CONFIG_PPC_FPU is not set)");
+}
+#endif /* CONFIG_PPC_FPU */
+
+#ifdef CONFIG_ALTIVEC
+static void __init test_lvx_stvx(void)
+{
+	struct pt_regs regs;
+	union {
+		vector128 a;
+		u32 b[4];
+	} c;
+	u32 cached_b[4];
+	int stepped = -1;
+
+	init_pt_regs(&regs);
+
+
+	/*** lvx ***/
+
+	cached_b[0] = c.b[0] = 923745;
+	cached_b[1] = c.b[1] = 2139478;
+	cached_b[2] = c.b[2] = 9012;
+	cached_b[3] = c.b[3] = 982134;
+
+	regs.gpr[3] = (unsigned long) &c.a;
+	regs.gpr[4] = 0;
+
+	/* lvx vrt10, r3, r4 */
+	stepped = emulate_step(&regs, TEST_LVX(10, 3, 4));
+
+	if (stepped == 1)
+		show_result("lvx", "PASS");
+	else
+		show_result("lvx", "FAIL");
+
+
+	/*** stvx ***/
+
+	c.b[0] = 4987513;
+	c.b[1] = 84313948;
+	c.b[2] = 71;
+	c.b[3] = 498532;
+
+	/* stvx vrs10, r3, r4 */
+	stepped = emulate_step(&regs, TEST_STVX(10, 3, 4));
+
+	if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] &&
+	    cached_b[2] == c.b[2] && cached_b[3] == c.b[3])
+		show_result("stvx", "PASS");
+	else
+		show_result("stvx", "FAIL");
+}
+#else
+static void __init test_lvx_stvx(void)
+{
+	show_result("lvx", "SKIP (CONFIG_ALTIVEC is not set)");
+	show_result("stvx", "SKIP (CONFIG_ALTIVEC is not set)");
+}
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+static void __init test_lxvd2x_stxvd2x(void)
+{
+	struct pt_regs regs;
+	union {
+		vector128 a;
+		u32 b[4];
+	} c;
+	u32 cached_b[4];
+	int stepped = -1;
+
+	init_pt_regs(&regs);
+
+
+	/*** lxvd2x ***/
+
+	cached_b[0] = c.b[0] = 18233;
+	cached_b[1] = c.b[1] = 34863571;
+	cached_b[2] = c.b[2] = 834;
+	cached_b[3] = c.b[3] = 6138911;
+
+	regs.gpr[3] = (unsigned long) &c.a;
+	regs.gpr[4] = 0;
+
+	/* lxvd2x vsr39, r3, r4 */
+	stepped = emulate_step(&regs, TEST_LXVD2X(39, 3, 4));
+
+	if (stepped == 1)
+		show_result("lxvd2x", "PASS");
+	else
+		show_result("lxvd2x", "FAIL");
+
+
+	/*** stxvd2x ***/
+
+	c.b[0] = 21379463;
+	c.b[1] = 87;
+	c.b[2] = 374234;
+	c.b[3] = 4;
+
+	/* stxvd2x vsr39, r3, r4 */
+	stepped = emulate_step(&regs, TEST_STXVD2X(39, 3, 4));
+
+	if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] &&
+	    cached_b[2] == c.b[2] && cached_b[3] == c.b[3])
+		show_result("stxvd2x", "PASS");
+	else
+		show_result("stxvd2x", "FAIL");
+}
+#else
+static void __init test_lxvd2x_stxvd2x(void)
+{
+	show_result("lxvd2x", "SKIP (CONFIG_VSX is not set)");
+	show_result("stxvd2x", "SKIP (CONFIG_VSX is not set)");
+}
+#endif /* CONFIG_VSX */
+
+static int __init test_emulate_step(void)
+{
+	test_ld();
+	test_lwz();
+	test_lwzx();
+	test_std();
+	test_ldarx_stdcx();
+	test_lfsx_stfsx();
+	test_lfdx_stfdx();
+	test_lvx_stvx();
+	test_lxvd2x_stxvd2x();
+
+	return 0;
+}
+late_initcall(test_emulate_step);
-- 
cgit v1.2.3-59-g8ed1b


From 7a70d7288c926ae88e0c773fbb506aa374e99c2d Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Mon, 27 Feb 2017 14:32:41 +1100
Subject: powerpc/64: Invalidate process table caching after setting process
 table

The POWER9 MMU reads and caches entries from the process table.
When we kexec from one kernel to another, the second kernel sets
its process table pointer but doesn't currently do anything to
make the CPU invalidate any cached entries from the old process table.
This adds a tlbie (TLB invalidate entry) instruction with parameters
to invalidate caching of the process table after the new process
table is installed.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/pgtable-radix.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index feeda90cd06d..01c94f141164 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -186,6 +186,10 @@ static void __init radix_init_pgtable(void)
 	 */
 	register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
 	pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd);
+	asm volatile("ptesync" : : : "memory");
+	asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
+		     "r" (TLBIEL_INVAL_SET_LPID), "r" (0));
+	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
 }
 
 static void __init radix_init_partition_table(void)
-- 
cgit v1.2.3-59-g8ed1b


From 424f8acd328a111319ae30bf384e5dfb9bc8f8cb Mon Sep 17 00:00:00 2001
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
Date: Mon, 27 Feb 2017 11:10:07 +0530
Subject: powerpc/powernv: Fix bug due to labeling ambiguity in
 power_enter_stop

Commit 09206b600c76 ("powernv: Pass PSSCR value and mask to
power9_idle_stop") added additional code in power_enter_stop() to
distinguish between stop requests whose PSSCR had ESL=EC=1 from those
which did not. When ESL=EC=1, we do a forward-jump to a location
labelled by "1", which had the code to handle the ESL=EC=1 case.

Unfortunately just a couple of instructions before this label, is the
macro IDLE_STATE_ENTER_SEQ() which also has a label "1" in its
expansion.

As a result, the current code can result in directly executing stop
instruction for deep stop requests with PSSCR ESL=EC=1, without saving
the hypervisor state.

Fix this BUG by labeling the location that handles ESL=EC=1 case with
a more descriptive label ".Lhandle_esl_ec_set" (local label suggestion
a la .Lxx from Anton Blanchard).

While at it, rename the label "2" labelling the location of the code
handling entry into deep stop states with ".Lhandle_deep_stop".

For a good measure, change the label in IDLE_STATE_ENTER_SEQ() macro
to an not-so commonly used value in order to avoid similar mishaps in
the future.

Fixes: 09206b600c76 ("powernv: Pass PSSCR value and mask to power9_idle_stop")
Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/cpuidle.h |  4 ++--
 arch/powerpc/kernel/idle_book3s.S  | 10 ++++++----
 2 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index fd321eb423cb..155731557c9b 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -70,8 +70,8 @@ static inline void report_invalid_psscr_val(u64 psscr_val, int err)
 	std	r0,0(r1);					\
 	ptesync;						\
 	ld	r0,0(r1);					\
-1:	cmpd	cr0,r0,r0;					\
-	bne	1b;						\
+236:	cmpd	cr0,r0,r0;					\
+	bne	236b;						\
 	IDLE_INST;						\
 
 #define	IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST)			\
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 5f61cc0349c0..995728736677 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -276,19 +276,21 @@ power_enter_stop:
  */
 	andis.   r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
 	clrldi   r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
-	bne	 1f
+	bne	 .Lhandle_esl_ec_set
 	IDLE_STATE_ENTER_SEQ(PPC_STOP)
 	li	r3,0  /* Since we didn't lose state, return 0 */
 	b 	pnv_wakeup_noloss
+
+.Lhandle_esl_ec_set:
 /*
  * Check if the requested state is a deep idle state.
  */
-1:	LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
+	LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
 	ld	r4,ADDROFF(pnv_first_deep_stop_state)(r5)
 	cmpd	r3,r4
-	bge	2f
+	bge	.Lhandle_deep_stop
 	IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
-2:
+.Lhandle_deep_stop:
 /*
  * Entering deep idle state.
  * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to
-- 
cgit v1.2.3-59-g8ed1b


From 4dc831aa88132f835cefe876aa0206977c4d7710 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2016 13:46:20 +1100
Subject: powerpc: Fix compiling a BE kernel with a powerpc64le toolchain

GCC can compile with either endian, but the default ABI version is set
based on the default endianness of the toolchain. Alan Modra says:

  you need both -mbig and -mabi=elfv1 to make a powerpc64le gcc
  generate powerpc64 code

The opposite is true for powerpc64 when generating -mlittle it
requires -mabi=elfv2 to generate v2 ABI, which we were already doing.

This change adds ABI annotations together with endianness for all cases,
LE and BE. This fixes the case of building a BE kernel with a toolchain
that is LE by default.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Makefile | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 31286fa7873c..19b0d1a81959 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -72,8 +72,15 @@ GNUTARGET	:= powerpc
 MULTIPLEWORD	:= -mmultiple
 endif
 
-cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mbig-endian)
+ifdef CONFIG_PPC64
+cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mabi=elfv1)
+cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mcall-aixdesc)
+aflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mabi=elfv1)
+aflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mabi=elfv2
+endif
+
 cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mlittle-endian
+cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mbig-endian)
 ifneq ($(cc-name),clang)
   cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mno-strict-align
 endif
@@ -113,7 +120,9 @@ ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc))
 AFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv2)
 else
+CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv1)
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mcall-aixdesc)
+AFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv1)
 endif
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mno-pointers-to-nested-functions)
-- 
cgit v1.2.3-59-g8ed1b


From 3fb66a70a4ae886445743354e4b60e54058bb3ff Mon Sep 17 00:00:00 2001
From: Laurentiu Tudor <laurentiu.tudor@nxp.com>
Date: Thu, 16 Feb 2017 09:11:29 -0600
Subject: powerpc/booke: Fix boot crash due to null hugepd

On 32-bit book-e machines, hugepd_ok() no longer takes into account null
hugepd values, causing this crash at boot:

  Unable to handle kernel paging request for data at address 0x80000000
  ...
  NIP [c0018378] follow_huge_addr+0x38/0xf0
  LR [c001836c] follow_huge_addr+0x2c/0xf0
  Call Trace:
   follow_huge_addr+0x2c/0xf0 (unreliable)
   follow_page_mask+0x40/0x3e0
   __get_user_pages+0xc8/0x450
   get_user_pages_remote+0x8c/0x250
   copy_strings+0x110/0x390
   copy_strings_kernel+0x2c/0x50
   do_execveat_common+0x478/0x630
   do_execve+0x2c/0x40
   try_to_run_init_process+0x18/0x60
   kernel_init+0xbc/0x110
   ret_from_kernel_thread+0x5c/0x64

This impacts all nxp (ex-freescale) 32-bit booke platforms.

This was caused by the change of hugepd_t.pd from signed to unsigned,
and the update to the nohash version of hugepd_ok(). Previously
hugepd_ok() could exclude all non-huge and NULL pgds using > 0, whereas
now we need to explicitly check that the value is not zero and also that
PD_HUGE is *clear*.

This isn't protected by the pgd_none() check in __find_linux_pte_or_hugepte()
because on 32-bit we use pgtable-nopud.h, which causes the pgd_none()
check to be always false.

Fixes: 20717e1ff526 ("powerpc/mm: Fix little-endian 4K hugetlb")
Cc: stable@vger.kernel.org # v4.7+
Reported-by: Madalin-Cristian Bucur <madalin.bucur@nxp.com>
Signed-off-by: Laurentiu Tudor <laurentiu.tudor@nxp.com>
[mpe: Flesh out change log details.]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/nohash/pgtable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index 0cd8a3852763..e5805ad78e12 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -230,7 +230,7 @@ static inline int hugepd_ok(hugepd_t hpd)
 	return ((hpd_val(hpd) & 0x4) != 0);
 #else
 	/* We clear the top bit to indicate hugepd */
-	return ((hpd_val(hpd) & PD_HUGE) ==  0);
+	return (hpd_val(hpd) && (hpd_val(hpd) & PD_HUGE) == 0);
 #endif
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 2a9c4f40ab2c281f95d41577ff0f7f78668feb73 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Thu, 2 Mar 2017 17:41:24 +1100
Subject: powerpc/powernv: Fix opal tracepoints with JUMP_LABEL=n

The recent commit to allow calling OPAL calls in real mode, commit
ab9bad0ead9a ("powerpc/powernv: Remove separate entry for OPAL real mode
calls"), introduced a bug when CONFIG_JUMP_LABEL=n.

The commit moved the "mfmsr r12" prior to the call to OPAL_BRANCH, but
we missed that OPAL_BRANCH clobbers r12 when jump labels are disabled.
This leads to us using the tracepoint refcount as the MSR value,
typically zero, and saving that into PACASAVEDMSR. When we return from
OPAL we use that value as the MSR value for rfid, meaning we switch to
32-bit BE real mode - hilarity ensues.

Fix it by using r11 in OPAL_BRANCH, which is not live at the time the
macro is used in OPAL_CALL.

Fixes: ab9bad0ead9a ("powerpc/powernv: Remove separate entry for OPAL real mode calls")
Suggested-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/opal-wrappers.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 6693f75e93d1..da8a0f7a035c 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -39,8 +39,8 @@ opal_tracepoint_refcount:
 BEGIN_FTR_SECTION;						\
 	b	1f;						\
 END_FTR_SECTION(0, 1);						\
-	ld	r12,opal_tracepoint_refcount@toc(r2);		\
-	cmpdi	r12,0;						\
+	ld	r11,opal_tracepoint_refcount@toc(r2);		\
+	cmpdi	r11,0;						\
 	bne-	LABEL;						\
 1:
 
-- 
cgit v1.2.3-59-g8ed1b


From 6ad966d7303b70165228dba1ee8da1a05c10eefe Mon Sep 17 00:00:00 2001
From: Shile Zhang <shile.zhang@nokia.com>
Date: Sat, 4 Feb 2017 17:03:40 +0800
Subject: powerpc/64: Fix checksum folding in csum_add()

Paul's patch to fix checksum folding, commit b492f7e4e07a ("powerpc/64:
Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold")
missed a case in csum_add(). Fix it.

Signed-off-by: Shile Zhang <shile.zhang@nokia.com>
Acked-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/checksum.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index 4e63787dc3be..842124b199b5 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -112,7 +112,7 @@ static inline __wsum csum_add(__wsum csum, __wsum addend)
 
 #ifdef __powerpc64__
 	res += (__force u64)addend;
-	return (__force __wsum)((u32)res + (res >> 32));
+	return (__force __wsum) from64to32(res);
 #else
 	asm("addc %0,%0,%1;"
 	    "addze %0,%0;"
-- 
cgit v1.2.3-59-g8ed1b


From 68925176296a8b995e503349200e256674bfe5ac Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 17 Feb 2017 14:32:18 +0000
Subject: arm64: KVM: VHE: Clear HCR_TGE when invalidating guest TLBs

When invalidating guest TLBs, special care must be taken to
actually shoot the guest TLBs and not the host ones if we're
running on a VHE system.  This is controlled by the HCR_EL2.TGE
bit, which we forget to clear before invalidating TLBs.

Address the issue by introducing two wrappers (__tlb_switch_to_guest
and __tlb_switch_to_host) that take care of both the VTTBR_EL2
and HCR_EL2.TGE switching.

Reported-by: Tomasz Nowicki <tnowicki@caviumnetworks.com>
Tested-by: Tomasz Nowicki <tnowicki@caviumnetworks.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Cc: stable@vger.kernel.org
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kvm/hyp/tlb.c | 64 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 55 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index e8e7ba2bc11f..9e1d2b75eecd 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -18,14 +18,62 @@
 #include <asm/kvm_hyp.h>
 #include <asm/tlbflush.h>
 
+static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
+{
+	u64 val;
+
+	/*
+	 * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and
+	 * most TLB operations target EL2/EL0. In order to affect the
+	 * guest TLBs (EL1/EL0), we need to change one of these two
+	 * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
+	 * let's flip TGE before executing the TLB operation.
+	 */
+	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	val = read_sysreg(hcr_el2);
+	val &= ~HCR_TGE;
+	write_sysreg(val, hcr_el2);
+	isb();
+}
+
+static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm)
+{
+	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	isb();
+}
+
+static hyp_alternate_select(__tlb_switch_to_guest,
+			    __tlb_switch_to_guest_nvhe,
+			    __tlb_switch_to_guest_vhe,
+			    ARM64_HAS_VIRT_HOST_EXTN);
+
+static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm)
+{
+	/*
+	 * We're done with the TLB operation, let's restore the host's
+	 * view of HCR_EL2.
+	 */
+	write_sysreg(0, vttbr_el2);
+	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+}
+
+static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm)
+{
+	write_sysreg(0, vttbr_el2);
+}
+
+static hyp_alternate_select(__tlb_switch_to_host,
+			    __tlb_switch_to_host_nvhe,
+			    __tlb_switch_to_host_vhe,
+			    ARM64_HAS_VIRT_HOST_EXTN);
+
 void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
 	dsb(ishst);
 
 	/* Switch to requested VMID */
 	kvm = kern_hyp_va(kvm);
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
-	isb();
+	__tlb_switch_to_guest()(kvm);
 
 	/*
 	 * We could do so much better if we had the VA as well.
@@ -46,7 +94,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 	dsb(ish);
 	isb();
 
-	write_sysreg(0, vttbr_el2);
+	__tlb_switch_to_host()(kvm);
 }
 
 void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
@@ -55,14 +103,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
 
 	/* Switch to requested VMID */
 	kvm = kern_hyp_va(kvm);
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
-	isb();
+	__tlb_switch_to_guest()(kvm);
 
 	__tlbi(vmalls12e1is);
 	dsb(ish);
 	isb();
 
-	write_sysreg(0, vttbr_el2);
+	__tlb_switch_to_host()(kvm);
 }
 
 void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
@@ -70,14 +117,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
 	struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
 
 	/* Switch to requested VMID */
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
-	isb();
+	__tlb_switch_to_guest()(kvm);
 
 	__tlbi(vmalle1);
 	dsb(nsh);
 	isb();
 
-	write_sysreg(0, vttbr_el2);
+	__tlb_switch_to_host()(kvm);
 }
 
 void __hyp_text __kvm_flush_vm_context(void)
-- 
cgit v1.2.3-59-g8ed1b


From a69e2fb70350a66f91175cd2625f1e8215c5b6e9 Mon Sep 17 00:00:00 2001
From: Balbir Singh <bsingharora@gmail.com>
Date: Fri, 3 Mar 2017 11:58:44 +1100
Subject: powerpc/xics: Work around limitations of OPAL XICS priority handling

The CPPR (Current Processor Priority Register) of a XICS interrupt
presentation controller contains a value N, such that only interrupts
with a priority "more favoured" than N will be received by the CPU,
where "more favoured" means "less than". So if the CPPR has the value 5
then only interrupts with a priority of 0-4 inclusive will be received.

In theory the CPPR can support a value of 0 to 255 inclusive.
In practice Linux only uses values of 0, 4, 5 and 0xff. Setting the CPPR
to 0 rejects all interrupts, setting it to 0xff allows all interrupts.
The values 4 and 5 are used to differentiate IPIs from external
interrupts. Setting the CPPR to 5 allows IPIs to be received but not
external interrupts.

The CPPR emulation in the OPAL XICS implementation only directly
supports priorities 0 and 0xff. All other priorities are considered
equivalent, and mapped to a single priority value internally. This means
when using icp-opal we can not allow IPIs but not externals.

This breaks Linux's use of priority values when a CPU is hot unplugged.
After migrating IRQs away from the CPU that is being offlined, we set
the priority to 5, meaning we still want the offline CPU to receive
IPIs. But the effect of the OPAL XICS emulation's use of a single
priority value is that all interrupts are rejected by the CPU. With the
CPU offline, and not receiving IPIs, we may not be able to wake it up to
bring it back online.

The first part of the fix is in icp_opal_set_cpu_priority(). CPPR values
of 0 to 4 inclusive will correctly cause all interrupts to be rejected,
so we pass those CPPR values through to OPAL. However if we are called
with a CPPR of 5 or greater, the caller is expecting to be able to allow
IPIs but not external interrupts. We know this doesn't work, so instead
of rejecting all interrupts we choose the opposite which is to allow all
interrupts. This is still not correct behaviour, but we know for the
only existing caller (xics_migrate_irqs_away()), that it is the better
option.

The other part of the fix is in xics_migrate_irqs_away(). Instead of
setting priority (CPPR) to 0, and then back to 5 before migrating IRQs,
we migrate the IRQs before setting the priority back to 5. This should
have no effect on an ICP backend with a working set_priority(), and on
icp-opal it means we will keep all interrupts blocked until after we've
finished doing the IRQ migration. Additionally we wait for 5ms after
doing the migration to make sure there are no IRQs in flight.

Fixes: d74361881f0d ("powerpc/xics: Add ICP OPAL backend")
Cc: stable@vger.kernel.org # v4.8+
Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
Reported-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Tested-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Balbir Singh <bsingharora@gmail.com>
[mpe: Rewrote comments and change log, change delay to 5ms]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/sysdev/xics/icp-opal.c    | 10 ++++++++++
 arch/powerpc/sysdev/xics/xics-common.c | 17 ++++++++++++++---
 2 files changed, 24 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
index f9670eabfcfa..b53f80f0b4d8 100644
--- a/arch/powerpc/sysdev/xics/icp-opal.c
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -91,6 +91,16 @@ static unsigned int icp_opal_get_irq(void)
 
 static void icp_opal_set_cpu_priority(unsigned char cppr)
 {
+	/*
+	 * Here be dragons. The caller has asked to allow only IPI's and not
+	 * external interrupts. But OPAL XIVE doesn't support that. So instead
+	 * of allowing no interrupts allow all. That's still not right, but
+	 * currently the only caller who does this is xics_migrate_irqs_away()
+	 * and it works in that case.
+	 */
+	if (cppr >= DEFAULT_PRIORITY)
+		cppr = LOWEST_PRIORITY;
+
 	xics_set_base_cppr(cppr);
 	opal_int_set_cppr(cppr);
 	iosync();
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index 69d858e51ac7..23efe4e42172 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -20,6 +20,7 @@
 #include <linux/of.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/delay.h>
 
 #include <asm/prom.h>
 #include <asm/io.h>
@@ -198,9 +199,6 @@ void xics_migrate_irqs_away(void)
 	/* Remove ourselves from the global interrupt queue */
 	xics_set_cpu_giq(xics_default_distrib_server, 0);
 
-	/* Allow IPIs again... */
-	icp_ops->set_priority(DEFAULT_PRIORITY);
-
 	for_each_irq_desc(virq, desc) {
 		struct irq_chip *chip;
 		long server;
@@ -255,6 +253,19 @@ void xics_migrate_irqs_away(void)
 unlock:
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
+
+	/* Allow "sufficient" time to drop any inflight IRQ's */
+	mdelay(5);
+
+	/*
+	 * Allow IPIs again. This is done at the very end, after migrating all
+	 * interrupts, the expectation is that we'll only get woken up by an IPI
+	 * interrupt beyond this point, but leave externals masked just to be
+	 * safe. If we're using icp-opal this may actually allow all
+	 * interrupts anyway, but that should be OK.
+	 */
+	icp_ops->set_priority(DEFAULT_PRIORITY);
+
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-- 
cgit v1.2.3-59-g8ed1b


From 12cc9fd6b2d8ee307a735b3b9faed0d17b719463 Mon Sep 17 00:00:00 2001
From: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Date: Tue, 28 Feb 2017 17:03:47 +1100
Subject: powerpc: Parse the command line before calling CAS

On POWER9 the hypervisor requires the guest to decide whether it would
like to use a hash or radix mmu model at the time it calls
ibm,client-architecture-support (CAS) based on what the hypervisor has
said it's allowed to do. It is possible to disable radix by passing
"disable_radix" on the command line. The next patch will add support for
the new CAS format, thus we need to parse the command line before calling
CAS so we can correctly select which mmu we would like to use.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Reviewed-by: Paul Mackerras <paulus@ozlabs.org>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/prom_init.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index a3944540fe0d..bf3966d12565 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -2993,6 +2993,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
 	 */
 	prom_check_initrd(r3, r4);
 
+	/*
+	 * Do early parsing of command line
+	 */
+	early_cmdline_parse();
+
 #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
 	/*
 	 * On pSeries, inform the firmware about our capabilities
@@ -3008,11 +3013,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
 	if (of_platform != PLATFORM_POWERMAC)
 		copy_and_flush(0, kbase, 0x100, 0);
 
-	/*
-	 * Do early parsing of command line
-	 */
-	early_cmdline_parse();
-
 	/*
 	 * Initialize memory management within prom_init
 	 */
-- 
cgit v1.2.3-59-g8ed1b


From 014d02cbf16b3106dc8e93281d2a9c189751ed5e Mon Sep 17 00:00:00 2001
From: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Date: Tue, 28 Feb 2017 17:03:48 +1100
Subject: powerpc: Update to new option-vector-5 format for CAS

On POWER9 the ibm,client-architecture-support (CAS) negotiation process
has been updated to change how the host to guest negotiation is done for
the new hash/radix mmu as well as the nest mmu, process tables and guest
translation shootdown (GTSE).

This is documented in the unreleased PAPR ACR "CAS option vector
additions for P9".

The host tells the guest which options it supports in
ibm,arch-vec-5-platform-support. The guest then chooses a subset of these
to request in the CAS call and these are agreed to in the
ibm,architecture-vec-5 property of the chosen node.

Thus we read ibm,arch-vec-5-platform-support and make our selection before
calling CAS. We then parse the ibm,architecture-vec-5 property of the
chosen node to check whether we should run as hash or radix.

ibm,arch-vec-5-platform-support format:

index value pairs: <index, val> ... <index, val>

index: Option vector 5 byte number
val:   Some representation of supported values

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Acked-by: Paul Mackerras <paulus@ozlabs.org>
[mpe: Don't print about unknown options, be consistent with OV5_FEAT]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/prom.h |  18 ++++---
 arch/powerpc/kernel/prom_init.c | 110 +++++++++++++++++++++++++++++++++++++++-
 arch/powerpc/mm/init_64.c       |  36 ++++++++++---
 3 files changed, 150 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 4a90634e8322..35c00d7a0cf8 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -160,12 +160,18 @@ struct of_drconf_cell {
 #define OV5_PFO_HW_ENCR		0x1120	/* PFO Encryption Accelerator */
 #define OV5_SUB_PROCESSORS	0x1501	/* 1,2,or 4 Sub-Processors supported */
 #define OV5_XIVE_EXPLOIT	0x1701	/* XIVE exploitation supported */
-#define OV5_MMU_RADIX_300	0x1880	/* ISA v3.00 radix MMU supported */
-#define OV5_MMU_HASH_300	0x1840	/* ISA v3.00 hash MMU supported */
-#define OV5_MMU_SEGM_RADIX	0x1820	/* radix mode (no segmentation) */
-#define OV5_MMU_PROC_TBL	0x1810	/* hcall selects SLB or proc table */
-#define OV5_MMU_SLB		0x1800	/* always use SLB */
-#define OV5_MMU_GTSE		0x1808	/* Guest translation shootdown */
+/* MMU Base Architecture */
+#define OV5_MMU_SUPPORT		0x18C0	/* MMU Mode Support Mask */
+#define OV5_MMU_HASH		0x1800	/* Hash MMU Only */
+#define OV5_MMU_RADIX		0x1840	/* Radix MMU Only */
+#define OV5_MMU_EITHER		0x1880	/* Hash or Radix Supported */
+#define OV5_MMU_DYNAMIC		0x18C0	/* Hash or Radix Can Switch Later */
+#define OV5_NMMU		0x1820	/* Nest MMU Available */
+/* Hash Table Extensions */
+#define OV5_HASH_SEG_TBL	0x1980	/* In Memory Segment Tables Available */
+#define OV5_HASH_GTSE		0x1940	/* Guest Translation Shoot Down Avail */
+/* Radix Table Extensions */
+#define OV5_RADIX_GTSE		0x1A40	/* Guest Translation Shoot Down Avail */
 
 /* Option Vector 6: IBM PAPR hints */
 #define OV6_LINUX		0x02	/* Linux is our OS */
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index bf3966d12565..1c1b44ec7642 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -168,6 +168,14 @@ static unsigned long __initdata prom_tce_alloc_start;
 static unsigned long __initdata prom_tce_alloc_end;
 #endif
 
+static bool __initdata prom_radix_disable;
+
+struct platform_support {
+	bool hash_mmu;
+	bool radix_mmu;
+	bool radix_gtse;
+};
+
 /* Platforms codes are now obsolete in the kernel. Now only used within this
  * file and ultimately gone too. Feel free to change them if you need, they
  * are not shared with anything outside of this file anymore
@@ -626,6 +634,12 @@ static void __init early_cmdline_parse(void)
 		prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000);
 #endif
 	}
+
+	opt = strstr(prom_cmd_line, "disable_radix");
+	if (opt) {
+		prom_debug("Radix disabled from cmdline\n");
+		prom_radix_disable = true;
+	}
 }
 
 #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
@@ -695,6 +709,8 @@ struct option_vector5 {
 	u8 byte22;
 	u8 intarch;
 	u8 mmu;
+	u8 hash_ext;
+	u8 radix_ext;
 } __packed;
 
 struct option_vector6 {
@@ -850,8 +866,9 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
 		.reserved3 = 0,
 		.subprocessors = 1,
 		.intarch = 0,
-		.mmu = OV5_FEAT(OV5_MMU_RADIX_300) | OV5_FEAT(OV5_MMU_HASH_300) |
-			OV5_FEAT(OV5_MMU_PROC_TBL) | OV5_FEAT(OV5_MMU_GTSE),
+		.mmu = 0,
+		.hash_ext = 0,
+		.radix_ext = 0,
 	},
 
 	/* option vector 6: IBM PAPR hints */
@@ -990,6 +1007,92 @@ static int __init prom_count_smt_threads(void)
 
 }
 
+static void __init prom_parse_mmu_model(u8 val,
+					struct platform_support *support)
+{
+	switch (val) {
+	case OV5_FEAT(OV5_MMU_DYNAMIC):
+	case OV5_FEAT(OV5_MMU_EITHER): /* Either Available */
+		prom_debug("MMU - either supported\n");
+		support->radix_mmu = !prom_radix_disable;
+		support->hash_mmu = true;
+		break;
+	case OV5_FEAT(OV5_MMU_RADIX): /* Only Radix */
+		prom_debug("MMU - radix only\n");
+		if (prom_radix_disable) {
+			/*
+			 * If we __have__ to do radix, we're better off ignoring
+			 * the command line rather than not booting.
+			 */
+			prom_printf("WARNING: Ignoring cmdline option disable_radix\n");
+		}
+		support->radix_mmu = true;
+		break;
+	case OV5_FEAT(OV5_MMU_HASH):
+		prom_debug("MMU - hash only\n");
+		support->hash_mmu = true;
+		break;
+	default:
+		prom_debug("Unknown mmu support option: 0x%x\n", val);
+		break;
+	}
+}
+
+static void __init prom_parse_platform_support(u8 index, u8 val,
+					       struct platform_support *support)
+{
+	switch (index) {
+	case OV5_INDX(OV5_MMU_SUPPORT): /* MMU Model */
+		prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support);
+		break;
+	case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */
+		if (val & OV5_FEAT(OV5_RADIX_GTSE)) {
+			prom_debug("Radix - GTSE supported\n");
+			support->radix_gtse = true;
+		}
+		break;
+	}
+}
+
+static void __init prom_check_platform_support(void)
+{
+	struct platform_support supported = {
+		.hash_mmu = false,
+		.radix_mmu = false,
+		.radix_gtse = false
+	};
+	int prop_len = prom_getproplen(prom.chosen,
+				       "ibm,arch-vec-5-platform-support");
+	if (prop_len > 1) {
+		int i;
+		u8 vec[prop_len];
+		prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n",
+			   prop_len);
+		prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support",
+			     &vec, sizeof(vec));
+		for (i = 0; i < prop_len; i += 2) {
+			prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2
+								  , vec[i]
+								  , vec[i + 1]);
+			prom_parse_platform_support(vec[i], vec[i + 1],
+						    &supported);
+		}
+	}
+
+	if (supported.radix_mmu && supported.radix_gtse) {
+		/* Radix preferred - but we require GTSE for now */
+		prom_debug("Asking for radix with GTSE\n");
+		ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX);
+		ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE);
+	} else if (supported.hash_mmu) {
+		/* Default to hash mmu (if we can) */
+		prom_debug("Asking for hash\n");
+		ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_HASH);
+	} else {
+		/* We're probably on a legacy hypervisor */
+		prom_debug("Assuming legacy hash support\n");
+	}
+}
 
 static void __init prom_send_capabilities(void)
 {
@@ -997,6 +1100,9 @@ static void __init prom_send_capabilities(void)
 	prom_arg_t ret;
 	u32 cores;
 
+	/* Check ibm,arch-vec-5-platform-support and fixup vec5 if required */
+	prom_check_platform_support();
+
 	root = call_prom("open", 1, 1, ADDR("/"));
 	if (root != 0) {
 		/* We need to tell the FW about the number of cores we support.
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 6aa3b76aa0d6..9be992083d2a 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -356,18 +356,42 @@ static void early_check_vec5(void)
 	unsigned long root, chosen;
 	int size;
 	const u8 *vec5;
+	u8 mmu_supported;
 
 	root = of_get_flat_dt_root();
 	chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
-	if (chosen == -FDT_ERR_NOTFOUND)
+	if (chosen == -FDT_ERR_NOTFOUND) {
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
 		return;
+	}
 	vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
-	if (!vec5)
+	if (!vec5) {
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
 		return;
-	if (size <= OV5_INDX(OV5_MMU_RADIX_300) ||
-	    !(vec5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300)))
-		/* Hypervisor doesn't support radix */
+	}
+	if (size <= OV5_INDX(OV5_MMU_SUPPORT)) {
 		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+		return;
+	}
+
+	/* Check for supported configuration */
+	mmu_supported = vec5[OV5_INDX(OV5_MMU_SUPPORT)] &
+			OV5_FEAT(OV5_MMU_SUPPORT);
+	if (mmu_supported == OV5_FEAT(OV5_MMU_RADIX)) {
+		/* Hypervisor only supports radix - check enabled && GTSE */
+		if (!early_radix_enabled()) {
+			pr_warn("WARNING: Ignoring cmdline option disable_radix\n");
+		}
+		if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] &
+						OV5_FEAT(OV5_RADIX_GTSE))) {
+			pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n");
+		}
+		/* Do radix anyway - the hypervisor said we had to */
+		cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
+	} else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) {
+		/* Hypervisor only supports hash - disable radix */
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+	}
 }
 
 void __init mmu_early_init_devtree(void)
@@ -383,7 +407,7 @@ void __init mmu_early_init_devtree(void)
 	 * even though the ibm,architecture-vec-5 property created by
 	 * skiboot doesn't have the necessary bits set.
 	 */
-	if (early_radix_enabled() && !(mfmsr() & MSR_HV))
+	if (!(mfmsr() & MSR_HV))
 		early_check_vec5();
 
 	if (early_radix_enabled())
-- 
cgit v1.2.3-59-g8ed1b


From 6ba422c75facb1b1e0e206c464ee121b8073f7e0 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 5 Mar 2017 10:54:34 +1100
Subject: powerpc/64: Avoid panic during boot due to divide by zero in
 init_cache_info()

I see a panic in early boot when building with a recent gcc toolchain.
The issue is a divide by zero, which is undefined. Older toolchains
let us get away with it:

int foo(int a) { return a / 0; }

foo:
	li 9,0
	divw 3,3,9
	extsw 3,3
	blr

But newer ones catch it:

foo:
	trap

Add a check to avoid the divide by zero.

Fixes: e2827fe5c156 ("powerpc/64: Clean up ppc64_caches using a struct per cache")
Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/setup_64.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index adf2084f214b..9cfaa8b69b5f 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -408,7 +408,10 @@ static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
 	info->line_size = lsize;
 	info->block_size = bsize;
 	info->log_block_size = __ilog2(bsize);
-	info->blocks_per_page = PAGE_SIZE / bsize;
+	if (bsize)
+		info->blocks_per_page = PAGE_SIZE / bsize;
+	else
+		info->blocks_per_page = 0;
 
 	if (sets == 0)
 		info->assoc = 0xffff;
-- 
cgit v1.2.3-59-g8ed1b


From 3b3e78552d3077ec70d2640e629e07e3ab416a6a Mon Sep 17 00:00:00 2001
From: Matjaz Hegedic <matjaz.hegedic@gmail.com>
Date: Sun, 5 Mar 2017 19:16:44 +0100
Subject: x86/reboot/quirks: Add ASUS EeeBook X205TA/W reboot quirk

Without the parameter reboot=a, ASUS EeeBook X205TA/W will hang
when it should reboot. This adds the appropriate quirk, thus
fixing the problem.

Signed-off-by: Matjaz Hegedic <matjaz.hegedic@gmail.com>
Link: http://lkml.kernel.org/r/1488737804-20681-1-git-send-email-matjaz.hegedic@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/reboot.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 01c9cda3e1b7..4194d6f9bb29 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -231,6 +231,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"),
 		},
 	},
+	{	/* Handle problems with rebooting on ASUS EeeBook X205TAW */
+		.callback = set_acpi_reboot,
+		.ident = "ASUS EeeBook X205TAW",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"),
+		},
+	},
 
 	/* Certec */
 	{       /* Handle problems with rebooting on Certec BPC600 */
-- 
cgit v1.2.3-59-g8ed1b


From f2853308b6409b97799b0beceacd9da43a82fe43 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 6 Mar 2017 10:57:48 +0200
Subject: x86/build/x86_64_defconfig: Enable CONFIG_R8169

Very common PCIe ethernet card. Already enabled in i386_defconfig.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Link: http://lkml.kernel.org/r/20170306085748.85957-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/configs/x86_64_defconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 7ef4a099defc..6205d3b81e6d 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -176,6 +176,7 @@ CONFIG_E1000E=y
 CONFIG_SKY2=y
 CONFIG_FORCEDETH=y
 CONFIG_8139TOO=y
+CONFIG_R8169=y
 CONFIG_FDDI=y
 CONFIG_INPUT_POLLDEV=y
 # CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-- 
cgit v1.2.3-59-g8ed1b


From 9c7a00868c3a77c86ab07a2c51f3bb4897bd8550 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 6 Mar 2017 21:51:32 +1100
Subject: powerpc/64: Fix L1D cache shape vector reporting L1I values

It seems we didn't pay quite enough attention when testing the new cache
shape vectors, which means we didn't notice the bug where the vector for
the L1D was using the L1I values. Fix it, resulting in eg:

  L1I  cache size:     0x8000      32768B         32K
  L1I  line size:        0x80       8-way associative
  L1D  cache size:    0x10000      65536B         64K
  L1D  line size:        0x80       8-way associative

Fixes: 98a5f361b862 ("powerpc: Add new cache geometry aux vectors")
Cut-and-paste-bug-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Badly-reviewed-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/elf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index 93b9b84568e8..09bde6e34f5d 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -144,8 +144,8 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
 #define ARCH_DLINFO_CACHE_GEOMETRY					\
 	NEW_AUX_ENT(AT_L1I_CACHESIZE, ppc64_caches.l1i.size);		\
 	NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, get_cache_geometry(l1i));	\
-	NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1i.size);		\
-	NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1i));	\
+	NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1d.size);		\
+	NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1d));	\
 	NEW_AUX_ENT(AT_L2_CACHESIZE, ppc64_caches.l2.size);		\
 	NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, get_cache_geometry(l2));	\
 	NEW_AUX_ENT(AT_L3_CACHESIZE, ppc64_caches.l3.size);		\
-- 
cgit v1.2.3-59-g8ed1b


From a7d2475af7aedcb9b5c6343989a8bfadbf84429b Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 6 Mar 2017 22:53:59 +1100
Subject: powerpc: Sort the selects under CONFIG_PPC

We have a big list of selects under CONFIG_PPC, and currently they're
completely unsorted. This means people tend to add new selects at the
bottom of the list, and so two commits which both add a new select will
often conflict.

Instead sort it alphabetically. This is nicer in and of itself, but also
means two commits that add a new select will have a greater chance of
not conflicting.

Add a note at the top and bottom asking people to keep it sorted.

And while we're here pad out the 'if' expressions to make them stand
out.

Suggested-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Kconfig | 138 +++++++++++++++++++++++++++------------------------
 1 file changed, 72 insertions(+), 66 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 494091762bd7..97a8bc8a095c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -80,93 +80,99 @@ config ARCH_HAS_DMA_SET_COHERENT_MASK
 config PPC
 	bool
 	default y
-	select BUILDTIME_EXTABLE_SORT
+	#
+	# Please keep this list sorted alphabetically.
+	#
+	select ARCH_HAS_DEVMEM_IS_ALLOWED
+	select ARCH_HAS_DMA_SET_COHERENT_MASK
+	select ARCH_HAS_ELF_RANDOMIZE
+	select ARCH_HAS_GCOV_PROFILE_ALL
+	select ARCH_HAS_SCALED_CPUTIME		if VIRT_CPU_ACCOUNTING_NATIVE
+	select ARCH_HAS_SG_CHAIN
+	select ARCH_HAS_TICK_BROADCAST		if GENERIC_CLOCKEVENTS_BROADCAST
+	select ARCH_HAS_UBSAN_SANITIZE_ALL
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
+	select ARCH_SUPPORTS_ATOMIC_RMW
+	select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
+	select ARCH_USE_BUILTIN_BSWAP
+	select ARCH_USE_CMPXCHG_LOCKREF		if PPC64
+	select ARCH_WANT_IPC_PARSE_VERSION
 	select BINFMT_ELF
-	select ARCH_HAS_ELF_RANDOMIZE
-	select OF
-	select OF_EARLY_FLATTREE
-	select OF_RESERVED_MEM
-	select HAVE_FTRACE_MCOUNT_RECORD
+	select BUILDTIME_EXTABLE_SORT
+	select CLONE_BACKWARDS
+	select DCACHE_WORD_ACCESS		if PPC64 && CPU_LITTLE_ENDIAN
+	select EDAC_ATOMIC_SCRUB
+	select EDAC_SUPPORT
+	select GENERIC_ATOMIC64			if PPC32
+	select GENERIC_CLOCKEVENTS
+	select GENERIC_CLOCKEVENTS_BROADCAST	if SMP
+	select GENERIC_CMOS_UPDATE
+	select GENERIC_CPU_AUTOPROBE
+	select GENERIC_IRQ_SHOW
+	select GENERIC_IRQ_SHOW_LEVEL
+	select GENERIC_SMP_IDLE_THREAD
+	select GENERIC_STRNCPY_FROM_USER
+	select GENERIC_STRNLEN_USER
+	select GENERIC_TIME_VSYSCALL_OLD
+	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_HARDENED_USERCOPY
+	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_ARCH_KGDB
+	select HAVE_ARCH_SECCOMP_FILTER
+	select HAVE_ARCH_TRACEHOOK
+	select HAVE_CBPF_JIT			if !PPC64
+	select HAVE_CONTEXT_TRACKING		if PPC64
+	select HAVE_DEBUG_KMEMLEAK
+	select HAVE_DEBUG_STACKOVERFLOW
+	select HAVE_DMA_API_DEBUG
 	select HAVE_DYNAMIC_FTRACE
-	select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL
-	select HAVE_FUNCTION_TRACER
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS	if MPROFILE_KERNEL
+	select HAVE_EBPF_JIT			if PPC64
+	select HAVE_EFFICIENT_UNALIGNED_ACCESS	if !(CPU_LITTLE_ENDIAN && POWER7_CPU)
+	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FUNCTION_TRACER
 	select HAVE_GCC_PLUGINS
-	select SYSCTL_EXCEPTION_TRACE
-	select VIRT_TO_BUS if !PPC64
+	select HAVE_GENERIC_RCU_GUP
+	select HAVE_HW_BREAKPOINT		if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
 	select HAVE_IDE
 	select HAVE_IOREMAP_PROT
-	select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU)
+	select HAVE_IRQ_EXIT_ON_IRQ_STACK
+	select HAVE_KERNEL_GZIP
 	select HAVE_KPROBES
-	select HAVE_OPTPROBES if PPC64
-	select HAVE_ARCH_KGDB
 	select HAVE_KRETPROBES
-	select HAVE_ARCH_TRACEHOOK
+	select HAVE_LIVEPATCH			if HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
-	select HAVE_DMA_API_DEBUG
+	select HAVE_MOD_ARCH_SPECIFIC
+	select HAVE_NMI				if PERF_EVENTS
 	select HAVE_OPROFILE
-	select HAVE_DEBUG_KMEMLEAK
-	select ARCH_HAS_SG_CHAIN
-	select GENERIC_ATOMIC64 if PPC32
+	select HAVE_OPTPROBES			if PPC64
 	select HAVE_PERF_EVENTS
+	select HAVE_PERF_EVENTS_NMI		if PPC64
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_RCU_TABLE_FREE		if SMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
-	select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
-	select ARCH_WANT_IPC_PARSE_VERSION
-	select SPARSE_IRQ
+	select HAVE_SYSCALL_TRACEPOINTS
+	select HAVE_VIRT_CPU_ACCOUNTING
 	select IRQ_DOMAIN
-	select GENERIC_IRQ_SHOW
-	select GENERIC_IRQ_SHOW_LEVEL
 	select IRQ_FORCED_THREADING
-	select HAVE_RCU_TABLE_FREE if SMP
-	select HAVE_SYSCALL_TRACEPOINTS
-	select HAVE_CBPF_JIT if !PPC64
-	select HAVE_EBPF_JIT if PPC64
-	select HAVE_ARCH_JUMP_LABEL
-	select ARCH_HAVE_NMI_SAFE_CMPXCHG
-	select ARCH_HAS_GCOV_PROFILE_ALL
-	select GENERIC_SMP_IDLE_THREAD
-	select GENERIC_CMOS_UPDATE
-	select GENERIC_TIME_VSYSCALL_OLD
-	select GENERIC_CLOCKEVENTS
-	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
-	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
-	select GENERIC_STRNCPY_FROM_USER
-	select GENERIC_STRNLEN_USER
-	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select CLONE_BACKWARDS
-	select ARCH_USE_BUILTIN_BSWAP
-	select OLD_SIGSUSPEND
-	select OLD_SIGACTION if PPC32
-	select HAVE_DEBUG_STACKOVERFLOW
-	select HAVE_IRQ_EXIT_ON_IRQ_STACK
-	select ARCH_USE_CMPXCHG_LOCKREF if PPC64
-	select HAVE_ARCH_AUDITSYSCALL
-	select ARCH_SUPPORTS_ATOMIC_RMW
-	select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
 	select NO_BOOTMEM
-	select HAVE_GENERIC_RCU_GUP
-	select HAVE_PERF_EVENTS_NMI if PPC64
-	select HAVE_NMI if PERF_EVENTS
-	select EDAC_SUPPORT
-	select EDAC_ATOMIC_SCRUB
-	select ARCH_HAS_DMA_SET_COHERENT_MASK
-	select ARCH_HAS_DEVMEM_IS_ALLOWED
-	select HAVE_ARCH_SECCOMP_FILTER
-	select ARCH_HAS_UBSAN_SANITIZE_ALL
-	select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
-	select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
-	select GENERIC_CPU_AUTOPROBE
-	select HAVE_VIRT_CPU_ACCOUNTING
-	select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
-	select HAVE_ARCH_HARDENED_USERCOPY
-	select HAVE_KERNEL_GZIP
-	select HAVE_CONTEXT_TRACKING if PPC64
+	select OF
+	select OF_EARLY_FLATTREE
+	select OF_RESERVED_MEM
+	select OLD_SIGACTION			if PPC32
+	select OLD_SIGSUSPEND
+	select SPARSE_IRQ
+	select SYSCTL_EXCEPTION_TRACE
+	select VIRT_TO_BUS			if !PPC64
+	#
+	# Please keep this list sorted alphabetically.
+	#
 
 config GENERIC_CSUM
 	def_bool n
-- 
cgit v1.2.3-59-g8ed1b


From 587d7e72aedca91cee80c0a56811649c3efab765 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Thu, 2 Mar 2017 12:41:48 -0800
Subject: kvm: nVMX: VMCLEAR should not cause the vCPU to shut down
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

VMCLEAR should silently ignore a failure to clear the launch state of
the VMCS referenced by the operand.

Signed-off-by: Jim Mattson <jmattson@google.com>
[Changed "kvm_write_guest(vcpu->kvm" to "kvm_vcpu_write_guest(vcpu".]
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 283aa8601833..3b626d6dc3ac 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7258,9 +7258,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
 static int handle_vmclear(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u32 zero = 0;
 	gpa_t vmptr;
-	struct vmcs12 *vmcs12;
-	struct page *page;
 
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
@@ -7271,22 +7270,9 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
 	if (vmptr == vmx->nested.current_vmptr)
 		nested_release_vmcs12(vmx);
 
-	page = nested_get_page(vcpu, vmptr);
-	if (page == NULL) {
-		/*
-		 * For accurate processor emulation, VMCLEAR beyond available
-		 * physical memory should do nothing at all. However, it is
-		 * possible that a nested vmx bug, not a guest hypervisor bug,
-		 * resulted in this case, so let's shut down before doing any
-		 * more damage:
-		 */
-		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
-		return 1;
-	}
-	vmcs12 = kmap(page);
-	vmcs12->launch_state = 0;
-	kunmap(page);
-	nested_release_page(page);
+	kvm_vcpu_write_guest(vcpu,
+			vmptr + offsetof(struct vmcs12, launch_state),
+			&zero, sizeof(zero));
 
 	nested_free_vmcs02(vmx, vmptr);
 
-- 
cgit v1.2.3-59-g8ed1b


From 1fbdbcea80056acfc8c8506594744f5ec52208c1 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 5 Mar 2017 17:05:57 -0800
Subject: avr32: Fix build error caused by include file reshuffling

Various avr32 builds fail:

  arch/avr32/oprofile/backtrace.c:58: error: dereferencing pointer to incomplete type
  arch/avr32/oprofile/backtrace.c:60: error: implicit declaration of function 'user_mode'

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Hans-Christian Noren Egtvedt <egtvedt@samfundet.no>
Cc: Haavard Skinnemoen <hskinnemoen@gmail.com>
Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: oprofile-list@lists.sf.net
Fixes: f780d89a0e82 ("sched/headers: Remove <asm/ptrace.h> from ...")
Link: http://lkml.kernel.org/r/1488762357-4500-1-git-send-email-linux@roeck-us.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/avr32/oprofile/backtrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/avr32/oprofile/backtrace.c b/arch/avr32/oprofile/backtrace.c
index 75d9ad6f99cf..29cf2f191bfd 100644
--- a/arch/avr32/oprofile/backtrace.c
+++ b/arch/avr32/oprofile/backtrace.c
@@ -14,7 +14,7 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/sched.h>
+#include <linux/ptrace.h>
 #include <linux/uaccess.h>
 
 /* The first two words of each frame on the stack look like this if we have
-- 
cgit v1.2.3-59-g8ed1b


From 80aa1a54f054a1c71cc88e0cad6cfa0d20a10f23 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 5 Mar 2017 10:27:14 -0800
Subject: h8300: Fix build breakage caused by header file changes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following h8300 build failures:

  arch/h8300/kernel/ptrace_h.c: In function ‘trace_trap’:
  arch/h8300/kernel/ptrace_h.c:253:3: error: implicit declaration of function ‘force_sig’

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: uclinux-h8-devel@lists.sourceforge.jp
Fixes: c3edc4010e9d ("sched/headers: Move task_struct::signal and ...")
Link: http://lkml.kernel.org/r/1488738434-3504-1-git-send-email-linux@roeck-us.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/h8300/kernel/ptrace_h.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/h8300/kernel/ptrace_h.c b/arch/h8300/kernel/ptrace_h.c
index fe3b5673baba..f5ff3b794c85 100644
--- a/arch/h8300/kernel/ptrace_h.c
+++ b/arch/h8300/kernel/ptrace_h.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/linkage.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <asm/ptrace.h>
 
 #define BREAKINST 0x5730 /* trapa #3 */
-- 
cgit v1.2.3-59-g8ed1b


From 90922a2d03d84de36bf8a9979d62580102f31a92 Mon Sep 17 00:00:00 2001
From: Shanker Donthineni <shankerd@codeaurora.org>
Date: Tue, 7 Mar 2017 08:20:38 -0600
Subject: irqchip/gicv3-its: Add workaround for QDF2400 ITS erratum 0065

On Qualcomm Datacenter Technologies QDF2400 SoCs, the ITS hardware
implementation uses 16Bytes for Interrupt Translation Entry (ITE),
but reports an incorrect value of 8Bytes in GITS_TYPER.ITTE_size.

It might cause kernel memory corruption depending on the number
of MSI(x) that are configured and the amount of memory that has
been allocated for ITEs in its_create_device().

This patch fixes the potential memory corruption by setting the
correct ITE size to 16Bytes.

Cc: stable@vger.kernel.org
Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 Documentation/arm64/silicon-errata.txt |  1 +
 arch/arm64/Kconfig                     | 10 ++++++++++
 drivers/irqchip/irq-gic-v3-its.c       | 16 ++++++++++++++++
 3 files changed, 27 insertions(+)

(limited to 'arch')

diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index a71b8095dbd8..2f66683500b8 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -68,3 +68,4 @@ stable kernels.
 |                |                 |                 |                             |
 | Qualcomm Tech. | Falkor v1       | E1003           | QCOM_FALKOR_ERRATUM_1003    |
 | Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
+| Qualcomm Tech. | QDF2400 ITS     | E0065           | QCOM_QDF2400_ERRATUM_0065   |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a39029b5414e..8c7c244247b6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -508,6 +508,16 @@ config QCOM_FALKOR_ERRATUM_1009
 
 	  If unsure, say Y.
 
+config QCOM_QDF2400_ERRATUM_0065
+	bool "QDF2400 E0065: Incorrect GITS_TYPER.ITT_Entry_size"
+	default y
+	help
+	  On Qualcomm Datacenter Technologies QDF2400 SoC, ITS hardware reports
+	  ITE size incorrectly. The GITS_TYPER.ITT_Entry_size field should have
+	  been indicated as 16Bytes (0xf), not 8Bytes (0x7).
+
+	  If unsure, say Y.
+
 endmenu
 
 
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 23201004fd7a..f77f840d2b5f 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1601,6 +1601,14 @@ static void __maybe_unused its_enable_quirk_cavium_23144(void *data)
 	its->flags |= ITS_FLAGS_WORKAROUND_CAVIUM_23144;
 }
 
+static void __maybe_unused its_enable_quirk_qdf2400_e0065(void *data)
+{
+	struct its_node *its = data;
+
+	/* On QDF2400, the size of the ITE is 16Bytes */
+	its->ite_size = 16;
+}
+
 static const struct gic_quirk its_quirks[] = {
 #ifdef CONFIG_CAVIUM_ERRATUM_22375
 	{
@@ -1617,6 +1625,14 @@ static const struct gic_quirk its_quirks[] = {
 		.mask	= 0xffff0fff,
 		.init	= its_enable_quirk_cavium_23144,
 	},
+#endif
+#ifdef CONFIG_QCOM_QDF2400_ERRATUM_0065
+	{
+		.desc	= "ITS: QDF2400 erratum 0065",
+		.iidr	= 0x00001070, /* QDF2400 ITS rev 1.x */
+		.mask	= 0xffffffff,
+		.init	= its_enable_quirk_qdf2400_e0065,
+	},
 #endif
 	{
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 2f707d97982286b307ef2a9b034e19aabc1abb56 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Mon, 6 Mar 2017 04:03:28 -0800
Subject: KVM: nVMX: reset nested_run_pending if the vCPU is going to be reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported by syzkaller:

    WARNING: CPU: 1 PID: 27742 at arch/x86/kvm/vmx.c:11029
    nested_vmx_vmexit+0x5c35/0x74d0 arch/x86/kvm/vmx.c:11029
    CPU: 1 PID: 27742 Comm: a.out Not tainted 4.10.0+ #229
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
    Call Trace:
     __dump_stack lib/dump_stack.c:15 [inline]
     dump_stack+0x2ee/0x3ef lib/dump_stack.c:51
     panic+0x1fb/0x412 kernel/panic.c:179
     __warn+0x1c4/0x1e0 kernel/panic.c:540
     warn_slowpath_null+0x2c/0x40 kernel/panic.c:583
     nested_vmx_vmexit+0x5c35/0x74d0 arch/x86/kvm/vmx.c:11029
     vmx_leave_nested arch/x86/kvm/vmx.c:11136 [inline]
     vmx_set_msr+0x1565/0x1910 arch/x86/kvm/vmx.c:3324
     kvm_set_msr+0xd4/0x170 arch/x86/kvm/x86.c:1099
     do_set_msr+0x11e/0x190 arch/x86/kvm/x86.c:1128
     __msr_io arch/x86/kvm/x86.c:2577 [inline]
     msr_io+0x24b/0x450 arch/x86/kvm/x86.c:2614
     kvm_arch_vcpu_ioctl+0x35b/0x46a0 arch/x86/kvm/x86.c:3497
     kvm_vcpu_ioctl+0x232/0x1120 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2721
     vfs_ioctl fs/ioctl.c:43 [inline]
     do_vfs_ioctl+0x1bf/0x1790 fs/ioctl.c:683
     SYSC_ioctl fs/ioctl.c:698 [inline]
     SyS_ioctl+0x8f/0xc0 fs/ioctl.c:689
     entry_SYSCALL_64_fastpath+0x1f/0xc2

The syzkaller folks reported a nested_run_pending warning during userspace
clear VMX capability which is exposed to L1 before.

The warning gets thrown while doing

(*(uint32_t*)0x20aecfe8 = (uint32_t)0x1);
(*(uint32_t*)0x20aecfec = (uint32_t)0x0);
(*(uint32_t*)0x20aecff0 = (uint32_t)0x3a);
(*(uint32_t*)0x20aecff4 = (uint32_t)0x0);
(*(uint64_t*)0x20aecff8 = (uint64_t)0x0);
r[29] = syscall(__NR_ioctl, r[4], 0x4008ae89ul,
		0x20aecfe8ul, 0, 0, 0, 0, 0, 0);

i.e. KVM_SET_MSR ioctl with

struct kvm_msrs {
	.nmsrs = 1,
		.pad = 0,
		.entries = {
			{.index = MSR_IA32_FEATURE_CONTROL,
			 .reserved = 0,
			 .data = 0}
		}
}

The VMLANCH/VMRESUME emulation should be stopped since the CPU is going to
reset here. This patch resets the nested_run_pending since the CPU is going
to be reset hence there should be nothing pending.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Suggested-by: Radim Krčmář <rkrcmar@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: David Hildenbrand <david@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3b626d6dc3ac..ab338581b3ec 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11107,8 +11107,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
  */
 static void vmx_leave_nested(struct kvm_vcpu *vcpu)
 {
-	if (is_guest_mode(vcpu))
+	if (is_guest_mode(vcpu)) {
+		to_vmx(vcpu)->nested.nested_run_pending = 0;
 		nested_vmx_vmexit(vcpu, -1, 0, 0);
+	}
 	free_nested(to_vmx(vcpu));
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From f050fe7a9164945dd1c28be05bf00e8cfb082ccf Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 20 Feb 2017 12:30:11 +0000
Subject: arm: KVM: Survive unknown traps from guests

Currently we BUG() if we see a HSR.EC value we don't recognise. As
configurable disables/enables are added to the architecture (controlled
by RES1/RES0 bits respectively), with associated synchronous exceptions,
it may be possible for a guest to trigger exceptions with classes that
we don't recognise.

While we can't service these exceptions in a manner useful to the guest,
we can avoid bringing down the host. Per ARM DDI 0406C.c, all currently
unallocated HSR EC encodings are reserved, and per ARM DDI
0487A.k_iss10775, page G6-4395, EC values within the range 0x00 - 0x2c
are reserved for future use with synchronous exceptions, and EC values
within the range 0x2d - 0x3f may be used for either synchronous or
asynchronous exceptions.

The patch makes KVM handle any unknown EC by injecting an UNDEFINED
exception into the guest, with a corresponding (ratelimited) warning in
the host dmesg. We could later improve on this with with a new (opt-in)
exit to the host userspace.

Cc: Dave Martin <dave.martin@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/include/asm/kvm_arm.h |  1 +
 arch/arm/kvm/handle_exit.c     | 19 ++++++++++++-------
 2 files changed, 13 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index e22089fb44dc..a3f0b3d50089 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -209,6 +209,7 @@
 #define HSR_EC_IABT_HYP	(0x21)
 #define HSR_EC_DABT	(0x24)
 #define HSR_EC_DABT_HYP	(0x25)
+#define HSR_EC_MAX	(0x3f)
 
 #define HSR_WFI_IS_WFE		(_AC(1, UL) << 0)
 
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 4e40d1955e35..96af65a30d78 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -79,7 +79,19 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return 1;
 }
 
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+	kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n",
+		      hsr);
+
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
 static exit_handle_fn arm_exit_handlers[] = {
+	[0 ... HSR_EC_MAX]	= kvm_handle_unknown_ec,
 	[HSR_EC_WFI]		= kvm_handle_wfx,
 	[HSR_EC_CP15_32]	= kvm_handle_cp15_32,
 	[HSR_EC_CP15_64]	= kvm_handle_cp15_64,
@@ -98,13 +110,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
 {
 	u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
 
-	if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
-	    !arm_exit_handlers[hsr_ec]) {
-		kvm_err("Unknown exception class: hsr: %#08x\n",
-			(unsigned int)kvm_vcpu_get_hsr(vcpu));
-		BUG();
-	}
-
 	return arm_exit_handlers[hsr_ec];
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From ba4dd156eabdca93501d92a980ba27fa5f4bbd27 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 20 Feb 2017 12:30:12 +0000
Subject: arm64: KVM: Survive unknown traps from guests

Currently we BUG() if we see an ESR_EL2.EC value we don't recognise. As
configurable disables/enables are added to the architecture (controlled
by RES1/RES0 bits respectively), with associated synchronous exceptions,
it may be possible for a guest to trigger exceptions with classes that
we don't recognise.

While we can't service these exceptions in a manner useful to the guest,
we can avoid bringing down the host. Per ARM DDI 0487A.k_iss10775, page
D7-1937, EC values within the range 0x00 - 0x2c are reserved for future
use with synchronous exceptions, and EC values within the range 0x2d -
0x3f may be used for either synchronous or asynchronous exceptions.

The patch makes KVM handle any unknown EC by injecting an UNDEFINED
exception into the guest, with a corresponding (ratelimited) warning in
the host dmesg. We could later improve on this with with a new (opt-in)
exit to the host userspace.

Cc: Dave Martin <dave.martin@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kvm/handle_exit.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 1bfe30dfbfe7..fa1b18e364fc 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -135,7 +135,19 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return ret;
 }
 
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+	kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n",
+		      hsr, esr_get_class_string(hsr));
+
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
 static exit_handle_fn arm_exit_handlers[] = {
+	[0 ... ESR_ELx_EC_MAX]	= kvm_handle_unknown_ec,
 	[ESR_ELx_EC_WFx]	= kvm_handle_wfx,
 	[ESR_ELx_EC_CP15_32]	= kvm_handle_cp15_32,
 	[ESR_ELx_EC_CP15_64]	= kvm_handle_cp15_64,
@@ -162,13 +174,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
 	u8 hsr_ec = ESR_ELx_EC(hsr);
 
-	if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
-	    !arm_exit_handlers[hsr_ec]) {
-		kvm_err("Unknown exception class: hsr: %#08x -- %s\n",
-			hsr, esr_get_class_string(hsr));
-		BUG();
-	}
-
 	return arm_exit_handlers[hsr_ec];
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 97ee351b50a49717543533cfb85b4bf9d88c9680 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 7 Mar 2017 16:14:49 +1100
Subject: powerpc/boot: Fix zImage TOC alignment

Recent toolchains force the TOC to be 256 byte aligned. We need to
enforce this alignment in the zImage linker script, otherwise pointers
to our TOC variables (__toc_start) could be incorrect. If the actual
start of the TOC and __toc_start don't have the same value we crash
early in the zImage wrapper.

Cc: stable@vger.kernel.org
Suggested-by: Alan Modra <amodra@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/boot/zImage.lds.S | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S
index 861e72109df2..f080abfc2f83 100644
--- a/arch/powerpc/boot/zImage.lds.S
+++ b/arch/powerpc/boot/zImage.lds.S
@@ -68,6 +68,7 @@ SECTIONS
   }
 
 #ifdef CONFIG_PPC64_BOOT_WRAPPER
+  . = ALIGN(256);
   .got :
   {
     __toc_start = .;
-- 
cgit v1.2.3-59-g8ed1b


From aa2be9b3d6d2d699e9ca7cbfc00867c80e5da213 Mon Sep 17 00:00:00 2001
From: Daniel Axtens <dja@axtens.net>
Date: Fri, 3 Mar 2017 17:56:55 +1100
Subject: crypto: powerpc - Fix initialisation of crc32c context

Turning on crypto self-tests on a POWER8 shows:

    alg: hash: Test 1 failed for crc32c-vpmsum
    00000000: ff ff ff ff

Comparing the code with the Intel CRC32c implementation on which
ours is based shows that we are doing an init with 0, not ~0
as CRC32c requires.

This probably wasn't caught because btrfs does its own weird
open-coded initialisation.

Initialise our internal context to ~0 on init.

This makes the self-tests pass, and btrfs continues to work.

Fixes: 6dd7a82cc54e ("crypto: powerpc - Add POWER8 optimised crc32c")
Cc: Anton Blanchard <anton@samba.org>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Axtens <dja@axtens.net>
Acked-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/powerpc/crypto/crc32c-vpmsum_glue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
index 9fa046d56eba..411994551afc 100644
--- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c
+++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
@@ -52,7 +52,7 @@ static int crc32c_vpmsum_cra_init(struct crypto_tfm *tfm)
 {
 	u32 *key = crypto_tfm_ctx(tfm);
 
-	*key = 0;
+	*key = ~0;
 
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From fc69910f329d61821897871e0e957eda39beb3d8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 8 Mar 2017 08:29:31 +0100
Subject: MIPS: Add missing include files

After the split of linux/sched.h, several platforms in arch/mips stopped building.

Add the respective additional #include statements to fix the problem I first
tried adding these into asm/processor.h, but ran into circular header
dependencies with that which I could not figure out.

The commit I listed as causing the problem is the branch merge, as there is
likely a combination of multiple patches in that branch.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mips@linux-mips.org
Cc: ralf@linux-mips.org
Fixes: 1827adb11ad2 ("Merge branch 'WIP.sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip")
Link: http://lkml.kernel.org/r/20170308072931.3836696-1-arnd@arndb.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/mips/cavium-octeon/cpu.c                  | 2 ++
 arch/mips/cavium-octeon/crypto/octeon-crypto.c | 1 +
 arch/mips/cavium-octeon/smp.c                  | 1 +
 arch/mips/include/asm/fpu.h                    | 1 +
 arch/mips/kernel/smp-bmips.c                   | 1 +
 arch/mips/kernel/smp-mt.c                      | 1 +
 arch/mips/loongson64/loongson-3/cop2-ex.c      | 1 +
 arch/mips/netlogic/common/smp.c                | 1 +
 arch/mips/netlogic/xlp/cop2-ex.c               | 3 +++
 arch/mips/sgi-ip22/ip28-berr.c                 | 1 +
 arch/mips/sgi-ip27/ip27-berr.c                 | 2 ++
 arch/mips/sgi-ip27/ip27-smp.c                  | 3 +++
 arch/mips/sgi-ip32/ip32-berr.c                 | 1 +
 arch/mips/sgi-ip32/ip32-reset.c                | 1 +
 14 files changed, 20 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/cavium-octeon/cpu.c b/arch/mips/cavium-octeon/cpu.c
index a5b427909b5c..036d56cc4591 100644
--- a/arch/mips/cavium-octeon/cpu.c
+++ b/arch/mips/cavium-octeon/cpu.c
@@ -10,7 +10,9 @@
 #include <linux/irqflags.h>
 #include <linux/notifier.h>
 #include <linux/prefetch.h>
+#include <linux/ptrace.h>
 #include <linux/sched.h>
+#include <linux/sched/task_stack.h>
 
 #include <asm/cop2.h>
 #include <asm/current.h>
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/crypto/octeon-crypto.c
index 4d22365844af..cfb4a146cf17 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-crypto.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.c
@@ -9,6 +9,7 @@
 #include <asm/cop2.h>
 #include <linux/export.h>
 #include <linux/interrupt.h>
+#include <linux/sched/task_stack.h>
 
 #include "octeon-crypto.h"
 
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index 4b94b7fbafa3..3de786545ded 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -12,6 +12,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/sched.h>
 #include <linux/sched/hotplug.h>
+#include <linux/sched/task_stack.h>
 #include <linux/init.h>
 #include <linux/export.h>
 
diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index 321752bcbab6..f94455f964ec 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -12,6 +12,7 @@
 
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
+#include <linux/ptrace.h>
 #include <linux/thread_info.h>
 #include <linux/bitops.h>
 
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index 3daa2cae50b0..1b070a76fcdd 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/sched/hotplug.h>
+#include <linux/sched/task_stack.h>
 #include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/smp.h>
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index e077ea3e11fb..e398cbc3d776 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -23,6 +23,7 @@
 #include <linux/interrupt.h>
 #include <linux/irqchip/mips-gic.h>
 #include <linux/compiler.h>
+#include <linux/sched/task_stack.h>
 #include <linux/smp.h>
 
 #include <linux/atomic.h>
diff --git a/arch/mips/loongson64/loongson-3/cop2-ex.c b/arch/mips/loongson64/loongson-3/cop2-ex.c
index ea13764d0a03..621d6af5f6eb 100644
--- a/arch/mips/loongson64/loongson-3/cop2-ex.c
+++ b/arch/mips/loongson64/loongson-3/cop2-ex.c
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/notifier.h>
+#include <linux/ptrace.h>
 
 #include <asm/fpu.h>
 #include <asm/cop2.h>
diff --git a/arch/mips/netlogic/common/smp.c b/arch/mips/netlogic/common/smp.c
index 10d86d54880a..bddf1ef553a4 100644
--- a/arch/mips/netlogic/common/smp.c
+++ b/arch/mips/netlogic/common/smp.c
@@ -35,6 +35,7 @@
 #include <linux/kernel.h>
 #include <linux/delay.h>
 #include <linux/init.h>
+#include <linux/sched/task_stack.h>
 #include <linux/smp.h>
 #include <linux/irq.h>
 
diff --git a/arch/mips/netlogic/xlp/cop2-ex.c b/arch/mips/netlogic/xlp/cop2-ex.c
index 52bc5de42005..21e439b3db70 100644
--- a/arch/mips/netlogic/xlp/cop2-ex.c
+++ b/arch/mips/netlogic/xlp/cop2-ex.c
@@ -9,11 +9,14 @@
  * Copyright (C) 2009 Wind River Systems,
  *   written by Ralf Baechle <ralf@linux-mips.org>
  */
+#include <linux/capability.h>
 #include <linux/init.h>
 #include <linux/irqflags.h>
 #include <linux/notifier.h>
 #include <linux/prefetch.h>
+#include <linux/ptrace.h>
 #include <linux/sched.h>
+#include <linux/sched/task_stack.h>
 
 #include <asm/cop2.h>
 #include <asm/current.h>
diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c
index 1f2a5bc4779e..75460e1e106b 100644
--- a/arch/mips/sgi-ip22/ip28-berr.c
+++ b/arch/mips/sgi-ip22/ip28-berr.c
@@ -9,6 +9,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
+#include <linux/sched/signal.h>
 #include <linux/seq_file.h>
 
 #include <asm/addrspace.h>
diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c
index d12879eb2b1f..83efe03d5c60 100644
--- a/arch/mips/sgi-ip27/ip27-berr.c
+++ b/arch/mips/sgi-ip27/ip27-berr.c
@@ -12,7 +12,9 @@
 #include <linux/signal.h>	/* for SIGBUS */
 #include <linux/sched.h>	/* schow_regs(), force_sig() */
 #include <linux/sched/debug.h>
+#include <linux/sched/signal.h>
 
+#include <asm/ptrace.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/arch.h>
 #include <asm/sn/sn0/hub.h>
diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c
index f5ed45e8f442..4cd47d23d81a 100644
--- a/arch/mips/sgi-ip27/ip27-smp.c
+++ b/arch/mips/sgi-ip27/ip27-smp.c
@@ -8,10 +8,13 @@
  */
 #include <linux/init.h>
 #include <linux/sched.h>
+#include <linux/sched/task_stack.h>
 #include <linux/topology.h>
 #include <linux/nodemask.h>
+
 #include <asm/page.h>
 #include <asm/processor.h>
+#include <asm/ptrace.h>
 #include <asm/sn/arch.h>
 #include <asm/sn/gda.h>
 #include <asm/sn/intr.h>
diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c
index 57d8c7486fe6..c1f12a9cf305 100644
--- a/arch/mips/sgi-ip32/ip32-berr.c
+++ b/arch/mips/sgi-ip32/ip32-berr.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
+#include <linux/sched/signal.h>
 #include <asm/traps.h>
 #include <linux/uaccess.h>
 #include <asm/addrspace.h>
diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c
index 8bd415c8729f..b3b442def423 100644
--- a/arch/mips/sgi-ip32/ip32-reset.c
+++ b/arch/mips/sgi-ip32/ip32-reset.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/notifier.h>
 #include <linux/delay.h>
 #include <linux/rtc/ds1685.h>
-- 
cgit v1.2.3-59-g8ed1b


From 672a2c87c83649fb0167202342ce85af9a3b4f1c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 8 Mar 2017 14:56:05 +0100
Subject: axonram: Fix gendisk handling

It is invalid to call del_gendisk() when disk->queue is NULL. Fix error
handling in axon_ram_probe() to avoid doing that.

Also del_gendisk() does not drop a reference to gendisk allocated by
alloc_disk(). That has to be done by put_disk(). Add that call where
needed.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 arch/powerpc/sysdev/axonram.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index ada29eaed6e2..f523ac883150 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -274,7 +274,9 @@ failed:
 			if (bank->disk->major > 0)
 				unregister_blkdev(bank->disk->major,
 						bank->disk->disk_name);
-			del_gendisk(bank->disk);
+			if (bank->disk->flags & GENHD_FL_UP)
+				del_gendisk(bank->disk);
+			put_disk(bank->disk);
 		}
 		device->dev.platform_data = NULL;
 		if (bank->io_addr != 0)
@@ -299,6 +301,7 @@ axon_ram_remove(struct platform_device *device)
 	device_remove_file(&device->dev, &dev_attr_ecc);
 	free_irq(bank->irq_id, device);
 	del_gendisk(bank->disk);
+	put_disk(bank->disk);
 	iounmap((void __iomem *) bank->io_addr);
 	kfree(bank);
 
-- 
cgit v1.2.3-59-g8ed1b


From f04d108029063a8a67528a88449c412aecf4d3d8 Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Date: Mon, 20 Feb 2017 19:26:30 +0530
Subject: powerpc/perf: Fix perf_get_data_addr() for power9 DD1

Power9 DD1 do not support PMU_HAS_SIER flag and sdsync in
perf_get_data_addr() defaults to MMCRA_SDSYNC which is wrong. Since
power9 MMCRA does not support SDSYNC bit, patch includes PPMU_NO_SIAR
flag to the check and set the sdsync with MMCRA_SAMPLE_ENABLE;

Fixes: 27593d72c4ad ("powerpc/perf: Use MSR to report privilege level on P9 DD1")
Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/perf/core-book3s.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 595dd718ea87..2ff13249f87a 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -188,6 +188,8 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
 			sdsync = POWER7P_MMCRA_SDAR_VALID;
 		else if (ppmu->flags & PPMU_ALT_SIPR)
 			sdsync = POWER6_MMCRA_SDSYNC;
+		else if (ppmu->flags & PPMU_NO_SIAR)
+			sdsync = MMCRA_SAMPLE_ENABLE;
 		else
 			sdsync = MMCRA_SDSYNC;
 
-- 
cgit v1.2.3-59-g8ed1b


From 78b4416aa249365dd3c1b64da4d3a232014320b0 Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Date: Mon, 20 Feb 2017 19:29:03 +0530
Subject: powerpc/perf: Handle sdar_mode for marked event in power9

MMCRA[SDAR_MODE] specifices how the SDAR should be updated in
continous sampling mode. On P9 it must be set to 0b00 when
MMCRA[63] is set.

Fixes: c7c3f568beff2 ('powerpc/perf: macros for power9 format encoding')
Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/perf/isa207-common.c | 43 ++++++++++++++++++++++++++++++++-------
 arch/powerpc/perf/isa207-common.h |  1 +
 2 files changed, 37 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index e79fb5fb817d..cd951fd231c4 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -65,12 +65,41 @@ static bool is_event_valid(u64 event)
 	return !(event & ~valid_mask);
 }
 
-static u64 mmcra_sdar_mode(u64 event)
+static inline bool is_event_marked(u64 event)
 {
-	if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1))
-		return p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
+	if (event & EVENT_IS_MARKED)
+		return true;
+
+	return false;
+}
 
-	return MMCRA_SDAR_MODE_TLB;
+static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
+{
+	/*
+	 * MMCRA[SDAR_MODE] specifices how the SDAR should be updated in
+	 * continous sampling mode.
+	 *
+	 * Incase of Power8:
+	 * MMCRA[SDAR_MODE] will be programmed as "0b01" for continous sampling
+	 * mode and will be un-changed when setting MMCRA[63] (Marked events).
+	 *
+	 * Incase of Power9:
+	 * Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'),
+	 *               or if group already have any marked events.
+	 * Non-Marked events (for DD1):
+	 *	MMCRA[SDAR_MODE] will be set to 0b01
+	 * For rest
+	 *	MMCRA[SDAR_MODE] will be set from event code.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE))
+			*mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
+		else if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
+			*mmcra |=  p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
+		else if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+			*mmcra |= MMCRA_SDAR_MODE_TLB;
+	} else
+		*mmcra |= MMCRA_SDAR_MODE_TLB;
 }
 
 static u64 thresh_cmp_val(u64 value)
@@ -180,7 +209,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
 		value |= CNST_L1_QUAL_VAL(cache);
 	}
 
-	if (event & EVENT_IS_MARKED) {
+	if (is_event_marked(event)) {
 		mask  |= CNST_SAMPLE_MASK;
 		value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
 	}
@@ -276,7 +305,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
 		}
 
 		/* In continuous sampling mode, update SDAR on TLB miss */
-		mmcra |= mmcra_sdar_mode(event[i]);
+		mmcra_sdar_mode(event[i], &mmcra);
 
 		if (event[i] & EVENT_IS_L1) {
 			cache = event[i] >> EVENT_CACHE_SEL_SHIFT;
@@ -285,7 +314,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
 			mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT;
 		}
 
-		if (event[i] & EVENT_IS_MARKED) {
+		if (is_event_marked(event[i])) {
 			mmcra |= MMCRA_SAMPLE_ENABLE;
 
 			val = (event[i] >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index cf9bd8990159..899210f14ee4 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -246,6 +246,7 @@
 #define MMCRA_THR_CMP_SHIFT		32
 #define MMCRA_SDAR_MODE_SHIFT		42
 #define MMCRA_SDAR_MODE_TLB		(1ull << MMCRA_SDAR_MODE_SHIFT)
+#define MMCRA_SDAR_MODE_NO_UPDATES	~(0x3ull << MMCRA_SDAR_MODE_SHIFT)
 #define MMCRA_IFM_SHIFT			30
 
 /* MMCR1 Threshold Compare bit constant for power9 */
-- 
cgit v1.2.3-59-g8ed1b


From 7aafac11e308d37ed3c509829bb43d80c1811ac3 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Wed, 22 Feb 2017 15:43:59 +1100
Subject: powerpc/powernv/ioda2: Gracefully fail if too many TCE levels
 requested

The IODA2 specification says that a 64 DMA address cannot use top 4 bits
(3 are reserved and one is a "TVE select"); bottom page_shift bits
cannot be used for multilevel table addressing either.

The existing IODA2 table allocation code aligns the minimum TCE table
size to PAGE_SIZE so in the case of 64K system pages and 4K IOMMU pages,
we have 64-4-12=48 bits. Since 64K page stores 8192 TCEs, i.e. needs
13 bits, the maximum number of levels is 48/13 = 3 so we physically
cannot address more and EEH happens on DMA accesses.

This adds a check that too many levels were requested.

It is still possible to have 5 levels in the case of 4K system page size.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 6901a06da2f9..957a57a6c812 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2624,6 +2624,9 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
 	level_shift = entries_shift + 3;
 	level_shift = max_t(unsigned, level_shift, PAGE_SHIFT);
 
+	if ((level_shift - 3) * levels + page_shift >= 60)
+		return -EINVAL;
+
 	/* Allocate TCE table */
 	addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
 			levels, tce_table_size, &offset, &total_allocated);
-- 
cgit v1.2.3-59-g8ed1b


From 7af4df85796589e60a2dfc0f821eca0c4bbce4d2 Mon Sep 17 00:00:00 2001
From: Linu Cherian <linu.cherian@cavium.com>
Date: Wed, 8 Mar 2017 11:38:33 +0530
Subject: KVM: arm/arm64: Enable KVM_CAP_NR_MEMSLOTS on arm/arm64

Return KVM_USER_MEM_SLOTS for userspace capability query on
NR_MEMSLOTS.

Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Linu Cherian <linu.cherian@cavium.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/kvm/arm.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index c9a2103faeb9..96dba7cd8be7 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -221,6 +221,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
 		break;
+	case KVM_CAP_NR_MEMSLOTS:
+		r = KVM_USER_MEM_SLOTS;
+		break;
 	case KVM_CAP_MSI_DEVID:
 		if (!kvm)
 			r = -EINVAL;
-- 
cgit v1.2.3-59-g8ed1b


From 3e92f94a3b8e925a6dd7ec88a5794b2084b5fb65 Mon Sep 17 00:00:00 2001
From: Linu Cherian <linu.cherian@cavium.com>
Date: Wed, 8 Mar 2017 11:38:34 +0530
Subject: KVM: arm/arm64: Remove KVM_PRIVATE_MEM_SLOTS definition that are
 unused

arm/arm64 architecture doesnt use private memslots, hence removing
KVM_PRIVATE_MEM_SLOTS macro definition.

Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Linu Cherian <linu.cherian@cavium.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/include/asm/kvm_host.h   | 1 -
 arch/arm64/include/asm/kvm_host.h | 1 -
 2 files changed, 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index cc495d799c67..31ee468ce667 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -30,7 +30,6 @@
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
 
 #define KVM_USER_MEM_SLOTS 32
-#define KVM_PRIVATE_MEM_SLOTS 4
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_HAVE_ONE_REG
 #define KVM_HALT_POLL_NS_DEFAULT 500000
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index f21fd3894370..6ac17ee887c9 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -31,7 +31,6 @@
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
 
 #define KVM_USER_MEM_SLOTS 32
-#define KVM_PRIVATE_MEM_SLOTS 4
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_HALT_POLL_NS_DEFAULT 500000
 
-- 
cgit v1.2.3-59-g8ed1b


From 955a3fc6d2a1c11d6d00bce4f3816100ce0530cf Mon Sep 17 00:00:00 2001
From: Linu Cherian <linu.cherian@cavium.com>
Date: Wed, 8 Mar 2017 11:38:35 +0530
Subject: KVM: arm64: Increase number of user memslots to 512

Having only 32 memslots is a real constraint for the maximum
number of PCI devices that can be assigned to a single guest.
Assuming each PCI device/virtual function having two memory BAR
regions, we could assign only 15 devices/virtual functions to a
guest.

Hence increase KVM_USER_MEM_SLOTS to 512 as done in other archs like
powerpc.

Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Linu Cherian <linu.cherian@cavium.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/include/asm/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 6ac17ee887c9..e7705e7bb07b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -30,7 +30,7 @@
 
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
 
-#define KVM_USER_MEM_SLOTS 32
+#define KVM_USER_MEM_SLOTS 512
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_HALT_POLL_NS_DEFAULT 500000
 
-- 
cgit v1.2.3-59-g8ed1b


From db08e1d53034a54fe177ced70476fda73954b9e9 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Tue, 21 Feb 2017 13:41:31 +1100
Subject: powerpc/powernv/ioda2: Update iommu table base on ownership change

On POWERNV platform, in order to do DMA via IOMMU (i.e. 32bit DMA in
our case), a device needs an iommu_table pointer set via
set_iommu_table_base().

The codeflow is:
- pnv_pci_ioda2_setup_dma_pe()
	- pnv_pci_ioda2_setup_default_config()
	- pnv_ioda_setup_bus_dma() [1]

pnv_pci_ioda2_setup_dma_pe() creates IOMMU groups,
pnv_pci_ioda2_setup_default_config() does default DMA setup,
pnv_ioda_setup_bus_dma() takes a bus PE (on IODA2, all physical function
PEs as bus PEs except NPU), walks through all underlying buses and
devices, adds all devices to an IOMMU group and sets iommu_table.

On IODA2, when VFIO is used, it takes ownership over a PE which means it
removes all tables and creates new ones (with a possibility of sharing
them among PEs). So when the ownership is returned from VFIO to
the kernel, the iommu_table pointer written to a device at [1] is
stale and needs an update.

This adds an "add_to_group" parameter to pnv_ioda_setup_bus_dma()
(in fact re-adds as it used to be there a while ago for different
reasons) to tell the helper if a device needs to be added to
an IOMMU group with an iommu_table update or just the latter.

This calls pnv_ioda_setup_bus_dma(..., false) from
pnv_ioda2_release_ownership() so when the ownership is restored,
32bit DMA can work again for a device. This does the same thing
on obtaining ownership as the iommu_table point is stale at this point
anyway and it is safer to have NULL there.

We did not hit this earlier as all tested devices in recent years were
only using 64bit DMA; the rare exception for this is MPT3 SAS adapter
which uses both 32bit and 64bit DMA access and it has not been tested
with VFIO much.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 957a57a6c812..e36738291c32 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1775,17 +1775,20 @@ static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev)
 }
 
 static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
-				   struct pci_bus *bus)
+				   struct pci_bus *bus,
+				   bool add_to_group)
 {
 	struct pci_dev *dev;
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
 		set_dma_offset(&dev->dev, pe->tce_bypass_base);
-		iommu_add_device(&dev->dev);
+		if (add_to_group)
+			iommu_add_device(&dev->dev);
 
 		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
-			pnv_ioda_setup_bus_dma(pe, dev->subordinate);
+			pnv_ioda_setup_bus_dma(pe, dev->subordinate,
+					add_to_group);
 	}
 }
 
@@ -2191,7 +2194,7 @@ found:
 		set_iommu_table_base(&pe->pdev->dev, tbl);
 		iommu_add_device(&pe->pdev->dev);
 	} else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
-		pnv_ioda_setup_bus_dma(pe, pe->pbus);
+		pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
 
 	return;
  fail:
@@ -2426,6 +2429,8 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
 
 	pnv_pci_ioda2_set_bypass(pe, false);
 	pnv_pci_ioda2_unset_window(&pe->table_group, 0);
+	if (pe->pbus)
+		pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
 	pnv_ioda2_table_free(tbl);
 }
 
@@ -2435,6 +2440,8 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
 						table_group);
 
 	pnv_pci_ioda2_setup_default_config(pe);
+	if (pe->pbus)
+		pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
 }
 
 static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
@@ -2731,7 +2738,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 	if (pe->flags & PNV_IODA_PE_DEV)
 		iommu_add_device(&pe->pdev->dev);
 	else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
-		pnv_ioda_setup_bus_dma(pe, pe->pbus);
+		pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
 }
 
 #ifdef CONFIG_PCI_MSI
-- 
cgit v1.2.3-59-g8ed1b


From 05d8d34611139f8435af90ac54b65eb31e82e388 Mon Sep 17 00:00:00 2001
From: Radim Krčmář <rkrcmar@redhat.com>
Date: Tue, 7 Mar 2017 17:51:49 +0100
Subject: KVM: nVMX: do not warn when MSR bitmap address is not backed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before trying to do nested_get_page() in nested_vmx_merge_msr_bitmap(),
we have already checked that the MSR bitmap address is valid (4k aligned
and within physical limits).  SDM doesn't specify what happens if the
there is no memory mapped at the valid address, but Intel CPUs treat the
situation as if the bitmap was configured to trap all MSRs.

KVM already does that by returning false and a correct handling doesn't
need the guest-trigerrable warning that was reported by syzkaller:
(The warning was originally there to catch some possible bugs in nVMX.)

  ------------[ cut here ]------------
  WARNING: CPU: 0 PID: 7832 at arch/x86/kvm/vmx.c:9709
  nested_vmx_merge_msr_bitmap arch/x86/kvm/vmx.c:9709 [inline]
  WARNING: CPU: 0 PID: 7832 at arch/x86/kvm/vmx.c:9709
  nested_get_vmcs12_pages+0xfb6/0x15c0 arch/x86/kvm/vmx.c:9640
  Kernel panic - not syncing: panic_on_warn set ...
  CPU: 0 PID: 7832 Comm: syz-executor1 Not tainted 4.10.0+ #229
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
  Call Trace:
   __dump_stack lib/dump_stack.c:15 [inline]
   dump_stack+0x2ee/0x3ef lib/dump_stack.c:51
   panic+0x1fb/0x412 kernel/panic.c:179
   __warn+0x1c4/0x1e0 kernel/panic.c:540
   warn_slowpath_null+0x2c/0x40 kernel/panic.c:583
   nested_vmx_merge_msr_bitmap arch/x86/kvm/vmx.c:9709 [inline]
   nested_get_vmcs12_pages+0xfb6/0x15c0 arch/x86/kvm/vmx.c:9640
   enter_vmx_non_root_mode arch/x86/kvm/vmx.c:10471 [inline]
   nested_vmx_run+0x6186/0xaab0 arch/x86/kvm/vmx.c:10561
   handle_vmlaunch+0x1a/0x20 arch/x86/kvm/vmx.c:7312
   vmx_handle_exit+0xfc0/0x3f00 arch/x86/kvm/vmx.c:8526
   vcpu_enter_guest arch/x86/kvm/x86.c:6982 [inline]
   vcpu_run arch/x86/kvm/x86.c:7044 [inline]
   kvm_arch_vcpu_ioctl_run+0x1418/0x4840 arch/x86/kvm/x86.c:7205
   kvm_vcpu_ioctl+0x673/0x1120 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2570

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
[Jim Mattson explained the bare metal behavior: "I believe this behavior
 would be documented in the chipset data sheet rather than the SDM,
 since the chipset returns all 1s for an unclaimed read."]
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ab338581b3ec..98e82ee1e699 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9680,10 +9680,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
 		return false;
 
 	page = nested_get_page(vcpu, vmcs12->msr_bitmap);
-	if (!page) {
-		WARN_ON(1);
+	if (!page)
 		return false;
-	}
 	msr_bitmap_l1 = (unsigned long *)kmap(page);
 
 	memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
-- 
cgit v1.2.3-59-g8ed1b


From 6fb895692a034393d58679cd8d00c9e229719a5f Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 9 Mar 2017 17:24:02 +0300
Subject: x86/cpufeature: Add 5-level paging detection

Look for 'la57' in /proc/cpuinfo to see if your machine supports 5-level
paging.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/cpufeatures.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 4e7772387c6e..b04bb6dfed7f 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -289,7 +289,8 @@
 #define X86_FEATURE_PKU		(16*32+ 3) /* Protection Keys for Userspace */
 #define X86_FEATURE_OSPKE	(16*32+ 4) /* OS Protection Keys Enable */
 #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
-#define X86_FEATURE_RDPID	(16*32+ 22) /* RDPID instruction */
+#define X86_FEATURE_LA57	(16*32+16) /* 5-level page tables */
+#define X86_FEATURE_RDPID	(16*32+22) /* RDPID instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
-- 
cgit v1.2.3-59-g8ed1b


From 9849a5697d3defb2087cb6b9be5573a142697889 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 9 Mar 2017 17:24:05 +0300
Subject: arch, mm: convert all architectures to use 5level-fixup.h

If an architecture uses 4level-fixup.h we don't need to do anything as
it includes 5level-fixup.h.

If an architecture uses pgtable-nop*d.h, define __ARCH_USE_5LEVEL_HACK
before inclusion of the header. It makes asm-generic code to use
5level-fixup.h.

If an architecture has 4-level paging or folds levels on its own,
include 5level-fixup.h directly.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arc/include/asm/hugepage.h                  | 1 +
 arch/arc/include/asm/pgtable.h                   | 1 +
 arch/arm/include/asm/pgtable.h                   | 1 +
 arch/arm64/include/asm/pgtable-types.h           | 4 ++++
 arch/avr32/include/asm/pgtable-2level.h          | 1 +
 arch/cris/include/asm/pgtable.h                  | 1 +
 arch/frv/include/asm/pgtable.h                   | 1 +
 arch/h8300/include/asm/pgtable.h                 | 1 +
 arch/hexagon/include/asm/pgtable.h               | 1 +
 arch/ia64/include/asm/pgtable.h                  | 2 ++
 arch/metag/include/asm/pgtable.h                 | 1 +
 arch/microblaze/include/asm/page.h               | 3 ++-
 arch/mips/include/asm/pgtable-32.h               | 1 +
 arch/mips/include/asm/pgtable-64.h               | 1 +
 arch/mn10300/include/asm/page.h                  | 1 +
 arch/nios2/include/asm/pgtable.h                 | 1 +
 arch/openrisc/include/asm/pgtable.h              | 1 +
 arch/powerpc/include/asm/book3s/32/pgtable.h     | 1 +
 arch/powerpc/include/asm/book3s/64/pgtable.h     | 3 +++
 arch/powerpc/include/asm/nohash/32/pgtable.h     | 1 +
 arch/powerpc/include/asm/nohash/64/pgtable-4k.h  | 3 +++
 arch/powerpc/include/asm/nohash/64/pgtable-64k.h | 1 +
 arch/s390/include/asm/pgtable.h                  | 1 +
 arch/score/include/asm/pgtable.h                 | 1 +
 arch/sh/include/asm/pgtable-2level.h             | 1 +
 arch/sh/include/asm/pgtable-3level.h             | 1 +
 arch/sparc/include/asm/pgtable_64.h              | 1 +
 arch/tile/include/asm/pgtable_32.h               | 1 +
 arch/tile/include/asm/pgtable_64.h               | 1 +
 arch/um/include/asm/pgtable-2level.h             | 1 +
 arch/um/include/asm/pgtable-3level.h             | 1 +
 arch/unicore32/include/asm/pgtable.h             | 1 +
 arch/x86/include/asm/pgtable_types.h             | 4 ++++
 arch/xtensa/include/asm/pgtable.h                | 1 +
 34 files changed, 46 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h
index 317ff773e1ca..b18fcb606908 100644
--- a/arch/arc/include/asm/hugepage.h
+++ b/arch/arc/include/asm/hugepage.h
@@ -11,6 +11,7 @@
 #define _ASM_ARC_HUGEPAGE_H
 
 #include <linux/types.h>
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 static inline pte_t pmd_pte(pmd_t pmd)
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index e94ca72b974e..ee22d40afef4 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -37,6 +37,7 @@
 
 #include <asm/page.h>
 #include <asm/mmu.h>
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 #include <linux/const.h>
 
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index a8d656d9aec7..1c462381c225 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -20,6 +20,7 @@
 
 #else
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 #include <asm/memory.h>
 #include <asm/pgtable-hwdef.h>
diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
index 69b2fd41503c..345a072b5856 100644
--- a/arch/arm64/include/asm/pgtable-types.h
+++ b/arch/arm64/include/asm/pgtable-types.h
@@ -55,9 +55,13 @@ typedef struct { pteval_t pgprot; } pgprot_t;
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
 #if CONFIG_PGTABLE_LEVELS == 2
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 #elif CONFIG_PGTABLE_LEVELS == 3
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
+#elif CONFIG_PGTABLE_LEVELS == 4
+#include <asm-generic/5level-fixup.h>
 #endif
 
 #endif	/* __ASM_PGTABLE_TYPES_H */
diff --git a/arch/avr32/include/asm/pgtable-2level.h b/arch/avr32/include/asm/pgtable-2level.h
index 425dd567b5b9..d5b1c63993ec 100644
--- a/arch/avr32/include/asm/pgtable-2level.h
+++ b/arch/avr32/include/asm/pgtable-2level.h
@@ -8,6 +8,7 @@
 #ifndef __ASM_AVR32_PGTABLE_2LEVEL_H
 #define __ASM_AVR32_PGTABLE_2LEVEL_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 /*
diff --git a/arch/cris/include/asm/pgtable.h b/arch/cris/include/asm/pgtable.h
index 2a3210ba4c72..fa3a73004cc5 100644
--- a/arch/cris/include/asm/pgtable.h
+++ b/arch/cris/include/asm/pgtable.h
@@ -6,6 +6,7 @@
 #define _CRIS_PGTABLE_H
 
 #include <asm/page.h>
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 #ifndef __ASSEMBLY__
diff --git a/arch/frv/include/asm/pgtable.h b/arch/frv/include/asm/pgtable.h
index a0513d463a1f..ab6e7e961b54 100644
--- a/arch/frv/include/asm/pgtable.h
+++ b/arch/frv/include/asm/pgtable.h
@@ -16,6 +16,7 @@
 #ifndef _ASM_PGTABLE_H
 #define _ASM_PGTABLE_H
 
+#include <asm-generic/5level-fixup.h>
 #include <asm/mem-layout.h>
 #include <asm/setup.h>
 #include <asm/processor.h>
diff --git a/arch/h8300/include/asm/pgtable.h b/arch/h8300/include/asm/pgtable.h
index 8341db67821d..7d265d28ba5e 100644
--- a/arch/h8300/include/asm/pgtable.h
+++ b/arch/h8300/include/asm/pgtable.h
@@ -1,5 +1,6 @@
 #ifndef _H8300_PGTABLE_H
 #define _H8300_PGTABLE_H
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 #include <asm-generic/pgtable.h>
 #define pgtable_cache_init()   do { } while (0)
diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h
index 49eab8136ec3..24a9177fb897 100644
--- a/arch/hexagon/include/asm/pgtable.h
+++ b/arch/hexagon/include/asm/pgtable.h
@@ -26,6 +26,7 @@
  */
 #include <linux/swap.h>
 #include <asm/page.h>
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 /* A handy thing to have if one has the RAM. Declared in head.S */
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 384794e665fc..6cc22c8d8923 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -587,8 +587,10 @@ extern struct page *zero_page_memmap_ptr;
 
 
 #if CONFIG_PGTABLE_LEVELS == 3
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 #endif
+#include <asm-generic/5level-fixup.h>
 #include <asm-generic/pgtable.h>
 
 #endif /* _ASM_IA64_PGTABLE_H */
diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h
index ffa3a3a2ecad..0c151e5af079 100644
--- a/arch/metag/include/asm/pgtable.h
+++ b/arch/metag/include/asm/pgtable.h
@@ -6,6 +6,7 @@
 #define _METAG_PGTABLE_H
 
 #include <asm/pgtable-bits.h>
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 /* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */
diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index fd850879854d..d506bb0893f9 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -95,7 +95,8 @@ typedef struct { unsigned long pgd; } pgd_t;
 #   else /* CONFIG_MMU */
 typedef struct { unsigned long	ste[64]; }	pmd_t;
 typedef struct { pmd_t		pue[1]; }	pud_t;
-typedef struct { pud_t		pge[1]; }	pgd_t;
+typedef struct { pud_t		p4e[1]; }	p4d_t;
+typedef struct { p4d_t		pge[1]; }	pgd_t;
 #   endif /* CONFIG_MMU */
 
 # define pte_val(x)	((x).pte)
diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h
index d21f3da7bdb6..6f94bed571c4 100644
--- a/arch/mips/include/asm/pgtable-32.h
+++ b/arch/mips/include/asm/pgtable-32.h
@@ -16,6 +16,7 @@
 #include <asm/cachectl.h>
 #include <asm/fixmap.h>
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 extern int temp_tlb_entry;
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index 514cbc0a6a67..130a2a6c1531 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -17,6 +17,7 @@
 #include <asm/cachectl.h>
 #include <asm/fixmap.h>
 
+#define __ARCH_USE_5LEVEL_HACK
 #if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48)
 #include <asm-generic/pgtable-nopmd.h>
 #else
diff --git a/arch/mn10300/include/asm/page.h b/arch/mn10300/include/asm/page.h
index 3810a6f740fd..dfe730a5ede0 100644
--- a/arch/mn10300/include/asm/page.h
+++ b/arch/mn10300/include/asm/page.h
@@ -57,6 +57,7 @@ typedef struct page *pgtable_t;
 #define __pgd(x)	((pgd_t) { (x) })
 #define __pgprot(x)	((pgprot_t) { (x) })
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index 298393c3cb42..db4f7d179220 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -22,6 +22,7 @@
 #include <asm/tlbflush.h>
 
 #include <asm/pgtable-bits.h>
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 #define FIRST_USER_ADDRESS	0UL
diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h
index 3567aa7be555..ff97374ca069 100644
--- a/arch/openrisc/include/asm/pgtable.h
+++ b/arch/openrisc/include/asm/pgtable.h
@@ -25,6 +25,7 @@
 #ifndef __ASM_OPENRISC_PGTABLE_H
 #define __ASM_OPENRISC_PGTABLE_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 #ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 012223638815..26ed228d4dc6 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H
 #define _ASM_POWERPC_BOOK3S_32_PGTABLE_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 #include <asm/book3s/32/hash.h>
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 1eeeb72c7015..13c39b6d5d64 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1,9 +1,12 @@
 #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
 #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
 
+#include <asm-generic/5level-fixup.h>
+
 #ifndef __ASSEMBLY__
 #include <linux/mmdebug.h>
 #endif
+
 /*
  * Common bits between hash and Radix page table
  */
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index ba9921bf202e..5134ade2e850 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H
 #define _ASM_POWERPC_NOHASH_32_PGTABLE_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 #ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
index d0db98793dd8..9f4de0a1035e 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
@@ -1,5 +1,8 @@
 #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H
 #define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H
+
+#include <asm-generic/5level-fixup.h>
+
 /*
  * Entries per page directory level.  The PTE level must use a 64b record
  * for each page table entry.  The PMD and PGD level use a 32b record for
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
index 55b28ef3409a..1facb584dd29 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H
 #define _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 
 
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 7ed1972b1920..93e37b12e882 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -24,6 +24,7 @@
  * the S390 page table tree.
  */
 #ifndef __ASSEMBLY__
+#include <asm-generic/5level-fixup.h>
 #include <linux/sched.h>
 #include <linux/mm_types.h>
 #include <linux/page-flags.h>
diff --git a/arch/score/include/asm/pgtable.h b/arch/score/include/asm/pgtable.h
index 0553e5cd5985..46ff8fd678a7 100644
--- a/arch/score/include/asm/pgtable.h
+++ b/arch/score/include/asm/pgtable.h
@@ -2,6 +2,7 @@
 #define _ASM_SCORE_PGTABLE_H
 
 #include <linux/const.h>
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 #include <asm/fixmap.h>
diff --git a/arch/sh/include/asm/pgtable-2level.h b/arch/sh/include/asm/pgtable-2level.h
index 19bd89db17e7..f75cf4387257 100644
--- a/arch/sh/include/asm/pgtable-2level.h
+++ b/arch/sh/include/asm/pgtable-2level.h
@@ -1,6 +1,7 @@
 #ifndef __ASM_SH_PGTABLE_2LEVEL_H
 #define __ASM_SH_PGTABLE_2LEVEL_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 /*
diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h
index 249a985d9648..9b1e776eca31 100644
--- a/arch/sh/include/asm/pgtable-3level.h
+++ b/arch/sh/include/asm/pgtable-3level.h
@@ -1,6 +1,7 @@
 #ifndef __ASM_SH_PGTABLE_3LEVEL_H
 #define __ASM_SH_PGTABLE_3LEVEL_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 
 /*
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 56e49c8f770d..8a598528ec1f 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -12,6 +12,7 @@
  * the SpitFire page tables.
  */
 
+#include <asm-generic/5level-fixup.h>
 #include <linux/compiler.h>
 #include <linux/const.h>
 #include <asm/types.h>
diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h
index d26a42279036..5f8c615cb5e9 100644
--- a/arch/tile/include/asm/pgtable_32.h
+++ b/arch/tile/include/asm/pgtable_32.h
@@ -74,6 +74,7 @@ extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */;
 #define MAXMEM		(_VMALLOC_START - PAGE_OFFSET)
 
 /* We have no pmd or pud since we are strictly a two-level page table */
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 static inline int pud_huge_page(pud_t pud)	{ return 0; }
diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h
index e96cec52f6d8..96fe58b45118 100644
--- a/arch/tile/include/asm/pgtable_64.h
+++ b/arch/tile/include/asm/pgtable_64.h
@@ -59,6 +59,7 @@
 #ifndef __ASSEMBLY__
 
 /* We have no pud since we are a three-level page table. */
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 
 /*
diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h
index cfbe59752469..179c0ea87a0c 100644
--- a/arch/um/include/asm/pgtable-2level.h
+++ b/arch/um/include/asm/pgtable-2level.h
@@ -8,6 +8,7 @@
 #ifndef __UM_PGTABLE_2LEVEL_H
 #define __UM_PGTABLE_2LEVEL_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 /* PGDIR_SHIFT determines what a third-level page table entry can map */
diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h
index bae8523a162f..c4d876dfb9ac 100644
--- a/arch/um/include/asm/pgtable-3level.h
+++ b/arch/um/include/asm/pgtable-3level.h
@@ -7,6 +7,7 @@
 #ifndef __UM_PGTABLE_3LEVEL_H
 #define __UM_PGTABLE_3LEVEL_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 
 /* PGDIR_SHIFT determines what a third-level page table entry can map */
diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h
index 818d0f5598e3..a4f2bef37e70 100644
--- a/arch/unicore32/include/asm/pgtable.h
+++ b/arch/unicore32/include/asm/pgtable.h
@@ -12,6 +12,7 @@
 #ifndef __UNICORE_PGTABLE_H__
 #define __UNICORE_PGTABLE_H__
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 #include <asm/cpu-single.h>
 
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 8b4de22d6429..62484333673d 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -273,6 +273,8 @@ static inline pgdval_t pgd_flags(pgd_t pgd)
 }
 
 #if CONFIG_PGTABLE_LEVELS > 3
+#include <asm-generic/5level-fixup.h>
+
 typedef struct { pudval_t pud; } pud_t;
 
 static inline pud_t native_make_pud(pmdval_t val)
@@ -285,6 +287,7 @@ static inline pudval_t native_pud_val(pud_t pud)
 	return pud.pud;
 }
 #else
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 
 static inline pudval_t native_pud_val(pud_t pud)
@@ -306,6 +309,7 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
 	return pmd.pmd;
 }
 #else
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 static inline pmdval_t native_pmd_val(pmd_t pmd)
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index 8aa0e0d9cbb2..30dd5b2e4ad5 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -11,6 +11,7 @@
 #ifndef _XTENSA_PGTABLE_H
 #define _XTENSA_PGTABLE_H
 
+#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 #include <asm/page.h>
 #include <asm/kmem_layout.h>
-- 
cgit v1.2.3-59-g8ed1b


From 8bd49ac86677ca43d64f08c45864e438283d6a76 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 6 Mar 2017 09:57:17 +0100
Subject: s390: wire up statx system call

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/uapi/asm/unistd.h | 4 +++-
 arch/s390/kernel/compat_wrapper.c   | 1 +
 arch/s390/kernel/syscalls.S         | 2 ++
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 4384bc797a54..152de9b796e1 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -313,7 +313,9 @@
 #define __NR_copy_file_range	375
 #define __NR_preadv2		376
 #define __NR_pwritev2		377
-#define NR_syscalls 378
+/* Number 378 is reserved for guarded storage */
+#define __NR_statx		379
+#define NR_syscalls 380
 
 /* 
  * There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index ae2cda5eee5a..e89cc2e71db1 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -178,3 +178,4 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
 COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
 COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
 COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer);
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 9b59e6212d8f..2659b5cfeddb 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -386,3 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2)
 SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
 SYSCALL(sys_preadv2,compat_sys_preadv2)
 SYSCALL(sys_pwritev2,compat_sys_pwritev2)
+NI_SYSCALL
+SYSCALL(sys_statx,compat_sys_statx)
-- 
cgit v1.2.3-59-g8ed1b


From 8a1115ff6b6d90cf1066ec3a0c4e51276553eebe Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 9 Mar 2017 16:16:31 -0800
Subject: scripts/spelling.txt: add "disble(d)" pattern and fix typo instances

Fix typos and add the following to the scripts/spelling.txt:

  disble||disable
  disbled||disabled

I kept the TSL2563_INT_DISBLED in /drivers/iio/light/tsl2563.c
untouched.  The macro is not referenced at all, but this commit is
touching only comment blocks just in case.

Link: http://lkml.kernel.org/r/1481573103-11329-20-git-send-email-yamada.masahiro@socionext.com
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/dev-tools/kcov.rst        | 2 +-
 arch/cris/arch-v32/drivers/cryptocop.c  | 2 +-
 arch/x86/kernel/ftrace.c                | 2 +-
 drivers/crypto/ux500/cryp/cryp.c        | 2 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c  | 2 +-
 drivers/hv/channel.c                    | 2 +-
 drivers/isdn/hisax/st5481_b.c           | 2 +-
 drivers/mtd/spi-nor/spi-nor.c           | 2 +-
 drivers/net/ethernet/qlogic/qlge/qlge.h | 2 +-
 drivers/scsi/aic7xxx/aic79xx_core.c     | 2 +-
 drivers/usb/gadget/legacy/inode.c       | 3 +--
 drivers/usb/host/xhci.c                 | 4 ++--
 include/linux/regulator/machine.h       | 2 +-
 kernel/cgroup/cgroup.c                  | 2 +-
 kernel/events/core.c                    | 2 +-
 scripts/spelling.txt                    | 2 ++
 sound/soc/amd/acp-pcm-dma.c             | 2 +-
 17 files changed, 19 insertions(+), 18 deletions(-)

(limited to 'arch')

diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst
index 2c41b713841f..44886c91e112 100644
--- a/Documentation/dev-tools/kcov.rst
+++ b/Documentation/dev-tools/kcov.rst
@@ -10,7 +10,7 @@ Note that kcov does not aim to collect as much coverage as possible. It aims
 to collect more or less stable coverage that is function of syscall inputs.
 To achieve this goal it does not collect coverage in soft/hard interrupts
 and instrumentation of some inherently non-deterministic parts of kernel is
-disbled (e.g. scheduler, locking).
+disabled (e.g. scheduler, locking).
 
 Usage
 -----
diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c
index ae6903d7fdbe..14970f11bbf2 100644
--- a/arch/cris/arch-v32/drivers/cryptocop.c
+++ b/arch/cris/arch-v32/drivers/cryptocop.c
@@ -2086,7 +2086,7 @@ static void cryptocop_job_queue_close(void)
 		dma_in_cfg.en = regk_dma_no;
 		REG_WR(dma, IN_DMA_INST, rw_cfg, dma_in_cfg);
 
-		/* Disble the cryptocop. */
+		/* Disable the cryptocop. */
 		rw_cfg = REG_RD(strcop, regi_strcop, rw_cfg);
 		rw_cfg.en = 0;
 		REG_WR(strcop, regi_strcop, rw_cfg, rw_cfg);
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 8639bb2ae058..8f3d9cf26ff9 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -535,7 +535,7 @@ static void run_sync(void)
 {
 	int enable_irqs = irqs_disabled();
 
-	/* We may be called with interrupts disbled (on bootup). */
+	/* We may be called with interrupts disabled (on bootup). */
 	if (enable_irqs)
 		local_irq_enable();
 	on_each_cpu(do_sync_core, NULL, 1);
diff --git a/drivers/crypto/ux500/cryp/cryp.c b/drivers/crypto/ux500/cryp/cryp.c
index 43a0c8a26ab0..00a16ab601cb 100644
--- a/drivers/crypto/ux500/cryp/cryp.c
+++ b/drivers/crypto/ux500/cryp/cryp.c
@@ -82,7 +82,7 @@ void cryp_activity(struct cryp_device_data *device_data,
 void cryp_flush_inoutfifo(struct cryp_device_data *device_data)
 {
 	/*
-	 * We always need to disble the hardware before trying to flush the
+	 * We always need to disable the hardware before trying to flush the
 	 * FIFO. This is something that isn't written in the design
 	 * specification, but we have been informed by the hardware designers
 	 * that this must be done.
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 31375bdde6f1..011800f621c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -788,7 +788,7 @@ static int sdma_v3_0_start(struct amdgpu_device *adev)
 		}
 	}
 
-	/* disble sdma engine before programing it */
+	/* disable sdma engine before programing it */
 	sdma_v3_0_ctx_switch_enable(adev, false);
 	sdma_v3_0_enable(adev, false);
 
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 81a80c82f1bd..bd0d1988feb2 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -543,7 +543,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 	/*
 	 * In case a device driver's probe() fails (e.g.,
 	 * util_probe() -> vmbus_open() returns -ENOMEM) and the device is
-	 * rescinded later (e.g., we dynamically disble an Integrated Service
+	 * rescinded later (e.g., we dynamically disable an Integrated Service
 	 * in Hyper-V Manager), the driver's remove() invokes vmbus_close():
 	 * here we should skip most of the below cleanup work.
 	 */
diff --git a/drivers/isdn/hisax/st5481_b.c b/drivers/isdn/hisax/st5481_b.c
index 409849165838..f64a36007800 100644
--- a/drivers/isdn/hisax/st5481_b.c
+++ b/drivers/isdn/hisax/st5481_b.c
@@ -239,7 +239,7 @@ static void st5481B_mode(struct st5481_bcs *bcs, int mode)
 			}
 		}
 	} else {
-		// Disble B channel interrupts
+		// Disable B channel interrupts
 		st5481_usb_device_ctrl_msg(adapter, FFMSK_B1+(bcs->channel * 2), 0, NULL, NULL);
 
 		// Disable B channel FIFOs
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 1ae872bfc3ba..747645c74134 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -186,7 +186,7 @@ static inline int write_enable(struct spi_nor *nor)
 }
 
 /*
- * Send write disble instruction to the chip.
+ * Send write disable instruction to the chip.
  */
 static inline int write_disable(struct spi_nor *nor)
 {
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h
index 6d31f92ef2b6..90b3b46f85cc 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge.h
+++ b/drivers/net/ethernet/qlogic/qlge/qlge.h
@@ -1163,7 +1163,7 @@ struct ib_mac_iocb_rsp {
 	u8 opcode;		/* 0x20 */
 	u8 flags1;
 #define IB_MAC_IOCB_RSP_OI	0x01	/* Overide intr delay */
-#define IB_MAC_IOCB_RSP_I	0x02	/* Disble Intr Generation */
+#define IB_MAC_IOCB_RSP_I	0x02	/* Disable Intr Generation */
 #define IB_MAC_CSUM_ERR_MASK 0x1c	/* A mask to use for csum errs */
 #define IB_MAC_IOCB_RSP_TE	0x04	/* Checksum error */
 #define IB_MAC_IOCB_RSP_NU	0x08	/* No checksum rcvd */
diff --git a/drivers/scsi/aic7xxx/aic79xx_core.c b/drivers/scsi/aic7xxx/aic79xx_core.c
index 109e2c99e6c1..95d8f25cbcca 100644
--- a/drivers/scsi/aic7xxx/aic79xx_core.c
+++ b/drivers/scsi/aic7xxx/aic79xx_core.c
@@ -6278,7 +6278,7 @@ ahd_reset(struct ahd_softc *ahd, int reinit)
 		 * does not disable its parity logic prior to
 		 * the start of the reset.  This may cause a
 		 * parity error to be detected and thus a
-		 * spurious SERR or PERR assertion.  Disble
+		 * spurious SERR or PERR assertion.  Disable
 		 * PERR and SERR responses during the CHIPRST.
 		 */
 		mod_cmd = cmd & ~(PCIM_CMD_PERRESPEN|PCIM_CMD_SERRESPEN);
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index a2615d64d07c..79a2d8fba6b6 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -84,8 +84,7 @@ static int ep_open(struct inode *, struct file *);
 
 /* /dev/gadget/$CHIP represents ep0 and the whole device */
 enum ep0_state {
-	/* DISBLED is the initial state.
-	 */
+	/* DISABLED is the initial state. */
 	STATE_DEV_DISABLED = 0,
 
 	/* Only one open() of /dev/gadget/$CHIP; only one file tracks
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 6d6c46000e56..50aee8b7718b 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -868,7 +868,7 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci)
 
 	spin_lock_irqsave(&xhci->lock, flags);
 
-	/* disble usb3 ports Wake bits*/
+	/* disable usb3 ports Wake bits */
 	port_index = xhci->num_usb3_ports;
 	port_array = xhci->usb3_ports;
 	while (port_index--) {
@@ -879,7 +879,7 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci)
 			writel(t2, port_array[port_index]);
 	}
 
-	/* disble usb2 ports Wake bits*/
+	/* disable usb2 ports Wake bits */
 	port_index = xhci->num_usb2_ports;
 	port_array = xhci->usb2_ports;
 	while (port_index--) {
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index ad3e5158e586..c9f795e9a2ee 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -65,7 +65,7 @@ struct regulator_state {
 	int uV;	/* suspend voltage */
 	unsigned int mode; /* suspend regulator operating mode */
 	int enabled; /* is regulator enabled in this suspend state */
-	int disabled; /* is the regulator disbled in this suspend state */
+	int disabled; /* is the regulator disabled in this suspend state */
 };
 
 /**
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 0125589c7428..48851327a15e 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2669,7 +2669,7 @@ static bool css_visible(struct cgroup_subsys_state *css)
  *
  * Returns 0 on success, -errno on failure.  On failure, csses which have
  * been processed already aren't cleaned up.  The caller is responsible for
- * cleaning up with cgroup_apply_control_disble().
+ * cleaning up with cgroup_apply_control_disable().
  */
 static int cgroup_apply_control_enable(struct cgroup *cgrp)
 {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6f41548f2e32..a17ed56c8ce1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -998,7 +998,7 @@ list_update_cgroup_event(struct perf_event *event,
  */
 #define PERF_CPU_HRTIMER (1000 / HZ)
 /*
- * function must be called with interrupts disbled
+ * function must be called with interrupts disabled
  */
 static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr)
 {
diff --git a/scripts/spelling.txt b/scripts/spelling.txt
index 0458b037c8a1..6dae4df472f6 100644
--- a/scripts/spelling.txt
+++ b/scripts/spelling.txt
@@ -372,6 +372,8 @@ disassocation||disassociation
 disapear||disappear
 disapeared||disappeared
 disappared||disappeared
+disble||disable
+disbled||disabled
 disconnet||disconnect
 discontinous||discontinuous
 dispertion||dispersion
diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c
index ec1067a679da..08b1399d1da2 100644
--- a/sound/soc/amd/acp-pcm-dma.c
+++ b/sound/soc/amd/acp-pcm-dma.c
@@ -89,7 +89,7 @@ static void acp_reg_write(u32 val, void __iomem *acp_mmio, u32 reg)
 	writel(val, acp_mmio + (reg * 4));
 }
 
-/* Configure a given dma channel parameters - enable/disble,
+/* Configure a given dma channel parameters - enable/disable,
  * number of descriptors, priority
  */
 static void config_acp_dma_channel(void __iomem *acp_mmio, u8 ch_num,
-- 
cgit v1.2.3-59-g8ed1b


From 52c50ca75c534c0772b71900a29b3a71439b32ef Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Thu, 9 Mar 2017 16:16:36 -0800
Subject: powerpc/mm: handle protnone ptes on fork

We need to mark pages of parent process read only on fork.  Numa fault
pte needs a protnone ptes variant with saved write flag set.  On fork we
need to make sure we remove the saved write bit.  Instead of adding the
protnone check in the caller update ptep_set_wrprotect variants to clear
savedwrite bit.

Without this we see random segfaults in application on fork.

Fixes: c137a2757b886 ("powerpc/mm/autonuma: switch ppc64 to its own implementation of saved write")
Link: http://lkml.kernel.org/r/1488203787-17849-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 73 ++++++++++++++++------------
 1 file changed, 42 insertions(+), 31 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 1eeeb72c7015..f0b08acda5eb 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -347,23 +347,53 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 	__r;							\
 })
 
+static inline int pte_write(pte_t pte)
+{
+	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE));
+}
+
+#ifdef CONFIG_NUMA_BALANCING
+#define pte_savedwrite pte_savedwrite
+static inline bool pte_savedwrite(pte_t pte)
+{
+	/*
+	 * Saved write ptes are prot none ptes that doesn't have
+	 * privileged bit sit. We mark prot none as one which has
+	 * present and pviliged bit set and RWX cleared. To mark
+	 * protnone which used to have _PAGE_WRITE set we clear
+	 * the privileged bit.
+	 */
+	return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED));
+}
+#else
+#define pte_savedwrite pte_savedwrite
+static inline bool pte_savedwrite(pte_t pte)
+{
+	return false;
+}
+#endif
+
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pte_t *ptep)
 {
-	if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0)
-		return;
-
-	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
+	if (pte_write(*ptep))
+		pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
+	else if (unlikely(pte_savedwrite(*ptep)))
+		pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0);
 }
 
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep)
 {
-	if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0)
-		return;
-
-	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
+	/*
+	 * We should not find protnone for hugetlb, but this complete the
+	 * interface.
+	 */
+	if (pte_write(*ptep))
+		pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
+	else if (unlikely(pte_savedwrite(*ptep)))
+		pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 1);
 }
 
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
@@ -397,11 +427,6 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
 	pte_update(mm, addr, ptep, ~0UL, 0, 0);
 }
 
-static inline int pte_write(pte_t pte)
-{
-	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE));
-}
-
 static inline int pte_dirty(pte_t pte)
 {
 	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DIRTY));
@@ -466,19 +491,6 @@ static inline pte_t pte_clear_savedwrite(pte_t pte)
 	return __pte(pte_val(pte) | _PAGE_PRIVILEGED);
 }
 
-#define pte_savedwrite pte_savedwrite
-static inline bool pte_savedwrite(pte_t pte)
-{
-	/*
-	 * Saved write ptes are prot none ptes that doesn't have
-	 * privileged bit sit. We mark prot none as one which has
-	 * present and pviliged bit set and RWX cleared. To mark
-	 * protnone which used to have _PAGE_WRITE set we clear
-	 * the privileged bit.
-	 */
-	VM_BUG_ON(!pte_protnone(pte));
-	return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED));
-}
 #endif /* CONFIG_NUMA_BALANCING */
 
 static inline int pte_present(pte_t pte)
@@ -982,11 +994,10 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
 static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pmd_t *pmdp)
 {
-
-	if ((pmd_raw(*pmdp) & cpu_to_be64(_PAGE_WRITE)) == 0)
-		return;
-
-	pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
+	if (pmd_write((*pmdp)))
+		pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
+	else if (unlikely(pmd_savedwrite(*pmdp)))
+		pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED);
 }
 
 static inline int pmd_trans_huge(pmd_t pmd)
-- 
cgit v1.2.3-59-g8ed1b


From d19469e8415813cceaa494b6f538e327b9a95f3b Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Thu, 9 Mar 2017 16:16:39 -0800
Subject: power/mm: update pte_write and pte_wrprotect to handle savedwrite

We use pte_write() to check whethwer the pte entry is writable.  This is
mostly used to later mark the pte read only if it is writable.  The other
use of pte_write() is to check whether the pte_entry is writable so that
hardware page table entry can be marked accordingly.  This is used in kvm
where we look at qemu page table entry and update hardware hash page table
for the guest with correct write enable bit.

With the above, for the first usage we should also check the savedwrite
bit so that we can correctly clear the savedwite bit.  For the later, we
add a new variant __pte_write().

With this we can revert write_protect_page part of 595cd8f256d2 ("mm/ksm:
handle protnone saved writes when making page write protect").  But I left
it as it is as an example code for savedwrite check.

Fixes: c137a2757b886 ("powerpc/mm/autonuma: switch ppc64 to its own implementation of saved write")
Link: http://lkml.kernel.org/r/1488203787-17849-2-git-send-email-aneesh.kumar@linux.vnet.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 24 +++++++++++++++++++-----
 arch/powerpc/kvm/book3s_64_mmu_hv.c          |  2 +-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c          |  2 +-
 3 files changed, 21 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index f0b08acda5eb..ec1e731e6a2d 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -347,7 +347,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 	__r;							\
 })
 
-static inline int pte_write(pte_t pte)
+static inline int __pte_write(pte_t pte)
 {
 	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE));
 }
@@ -373,11 +373,16 @@ static inline bool pte_savedwrite(pte_t pte)
 }
 #endif
 
+static inline int pte_write(pte_t pte)
+{
+	return __pte_write(pte) || pte_savedwrite(pte);
+}
+
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pte_t *ptep)
 {
-	if (pte_write(*ptep))
+	if (__pte_write(*ptep))
 		pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
 	else if (unlikely(pte_savedwrite(*ptep)))
 		pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0);
@@ -390,7 +395,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 	 * We should not find protnone for hugetlb, but this complete the
 	 * interface.
 	 */
-	if (pte_write(*ptep))
+	if (__pte_write(*ptep))
 		pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
 	else if (unlikely(pte_savedwrite(*ptep)))
 		pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 1);
@@ -490,7 +495,13 @@ static inline pte_t pte_clear_savedwrite(pte_t pte)
 	VM_BUG_ON(!pte_protnone(pte));
 	return __pte(pte_val(pte) | _PAGE_PRIVILEGED);
 }
-
+#else
+#define pte_clear_savedwrite pte_clear_savedwrite
+static inline pte_t pte_clear_savedwrite(pte_t pte)
+{
+	VM_WARN_ON(1);
+	return __pte(pte_val(pte) & ~_PAGE_WRITE);
+}
 #endif /* CONFIG_NUMA_BALANCING */
 
 static inline int pte_present(pte_t pte)
@@ -518,6 +529,8 @@ static inline unsigned long pte_pfn(pte_t pte)
 /* Generic modifiers for PTE bits */
 static inline pte_t pte_wrprotect(pte_t pte)
 {
+	if (unlikely(pte_savedwrite(pte)))
+		return pte_clear_savedwrite(pte);
 	return __pte(pte_val(pte) & ~_PAGE_WRITE);
 }
 
@@ -938,6 +951,7 @@ static inline int pmd_protnone(pmd_t pmd)
 
 #define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)		pte_write(pmd_pte(pmd))
+#define __pmd_write(pmd)	__pte_write(pmd_pte(pmd))
 #define pmd_savedwrite(pmd)	pte_savedwrite(pmd_pte(pmd))
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -994,7 +1008,7 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
 static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pmd_t *pmdp)
 {
-	if (pmd_write((*pmdp)))
+	if (__pmd_write((*pmdp)))
 		pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
 	else if (unlikely(pmd_savedwrite(*pmdp)))
 		pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index f3158fb16de3..8c68145ba1bd 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -601,7 +601,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 							 hva, NULL, NULL);
 			if (ptep) {
 				pte = kvmppc_read_update_linux_pte(ptep, 1);
-				if (pte_write(pte))
+				if (__pte_write(pte))
 					write_ok = 1;
 			}
 			local_irq_restore(flags);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 6fca970373ee..ce6f2121fffe 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -256,7 +256,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 		}
 		pte = kvmppc_read_update_linux_pte(ptep, writing);
 		if (pte_present(pte) && !pte_protnone(pte)) {
-			if (writing && !pte_write(pte))
+			if (writing && !__pte_write(pte))
 				/* make the actual HPTE be read-only */
 				ptel = hpte_make_readonly(ptel);
 			is_ci = pte_ci(pte);
-- 
cgit v1.2.3-59-g8ed1b


From ef947b2529f918d9606533eb9c32b187ed6a5ede Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 9 Mar 2017 16:16:42 -0800
Subject: x86, mm: fix gup_pte_range() vs DAX mappings

gup_pte_range() fails to check pte_allows_gup() before translating a DAX
pte entry, pte_devmap(), to a page.  This allows writes to read-only
mappings, and bypasses the DAX cacheline dirty tracking due to missed
'mkwrite' faults.  The gup_huge_pmd() path and the gup_huge_pud() path
correctly check pte_allows_gup() before checking for _devmap() entries.

Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings")
Link: http://lkml.kernel.org/r/148804251312.36605.12665024794196605053.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reported-by: Dave Hansen <dave.hansen@linux.intel.com>
Reported-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Xiong Zhou <xzhou@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/gup.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 99c7805a9693..9d32ee608807 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -120,6 +120,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 			return 0;
 		}
 
+		if (!pte_allows_gup(pte_val(pte), write)) {
+			pte_unmap(ptep);
+			return 0;
+		}
+
 		if (pte_devmap(pte)) {
 			pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
 			if (unlikely(!pgmap)) {
@@ -127,8 +132,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 				pte_unmap(ptep);
 				return 0;
 			}
-		} else if (!pte_allows_gup(pte_val(pte), write) ||
-			   pte_special(pte)) {
+		} else if (pte_special(pte)) {
 			pte_unmap(ptep);
 			return 0;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From b2e593e271b0760ebc8999e5f9dd068ae2b9d30a Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 9 Mar 2017 16:16:45 -0800
Subject: x86, mm: unify exit paths in gup_pte_range()

All exit paths from gup_pte_range() require pte_unmap() of the original
pte page before returning.  Refactor the code to have a single exit
point to do the unmap.

This mirrors the flow of the generic gup_pte_range() in mm/gup.c.

Link: http://lkml.kernel.org/r/148804251828.36605.14910389618497006945.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/gup.c | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 9d32ee608807..1f3b6ef105cd 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -106,36 +106,35 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
 {
 	struct dev_pagemap *pgmap = NULL;
-	int nr_start = *nr;
-	pte_t *ptep;
+	int nr_start = *nr, ret = 0;
+	pte_t *ptep, *ptem;
 
-	ptep = pte_offset_map(&pmd, addr);
+	/*
+	 * Keep the original mapped PTE value (ptem) around since we
+	 * might increment ptep off the end of the page when finishing
+	 * our loop iteration.
+	 */
+	ptem = ptep = pte_offset_map(&pmd, addr);
 	do {
 		pte_t pte = gup_get_pte(ptep);
 		struct page *page;
 
 		/* Similar to the PMD case, NUMA hinting must take slow path */
-		if (pte_protnone(pte)) {
-			pte_unmap(ptep);
-			return 0;
-		}
+		if (pte_protnone(pte))
+			break;
 
-		if (!pte_allows_gup(pte_val(pte), write)) {
-			pte_unmap(ptep);
-			return 0;
-		}
+		if (!pte_allows_gup(pte_val(pte), write))
+			break;
 
 		if (pte_devmap(pte)) {
 			pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
 			if (unlikely(!pgmap)) {
 				undo_dev_pagemap(nr, nr_start, pages);
-				pte_unmap(ptep);
-				return 0;
+				break;
 			}
-		} else if (pte_special(pte)) {
-			pte_unmap(ptep);
-			return 0;
-		}
+		} else if (pte_special(pte))
+			break;
+
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
 		page = pte_page(pte);
 		get_page(page);
@@ -145,9 +144,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 		(*nr)++;
 
 	} while (ptep++, addr += PAGE_SIZE, addr != end);
-	pte_unmap(ptep - 1);
+	if (addr == end)
+		ret = 1;
+	pte_unmap(ptem);
 
-	return 1;
+	return ret;
 }
 
 static inline void get_head_page_multiple(struct page *page, int nr)
-- 
cgit v1.2.3-59-g8ed1b


From ca5b58ea3db88b5e69ba2a1f6bc3cf239cdcc64f Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Date: Thu, 9 Mar 2017 16:17:34 -0800
Subject: sh: cayman: IDE support fix

Remove incorrect CONFIG_IDE ifdef (CONFIG_IDE config option is for
internal drivers/ide/ use) and make IDE hardware interface always
initialized (not only when IDE subsystem is built-in).

This patch allows Cayman board to work with modular IDE subsystem
support and removes the requirement of having the whole core IDE
subsystem built-in when using libata PATA support.

Link: http://lkml.kernel.org/r/1990884.yFoE6lSB9G@amdc3058
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Rich Felker <dalias@libc.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sh/boards/mach-cayman/setup.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch')

diff --git a/arch/sh/boards/mach-cayman/setup.c b/arch/sh/boards/mach-cayman/setup.c
index 340fd40b381d..9c292c27e0d7 100644
--- a/arch/sh/boards/mach-cayman/setup.c
+++ b/arch/sh/boards/mach-cayman/setup.c
@@ -128,7 +128,6 @@ static int __init smsc_superio_setup(void)
 	SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_PRIMARY_INT_INDEX);
 	SMSC_SUPERIO_WRITE_INDEXED(12, SMSC_SECONDARY_INT_INDEX);
 
-#ifdef CONFIG_IDE
 	/*
 	 * Only IDE1 exists on the Cayman
 	 */
@@ -158,7 +157,6 @@ static int __init smsc_superio_setup(void)
 	SMSC_SUPERIO_WRITE_INDEXED(0x01, 0xc5); /* GP45 = IDE1_IRQ */
 	SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc6); /* GP46 = nIOROP */
 	SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc7); /* GP47 = nIOWOP */
-#endif
 
 	/* Exit the configuration state */
 	outb(SMSC_EXIT_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR);
-- 
cgit v1.2.3-59-g8ed1b


From 1363875bdb6317a2d0798284d7aaf320f0782f6d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Tue, 28 Feb 2017 12:00:46 +1000
Subject: powerpc/64s: fix handling of non-synchronous machine checks

A synchronous machine check is an exception raised by the attempt to
execute the current instruction. If the error can't be corrected, it
can make sense to SIGBUS the currently running process.

In other cases, the error condition is not related to the current
instruction, so killing the current process is not the right thing to
do.

Today, all machine checks are MCE_SEV_ERROR_SYNC, so this has no
practical change. It will be used to handle POWER9 asynchronous
machine checks.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/opal.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 86d9fde93c17..e0f856bfbfe8 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -395,7 +395,6 @@ static int opal_recover_mce(struct pt_regs *regs,
 					struct machine_check_event *evt)
 {
 	int recovered = 0;
-	uint64_t ea = get_mce_fault_addr(evt);
 
 	if (!(regs->msr & MSR_RI)) {
 		/* If MSR_RI isn't set, we cannot recover */
@@ -404,26 +403,18 @@ static int opal_recover_mce(struct pt_regs *regs,
 	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
 		/* Platform corrected itself */
 		recovered = 1;
-	} else if (ea && !is_kernel_addr(ea)) {
+	} else if (evt->severity == MCE_SEV_FATAL) {
+		/* Fatal machine check */
+		pr_err("Machine check interrupt is fatal\n");
+		recovered = 0;
+	} else if ((evt->severity == MCE_SEV_ERROR_SYNC) &&
+			(user_mode(regs) && !is_global_init(current))) {
 		/*
-		 * Faulting address is not in kernel text. We should be fine.
-		 * We need to find which process uses this address.
 		 * For now, kill the task if we have received exception when
 		 * in userspace.
 		 *
 		 * TODO: Queue up this address for hwpoisioning later.
 		 */
-		if (user_mode(regs) && !is_global_init(current)) {
-			_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
-			recovered = 1;
-		} else
-			recovered = 0;
-	} else if (user_mode(regs) && !is_global_init(current) &&
-		evt->severity == MCE_SEV_ERROR_SYNC) {
-		/*
-		 * If we have received a synchronous error when in userspace
-		 * kill the task.
-		 */
 		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
 		recovered = 1;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From c1bbf387d6191e6e18f3adc4db45b922822c2ba4 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Tue, 28 Feb 2017 12:00:47 +1000
Subject: powerpc/64s: allow machine check handler to set severity and
 initiator

Currently severity and initiator are always set to MCE_SEV_ERROR_SYNC and
MCE_INITIATOR_CPU in the core mce code. Allow them to be set by the
machine specific mce handlers.

No functional change for existing handlers.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/mce.h  | 3 ++-
 arch/powerpc/kernel/mce.c       | 5 +++--
 arch/powerpc/kernel/mce_power.c | 6 ++++++
 3 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index f97d8cb6bdf6..b2a5865ccd87 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -177,7 +177,8 @@ struct mce_error_info {
 		enum MCE_EratErrorType erat_error_type:8;
 		enum MCE_TlbErrorType tlb_error_type:8;
 	} u;
-	uint8_t		reserved[2];
+	enum MCE_Severity	severity:8;
+	enum MCE_Initiator	initiator:8;
 };
 
 #define MAX_MC_EVT	100
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index c6923ff45131..949507277436 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -90,13 +90,14 @@ void save_mce_event(struct pt_regs *regs, long handled,
 	mce->gpr3 = regs->gpr[3];
 	mce->in_use = 1;
 
-	mce->initiator = MCE_INITIATOR_CPU;
 	/* Mark it recovered if we have handled it and MSR(RI=1). */
 	if (handled && (regs->msr & MSR_RI))
 		mce->disposition = MCE_DISPOSITION_RECOVERED;
 	else
 		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
-	mce->severity = MCE_SEV_ERROR_SYNC;
+
+	mce->initiator = mce_err->initiator;
+	mce->severity = mce_err->severity;
 
 	/*
 	 * Populate the mce error_type and type-specific error_type.
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 7353991c4ece..c37fc5fdd433 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -281,6 +281,9 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs)
 	long handled = 1;
 	struct mce_error_info mce_error_info = { 0 };
 
+	mce_error_info.severity = MCE_SEV_ERROR_SYNC;
+	mce_error_info.initiator = MCE_INITIATOR_CPU;
+
 	srr1 = regs->msr;
 	nip = regs->nip;
 
@@ -352,6 +355,9 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs)
 	long handled = 1;
 	struct mce_error_info mce_error_info = { 0 };
 
+	mce_error_info.severity = MCE_SEV_ERROR_SYNC;
+	mce_error_info.initiator = MCE_INITIATOR_CPU;
+
 	srr1 = regs->msr;
 	nip = regs->nip;
 
-- 
cgit v1.2.3-59-g8ed1b


From 7b9f71f974a12740e79e918cfd58c2fce0b5b580 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Tue, 28 Feb 2017 12:00:48 +1000
Subject: powerpc/64s: POWER9 machine check handler

Add POWER9 machine check handler. There are several new types of errors
added, so logging messages for those are also added.

This doesn't attempt to reuse any of the P7/8 defines or functions,
because that becomes too complex. The better option in future is to use
a table driven approach.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/bitops.h |   4 +
 arch/powerpc/include/asm/mce.h    | 105 +++++++++++++++++
 arch/powerpc/kernel/cputable.c    |   3 +
 arch/powerpc/kernel/mce.c         |  83 ++++++++++++++
 arch/powerpc/kernel/mce_power.c   | 231 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 426 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index 73eb794d6163..bc5fdfd22788 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -51,6 +51,10 @@
 #define PPC_BIT(bit)		(1UL << PPC_BITLSHIFT(bit))
 #define PPC_BITMASK(bs, be)	((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
 
+/* Put a PPC bit into a "normal" bit position */
+#define PPC_BITEXTRACT(bits, ppc_bit, dst_bit)			\
+	((((bits) >> PPC_BITLSHIFT(ppc_bit)) & 1) << (dst_bit))
+
 #include <asm/barrier.h>
 
 /* Macro for generating the ***_bits() functions */
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index b2a5865ccd87..ed62efe01e49 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -66,6 +66,55 @@
 
 #define P8_DSISR_MC_SLB_ERRORS		(P7_DSISR_MC_SLB_ERRORS | \
 					 P8_DSISR_MC_ERAT_MULTIHIT_SEC)
+
+/*
+ * Machine Check bits on power9
+ */
+#define P9_SRR1_MC_LOADSTORE(srr1)	(((srr1) >> PPC_BITLSHIFT(42)) & 1)
+
+#define P9_SRR1_MC_IFETCH(srr1)	(	\
+	PPC_BITEXTRACT(srr1, 45, 0) |	\
+	PPC_BITEXTRACT(srr1, 44, 1) |	\
+	PPC_BITEXTRACT(srr1, 43, 2) |	\
+	PPC_BITEXTRACT(srr1, 36, 3) )
+
+/* 0 is reserved */
+#define P9_SRR1_MC_IFETCH_UE				1
+#define P9_SRR1_MC_IFETCH_SLB_PARITY			2
+#define P9_SRR1_MC_IFETCH_SLB_MULTIHIT			3
+#define P9_SRR1_MC_IFETCH_ERAT_MULTIHIT			4
+#define P9_SRR1_MC_IFETCH_TLB_MULTIHIT			5
+#define P9_SRR1_MC_IFETCH_UE_TLB_RELOAD			6
+/* 7 is reserved */
+#define P9_SRR1_MC_IFETCH_LINK_TIMEOUT			8
+#define P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT	9
+/* 10 ? */
+#define P9_SRR1_MC_IFETCH_RA			11
+#define P9_SRR1_MC_IFETCH_RA_TABLEWALK		12
+#define P9_SRR1_MC_IFETCH_RA_ASYNC_STORE		13
+#define P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT	14
+#define P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN	15
+
+/* DSISR bits for machine check (On Power9) */
+#define P9_DSISR_MC_UE					(PPC_BIT(48))
+#define P9_DSISR_MC_UE_TABLEWALK			(PPC_BIT(49))
+#define P9_DSISR_MC_LINK_LOAD_TIMEOUT			(PPC_BIT(50))
+#define P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT		(PPC_BIT(51))
+#define P9_DSISR_MC_ERAT_MULTIHIT			(PPC_BIT(52))
+#define P9_DSISR_MC_TLB_MULTIHIT_MFTLB			(PPC_BIT(53))
+#define P9_DSISR_MC_USER_TLBIE				(PPC_BIT(54))
+#define P9_DSISR_MC_SLB_PARITY_MFSLB			(PPC_BIT(55))
+#define P9_DSISR_MC_SLB_MULTIHIT_MFSLB			(PPC_BIT(56))
+#define P9_DSISR_MC_RA_LOAD				(PPC_BIT(57))
+#define P9_DSISR_MC_RA_TABLEWALK			(PPC_BIT(58))
+#define P9_DSISR_MC_RA_TABLEWALK_FOREIGN		(PPC_BIT(59))
+#define P9_DSISR_MC_RA_FOREIGN				(PPC_BIT(60))
+
+/* SLB error bits */
+#define P9_DSISR_MC_SLB_ERRORS		(P9_DSISR_MC_ERAT_MULTIHIT | \
+					 P9_DSISR_MC_SLB_PARITY_MFSLB | \
+					 P9_DSISR_MC_SLB_MULTIHIT_MFSLB)
+
 enum MCE_Version {
 	MCE_V1 = 1,
 };
@@ -93,6 +142,9 @@ enum MCE_ErrorType {
 	MCE_ERROR_TYPE_SLB = 2,
 	MCE_ERROR_TYPE_ERAT = 3,
 	MCE_ERROR_TYPE_TLB = 4,
+	MCE_ERROR_TYPE_USER = 5,
+	MCE_ERROR_TYPE_RA = 6,
+	MCE_ERROR_TYPE_LINK = 7,
 };
 
 enum MCE_UeErrorType {
@@ -121,6 +173,32 @@ enum MCE_TlbErrorType {
 	MCE_TLB_ERROR_MULTIHIT = 2,
 };
 
+enum MCE_UserErrorType {
+	MCE_USER_ERROR_INDETERMINATE = 0,
+	MCE_USER_ERROR_TLBIE = 1,
+};
+
+enum MCE_RaErrorType {
+	MCE_RA_ERROR_INDETERMINATE = 0,
+	MCE_RA_ERROR_IFETCH = 1,
+	MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH = 2,
+	MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN = 3,
+	MCE_RA_ERROR_LOAD = 4,
+	MCE_RA_ERROR_STORE = 5,
+	MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 6,
+	MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN = 7,
+	MCE_RA_ERROR_LOAD_STORE_FOREIGN = 8,
+};
+
+enum MCE_LinkErrorType {
+	MCE_LINK_ERROR_INDETERMINATE = 0,
+	MCE_LINK_ERROR_IFETCH_TIMEOUT = 1,
+	MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT = 2,
+	MCE_LINK_ERROR_LOAD_TIMEOUT = 3,
+	MCE_LINK_ERROR_STORE_TIMEOUT = 4,
+	MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT = 5,
+};
+
 struct machine_check_event {
 	enum MCE_Version	version:8;	/* 0x00 */
 	uint8_t			in_use;		/* 0x01 */
@@ -166,6 +244,30 @@ struct machine_check_event {
 			uint64_t	effective_address;
 			uint8_t		reserved_2[16];
 		} tlb_error;
+
+		struct {
+			enum MCE_UserErrorType user_error_type:8;
+			uint8_t		effective_address_provided;
+			uint8_t		reserved_1[6];
+			uint64_t	effective_address;
+			uint8_t		reserved_2[16];
+		} user_error;
+
+		struct {
+			enum MCE_RaErrorType ra_error_type:8;
+			uint8_t		effective_address_provided;
+			uint8_t		reserved_1[6];
+			uint64_t	effective_address;
+			uint8_t		reserved_2[16];
+		} ra_error;
+
+		struct {
+			enum MCE_LinkErrorType link_error_type:8;
+			uint8_t		effective_address_provided;
+			uint8_t		reserved_1[6];
+			uint64_t	effective_address;
+			uint8_t		reserved_2[16];
+		} link_error;
 	} u;
 };
 
@@ -176,6 +278,9 @@ struct mce_error_info {
 		enum MCE_SlbErrorType slb_error_type:8;
 		enum MCE_EratErrorType erat_error_type:8;
 		enum MCE_TlbErrorType tlb_error_type:8;
+		enum MCE_UserErrorType user_error_type:8;
+		enum MCE_RaErrorType ra_error_type:8;
+		enum MCE_LinkErrorType link_error_type:8;
 	} u;
 	enum MCE_Severity	severity:8;
 	enum MCE_Initiator	initiator:8;
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index bb7a1890aeb7..e79b9daa873c 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -77,6 +77,7 @@ extern void __flush_tlb_power8(unsigned int action);
 extern void __flush_tlb_power9(unsigned int action);
 extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
 extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
+extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
 #endif /* CONFIG_PPC64 */
 #if defined(CONFIG_E500)
 extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@@ -540,6 +541,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_setup		= __setup_cpu_power9,
 		.cpu_restore		= __restore_cpu_power9,
 		.flush_tlb		= __flush_tlb_power9,
+		.machine_check_early	= __machine_check_early_realmode_p9,
 		.platform		= "power9",
 	},
 	{	/* Power9 */
@@ -559,6 +561,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_setup		= __setup_cpu_power9,
 		.cpu_restore		= __restore_cpu_power9,
 		.flush_tlb		= __flush_tlb_power9,
+		.machine_check_early	= __machine_check_early_realmode_p9,
 		.platform		= "power9",
 	},
 	{	/* Cell Broadband Engine */
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 949507277436..a1475e6aef3a 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -58,6 +58,15 @@ static void mce_set_error_info(struct machine_check_event *mce,
 	case MCE_ERROR_TYPE_TLB:
 		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
 		break;
+	case MCE_ERROR_TYPE_USER:
+		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
+		break;
+	case MCE_ERROR_TYPE_RA:
+		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
+		break;
+	case MCE_ERROR_TYPE_LINK:
+		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
+		break;
 	case MCE_ERROR_TYPE_UNKNOWN:
 	default:
 		break;
@@ -116,6 +125,15 @@ void save_mce_event(struct pt_regs *regs, long handled,
 	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
 		mce->u.erat_error.effective_address_provided = true;
 		mce->u.erat_error.effective_address = addr;
+	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
+		mce->u.user_error.effective_address_provided = true;
+		mce->u.user_error.effective_address = addr;
+	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
+		mce->u.ra_error.effective_address_provided = true;
+		mce->u.ra_error.effective_address = addr;
+	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
+		mce->u.link_error.effective_address_provided = true;
+		mce->u.link_error.effective_address = addr;
 	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
 		mce->u.ue_error.effective_address_provided = true;
 		mce->u.ue_error.effective_address = addr;
@@ -240,6 +258,29 @@ void machine_check_print_event_info(struct machine_check_event *evt)
 		"Parity",
 		"Multihit",
 	};
+	static const char *mc_user_types[] = {
+		"Indeterminate",
+		"tlbie(l) invalid",
+	};
+	static const char *mc_ra_types[] = {
+		"Indeterminate",
+		"Instruction fetch (bad)",
+		"Page table walk ifetch (bad)",
+		"Page table walk ifetch (foreign)",
+		"Load (bad)",
+		"Store (bad)",
+		"Page table walk Load/Store (bad)",
+		"Page table walk Load/Store (foreign)",
+		"Load/Store (foreign)",
+	};
+	static const char *mc_link_types[] = {
+		"Indeterminate",
+		"Instruction fetch (timeout)",
+		"Page table walk ifetch (timeout)",
+		"Load (timeout)",
+		"Store (timeout)",
+		"Page table walk Load/Store (timeout)",
+	};
 
 	/* Print things out */
 	if (evt->version != MCE_V1) {
@@ -316,6 +357,36 @@ void machine_check_print_event_info(struct machine_check_event *evt)
 			printk("%s    Effective address: %016llx\n",
 			       level, evt->u.tlb_error.effective_address);
 		break;
+	case MCE_ERROR_TYPE_USER:
+		subtype = evt->u.user_error.user_error_type <
+			ARRAY_SIZE(mc_user_types) ?
+			mc_user_types[evt->u.user_error.user_error_type]
+			: "Unknown";
+		printk("%s  Error type: User [%s]\n", level, subtype);
+		if (evt->u.user_error.effective_address_provided)
+			printk("%s    Effective address: %016llx\n",
+			       level, evt->u.user_error.effective_address);
+		break;
+	case MCE_ERROR_TYPE_RA:
+		subtype = evt->u.ra_error.ra_error_type <
+			ARRAY_SIZE(mc_ra_types) ?
+			mc_ra_types[evt->u.ra_error.ra_error_type]
+			: "Unknown";
+		printk("%s  Error type: Real address [%s]\n", level, subtype);
+		if (evt->u.ra_error.effective_address_provided)
+			printk("%s    Effective address: %016llx\n",
+			       level, evt->u.ra_error.effective_address);
+		break;
+	case MCE_ERROR_TYPE_LINK:
+		subtype = evt->u.link_error.link_error_type <
+			ARRAY_SIZE(mc_link_types) ?
+			mc_link_types[evt->u.link_error.link_error_type]
+			: "Unknown";
+		printk("%s  Error type: Link [%s]\n", level, subtype);
+		if (evt->u.link_error.effective_address_provided)
+			printk("%s    Effective address: %016llx\n",
+			       level, evt->u.link_error.effective_address);
+		break;
 	default:
 	case MCE_ERROR_TYPE_UNKNOWN:
 		printk("%s  Error type: Unknown\n", level);
@@ -342,6 +413,18 @@ uint64_t get_mce_fault_addr(struct machine_check_event *evt)
 		if (evt->u.tlb_error.effective_address_provided)
 			return evt->u.tlb_error.effective_address;
 		break;
+	case MCE_ERROR_TYPE_USER:
+		if (evt->u.user_error.effective_address_provided)
+			return evt->u.user_error.effective_address;
+		break;
+	case MCE_ERROR_TYPE_RA:
+		if (evt->u.ra_error.effective_address_provided)
+			return evt->u.ra_error.effective_address;
+		break;
+	case MCE_ERROR_TYPE_LINK:
+		if (evt->u.link_error.effective_address_provided)
+			return evt->u.link_error.effective_address;
+		break;
 	default:
 	case MCE_ERROR_TYPE_UNKNOWN:
 		break;
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index c37fc5fdd433..763d6f58caa8 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -116,6 +116,51 @@ static void flush_and_reload_slb(void)
 }
 #endif
 
+static void flush_erat(void)
+{
+	asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
+}
+
+#define MCE_FLUSH_SLB 1
+#define MCE_FLUSH_TLB 2
+#define MCE_FLUSH_ERAT 3
+
+static int mce_flush(int what)
+{
+#ifdef CONFIG_PPC_STD_MMU_64
+	if (what == MCE_FLUSH_SLB) {
+		flush_and_reload_slb();
+		return 1;
+	}
+#endif
+	if (what == MCE_FLUSH_ERAT) {
+		flush_erat();
+		return 1;
+	}
+	if (what == MCE_FLUSH_TLB) {
+		if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
+			cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int mce_handle_flush_derrors(uint64_t dsisr, uint64_t slb, uint64_t tlb, uint64_t erat)
+{
+	if ((dsisr & slb) && mce_flush(MCE_FLUSH_SLB))
+		dsisr &= ~slb;
+	if ((dsisr & erat) && mce_flush(MCE_FLUSH_ERAT))
+		dsisr &= ~erat;
+	if ((dsisr & tlb) && mce_flush(MCE_FLUSH_TLB))
+		dsisr &= ~tlb;
+	/* Any other errors we don't understand? */
+	if (dsisr)
+		return 0;
+	return 1;
+}
+
 static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
 {
 	long handled = 1;
@@ -378,3 +423,189 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs)
 	save_mce_event(regs, handled, &mce_error_info, nip, addr);
 	return handled;
 }
+
+static int mce_handle_derror_p9(struct pt_regs *regs)
+{
+	uint64_t dsisr = regs->dsisr;
+
+	return mce_handle_flush_derrors(dsisr,
+			P9_DSISR_MC_SLB_PARITY_MFSLB |
+			P9_DSISR_MC_SLB_MULTIHIT_MFSLB,
+
+			P9_DSISR_MC_TLB_MULTIHIT_MFTLB,
+
+			P9_DSISR_MC_ERAT_MULTIHIT);
+}
+
+static int mce_handle_ierror_p9(struct pt_regs *regs)
+{
+	uint64_t srr1 = regs->msr;
+
+	switch (P9_SRR1_MC_IFETCH(srr1)) {
+	case P9_SRR1_MC_IFETCH_SLB_PARITY:
+	case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
+		return mce_flush(MCE_FLUSH_SLB);
+	case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
+		return mce_flush(MCE_FLUSH_TLB);
+	case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
+		return mce_flush(MCE_FLUSH_ERAT);
+	default:
+		return 0;
+	}
+}
+
+static void mce_get_derror_p9(struct pt_regs *regs,
+		struct mce_error_info *mce_err, uint64_t *addr)
+{
+	uint64_t dsisr = regs->dsisr;
+
+	mce_err->severity = MCE_SEV_ERROR_SYNC;
+	mce_err->initiator = MCE_INITIATOR_CPU;
+
+	if (dsisr & P9_DSISR_MC_USER_TLBIE)
+		*addr = regs->nip;
+	else
+		*addr = regs->dar;
+
+	if (dsisr & P9_DSISR_MC_UE) {
+		mce_err->error_type = MCE_ERROR_TYPE_UE;
+		mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
+	} else if (dsisr & P9_DSISR_MC_UE_TABLEWALK) {
+		mce_err->error_type = MCE_ERROR_TYPE_UE;
+		mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+	} else if (dsisr & P9_DSISR_MC_LINK_LOAD_TIMEOUT) {
+		mce_err->error_type = MCE_ERROR_TYPE_LINK;
+		mce_err->u.link_error_type = MCE_LINK_ERROR_LOAD_TIMEOUT;
+	} else if (dsisr & P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT) {
+		mce_err->error_type = MCE_ERROR_TYPE_LINK;
+		mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT;
+	} else if (dsisr & P9_DSISR_MC_ERAT_MULTIHIT) {
+		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+	} else if (dsisr & P9_DSISR_MC_TLB_MULTIHIT_MFTLB) {
+		mce_err->error_type = MCE_ERROR_TYPE_TLB;
+		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+	} else if (dsisr & P9_DSISR_MC_USER_TLBIE) {
+		mce_err->error_type = MCE_ERROR_TYPE_USER;
+		mce_err->u.user_error_type = MCE_USER_ERROR_TLBIE;
+	} else if (dsisr & P9_DSISR_MC_SLB_PARITY_MFSLB) {
+		mce_err->error_type = MCE_ERROR_TYPE_SLB;
+		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+	} else if (dsisr & P9_DSISR_MC_SLB_MULTIHIT_MFSLB) {
+		mce_err->error_type = MCE_ERROR_TYPE_SLB;
+		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+	} else if (dsisr & P9_DSISR_MC_RA_LOAD) {
+		mce_err->error_type = MCE_ERROR_TYPE_RA;
+		mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD;
+	} else if (dsisr & P9_DSISR_MC_RA_TABLEWALK) {
+		mce_err->error_type = MCE_ERROR_TYPE_RA;
+		mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+	} else if (dsisr & P9_DSISR_MC_RA_TABLEWALK_FOREIGN) {
+		mce_err->error_type = MCE_ERROR_TYPE_RA;
+		mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
+	} else if (dsisr & P9_DSISR_MC_RA_FOREIGN) {
+		mce_err->error_type = MCE_ERROR_TYPE_RA;
+		mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD_STORE_FOREIGN;
+	}
+}
+
+static void mce_get_ierror_p9(struct pt_regs *regs,
+		struct mce_error_info *mce_err, uint64_t *addr)
+{
+	uint64_t srr1 = regs->msr;
+
+	switch (P9_SRR1_MC_IFETCH(srr1)) {
+	case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE:
+	case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT:
+		mce_err->severity = MCE_SEV_FATAL;
+		break;
+	default:
+		mce_err->severity = MCE_SEV_ERROR_SYNC;
+		break;
+	}
+
+	mce_err->initiator = MCE_INITIATOR_CPU;
+
+	*addr = regs->nip;
+
+	switch (P9_SRR1_MC_IFETCH(srr1)) {
+	case P9_SRR1_MC_IFETCH_UE:
+		mce_err->error_type = MCE_ERROR_TYPE_UE;
+		mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
+		break;
+	case P9_SRR1_MC_IFETCH_SLB_PARITY:
+		mce_err->error_type = MCE_ERROR_TYPE_SLB;
+		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+		break;
+	case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
+		mce_err->error_type = MCE_ERROR_TYPE_SLB;
+		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+		break;
+	case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
+		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+		break;
+	case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
+		mce_err->error_type = MCE_ERROR_TYPE_TLB;
+		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+		break;
+	case P9_SRR1_MC_IFETCH_UE_TLB_RELOAD:
+		mce_err->error_type = MCE_ERROR_TYPE_UE;
+		mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
+		break;
+	case P9_SRR1_MC_IFETCH_LINK_TIMEOUT:
+		mce_err->error_type = MCE_ERROR_TYPE_LINK;
+		mce_err->u.link_error_type = MCE_LINK_ERROR_IFETCH_TIMEOUT;
+		break;
+	case P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT:
+		mce_err->error_type = MCE_ERROR_TYPE_LINK;
+		mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT;
+		break;
+	case P9_SRR1_MC_IFETCH_RA:
+		mce_err->error_type = MCE_ERROR_TYPE_RA;
+		mce_err->u.ra_error_type = MCE_RA_ERROR_IFETCH;
+		break;
+	case P9_SRR1_MC_IFETCH_RA_TABLEWALK:
+		mce_err->error_type = MCE_ERROR_TYPE_RA;
+		mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH;
+		break;
+	case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE:
+		mce_err->error_type = MCE_ERROR_TYPE_RA;
+		mce_err->u.ra_error_type = MCE_RA_ERROR_STORE;
+		break;
+	case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT:
+		mce_err->error_type = MCE_ERROR_TYPE_LINK;
+		mce_err->u.link_error_type = MCE_LINK_ERROR_STORE_TIMEOUT;
+		break;
+	case P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN:
+		mce_err->error_type = MCE_ERROR_TYPE_RA;
+		mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN;
+		break;
+	default:
+		break;
+	}
+}
+
+long __machine_check_early_realmode_p9(struct pt_regs *regs)
+{
+	uint64_t nip, addr;
+	long handled;
+	struct mce_error_info mce_error_info = { 0 };
+
+	nip = regs->nip;
+
+	if (P9_SRR1_MC_LOADSTORE(regs->msr)) {
+		handled = mce_handle_derror_p9(regs);
+		mce_get_derror_p9(regs, &mce_error_info, &addr);
+	} else {
+		handled = mce_handle_ierror_p9(regs);
+		mce_get_ierror_p9(regs, &mce_error_info, &addr);
+	}
+
+	/* Handle UE error. */
+	if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
+		handled = mce_handle_ue_error(regs);
+
+	save_mce_event(regs, handled, &mce_error_info, nip, addr);
+	return handled;
+}
-- 
cgit v1.2.3-59-g8ed1b


From a1016e94cce9fb6ea56d7602263783e2d95d6e92 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 9 Mar 2017 17:14:32 +0000
Subject: ARM: wire up statx syscall

Wire up the new statx syscall for ARM.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/tools/syscall.tbl | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 3c2cb5d5adfa..0bb0e9c6376c 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -411,3 +411,4 @@
 394	common	pkey_mprotect		sys_pkey_mprotect
 395	common	pkey_alloc		sys_pkey_alloc
 396	common	pkey_free		sys_pkey_free
+397	common	statx			sys_statx
-- 
cgit v1.2.3-59-g8ed1b


From bba8376aea1dcbbe22bbda118c52abee317c7609 Mon Sep 17 00:00:00 2001
From: Matjaz Hegedic <matjaz.hegedic@gmail.com>
Date: Thu, 9 Mar 2017 14:00:17 +0100
Subject: x86/reboot/quirks: Fix typo in ASUS EeeBook X205TA reboot quirk

The reboot quirk for ASUS EeeBook X205TA contains a typo in
DMI_PRODUCT_NAME, improperly referring to X205TAW instead of
X205TA, which prevents the quirk from being triggered. The
model X205TAW already has a reboot quirk of its own.

This fix simply removes the inappropriate final letter W.

Fixes: 90b28ded88dd ("x86/reboot/quirks: Add ASUS EeeBook X205TA reboot quirk")
Signed-off-by: Matjaz Hegedic <matjaz.hegedic@gmail.com>
Link: http://lkml.kernel.org/r/1489064417-7445-1-git-send-email-matjaz.hegedic@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/reboot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 4194d6f9bb29..067f9813fd2c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -228,7 +228,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 		.ident = "ASUS EeeBook X205TA",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X205TA"),
 		},
 	},
 	{	/* Handle problems with rebooting on ASUS EeeBook X205TAW */
-- 
cgit v1.2.3-59-g8ed1b


From cb6950b7152fb3760942f9cb16bd2a35e5a1bfd1 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Tue, 7 Mar 2017 00:34:57 +0530
Subject: arm64: kprobes: remove kprobe_exceptions_notify

Commit fc62d0207ae0 ("kprobes: Introduce weak variant of
kprobe_exceptions_notify()") introduces a generic empty version of the
function for architectures that don't need special handling, like arm64.
As such, remove the arch/arm64/ specific handler.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/probes/kprobes.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 2a07aae5b8a2..c5c45942fb6e 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -372,12 +372,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
 	return 0;
 }
 
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
-				       unsigned long val, void *data)
-{
-	return NOTIFY_DONE;
-}
-
 static void __kprobes kprobe_handler(struct pt_regs *regs)
 {
 	struct kprobe *p, *cur_kprobe;
-- 
cgit v1.2.3-59-g8ed1b


From b0de0ccc8b9edd8846828e0ecdc35deacdf186b0 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 6 Mar 2017 19:06:40 +0000
Subject: arm64: kasan: avoid bad virt_to_pfn()

Booting a v4.11-rc1 kernel with DEBUG_VIRTUAL and KASAN enabled produces
the following splat (trimmed for brevity):

[    0.000000] virt_to_phys used for non-linear address: ffff200008080000 (0xffff200008080000)
[    0.000000] WARNING: CPU: 0 PID: 0 at arch/arm64/mm/physaddr.c:14 __virt_to_phys+0x48/0x70
[    0.000000] PC is at __virt_to_phys+0x48/0x70
[    0.000000] LR is at __virt_to_phys+0x48/0x70
[    0.000000] Call trace:
[    0.000000] [<ffff2000080b1ac0>] __virt_to_phys+0x48/0x70
[    0.000000] [<ffff20000a03b86c>] kasan_init+0x1c0/0x498
[    0.000000] [<ffff20000a034018>] setup_arch+0x2fc/0x948
[    0.000000] [<ffff20000a030c68>] start_kernel+0xb8/0x570
[    0.000000] [<ffff20000a0301e8>] __primary_switched+0x6c/0x74

This is because we use virt_to_pfn() on a kernel image address when
trying to figure out its nid, so that we can allocate its shadow from
the same node.

As with other recent changes, this patch uses lm_alias() to solve this.

We could instead use NUMA_NO_NODE, as x86 does for all shadow
allocations, though we'll likely want the "real" memory shadow to be
backed from its corresponding nid anyway, so we may as well be
consistent and find the nid for the image shadow.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Acked-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/mm/kasan_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 55d1e9205543..687a358a3733 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -162,7 +162,7 @@ void __init kasan_init(void)
 	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
 	vmemmap_populate(kimg_shadow_start, kimg_shadow_end,
-			 pfn_to_nid(virt_to_pfn(_text)));
+			 pfn_to_nid(virt_to_pfn(lm_alias(_text))));
 
 	/*
 	 * vmemmap_populate() has populated the shadow region that covers the
-- 
cgit v1.2.3-59-g8ed1b


From 5c2a625937ba49bc691089370638223d310cda9a Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Wed, 8 Mar 2017 16:27:04 -0800
Subject: arm64: support keyctl() system call in 32-bit mode

As is the case for a number of other architectures that have a 32-bit
compat mode, enable KEYS_COMPAT if both COMPAT and KEYS are enabled.
This allows AArch32 programs to use the keyctl() system call when
running on an AArch64 kernel.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/Kconfig | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a39029b5414e..f21e9a76ff67 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1063,6 +1063,10 @@ config SYSVIPC_COMPAT
 	def_bool y
 	depends on COMPAT && SYSVIPC
 
+config KEYS_COMPAT
+	def_bool y
+	depends on COMPAT && KEYS
+
 endmenu
 
 menu "Power management options"
-- 
cgit v1.2.3-59-g8ed1b


From 14088540ad63c648e5cdf490412033f792d16b6b Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 10 Mar 2017 17:44:18 +0000
Subject: arm64: use const cap for system_uses_ttbr0_pan()

Since commit 4b65a5db362783ab ("arm64: Introduce
uaccess_{disable,enable} functionality based on TTBR0_EL1"),
system_uses_ttbr0_pan() has used cpus_have_cap() to determine whether
PAN is present.

Since commit a4023f682739439b ("arm64: Add hypervisor safe helper for
checking constant capabilities"), which was introduced around the same
time, cpus_have_cap() doesn't try to use a static key, and must always
perform a load, test, and consitional branch (likely a tbnz for the
latter two).

Elsewhere, we moved to using cpus_have_const_cap(), which can use a
static key (i.e. a non-conditional branch), which is patched at runtime
when the feature is detected.

This patch makes system_uses_ttbr0_pan() use cpus_have_const_cap(). The
static key is likely a win for hot-paths like the uacccess primitives,
and this makes our usage consistent regardless.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/cpufeature.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 05310ad8c5ab..f31c48d0cd68 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -251,7 +251,7 @@ static inline bool system_supports_fpsimd(void)
 static inline bool system_uses_ttbr0_pan(void)
 {
 	return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) &&
-		!cpus_have_cap(ARM64_HAS_PAN);
+		!cpus_have_const_cap(ARM64_HAS_PAN);
 }
 
 #endif /* __ASSEMBLY__ */
-- 
cgit v1.2.3-59-g8ed1b


From 0e4c0e6ea7d4a988a5ae2791c7cb5769b5256dad Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 17 Feb 2017 15:25:08 +0100
Subject: arm64: kernel: Update kerneldoc for cpu_suspend() rename

Commit af391b15f7b56ce1 ("arm64: kernel: rename __cpu_suspend to keep it
aligned with arm") renamed cpu_suspend() to arm_cpuidle_suspend(), but
forgot to update the kerneldoc header.

Fixes: af391b15f7b56ce1 ("arm64: kernel: rename __cpu_suspend to keep it aligned with arm")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/cpuidle.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index 75a0f8acef66..fd691087dc9a 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -30,7 +30,7 @@ int arm_cpuidle_init(unsigned int cpu)
 }
 
 /**
- * cpu_suspend() - function to enter a low-power idle state
+ * arm_cpuidle_suspend() - function to enter a low-power idle state
  * @arg: argument to pass to CPU suspend operations
  *
  * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU
-- 
cgit v1.2.3-59-g8ed1b


From 40c50c1fecdf012a3bf055ec813f0ef2eda2749c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Mar 2017 13:17:18 +0100
Subject: kexec, x86/purgatory: Unbreak it and clean it up

The purgatory code defines global variables which are referenced via a
symbol lookup in the kexec code (core and arch).

A recent commit addressing sparse warnings made these static and thereby
broke kexec_file.

Why did this happen? Simply because the whole machinery is undocumented and
lacks any form of forward declarations. The variable names are unspecific
and lack a prefix, so adding forward declarations creates shadow variables
in the core code. Aside of that the code relies on magic constants and
duplicate struct definitions with no way to ensure that these things stay
in sync. The section placement of the purgatory variables happened by
chance and not by design.

Unbreak kexec and cleanup the mess:

 - Add proper forward declarations and document the usage
 - Use common struct definition
 - Use the proper common defines instead of magic constants
 - Add a purgatory_ prefix to have a proper name space
 - Use ARRAY_SIZE() instead of a homebrewn reimplementation
 - Add proper sections to the purgatory variables [ From Mike ]

Fixes: 72042a8c7b01 ("x86/purgatory: Make functions and variables static")
Reported-by: Mike Galbraith <<efault@gmx.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Nicholas Mc Guire <der.herr@hofr.at>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: "Tobin C. Harding" <me@tobin.cc>
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1703101315140.3681@nanos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/powerpc/purgatory/trampoline.S | 12 ++++++------
 arch/x86/include/asm/purgatory.h    | 20 ++++++++++++++++++++
 arch/x86/kernel/machine_kexec_64.c  |  9 ++++++---
 arch/x86/purgatory/purgatory.c      | 35 +++++++++++++++++------------------
 arch/x86/purgatory/purgatory.h      |  8 --------
 arch/x86/purgatory/setup-x86_64.S   |  2 +-
 arch/x86/purgatory/sha256.h         |  1 -
 include/linux/purgatory.h           | 23 +++++++++++++++++++++++
 kernel/kexec_file.c                 |  8 ++++----
 kernel/kexec_internal.h             |  6 +-----
 10 files changed, 78 insertions(+), 46 deletions(-)
 create mode 100644 arch/x86/include/asm/purgatory.h
 delete mode 100644 arch/x86/purgatory/purgatory.h
 create mode 100644 include/linux/purgatory.h

(limited to 'arch')

diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline.S
index f9760ccf4032..3696ea6c4826 100644
--- a/arch/powerpc/purgatory/trampoline.S
+++ b/arch/powerpc/purgatory/trampoline.S
@@ -116,13 +116,13 @@ dt_offset:
 
 	.data
 	.balign 8
-.globl sha256_digest
-sha256_digest:
+.globl purgatory_sha256_digest
+purgatory_sha256_digest:
 	.skip	32
-	.size sha256_digest, . - sha256_digest
+	.size purgatory_sha256_digest, . - purgatory_sha256_digest
 
 	.balign 8
-.globl sha_regions
-sha_regions:
+.globl purgatory_sha_regions
+purgatory_sha_regions:
 	.skip	8 * 2 * 16
-	.size sha_regions, . - sha_regions
+	.size purgatory_sha_regions, . - purgatory_sha_regions
diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h
new file mode 100644
index 000000000000..d7da2729903d
--- /dev/null
+++ b/arch/x86/include/asm/purgatory.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_X86_PURGATORY_H
+#define _ASM_X86_PURGATORY_H
+
+#ifndef __ASSEMBLY__
+#include <linux/purgatory.h>
+
+extern void purgatory(void);
+/*
+ * These forward declarations serve two purposes:
+ *
+ * 1) Make sparse happy when checking arch/purgatory
+ * 2) Document that these are required to be global so the symbol
+ *    lookup in kexec works
+ */
+extern unsigned long purgatory_backup_dest;
+extern unsigned long purgatory_backup_src;
+extern unsigned long purgatory_backup_sz;
+#endif	/* __ASSEMBLY__ */
+
+#endif /* _ASM_PURGATORY_H */
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 307b1f4543de..857cdbd02867 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -194,19 +194,22 @@ static int arch_update_purgatory(struct kimage *image)
 
 	/* Setup copying of backup region */
 	if (image->type == KEXEC_TYPE_CRASH) {
-		ret = kexec_purgatory_get_set_symbol(image, "backup_dest",
+		ret = kexec_purgatory_get_set_symbol(image,
+				"purgatory_backup_dest",
 				&image->arch.backup_load_addr,
 				sizeof(image->arch.backup_load_addr), 0);
 		if (ret)
 			return ret;
 
-		ret = kexec_purgatory_get_set_symbol(image, "backup_src",
+		ret = kexec_purgatory_get_set_symbol(image,
+				"purgatory_backup_src",
 				&image->arch.backup_src_start,
 				sizeof(image->arch.backup_src_start), 0);
 		if (ret)
 			return ret;
 
-		ret = kexec_purgatory_get_set_symbol(image, "backup_sz",
+		ret = kexec_purgatory_get_set_symbol(image,
+				"purgatory_backup_sz",
 				&image->arch.backup_src_sz,
 				sizeof(image->arch.backup_src_sz), 0);
 		if (ret)
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index b6d5c8946e66..470edad96bb9 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -10,22 +10,19 @@
  * Version 2.  See the file COPYING for more details.
  */
 
+#include <linux/bug.h>
+#include <asm/purgatory.h>
+
 #include "sha256.h"
-#include "purgatory.h"
 #include "../boot/string.h"
 
-struct sha_region {
-	unsigned long start;
-	unsigned long len;
-};
-
-static unsigned long backup_dest;
-static unsigned long backup_src;
-static unsigned long backup_sz;
+unsigned long purgatory_backup_dest __section(.kexec-purgatory);
+unsigned long purgatory_backup_src __section(.kexec-purgatory);
+unsigned long purgatory_backup_sz __section(.kexec-purgatory);
 
-static u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 };
+u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(.kexec-purgatory);
 
-struct sha_region sha_regions[16] = {};
+struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(.kexec-purgatory);
 
 /*
  * On x86, second kernel requries first 640K of memory to boot. Copy
@@ -34,26 +31,28 @@ struct sha_region sha_regions[16] = {};
  */
 static int copy_backup_region(void)
 {
-	if (backup_dest)
-		memcpy((void *)backup_dest, (void *)backup_src, backup_sz);
-
+	if (purgatory_backup_dest) {
+		memcpy((void *)purgatory_backup_dest,
+		       (void *)purgatory_backup_src, purgatory_backup_sz);
+	}
 	return 0;
 }
 
 static int verify_sha256_digest(void)
 {
-	struct sha_region *ptr, *end;
+	struct kexec_sha_region *ptr, *end;
 	u8 digest[SHA256_DIGEST_SIZE];
 	struct sha256_state sctx;
 
 	sha256_init(&sctx);
-	end = &sha_regions[sizeof(sha_regions)/sizeof(sha_regions[0])];
-	for (ptr = sha_regions; ptr < end; ptr++)
+	end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions);
+
+	for (ptr = purgatory_sha_regions; ptr < end; ptr++)
 		sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len);
 
 	sha256_final(&sctx, digest);
 
-	if (memcmp(digest, sha256_digest, sizeof(digest)))
+	if (memcmp(digest, purgatory_sha256_digest, sizeof(digest)))
 		return 1;
 
 	return 0;
diff --git a/arch/x86/purgatory/purgatory.h b/arch/x86/purgatory/purgatory.h
deleted file mode 100644
index e2e365a6c192..000000000000
--- a/arch/x86/purgatory/purgatory.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef PURGATORY_H
-#define PURGATORY_H
-
-#ifndef __ASSEMBLY__
-extern void purgatory(void);
-#endif	/* __ASSEMBLY__ */
-
-#endif /* PURGATORY_H */
diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S
index f90e9dfa90bb..dfae9b9e60b5 100644
--- a/arch/x86/purgatory/setup-x86_64.S
+++ b/arch/x86/purgatory/setup-x86_64.S
@@ -9,7 +9,7 @@
  * This source code is licensed under the GNU General Public License,
  * Version 2.  See the file COPYING for more details.
  */
-#include "purgatory.h"
+#include <asm/purgatory.h>
 
 	.text
 	.globl purgatory_start
diff --git a/arch/x86/purgatory/sha256.h b/arch/x86/purgatory/sha256.h
index bd15a4127735..2867d9825a57 100644
--- a/arch/x86/purgatory/sha256.h
+++ b/arch/x86/purgatory/sha256.h
@@ -10,7 +10,6 @@
 #ifndef SHA256_H
 #define SHA256_H
 
-
 #include <linux/types.h>
 #include <crypto/sha.h>
 
diff --git a/include/linux/purgatory.h b/include/linux/purgatory.h
new file mode 100644
index 000000000000..d60d4e278609
--- /dev/null
+++ b/include/linux/purgatory.h
@@ -0,0 +1,23 @@
+#ifndef _LINUX_PURGATORY_H
+#define _LINUX_PURGATORY_H
+
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <uapi/linux/kexec.h>
+
+struct kexec_sha_region {
+	unsigned long start;
+	unsigned long len;
+};
+
+/*
+ * These forward declarations serve two purposes:
+ *
+ * 1) Make sparse happy when checking arch/purgatory
+ * 2) Document that these are required to be global so the symbol
+ *    lookup in kexec works
+ */
+extern struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX];
+extern u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE];
+
+#endif
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index b56a558e406d..b118735fea9d 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -614,13 +614,13 @@ static int kexec_calculate_store_digests(struct kimage *image)
 		ret = crypto_shash_final(desc, digest);
 		if (ret)
 			goto out_free_digest;
-		ret = kexec_purgatory_get_set_symbol(image, "sha_regions",
-						sha_regions, sha_region_sz, 0);
+		ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions",
+						     sha_regions, sha_region_sz, 0);
 		if (ret)
 			goto out_free_digest;
 
-		ret = kexec_purgatory_get_set_symbol(image, "sha256_digest",
-						digest, SHA256_DIGEST_SIZE, 0);
+		ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha256_digest",
+						     digest, SHA256_DIGEST_SIZE, 0);
 		if (ret)
 			goto out_free_digest;
 	}
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 4cef7e4706b0..799a8a452187 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -15,11 +15,7 @@ int kimage_is_destination_range(struct kimage *image,
 extern struct mutex kexec_mutex;
 
 #ifdef CONFIG_KEXEC_FILE
-struct kexec_sha_region {
-	unsigned long start;
-	unsigned long len;
-};
-
+#include <linux/purgatory.h>
 void kimage_file_post_load_cleanup(struct kimage *image);
 #else /* CONFIG_KEXEC_FILE */
 static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
-- 
cgit v1.2.3-59-g8ed1b


From c962cff17dfa11f4a8227ac16de2b28aea3312e4 Mon Sep 17 00:00:00 2001
From: Dou Liyang <douly.fnst@cn.fujitsu.com>
Date: Fri, 3 Mar 2017 16:02:23 +0800
Subject: Revert "x86/acpi: Set persistent cpuid <-> nodeid mapping when
 booting"

Revert: dc6db24d2476 ("x86/acpi: Set persistent cpuid <-> nodeid mapping when booting")

The mapping of "cpuid <-> nodeid" is established at boot time via ACPI
tables to keep associations of workqueues and other node related items
consistent across cpu hotplug.

But, ACPI tables are unreliable and failures with that boot time mapping
have been reported on machines where the ACPI table and the physical
information which is retrieved at actual hotplug is inconsistent.

Revert the mapping implementation so it can be replaced with a less error
prone approach.

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Tested-by: Xiaolong Ye <xiaolong.ye@intel.com>
Cc: rjw@rjwysocki.net
Cc: linux-acpi@vger.kernel.org
Cc: guzheng1@huawei.com
Cc: izumi.taku@jp.fujitsu.com
Cc: lenb@kernel.org
Link: http://lkml.kernel.org/r/1488528147-2279-2-git-send-email-douly.fnst@cn.fujitsu.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/acpi/boot.c   |  2 +-
 drivers/acpi/acpi_processor.c |  5 ---
 drivers/acpi/bus.c            |  1 -
 drivers/acpi/processor_core.c | 73 -------------------------------------------
 include/linux/acpi.h          |  3 --
 5 files changed, 1 insertion(+), 83 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ae32838cac5f..f6b0e87d2388 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -710,7 +710,7 @@ static void __init acpi_set_irq_model_ioapic(void)
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 #include <acpi/processor.h>
 
-int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
+static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 {
 #ifdef CONFIG_ACPI_NUMA
 	int nid;
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index 4467a8089ab8..5d208a99d0c9 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -182,11 +182,6 @@ int __weak arch_register_cpu(int cpu)
 
 void __weak arch_unregister_cpu(int cpu) {}
 
-int __weak acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
-{
-	return -ENODEV;
-}
-
 static int acpi_processor_hotadd_init(struct acpi_processor *pr)
 {
 	unsigned long long sta;
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 80cb5eb75b63..34fbe027e73a 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1249,7 +1249,6 @@ static int __init acpi_init(void)
 	acpi_wakeup_device_init();
 	acpi_debugger_init();
 	acpi_setup_sb_notify_handler();
-	acpi_set_processor_mapping();
 	return 0;
 }
 
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 611a5585a902..a84386204659 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -278,79 +278,6 @@ int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id)
 }
 EXPORT_SYMBOL_GPL(acpi_get_cpuid);
 
-#ifdef CONFIG_ACPI_HOTPLUG_CPU
-static bool __init
-map_processor(acpi_handle handle, phys_cpuid_t *phys_id, int *cpuid)
-{
-	int type, id;
-	u32 acpi_id;
-	acpi_status status;
-	acpi_object_type acpi_type;
-	unsigned long long tmp;
-	union acpi_object object = { 0 };
-	struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
-
-	status = acpi_get_type(handle, &acpi_type);
-	if (ACPI_FAILURE(status))
-		return false;
-
-	switch (acpi_type) {
-	case ACPI_TYPE_PROCESSOR:
-		status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
-		if (ACPI_FAILURE(status))
-			return false;
-		acpi_id = object.processor.proc_id;
-
-		/* validate the acpi_id */
-		if(acpi_processor_validate_proc_id(acpi_id))
-			return false;
-		break;
-	case ACPI_TYPE_DEVICE:
-		status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp);
-		if (ACPI_FAILURE(status))
-			return false;
-		acpi_id = tmp;
-		break;
-	default:
-		return false;
-	}
-
-	type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0;
-
-	*phys_id = __acpi_get_phys_id(handle, type, acpi_id, false);
-	id = acpi_map_cpuid(*phys_id, acpi_id);
-
-	if (id < 0)
-		return false;
-	*cpuid = id;
-	return true;
-}
-
-static acpi_status __init
-set_processor_node_mapping(acpi_handle handle, u32 lvl, void *context,
-			   void **rv)
-{
-	phys_cpuid_t phys_id;
-	int cpu_id;
-
-	if (!map_processor(handle, &phys_id, &cpu_id))
-		return AE_ERROR;
-
-	acpi_map_cpu2node(handle, cpu_id, phys_id);
-	return AE_OK;
-}
-
-void __init acpi_set_processor_mapping(void)
-{
-	/* Set persistent cpu <-> node mapping for all processors. */
-	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
-			    ACPI_UINT32_MAX, set_processor_node_mapping,
-			    NULL, NULL, NULL);
-}
-#else
-void __init acpi_set_processor_mapping(void) {}
-#endif /* CONFIG_ACPI_HOTPLUG_CPU */
-
 #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
 static int get_ioapic_id(struct acpi_subtable_header *entry, u32 gsi_base,
 			 u64 *phys_addr, int *ioapic_id)
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 673acda012af..63a7519b00cc 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -294,11 +294,8 @@ bool acpi_processor_validate_proc_id(int proc_id);
 int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id,
 		 int *pcpu);
 int acpi_unmap_cpu(int cpu);
-int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid);
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
 
-void acpi_set_processor_mapping(void);
-
 #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
 int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr);
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 2b85b3d22920db7473e5fed5719e7955c0ec323e Mon Sep 17 00:00:00 2001
From: Dou Liyang <douly.fnst@cn.fujitsu.com>
Date: Fri, 3 Mar 2017 16:02:25 +0800
Subject: x86/acpi: Restore the order of CPU IDs

The following commits:

  f7c28833c2 ("x86/acpi: Enable acpi to register all possible cpus at
boot time") and 8f54969dc8 ("x86/acpi: Introduce persistent storage
for cpuid <-> apicid mapping")

... registered all the possible CPUs at boot time via ACPI tables to
make the mapping of cpuid <-> apicid fixed. Both enabled and disabled
CPUs could have a logical CPU ID after boot time.

But, ACPI tables are unreliable. the number amd order of Local APIC
entries which depends on the firmware is often inconsistent with the
physical devices. Even if they are consistent, The disabled CPUs which
take up some logical CPU IDs will also make the order discontinuous.

Revert the part of disabled CPUs registration, keep the allocation
logic of logical CPU IDs and also keep some code location changes.

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Tested-by: Xiaolong Ye <xiaolong.ye@intel.com>
Cc: rjw@rjwysocki.net
Cc: linux-acpi@vger.kernel.org
Cc: guzheng1@huawei.com
Cc: izumi.taku@jp.fujitsu.com
Cc: lenb@kernel.org
Link: http://lkml.kernel.org/r/1488528147-2279-4-git-send-email-douly.fnst@cn.fujitsu.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/acpi/boot.c |  7 ++++++-
 arch/x86/kernel/apic/apic.c | 26 +++++++-------------------
 2 files changed, 13 insertions(+), 20 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index f6b0e87d2388..b2879cc23db4 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -179,10 +179,15 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled)
 		return -EINVAL;
 	}
 
+	if (!enabled) {
+		++disabled_cpus;
+		return -EINVAL;
+	}
+
 	if (boot_cpu_physical_apicid != -1U)
 		ver = boot_cpu_apic_version;
 
-	cpu = __generic_processor_info(id, ver, enabled);
+	cpu = generic_processor_info(id, ver);
 	if (cpu >= 0)
 		early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid;
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index aee7deddabd0..8ccb7ef512e0 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2063,7 +2063,7 @@ static int allocate_logical_cpuid(int apicid)
 	return nr_logical_cpuids++;
 }
 
-int __generic_processor_info(int apicid, int version, bool enabled)
+int generic_processor_info(int apicid, int version)
 {
 	int cpu, max = nr_cpu_ids;
 	bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
@@ -2121,11 +2121,9 @@ int __generic_processor_info(int apicid, int version, bool enabled)
 	if (num_processors >= nr_cpu_ids) {
 		int thiscpu = max + disabled_cpus;
 
-		if (enabled) {
-			pr_warning("APIC: NR_CPUS/possible_cpus limit of %i "
-				   "reached. Processor %d/0x%x ignored.\n",
-				   max, thiscpu, apicid);
-		}
+		pr_warning("APIC: NR_CPUS/possible_cpus limit of %i "
+			   "reached. Processor %d/0x%x ignored.\n",
+			   max, thiscpu, apicid);
 
 		disabled_cpus++;
 		return -EINVAL;
@@ -2177,23 +2175,13 @@ int __generic_processor_info(int apicid, int version, bool enabled)
 		apic->x86_32_early_logical_apicid(cpu);
 #endif
 	set_cpu_possible(cpu, true);
-
-	if (enabled) {
-		num_processors++;
-		physid_set(apicid, phys_cpu_present_map);
-		set_cpu_present(cpu, true);
-	} else {
-		disabled_cpus++;
-	}
+	physid_set(apicid, phys_cpu_present_map);
+	set_cpu_present(cpu, true);
+	num_processors++;
 
 	return cpu;
 }
 
-int generic_processor_info(int apicid, int version)
-{
-	return __generic_processor_info(apicid, version, true);
-}
-
 int hard_smp_processor_id(void)
 {
 	return read_apic_id();
-- 
cgit v1.2.3-59-g8ed1b


From 0acf611997d9d05dbfb559c3c6e379c861eb5957 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 22 Feb 2017 11:07:57 -0800
Subject: score: Fix implicit includes now failing build after extable change

After changing from module.h to extable.h, score builds fail with:

  arch/score/kernel/traps.c: In function 'do_ri':
  arch/score/kernel/traps.c:248:4: error: implicit declaration of function 'user_disable_single_step'
  arch/score/mm/extable.c: In function 'fixup_exception':
  arch/score/mm/extable.c:32:38: error: dereferencing pointer to incomplete type
  arch/score/mm/extable.c:34:24: error: dereferencing pointer to incomplete type

because extable.h doesn't drag in the same amount of headers as the
module.h did.  Add in the headers which were implicitly expected.

Fixes: 90858794c960 ("module.h: remove extable.h include now users have migrated")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
[PG: tweak commit log; refresh for sched header refactoring.]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 arch/score/kernel/traps.c | 1 +
 arch/score/mm/extable.c   | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c
index e359ec675869..12daf45369b4 100644
--- a/arch/score/kernel/traps.c
+++ b/arch/score/kernel/traps.c
@@ -24,6 +24,7 @@
  */
 
 #include <linux/extable.h>
+#include <linux/ptrace.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/debug.h>
diff --git a/arch/score/mm/extable.c b/arch/score/mm/extable.c
index ec871355fc2d..6736a3ad6286 100644
--- a/arch/score/mm/extable.c
+++ b/arch/score/mm/extable.c
@@ -24,6 +24,8 @@
  */
 
 #include <linux/extable.h>
+#include <linux/ptrace.h>
+#include <asm/extable.h>
 
 int fixup_exception(struct pt_regs *regs)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 2c4ea6e28dbf15ab93632c5c189f3948366b8885 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 11 Mar 2017 01:31:19 +0100
Subject: x86/tlb: Fix tlb flushing when lguest clears PGE

Fengguang reported random corruptions from various locations on x86-32
after commits d2852a224050 ("arch: add ARCH_HAS_SET_MEMORY config") and
9d876e79df6a ("bpf: fix unlocking of jited image when module ronx not set")
that uses the former. While x86-32 doesn't have a JIT like x86_64, the
bpf_prog_lock_ro() and bpf_prog_unlock_ro() got enabled due to
ARCH_HAS_SET_MEMORY, whereas Fengguang's test kernel doesn't have module
support built in and therefore never had the DEBUG_SET_MODULE_RONX setting
enabled.

After investigating the crashes further, it turned out that using
set_memory_ro() and set_memory_rw() didn't have the desired effect, for
example, setting the pages as read-only on x86-32 would still let
probe_kernel_write() succeed without error. This behavior would manifest
itself in situations where the vmalloc'ed buffer was accessed prior to
set_memory_*() such as in case of bpf_prog_alloc(). In cases where it
wasn't, the page attribute changes seemed to have taken effect, leading to
the conclusion that a TLB invalidate didn't happen. Moreover, it turned out
that this issue reproduced with qemu in "-cpu kvm64" mode, but not for
"-cpu host". When the issue occurs, change_page_attr_set_clr() did trigger
a TLB flush as expected via __flush_tlb_all() through cpa_flush_range(),
though.

There are 3 variants for issuing a TLB flush: invpcid_flush_all() (depends
on CPU feature bits X86_FEATURE_INVPCID, X86_FEATURE_PGE), cr4 based flush
(depends on X86_FEATURE_PGE), and cr3 based flush.  For "-cpu host" case in
my setup, the flush used invpcid_flush_all() variant, whereas for "-cpu
kvm64", the flush was cr4 based. Switching the kvm64 case to cr3 manually
worked fine, and further investigating the cr4 one turned out that
X86_CR4_PGE bit was not set in cr4 register, meaning the
__native_flush_tlb_global_irq_disabled() wrote cr4 twice with the same
value instead of clearing X86_CR4_PGE in the first write to trigger the
flush.

It turned out that X86_CR4_PGE was cleared from cr4 during init from
lguest_arch_host_init() via adjust_pge(). The X86_FEATURE_PGE bit is also
cleared from there due to concerns of using PGE in guest kernel that can
lead to hard to trace bugs (see bff672e630a0 ("lguest: documentation V:
Host") in init()). The CPU feature bits are cleared in dynamic
boot_cpu_data, but they never propagated to __flush_tlb_all() as it uses
static_cpu_has() instead of boot_cpu_has() for testing which variant of TLB
flushing to use, meaning they still used the old setting of the host
kernel.

Clearing via setup_clear_cpu_cap(X86_FEATURE_PGE) so this would propagate
to static_cpu_has() checks is too late at this point as sections have been
patched already, so for now, it seems reasonable to switch back to
boot_cpu_has(X86_FEATURE_PGE) as it was prior to commit c109bf95992b
("x86/cpufeature: Remove cpu_has_pge"). This lets the TLB flush trigger via
cr3 as originally intended, properly makes the new page attributes visible
and thus fixes the crashes seen by Fengguang.

Fixes: c109bf95992b ("x86/cpufeature: Remove cpu_has_pge")
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: bp@suse.de
Cc: Kees Cook <keescook@chromium.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: lkp@01.org
Cc: Laura Abbott <labbott@redhat.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernrl.org/r/20170301125426.l4nf65rx4wahohyl@wfg-t540p.sh.intel.com
Link: http://lkml.kernel.org/r/25c41ad9eca164be4db9ad84f768965b7eb19d9e.1489191673.git.daniel@iogearbox.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/tlbflush.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6fa85944af83..fc5abff9b7fd 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -188,7 +188,7 @@ static inline void __native_flush_tlb_single(unsigned long addr)
 
 static inline void __flush_tlb_all(void)
 {
-	if (static_cpu_has(X86_FEATURE_PGE))
+	if (boot_cpu_has(X86_FEATURE_PGE))
 		__flush_tlb_global();
 	else
 		__flush_tlb();
-- 
cgit v1.2.3-59-g8ed1b


From 80354c29025833acd72ddac1ffa21c6cb50128cd Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Sun, 12 Mar 2017 17:07:44 +0200
Subject: x86/platform/intel-mid: Correct MSI IRQ line for watchdog device

The interrupt line used for the watchdog is 12, according to the official
Intel Edison BSP code.

And indeed after fixing it we start getting an interrupt and thus the
watchdog starts working again:

  [  191.699951] Kernel panic - not syncing: Kernel Watchdog

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: David Cohen <david.a.cohen@linux.intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 78a3bb9e408b ("x86: intel-mid: add watchdog platform code for Merrifield")
Link: http://lkml.kernel.org/r/20170312150744.45493-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
index 86edd1e941eb..9e304e2ea4f5 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
@@ -19,7 +19,7 @@
 #include <asm/intel_scu_ipc.h>
 #include <asm/io_apic.h>
 
-#define TANGIER_EXT_TIMER0_MSI 15
+#define TANGIER_EXT_TIMER0_MSI 12
 
 static struct platform_device wdt_dev = {
 	.name = "intel_mid_wdt",
-- 
cgit v1.2.3-59-g8ed1b


From 44fee88cea43d3c2cac962e0439cb10a3cabff6d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 13 Mar 2017 15:57:12 +0100
Subject: x86/tsc: Fix ART for TSC_KNOWN_FREQ

Subhransu reported that convert_art_to_tsc() isn't working for him.

The ART to TSC relation is only set up for systems which use the refined
TSC calibration. Systems with known TSC frequency (available via CPUID 15)
are not using the refined calibration and therefor the ART to TSC relation
is never established.

Add the setup to the known frequency init path which skips ART
calibration. The init code needs to be duplicated as for systems which use
refined calibration the ART setup must be delayed until calibration has
been done.

The problem has been there since the ART support was introdduced, but only
detected now because Subhransu tested the first time on hardware which has
TSC frequency enumerated via CPUID 15.

Note for stable: The conditional has changed from TSC_RELIABLE to
     	 	 TSC_KNOWN_FREQUENCY.

[ tglx: Rewrote changelog and identified the proper 'Fixes' commit ]

Fixes: f9677e0f8308 ("x86/tsc: Always Running Timer (ART) correlated clocksource")
Reported-by: "Prusty, Subhransu S" <subhransu.s.prusty@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Cc: christopher.s.hall@intel.com
Cc: kevin.b.stanton@intel.com
Cc: john.stultz@linaro.org
Cc: akataria@vmware.com
Link: http://lkml.kernel.org/r/20170313145712.GI3312@twins.programming.kicks-ass.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/tsc.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 4f7a9833d8e5..c73a7f9e881a 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1333,6 +1333,8 @@ static int __init init_tsc_clocksource(void)
 	 * the refined calibration and directly register it as a clocksource.
 	 */
 	if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) {
+		if (boot_cpu_has(X86_FEATURE_ART))
+			art_related_clocksource = &clocksource_tsc;
 		clocksource_register_khz(&clocksource_tsc, tsc_khz);
 		return 0;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 0d443b70cc92d741cbc1dcbf1079897b3d8bc3cc Mon Sep 17 00:00:00 2001
From: Mike Travis <mike.travis@hpe.com>
Date: Tue, 7 Mar 2017 15:08:42 -0600
Subject: x86/platform: Remove warning message for duplicate NMI handlers

Remove the WARNING message associated with multiple NMI handlers as
there are at least two that are legitimate.  These are the KGDB and the
UV handlers and both want to be called if the NMI has not been claimed
by any other NMI handler.

Use of the UNKNOWN NMI call chain dramatically lowers the NMI call rate
when high frequency NMI tools are in use, notably the perf tools.  It is
required on systems that cannot sustain a high NMI call rate without
adversely affecting the system operation.

Signed-off-by: Mike Travis <mike.travis@hpe.com>
Reviewed-by: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Russ Anderson <russ.anderson@hpe.com>
Cc: Frank Ramsay <frank.ramsay@hpe.com>
Cc: Tony Ernst <tony.ernst@hpe.com>
Link: http://lkml.kernel.org/r/20170307210841.730959611@asylum.americas.sgi.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/nmi.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index f088ea4c66e7..a723ae9440ab 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -166,11 +166,9 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
 	spin_lock_irqsave(&desc->lock, flags);
 
 	/*
-	 * most handlers of type NMI_UNKNOWN never return because
-	 * they just assume the NMI is theirs.  Just a sanity check
-	 * to manage expectations
+	 * Indicate if there are multiple registrations on the
+	 * internal NMI handler call chains (SERR and IO_CHECK).
 	 */
-	WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
 	WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
 	WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));
 
-- 
cgit v1.2.3-59-g8ed1b


From c836e5cf0d0880d6668966476c4ffe727f29f69a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 8 Mar 2017 13:24:21 +0200
Subject: x86/platform/intel-mid: Use common power off sequence

Intel Medfield may use common for Intel MID devices power sequence.
Remove unneded custom power off stub.

While here, remove function forward declaration.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20170308112422.67533-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/platform/intel-mid/mfld.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c
index e793fe509971..e42978d4deaf 100644
--- a/arch/x86/platform/intel-mid/mfld.c
+++ b/arch/x86/platform/intel-mid/mfld.c
@@ -17,16 +17,6 @@
 
 #include "intel_mid_weak_decls.h"
 
-static void penwell_arch_setup(void);
-/* penwell arch ops */
-static struct intel_mid_ops penwell_ops = {
-	.arch_setup = penwell_arch_setup,
-};
-
-static void mfld_power_off(void)
-{
-}
-
 static unsigned long __init mfld_calibrate_tsc(void)
 {
 	unsigned long fast_calibrate;
@@ -63,9 +53,12 @@ static unsigned long __init mfld_calibrate_tsc(void)
 static void __init penwell_arch_setup(void)
 {
 	x86_platform.calibrate_tsc = mfld_calibrate_tsc;
-	pm_power_off = mfld_power_off;
 }
 
+static struct intel_mid_ops penwell_ops = {
+	.arch_setup = penwell_arch_setup,
+};
+
 void *get_penwell_ops(void)
 {
 	return &penwell_ops;
-- 
cgit v1.2.3-59-g8ed1b


From 859bb6d59066b96341020e0991f191631cabe59d Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 8 Mar 2017 13:24:22 +0200
Subject: x86/platform/intel-mid: Add power button support for Merrifield

Intel Merrifield platform has a Basin Cove PMIC to handle in particular
power button events. Add necessary bits to enable it.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20170308112422.67533-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/platform/intel-mid/device_libs/Makefile   |  1 +
 .../device_libs/platform_mrfld_power_btn.c         | 82 ++++++++++++++++++++++
 2 files changed, 83 insertions(+)
 create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c

(limited to 'arch')

diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile
index a7dbec4dce27..3dbde04febdc 100644
--- a/arch/x86/platform/intel-mid/device_libs/Makefile
+++ b/arch/x86/platform/intel-mid/device_libs/Makefile
@@ -26,5 +26,6 @@ obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o
 obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o
 # MISC Devices
 obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o
+obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_mrfld_power_btn.o
 obj-$(subst m,y,$(CONFIG_RTC_DRV_CMOS)) += platform_mrfld_rtc.o
 obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_mrfld_wdt.o
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c
new file mode 100644
index 000000000000..a6c3705a28ad
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c
@@ -0,0 +1,82 @@
+/*
+ * Intel Merrifield power button support
+ *
+ * (C) Copyright 2017 Intel Corporation
+ *
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/sfi.h>
+
+#include <asm/intel-mid.h>
+#include <asm/intel_scu_ipc.h>
+
+static struct resource mrfld_power_btn_resources[] = {
+	{
+		.flags		= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mrfld_power_btn_dev = {
+	.name		= "msic_power_btn",
+	.id		= PLATFORM_DEVID_NONE,
+	.num_resources	= ARRAY_SIZE(mrfld_power_btn_resources),
+	.resource	= mrfld_power_btn_resources,
+};
+
+static int mrfld_power_btn_scu_status_change(struct notifier_block *nb,
+					     unsigned long code, void *data)
+{
+	if (code == SCU_DOWN) {
+		platform_device_unregister(&mrfld_power_btn_dev);
+		return 0;
+	}
+
+	return platform_device_register(&mrfld_power_btn_dev);
+}
+
+static struct notifier_block mrfld_power_btn_scu_notifier = {
+	.notifier_call	= mrfld_power_btn_scu_status_change,
+};
+
+static int __init register_mrfld_power_btn(void)
+{
+	if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
+		return -ENODEV;
+
+	/*
+	 * We need to be sure that the SCU IPC is ready before
+	 * PMIC power button device can be registered:
+	 */
+	intel_scu_notifier_add(&mrfld_power_btn_scu_notifier);
+
+	return 0;
+}
+arch_initcall(register_mrfld_power_btn);
+
+static void __init *mrfld_power_btn_platform_data(void *info)
+{
+	struct resource *res = mrfld_power_btn_resources;
+	struct sfi_device_table_entry *pentry = info;
+
+	res->start = res->end = pentry->irq;
+	return NULL;
+}
+
+static const struct devs_id mrfld_power_btn_dev_id __initconst = {
+	.name			= "bcove_power_btn",
+	.type			= SFI_DEV_TYPE_IPC,
+	.delay			= 1,
+	.msic			= 1,
+	.get_platform_data	= &mrfld_power_btn_platform_data,
+};
+
+sfi_device(mrfld_power_btn_dev_id);
-- 
cgit v1.2.3-59-g8ed1b


From be3606ff739d1c1be36389f8737c577ad87e1f57 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Mon, 13 Mar 2017 19:33:37 +0300
Subject: x86/kasan: Fix boot with KASAN=y and PROFILE_ANNOTATED_BRANCHES=y

The kernel doesn't boot with both PROFILE_ANNOTATED_BRANCHES=y and KASAN=y
options selected. With branch profiling enabled we end up calling
ftrace_likely_update() before kasan_early_init(). ftrace_likely_update() is
built with KASAN instrumentation, so calling it before kasan has been
initialized leads to crash.

Use DISABLE_BRANCH_PROFILING define to make sure that we don't call
ftrace_likely_update() from early code before kasan_early_init().

Fixes: ef7f0d6a6ca8 ("x86_64: add KASan support")
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: kasan-dev@googlegroups.com
Cc: Alexander Potapenko <glider@google.com>
Cc: stable@vger.kernel.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: lkp@01.org
Cc: Dmitry Vyukov <dvyukov@google.com>
Link: http://lkml.kernel.org/r/20170313163337.1704-1-aryabinin@virtuozzo.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/head64.c    | 1 +
 arch/x86/mm/kasan_init_64.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 54a2372f5dbb..b5785c197e53 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -4,6 +4,7 @@
  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
  */
 
+#define DISABLE_BRANCH_PROFILING
 #include <linux/init.h>
 #include <linux/linkage.h>
 #include <linux/types.h>
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 8d63d7a104c3..4c90cfdc128b 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -1,3 +1,4 @@
+#define DISABLE_BRANCH_PROFILING
 #define pr_fmt(fmt) "kasan: " fmt
 #include <linux/bootmem.h>
 #include <linux/kasan.h>
-- 
cgit v1.2.3-59-g8ed1b


From d434936e4cbb10181463622962f30b989d3e9e19 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 14 Mar 2017 13:12:53 +0100
Subject: mm, x86: Fix native_pud_clear build error

We still get a build error in random configurations, after this has been
modified a few times:

In file included from include/linux/mm.h:68:0,
                 from include/linux/suspend.h:8,
                 from arch/x86/kernel/asm-offsets.c:12:
arch/x86/include/asm/pgtable.h:66:26: error: redefinition of 'native_pud_clear'
 #define pud_clear(pud)   native_pud_clear(pud)

My interpretation is that the build error comes from a typo in __PAGETABLE_PUD_FOLDED,
so fix that typo now, and remove the incorrect #ifdef around the native_pud_clear
definition.

Fixes: 3e761a42e19c ("mm, x86: fix HIGHMEM64 && PARAVIRT build config for native_pud_clear()")
Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Dave Jiang <dave.jiang@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Thomas Garnier <thgarnie@google.com>
Link: http://lkml.kernel.org/r/20170314121330.182155-1-arnd@arndb.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/pgtable-3level.h | 3 ---
 arch/x86/include/asm/pgtable.h        | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 72277b1028a5..50d35e3185f5 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -121,12 +121,9 @@ static inline void native_pmd_clear(pmd_t *pmd)
 	*(tmp + 1) = 0;
 }
 
-#if !defined(CONFIG_SMP) || (defined(CONFIG_HIGHMEM64G) && \
-		defined(CONFIG_PARAVIRT))
 static inline void native_pud_clear(pud_t *pudp)
 {
 }
-#endif
 
 static inline void pud_clear(pud_t *pudp)
 {
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1cfb36b8c024..585ee0d42d18 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -62,7 +62,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
 # define set_pud(pudp, pud)		native_set_pud(pudp, pud)
 #endif
 
-#ifndef __PAGETABLE_PMD_FOLDED
+#ifndef __PAGETABLE_PUD_FOLDED
 #define pud_clear(pud)			native_pud_clear(pud)
 #endif
 
-- 
cgit v1.2.3-59-g8ed1b


From 87a6b2975f0d340c75b7488d22d61d2f98fb8abf Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 13 Mar 2017 23:27:47 -0500
Subject: x86/unwind: Fix last frame check for aligned function stacks

Pavel Machek reported the following warning on x86-32:

  WARNING: kernel stack frame pointer at f50cdf98 in swapper/2:0 has bad value   (null)

The warning is caused by the unwinder not realizing that it reached the
end of the stack, due to an unusual prologue which gcc sometimes
generates for aligned stacks.  The prologue is based on a gcc feature
called the Dynamic Realign Argument Pointer (DRAP).  It's almost always
enabled for aligned stacks when -maccumulate-outgoing-args isn't set.

This issue is similar to the one fixed by the following commit:

  8023e0e2a48d ("x86/unwind: Adjust last frame check for aligned function stacks")

... but that fix was specific to x86-64.

Make the fix more generic to cover x86-32 as well, and also ensure that
the return address referred to by the frame pointer is a copy of the
original return address.

Fixes: acb4608ad186 ("x86/unwind: Create stack frames for saved syscall registers")
Reported-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/50d4924db716c264b14f1633037385ec80bf89d2.1489465609.git.jpoimboe@redhat.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/unwind_frame.c | 36 ++++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 478d15dbaee4..08339262b666 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -82,19 +82,43 @@ static size_t regs_size(struct pt_regs *regs)
 	return sizeof(*regs);
 }
 
+#ifdef CONFIG_X86_32
+#define GCC_REALIGN_WORDS 3
+#else
+#define GCC_REALIGN_WORDS 1
+#endif
+
 static bool is_last_task_frame(struct unwind_state *state)
 {
-	unsigned long bp = (unsigned long)state->bp;
-	unsigned long regs = (unsigned long)task_pt_regs(state->task);
+	unsigned long *last_bp = (unsigned long *)task_pt_regs(state->task) - 2;
+	unsigned long *aligned_bp = last_bp - GCC_REALIGN_WORDS;
 
 	/*
 	 * We have to check for the last task frame at two different locations
 	 * because gcc can occasionally decide to realign the stack pointer and
-	 * change the offset of the stack frame by a word in the prologue of a
-	 * function called by head/entry code.
+	 * change the offset of the stack frame in the prologue of a function
+	 * called by head/entry code.  Examples:
+	 *
+	 * <start_secondary>:
+	 *      push   %edi
+	 *      lea    0x8(%esp),%edi
+	 *      and    $0xfffffff8,%esp
+	 *      pushl  -0x4(%edi)
+	 *      push   %ebp
+	 *      mov    %esp,%ebp
+	 *
+	 * <x86_64_start_kernel>:
+	 *      lea    0x8(%rsp),%r10
+	 *      and    $0xfffffffffffffff0,%rsp
+	 *      pushq  -0x8(%r10)
+	 *      push   %rbp
+	 *      mov    %rsp,%rbp
+	 *
+	 * Note that after aligning the stack, it pushes a duplicate copy of
+	 * the return address before pushing the frame pointer.
 	 */
-	return bp == regs - FRAME_HEADER_SIZE ||
-	       bp == regs - FRAME_HEADER_SIZE - sizeof(long);
+	return (state->bp == last_bp ||
+		(state->bp == aligned_bp && *(aligned_bp+1) == *(last_bp+1)));
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 49ec8f5b6ae3ab60385492cad900ffc8a523c895 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 14 Mar 2017 15:20:53 +0100
Subject: x86/intel_rdt: Put group node in rdtgroup_kn_unlock

The rdtgroup_kn_unlock waits for the last user to release and put its
node. But it's calling kernfs_put on the node which calls the
rdtgroup_kn_unlock, which might not be the group's directory node, but
another group's file node.

This race could be easily reproduced by running 2 instances
of following script:

  mount -t resctrl resctrl /sys/fs/resctrl/
  pushd /sys/fs/resctrl/
  mkdir krava
  echo "krava" > krava/schemata
  rmdir krava
  popd
  umount  /sys/fs/resctrl

It triggers the slub debug error message with following command
line config: slub_debug=,kernfs_node_cache.

Call kernfs_put on the group's node to fix it.

Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Shaohua Li <shli@fb.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1489501253-20248-1-git-send-email-jolsa@kernel.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index c05509d38b1f..9ac2a5cdd9c2 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -727,7 +727,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn)
 	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
 	    (rdtgrp->flags & RDT_DELETED)) {
 		kernfs_unbreak_active_protection(kn);
-		kernfs_put(kn);
+		kernfs_put(rdtgrp->kn);
 		kfree(rdtgrp);
 	} else {
 		kernfs_unbreak_active_protection(kn);
-- 
cgit v1.2.3-59-g8ed1b


From 8af42949d1681379c1a97d230de9242c1f4f326a Mon Sep 17 00:00:00 2001
From: Stafford Horne <shorne@gmail.com>
Date: Mon, 13 Mar 2017 07:41:55 +0900
Subject: openrisc: xchg: fix `computed is not used` warning

When building allmodconfig this warning shows.

  fs/ocfs2/file.c: In function 'ocfs2_file_write_iter':
  ./arch/openrisc/include/asm/cmpxchg.h:81:3: warning: value computed is
  not used [-Wunused-value]
    ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr))))
     ^

Applying the same patch logic that was done to the cmpxchg macro.

Signed-off-by: Stafford Horne <shorne@gmail.com>
---
 arch/openrisc/include/asm/cmpxchg.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/openrisc/include/asm/cmpxchg.h b/arch/openrisc/include/asm/cmpxchg.h
index 5fcb9ac72693..f0a5d8b844d6 100644
--- a/arch/openrisc/include/asm/cmpxchg.h
+++ b/arch/openrisc/include/asm/cmpxchg.h
@@ -77,7 +77,11 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
 	return val;
 }
 
-#define xchg(ptr, with) \
-	((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr))))
+#define xchg(ptr, with) 						\
+	({								\
+		(__typeof__(*(ptr))) __xchg((unsigned long)(with),	\
+					    (ptr),			\
+					    sizeof(*(ptr)));		\
+	})
 
 #endif /* __ASM_OPENRISC_CMPXCHG_H */
-- 
cgit v1.2.3-59-g8ed1b


From 154e67cd8e8f964809d0e75e44bb121b169c75b3 Mon Sep 17 00:00:00 2001
From: Stafford Horne <shorne@gmail.com>
Date: Mon, 13 Mar 2017 07:44:45 +0900
Subject: openrisc: fix issue handling 8 byte get_user calls

Was getting the following error with allmodconfig:

  ERROR: "__get_user_bad" [lib/test_user_copy.ko] undefined!

This was simply a missing break statement, causing an unwanted fall
through.

Signed-off-by: Stafford Horne <shorne@gmail.com>
---
 arch/openrisc/include/asm/uaccess.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h
index 140faa16685a..1311e6b13991 100644
--- a/arch/openrisc/include/asm/uaccess.h
+++ b/arch/openrisc/include/asm/uaccess.h
@@ -211,7 +211,7 @@ do {									\
 	case 1: __get_user_asm(x, ptr, retval, "l.lbz"); break;		\
 	case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break;		\
 	case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break;		\
-	case 8: __get_user_asm2(x, ptr, retval);			\
+	case 8: __get_user_asm2(x, ptr, retval); break;			\
 	default: (x) = __get_user_bad();				\
 	}								\
 } while (0)
-- 
cgit v1.2.3-59-g8ed1b


From 363dad58e4a0f72dce0bf12d361d617239a80317 Mon Sep 17 00:00:00 2001
From: Stafford Horne <shorne@gmail.com>
Date: Tue, 14 Mar 2017 22:52:49 +0900
Subject: openrisc: Export symbols needed by modules

This was detected by allmodconfig, errors reported:

 ERROR: "empty_zero_page" [net/ceph/libceph.ko] undefined!
 ERROR: "__ucmpdi2" [lib/842/842_decompress.ko] undefined!
 ERROR: "empty_zero_page" [fs/nfs/objlayout/objlayoutdriver.ko] undefined!
 ERROR: "empty_zero_page" [fs/exofs/exofs.ko] undefined!
 ERROR: "empty_zero_page" [fs/crypto/fscrypto.ko] undefined!
 ERROR: "__ucmpdi2" [fs/btrfs/btrfs.ko] undefined!
 ERROR: "pm_power_off" [drivers/regulator/act8865-regulator.ko] undefined!
 ERROR: "__ucmpdi2" [drivers/media/i2c/adv7842.ko] undefined!
 ERROR: "__clear_user" [drivers/md/dm-mod.ko] undefined!
 ERROR: "__clear_user" [net/netfilter/x_tables.ko] undefined!

Signed-off-by: Stafford Horne <shorne@gmail.com>
---
 arch/openrisc/kernel/or32_ksyms.c | 4 ++++
 arch/openrisc/kernel/process.c    | 1 +
 2 files changed, 5 insertions(+)

(limited to 'arch')

diff --git a/arch/openrisc/kernel/or32_ksyms.c b/arch/openrisc/kernel/or32_ksyms.c
index 5c4695d13542..ee3e604959e1 100644
--- a/arch/openrisc/kernel/or32_ksyms.c
+++ b/arch/openrisc/kernel/or32_ksyms.c
@@ -30,6 +30,7 @@
 #include <asm/hardirq.h>
 #include <asm/delay.h>
 #include <asm/pgalloc.h>
+#include <asm/pgtable.h>
 
 #define DECLARE_EXPORT(name) extern void name(void); EXPORT_SYMBOL(name)
 
@@ -42,6 +43,9 @@ DECLARE_EXPORT(__muldi3);
 DECLARE_EXPORT(__ashrdi3);
 DECLARE_EXPORT(__ashldi3);
 DECLARE_EXPORT(__lshrdi3);
+DECLARE_EXPORT(__ucmpdi2);
 
+EXPORT_SYMBOL(empty_zero_page);
 EXPORT_SYMBOL(__copy_tofrom_user);
+EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(memset);
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 828a29110459..f8da545854f9 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -90,6 +90,7 @@ void arch_cpu_idle(void)
 }
 
 void (*pm_power_off) (void) = machine_power_off;
+EXPORT_SYMBOL(pm_power_off);
 
 /*
  * When a process does an "exec", machine state like FPU and debug
-- 
cgit v1.2.3-59-g8ed1b


From 5f655322b1ba4bd46e26e307d04098f9c84df764 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 14 Mar 2017 11:47:29 -0400
Subject: parisc: support R_PARISC_SECREL32 relocation in modules

The parisc kernel doesn't work with CONFIG_MODVERSIONS since the commit
71810db27c1c853b335675bee335d893bc3d324b. It can't load modules with the
error: "module unix: Unknown relocation: 41".

The commit changes __kcrctab from 64-bit valus to 32-bit values. The
assembler generates R_PARISC_SECREL32 secrel relocation for them and the
module loader doesn't support this relocation.

This patch adds the R_PARISC_SECREL32 relocation to the module loader.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org	# v4.10+
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/module.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch')

diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index a0ecdb4abcc8..c66c943d9322 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -620,6 +620,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
 			 */
 			*loc = fsel(val, addend); 
 			break;
+		case R_PARISC_SECREL32:
+			/* 32-bit section relative address. */
+			*loc = fsel(val, addend);
+			break;
 		case R_PARISC_DPREL21L:
 			/* left 21 bit of relative address */
 			val = lrsel(val - dp, addend);
@@ -807,6 +811,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
 			 */
 			*loc = fsel(val, addend); 
 			break;
+		case R_PARISC_SECREL32:
+			/* 32-bit section relative address. */
+			*loc = fsel(val, addend);
+			break;
 		case R_PARISC_FPTR64:
 			/* 64-bit function address */
 			if(in_local(me, (void *)(val + addend))) {
-- 
cgit v1.2.3-59-g8ed1b


From 316ec0624f951166daedbe446988ef92ae72b59f Mon Sep 17 00:00:00 2001
From: John David Anglin <dave.anglin@bell.net>
Date: Sat, 11 Mar 2017 18:03:34 -0500
Subject: parisc: Optimize flush_kernel_vmap_range and
 invalidate_kernel_vmap_range

The previously submitted patch did not resolve the random segmentation
faults observed on the phantom buildd system.  There are still
unresolved problems with the Debian 4.8 and 4.9 kernels on C8000.

The attached patch removes the flush of the offset map pages and does a
whole data cache flush for large ranges.  No other arch flushes the
offset map in these routines as far as I can tell.

I have not observed any random segmentation faults on rp3440 in two
weeks of testing with 4.10.0 and 4.10.1.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Cc: stable@vger.kernel.org      # v4.8+
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/asm/cacheflush.h | 23 ++---------------------
 arch/parisc/kernel/cache.c           | 22 ++++++++++++++++++++++
 2 files changed, 24 insertions(+), 21 deletions(-)

(limited to 'arch')

diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index 19c9c3c5f267..c7e15cc5c668 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -43,28 +43,9 @@ static inline void flush_kernel_dcache_page(struct page *page)
 
 #define flush_kernel_dcache_range(start,size) \
 	flush_kernel_dcache_range_asm((start), (start)+(size));
-/* vmap range flushes and invalidates.  Architecturally, we don't need
- * the invalidate, because the CPU should refuse to speculate once an
- * area has been flushed, so invalidate is left empty */
-static inline void flush_kernel_vmap_range(void *vaddr, int size)
-{
-	unsigned long start = (unsigned long)vaddr;
-
-	flush_kernel_dcache_range_asm(start, start + size);
-}
-static inline void invalidate_kernel_vmap_range(void *vaddr, int size)
-{
-	unsigned long start = (unsigned long)vaddr;
-	void *cursor = vaddr;
 
-	for ( ; cursor < vaddr + size; cursor += PAGE_SIZE) {
-		struct page *page = vmalloc_to_page(cursor);
-
-		if (test_and_clear_bit(PG_dcache_dirty, &page->flags))
-			flush_kernel_dcache_page(page);
-	}
-	flush_kernel_dcache_range_asm(start, start + size);
-}
+void flush_kernel_vmap_range(void *vaddr, int size);
+void invalidate_kernel_vmap_range(void *vaddr, int size);
 
 #define flush_cache_vmap(start, end)		flush_cache_all()
 #define flush_cache_vunmap(start, end)		flush_cache_all()
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 0dc72d5de861..c32a09095216 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -616,3 +616,25 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long
 		__flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
 	}
 }
+
+void flush_kernel_vmap_range(void *vaddr, int size)
+{
+	unsigned long start = (unsigned long)vaddr;
+
+	if ((unsigned long)size > parisc_cache_flush_threshold)
+		flush_data_cache();
+	else
+		flush_kernel_dcache_range_asm(start, start + size);
+}
+EXPORT_SYMBOL(flush_kernel_vmap_range);
+
+void invalidate_kernel_vmap_range(void *vaddr, int size)
+{
+	unsigned long start = (unsigned long)vaddr;
+
+	if ((unsigned long)size > parisc_cache_flush_threshold)
+		flush_data_cache();
+	else
+		flush_kernel_dcache_range_asm(start, start + size);
+}
+EXPORT_SYMBOL(invalidate_kernel_vmap_range);
-- 
cgit v1.2.3-59-g8ed1b


From 63d32d1e09cb2fc65b084b261976c06b40d19115 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 15 Mar 2017 21:10:17 +0100
Subject: parisc: Wire up statx system call

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/uapi/asm/unistd.h | 3 ++-
 arch/parisc/kernel/syscall_table.S    | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h
index 6b0741e7a7ed..667c99421003 100644
--- a/arch/parisc/include/uapi/asm/unistd.h
+++ b/arch/parisc/include/uapi/asm/unistd.h
@@ -362,8 +362,9 @@
 #define __NR_copy_file_range	(__NR_Linux + 346)
 #define __NR_preadv2		(__NR_Linux + 347)
 #define __NR_pwritev2		(__NR_Linux + 348)
+#define __NR_statx		(__NR_Linux + 349)
 
-#define __NR_Linux_syscalls	(__NR_pwritev2 + 1)
+#define __NR_Linux_syscalls	(__NR_statx + 1)
 
 
 #define __IGNORE_select		/* newselect */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 3cfef1de8061..44aeaa9c039f 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -444,6 +444,7 @@
 	ENTRY_SAME(copy_file_range)
 	ENTRY_COMP(preadv2)
 	ENTRY_COMP(pwritev2)
+	ENTRY_SAME(statx)
 
 
 .ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b))
-- 
cgit v1.2.3-59-g8ed1b


From 186ecf14e58befba434f0774eea89e35f64d3c6a Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 15 Mar 2017 21:48:42 +0100
Subject: parisc: Avoid compiler warnings with access_ok()

Commit 09b871ffd4d8 (parisc: Define access_ok() as macro) missed to mark uaddr
as used, which then gives compiler warnings about unused variables.

Fix it by comparing uaddr to uaddr which then gets optimized away by the
compiler.

Signed-off-by: Helge Deller <deller@gmx.de>
Fixes: 09b871ffd4d8 ("parisc: Define access_ok() as macro")
---
 arch/parisc/include/asm/uaccess.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index fb4382c28259..edfbf9d6a6dd 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -32,7 +32,8 @@
  * that put_user is the same as __put_user, etc.
  */
 
-#define access_ok(type, uaddr, size) (1)
+#define access_ok(type, uaddr, size)	\
+	( (uaddr) == (uaddr) )
 
 #define put_user __put_user
 #define get_user __get_user
-- 
cgit v1.2.3-59-g8ed1b


From 6bce725a78de1b171928ce66dec2bae4b569e5d1 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Wed, 8 Mar 2017 14:30:34 +0100
Subject: x86/mpx: Make unnecessarily global function static

Make the function get_user_bd_entry() static as it is not used outside of
arch/x86/mm/mpx.c

This fixes a sparse warning.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/mm/mpx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 5126dfd52b18..cd44ae727df7 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -590,7 +590,7 @@ static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm,
  * we might run off the end of the bounds table if we are on
  * a 64-bit kernel and try to get 8 bytes.
  */
-int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret,
+static int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret,
 		long __user *bd_entry_ptr)
 {
 	u32 bd_entry_32;
-- 
cgit v1.2.3-59-g8ed1b


From f717629c7f834ab2efa05c7dbf0826f1d7c32ade Mon Sep 17 00:00:00 2001
From: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Date: Thu, 16 Mar 2017 14:37:11 +0530
Subject: powerpc: Wire up statx() syscall

Test runs on a ppc64 BE guest succeeded. linux/samples/statx/test-statx
program was executed on the following file types,

1. Regular file
2. Directory
3. device file
4. symlink
5. Named pipe

The test run also included invoking test-statx with the runtime options
provided in the main() function of test-statx.c

Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/systbl.h      | 1 +
 arch/powerpc/include/asm/unistd.h      | 2 +-
 arch/powerpc/include/uapi/asm/unistd.h | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 4b369d83fe9c..1c9470881c4a 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -387,3 +387,4 @@ SYSCALL(copy_file_range)
 COMPAT_SYS_SPU(preadv2)
 COMPAT_SYS_SPU(pwritev2)
 SYSCALL(kexec_file_load)
+SYSCALL(statx)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index eb1acee91a20..9ba11dbcaca9 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,7 +12,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define NR_syscalls		383
+#define NR_syscalls		384
 
 #define __NR__exit __NR_exit
 
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index 2f26335a3c42..b85f14228857 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -393,5 +393,6 @@
 #define __NR_preadv2		380
 #define __NR_pwritev2		381
 #define __NR_kexec_file_load	382
+#define __NR_statx		383
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
-- 
cgit v1.2.3-59-g8ed1b


From d0f33ac9ae7b2a727fb678235ae37baf1d0608d5 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 16 Mar 2017 16:40:24 -0700
Subject: mm, x86: fix native_pud_clear build error

We still get a build error in random configurations, after this has been
modified a few times:

  In file included from include/linux/mm.h:68:0,
                   from include/linux/suspend.h:8,
                   from arch/x86/kernel/asm-offsets.c:12:
  arch/x86/include/asm/pgtable.h:66:26: error: redefinition of 'native_pud_clear'
   #define pud_clear(pud)   native_pud_clear(pud)

My interpretation is that the build error comes from a typo in
__PAGETABLE_PUD_FOLDED, so fix that typo now, and remove the incorrect
#ifdef around the native_pud_clear definition.

Fixes: 3e761a42e19c ("mm, x86: fix HIGHMEM64 && PARAVIRT build config for native_pud_clear()")
Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages")
Link: http://lkml.kernel.org/r/20170314121330.182155-1-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Ackedy-by: Dave Jiang <dave.jiang@intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Borislav Petkov <bp@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/pgtable-3level.h | 3 ---
 arch/x86/include/asm/pgtable.h        | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 72277b1028a5..50d35e3185f5 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -121,12 +121,9 @@ static inline void native_pmd_clear(pmd_t *pmd)
 	*(tmp + 1) = 0;
 }
 
-#if !defined(CONFIG_SMP) || (defined(CONFIG_HIGHMEM64G) && \
-		defined(CONFIG_PARAVIRT))
 static inline void native_pud_clear(pud_t *pudp)
 {
 }
-#endif
 
 static inline void pud_clear(pud_t *pudp)
 {
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1cfb36b8c024..585ee0d42d18 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -62,7 +62,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
 # define set_pud(pudp, pud)		native_set_pud(pudp, pud)
 #endif
 
-#ifndef __PAGETABLE_PMD_FOLDED
+#ifndef __PAGETABLE_PUD_FOLDED
 #define pud_clear(pud)			native_pud_clear(pud)
 #endif
 
-- 
cgit v1.2.3-59-g8ed1b


From 8971e1c79d3f6c9a5e6f7a65c50c41f434a4dae6 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Fri, 17 Mar 2017 16:02:35 +1100
Subject: powerpc/pseries: Don't give a warning when HPT resizing isn't
 available

As of commit 438cc81a41e8 ("powerpc/pseries: Automatically resize HPT
for memory hot add/remove"), when running on the pseries platform, we
always attempt to use the PAPR extension to resize the hashed page
table (HPT) when we add or remove memory.

This is fine, but when the extension is not available we'll give a
harmless, but scary warning. Instead check if the firmware supports HPT
resizing before populating the mmu_hash_ops.resize_hpt pointer.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/lpar.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 251060cf1713..8b1fe895daa3 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -751,7 +751,9 @@ void __init hpte_init_pseries(void)
 	mmu_hash_ops.flush_hash_range	 = pSeries_lpar_flush_hash_range;
 	mmu_hash_ops.hpte_clear_all      = pseries_hpte_clear_all;
 	mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
-	mmu_hash_ops.resize_hpt		 = pseries_lpar_resize_hpt;
+
+	if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
+		mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
 }
 
 void radix_init_pseries(void)
-- 
cgit v1.2.3-59-g8ed1b


From 5dc855d44c2ad960a86f593c60461f1ae1566b6d Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 16 Mar 2017 12:59:39 -0700
Subject: x86/perf: Fix CR4.PCE propagation to use active_mm instead of mm

If one thread mmaps a perf event while another thread in the same mm
is in some context where active_mm != mm (which can happen in the
scheduler, for example), refresh_pce() would write the wrong value
to CR4.PCE.  This broke some PAPI tests.

Reported-and-tested-by: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Fixes: 7911d3f7af14 ("perf/x86: Only allow rdpmc if a perf_event is mapped")
Link: http://lkml.kernel.org/r/0c5b38a76ea50e405f9abe07a13dfaef87c173a1.1489694270.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 1635c0c8df23..e07b36c5588a 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2100,8 +2100,8 @@ static int x86_pmu_event_init(struct perf_event *event)
 
 static void refresh_pce(void *ignored)
 {
-	if (current->mm)
-		load_mm_cr4(current->mm);
+	if (current->active_mm)
+		load_mm_cr4(current->active_mm);
 }
 
 static void x86_pmu_event_mapped(struct perf_event *event)
-- 
cgit v1.2.3-59-g8ed1b


From 4b07372a32c0c1505a7634ad7e607d83340ef645 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 16 Mar 2017 12:59:40 -0700
Subject: x86/perf: Clarify why x86_pmu_event_mapped() isn't racy

Naively, it looks racy, but ->mmap_sem saves it.  Add a comment and a
lockdep assertion.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/03a1e629063899168dfc4707f3bb6e581e21f5c6.1489694270.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/core.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index e07b36c5588a..183a972f9210 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2109,6 +2109,18 @@ static void x86_pmu_event_mapped(struct perf_event *event)
 	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
 		return;
 
+	/*
+	 * This function relies on not being called concurrently in two
+	 * tasks in the same mm.  Otherwise one task could observe
+	 * perf_rdpmc_allowed > 1 and return all the way back to
+	 * userspace with CR4.PCE clear while another task is still
+	 * doing on_each_cpu_mask() to propagate CR4.PCE.
+	 *
+	 * For now, this can't happen because all callers hold mmap_sem
+	 * for write.  If this changes, we'll need a different solution.
+	 */
+	lockdep_assert_held_exclusive(&current->mm->mmap_sem);
+
 	if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
 		on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 74e3f6e63da6c8e8246fba1689e040bc926b4a1a Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 14 Mar 2017 15:24:51 +0530
Subject: parisc: perf: Fix potential NULL pointer dereference

Fix potential NULL pointer dereference and clean up
coding style errors (code indent, trailing whitespaces).

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/perf.c | 94 ++++++++++++++++++++++++-----------------------
 1 file changed, 49 insertions(+), 45 deletions(-)

(limited to 'arch')

diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c
index e282a5131d77..6017a5af2e6e 100644
--- a/arch/parisc/kernel/perf.c
+++ b/arch/parisc/kernel/perf.c
@@ -39,7 +39,7 @@
  *  the PDC INTRIGUE calls.  This is done to eliminate bugs introduced
  *  in various PDC revisions.  The code is much more maintainable
  *  and reliable this way vs having to debug on every version of PDC
- *  on every box. 
+ *  on every box.
  */
 
 #include <linux/capability.h>
@@ -195,8 +195,8 @@ static int perf_config(uint32_t *image_ptr);
 static int perf_release(struct inode *inode, struct file *file);
 static int perf_open(struct inode *inode, struct file *file);
 static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos);
-static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, 
-	loff_t *ppos);
+static ssize_t perf_write(struct file *file, const char __user *buf,
+	size_t count, loff_t *ppos);
 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 static void perf_start_counters(void);
 static int perf_stop_counters(uint32_t *raddr);
@@ -222,7 +222,7 @@ extern void perf_intrigue_disable_perf_counters (void);
 /*
  * configure:
  *
- * Configure the cpu with a given data image.  First turn off the counters, 
+ * Configure the cpu with a given data image.  First turn off the counters,
  * then download the image, then turn the counters back on.
  */
 static int perf_config(uint32_t *image_ptr)
@@ -234,7 +234,7 @@ static int perf_config(uint32_t *image_ptr)
 	error = perf_stop_counters(raddr);
 	if (error != 0) {
 		printk("perf_config: perf_stop_counters = %ld\n", error);
-		return -EINVAL; 
+		return -EINVAL;
 	}
 
 printk("Preparing to write image\n");
@@ -242,7 +242,7 @@ printk("Preparing to write image\n");
 	error = perf_write_image((uint64_t *)image_ptr);
 	if (error != 0) {
 		printk("perf_config: DOWNLOAD = %ld\n", error);
-		return -EINVAL; 
+		return -EINVAL;
 	}
 
 printk("Preparing to start counters\n");
@@ -254,7 +254,7 @@ printk("Preparing to start counters\n");
 }
 
 /*
- * Open the device and initialize all of its memory.  The device is only 
+ * Open the device and initialize all of its memory.  The device is only
  * opened once, but can be "queried" by multiple processes that know its
  * file descriptor.
  */
@@ -298,19 +298,19 @@ static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t
  * called on the processor that the download should happen
  * on.
  */
-static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, 
-	loff_t *ppos)
+static ssize_t perf_write(struct file *file, const char __user *buf,
+	size_t count, loff_t *ppos)
 {
 	size_t image_size;
 	uint32_t image_type;
 	uint32_t interface_type;
 	uint32_t test;
 
-	if (perf_processor_interface == ONYX_INTF) 
+	if (perf_processor_interface == ONYX_INTF)
 		image_size = PCXU_IMAGE_SIZE;
-	else if (perf_processor_interface == CUDA_INTF) 
+	else if (perf_processor_interface == CUDA_INTF)
 		image_size = PCXW_IMAGE_SIZE;
-	else 
+	else
 		return -EFAULT;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -330,22 +330,22 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun
 
 	/* First check the machine type is correct for
 	   the requested image */
-        if (((perf_processor_interface == CUDA_INTF) &&
-		       (interface_type != CUDA_INTF)) ||
-	    ((perf_processor_interface == ONYX_INTF) &&
-	               (interface_type != ONYX_INTF))) 
+	if (((perf_processor_interface == CUDA_INTF) &&
+			(interface_type != CUDA_INTF)) ||
+		((perf_processor_interface == ONYX_INTF) &&
+			(interface_type != ONYX_INTF)))
 		return -EINVAL;
 
 	/* Next check to make sure the requested image
 	   is valid */
-	if (((interface_type == CUDA_INTF) && 
+	if (((interface_type == CUDA_INTF) &&
 		       (test >= MAX_CUDA_IMAGES)) ||
-	    ((interface_type == ONYX_INTF) && 
-		       (test >= MAX_ONYX_IMAGES))) 
+	    ((interface_type == ONYX_INTF) &&
+		       (test >= MAX_ONYX_IMAGES)))
 		return -EINVAL;
 
 	/* Copy the image into the processor */
-	if (interface_type == CUDA_INTF) 
+	if (interface_type == CUDA_INTF)
 		return perf_config(cuda_images[test]);
 	else
 		return perf_config(onyx_images[test]);
@@ -359,7 +359,7 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun
 static void perf_patch_images(void)
 {
 #if 0 /* FIXME!! */
-/* 
+/*
  * NOTE:  this routine is VERY specific to the current TLB image.
  * If the image is changed, this routine might also need to be changed.
  */
@@ -367,9 +367,9 @@ static void perf_patch_images(void)
 	extern void $i_dtlb_miss_2_0();
 	extern void PA2_0_iva();
 
-	/* 
+	/*
 	 * We can only use the lower 32-bits, the upper 32-bits should be 0
-	 * anyway given this is in the kernel 
+	 * anyway given this is in the kernel
 	 */
 	uint32_t itlb_addr  = (uint32_t)&($i_itlb_miss_2_0);
 	uint32_t dtlb_addr  = (uint32_t)&($i_dtlb_miss_2_0);
@@ -377,21 +377,21 @@ static void perf_patch_images(void)
 
 	if (perf_processor_interface == ONYX_INTF) {
 		/* clear last 2 bytes */
-		onyx_images[TLBMISS][15] &= 0xffffff00;  
+		onyx_images[TLBMISS][15] &= 0xffffff00;
 		/* set 2 bytes */
 		onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
 		onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00;
 		onyx_images[TLBMISS][17] = itlb_addr;
 
 		/* clear last 2 bytes */
-		onyx_images[TLBHANDMISS][15] &= 0xffffff00;  
+		onyx_images[TLBHANDMISS][15] &= 0xffffff00;
 		/* set 2 bytes */
 		onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
 		onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00;
 		onyx_images[TLBHANDMISS][17] = itlb_addr;
 
 		/* clear last 2 bytes */
-		onyx_images[BIG_CPI][15] &= 0xffffff00;  
+		onyx_images[BIG_CPI][15] &= 0xffffff00;
 		/* set 2 bytes */
 		onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24));
 		onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00;
@@ -404,24 +404,24 @@ static void perf_patch_images(void)
 
 	} else if (perf_processor_interface == CUDA_INTF) {
 		/* Cuda interface */
-		cuda_images[TLBMISS][16] =  
+		cuda_images[TLBMISS][16] =
 			(cuda_images[TLBMISS][16]&0xffff0000) |
 			((dtlb_addr >> 8)&0x0000ffff);
-		cuda_images[TLBMISS][17] = 
+		cuda_images[TLBMISS][17] =
 			((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
 		cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000;
 
-		cuda_images[TLBHANDMISS][16] = 
+		cuda_images[TLBHANDMISS][16] =
 			(cuda_images[TLBHANDMISS][16]&0xffff0000) |
 			((dtlb_addr >> 8)&0x0000ffff);
-		cuda_images[TLBHANDMISS][17] = 
+		cuda_images[TLBHANDMISS][17] =
 			((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
 		cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000;
 
-		cuda_images[BIG_CPI][16] = 
+		cuda_images[BIG_CPI][16] =
 			(cuda_images[BIG_CPI][16]&0xffff0000) |
 			((dtlb_addr >> 8)&0x0000ffff);
-		cuda_images[BIG_CPI][17] = 
+		cuda_images[BIG_CPI][17] =
 			((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
 		cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000;
 	} else {
@@ -433,7 +433,7 @@ static void perf_patch_images(void)
 
 /*
  * ioctl routine
- * All routines effect the processor that they are executed on.  Thus you 
+ * All routines effect the processor that they are executed on.  Thus you
  * must be running on the processor that you wish to change.
  */
 
@@ -459,7 +459,7 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 			}
 
 			/* copy out the Counters */
-			if (copy_to_user((void __user *)arg, raddr, 
+			if (copy_to_user((void __user *)arg, raddr,
 					sizeof (raddr)) != 0) {
 				error =  -EFAULT;
 				break;
@@ -487,7 +487,7 @@ static const struct file_operations perf_fops = {
 	.open = perf_open,
 	.release = perf_release
 };
-	
+
 static struct miscdevice perf_dev = {
 	MISC_DYNAMIC_MINOR,
 	PA_PERF_DEV,
@@ -595,7 +595,7 @@ static int perf_stop_counters(uint32_t *raddr)
 		/* OR sticky2 (bit 1496) to counter2 bit 32 */
 		tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000;
 		raddr[2] = (uint32_t)tmp64;
-		
+
 		/* Counter3 is bits 1497 to 1528 */
 		tmp64 =  (userbuf[23] >> 7) & 0x00000000ffffffff;
 		/* OR sticky3 (bit 1529) to counter3 bit 32 */
@@ -617,7 +617,7 @@ static int perf_stop_counters(uint32_t *raddr)
 		userbuf[22] = 0;
 		userbuf[23] = 0;
 
-		/* 
+		/*
 		 * Write back the zeroed bytes + the image given
 		 * the read was destructive.
 		 */
@@ -625,13 +625,13 @@ static int perf_stop_counters(uint32_t *raddr)
 	} else {
 
 		/*
-		 * Read RDR-15 which contains the counters and sticky bits 
+		 * Read RDR-15 which contains the counters and sticky bits
 		 */
 		if (!perf_rdr_read_ubuf(15, userbuf)) {
 			return -13;
 		}
 
-		/* 
+		/*
 		 * Clear out the counters
 		 */
 		perf_rdr_clear(15);
@@ -644,7 +644,7 @@ static int perf_stop_counters(uint32_t *raddr)
 		raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL);
 		raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL);
 	}
- 
+
 	return 0;
 }
 
@@ -682,7 +682,7 @@ static int perf_rdr_read_ubuf(uint32_t	rdr_num, uint64_t *buffer)
 	i = tentry->num_words;
 	while (i--) {
 		buffer[i] = 0;
-	}	
+	}
 
 	/* Check for bits an even number of 64 */
 	if ((xbits = width & 0x03f) != 0) {
@@ -808,18 +808,22 @@ static int perf_write_image(uint64_t *memaddr)
 	}
 
 	runway = ioremap_nocache(cpu_device->hpa.start, 4096);
+	if (!runway) {
+		pr_err("perf_write_image: ioremap failed!\n");
+		return -ENOMEM;
+	}
 
 	/* Merge intrigue bits into Runway STATUS 0 */
 	tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful;
-	__raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul), 
+	__raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul),
 		     runway + RUNWAY_STATUS);
-	
+
 	/* Write RUNWAY DEBUG registers */
 	for (i = 0; i < 8; i++) {
 		__raw_writeq(*memaddr++, runway + RUNWAY_DEBUG);
 	}
 
-	return 0; 
+	return 0;
 }
 
 /*
@@ -843,7 +847,7 @@ printk("perf_rdr_write\n");
 			perf_rdr_shift_out_U(rdr_num, buffer[i]);
 		} else {
 			perf_rdr_shift_out_W(rdr_num, buffer[i]);
-		}	
+		}
 	}
 printk("perf_rdr_write done\n");
 }
-- 
cgit v1.2.3-59-g8ed1b


From 73580dac7618e4bcd21679f553cf3c97323fec46 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sat, 18 Mar 2017 17:13:27 +0100
Subject: parisc: Fix system shutdown halt

On those parisc machines which don't provide a software power off
function, the system currently kills the init process at the end of a
shutdown and unexpectedly restarts insteads of halting.
Fix it by adding a loop which will not return.

Signed-off-by: Helge Deller <deller@gmx.de>
Cc: stable@vger.kernel.org # 4.9+
---
 arch/parisc/kernel/process.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 06f7ca7fe70b..b76f503eee4a 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -142,6 +142,8 @@ void machine_power_off(void)
 
 	printk(KERN_EMERG "System shut down completed.\n"
 	       "Please power this system off now.");
+
+	for (;;);
 }
 
 void (*pm_power_off)(void) = machine_power_off;
-- 
cgit v1.2.3-59-g8ed1b