From a13cff318cafbd493b8d5d679e5f3f761084c4fe Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Thu, 23 Oct 2014 12:07:14 +0200
Subject: s390/mm: refactor global pgste updates

Replace the s390 specific page table walker for the pgste updates
with a call to the common code walk_page_range function.
There are now two pte modification functions, one for the reset
of the CMMA state and another one for the initialization of the
storage keys.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/pgalloc.h |   2 -
 arch/s390/include/asm/pgtable.h |   1 +
 arch/s390/kvm/kvm-s390.c        |   2 +-
 arch/s390/mm/pgtable.c          | 153 ++++++++++++++--------------------------
 4 files changed, 56 insertions(+), 102 deletions(-)

diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index d39a31c3cdf2..e510b9460efa 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -22,8 +22,6 @@ unsigned long *page_table_alloc(struct mm_struct *);
 void page_table_free(struct mm_struct *, unsigned long *);
 void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
 
-void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long,
-			    bool init_skey);
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned long key, bool nq);
 
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 57c882761dea..4399be1aaeff 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1747,6 +1747,7 @@ extern int vmem_add_mapping(unsigned long start, unsigned long size);
 extern int vmem_remove_mapping(unsigned long start, unsigned long size);
 extern int s390_enable_sie(void);
 extern void s390_enable_skey(void);
+extern void s390_reset_cmma(struct mm_struct *mm);
 
 /*
  * No page table caches to initialise
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 55aade49b6d1..6b049ee75a56 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -271,7 +271,7 @@ static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 	case KVM_S390_VM_MEM_CLR_CMMA:
 		mutex_lock(&kvm->lock);
 		idx = srcu_read_lock(&kvm->srcu);
-		page_table_reset_pgste(kvm->arch.gmap->mm, 0, TASK_SIZE, false);
+		s390_reset_cmma(kvm->arch.gmap->mm);
 		srcu_read_unlock(&kvm->srcu, idx);
 		mutex_unlock(&kvm->lock);
 		ret = 0;
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 1b79ca67392f..019afdf50b1a 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -834,99 +834,6 @@ static inline void page_table_free_pgste(unsigned long *table)
 	__free_page(page);
 }
 
-static inline unsigned long page_table_reset_pte(struct mm_struct *mm, pmd_t *pmd,
-			unsigned long addr, unsigned long end, bool init_skey)
-{
-	pte_t *start_pte, *pte;
-	spinlock_t *ptl;
-	pgste_t pgste;
-
-	start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
-	pte = start_pte;
-	do {
-		pgste = pgste_get_lock(pte);
-		pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
-		if (init_skey) {
-			unsigned long address;
-
-			pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
-					      PGSTE_GR_BIT | PGSTE_GC_BIT);
-
-			/* skip invalid and not writable pages */
-			if (pte_val(*pte) & _PAGE_INVALID ||
-			    !(pte_val(*pte) & _PAGE_WRITE)) {
-				pgste_set_unlock(pte, pgste);
-				continue;
-			}
-
-			address = pte_val(*pte) & PAGE_MASK;
-			page_set_storage_key(address, PAGE_DEFAULT_KEY, 1);
-		}
-		pgste_set_unlock(pte, pgste);
-	} while (pte++, addr += PAGE_SIZE, addr != end);
-	pte_unmap_unlock(start_pte, ptl);
-
-	return addr;
-}
-
-static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, pud_t *pud,
-			unsigned long addr, unsigned long end, bool init_skey)
-{
-	unsigned long next;
-	pmd_t *pmd;
-
-	pmd = pmd_offset(pud, addr);
-	do {
-		next = pmd_addr_end(addr, end);
-		if (pmd_none_or_clear_bad(pmd))
-			continue;
-		next = page_table_reset_pte(mm, pmd, addr, next, init_skey);
-	} while (pmd++, addr = next, addr != end);
-
-	return addr;
-}
-
-static inline unsigned long page_table_reset_pud(struct mm_struct *mm, pgd_t *pgd,
-			unsigned long addr, unsigned long end, bool init_skey)
-{
-	unsigned long next;
-	pud_t *pud;
-
-	pud = pud_offset(pgd, addr);
-	do {
-		next = pud_addr_end(addr, end);
-		if (pud_none_or_clear_bad(pud))
-			continue;
-		next = page_table_reset_pmd(mm, pud, addr, next, init_skey);
-	} while (pud++, addr = next, addr != end);
-
-	return addr;
-}
-
-void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
-			    unsigned long end, bool init_skey)
-{
-	unsigned long addr, next;
-	pgd_t *pgd;
-
-	down_write(&mm->mmap_sem);
-	if (init_skey && mm_use_skey(mm))
-		goto out_up;
-	addr = start;
-	pgd = pgd_offset(mm, addr);
-	do {
-		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd))
-			continue;
-		next = page_table_reset_pud(mm, pgd, addr, next, init_skey);
-	} while (pgd++, addr = next, addr != end);
-	if (init_skey)
-		current->mm->context.use_skey = 1;
-out_up:
-	up_write(&mm->mmap_sem);
-}
-EXPORT_SYMBOL(page_table_reset_pgste);
-
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned long key, bool nq)
 {
@@ -992,11 +899,6 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
 	return NULL;
 }
 
-void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
-			    unsigned long end, bool init_skey)
-{
-}
-
 static inline void page_table_free_pgste(unsigned long *table)
 {
 }
@@ -1347,12 +1249,65 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
  * Enable storage key handling from now on and initialize the storage
  * keys with the default key.
  */
+static int __s390_enable_skey(pte_t *pte, unsigned long addr,
+			      unsigned long next, struct mm_walk *walk)
+{
+	unsigned long ptev;
+	pgste_t pgste;
+
+	pgste = pgste_get_lock(pte);
+	/* Clear storage key */
+	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
+			      PGSTE_GR_BIT | PGSTE_GC_BIT);
+	ptev = pte_val(*pte);
+	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
+		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
+	pgste_set_unlock(pte, pgste);
+	return 0;
+}
+
 void s390_enable_skey(void)
 {
-	page_table_reset_pgste(current->mm, 0, TASK_SIZE, true);
+	struct mm_walk walk = { .pte_entry = __s390_enable_skey };
+	struct mm_struct *mm = current->mm;
+
+	down_write(&mm->mmap_sem);
+	if (mm_use_skey(mm))
+		goto out_up;
+	walk.mm = mm;
+	walk_page_range(0, TASK_SIZE, &walk);
+	mm->context.use_skey = 1;
+
+out_up:
+	up_write(&mm->mmap_sem);
 }
 EXPORT_SYMBOL_GPL(s390_enable_skey);
 
+/*
+ * Reset CMMA state, make all pages stable again.
+ */
+static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
+			     unsigned long next, struct mm_walk *walk)
+{
+	pgste_t pgste;
+
+	pgste = pgste_get_lock(pte);
+	pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
+	pgste_set_unlock(pte, pgste);
+	return 0;
+}
+
+void s390_reset_cmma(struct mm_struct *mm)
+{
+	struct mm_walk walk = { .pte_entry = __s390_reset_cmma };
+
+	down_write(&mm->mmap_sem);
+	walk.mm = mm;
+	walk_page_range(0, TASK_SIZE, &walk);
+	up_write(&mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(s390_reset_cmma);
+
 /*
  * Test and reset if a guest page is dirty
  */
-- 
cgit v1.2.3-59-g8ed1b


From 593befa6ab74a805e4f503c8c737c3cffa8066b6 Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Thu, 23 Oct 2014 12:07:44 +0200
Subject: mm: introduce mm_forbids_zeropage function

Add a new function stub to allow architectures to disable, for
an mm_struct, the backing of non-present, anonymous pages with
read-only empty zero pages.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 include/linux/mm.h | 11 +++++++++++
 mm/huge_memory.c   |  2 +-
 mm/memory.c        |  2 +-
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 27eb1bfbe704..ab7dadca4ea5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -56,6 +56,17 @@ extern int sysctl_legacy_va_layout;
 #define __pa_symbol(x)  __pa(RELOC_HIDE((unsigned long)(x), 0))
 #endif
 
+/*
+ * To prevent common memory management code establishing
+ * a zero page mapping on a read fault.
+ * This macro should be defined within <asm/pgtable.h>.
+ * s390 does this to prevent multiplexing of hardware bits
+ * related to the physical page in case of virtualization.
+ */
+#ifndef mm_forbids_zeropage
+#define mm_forbids_zeropage(X)	(0)
+#endif
+
 extern unsigned long sysctl_user_reserve_kbytes;
 extern unsigned long sysctl_admin_reserve_kbytes;
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 74c78aa8bc2f..7e9c15cb93a9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -805,7 +805,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return VM_FAULT_OOM;
 	if (unlikely(khugepaged_enter(vma)))
 		return VM_FAULT_OOM;
-	if (!(flags & FAULT_FLAG_WRITE) &&
+	if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm) &&
 			transparent_hugepage_use_zero_page()) {
 		spinlock_t *ptl;
 		pgtable_t pgtable;
diff --git a/mm/memory.c b/mm/memory.c
index 1cc6bfbd872e..d722d4f481c9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2640,7 +2640,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return VM_FAULT_SIGBUS;
 
 	/* Use the zero-page for reads */
-	if (!(flags & FAULT_FLAG_WRITE)) {
+	if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm)) {
 		entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
 						vma->vm_page_prot));
 		page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-- 
cgit v1.2.3-59-g8ed1b


From 2faee8ff9dc6f4bfe46f6d2d110add858140fb20 Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Thu, 23 Oct 2014 12:08:38 +0200
Subject: s390/mm: prevent and break zero page mappings in case of storage keys

As soon as storage keys are enabled we need to stop working on zero page
mappings to prevent inconsistencies between storage keys and pgste.

Otherwise the following data corruption could happen:
1) guest enables storage key
2) guest sets storage key for not mapped page X
   -> change goes to PGSTE
3) guest reads from page X
   -> as X was not dirty before, the page will be zero page backed,
      storage key from PGSTE for X will go to storage key for zero page
4) guest sets storage key for not mapped page Y (same logic as above)
5) guest reads from page Y
   -> as Y was not dirty before, the page will be zero page backed,
      storage key from PGSTE for Y will go to storage key for zero page
      overwriting storage key for X

While holding the mmap sem, we are safe against changes on entries we
already fixed, as every fault would need to take the mmap_sem (read).

Other vCPUs executing storage key instructions will get a one time interception
and be serialized also with mmap_sem.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/pgtable.h |  5 +++++
 arch/s390/mm/pgtable.c          | 13 ++++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 4399be1aaeff..df2e7f14ffb7 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -479,6 +479,11 @@ static inline int mm_has_pgste(struct mm_struct *mm)
 	return 0;
 }
 
+/*
+ * In the case that a guest uses storage keys
+ * faults should no longer be backed by zero pages
+ */
+#define mm_forbids_zeropage mm_use_skey
 static inline int mm_use_skey(struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 019afdf50b1a..0f1e9ff6bc12 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -1256,6 +1256,15 @@ static int __s390_enable_skey(pte_t *pte, unsigned long addr,
 	pgste_t pgste;
 
 	pgste = pgste_get_lock(pte);
+	/*
+	 * Remove all zero page mappings,
+	 * after establishing a policy to forbid zero page mappings
+	 * following faults for that page will get fresh anonymous pages
+	 */
+	if (is_zero_pfn(pte_pfn(*pte))) {
+		ptep_flush_direct(walk->mm, addr, pte);
+		pte_val(*pte) = _PAGE_INVALID;
+	}
 	/* Clear storage key */
 	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
 			      PGSTE_GR_BIT | PGSTE_GC_BIT);
@@ -1274,9 +1283,11 @@ void s390_enable_skey(void)
 	down_write(&mm->mmap_sem);
 	if (mm_use_skey(mm))
 		goto out_up;
+
+	mm->context.use_skey = 1;
+
 	walk.mm = mm;
 	walk_page_range(0, TASK_SIZE, &walk);
-	mm->context.use_skey = 1;
 
 out_up:
 	up_write(&mm->mmap_sem);
-- 
cgit v1.2.3-59-g8ed1b


From 3ac8e38015d4fd1c12e4e048a01a9f059a2053a2 Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Thu, 23 Oct 2014 12:09:17 +0200
Subject: s390/mm: disable KSM for storage key enabled pages

When storage keys are enabled, unmerge already merged pages and prevent
new pages from being merged.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/pgtable.h |  2 +-
 arch/s390/kvm/priv.c            | 17 ++++++++++++-----
 arch/s390/mm/pgtable.c          | 16 +++++++++++++++-
 3 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index df2e7f14ffb7..00d460742e1e 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1751,7 +1751,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
 extern int vmem_remove_mapping(unsigned long start, unsigned long size);
 extern int s390_enable_sie(void);
-extern void s390_enable_skey(void);
+extern int s390_enable_skey(void);
 extern void s390_reset_cmma(struct mm_struct *mm);
 
 /*
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 72bb2dd8b9cd..f47cb0c6d906 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -156,21 +156,25 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static void __skey_check_enable(struct kvm_vcpu *vcpu)
+static int __skey_check_enable(struct kvm_vcpu *vcpu)
 {
+	int rc = 0;
 	if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)))
-		return;
+		return rc;
 
-	s390_enable_skey();
+	rc = s390_enable_skey();
 	trace_kvm_s390_skey_related_inst(vcpu);
 	vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+	return rc;
 }
 
 
 static int handle_skey(struct kvm_vcpu *vcpu)
 {
-	__skey_check_enable(vcpu);
+	int rc = __skey_check_enable(vcpu);
 
+	if (rc)
+		return rc;
 	vcpu->stat.instruction_storage_key++;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
@@ -683,7 +687,10 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 		}
 
 		if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) {
-			__skey_check_enable(vcpu);
+			int rc = __skey_check_enable(vcpu);
+
+			if (rc)
+				return rc;
 			if (set_guest_storage_key(current->mm, useraddr,
 					vcpu->run->s.regs.gprs[reg1] & PFMF_KEY,
 					vcpu->run->s.regs.gprs[reg1] & PFMF_NQ))
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 0f1e9ff6bc12..b1871d39e46e 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -18,6 +18,8 @@
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
 #include <linux/swapops.h>
+#include <linux/ksm.h>
+#include <linux/mman.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -1275,22 +1277,34 @@ static int __s390_enable_skey(pte_t *pte, unsigned long addr,
 	return 0;
 }
 
-void s390_enable_skey(void)
+int s390_enable_skey(void)
 {
 	struct mm_walk walk = { .pte_entry = __s390_enable_skey };
 	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	int rc = 0;
 
 	down_write(&mm->mmap_sem);
 	if (mm_use_skey(mm))
 		goto out_up;
 
 	mm->context.use_skey = 1;
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
+				MADV_UNMERGEABLE, &vma->vm_flags)) {
+			mm->context.use_skey = 0;
+			rc = -ENOMEM;
+			goto out_up;
+		}
+	}
+	mm->def_flags &= ~VM_MERGEABLE;
 
 	walk.mm = mm;
 	walk_page_range(0, TASK_SIZE, &walk);
 
 out_up:
 	up_write(&mm->mmap_sem);
+	return rc;
 }
 EXPORT_SYMBOL_GPL(s390_enable_skey);
 
-- 
cgit v1.2.3-59-g8ed1b


From f7f242ff004499e0904d3664713dfba01f24c408 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 15 Oct 2014 12:17:34 +0200
Subject: kprobes: introduce weak arch_check_ftrace_location() helper function

Introduce weak arch_check_ftrace_location() helper function which
architectures can override in order to implement handling of kprobes
on function tracer call sites on their own, without depending on
common code or implementing the KPROBES_ON_FTRACE feature.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 include/linux/kprobes.h |  1 +
 kernel/kprobes.c        | 18 +++++++++++-------
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index f7296e57d614..5297f9fa0ef2 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -335,6 +335,7 @@ extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
 #endif
 
+int arch_check_ftrace_location(struct kprobe *p);
 
 /* Get the kprobe at this addr (if any) - called with preemption disabled */
 struct kprobe *get_kprobe(void *addr);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3995f546d0f3..317eb8ad28dd 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1410,16 +1410,10 @@ static inline int check_kprobe_rereg(struct kprobe *p)
 	return ret;
 }
 
-static int check_kprobe_address_safe(struct kprobe *p,
-				     struct module **probed_mod)
+int __weak arch_check_ftrace_location(struct kprobe *p)
 {
-	int ret = 0;
 	unsigned long ftrace_addr;
 
-	/*
-	 * If the address is located on a ftrace nop, set the
-	 * breakpoint to the following instruction.
-	 */
 	ftrace_addr = ftrace_location((unsigned long)p->addr);
 	if (ftrace_addr) {
 #ifdef CONFIG_KPROBES_ON_FTRACE
@@ -1431,7 +1425,17 @@ static int check_kprobe_address_safe(struct kprobe *p,
 		return -EINVAL;
 #endif
 	}
+	return 0;
+}
 
+static int check_kprobe_address_safe(struct kprobe *p,
+				     struct module **probed_mod)
+{
+	int ret;
+
+	ret = arch_check_ftrace_location(p);
+	if (ret)
+		return ret;
 	jump_label_lock();
 	preempt_disable();
 
-- 
cgit v1.2.3-59-g8ed1b


From c933146a5e41e42ea3eb4f34fa02e201da3f068e Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 15 Oct 2014 12:17:38 +0200
Subject: s390/ftrace,kprobes: allow to patch first instruction

If the function tracer is enabled, allow setting kprobes on the first
instruction of a function (which is the function trace caller):

If no kprobe is set, enabling and disabling function tracing of a
function simply patches the first instruction. Either it is a nop
(right now it's an unconditional branch, which skips the mcount block),
or it's a branch to the ftrace_caller() function.

If a kprobe is being placed on a function tracer calling instruction
we encode if we actually have a nop or branch in the remaining bytes
after the breakpoint instruction (illegal opcode).
This is possible, since the size of the instruction used for the nop
and branch is six bytes, while the size of the breakpoint is only
two bytes.
Therefore the first two bytes contain the illegal opcode and the last
four bytes contain either "0" for nop or "1" for branch. The kprobes
code will then execute/simulate the correct instruction.

Instruction patching for kprobes and function tracer is always done
with stop_machine(). Therefore we don't have any races where an
instruction is patched concurrently on a different cpu.
Besides that also the program check handler which executes the function
trace caller instruction won't be executed concurrently to any
stop_machine() execution.

This allows keeping full fault-based kprobes handling, which generates
correct pt_regs contents automatically.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/ftrace.h  |  52 ++++++++++++++--
 arch/s390/include/asm/kprobes.h |   1 +
 arch/s390/include/asm/lowcore.h |   4 +-
 arch/s390/include/asm/pgtable.h |  12 ++++
 arch/s390/kernel/asm-offsets.c  |   1 -
 arch/s390/kernel/early.c        |   4 --
 arch/s390/kernel/ftrace.c       | 132 +++++++++++++++++++++++++---------------
 arch/s390/kernel/kprobes.c      |  92 ++++++++++++++++++++--------
 arch/s390/kernel/mcount.S       |   1 +
 arch/s390/kernel/setup.c        |   2 -
 arch/s390/kernel/smp.c          |   1 -
 scripts/recordmcount.c          |   2 +-
 scripts/recordmcount.pl         |   2 +-
 13 files changed, 214 insertions(+), 92 deletions(-)

diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 3aef8afec336..785041f1dc77 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -1,25 +1,67 @@
 #ifndef _ASM_S390_FTRACE_H
 #define _ASM_S390_FTRACE_H
 
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+
+#define MCOUNT_INSN_SIZE	24
+#define MCOUNT_RETURN_FIXUP	18
+
 #ifndef __ASSEMBLY__
 
-extern void _mcount(void);
+void _mcount(void);
+void ftrace_caller(void);
+
 extern char ftrace_graph_caller_end;
+extern unsigned long ftrace_plt;
 
 struct dyn_arch_ftrace { };
 
-#define MCOUNT_ADDR ((long)_mcount)
+#define MCOUNT_ADDR ((unsigned long)_mcount)
+#define FTRACE_ADDR ((unsigned long)ftrace_caller)
 
+#define KPROBE_ON_FTRACE_NOP	0
+#define KPROBE_ON_FTRACE_CALL	1
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
 	return addr;
 }
 
-#endif /* __ASSEMBLY__ */
+struct ftrace_insn {
+	u16 opc;
+	s32 disp;
+} __packed;
 
-#define MCOUNT_INSN_SIZE  18
+static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_FUNCTION_TRACER
+	/* jg .+24 */
+	insn->opc = 0xc0f4;
+	insn->disp = MCOUNT_INSN_SIZE / 2;
+#endif
+}
 
-#define ARCH_SUPPORTS_FTRACE_OPS 1
+static inline int is_ftrace_nop(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_FUNCTION_TRACER
+	if (insn->disp == MCOUNT_INSN_SIZE / 2)
+		return 1;
+#endif
+	return 0;
+}
+
+static inline void ftrace_generate_call_insn(struct ftrace_insn *insn,
+					     unsigned long ip)
+{
+#ifdef CONFIG_FUNCTION_TRACER
+	unsigned long target;
 
+	/* brasl r0,ftrace_caller */
+	target = is_module_addr((void *) ip) ? ftrace_plt : FTRACE_ADDR;
+	insn->opc = 0xc005;
+	insn->disp = (target - ip) / 2;
+#endif
+}
+
+#endif /* __ASSEMBLY__ */
 #endif /* _ASM_S390_FTRACE_H */
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index 98629173ce3b..b47ad3b642cc 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -60,6 +60,7 @@ typedef u16 kprobe_opcode_t;
 struct arch_specific_insn {
 	/* copy of original instruction */
 	kprobe_opcode_t *insn;
+	unsigned int is_ftrace_insn : 1;
 };
 
 struct prev_kprobe {
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 6cc51fe84410..34fbcac61133 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -147,7 +147,7 @@ struct _lowcore {
 	__u32	softirq_pending;		/* 0x02ec */
 	__u32	percpu_offset;			/* 0x02f0 */
 	__u32	machine_flags;			/* 0x02f4 */
-	__u32	ftrace_func;			/* 0x02f8 */
+	__u8	pad_0x02f8[0x02fc-0x02f8];	/* 0x02f8 */
 	__u32	spinlock_lockval;		/* 0x02fc */
 
 	__u8	pad_0x0300[0x0e00-0x0300];	/* 0x0300 */
@@ -297,7 +297,7 @@ struct _lowcore {
 	__u64	percpu_offset;			/* 0x0378 */
 	__u64	vdso_per_cpu_data;		/* 0x0380 */
 	__u64	machine_flags;			/* 0x0388 */
-	__u64	ftrace_func;			/* 0x0390 */
+	__u8	pad_0x0390[0x0398-0x0390];	/* 0x0390 */
 	__u64	gmap;				/* 0x0398 */
 	__u32	spinlock_lockval;		/* 0x03a0 */
 	__u8	pad_0x03a0[0x0400-0x03a4];	/* 0x03a4 */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 00d460742e1e..5ef1a266936a 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -133,6 +133,18 @@ extern unsigned long MODULES_END;
 #define MODULES_LEN	(1UL << 31)
 #endif
 
+static inline int is_module_addr(void *addr)
+{
+#ifdef CONFIG_64BIT
+	BUILD_BUG_ON(MODULES_LEN > (1UL << 31));
+	if (addr < (void *)MODULES_VADDR)
+		return 0;
+	if (addr > (void *)MODULES_END)
+		return 0;
+#endif
+	return 1;
+}
+
 /*
  * A 31 bit pagetable entry of S390 has following format:
  *  |   PFRA          |    |  OS  |
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index ef279a136801..f3a78337ca86 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -156,7 +156,6 @@ int main(void)
 	DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock));
 	DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock));
 	DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags));
-	DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func));
 	DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib));
 	BLANK();
 	DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area));
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index cef2879edff3..302ac1f7f8e7 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -12,7 +12,6 @@
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/ctype.h>
-#include <linux/ftrace.h>
 #include <linux/lockdep.h>
 #include <linux/module.h>
 #include <linux/pfn.h>
@@ -490,8 +489,5 @@ void __init startup_init(void)
 	detect_machine_facilities();
 	setup_topology();
 	sclp_early_detect();
-#ifdef CONFIG_DYNAMIC_FTRACE
-	S390_lowcore.ftrace_func = (unsigned long)ftrace_caller;
-#endif
 	lockdep_on();
 }
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 51d14fe5eb9a..5744d25c1d33 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -7,6 +7,7 @@
  *		Martin Schwidefsky <schwidefsky@de.ibm.com>
  */
 
+#include <linux/moduleloader.h>
 #include <linux/hardirq.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
@@ -15,60 +16,39 @@
 #include <linux/kprobes.h>
 #include <trace/syscall.h>
 #include <asm/asm-offsets.h>
+#include <asm/cacheflush.h>
 #include "entry.h"
 
-void mcount_replace_code(void);
-void ftrace_disable_code(void);
-void ftrace_enable_insn(void);
-
 /*
  * The mcount code looks like this:
  *	stg	%r14,8(%r15)		# offset 0
  *	larl	%r1,<&counter>		# offset 6
  *	brasl	%r14,_mcount		# offset 12
  *	lg	%r14,8(%r15)		# offset 18
- * Total length is 24 bytes. The complete mcount block initially gets replaced
- * by ftrace_make_nop. Subsequent calls to ftrace_make_call / ftrace_make_nop
- * only patch the jg/lg instruction within the block.
- * Note: we do not patch the first instruction to an unconditional branch,
- * since that would break kprobes/jprobes. It is easier to leave the larl
- * instruction in and only modify the second instruction.
+ * Total length is 24 bytes. Only the first instruction will be patched
+ * by ftrace_make_call / ftrace_make_nop.
  * The enabled ftrace code block looks like this:
- *	larl	%r0,.+24		# offset 0
- * >	lg	%r1,__LC_FTRACE_FUNC	# offset 6
- *	br	%r1			# offset 12
- *	brcl	0,0			# offset 14
- *	brc	0,0			# offset 20
+ * >	brasl	%r0,ftrace_caller	# offset 0
+ *	larl	%r1,<&counter>		# offset 6
+ *	brasl	%r14,_mcount		# offset 12
+ *	lg	%r14,8(%r15)		# offset 18
  * The ftrace function gets called with a non-standard C function call ABI
  * where r0 contains the return address. It is also expected that the called
  * function only clobbers r0 and r1, but restores r2-r15.
+ * For module code we can't directly jump to ftrace caller, but need a
+ * trampoline (ftrace_plt), which clobbers also r1.
  * The return point of the ftrace function has offset 24, so execution
  * continues behind the mcount block.
- *	larl	%r0,.+24		# offset 0
- * >	jg	.+18			# offset 6
- *	br	%r1			# offset 12
- *	brcl	0,0			# offset 14
- *	brc	0,0			# offset 20
+ * The disabled ftrace code block looks like this:
+ * >	jg	.+24			# offset 0
+ *	larl	%r1,<&counter>		# offset 6
+ *	brasl	%r14,_mcount		# offset 12
+ *	lg	%r14,8(%r15)		# offset 18
  * The jg instruction branches to offset 24 to skip as many instructions
  * as possible.
  */
-asm(
-	"	.align	4\n"
-	"mcount_replace_code:\n"
-	"	larl	%r0,0f\n"
-	"ftrace_disable_code:\n"
-	"	jg	0f\n"
-	"	br	%r1\n"
-	"	brcl	0,0\n"
-	"	brc	0,0\n"
-	"0:\n"
-	"	.align	4\n"
-	"ftrace_enable_insn:\n"
-	"	lg	%r1,"__stringify(__LC_FTRACE_FUNC)"\n");
-
-#define MCOUNT_BLOCK_SIZE	24
-#define MCOUNT_INSN_OFFSET	6
-#define FTRACE_INSN_SIZE	6
+
+unsigned long ftrace_plt;
 
 int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
 		       unsigned long addr)
@@ -79,24 +59,62 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
 int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
 		    unsigned long addr)
 {
-	/* Initial replacement of the whole mcount block */
-	if (addr == MCOUNT_ADDR) {
-		if (probe_kernel_write((void *) rec->ip - MCOUNT_INSN_OFFSET,
-				       mcount_replace_code,
-				       MCOUNT_BLOCK_SIZE))
-			return -EPERM;
-		return 0;
+	struct ftrace_insn insn;
+	unsigned short op;
+	void *from, *to;
+	size_t size;
+
+	ftrace_generate_nop_insn(&insn);
+	size = sizeof(insn);
+	from = &insn;
+	to = (void *) rec->ip;
+	if (probe_kernel_read(&op, (void *) rec->ip, sizeof(op)))
+		return -EFAULT;
+	/*
+	 * If we find a breakpoint instruction, a kprobe has been placed
+	 * at the beginning of the function. We write the constant
+	 * KPROBE_ON_FTRACE_NOP into the remaining four bytes of the original
+	 * instruction so that the kprobes handler can execute a nop, if it
+	 * reaches this breakpoint.
+	 */
+	if (op == BREAKPOINT_INSTRUCTION) {
+		size -= 2;
+		from += 2;
+		to += 2;
+		insn.disp = KPROBE_ON_FTRACE_NOP;
 	}
-	if (probe_kernel_write((void *) rec->ip, ftrace_disable_code,
-			       MCOUNT_INSN_SIZE))
+	if (probe_kernel_write(to, from, size))
 		return -EPERM;
 	return 0;
 }
 
 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
-	if (probe_kernel_write((void *) rec->ip, ftrace_enable_insn,
-			       FTRACE_INSN_SIZE))
+	struct ftrace_insn insn;
+	unsigned short op;
+	void *from, *to;
+	size_t size;
+
+	ftrace_generate_call_insn(&insn, rec->ip);
+	size = sizeof(insn);
+	from = &insn;
+	to = (void *) rec->ip;
+	if (probe_kernel_read(&op, (void *) rec->ip, sizeof(op)))
+		return -EFAULT;
+	/*
+	 * If we find a breakpoint instruction, a kprobe has been placed
+	 * at the beginning of the function. We write the constant
+	 * KPROBE_ON_FTRACE_CALL into the remaining four bytes of the original
+	 * instruction so that the kprobes handler can execute a brasl if it
+	 * reaches this breakpoint.
+	 */
+	if (op == BREAKPOINT_INSTRUCTION) {
+		size -= 2;
+		from += 2;
+		to += 2;
+		insn.disp = KPROBE_ON_FTRACE_CALL;
+	}
+	if (probe_kernel_write(to, from, size))
 		return -EPERM;
 	return 0;
 }
@@ -111,6 +129,24 @@ int __init ftrace_dyn_arch_init(void)
 	return 0;
 }
 
+static int __init ftrace_plt_init(void)
+{
+	unsigned int *ip;
+
+	ftrace_plt = (unsigned long) module_alloc(PAGE_SIZE);
+	if (!ftrace_plt)
+		panic("cannot allocate ftrace plt\n");
+	ip = (unsigned int *) ftrace_plt;
+	ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */
+	ip[1] = 0x100a0004;
+	ip[2] = 0x07f10000;
+	ip[3] = FTRACE_ADDR >> 32;
+	ip[4] = FTRACE_ADDR & 0xffffffff;
+	set_memory_ro(ftrace_plt, 1);
+	return 0;
+}
+device_initcall(ftrace_plt_init);
+
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 /*
  * Hook the return address and push it in the stack of return addresses
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 014d4729b134..d6716c29b7f8 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -29,6 +29,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/hardirq.h>
+#include <linux/ftrace.h>
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
 #include <asm/dis.h>
@@ -60,10 +61,21 @@ struct kprobe_insn_cache kprobe_dmainsn_slots = {
 
 static void __kprobes copy_instruction(struct kprobe *p)
 {
+	unsigned long ip = (unsigned long) p->addr;
 	s64 disp, new_disp;
 	u64 addr, new_addr;
 
-	memcpy(p->ainsn.insn, p->addr, insn_length(p->opcode >> 8));
+	if (ftrace_location(ip) == ip) {
+		/*
+		 * If kprobes patches the instruction that is morphed by
+		 * ftrace make sure that kprobes always sees the branch
+		 * "jg .+24" that skips the mcount block
+		 */
+		ftrace_generate_nop_insn((struct ftrace_insn *)p->ainsn.insn);
+		p->ainsn.is_ftrace_insn = 1;
+	} else
+		memcpy(p->ainsn.insn, p->addr, insn_length(p->opcode >> 8));
+	p->opcode = p->ainsn.insn[0];
 	if (!probe_is_insn_relative_long(p->ainsn.insn))
 		return;
 	/*
@@ -85,18 +97,6 @@ static inline int is_kernel_addr(void *addr)
 	return addr < (void *)_end;
 }
 
-static inline int is_module_addr(void *addr)
-{
-#ifdef CONFIG_64BIT
-	BUILD_BUG_ON(MODULES_LEN > (1UL << 31));
-	if (addr < (void *)MODULES_VADDR)
-		return 0;
-	if (addr > (void *)MODULES_END)
-		return 0;
-#endif
-	return 1;
-}
-
 static int __kprobes s390_get_insn_slot(struct kprobe *p)
 {
 	/*
@@ -132,43 +132,63 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 		return -EINVAL;
 	if (s390_get_insn_slot(p))
 		return -ENOMEM;
-	p->opcode = *p->addr;
 	copy_instruction(p);
 	return 0;
 }
 
-struct ins_replace_args {
-	kprobe_opcode_t *ptr;
-	kprobe_opcode_t opcode;
+int arch_check_ftrace_location(struct kprobe *p)
+{
+	return 0;
+}
+
+struct swap_insn_args {
+	struct kprobe *p;
+	unsigned int arm_kprobe : 1;
 };
 
-static int __kprobes swap_instruction(void *aref)
+static int __kprobes swap_instruction(void *data)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	unsigned long status = kcb->kprobe_status;
-	struct ins_replace_args *args = aref;
-
+	struct swap_insn_args *args = data;
+	struct ftrace_insn new_insn, *insn;
+	struct kprobe *p = args->p;
+	size_t len;
+
+	new_insn.opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode;
+	len = sizeof(new_insn.opc);
+	if (!p->ainsn.is_ftrace_insn)
+		goto skip_ftrace;
+	len = sizeof(new_insn);
+	insn = (struct ftrace_insn *) p->addr;
+	if (args->arm_kprobe) {
+		if (is_ftrace_nop(insn))
+			new_insn.disp = KPROBE_ON_FTRACE_NOP;
+		else
+			new_insn.disp = KPROBE_ON_FTRACE_CALL;
+	} else {
+		ftrace_generate_call_insn(&new_insn, (unsigned long)p->addr);
+		if (insn->disp == KPROBE_ON_FTRACE_NOP)
+			ftrace_generate_nop_insn(&new_insn);
+	}
+skip_ftrace:
 	kcb->kprobe_status = KPROBE_SWAP_INST;
-	probe_kernel_write(args->ptr, &args->opcode, sizeof(args->opcode));
+	probe_kernel_write(p->addr, &new_insn, len);
 	kcb->kprobe_status = status;
 	return 0;
 }
 
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
-	struct ins_replace_args args;
+	struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
 
-	args.ptr = p->addr;
-	args.opcode = BREAKPOINT_INSTRUCTION;
 	stop_machine(swap_instruction, &args, NULL);
 }
 
 void __kprobes arch_disarm_kprobe(struct kprobe *p)
 {
-	struct ins_replace_args args;
+	struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
 
-	args.ptr = p->addr;
-	args.opcode = p->opcode;
 	stop_machine(swap_instruction, &args, NULL);
 }
 
@@ -459,6 +479,24 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
 	unsigned long ip = regs->psw.addr & PSW_ADDR_INSN;
 	int fixup = probe_get_fixup_type(p->ainsn.insn);
 
+	/* Check if the kprobes location is an enabled ftrace caller */
+	if (p->ainsn.is_ftrace_insn) {
+		struct ftrace_insn *insn = (struct ftrace_insn *) p->addr;
+		struct ftrace_insn call_insn;
+
+		ftrace_generate_call_insn(&call_insn, (unsigned long) p->addr);
+		/*
+		 * A kprobe on an enabled ftrace call site actually single
+		 * stepped an unconditional branch (ftrace nop equivalent).
+		 * Now we need to fixup things and pretend that a brasl r0,...
+		 * was executed instead.
+		 */
+		if (insn->disp == KPROBE_ON_FTRACE_CALL) {
+			ip += call_insn.disp * 2 - MCOUNT_INSN_SIZE;
+			regs->gprs[0] = (unsigned long)p->addr + sizeof(*insn);
+		}
+	}
+
 	if (fixup & FIXUP_PSW_NORMAL)
 		ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn;
 
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 4300ea374826..b6dfc5bfcb89 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -27,6 +27,7 @@ ENTRY(ftrace_caller)
 	.globl	ftrace_regs_caller
 	.set	ftrace_regs_caller,ftrace_caller
 	lgr	%r1,%r15
+	aghi	%r0,MCOUNT_RETURN_FIXUP
 	aghi	%r15,-STACK_FRAME_SIZE
 	stg	%r1,__SF_BACKCHAIN(%r15)
 	stg	%r1,(STACK_PTREGS_GPRS+15*8)(%r15)
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index e80d9ff9a56d..4e532c67832f 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -41,7 +41,6 @@
 #include <linux/ctype.h>
 #include <linux/reboot.h>
 #include <linux/topology.h>
-#include <linux/ftrace.h>
 #include <linux/kexec.h>
 #include <linux/crash_dump.h>
 #include <linux/memory.h>
@@ -356,7 +355,6 @@ static void __init setup_lowcore(void)
 	lc->steal_timer = S390_lowcore.steal_timer;
 	lc->last_update_timer = S390_lowcore.last_update_timer;
 	lc->last_update_clock = S390_lowcore.last_update_clock;
-	lc->ftrace_func = S390_lowcore.ftrace_func;
 
 	restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0);
 	restart_stack += ASYNC_SIZE;
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 6fd9e60101f1..0b499f5cbe19 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -236,7 +236,6 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
 	lc->percpu_offset = __per_cpu_offset[cpu];
 	lc->kernel_asce = S390_lowcore.kernel_asce;
 	lc->machine_flags = S390_lowcore.machine_flags;
-	lc->ftrace_func = S390_lowcore.ftrace_func;
 	lc->user_timer = lc->system_timer = lc->steal_timer = 0;
 	__ctl_store(lc->cregs_save_area, 0, 15);
 	save_access_regs((unsigned int *) lc->access_regs_save_area);
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index 001facfa5b74..3d1984e59a30 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -404,7 +404,7 @@ do_file(char const *const fname)
 		}
 		if (w2(ghdr->e_machine) == EM_S390) {
 			reltype = R_390_64;
-			mcount_adjust_64 = -8;
+			mcount_adjust_64 = -14;
 		}
 		if (w2(ghdr->e_machine) == EM_MIPS) {
 			reltype = R_MIPS_64;
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index d4b665610d67..56ea99a12ab7 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -243,7 +243,7 @@ if ($arch eq "x86_64") {
 
 } elsif ($arch eq "s390" && $bits == 64) {
     $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$";
-    $mcount_adjust = -8;
+    $mcount_adjust = -14;
     $alignment = 8;
     $type = ".quad";
     $ld .= " -m elf64_s390";
-- 
cgit v1.2.3-59-g8ed1b


From 7a5388de5c70f7a92de71e03ce72692c1827d162 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 22 Oct 2014 12:42:38 +0200
Subject: s390/kprobes: make use of NOKPROBE_SYMBOL()

Use NOKPROBE_SYMBOL() instead of __kprobes annotation.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/ftrace.c  |  4 +--
 arch/s390/kernel/idle.c    |  3 +-
 arch/s390/kernel/kprobes.c | 88 +++++++++++++++++++++++++++++-----------------
 arch/s390/kernel/process.c |  2 +-
 arch/s390/kernel/time.c    |  3 +-
 arch/s390/kernel/traps.c   | 14 ++++----
 arch/s390/mm/fault.c       |  6 ++--
 7 files changed, 74 insertions(+), 46 deletions(-)

diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 5744d25c1d33..708c772b086d 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -152,8 +152,7 @@ device_initcall(ftrace_plt_init);
  * Hook the return address and push it in the stack of return addresses
  * in current thread info.
  */
-unsigned long __kprobes prepare_ftrace_return(unsigned long parent,
-					      unsigned long ip)
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
 {
 	struct ftrace_graph_ent trace;
 
@@ -171,6 +170,7 @@ unsigned long __kprobes prepare_ftrace_return(unsigned long parent,
 out:
 	return parent;
 }
+NOKPROBE_SYMBOL(prepare_ftrace_return);
 
 /*
  * Patch the kernel code at ftrace_graph_caller location. The instruction
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 7559f1beab29..05fbc2c98faf 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -19,7 +19,7 @@
 
 static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
 
-void __kprobes enabled_wait(void)
+void enabled_wait(void)
 {
 	struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
 	unsigned long long idle_time;
@@ -46,6 +46,7 @@ void __kprobes enabled_wait(void)
 	smp_wmb();
 	idle->sequence++;
 }
+NOKPROBE_SYMBOL(enabled_wait);
 
 static ssize_t show_idle_count(struct device *dev,
 				struct device_attribute *attr, char *buf)
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index d6716c29b7f8..ee0396755430 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -59,7 +59,7 @@ struct kprobe_insn_cache kprobe_dmainsn_slots = {
 	.insn_size = MAX_INSN_SIZE,
 };
 
-static void __kprobes copy_instruction(struct kprobe *p)
+static void copy_instruction(struct kprobe *p)
 {
 	unsigned long ip = (unsigned long) p->addr;
 	s64 disp, new_disp;
@@ -91,13 +91,14 @@ static void __kprobes copy_instruction(struct kprobe *p)
 	new_disp = ((addr + (disp * 2)) - new_addr) / 2;
 	*(s32 *)&p->ainsn.insn[1] = new_disp;
 }
+NOKPROBE_SYMBOL(copy_instruction);
 
 static inline int is_kernel_addr(void *addr)
 {
 	return addr < (void *)_end;
 }
 
-static int __kprobes s390_get_insn_slot(struct kprobe *p)
+static int s390_get_insn_slot(struct kprobe *p)
 {
 	/*
 	 * Get an insn slot that is within the same 2GB area like the original
@@ -111,8 +112,9 @@ static int __kprobes s390_get_insn_slot(struct kprobe *p)
 		p->ainsn.insn = get_insn_slot();
 	return p->ainsn.insn ? 0 : -ENOMEM;
 }
+NOKPROBE_SYMBOL(s390_get_insn_slot);
 
-static void __kprobes s390_free_insn_slot(struct kprobe *p)
+static void s390_free_insn_slot(struct kprobe *p)
 {
 	if (!p->ainsn.insn)
 		return;
@@ -122,8 +124,9 @@ static void __kprobes s390_free_insn_slot(struct kprobe *p)
 		free_insn_slot(p->ainsn.insn, 0);
 	p->ainsn.insn = NULL;
 }
+NOKPROBE_SYMBOL(s390_free_insn_slot);
 
-int __kprobes arch_prepare_kprobe(struct kprobe *p)
+int arch_prepare_kprobe(struct kprobe *p)
 {
 	if ((unsigned long) p->addr & 0x01)
 		return -EINVAL;
@@ -135,6 +138,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 	copy_instruction(p);
 	return 0;
 }
+NOKPROBE_SYMBOL(arch_prepare_kprobe);
 
 int arch_check_ftrace_location(struct kprobe *p)
 {
@@ -146,7 +150,7 @@ struct swap_insn_args {
 	unsigned int arm_kprobe : 1;
 };
 
-static int __kprobes swap_instruction(void *data)
+static int swap_instruction(void *data)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	unsigned long status = kcb->kprobe_status;
@@ -177,29 +181,33 @@ skip_ftrace:
 	kcb->kprobe_status = status;
 	return 0;
 }
+NOKPROBE_SYMBOL(swap_instruction);
 
-void __kprobes arch_arm_kprobe(struct kprobe *p)
+void arch_arm_kprobe(struct kprobe *p)
 {
 	struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
 
 	stop_machine(swap_instruction, &args, NULL);
 }
+NOKPROBE_SYMBOL(arch_arm_kprobe);
 
-void __kprobes arch_disarm_kprobe(struct kprobe *p)
+void arch_disarm_kprobe(struct kprobe *p)
 {
 	struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
 
 	stop_machine(swap_instruction, &args, NULL);
 }
+NOKPROBE_SYMBOL(arch_disarm_kprobe);
 
-void __kprobes arch_remove_kprobe(struct kprobe *p)
+void arch_remove_kprobe(struct kprobe *p)
 {
 	s390_free_insn_slot(p);
 }
+NOKPROBE_SYMBOL(arch_remove_kprobe);
 
-static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb,
-					struct pt_regs *regs,
-					unsigned long ip)
+static void enable_singlestep(struct kprobe_ctlblk *kcb,
+			      struct pt_regs *regs,
+			      unsigned long ip)
 {
 	struct per_regs per_kprobe;
 
@@ -219,10 +227,11 @@ static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb,
 	regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
 	regs->psw.addr = ip | PSW_ADDR_AMODE;
 }
+NOKPROBE_SYMBOL(enable_singlestep);
 
-static void __kprobes disable_singlestep(struct kprobe_ctlblk *kcb,
-					 struct pt_regs *regs,
-					 unsigned long ip)
+static void disable_singlestep(struct kprobe_ctlblk *kcb,
+			       struct pt_regs *regs,
+			       unsigned long ip)
 {
 	/* Restore control regs and psw mask, set new psw address */
 	__ctl_load(kcb->kprobe_saved_ctl, 9, 11);
@@ -230,41 +239,43 @@ static void __kprobes disable_singlestep(struct kprobe_ctlblk *kcb,
 	regs->psw.mask |= kcb->kprobe_saved_imask;
 	regs->psw.addr = ip | PSW_ADDR_AMODE;
 }
+NOKPROBE_SYMBOL(disable_singlestep);
 
 /*
  * Activate a kprobe by storing its pointer to current_kprobe. The
  * previous kprobe is stored in kcb->prev_kprobe. A stack of up to
  * two kprobes can be active, see KPROBE_REENTER.
  */
-static void __kprobes push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p)
+static void push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p)
 {
 	kcb->prev_kprobe.kp = __this_cpu_read(current_kprobe);
 	kcb->prev_kprobe.status = kcb->kprobe_status;
 	__this_cpu_write(current_kprobe, p);
 }
+NOKPROBE_SYMBOL(push_kprobe);
 
 /*
  * Deactivate a kprobe by backing up to the previous state. If the
  * current state is KPROBE_REENTER prev_kprobe.kp will be non-NULL,
  * for any other state prev_kprobe.kp will be NULL.
  */
-static void __kprobes pop_kprobe(struct kprobe_ctlblk *kcb)
+static void pop_kprobe(struct kprobe_ctlblk *kcb)
 {
 	__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
 	kcb->kprobe_status = kcb->prev_kprobe.status;
 }
+NOKPROBE_SYMBOL(pop_kprobe);
 
-void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
-					struct pt_regs *regs)
+void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
 	ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14];
 
 	/* Replace the return addr with trampoline addr */
 	regs->gprs[14] = (unsigned long) &kretprobe_trampoline;
 }
+NOKPROBE_SYMBOL(arch_prepare_kretprobe);
 
-static void __kprobes kprobe_reenter_check(struct kprobe_ctlblk *kcb,
-					   struct kprobe *p)
+static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p)
 {
 	switch (kcb->kprobe_status) {
 	case KPROBE_HIT_SSDONE:
@@ -284,8 +295,9 @@ static void __kprobes kprobe_reenter_check(struct kprobe_ctlblk *kcb,
 		BUG();
 	}
 }
+NOKPROBE_SYMBOL(kprobe_reenter_check);
 
-static int __kprobes kprobe_handler(struct pt_regs *regs)
+static int kprobe_handler(struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb;
 	struct kprobe *p;
@@ -359,6 +371,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 	preempt_enable_no_resched();
 	return 0;
 }
+NOKPROBE_SYMBOL(kprobe_handler);
 
 /*
  * Function return probe trampoline:
@@ -375,8 +388,7 @@ static void __used kretprobe_trampoline_holder(void)
 /*
  * Called when the probe at kretprobe trampoline is hit
  */
-static int __kprobes trampoline_probe_handler(struct kprobe *p,
-					      struct pt_regs *regs)
+static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kretprobe_instance *ri;
 	struct hlist_head *head, empty_rp;
@@ -464,6 +476,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 	 */
 	return 1;
 }
+NOKPROBE_SYMBOL(trampoline_probe_handler);
 
 /*
  * Called after single-stepping.  p->addr is the address of the
@@ -473,7 +486,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
  * single-stepped a copy of the instruction.  The address of this
  * copy is p->ainsn.insn.
  */
-static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
+static void resume_execution(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	unsigned long ip = regs->psw.addr & PSW_ADDR_INSN;
@@ -514,8 +527,9 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
 
 	disable_singlestep(kcb, regs, ip);
 }
+NOKPROBE_SYMBOL(resume_execution);
 
-static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+static int post_kprobe_handler(struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	struct kprobe *p = kprobe_running();
@@ -542,8 +556,9 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
 
 	return 1;
 }
+NOKPROBE_SYMBOL(post_kprobe_handler);
 
-static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr)
+static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	struct kprobe *p = kprobe_running();
@@ -605,8 +620,9 @@ static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr)
 	}
 	return 0;
 }
+NOKPROBE_SYMBOL(kprobe_trap_handler);
 
-int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 {
 	int ret;
 
@@ -617,12 +633,13 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		local_irq_restore(regs->psw.mask & ~PSW_MASK_PER);
 	return ret;
 }
+NOKPROBE_SYMBOL(kprobe_fault_handler);
 
 /*
  * Wrapper routine to for handling exceptions.
  */
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
-				       unsigned long val, void *data)
+int kprobe_exceptions_notify(struct notifier_block *self,
+			     unsigned long val, void *data)
 {
 	struct die_args *args = (struct die_args *) data;
 	struct pt_regs *regs = args->regs;
@@ -654,8 +671,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 
 	return ret;
 }
+NOKPROBE_SYMBOL(kprobe_exceptions_notify);
 
-int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -673,13 +691,15 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	memcpy(kcb->jprobes_stack, (void *) stack, MIN_STACK_SIZE(stack));
 	return 1;
 }
+NOKPROBE_SYMBOL(setjmp_pre_handler);
 
-void __kprobes jprobe_return(void)
+void jprobe_return(void)
 {
 	asm volatile(".word 0x0002");
 }
+NOKPROBE_SYMBOL(jprobe_return);
 
-int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	unsigned long stack;
@@ -693,6 +713,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	preempt_enable_no_resched();
 	return 1;
 }
+NOKPROBE_SYMBOL(longjmp_break_handler);
 
 static struct kprobe trampoline = {
 	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
@@ -704,7 +725,8 @@ int __init arch_init_kprobes(void)
 	return register_kprobe(&trampoline);
 }
 
-int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+int arch_trampoline_kprobe(struct kprobe *p)
 {
 	return p->addr == (kprobe_opcode_t *) &kretprobe_trampoline;
 }
+NOKPROBE_SYMBOL(arch_trampoline_kprobe);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index ed84cc224899..4192dfd55ddc 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -61,7 +61,7 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 	return sf->gprs[8];
 }
 
-extern void __kprobes kernel_thread_starter(void);
+extern void kernel_thread_starter(void);
 
 /*
  * Free current thread data structures etc..
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 005d665fe4a5..20660dddb2d6 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -61,10 +61,11 @@ static DEFINE_PER_CPU(struct clock_event_device, comparators);
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
-unsigned long long notrace __kprobes sched_clock(void)
+unsigned long long notrace sched_clock(void)
 {
 	return tod_to_ns(get_tod_clock_monotonic());
 }
+NOKPROBE_SYMBOL(sched_clock);
 
 /*
  * Monotonic_clock - returns # of nanoseconds passed since time_init()
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 9ff5ecba26ab..1e1b866b2966 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -87,16 +87,16 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
         }
 }
 
-static void __kprobes do_trap(struct pt_regs *regs, int si_signo, int si_code,
-			      char *str)
+static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
 {
 	if (notify_die(DIE_TRAP, str, regs, 0,
 		       regs->int_code, si_signo) == NOTIFY_STOP)
 		return;
 	do_report_trap(regs, si_signo, si_code, str);
 }
+NOKPROBE_SYMBOL(do_trap);
 
-void __kprobes do_per_trap(struct pt_regs *regs)
+void do_per_trap(struct pt_regs *regs)
 {
 	siginfo_t info;
 
@@ -111,6 +111,7 @@ void __kprobes do_per_trap(struct pt_regs *regs)
 		(void __force __user *) current->thread.per_event.address;
 	force_sig_info(SIGTRAP, &info, current);
 }
+NOKPROBE_SYMBOL(do_per_trap);
 
 void default_trap_handler(struct pt_regs *regs)
 {
@@ -179,7 +180,7 @@ static inline void do_fp_trap(struct pt_regs *regs, int fpc)
 	do_trap(regs, SIGFPE, si_code, "floating point exception");
 }
 
-void __kprobes illegal_op(struct pt_regs *regs)
+void illegal_op(struct pt_regs *regs)
 {
 	siginfo_t info;
         __u8 opcode[6];
@@ -252,7 +253,7 @@ void __kprobes illegal_op(struct pt_regs *regs)
 	if (signal)
 		do_trap(regs, signal, ILL_ILLOPC, "illegal operation");
 }
-
+NOKPROBE_SYMBOL(illegal_op);
 
 #ifdef CONFIG_MATHEMU
 void specification_exception(struct pt_regs *regs)
@@ -469,7 +470,7 @@ void space_switch_exception(struct pt_regs *regs)
 	do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event");
 }
 
-void __kprobes kernel_stack_overflow(struct pt_regs * regs)
+void kernel_stack_overflow(struct pt_regs *regs)
 {
 	bust_spinlocks(1);
 	printk("Kernel stack overflow.\n");
@@ -477,6 +478,7 @@ void __kprobes kernel_stack_overflow(struct pt_regs * regs)
 	bust_spinlocks(0);
 	panic("Corrupt kernel stack, can't continue.");
 }
+NOKPROBE_SYMBOL(kernel_stack_overflow);
 
 void __init trap_init(void)
 {
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index a2b81d6ce8a5..ca70fad2fc92 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -548,7 +548,7 @@ out:
 	return fault;
 }
 
-void __kprobes do_protection_exception(struct pt_regs *regs)
+void do_protection_exception(struct pt_regs *regs)
 {
 	unsigned long trans_exc_code;
 	int fault;
@@ -574,8 +574,9 @@ void __kprobes do_protection_exception(struct pt_regs *regs)
 	if (unlikely(fault))
 		do_fault_error(regs, fault);
 }
+NOKPROBE_SYMBOL(do_protection_exception);
 
-void __kprobes do_dat_exception(struct pt_regs *regs)
+void do_dat_exception(struct pt_regs *regs)
 {
 	int access, fault;
 
@@ -584,6 +585,7 @@ void __kprobes do_dat_exception(struct pt_regs *regs)
 	if (unlikely(fault))
 		do_fault_error(regs, fault);
 }
+NOKPROBE_SYMBOL(do_dat_exception);
 
 #ifdef CONFIG_PFAULT 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 6972cae523de728ad5e8dae01da4a631d98b874c Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Wed, 15 Oct 2014 15:29:01 +0200
Subject: s390/mm: missing pte for gmap_ipte_notify should trigger a VM_BUG

After fixup_user_fault does not fail we have a writeable pte.
That pte might transform but it should not vanish.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/mm/pgtable.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index b1871d39e46e..9c8a6dd8a49c 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -752,8 +752,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
 			break;
 		/* Walk the process page table, lock and get pte pointer */
 		ptep = get_locked_pte(gmap->mm, addr, &ptl);
-		if (unlikely(!ptep))
-			continue;
+		VM_BUG_ON(!ptep);
 		/* Set notification bit in the pgste of the pte */
 		entry = *ptep;
 		if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
-- 
cgit v1.2.3-59-g8ed1b


From fcbe08d66f57c368e77ca729dd01e6b539ffb3ff Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 24 Oct 2014 10:52:29 +0200
Subject: s390/mm: pmdp_get_and_clear_full optimization

Analogous to ptep_get_and_clear_full, define a variant of the
pmdp_get_and_clear primitive which gets the full hint from the
mmu_gather struct. This allows s390 to avoid a costly instruction
when destroying an address space.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/pgtable.h | 13 +++++++++++++
 include/asm-generic/pgtable.h   | 11 +++++++++++
 mm/huge_memory.c                |  3 ++-
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 5ef1a266936a..5e102422c9ab 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1651,6 +1651,19 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
 	return pmd;
 }
 
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR_FULL
+static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm,
+					    unsigned long address,
+					    pmd_t *pmdp, int full)
+{
+	pmd_t pmd = *pmdp;
+
+	if (!full)
+		pmdp_flush_lazy(mm, address, pmdp);
+	pmd_clear(pmdp);
+	return pmd;
+}
+
 #define __HAVE_ARCH_PMDP_CLEAR_FLUSH
 static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
 				     unsigned long address, pmd_t *pmdp)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 752e30d63904..177d5973b132 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -103,6 +103,17 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
+#ifndef __HAVE_ARCH_PMDP_GET_AND_CLEAR_FULL
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm,
+					    unsigned long address, pmd_t *pmdp,
+					    int full)
+{
+	return pmdp_get_and_clear(mm, address, pmdp);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif
+
 #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
 static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 					    unsigned long address, pte_t *ptep,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 7e9c15cb93a9..6a37f1b2ed1e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1400,7 +1400,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		 * pgtable_trans_huge_withdraw after finishing pmdp related
 		 * operations.
 		 */
-		orig_pmd = pmdp_get_and_clear(tlb->mm, addr, pmd);
+		orig_pmd = pmdp_get_and_clear_full(tlb->mm, addr, pmd,
+						   tlb->fullmm);
 		tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
 		pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
 		if (is_huge_zero_pmd(orig_pmd)) {
-- 
cgit v1.2.3-59-g8ed1b


From f318a1229bd8d377282ddb37158812073701a22b Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 29 Oct 2014 12:50:31 +0100
Subject: s390/cmpxchg: use compiler builtins

The kernel build for s390 fails for gcc compilers with version 3.x,
so set the minimum required version of gcc to version 4.3.

As the atomic builtins are available with all gcc 4.x compilers,
use the __sync_val_compare_and_swap and __sync_bool_compare_and_swap
functions to replace the complex macro and inline assembler magic
in include/asm/cmpxchg.h. The compiler can just-do-it and generates
better code with the builtins.

While we are at it use __sync_bool_compare_and_swap for the
_raw_compare_and_swap function in the spinlock code as well.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/cmpxchg.h  | 240 +++------------------------------------
 arch/s390/include/asm/spinlock.h |   9 +-
 arch/s390/kernel/asm-offsets.c   |   4 +-
 3 files changed, 19 insertions(+), 234 deletions(-)

diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 4236408070e5..6259895fcd97 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -11,200 +11,28 @@
 #include <linux/types.h>
 #include <linux/bug.h>
 
-extern void __xchg_called_with_bad_pointer(void);
-
-static inline unsigned long __xchg(unsigned long x, void *ptr, int size)
-{
-	unsigned long addr, old;
-	int shift;
-
-	switch (size) {
-	case 1:
-		addr = (unsigned long) ptr;
-		shift = (3 ^ (addr & 3)) << 3;
-		addr ^= addr & 3;
-		asm volatile(
-			"	l	%0,%4\n"
-			"0:	lr	0,%0\n"
-			"	nr	0,%3\n"
-			"	or	0,%2\n"
-			"	cs	%0,0,%4\n"
-			"	jl	0b\n"
-			: "=&d" (old), "=Q" (*(int *) addr)
-			: "d" ((x & 0xff) << shift), "d" (~(0xff << shift)),
-			  "Q" (*(int *) addr) : "memory", "cc", "0");
-		return old >> shift;
-	case 2:
-		addr = (unsigned long) ptr;
-		shift = (2 ^ (addr & 2)) << 3;
-		addr ^= addr & 2;
-		asm volatile(
-			"	l	%0,%4\n"
-			"0:	lr	0,%0\n"
-			"	nr	0,%3\n"
-			"	or	0,%2\n"
-			"	cs	%0,0,%4\n"
-			"	jl	0b\n"
-			: "=&d" (old), "=Q" (*(int *) addr)
-			: "d" ((x & 0xffff) << shift), "d" (~(0xffff << shift)),
-			  "Q" (*(int *) addr) : "memory", "cc", "0");
-		return old >> shift;
-	case 4:
-		asm volatile(
-			"	l	%0,%3\n"
-			"0:	cs	%0,%2,%3\n"
-			"	jl	0b\n"
-			: "=&d" (old), "=Q" (*(int *) ptr)
-			: "d" (x), "Q" (*(int *) ptr)
-			: "memory", "cc");
-		return old;
-#ifdef CONFIG_64BIT
-	case 8:
-		asm volatile(
-			"	lg	%0,%3\n"
-			"0:	csg	%0,%2,%3\n"
-			"	jl	0b\n"
-			: "=&d" (old), "=m" (*(long *) ptr)
-			: "d" (x), "Q" (*(long *) ptr)
-			: "memory", "cc");
-		return old;
-#endif /* CONFIG_64BIT */
-	}
-	__xchg_called_with_bad_pointer();
-	return x;
-}
-
-#define xchg(ptr, x)							  \
-({									  \
-	__typeof__(*(ptr)) __ret;					  \
-	__ret = (__typeof__(*(ptr)))					  \
-		__xchg((unsigned long)(x), (void *)(ptr), sizeof(*(ptr)));\
-	__ret;								  \
+#define cmpxchg(ptr, o, n)						\
+({									\
+	__typeof__(*(ptr)) __o = (o);					\
+	__typeof__(*(ptr)) __n = (n);					\
+	(__typeof__(*(ptr))) __sync_val_compare_and_swap((ptr),__o,__n);\
 })
 
-/*
- * Atomic compare and exchange.	 Compare OLD with MEM, if identical,
- * store NEW in MEM.  Return the initial value in MEM.	Success is
- * indicated by comparing RETURN with OLD.
- */
-
-#define __HAVE_ARCH_CMPXCHG
-
-extern void __cmpxchg_called_with_bad_pointer(void);
-
-static inline unsigned long __cmpxchg(void *ptr, unsigned long old,
-				      unsigned long new, int size)
-{
-	unsigned long addr, prev, tmp;
-	int shift;
-
-	switch (size) {
-	case 1:
-		addr = (unsigned long) ptr;
-		shift = (3 ^ (addr & 3)) << 3;
-		addr ^= addr & 3;
-		asm volatile(
-			"	l	%0,%2\n"
-			"0:	nr	%0,%5\n"
-			"	lr	%1,%0\n"
-			"	or	%0,%3\n"
-			"	or	%1,%4\n"
-			"	cs	%0,%1,%2\n"
-			"	jnl	1f\n"
-			"	xr	%1,%0\n"
-			"	nr	%1,%5\n"
-			"	jnz	0b\n"
-			"1:"
-			: "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) addr)
-			: "d" ((old & 0xff) << shift),
-			  "d" ((new & 0xff) << shift),
-			  "d" (~(0xff << shift))
-			: "memory", "cc");
-		return prev >> shift;
-	case 2:
-		addr = (unsigned long) ptr;
-		shift = (2 ^ (addr & 2)) << 3;
-		addr ^= addr & 2;
-		asm volatile(
-			"	l	%0,%2\n"
-			"0:	nr	%0,%5\n"
-			"	lr	%1,%0\n"
-			"	or	%0,%3\n"
-			"	or	%1,%4\n"
-			"	cs	%0,%1,%2\n"
-			"	jnl	1f\n"
-			"	xr	%1,%0\n"
-			"	nr	%1,%5\n"
-			"	jnz	0b\n"
-			"1:"
-			: "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) addr)
-			: "d" ((old & 0xffff) << shift),
-			  "d" ((new & 0xffff) << shift),
-			  "d" (~(0xffff << shift))
-			: "memory", "cc");
-		return prev >> shift;
-	case 4:
-		asm volatile(
-			"	cs	%0,%3,%1\n"
-			: "=&d" (prev), "=Q" (*(int *) ptr)
-			: "0" (old), "d" (new), "Q" (*(int *) ptr)
-			: "memory", "cc");
-		return prev;
-#ifdef CONFIG_64BIT
-	case 8:
-		asm volatile(
-			"	csg	%0,%3,%1\n"
-			: "=&d" (prev), "=Q" (*(long *) ptr)
-			: "0" (old), "d" (new), "Q" (*(long *) ptr)
-			: "memory", "cc");
-		return prev;
-#endif /* CONFIG_64BIT */
-	}
-	__cmpxchg_called_with_bad_pointer();
-	return old;
-}
-
-#define cmpxchg(ptr, o, n)						 \
-({									 \
-	__typeof__(*(ptr)) __ret;					 \
-	__ret = (__typeof__(*(ptr)))					 \
-		__cmpxchg((ptr), (unsigned long)(o), (unsigned long)(n), \
-			  sizeof(*(ptr)));				 \
-	__ret;								 \
-})
+#define cmpxchg64	cmpxchg
+#define cmpxchg_local	cmpxchg
+#define cmpxchg64_local	cmpxchg
 
-#ifdef CONFIG_64BIT
-#define cmpxchg64(ptr, o, n)						\
+#define xchg(ptr, x)							\
 ({									\
-	cmpxchg((ptr), (o), (n));					\
+	__typeof__(ptr) __ptr = (ptr);					\
+	__typeof__(*(ptr)) __old;					\
+	do {								\
+		__old = *__ptr;						\
+	} while (!__sync_bool_compare_and_swap(__ptr, __old, x));	\
+	__old;								\
 })
-#else /* CONFIG_64BIT */
-static inline unsigned long long __cmpxchg64(void *ptr,
-					     unsigned long long old,
-					     unsigned long long new)
-{
-	register_pair rp_old = {.pair = old};
-	register_pair rp_new = {.pair = new};
-	unsigned long long *ullptr = ptr;
 
-	asm volatile(
-		"	cds	%0,%2,%1"
-		: "+d" (rp_old), "+Q" (*ullptr)
-		: "d" (rp_new)
-		: "memory", "cc");
-	return rp_old.pair;
-}
-
-#define cmpxchg64(ptr, o, n)				\
-({							\
-	__typeof__(*(ptr)) __ret;			\
-	__ret = (__typeof__(*(ptr)))			\
-		__cmpxchg64((ptr),			\
-			    (unsigned long long)(o),	\
-			    (unsigned long long)(n));	\
-	__ret;						\
-})
-#endif /* CONFIG_64BIT */
+#define __HAVE_ARCH_CMPXCHG
 
 #define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn)		\
 ({									\
@@ -265,40 +93,4 @@ extern void __cmpxchg_double_called_with_bad_pointer(void);
 
 #define system_has_cmpxchg_double()	1
 
-#include <asm-generic/cmpxchg-local.h>
-
-static inline unsigned long __cmpxchg_local(void *ptr,
-					    unsigned long old,
-					    unsigned long new, int size)
-{
-	switch (size) {
-	case 1:
-	case 2:
-	case 4:
-#ifdef CONFIG_64BIT
-	case 8:
-#endif
-		return __cmpxchg(ptr, old, new, size);
-	default:
-		return __cmpxchg_local_generic(ptr, old, new, size);
-	}
-
-	return old;
-}
-
-/*
- * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
- * them available.
- */
-#define cmpxchg_local(ptr, o, n)					\
-({									\
-	__typeof__(*(ptr)) __ret;					\
-	__ret = (__typeof__(*(ptr)))					\
-		__cmpxchg_local((ptr), (unsigned long)(o),		\
-				(unsigned long)(n), sizeof(*(ptr)));	\
-	__ret;								\
-})
-
-#define cmpxchg64_local(ptr, o, n)	cmpxchg64((ptr), (o), (n))
-
 #endif /* __ASM_CMPXCHG_H */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index d6bdf906caa5..0e37cd041241 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -18,14 +18,7 @@ extern int spin_retry;
 static inline int
 _raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new)
 {
-	unsigned int old_expected = old;
-
-	asm volatile(
-		"	cs	%0,%3,%1"
-		: "=d" (old), "=Q" (*lock)
-		: "0" (old), "d" (new), "Q" (*lock)
-		: "cc", "memory" );
-	return old == old_expected;
+	return __sync_bool_compare_and_swap(lock, old, new);
 }
 
 /*
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index f3a78337ca86..e07e91605353 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -17,8 +17,8 @@
  * Make sure that the compiler is new enough. We want a compiler that
  * is known to work with the "Q" assembler constraint.
  */
-#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
-#error Your compiler is too old; please use version 3.3.3 or newer
+#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 3)
+#error Your compiler is too old; please use version 4.3 or newer
 #endif
 
 int main(void)
-- 
cgit v1.2.3-59-g8ed1b


From b19148f6e2d90738ecf0c2eeeb9bdbae25c59e9b Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Wed, 29 Oct 2014 19:12:04 +0100
Subject: s390/pci: improve irq number check for msix

s390's arch_setup_msi_irqs function ensures that we don't return with
more irqs than the PCI architecture allows and that a single PCI
function doesn't consume more irqs than the kernel is configured for.

At least the last check doesn't help much and should take the sum of
all irqs into account. Since that's already done by irq_alloc_desc
we can remove this check.

As for the first check we should use the value provided by the
firmware which can be less than what the PCI architecture allows.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/pci.h | 5 +----
 arch/s390/pci/pci.c         | 3 +--
 arch/s390/pci/pci_clp.c     | 1 +
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index c030900320e0..ef803c202d42 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -50,10 +50,6 @@ struct zpci_fmb {
 	atomic64_t unmapped_pages;
 } __packed __aligned(16);
 
-#define ZPCI_MSI_VEC_BITS	11
-#define ZPCI_MSI_VEC_MAX	(1 << ZPCI_MSI_VEC_BITS)
-#define ZPCI_MSI_VEC_MASK	(ZPCI_MSI_VEC_MAX - 1)
-
 enum zpci_state {
 	ZPCI_FN_STATE_RESERVED,
 	ZPCI_FN_STATE_STANDBY,
@@ -90,6 +86,7 @@ struct zpci_dev {
 
 	/* IRQ stuff */
 	u64		msi_addr;	/* MSI address */
+	unsigned int	max_msi;	/* maximum number of MSI's */
 	struct airq_iv *aibv;		/* adapter interrupt bit vector */
 	unsigned int	aisb;		/* number of the summary bit */
 
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 2fa7b14b9c08..47a4568e141d 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -369,8 +369,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 
 	if (type == PCI_CAP_ID_MSI && nvec > 1)
 		return 1;
-	msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX);
-	msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI);
+	msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
 
 	/* Allocate adapter summary indicator bit */
 	rc = -EIO;
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 6e22a247de9b..d6e411ed8b1f 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -62,6 +62,7 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
 	zdev->tlb_refresh = response->refresh;
 	zdev->dma_mask = response->dasm;
 	zdev->msi_addr = response->msia;
+	zdev->max_msi = response->noi;
 	zdev->fmb_update = response->mui;
 
 	switch (response->version) {
-- 
cgit v1.2.3-59-g8ed1b


From 5b9f2081e0844581cc94384eb052007002b2bfa8 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Thu, 30 Oct 2014 10:30:45 +0100
Subject: s390/pci: add sparse annotations

Fix the following warnings from the sparse code checker:

arch/s390/include/asm/pci_io.h:165:49: warning: cast removes address space of expression
arch/s390/pci/pci.c:476:44: warning: cast removes address space of expression
arch/s390/pci/pci.c:491:36: warning: incorrect type in argument 2 (different address spaces)
arch/s390/pci/pci.c:491:36:    expected void [noderef] <asn:2>*addr
arch/s390/pci/pci.c:491:36:    got void *<noident>

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/pci_io.h | 6 ++++--
 arch/s390/pci/pci.c            | 6 ++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index d194d544d694..f664e96f48c7 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -139,7 +139,8 @@ static inline int zpci_memcpy_fromio(void *dst,
 	int size, rc = 0;
 
 	while (n > 0) {
-		size = zpci_get_max_write_size((u64) src, (u64) dst, n, 8);
+		size = zpci_get_max_write_size((u64 __force) src,
+					       (u64) dst, n, 8);
 		req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size);
 		rc = zpci_read_single(req, dst, offset, size);
 		if (rc)
@@ -162,7 +163,8 @@ static inline int zpci_memcpy_toio(volatile void __iomem *dst,
 		return -EINVAL;
 
 	while (n > 0) {
-		size = zpci_get_max_write_size((u64) dst, (u64) src, n, 128);
+		size = zpci_get_max_write_size((u64 __force) dst,
+					       (u64) src, n, 128);
 		req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size);
 
 		if (size > 8) /* main path */
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 47a4568e141d..ed3725e2d16e 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -473,7 +473,8 @@ static void zpci_map_resources(struct zpci_dev *zdev)
 		len = pci_resource_len(pdev, i);
 		if (!len)
 			continue;
-		pdev->resource[i].start = (resource_size_t) pci_iomap(pdev, i, 0);
+		pdev->resource[i].start =
+			(resource_size_t __force) pci_iomap(pdev, i, 0);
 		pdev->resource[i].end = pdev->resource[i].start + len - 1;
 	}
 }
@@ -488,7 +489,8 @@ static void zpci_unmap_resources(struct zpci_dev *zdev)
 		len = pci_resource_len(pdev, i);
 		if (!len)
 			continue;
-		pci_iounmap(pdev, (void *) pdev->resource[i].start);
+		pci_iounmap(pdev, (void __iomem __force *)
+			    pdev->resource[i].start);
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From a697e051160390065393e2926d9d080077239e9e Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Thu, 30 Oct 2014 10:55:37 +0100
Subject: s390/mm: use correct unlock function in gmap_ipte_notify

The page table lock is acquired with a call to get_locked_pte,
replace the plain spin_unlock with the correct unlock function
pte_unmap_unlock.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/mm/pgtable.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 9c8a6dd8a49c..71c7eff2c89f 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -762,7 +762,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
 			gaddr += PAGE_SIZE;
 			len -= PAGE_SIZE;
 		}
-		spin_unlock(ptl);
+		pte_unmap_unlock(ptep, ptl);
 	}
 	up_read(&gmap->mm->mmap_sem);
 	return rc;
-- 
cgit v1.2.3-59-g8ed1b


From 37d2cd9d84851a2bd4a4bcc0157880ee4d8b5803 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Thu, 30 Oct 2014 13:45:43 +0100
Subject: s390/signal: add sparse annotations

Fix the following warnings from the sparse code checker:

arch/s390/kernel/signal.c:374:38: warning: cast removes address space of expression
arch/s390/kernel/signal.c:374:65: warning: incorrect type in initializer (different address spaces)
arch/s390/kernel/signal.c:374:65:    expected unsigned short [noderef] [usertype] <asn:1>*svc
arch/s390/kernel/signal.c:374:65:    got void *

arch/s390/kernel/compat_signal.c:437:38: warning: cast removes address space of expression
arch/s390/kernel/compat_signal.c:437:65: warning: incorrect type in initializer (different address spaces)
arch/s390/kernel/compat_signal.c:437:65:    expected unsigned short [noderef] [usertype] <asn:1>*svc
arch/s390/kernel/compat_signal.c:437:65:    got void *

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/compat_signal.c | 2 +-
 arch/s390/kernel/signal.c        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 009f5eb11125..34d5fa7b01b5 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -434,7 +434,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set,
 			ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
 	} else {
 		/* Signal frames without vectors registers are short ! */
-		__u16 __user *svc = (void *) frame + frame_size - 2;
+		__u16 __user *svc = (void __user *) frame + frame_size - 2;
 		if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
 			return -EFAULT;
 		restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE;
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 0c1a0ff0a558..6a2ac257d98f 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -371,7 +371,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
 		restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE;
 	} else {
 		/* Signal frame without vector registers are short ! */
-		__u16 __user *svc = (void *) frame + frame_size - 2;
+		__u16 __user *svc = (void __user *) frame + frame_size - 2;
 		if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
 			return -EFAULT;
 		restorer = (unsigned long) svc | PSW_ADDR_AMODE;
-- 
cgit v1.2.3-59-g8ed1b


From eaf785d51dc6782da4cc87b5e891c8a9f8fa2c27 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Mon, 20 Oct 2014 15:41:39 +0200
Subject: s390/cpum_sf: Remove initialization of PMU event index

The git commit c719f56092add9b3d4192f57c64ce7af11105130
"perf: Fix and clean up initialization of pmu::event_idx" removed
the PMU event index callback for all architectures but x86.
Remove the initialization of the event index as well.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/perf_cpum_sf.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 08e761318c17..8e1b77530060 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1383,7 +1383,6 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
 		cpuhw->lsctl.ed = 1;
 
 	/* Set in_use flag and store event */
-	event->hw.idx = 0;	  /* only one sampling event per CPU supported */
 	cpuhw->event = event;
 	cpuhw->flags |= PMU_F_IN_USE;
 
-- 
cgit v1.2.3-59-g8ed1b


From b19556231156ce3e58ffd677747bf3ef7890a937 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@linux.vnet.ibm.com>
Date: Fri, 31 Oct 2014 14:10:14 +0100
Subject: s390/docs: Fix the documentation of the address spaces

The information about the address spaces was completely outdated, since
the usage of the address spaces changed quite a bit since the early days.
This patch now updates the information about the usage of the address
spaces, mostly by using the description from Heiko's patch "rework uaccess
code - fix locking issues" (457f2180951cdcbfb4657ddcc83b486e93497f56).

Signed-off-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 Documentation/s390/Debugging390.txt | 88 +++++++++++++++++++++++++------------
 1 file changed, 61 insertions(+), 27 deletions(-)

diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt
index 462321c1aeea..2120eec48a5c 100644
--- a/Documentation/s390/Debugging390.txt
+++ b/Documentation/s390/Debugging390.txt
@@ -114,28 +114,25 @@ s/390 z/Architecture
 
 16-17 16-17   Address Space Control
 
-	      00 Primary Space Mode when DAT on
-	      The linux kernel currently runs in this mode, CR1 is affiliated with 
-              this mode & points to the primary segment table origin etc.
-
-	      01 Access register mode this mode is used in functions to 
-	      copy data between kernel & user space.
-
-	      10 Secondary space mode not used in linux however CR7 the
-	      register affiliated with this mode is & this & normally
-	      CR13=CR7 to allow us to copy data between kernel & user space.
-	      We do this as follows:
-	      We set ar2 to 0 to designate its
-	      affiliated gpr ( gpr2 )to point to primary=kernel space.
-	      We set ar4 to 1 to designate its
-	      affiliated gpr ( gpr4 ) to point to secondary=home=user space
-	      & then essentially do a memcopy(gpr2,gpr4,size) to
-	      copy data between the address spaces, the reason we use home space for the
-	      kernel & don't keep secondary space free is that code will not run in 
-	      secondary space.
-
-	      11 Home Space Mode all user programs run in this mode.
-	      it is affiliated with CR13.
+	      00 Primary Space Mode:
+	      The register CR1 contains the primary address-space control ele-
+	      ment (PASCE), which points to the primary space region/segment
+	      table origin.
+
+	      01 Access register mode
+
+	      10 Secondary Space Mode:
+	      The register CR7 contains the secondary address-space control
+	      element (SASCE), which points to the secondary space region or
+	      segment table origin.
+
+	      11 Home Space Mode:
+	      The register CR13 contains the home space address-space control
+	      element (HASCE), which points to the home space region/segment
+	      table origin.
+
+	      See "Address Spaces on Linux for s/390 & z/Architecture" below
+	      for more information about address space usage in Linux.
 
 18-19 18-19   Condition codes (CC)
 
@@ -249,9 +246,9 @@ currently 4TB of physical memory currently on z/Architecture.
 Address Spaces on Linux for s/390 & z/Architecture
 ==================================================
 
-Our addressing scheme is as follows
-
+Our addressing scheme is basically as follows:
 
+				   Primary Space	       Home Space
 Himem 0x7fffffff 2GB on s/390    *****************          ****************
 currently 0x3ffffffffff (2^42)-1 *  User Stack   *          *              *
 on z/Architecture.		 *****************          *              *
@@ -264,9 +261,46 @@ on z/Architecture.		 *****************          *              *
             			 *   Sections    *          *              *
 0x00000000                       *****************          ****************
 
-This also means that we need to look at the PSW problem state bit
-or the addressing mode to decide whether we are looking at
-user or kernel space.
+This also means that we need to look at the PSW problem state bit and the
+addressing mode to decide whether we are looking at user or kernel space.
+
+User space runs in primary address mode (or access register mode within
+the vdso code).
+
+The kernel usually runs in home space mode; however, when accessing
+user space the kernel switches to primary or secondary address mode if
+the mvcos instruction is not available or if a compare-and-swap (futex)
+instruction on a user space address is performed.
+
+When also looking at the ASCE control registers, this means:
+
+User space:
+- runs in primary or access register mode
+- cr1 contains the user asce
+- cr7 contains the user asce
+- cr13 contains the kernel asce
+
+Kernel space:
+- runs in home space mode
+- cr1 contains the user or kernel asce
+  -> the kernel asce is loaded when a uaccess requires primary or
+     secondary address mode
+- cr7 contains the user or kernel asce, (changed with set_fs())
+- cr13 contains the kernel asce
+
+In case of uaccess the kernel changes to:
+- primary space mode in case of a uaccess (copy_to_user) and uses
+  e.g. the mvcp instruction to access user space. However the kernel
+  will stay in home space mode if the mvcos instruction is available
+- secondary space mode in case of futex atomic operations, so that the
+  instructions come from primary address space and data from secondary
+  space
+
+In case of KVM, the kernel runs in home space mode, but cr1 gets switched
+to contain the gmap asce before the SIE instruction gets executed. When
+the SIE instruction is finished, cr1 will be switched back to contain the
+user asce.
+
 
 Virtual Addresses on s/390 & z/Architecture
 ===========================================
-- 
cgit v1.2.3-59-g8ed1b


From a6b42afa3fc452339e157ad5245320804cf1206f Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@linux.vnet.ibm.com>
Date: Tue, 28 Oct 2014 15:12:23 +0100
Subject: s390/docs: Remove sections that are not related to s390

Information on how to use the GCC pre-processor, objdump, strace, top, etc.
is generic and not specific to the S390 architecture, so we do not need
this information in Debugging390.txt.

Signed-off-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 Documentation/s390/Debugging390.txt | 374 ------------------------------------
 1 file changed, 374 deletions(-)

diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt
index 2120eec48a5c..08911b5c6b0e 100644
--- a/Documentation/s390/Debugging390.txt
+++ b/Documentation/s390/Debugging390.txt
@@ -26,11 +26,6 @@ The Linux for s/390 & z/Architecture Kernel Task Structure
 Register Usage & Stackframes on Linux for s/390 & z/Architecture
 A sample program with comments
 Compiling programs for debugging on Linux for s/390 & z/Architecture
-Figuring out gcc compile errors
-Debugging Tools
-objdump
-strace
-Performance Debugging 
 Debugging under VM
 s/390 & z/Architecture IO Overview
 Debugging IO on s/390 & z/Architecture under VM
@@ -740,376 +735,7 @@ Debugging with optimisation has since much improved after fixing
 some bugs, please make sure you are using gdb-5.0 or later developed 
 after Nov'2000.
 
-Figuring out gcc compile errors
-===============================
-If you are getting a lot of syntax errors compiling a program & the problem
-isn't blatantly obvious from the source.
-It often helps to just preprocess the file, this is done with the -E
-option in gcc.
-What this does is that it runs through the very first phase of compilation
-( compilation in gcc is done in several stages & gcc calls many programs to
-achieve its end result ) with the -E option gcc just calls the gcc preprocessor (cpp).
-The c preprocessor does the following, it joins all the files #included together
-recursively ( #include files can #include other files ) & also the c file you wish to compile.
-It puts a fully qualified path of the #included files in a comment & it
-does macro expansion.
-This is useful for debugging because
-1) You can double check whether the files you expect to be included are the ones
-that are being included ( e.g. double check that you aren't going to the i386 asm directory ).
-2) Check that macro definitions aren't clashing with typedefs,
-3) Check that definitions aren't being used before they are being included.
-4) Helps put the line emitting the error under the microscope if it contains macros.
-
-For convenience the Linux kernel's makefile will do preprocessing automatically for you
-by suffixing the file you want built with .i ( instead of .o )
-
-e.g.
-from the linux directory type
-make arch/s390/kernel/signal.i
-this will build
-
-s390-gcc -D__KERNEL__ -I/home1/barrow/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer
--fno-strict-aliasing -D__SMP__ -pipe -fno-strength-reduce   -E arch/s390/kernel/signal.c
-> arch/s390/kernel/signal.i  
-
-Now look at signal.i you should see something like.
-
-
-# 1 "/home1/barrow/linux/include/asm/types.h" 1
-typedef unsigned short umode_t;
-typedef __signed__ char __s8;
-typedef unsigned char __u8;
-typedef __signed__ short __s16;
-typedef unsigned short __u16;
-
-If instead you are getting errors further down e.g.
-unknown instruction:2515 "move.l" or better still unknown instruction:2515 
-"Fixme not implemented yet, call Martin" you are probably are attempting to compile some code 
-meant for another architecture or code that is simply not implemented, with a fixme statement
-stuck into the inline assembly code so that the author of the file now knows he has work to do.
-To look at the assembly emitted by gcc just before it is about to call gas ( the gnu assembler )
-use the -S option.
-Again for your convenience the Linux kernel's Makefile will hold your hand &
-do all this donkey work for you also by building the file with the .s suffix.
-e.g.
-from the Linux directory type 
-make arch/s390/kernel/signal.s 
-
-s390-gcc -D__KERNEL__ -I/home1/barrow/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer
--fno-strict-aliasing -D__SMP__ -pipe -fno-strength-reduce  -S arch/s390/kernel/signal.c 
--o arch/s390/kernel/signal.s  
-
-
-This will output something like, ( please note the constant pool & the useful comments
-in the prologue to give you a hand at interpreting it ).
-
-.LC54:
-	.string	"misaligned (__u16 *) in __xchg\n"
-.LC57:
-	.string	"misaligned (__u32 *) in __xchg\n"
-.L$PG1: # Pool sys_sigsuspend
-.LC192:
-	.long	-262401
-.LC193:
-	.long	-1
-.LC194:
-	.long	schedule-.L$PG1
-.LC195:
-	.long	do_signal-.L$PG1
-	.align 4
-.globl sys_sigsuspend
-	.type	 sys_sigsuspend,@function
-sys_sigsuspend:
-#	leaf function           0
-#	automatics              16
-#	outgoing args           0
-#	need frame pointer      0
-#	call alloca             0
-#	has varargs             0
-#	incoming args (stack)   0
-#	function length         168
-	STM	8,15,32(15)
-	LR	0,15
-	AHI	15,-112
-	BASR	13,0
-.L$CO1:	AHI	13,.L$PG1-.L$CO1
-	ST	0,0(15)
-	LR    8,2
-	N     5,.LC192-.L$PG1(13) 
-
-Adding -g to the above output makes the output even more useful
-e.g. typing
-make CC:="s390-gcc -g" kernel/sched.s
-
-which compiles.
-s390-gcc -g -D__KERNEL__ -I/home/barrow/linux-2.3/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strict-aliasing -pipe -fno-strength-reduce   -S kernel/sched.c -o kernel/sched.s 
-
-also outputs stabs ( debugger ) info, from this info you can find out the
-offsets & sizes of various elements in structures.
-e.g. the stab for the structure
-struct rlimit {
-	unsigned long	rlim_cur;
-	unsigned long	rlim_max;
-};
-is
-.stabs "rlimit:T(151,2)=s8rlim_cur:(0,5),0,32;rlim_max:(0,5),32,32;;",128,0,0,0
-from this stab you can see that 
-rlimit_cur starts at bit offset 0 & is 32 bits in size
-rlimit_max starts at bit offset 32 & is 32 bits in size.
-
-
-Debugging Tools:
-================
-
-objdump
-=======
-This is a tool with many options the most useful being ( if compiled with -g).
-objdump --source <victim program or object file> > <victims debug listing >
-
-
-The whole kernel can be compiled like this ( Doing this will make a 17MB kernel
-& a 200 MB listing ) however you have to strip it before building the image
-using the strip command to make it a more reasonable size to boot it.
-
-A source/assembly mixed dump of the kernel can be done with the line
-objdump --source vmlinux > vmlinux.lst
-Also, if the file isn't compiled -g, this will output as much debugging information
-as it can (e.g. function names). This is very slow as it spends lots
-of time searching for debugging info. The following self explanatory line should be used 
-instead if the code isn't compiled -g, as it is much faster:
-objdump --disassemble-all --syms vmlinux > vmlinux.lst  
-
-As hard drive space is valuable most of us use the following approach.
-1) Look at the emitted psw on the console to find the crash address in the kernel.
-2) Look at the file System.map ( in the linux directory ) produced when building 
-the kernel to find the closest address less than the current PSW to find the
-offending function.
-3) use grep or similar to search the source tree looking for the source file
- with this function if you don't know where it is.
-4) rebuild this object file with -g on, as an example suppose the file was
-( /arch/s390/kernel/signal.o ) 
-5) Assuming the file with the erroneous function is signal.c Move to the base of the 
-Linux source tree.
-6) rm /arch/s390/kernel/signal.o
-7) make /arch/s390/kernel/signal.o
-8) watch the gcc command line emitted
-9) type it in again or alternatively cut & paste it on the console adding the -g option.
-10) objdump --source arch/s390/kernel/signal.o > signal.lst
-This will output the source & the assembly intermixed, as the snippet below shows
-This will unfortunately output addresses which aren't the same
-as the kernel ones you should be able to get around the mental arithmetic
-by playing with the --adjust-vma parameter to objdump.
-
-
-
-
-static inline void spin_lock(spinlock_t *lp)
-{
-      a0:       18 34           lr      %r3,%r4
-      a2:       a7 3a 03 bc     ahi     %r3,956
-        __asm__ __volatile("    lhi   1,-1\n"
-      a6:       a7 18 ff ff     lhi     %r1,-1
-      aa:       1f 00           slr     %r0,%r0
-      ac:       ba 01 30 00     cs      %r0,%r1,0(%r3)
-      b0:       a7 44 ff fd     jm      aa <sys_sigsuspend+0x2e>
-        saveset = current->blocked;
-      b4:       d2 07 f0 68     mvc     104(8,%r15),972(%r4)
-      b8:       43 cc
-        return (set->sig[0] & mask) != 0;
-} 
-
-6) If debugging under VM go down to that section in the document for more info.
-
-
-I now have a tool which takes the pain out of --adjust-vma
-& you are able to do something like
-make /arch/s390/kernel/traps.lst
-& it automatically generates the correctly relocated entries for
-the text segment in traps.lst.
-This tool is now standard in linux distro's in scripts/makelst
-
-strace:
--------
-Q. What is it ?
-A. It is a tool for intercepting calls to the kernel & logging them
-to a file & on the screen.
-
-Q. What use is it ?
-A. You can use it to find out what files a particular program opens.
-
-
-
-Example 1
----------
-If you wanted to know does ping work but didn't have the source 
-strace ping -c 1 127.0.0.1  
-& then look at the man pages for each of the syscalls below,
-( In fact this is sometimes easier than looking at some spaghetti
-source which conditionally compiles for several architectures ).
-Not everything that it throws out needs to make sense immediately.
-
-Just looking quickly you can see that it is making up a RAW socket
-for the ICMP protocol.
-Doing an alarm(10) for a 10 second timeout
-& doing a gettimeofday call before & after each read to see 
-how long the replies took, & writing some text to stdout so the user
-has an idea what is going on.
-
-socket(PF_INET, SOCK_RAW, IPPROTO_ICMP) = 3
-getuid()                                = 0
-setuid(0)                               = 0
-stat("/usr/share/locale/C/libc.cat", 0xbffff134) = -1 ENOENT (No such file or directory)
-stat("/usr/share/locale/libc/C", 0xbffff134) = -1 ENOENT (No such file or directory)
-stat("/usr/local/share/locale/C/libc.cat", 0xbffff134) = -1 ENOENT (No such file or directory)
-getpid()                                = 353
-setsockopt(3, SOL_SOCKET, SO_BROADCAST, [1], 4) = 0
-setsockopt(3, SOL_SOCKET, SO_RCVBUF, [49152], 4) = 0
-fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(3, 1), ...}) = 0
-mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x40008000
-ioctl(1, TCGETS, {B9600 opost isig icanon echo ...}) = 0
-write(1, "PING 127.0.0.1 (127.0.0.1): 56 d"..., 42PING 127.0.0.1 (127.0.0.1): 56 data bytes
-) = 42
-sigaction(SIGINT, {0x8049ba0, [], SA_RESTART}, {SIG_DFL}) = 0 
-sigaction(SIGALRM, {0x8049600, [], SA_RESTART}, {SIG_DFL}) = 0
-gettimeofday({948904719, 138951}, NULL) = 0
-sendto(3, "\10\0D\201a\1\0\0\17#\2178\307\36"..., 64, 0, {sin_family=AF_INET,
-sin_port=htons(0), sin_addr=inet_addr("127.0.0.1")}, 16) = 64
-sigaction(SIGALRM, {0x8049600, [], SA_RESTART}, {0x8049600, [], SA_RESTART}) = 0
-sigaction(SIGALRM, {0x8049ba0, [], SA_RESTART}, {0x8049600, [], SA_RESTART}) = 0
-alarm(10)                               = 0
-recvfrom(3, "E\0\0T\0005\0\0@\1|r\177\0\0\1\177"..., 192, 0, 
-{sin_family=AF_INET, sin_port=htons(50882), sin_addr=inet_addr("127.0.0.1")}, [16]) = 84
-gettimeofday({948904719, 160224}, NULL) = 0
-recvfrom(3, "E\0\0T\0006\0\0\377\1\275p\177\0"..., 192, 0, 
-{sin_family=AF_INET, sin_port=htons(50882), sin_addr=inet_addr("127.0.0.1")}, [16]) = 84
-gettimeofday({948904719, 166952}, NULL) = 0
-write(1, "64 bytes from 127.0.0.1: icmp_se"..., 
-5764 bytes from 127.0.0.1: icmp_seq=0 ttl=255 time=28.0 ms
-
-Example 2
----------
-strace passwd 2>&1 | grep open
-produces the following output
-open("/etc/ld.so.cache", O_RDONLY)      = 3
-open("/opt/kde/lib/libc.so.5", O_RDONLY) = -1 ENOENT (No such file or directory)
-open("/lib/libc.so.5", O_RDONLY)        = 3
-open("/dev", O_RDONLY)                  = 3
-open("/var/run/utmp", O_RDONLY)         = 3
-open("/etc/passwd", O_RDONLY)           = 3
-open("/etc/shadow", O_RDONLY)           = 3
-open("/etc/login.defs", O_RDONLY)       = 4
-open("/dev/tty", O_RDONLY)              = 4 
-
-The 2>&1 is done to redirect stderr to stdout & grep is then filtering this input 
-through the pipe for each line containing the string open.
-
-
-Example 3
----------
-Getting sophisticated
-telnetd crashes & I don't know why
-
-Steps
------
-1) Replace the following line in /etc/inetd.conf
-telnet  stream  tcp     nowait  root    /usr/sbin/in.telnetd -h 
-with
-telnet  stream  tcp     nowait  root    /blah
-
-2) Create the file /blah with the following contents to start tracing telnetd 
-#!/bin/bash
-/usr/bin/strace -o/t1 -f /usr/sbin/in.telnetd -h 
-3) chmod 700 /blah to make it executable only to root
-4)
-killall -HUP inetd
-or ps aux | grep inetd
-get inetd's process id
-& kill -HUP inetd to restart it.
-
-Important options
------------------
--o is used to tell strace to output to a file in our case t1 in the root directory
--f is to follow children i.e.
-e.g in our case above telnetd will start the login process & subsequently a shell like bash.
-You will be able to tell which is which from the process ID's listed on the left hand side
-of the strace output.
--p<pid> will tell strace to attach to a running process, yup this can be done provided
- it isn't being traced or debugged already & you have enough privileges,
-the reason 2 processes cannot trace or debug the same program is that strace
-becomes the parent process of the one being debugged & processes ( unlike people )
-can have only one parent.
-
-
-However the file /t1 will get big quite quickly
-to test it telnet 127.0.0.1
-
-now look at what files in.telnetd execve'd
-413   execve("/usr/sbin/in.telnetd", ["/usr/sbin/in.telnetd", "-h"], [/* 17 vars */]) = 0
-414   execve("/bin/login", ["/bin/login", "-h", "localhost", "-p"], [/* 2 vars */]) = 0 
 
-Whey it worked!.
-
-
-Other hints:
-------------
-If the program is not very interactive ( i.e. not much keyboard input )
-& is crashing in one architecture but not in another you can do 
-an strace of both programs under as identical a scenario as you can
-on both architectures outputting to a file then.
-do a diff of the two traces using the diff program
-i.e.
-diff output1 output2
-& maybe you'll be able to see where the call paths differed, this
-is possibly near the cause of the crash. 
-
-More info
----------
-Look at man pages for strace & the various syscalls
-e.g. man strace, man alarm, man socket.
-
-
-Performance Debugging
-=====================
-gcc is capable of compiling in profiling code just add the -p option
-to the CFLAGS, this obviously affects program size & performance.
-This can be used by the gprof gnu profiling tool or the
-gcov the gnu code coverage tool ( code coverage is a means of testing
-code quality by checking if all the code in an executable in exercised by
-a tester ).
-
-
-Using top to find out where processes are sleeping in the kernel
-----------------------------------------------------------------
-To do this copy the System.map from the root directory where
-the linux kernel was built to the /boot directory on your 
-linux machine.
-Start top
-Now type fU<return>
-You should see a new field called WCHAN which
-tells you where each process is sleeping here is a typical output.
- 
- 6:59pm  up 41 min,  1 user,  load average: 0.00, 0.00, 0.00
-28 processes: 27 sleeping, 1 running, 0 zombie, 0 stopped
-CPU states:  0.0% user,  0.1% system,  0.0% nice, 99.8% idle
-Mem:   254900K av,   45976K used,  208924K free,       0K shrd,   28636K buff
-Swap:       0K av,       0K used,       0K free                    8620K cached
-
-  PID USER     PRI  NI  SIZE  RSS SHARE WCHAN     STAT  LIB %CPU %MEM   TIME COMMAND
-  750 root      12   0   848  848   700 do_select S       0  0.1  0.3   0:00 in.telnetd
-  767 root      16   0  1140 1140   964           R       0  0.1  0.4   0:00 top
-    1 root       8   0   212  212   180 do_select S       0  0.0  0.0   0:00 init
-    2 root       9   0     0    0     0 down_inte SW      0  0.0  0.0   0:00 kmcheck
-
-The time command
-----------------
-Another related command is the time command which gives you an indication
-of where a process is spending the majority of its time.
-e.g.
-time ping -c 5 nc
-outputs
-real	0m4.054s
-user	0m0.010s
-sys	0m0.010s
 
 Debugging under VM
 ==================
-- 
cgit v1.2.3-59-g8ed1b


From 99e97b7106d492a3cac4f7963f4a89935d2fbca4 Mon Sep 17 00:00:00 2001
From: Frank Blaschka <frank.blaschka@de.ibm.com>
Date: Thu, 6 Nov 2014 13:17:06 +0100
Subject: s390/io: add ioport_map stubs

Add ioport_map and ioport_unmap stubs to make vfio build on s390.

Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/io.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index cd6b9ee7b69c..ba516f8f918a 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -38,6 +38,15 @@ static inline void iounmap(volatile void __iomem *addr)
 {
 }
 
+static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
+{
+	return NULL;
+}
+
+static inline void ioport_unmap(void __iomem *p)
+{
+}
+
 /*
  * s390 needs a private implementation of pci_iomap since ioremap with its
  * offset parameter isn't sufficient. That's because BAR spaces are not
-- 
cgit v1.2.3-59-g8ed1b


From afaa7d29bc04bf0fcf2e7bda2a802392a38d059b Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 14 Nov 2014 11:01:37 +0100
Subject: s390/irq: use irq 0

Irq 0 is currently unused on s390. Since there is no reason for
this, start counting at zero and gain an additional irq. Also
correctly report the smallest usable irq number for dynamic
allocation.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/irq.h | 11 ++++-------
 arch/s390/kernel/irq.c      |  5 +----
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index b0d5f0a97a01..343ea7c987aa 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -1,11 +1,11 @@
 #ifndef _ASM_IRQ_H
 #define _ASM_IRQ_H
 
-#define EXT_INTERRUPT	1
-#define IO_INTERRUPT	2
-#define THIN_INTERRUPT	3
+#define EXT_INTERRUPT	0
+#define IO_INTERRUPT	1
+#define THIN_INTERRUPT	2
 
-#define NR_IRQS_BASE	4
+#define NR_IRQS_BASE	3
 
 #ifdef CONFIG_PCI_NR_MSI
 # define NR_IRQS	(NR_IRQS_BASE + CONFIG_PCI_NR_MSI)
@@ -13,9 +13,6 @@
 # define NR_IRQS	NR_IRQS_BASE
 #endif
 
-/* This number is used when no interrupt has been assigned */
-#define NO_IRQ		0
-
 /* External interruption codes */
 #define EXT_IRQ_INTERRUPT_KEY	0x0040
 #define EXT_IRQ_CLK_COMP	0x1004
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 1b8a38ab7861..f238720690f3 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -127,13 +127,10 @@ int show_interrupts(struct seq_file *p, void *v)
 		for_each_online_cpu(cpu)
 			seq_printf(p, "CPU%d       ", cpu);
 		seq_putc(p, '\n');
-		goto out;
 	}
 	if (index < NR_IRQS) {
 		if (index >= NR_IRQS_BASE)
 			goto out;
-		/* Adjust index to process irqclass_main_desc array entries */
-		index--;
 		seq_printf(p, "%s: ", irqclass_main_desc[index].name);
 		irq = irqclass_main_desc[index].irq;
 		for_each_online_cpu(cpu)
@@ -158,7 +155,7 @@ out:
 
 unsigned int arch_dynirq_lower_bound(unsigned int from)
 {
-	return from < THIN_INTERRUPT ? THIN_INTERRUPT : from;
+	return from < NR_IRQS_BASE ? NR_IRQS_BASE : from;
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 86c558e8d3b774580faf8250092388d52cfde63e Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 14 Nov 2014 16:37:47 +0100
Subject: s390: fix ptrace of user area if the inferior uses vector registers

The floating point registers of a process that uses vector instructions are
not stored in task->thread.fp_regs anymore but in the upper halves of the
first 16 vector registers.
The ptrace interface for the peeks and pokes to the user area fails to take
this into account. Fix __peek_user[_compat] and __poke_user[_compat]
to use the vector array for the floating point registers if the process
has one.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/ptrace.c | 97 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 75 insertions(+), 22 deletions(-)

diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 99a567b70d16..9eed6bb5c4a9 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -248,14 +248,27 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
 		 */
 		tmp = 0;
 
+	} else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) {
+		/*
+		 * floating point control reg. is in the thread structure
+		 */
+		tmp = child->thread.fp_regs.fpc;
+		tmp <<= BITS_PER_LONG - 32;
+
 	} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
-		/* 
-		 * floating point regs. are stored in the thread structure
+		/*
+		 * floating point regs. are either in child->thread.fp_regs
+		 * or the child->thread.vxrs array
 		 */
-		offset = addr - (addr_t) &dummy->regs.fp_regs;
-		tmp = *(addr_t *)((addr_t) &child->thread.fp_regs + offset);
-		if (addr == (addr_t) &dummy->regs.fp_regs.fpc)
-			tmp <<= BITS_PER_LONG - 32;
+		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
+#ifdef CONFIG_64BIT
+		if (child->thread.vxrs)
+			tmp = *(addr_t *)
+			       ((addr_t) child->thread.vxrs + 2*offset);
+		else
+#endif
+			tmp = *(addr_t *)
+			       ((addr_t) &child->thread.fp_regs.fprs + offset);
 
 	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
 		/*
@@ -383,16 +396,29 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
 		 */
 		return 0;
 
+	} else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) {
+		/*
+		 * floating point control reg. is in the thread structure
+		 */
+		if ((unsigned int) data != 0 ||
+		    test_fp_ctl(data >> (BITS_PER_LONG - 32)))
+			return -EINVAL;
+		child->thread.fp_regs.fpc = data >> (BITS_PER_LONG - 32);
+
 	} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
 		/*
-		 * floating point regs. are stored in the thread structure
+		 * floating point regs. are either in child->thread.fp_regs
+		 * or the child->thread.vxrs array
 		 */
-		if (addr == (addr_t) &dummy->regs.fp_regs.fpc)
-			if ((unsigned int) data != 0 ||
-			    test_fp_ctl(data >> (BITS_PER_LONG - 32)))
-				return -EINVAL;
-		offset = addr - (addr_t) &dummy->regs.fp_regs;
-		*(addr_t *)((addr_t) &child->thread.fp_regs + offset) = data;
+		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
+#ifdef CONFIG_64BIT
+		if (child->thread.vxrs)
+			*(addr_t *)((addr_t)
+				child->thread.vxrs + 2*offset) = data;
+		else
+#endif
+			*(addr_t *)((addr_t)
+				&child->thread.fp_regs.fprs + offset) = data;
 
 	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
 		/*
@@ -611,12 +637,26 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
 		 */
 		tmp = 0;
 
+	} else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) {
+		/*
+		 * floating point control reg. is in the thread structure
+		 */
+		tmp = child->thread.fp_regs.fpc;
+
 	} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
 		/*
-		 * floating point regs. are stored in the thread structure 
+		 * floating point regs. are either in child->thread.fp_regs
+		 * or the child->thread.vxrs array
 		 */
-	        offset = addr - (addr_t) &dummy32->regs.fp_regs;
-		tmp = *(__u32 *)((addr_t) &child->thread.fp_regs + offset);
+		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
+#ifdef CONFIG_64BIT
+		if (child->thread.vxrs)
+			tmp = *(__u32 *)
+			       ((addr_t) child->thread.vxrs + 2*offset);
+		else
+#endif
+			tmp = *(__u32 *)
+			       ((addr_t) &child->thread.fp_regs.fprs + offset);
 
 	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
 		/*
@@ -722,15 +762,28 @@ static int __poke_user_compat(struct task_struct *child,
 		 */
 		return 0;
 
-	} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
+	} else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) {
 		/*
-		 * floating point regs. are stored in the thread structure 
+		 * floating point control reg. is in the thread structure
 		 */
-		if (addr == (addr_t) &dummy32->regs.fp_regs.fpc &&
-		    test_fp_ctl(tmp))
+		if (test_fp_ctl(tmp))
 			return -EINVAL;
-	        offset = addr - (addr_t) &dummy32->regs.fp_regs;
-		*(__u32 *)((addr_t) &child->thread.fp_regs + offset) = tmp;
+		child->thread.fp_regs.fpc = data;
+
+	} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
+		/*
+		 * floating point regs. are either in child->thread.fp_regs
+		 * or the child->thread.vxrs array
+		 */
+		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
+#ifdef CONFIG_64BIT
+		if (child->thread.vxrs)
+			*(__u32 *)((addr_t)
+				child->thread.vxrs + 2*offset) = tmp;
+		else
+#endif
+			*(__u32 *)((addr_t)
+				&child->thread.fp_regs.fprs + offset) = tmp;
 
 	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
 		/*
-- 
cgit v1.2.3-59-g8ed1b


From 4eafad7febd482092b331ea72c37274d745956be Mon Sep 17 00:00:00 2001
From: Alexey Ishchuk <aishchuk@linux.vnet.ibm.com>
Date: Fri, 14 Nov 2014 14:27:58 +0100
Subject: s390/kernel: add system calls for PCI memory access

Add the new __NR_s390_pci_mmio_write and __NR_s390_pci_mmio_read
system calls to allow user space applications to access device PCI I/O
memory pages on the s390x platform.

[ Martin Schwidefsky: some code beautification ]

Signed-off-by: Alexey Ishchuk <aishchuk@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/uapi/asm/unistd.h |   4 +-
 arch/s390/kernel/compat_wrapper.c   |   2 +
 arch/s390/kernel/entry.h            |   2 +
 arch/s390/kernel/syscalls.S         |   2 +
 arch/s390/pci/Makefile              |   2 +-
 arch/s390/pci/pci_mmio.c            | 115 ++++++++++++++++++++++++++++++++++++
 kernel/sys_ni.c                     |   2 +
 7 files changed, 127 insertions(+), 2 deletions(-)
 create mode 100644 arch/s390/pci/pci_mmio.c

diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 4197c89c52d4..2b446cf0cc65 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -287,7 +287,9 @@
 #define __NR_getrandom		349
 #define __NR_memfd_create	350
 #define __NR_bpf		351
-#define NR_syscalls 352
+#define __NR_s390_pci_mmio_write	352
+#define __NR_s390_pci_mmio_read		353
+#define NR_syscalls 354
 
 /* 
  * There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index c4f7a3d655b8..d7fa2f0f1425 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -218,3 +218,5 @@ COMPAT_SYSCALL_WRAP3(seccomp, unsigned int, op, unsigned int, flags, const char
 COMPAT_SYSCALL_WRAP3(getrandom, char __user *, buf, size_t, count, unsigned int, flags)
 COMPAT_SYSCALL_WRAP2(memfd_create, const char __user *, uname, unsigned int, flags)
 COMPAT_SYSCALL_WRAP3(bpf, int, cmd, union bpf_attr *, attr, unsigned int, size);
+COMPAT_SYSCALL_WRAP3(s390_pci_mmio_write, const unsigned long, mmio_addr, const void __user *, user_buffer, const size_t, length);
+COMPAT_SYSCALL_WRAP3(s390_pci_mmio_read, const unsigned long, mmio_addr, void __user *, user_buffer, const size_t, length);
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 0554b9771c9f..8e61393c8275 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -74,4 +74,6 @@ struct old_sigaction;
 long sys_s390_personality(unsigned int personality);
 long sys_s390_runtime_instr(int command, int signum);
 
+long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t);
+long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
 #endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 9f7087fd58de..a2987243bc76 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -360,3 +360,5 @@ SYSCALL(sys_seccomp,sys_seccomp,compat_sys_seccomp)
 SYSCALL(sys_getrandom,sys_getrandom,compat_sys_getrandom)
 SYSCALL(sys_memfd_create,sys_memfd_create,compat_sys_memfd_create) /* 350 */
 SYSCALL(sys_bpf,sys_bpf,compat_sys_bpf)
+SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_write,compat_sys_s390_pci_mmio_write)
+SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_read,compat_sys_s390_pci_mmio_read)
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index a9e1dc4ae442..805d8b29193a 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -3,4 +3,4 @@
 #
 
 obj-$(CONFIG_PCI)	+= pci.o pci_dma.o pci_clp.o pci_sysfs.o \
-			   pci_event.o pci_debug.o pci_insn.o
+			   pci_event.o pci_debug.o pci_insn.o pci_mmio.o
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
new file mode 100644
index 000000000000..62c5ea6d8682
--- /dev/null
+++ b/arch/s390/pci/pci_mmio.c
@@ -0,0 +1,115 @@
+/*
+ * Access to PCI I/O memory from user space programs.
+ *
+ * Copyright IBM Corp. 2014
+ * Author(s): Alexey Ishchuk <aishchuk@linux.vnet.ibm.com>
+ */
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+
+static long get_pfn(unsigned long user_addr, unsigned long access,
+		    unsigned long *pfn)
+{
+	struct vm_area_struct *vma;
+	long ret;
+
+	down_read(&current->mm->mmap_sem);
+	ret = -EINVAL;
+	vma = find_vma(current->mm, user_addr);
+	if (!vma)
+		goto out;
+	ret = -EACCES;
+	if (!(vma->vm_flags & access))
+		goto out;
+	ret = follow_pfn(vma, user_addr, pfn);
+out:
+	up_read(&current->mm->mmap_sem);
+	return ret;
+}
+
+SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
+		const void __user *, user_buffer, size_t, length)
+{
+	u8 local_buf[64];
+	void __iomem *io_addr;
+	void *buf;
+	unsigned long pfn;
+	long ret;
+
+	if (!zpci_is_enabled())
+		return -ENODEV;
+
+	if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length)
+		return -EINVAL;
+	if (length > 64) {
+		buf = kmalloc(length, GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;
+	} else
+		buf = local_buf;
+
+	ret = get_pfn(mmio_addr, VM_WRITE, &pfn);
+	if (ret)
+		goto out;
+	io_addr = (void *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK));
+
+	ret = -EFAULT;
+	if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE)
+		goto out;
+
+	if (copy_from_user(buf, user_buffer, length))
+		goto out;
+
+	memcpy_toio(io_addr, buf, length);
+	ret = 0;
+out:
+	if (buf != local_buf)
+		kfree(buf);
+	return ret;
+}
+
+SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
+		void __user *, user_buffer, size_t, length)
+{
+	u8 local_buf[64];
+	void __iomem *io_addr;
+	void *buf;
+	unsigned long pfn;
+	long ret;
+
+	if (!zpci_is_enabled())
+		return -ENODEV;
+
+	if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length)
+		return -EINVAL;
+	if (length > 64) {
+		buf = kmalloc(length, GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;
+	} else
+		buf = local_buf;
+
+	ret = get_pfn(mmio_addr, VM_READ, &pfn);
+	if (ret)
+		goto out;
+	io_addr = (void *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK));
+
+	ret = -EFAULT;
+	if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE)
+		goto out;
+
+	memcpy_fromio(buf, io_addr, length);
+
+	if (copy_to_user(user_buffer, buf, length))
+		goto out;
+
+	ret = 0;
+out:
+	if (buf != local_buf)
+		kfree(buf);
+	return ret;
+}
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 02aa4185b17e..61eea02b53f5 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -169,6 +169,8 @@ cond_syscall(ppc_rtas);
 cond_syscall(sys_spu_run);
 cond_syscall(sys_spu_create);
 cond_syscall(sys_subpage_prot);
+cond_syscall(sys_s390_pci_mmio_read);
+cond_syscall(sys_s390_pci_mmio_write);
 
 /* mmu depending weak syscall entries */
 cond_syscall(sys_mprotect);
-- 
cgit v1.2.3-59-g8ed1b


From 413d404768256eda4e13fdfce753fe2bbff2fcaf Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 19 Nov 2014 13:31:08 +0100
Subject: s390/traps: print interrupt code and instruction length code

It always confuses me to see the mixed instruction length code and
interruption code on user space faults, while the message clearly
says it is the interruption code.
So split the value and print both values separately. Also add the ILC
output to the die() message, so that user and kernel space faults
contain the same information.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/dumpstack.c | 3 ++-
 arch/s390/kernel/traps.c     | 3 ++-
 arch/s390/mm/fault.c         | 4 ++--
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index acb412442e5e..a99852e96a77 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -191,7 +191,8 @@ void die(struct pt_regs *regs, const char *str)
 	console_verbose();
 	spin_lock_irq(&die_lock);
 	bust_spinlocks(1);
-	printk("%s: %04x [#%d] ", str, regs->int_code & 0xffff, ++die_counter);
+	printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff,
+	       regs->int_code >> 17, ++die_counter);
 #ifdef CONFIG_PREEMPT
 	printk("PREEMPT ");
 #endif
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 1e1b866b2966..b7a3ccffa345 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -49,7 +49,8 @@ static inline void report_user_fault(struct pt_regs *regs, int signr)
 		return;
 	if (!printk_ratelimit())
 		return;
-	printk("User process fault: interruption code 0x%X ", regs->int_code);
+	printk("User process fault: interruption code %04x ilc:%d ",
+	       regs->int_code & 0xffff, regs->int_code >> 17);
 	print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN);
 	printk("\n");
 	show_regs(regs);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index ca70fad2fc92..811937bb90be 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -261,8 +261,8 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
 		return;
 	if (!printk_ratelimit())
 		return;
-	printk(KERN_ALERT "User process fault: interruption code 0x%X ",
-	       regs->int_code);
+	printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ",
+	       regs->int_code & 0xffff, regs->int_code >> 17);
 	print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
 	printk(KERN_CONT "\n");
 	printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
-- 
cgit v1.2.3-59-g8ed1b


From e56da345eb529370f21f36a52bbb114ba38ad921 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 19 Nov 2014 14:05:52 +0100
Subject: s390/traps: die on translation exceptions

Translation exceptions should never happen, since that implies that
either we screwed up the page tables or failed to properly flush the TLB.

In both cases we should not just simply kill user space or walk the kernel
exception tables. Instead an oops or a panic (panic_on_oops) is the better
answer.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/traps.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index b7a3ccffa345..f081cf1157c3 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -153,8 +153,6 @@ DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC,
 	      "privileged operation")
 DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
 	      "special operation exception")
-DO_ERROR_INFO(translation_exception, SIGILL, ILL_ILLOPN,
-	      "translation exception")
 
 #ifdef CONFIG_64BIT
 DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
@@ -181,6 +179,12 @@ static inline void do_fp_trap(struct pt_regs *regs, int fpc)
 	do_trap(regs, SIGFPE, si_code, "floating point exception");
 }
 
+void translation_exception(struct pt_regs *regs)
+{
+	/* May never happen. */
+	die(regs, "Translation exception");
+}
+
 void illegal_op(struct pt_regs *regs)
 {
 	siginfo_t info;
-- 
cgit v1.2.3-59-g8ed1b


From b3e06dc3f0256441e15ef9967ecf3500d3e4f022 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 24 Nov 2014 13:05:05 +0100
Subject: s390/sclp_async: add Kconfig option to specify the component id

Allow the Component ID for Call Home to be specified via the kernel
configuration. This removes the need for a distribution-specific
patch against the sclp_async.c source file.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/char/Kconfig      | 10 ++++++++++
 drivers/s390/char/sclp_async.c |  3 ++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig
index db2cb1f8a1b5..a5c6f7e157aa 100644
--- a/drivers/s390/char/Kconfig
+++ b/drivers/s390/char/Kconfig
@@ -102,6 +102,16 @@ config SCLP_ASYNC
 	  want for inform other people about your kernel panics,
 	  need this feature and intend to run your kernel in LPAR.
 
+config SCLP_ASYNC_ID
+       string "Component ID for Call Home"
+       depends on SCLP_ASYNC
+       default "000000000"
+       help
+	 The Component ID for Call Home is used to identify the correct
+	 problem reporting queue the call home records should be sent to.
+
+	 If you are unsure, please use the default value "000000000".
+
 config HMC_DRV
 	def_tristate m
 	prompt "Support for file transfers from HMC drive CD/DVD-ROM"
diff --git a/drivers/s390/char/sclp_async.c b/drivers/s390/char/sclp_async.c
index 5f9f929e891c..19c25427f27f 100644
--- a/drivers/s390/char/sclp_async.c
+++ b/drivers/s390/char/sclp_async.c
@@ -137,7 +137,8 @@ static int sclp_async_send_wait(char *message)
 	 * Retain Queue
 	 * e.g. 5639CC140 500 Red Hat RHEL5 Linux for zSeries (RHEL AS)
 	 */
-	strncpy(sccb->evbuf.comp_id, "000000000", sizeof(sccb->evbuf.comp_id));
+	strncpy(sccb->evbuf.comp_id, CONFIG_SCLP_ASYNC_ID,
+		sizeof(sccb->evbuf.comp_id));
 	sccb->evbuf.header.length = sizeof(sccb->evbuf);
 	sccb->header.length = sizeof(sccb->evbuf) + sizeof(sccb->header);
 	sccb->header.function_code = SCLP_NORMAL_WRITE;
-- 
cgit v1.2.3-59-g8ed1b


From 7eed2e09abf0a0064a7e3f4a911b37d71202dfa4 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 24 Nov 2014 14:30:32 +0100
Subject: s390/ftrace: provide working ftrace_return_address()

The common code ftrace_return_address(n), which is just a wrapper for
__builtin_return_address(n), will only work for n > 0 if CONFIG_FRAME_POINTER
is set to 'y'. Otherwise it will return 0.
Since on s390 we will never have that config option set to 'y'
ftrace_return_address() won't work at all for n > 0.

Luckily we always compile the kernel with -mkernel-backchain which
in turn means that __builtin_return_address(n) will always work.

So let ftrace_return_address(n) map to __builtin_return_address(n).

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/ftrace.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 785041f1dc77..abb618f1ead2 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -8,6 +8,8 @@
 
 #ifndef __ASSEMBLY__
 
+#define ftrace_return_address(n) __builtin_return_address(n)
+
 void _mcount(void);
 void ftrace_caller(void);
 
-- 
cgit v1.2.3-59-g8ed1b


From 8c080bd0a1f9f337add122e1c0252151a8f89c14 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Sat, 22 Nov 2014 15:00:55 +0100
Subject: s390/pci: Deletion of unnecessary checks before the function call
 "debug_unregister"

The debug_unregister() function also performs input parameter validation.
Thus the test around the call is not needed.

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/pci/pci_debug.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index eec598c5939f..3229a2e570df 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -158,10 +158,7 @@ int __init zpci_debug_init(void)
 
 void zpci_debug_exit(void)
 {
-	if (pci_debug_msg_id)
-		debug_unregister(pci_debug_msg_id);
-	if (pci_debug_err_id)
-		debug_unregister(pci_debug_err_id);
-
+	debug_unregister(pci_debug_msg_id);
+	debug_unregister(pci_debug_err_id);
 	debugfs_remove(debugfs_root);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 590aeeddc6c6d9b9c93bec56fc68512631489d2a Mon Sep 17 00:00:00 2001
From: Stefan Haberland <stefan.haberland@de.ibm.com>
Date: Mon, 24 Nov 2014 10:45:47 +0100
Subject: s390/dasd: remove unused code

Signed-off-by: Stefan Haberland <stefan.haberland@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/dasd.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 329db997ee66..3679aeca1c1a 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -2326,21 +2326,11 @@ retry:
 			return -EAGAIN;
 
 		/* normal recovery for basedev IO */
-		if (__dasd_sleep_on_erp(cqr)) {
+		if (__dasd_sleep_on_erp(cqr))
+			/* handle erp first */
 			goto retry;
-			/* remember that ERP was needed */
-			rc = 1;
-			/* skip processing for active cqr */
-			if (cqr->status != DASD_CQR_TERMINATED &&
-			    cqr->status != DASD_CQR_NEED_ERP)
-				break;
-		}
 	}
 
-	/* start ERP requests in upper loop */
-	if (rc)
-		goto retry;
-
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 2c17124bf33f0d9450375610ff79ad83531b5bfe Mon Sep 17 00:00:00 2001
From: Stefan Haberland <stefan.haberland@de.ibm.com>
Date: Mon, 24 Nov 2014 10:53:19 +0100
Subject: s390/dasd: fix infinite term I/O loop

During device activation all paths could be lost and since the device
is not active it has no indication of this fact - hence the CQR will
time-out. The following cancelation might fail with -EINVAL because
CIO took over control and started path verification. In this case mark
the CQR as being CLEARED since it could not be running any more.

Signed-off-by: Stefan Haberland <stefan.haberland@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/dasd.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 3679aeca1c1a..8cb120e9c868 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1377,6 +1377,20 @@ int dasd_term_IO(struct dasd_ccw_req *cqr)
 				      "I/O error, retry");
 			break;
 		case -EINVAL:
+			/*
+			 * device not valid so no I/O could be running
+			 * handle CQR as termination successful
+			 */
+			cqr->status = DASD_CQR_CLEARED;
+			cqr->stopclk = get_tod_clock();
+			cqr->starttime = 0;
+			/* no retries for invalid devices */
+			cqr->retries = -1;
+			DBF_DEV_EVENT(DBF_ERR, device, "%s",
+				      "EINVAL, handle as terminated");
+			/* fake rc to success */
+			rc = 0;
+			break;
 		case -EBUSY:
 			DBF_DEV_EVENT(DBF_ERR, device, "%s",
 				      "device busy, retry later");
-- 
cgit v1.2.3-59-g8ed1b


From 932f0549f872cde022eed200910ee3291b1d3c69 Mon Sep 17 00:00:00 2001
From: Stefan Haberland <stefan.haberland@de.ibm.com>
Date: Mon, 24 Nov 2014 10:59:44 +0100
Subject: s390/dasd: fix list corruption for sleep_on requests

Fix race for sleep_on requests leading to list corruption.
The SLEEP_ON_END_TAG is set during CQR clean up. Remove it from
interrupt handler to avoid the CQR from being cleared when it is
still in the device_queue.

Signed-off-by: Stefan Haberland <stefan.haberland@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/dasd.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 8cb120e9c868..4abf11965484 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1697,11 +1697,8 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
 	if (cqr->status == DASD_CQR_CLEAR_PENDING &&
 	    scsw_fctl(&irb->scsw) & SCSW_FCTL_CLEAR_FUNC) {
 		cqr->status = DASD_CQR_CLEARED;
-		if (cqr->callback_data == DASD_SLEEPON_START_TAG)
-			cqr->callback_data = DASD_SLEEPON_END_TAG;
 		dasd_device_clear_timer(device);
 		wake_up(&dasd_flush_wq);
-		wake_up(&generic_waitq);
 		dasd_schedule_device_bh(device);
 		return;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 6ebdf1c79d09ad3d65c714a79db0a0c141a013ba Mon Sep 17 00:00:00 2001
From: Stefan Haberland <stefan.haberland@de.ibm.com>
Date: Mon, 24 Nov 2014 15:04:09 +0100
Subject: s390/dasd: retry partition detection

In case somebody attempted to open the device during online
processing the partition detection ioctl may have failed.

Add a retry loop so that partitions do not go undetected.

Signed-off-by: Stefan Haberland <stefan.haberland@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/dasd_genhd.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index f224d59c4b6b..90f39f79f5d7 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -99,15 +99,37 @@ void dasd_gendisk_free(struct dasd_block *block)
 int dasd_scan_partitions(struct dasd_block *block)
 {
 	struct block_device *bdev;
+	int retry, rc;
 
+	retry = 5;
 	bdev = bdget_disk(block->gdp, 0);
-	if (!bdev || blkdev_get(bdev, FMODE_READ, NULL) < 0)
+	if (!bdev) {
+		DBF_DEV_EVENT(DBF_ERR, block->base, "%s",
+			      "scan partitions error, bdget returned NULL");
 		return -ENODEV;
+	}
+
+	rc = blkdev_get(bdev, FMODE_READ, NULL);
+	if (rc < 0) {
+		DBF_DEV_EVENT(DBF_ERR, block->base,
+			      "scan partitions error, blkdev_get returned %d",
+			      rc);
+		return -ENODEV;
+	}
 	/*
 	 * See fs/partition/check.c:register_disk,rescan_partitions
 	 * Can't call rescan_partitions directly. Use ioctl.
 	 */
-	ioctl_by_bdev(bdev, BLKRRPART, 0);
+	rc = ioctl_by_bdev(bdev, BLKRRPART, 0);
+	while (rc == -EBUSY && retry > 0) {
+		schedule();
+		rc = ioctl_by_bdev(bdev, BLKRRPART, 0);
+		retry--;
+		DBF_DEV_EVENT(DBF_ERR, block->base,
+			      "scan partitions error, retry %d rc %d",
+			      retry, rc);
+	}
+
 	/*
 	 * Since the matching blkdev_put call to the blkdev_get in
 	 * this function is not called before dasd_destroy_partitions
-- 
cgit v1.2.3-59-g8ed1b


From 57f2ffe14fd125c240babd88b99d8a57d3ed060e Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 15 Sep 2014 10:50:47 +0200
Subject: s390: remove diag 44 calls from cpu_relax()

Simplify cpu_relax() to a simple barrier(). Performance wise this doesn't
seem to make any big difference anymore, since nearly all lock variants
have directed yield semantics in the meantime.
Also this makes s390 behave like all other architectures.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/processor.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index d559bdb03d18..bed05ea7ec27 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -217,8 +217,6 @@ static inline unsigned short stap(void)
  */
 static inline void cpu_relax(void)
 {
-	if (MACHINE_HAS_DIAG44)
-		asm volatile("diag 0,0,68");
 	barrier();
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From ed7d56e174b11b76e2954f28605c5c16f8814fab Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 28 Nov 2014 12:36:48 +0100
Subject: s390/kprobes: fix instruction copy for out of line execution

When we generate the instruction for out of line execution the length
of the instruction to be copied was evaluated from an uninitialized
memory location.
Therefore we ended up with a random (2, 4 or 6) number of bytes being
copied instead of taking the real instruction length into account.
This works surprisingly well most of the time, but still not always.

Reported-by: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/kprobes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index ee0396755430..1e4c710dfb92 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -74,7 +74,7 @@ static void copy_instruction(struct kprobe *p)
 		ftrace_generate_nop_insn((struct ftrace_insn *)p->ainsn.insn);
 		p->ainsn.is_ftrace_insn = 1;
 	} else
-		memcpy(p->ainsn.insn, p->addr, insn_length(p->opcode >> 8));
+		memcpy(p->ainsn.insn, p->addr, insn_length(*p->addr >> 8));
 	p->opcode = p->ainsn.insn[0];
 	if (!probe_is_insn_relative_long(p->ainsn.insn))
 		return;
-- 
cgit v1.2.3-59-g8ed1b


From 832a771034bf62444796ab8868264c3ea9e50866 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Mon, 1 Dec 2014 09:16:45 +0100
Subject: s390/debug: avoid function call for debug_sprintf_*

debug_sprintf_event/exception are called even for debug events
with a disabling debug level. All other functions already do
the check in a wrapper function. Let's do the same here.
Due to the var_args the compiler refuses to make this function
inline. So let's wrap this via a macro.
This patch saves around 80 ns on my z196 for a KVM round trip (we
have two debug statements for entry and exit) when KVM is built as
a module.
The savings for built-in drivers is smaller as we then avoid the
PLT overhead for a function call.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/debug.h | 29 +++++++++++++++++++++++++++--
 arch/s390/kernel/debug.c      | 12 ++++--------
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index 530c15eb01e9..0206c8052328 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -151,9 +151,21 @@ debug_text_event(debug_info_t* id, int level, const char* txt)
  * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
  */
 extern debug_entry_t *
-debug_sprintf_event(debug_info_t* id,int level,char *string,...)
+__debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
 	__attribute__ ((format(printf, 3, 4)));
 
+#define debug_sprintf_event(_id, _level, _fmt, ...)			\
+({									\
+	debug_entry_t *__ret;						\
+	debug_info_t *__id = _id;					\
+	int __level = _level;						\
+	if ((!__id) || (__level > __id->level))				\
+		__ret = NULL;						\
+	else								\
+		__ret = __debug_sprintf_event(__id, __level,		\
+					      _fmt, ## __VA_ARGS__);	\
+	__ret;								\
+})
 
 static inline debug_entry_t*
 debug_exception(debug_info_t* id, int level, void* data, int length)
@@ -194,9 +206,22 @@ debug_text_exception(debug_info_t* id, int level, const char* txt)
  * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
  */
 extern debug_entry_t *
-debug_sprintf_exception(debug_info_t* id,int level,char *string,...)
+__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
 	__attribute__ ((format(printf, 3, 4)));
 
+#define debug_sprintf_exception(_id, _level, _fmt, ...)			\
+({									\
+	debug_entry_t *__ret;						\
+	debug_info_t *__id = _id;					\
+	int __level = _level;						\
+	if ((!__id) || (__level > __id->level))				\
+		__ret = NULL;						\
+	else								\
+		__ret = __debug_sprintf_exception(__id, __level,	\
+						  _fmt, ## __VA_ARGS__);\
+	__ret;								\
+})
+
 int debug_register_view(debug_info_t* id, struct debug_view* view);
 int debug_unregister_view(debug_info_t* id, struct debug_view* view);
 
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index ee8390da6ea7..c1f21aca76e7 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -1019,7 +1019,7 @@ debug_count_numargs(char *string)
  */
 
 debug_entry_t*
-debug_sprintf_event(debug_info_t* id, int level,char *string,...)
+__debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
 {
 	va_list   ap;
 	int numargs,idx;
@@ -1027,8 +1027,6 @@ debug_sprintf_event(debug_info_t* id, int level,char *string,...)
 	debug_sprintf_entry_t *curr_event;
 	debug_entry_t *active;
 
-	if((!id) || (level > id->level))
-		return NULL;
 	if (!debug_active || !id->areas)
 		return NULL;
 	numargs=debug_count_numargs(string);
@@ -1050,14 +1048,14 @@ debug_sprintf_event(debug_info_t* id, int level,char *string,...)
 
 	return active;
 }
-EXPORT_SYMBOL(debug_sprintf_event);
+EXPORT_SYMBOL(__debug_sprintf_event);
 
 /*
  * debug_sprintf_exception:
  */
 
 debug_entry_t*
-debug_sprintf_exception(debug_info_t* id, int level,char *string,...)
+__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
 {
 	va_list   ap;
 	int numargs,idx;
@@ -1065,8 +1063,6 @@ debug_sprintf_exception(debug_info_t* id, int level,char *string,...)
 	debug_sprintf_entry_t *curr_event;
 	debug_entry_t *active;
 
-	if((!id) || (level > id->level))
-		return NULL;
 	if (!debug_active || !id->areas)
 		return NULL;
 
@@ -1089,7 +1085,7 @@ debug_sprintf_exception(debug_info_t* id, int level,char *string,...)
 
 	return active;
 }
-EXPORT_SYMBOL(debug_sprintf_exception);
+EXPORT_SYMBOL(__debug_sprintf_exception);
 
 /*
  * debug_register_view:
-- 
cgit v1.2.3-59-g8ed1b


From 200e7c0ffb1b174a4aeaa05f7f43a91ac0fddde3 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 1 Dec 2014 14:24:41 +0100
Subject: s390/idle: add missing irq off lockdep annotation

psw_idle() returns with interrupts disabled, so we should add the
missing annotation.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/idle.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 05fbc2c98faf..8814dd9cf644 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -35,6 +35,8 @@ void enabled_wait(void)
 	/* Call the assembler magic in entry.S */
 	psw_idle(idle, psw_mask);
 
+	trace_hardirqs_off();
+
 	/* Account time spent with enabled wait psw loaded as idle time. */
 	idle->sequence++;
 	smp_wmb();
-- 
cgit v1.2.3-59-g8ed1b


From 1ce2180498fd40ed3f6485fc5daadf4b711f305f Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 28 Nov 2014 19:23:34 +0100
Subject: s390/idle: convert open coded idle time seqcount

s390 uses open coded seqcount to synchronize idle time accounting.
Let's consolidate it with the standard API.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/idle.h |  3 ++-
 arch/s390/kernel/idle.c      | 24 +++++++++++-------------
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h
index 6af037f574b8..113cd963dbbe 100644
--- a/arch/s390/include/asm/idle.h
+++ b/arch/s390/include/asm/idle.h
@@ -9,9 +9,10 @@
 
 #include <linux/types.h>
 #include <linux/device.h>
+#include <linux/seqlock.h>
 
 struct s390_idle_data {
-	unsigned int sequence;
+	seqcount_t seqcount;
 	unsigned long long idle_count;
 	unsigned long long idle_time;
 	unsigned long long clock_idle_enter;
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 8814dd9cf644..7a55c29b0b33 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -38,15 +38,13 @@ void enabled_wait(void)
 	trace_hardirqs_off();
 
 	/* Account time spent with enabled wait psw loaded as idle time. */
-	idle->sequence++;
-	smp_wmb();
+	write_seqcount_begin(&idle->seqcount);
 	idle_time = idle->clock_idle_exit - idle->clock_idle_enter;
 	idle->clock_idle_enter = idle->clock_idle_exit = 0ULL;
 	idle->idle_time += idle_time;
 	idle->idle_count++;
 	account_idle_time(idle_time);
-	smp_wmb();
-	idle->sequence++;
+	write_seqcount_end(&idle->seqcount);
 }
 NOKPROBE_SYMBOL(enabled_wait);
 
@@ -55,14 +53,14 @@ static ssize_t show_idle_count(struct device *dev,
 {
 	struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
 	unsigned long long idle_count;
-	unsigned int sequence;
+	unsigned int seq;
 
 	do {
-		sequence = ACCESS_ONCE(idle->sequence);
+		seq = read_seqcount_begin(&idle->seqcount);
 		idle_count = ACCESS_ONCE(idle->idle_count);
 		if (ACCESS_ONCE(idle->clock_idle_enter))
 			idle_count++;
-	} while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence));
+	} while (read_seqcount_retry(&idle->seqcount, seq));
 	return sprintf(buf, "%llu\n", idle_count);
 }
 DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
@@ -72,15 +70,15 @@ static ssize_t show_idle_time(struct device *dev,
 {
 	struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
 	unsigned long long now, idle_time, idle_enter, idle_exit;
-	unsigned int sequence;
+	unsigned int seq;
 
 	do {
 		now = get_tod_clock();
-		sequence = ACCESS_ONCE(idle->sequence);
+		seq = read_seqcount_begin(&idle->seqcount);
 		idle_time = ACCESS_ONCE(idle->idle_time);
 		idle_enter = ACCESS_ONCE(idle->clock_idle_enter);
 		idle_exit = ACCESS_ONCE(idle->clock_idle_exit);
-	} while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence));
+	} while (read_seqcount_retry(&idle->seqcount, seq));
 	idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
 	return sprintf(buf, "%llu\n", idle_time >> 12);
 }
@@ -90,14 +88,14 @@ cputime64_t arch_cpu_idle_time(int cpu)
 {
 	struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
 	unsigned long long now, idle_enter, idle_exit;
-	unsigned int sequence;
+	unsigned int seq;
 
 	do {
 		now = get_tod_clock();
-		sequence = ACCESS_ONCE(idle->sequence);
+		seq = read_seqcount_begin(&idle->seqcount);
 		idle_enter = ACCESS_ONCE(idle->clock_idle_enter);
 		idle_exit = ACCESS_ONCE(idle->clock_idle_exit);
-	} while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence));
+	} while (read_seqcount_retry(&idle->seqcount, seq));
 	return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 8461b63ca01d125a245a0d0fb4821ea0656e5053 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 28 Nov 2014 19:23:35 +0100
Subject: s390: translate cputime magic constants to macros

Make the code more self-explanatory by naming magic constants.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/cputime.h | 46 +++++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index f8c196984853..b81712306360 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -10,6 +10,8 @@
 #include <linux/types.h>
 #include <asm/div64.h>
 
+#define CPUTIME_PER_USEC 4096ULL
+#define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC)
 
 /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
 
@@ -38,24 +40,24 @@ static inline unsigned long __div(unsigned long long n, unsigned long base)
  */
 static inline unsigned long cputime_to_jiffies(const cputime_t cputime)
 {
-	return __div((__force unsigned long long) cputime, 4096000000ULL / HZ);
+	return __div((__force unsigned long long) cputime, CPUTIME_PER_SEC / HZ);
 }
 
 static inline cputime_t jiffies_to_cputime(const unsigned int jif)
 {
-	return (__force cputime_t)(jif * (4096000000ULL / HZ));
+	return (__force cputime_t)(jif * (CPUTIME_PER_SEC / HZ));
 }
 
 static inline u64 cputime64_to_jiffies64(cputime64_t cputime)
 {
 	unsigned long long jif = (__force unsigned long long) cputime;
-	do_div(jif, 4096000000ULL / HZ);
+	do_div(jif, CPUTIME_PER_SEC / HZ);
 	return jif;
 }
 
 static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
 {
-	return (__force cputime64_t)(jif * (4096000000ULL / HZ));
+	return (__force cputime64_t)(jif * (CPUTIME_PER_SEC / HZ));
 }
 
 /*
@@ -68,7 +70,7 @@ static inline unsigned int cputime_to_usecs(const cputime_t cputime)
 
 static inline cputime_t usecs_to_cputime(const unsigned int m)
 {
-	return (__force cputime_t)(m * 4096ULL);
+	return (__force cputime_t)(m * CPUTIME_PER_USEC);
 }
 
 #define usecs_to_cputime64(m)		usecs_to_cputime(m)
@@ -78,12 +80,12 @@ static inline cputime_t usecs_to_cputime(const unsigned int m)
  */
 static inline unsigned int cputime_to_secs(const cputime_t cputime)
 {
-	return __div((__force unsigned long long) cputime, 2048000000) >> 1;
+	return __div((__force unsigned long long) cputime, CPUTIME_PER_SEC / 2) >> 1;
 }
 
 static inline cputime_t secs_to_cputime(const unsigned int s)
 {
-	return (__force cputime_t)(s * 4096000000ULL);
+	return (__force cputime_t)(s * CPUTIME_PER_SEC);
 }
 
 /*
@@ -91,8 +93,8 @@ static inline cputime_t secs_to_cputime(const unsigned int s)
  */
 static inline cputime_t timespec_to_cputime(const struct timespec *value)
 {
-	unsigned long long ret = value->tv_sec * 4096000000ULL;
-	return (__force cputime_t)(ret + value->tv_nsec * 4096 / 1000);
+	unsigned long long ret = value->tv_sec * CPUTIME_PER_SEC;
+	return (__force cputime_t)(ret + (value->tv_nsec * CPUTIME_PER_USEC) / NSEC_PER_USEC);
 }
 
 static inline void cputime_to_timespec(const cputime_t cputime,
@@ -103,12 +105,12 @@ static inline void cputime_to_timespec(const cputime_t cputime,
 	register_pair rp;
 
 	rp.pair = __cputime >> 1;
-	asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
-	value->tv_nsec = rp.subreg.even * 1000 / 4096;
+	asm ("dr %0,%1" : "+d" (rp) : "d" (CPUTIME_PER_SEC / 2));
+	value->tv_nsec = rp.subreg.even * NSEC_PER_USEC / CPUTIME_PER_USEC;
 	value->tv_sec = rp.subreg.odd;
 #else
-	value->tv_nsec = (__cputime % 4096000000ULL) * 1000 / 4096;
-	value->tv_sec = __cputime / 4096000000ULL;
+	value->tv_nsec = (__cputime % CPUTIME_PER_SEC) * NSEC_PER_USEC / CPUTIME_PER_USEC;
+	value->tv_sec = __cputime / CPUTIME_PER_SEC;
 #endif
 }
 
@@ -119,8 +121,8 @@ static inline void cputime_to_timespec(const cputime_t cputime,
  */
 static inline cputime_t timeval_to_cputime(const struct timeval *value)
 {
-	unsigned long long ret = value->tv_sec * 4096000000ULL;
-	return (__force cputime_t)(ret + value->tv_usec * 4096ULL);
+	unsigned long long ret = value->tv_sec * CPUTIME_PER_SEC;
+	return (__force cputime_t)(ret + value->tv_usec * CPUTIME_PER_USEC);
 }
 
 static inline void cputime_to_timeval(const cputime_t cputime,
@@ -131,12 +133,12 @@ static inline void cputime_to_timeval(const cputime_t cputime,
 	register_pair rp;
 
 	rp.pair = __cputime >> 1;
-	asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
-	value->tv_usec = rp.subreg.even / 4096;
+	asm ("dr %0,%1" : "+d" (rp) : "d" (CPUTIME_PER_SEC / 2));
+	value->tv_usec = rp.subreg.even / CPUTIME_PER_USEC;
 	value->tv_sec = rp.subreg.odd;
 #else
-	value->tv_usec = (__cputime % 4096000000ULL) / 4096;
-	value->tv_sec = __cputime / 4096000000ULL;
+	value->tv_usec = (__cputime % CPUTIME_PER_SEC) / CPUTIME_PER_USEC;
+	value->tv_sec = __cputime / CPUTIME_PER_SEC;
 #endif
 }
 
@@ -146,13 +148,13 @@ static inline void cputime_to_timeval(const cputime_t cputime,
 static inline clock_t cputime_to_clock_t(cputime_t cputime)
 {
 	unsigned long long clock = (__force unsigned long long) cputime;
-	do_div(clock, 4096000000ULL / USER_HZ);
+	do_div(clock, CPUTIME_PER_SEC / USER_HZ);
 	return clock;
 }
 
 static inline cputime_t clock_t_to_cputime(unsigned long x)
 {
-	return (__force cputime_t)(x * (4096000000ULL / USER_HZ));
+	return (__force cputime_t)(x * (CPUTIME_PER_SEC / USER_HZ));
 }
 
 /*
@@ -161,7 +163,7 @@ static inline cputime_t clock_t_to_cputime(unsigned long x)
 static inline clock_t cputime64_to_clock_t(cputime64_t cputime)
 {
 	unsigned long long clock = (__force unsigned long long) cputime;
-	do_div(clock, 4096000000ULL / USER_HZ);
+	do_div(clock, CPUTIME_PER_SEC / USER_HZ);
 	return clock;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 4b4ee3ee0beb4f39efe5eb9581f07bddc4201240 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 1 Dec 2014 18:05:35 +0100
Subject: s390/simd: clear vector register pointer on fork/clone

The copy_thread function fails to reset the p->thread.vxrs pointer.
As a result the child uses the same vector register save area as the
parent, which leads to both data corruption and multiple frees of the
save area memory once the tasks sharing it terminate.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/process.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 4192dfd55ddc..aa7a83948c7b 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -153,6 +153,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 	save_fp_ctl(&p->thread.fp_regs.fpc);
 	save_fp_regs(p->thread.fp_regs.fprs);
 	p->thread.fp_regs.pad = 0;
+	p->thread.vxrs = NULL;
 	/* Set a new TLS ?  */
 	if (clone_flags & CLONE_SETTLS) {
 		unsigned long tls = frame->childregs.gprs[6];
-- 
cgit v1.2.3-59-g8ed1b


From 7490daf01f11b5b692f69963eaee9be4adc135c3 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 2 Dec 2014 16:07:12 +0100
Subject: s390/ptrace: always include vector registers in core files

On machines with support for vector registers the signal frame includes
an area for the vector registers and the ptrace regset interface allows
reading and writing them. This is true even if the task never used any
vector instruction. Only ELF core dumps do not include the vector
registers. To make things consistent, always include the vector register
note in core dumps created on a machine with vector register support.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/ptrace.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 9eed6bb5c4a9..eabfb4594517 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -1091,12 +1091,6 @@ static int s390_tdb_set(struct task_struct *target,
 	return 0;
 }
 
-static int s390_vxrs_active(struct task_struct *target,
-			      const struct user_regset *regset)
-{
-	return !!target->thread.vxrs;
-}
-
 static int s390_vxrs_low_get(struct task_struct *target,
 			     const struct user_regset *regset,
 			     unsigned int pos, unsigned int count,
@@ -1105,6 +1099,8 @@ static int s390_vxrs_low_get(struct task_struct *target,
 	__u64 vxrs[__NUM_VXRS_LOW];
 	int i;
 
+	if (!MACHINE_HAS_VX)
+		return -ENODEV;
 	if (target->thread.vxrs) {
 		if (target == current)
 			save_vx_regs(target->thread.vxrs);
@@ -1123,6 +1119,8 @@ static int s390_vxrs_low_set(struct task_struct *target,
 	__u64 vxrs[__NUM_VXRS_LOW];
 	int i, rc;
 
+	if (!MACHINE_HAS_VX)
+		return -ENODEV;
 	if (!target->thread.vxrs) {
 		rc = alloc_vector_registers(target);
 		if (rc)
@@ -1148,6 +1146,8 @@ static int s390_vxrs_high_get(struct task_struct *target,
 {
 	__vector128 vxrs[__NUM_VXRS_HIGH];
 
+	if (!MACHINE_HAS_VX)
+		return -ENODEV;
 	if (target->thread.vxrs) {
 		if (target == current)
 			save_vx_regs(target->thread.vxrs);
@@ -1165,6 +1165,8 @@ static int s390_vxrs_high_set(struct task_struct *target,
 {
 	int rc;
 
+	if (!MACHINE_HAS_VX)
+		return -ENODEV;
 	if (!target->thread.vxrs) {
 		rc = alloc_vector_registers(target);
 		if (rc)
@@ -1249,7 +1251,6 @@ static const struct user_regset s390_regsets[] = {
 		.n = __NUM_VXRS_LOW,
 		.size = sizeof(__u64),
 		.align = sizeof(__u64),
-		.active = s390_vxrs_active,
 		.get = s390_vxrs_low_get,
 		.set = s390_vxrs_low_set,
 	},
@@ -1258,7 +1259,6 @@ static const struct user_regset s390_regsets[] = {
 		.n = __NUM_VXRS_HIGH,
 		.size = sizeof(__vector128),
 		.align = sizeof(__vector128),
-		.active = s390_vxrs_active,
 		.get = s390_vxrs_high_get,
 		.set = s390_vxrs_high_set,
 	},
@@ -1472,7 +1472,6 @@ static const struct user_regset s390_compat_regsets[] = {
 		.n = __NUM_VXRS_LOW,
 		.size = sizeof(__u64),
 		.align = sizeof(__u64),
-		.active = s390_vxrs_active,
 		.get = s390_vxrs_low_get,
 		.set = s390_vxrs_low_set,
 	},
@@ -1481,7 +1480,6 @@ static const struct user_regset s390_compat_regsets[] = {
 		.n = __NUM_VXRS_HIGH,
 		.size = sizeof(__vector128),
 		.align = sizeof(__vector128),
-		.active = s390_vxrs_active,
 		.get = s390_vxrs_high_get,
 		.set = s390_vxrs_high_set,
 	},
-- 
cgit v1.2.3-59-g8ed1b


From 86ed42f401cb8fa54a8c553e075ea411caac7404 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 3 Dec 2014 17:00:08 +0100
Subject: s390: use local symbol names in entry[64].S

To improve the output of the perf tool, hide most of the symbols in
entry[64].S by giving them the '.L' prefix.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/entry.S   | 424 ++++++++++++++++++++++-----------------------
 arch/s390/kernel/entry64.S | 372 +++++++++++++++++++--------------------
 2 files changed, 398 insertions(+), 398 deletions(-)

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 70203265196f..398329b2b518 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -53,7 +53,7 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 	.macro	TRACE_IRQS_ON
 #ifdef CONFIG_TRACE_IRQFLAGS
 	basr	%r2,%r0
-	l	%r1,BASED(.Lhardirqs_on)
+	l	%r1,BASED(.Lc_hardirqs_on)
 	basr	%r14,%r1		# call trace_hardirqs_on_caller
 #endif
 	.endm
@@ -61,7 +61,7 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 	.macro	TRACE_IRQS_OFF
 #ifdef CONFIG_TRACE_IRQFLAGS
 	basr	%r2,%r0
-	l	%r1,BASED(.Lhardirqs_off)
+	l	%r1,BASED(.Lc_hardirqs_off)
 	basr	%r14,%r1		# call trace_hardirqs_off_caller
 #endif
 	.endm
@@ -70,7 +70,7 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 #ifdef CONFIG_LOCKDEP
 	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
 	jz	.+10
-	l	%r1,BASED(.Llockdep_sys_exit)
+	l	%r1,BASED(.Lc_lockdep_sys_exit)
 	basr	%r14,%r1		# call lockdep_sys_exit
 #endif
 	.endm
@@ -87,8 +87,8 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 	tmh	%r8,0x0001		# interrupting from user ?
 	jnz	1f
 	lr	%r14,%r9
-	sl	%r14,BASED(.Lcritical_start)
-	cl	%r14,BASED(.Lcritical_length)
+	sl	%r14,BASED(.Lc_critical_start)
+	cl	%r14,BASED(.Lc_critical_length)
 	jhe	0f
 	la	%r11,\savearea		# inside critical section, do cleanup
 	bras	%r14,cleanup_critical
@@ -162,7 +162,7 @@ ENTRY(__switch_to)
 	lm	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
 	br	%r14
 
-__critical_start:
+.L__critical_start:
 /*
  * SVC interrupt handler routine. System calls are synchronous events and
  * are executed with interrupts enabled.
@@ -170,145 +170,145 @@ __critical_start:
 
 ENTRY(system_call)
 	stpt	__LC_SYNC_ENTER_TIMER
-sysc_stm:
+.Lsysc_stm:
 	stm	%r8,%r15,__LC_SAVE_AREA_SYNC
 	l	%r12,__LC_THREAD_INFO
 	l	%r13,__LC_SVC_NEW_PSW+4
 	lhi	%r14,_PIF_SYSCALL
-sysc_per:
+.Lsysc_per:
 	l	%r15,__LC_KERNEL_STACK
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)	# pointer to pt_regs
-sysc_vtime:
+.Lsysc_vtime:
 	UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER
 	stm	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(32,%r11),__LC_SAVE_AREA_SYNC
 	mvc	__PT_PSW(8,%r11),__LC_SVC_OLD_PSW
 	mvc	__PT_INT_CODE(4,%r11),__LC_SVC_ILC
 	st	%r14,__PT_FLAGS(%r11)
-sysc_do_svc:
+.Lsysc_do_svc:
 	l	%r10,__TI_sysc_table(%r12)	# 31 bit system call table
 	lh	%r8,__PT_INT_CODE+2(%r11)
 	sla	%r8,2				# shift and test for svc0
-	jnz	sysc_nr_ok
+	jnz	.Lsysc_nr_ok
 	# svc 0: system call number in %r1
 	cl	%r1,BASED(.Lnr_syscalls)
-	jnl	sysc_nr_ok
+	jnl	.Lsysc_nr_ok
 	sth	%r1,__PT_INT_CODE+2(%r11)
 	lr	%r8,%r1
 	sla	%r8,2
-sysc_nr_ok:
+.Lsysc_nr_ok:
 	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
 	st	%r2,__PT_ORIG_GPR2(%r11)
 	st	%r7,STACK_FRAME_OVERHEAD(%r15)
 	l	%r9,0(%r8,%r10)			# get system call addr.
 	tm	__TI_flags+3(%r12),_TIF_TRACE
-	jnz	sysc_tracesys
+	jnz	.Lsysc_tracesys
 	basr	%r14,%r9			# call sys_xxxx
 	st	%r2,__PT_R2(%r11)		# store return value
 
-sysc_return:
+.Lsysc_return:
 	LOCKDEP_SYS_EXIT
-sysc_tif:
+.Lsysc_tif:
 	tm	__PT_PSW+1(%r11),0x01		# returning to user ?
-	jno	sysc_restore
+	jno	.Lsysc_restore
 	tm	__PT_FLAGS+3(%r11),_PIF_WORK
-	jnz	sysc_work
+	jnz	.Lsysc_work
 	tm	__TI_flags+3(%r12),_TIF_WORK
-	jnz	sysc_work			# check for thread work
+	jnz	.Lsysc_work			# check for thread work
 	tm	__LC_CPU_FLAGS+3,_CIF_WORK
-	jnz	sysc_work
-sysc_restore:
+	jnz	.Lsysc_work
+.Lsysc_restore:
 	mvc	__LC_RETURN_PSW(8),__PT_PSW(%r11)
 	stpt	__LC_EXIT_TIMER
 	lm	%r0,%r15,__PT_R0(%r11)
 	lpsw	__LC_RETURN_PSW
-sysc_done:
+.Lsysc_done:
 
 #
 # One of the work bits is on. Find out which one.
 #
-sysc_work:
+.Lsysc_work:
 	tm	__LC_CPU_FLAGS+3,_CIF_MCCK_PENDING
-	jo	sysc_mcck_pending
+	jo	.Lsysc_mcck_pending
 	tm	__TI_flags+3(%r12),_TIF_NEED_RESCHED
-	jo	sysc_reschedule
+	jo	.Lsysc_reschedule
 	tm	__PT_FLAGS+3(%r11),_PIF_PER_TRAP
-	jo	sysc_singlestep
+	jo	.Lsysc_singlestep
 	tm	__TI_flags+3(%r12),_TIF_SIGPENDING
-	jo	sysc_sigpending
+	jo	.Lsysc_sigpending
 	tm	__TI_flags+3(%r12),_TIF_NOTIFY_RESUME
-	jo	sysc_notify_resume
+	jo	.Lsysc_notify_resume
 	tm	__LC_CPU_FLAGS+3,_CIF_ASCE
-	jo	sysc_uaccess
-	j	sysc_return		# beware of critical section cleanup
+	jo	.Lsysc_uaccess
+	j	.Lsysc_return		# beware of critical section cleanup
 
 #
 # _TIF_NEED_RESCHED is set, call schedule
 #
-sysc_reschedule:
-	l	%r1,BASED(.Lschedule)
-	la	%r14,BASED(sysc_return)
+.Lsysc_reschedule:
+	l	%r1,BASED(.Lc_schedule)
+	la	%r14,BASED(.Lsysc_return)
 	br	%r1			# call schedule
 
 #
 # _CIF_MCCK_PENDING is set, call handler
 #
-sysc_mcck_pending:
-	l	%r1,BASED(.Lhandle_mcck)
-	la	%r14,BASED(sysc_return)
+.Lsysc_mcck_pending:
+	l	%r1,BASED(.Lc_handle_mcck)
+	la	%r14,BASED(.Lsysc_return)
 	br	%r1			# TIF bit will be cleared by handler
 
 #
 # _CIF_ASCE is set, load user space asce
 #
-sysc_uaccess:
+.Lsysc_uaccess:
 	ni	__LC_CPU_FLAGS+3,255-_CIF_ASCE
 	lctl	%c1,%c1,__LC_USER_ASCE	# load primary asce
-	j	sysc_return
+	j	.Lsysc_return
 
 #
 # _TIF_SIGPENDING is set, call do_signal
 #
-sysc_sigpending:
+.Lsysc_sigpending:
 	lr	%r2,%r11		# pass pointer to pt_regs
-	l	%r1,BASED(.Ldo_signal)
+	l	%r1,BASED(.Lc_do_signal)
 	basr	%r14,%r1		# call do_signal
 	tm	__PT_FLAGS+3(%r11),_PIF_SYSCALL
-	jno	sysc_return
+	jno	.Lsysc_return
 	lm	%r2,%r7,__PT_R2(%r11)	# load svc arguments
 	l	%r10,__TI_sysc_table(%r12)	# 31 bit system call table
 	xr	%r8,%r8			# svc 0 returns -ENOSYS
 	clc	__PT_INT_CODE+2(2,%r11),BASED(.Lnr_syscalls+2)
-	jnl	sysc_nr_ok		# invalid svc number -> do svc 0
+	jnl	.Lsysc_nr_ok		# invalid svc number -> do svc 0
 	lh	%r8,__PT_INT_CODE+2(%r11)	# load new svc number
 	sla	%r8,2
-	j	sysc_nr_ok		# restart svc
+	j	.Lsysc_nr_ok		# restart svc
 
 #
 # _TIF_NOTIFY_RESUME is set, call do_notify_resume
 #
-sysc_notify_resume:
+.Lsysc_notify_resume:
 	lr	%r2,%r11		# pass pointer to pt_regs
-	l	%r1,BASED(.Ldo_notify_resume)
-	la	%r14,BASED(sysc_return)
+	l	%r1,BASED(.Lc_do_notify_resume)
+	la	%r14,BASED(.Lsysc_return)
 	br	%r1			# call do_notify_resume
 
 #
 # _PIF_PER_TRAP is set, call do_per_trap
 #
-sysc_singlestep:
+.Lsysc_singlestep:
 	ni	__PT_FLAGS+3(%r11),255-_PIF_PER_TRAP
 	lr	%r2,%r11		# pass pointer to pt_regs
-	l	%r1,BASED(.Ldo_per_trap)
-	la	%r14,BASED(sysc_return)
+	l	%r1,BASED(.Lc_do_per_trap)
+	la	%r14,BASED(.Lsysc_return)
 	br	%r1			# call do_per_trap
 
 #
 # call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
 # and after the system call
 #
-sysc_tracesys:
-	l	%r1,BASED(.Ltrace_enter)
+.Lsysc_tracesys:
+	l	%r1,BASED(.Lc_trace_enter)
 	lr	%r2,%r11		# pass pointer to pt_regs
 	la	%r3,0
 	xr	%r0,%r0
@@ -316,22 +316,22 @@ sysc_tracesys:
 	st	%r0,__PT_R2(%r11)
 	basr	%r14,%r1		# call do_syscall_trace_enter
 	cl	%r2,BASED(.Lnr_syscalls)
-	jnl	sysc_tracenogo
+	jnl	.Lsysc_tracenogo
 	lr	%r8,%r2
 	sll	%r8,2
 	l	%r9,0(%r8,%r10)
-sysc_tracego:
+.Lsysc_tracego:
 	lm	%r3,%r7,__PT_R3(%r11)
 	st	%r7,STACK_FRAME_OVERHEAD(%r15)
 	l	%r2,__PT_ORIG_GPR2(%r11)
 	basr	%r14,%r9		# call sys_xxx
 	st	%r2,__PT_R2(%r11)	# store return value
-sysc_tracenogo:
+.Lsysc_tracenogo:
 	tm	__TI_flags+3(%r12),_TIF_TRACE
-	jz	sysc_return
-	l	%r1,BASED(.Ltrace_exit)
+	jz	.Lsysc_return
+	l	%r1,BASED(.Lc_trace_exit)
 	lr	%r2,%r11		# pass pointer to pt_regs
-	la	%r14,BASED(sysc_return)
+	la	%r14,BASED(.Lsysc_return)
 	br	%r1			# call do_syscall_trace_exit
 
 #
@@ -341,18 +341,18 @@ ENTRY(ret_from_fork)
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	l	%r12,__LC_THREAD_INFO
 	l	%r13,__LC_SVC_NEW_PSW+4
-	l	%r1,BASED(.Lschedule_tail)
+	l	%r1,BASED(.Lc_schedule_tail)
 	basr	%r14,%r1		# call schedule_tail
 	TRACE_IRQS_ON
 	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
 	tm	__PT_PSW+1(%r11),0x01	# forking a kernel thread ?
-	jne	sysc_tracenogo
+	jne	.Lsysc_tracenogo
 	# it's a kernel thread
 	lm	%r9,%r10,__PT_R9(%r11)	# load gprs
 ENTRY(kernel_thread_starter)
 	la	%r2,0(%r10)
 	basr	%r14,%r9
-	j	sysc_tracenogo
+	j	.Lsysc_tracenogo
 
 /*
  * Program check handler routine
@@ -369,7 +369,7 @@ ENTRY(pgm_check_handler)
 	tmh	%r8,0x4000		# PER bit set in old PSW ?
 	jnz	0f			# -> enabled, can't be a double fault
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
-	jnz	pgm_svcper		# -> single stepped svc
+	jnz	.Lpgm_svcper		# -> single stepped svc
 0:	CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
 	ahi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
 	j	2f
@@ -386,42 +386,42 @@ ENTRY(pgm_check_handler)
 	jz	0f
 	l	%r1,__TI_task(%r12)
 	tmh	%r8,0x0001		# kernel per event ?
-	jz	pgm_kprobe
+	jz	.Lpgm_kprobe
 	oi	__PT_FLAGS+3(%r11),_PIF_PER_TRAP
 	mvc	__THREAD_per_address(4,%r1),__LC_PER_ADDRESS
 	mvc	__THREAD_per_cause(2,%r1),__LC_PER_CODE
 	mvc	__THREAD_per_paid(1,%r1),__LC_PER_ACCESS_ID
 0:	REENABLE_IRQS
 	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
-	l	%r1,BASED(.Ljump_table)
+	l	%r1,BASED(.Lc_jump_table)
 	la	%r10,0x7f
 	n	%r10,__PT_INT_CODE(%r11)
-	je	sysc_return
+	je	.Lsysc_return
 	sll	%r10,2
 	l	%r1,0(%r10,%r1)		# load address of handler routine
 	lr	%r2,%r11		# pass pointer to pt_regs
 	basr	%r14,%r1		# branch to interrupt-handler
-	j	sysc_return
+	j	.Lsysc_return
 
 #
 # PER event in supervisor state, must be kprobes
 #
-pgm_kprobe:
+.Lpgm_kprobe:
 	REENABLE_IRQS
 	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
-	l	%r1,BASED(.Ldo_per_trap)
+	l	%r1,BASED(.Lc_do_per_trap)
 	lr	%r2,%r11		# pass pointer to pt_regs
 	basr	%r14,%r1		# call do_per_trap
-	j	sysc_return
+	j	.Lsysc_return
 
 #
 # single stepped system call
 #
-pgm_svcper:
+.Lpgm_svcper:
 	mvc	__LC_RETURN_PSW(4),__LC_SVC_NEW_PSW
-	mvc	__LC_RETURN_PSW+4(4),BASED(.Lsysc_per)
+	mvc	__LC_RETURN_PSW+4(4),BASED(.Lc_sysc_per)
 	lhi	%r14,_PIF_SYSCALL | _PIF_PER_TRAP
-	lpsw	__LC_RETURN_PSW		# branch to sysc_per and enable irqs
+	lpsw	__LC_RETURN_PSW		# branch to .Lsysc_per and enable irqs
 
 /*
  * IO interrupt handler routine
@@ -435,9 +435,9 @@ ENTRY(io_int_handler)
 	l	%r13,__LC_SVC_NEW_PSW+4
 	lm	%r8,%r9,__LC_IO_OLD_PSW
 	tmh	%r8,0x0001		# interrupting from user ?
-	jz	io_skip
+	jz	.Lio_skip
 	UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER
-io_skip:
+.Lio_skip:
 	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
 	stm	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC
@@ -446,35 +446,35 @@ io_skip:
 	xc	__PT_FLAGS(4,%r11),__PT_FLAGS(%r11)
 	TRACE_IRQS_OFF
 	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
-io_loop:
-	l	%r1,BASED(.Ldo_IRQ)
+.Lio_loop:
+	l	%r1,BASED(.Lc_do_IRQ)
 	lr	%r2,%r11		# pass pointer to pt_regs
 	lhi	%r3,IO_INTERRUPT
 	tm	__PT_INT_CODE+8(%r11),0x80	# adapter interrupt ?
-	jz	io_call
+	jz	.Lio_call
 	lhi	%r3,THIN_INTERRUPT
-io_call:
+.Lio_call:
 	basr	%r14,%r1		# call do_IRQ
 	tm	__LC_MACHINE_FLAGS+2,0x10	# MACHINE_FLAG_LPAR
-	jz	io_return
+	jz	.Lio_return
 	tpi	0
-	jz	io_return
+	jz	.Lio_return
 	mvc	__PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
-	j	io_loop
-io_return:
+	j	.Lio_loop
+.Lio_return:
 	LOCKDEP_SYS_EXIT
 	TRACE_IRQS_ON
-io_tif:
+.Lio_tif:
 	tm	__TI_flags+3(%r12),_TIF_WORK
-	jnz	io_work			# there is work to do (signals etc.)
+	jnz	.Lio_work		# there is work to do (signals etc.)
 	tm	__LC_CPU_FLAGS+3,_CIF_WORK
-	jnz	io_work
-io_restore:
+	jnz	.Lio_work
+.Lio_restore:
 	mvc	__LC_RETURN_PSW(8),__PT_PSW(%r11)
 	stpt	__LC_EXIT_TIMER
 	lm	%r0,%r15,__PT_R0(%r11)
 	lpsw	__LC_RETURN_PSW
-io_done:
+.Lio_done:
 
 #
 # There is work todo, find out in which context we have been interrupted:
@@ -483,15 +483,15 @@ io_done:
 #    the preemption counter and if it is zero call preempt_schedule_irq
 # Before any work can be done, a switch to the kernel stack is required.
 #
-io_work:
+.Lio_work:
 	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
-	jo	io_work_user		# yes -> do resched & signal
+	jo	.Lio_work_user		# yes -> do resched & signal
 #ifdef CONFIG_PREEMPT
 	# check for preemptive scheduling
 	icm	%r0,15,__TI_precount(%r12)
-	jnz	io_restore		# preemption disabled
+	jnz	.Lio_restore		# preemption disabled
 	tm	__TI_flags+3(%r12),_TIF_NEED_RESCHED
-	jno	io_restore
+	jno	.Lio_restore
 	# switch to kernel stack
 	l	%r1,__PT_R15(%r11)
 	ahi	%r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
@@ -499,20 +499,20 @@ io_work:
 	xc	__SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1)
 	la	%r11,STACK_FRAME_OVERHEAD(%r1)
 	lr	%r15,%r1
-	# TRACE_IRQS_ON already done at io_return, call
+	# TRACE_IRQS_ON already done at .Lio_return, call
 	# TRACE_IRQS_OFF to keep things symmetrical
 	TRACE_IRQS_OFF
-	l	%r1,BASED(.Lpreempt_irq)
+	l	%r1,BASED(.Lc_preempt_irq)
 	basr	%r14,%r1		# call preempt_schedule_irq
-	j	io_return
+	j	.Lio_return
 #else
-	j	io_restore
+	j	.Lio_restore
 #endif
 
 #
 # Need to do work before returning to userspace, switch to kernel stack
 #
-io_work_user:
+.Lio_work_user:
 	l	%r1,__LC_KERNEL_STACK
 	mvc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
 	xc	__SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1)
@@ -522,74 +522,74 @@ io_work_user:
 #
 # One of the work bits is on. Find out which one.
 #
-io_work_tif:
+.Lio_work_tif:
 	tm	__LC_CPU_FLAGS+3(%r12),_CIF_MCCK_PENDING
-	jo	io_mcck_pending
+	jo	.Lio_mcck_pending
 	tm	__TI_flags+3(%r12),_TIF_NEED_RESCHED
-	jo	io_reschedule
+	jo	.Lio_reschedule
 	tm	__TI_flags+3(%r12),_TIF_SIGPENDING
-	jo	io_sigpending
+	jo	.Lio_sigpending
 	tm	__TI_flags+3(%r12),_TIF_NOTIFY_RESUME
-	jo	io_notify_resume
+	jo	.Lio_notify_resume
 	tm	__LC_CPU_FLAGS+3,_CIF_ASCE
-	jo	io_uaccess
-	j	io_return		# beware of critical section cleanup
+	jo	.Lio_uaccess
+	j	.Lio_return		# beware of critical section cleanup
 
 #
 # _CIF_MCCK_PENDING is set, call handler
 #
-io_mcck_pending:
-	# TRACE_IRQS_ON already done at io_return
-	l	%r1,BASED(.Lhandle_mcck)
+.Lio_mcck_pending:
+	# TRACE_IRQS_ON already done at .Lio_return
+	l	%r1,BASED(.Lc_handle_mcck)
 	basr	%r14,%r1		# TIF bit will be cleared by handler
 	TRACE_IRQS_OFF
-	j	io_return
+	j	.Lio_return
 
 #
 # _CIF_ASCE is set, load user space asce
 #
-io_uaccess:
+.Lio_uaccess:
 	ni	__LC_CPU_FLAGS+3,255-_CIF_ASCE
 	lctl	%c1,%c1,__LC_USER_ASCE	# load primary asce
-	j	io_return
+	j	.Lio_return
 
 #
 # _TIF_NEED_RESCHED is set, call schedule
 #
-io_reschedule:
-	# TRACE_IRQS_ON already done at io_return
-	l	%r1,BASED(.Lschedule)
+.Lio_reschedule:
+	# TRACE_IRQS_ON already done at .Lio_return
+	l	%r1,BASED(.Lc_schedule)
 	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
 	basr	%r14,%r1		# call scheduler
 	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
 	TRACE_IRQS_OFF
-	j	io_return
+	j	.Lio_return
 
 #
 # _TIF_SIGPENDING is set, call do_signal
 #
-io_sigpending:
-	# TRACE_IRQS_ON already done at io_return
-	l	%r1,BASED(.Ldo_signal)
+.Lio_sigpending:
+	# TRACE_IRQS_ON already done at .Lio_return
+	l	%r1,BASED(.Lc_do_signal)
 	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
 	lr	%r2,%r11		# pass pointer to pt_regs
 	basr	%r14,%r1		# call do_signal
 	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
 	TRACE_IRQS_OFF
-	j	io_return
+	j	.Lio_return
 
 #
 # _TIF_SIGPENDING is set, call do_signal
 #
-io_notify_resume:
-	# TRACE_IRQS_ON already done at io_return
-	l	%r1,BASED(.Ldo_notify_resume)
+.Lio_notify_resume:
+	# TRACE_IRQS_ON already done at .Lio_return
+	l	%r1,BASED(.Lc_do_notify_resume)
 	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
 	lr	%r2,%r11		# pass pointer to pt_regs
 	basr	%r14,%r1		# call do_notify_resume
 	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
 	TRACE_IRQS_OFF
-	j	io_return
+	j	.Lio_return
 
 /*
  * External interrupt handler routine
@@ -603,9 +603,9 @@ ENTRY(ext_int_handler)
 	l	%r13,__LC_SVC_NEW_PSW+4
 	lm	%r8,%r9,__LC_EXT_OLD_PSW
 	tmh	%r8,0x0001		# interrupting from user ?
-	jz	ext_skip
+	jz	.Lext_skip
 	UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER
-ext_skip:
+.Lext_skip:
 	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
 	stm	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC
@@ -614,29 +614,29 @@ ext_skip:
 	mvc	__PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
 	xc	__PT_FLAGS(4,%r11),__PT_FLAGS(%r11)
 	TRACE_IRQS_OFF
-	l	%r1,BASED(.Ldo_IRQ)
+	l	%r1,BASED(.Lc_do_IRQ)
 	lr	%r2,%r11		# pass pointer to pt_regs
 	lhi	%r3,EXT_INTERRUPT
 	basr	%r14,%r1		# call do_IRQ
-	j	io_return
+	j	.Lio_return
 
 /*
- * Load idle PSW. The second "half" of this function is in cleanup_idle.
+ * Load idle PSW. The second "half" of this function is in .Lcleanup_idle.
  */
 ENTRY(psw_idle)
 	st	%r3,__SF_EMPTY(%r15)
 	basr	%r1,0
-	la	%r1,psw_idle_lpsw+4-.(%r1)
+	la	%r1,.Lpsw_idle_lpsw+4-.(%r1)
 	st	%r1,__SF_EMPTY+4(%r15)
 	oi	__SF_EMPTY+4(%r15),0x80
 	stck	__CLOCK_IDLE_ENTER(%r2)
 	stpt	__TIMER_IDLE_ENTER(%r2)
-psw_idle_lpsw:
+.Lpsw_idle_lpsw:
 	lpsw	__SF_EMPTY(%r15)
 	br	%r14
-psw_idle_end:
+.Lpsw_idle_end:
 
-__critical_end:
+.L__critical_end:
 
 /*
  * Machine check handler routines
@@ -650,7 +650,7 @@ ENTRY(mcck_int_handler)
 	l	%r13,__LC_SVC_NEW_PSW+4
 	lm	%r8,%r9,__LC_MCK_OLD_PSW
 	tm	__LC_MCCK_CODE,0x80	# system damage?
-	jo	mcck_panic		# yes -> rest of mcck code invalid
+	jo	.Lmcck_panic		# yes -> rest of mcck code invalid
 	la	%r14,__LC_CPU_TIMER_SAVE_AREA
 	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
 	tm	__LC_MCCK_CODE+5,0x02	# stored cpu timer value valid?
@@ -668,22 +668,22 @@ ENTRY(mcck_int_handler)
 2:	spt	0(%r14)
 	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
 3:	tm	__LC_MCCK_CODE+2,0x09	# mwp + ia of old psw valid?
-	jno	mcck_panic		# no -> skip cleanup critical
+	jno	.Lmcck_panic		# no -> skip cleanup critical
 	tm	%r8,0x0001		# interrupting from user ?
-	jz	mcck_skip
+	jz	.Lmcck_skip
 	UPDATE_VTIME %r14,%r15,__LC_MCCK_ENTER_TIMER
-mcck_skip:
+.Lmcck_skip:
 	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+32,__LC_PANIC_STACK,PAGE_SHIFT
 	stm	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(32,%r11),__LC_GPREGS_SAVE_AREA+32
 	stm	%r8,%r9,__PT_PSW(%r11)
 	xc	__PT_FLAGS(4,%r11),__PT_FLAGS(%r11)
 	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
-	l	%r1,BASED(.Ldo_machine_check)
+	l	%r1,BASED(.Lc_do_machine_check)
 	lr	%r2,%r11		# pass pointer to pt_regs
 	basr	%r14,%r1		# call s390_do_machine_check
 	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
-	jno	mcck_return
+	jno	.Lmcck_return
 	l	%r1,__LC_KERNEL_STACK	# switch to kernel stack
 	mvc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
 	xc	__SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1)
@@ -691,12 +691,12 @@ mcck_skip:
 	lr	%r15,%r1
 	ssm	__LC_PGM_NEW_PSW	# turn dat on, keep irqs off
 	tm	__LC_CPU_FLAGS+3,_CIF_MCCK_PENDING
-	jno	mcck_return
+	jno	.Lmcck_return
 	TRACE_IRQS_OFF
-	l	%r1,BASED(.Lhandle_mcck)
+	l	%r1,BASED(.Lc_handle_mcck)
 	basr	%r14,%r1		# call s390_handle_mcck
 	TRACE_IRQS_ON
-mcck_return:
+.Lmcck_return:
 	mvc	__LC_RETURN_MCCK_PSW(8),__PT_PSW(%r11) # move return PSW
 	tm	__LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
 	jno	0f
@@ -706,15 +706,15 @@ mcck_return:
 0:	lm	%r0,%r15,__PT_R0(%r11)
 	lpsw	__LC_RETURN_MCCK_PSW
 
-mcck_panic:
+.Lmcck_panic:
 	l	%r14,__LC_PANIC_STACK
 	slr	%r14,%r15
 	sra	%r14,PAGE_SHIFT
 	jz	0f
 	l	%r15,__LC_PANIC_STACK
-	j	mcck_skip
+	j	.Lmcck_skip
 0:	ahi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	j	mcck_skip
+	j	.Lmcck_skip
 
 #
 # PSW restart interrupt handler
@@ -764,58 +764,58 @@ stack_overflow:
 1:	.long	kernel_stack_overflow
 #endif
 
-cleanup_table:
+.Lcleanup_table:
 	.long	system_call + 0x80000000
-	.long	sysc_do_svc + 0x80000000
-	.long	sysc_tif + 0x80000000
-	.long	sysc_restore + 0x80000000
-	.long	sysc_done + 0x80000000
-	.long	io_tif + 0x80000000
-	.long	io_restore + 0x80000000
-	.long	io_done + 0x80000000
+	.long	.Lsysc_do_svc + 0x80000000
+	.long	.Lsysc_tif + 0x80000000
+	.long	.Lsysc_restore + 0x80000000
+	.long	.Lsysc_done + 0x80000000
+	.long	.Lio_tif + 0x80000000
+	.long	.Lio_restore + 0x80000000
+	.long	.Lio_done + 0x80000000
 	.long	psw_idle + 0x80000000
-	.long	psw_idle_end + 0x80000000
+	.long	.Lpsw_idle_end + 0x80000000
 
 cleanup_critical:
-	cl	%r9,BASED(cleanup_table)	# system_call
+	cl	%r9,BASED(.Lcleanup_table)	# system_call
 	jl	0f
-	cl	%r9,BASED(cleanup_table+4)	# sysc_do_svc
-	jl	cleanup_system_call
-	cl	%r9,BASED(cleanup_table+8)	# sysc_tif
+	cl	%r9,BASED(.Lcleanup_table+4)	# .Lsysc_do_svc
+	jl	.Lcleanup_system_call
+	cl	%r9,BASED(.Lcleanup_table+8)	# .Lsysc_tif
 	jl	0f
-	cl	%r9,BASED(cleanup_table+12)	# sysc_restore
-	jl	cleanup_sysc_tif
-	cl	%r9,BASED(cleanup_table+16)	# sysc_done
-	jl	cleanup_sysc_restore
-	cl	%r9,BASED(cleanup_table+20)	# io_tif
+	cl	%r9,BASED(.Lcleanup_table+12)	# .Lsysc_restore
+	jl	.Lcleanup_sysc_tif
+	cl	%r9,BASED(.Lcleanup_table+16)	# .Lsysc_done
+	jl	.Lcleanup_sysc_restore
+	cl	%r9,BASED(.Lcleanup_table+20)	# .Lio_tif
 	jl	0f
-	cl	%r9,BASED(cleanup_table+24)	# io_restore
-	jl	cleanup_io_tif
-	cl	%r9,BASED(cleanup_table+28)	# io_done
-	jl	cleanup_io_restore
-	cl	%r9,BASED(cleanup_table+32)	# psw_idle
+	cl	%r9,BASED(.Lcleanup_table+24)	# .Lio_restore
+	jl	.Lcleanup_io_tif
+	cl	%r9,BASED(.Lcleanup_table+28)	# .Lio_done
+	jl	.Lcleanup_io_restore
+	cl	%r9,BASED(.Lcleanup_table+32)	# psw_idle
 	jl	0f
-	cl	%r9,BASED(cleanup_table+36)	# psw_idle_end
-	jl	cleanup_idle
+	cl	%r9,BASED(.Lcleanup_table+36)	# .Lpsw_idle_end
+	jl	.Lcleanup_idle
 0:	br	%r14
 
-cleanup_system_call:
+.Lcleanup_system_call:
 	# check if stpt has been executed
-	cl	%r9,BASED(cleanup_system_call_insn)
+	cl	%r9,BASED(.Lcleanup_system_call_insn)
 	jh	0f
 	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
 	chi	%r11,__LC_SAVE_AREA_ASYNC
 	je	0f
 	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER
 0:	# check if stm has been executed
-	cl	%r9,BASED(cleanup_system_call_insn+4)
+	cl	%r9,BASED(.Lcleanup_system_call_insn+4)
 	jh	0f
 	mvc	__LC_SAVE_AREA_SYNC(32),0(%r11)
 0:	# set up saved registers r12, and r13
 	st	%r12,16(%r11)		# r12 thread-info pointer
 	st	%r13,20(%r11)		# r13 literal-pool pointer
 	# check if the user time calculation has been done
-	cl	%r9,BASED(cleanup_system_call_insn+8)
+	cl	%r9,BASED(.Lcleanup_system_call_insn+8)
 	jh	0f
 	l	%r10,__LC_EXIT_TIMER
 	l	%r15,__LC_EXIT_TIMER+4
@@ -824,7 +824,7 @@ cleanup_system_call:
 	st	%r10,__LC_USER_TIMER
 	st	%r15,__LC_USER_TIMER+4
 0:	# check if the system time calculation has been done
-	cl	%r9,BASED(cleanup_system_call_insn+12)
+	cl	%r9,BASED(.Lcleanup_system_call_insn+12)
 	jh	0f
 	l	%r10,__LC_LAST_UPDATE_TIMER
 	l	%r15,__LC_LAST_UPDATE_TIMER+4
@@ -848,20 +848,20 @@ cleanup_system_call:
 	# setup saved register 15
 	st	%r15,28(%r11)		# r15 stack pointer
 	# set new psw address and exit
-	l	%r9,BASED(cleanup_table+4)	# sysc_do_svc + 0x80000000
+	l	%r9,BASED(.Lcleanup_table+4)	# .Lsysc_do_svc + 0x80000000
 	br	%r14
-cleanup_system_call_insn:
+.Lcleanup_system_call_insn:
 	.long	system_call + 0x80000000
-	.long	sysc_stm + 0x80000000
-	.long	sysc_vtime + 0x80000000 + 36
-	.long	sysc_vtime + 0x80000000 + 76
+	.long	.Lsysc_stm + 0x80000000
+	.long	.Lsysc_vtime + 0x80000000 + 36
+	.long	.Lsysc_vtime + 0x80000000 + 76
 
-cleanup_sysc_tif:
-	l	%r9,BASED(cleanup_table+8)	# sysc_tif + 0x80000000
+.Lcleanup_sysc_tif:
+	l	%r9,BASED(.Lcleanup_table+8)	# .Lsysc_tif + 0x80000000
 	br	%r14
 
-cleanup_sysc_restore:
-	cl	%r9,BASED(cleanup_sysc_restore_insn)
+.Lcleanup_sysc_restore:
+	cl	%r9,BASED(.Lcleanup_sysc_restore_insn)
 	jhe	0f
 	l	%r9,12(%r11)		# get saved pointer to pt_regs
 	mvc	__LC_RETURN_PSW(8),__PT_PSW(%r9)
@@ -869,15 +869,15 @@ cleanup_sysc_restore:
 	lm	%r0,%r7,__PT_R0(%r9)
 0:	lm	%r8,%r9,__LC_RETURN_PSW
 	br	%r14
-cleanup_sysc_restore_insn:
-	.long	sysc_done - 4 + 0x80000000
+.Lcleanup_sysc_restore_insn:
+	.long	.Lsysc_done - 4 + 0x80000000
 
-cleanup_io_tif:
-	l	%r9,BASED(cleanup_table+20)	# io_tif + 0x80000000
+.Lcleanup_io_tif:
+	l	%r9,BASED(.Lcleanup_table+20)	# .Lio_tif + 0x80000000
 	br	%r14
 
-cleanup_io_restore:
-	cl	%r9,BASED(cleanup_io_restore_insn)
+.Lcleanup_io_restore:
+	cl	%r9,BASED(.Lcleanup_io_restore_insn)
 	jhe	0f
 	l	%r9,12(%r11)		# get saved r11 pointer to pt_regs
 	mvc	__LC_RETURN_PSW(8),__PT_PSW(%r9)
@@ -885,10 +885,10 @@ cleanup_io_restore:
 	lm	%r0,%r7,__PT_R0(%r9)
 0:	lm	%r8,%r9,__LC_RETURN_PSW
 	br	%r14
-cleanup_io_restore_insn:
-	.long	io_done - 4 + 0x80000000
+.Lcleanup_io_restore_insn:
+	.long	.Lio_done - 4 + 0x80000000
 
-cleanup_idle:
+.Lcleanup_idle:
 	# copy interrupt clock & cpu timer
 	mvc	__CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK
 	mvc	__TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER
@@ -897,7 +897,7 @@ cleanup_idle:
 	mvc	__CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
 	mvc	__TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER
 0:	# check if stck has been executed
-	cl	%r9,BASED(cleanup_idle_insn)
+	cl	%r9,BASED(.Lcleanup_idle_insn)
 	jhe	1f
 	mvc	__CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
 	mvc	__TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r3)
@@ -913,12 +913,12 @@ cleanup_idle:
 	stm	%r9,%r10,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
 	# prepare return psw
-	n	%r8,BASED(cleanup_idle_wait)	# clear irq & wait state bits
+	n	%r8,BASED(.Lcleanup_idle_wait)	# clear irq & wait state bits
 	l	%r9,24(%r11)			# return from psw_idle
 	br	%r14
-cleanup_idle_insn:
-	.long	psw_idle_lpsw + 0x80000000
-cleanup_idle_wait:
+.Lcleanup_idle_insn:
+	.long	.Lpsw_idle_lpsw + 0x80000000
+.Lcleanup_idle_wait:
 	.long	0xfcfdffff
 
 /*
@@ -933,30 +933,30 @@ cleanup_idle_wait:
 /*
  * Symbol constants
  */
-.Ldo_machine_check:	.long	s390_do_machine_check
-.Lhandle_mcck:		.long	s390_handle_mcck
-.Ldo_IRQ:		.long	do_IRQ
-.Ldo_signal:		.long	do_signal
-.Ldo_notify_resume:	.long	do_notify_resume
-.Ldo_per_trap:		.long	do_per_trap
-.Ljump_table:		.long	pgm_check_table
-.Lschedule:		.long	schedule
+.Lc_do_machine_check:	.long	s390_do_machine_check
+.Lc_handle_mcck:	.long	s390_handle_mcck
+.Lc_do_IRQ:		.long	do_IRQ
+.Lc_do_signal:		.long	do_signal
+.Lc_do_notify_resume:	.long	do_notify_resume
+.Lc_do_per_trap:	.long	do_per_trap
+.Lc_jump_table:		.long	pgm_check_table
+.Lc_schedule:		.long	schedule
 #ifdef CONFIG_PREEMPT
-.Lpreempt_irq:		.long	preempt_schedule_irq
+.Lc_preempt_irq:	.long	preempt_schedule_irq
 #endif
-.Ltrace_enter:		.long	do_syscall_trace_enter
-.Ltrace_exit:		.long	do_syscall_trace_exit
-.Lschedule_tail:	.long	schedule_tail
-.Lsysc_per:		.long	sysc_per + 0x80000000
+.Lc_trace_enter:	.long	do_syscall_trace_enter
+.Lc_trace_exit:		.long	do_syscall_trace_exit
+.Lc_schedule_tail:	.long	schedule_tail
+.Lc_sysc_per:		.long	.Lsysc_per + 0x80000000
 #ifdef CONFIG_TRACE_IRQFLAGS
-.Lhardirqs_on:		.long	trace_hardirqs_on_caller
-.Lhardirqs_off:		.long	trace_hardirqs_off_caller
+.Lc_hardirqs_on:	.long	trace_hardirqs_on_caller
+.Lc_hardirqs_off:	.long	trace_hardirqs_off_caller
 #endif
 #ifdef CONFIG_LOCKDEP
-.Llockdep_sys_exit:	.long	lockdep_sys_exit
+.Lc_lockdep_sys_exit:	.long	lockdep_sys_exit
 #endif
-.Lcritical_start:	.long	__critical_start + 0x80000000
-.Lcritical_length:	.long	__critical_end - __critical_start
+.Lc_critical_start:	.long	.L__critical_start + 0x80000000
+.Lc_critical_length:	.long	.L__critical_end - .L__critical_start
 
 		.section .rodata, "a"
 #define SYSCALL(esa,esame,emu)	.long esa
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 7b2e03afd017..c329446a951d 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -91,7 +91,7 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 	.if	\reason==1
 	# Some program interrupts are suppressing (e.g. protection).
 	# We must also check the instruction after SIE in that case.
-	# do_protection_exception will rewind to rewind_pad
+	# do_protection_exception will rewind to .Lrewind_pad
 	jh	.+42
 	.else
 	jhe	.+42
@@ -192,7 +192,7 @@ ENTRY(__switch_to)
 	lmg	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
 	br	%r14
 
-__critical_start:
+.L__critical_start:
 /*
  * SVC interrupt handler routine. System calls are synchronous events and
  * are executed with interrupts enabled.
@@ -200,15 +200,15 @@ __critical_start:
 
 ENTRY(system_call)
 	stpt	__LC_SYNC_ENTER_TIMER
-sysc_stmg:
+.Lsysc_stmg:
 	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
 	lg	%r10,__LC_LAST_BREAK
 	lg	%r12,__LC_THREAD_INFO
 	lghi	%r14,_PIF_SYSCALL
-sysc_per:
+.Lsysc_per:
 	lg	%r15,__LC_KERNEL_STACK
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)	# pointer to pt_regs
-sysc_vtime:
+.Lsysc_vtime:
 	UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER
 	LAST_BREAK %r13
 	stmg	%r0,%r7,__PT_R0(%r11)
@@ -216,39 +216,39 @@ sysc_vtime:
 	mvc	__PT_PSW(16,%r11),__LC_SVC_OLD_PSW
 	mvc	__PT_INT_CODE(4,%r11),__LC_SVC_ILC
 	stg	%r14,__PT_FLAGS(%r11)
-sysc_do_svc:
+.Lsysc_do_svc:
 	lg	%r10,__TI_sysc_table(%r12)	# address of system call table
 	llgh	%r8,__PT_INT_CODE+2(%r11)
 	slag	%r8,%r8,2			# shift and test for svc 0
-	jnz	sysc_nr_ok
+	jnz	.Lsysc_nr_ok
 	# svc 0: system call number in %r1
 	llgfr	%r1,%r1				# clear high word in r1
 	cghi	%r1,NR_syscalls
-	jnl	sysc_nr_ok
+	jnl	.Lsysc_nr_ok
 	sth	%r1,__PT_INT_CODE+2(%r11)
 	slag	%r8,%r1,2
-sysc_nr_ok:
+.Lsysc_nr_ok:
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	stg	%r2,__PT_ORIG_GPR2(%r11)
 	stg	%r7,STACK_FRAME_OVERHEAD(%r15)
 	lgf	%r9,0(%r8,%r10)			# get system call add.
 	tm	__TI_flags+7(%r12),_TIF_TRACE
-	jnz	sysc_tracesys
+	jnz	.Lsysc_tracesys
 	basr	%r14,%r9			# call sys_xxxx
 	stg	%r2,__PT_R2(%r11)		# store return value
 
-sysc_return:
+.Lsysc_return:
 	LOCKDEP_SYS_EXIT
-sysc_tif:
+.Lsysc_tif:
 	tm	__PT_PSW+1(%r11),0x01		# returning to user ?
-	jno	sysc_restore
+	jno	.Lsysc_restore
 	tm	__PT_FLAGS+7(%r11),_PIF_WORK
-	jnz	sysc_work
+	jnz	.Lsysc_work
 	tm	__TI_flags+7(%r12),_TIF_WORK
-	jnz	sysc_work			# check for work
+	jnz	.Lsysc_work			# check for work
 	tm	__LC_CPU_FLAGS+7,_CIF_WORK
-	jnz	sysc_work
-sysc_restore:
+	jnz	.Lsysc_work
+.Lsysc_restore:
 	lg	%r14,__LC_VDSO_PER_CPU
 	lmg	%r0,%r10,__PT_R0(%r11)
 	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r11)
@@ -256,101 +256,101 @@ sysc_restore:
 	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
 	lmg	%r11,%r15,__PT_R11(%r11)
 	lpswe	__LC_RETURN_PSW
-sysc_done:
+.Lsysc_done:
 
 #
 # One of the work bits is on. Find out which one.
 #
-sysc_work:
+.Lsysc_work:
 	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
-	jo	sysc_mcck_pending
+	jo	.Lsysc_mcck_pending
 	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
-	jo	sysc_reschedule
+	jo	.Lsysc_reschedule
 #ifdef CONFIG_UPROBES
 	tm	__TI_flags+7(%r12),_TIF_UPROBE
-	jo	sysc_uprobe_notify
+	jo	.Lsysc_uprobe_notify
 #endif
 	tm	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
-	jo	sysc_singlestep
+	jo	.Lsysc_singlestep
 	tm	__TI_flags+7(%r12),_TIF_SIGPENDING
-	jo	sysc_sigpending
+	jo	.Lsysc_sigpending
 	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
-	jo	sysc_notify_resume
+	jo	.Lsysc_notify_resume
 	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
-	jo	sysc_uaccess
-	j	sysc_return		# beware of critical section cleanup
+	jo	.Lsysc_uaccess
+	j	.Lsysc_return		# beware of critical section cleanup
 
 #
 # _TIF_NEED_RESCHED is set, call schedule
 #
-sysc_reschedule:
-	larl	%r14,sysc_return
+.Lsysc_reschedule:
+	larl	%r14,.Lsysc_return
 	jg	schedule
 
 #
 # _CIF_MCCK_PENDING is set, call handler
 #
-sysc_mcck_pending:
-	larl	%r14,sysc_return
+.Lsysc_mcck_pending:
+	larl	%r14,.Lsysc_return
 	jg	s390_handle_mcck	# TIF bit will be cleared by handler
 
 #
 # _CIF_ASCE is set, load user space asce
 #
-sysc_uaccess:
+.Lsysc_uaccess:
 	ni	__LC_CPU_FLAGS+7,255-_CIF_ASCE
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
-	j	sysc_return
+	j	.Lsysc_return
 
 #
 # _TIF_SIGPENDING is set, call do_signal
 #
-sysc_sigpending:
+.Lsysc_sigpending:
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,do_signal
 	tm	__PT_FLAGS+7(%r11),_PIF_SYSCALL
-	jno	sysc_return
+	jno	.Lsysc_return
 	lmg	%r2,%r7,__PT_R2(%r11)	# load svc arguments
 	lg	%r10,__TI_sysc_table(%r12)	# address of system call table
 	lghi	%r8,0			# svc 0 returns -ENOSYS
 	llgh	%r1,__PT_INT_CODE+2(%r11)	# load new svc number
 	cghi	%r1,NR_syscalls
-	jnl	sysc_nr_ok		# invalid svc number -> do svc 0
+	jnl	.Lsysc_nr_ok		# invalid svc number -> do svc 0
 	slag	%r8,%r1,2
-	j	sysc_nr_ok		# restart svc
+	j	.Lsysc_nr_ok		# restart svc
 
 #
 # _TIF_NOTIFY_RESUME is set, call do_notify_resume
 #
-sysc_notify_resume:
+.Lsysc_notify_resume:
 	lgr	%r2,%r11		# pass pointer to pt_regs
-	larl	%r14,sysc_return
+	larl	%r14,.Lsysc_return
 	jg	do_notify_resume
 
 #
 # _TIF_UPROBE is set, call uprobe_notify_resume
 #
 #ifdef CONFIG_UPROBES
-sysc_uprobe_notify:
+.Lsysc_uprobe_notify:
 	lgr	%r2,%r11		# pass pointer to pt_regs
-	larl	%r14,sysc_return
+	larl	%r14,.Lsysc_return
 	jg	uprobe_notify_resume
 #endif
 
 #
 # _PIF_PER_TRAP is set, call do_per_trap
 #
-sysc_singlestep:
+.Lsysc_singlestep:
 	ni	__PT_FLAGS+7(%r11),255-_PIF_PER_TRAP
 	lgr	%r2,%r11		# pass pointer to pt_regs
-	larl	%r14,sysc_return
+	larl	%r14,.Lsysc_return
 	jg	do_per_trap
 
 #
 # call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
 # and after the system call
 #
-sysc_tracesys:
+.Lsysc_tracesys:
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	la	%r3,0
 	llgh	%r0,__PT_INT_CODE+2(%r11)
@@ -358,20 +358,20 @@ sysc_tracesys:
 	brasl	%r14,do_syscall_trace_enter
 	lghi	%r0,NR_syscalls
 	clgr	%r0,%r2
-	jnh	sysc_tracenogo
+	jnh	.Lsysc_tracenogo
 	sllg	%r8,%r2,2
 	lgf	%r9,0(%r8,%r10)
-sysc_tracego:
+.Lsysc_tracego:
 	lmg	%r3,%r7,__PT_R3(%r11)
 	stg	%r7,STACK_FRAME_OVERHEAD(%r15)
 	lg	%r2,__PT_ORIG_GPR2(%r11)
 	basr	%r14,%r9		# call sys_xxx
 	stg	%r2,__PT_R2(%r11)	# store return value
-sysc_tracenogo:
+.Lsysc_tracenogo:
 	tm	__TI_flags+7(%r12),_TIF_TRACE
-	jz	sysc_return
+	jz	.Lsysc_return
 	lgr	%r2,%r11		# pass pointer to pt_regs
-	larl	%r14,sysc_return
+	larl	%r14,.Lsysc_return
 	jg	do_syscall_trace_exit
 
 #
@@ -384,13 +384,13 @@ ENTRY(ret_from_fork)
 	TRACE_IRQS_ON
 	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
 	tm	__PT_PSW+1(%r11),0x01	# forking a kernel thread ?
-	jne	sysc_tracenogo
+	jne	.Lsysc_tracenogo
 	# it's a kernel thread
 	lmg	%r9,%r10,__PT_R9(%r11)	# load gprs
 ENTRY(kernel_thread_starter)
 	la	%r2,0(%r10)
 	basr	%r14,%r9
-	j	sysc_tracenogo
+	j	.Lsysc_tracenogo
 
 /*
  * Program check handler routine
@@ -409,7 +409,7 @@ ENTRY(pgm_check_handler)
 	tmhh	%r8,0x4000		# PER bit set in old PSW ?
 	jnz	0f			# -> enabled, can't be a double fault
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
-	jnz	pgm_svcper		# -> single stepped svc
+	jnz	.Lpgm_svcper		# -> single stepped svc
 0:	CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
 	j	2f
@@ -432,7 +432,7 @@ ENTRY(pgm_check_handler)
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
 	jz	0f
 	tmhh	%r8,0x0001		# kernel per event ?
-	jz	pgm_kprobe
+	jz	.Lpgm_kprobe
 	oi	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
 	mvc	__THREAD_per_address(8,%r14),__LC_PER_ADDRESS
 	mvc	__THREAD_per_cause(2,%r14),__LC_PER_CODE
@@ -443,31 +443,31 @@ ENTRY(pgm_check_handler)
 	llgh	%r10,__PT_INT_CODE+2(%r11)
 	nill	%r10,0x007f
 	sll	%r10,2
-	je	sysc_return
+	je	.Lsysc_return
 	lgf	%r1,0(%r10,%r1)		# load address of handler routine
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	basr	%r14,%r1		# branch to interrupt-handler
-	j	sysc_return
+	j	.Lsysc_return
 
 #
 # PER event in supervisor state, must be kprobes
 #
-pgm_kprobe:
+.Lpgm_kprobe:
 	REENABLE_IRQS
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,do_per_trap
-	j	sysc_return
+	j	.Lsysc_return
 
 #
 # single stepped system call
 #
-pgm_svcper:
+.Lpgm_svcper:
 	mvc	__LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
-	larl	%r14,sysc_per
+	larl	%r14,.Lsysc_per
 	stg	%r14,__LC_RETURN_PSW+8
 	lghi	%r14,_PIF_SYSCALL | _PIF_PER_TRAP
-	lpswe	__LC_RETURN_PSW		# branch to sysc_per and enable irqs
+	lpswe	__LC_RETURN_PSW		# branch to .Lsysc_per and enable irqs
 
 /*
  * IO interrupt handler routine
@@ -483,10 +483,10 @@ ENTRY(io_int_handler)
 	HANDLE_SIE_INTERCEPT %r14,2
 	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
 	tmhh	%r8,0x0001		# interrupting from user?
-	jz	io_skip
+	jz	.Lio_skip
 	UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
 	LAST_BREAK %r14
-io_skip:
+.Lio_skip:
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
@@ -494,29 +494,29 @@ io_skip:
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	TRACE_IRQS_OFF
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
-io_loop:
+.Lio_loop:
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	lghi	%r3,IO_INTERRUPT
 	tm	__PT_INT_CODE+8(%r11),0x80	# adapter interrupt ?
-	jz	io_call
+	jz	.Lio_call
 	lghi	%r3,THIN_INTERRUPT
-io_call:
+.Lio_call:
 	brasl	%r14,do_IRQ
 	tm	__LC_MACHINE_FLAGS+6,0x10	# MACHINE_FLAG_LPAR
-	jz	io_return
+	jz	.Lio_return
 	tpi	0
-	jz	io_return
+	jz	.Lio_return
 	mvc	__PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
-	j	io_loop
-io_return:
+	j	.Lio_loop
+.Lio_return:
 	LOCKDEP_SYS_EXIT
 	TRACE_IRQS_ON
-io_tif:
+.Lio_tif:
 	tm	__TI_flags+7(%r12),_TIF_WORK
-	jnz	io_work 		# there is work to do (signals etc.)
+	jnz	.Lio_work		# there is work to do (signals etc.)
 	tm	__LC_CPU_FLAGS+7,_CIF_WORK
-	jnz	io_work
-io_restore:
+	jnz	.Lio_work
+.Lio_restore:
 	lg	%r14,__LC_VDSO_PER_CPU
 	lmg	%r0,%r10,__PT_R0(%r11)
 	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r11)
@@ -524,7 +524,7 @@ io_restore:
 	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
 	lmg	%r11,%r15,__PT_R11(%r11)
 	lpswe	__LC_RETURN_PSW
-io_done:
+.Lio_done:
 
 #
 # There is work todo, find out in which context we have been interrupted:
@@ -535,15 +535,15 @@ io_done:
 #    the preemption counter and if it is zero call preempt_schedule_irq
 # Before any work can be done, a switch to the kernel stack is required.
 #
-io_work:
+.Lio_work:
 	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
-	jo	io_work_user		# yes -> do resched & signal
+	jo	.Lio_work_user		# yes -> do resched & signal
 #ifdef CONFIG_PREEMPT
 	# check for preemptive scheduling
 	icm	%r0,15,__TI_precount(%r12)
-	jnz	io_restore		# preemption is disabled
+	jnz	.Lio_restore		# preemption is disabled
 	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
-	jno	io_restore
+	jno	.Lio_restore
 	# switch to kernel stack
 	lg	%r1,__PT_R15(%r11)
 	aghi	%r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
@@ -551,19 +551,19 @@ io_work:
 	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
 	la	%r11,STACK_FRAME_OVERHEAD(%r1)
 	lgr	%r15,%r1
-	# TRACE_IRQS_ON already done at io_return, call
+	# TRACE_IRQS_ON already done at .Lio_return, call
 	# TRACE_IRQS_OFF to keep things symmetrical
 	TRACE_IRQS_OFF
 	brasl	%r14,preempt_schedule_irq
-	j	io_return
+	j	.Lio_return
 #else
-	j	io_restore
+	j	.Lio_restore
 #endif
 
 #
 # Need to do work before returning to userspace, switch to kernel stack
 #
-io_work_user:
+.Lio_work_user:
 	lg	%r1,__LC_KERNEL_STACK
 	mvc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
 	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
@@ -573,70 +573,70 @@ io_work_user:
 #
 # One of the work bits is on. Find out which one.
 #
-io_work_tif:
+.Lio_work_tif:
 	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
-	jo	io_mcck_pending
+	jo	.Lio_mcck_pending
 	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
-	jo	io_reschedule
+	jo	.Lio_reschedule
 	tm	__TI_flags+7(%r12),_TIF_SIGPENDING
-	jo	io_sigpending
+	jo	.Lio_sigpending
 	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
-	jo	io_notify_resume
+	jo	.Lio_notify_resume
 	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
-	jo	io_uaccess
-	j	io_return		# beware of critical section cleanup
+	jo	.Lio_uaccess
+	j	.Lio_return		# beware of critical section cleanup
 
 #
 # _CIF_MCCK_PENDING is set, call handler
 #
-io_mcck_pending:
-	# TRACE_IRQS_ON already done at io_return
+.Lio_mcck_pending:
+	# TRACE_IRQS_ON already done at .Lio_return
 	brasl	%r14,s390_handle_mcck	# TIF bit will be cleared by handler
 	TRACE_IRQS_OFF
-	j	io_return
+	j	.Lio_return
 
 #
 # _CIF_ASCE is set, load user space asce
 #
-io_uaccess:
+.Lio_uaccess:
 	ni	__LC_CPU_FLAGS+7,255-_CIF_ASCE
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
-	j	io_return
+	j	.Lio_return
 
 #
 # _TIF_NEED_RESCHED is set, call schedule
 #
-io_reschedule:
-	# TRACE_IRQS_ON already done at io_return
+.Lio_reschedule:
+	# TRACE_IRQS_ON already done at .Lio_return
 	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
 	brasl	%r14,schedule		# call scheduler
 	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
 	TRACE_IRQS_OFF
-	j	io_return
+	j	.Lio_return
 
 #
 # _TIF_SIGPENDING or is set, call do_signal
 #
-io_sigpending:
-	# TRACE_IRQS_ON already done at io_return
+.Lio_sigpending:
+	# TRACE_IRQS_ON already done at .Lio_return
 	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,do_signal
 	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
 	TRACE_IRQS_OFF
-	j	io_return
+	j	.Lio_return
 
 #
 # _TIF_NOTIFY_RESUME or is set, call do_notify_resume
 #
-io_notify_resume:
-	# TRACE_IRQS_ON already done at io_return
+.Lio_notify_resume:
+	# TRACE_IRQS_ON already done at .Lio_return
 	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,do_notify_resume
 	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
 	TRACE_IRQS_OFF
-	j	io_return
+	j	.Lio_return
 
 /*
  * External interrupt handler routine
@@ -652,10 +652,10 @@ ENTRY(ext_int_handler)
 	HANDLE_SIE_INTERCEPT %r14,3
 	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
 	tmhh	%r8,0x0001		# interrupting from user ?
-	jz	ext_skip
+	jz	.Lext_skip
 	UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
 	LAST_BREAK %r14
-ext_skip:
+.Lext_skip:
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
@@ -669,23 +669,23 @@ ext_skip:
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	lghi	%r3,EXT_INTERRUPT
 	brasl	%r14,do_IRQ
-	j	io_return
+	j	.Lio_return
 
 /*
- * Load idle PSW. The second "half" of this function is in cleanup_idle.
+ * Load idle PSW. The second "half" of this function is in .Lcleanup_idle.
  */
 ENTRY(psw_idle)
 	stg	%r3,__SF_EMPTY(%r15)
-	larl	%r1,psw_idle_lpsw+4
+	larl	%r1,.Lpsw_idle_lpsw+4
 	stg	%r1,__SF_EMPTY+8(%r15)
 	STCK	__CLOCK_IDLE_ENTER(%r2)
 	stpt	__TIMER_IDLE_ENTER(%r2)
-psw_idle_lpsw:
+.Lpsw_idle_lpsw:
 	lpswe	__SF_EMPTY(%r15)
 	br	%r14
-psw_idle_end:
+.Lpsw_idle_end:
 
-__critical_end:
+.L__critical_end:
 
 /*
  * Machine check handler routines
@@ -701,7 +701,7 @@ ENTRY(mcck_int_handler)
 	lmg	%r8,%r9,__LC_MCK_OLD_PSW
 	HANDLE_SIE_INTERCEPT %r14,4
 	tm	__LC_MCCK_CODE,0x80	# system damage?
-	jo	mcck_panic		# yes -> rest of mcck code invalid
+	jo	.Lmcck_panic		# yes -> rest of mcck code invalid
 	lghi	%r14,__LC_CPU_TIMER_SAVE_AREA
 	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
 	tm	__LC_MCCK_CODE+5,0x02	# stored cpu timer value valid?
@@ -719,13 +719,13 @@ ENTRY(mcck_int_handler)
 2:	spt	0(%r14)
 	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
 3:	tm	__LC_MCCK_CODE+2,0x09	# mwp + ia of old psw valid?
-	jno	mcck_panic		# no -> skip cleanup critical
+	jno	.Lmcck_panic		# no -> skip cleanup critical
 	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT
 	tm	%r8,0x0001		# interrupting from user ?
-	jz	mcck_skip
+	jz	.Lmcck_skip
 	UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER
 	LAST_BREAK %r14
-mcck_skip:
+.Lmcck_skip:
 	lghi	%r14,__LC_GPREGS_SAVE_AREA+64
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),0(%r14)
@@ -735,7 +735,7 @@ mcck_skip:
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,s390_do_machine_check
 	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
-	jno	mcck_return
+	jno	.Lmcck_return
 	lg	%r1,__LC_KERNEL_STACK	# switch to kernel stack
 	mvc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
 	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
@@ -743,11 +743,11 @@ mcck_skip:
 	lgr	%r15,%r1
 	ssm	__LC_PGM_NEW_PSW	# turn dat on, keep irqs off
 	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
-	jno	mcck_return
+	jno	.Lmcck_return
 	TRACE_IRQS_OFF
 	brasl	%r14,s390_handle_mcck
 	TRACE_IRQS_ON
-mcck_return:
+.Lmcck_return:
 	lg	%r14,__LC_VDSO_PER_CPU
 	lmg	%r0,%r10,__PT_R0(%r11)
 	mvc	__LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
@@ -758,14 +758,14 @@ mcck_return:
 0:	lmg	%r11,%r15,__PT_R11(%r11)
 	lpswe	__LC_RETURN_MCCK_PSW
 
-mcck_panic:
+.Lmcck_panic:
 	lg	%r14,__LC_PANIC_STACK
 	slgr	%r14,%r15
 	srag	%r14,%r14,PAGE_SHIFT
 	jz	0f
 	lg	%r15,__LC_PANIC_STACK
 0:	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	j	mcck_skip
+	j	.Lmcck_skip
 
 #
 # PSW restart interrupt handler
@@ -815,69 +815,69 @@ stack_overflow:
 #endif
 
 	.align	8
-cleanup_table:
+.Lcleanup_table:
 	.quad	system_call
-	.quad	sysc_do_svc
-	.quad	sysc_tif
-	.quad	sysc_restore
-	.quad	sysc_done
-	.quad	io_tif
-	.quad	io_restore
-	.quad	io_done
+	.quad	.Lsysc_do_svc
+	.quad	.Lsysc_tif
+	.quad	.Lsysc_restore
+	.quad	.Lsysc_done
+	.quad	.Lio_tif
+	.quad	.Lio_restore
+	.quad	.Lio_done
 	.quad	psw_idle
-	.quad	psw_idle_end
+	.quad	.Lpsw_idle_end
 
 cleanup_critical:
-	clg	%r9,BASED(cleanup_table)	# system_call
+	clg	%r9,BASED(.Lcleanup_table)	# system_call
 	jl	0f
-	clg	%r9,BASED(cleanup_table+8)	# sysc_do_svc
-	jl	cleanup_system_call
-	clg	%r9,BASED(cleanup_table+16)	# sysc_tif
+	clg	%r9,BASED(.Lcleanup_table+8)	# .Lsysc_do_svc
+	jl	.Lcleanup_system_call
+	clg	%r9,BASED(.Lcleanup_table+16)	# .Lsysc_tif
 	jl	0f
-	clg	%r9,BASED(cleanup_table+24)	# sysc_restore
-	jl	cleanup_sysc_tif
-	clg	%r9,BASED(cleanup_table+32)	# sysc_done
-	jl	cleanup_sysc_restore
-	clg	%r9,BASED(cleanup_table+40)	# io_tif
+	clg	%r9,BASED(.Lcleanup_table+24)	# .Lsysc_restore
+	jl	.Lcleanup_sysc_tif
+	clg	%r9,BASED(.Lcleanup_table+32)	# .Lsysc_done
+	jl	.Lcleanup_sysc_restore
+	clg	%r9,BASED(.Lcleanup_table+40)	# .Lio_tif
 	jl	0f
-	clg	%r9,BASED(cleanup_table+48)	# io_restore
-	jl	cleanup_io_tif
-	clg	%r9,BASED(cleanup_table+56)	# io_done
-	jl	cleanup_io_restore
-	clg	%r9,BASED(cleanup_table+64)	# psw_idle
+	clg	%r9,BASED(.Lcleanup_table+48)	# .Lio_restore
+	jl	.Lcleanup_io_tif
+	clg	%r9,BASED(.Lcleanup_table+56)	# .Lio_done
+	jl	.Lcleanup_io_restore
+	clg	%r9,BASED(.Lcleanup_table+64)	# psw_idle
 	jl	0f
-	clg	%r9,BASED(cleanup_table+72)	# psw_idle_end
-	jl	cleanup_idle
+	clg	%r9,BASED(.Lcleanup_table+72)	# .Lpsw_idle_end
+	jl	.Lcleanup_idle
 0:	br	%r14
 
 
-cleanup_system_call:
+.Lcleanup_system_call:
 	# check if stpt has been executed
-	clg	%r9,BASED(cleanup_system_call_insn)
+	clg	%r9,BASED(.Lcleanup_system_call_insn)
 	jh	0f
 	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
 	cghi	%r11,__LC_SAVE_AREA_ASYNC
 	je	0f
 	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER
 0:	# check if stmg has been executed
-	clg	%r9,BASED(cleanup_system_call_insn+8)
+	clg	%r9,BASED(.Lcleanup_system_call_insn+8)
 	jh	0f
 	mvc	__LC_SAVE_AREA_SYNC(64),0(%r11)
 0:	# check if base register setup + TIF bit load has been done
-	clg	%r9,BASED(cleanup_system_call_insn+16)
+	clg	%r9,BASED(.Lcleanup_system_call_insn+16)
 	jhe	0f
 	# set up saved registers r10 and r12
 	stg	%r10,16(%r11)		# r10 last break
 	stg	%r12,32(%r11)		# r12 thread-info pointer
 0:	# check if the user time update has been done
-	clg	%r9,BASED(cleanup_system_call_insn+24)
+	clg	%r9,BASED(.Lcleanup_system_call_insn+24)
 	jh	0f
 	lg	%r15,__LC_EXIT_TIMER
 	slg	%r15,__LC_SYNC_ENTER_TIMER
 	alg	%r15,__LC_USER_TIMER
 	stg	%r15,__LC_USER_TIMER
 0:	# check if the system time update has been done
-	clg	%r9,BASED(cleanup_system_call_insn+32)
+	clg	%r9,BASED(.Lcleanup_system_call_insn+32)
 	jh	0f
 	lg	%r15,__LC_LAST_UPDATE_TIMER
 	slg	%r15,__LC_EXIT_TIMER
@@ -904,21 +904,21 @@ cleanup_system_call:
 	# setup saved register r15
 	stg	%r15,56(%r11)		# r15 stack pointer
 	# set new psw address and exit
-	larl	%r9,sysc_do_svc
+	larl	%r9,.Lsysc_do_svc
 	br	%r14
-cleanup_system_call_insn:
+.Lcleanup_system_call_insn:
 	.quad	system_call
-	.quad	sysc_stmg
-	.quad	sysc_per
-	.quad	sysc_vtime+18
-	.quad	sysc_vtime+42
+	.quad	.Lsysc_stmg
+	.quad	.Lsysc_per
+	.quad	.Lsysc_vtime+18
+	.quad	.Lsysc_vtime+42
 
-cleanup_sysc_tif:
-	larl	%r9,sysc_tif
+.Lcleanup_sysc_tif:
+	larl	%r9,.Lsysc_tif
 	br	%r14
 
-cleanup_sysc_restore:
-	clg	%r9,BASED(cleanup_sysc_restore_insn)
+.Lcleanup_sysc_restore:
+	clg	%r9,BASED(.Lcleanup_sysc_restore_insn)
 	je	0f
 	lg	%r9,24(%r11)		# get saved pointer to pt_regs
 	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r9)
@@ -926,15 +926,15 @@ cleanup_sysc_restore:
 	lmg	%r0,%r7,__PT_R0(%r9)
 0:	lmg	%r8,%r9,__LC_RETURN_PSW
 	br	%r14
-cleanup_sysc_restore_insn:
-	.quad	sysc_done - 4
+.Lcleanup_sysc_restore_insn:
+	.quad	.Lsysc_done - 4
 
-cleanup_io_tif:
-	larl	%r9,io_tif
+.Lcleanup_io_tif:
+	larl	%r9,.Lio_tif
 	br	%r14
 
-cleanup_io_restore:
-	clg	%r9,BASED(cleanup_io_restore_insn)
+.Lcleanup_io_restore:
+	clg	%r9,BASED(.Lcleanup_io_restore_insn)
 	je	0f
 	lg	%r9,24(%r11)		# get saved r11 pointer to pt_regs
 	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r9)
@@ -942,10 +942,10 @@ cleanup_io_restore:
 	lmg	%r0,%r7,__PT_R0(%r9)
 0:	lmg	%r8,%r9,__LC_RETURN_PSW
 	br	%r14
-cleanup_io_restore_insn:
-	.quad	io_done - 4
+.Lcleanup_io_restore_insn:
+	.quad	.Lio_done - 4
 
-cleanup_idle:
+.Lcleanup_idle:
 	# copy interrupt clock & cpu timer
 	mvc	__CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK
 	mvc	__TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER
@@ -954,7 +954,7 @@ cleanup_idle:
 	mvc	__CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
 	mvc	__TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER
 0:	# check if stck & stpt have been executed
-	clg	%r9,BASED(cleanup_idle_insn)
+	clg	%r9,BASED(.Lcleanup_idle_insn)
 	jhe	1f
 	mvc	__CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
 	mvc	__TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2)
@@ -973,17 +973,17 @@ cleanup_idle:
 	nihh	%r8,0xfcfd		# clear irq & wait state bits
 	lg	%r9,48(%r11)		# return from psw_idle
 	br	%r14
-cleanup_idle_insn:
-	.quad	psw_idle_lpsw
+.Lcleanup_idle_insn:
+	.quad	.Lpsw_idle_lpsw
 
 /*
  * Integer constants
  */
 	.align	8
 .Lcritical_start:
-	.quad	__critical_start
+	.quad	.L__critical_start
 .Lcritical_length:
-	.quad	__critical_end - __critical_start
+	.quad	.L__critical_end - .L__critical_start
 
 
 #if IS_ENABLED(CONFIG_KVM)
@@ -1000,25 +1000,25 @@ ENTRY(sie64a)
 	lmg	%r0,%r13,0(%r3)			# load guest gprs 0-13
 	lg	%r14,__LC_GMAP			# get gmap pointer
 	ltgr	%r14,%r14
-	jz	sie_gmap
+	jz	.Lsie_gmap
 	lctlg	%c1,%c1,__GMAP_ASCE(%r14)	# load primary asce
-sie_gmap:
+.Lsie_gmap:
 	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
 	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
 	tm	__SIE_PROG20+3(%r14),1		# last exit...
-	jnz	sie_done
+	jnz	.Lsie_done
 	LPP	__SF_EMPTY(%r15)		# set guest id
 	sie	0(%r14)
-sie_done:
+.Lsie_done:
 	LPP	__SF_EMPTY+16(%r15)		# set host id
 	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 # some program checks are suppressing. C code (e.g. do_protection_exception)
 # will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions between sie64a and sie_done should not cause program
+# instructions between sie64a and .Lsie_done should not cause program
 # interrupts. So lets use a nop (47 00 00 00) as a landing pad.
 # See also HANDLE_SIE_INTERCEPT
-rewind_pad:
+.Lrewind_pad:
 	nop	0
 	.globl sie_exit
 sie_exit:
@@ -1027,19 +1027,19 @@ sie_exit:
 	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
 	lg	%r2,__SF_EMPTY+24(%r15)		# return exit reason code
 	br	%r14
-sie_fault:
+.Lsie_fault:
 	lghi	%r14,-EFAULT
 	stg	%r14,__SF_EMPTY+24(%r15)	# set exit reason code
 	j	sie_exit
 
 	.align	8
 .Lsie_critical:
-	.quad	sie_gmap
+	.quad	.Lsie_gmap
 .Lsie_critical_length:
-	.quad	sie_done - sie_gmap
+	.quad	.Lsie_done - .Lsie_gmap
 
-	EX_TABLE(rewind_pad,sie_fault)
-	EX_TABLE(sie_exit,sie_fault)
+	EX_TABLE(.Lrewind_pad,.Lsie_fault)
+	EX_TABLE(sie_exit,.Lsie_fault)
 #endif
 
 		.section .rodata, "a"
-- 
cgit v1.2.3-59-g8ed1b


From 9de45f736f21655400fd56b85bfbaf507cc2959d Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Thu, 4 Dec 2014 11:07:19 +0100
Subject: s390/mm: fix memory leak of ptlock in pmd_free_tlb

The pmd_free_tlb function fails to call pgtable_pmd_page_dtor.
Without the call the ptlock for the pmd tables will not be freed.
Add the missing call.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/tlb.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 572c59949004..06d8741ad6f4 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -121,6 +121,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 #ifdef CONFIG_64BIT
 	if (tlb->mm->context.asce_limit <= (1UL << 31))
 		return;
+	pgtable_pmd_page_dtor(virt_to_page(pmd));
 	tlb_remove_table(tlb, pmd);
 #endif
 }
-- 
cgit v1.2.3-59-g8ed1b


From e2578b82c4dfb0d339e25abc57ef6d6c3a932ff6 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 5 Dec 2014 16:30:49 +0100
Subject: s390/eadm: change timeout value

Tests have shown that 5 seconds is sometimes not enough for an IRQ
to arrive (especially when the device is doing garbage collection).
Raise EADM_TIMEOUT from 5 to 7 seconds.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/eadm_sch.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/s390/cio/eadm_sch.c b/drivers/s390/cio/eadm_sch.c
index 37f0834300ea..bee8c11cd086 100644
--- a/drivers/s390/cio/eadm_sch.c
+++ b/drivers/s390/cio/eadm_sch.c
@@ -31,7 +31,7 @@
 MODULE_DESCRIPTION("driver for s390 eadm subchannels");
 MODULE_LICENSE("GPL");
 
-#define EADM_TIMEOUT (5 * HZ)
+#define EADM_TIMEOUT (7 * HZ)
 static DEFINE_SPINLOCK(list_lock);
 static LIST_HEAD(eadm_list);
 
-- 
cgit v1.2.3-59-g8ed1b


From 9d4df77fab7347a74a9938521ffad8d8fab2671d Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 5 Dec 2014 16:32:13 +0100
Subject: s390/scm_block: use mempool to manage aidaw requests

We currently use one preallocated page per HW request to store
aidaws. With this patch we use mempool to allocate an aidaw page
whenever we need it.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/scm_blk.c         | 45 ++++++++++++++++++++++++++++++------
 drivers/s390/block/scm_blk.h         |  3 ++-
 drivers/s390/block/scm_blk_cluster.c | 19 +++++++++++----
 3 files changed, 55 insertions(+), 12 deletions(-)

diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 56046ab39629..5b2abadea094 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -10,6 +10,7 @@
 
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
+#include <linux/mempool.h>
 #include <linux/module.h>
 #include <linux/blkdev.h>
 #include <linux/genhd.h>
@@ -20,6 +21,7 @@
 
 debug_info_t *scm_debug;
 static int scm_major;
+static mempool_t *aidaw_pool;
 static DEFINE_SPINLOCK(list_lock);
 static LIST_HEAD(inactive_requests);
 static unsigned int nr_requests = 64;
@@ -36,7 +38,6 @@ static void __scm_free_rq(struct scm_request *scmrq)
 	struct aob_rq_header *aobrq = to_aobrq(scmrq);
 
 	free_page((unsigned long) scmrq->aob);
-	free_page((unsigned long) scmrq->aidaw);
 	__scm_free_rq_cluster(scmrq);
 	kfree(aobrq);
 }
@@ -53,6 +54,8 @@ static void scm_free_rqs(void)
 		__scm_free_rq(scmrq);
 	}
 	spin_unlock_irq(&list_lock);
+
+	mempool_destroy(aidaw_pool);
 }
 
 static int __scm_alloc_rq(void)
@@ -65,9 +68,8 @@ static int __scm_alloc_rq(void)
 		return -ENOMEM;
 
 	scmrq = (void *) aobrq->data;
-	scmrq->aidaw = (void *) get_zeroed_page(GFP_DMA);
 	scmrq->aob = (void *) get_zeroed_page(GFP_DMA);
-	if (!scmrq->aob || !scmrq->aidaw) {
+	if (!scmrq->aob) {
 		__scm_free_rq(scmrq);
 		return -ENOMEM;
 	}
@@ -89,6 +91,10 @@ static int scm_alloc_rqs(unsigned int nrqs)
 {
 	int ret = 0;
 
+	aidaw_pool = mempool_create_page_pool(max(nrqs/8, 1U), 0);
+	if (!aidaw_pool)
+		return -ENOMEM;
+
 	while (nrqs-- && !ret)
 		ret = __scm_alloc_rq();
 
@@ -111,8 +117,13 @@ out:
 
 static void scm_request_done(struct scm_request *scmrq)
 {
+	struct msb *msb = &scmrq->aob->msb[0];
+	u64 aidaw = msb->data_addr;
 	unsigned long flags;
 
+	if ((msb->flags & MSB_FLAG_IDA) && aidaw)
+		mempool_free(virt_to_page(aidaw), aidaw_pool);
+
 	spin_lock_irqsave(&list_lock, flags);
 	list_add(&scmrq->list, &inactive_requests);
 	spin_unlock_irqrestore(&list_lock, flags);
@@ -123,15 +134,26 @@ static bool scm_permit_request(struct scm_blk_dev *bdev, struct request *req)
 	return rq_data_dir(req) != WRITE || bdev->state != SCM_WR_PROHIBIT;
 }
 
-static void scm_request_prepare(struct scm_request *scmrq)
+struct aidaw *scm_aidaw_alloc(void)
+{
+	struct page *page = mempool_alloc(aidaw_pool, GFP_ATOMIC);
+
+	return page ? page_address(page) : NULL;
+}
+
+static int scm_request_prepare(struct scm_request *scmrq)
 {
 	struct scm_blk_dev *bdev = scmrq->bdev;
 	struct scm_device *scmdev = bdev->gendisk->private_data;
-	struct aidaw *aidaw = scmrq->aidaw;
+	struct aidaw *aidaw = scm_aidaw_alloc();
 	struct msb *msb = &scmrq->aob->msb[0];
 	struct req_iterator iter;
 	struct bio_vec bv;
 
+	if (!aidaw)
+		return -ENOMEM;
+
+	memset(aidaw, 0, PAGE_SIZE);
 	msb->bs = MSB_BS_4K;
 	scmrq->aob->request.msb_count = 1;
 	msb->scm_addr = scmdev->address +
@@ -147,6 +169,8 @@ static void scm_request_prepare(struct scm_request *scmrq)
 		aidaw->data_addr = (u64) page_address(bv.bv_page);
 		aidaw++;
 	}
+
+	return 0;
 }
 
 static inline void scm_request_init(struct scm_blk_dev *bdev,
@@ -157,7 +181,6 @@ static inline void scm_request_init(struct scm_blk_dev *bdev,
 	struct aob *aob = scmrq->aob;
 
 	memset(aob, 0, sizeof(*aob));
-	memset(scmrq->aidaw, 0, PAGE_SIZE);
 	aobrq->scmdev = bdev->scmdev;
 	aob->request.cmd_code = ARQB_CMD_MOVE;
 	aob->request.data = (u64) aobrq;
@@ -236,7 +259,15 @@ static void scm_blk_request(struct request_queue *rq)
 			scm_initiate_cluster_request(scmrq);
 			return;
 		}
-		scm_request_prepare(scmrq);
+
+		if (scm_request_prepare(scmrq)) {
+			SCM_LOG(5, "no aidaw");
+			scm_release_cluster(scmrq);
+			scm_request_done(scmrq);
+			scm_ensure_queue_restart(bdev);
+			return;
+		}
+
 		atomic_inc(&bdev->queued_reqs);
 		blk_start_request(req);
 
diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h
index e59331e6c2e5..a315ef0e96f5 100644
--- a/drivers/s390/block/scm_blk.h
+++ b/drivers/s390/block/scm_blk.h
@@ -31,7 +31,6 @@ struct scm_blk_dev {
 struct scm_request {
 	struct scm_blk_dev *bdev;
 	struct request *request;
-	struct aidaw *aidaw;
 	struct aob *aob;
 	struct list_head list;
 	u8 retries;
@@ -55,6 +54,8 @@ void scm_blk_irq(struct scm_device *, void *, int);
 void scm_request_finish(struct scm_request *);
 void scm_request_requeue(struct scm_request *);
 
+struct aidaw *scm_aidaw_alloc(void);
+
 int scm_drv_init(void);
 void scm_drv_cleanup(void);
 
diff --git a/drivers/s390/block/scm_blk_cluster.c b/drivers/s390/block/scm_blk_cluster.c
index 9aae909d47a5..4787f80e5537 100644
--- a/drivers/s390/block/scm_blk_cluster.c
+++ b/drivers/s390/block/scm_blk_cluster.c
@@ -114,14 +114,14 @@ void scm_blk_dev_cluster_setup(struct scm_blk_dev *bdev)
 	blk_queue_io_opt(bdev->rq, CLUSTER_SIZE);
 }
 
-static void scm_prepare_cluster_request(struct scm_request *scmrq)
+static int scm_prepare_cluster_request(struct scm_request *scmrq)
 {
 	struct scm_blk_dev *bdev = scmrq->bdev;
 	struct scm_device *scmdev = bdev->gendisk->private_data;
 	struct request *req = scmrq->request;
-	struct aidaw *aidaw = scmrq->aidaw;
 	struct msb *msb = &scmrq->aob->msb[0];
 	struct req_iterator iter;
+	struct aidaw *aidaw;
 	struct bio_vec bv;
 	int i = 0;
 	u64 addr;
@@ -131,6 +131,11 @@ static void scm_prepare_cluster_request(struct scm_request *scmrq)
 		scmrq->cluster.state = CLUSTER_READ;
 		/* fall through */
 	case CLUSTER_READ:
+		aidaw = scm_aidaw_alloc();
+		if (!aidaw)
+			return -ENOMEM;
+
+		memset(aidaw, 0, PAGE_SIZE);
 		scmrq->aob->request.msb_count = 1;
 		msb->bs = MSB_BS_4K;
 		msb->oc = MSB_OC_READ;
@@ -153,6 +158,7 @@ static void scm_prepare_cluster_request(struct scm_request *scmrq)
 
 		break;
 	case CLUSTER_WRITE:
+		aidaw = (void *) msb->data_addr;
 		msb->oc = MSB_OC_WRITE;
 
 		for (addr = msb->scm_addr;
@@ -173,6 +179,7 @@ static void scm_prepare_cluster_request(struct scm_request *scmrq)
 		}
 		break;
 	}
+	return 0;
 }
 
 bool scm_need_cluster_request(struct scm_request *scmrq)
@@ -186,9 +193,13 @@ bool scm_need_cluster_request(struct scm_request *scmrq)
 /* Called with queue lock held. */
 void scm_initiate_cluster_request(struct scm_request *scmrq)
 {
-	scm_prepare_cluster_request(scmrq);
+	if (scm_prepare_cluster_request(scmrq))
+		goto requeue;
 	if (eadm_start_aob(scmrq->aob))
-		scm_request_requeue(scmrq);
+		goto requeue;
+	return;
+requeue:
+	scm_request_requeue(scmrq);
 }
 
 bool scm_test_cluster_request(struct scm_request *scmrq)
-- 
cgit v1.2.3-59-g8ed1b


From de88d0d28fe932637eb5b7ebf9e638256cf07979 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 5 Dec 2014 16:41:47 +0100
Subject: s390/scm_block: allocate aidaw pages only when necessary

AOBs (the structure describing the HW request) need to be 4K
aligned but very little of that page is actually used. With
this patch we place aidaws at the end of the AOB page and only
allocate a separate page for aidaws when we have to (lists of
aidaws must not cross page boundaries).

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/scm_blk.c         | 32 ++++++++++++++++++++++++++++----
 drivers/s390/block/scm_blk.h         |  3 ++-
 drivers/s390/block/scm_blk_cluster.c | 13 ++++++-------
 3 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 5b2abadea094..f5c369ce7e73 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -121,7 +121,8 @@ static void scm_request_done(struct scm_request *scmrq)
 	u64 aidaw = msb->data_addr;
 	unsigned long flags;
 
-	if ((msb->flags & MSB_FLAG_IDA) && aidaw)
+	if ((msb->flags & MSB_FLAG_IDA) && aidaw &&
+	    IS_ALIGNED(aidaw, PAGE_SIZE))
 		mempool_free(virt_to_page(aidaw), aidaw_pool);
 
 	spin_lock_irqsave(&list_lock, flags);
@@ -134,26 +135,47 @@ static bool scm_permit_request(struct scm_blk_dev *bdev, struct request *req)
 	return rq_data_dir(req) != WRITE || bdev->state != SCM_WR_PROHIBIT;
 }
 
-struct aidaw *scm_aidaw_alloc(void)
+static inline struct aidaw *scm_aidaw_alloc(void)
 {
 	struct page *page = mempool_alloc(aidaw_pool, GFP_ATOMIC);
 
 	return page ? page_address(page) : NULL;
 }
 
+static inline unsigned long scm_aidaw_bytes(struct aidaw *aidaw)
+{
+	unsigned long _aidaw = (unsigned long) aidaw;
+	unsigned long bytes = ALIGN(_aidaw, PAGE_SIZE) - _aidaw;
+
+	return (bytes / sizeof(*aidaw)) * PAGE_SIZE;
+}
+
+struct aidaw *scm_aidaw_fetch(struct scm_request *scmrq, unsigned int bytes)
+{
+	struct aidaw *aidaw;
+
+	if (scm_aidaw_bytes(scmrq->next_aidaw) >= bytes)
+		return scmrq->next_aidaw;
+
+	aidaw = scm_aidaw_alloc();
+	if (aidaw)
+		memset(aidaw, 0, PAGE_SIZE);
+	return aidaw;
+}
+
 static int scm_request_prepare(struct scm_request *scmrq)
 {
 	struct scm_blk_dev *bdev = scmrq->bdev;
 	struct scm_device *scmdev = bdev->gendisk->private_data;
-	struct aidaw *aidaw = scm_aidaw_alloc();
 	struct msb *msb = &scmrq->aob->msb[0];
 	struct req_iterator iter;
+	struct aidaw *aidaw;
 	struct bio_vec bv;
 
+	aidaw = scm_aidaw_fetch(scmrq, blk_rq_bytes(scmrq->request));
 	if (!aidaw)
 		return -ENOMEM;
 
-	memset(aidaw, 0, PAGE_SIZE);
 	msb->bs = MSB_BS_4K;
 	scmrq->aob->request.msb_count = 1;
 	msb->scm_addr = scmdev->address +
@@ -188,6 +210,8 @@ static inline void scm_request_init(struct scm_blk_dev *bdev,
 	scmrq->bdev = bdev;
 	scmrq->retries = 4;
 	scmrq->error = 0;
+	/* We don't use all msbs - place aidaws at the end of the aob page. */
+	scmrq->next_aidaw = (void *) &aob->msb[1];
 	scm_request_cluster_init(scmrq);
 }
 
diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h
index a315ef0e96f5..6334e1609208 100644
--- a/drivers/s390/block/scm_blk.h
+++ b/drivers/s390/block/scm_blk.h
@@ -30,6 +30,7 @@ struct scm_blk_dev {
 
 struct scm_request {
 	struct scm_blk_dev *bdev;
+	struct aidaw *next_aidaw;
 	struct request *request;
 	struct aob *aob;
 	struct list_head list;
@@ -54,7 +55,7 @@ void scm_blk_irq(struct scm_device *, void *, int);
 void scm_request_finish(struct scm_request *);
 void scm_request_requeue(struct scm_request *);
 
-struct aidaw *scm_aidaw_alloc(void);
+struct aidaw *scm_aidaw_fetch(struct scm_request *scmrq, unsigned int bytes);
 
 int scm_drv_init(void);
 void scm_drv_cleanup(void);
diff --git a/drivers/s390/block/scm_blk_cluster.c b/drivers/s390/block/scm_blk_cluster.c
index 4787f80e5537..2fd01320b978 100644
--- a/drivers/s390/block/scm_blk_cluster.c
+++ b/drivers/s390/block/scm_blk_cluster.c
@@ -131,16 +131,9 @@ static int scm_prepare_cluster_request(struct scm_request *scmrq)
 		scmrq->cluster.state = CLUSTER_READ;
 		/* fall through */
 	case CLUSTER_READ:
-		aidaw = scm_aidaw_alloc();
-		if (!aidaw)
-			return -ENOMEM;
-
-		memset(aidaw, 0, PAGE_SIZE);
-		scmrq->aob->request.msb_count = 1;
 		msb->bs = MSB_BS_4K;
 		msb->oc = MSB_OC_READ;
 		msb->flags = MSB_FLAG_IDA;
-		msb->data_addr = (u64) aidaw;
 		msb->blk_count = write_cluster_size;
 
 		addr = scmdev->address + ((u64) blk_rq_pos(req) << 9);
@@ -151,6 +144,12 @@ static int scm_prepare_cluster_request(struct scm_request *scmrq)
 			       CLUSTER_SIZE))
 			msb->blk_count = 2 * write_cluster_size;
 
+		aidaw = scm_aidaw_fetch(scmrq, msb->blk_count * PAGE_SIZE);
+		if (!aidaw)
+			return -ENOMEM;
+
+		scmrq->aob->request.msb_count = 1;
+		msb->data_addr = (u64) aidaw;
 		for (i = 0; i < msb->blk_count; i++) {
 			aidaw->data_addr = (u64) scmrq->cluster.buf[i];
 			aidaw++;
-- 
cgit v1.2.3-59-g8ed1b


From bbc610a96524fbfa4ed38c4b1fc6348a1169f358 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 5 Dec 2014 16:43:58 +0100
Subject: s390/scm_block: handle multiple requests in one HW request

Handle up to 8 block layer requests per HW request. These requests
can be processed in parallel on the device leading to better
throughput (and fewer interrupts). The overhead for additional
requests is small since we don't blindly allocate new aidaws but
try to use what's left of the previous one.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/scm_blk.c         | 145 ++++++++++++++++++++++++-----------
 drivers/s390/block/scm_blk.h         |   3 +-
 drivers/s390/block/scm_blk_cluster.c |  47 ++++++++----
 3 files changed, 132 insertions(+), 63 deletions(-)

diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index f5c369ce7e73..cd27cb92ac6d 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -117,13 +117,19 @@ out:
 
 static void scm_request_done(struct scm_request *scmrq)
 {
-	struct msb *msb = &scmrq->aob->msb[0];
-	u64 aidaw = msb->data_addr;
 	unsigned long flags;
+	struct msb *msb;
+	u64 aidaw;
+	int i;
 
-	if ((msb->flags & MSB_FLAG_IDA) && aidaw &&
-	    IS_ALIGNED(aidaw, PAGE_SIZE))
-		mempool_free(virt_to_page(aidaw), aidaw_pool);
+	for (i = 0; i < SCM_RQ_PER_IO && scmrq->request[i]; i++) {
+		msb = &scmrq->aob->msb[i];
+		aidaw = msb->data_addr;
+
+		if ((msb->flags & MSB_FLAG_IDA) && aidaw &&
+		    IS_ALIGNED(aidaw, PAGE_SIZE))
+			mempool_free(virt_to_page(aidaw), aidaw_pool);
+	}
 
 	spin_lock_irqsave(&list_lock, flags);
 	list_add(&scmrq->list, &inactive_requests);
@@ -167,51 +173,57 @@ static int scm_request_prepare(struct scm_request *scmrq)
 {
 	struct scm_blk_dev *bdev = scmrq->bdev;
 	struct scm_device *scmdev = bdev->gendisk->private_data;
-	struct msb *msb = &scmrq->aob->msb[0];
+	int pos = scmrq->aob->request.msb_count;
+	struct msb *msb = &scmrq->aob->msb[pos];
+	struct request *req = scmrq->request[pos];
 	struct req_iterator iter;
 	struct aidaw *aidaw;
 	struct bio_vec bv;
 
-	aidaw = scm_aidaw_fetch(scmrq, blk_rq_bytes(scmrq->request));
+	aidaw = scm_aidaw_fetch(scmrq, blk_rq_bytes(req));
 	if (!aidaw)
 		return -ENOMEM;
 
 	msb->bs = MSB_BS_4K;
-	scmrq->aob->request.msb_count = 1;
-	msb->scm_addr = scmdev->address +
-		((u64) blk_rq_pos(scmrq->request) << 9);
-	msb->oc = (rq_data_dir(scmrq->request) == READ) ?
-		MSB_OC_READ : MSB_OC_WRITE;
+	scmrq->aob->request.msb_count++;
+	msb->scm_addr = scmdev->address + ((u64) blk_rq_pos(req) << 9);
+	msb->oc = (rq_data_dir(req) == READ) ? MSB_OC_READ : MSB_OC_WRITE;
 	msb->flags |= MSB_FLAG_IDA;
 	msb->data_addr = (u64) aidaw;
 
-	rq_for_each_segment(bv, scmrq->request, iter) {
+	rq_for_each_segment(bv, req, iter) {
 		WARN_ON(bv.bv_offset);
 		msb->blk_count += bv.bv_len >> 12;
 		aidaw->data_addr = (u64) page_address(bv.bv_page);
 		aidaw++;
 	}
 
+	scmrq->next_aidaw = aidaw;
 	return 0;
 }
 
+static inline void scm_request_set(struct scm_request *scmrq,
+				   struct request *req)
+{
+	scmrq->request[scmrq->aob->request.msb_count] = req;
+}
+
 static inline void scm_request_init(struct scm_blk_dev *bdev,
-				    struct scm_request *scmrq,
-				    struct request *req)
+				    struct scm_request *scmrq)
 {
 	struct aob_rq_header *aobrq = to_aobrq(scmrq);
 	struct aob *aob = scmrq->aob;
 
+	memset(scmrq->request, 0, sizeof(scmrq->request));
 	memset(aob, 0, sizeof(*aob));
 	aobrq->scmdev = bdev->scmdev;
 	aob->request.cmd_code = ARQB_CMD_MOVE;
 	aob->request.data = (u64) aobrq;
-	scmrq->request = req;
 	scmrq->bdev = bdev;
 	scmrq->retries = 4;
 	scmrq->error = 0;
 	/* We don't use all msbs - place aidaws at the end of the aob page. */
-	scmrq->next_aidaw = (void *) &aob->msb[1];
+	scmrq->next_aidaw = (void *) &aob->msb[SCM_RQ_PER_IO];
 	scm_request_cluster_init(scmrq);
 }
 
@@ -227,9 +239,12 @@ static void scm_ensure_queue_restart(struct scm_blk_dev *bdev)
 void scm_request_requeue(struct scm_request *scmrq)
 {
 	struct scm_blk_dev *bdev = scmrq->bdev;
+	int i;
 
 	scm_release_cluster(scmrq);
-	blk_requeue_request(bdev->rq, scmrq->request);
+	for (i = 0; i < SCM_RQ_PER_IO && scmrq->request[i]; i++)
+		blk_requeue_request(bdev->rq, scmrq->request[i]);
+
 	atomic_dec(&bdev->queued_reqs);
 	scm_request_done(scmrq);
 	scm_ensure_queue_restart(bdev);
@@ -238,20 +253,41 @@ void scm_request_requeue(struct scm_request *scmrq)
 void scm_request_finish(struct scm_request *scmrq)
 {
 	struct scm_blk_dev *bdev = scmrq->bdev;
+	int i;
 
 	scm_release_cluster(scmrq);
-	blk_end_request_all(scmrq->request, scmrq->error);
+	for (i = 0; i < SCM_RQ_PER_IO && scmrq->request[i]; i++)
+		blk_end_request_all(scmrq->request[i], scmrq->error);
+
 	atomic_dec(&bdev->queued_reqs);
 	scm_request_done(scmrq);
 }
 
+static int scm_request_start(struct scm_request *scmrq)
+{
+	struct scm_blk_dev *bdev = scmrq->bdev;
+	int ret;
+
+	atomic_inc(&bdev->queued_reqs);
+	if (!scmrq->aob->request.msb_count) {
+		scm_request_requeue(scmrq);
+		return -EINVAL;
+	}
+
+	ret = eadm_start_aob(scmrq->aob);
+	if (ret) {
+		SCM_LOG(5, "no subchannel");
+		scm_request_requeue(scmrq);
+	}
+	return ret;
+}
+
 static void scm_blk_request(struct request_queue *rq)
 {
 	struct scm_device *scmdev = rq->queuedata;
 	struct scm_blk_dev *bdev = dev_get_drvdata(&scmdev->dev);
-	struct scm_request *scmrq;
+	struct scm_request *scmrq = NULL;
 	struct request *req;
-	int ret;
 
 	while ((req = blk_peek_request(rq))) {
 		if (req->cmd_type != REQ_TYPE_FS) {
@@ -261,47 +297,64 @@ static void scm_blk_request(struct request_queue *rq)
 			continue;
 		}
 
-		if (!scm_permit_request(bdev, req)) {
-			scm_ensure_queue_restart(bdev);
-			return;
-		}
-		scmrq = scm_request_fetch();
+		if (!scm_permit_request(bdev, req))
+			goto out;
+
 		if (!scmrq) {
-			SCM_LOG(5, "no request");
-			scm_ensure_queue_restart(bdev);
-			return;
+			scmrq = scm_request_fetch();
+			if (!scmrq) {
+				SCM_LOG(5, "no request");
+				goto out;
+			}
+			scm_request_init(bdev, scmrq);
 		}
-		scm_request_init(bdev, scmrq, req);
+		scm_request_set(scmrq, req);
+
 		if (!scm_reserve_cluster(scmrq)) {
 			SCM_LOG(5, "cluster busy");
+			scm_request_set(scmrq, NULL);
+			if (scmrq->aob->request.msb_count)
+				goto out;
+
 			scm_request_done(scmrq);
 			return;
 		}
+
 		if (scm_need_cluster_request(scmrq)) {
-			atomic_inc(&bdev->queued_reqs);
-			blk_start_request(req);
-			scm_initiate_cluster_request(scmrq);
-			return;
+			if (scmrq->aob->request.msb_count) {
+				/* Start cluster requests separately. */
+				scm_request_set(scmrq, NULL);
+				if (scm_request_start(scmrq))
+					return;
+			} else {
+				atomic_inc(&bdev->queued_reqs);
+				blk_start_request(req);
+				scm_initiate_cluster_request(scmrq);
+			}
+			scmrq = NULL;
+			continue;
 		}
 
 		if (scm_request_prepare(scmrq)) {
-			SCM_LOG(5, "no aidaw");
-			scm_release_cluster(scmrq);
-			scm_request_done(scmrq);
-			scm_ensure_queue_restart(bdev);
-			return;
+			SCM_LOG(5, "aidaw alloc failed");
+			scm_request_set(scmrq, NULL);
+			goto out;
 		}
-
-		atomic_inc(&bdev->queued_reqs);
 		blk_start_request(req);
 
-		ret = eadm_start_aob(scmrq->aob);
-		if (ret) {
-			SCM_LOG(5, "no subchannel");
-			scm_request_requeue(scmrq);
+		if (scmrq->aob->request.msb_count < SCM_RQ_PER_IO)
+			continue;
+
+		if (scm_request_start(scmrq))
 			return;
-		}
+
+		scmrq = NULL;
 	}
+out:
+	if (scmrq)
+		scm_request_start(scmrq);
+	else
+		scm_ensure_queue_restart(bdev);
 }
 
 static void __scmrq_log_error(struct scm_request *scmrq)
diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h
index 6334e1609208..3dae0a3570ce 100644
--- a/drivers/s390/block/scm_blk.h
+++ b/drivers/s390/block/scm_blk.h
@@ -11,6 +11,7 @@
 #include <asm/eadm.h>
 
 #define SCM_NR_PARTS 8
+#define SCM_RQ_PER_IO 8
 #define SCM_QUEUE_DELAY 5
 
 struct scm_blk_dev {
@@ -31,7 +32,7 @@ struct scm_blk_dev {
 struct scm_request {
 	struct scm_blk_dev *bdev;
 	struct aidaw *next_aidaw;
-	struct request *request;
+	struct request *request[SCM_RQ_PER_IO];
 	struct aob *aob;
 	struct list_head list;
 	u8 retries;
diff --git a/drivers/s390/block/scm_blk_cluster.c b/drivers/s390/block/scm_blk_cluster.c
index 2fd01320b978..09db45296eed 100644
--- a/drivers/s390/block/scm_blk_cluster.c
+++ b/drivers/s390/block/scm_blk_cluster.c
@@ -57,39 +57,52 @@ void scm_request_cluster_init(struct scm_request *scmrq)
 	scmrq->cluster.state = CLUSTER_NONE;
 }
 
-static bool clusters_intersect(struct scm_request *A, struct scm_request *B)
+static bool clusters_intersect(struct request *A, struct request *B)
 {
 	unsigned long firstA, lastA, firstB, lastB;
 
-	firstA = ((u64) blk_rq_pos(A->request) << 9) / CLUSTER_SIZE;
-	lastA = (((u64) blk_rq_pos(A->request) << 9) +
-		    blk_rq_bytes(A->request) - 1) / CLUSTER_SIZE;
+	firstA = ((u64) blk_rq_pos(A) << 9) / CLUSTER_SIZE;
+	lastA = (((u64) blk_rq_pos(A) << 9) +
+		    blk_rq_bytes(A) - 1) / CLUSTER_SIZE;
 
-	firstB = ((u64) blk_rq_pos(B->request) << 9) / CLUSTER_SIZE;
-	lastB = (((u64) blk_rq_pos(B->request) << 9) +
-		    blk_rq_bytes(B->request) - 1) / CLUSTER_SIZE;
+	firstB = ((u64) blk_rq_pos(B) << 9) / CLUSTER_SIZE;
+	lastB = (((u64) blk_rq_pos(B) << 9) +
+		    blk_rq_bytes(B) - 1) / CLUSTER_SIZE;
 
 	return (firstB <= lastA && firstA <= lastB);
 }
 
 bool scm_reserve_cluster(struct scm_request *scmrq)
 {
+	struct request *req = scmrq->request[scmrq->aob->request.msb_count];
 	struct scm_blk_dev *bdev = scmrq->bdev;
 	struct scm_request *iter;
+	int pos, add = 1;
 
 	if (write_cluster_size == 0)
 		return true;
 
 	spin_lock(&bdev->lock);
 	list_for_each_entry(iter, &bdev->cluster_list, cluster.list) {
-		if (clusters_intersect(scmrq, iter) &&
-		    (rq_data_dir(scmrq->request) == WRITE ||
-		     rq_data_dir(iter->request) == WRITE)) {
-			spin_unlock(&bdev->lock);
-			return false;
+		if (iter == scmrq) {
+			/*
+			 * We don't have to use clusters_intersect here, since
+			 * cluster requests are always started separately.
+			 */
+			add = 0;
+			continue;
+		}
+		for (pos = 0; pos <= iter->aob->request.msb_count; pos++) {
+			if (clusters_intersect(req, iter->request[pos]) &&
+			    (rq_data_dir(req) == WRITE ||
+			     rq_data_dir(iter->request[pos]) == WRITE)) {
+				spin_unlock(&bdev->lock);
+				return false;
+			}
 		}
 	}
-	list_add(&scmrq->cluster.list, &bdev->cluster_list);
+	if (add)
+		list_add(&scmrq->cluster.list, &bdev->cluster_list);
 	spin_unlock(&bdev->lock);
 
 	return true;
@@ -118,7 +131,7 @@ static int scm_prepare_cluster_request(struct scm_request *scmrq)
 {
 	struct scm_blk_dev *bdev = scmrq->bdev;
 	struct scm_device *scmdev = bdev->gendisk->private_data;
-	struct request *req = scmrq->request;
+	struct request *req = scmrq->request[0];
 	struct msb *msb = &scmrq->aob->msb[0];
 	struct req_iterator iter;
 	struct aidaw *aidaw;
@@ -183,10 +196,12 @@ static int scm_prepare_cluster_request(struct scm_request *scmrq)
 
 bool scm_need_cluster_request(struct scm_request *scmrq)
 {
-	if (rq_data_dir(scmrq->request) == READ)
+	int pos = scmrq->aob->request.msb_count;
+
+	if (rq_data_dir(scmrq->request[pos]) == READ)
 		return false;
 
-	return blk_rq_bytes(scmrq->request) < CLUSTER_SIZE;
+	return blk_rq_bytes(scmrq->request[pos]) < CLUSTER_SIZE;
 }
 
 /* Called with queue lock held. */
-- 
cgit v1.2.3-59-g8ed1b


From 8622384f138b786b9ae639e79ccfb84c7db82cbc Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 5 Dec 2014 16:47:17 +0100
Subject: s390/scm_block: make the number of reqs per HW req configurable

Introduce a module parameter to specify the number of requests
we try to handle with one HW request.

Suggested-by: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/scm_blk.c | 48 ++++++++++++++++++++++++++++++--------------
 drivers/s390/block/scm_blk.h |  3 +--
 2 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index cd27cb92ac6d..75d9896deccb 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -25,10 +25,14 @@ static mempool_t *aidaw_pool;
 static DEFINE_SPINLOCK(list_lock);
 static LIST_HEAD(inactive_requests);
 static unsigned int nr_requests = 64;
+static unsigned int nr_requests_per_io = 8;
 static atomic_t nr_devices = ATOMIC_INIT(0);
 module_param(nr_requests, uint, S_IRUGO);
 MODULE_PARM_DESC(nr_requests, "Number of parallel requests.");
 
+module_param(nr_requests_per_io, uint, S_IRUGO);
+MODULE_PARM_DESC(nr_requests_per_io, "Number of requests per IO.");
+
 MODULE_DESCRIPTION("Block driver for s390 storage class memory.");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("scm:scmdev*");
@@ -39,6 +43,7 @@ static void __scm_free_rq(struct scm_request *scmrq)
 
 	free_page((unsigned long) scmrq->aob);
 	__scm_free_rq_cluster(scmrq);
+	kfree(scmrq->request);
 	kfree(aobrq);
 }
 
@@ -69,15 +74,16 @@ static int __scm_alloc_rq(void)
 
 	scmrq = (void *) aobrq->data;
 	scmrq->aob = (void *) get_zeroed_page(GFP_DMA);
-	if (!scmrq->aob) {
-		__scm_free_rq(scmrq);
-		return -ENOMEM;
-	}
+	if (!scmrq->aob)
+		goto free;
 
-	if (__scm_alloc_rq_cluster(scmrq)) {
-		__scm_free_rq(scmrq);
-		return -ENOMEM;
-	}
+	scmrq->request = kcalloc(nr_requests_per_io, sizeof(scmrq->request[0]),
+				 GFP_KERNEL);
+	if (!scmrq->request)
+		goto free;
+
+	if (__scm_alloc_rq_cluster(scmrq))
+		goto free;
 
 	INIT_LIST_HEAD(&scmrq->list);
 	spin_lock_irq(&list_lock);
@@ -85,6 +91,9 @@ static int __scm_alloc_rq(void)
 	spin_unlock_irq(&list_lock);
 
 	return 0;
+free:
+	__scm_free_rq(scmrq);
+	return -ENOMEM;
 }
 
 static int scm_alloc_rqs(unsigned int nrqs)
@@ -122,7 +131,7 @@ static void scm_request_done(struct scm_request *scmrq)
 	u64 aidaw;
 	int i;
 
-	for (i = 0; i < SCM_RQ_PER_IO && scmrq->request[i]; i++) {
+	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
 		msb = &scmrq->aob->msb[i];
 		aidaw = msb->data_addr;
 
@@ -214,7 +223,8 @@ static inline void scm_request_init(struct scm_blk_dev *bdev,
 	struct aob_rq_header *aobrq = to_aobrq(scmrq);
 	struct aob *aob = scmrq->aob;
 
-	memset(scmrq->request, 0, sizeof(scmrq->request));
+	memset(scmrq->request, 0,
+	       nr_requests_per_io * sizeof(scmrq->request[0]));
 	memset(aob, 0, sizeof(*aob));
 	aobrq->scmdev = bdev->scmdev;
 	aob->request.cmd_code = ARQB_CMD_MOVE;
@@ -223,7 +233,7 @@ static inline void scm_request_init(struct scm_blk_dev *bdev,
 	scmrq->retries = 4;
 	scmrq->error = 0;
 	/* We don't use all msbs - place aidaws at the end of the aob page. */
-	scmrq->next_aidaw = (void *) &aob->msb[SCM_RQ_PER_IO];
+	scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io];
 	scm_request_cluster_init(scmrq);
 }
 
@@ -242,7 +252,7 @@ void scm_request_requeue(struct scm_request *scmrq)
 	int i;
 
 	scm_release_cluster(scmrq);
-	for (i = 0; i < SCM_RQ_PER_IO && scmrq->request[i]; i++)
+	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++)
 		blk_requeue_request(bdev->rq, scmrq->request[i]);
 
 	atomic_dec(&bdev->queued_reqs);
@@ -256,7 +266,7 @@ void scm_request_finish(struct scm_request *scmrq)
 	int i;
 
 	scm_release_cluster(scmrq);
-	for (i = 0; i < SCM_RQ_PER_IO && scmrq->request[i]; i++)
+	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++)
 		blk_end_request_all(scmrq->request[i], scmrq->error);
 
 	atomic_dec(&bdev->queued_reqs);
@@ -342,7 +352,7 @@ static void scm_blk_request(struct request_queue *rq)
 		}
 		blk_start_request(req);
 
-		if (scmrq->aob->request.msb_count < SCM_RQ_PER_IO)
+		if (scmrq->aob->request.msb_count < nr_requests_per_io)
 			continue;
 
 		if (scm_request_start(scmrq))
@@ -551,11 +561,19 @@ void scm_blk_set_available(struct scm_blk_dev *bdev)
 	spin_unlock_irqrestore(&bdev->lock, flags);
 }
 
+static bool __init scm_blk_params_valid(void)
+{
+	if (!nr_requests_per_io || nr_requests_per_io > 64)
+		return false;
+
+	return scm_cluster_size_valid();
+}
+
 static int __init scm_blk_init(void)
 {
 	int ret = -EINVAL;
 
-	if (!scm_cluster_size_valid())
+	if (!scm_blk_params_valid())
 		goto out;
 
 	ret = register_blkdev(0, "scm");
diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h
index 3dae0a3570ce..09218cdc5129 100644
--- a/drivers/s390/block/scm_blk.h
+++ b/drivers/s390/block/scm_blk.h
@@ -11,7 +11,6 @@
 #include <asm/eadm.h>
 
 #define SCM_NR_PARTS 8
-#define SCM_RQ_PER_IO 8
 #define SCM_QUEUE_DELAY 5
 
 struct scm_blk_dev {
@@ -32,7 +31,7 @@ struct scm_blk_dev {
 struct scm_request {
 	struct scm_blk_dev *bdev;
 	struct aidaw *next_aidaw;
-	struct request *request[SCM_RQ_PER_IO];
+	struct request **request;
 	struct aob *aob;
 	struct list_head list;
 	u8 retries;
-- 
cgit v1.2.3-59-g8ed1b


From 351997810131565fe62aec2c366deccbf6bda3f4 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 8 Dec 2014 13:19:12 +0100
Subject: s390/cputime: fix 31-bit compile

git commit 8461b63ca01d125a245a0d0fb4821ea0656e5053
"s390: translate cputime magic constants to macros"
introduced a build error for 31-bit:

  kernel/built-in.o: In function `posix_cpu_timer_set':
  posix-cpu-timers.c:(.text+0x2a8cc): undefined reference to `__udivdi3'

The original code is actually broken for 31-bit and has been
corrected by the above commit by forcing the compiler to use
64-bit arithmetic through the CPUTIME_PER_USEC define.

To fix the compile error replace the 64-bit division with
a call to __div().

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/cputime.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index b81712306360..b91e960e4045 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -94,7 +94,7 @@ static inline cputime_t secs_to_cputime(const unsigned int s)
 static inline cputime_t timespec_to_cputime(const struct timespec *value)
 {
 	unsigned long long ret = value->tv_sec * CPUTIME_PER_SEC;
-	return (__force cputime_t)(ret + (value->tv_nsec * CPUTIME_PER_USEC) / NSEC_PER_USEC);
+	return (__force cputime_t)(ret + __div(value->tv_nsec * CPUTIME_PER_USEC, NSEC_PER_USEC));
 }
 
 static inline void cputime_to_timespec(const cputime_t cputime,
-- 
cgit v1.2.3-59-g8ed1b