From fd346c9dae6b266f8c3f62df20237059a3a90312 Mon Sep 17 00:00:00 2001 From: Muhammad Falak R Wani Date: Fri, 20 May 2016 18:51:20 +0530 Subject: s390/keyboard: use memdup_user_nul() Use memdup_user_nul to duplicate a memory region from user-space to kernel-space and terminate with a NULL, instead of open coding using kmalloc + copy_from_user and explicitly NULL terminating. Signed-off-by: Muhammad Falak R Wani [heiko.carstens@de.ibm.com: remove comment] Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/keyboard.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c index ef04a9f7a704..7b9c50aa4cc9 100644 --- a/drivers/s390/char/keyboard.c +++ b/drivers/s390/char/keyboard.c @@ -438,18 +438,9 @@ do_kdgkb_ioctl(struct kbd_data *kbd, struct kbsentry __user *u_kbs, return -EFAULT; if (len > sizeof(u_kbs->kb_string)) return -EINVAL; - p = kmalloc(len, GFP_KERNEL); - if (!p) - return -ENOMEM; - if (copy_from_user(p, u_kbs->kb_string, len)) { - kfree(p); - return -EFAULT; - } - /* - * Make sure the string is terminated by 0. User could have - * modified it between us running strnlen_user() and copying it. - */ - p[len - 1] = 0; + p = memdup_user_nul(u_kbs->kb_string, len); + if (IS_ERR(p)) + return PTR_ERR(p); kfree(kbd->func_table[kb_func]); kbd->func_table[kb_func] = p; break; -- cgit v1.2.3-59-g8ed1b From 4ccccc522bd22ba8e272f95daca5ab92eb0387a0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 14 May 2016 10:46:33 +0200 Subject: s390/pgtable: introduce and use generic csp inline asm We have already two inline assemblies which make use of the csp instruction. Since I need a third instance let's introduce a generic inline assmebly which can be used by everyone. Signed-off-by: Heiko Carstens Acked-by: Martin Schwidefsky Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 24 +++++++++++++++--------- arch/s390/include/asm/tlbflush.h | 15 ++++----------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 18d2beb89340..58e9950fd57f 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -424,6 +424,19 @@ static inline int mm_use_skey(struct mm_struct *mm) return 0; } +static inline void csp(unsigned int *ptr, unsigned int old, unsigned int new) +{ + register unsigned long reg2 asm("2") = old; + register unsigned long reg3 asm("3") = new; + unsigned long address = (unsigned long)ptr | 1; + + asm volatile( + " csp %0,%3" + : "+d" (reg2), "+m" (*ptr) + : "d" (reg3), "d" (address) + : "cc"); +} + /* * pgd/pmd/pte query functions */ @@ -1068,15 +1081,8 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) static inline void __pmdp_csp(pmd_t *pmdp) { - register unsigned long reg2 asm("2") = pmd_val(*pmdp); - register unsigned long reg3 asm("3") = pmd_val(*pmdp) | - _SEGMENT_ENTRY_INVALID; - register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; - - asm volatile( - " csp %1,%3" - : "=m" (*pmdp) - : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); + csp((unsigned int *)pmdp + 1, pmd_val(*pmdp), + pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID); } static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp) diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index a2e6ef32e054..ac02a6c37a3e 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -5,6 +5,7 @@ #include #include #include +#include /* * Flush all TLB entries on the local CPU. @@ -44,17 +45,9 @@ void smp_ptlb_all(void); */ static inline void __tlb_flush_global(void) { - register unsigned long reg2 asm("2"); - register unsigned long reg3 asm("3"); - register unsigned long reg4 asm("4"); - long dummy; - - dummy = 0; - reg2 = reg3 = 0; - reg4 = ((unsigned long) &dummy) + 1; - asm volatile( - " csp %0,%2" - : : "d" (reg2), "d" (reg3), "d" (reg4), "m" (dummy) : "cc" ); + unsigned int dummy = 0; + + csp(&dummy, 0, 0); } /* -- cgit v1.2.3-59-g8ed1b From 2e9996fcf8b8f933f9496cc885c9e05730f23c9e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 13 May 2016 11:10:09 +0200 Subject: s390/vmem: align segment and region tables to 16k Usually segment and region tables are 16k aligned due to the way the buddy allocator works. This is not true for the vmem code which only asks for a 4k alignment. In order to be consistent enforce a 16k alignment here as well. This alignment will be assumed and therefore is required by the pageattr code. Signed-off-by: Heiko Carstens Acked-by: Christian Borntraeger Acked-by: Martin Schwidefsky Signed-off-by: Martin Schwidefsky --- arch/s390/mm/vmem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index d48cf25cfe99..a64b91a8e5ac 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -29,9 +29,11 @@ static LIST_HEAD(mem_segs); static void __ref *vmem_alloc_pages(unsigned int order) { + unsigned long size = PAGE_SIZE << order; + if (slab_is_available()) return (void *)__get_free_pages(GFP_KERNEL, order); - return alloc_bootmem_pages((1 << order) * PAGE_SIZE); + return alloc_bootmem_align(size, size); } static inline pud_t *vmem_pud_alloc(void) -- cgit v1.2.3-59-g8ed1b From 2dffdcbac91c8da1eb0663d6eab92f2e5b56a798 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 11 May 2016 10:52:07 +0200 Subject: s390/vmem: introduce and use SEGMENT_KERNEL and REGION3_KERNEL Instead of open-coded SEGMENT_KERNEL and REGION3_KERNEL assignments use defines. Also to make e.g. pmd_wrprotect() work on the kernel mapping a couple more flags must be set. Therefore add the missing flags also. In order to make everything symmetrical this patch also adds software dirty, young, read and write bits for region 3 table entries. Signed-off-by: Heiko Carstens Acked-by: Martin Schwidefsky Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 43 +++++++++++++++++++++++++++++++++++++++-- arch/s390/mm/vmem.c | 13 +++++-------- 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 58e9950fd57f..898456957c44 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -270,9 +270,20 @@ static inline int is_module_addr(void *addr) #define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH) #define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID) -#define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */ #define _REGION3_ENTRY_RO 0x200 /* page protection bit */ +#define _REGION3_ENTRY_DIRTY 0x2000 /* SW region dirty bit */ +#define _REGION3_ENTRY_YOUNG 0x1000 /* SW region young bit */ +#define _REGION3_ENTRY_LARGE 0x0400 /* RTTE-format control, large page */ +#define _REGION3_ENTRY_READ 0x0002 /* SW region read bit */ +#define _REGION3_ENTRY_WRITE 0x0001 /* SW region write bit */ + +#ifdef CONFIG_MEM_SOFT_DIRTY +#define _REGION3_ENTRY_SOFT_DIRTY 0x4000 /* SW region soft dirty bit */ +#else +#define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */ +#endif + /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL #define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL @@ -297,7 +308,8 @@ static inline int is_module_addr(void *addr) #endif /* - * Segment table entry encoding (R = read-only, I = invalid, y = young bit): + * Segment table and region3 table entry encoding + * (R = read-only, I = invalid, y = young bit): * dy..R...I...rw * prot-none, clean, old 00..1...1...00 * prot-none, clean, young 01..1...1...00 @@ -391,6 +403,33 @@ static inline int is_module_addr(void *addr) _SEGMENT_ENTRY_READ) #define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE) +#define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \ + _SEGMENT_ENTRY_LARGE | \ + _SEGMENT_ENTRY_READ | \ + _SEGMENT_ENTRY_WRITE | \ + _SEGMENT_ENTRY_YOUNG | \ + _SEGMENT_ENTRY_DIRTY) +#define SEGMENT_KERNEL_RO __pgprot(_SEGMENT_ENTRY | \ + _SEGMENT_ENTRY_LARGE | \ + _SEGMENT_ENTRY_READ | \ + _SEGMENT_ENTRY_YOUNG | \ + _SEGMENT_ENTRY_PROTECT) + +/* + * Region3 entry (large page) protection definitions. + */ + +#define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_LARGE | \ + _REGION3_ENTRY_READ | \ + _REGION3_ENTRY_WRITE | \ + _REGION3_ENTRY_YOUNG | \ + _REGION3_ENTRY_DIRTY) +#define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_LARGE | \ + _REGION3_ENTRY_READ | \ + _REGION3_ENTRY_YOUNG | \ + _REGION_ENTRY_PROTECT) static inline int mm_has_pgste(struct mm_struct *mm) { diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index a64b91a8e5ac..2020ef8c0413 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -99,9 +99,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address && !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) && !debug_pagealloc_enabled()) { - pud_val(*pu_dir) = __pa(address) | - _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE | - (ro ? _REGION_ENTRY_PROTECT : 0); + pud_val(*pu_dir) = address | + pgprot_val(ro ? REGION3_KERNEL_RO : REGION3_KERNEL); address += PUD_SIZE; continue; } @@ -115,10 +114,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address && !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) && !debug_pagealloc_enabled()) { - pmd_val(*pm_dir) = __pa(address) | - _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | - _SEGMENT_ENTRY_YOUNG | - (ro ? _SEGMENT_ENTRY_PROTECT : 0); + pmd_val(*pm_dir) = address | + pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL); address += PMD_SIZE; continue; } @@ -130,7 +127,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) } pt_dir = pte_offset_kernel(pm_dir, address); - pte_val(*pt_dir) = __pa(address) | + pte_val(*pt_dir) = address | pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL); address += PAGE_SIZE; } -- cgit v1.2.3-59-g8ed1b From c126aa83e2fe13ffcb5d073768b92108e4f56c54 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 20 May 2016 13:34:58 +0200 Subject: s390/pgtable: get rid of _REGION3_ENTRY_RO _REGION3_ENTRY_RO is a duplicate of _REGION_ENTRY_PROTECT. Signed-off-by: Heiko Carstens Acked-by: Martin Schwidefsky Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 2 -- arch/s390/mm/dump_pagetables.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 898456957c44..68942f4c8668 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -270,8 +270,6 @@ static inline int is_module_addr(void *addr) #define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH) #define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID) -#define _REGION3_ENTRY_RO 0x200 /* page protection bit */ - #define _REGION3_ENTRY_DIRTY 0x2000 /* SW region dirty bit */ #define _REGION3_ENTRY_YOUNG 0x1000 /* SW region young bit */ #define _REGION3_ENTRY_LARGE 0x0400 /* RTTE-format control, large page */ diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 8556d6be9b54..861880df12c7 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -157,7 +157,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pud = pud_offset(pgd, addr); if (!pud_none(*pud)) if (pud_large(*pud)) { - prot = pud_val(*pud) & _REGION3_ENTRY_RO; + prot = pud_val(*pud) & _REGION_ENTRY_PROTECT; note_page(m, st, prot, 2); } else walk_pmd_level(m, st, pud, addr); -- cgit v1.2.3-59-g8ed1b From 5aa29975e81851b7d48cb79c0c95d95360bfd9a6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 17 May 2016 12:17:51 +0200 Subject: s390/vmem: make use of pte_clear() Use pte_clear() instead of open-coding it. Signed-off-by: Heiko Carstens Acked-by: Martin Schwidefsky Signed-off-by: Martin Schwidefsky --- arch/s390/mm/vmem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 2020ef8c0413..4badd8252e3c 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -148,9 +148,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) pud_t *pu_dir; pmd_t *pm_dir; pte_t *pt_dir; - pte_t pte; - pte_val(pte) = _PAGE_INVALID; while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { @@ -178,7 +176,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) continue; } pt_dir = pte_offset_kernel(pm_dir, address); - *pt_dir = pte; + pte_clear(&init_mm, address, pt_dir); address += PAGE_SIZE; } flush_tlb_kernel_range(start, end); -- cgit v1.2.3-59-g8ed1b From 3e76ee99b095e5bee71165fda5edbe6b66b39700 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 19 May 2016 16:25:30 +0200 Subject: s390/mm: always use PAGE_KERNEL when mapping pages Always use PAGE_KERNEL when re-enabling pages within the kernel mapping due to debug pagealloc. Without using this pgprot value pte_mkwrite() and pte_wrprotect() won't work on such mappings after an unmap -> map cycle anymore. Signed-off-by: Heiko Carstens Acked-by: Martin Schwidefsky Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pageattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index f2a5c29a97e9..e67a8f712e19 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -138,7 +138,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) nr = min(numpages - i, nr); if (enable) { for (j = 0; j < nr; j++) { - pte_val(*pte) = __pa(address); + pte_val(*pte) = address | pgprot_val(PAGE_KERNEL); address += PAGE_SIZE; pte++; } -- cgit v1.2.3-59-g8ed1b From 9e20b4dac1f58921503109ea38f341ff2b0d21f5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 10 May 2016 10:34:47 +0200 Subject: s390/pgtable: make pmd and pud helper functions available Make pmd_wrprotect() and pmd_mkwrite() available independently from CONFIG_TRANSPARENT_HUGEPAGE and CONFIG_HUGETLB_PAGE so these can be used on the kernel mapping. Also introduce a couple of pud helper functions, namely pud_pfn(), pud_wrprotect(), pud_mkwrite(), pud_mkdirty() and pud_mkclean() which only work on the kernel mapping. Signed-off-by: Heiko Carstens Acked-by: Martin Schwidefsky Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 77 +++++++++++++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 14 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 68942f4c8668..882d6f4aad25 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -270,6 +270,9 @@ static inline int is_module_addr(void *addr) #define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH) #define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID) +#define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address */ +#define _REGION3_ENTRY_ORIGIN ~0x7ffUL/* region third table origin */ + #define _REGION3_ENTRY_DIRTY 0x2000 /* SW region dirty bit */ #define _REGION3_ENTRY_YOUNG 0x1000 /* SW region young bit */ #define _REGION3_ENTRY_LARGE 0x0400 /* RTTE-format control, large page */ @@ -525,6 +528,16 @@ static inline int pud_large(pud_t pud) return !!(pud_val(pud) & _REGION3_ENTRY_LARGE); } +static inline unsigned long pud_pfn(pud_t pud) +{ + unsigned long origin_mask; + + origin_mask = _REGION3_ENTRY_ORIGIN; + if (pud_large(pud)) + origin_mask = _REGION3_ENTRY_ORIGIN_LARGE; + return (pud_val(pud) & origin_mask) >> PAGE_SHIFT; +} + static inline int pud_bad(pud_t pud) { /* @@ -1020,20 +1033,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) #define pte_unmap(pte) do { } while (0) -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) -static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) -{ - /* - * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx) - * Convert to segment table entry format. - */ - if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE)) - return pgprot_val(SEGMENT_NONE); - if (pgprot_val(pgprot) == pgprot_val(PAGE_READ)) - return pgprot_val(SEGMENT_READ); - return pgprot_val(SEGMENT_WRITE); -} - static inline pmd_t pmd_wrprotect(pmd_t pmd) { pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE; @@ -1070,6 +1069,56 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pmd; } +static inline pud_t pud_wrprotect(pud_t pud) +{ + pud_val(pud) &= ~_REGION3_ENTRY_WRITE; + pud_val(pud) |= _REGION_ENTRY_PROTECT; + return pud; +} + +static inline pud_t pud_mkwrite(pud_t pud) +{ + pud_val(pud) |= _REGION3_ENTRY_WRITE; + if (pud_large(pud) && !(pud_val(pud) & _REGION3_ENTRY_DIRTY)) + return pud; + pud_val(pud) &= ~_REGION_ENTRY_PROTECT; + return pud; +} + +static inline pud_t pud_mkclean(pud_t pud) +{ + if (pud_large(pud)) { + pud_val(pud) &= ~_REGION3_ENTRY_DIRTY; + pud_val(pud) |= _REGION_ENTRY_PROTECT; + } + return pud; +} + +static inline pud_t pud_mkdirty(pud_t pud) +{ + if (pud_large(pud)) { + pud_val(pud) |= _REGION3_ENTRY_DIRTY | + _REGION3_ENTRY_SOFT_DIRTY; + if (pud_val(pud) & _REGION3_ENTRY_WRITE) + pud_val(pud) &= ~_REGION_ENTRY_PROTECT; + } + return pud; +} + +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) +static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) +{ + /* + * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx) + * Convert to segment table entry format. + */ + if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE)) + return pgprot_val(SEGMENT_NONE); + if (pgprot_val(pgprot) == pgprot_val(PAGE_READ)) + return pgprot_val(SEGMENT_READ); + return pgprot_val(SEGMENT_WRITE); +} + static inline pmd_t pmd_mkyoung(pmd_t pmd) { if (pmd_large(pmd)) { -- cgit v1.2.3-59-g8ed1b From e8a97e42dc986a081017b1e77e3a3c7f02a0a638 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 17 May 2016 10:50:15 +0200 Subject: s390/pageattr: allow kernel page table splitting set_memory_ro() and set_memory_rw() currently only work on 4k mappings, which is good enough for module code aka the vmalloc area. However we stumbled already twice into the need to make this also work on larger mappings: - the ro after init patch set - the crash kernel resize code Therefore this patch implements automatic kernel page table splitting if e.g. set_memory_ro() would be called on parts of a 2G mapping. This works quite the same as the x86 code, but is much simpler. In order to make this work and to be architecturally compliant we now always use the csp, cspg or crdte instructions to replace valid page table entries. This means that set_memory_ro() and set_memory_rw() will be much more expensive than before. In order to avoid huge latencies the code contains a couple of cond_resched() calls. The current code only splits page tables, but does not merge them if it would be possible. The reason for this is that currently there is no real life scenarion where this would really happen. All current use cases that I know of only change access rights once during the life time. If that should change we can still implement kernel page table merging at a later time. Signed-off-by: Heiko Carstens Acked-by: Martin Schwidefsky Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 36 ++++++ arch/s390/mm/pageattr.c | 243 +++++++++++++++++++++++++++++++++++----- arch/s390/mm/vmem.c | 4 +- 3 files changed, 250 insertions(+), 33 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 882d6f4aad25..9133388edd1f 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -34,6 +34,8 @@ extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); extern void paging_init(void); extern void vmem_map_init(void); +pmd_t *vmem_pmd_alloc(void); +pte_t *vmem_pte_alloc(void); /* * The S390 doesn't have any external MMU info: the kernel page @@ -477,6 +479,40 @@ static inline void csp(unsigned int *ptr, unsigned int old, unsigned int new) : "cc"); } +static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new) +{ + register unsigned long reg2 asm("2") = old; + register unsigned long reg3 asm("3") = new; + unsigned long address = (unsigned long)ptr | 1; + + asm volatile( + " .insn rre,0xb98a0000,%0,%3" + : "+d" (reg2), "+m" (*ptr) + : "d" (reg3), "d" (address) + : "cc"); +} + +#define CRDTE_DTT_PAGE 0x00UL +#define CRDTE_DTT_SEGMENT 0x10UL +#define CRDTE_DTT_REGION3 0x14UL +#define CRDTE_DTT_REGION2 0x18UL +#define CRDTE_DTT_REGION1 0x1cUL + +static inline void crdte(unsigned long old, unsigned long new, + unsigned long table, unsigned long dtt, + unsigned long address, unsigned long asce) +{ + register unsigned long reg2 asm("2") = old; + register unsigned long reg3 asm("3") = new; + register unsigned long reg4 asm("4") = table | dtt; + register unsigned long reg5 asm("5") = address; + + asm volatile(".insn rrf,0xb98f0000,%0,%2,%4,0" + : "+d" (reg2) + : "d" (reg3), "d" (reg4), "d" (reg5), "a" (asce) + : "memory", "cc"); +} + /* * pgd/pmd/pte query functions */ diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index e67a8f712e19..91e5e29c1f5c 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -40,54 +40,235 @@ void __storage_key_init_range(unsigned long start, unsigned long end) } #endif -static pte_t *walk_page_table(unsigned long addr) +static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, + unsigned long dtt) { - pgd_t *pgdp; - pud_t *pudp; + unsigned long table, mask; + + mask = 0; + if (MACHINE_HAS_EDAT2) { + switch (dtt) { + case CRDTE_DTT_REGION3: + mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1); + break; + case CRDTE_DTT_SEGMENT: + mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); + break; + case CRDTE_DTT_PAGE: + mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1); + break; + } + table = (unsigned long)old & mask; + crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce); + } else if (MACHINE_HAS_IDTE) { + cspg(old, *old, new); + } else { + csp((unsigned int *)old + 1, *old, new); + } +} + +struct cpa { + unsigned int set_ro : 1; + unsigned int clear_ro : 1; +}; + +static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, + struct cpa cpa) +{ + pte_t *ptep, new; + + ptep = pte_offset(pmdp, addr); + do { + if (pte_none(*ptep)) + return -EINVAL; + if (cpa.set_ro) + new = pte_wrprotect(*ptep); + else if (cpa.clear_ro) + new = pte_mkwrite(pte_mkdirty(*ptep)); + pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE); + ptep++; + addr += PAGE_SIZE; + cond_resched(); + } while (addr < end); + return 0; +} + +static int split_pmd_page(pmd_t *pmdp, unsigned long addr) +{ + unsigned long pte_addr, prot; + pte_t *pt_dir, *ptep; + pmd_t new; + int i, ro; + + pt_dir = vmem_pte_alloc(); + if (!pt_dir) + return -ENOMEM; + pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT; + ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT); + prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL); + ptep = pt_dir; + for (i = 0; i < PTRS_PER_PTE; i++) { + pte_val(*ptep) = pte_addr | prot; + pte_addr += PAGE_SIZE; + ptep++; + } + pmd_val(new) = __pa(pt_dir) | _SEGMENT_ENTRY; + pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); + return 0; +} + +static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, struct cpa cpa) +{ + pmd_t new; + + if (cpa.set_ro) + new = pmd_wrprotect(*pmdp); + else if (cpa.clear_ro) + new = pmd_mkwrite(pmd_mkdirty(*pmdp)); + pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); +} + +static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end, + struct cpa cpa) +{ + unsigned long next; pmd_t *pmdp; - pte_t *ptep; + int rc = 0; - pgdp = pgd_offset_k(addr); - if (pgd_none(*pgdp)) - return NULL; - pudp = pud_offset(pgdp, addr); - if (pud_none(*pudp) || pud_large(*pudp)) - return NULL; pmdp = pmd_offset(pudp, addr); - if (pmd_none(*pmdp) || pmd_large(*pmdp)) - return NULL; - ptep = pte_offset_kernel(pmdp, addr); - if (pte_none(*ptep)) - return NULL; - return ptep; + do { + if (pmd_none(*pmdp)) + return -EINVAL; + next = pmd_addr_end(addr, end); + if (pmd_large(*pmdp)) { + if (addr & ~PMD_MASK || addr + PMD_SIZE > next) { + rc = split_pmd_page(pmdp, addr); + if (rc) + return rc; + continue; + } + modify_pmd_page(pmdp, addr, cpa); + } else { + rc = walk_pte_level(pmdp, addr, next, cpa); + if (rc) + return rc; + } + pmdp++; + addr = next; + cond_resched(); + } while (addr < end); + return rc; } -static void change_page_attr(unsigned long addr, int numpages, - pte_t (*set) (pte_t)) +static int split_pud_page(pud_t *pudp, unsigned long addr) { - pte_t *ptep; - int i; + unsigned long pmd_addr, prot; + pmd_t *pm_dir, *pmdp; + pud_t new; + int i, ro; - for (i = 0; i < numpages; i++) { - ptep = walk_page_table(addr); - if (WARN_ON_ONCE(!ptep)) - break; - *ptep = set(*ptep); - addr += PAGE_SIZE; + pm_dir = vmem_pmd_alloc(); + if (!pm_dir) + return -ENOMEM; + pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT; + ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT); + prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL); + pmdp = pm_dir; + for (i = 0; i < PTRS_PER_PMD; i++) { + pmd_val(*pmdp) = pmd_addr | prot; + pmd_addr += PMD_SIZE; + pmdp++; } - __tlb_flush_kernel(); + pud_val(new) = __pa(pm_dir) | _REGION3_ENTRY; + pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); + return 0; +} + +static void modify_pud_page(pud_t *pudp, unsigned long addr, struct cpa cpa) +{ + pud_t new; + + if (cpa.set_ro) + new = pud_wrprotect(*pudp); + else if (cpa.clear_ro) + new = pud_mkwrite(pud_mkdirty(*pudp)); + pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); +} + +static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end, + struct cpa cpa) +{ + unsigned long next; + pud_t *pudp; + int rc = 0; + + pudp = pud_offset(pgd, addr); + do { + if (pud_none(*pudp)) + return -EINVAL; + next = pud_addr_end(addr, end); + if (pud_large(*pudp)) { + if (addr & ~PUD_MASK || addr + PUD_SIZE > next) { + rc = split_pud_page(pudp, addr); + if (rc) + break; + continue; + } + modify_pud_page(pudp, addr, cpa); + } else { + rc = walk_pmd_level(pudp, addr, next, cpa); + } + pudp++; + addr = next; + cond_resched(); + } while (addr < end && !rc); + return rc; +} + +static DEFINE_MUTEX(cpa_mutex); + +static int change_page_attr(unsigned long addr, unsigned long end, + struct cpa cpa) +{ + unsigned long next; + int rc = -EINVAL; + pgd_t *pgdp; + + if (end >= MODULES_END) + return -EINVAL; + mutex_lock(&cpa_mutex); + pgdp = pgd_offset_k(addr); + do { + if (pgd_none(*pgdp)) + break; + next = pgd_addr_end(addr, end); + rc = walk_pud_level(pgdp, addr, next, cpa); + if (rc) + break; + cond_resched(); + } while (pgdp++, addr = next, addr < end && !rc); + mutex_unlock(&cpa_mutex); + return rc; } int set_memory_ro(unsigned long addr, int numpages) { - change_page_attr(addr, numpages, pte_wrprotect); - return 0; + struct cpa cpa = { + .set_ro = 1, + }; + + addr &= PAGE_MASK; + return change_page_attr(addr, addr + numpages * PAGE_SIZE, cpa); } int set_memory_rw(unsigned long addr, int numpages) { - change_page_attr(addr, numpages, pte_mkwrite); - return 0; + struct cpa cpa = { + .clear_ro = 1, + }; + + addr &= PAGE_MASK; + return change_page_attr(addr, addr + numpages * PAGE_SIZE, cpa); } /* not possible */ diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 4badd8252e3c..0a7b03496f67 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -47,7 +47,7 @@ static inline pud_t *vmem_pud_alloc(void) return pud; } -static inline pmd_t *vmem_pmd_alloc(void) +pmd_t *vmem_pmd_alloc(void) { pmd_t *pmd = NULL; @@ -58,7 +58,7 @@ static inline pmd_t *vmem_pmd_alloc(void) return pmd; } -static pte_t __ref *vmem_pte_alloc(void) +pte_t __ref *vmem_pte_alloc(void) { pte_t *pte; -- cgit v1.2.3-59-g8ed1b From bab247ff5f669216e3ed2f9a4034c540187e874c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 10 May 2016 16:28:28 +0200 Subject: s390/vmem: simplify vmem code for read-only mappings For the kernel identity mapping map everything read-writeable and subsequently call set_memory_ro() to make the ro section read-only. This simplifies the code a lot. Signed-off-by: Heiko Carstens Acked-by: Martin Schwidefsky Signed-off-by: Martin Schwidefsky --- arch/s390/mm/vmem.c | 37 +++++++++---------------------------- 1 file changed, 9 insertions(+), 28 deletions(-) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 0a7b03496f67..b200f976c36b 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -77,7 +78,7 @@ pte_t __ref *vmem_pte_alloc(void) /* * Add a physical memory range to the 1:1 mapping. */ -static int vmem_add_mem(unsigned long start, unsigned long size, int ro) +static int vmem_add_mem(unsigned long start, unsigned long size) { unsigned long end = start + size; unsigned long address = start; @@ -99,8 +100,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address && !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) && !debug_pagealloc_enabled()) { - pud_val(*pu_dir) = address | - pgprot_val(ro ? REGION3_KERNEL_RO : REGION3_KERNEL); + pud_val(*pu_dir) = address | pgprot_val(REGION3_KERNEL); address += PUD_SIZE; continue; } @@ -114,8 +114,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address && !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) && !debug_pagealloc_enabled()) { - pmd_val(*pm_dir) = address | - pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL); + pmd_val(*pm_dir) = address | pgprot_val(SEGMENT_KERNEL); address += PMD_SIZE; continue; } @@ -127,8 +126,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) } pt_dir = pte_offset_kernel(pm_dir, address); - pte_val(*pt_dir) = address | - pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL); + pte_val(*pt_dir) = address | pgprot_val(PAGE_KERNEL); address += PAGE_SIZE; } ret = 0; @@ -338,7 +336,7 @@ int vmem_add_mapping(unsigned long start, unsigned long size) if (ret) goto out_free; - ret = vmem_add_mem(start, size, 0); + ret = vmem_add_mem(start, size); if (ret) goto out_remove; goto out; @@ -361,29 +359,12 @@ void __init vmem_map_init(void) { unsigned long ro_start, ro_end; struct memblock_region *reg; - phys_addr_t start, end; + for_each_memblock(memory, reg) + vmem_add_mem(reg->base, reg->size); ro_start = PFN_ALIGN((unsigned long)&_stext); ro_end = (unsigned long)&_eshared & PAGE_MASK; - for_each_memblock(memory, reg) { - start = reg->base; - end = reg->base + reg->size; - if (start >= ro_end || end <= ro_start) - vmem_add_mem(start, end - start, 0); - else if (start >= ro_start && end <= ro_end) - vmem_add_mem(start, end - start, 1); - else if (start >= ro_start) { - vmem_add_mem(start, ro_end - start, 1); - vmem_add_mem(ro_end, end - ro_end, 0); - } else if (end < ro_end) { - vmem_add_mem(start, ro_start - start, 0); - vmem_add_mem(ro_start, end - ro_start, 1); - } else { - vmem_add_mem(start, ro_start - start, 0); - vmem_add_mem(ro_start, ro_end - ro_start, 1); - vmem_add_mem(ro_end, end - ro_end, 0); - } - } + set_memory_ro(ro_start, (ro_end - ro_start) >> PAGE_SHIFT); } /* -- cgit v1.2.3-59-g8ed1b From 37cd944c8d8f406eee8e0c580f823ff66738c0af Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 20 May 2016 08:08:14 +0200 Subject: s390/pgtable: add mapping statistics Add statistics that show how memory is mapped within the kernel identity mapping. This is more or less the same like git commit ce0c0e50f94e ("x86, generic: CPA add statistics about state of direct mapping v4") for x86. I also intentionally copied the lower case "k" within DirectMap4k vs the upper case "M" and "G" within the two other lines. Let's have consistent inconsistencies across architectures. The output of /proc/meminfo now contains these additional lines: DirectMap4k: 2048 kB DirectMap1M: 3991552 kB DirectMap2G: 4194304 kB The implementation on s390 is lockless unlike the x86 version, since I assume changes to the kernel mapping are a very rare event. Therefore it really doesn't matter if these statistics could potentially be inconsistent if read while kernel pages tables are being changed. Signed-off-by: Heiko Carstens Acked-by: Martin Schwidefsky Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 19 +++++++++++++++++++ arch/s390/mm/pageattr.c | 18 ++++++++++++++++++ arch/s390/mm/vmem.c | 16 ++++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 9133388edd1f..3038edb12cad 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -37,6 +38,24 @@ extern void vmem_map_init(void); pmd_t *vmem_pmd_alloc(void); pte_t *vmem_pte_alloc(void); +enum { + PG_DIRECT_MAP_4K = 0, + PG_DIRECT_MAP_1M, + PG_DIRECT_MAP_2G, + PG_DIRECT_MAP_MAX +}; + +extern atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX]; + +static inline void update_page_count(int level, long count) +{ + if (IS_ENABLED(CONFIG_PROC_FS)) + atomic_long_add(count, &direct_pages_count[level]); +} + +struct seq_file; +void arch_report_meminfo(struct seq_file *m); + /* * The S390 doesn't have any external MMU info: the kernel page * tables contain all the necessary information. diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 91e5e29c1f5c..ba124d9c96ba 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -40,6 +40,20 @@ void __storage_key_init_range(unsigned long start, unsigned long end) } #endif +#ifdef CONFIG_PROC_FS +atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX]; + +void arch_report_meminfo(struct seq_file *m) +{ + seq_printf(m, "DirectMap4k: %8lu kB\n", + atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]) << 2); + seq_printf(m, "DirectMap1M: %8lu kB\n", + atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]) << 10); + seq_printf(m, "DirectMap2G: %8lu kB\n", + atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]) << 21); +} +#endif /* CONFIG_PROC_FS */ + static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, unsigned long dtt) { @@ -114,6 +128,8 @@ static int split_pmd_page(pmd_t *pmdp, unsigned long addr) } pmd_val(new) = __pa(pt_dir) | _SEGMENT_ENTRY; pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); + update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE); + update_page_count(PG_DIRECT_MAP_1M, -1); return 0; } @@ -181,6 +197,8 @@ static int split_pud_page(pud_t *pudp, unsigned long addr) } pud_val(new) = __pa(pm_dir) | _REGION3_ENTRY; pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); + update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD); + update_page_count(PG_DIRECT_MAP_2G, -1); return 0; } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index b200f976c36b..a1e7c0b207e6 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -80,6 +80,7 @@ pte_t __ref *vmem_pte_alloc(void) */ static int vmem_add_mem(unsigned long start, unsigned long size) { + unsigned long pages4k, pages1m, pages2g; unsigned long end = start + size; unsigned long address = start; pgd_t *pg_dir; @@ -88,6 +89,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size) pte_t *pt_dir; int ret = -ENOMEM; + pages4k = pages1m = pages2g = 0; while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { @@ -102,6 +104,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size) !debug_pagealloc_enabled()) { pud_val(*pu_dir) = address | pgprot_val(REGION3_KERNEL); address += PUD_SIZE; + pages2g++; continue; } if (pud_none(*pu_dir)) { @@ -116,6 +119,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size) !debug_pagealloc_enabled()) { pmd_val(*pm_dir) = address | pgprot_val(SEGMENT_KERNEL); address += PMD_SIZE; + pages1m++; continue; } if (pmd_none(*pm_dir)) { @@ -128,9 +132,13 @@ static int vmem_add_mem(unsigned long start, unsigned long size) pt_dir = pte_offset_kernel(pm_dir, address); pte_val(*pt_dir) = address | pgprot_val(PAGE_KERNEL); address += PAGE_SIZE; + pages4k++; } ret = 0; out: + update_page_count(PG_DIRECT_MAP_4K, pages4k); + update_page_count(PG_DIRECT_MAP_1M, pages1m); + update_page_count(PG_DIRECT_MAP_2G, pages2g); return ret; } @@ -140,6 +148,7 @@ out: */ static void vmem_remove_range(unsigned long start, unsigned long size) { + unsigned long pages4k, pages1m, pages2g; unsigned long end = start + size; unsigned long address = start; pgd_t *pg_dir; @@ -147,6 +156,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) pmd_t *pm_dir; pte_t *pt_dir; + pages4k = pages1m = pages2g = 0; while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { @@ -161,6 +171,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) if (pud_large(*pu_dir)) { pud_clear(pu_dir); address += PUD_SIZE; + pages2g++; continue; } pm_dir = pmd_offset(pu_dir, address); @@ -171,13 +182,18 @@ static void vmem_remove_range(unsigned long start, unsigned long size) if (pmd_large(*pm_dir)) { pmd_clear(pm_dir); address += PMD_SIZE; + pages1m++; continue; } pt_dir = pte_offset_kernel(pm_dir, address); pte_clear(&init_mm, address, pt_dir); address += PAGE_SIZE; + pages4k++; } flush_tlb_kernel_range(start, end); + update_page_count(PG_DIRECT_MAP_4K, -pages4k); + update_page_count(PG_DIRECT_MAP_1M, -pages1m); + update_page_count(PG_DIRECT_MAP_2G, -pages2g); } /* -- cgit v1.2.3-59-g8ed1b From ac31418445597573538571d9b6610406a6cd66ab Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 20 May 2016 17:16:35 -0400 Subject: s390: add explicit for jump label Ensure that we always have __stringify(). Signed-off-by: Jason Baron Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/jump_label.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 7f9fd5e3f1bf..9be198f5ee79 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -4,6 +4,7 @@ #ifndef __ASSEMBLY__ #include +#include #define JUMP_LABEL_NOP_SIZE 6 #define JUMP_LABEL_NOP_OFFSET 2 -- cgit v1.2.3-59-g8ed1b From 219a21b3b08f5a801e45fada915c4b719076558d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 14 Apr 2016 08:57:19 +0200 Subject: s390/cpuinfo: print cache info and all single cpu lines on first iteration Change the code to print all the current output during the first iteration. This is a preparation patch for the upcoming per cpu block extension to /proc/cpuinfo. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/processor.c | 62 ++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index de7451065c34..4f7a96d719d0 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -53,10 +53,7 @@ int cpu_have_feature(unsigned int num) } EXPORT_SYMBOL(cpu_have_feature); -/* - * show_cpuinfo - Get information on one CPU for use by procfs. - */ -static int show_cpuinfo(struct seq_file *m, void *v) +static void show_cpu_summary(struct seq_file *m, void *v) { static const char *hwcap_str[] = { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", @@ -65,34 +62,43 @@ static int show_cpuinfo(struct seq_file *m, void *v) static const char * const int_hwcap_str[] = { "sie" }; - unsigned long n = (unsigned long) v - 1; - int i; - - if (!n) { - s390_adjust_jiffies(); - seq_printf(m, "vendor_id : IBM/S390\n" - "# processors : %i\n" - "bogomips per cpu: %lu.%02lu\n", - num_online_cpus(), loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ))%100); - seq_puts(m, "features\t: "); - for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) - if (hwcap_str[i] && (elf_hwcap & (1UL << i))) - seq_printf(m, "%s ", hwcap_str[i]); - for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++) - if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) - seq_printf(m, "%s ", int_hwcap_str[i]); - seq_puts(m, "\n"); - show_cacheinfo(m); - } - if (cpu_online(n)) { - struct cpuid *id = &per_cpu(cpu_id, n); - seq_printf(m, "processor %li: " + int i, cpu; + + s390_adjust_jiffies(); + seq_printf(m, "vendor_id : IBM/S390\n" + "# processors : %i\n" + "bogomips per cpu: %lu.%02lu\n", + num_online_cpus(), loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ))%100); + seq_puts(m, "features\t: "); + for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) + if (hwcap_str[i] && (elf_hwcap & (1UL << i))) + seq_printf(m, "%s ", hwcap_str[i]); + for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++) + if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) + seq_printf(m, "%s ", int_hwcap_str[i]); + seq_puts(m, "\n"); + show_cacheinfo(m); + for_each_online_cpu(cpu) { + struct cpuid *id = &per_cpu(cpu_id, cpu); + + seq_printf(m, "processor %d: " "version = %02X, " "identification = %06X, " "machine = %04X\n", - n, id->version, id->ident, id->machine); + cpu, id->version, id->ident, id->machine); } +} + +/* + * show_cpuinfo - Get information on one CPU for use by procfs. + */ +static int show_cpuinfo(struct seq_file *m, void *v) +{ + unsigned long n = (unsigned long) v - 1; + + if (!n) + show_cpu_summary(m, v); return 0; } -- cgit v1.2.3-59-g8ed1b From 097a116c7e9023267b61fb96b37fdcb2864a1ae3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 14 Apr 2016 12:35:22 +0200 Subject: s390/cpuinfo: show dynamic and static cpu mhz Show the dynamic and static cpu mhz of each cpu. Since these values are per cpu this requires a fundamental extension of the format of /proc/cpuinfo. Historically we had only a single line per cpu and a summary at the top of the file. This format is hardly extendible if we want to add more per cpu information. Therefore this patch adds per cpu blocks at the end of /proc/cpuinfo: cpu : 0 cpu Mhz dynamic : 5504 cpu Mhz static : 5504 cpu : 1 cpu Mhz dynamic : 5504 cpu Mhz static : 5504 cpu : 2 cpu Mhz dynamic : 5504 cpu Mhz static : 5504 cpu : 3 cpu Mhz dynamic : 5504 cpu Mhz static : 5504 Right now each block contains only the dynamic and static cpu mhz, but it can be easily extended like on every other architecture. This extension is supposed to be compatible with the old format. Signed-off-by: Heiko Carstens Acked-by: Sascha Silbe Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 17 +++++++++++- arch/s390/kernel/cache.c | 7 +---- arch/s390/kernel/processor.c | 55 +++++++++++++++++++++++++++++++++++---- arch/s390/kernel/setup.c | 1 + drivers/s390/char/sclp_config.c | 2 +- 5 files changed, 69 insertions(+), 13 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 9d4d311d7e52..09529202ea77 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -77,7 +77,10 @@ static inline void get_cpu_id(struct cpuid *ptr) asm volatile("stidp %0" : "=Q" (*ptr)); } -extern void s390_adjust_jiffies(void); +void s390_adjust_jiffies(void); +void s390_update_cpu_mhz(void); +void cpu_detect_mhz_feature(void); + extern const struct seq_operations cpuinfo_op; extern int sysctl_ieee_emulation_warnings; extern void execve_tail(void); @@ -233,6 +236,18 @@ void cpu_relax(void); #define cpu_relax_lowlatency() barrier() +#define ECAG_CACHE_ATTRIBUTE 0 +#define ECAG_CPU_ATTRIBUTE 1 + +static inline unsigned long __ecag(unsigned int asi, unsigned char parm) +{ + unsigned long val; + + asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */ + : "=d" (val) : "a" (asi << 8 | parm)); + return val; +} + static inline void psw_set_key(unsigned int key) { asm volatile("spka 0(%0)" : : "d" (key)); diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index 77a84bd78be2..c8a83276a4dc 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -99,12 +99,7 @@ static inline enum cache_type get_cache_type(struct cache_info *ci, int level) static inline unsigned long ecag(int ai, int li, int ti) { - unsigned long cmd, val; - - cmd = ai << 4 | li << 1 | ti; - asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */ - : "=d" (val) : "a" (cmd)); - return val; + return __ecag(ECAG_CACHE_ATTRIBUTE, ai << 4 | li << 1 | ti); } static void ci_leaf_init(struct cacheinfo *this_leaf, int private, diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 4f7a96d719d0..06ca71b7ec0e 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -13,12 +13,45 @@ #include #include #include +#include #include #include #include #include -static DEFINE_PER_CPU(struct cpuid, cpu_id); +struct cpu_info { + unsigned int cpu_mhz_dynamic; + unsigned int cpu_mhz_static; + struct cpuid cpu_id; +}; + +static DEFINE_PER_CPU(struct cpu_info, cpu_info); + +static bool machine_has_cpu_mhz; + +void __init cpu_detect_mhz_feature(void) +{ + if (test_facility(34) && __ecag(ECAG_CPU_ATTRIBUTE, 0) != -1UL) + machine_has_cpu_mhz = 1; +} + +static void update_cpu_mhz(void *arg) +{ + unsigned long mhz; + struct cpu_info *c; + + mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); + c = this_cpu_ptr(&cpu_info); + c->cpu_mhz_dynamic = mhz >> 32; + c->cpu_mhz_static = mhz & 0xffffffff; +} + +void s390_update_cpu_mhz(void) +{ + s390_adjust_jiffies(); + if (machine_has_cpu_mhz) + on_each_cpu(update_cpu_mhz, NULL, 0); +} void notrace cpu_relax(void) { @@ -35,9 +68,11 @@ EXPORT_SYMBOL(cpu_relax); */ void cpu_init(void) { - struct cpuid *id = this_cpu_ptr(&cpu_id); + struct cpuid *id = this_cpu_ptr(&cpu_info.cpu_id); get_cpu_id(id); + if (machine_has_cpu_mhz) + update_cpu_mhz(NULL); atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; BUG_ON(current->mm); @@ -64,7 +99,6 @@ static void show_cpu_summary(struct seq_file *m, void *v) }; int i, cpu; - s390_adjust_jiffies(); seq_printf(m, "vendor_id : IBM/S390\n" "# processors : %i\n" "bogomips per cpu: %lu.%02lu\n", @@ -80,7 +114,7 @@ static void show_cpu_summary(struct seq_file *m, void *v) seq_puts(m, "\n"); show_cacheinfo(m); for_each_online_cpu(cpu) { - struct cpuid *id = &per_cpu(cpu_id, cpu); + struct cpuid *id = &per_cpu(cpu_info.cpu_id, cpu); seq_printf(m, "processor %d: " "version = %02X, " @@ -90,6 +124,14 @@ static void show_cpu_summary(struct seq_file *m, void *v) } } +static void show_cpu_mhz(struct seq_file *m, unsigned long n) +{ + struct cpu_info *c = per_cpu_ptr(&cpu_info, n); + + seq_printf(m, "cpu MHz dynamic : %d\n", c->cpu_mhz_dynamic); + seq_printf(m, "cpu MHz static : %d\n", c->cpu_mhz_static); +} + /* * show_cpuinfo - Get information on one CPU for use by procfs. */ @@ -99,6 +141,10 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (!n) show_cpu_summary(m, v); + if (!machine_has_cpu_mhz) + return 0; + seq_printf(m, "\ncpu : %ld\n", n); + show_cpu_mhz(m, n); return 0; } @@ -132,4 +178,3 @@ const struct seq_operations cpuinfo_op = { .stop = c_stop, .show = show_cpuinfo, }; - diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f31939147ccd..d4e0742b197b 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -901,6 +901,7 @@ void __init setup_arch(char **cmdline_p) setup_vmcoreinfo(); setup_lowcore(); smp_fill_possible_mask(); + cpu_detect_mhz_feature(); cpu_init(); numa_setup(); diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index 2ced50ccca63..1406fb688a26 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -47,7 +47,7 @@ static void sclp_cpu_capability_notify(struct work_struct *work) int cpu; struct device *dev; - s390_adjust_jiffies(); + s390_update_cpu_mhz(); pr_info("CPU capability may have changed\n"); get_online_cpus(); for_each_online_cpu(cpu) { -- cgit v1.2.3-59-g8ed1b From 99ec1112da4a0fc19103cb87df1cfa0e413fcbb3 Mon Sep 17 00:00:00 2001 From: Daniel van Gerpen Date: Mon, 23 May 2016 15:19:38 +0200 Subject: s390: use canonical include guard style Signed-off-by: Daniel van Gerpen Acked-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/fcx.h | 2 +- drivers/s390/cio/chp.h | 2 +- drivers/s390/cio/idset.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/fcx.h b/arch/s390/include/asm/fcx.h index 7ecb92b469b6..04cb4b4bcc5f 100644 --- a/arch/s390/include/asm/fcx.h +++ b/arch/s390/include/asm/fcx.h @@ -6,7 +6,7 @@ */ #ifndef _ASM_S390_FCX_H -#define _ASM_S390_FCX_H _ASM_S390_FCX_H +#define _ASM_S390_FCX_H #include diff --git a/drivers/s390/cio/chp.h b/drivers/s390/cio/chp.h index af0232290dc4..bb5a68226cda 100644 --- a/drivers/s390/cio/chp.h +++ b/drivers/s390/cio/chp.h @@ -4,7 +4,7 @@ */ #ifndef S390_CHP_H -#define S390_CHP_H S390_CHP_H +#define S390_CHP_H #include #include diff --git a/drivers/s390/cio/idset.h b/drivers/s390/cio/idset.h index 22b58104683b..89a787790888 100644 --- a/drivers/s390/cio/idset.h +++ b/drivers/s390/cio/idset.h @@ -4,7 +4,7 @@ */ #ifndef S390_IDSET_H -#define S390_IDSET_H S390_IDSET_H +#define S390_IDSET_H #include -- cgit v1.2.3-59-g8ed1b From 4b8fe77ace2779e019863f9c166c15dc5bac5a24 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 24 May 2016 15:23:20 +0200 Subject: s390: dump_stack: fill in arch description Lets provide the basic machine information for dump_stack on s390. This enables the "Hardware name:" line and results in output like [...] Oops: 0004 ilc:2 [#1] SMP Modules linked in: CPU: 1 PID: 74 Comm: sh Not tainted 4.5.0+ #205 Hardware name: IBM 2964 NC9 704 (KVM) [...] Signed-off-by: Christian Borntraeger Acked-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/early.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index a0684de5a93b..4067dbe390ac 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -231,6 +231,26 @@ static noinline __init void detect_machine_type(void) S390_lowcore.machine_flags |= MACHINE_FLAG_VM; } +static noinline __init void setup_arch_string(void) +{ + struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page; + + if (stsi(mach, 1, 1, 1)) + return; + EBCASC(mach->manufacturer, sizeof(mach->manufacturer)); + EBCASC(mach->type, sizeof(mach->type)); + EBCASC(mach->model, sizeof(mach->model)); + EBCASC(mach->model_capacity, sizeof(mach->model_capacity)); + dump_stack_set_arch_desc("%-16.16s %-4.4s %-16.16s %-16.16s (%s)", + mach->manufacturer, + mach->type, + mach->model, + mach->model_capacity, + MACHINE_IS_LPAR ? "LPAR" : + MACHINE_IS_VM ? "z/VM" : + MACHINE_IS_KVM ? "KVM" : "unknown"); +} + static __init void setup_topology(void) { int max_mnest; @@ -452,6 +472,7 @@ void __init startup_init(void) setup_lowcore_early(); setup_facility_list(); detect_machine_type(); + setup_arch_string(); ipl_update_parameters(); setup_boot_command_line(); create_kernel_nss(); -- cgit v1.2.3-59-g8ed1b From 0ccb32c983e0fe79d408e50ec1386aaf78c9a7ed Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 28 May 2016 10:03:55 +0200 Subject: s390/mm: align swapper_pg_dir to 16k The segment/region table that is part of the kernel image must be properly aligned to 16k in order to make the crdte inline assembly work. Otherwise it will calculate a wrong segment/region table start address and access incorrect memory locations if the swapper_pg_dir is not aligned to 16k. Therefore define BSS_FIRST_SECTIONS in order to put the swapper_pg_dir at the beginning of the bss section and also align the bss section to 16k just like other architectures did. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 2 +- arch/s390/kernel/vmlinux.lds.S | 9 ++++++++- arch/s390/mm/init.c | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 3038edb12cad..37e1aa9bf84c 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -32,7 +32,7 @@ #include #include -extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); +extern pgd_t swapper_pg_dir[]; extern void paging_init(void); extern void vmem_map_init(void); pmd_t *vmem_pmd_alloc(void); diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 0f41a8286378..0656f7cc7fb4 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -4,6 +4,13 @@ #include #include + +/* + * Put .bss..swapper_pg_dir as the first thing in .bss. This will + * make sure it has 16k alignment. + */ +#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) + #include OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") @@ -81,7 +88,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ - BSS_SECTION(0, 2, 0) + BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE) _end = . ; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 2489b2e917c8..44db60d9e519 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -40,7 +40,7 @@ #include #include -pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); +pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir); unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); -- cgit v1.2.3-59-g8ed1b From 2d0af2247906cf17cd641cdb16444a8f291cd4f4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 31 May 2016 09:13:59 +0200 Subject: s390/kexec: fix update of os_info crash kernel size Implement an s390 version of the weak crash_free_reserved_phys_range function. This allows us to update the size of the reserved crash kernel memory if it will be resized. This was previously done with a call to crash_unmap_reserved_pages from crash_shrink_memory which was removed with ("s390/kexec: consolidate crash_map/unmap_reserved_pages() and arch_kexec_protect(unprotect)_crashkres()") Fixes: 7a0058ec7860 ("s390/kexec: consolidate crash_map/unmap_reserved_pages() and arch_kexec_protect(unprotect)_crashkres()") Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/machine_kexec.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 0e64f08d3d69..078d38c05490 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -150,6 +150,19 @@ static int kdump_csum_valid(struct kimage *image) #ifdef CONFIG_CRASH_DUMP +void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) +{ + unsigned long addr, size; + + for (addr = begin; addr < end; addr += PAGE_SIZE) + free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); + size = begin - crashk_res.start; + if (size) + os_info_crashkernel_add(crashk_res.start, size); + else + os_info_crashkernel_add(0, 0); +} + /* * Map or unmap crashkernel memory */ @@ -161,13 +174,8 @@ static void crash_map_pages(int enable) size % KEXEC_CRASH_MEM_ALIGN); if (enable) vmem_add_mapping(crashk_res.start, size); - else { + else vmem_remove_mapping(crashk_res.start, size); - if (size) - os_info_crashkernel_add(crashk_res.start, size); - else - os_info_crashkernel_add(0, 0); - } } /* -- cgit v1.2.3-59-g8ed1b From 4e042af463f806b6ef0e44048eba0964f0a5694e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 31 May 2016 09:14:00 +0200 Subject: s390/kexec: fix crash on resize of reserved memory Reducing the size of reserved memory for the crash kernel will result in an immediate crash on s390. Reason for that is that we do not create struct pages for memory that is reserved. If that memory is freed any access to struct pages which correspond to this memory will result in invalid memory accesses and a kernel panic. Fix this by properly creating struct pages when the system gets initialized. Change the code also to make use of set_memory_ro() and set_memory_rw() so page tables will be split if required. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/machine_kexec.c | 33 +++++++++++---------------------- arch/s390/kernel/setup.c | 15 ++++++++++++++- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 078d38c05490..3074c1d83829 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -60,8 +61,6 @@ static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action, static int __init machine_kdump_pm_init(void) { pm_notifier(machine_kdump_pm_cb, 0); - /* Create initial mapping for crashkernel memory */ - arch_kexec_unprotect_crashkres(); return 0; } arch_initcall(machine_kdump_pm_init); @@ -163,37 +162,27 @@ void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) os_info_crashkernel_add(0, 0); } -/* - * Map or unmap crashkernel memory - */ -static void crash_map_pages(int enable) +static void crash_protect_pages(int protect) { - unsigned long size = resource_size(&crashk_res); + unsigned long size; - BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN || - size % KEXEC_CRASH_MEM_ALIGN); - if (enable) - vmem_add_mapping(crashk_res.start, size); + if (!crashk_res.end) + return; + size = resource_size(&crashk_res); + if (protect) + set_memory_ro(crashk_res.start, size >> PAGE_SHIFT); else - vmem_remove_mapping(crashk_res.start, size); + set_memory_rw(crashk_res.start, size >> PAGE_SHIFT); } -/* - * Unmap crashkernel memory - */ void arch_kexec_protect_crashkres(void) { - if (crashk_res.end) - crash_map_pages(0); + crash_protect_pages(1); } -/* - * Map crashkernel memory - */ void arch_kexec_unprotect_crashkres(void) { - if (crashk_res.end) - crash_map_pages(1); + crash_protect_pages(0); } #endif diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index d4e0742b197b..9b4bb9d9275f 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -432,6 +432,20 @@ static void __init setup_resources(void) } } } +#ifdef CONFIG_CRASH_DUMP + /* + * Re-add removed crash kernel memory as reserved memory. This makes + * sure it will be mapped with the identity mapping and struct pages + * will be created, so it can be resized later on. + * However add it later since the crash kernel resource should not be + * part of the System RAM resource. + */ + if (crashk_res.end) { + memblock_add(crashk_res.start, resource_size(&crashk_res)); + memblock_reserve(crashk_res.start, resource_size(&crashk_res)); + insert_resource(&iomem_resource, &crashk_res); + } +#endif } static void __init setup_memory_end(void) @@ -602,7 +616,6 @@ static void __init reserve_crashkernel(void) diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; - insert_resource(&iomem_resource, &crashk_res); memblock_remove(crash_base, crash_size); pr_info("Reserving %lluMB of memory at %lluMB " "for crashkernel (System RAM: %luMB)\n", -- cgit v1.2.3-59-g8ed1b From 2f82f57763d97700788cf228de1cf30ffd4153b4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 31 May 2016 09:09:57 +0200 Subject: s390/time: STP sync clock correction The sync clock operation of the channel subsystem call for STP delivers the TOD clock difference as a result. Use this TOD clock difference instead of the difference between the TOD timestamps before and after the sync clock operation. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cio.h | 2 +- arch/s390/kernel/time.c | 11 +++++------ drivers/s390/cio/chsc.c | 8 ++++++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index d1e7b0a0feeb..f7ed88cc066e 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -320,7 +320,7 @@ struct cio_iplinfo { extern int cio_get_iplinfo(struct cio_iplinfo *iplinfo); /* Function from drivers/s390/cio/chsc.c */ -int chsc_sstpc(void *page, unsigned int op, u16 ctrl); +int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta); int chsc_sstpi(void *page, void *result, size_t size); #endif diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 9409d32f285e..a725fd1c4315 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -1455,7 +1455,7 @@ static void __init stp_reset(void) int rc; stp_page = (void *) get_zeroed_page(GFP_ATOMIC); - rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); if (rc == 0) set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags); else if (stp_online) { @@ -1554,11 +1554,10 @@ static int stp_sync_clock(void *data) stp_info.todoff[2] || stp_info.todoff[3] || stp_info.tmd != 2) { old_clock = get_tod_clock(); - rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0); + rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0, &clock_delta); if (rc == 0) { - new_clock = get_tod_clock(); + new_clock = old_clock + clock_delta; delta = adjust_time(old_clock, new_clock, 0); - clock_delta = new_clock - old_clock; atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0, &clock_delta); fixup_clock_comparator(delta); @@ -1590,12 +1589,12 @@ static void stp_work_fn(struct work_struct *work) mutex_lock(&stp_work_mutex); if (!stp_online) { - chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); + chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); del_timer_sync(&stp_timer); goto out_unlock; } - rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0); + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0, NULL); if (rc) goto out_unlock; diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index c424c0c7367e..452193f7298c 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -1176,7 +1176,7 @@ exit: EXPORT_SYMBOL_GPL(css_general_characteristics); EXPORT_SYMBOL_GPL(css_chsc_characteristics); -int chsc_sstpc(void *page, unsigned int op, u16 ctrl) +int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta) { struct { struct chsc_header request; @@ -1186,7 +1186,9 @@ int chsc_sstpc(void *page, unsigned int op, u16 ctrl) unsigned int ctrl : 16; unsigned int rsvd2[5]; struct chsc_header response; - unsigned int rsvd3[7]; + unsigned int rsvd3[3]; + u64 clock_delta; + unsigned int rsvd4[2]; } __attribute__ ((packed)) *rr; int rc; @@ -1200,6 +1202,8 @@ int chsc_sstpc(void *page, unsigned int op, u16 ctrl) if (rc) return -EIO; rc = (rr->response.code == 0x0001) ? 0 : -EIO; + if (clock_delta) + *clock_delta = rr->clock_delta; return rc; } -- cgit v1.2.3-59-g8ed1b From 9dc06ccf4699db81b88a6ff45a8acefd6c278327 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 31 May 2016 09:23:37 +0200 Subject: s390/time: move PTFF definitions The PTFF instruction is not a function of ETR, rename and move the PTFF definitions from etr.h to timex.h. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/etr.h | 32 -------------------------------- arch/s390/include/asm/timex.h | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h index 105f90e63a0e..63b99cc3e657 100644 --- a/arch/s390/include/asm/etr.h +++ b/arch/s390/include/asm/etr.h @@ -129,14 +129,6 @@ struct etr_irq_parm { unsigned int _pad2 : 18; } __attribute__ ((packed)); -/* Query TOD offset result */ -struct etr_ptff_qto { - unsigned long long physical_clock; - unsigned long long tod_offset; - unsigned long long logical_tod_offset; - unsigned long long tod_epoch_difference; -} __attribute__ ((packed)); - /* Inline assembly helper functions */ static inline int etr_setr(struct etr_eacr *ctrl) { @@ -186,30 +178,6 @@ static inline int etr_steai(struct etr_aib *aib, unsigned int func) #define ETR_STEAI_PORT_0 0x12 #define ETR_STEAI_PORT_1 0x13 -static inline int etr_ptff(void *ptff_block, unsigned int func) -{ - register unsigned int reg0 asm("0") = func; - register unsigned long reg1 asm("1") = (unsigned long) ptff_block; - int rc = -EOPNOTSUPP; - - asm volatile( - " .word 0x0104\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (rc), "=m" (ptff_block) - : "d" (reg0), "d" (reg1), "m" (ptff_block) : "cc"); - return rc; -} - -/* Function codes for the ptff instruction. */ -#define ETR_PTFF_QAF 0x00 /* query available functions */ -#define ETR_PTFF_QTO 0x01 /* query tod offset */ -#define ETR_PTFF_QSI 0x02 /* query steering information */ -#define ETR_PTFF_ATO 0x40 /* adjust tod offset */ -#define ETR_PTFF_STO 0x41 /* set tod offset */ -#define ETR_PTFF_SFS 0x42 /* set fine steering rate */ -#define ETR_PTFF_SGS 0x43 /* set gross steering rate */ - /* Functions needed by the machine check handler */ int etr_switch_to_local(void); int etr_sync_check(void); diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index dcb6312a0b91..920db0a2496b 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -52,6 +52,39 @@ static inline void store_clock_comparator(__u64 *time) void clock_comparator_work(void); +/* Function codes for the ptff instruction. */ +#define PTFF_QAF 0x00 /* query available functions */ +#define PTFF_QTO 0x01 /* query tod offset */ +#define PTFF_QSI 0x02 /* query steering information */ +#define PTFF_ATO 0x40 /* adjust tod offset */ +#define PTFF_STO 0x41 /* set tod offset */ +#define PTFF_SFS 0x42 /* set fine steering rate */ +#define PTFF_SGS 0x43 /* set gross steering rate */ + +/* Query TOD offset result */ +struct ptff_qto { + unsigned long long physical_clock; + unsigned long long tod_offset; + unsigned long long logical_tod_offset; + unsigned long long tod_epoch_difference; +} __packed; + +static inline int ptff(void *ptff_block, size_t len, unsigned int func) +{ + typedef struct { char _[len]; } addrtype; + register unsigned int reg0 asm("0") = func; + register unsigned long reg1 asm("1") = (unsigned long) ptff_block; + int rc; + + asm volatile( + " .word 0x0104\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (rc), "+m" (*(addrtype *) ptff_block) + : "d" (reg0), "d" (reg1) : "cc"); + return rc; +} + static inline unsigned long long local_tick_disable(void) { unsigned long long old; -- cgit v1.2.3-59-g8ed1b From 4027789192d149678262ad606b2d7e2a61bed0f2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 31 May 2016 10:16:24 +0200 Subject: s390/time: LPAR offset handling It is possible to specify a user offset for the TOD clock, e.g. +2 hours. The TOD clock will carry this offset even if the clock is synchronized with STP. This makes the time stamps acquired with get_sync_clock() useless as another LPAR migth use a different TOD offset. Use the PTFF instrution to get the TOD epoch difference and subtract it from the TOD clock value to get a physical timestamp. As the epoch difference contains the sync check delta as well the LPAR offset value to the physical clock needs to be refreshed after each clock synchronization. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/timex.h | 15 +++++++++++++- arch/s390/kernel/early.c | 1 + arch/s390/kernel/time.c | 46 ++++++++++++++++++++++++++++++++++-------- drivers/s390/block/dasd_eckd.c | 4 ++-- 4 files changed, 55 insertions(+), 11 deletions(-) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 920db0a2496b..0f34db4238da 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -52,6 +52,11 @@ static inline void store_clock_comparator(__u64 *time) void clock_comparator_work(void); +void __init ptff_init(void); + +extern unsigned char ptff_function_mask[16]; +extern unsigned long lpar_offset; + /* Function codes for the ptff instruction. */ #define PTFF_QAF 0x00 /* query available functions */ #define PTFF_QTO 0x01 /* query tod offset */ @@ -69,6 +74,14 @@ struct ptff_qto { unsigned long long tod_epoch_difference; } __packed; +static inline int ptff_query(unsigned int nr) +{ + unsigned char *ptr; + + ptr = ptff_function_mask + (nr >> 3); + return (*ptr & (0x80 >> (nr & 7))) != 0; +} + static inline int ptff(void *ptff_block, size_t len, unsigned int func) { typedef struct { char _[len]; } addrtype; @@ -138,7 +151,7 @@ static inline cycles_t get_cycles(void) return (cycles_t) get_tod_clock() >> 2; } -int get_sync_clock(unsigned long long *clock); +int get_phys_clock(unsigned long long *clock); void init_cpu_timer(void); unsigned long long monotonic_clock(void); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 4067dbe390ac..717b03aa16b5 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -467,6 +467,7 @@ void __init startup_init(void) ipl_save_parameters(); rescue_initrd(); clear_bss_section(); + ptff_init(); init_kernel_storage_key(); lockdep_off(); setup_lowcore_early(); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index a725fd1c4315..d71623639997 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -61,6 +62,25 @@ static DEFINE_PER_CPU(struct clock_event_device, comparators); ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier); EXPORT_SYMBOL(s390_epoch_delta_notifier); +unsigned char ptff_function_mask[16]; +unsigned long lpar_offset; + +/* + * Get time offsets with PTFF + */ +void __init ptff_init(void) +{ + struct ptff_qto qto; + + if (!test_facility(28)) + return; + ptff(&ptff_function_mask, sizeof(ptff_function_mask), PTFF_QAF); + + /* get LPAR offset */ + if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) + lpar_offset = qto.tod_epoch_difference; +} + /* * Scheduler clock - returns current time in nanosec units. */ @@ -337,20 +357,20 @@ static unsigned long clock_sync_flags; #define CLOCK_SYNC_STP 3 /* - * The synchronous get_clock function. It will write the current clock - * value to the clock pointer and return 0 if the clock is in sync with - * the external time source. If the clock mode is local it will return - * -EOPNOTSUPP and -EAGAIN if the clock is not in sync with the external - * reference. + * The get_clock function for the physical clock. It will get the current + * TOD clock, subtract the LPAR offset and write the result to *clock. + * The function returns 0 if the clock is in sync with the external time + * source. If the clock mode is local it will return -EOPNOTSUPP and + * -EAGAIN if the clock is not in sync with the external reference. */ -int get_sync_clock(unsigned long long *clock) +int get_phys_clock(unsigned long long *clock) { atomic_t *sw_ptr; unsigned int sw0, sw1; sw_ptr = &get_cpu_var(clock_sync_word); sw0 = atomic_read(sw_ptr); - *clock = get_tod_clock(); + *clock = get_tod_clock() - lpar_offset; sw1 = atomic_read(sw_ptr); put_cpu_var(clock_sync_word); if (sw0 == sw1 && (sw0 & 0x80000000U)) @@ -364,7 +384,7 @@ int get_sync_clock(unsigned long long *clock) return -EACCES; return -EAGAIN; } -EXPORT_SYMBOL(get_sync_clock); +EXPORT_SYMBOL(get_phys_clock); /* * Make get_sync_clock return -EAGAIN. @@ -758,6 +778,7 @@ static int etr_sync_clock(void *data) unsigned long long clock, old_clock, clock_delta, delay, delta; struct clock_sync_data *etr_sync; struct etr_aib *sync_port, *aib; + struct ptff_qto qto; int port; int rc; @@ -804,6 +825,10 @@ static int etr_sync_clock(void *data) etr_sync->in_sync = -EAGAIN; rc = -EAGAIN; } else { + if (ptff_query(PTFF_QTO) && + ptff(&qto, sizeof(qto), PTFF_QTO) == 0) + /* Update LPAR offset */ + lpar_offset = qto.tod_epoch_difference; etr_sync->in_sync = 1; rc = 0; } @@ -1533,6 +1558,7 @@ static int stp_sync_clock(void *data) static int first; unsigned long long old_clock, delta, new_clock, clock_delta; struct clock_sync_data *stp_sync; + struct ptff_qto qto; int rc; stp_sync = data; @@ -1558,6 +1584,10 @@ static int stp_sync_clock(void *data) if (rc == 0) { new_clock = old_clock + clock_delta; delta = adjust_time(old_clock, new_clock, 0); + if (ptff_query(PTFF_QTO) && + ptff(&qto, sizeof(qto), PTFF_QTO) == 0) + /* Update LPAR offset */ + lpar_offset = qto.tod_epoch_difference; atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0, &clock_delta); fixup_clock_comparator(delta); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 42b34cd1f002..fd2eff440098 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -228,7 +228,7 @@ check_XRC (struct ccw1 *de_ccw, data->ga_extended |= 0x08; /* switch on 'Time Stamp Valid' */ data->ga_extended |= 0x02; /* switch on 'Extended Parameter' */ - rc = get_sync_clock(&data->ep_sys_time); + rc = get_phys_clock(&data->ep_sys_time); /* Ignore return code if sync clock is switched off. */ if (rc == -EOPNOTSUPP || rc == -EACCES) rc = 0; @@ -339,7 +339,7 @@ static int check_XRC_on_prefix(struct PFX_eckd_data *pfxdata, pfxdata->define_extent.ga_extended |= 0x02; /* 'Extended Parameter' */ pfxdata->validity.time_stamp = 1; /* 'Time Stamp Valid' */ - rc = get_sync_clock(&pfxdata->define_extent.ep_sys_time); + rc = get_phys_clock(&pfxdata->define_extent.ep_sys_time); /* Ignore return code if sync clock is switched off. */ if (rc == -EOPNOTSUPP || rc == -EACCES) rc = 0; -- cgit v1.2.3-59-g8ed1b From 936cc855ffe808b428cf75116fe031af9f12237e Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 31 May 2016 12:47:03 +0200 Subject: s390/time: add leap seconds to initial system time The PTFF instruction can be used to retrieve information about UTC including the current number of leap seconds. Use this value to convert the coordinated server time value of the TOD clock to a proper UTC timestamp to initialize the system time. Without this correction the system time will be off by the number of leap seonds until it has been corrected via NTP. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/timex.h | 18 ++++++++++++++++++ arch/s390/kernel/time.c | 17 +++++++++++++++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 0f34db4238da..0bb08f341c09 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -56,11 +56,13 @@ void __init ptff_init(void); extern unsigned char ptff_function_mask[16]; extern unsigned long lpar_offset; +extern unsigned long initial_leap_seconds; /* Function codes for the ptff instruction. */ #define PTFF_QAF 0x00 /* query available functions */ #define PTFF_QTO 0x01 /* query tod offset */ #define PTFF_QSI 0x02 /* query steering information */ +#define PTFF_QUI 0x04 /* query UTC information */ #define PTFF_ATO 0x40 /* adjust tod offset */ #define PTFF_STO 0x41 /* set tod offset */ #define PTFF_SFS 0x42 /* set fine steering rate */ @@ -82,6 +84,22 @@ static inline int ptff_query(unsigned int nr) return (*ptr & (0x80 >> (nr & 7))) != 0; } +/* Query UTC information result */ +struct ptff_qui { + unsigned int tm : 2; + unsigned int ts : 2; + unsigned int : 28; + unsigned int pad_0x04; + unsigned long leap_event; + short old_leap; + short new_leap; + unsigned int pad_0x14; + unsigned long prt[5]; + unsigned long cst[3]; + unsigned int skew; + unsigned int pad_0x5c[41]; +} __packed; + static inline int ptff(void *ptff_block, size_t len, unsigned int func) { typedef struct { char _[len]; } addrtype; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index d71623639997..1a27d4d08b43 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -64,6 +64,7 @@ EXPORT_SYMBOL(s390_epoch_delta_notifier); unsigned char ptff_function_mask[16]; unsigned long lpar_offset; +unsigned long initial_leap_seconds; /* * Get time offsets with PTFF @@ -71,6 +72,7 @@ unsigned long lpar_offset; void __init ptff_init(void) { struct ptff_qto qto; + struct ptff_qui qui; if (!test_facility(28)) return; @@ -79,6 +81,11 @@ void __init ptff_init(void) /* get LPAR offset */ if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) lpar_offset = qto.tod_epoch_difference; + + /* get initial leap seconds */ + if (ptff_query(PTFF_QUI) && ptff(&qui, sizeof(qui), PTFF_QUI) == 0) + initial_leap_seconds = (unsigned long) + ((long) qui.old_leap * 4096000000L); } /* @@ -200,12 +207,18 @@ static void stp_reset(void); void read_persistent_clock64(struct timespec64 *ts) { - tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts); + __u64 clock; + + clock = get_tod_clock() - initial_leap_seconds; + tod_to_timeval(clock - TOD_UNIX_EPOCH, ts); } void read_boot_clock64(struct timespec64 *ts) { - tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts); + __u64 clock; + + clock = sched_clock_base_cc - initial_leap_seconds; + tod_to_timeval(clock - TOD_UNIX_EPOCH, ts); } static cycle_t read_tod_clock(struct clocksource *cs) -- cgit v1.2.3-59-g8ed1b From fd5ada04030cb584251c381cb70daa41e984ae82 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 31 May 2016 15:06:51 +0200 Subject: s390/time: remove ETR support The External-Time-Reference (ETR) clock synchronization interface has been superseded by Server-Time-Protocol (STP). Remove the outdated ETR interface. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/etr.h | 229 ---------- arch/s390/include/asm/stp.h | 51 +++ arch/s390/kernel/nmi.c | 13 +- arch/s390/kernel/time.c | 989 +------------------------------------------- arch/s390/kvm/kvm-s390.c | 2 +- 5 files changed, 55 insertions(+), 1229 deletions(-) delete mode 100644 arch/s390/include/asm/etr.h create mode 100644 arch/s390/include/asm/stp.h diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h deleted file mode 100644 index 63b99cc3e657..000000000000 --- a/arch/s390/include/asm/etr.h +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Copyright IBM Corp. 2006 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ -#ifndef __S390_ETR_H -#define __S390_ETR_H - -/* ETR attachment control register */ -struct etr_eacr { - unsigned int e0 : 1; /* port 0 stepping control */ - unsigned int e1 : 1; /* port 1 stepping control */ - unsigned int _pad0 : 5; /* must be 00100 */ - unsigned int dp : 1; /* data port control */ - unsigned int p0 : 1; /* port 0 change recognition control */ - unsigned int p1 : 1; /* port 1 change recognition control */ - unsigned int _pad1 : 3; /* must be 000 */ - unsigned int ea : 1; /* ETR alert control */ - unsigned int es : 1; /* ETR sync check control */ - unsigned int sl : 1; /* switch to local control */ -} __attribute__ ((packed)); - -/* Port state returned by steai */ -enum etr_psc { - etr_psc_operational = 0, - etr_psc_semi_operational = 1, - etr_psc_protocol_error = 4, - etr_psc_no_symbols = 8, - etr_psc_no_signal = 12, - etr_psc_pps_mode = 13 -}; - -/* Logical port state returned by stetr */ -enum etr_lpsc { - etr_lpsc_operational_step = 0, - etr_lpsc_operational_alt = 1, - etr_lpsc_semi_operational = 2, - etr_lpsc_protocol_error = 4, - etr_lpsc_no_symbol_sync = 8, - etr_lpsc_no_signal = 12, - etr_lpsc_pps_mode = 13 -}; - -/* ETR status words */ -struct etr_esw { - struct etr_eacr eacr; /* attachment control register */ - unsigned int y : 1; /* stepping mode */ - unsigned int _pad0 : 5; /* must be 00000 */ - unsigned int p : 1; /* stepping port number */ - unsigned int q : 1; /* data port number */ - unsigned int psc0 : 4; /* port 0 state code */ - unsigned int psc1 : 4; /* port 1 state code */ -} __attribute__ ((packed)); - -/* Second level data register status word */ -struct etr_slsw { - unsigned int vv1 : 1; /* copy of validity bit data frame 1 */ - unsigned int vv2 : 1; /* copy of validity bit data frame 2 */ - unsigned int vv3 : 1; /* copy of validity bit data frame 3 */ - unsigned int vv4 : 1; /* copy of validity bit data frame 4 */ - unsigned int _pad0 : 19; /* must by all zeroes */ - unsigned int n : 1; /* EAF port number */ - unsigned int v1 : 1; /* validity bit ETR data frame 1 */ - unsigned int v2 : 1; /* validity bit ETR data frame 2 */ - unsigned int v3 : 1; /* validity bit ETR data frame 3 */ - unsigned int v4 : 1; /* validity bit ETR data frame 4 */ - unsigned int _pad1 : 4; /* must be 0000 */ -} __attribute__ ((packed)); - -/* ETR data frames */ -struct etr_edf1 { - unsigned int u : 1; /* untuned bit */ - unsigned int _pad0 : 1; /* must be 0 */ - unsigned int r : 1; /* service request bit */ - unsigned int _pad1 : 4; /* must be 0000 */ - unsigned int a : 1; /* time adjustment bit */ - unsigned int net_id : 8; /* ETR network id */ - unsigned int etr_id : 8; /* id of ETR which sends data frames */ - unsigned int etr_pn : 8; /* port number of ETR output port */ -} __attribute__ ((packed)); - -struct etr_edf2 { - unsigned int etv : 32; /* Upper 32 bits of TOD. */ -} __attribute__ ((packed)); - -struct etr_edf3 { - unsigned int rc : 8; /* failure reason code */ - unsigned int _pad0 : 3; /* must be 000 */ - unsigned int c : 1; /* ETR coupled bit */ - unsigned int tc : 4; /* ETR type code */ - unsigned int blto : 8; /* biased local time offset */ - /* (blto - 128) * 15 = minutes */ - unsigned int buo : 8; /* biased utc offset */ - /* (buo - 128) = leap seconds */ -} __attribute__ ((packed)); - -struct etr_edf4 { - unsigned int ed : 8; /* ETS device dependent data */ - unsigned int _pad0 : 1; /* must be 0 */ - unsigned int buc : 5; /* biased ut1 correction */ - /* (buc - 16) * 0.1 seconds */ - unsigned int em : 6; /* ETS error magnitude */ - unsigned int dc : 6; /* ETS drift code */ - unsigned int sc : 6; /* ETS steering code */ -} __attribute__ ((packed)); - -/* - * ETR attachment information block, two formats - * format 1 has 4 reserved words with a size of 64 bytes - * format 2 has 16 reserved words with a size of 96 bytes - */ -struct etr_aib { - struct etr_esw esw; - struct etr_slsw slsw; - unsigned long long tsp; - struct etr_edf1 edf1; - struct etr_edf2 edf2; - struct etr_edf3 edf3; - struct etr_edf4 edf4; - unsigned int reserved[16]; -} __attribute__ ((packed,aligned(8))); - -/* ETR interruption parameter */ -struct etr_irq_parm { - unsigned int _pad0 : 8; - unsigned int pc0 : 1; /* port 0 state change */ - unsigned int pc1 : 1; /* port 1 state change */ - unsigned int _pad1 : 3; - unsigned int eai : 1; /* ETR alert indication */ - unsigned int _pad2 : 18; -} __attribute__ ((packed)); - -/* Inline assembly helper functions */ -static inline int etr_setr(struct etr_eacr *ctrl) -{ - int rc = -EOPNOTSUPP; - - asm volatile( - " .insn s,0xb2160000,%1\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc) : "Q" (*ctrl)); - return rc; -} - -/* Stores a format 1 aib with 64 bytes */ -static inline int etr_stetr(struct etr_aib *aib) -{ - int rc = -EOPNOTSUPP; - - asm volatile( - " .insn s,0xb2170000,%1\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc) : "Q" (*aib)); - return rc; -} - -/* Stores a format 2 aib with 96 bytes for specified port */ -static inline int etr_steai(struct etr_aib *aib, unsigned int func) -{ - register unsigned int reg0 asm("0") = func; - int rc = -EOPNOTSUPP; - - asm volatile( - " .insn s,0xb2b30000,%1\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc) : "Q" (*aib), "d" (reg0)); - return rc; -} - -/* Function codes for the steai instruction. */ -#define ETR_STEAI_STEPPING_PORT 0x10 -#define ETR_STEAI_ALTERNATE_PORT 0x11 -#define ETR_STEAI_PORT_0 0x12 -#define ETR_STEAI_PORT_1 0x13 - -/* Functions needed by the machine check handler */ -int etr_switch_to_local(void); -int etr_sync_check(void); -void etr_queue_work(void); - -/* notifier for syncs */ -extern struct atomic_notifier_head s390_epoch_delta_notifier; - -/* STP interruption parameter */ -struct stp_irq_parm { - unsigned int _pad0 : 14; - unsigned int tsc : 1; /* Timing status change */ - unsigned int lac : 1; /* Link availability change */ - unsigned int tcpc : 1; /* Time control parameter change */ - unsigned int _pad2 : 15; -} __attribute__ ((packed)); - -#define STP_OP_SYNC 1 -#define STP_OP_CTRL 3 - -struct stp_sstpi { - unsigned int rsvd0; - unsigned int rsvd1 : 8; - unsigned int stratum : 8; - unsigned int vbits : 16; - unsigned int leaps : 16; - unsigned int tmd : 4; - unsigned int ctn : 4; - unsigned int rsvd2 : 3; - unsigned int c : 1; - unsigned int tst : 4; - unsigned int tzo : 16; - unsigned int dsto : 16; - unsigned int ctrl : 16; - unsigned int rsvd3 : 16; - unsigned int tto; - unsigned int rsvd4; - unsigned int ctnid[3]; - unsigned int rsvd5; - unsigned int todoff[4]; - unsigned int rsvd6[48]; -} __attribute__ ((packed)); - -/* Functions needed by the machine check handler */ -int stp_sync_check(void); -int stp_island_check(void); -void stp_queue_work(void); - -#endif /* __S390_ETR_H */ diff --git a/arch/s390/include/asm/stp.h b/arch/s390/include/asm/stp.h new file mode 100644 index 000000000000..7689727585b2 --- /dev/null +++ b/arch/s390/include/asm/stp.h @@ -0,0 +1,51 @@ +/* + * Copyright IBM Corp. 2006 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ +#ifndef __S390_STP_H +#define __S390_STP_H + +/* notifier for syncs */ +extern struct atomic_notifier_head s390_epoch_delta_notifier; + +/* STP interruption parameter */ +struct stp_irq_parm { + unsigned int _pad0 : 14; + unsigned int tsc : 1; /* Timing status change */ + unsigned int lac : 1; /* Link availability change */ + unsigned int tcpc : 1; /* Time control parameter change */ + unsigned int _pad2 : 15; +} __attribute__ ((packed)); + +#define STP_OP_SYNC 1 +#define STP_OP_CTRL 3 + +struct stp_sstpi { + unsigned int rsvd0; + unsigned int rsvd1 : 8; + unsigned int stratum : 8; + unsigned int vbits : 16; + unsigned int leaps : 16; + unsigned int tmd : 4; + unsigned int ctn : 4; + unsigned int rsvd2 : 3; + unsigned int c : 1; + unsigned int tst : 4; + unsigned int tzo : 16; + unsigned int dsto : 16; + unsigned int ctrl : 16; + unsigned int rsvd3 : 16; + unsigned int tto; + unsigned int rsvd4; + unsigned int ctnid[3]; + unsigned int rsvd5; + unsigned int todoff[4]; + unsigned int rsvd6[48]; +} __attribute__ ((packed)); + +/* Functions needed by the machine check handler */ +int stp_sync_check(void); +int stp_island_check(void); +void stp_queue_work(void); + +#endif /* __S390_STP_H */ diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 07302ce37648..29376f0e725c 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include @@ -27,7 +27,6 @@ struct mcck_struct { unsigned int kill_task : 1; unsigned int channel_report : 1; unsigned int warning : 1; - unsigned int etr_queue : 1; unsigned int stp_queue : 1; unsigned long mcck_code; }; @@ -82,8 +81,6 @@ void s390_handle_mcck(void) if (xchg(&mchchk_wng_posted, 1) == 0) kill_cad_pid(SIGPWR, 1); } - if (mcck.etr_queue) - etr_queue_work(); if (mcck.stp_queue) stp_queue_work(); if (mcck.kill_task) { @@ -241,8 +238,6 @@ static int notrace s390_validate_registers(union mci mci) #define ED_STP_ISLAND 6 /* External damage STP island check */ #define ED_STP_SYNC 7 /* External damage STP sync check */ -#define ED_ETR_SYNC 12 /* External damage ETR sync check */ -#define ED_ETR_SWITCH 13 /* External damage ETR switch to local */ /* * machine check handler. @@ -325,15 +320,11 @@ void notrace s390_do_machine_check(struct pt_regs *regs) } if (mci.ed && mci.ec) { /* External damage */ - if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC)) - mcck->etr_queue |= etr_sync_check(); - if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH)) - mcck->etr_queue |= etr_switch_to_local(); if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC)) mcck->stp_queue |= stp_sync_check(); if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND)) mcck->stp_queue |= stp_island_check(); - if (mcck->etr_queue || mcck->stp_queue) + if (mcck->stp_queue) set_cpu_flag(CIF_MCCK_PENDING); } if (mci.se) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 1a27d4d08b43..4e9949800562 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include "entry.h" @@ -189,20 +189,16 @@ static void clock_comparator_interrupt(struct ext_code ext_code, set_clock_comparator(S390_lowcore.clock_comparator); } -static void etr_timing_alert(struct etr_irq_parm *); static void stp_timing_alert(struct stp_irq_parm *); static void timing_alert_interrupt(struct ext_code ext_code, unsigned int param32, unsigned long param64) { inc_irq_stat(IRQEXT_TLA); - if (param32 & 0x00c40000) - etr_timing_alert((struct etr_irq_parm *) ¶m32); if (param32 & 0x00038000) stp_timing_alert((struct stp_irq_parm *) ¶m32); } -static void etr_reset(void); static void stp_reset(void); void read_persistent_clock64(struct timespec64 *ts) @@ -302,7 +298,6 @@ void update_vsyscall_tz(void) void __init time_init(void) { /* Reset time synchronization interfaces. */ - etr_reset(); stp_reset(); /* request the clock comparator external interrupt */ @@ -449,301 +444,6 @@ static void __init time_init_wq(void) time_sync_wq = create_singlethread_workqueue("timesync"); } -/* - * External Time Reference (ETR) code. - */ -static int etr_port0_online; -static int etr_port1_online; -static int etr_steai_available; - -static int __init early_parse_etr(char *p) -{ - if (strncmp(p, "off", 3) == 0) - etr_port0_online = etr_port1_online = 0; - else if (strncmp(p, "port0", 5) == 0) - etr_port0_online = 1; - else if (strncmp(p, "port1", 5) == 0) - etr_port1_online = 1; - else if (strncmp(p, "on", 2) == 0) - etr_port0_online = etr_port1_online = 1; - return 0; -} -early_param("etr", early_parse_etr); - -enum etr_event { - ETR_EVENT_PORT0_CHANGE, - ETR_EVENT_PORT1_CHANGE, - ETR_EVENT_PORT_ALERT, - ETR_EVENT_SYNC_CHECK, - ETR_EVENT_SWITCH_LOCAL, - ETR_EVENT_UPDATE, -}; - -/* - * Valid bit combinations of the eacr register are (x = don't care): - * e0 e1 dp p0 p1 ea es sl - * 0 0 x 0 0 0 0 0 initial, disabled state - * 0 0 x 0 1 1 0 0 port 1 online - * 0 0 x 1 0 1 0 0 port 0 online - * 0 0 x 1 1 1 0 0 both ports online - * 0 1 x 0 1 1 0 0 port 1 online and usable, ETR or PPS mode - * 0 1 x 0 1 1 0 1 port 1 online, usable and ETR mode - * 0 1 x 0 1 1 1 0 port 1 online, usable, PPS mode, in-sync - * 0 1 x 0 1 1 1 1 port 1 online, usable, ETR mode, in-sync - * 0 1 x 1 1 1 0 0 both ports online, port 1 usable - * 0 1 x 1 1 1 1 0 both ports online, port 1 usable, PPS mode, in-sync - * 0 1 x 1 1 1 1 1 both ports online, port 1 usable, ETR mode, in-sync - * 1 0 x 1 0 1 0 0 port 0 online and usable, ETR or PPS mode - * 1 0 x 1 0 1 0 1 port 0 online, usable and ETR mode - * 1 0 x 1 0 1 1 0 port 0 online, usable, PPS mode, in-sync - * 1 0 x 1 0 1 1 1 port 0 online, usable, ETR mode, in-sync - * 1 0 x 1 1 1 0 0 both ports online, port 0 usable - * 1 0 x 1 1 1 1 0 both ports online, port 0 usable, PPS mode, in-sync - * 1 0 x 1 1 1 1 1 both ports online, port 0 usable, ETR mode, in-sync - * 1 1 x 1 1 1 1 0 both ports online & usable, ETR, in-sync - * 1 1 x 1 1 1 1 1 both ports online & usable, ETR, in-sync - */ -static struct etr_eacr etr_eacr; -static u64 etr_tolec; /* time of last eacr update */ -static struct etr_aib etr_port0; -static int etr_port0_uptodate; -static struct etr_aib etr_port1; -static int etr_port1_uptodate; -static unsigned long etr_events; -static struct timer_list etr_timer; - -static void etr_timeout(unsigned long dummy); -static void etr_work_fn(struct work_struct *work); -static DEFINE_MUTEX(etr_work_mutex); -static DECLARE_WORK(etr_work, etr_work_fn); - -/* - * Reset ETR attachment. - */ -static void etr_reset(void) -{ - etr_eacr = (struct etr_eacr) { - .e0 = 0, .e1 = 0, ._pad0 = 4, .dp = 0, - .p0 = 0, .p1 = 0, ._pad1 = 0, .ea = 0, - .es = 0, .sl = 0 }; - if (etr_setr(&etr_eacr) == 0) { - etr_tolec = get_tod_clock(); - set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags); - if (etr_port0_online && etr_port1_online) - set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - } else if (etr_port0_online || etr_port1_online) { - pr_warn("The real or virtual hardware system does not provide an ETR interface\n"); - etr_port0_online = etr_port1_online = 0; - } -} - -static int __init etr_init(void) -{ - struct etr_aib aib; - - if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags)) - return 0; - time_init_wq(); - /* Check if this machine has the steai instruction. */ - if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0) - etr_steai_available = 1; - setup_timer(&etr_timer, etr_timeout, 0UL); - if (etr_port0_online) { - set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); - queue_work(time_sync_wq, &etr_work); - } - if (etr_port1_online) { - set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); - queue_work(time_sync_wq, &etr_work); - } - return 0; -} - -arch_initcall(etr_init); - -/* - * Two sorts of ETR machine checks. The architecture reads: - * "When a machine-check niterruption occurs and if a switch-to-local or - * ETR-sync-check interrupt request is pending but disabled, this pending - * disabled interruption request is indicated and is cleared". - * Which means that we can get etr_switch_to_local events from the machine - * check handler although the interruption condition is disabled. Lovely.. - */ - -/* - * Switch to local machine check. This is called when the last usable - * ETR port goes inactive. After switch to local the clock is not in sync. - */ -int etr_switch_to_local(void) -{ - if (!etr_eacr.sl) - return 0; - disable_sync_clock(NULL); - if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) { - etr_eacr.es = etr_eacr.sl = 0; - etr_setr(&etr_eacr); - return 1; - } - return 0; -} - -/* - * ETR sync check machine check. This is called when the ETR OTE and the - * local clock OTE are farther apart than the ETR sync check tolerance. - * After a ETR sync check the clock is not in sync. The machine check - * is broadcasted to all cpus at the same time. - */ -int etr_sync_check(void) -{ - if (!etr_eacr.es) - return 0; - disable_sync_clock(NULL); - if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) { - etr_eacr.es = 0; - etr_setr(&etr_eacr); - return 1; - } - return 0; -} - -void etr_queue_work(void) -{ - queue_work(time_sync_wq, &etr_work); -} - -/* - * ETR timing alert. There are two causes: - * 1) port state change, check the usability of the port - * 2) port alert, one of the ETR-data-validity bits (v1-v2 bits of the - * sldr-status word) or ETR-data word 1 (edf1) or ETR-data word 3 (edf3) - * or ETR-data word 4 (edf4) has changed. - */ -static void etr_timing_alert(struct etr_irq_parm *intparm) -{ - if (intparm->pc0) - /* ETR port 0 state change. */ - set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); - if (intparm->pc1) - /* ETR port 1 state change. */ - set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); - if (intparm->eai) - /* - * ETR port alert on either port 0, 1 or both. - * Both ports are not up-to-date now. - */ - set_bit(ETR_EVENT_PORT_ALERT, &etr_events); - queue_work(time_sync_wq, &etr_work); -} - -static void etr_timeout(unsigned long dummy) -{ - set_bit(ETR_EVENT_UPDATE, &etr_events); - queue_work(time_sync_wq, &etr_work); -} - -/* - * Check if the etr mode is pss. - */ -static inline int etr_mode_is_pps(struct etr_eacr eacr) -{ - return eacr.es && !eacr.sl; -} - -/* - * Check if the etr mode is etr. - */ -static inline int etr_mode_is_etr(struct etr_eacr eacr) -{ - return eacr.es && eacr.sl; -} - -/* - * Check if the port can be used for TOD synchronization. - * For PPS mode the port has to receive OTEs. For ETR mode - * the port has to receive OTEs, the ETR stepping bit has to - * be zero and the validity bits for data frame 1, 2, and 3 - * have to be 1. - */ -static int etr_port_valid(struct etr_aib *aib, int port) -{ - unsigned int psc; - - /* Check that this port is receiving OTEs. */ - if (aib->tsp == 0) - return 0; - - psc = port ? aib->esw.psc1 : aib->esw.psc0; - if (psc == etr_lpsc_pps_mode) - return 1; - if (psc == etr_lpsc_operational_step) - return !aib->esw.y && aib->slsw.v1 && - aib->slsw.v2 && aib->slsw.v3; - return 0; -} - -/* - * Check if two ports are on the same network. - */ -static int etr_compare_network(struct etr_aib *aib1, struct etr_aib *aib2) -{ - // FIXME: any other fields we have to compare? - return aib1->edf1.net_id == aib2->edf1.net_id; -} - -/* - * Wrapper for etr_stei that converts physical port states - * to logical port states to be consistent with the output - * of stetr (see etr_psc vs. etr_lpsc). - */ -static void etr_steai_cv(struct etr_aib *aib, unsigned int func) -{ - BUG_ON(etr_steai(aib, func) != 0); - /* Convert port state to logical port state. */ - if (aib->esw.psc0 == 1) - aib->esw.psc0 = 2; - else if (aib->esw.psc0 == 0 && aib->esw.p == 0) - aib->esw.psc0 = 1; - if (aib->esw.psc1 == 1) - aib->esw.psc1 = 2; - else if (aib->esw.psc1 == 0 && aib->esw.p == 1) - aib->esw.psc1 = 1; -} - -/* - * Check if the aib a2 is still connected to the same attachment as - * aib a1, the etv values differ by one and a2 is valid. - */ -static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p) -{ - int state_a1, state_a2; - - /* Paranoia check: e0/e1 should better be the same. */ - if (a1->esw.eacr.e0 != a2->esw.eacr.e0 || - a1->esw.eacr.e1 != a2->esw.eacr.e1) - return 0; - - /* Still connected to the same etr ? */ - state_a1 = p ? a1->esw.psc1 : a1->esw.psc0; - state_a2 = p ? a2->esw.psc1 : a2->esw.psc0; - if (state_a1 == etr_lpsc_operational_step) { - if (state_a2 != etr_lpsc_operational_step || - a1->edf1.net_id != a2->edf1.net_id || - a1->edf1.etr_id != a2->edf1.etr_id || - a1->edf1.etr_pn != a2->edf1.etr_pn) - return 0; - } else if (state_a2 != etr_lpsc_pps_mode) - return 0; - - /* The ETV value of a2 needs to be ETV of a1 + 1. */ - if (a1->edf2.etv + 1 != a2->edf2.etv) - return 0; - - if (!etr_port_valid(a2, p)) - return 0; - - return 1; -} - struct clock_sync_data { atomic_t cpus; int in_sync; @@ -780,693 +480,6 @@ static void clock_sync_cpu(struct clock_sync_data *sync) fixup_clock_comparator(sync->fixup_cc); } -/* - * Sync the TOD clock using the port referred to by aibp. This port - * has to be enabled and the other port has to be disabled. The - * last eacr update has to be more than 1.6 seconds in the past. - */ -static int etr_sync_clock(void *data) -{ - static int first; - unsigned long long clock, old_clock, clock_delta, delay, delta; - struct clock_sync_data *etr_sync; - struct etr_aib *sync_port, *aib; - struct ptff_qto qto; - int port; - int rc; - - etr_sync = data; - - if (xchg(&first, 1) == 1) { - /* Slave */ - clock_sync_cpu(etr_sync); - return 0; - } - - /* Wait until all other cpus entered the sync function. */ - while (atomic_read(&etr_sync->cpus) != 0) - cpu_relax(); - - port = etr_sync->etr_port; - aib = etr_sync->etr_aib; - sync_port = (port == 0) ? &etr_port0 : &etr_port1; - enable_sync_clock(); - - /* Set clock to next OTE. */ - __ctl_set_bit(14, 21); - __ctl_set_bit(0, 29); - clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32; - old_clock = get_tod_clock(); - if (set_tod_clock(clock) == 0) { - __udelay(1); /* Wait for the clock to start. */ - __ctl_clear_bit(0, 29); - __ctl_clear_bit(14, 21); - etr_stetr(aib); - /* Adjust Linux timing variables. */ - delay = (unsigned long long) - (aib->edf2.etv - sync_port->edf2.etv) << 32; - delta = adjust_time(old_clock, clock, delay); - clock_delta = clock - old_clock; - atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0, - &clock_delta); - etr_sync->fixup_cc = delta; - fixup_clock_comparator(delta); - /* Verify that the clock is properly set. */ - if (!etr_aib_follows(sync_port, aib, port)) { - /* Didn't work. */ - disable_sync_clock(NULL); - etr_sync->in_sync = -EAGAIN; - rc = -EAGAIN; - } else { - if (ptff_query(PTFF_QTO) && - ptff(&qto, sizeof(qto), PTFF_QTO) == 0) - /* Update LPAR offset */ - lpar_offset = qto.tod_epoch_difference; - etr_sync->in_sync = 1; - rc = 0; - } - } else { - /* Could not set the clock ?!? */ - __ctl_clear_bit(0, 29); - __ctl_clear_bit(14, 21); - disable_sync_clock(NULL); - etr_sync->in_sync = -EAGAIN; - rc = -EAGAIN; - } - xchg(&first, 0); - return rc; -} - -static int etr_sync_clock_stop(struct etr_aib *aib, int port) -{ - struct clock_sync_data etr_sync; - struct etr_aib *sync_port; - int follows; - int rc; - - /* Check if the current aib is adjacent to the sync port aib. */ - sync_port = (port == 0) ? &etr_port0 : &etr_port1; - follows = etr_aib_follows(sync_port, aib, port); - memcpy(sync_port, aib, sizeof(*aib)); - if (!follows) - return -EAGAIN; - memset(&etr_sync, 0, sizeof(etr_sync)); - etr_sync.etr_aib = aib; - etr_sync.etr_port = port; - get_online_cpus(); - atomic_set(&etr_sync.cpus, num_online_cpus() - 1); - rc = stop_machine(etr_sync_clock, &etr_sync, cpu_online_mask); - put_online_cpus(); - return rc; -} - -/* - * Handle the immediate effects of the different events. - * The port change event is used for online/offline changes. - */ -static struct etr_eacr etr_handle_events(struct etr_eacr eacr) -{ - if (test_and_clear_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) - eacr.es = 0; - if (test_and_clear_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) - eacr.es = eacr.sl = 0; - if (test_and_clear_bit(ETR_EVENT_PORT_ALERT, &etr_events)) - etr_port0_uptodate = etr_port1_uptodate = 0; - - if (test_and_clear_bit(ETR_EVENT_PORT0_CHANGE, &etr_events)) { - if (eacr.e0) - /* - * Port change of an enabled port. We have to - * assume that this can have caused an stepping - * port switch. - */ - etr_tolec = get_tod_clock(); - eacr.p0 = etr_port0_online; - if (!eacr.p0) - eacr.e0 = 0; - etr_port0_uptodate = 0; - } - if (test_and_clear_bit(ETR_EVENT_PORT1_CHANGE, &etr_events)) { - if (eacr.e1) - /* - * Port change of an enabled port. We have to - * assume that this can have caused an stepping - * port switch. - */ - etr_tolec = get_tod_clock(); - eacr.p1 = etr_port1_online; - if (!eacr.p1) - eacr.e1 = 0; - etr_port1_uptodate = 0; - } - clear_bit(ETR_EVENT_UPDATE, &etr_events); - return eacr; -} - -/* - * Set up a timer that expires after the etr_tolec + 1.6 seconds if - * one of the ports needs an update. - */ -static void etr_set_tolec_timeout(unsigned long long now) -{ - unsigned long micros; - - if ((!etr_eacr.p0 || etr_port0_uptodate) && - (!etr_eacr.p1 || etr_port1_uptodate)) - return; - micros = (now > etr_tolec) ? ((now - etr_tolec) >> 12) : 0; - micros = (micros > 1600000) ? 0 : 1600000 - micros; - mod_timer(&etr_timer, jiffies + (micros * HZ) / 1000000 + 1); -} - -/* - * Set up a time that expires after 1/2 second. - */ -static void etr_set_sync_timeout(void) -{ - mod_timer(&etr_timer, jiffies + HZ/2); -} - -/* - * Update the aib information for one or both ports. - */ -static struct etr_eacr etr_handle_update(struct etr_aib *aib, - struct etr_eacr eacr) -{ - /* With both ports disabled the aib information is useless. */ - if (!eacr.e0 && !eacr.e1) - return eacr; - - /* Update port0 or port1 with aib stored in etr_work_fn. */ - if (aib->esw.q == 0) { - /* Information for port 0 stored. */ - if (eacr.p0 && !etr_port0_uptodate) { - etr_port0 = *aib; - if (etr_port0_online) - etr_port0_uptodate = 1; - } - } else { - /* Information for port 1 stored. */ - if (eacr.p1 && !etr_port1_uptodate) { - etr_port1 = *aib; - if (etr_port0_online) - etr_port1_uptodate = 1; - } - } - - /* - * Do not try to get the alternate port aib if the clock - * is not in sync yet. - */ - if (!eacr.es || !check_sync_clock()) - return eacr; - - /* - * If steai is available we can get the information about - * the other port immediately. If only stetr is available the - * data-port bit toggle has to be used. - */ - if (etr_steai_available) { - if (eacr.p0 && !etr_port0_uptodate) { - etr_steai_cv(&etr_port0, ETR_STEAI_PORT_0); - etr_port0_uptodate = 1; - } - if (eacr.p1 && !etr_port1_uptodate) { - etr_steai_cv(&etr_port1, ETR_STEAI_PORT_1); - etr_port1_uptodate = 1; - } - } else { - /* - * One port was updated above, if the other - * port is not uptodate toggle dp bit. - */ - if ((eacr.p0 && !etr_port0_uptodate) || - (eacr.p1 && !etr_port1_uptodate)) - eacr.dp ^= 1; - else - eacr.dp = 0; - } - return eacr; -} - -/* - * Write new etr control register if it differs from the current one. - * Return 1 if etr_tolec has been updated as well. - */ -static void etr_update_eacr(struct etr_eacr eacr) -{ - int dp_changed; - - if (memcmp(&etr_eacr, &eacr, sizeof(eacr)) == 0) - /* No change, return. */ - return; - /* - * The disable of an active port of the change of the data port - * bit can/will cause a change in the data port. - */ - dp_changed = etr_eacr.e0 > eacr.e0 || etr_eacr.e1 > eacr.e1 || - (etr_eacr.dp ^ eacr.dp) != 0; - etr_eacr = eacr; - etr_setr(&etr_eacr); - if (dp_changed) - etr_tolec = get_tod_clock(); -} - -/* - * ETR work. In this function you'll find the main logic. In - * particular this is the only function that calls etr_update_eacr(), - * it "controls" the etr control register. - */ -static void etr_work_fn(struct work_struct *work) -{ - unsigned long long now; - struct etr_eacr eacr; - struct etr_aib aib; - int sync_port; - - /* prevent multiple execution. */ - mutex_lock(&etr_work_mutex); - - /* Create working copy of etr_eacr. */ - eacr = etr_eacr; - - /* Check for the different events and their immediate effects. */ - eacr = etr_handle_events(eacr); - - /* Check if ETR is supposed to be active. */ - eacr.ea = eacr.p0 || eacr.p1; - if (!eacr.ea) { - /* Both ports offline. Reset everything. */ - eacr.dp = eacr.es = eacr.sl = 0; - on_each_cpu(disable_sync_clock, NULL, 1); - del_timer_sync(&etr_timer); - etr_update_eacr(eacr); - goto out_unlock; - } - - /* Store aib to get the current ETR status word. */ - BUG_ON(etr_stetr(&aib) != 0); - etr_port0.esw = etr_port1.esw = aib.esw; /* Copy status word. */ - now = get_tod_clock(); - - /* - * Update the port information if the last stepping port change - * or data port change is older than 1.6 seconds. - */ - if (now >= etr_tolec + (1600000 << 12)) - eacr = etr_handle_update(&aib, eacr); - - /* - * Select ports to enable. The preferred synchronization mode is PPS. - * If a port can be enabled depends on a number of things: - * 1) The port needs to be online and uptodate. A port is not - * disabled just because it is not uptodate, but it is only - * enabled if it is uptodate. - * 2) The port needs to have the same mode (pps / etr). - * 3) The port needs to be usable -> etr_port_valid() == 1 - * 4) To enable the second port the clock needs to be in sync. - * 5) If both ports are useable and are ETR ports, the network id - * has to be the same. - * The eacr.sl bit is used to indicate etr mode vs. pps mode. - */ - if (eacr.p0 && aib.esw.psc0 == etr_lpsc_pps_mode) { - eacr.sl = 0; - eacr.e0 = 1; - if (!etr_mode_is_pps(etr_eacr)) - eacr.es = 0; - if (!eacr.es || !eacr.p1 || aib.esw.psc1 != etr_lpsc_pps_mode) - eacr.e1 = 0; - // FIXME: uptodate checks ? - else if (etr_port0_uptodate && etr_port1_uptodate) - eacr.e1 = 1; - sync_port = (etr_port0_uptodate && - etr_port_valid(&etr_port0, 0)) ? 0 : -1; - } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_pps_mode) { - eacr.sl = 0; - eacr.e0 = 0; - eacr.e1 = 1; - if (!etr_mode_is_pps(etr_eacr)) - eacr.es = 0; - sync_port = (etr_port1_uptodate && - etr_port_valid(&etr_port1, 1)) ? 1 : -1; - } else if (eacr.p0 && aib.esw.psc0 == etr_lpsc_operational_step) { - eacr.sl = 1; - eacr.e0 = 1; - if (!etr_mode_is_etr(etr_eacr)) - eacr.es = 0; - if (!eacr.es || !eacr.p1 || - aib.esw.psc1 != etr_lpsc_operational_alt) - eacr.e1 = 0; - else if (etr_port0_uptodate && etr_port1_uptodate && - etr_compare_network(&etr_port0, &etr_port1)) - eacr.e1 = 1; - sync_port = (etr_port0_uptodate && - etr_port_valid(&etr_port0, 0)) ? 0 : -1; - } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_operational_step) { - eacr.sl = 1; - eacr.e0 = 0; - eacr.e1 = 1; - if (!etr_mode_is_etr(etr_eacr)) - eacr.es = 0; - sync_port = (etr_port1_uptodate && - etr_port_valid(&etr_port1, 1)) ? 1 : -1; - } else { - /* Both ports not usable. */ - eacr.es = eacr.sl = 0; - sync_port = -1; - } - - /* - * If the clock is in sync just update the eacr and return. - * If there is no valid sync port wait for a port update. - */ - if ((eacr.es && check_sync_clock()) || sync_port < 0) { - etr_update_eacr(eacr); - etr_set_tolec_timeout(now); - goto out_unlock; - } - - /* - * Prepare control register for clock syncing - * (reset data port bit, set sync check control. - */ - eacr.dp = 0; - eacr.es = 1; - - /* - * Update eacr and try to synchronize the clock. If the update - * of eacr caused a stepping port switch (or if we have to - * assume that a stepping port switch has occurred) or the - * clock syncing failed, reset the sync check control bit - * and set up a timer to try again after 0.5 seconds - */ - etr_update_eacr(eacr); - if (now < etr_tolec + (1600000 << 12) || - etr_sync_clock_stop(&aib, sync_port) != 0) { - /* Sync failed. Try again in 1/2 second. */ - eacr.es = 0; - etr_update_eacr(eacr); - etr_set_sync_timeout(); - } else - etr_set_tolec_timeout(now); -out_unlock: - mutex_unlock(&etr_work_mutex); -} - -/* - * Sysfs interface functions - */ -static struct bus_type etr_subsys = { - .name = "etr", - .dev_name = "etr", -}; - -static struct device etr_port0_dev = { - .id = 0, - .bus = &etr_subsys, -}; - -static struct device etr_port1_dev = { - .id = 1, - .bus = &etr_subsys, -}; - -/* - * ETR subsys attributes - */ -static ssize_t etr_stepping_port_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%i\n", etr_port0.esw.p); -} - -static DEVICE_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL); - -static ssize_t etr_stepping_mode_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - char *mode_str; - - if (etr_mode_is_pps(etr_eacr)) - mode_str = "pps"; - else if (etr_mode_is_etr(etr_eacr)) - mode_str = "etr"; - else - mode_str = "local"; - return sprintf(buf, "%s\n", mode_str); -} - -static DEVICE_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL); - -/* - * ETR port attributes - */ -static inline struct etr_aib *etr_aib_from_dev(struct device *dev) -{ - if (dev == &etr_port0_dev) - return etr_port0_online ? &etr_port0 : NULL; - else - return etr_port1_online ? &etr_port1 : NULL; -} - -static ssize_t etr_online_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - unsigned int online; - - online = (dev == &etr_port0_dev) ? etr_port0_online : etr_port1_online; - return sprintf(buf, "%i\n", online); -} - -static ssize_t etr_online_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - unsigned int value; - - value = simple_strtoul(buf, NULL, 0); - if (value != 0 && value != 1) - return -EINVAL; - if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags)) - return -EOPNOTSUPP; - mutex_lock(&clock_sync_mutex); - if (dev == &etr_port0_dev) { - if (etr_port0_online == value) - goto out; /* Nothing to do. */ - etr_port0_online = value; - if (etr_port0_online && etr_port1_online) - set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - else - clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); - queue_work(time_sync_wq, &etr_work); - } else { - if (etr_port1_online == value) - goto out; /* Nothing to do. */ - etr_port1_online = value; - if (etr_port0_online && etr_port1_online) - set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - else - clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); - queue_work(time_sync_wq, &etr_work); - } -out: - mutex_unlock(&clock_sync_mutex); - return count; -} - -static DEVICE_ATTR(online, 0600, etr_online_show, etr_online_store); - -static ssize_t etr_stepping_control_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ? - etr_eacr.e0 : etr_eacr.e1); -} - -static DEVICE_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL); - -static ssize_t etr_mode_code_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - if (!etr_port0_online && !etr_port1_online) - /* Status word is not uptodate if both ports are offline. */ - return -ENODATA; - return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ? - etr_port0.esw.psc0 : etr_port0.esw.psc1); -} - -static DEVICE_ATTR(state_code, 0400, etr_mode_code_show, NULL); - -static ssize_t etr_untuned_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.u); -} - -static DEVICE_ATTR(untuned, 0400, etr_untuned_show, NULL); - -static ssize_t etr_network_id_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.net_id); -} - -static DEVICE_ATTR(network, 0400, etr_network_id_show, NULL); - -static ssize_t etr_id_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.etr_id); -} - -static DEVICE_ATTR(id, 0400, etr_id_show, NULL); - -static ssize_t etr_port_number_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.etr_pn); -} - -static DEVICE_ATTR(port, 0400, etr_port_number_show, NULL); - -static ssize_t etr_coupled_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v3) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf3.c); -} - -static DEVICE_ATTR(coupled, 0400, etr_coupled_show, NULL); - -static ssize_t etr_local_time_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v3) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf3.blto); -} - -static DEVICE_ATTR(local_time, 0400, etr_local_time_show, NULL); - -static ssize_t etr_utc_offset_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v3) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf3.buo); -} - -static DEVICE_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL); - -static struct device_attribute *etr_port_attributes[] = { - &dev_attr_online, - &dev_attr_stepping_control, - &dev_attr_state_code, - &dev_attr_untuned, - &dev_attr_network, - &dev_attr_id, - &dev_attr_port, - &dev_attr_coupled, - &dev_attr_local_time, - &dev_attr_utc_offset, - NULL -}; - -static int __init etr_register_port(struct device *dev) -{ - struct device_attribute **attr; - int rc; - - rc = device_register(dev); - if (rc) - goto out; - for (attr = etr_port_attributes; *attr; attr++) { - rc = device_create_file(dev, *attr); - if (rc) - goto out_unreg; - } - return 0; -out_unreg: - for (; attr >= etr_port_attributes; attr--) - device_remove_file(dev, *attr); - device_unregister(dev); -out: - return rc; -} - -static void __init etr_unregister_port(struct device *dev) -{ - struct device_attribute **attr; - - for (attr = etr_port_attributes; *attr; attr++) - device_remove_file(dev, *attr); - device_unregister(dev); -} - -static int __init etr_init_sysfs(void) -{ - int rc; - - rc = subsys_system_register(&etr_subsys, NULL); - if (rc) - goto out; - rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_port); - if (rc) - goto out_unreg_subsys; - rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_mode); - if (rc) - goto out_remove_stepping_port; - rc = etr_register_port(&etr_port0_dev); - if (rc) - goto out_remove_stepping_mode; - rc = etr_register_port(&etr_port1_dev); - if (rc) - goto out_remove_port0; - return 0; - -out_remove_port0: - etr_unregister_port(&etr_port0_dev); -out_remove_stepping_mode: - device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_mode); -out_remove_stepping_port: - device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_port); -out_unreg_subsys: - bus_unregister(&etr_subsys); -out: - return rc; -} - -device_initcall(etr_init_sysfs); - /* * Server Time Protocol (STP) code. */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6d8ec3ac9dd8..08c39e769747 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3-59-g8ed1b From a9809407f6b6b4b55df7b6cf5522e39476b7e5e6 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 6 Jun 2016 10:30:45 +0200 Subject: s390/mm: fix vunmap vs finish_arch_post_lock_switch The vunmap_pte_range() function calls ptep_get_and_clear() without any locking. ptep_get_and_clear() uses ptep_xchg_lazy()/ptep_flush_direct() for the page table update. ptep_flush_direct requires that preemption is disabled, but without any locking this is not the case. If the kernel preempts the task while the attach_counter is increased an endless loop in finish_arch_post_lock_switch() will occur the next time the task is scheduled. Add explicit preempt_disable()/preempt_enable() calls to the relevant functions in arch/s390/mm/pgtable.c. Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 9f0ce0e6eeb4..67111ccbb5e0 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -70,7 +70,6 @@ static inline pgste_t pgste_get_lock(pte_t *ptep) #ifdef CONFIG_PGSTE unsigned long old; - preempt_disable(); asm( " lg %0,%2\n" "0: lgr %1,%0\n" @@ -93,7 +92,6 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) : "=Q" (ptep[PTRS_PER_PTE]) : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); - preempt_enable(); #endif } @@ -230,9 +228,11 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, pgste_t pgste; pte_t old; + preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); old = ptep_flush_direct(mm, addr, ptep); ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + preempt_enable(); return old; } EXPORT_SYMBOL(ptep_xchg_direct); @@ -243,9 +243,11 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr, pgste_t pgste; pte_t old; + preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); old = ptep_flush_lazy(mm, addr, ptep); ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + preempt_enable(); return old; } EXPORT_SYMBOL(ptep_xchg_lazy); @@ -256,6 +258,7 @@ pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pgste_t pgste; pte_t old; + preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); old = ptep_flush_lazy(mm, addr, ptep); if (mm_has_pgste(mm)) { @@ -279,6 +282,7 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, } else { *ptep = pte; } + preempt_enable(); } EXPORT_SYMBOL(ptep_modify_prot_commit); @@ -333,8 +337,10 @@ pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, { pmd_t old; + preempt_disable(); old = pmdp_flush_direct(mm, addr, pmdp); *pmdp = new; + preempt_enable(); return old; } EXPORT_SYMBOL(pmdp_xchg_direct); @@ -344,8 +350,10 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, { pmd_t old; + preempt_disable(); old = pmdp_flush_lazy(mm, addr, pmdp); *pmdp = new; + preempt_enable(); return old; } EXPORT_SYMBOL(pmdp_xchg_lazy); @@ -398,20 +406,24 @@ void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, pgste_t pgste; /* the mm_has_pgste() check is done in set_pte_at() */ + preempt_disable(); pgste = pgste_get_lock(ptep); pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; pgste_set_key(ptep, pgste, entry, mm); pgste = pgste_set_pte(ptep, pgste, entry); pgste_set_unlock(ptep, pgste); + preempt_enable(); } void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pgste_t pgste; + preempt_disable(); pgste = pgste_get_lock(ptep); pgste_val(pgste) |= PGSTE_IN_BIT; pgste_set_unlock(ptep, pgste); + preempt_enable(); } static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) @@ -434,6 +446,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pte_t pte; /* Zap unused and logically-zero pages */ + preempt_disable(); pgste = pgste_get_lock(ptep); pgstev = pgste_val(pgste); pte = *ptep; @@ -446,6 +459,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, if (reset) pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; pgste_set_unlock(ptep, pgste); + preempt_enable(); } void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) @@ -454,6 +468,7 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) pgste_t pgste; /* Clear storage key */ + preempt_disable(); pgste = pgste_get_lock(ptep); pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT); @@ -461,6 +476,7 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); pgste_set_unlock(ptep, pgste); + preempt_enable(); } /* -- cgit v1.2.3-59-g8ed1b From 7dd968163f7c12bcb2132792bf873133b397a2d2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 25 May 2016 09:32:20 +0200 Subject: bitmap: bitmap_equal memcmp optimization The bitmap_equal function has optimized code for small bitmaps with less than BITS_PER_LONG bits. For larger bitmaps the out-of-line function __bitmap_equal is called. For a constant number of bits divisible by BITS_PER_LONG the memcmp function can be used. For s390 gcc knows how to optimize this function, memcmp calls with up to 256 bytes / 2048 bits are translated into a single instruction. Reviewed-by: David Hildenbrand Signed-off-by: Martin Schwidefsky --- include/linux/bitmap.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index e9b0b9ab07e5..27bfc0b631a9 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -267,6 +267,10 @@ static inline int bitmap_equal(const unsigned long *src1, { if (small_const_nbits(nbits)) return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); +#ifdef CONFIG_S390 + else if (__builtin_constant_p(nbits) && (nbits % BITS_PER_LONG) == 0) + return !memcmp(src1, src2, nbits / 8); +#endif else return __bitmap_equal(src1, src2, nbits); } -- cgit v1.2.3-59-g8ed1b From 64f31d5802af11fd87872b4bae07b35cf0acb358 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 25 May 2016 09:45:26 +0200 Subject: s390/mm: simplify the TLB flushing code ptep_flush_lazy and pmdp_flush_lazy use mm->context.attach_count to decide between a lazy TLB flush vs an immediate TLB flush. The field contains two 16-bit counters, the number of CPUs that have the mm attached and can create TLB entries for it and the number of CPUs in the middle of a page table update. The __tlb_flush_asce, ptep_flush_direct and pmdp_flush_direct functions use the attach counter and a mask check with mm_cpumask(mm) to decide between a local flush local of the current CPU and a global flush. For all these functions the decision between lazy vs immediate and local vs global TLB flush can be based on CPU masks. There are two masks: the mm->context.cpu_attach_mask with the CPUs that are actively using the mm, and the mm_cpumask(mm) with the CPUs that have used the mm since the last full flush. The decision between lazy vs immediate flush is based on the mm->context.cpu_attach_mask, to decide between local vs global flush the mm_cpumask(mm) is used. With this patch all checks will use the CPU masks, the old counter mm->context.attach_count with its two 16-bit values is turned into a single counter mm->context.flush_count that keeps track of the number of CPUs with incomplete page table updates. The sole user of this counter is finish_arch_post_lock_switch() which waits for the end of all page table updates. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu.h | 2 +- arch/s390/include/asm/mmu_context.h | 15 +++++---------- arch/s390/include/asm/tlbflush.h | 13 +++++-------- arch/s390/kernel/smp.c | 8 ++------ arch/s390/mm/init.c | 4 +--- arch/s390/mm/pgtable.c | 34 ++++++++++++++-------------------- 6 files changed, 28 insertions(+), 48 deletions(-) diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 081b2ad99d73..18226437a832 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -6,7 +6,7 @@ typedef struct { cpumask_t cpu_attach_mask; - atomic_t attach_count; + atomic_t flush_count; unsigned int flush_mm; spinlock_t list_lock; struct list_head pgtable_list; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index c837b79b455d..f77c638bf397 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -19,7 +19,7 @@ static inline int init_new_context(struct task_struct *tsk, INIT_LIST_HEAD(&mm->context.pgtable_list); INIT_LIST_HEAD(&mm->context.gmap_list); cpumask_clear(&mm->context.cpu_attach_mask); - atomic_set(&mm->context.attach_count, 0); + atomic_set(&mm->context.flush_count, 0); mm->context.flush_mm = 0; #ifdef CONFIG_PGSTE mm->context.alloc_pgste = page_table_allocate_pgste; @@ -90,15 +90,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, S390_lowcore.user_asce = next->context.asce; if (prev == next) return; - if (MACHINE_HAS_TLB_LC) - cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); + cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); + cpumask_set_cpu(cpu, mm_cpumask(next)); /* Clear old ASCE by loading the kernel ASCE. */ __ctl_load(S390_lowcore.kernel_asce, 1, 1); __ctl_load(S390_lowcore.kernel_asce, 7, 7); - atomic_inc(&next->context.attach_count); - atomic_dec(&prev->context.attach_count); - if (MACHINE_HAS_TLB_LC) - cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); + cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); } #define finish_arch_post_lock_switch finish_arch_post_lock_switch @@ -110,10 +107,9 @@ static inline void finish_arch_post_lock_switch(void) load_kernel_asce(); if (mm) { preempt_disable(); - while (atomic_read(&mm->context.attach_count) >> 16) + while (atomic_read(&mm->context.flush_count)) cpu_relax(); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); if (mm->context.flush_mm) __tlb_flush_mm(mm); preempt_enable(); @@ -128,7 +124,6 @@ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { switch_mm(prev, next, current); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); set_user_asce(next); } diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index ac02a6c37a3e..e72cea7a4bfe 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -57,7 +57,7 @@ static inline void __tlb_flush_global(void) static inline void __tlb_flush_full(struct mm_struct *mm) { preempt_disable(); - atomic_add(0x10000, &mm->context.attach_count); + atomic_inc(&mm->context.flush_count); if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { /* Local TLB flush */ __tlb_flush_local(); @@ -69,7 +69,7 @@ static inline void __tlb_flush_full(struct mm_struct *mm) cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); } - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); preempt_enable(); } @@ -78,12 +78,9 @@ static inline void __tlb_flush_full(struct mm_struct *mm) */ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) { - int active, count; - preempt_disable(); - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + atomic_inc(&mm->context.flush_count); + if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { __tlb_flush_idte_local(asce); } else { @@ -96,7 +93,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); } - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); preempt_enable(); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 7b89a7572100..830537432493 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -242,10 +242,8 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) { struct lowcore *lc = pcpu->lowcore; - if (MACHINE_HAS_TLB_LC) - cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); + cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); - atomic_inc(&init_mm.context.attach_count); lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); lc->percpu_offset = __per_cpu_offset[cpu]; @@ -876,10 +874,8 @@ void __cpu_die(unsigned int cpu) while (!pcpu_stopped(pcpu)) cpu_relax(); pcpu_free_lowcore(pcpu); - atomic_dec(&init_mm.context.attach_count); cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); - if (MACHINE_HAS_TLB_LC) - cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); + cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); } void __noreturn cpu_die(void) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 44db60d9e519..de2cdf4fbb9a 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -118,10 +118,8 @@ void mark_rodata_ro(void) void __init mem_init(void) { - if (MACHINE_HAS_TLB_LC) - cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); + cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(0, mm_cpumask(&init_mm)); - atomic_set(&init_mm.context.attach_count, 1); set_max_mapnr(max_low_pfn); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 67111ccbb5e0..74f8f2a8a4e8 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -27,40 +27,37 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - int active, count; pte_t old; old = *ptep; if (unlikely(pte_val(old) & _PAGE_INVALID)) return old; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + atomic_inc(&mm->context.flush_count); + if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) __ptep_ipte_local(addr, ptep); else __ptep_ipte(addr, ptep); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } static inline pte_t ptep_flush_lazy(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - int active, count; pte_t old; old = *ptep; if (unlikely(pte_val(old) & _PAGE_INVALID)) return old; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if ((count & 0xffff) <= active) { + atomic_inc(&mm->context.flush_count); + if (cpumask_equal(&mm->context.cpu_attach_mask, + cpumask_of(smp_processor_id()))) { pte_val(*ptep) |= _PAGE_INVALID; mm->context.flush_mm = 1; } else __ptep_ipte(addr, ptep); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } @@ -289,7 +286,6 @@ EXPORT_SYMBOL(ptep_modify_prot_commit); static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - int active, count; pmd_t old; old = *pmdp; @@ -299,36 +295,34 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, __pmdp_csp(pmdp); return old; } - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + atomic_inc(&mm->context.flush_count); + if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) __pmdp_idte_local(addr, pmdp); else __pmdp_idte(addr, pmdp); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - int active, count; pmd_t old; old = *pmdp; if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) return old; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if ((count & 0xffff) <= active) { + atomic_inc(&mm->context.flush_count); + if (cpumask_equal(&mm->context.cpu_attach_mask, + cpumask_of(smp_processor_id()))) { pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; mm->context.flush_mm = 1; } else if (MACHINE_HAS_IDTE) __pmdp_idte(addr, pmdp); else __pmdp_csp(pmdp); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } -- cgit v1.2.3-59-g8ed1b From 32fb2fc5c357fb99616bbe100dbcb27bc7f5d045 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 7 Jun 2016 12:20:51 +0200 Subject: vmlinux.lds.h: allow arch specific handling of ro_after_init data section commit c74ba8b3480d ("arch: Introduce post-init read-only memory") introduced the __ro_after_init attribute which allows to add variables to the ro_after_init data section. This new section was added to rodata, even though it contains writable data. This in turn causes problems on architectures which mark the page table entries read-only that point to rodata very early. This patch allows architectures to implement an own handling of the .data..ro_after_init section. Usually that would be: - mark the rodata section read-only very early - mark the ro_after_init section read-only within mark_rodata_ro Signed-off-by: Heiko Carstens Reviewed-by: Kees Cook Signed-off-by: Martin Schwidefsky --- include/asm-generic/vmlinux.lds.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 6a67ab94b553..c4436d072d37 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -249,6 +249,14 @@ *(.data..init_task) \ VMLINUX_SYMBOL(__end_init_task) = .; +/* + * Allow architectures to handle ro_after_init data on their + * own by defining an empty RO_AFTER_INIT_DATA. + */ +#ifndef RO_AFTER_INIT_DATA +#define RO_AFTER_INIT_DATA *(.data..ro_after_init) +#endif + /* * Read only Data */ @@ -257,7 +265,7 @@ .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_rodata) = .; \ *(.rodata) *(.rodata.*) \ - *(.data..ro_after_init) /* Read only after init */ \ + RO_AFTER_INIT_DATA /* Read only after init */ \ *(__vermagic) /* Kernel version magic */ \ . = ALIGN(8); \ VMLINUX_SYMBOL(__start___tracepoints_ptrs) = .; \ -- cgit v1.2.3-59-g8ed1b From d07a980c1b8d7ac18854bae94a4e7aeabce933b8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 7 Jun 2016 10:12:55 +0200 Subject: s390: add proper __ro_after_init support On s390 __ro_after_init is currently mapped to __read_mostly which means that data marked as __ro_after_init will not be protected. Reason for this is that the common code __ro_after_init implementation is x86 centric: the ro_after_init data section was added to rodata, since x86 enables write protection to kernel text and rodata very late. On s390 we have write protection for these sections enabled with the initial page tables. So adding the ro_after_init data section to rodata does not work on s390. In order to make __ro_after_init work properly on s390 move the ro_after_init data, right behind rodata. Unlike the rodata section it will be marked read-only later after all init calls happened. This s390 specific implementation adds new __start_ro_after_init and __end_ro_after_init labels. Everything in between will be marked read-only after the init calls happened. In addition to the __ro_after_init data move also the exception table there, since from a practical point of view it fits the __ro_after_init requirements. Signed-off-by: Heiko Carstens Reviewed-by: Kees Cook Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cache.h | 3 --- arch/s390/include/asm/sections.h | 1 + arch/s390/kernel/vmlinux.lds.S | 12 +++++++++++- arch/s390/mm/init.c | 7 ++++--- arch/s390/mm/vmem.c | 7 +++---- 5 files changed, 19 insertions(+), 11 deletions(-) diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h index 22da3b34c655..4d7ccac5fd1d 100644 --- a/arch/s390/include/asm/cache.h +++ b/arch/s390/include/asm/cache.h @@ -15,7 +15,4 @@ #define __read_mostly __attribute__((__section__(".data..read_mostly"))) -/* Read-only memory is marked before mark_rodata_ro() is called. */ -#define __ro_after_init __read_mostly - #endif diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index fbd9116eb17b..5ce29fe100ba 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -4,5 +4,6 @@ #include extern char _eshared[], _ehead[]; +extern char __start_ro_after_init[], __end_ro_after_init[]; #endif diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 0656f7cc7fb4..429bfd111961 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -11,6 +11,9 @@ */ #define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) +/* Handle ro_after_init data on our own. */ +#define RO_AFTER_INIT_DATA + #include OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") @@ -56,7 +59,14 @@ SECTIONS _eshared = .; /* End of shareable data */ _sdata = .; /* Start of data section */ - EXCEPTION_TABLE(16) :data + . = ALIGN(PAGE_SIZE); + __start_ro_after_init = .; + .data..ro_after_init : { + *(.data..ro_after_init) + } + EXCEPTION_TABLE(16) + . = ALIGN(PAGE_SIZE); + __end_ro_after_init = .; RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index de2cdf4fbb9a..f56a39bd8ba6 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -111,9 +111,10 @@ void __init paging_init(void) void mark_rodata_ro(void) { - /* Text and rodata are already protected. Nothing to do here. */ - pr_info("Write protecting the kernel read-only data: %luk\n", - ((unsigned long)&_eshared - (unsigned long)&_stext) >> 10); + unsigned long size = __end_ro_after_init - __start_ro_after_init; + + set_memory_ro((unsigned long)__start_ro_after_init, size >> PAGE_SHIFT); + pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); } void __init mem_init(void) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index a1e7c0b207e6..1848292766ef 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -373,14 +373,13 @@ out: */ void __init vmem_map_init(void) { - unsigned long ro_start, ro_end; + unsigned long size = _eshared - _stext; struct memblock_region *reg; for_each_memblock(memory, reg) vmem_add_mem(reg->base, reg->size); - ro_start = PFN_ALIGN((unsigned long)&_stext); - ro_end = (unsigned long)&_eshared & PAGE_MASK; - set_memory_ro(ro_start, (ro_end - ro_start) >> PAGE_SHIFT); + set_memory_ro((unsigned long)_stext, size >> PAGE_SHIFT); + pr_info("Write protected kernel read-only data: %luk\n", size >> 10); } /* -- cgit v1.2.3-59-g8ed1b From 72a9b02d3b55d5ffeb4a00ad99b2c2471ebfa23c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 7 Jun 2016 10:23:07 +0200 Subject: s390: use __section macro everywhere Small cleanup patch to use the shorter __section macro everywhere. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cache.h | 2 +- arch/s390/kernel/ipl.c | 6 +++--- arch/s390/kernel/setup.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h index 4d7ccac5fd1d..05219a5e0b2f 100644 --- a/arch/s390/include/asm/cache.h +++ b/arch/s390/include/asm/cache.h @@ -13,6 +13,6 @@ #define L1_CACHE_SHIFT 8 #define NET_SKB_PAD 32 -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) +#define __read_mostly __section(.data..read_mostly) #endif diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index f20abdb5630a..a6d2a998e328 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -121,9 +121,9 @@ static char *dump_type_str(enum dump_type type) * Must be in data section since the bss section * is not cleared when these are accessed. */ -static u8 ipl_ssid __attribute__((__section__(".data"))) = 0; -static u16 ipl_devno __attribute__((__section__(".data"))) = 0; -u32 ipl_flags __attribute__((__section__(".data"))) = 0; +static u8 ipl_ssid __section(.data) = 0; +static u16 ipl_devno __section(.data) = 0; +u32 ipl_flags __section(.data) = 0; enum ipl_method { REIPL_METHOD_CCW_CIO, diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 9b4bb9d9275f..00e38f0c75df 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -289,7 +289,7 @@ static int __init parse_vmalloc(char *arg) } early_param("vmalloc", parse_vmalloc); -void *restart_stack __attribute__((__section__(".data"))); +void *restart_stack __section(.data); static void __init setup_lowcore(void) { -- cgit v1.2.3-59-g8ed1b From 1b8b9c81a99ace2c706aa8696e12dd846df50078 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 8 Jun 2016 13:58:22 +0200 Subject: s390/Documentation: improve sort command for trace buffer When s390 traces with hex_ascii or sprintf view are extracted and sorted, use the sort option -s (stable) to avoid multiple lines with the same time stamp being sorted using the rest of the line as secondary key. Signed-off-by: Thomas Richter Signed-off-by: Martin Schwidefsky --- Documentation/s390/s390dbf.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt index 3da163383c93..61329fd62e89 100644 --- a/Documentation/s390/s390dbf.txt +++ b/Documentation/s390/s390dbf.txt @@ -405,7 +405,7 @@ Example: > ls /sys/kernel/debug/s390dbf/dasd flush hex_ascii level pages raw -> cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort +1 +> cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s 00 00974733272:680099 2 - 02 0006ad7e 07 ea 4a 90 | .... 00 00974733272:682210 2 - 02 0006ade6 46 52 45 45 | FREE 00 00974733272:682213 2 - 02 0006adf6 07 ea 4a 90 | .... -- cgit v1.2.3-59-g8ed1b From bb98f396f14477480273a92a75da448af6a9ae85 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 2 Jun 2016 14:57:17 +0200 Subject: s390: use SPARSE_IRQ Use dynamically allocated irq descriptors on s390 which allows us to get rid of the s390 specific config option PCI_NR_MSI and exploit more MSI interrupts. Also the size of the kernel image is reduced by 131K (using performance_defconfig). Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 11 +---------- arch/s390/include/asm/irq.h | 7 ++----- arch/s390/kernel/irq.c | 7 ++++--- 3 files changed, 7 insertions(+), 18 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a8c259059adf..3529a285dda8 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -163,6 +163,7 @@ config S390 select NO_BOOTMEM select OLD_SIGACTION select OLD_SIGSUSPEND3 + select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select TTY select VIRT_CPU_ACCOUNTING @@ -605,16 +606,6 @@ config PCI_NR_FUNCTIONS This allows you to specify the maximum number of PCI functions which this kernel will support. -config PCI_NR_MSI - int "Maximum number of MSI interrupts (64-32768)" - range 64 32768 - default "256" - help - This defines the number of virtual interrupts the kernel will - provide for MSI interrupts. If you configure your system to have - too few drivers will fail to allocate MSI interrupts for all - PCI devices. - source "drivers/pci/Kconfig" endif # PCI diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index f97b055de76a..70c9bce766f5 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -7,11 +7,8 @@ #define NR_IRQS_BASE 3 -#ifdef CONFIG_PCI_NR_MSI -# define NR_IRQS (NR_IRQS_BASE + CONFIG_PCI_NR_MSI) -#else -# define NR_IRQS NR_IRQS_BASE -#endif +#define NR_IRQS NR_IRQS_BASE +#define NR_IRQS_LEGACY NR_IRQS_BASE /* External interruption codes */ #define EXT_IRQ_INTERRUPT_KEY 0x0040 diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index c373a1d41d10..285d6561076d 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -127,9 +127,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "CPU%d ", cpu); seq_putc(p, '\n'); } - if (index < NR_IRQS) { - if (index >= NR_IRQS_BASE) - goto out; + if (index < NR_IRQS_BASE) { seq_printf(p, "%s: ", irqclass_main_desc[index].name); irq = irqclass_main_desc[index].irq; for_each_online_cpu(cpu) @@ -137,6 +135,9 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); goto out; } + if (index > NR_IRQS_BASE) + goto out; + for (index = 0; index < NR_ARCH_IRQS; index++) { seq_printf(p, "%s: ", irqclass_sub_desc[index].name); irq = irqclass_sub_desc[index].irq; -- cgit v1.2.3-59-g8ed1b From 53b1bc9aba525a599721565507d9e3aebae80a1d Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 3 Jun 2016 19:03:12 +0200 Subject: s390/pci: ensure page aligned dma start address We don't have an architectural guarantee on the value of the dma offset but rely on it to be at least page aligned. Enforce page alignemt of start_dma. Signed-off-by: Sebastian Ott Reviewed-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_dma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 1ea8c07eab84..494eb832bcc5 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -469,6 +469,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) * Also set zdev->end_dma to the actual end address of the usable * range, instead of the theoretical maximum as reported by hardware. */ + zdev->start_dma = PAGE_ALIGN(zdev->start_dma); zdev->iommu_size = min3((u64) high_memory, ZPCI_TABLE_SIZE_RT - zdev->start_dma, zdev->end_dma - zdev->start_dma + 1); -- cgit v1.2.3-59-g8ed1b From 8ee2db3cf1dc02ff9f2c65ac8bd7f478ba79abbe Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 3 Jun 2016 19:05:38 +0200 Subject: s390/pci: ensure to not cross a dma segment boundary When we use the iommu_area_alloc helper to get dma addresses we specify the boundary_size parameter but not the offset (called shift in this context). As long as the offset (start_dma) is a multiple of the boundary we're ok (on current machines start_dma always seems to be 4GB). Don't leave this to chance and specify the offset for iommu_area_alloc. Signed-off-by: Sebastian Ott Reviewed-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_dma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 494eb832bcc5..070f1ae5cfad 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -226,7 +226,8 @@ static unsigned long __dma_alloc_iommu(struct device *dev, boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, - start, size, 0, boundary_size, 0); + start, size, zdev->start_dma >> PAGE_SHIFT, + boundary_size, 0); } static unsigned long dma_alloc_iommu(struct device *dev, int size) -- cgit v1.2.3-59-g8ed1b From b8ac5e2f4d8659961d2e0aea5475a07fb4eeec2d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 10 Jun 2016 09:35:51 +0200 Subject: s390/uaccess: fix whitespace damage Fix some whitespace damage that was introduced by me with a query-replace when removing 31 bit support. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/lib/uaccess.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index ae4de559e3a0..d96596128e9f 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -49,7 +49,7 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr " jnm 5b\n" " ex %4,0(%3)\n" " j 8f\n" - "7:slgr %0,%0\n" + "7: slgr %0,%0\n" "8:\n" EX_TABLE(0b,2b) EX_TABLE(3b,4b) EX_TABLE(9b,2b) EX_TABLE(10b,4b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) @@ -93,7 +93,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, " jnm 6b\n" " ex %4,0(%3)\n" " j 9f\n" - "8:slgr %0,%0\n" + "8: slgr %0,%0\n" "9: sacf 768\n" EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,5b) EX_TABLE(10b,3b) EX_TABLE(11b,3b) EX_TABLE(12b,5b) @@ -266,7 +266,7 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n" " slgr %0,%3\n" " j 5f\n" - "4:slgr %0,%0\n" + "4: slgr %0,%0\n" "5:\n" EX_TABLE(0b,2b) EX_TABLE(3b,5b) : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2) -- cgit v1.2.3-59-g8ed1b From ee64baf4ea3b7affb7e325bb3d188515ec4bb66e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Jun 2016 10:17:20 +0200 Subject: s390/uaccess: use __builtin_expect for get_user/put_user We always expect that get_user and put_user return with zero. Give the compiler a hint so it can slightly optimize the code and avoid branches. This is the same what x86 got with commit a76cf66e948a ("x86/uaccess: Tell the compiler that uaccess is unlikely to fault"). Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index e0900ddf91dd..2ea7f3208cd2 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -191,7 +191,7 @@ static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long s __put_user_bad(); \ break; \ } \ - __pu_err; \ + __builtin_expect(__pu_err, 0); \ }) #define put_user(x, ptr) \ @@ -240,7 +240,7 @@ int __put_user_bad(void) __attribute__((noreturn)); __get_user_bad(); \ break; \ } \ - __gu_err; \ + __builtin_expect(__gu_err, 0); \ }) #define get_user(x, ptr) \ -- cgit v1.2.3-59-g8ed1b From 6c22c98637602f89beb40c43cfb3d764b71a54c6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 10 Jun 2016 09:57:05 +0200 Subject: s390: avoid extable collisions We have some inline assemblies where the extable entry points to a label at the end of an inline assembly which is not followed by an instruction. On the other hand we have also inline assemblies where the extable entry points to the first instruction of an inline assembly. If a first type inline asm (extable point to empty label at the end) would be directly followed by a second type inline asm (extable points to first instruction) then we would have two different extable entries that point to the same instruction but would have a different target address. This can lead to quite random behaviour, depending on sorting order. I verified that we currently do not have such collisions within the kernel. However to avoid such subtle bugs add a couple of nop instructions to those inline assemblies which contain an extable that points to an empty label. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/hypfs_diag.c | 2 +- arch/s390/hypfs/hypfs_vm.c | 2 +- arch/s390/include/asm/diag.h | 2 +- arch/s390/kernel/ipl.c | 2 +- arch/s390/mm/fault.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 045035796ca7..27779f0743ac 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -344,7 +344,7 @@ static inline int __diag204(unsigned long subcode, unsigned long size, void *add asm volatile( " diag %2,%0,0x204\n" - "0:\n" + "0: nopr %%r7\n" EX_TABLE(0b,0b) : "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory"); if (_subcode) diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index 44feac38ccfc..012919d9833b 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -70,7 +70,7 @@ static int diag2fc(int size, char* query, void *addr) diag_stat_inc(DIAG_STAT_X2FC); asm volatile( " diag %0,%1,0x2fc\n" - "0:\n" + "0: nopr %%r7\n" EX_TABLE(0b,0b) : "=d" (residual_cnt), "+d" (rc) : "0" (&parm_list) : "memory"); diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 5fac921c1c42..86cae09e076a 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -49,7 +49,7 @@ static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn) diag_stat_inc(DIAG_STAT_X010); asm volatile( "0: diag %0,%1,0x10\n" - "1:\n" + "1: nopr %%r7\n" EX_TABLE(0b, 1b) EX_TABLE(1b, 1b) : : "a" (start_addr), "a" (end_addr)); diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index a6d2a998e328..44317398b77b 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -174,7 +174,7 @@ static inline int __diag308(unsigned long subcode, void *addr) asm volatile( " diag %0,%2,0x308\n" - "0:\n" + "0: nopr %%r7\n" EX_TABLE(0b,0b) : "+d" (_addr), "+d" (_rc) : "d" (subcode) : "cc", "memory"); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 19288c1b36d3..6ad7eff84c82 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -624,7 +624,7 @@ void pfault_fini(void) diag_stat_inc(DIAG_STAT_X258); asm volatile( " diag %0,0,0x258\n" - "0:\n" + "0: nopr %%r7\n" EX_TABLE(0b,0b) : : "a" (&refbk), "m" (refbk) : "cc"); } -- cgit v1.2.3-59-g8ed1b From 0f7451ff3ab88507fcffc72a5b7e1c211523a34e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 10 Jun 2016 12:36:50 +0200 Subject: s390/ipl: use load normal for LPAR re-ipl Avoid clearing memory for CCW-type re-ipl within a logical partition. This can save a significant amount of time if a logical partition contains a lot of memory. On the other hand we still clear memory if running within a second level hypervisor, since the hypervisor can simply free all memory that was used for the guest. Signed-off-by: Heiko Carstens Acked-by: Christian Borntraeger Acked-by: Michael Holzheu Reviewed-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ipl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 44317398b77b..b0dbf9a265f5 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1085,7 +1085,10 @@ static void __reipl_run(void *unused) break; case REIPL_METHOD_CCW_DIAG: diag308(DIAG308_SET, reipl_block_ccw); - diag308(DIAG308_IPL, NULL); + if (MACHINE_IS_LPAR) + diag308(DIAG308_DUMP, NULL); + else + diag308(DIAG308_IPL, NULL); break; case REIPL_METHOD_FCP_RW_DIAG: diag308(DIAG308_SET, reipl_block_fcp); -- cgit v1.2.3-59-g8ed1b From 0599eead5833d9dd0970b59fed3844441b44fe0c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 11 Jun 2016 11:24:27 +0200 Subject: s390/ipl: rename diagnose enums Rename DIAG308_IPL and DIAG308_DUMP to DIAG308_LOAD_CLEAR and DIAG308_LOAD_NORMAL_DUMP to better reflect the associated IPL functions. Suggested-by: Cornelia Huck Suggested-by: Christian Borntraeger Signed-off-by: Heiko Carstens Acked-by: Michael Holzheu Reviewed-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ipl.h | 10 +++++----- arch/s390/kernel/ipl.c | 16 ++++++++-------- drivers/s390/char/zcore.c | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 6fc44dca193e..4da22b2f0521 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -141,11 +141,11 @@ extern void setup_ipl(void); * DIAG 308 support */ enum diag308_subcode { - DIAG308_REL_HSA = 2, - DIAG308_IPL = 3, - DIAG308_DUMP = 4, - DIAG308_SET = 5, - DIAG308_STORE = 6, + DIAG308_REL_HSA = 2, + DIAG308_LOAD_CLEAR = 3, + DIAG308_LOAD_NORMAL_DUMP = 4, + DIAG308_SET = 5, + DIAG308_STORE = 6, }; enum diag308_ipl_type { diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index b0dbf9a265f5..b892480cd4d8 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -563,7 +563,7 @@ static struct kset *ipl_kset; static void __ipl_run(void *unused) { - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); if (MACHINE_IS_VM) __cpcmd("IPL", NULL, 0, NULL); else if (ipl_info.type == IPL_TYPE_CCW) @@ -1086,23 +1086,23 @@ static void __reipl_run(void *unused) case REIPL_METHOD_CCW_DIAG: diag308(DIAG308_SET, reipl_block_ccw); if (MACHINE_IS_LPAR) - diag308(DIAG308_DUMP, NULL); + diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); else - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_FCP_RW_DIAG: diag308(DIAG308_SET, reipl_block_fcp); - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_FCP_RO_DIAG: - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_FCP_RO_VM: __cpcmd("IPL", NULL, 0, NULL); break; case REIPL_METHOD_NSS_DIAG: diag308(DIAG308_SET, reipl_block_nss); - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_NSS: get_ipl_string(buf, reipl_block_nss, REIPL_METHOD_NSS); @@ -1111,7 +1111,7 @@ static void __reipl_run(void *unused) case REIPL_METHOD_DEFAULT: if (MACHINE_IS_VM) __cpcmd("IPL", NULL, 0, NULL); - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_FCP_DUMP: break; @@ -1426,7 +1426,7 @@ static void diag308_dump(void *dump_block) { diag308(DIAG308_SET, dump_block); while (1) { - if (diag308(DIAG308_DUMP, NULL) != 0x302) + if (diag308(DIAG308_LOAD_NORMAL_DUMP, NULL) != 0x302) break; udelay_simple(USEC_PER_SEC); } diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 5043ecfa1fbc..16992e2a40ad 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -185,7 +185,7 @@ static ssize_t zcore_reipl_write(struct file *filp, const char __user *buf, { if (ipl_block) { diag308(DIAG308_SET, ipl_block); - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); } return count; } -- cgit v1.2.3-59-g8ed1b From a62247e1f5c13b926f535bb64ecbd7f9fdef7b21 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 21 May 2016 11:10:13 +0200 Subject: topology/sysfs: provide drawer id and siblings attributes The s390 cpu topology gained another hierarchy level. The top level is now called drawer and contains several books. A book used to be the top level. In order to expose the cpu topology to user space allow to create new sysfs attributes dependent on CONFIG_SCHED_DRAWER which an architecture may define and select. These additional attributes will be available: /sys/devices/system/cpu/cpuX/topology/drawer_id /sys/devices/system/cpu/cpuX/topology/drawer_siblings /sys/devices/system/cpu/cpuX/topology/drawer_siblings_list Signed-off-by: Heiko Carstens Acked-by: Peter Zijlstra (Intel) Signed-off-by: Martin Schwidefsky --- Documentation/cputopology.txt | 40 ++++++++++++++++++++++++++++++++-------- drivers/base/topology.c | 13 +++++++++++++ 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt index 12b1b25b4da9..f722f227a73b 100644 --- a/Documentation/cputopology.txt +++ b/Documentation/cputopology.txt @@ -20,48 +20,70 @@ to /proc/cpuinfo output of some architectures: identifier (rather than the kernel's). The actual value is architecture and platform dependent. -4) /sys/devices/system/cpu/cpuX/topology/thread_siblings: +4) /sys/devices/system/cpu/cpuX/topology/drawer_id: + + the drawer ID of cpuX. Typically it is the hardware platform's + identifier (rather than the kernel's). The actual value is + architecture and platform dependent. + +5) /sys/devices/system/cpu/cpuX/topology/thread_siblings: internal kernel map of cpuX's hardware threads within the same core as cpuX. -5) /sys/devices/system/cpu/cpuX/topology/thread_siblings_list: +6) /sys/devices/system/cpu/cpuX/topology/thread_siblings_list: human-readable list of cpuX's hardware threads within the same core as cpuX. -6) /sys/devices/system/cpu/cpuX/topology/core_siblings: +7) /sys/devices/system/cpu/cpuX/topology/core_siblings: internal kernel map of cpuX's hardware threads within the same physical_package_id. -7) /sys/devices/system/cpu/cpuX/topology/core_siblings_list: +8) /sys/devices/system/cpu/cpuX/topology/core_siblings_list: human-readable list of cpuX's hardware threads within the same physical_package_id. -8) /sys/devices/system/cpu/cpuX/topology/book_siblings: +9) /sys/devices/system/cpu/cpuX/topology/book_siblings: internal kernel map of cpuX's hardware threads within the same book_id. -9) /sys/devices/system/cpu/cpuX/topology/book_siblings_list: +10) /sys/devices/system/cpu/cpuX/topology/book_siblings_list: human-readable list of cpuX's hardware threads within the same book_id. +11) /sys/devices/system/cpu/cpuX/topology/drawer_siblings: + + internal kernel map of cpuX's hardware threads within the same + drawer_id. + +12) /sys/devices/system/cpu/cpuX/topology/drawer_siblings_list: + + human-readable list of cpuX's hardware threads within the same + drawer_id. + To implement it in an architecture-neutral way, a new source file, -drivers/base/topology.c, is to export the 6 or 9 attributes. The three book -related sysfs files will only be created if CONFIG_SCHED_BOOK is selected. +drivers/base/topology.c, is to export the 6 to 12 attributes. The book +and drawer related sysfs files will only be created if CONFIG_SCHED_BOOK +and CONFIG_SCHED_DRAWER are selected. + +CONFIG_SCHED_BOOK and CONFIG_DRAWER are currently only used on s390, where +they reflect the cpu and cache hierarchy. For an architecture to support this feature, it must define some of these macros in include/asm-XXX/topology.h: #define topology_physical_package_id(cpu) #define topology_core_id(cpu) #define topology_book_id(cpu) +#define topology_drawer_id(cpu) #define topology_sibling_cpumask(cpu) #define topology_core_cpumask(cpu) #define topology_book_cpumask(cpu) +#define topology_drawer_cpumask(cpu) The type of **_id macros is int. The type of **_cpumask macros is (const) struct cpumask *. The latter @@ -78,6 +100,8 @@ not defined by include/asm-XXX/topology.h: For architectures that don't support books (CONFIG_SCHED_BOOK) there are no default definitions for topology_book_id() and topology_book_cpumask(). +For architectures that don't support drawes (CONFIG_SCHED_DRAWER) there are +no default definitions for topology_drawer_id() and topology_drawer_cpumask(). Additionally, CPU topology information is provided under /sys/devices/system/cpu and includes these files. The internal diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 8b7d7f8e5851..df3c97cb4c99 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -77,6 +77,14 @@ static DEVICE_ATTR_RO(book_siblings); static DEVICE_ATTR_RO(book_siblings_list); #endif +#ifdef CONFIG_SCHED_DRAWER +define_id_show_func(drawer_id); +static DEVICE_ATTR_RO(drawer_id); +define_siblings_show_func(drawer_siblings, drawer_cpumask); +static DEVICE_ATTR_RO(drawer_siblings); +static DEVICE_ATTR_RO(drawer_siblings_list); +#endif + static struct attribute *default_attrs[] = { &dev_attr_physical_package_id.attr, &dev_attr_core_id.attr, @@ -88,6 +96,11 @@ static struct attribute *default_attrs[] = { &dev_attr_book_id.attr, &dev_attr_book_siblings.attr, &dev_attr_book_siblings_list.attr, +#endif +#ifdef CONFIG_SCHED_DRAWER + &dev_attr_drawer_id.attr, + &dev_attr_drawer_siblings.attr, + &dev_attr_drawer_siblings_list.attr, #endif NULL }; -- cgit v1.2.3-59-g8ed1b From adac0f1e8c08548d82a48c9913ebc9787f946440 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 25 May 2016 10:25:50 +0200 Subject: s390/topology: add drawer scheduling domain level The z13 machine added a fourth level to the cpu topology information. The new top level is called drawer. A drawer contains two books, which used to be the top level. Adding this additional scheduling domain did show performance improvements for some workloads of up to 8%, while there don't seem to be any workloads impacted in a negative way. Signed-off-by: Heiko Carstens Acked-by: Peter Zijlstra (Intel) Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 4 ++++ arch/s390/include/asm/topology.h | 4 ++++ arch/s390/kernel/topology.c | 33 +++++++++++++++++++++++++++------ arch/s390/numa/mode_emu.c | 25 ++++++++++++++++++++----- 4 files changed, 55 insertions(+), 11 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 3529a285dda8..ac963903d54f 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -478,6 +478,9 @@ config SCHED_MC config SCHED_BOOK def_bool n +config SCHED_DRAWER + def_bool n + config SCHED_TOPOLOGY def_bool y prompt "Topology scheduler support" @@ -485,6 +488,7 @@ config SCHED_TOPOLOGY select SCHED_SMT select SCHED_MC select SCHED_BOOK + select SCHED_DRAWER help Topology scheduler support improves the CPU scheduler's decision making when dealing with machines that have multi-threading, diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 6b53962e807e..f15f5571ca2b 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -14,10 +14,12 @@ struct cpu_topology_s390 { unsigned short core_id; unsigned short socket_id; unsigned short book_id; + unsigned short drawer_id; unsigned short node_id; cpumask_t thread_mask; cpumask_t core_mask; cpumask_t book_mask; + cpumask_t drawer_mask; }; DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology); @@ -30,6 +32,8 @@ DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology); #define topology_core_cpumask(cpu) (&per_cpu(cpu_topology, cpu).core_mask) #define topology_book_id(cpu) (per_cpu(cpu_topology, cpu).book_id) #define topology_book_cpumask(cpu) (&per_cpu(cpu_topology, cpu).book_mask) +#define topology_drawer_id(cpu) (per_cpu(cpu_topology, cpu).drawer_id) +#define topology_drawer_cpumask(cpu) (&per_cpu(cpu_topology, cpu).drawer_mask) #define mc_capable() 1 diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 64298a867589..44745e751c3a 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -46,6 +46,7 @@ static DECLARE_WORK(topology_work, topology_work_fn); */ static struct mask_info socket_info; static struct mask_info book_info; +static struct mask_info drawer_info; DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology); EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology); @@ -80,6 +81,7 @@ static cpumask_t cpu_thread_map(unsigned int cpu) } static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core, + struct mask_info *drawer, struct mask_info *book, struct mask_info *socket, int one_socket_per_cpu) @@ -97,9 +99,11 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core, continue; for (i = 0; i <= smp_cpu_mtid; i++) { topo = &per_cpu(cpu_topology, lcpu + i); + topo->drawer_id = drawer->id; topo->book_id = book->id; topo->core_id = rcore; topo->thread_id = lcpu + i; + cpumask_set_cpu(lcpu + i, &drawer->mask); cpumask_set_cpu(lcpu + i, &book->mask); cpumask_set_cpu(lcpu + i, &socket->mask); if (one_socket_per_cpu) @@ -128,6 +132,11 @@ static void clear_masks(void) cpumask_clear(&info->mask); info = info->next; } + info = &drawer_info; + while (info) { + cpumask_clear(&info->mask); + info = info->next; + } } static union topology_entry *next_tle(union topology_entry *tle) @@ -141,12 +150,17 @@ static void __tl_to_masks_generic(struct sysinfo_15_1_x *info) { struct mask_info *socket = &socket_info; struct mask_info *book = &book_info; + struct mask_info *drawer = &drawer_info; union topology_entry *tle, *end; tle = info->tle; end = (union topology_entry *)((unsigned long)info + info->length); while (tle < end) { switch (tle->nl) { + case 3: + drawer = drawer->next; + drawer->id = tle->container.id; + break; case 2: book = book->next; book->id = tle->container.id; @@ -156,7 +170,7 @@ static void __tl_to_masks_generic(struct sysinfo_15_1_x *info) socket->id = tle->container.id; break; case 0: - add_cpus_to_mask(&tle->cpu, book, socket, 0); + add_cpus_to_mask(&tle->cpu, drawer, book, socket, 0); break; default: clear_masks(); @@ -170,6 +184,7 @@ static void __tl_to_masks_z10(struct sysinfo_15_1_x *info) { struct mask_info *socket = &socket_info; struct mask_info *book = &book_info; + struct mask_info *drawer = &drawer_info; union topology_entry *tle, *end; tle = info->tle; @@ -181,7 +196,7 @@ static void __tl_to_masks_z10(struct sysinfo_15_1_x *info) book->id = tle->container.id; break; case 0: - socket = add_cpus_to_mask(&tle->cpu, book, socket, 1); + socket = add_cpus_to_mask(&tle->cpu, drawer, book, socket, 1); break; default: clear_masks(); @@ -257,11 +272,13 @@ static void update_cpu_masks(void) topo->thread_mask = cpu_thread_map(cpu); topo->core_mask = cpu_group_map(&socket_info, cpu); topo->book_mask = cpu_group_map(&book_info, cpu); + topo->drawer_mask = cpu_group_map(&drawer_info, cpu); if (!MACHINE_HAS_TOPOLOGY) { topo->thread_id = cpu; topo->core_id = cpu; topo->socket_id = cpu; topo->book_id = cpu; + topo->drawer_id = cpu; } } numa_update_cpu_topology(); @@ -269,10 +286,7 @@ static void update_cpu_masks(void) void store_topology(struct sysinfo_15_1_x *info) { - if (topology_max_mnest >= 3) - stsi(info, 15, 1, 3); - else - stsi(info, 15, 1, 2); + stsi(info, 15, 1, min(topology_max_mnest, 4)); } int arch_update_cpu_topology(void) @@ -442,6 +456,11 @@ static const struct cpumask *cpu_book_mask(int cpu) return &per_cpu(cpu_topology, cpu).book_mask; } +static const struct cpumask *cpu_drawer_mask(int cpu) +{ + return &per_cpu(cpu_topology, cpu).drawer_mask; +} + static int __init early_parse_topology(char *p) { return kstrtobool(p, &topology_enabled); @@ -452,6 +471,7 @@ static struct sched_domain_topology_level s390_topology[] = { { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, { cpu_book_mask, SD_INIT_NAME(BOOK) }, + { cpu_drawer_mask, SD_INIT_NAME(DRAWER) }, { cpu_cpu_mask, SD_INIT_NAME(DIE) }, { NULL, }, }; @@ -487,6 +507,7 @@ static int __init s390_topology_init(void) printk(KERN_CONT " / %d\n", info->mnest); alloc_masks(info, &socket_info, 1); alloc_masks(info, &book_info, 2); + alloc_masks(info, &drawer_info, 3); set_sched_topology(s390_topology); return 0; } diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c index 828d0695d0d4..fbc394e16b2c 100644 --- a/arch/s390/numa/mode_emu.c +++ b/arch/s390/numa/mode_emu.c @@ -34,7 +34,8 @@ #define DIST_CORE 1 #define DIST_MC 2 #define DIST_BOOK 3 -#define DIST_MAX 4 +#define DIST_DRAWER 4 +#define DIST_MAX 5 /* Node distance reported to common code */ #define EMU_NODE_DIST 10 @@ -43,7 +44,7 @@ #define NODE_ID_FREE -1 /* Different levels of toptree */ -enum toptree_level {CORE, MC, BOOK, NODE, TOPOLOGY}; +enum toptree_level {CORE, MC, BOOK, DRAWER, NODE, TOPOLOGY}; /* The two toptree IDs */ enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA}; @@ -113,6 +114,14 @@ static int cores_free(struct toptree *tree) * Return node of core */ static struct toptree *core_node(struct toptree *core) +{ + return core->parent->parent->parent->parent; +} + +/* + * Return drawer of core + */ +static struct toptree *core_drawer(struct toptree *core) { return core->parent->parent->parent; } @@ -138,6 +147,8 @@ static struct toptree *core_mc(struct toptree *core) */ static int dist_core_to_core(struct toptree *core1, struct toptree *core2) { + if (core_drawer(core1)->id != core_drawer(core2)->id) + return DIST_DRAWER; if (core_book(core1)->id != core_book(core2)->id) return DIST_BOOK; if (core_mc(core1)->id != core_mc(core2)->id) @@ -262,6 +273,8 @@ static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys) struct toptree *core; /* Always try to move perfectly fitting structures first */ + move_level_to_numa(numa, phys, DRAWER, true); + move_level_to_numa(numa, phys, DRAWER, false); move_level_to_numa(numa, phys, BOOK, true); move_level_to_numa(numa, phys, BOOK, false); move_level_to_numa(numa, phys, MC, true); @@ -335,7 +348,7 @@ static struct toptree *toptree_to_numa(struct toptree *phys) */ static struct toptree *toptree_from_topology(void) { - struct toptree *phys, *node, *book, *mc, *core; + struct toptree *phys, *node, *drawer, *book, *mc, *core; struct cpu_topology_s390 *top; int cpu; @@ -344,10 +357,11 @@ static struct toptree *toptree_from_topology(void) for_each_online_cpu(cpu) { top = &per_cpu(cpu_topology, cpu); node = toptree_get_child(phys, 0); - book = toptree_get_child(node, top->book_id); + drawer = toptree_get_child(node, top->drawer_id); + book = toptree_get_child(drawer, top->book_id); mc = toptree_get_child(book, top->socket_id); core = toptree_get_child(mc, top->core_id); - if (!book || !mc || !core) + if (!drawer || !book || !mc || !core) panic("NUMA emulation could not allocate memory"); cpumask_set_cpu(cpu, &core->mask); toptree_update_mask(mc); @@ -368,6 +382,7 @@ static void topology_add_core(struct toptree *core) cpumask_copy(&top->thread_mask, &core->mask); cpumask_copy(&top->core_mask, &core_mc(core)->mask); cpumask_copy(&top->book_mask, &core_book(core)->mask); + cpumask_copy(&top->drawer_mask, &core_drawer(core)->mask); cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]); top->node_id = core_node(core)->id; } -- cgit v1.2.3-59-g8ed1b From 86d18a55dd66aea8bb8fffb0334557eb4973ea52 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 25 May 2016 09:53:07 +0200 Subject: s390/topology: remove z10 special handling I don't have a z10 to test this anymore, so I have no idea if the code works at all or even crashes. I can try to emulate, but it is just guess work. Nor do we know if the z10 special handling is performance wise still better than the generic handling. There have been a lot of changes to the scheduler. Therefore let's play safe and remove the special handling. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/topology.c | 64 ++++++--------------------------------------- 1 file changed, 8 insertions(+), 56 deletions(-) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 44745e751c3a..e959c02e0cac 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -80,11 +80,10 @@ static cpumask_t cpu_thread_map(unsigned int cpu) return mask; } -static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core, - struct mask_info *drawer, - struct mask_info *book, - struct mask_info *socket, - int one_socket_per_cpu) +static void add_cpus_to_mask(struct topology_core *tl_core, + struct mask_info *drawer, + struct mask_info *book, + struct mask_info *socket) { struct cpu_topology_s390 *topo; unsigned int core; @@ -101,21 +100,15 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core, topo = &per_cpu(cpu_topology, lcpu + i); topo->drawer_id = drawer->id; topo->book_id = book->id; + topo->socket_id = socket->id; topo->core_id = rcore; topo->thread_id = lcpu + i; cpumask_set_cpu(lcpu + i, &drawer->mask); cpumask_set_cpu(lcpu + i, &book->mask); cpumask_set_cpu(lcpu + i, &socket->mask); - if (one_socket_per_cpu) - topo->socket_id = rcore; - else - topo->socket_id = socket->id; smp_cpu_set_polarization(lcpu + i, tl_core->pp); } - if (one_socket_per_cpu) - socket = socket->next; } - return socket; } static void clear_masks(void) @@ -146,13 +139,14 @@ static union topology_entry *next_tle(union topology_entry *tle) return (union topology_entry *)((struct topology_container *)tle + 1); } -static void __tl_to_masks_generic(struct sysinfo_15_1_x *info) +static void tl_to_masks(struct sysinfo_15_1_x *info) { struct mask_info *socket = &socket_info; struct mask_info *book = &book_info; struct mask_info *drawer = &drawer_info; union topology_entry *tle, *end; + clear_masks(); tle = info->tle; end = (union topology_entry *)((unsigned long)info + info->length); while (tle < end) { @@ -170,33 +164,7 @@ static void __tl_to_masks_generic(struct sysinfo_15_1_x *info) socket->id = tle->container.id; break; case 0: - add_cpus_to_mask(&tle->cpu, drawer, book, socket, 0); - break; - default: - clear_masks(); - return; - } - tle = next_tle(tle); - } -} - -static void __tl_to_masks_z10(struct sysinfo_15_1_x *info) -{ - struct mask_info *socket = &socket_info; - struct mask_info *book = &book_info; - struct mask_info *drawer = &drawer_info; - union topology_entry *tle, *end; - - tle = info->tle; - end = (union topology_entry *)((unsigned long)info + info->length); - while (tle < end) { - switch (tle->nl) { - case 1: - book = book->next; - book->id = tle->container.id; - break; - case 0: - socket = add_cpus_to_mask(&tle->cpu, drawer, book, socket, 1); + add_cpus_to_mask(&tle->cpu, drawer, book, socket); break; default: clear_masks(); @@ -206,22 +174,6 @@ static void __tl_to_masks_z10(struct sysinfo_15_1_x *info) } } -static void tl_to_masks(struct sysinfo_15_1_x *info) -{ - struct cpuid cpu_id; - - get_cpu_id(&cpu_id); - clear_masks(); - switch (cpu_id.machine) { - case 0x2097: - case 0x2098: - __tl_to_masks_z10(info); - break; - default: - __tl_to_masks_generic(info); - } -} - static void topology_update_polarization_simple(void) { int cpu; -- cgit v1.2.3-59-g8ed1b From de3fa841e429de7e288facf9b642948677fac581 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 14 Jun 2016 06:55:43 +0200 Subject: s390/mm: fix compile for PAGE_DEFAULT_KEY != 0 The usual problem for code that is ifdef'ed out is that it doesn't compile after a while. That's also the case for the storage key initialisation code, if it would be used (set PAGE_DEFAULT_KEY to something not zero): ./arch/s390/include/asm/page.h: In function 'storage_key_init_range': ./arch/s390/include/asm/page.h:36:2: error: implicit declaration of function '__storage_key_init_range' Since the code itself has been useful for debugging purposes several times, remove the ifdefs and make sure the code gets compiler coverage. The cost for this is eight bytes. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/page.h | 7 ++++--- arch/s390/mm/pageattr.c | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 53eacbd4f09b..42fd5fea42ee 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -30,11 +30,12 @@ #include #ifndef __ASSEMBLY__ +void __storage_key_init_range(unsigned long start, unsigned long end); + static inline void storage_key_init_range(unsigned long start, unsigned long end) { -#if PAGE_DEFAULT_KEY - __storage_key_init_range(start, end); -#endif + if (PAGE_DEFAULT_KEY) + __storage_key_init_range(start, end); } #define clear_page(page) memset((page), 0, PAGE_SIZE) diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index ba124d9c96ba..7104ffb5a67f 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -10,7 +10,6 @@ #include #include -#if PAGE_DEFAULT_KEY static inline unsigned long sske_frame(unsigned long addr, unsigned char skey) { asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0" @@ -22,6 +21,8 @@ void __storage_key_init_range(unsigned long start, unsigned long end) { unsigned long boundary, size; + if (!PAGE_DEFAULT_KEY) + return; while (start < end) { if (MACHINE_HAS_EDAT1) { /* set storage keys for a 1MB frame */ @@ -38,7 +39,6 @@ void __storage_key_init_range(unsigned long start, unsigned long end) start += PAGE_SIZE; } } -#endif #ifdef CONFIG_PROC_FS atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX]; -- cgit v1.2.3-59-g8ed1b From 04864808029e59ea1bf075c756a0f35c8398fc11 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 18 Feb 2015 14:46:00 +0100 Subject: s390/vx: add support functions for in-kernel FPU use Introduce the kernel_fpu_begin() and kernel_fpu_end() function to enclose any in-kernel use of FPU instructions and registers. In enclosed sections, you can perform floating-point or vector (SIMD) computations. The functions take care of saving and restoring FPU register contents and controls. For usage details, see the guidelines in arch/s390/include/asm/fpu/api.h Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/fpu/api.h | 75 ++++++++++++ arch/s390/include/asm/fpu/types.h | 10 ++ arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/fpu.c | 249 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 335 insertions(+), 1 deletion(-) create mode 100644 arch/s390/kernel/fpu.c diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index 5e04f3cbd320..78ba3ddb9e18 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -1,6 +1,41 @@ /* * In-kernel FPU support functions * + * + * Consider these guidelines before using in-kernel FPU functions: + * + * 1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel + * use of floating-point or vector registers and instructions. + * + * 2. For kernel_fpu_begin(), specify the vector register range you want to + * use with the KERNEL_VXR_* constants. Consider these usage guidelines: + * + * a) If your function typically runs in process-context, use the lower + * half of the vector registers, for example, specify KERNEL_VXR_LOW. + * b) If your function typically runs in soft-irq or hard-irq context, + * prefer using the upper half of the vector registers, for example, + * specify KERNEL_VXR_HIGH. + * + * If you adhere to these guidelines, an interrupted process context + * does not require to save and restore vector registers because of + * disjoint register ranges. + * + * Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions + * includes logic to save and restore up to 16 vector registers at once. + * + * 3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different + * struct kernel_fpu states. Vector registers that are in use by outer + * levels are saved and restored. You can minimize the save and restore + * effort by choosing disjoint vector register ranges. + * + * 5. To use vector floating-point instructions, specify the KERNEL_FPC + * flag to save and restore floating-point controls in addition to any + * vector register range. + * + * 6. To use floating-point registers and instructions only, specify the + * KERNEL_FPR flag. This flag triggers a save and restore of vector + * registers V0 to V15 and floating-point controls. + * * Copyright IBM Corp. 2015 * Author(s): Hendrik Brueckner */ @@ -8,6 +43,8 @@ #ifndef _ASM_S390_FPU_API_H #define _ASM_S390_FPU_API_H +#include + void save_fpu_regs(void); static inline int test_fp_ctl(u32 fpc) @@ -27,4 +64,42 @@ static inline int test_fp_ctl(u32 fpc) return rc; } +#define KERNEL_VXR_V0V7 1 +#define KERNEL_VXR_V8V15 2 +#define KERNEL_VXR_V16V23 4 +#define KERNEL_VXR_V24V31 8 +#define KERNEL_FPR 16 +#define KERNEL_FPC 256 + +#define KERNEL_VXR_LOW (KERNEL_VXR_V0V7|KERNEL_VXR_V8V15) +#define KERNEL_VXR_MID (KERNEL_VXR_V8V15|KERNEL_VXR_V16V23) +#define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23|KERNEL_VXR_V24V31) + +#define KERNEL_FPU_MASK (KERNEL_VXR_LOW|KERNEL_VXR_HIGH|KERNEL_FPR) + +struct kernel_fpu; + +/* + * Note the functions below must be called with preemption disabled. + * Do not enable preemption before calling __kernel_fpu_end() to prevent + * an corruption of an existing kernel FPU state. + * + * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions. + */ +void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags); +void __kernel_fpu_end(struct kernel_fpu *state); + + +static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags) +{ + preempt_disable(); + __kernel_fpu_begin(state, flags); +} + +static inline void kernel_fpu_end(struct kernel_fpu *state) +{ + __kernel_fpu_end(state); + preempt_enable(); +} + #endif /* _ASM_S390_FPU_API_H */ diff --git a/arch/s390/include/asm/fpu/types.h b/arch/s390/include/asm/fpu/types.h index fe937c9b6471..bce255ead72b 100644 --- a/arch/s390/include/asm/fpu/types.h +++ b/arch/s390/include/asm/fpu/types.h @@ -24,4 +24,14 @@ struct fpu { /* VX array structure for address operand constraints in inline assemblies */ struct vx_array { __vector128 _[__NUM_VXRS]; }; +/* In-kernel FPU state structure */ +struct kernel_fpu { + u32 mask; + u32 fpc; + union { + freg_t fprs[__NUM_FPRS]; + __vector128 vxrs[__NUM_VXRS]; + }; +}; + #endif /* _ASM_S390_FPU_TYPES_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 2f5586ab8a6a..8d1419120bb7 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -45,7 +45,7 @@ obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o -obj-y += runtime_instr.o cache.o dumpstack.o +obj-y += runtime_instr.o cache.o fpu.o dumpstack.o obj-y += entry.o reipl.o relocate_kernel.o extra-y += head.o head64.o vmlinux.lds diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c new file mode 100644 index 000000000000..81d1d1887507 --- /dev/null +++ b/arch/s390/kernel/fpu.c @@ -0,0 +1,249 @@ +/* + * In-kernel vector facility support functions + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner + */ +#include +#include +#include +#include +#include + +/* + * Per-CPU variable to maintain FPU register ranges that are in use + * by the kernel. + */ +static DEFINE_PER_CPU(u32, kernel_fpu_state); + +#define KERNEL_FPU_STATE_MASK (KERNEL_FPU_MASK|KERNEL_FPC) + + +void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags) +{ + if (!__this_cpu_read(kernel_fpu_state)) { + /* + * Save user space FPU state and register contents. Multiple + * calls because of interruptions do not matter and return + * immediately. This also sets CIF_FPU to lazy restore FP/VX + * register contents when returning to user space. + */ + save_fpu_regs(); + } + + /* Update flags to use the vector facility for KERNEL_FPR */ + if (MACHINE_HAS_VX && (state->mask & KERNEL_FPR)) { + flags |= KERNEL_VXR_LOW | KERNEL_FPC; + flags &= ~KERNEL_FPR; + } + + /* Save and update current kernel VX state */ + state->mask = __this_cpu_read(kernel_fpu_state); + __this_cpu_or(kernel_fpu_state, flags & KERNEL_FPU_STATE_MASK); + + /* + * If this is the first call to __kernel_fpu_begin(), no additional + * work is required. + */ + if (!(state->mask & KERNEL_FPU_STATE_MASK)) + return; + + /* + * If KERNEL_FPR is still set, the vector facility is not available + * and, thus, save floating-point control and registers only. + */ + if (state->mask & KERNEL_FPR) { + asm volatile("stfpc %0" : "=Q" (state->fpc)); + asm volatile("std 0,%0" : "=Q" (state->fprs[0])); + asm volatile("std 1,%0" : "=Q" (state->fprs[1])); + asm volatile("std 2,%0" : "=Q" (state->fprs[2])); + asm volatile("std 3,%0" : "=Q" (state->fprs[3])); + asm volatile("std 4,%0" : "=Q" (state->fprs[4])); + asm volatile("std 5,%0" : "=Q" (state->fprs[5])); + asm volatile("std 6,%0" : "=Q" (state->fprs[6])); + asm volatile("std 7,%0" : "=Q" (state->fprs[7])); + asm volatile("std 8,%0" : "=Q" (state->fprs[8])); + asm volatile("std 9,%0" : "=Q" (state->fprs[9])); + asm volatile("std 10,%0" : "=Q" (state->fprs[10])); + asm volatile("std 11,%0" : "=Q" (state->fprs[11])); + asm volatile("std 12,%0" : "=Q" (state->fprs[12])); + asm volatile("std 13,%0" : "=Q" (state->fprs[13])); + asm volatile("std 14,%0" : "=Q" (state->fprs[14])); + asm volatile("std 15,%0" : "=Q" (state->fprs[15])); + return; + } + + /* + * If this is a nested call to __kernel_fpu_begin(), check the saved + * state mask to save and later restore the vector registers that + * are already in use. Let's start with checking floating-point + * controls. + */ + if (state->mask & KERNEL_FPC) + asm volatile("stfpc %0" : "=m" (state->fpc)); + + /* Test and save vector registers */ + asm volatile ( + /* + * Test if any vector register must be saved and, if so, + * test if all register can be saved. + */ + " tmll %[m],15\n" /* KERNEL_VXR_MASK */ + " jz 20f\n" /* no work -> done */ + " la 1,%[vxrs]\n" /* load save area */ + " jo 18f\n" /* -> save V0..V31 */ + + /* + * Test if V8..V23 can be saved at once... this speeds up + * for KERNEL_fpu_MID only. Otherwise continue to split the + * range of vector registers into two halves and test them + * separately. + */ + " tmll %[m],6\n" /* KERNEL_VXR_MID */ + " jo 17f\n" /* -> save V8..V23 */ + + /* Test and save the first half of 16 vector registers */ + "1: tmll %[m],3\n" /* KERNEL_VXR_LOW */ + " jz 10f\n" /* -> KERNEL_VXR_HIGH */ + " jo 2f\n" /* 11 -> save V0..V15 */ + " brc 4,3f\n" /* 01 -> save V0..V7 */ + " brc 2,4f\n" /* 10 -> save V8..V15 */ + + /* Test and save the second half of 16 vector registers */ + "10: tmll %[m],12\n" /* KERNEL_VXR_HIGH */ + " jo 19f\n" /* 11 -> save V16..V31 */ + " brc 4,11f\n" /* 01 -> save V16..V23 */ + " brc 2,12f\n" /* 10 -> save V24..V31 */ + " j 20f\n" /* 00 -> done */ + + /* + * Below are the vstm combinations to save multiple vector + * registers at once. + */ + "2: .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "3: .word 0xe707,0x1000,0x003e\n" /* vstm 0,7,0(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "4: .word 0xe78f,0x1080,0x003e\n" /* vstm 8,15,128(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "\n" + "11: .word 0xe707,0x1100,0x0c3e\n" /* vstm 16,23,256(1) */ + " j 20f\n" /* -> done */ + "12: .word 0xe78f,0x1180,0x0c3e\n" /* vstm 24,31,384(1) */ + " j 20f\n" /* -> done */ + "\n" + "17: .word 0xe787,0x1080,0x043e\n" /* vstm 8,23,128(1) */ + " nill %[m],249\n" /* m &= ~VXR_MID */ + " j 1b\n" /* -> VXR_LOW */ + "\n" + "18: .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ + "19: .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */ + "20:" + : [vxrs] "=Q" (*(struct vx_array *) &state->vxrs) + : [m] "d" (state->mask) + : "1", "cc"); +} +EXPORT_SYMBOL(__kernel_fpu_begin); + +void __kernel_fpu_end(struct kernel_fpu *state) +{ + /* Just update the per-CPU state if there is nothing to restore */ + if (!(state->mask & KERNEL_FPU_STATE_MASK)) + goto update_fpu_state; + + /* + * If KERNEL_FPR is specified, the vector facility is not available + * and, thus, restore floating-point control and registers only. + */ + if (state->mask & KERNEL_FPR) { + asm volatile("lfpc %0" : : "Q" (state->fpc)); + asm volatile("ld 0,%0" : : "Q" (state->fprs[0])); + asm volatile("ld 1,%0" : : "Q" (state->fprs[1])); + asm volatile("ld 2,%0" : : "Q" (state->fprs[2])); + asm volatile("ld 3,%0" : : "Q" (state->fprs[3])); + asm volatile("ld 4,%0" : : "Q" (state->fprs[4])); + asm volatile("ld 5,%0" : : "Q" (state->fprs[5])); + asm volatile("ld 6,%0" : : "Q" (state->fprs[6])); + asm volatile("ld 7,%0" : : "Q" (state->fprs[7])); + asm volatile("ld 8,%0" : : "Q" (state->fprs[8])); + asm volatile("ld 9,%0" : : "Q" (state->fprs[9])); + asm volatile("ld 10,%0" : : "Q" (state->fprs[10])); + asm volatile("ld 11,%0" : : "Q" (state->fprs[11])); + asm volatile("ld 12,%0" : : "Q" (state->fprs[12])); + asm volatile("ld 13,%0" : : "Q" (state->fprs[13])); + asm volatile("ld 14,%0" : : "Q" (state->fprs[14])); + asm volatile("ld 15,%0" : : "Q" (state->fprs[15])); + goto update_fpu_state; + } + + /* Test and restore floating-point controls */ + if (state->mask & KERNEL_FPC) + asm volatile("lfpc %0" : : "Q" (state->fpc)); + + /* Test and restore (load) vector registers */ + asm volatile ( + /* + * Test if any vector registers must be loaded and, if so, + * test if all registers can be loaded at once. + */ + " tmll %[m],15\n" /* KERNEL_VXR_MASK */ + " jz 20f\n" /* no work -> done */ + " la 1,%[vxrs]\n" /* load load area */ + " jo 18f\n" /* -> load V0..V31 */ + + /* + * Test if V8..V23 can be restored at once... this speeds up + * for KERNEL_VXR_MID only. Otherwise continue to split the + * range of vector registers into two halves and test them + * separately. + */ + " tmll %[m],6\n" /* KERNEL_VXR_MID */ + " jo 17f\n" /* -> load V8..V23 */ + + /* Test and load the first half of 16 vector registers */ + "1: tmll %[m],3\n" /* KERNEL_VXR_LOW */ + " jz 10f\n" /* -> KERNEL_VXR_HIGH */ + " jo 2f\n" /* 11 -> load V0..V15 */ + " brc 4,3f\n" /* 01 -> load V0..V7 */ + " brc 2,4f\n" /* 10 -> load V8..V15 */ + + /* Test and load the second half of 16 vector registers */ + "10: tmll %[m],12\n" /* KERNEL_VXR_HIGH */ + " jo 19f\n" /* 11 -> load V16..V31 */ + " brc 4,11f\n" /* 01 -> load V16..V23 */ + " brc 2,12f\n" /* 10 -> load V24..V31 */ + " j 20f\n" /* 00 -> done */ + + /* + * Below are the vstm combinations to load multiple vector + * registers at once. + */ + "2: .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "3: .word 0xe707,0x1000,0x0036\n" /* vlm 0,7,0(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "4: .word 0xe78f,0x1080,0x0036\n" /* vlm 8,15,128(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "\n" + "11: .word 0xe707,0x1100,0x0c36\n" /* vlm 16,23,256(1) */ + " j 20f\n" /* -> done */ + "12: .word 0xe78f,0x1180,0x0c36\n" /* vlm 24,31,384(1) */ + " j 20f\n" /* -> done */ + "\n" + "17: .word 0xe787,0x1080,0x0436\n" /* vlm 8,23,128(1) */ + " nill %[m],249\n" /* m &= ~VXR_MID */ + " j 1b\n" /* -> VXR_LOW */ + "\n" + "18: .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ + "19: .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ + "20:" + : + : [vxrs] "Q" (*(struct vx_array *) &state->vxrs), + [m] "d" (state->mask) + : "1", "cc"); + +update_fpu_state: + /* Update current kernel VX state */ + __this_cpu_write(kernel_fpu_state, state->mask); +} +EXPORT_SYMBOL(__kernel_fpu_end); -- cgit v1.2.3-59-g8ed1b From 19c93787f573c6cffe9c25d3be20e3b40112b7ea Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 28 Apr 2015 12:29:06 +0200 Subject: s390/crc32-vx: use vector instructions to optimize CRC-32 computation Use vector instructions to optimize the computation of CRC-32 checksums. An optimized version is provided for CRC-32 (IEEE 802.3 Ethernet) in normal and bitreflected domain, as well as, for bitreflected CRC-32C (Castagnoli). Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/crc32be-vx.S | 207 ++++++++++++++++++++++++++++++++ arch/s390/crypto/crc32le-vx.S | 268 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 475 insertions(+) create mode 100644 arch/s390/crypto/crc32be-vx.S create mode 100644 arch/s390/crypto/crc32le-vx.S diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S new file mode 100644 index 000000000000..8013989cd2e5 --- /dev/null +++ b/arch/s390/crypto/crc32be-vx.S @@ -0,0 +1,207 @@ +/* + * Hardware-accelerated CRC-32 variants for Linux on z Systems + * + * Use the z/Architecture Vector Extension Facility to accelerate the + * computing of CRC-32 checksums. + * + * This CRC-32 implementation algorithm processes the most-significant + * bit first (BE). + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner + */ + +#include +#include + +/* Vector register range containing CRC-32 constants */ +#define CONST_R1R2 %v9 +#define CONST_R3R4 %v10 +#define CONST_R5 %v11 +#define CONST_R6 %v12 +#define CONST_RU_POLY %v13 +#define CONST_CRC_POLY %v14 + +.data +.align 8 + +/* + * The CRC-32 constant block contains reduction constants to fold and + * process particular chunks of the input data stream in parallel. + * + * For the CRC-32 variants, the constants are precomputed according to + * these defintions: + * + * R1 = x4*128+64 mod P(x) + * R2 = x4*128 mod P(x) + * R3 = x128+64 mod P(x) + * R4 = x128 mod P(x) + * R5 = x96 mod P(x) + * R6 = x64 mod P(x) + * + * Barret reduction constant, u, is defined as floor(x**64 / P(x)). + * + * where P(x) is the polynomial in the normal domain and the P'(x) is the + * polynomial in the reversed (bitreflected) domain. + * + * Note that the constant definitions below are extended in order to compute + * intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction. + * The righmost doubleword can be 0 to prevent contribution to the result or + * can be multiplied by 1 to perform an XOR without the need for a separate + * VECTOR EXCLUSIVE OR instruction. + * + * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: + * + * P(x) = 0x04C11DB7 + * P'(x) = 0xEDB88320 + */ + +.Lconstants_CRC_32_BE: + .quad 0x08833794c, 0x0e6228b11 # R1, R2 + .quad 0x0c5b9cd4c, 0x0e8a45605 # R3, R4 + .quad 0x0f200aa66, 1 << 32 # R5, x32 + .quad 0x0490d678d, 1 # R6, 1 + .quad 0x104d101df, 0 # u + .quad 0x104C11DB7, 0 # P(x) + +.previous + +.text +/* + * The CRC-32 function(s) use these calling conventions: + * + * Parameters: + * + * %r2: Initial CRC value, typically ~0; and final CRC (return) value. + * %r3: Input buffer pointer, performance might be improved if the + * buffer is on a doubleword boundary. + * %r4: Length of the buffer, must be 64 bytes or greater. + * + * Register usage: + * + * %r5: CRC-32 constant pool base pointer. + * V0: Initial CRC value and intermediate constants and results. + * V1..V4: Data for CRC computation. + * V5..V8: Next data chunks that are fetched from the input buffer. + * + * V9..V14: CRC-32 constants. + */ +ENTRY(crc32_be_vgfm_16) + /* Load CRC-32 constants */ + larl %r5,.Lconstants_CRC_32_BE + VLM CONST_R1R2,CONST_CRC_POLY,0,%r5 + + /* Load the initial CRC value into the leftmost word of V0. */ + VZERO %v0 + VLVGF %v0,%r2,0 + + /* Load a 64-byte data chunk and XOR with CRC */ + VLM %v1,%v4,0,%r3 /* 64-bytes into V1..V4 */ + VX %v1,%v0,%v1 /* V1 ^= CRC */ + aghi %r3,64 /* BUF = BUF + 64 */ + aghi %r4,-64 /* LEN = LEN - 64 */ + + /* Check remaining buffer size and jump to proper folding method */ + cghi %r4,64 + jl .Lless_than_64bytes + +.Lfold_64bytes_loop: + /* Load the next 64-byte data chunk into V5 to V8 */ + VLM %v5,%v8,0,%r3 + + /* + * Perform a GF(2) multiplication of the doublewords in V1 with + * the reduction constants in V0. The intermediate result is + * then folded (accumulated) with the next data chunk in V5 and + * stored in V1. Repeat this step for the register contents + * in V2, V3, and V4 respectively. + */ + VGFMAG %v1,CONST_R1R2,%v1,%v5 + VGFMAG %v2,CONST_R1R2,%v2,%v6 + VGFMAG %v3,CONST_R1R2,%v3,%v7 + VGFMAG %v4,CONST_R1R2,%v4,%v8 + + /* Adjust buffer pointer and length for next loop */ + aghi %r3,64 /* BUF = BUF + 64 */ + aghi %r4,-64 /* LEN = LEN - 64 */ + + cghi %r4,64 + jnl .Lfold_64bytes_loop + +.Lless_than_64bytes: + /* Fold V1 to V4 into a single 128-bit value in V1 */ + VGFMAG %v1,CONST_R3R4,%v1,%v2 + VGFMAG %v1,CONST_R3R4,%v1,%v3 + VGFMAG %v1,CONST_R3R4,%v1,%v4 + + /* Check whether to continue with 64-bit folding */ + cghi %r4,16 + jl .Lfinal_fold + +.Lfold_16bytes_loop: + + VL %v2,0,,%r3 /* Load next data chunk */ + VGFMAG %v1,CONST_R3R4,%v1,%v2 /* Fold next data chunk */ + + /* Adjust buffer pointer and size for folding next data chunk */ + aghi %r3,16 + aghi %r4,-16 + + /* Process remaining data chunks */ + cghi %r4,16 + jnl .Lfold_16bytes_loop + +.Lfinal_fold: + /* + * The R5 constant is used to fold a 128-bit value into an 96-bit value + * that is XORed with the next 96-bit input data chunk. To use a single + * VGFMG instruction, multiply the rightmost 64-bit with x^32 (1<<32) to + * form an intermediate 96-bit value (with appended zeros) which is then + * XORed with the intermediate reduction result. + */ + VGFMG %v1,CONST_R5,%v1 + + /* + * Further reduce the remaining 96-bit value to a 64-bit value using a + * single VGFMG, the rightmost doubleword is multiplied with 0x1. The + * intermediate result is then XORed with the product of the leftmost + * doubleword with R6. The result is a 64-bit value and is subject to + * the Barret reduction. + */ + VGFMG %v1,CONST_R6,%v1 + + /* + * The input values to the Barret reduction are the degree-63 polynomial + * in V1 (R(x)), degree-32 generator polynomial, and the reduction + * constant u. The Barret reduction result is the CRC value of R(x) mod + * P(x). + * + * The Barret reduction algorithm is defined as: + * + * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u + * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) + * 3. C(x) = R(x) XOR T2(x) mod x^32 + * + * Note: To compensate the division by x^32, use the vector unpack + * instruction to move the leftmost word into the leftmost doubleword + * of the vector register. The rightmost doubleword is multiplied + * with zero to not contribute to the intermedate results. + */ + + /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ + VUPLLF %v2,%v1 + VGFMG %v2,CONST_RU_POLY,%v2 + + /* + * Compute the GF(2) product of the CRC polynomial in VO with T1(x) in + * V2 and XOR the intermediate result, T2(x), with the value in V1. + * The final result is in the rightmost word of V2. + */ + VUPLLF %v2,%v2 + VGFMAG %v2,CONST_CRC_POLY,%v2,%v1 + +.Ldone: + VLGVF %r2,%v2,3 + br %r14 + +.previous diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S new file mode 100644 index 000000000000..17f2504c2633 --- /dev/null +++ b/arch/s390/crypto/crc32le-vx.S @@ -0,0 +1,268 @@ +/* + * Hardware-accelerated CRC-32 variants for Linux on z Systems + * + * Use the z/Architecture Vector Extension Facility to accelerate the + * computing of bitreflected CRC-32 checksums for IEEE 802.3 Ethernet + * and Castagnoli. + * + * This CRC-32 implementation algorithm is bitreflected and processes + * the least-significant bit first (Little-Endian). + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner + */ + +#include +#include + +/* Vector register range containing CRC-32 constants */ +#define CONST_PERM_LE2BE %v9 +#define CONST_R2R1 %v10 +#define CONST_R4R3 %v11 +#define CONST_R5 %v12 +#define CONST_RU_POLY %v13 +#define CONST_CRC_POLY %v14 + +.data +.align 8 + +/* + * The CRC-32 constant block contains reduction constants to fold and + * process particular chunks of the input data stream in parallel. + * + * For the CRC-32 variants, the constants are precomputed according to + * these definitions: + * + * R1 = [(x4*128+32 mod P'(x) << 32)]' << 1 + * R2 = [(x4*128-32 mod P'(x) << 32)]' << 1 + * R3 = [(x128+32 mod P'(x) << 32)]' << 1 + * R4 = [(x128-32 mod P'(x) << 32)]' << 1 + * R5 = [(x64 mod P'(x) << 32)]' << 1 + * R6 = [(x32 mod P'(x) << 32)]' << 1 + * + * The bitreflected Barret reduction constant, u', is defined as + * the bit reversal of floor(x**64 / P(x)). + * + * where P(x) is the polynomial in the normal domain and the P'(x) is the + * polynomial in the reversed (bitreflected) domain. + * + * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: + * + * P(x) = 0x04C11DB7 + * P'(x) = 0xEDB88320 + * + * CRC-32C (Castagnoli) polynomials: + * + * P(x) = 0x1EDC6F41 + * P'(x) = 0x82F63B78 + */ + +.Lconstants_CRC_32_LE: + .octa 0x0F0E0D0C0B0A09080706050403020100 # BE->LE mask + .quad 0x1c6e41596, 0x154442bd4 # R2, R1 + .quad 0x0ccaa009e, 0x1751997d0 # R4, R3 + .octa 0x163cd6124 # R5 + .octa 0x1F7011641 # u' + .octa 0x1DB710641 # P'(x) << 1 + +.Lconstants_CRC_32C_LE: + .octa 0x0F0E0D0C0B0A09080706050403020100 # BE->LE mask + .quad 0x09e4addf8, 0x740eef02 # R2, R1 + .quad 0x14cd00bd6, 0xf20c0dfe # R4, R3 + .octa 0x0dd45aab8 # R5 + .octa 0x0dea713f1 # u' + .octa 0x105ec76f0 # P'(x) << 1 + +.previous + + +.text + +/* + * The CRC-32 functions use these calling conventions: + * + * Parameters: + * + * %r2: Initial CRC value, typically ~0; and final CRC (return) value. + * %r3: Input buffer pointer, performance might be improved if the + * buffer is on a doubleword boundary. + * %r4: Length of the buffer, must be 64 bytes or greater. + * + * Register usage: + * + * %r5: CRC-32 constant pool base pointer. + * V0: Initial CRC value and intermediate constants and results. + * V1..V4: Data for CRC computation. + * V5..V8: Next data chunks that are fetched from the input buffer. + * V9: Constant for BE->LE conversion and shift operations + * + * V10..V14: CRC-32 constants. + */ + +ENTRY(crc32_le_vgfm_16) + larl %r5,.Lconstants_CRC_32_LE + j crc32_le_vgfm_generic + +ENTRY(crc32c_le_vgfm_16) + larl %r5,.Lconstants_CRC_32C_LE + j crc32_le_vgfm_generic + + +crc32_le_vgfm_generic: + /* Load CRC-32 constants */ + VLM CONST_PERM_LE2BE,CONST_CRC_POLY,0,%r5 + + /* + * Load the initial CRC value. + * + * The CRC value is loaded into the rightmost word of the + * vector register and is later XORed with the LSB portion + * of the loaded input data. + */ + VZERO %v0 /* Clear V0 */ + VLVGF %v0,%r2,3 /* Load CRC into rightmost word */ + + /* Load a 64-byte data chunk and XOR with CRC */ + VLM %v1,%v4,0,%r3 /* 64-bytes into V1..V4 */ + VPERM %v1,%v1,%v1,CONST_PERM_LE2BE + VPERM %v2,%v2,%v2,CONST_PERM_LE2BE + VPERM %v3,%v3,%v3,CONST_PERM_LE2BE + VPERM %v4,%v4,%v4,CONST_PERM_LE2BE + + VX %v1,%v0,%v1 /* V1 ^= CRC */ + aghi %r3,64 /* BUF = BUF + 64 */ + aghi %r4,-64 /* LEN = LEN - 64 */ + + cghi %r4,64 + jl .Lless_than_64bytes + +.Lfold_64bytes_loop: + /* Load the next 64-byte data chunk into V5 to V8 */ + VLM %v5,%v8,0,%r3 + VPERM %v5,%v5,%v5,CONST_PERM_LE2BE + VPERM %v6,%v6,%v6,CONST_PERM_LE2BE + VPERM %v7,%v7,%v7,CONST_PERM_LE2BE + VPERM %v8,%v8,%v8,CONST_PERM_LE2BE + + /* + * Perform a GF(2) multiplication of the doublewords in V1 with + * the R1 and R2 reduction constants in V0. The intermediate result + * is then folded (accumulated) with the next data chunk in V5 and + * stored in V1. Repeat this step for the register contents + * in V2, V3, and V4 respectively. + */ + VGFMAG %v1,CONST_R2R1,%v1,%v5 + VGFMAG %v2,CONST_R2R1,%v2,%v6 + VGFMAG %v3,CONST_R2R1,%v3,%v7 + VGFMAG %v4,CONST_R2R1,%v4,%v8 + + aghi %r3,64 /* BUF = BUF + 64 */ + aghi %r4,-64 /* LEN = LEN - 64 */ + + cghi %r4,64 + jnl .Lfold_64bytes_loop + +.Lless_than_64bytes: + /* + * Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3 + * and R4 and accumulating the next 128-bit chunk until a single 128-bit + * value remains. + */ + VGFMAG %v1,CONST_R4R3,%v1,%v2 + VGFMAG %v1,CONST_R4R3,%v1,%v3 + VGFMAG %v1,CONST_R4R3,%v1,%v4 + + cghi %r4,16 + jl .Lfinal_fold + +.Lfold_16bytes_loop: + + VL %v2,0,,%r3 /* Load next data chunk */ + VPERM %v2,%v2,%v2,CONST_PERM_LE2BE + VGFMAG %v1,CONST_R4R3,%v1,%v2 /* Fold next data chunk */ + + aghi %r3,16 + aghi %r4,-16 + + cghi %r4,16 + jnl .Lfold_16bytes_loop + +.Lfinal_fold: + /* + * Set up a vector register for byte shifts. The shift value must + * be loaded in bits 1-4 in byte element 7 of a vector register. + * Shift by 8 bytes: 0x40 + * Shift by 4 bytes: 0x20 + */ + VLEIB %v9,0x40,7 + + /* + * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes + * to move R4 into the rightmost doubleword and set the leftmost + * doubleword to 0x1. + */ + VSRLB %v0,CONST_R4R3,%v9 + VLEIG %v0,1,0 + + /* + * Compute GF(2) product of V1 and V0. The rightmost doubleword + * of V1 is multiplied with R4. The leftmost doubleword of V1 is + * multiplied by 0x1 and is then XORed with rightmost product. + * Implicitly, the intermediate leftmost product becomes padded + */ + VGFMG %v1,%v0,%v1 + + /* + * Now do the final 32-bit fold by multiplying the rightmost word + * in V1 with R5 and XOR the result with the remaining bits in V1. + * + * To achieve this by a single VGFMAG, right shift V1 by a word + * and store the result in V2 which is then accumulated. Use the + * vector unpack instruction to load the rightmost half of the + * doubleword into the rightmost doubleword element of V1; the other + * half is loaded in the leftmost doubleword. + * The vector register with CONST_R5 contains the R5 constant in the + * rightmost doubleword and the leftmost doubleword is zero to ignore + * the leftmost product of V1. + */ + VLEIB %v9,0x20,7 /* Shift by words */ + VSRLB %v2,%v1,%v9 /* Store remaining bits in V2 */ + VUPLLF %v1,%v1 /* Split rightmost doubleword */ + VGFMAG %v1,CONST_R5,%v1,%v2 /* V1 = (V1 * R5) XOR V2 */ + + /* + * Apply a Barret reduction to compute the final 32-bit CRC value. + * + * The input values to the Barret reduction are the degree-63 polynomial + * in V1 (R(x)), degree-32 generator polynomial, and the reduction + * constant u. The Barret reduction result is the CRC value of R(x) mod + * P(x). + * + * The Barret reduction algorithm is defined as: + * + * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u + * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) + * 3. C(x) = R(x) XOR T2(x) mod x^32 + * + * Note: The leftmost doubleword of vector register containing + * CONST_RU_POLY is zero and, thus, the intermediate GF(2) product + * is zero and does not contribute to the final result. + */ + + /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ + VUPLLF %v2,%v1 + VGFMG %v2,CONST_RU_POLY,%v2 + + /* + * Compute the GF(2) product of the CRC polynomial with T1(x) in + * V2 and XOR the intermediate result, T2(x), with the value in V1. + * The final result is stored in word element 2 of V2. + */ + VUPLLF %v2,%v2 + VGFMAG %v2,CONST_CRC_POLY,%v2,%v1 + +.Ldone: + VLGVF %r2,%v2,2 + br %r14 + +.previous -- cgit v1.2.3-59-g8ed1b From f848dbd3bc1a71274241c080b57eb912ff9f0098 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 28 Apr 2015 15:52:44 +0200 Subject: s390/crc32-vx: add crypto API module for optimized CRC-32 algorithms Add a crypto API module to access the vector extension based CRC-32 implementations. Users can request the optimized implementation through the shash crypto API interface. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/Makefile | 3 + arch/s390/crypto/crc32-vx.c | 310 ++++++++++++++++++++++++++++++++++++++++++++ drivers/crypto/Kconfig | 13 ++ 3 files changed, 326 insertions(+) create mode 100644 arch/s390/crypto/crc32-vx.c diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index 7f0b7cda6259..d1033de4c4ee 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -9,3 +9,6 @@ obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o obj-$(CONFIG_S390_PRNG) += prng.o obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o +obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o + +crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c new file mode 100644 index 000000000000..577ae1d4ae89 --- /dev/null +++ b/arch/s390/crypto/crc32-vx.c @@ -0,0 +1,310 @@ +/* + * Crypto-API module for CRC-32 algorithms implemented with the + * z/Architecture Vector Extension Facility. + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner + */ +#define KMSG_COMPONENT "crc32-vx" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include + + +#define CRC32_BLOCK_SIZE 1 +#define CRC32_DIGEST_SIZE 4 + +#define VX_MIN_LEN 64 +#define VX_ALIGNMENT 16L +#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) + +struct crc_ctx { + u32 key; +}; + +struct crc_desc_ctx { + u32 crc; +}; + +/* Prototypes for functions in assembly files */ +u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); +u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size); +u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); + +/* + * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension + * + * Creates a function to perform a particular CRC-32 computation. Depending + * on the message buffer, the hardware-accelerated or software implementation + * is used. Note that the message buffer is aligned to improve fetch + * operations of VECTOR LOAD MULTIPLE instructions. + * + */ +#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \ + static u32 __pure ___fname(u32 crc, \ + unsigned char const *data, size_t datalen) \ + { \ + struct kernel_fpu vxstate; \ + unsigned long prealign, aligned, remaining; \ + \ + if ((unsigned long)data & VX_ALIGN_MASK) { \ + prealign = VX_ALIGNMENT - \ + ((unsigned long)data & VX_ALIGN_MASK); \ + datalen -= prealign; \ + crc = ___crc32_sw(crc, data, prealign); \ + data = (void *)((unsigned long)data + prealign); \ + } \ + \ + if (datalen < VX_MIN_LEN) \ + return ___crc32_sw(crc, data, datalen); \ + \ + aligned = datalen & ~VX_ALIGN_MASK; \ + remaining = datalen & VX_ALIGN_MASK; \ + \ + kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \ + crc = ___crc32_vx(crc, data, aligned); \ + kernel_fpu_end(&vxstate); \ + \ + if (remaining) \ + crc = ___crc32_sw(crc, data + aligned, remaining); \ + \ + return crc; \ + } + +DEFINE_CRC32_VX(crc32_le_vx, crc32_le_vgfm_16, crc32_le) +DEFINE_CRC32_VX(crc32_be_vx, crc32_be_vgfm_16, crc32_be) +DEFINE_CRC32_VX(crc32c_le_vx, crc32c_le_vgfm_16, __crc32c_le) + + +static int crc32_vx_cra_init_zero(struct crypto_tfm *tfm) +{ + struct crc_ctx *mctx = crypto_tfm_ctx(tfm); + + mctx->key = 0; + return 0; +} + +static int crc32_vx_cra_init_invert(struct crypto_tfm *tfm) +{ + struct crc_ctx *mctx = crypto_tfm_ctx(tfm); + + mctx->key = ~0; + return 0; +} + +static int crc32_vx_init(struct shash_desc *desc) +{ + struct crc_ctx *mctx = crypto_shash_ctx(desc->tfm); + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = mctx->key; + return 0; +} + +static int crc32_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, + unsigned int newkeylen) +{ + struct crc_ctx *mctx = crypto_shash_ctx(tfm); + + if (newkeylen != sizeof(mctx->key)) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + mctx->key = le32_to_cpu(*(__le32 *)newkey); + return 0; +} + +static int crc32be_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, + unsigned int newkeylen) +{ + struct crc_ctx *mctx = crypto_shash_ctx(tfm); + + if (newkeylen != sizeof(mctx->key)) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + mctx->key = be32_to_cpu(*(__be32 *)newkey); + return 0; +} + +static int crc32le_vx_final(struct shash_desc *desc, u8 *out) +{ + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); + + *(__le32 *)out = cpu_to_le32p(&ctx->crc); + return 0; +} + +static int crc32be_vx_final(struct shash_desc *desc, u8 *out) +{ + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); + + *(__be32 *)out = cpu_to_be32p(&ctx->crc); + return 0; +} + +static int crc32c_vx_final(struct shash_desc *desc, u8 *out) +{ + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); + + /* + * Perform a final XOR with 0xFFFFFFFF to be in sync + * with the generic crc32c shash implementation. + */ + *(__le32 *)out = ~cpu_to_le32p(&ctx->crc); + return 0; +} + +static int __crc32le_vx_finup(u32 *crc, const u8 *data, unsigned int len, + u8 *out) +{ + *(__le32 *)out = cpu_to_le32(crc32_le_vx(*crc, data, len)); + return 0; +} + +static int __crc32be_vx_finup(u32 *crc, const u8 *data, unsigned int len, + u8 *out) +{ + *(__be32 *)out = cpu_to_be32(crc32_be_vx(*crc, data, len)); + return 0; +} + +static int __crc32c_vx_finup(u32 *crc, const u8 *data, unsigned int len, + u8 *out) +{ + /* + * Perform a final XOR with 0xFFFFFFFF to be in sync + * with the generic crc32c shash implementation. + */ + *(__le32 *)out = ~cpu_to_le32(crc32c_le_vx(*crc, data, len)); + return 0; +} + + +#define CRC32_VX_FINUP(alg, func) \ + static int alg ## _vx_finup(struct shash_desc *desc, const u8 *data, \ + unsigned int datalen, u8 *out) \ + { \ + return __ ## alg ## _vx_finup(shash_desc_ctx(desc), \ + data, datalen, out); \ + } + +CRC32_VX_FINUP(crc32le, crc32_le_vx) +CRC32_VX_FINUP(crc32be, crc32_be_vx) +CRC32_VX_FINUP(crc32c, crc32c_le_vx) + +#define CRC32_VX_DIGEST(alg, func) \ + static int alg ## _vx_digest(struct shash_desc *desc, const u8 *data, \ + unsigned int len, u8 *out) \ + { \ + return __ ## alg ## _vx_finup(crypto_shash_ctx(desc->tfm), \ + data, len, out); \ + } + +CRC32_VX_DIGEST(crc32le, crc32_le_vx) +CRC32_VX_DIGEST(crc32be, crc32_be_vx) +CRC32_VX_DIGEST(crc32c, crc32c_le_vx) + +#define CRC32_VX_UPDATE(alg, func) \ + static int alg ## _vx_update(struct shash_desc *desc, const u8 *data, \ + unsigned int datalen) \ + { \ + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); \ + ctx->crc = func(ctx->crc, data, datalen); \ + return 0; \ + } + +CRC32_VX_UPDATE(crc32le, crc32_le_vx) +CRC32_VX_UPDATE(crc32be, crc32_be_vx) +CRC32_VX_UPDATE(crc32c, crc32c_le_vx) + + +static struct shash_alg crc32_vx_algs[] = { + /* CRC-32 LE */ + { + .init = crc32_vx_init, + .setkey = crc32_vx_setkey, + .update = crc32le_vx_update, + .final = crc32le_vx_final, + .finup = crc32le_vx_finup, + .digest = crc32le_vx_digest, + .descsize = sizeof(struct crc_desc_ctx), + .digestsize = CRC32_DIGEST_SIZE, + .base = { + .cra_name = "crc32", + .cra_driver_name = "crc32-vx", + .cra_priority = 200, + .cra_blocksize = CRC32_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crc_ctx), + .cra_module = THIS_MODULE, + .cra_init = crc32_vx_cra_init_zero, + }, + }, + /* CRC-32 BE */ + { + .init = crc32_vx_init, + .setkey = crc32be_vx_setkey, + .update = crc32be_vx_update, + .final = crc32be_vx_final, + .finup = crc32be_vx_finup, + .digest = crc32be_vx_digest, + .descsize = sizeof(struct crc_desc_ctx), + .digestsize = CRC32_DIGEST_SIZE, + .base = { + .cra_name = "crc32be", + .cra_driver_name = "crc32be-vx", + .cra_priority = 200, + .cra_blocksize = CRC32_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crc_ctx), + .cra_module = THIS_MODULE, + .cra_init = crc32_vx_cra_init_zero, + }, + }, + /* CRC-32C LE */ + { + .init = crc32_vx_init, + .setkey = crc32_vx_setkey, + .update = crc32c_vx_update, + .final = crc32c_vx_final, + .finup = crc32c_vx_finup, + .digest = crc32c_vx_digest, + .descsize = sizeof(struct crc_desc_ctx), + .digestsize = CRC32_DIGEST_SIZE, + .base = { + .cra_name = "crc32c", + .cra_driver_name = "crc32c-vx", + .cra_priority = 200, + .cra_blocksize = CRC32_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crc_ctx), + .cra_module = THIS_MODULE, + .cra_init = crc32_vx_cra_init_invert, + }, + }, +}; + + +static int __init crc_vx_mod_init(void) +{ + return crypto_register_shashes(crc32_vx_algs, + ARRAY_SIZE(crc32_vx_algs)); +} + +static void __exit crc_vx_mod_exit(void) +{ + crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs)); +} + +module_cpu_feature_match(VXRS, crc_vx_mod_init); +module_exit(crc_vx_mod_exit); + +MODULE_AUTHOR("Hendrik Brueckner "); +MODULE_LICENSE("GPL"); + +MODULE_ALIAS_CRYPTO("crc32"); +MODULE_ALIAS_CRYPTO("crc32-vx"); +MODULE_ALIAS_CRYPTO("crc32c"); +MODULE_ALIAS_CRYPTO("crc32c-vx"); diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index d77ba2f12242..1af94e2d1a25 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -159,6 +159,19 @@ config CRYPTO_GHASH_S390 It is available as of z196. +config CRYPTO_CRC32_S390 + tristate "CRC-32 algorithms" + depends on S390 + select CRYPTO_HASH + select CRC32 + help + Select this option if you want to use hardware accelerated + implementations of CRC algorithms. With this option, you + can optimize the computation of CRC-32 (IEEE 802.3 Ethernet) + and CRC-32C (Castagnoli). + + It is available with IBM z13 or later. + config CRYPTO_DEV_MV_CESA tristate "Marvell's Cryptographic Engine" depends on PLAT_ORION -- cgit v1.2.3-59-g8ed1b From a086171ad88685f265b0e42894fcdefa3919cdb2 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 24 Jun 2015 16:10:59 +0200 Subject: s390: Updated kernel config files Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/configs/default_defconfig | 1 + arch/s390/configs/gcov_defconfig | 1 + arch/s390/configs/performance_defconfig | 1 + arch/s390/defconfig | 4 ++++ 4 files changed, 7 insertions(+) diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index d5ec71b2ed02..889ea3450210 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -678,6 +678,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_CRC32_S390=m CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index f46a35115d2d..1bcfd764910a 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -616,6 +616,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_CRC32_S390=m CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index ba0f2a58b8cd..13ff090139c8 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -615,6 +615,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_CRC32_S390=m CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 3f571ea89509..ccccebeeaaf6 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -225,12 +225,16 @@ CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_ANSI_CPRNG=m +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_CRYPTO_USER_API_SKCIPHER=m +CONFIG_CRYPTO_USER_API_RNG=m CONFIG_ZCRYPT=m CONFIG_CRYPTO_SHA1_S390=m CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_CRC32_S390=m CONFIG_CRC7=m # CONFIG_XZ_DEC_X86 is not set # CONFIG_XZ_DEC_POWERPC is not set -- cgit v1.2.3-59-g8ed1b From 9c203239c5970354536acb3c01000e925bee0270 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 1 Jul 2015 13:19:19 +0200 Subject: s390: calculate loops_per_jiffies with fp instructions Implement calculation of loops_per_jiffies with fp instructions which are available on all 64 bit machines. To save and restore floating point register context use the new vx support functions. Reviewed-by: Hendrik Brueckner Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/sysinfo.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index f7dba3887a54..96d81502e599 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -16,11 +16,7 @@ #include #include #include - -/* Sigh, math-emu. Don't ask. */ -#include -#include -#include +#include int topology_max_mnest; @@ -414,10 +410,8 @@ subsys_initcall(create_proc_service_level); void s390_adjust_jiffies(void) { struct sysinfo_1_2_2 *info; - const unsigned int fmil = 0x4b189680; /* 1e7 as 32-bit float. */ - FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); - FP_DECL_EX; - unsigned int capability; + unsigned long capability; + struct kernel_fpu fpu; info = (void *) get_zeroed_page(GFP_KERNEL); if (!info) @@ -433,15 +427,25 @@ void s390_adjust_jiffies(void) * higher cpu capacity. Bogomips are the other way round. * To get to a halfway suitable number we divide 1e7 * by the cpu capability number. Yes, that means a floating - * point division .. math-emu here we come :-) + * point division .. */ - FP_UNPACK_SP(SA, &fmil); - if ((info->capability >> 23) == 0) - FP_FROM_INT_S(SB, (long) info->capability, 64, long); - else - FP_UNPACK_SP(SB, &info->capability); - FP_DIV_S(SR, SA, SB); - FP_TO_INT_S(capability, SR, 32, 0); + kernel_fpu_begin(&fpu, KERNEL_FPR); + asm volatile( + " sfpc %3\n" + " l %0,%1\n" + " tmlh %0,0xff80\n" + " jnz 0f\n" + " cefbr %%f2,%0\n" + " j 1f\n" + "0: le %%f2,%1\n" + "1: cefbr %%f0,%2\n" + " debr %%f0,%%f2\n" + " cgebr %0,5,%%f0\n" + : "=&d" (capability) + : "Q" (info->capability), "d" (10000000), "d" (0) + : "cc" + ); + kernel_fpu_end(&fpu); } else /* * Really old machine without stsi block for basic -- cgit v1.2.3-59-g8ed1b From 11a7752e015cabb5dbd4a9096f4ab2ca18836ae3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 1 Jul 2015 13:26:51 +0200 Subject: s390: remove math emulation code The last in-kernel user is gone so we can finally remove this code. Reviewed-by: Hendrik Brueckner Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mathemu.h | 28 ------- arch/s390/include/asm/sfp-machine.h | 142 ------------------------------------ arch/s390/include/asm/sfp-util.h | 67 ----------------- arch/s390/kernel/dis.c | 1 - 4 files changed, 238 deletions(-) delete mode 100644 arch/s390/include/asm/mathemu.h delete mode 100644 arch/s390/include/asm/sfp-machine.h delete mode 100644 arch/s390/include/asm/sfp-util.h diff --git a/arch/s390/include/asm/mathemu.h b/arch/s390/include/asm/mathemu.h deleted file mode 100644 index 614dfaf47f71..000000000000 --- a/arch/s390/include/asm/mathemu.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * IEEE floating point emulation. - * - * S390 version - * Copyright IBM Corp. 1999 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ - -#ifndef __MATHEMU__ -#define __MATHEMU__ - -extern int math_emu_b3(__u8 *, struct pt_regs *); -extern int math_emu_ed(__u8 *, struct pt_regs *); -extern int math_emu_ldr(__u8 *); -extern int math_emu_ler(__u8 *); -extern int math_emu_std(__u8 *, struct pt_regs *); -extern int math_emu_ld(__u8 *, struct pt_regs *); -extern int math_emu_ste(__u8 *, struct pt_regs *); -extern int math_emu_le(__u8 *, struct pt_regs *); -extern int math_emu_lfpc(__u8 *, struct pt_regs *); -extern int math_emu_stfpc(__u8 *, struct pt_regs *); -extern int math_emu_srnm(__u8 *, struct pt_regs *); - -#endif /* __MATHEMU__ */ - - - - diff --git a/arch/s390/include/asm/sfp-machine.h b/arch/s390/include/asm/sfp-machine.h deleted file mode 100644 index 4e16aede4b06..000000000000 --- a/arch/s390/include/asm/sfp-machine.h +++ /dev/null @@ -1,142 +0,0 @@ -/* Machine-dependent software floating-point definitions. - S/390 kernel version. - Copyright (C) 1997,1998,1999 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Richard Henderson (rth@cygnus.com), - Jakub Jelinek (jj@ultra.linux.cz), - David S. Miller (davem@redhat.com) and - Peter Maydell (pmaydell@chiark.greenend.org.uk). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If - not, write to the Free Software Foundation, Inc., - 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -#ifndef _SFP_MACHINE_H -#define _SFP_MACHINE_H - - -#define _FP_W_TYPE_SIZE 32 -#define _FP_W_TYPE unsigned int -#define _FP_WS_TYPE signed int -#define _FP_I_TYPE int - -#define _FP_MUL_MEAT_S(R,X,Y) \ - _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) -#define _FP_MUL_MEAT_D(R,X,Y) \ - _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) -#define _FP_MUL_MEAT_Q(R,X,Y) \ - _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) - -#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv(S,R,X,Y) -#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) -#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) - -#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) -#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 -#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 -#define _FP_NANSIGN_S 0 -#define _FP_NANSIGN_D 0 -#define _FP_NANSIGN_Q 0 - -#define _FP_KEEPNANFRACP 1 - -/* - * If one NaN is signaling and the other is not, - * we choose that one, otherwise we choose X. - */ -#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ - do { \ - if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \ - && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \ - { \ - R##_s = Y##_s; \ - _FP_FRAC_COPY_##wc(R,Y); \ - } \ - else \ - { \ - R##_s = X##_s; \ - _FP_FRAC_COPY_##wc(R,X); \ - } \ - R##_c = FP_CLS_NAN; \ - } while (0) - -/* Some assembly to speed things up. */ -#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) ({ \ - unsigned int __r2 = (x2) + (y2); \ - unsigned int __r1 = (x1); \ - unsigned int __r0 = (x0); \ - asm volatile( \ - " alr %2,%3\n" \ - " brc 12,0f\n" \ - " lhi 0,1\n" \ - " alr %1,0\n" \ - " brc 12,0f\n" \ - " alr %0,0\n" \ - "0:" \ - : "+&d" (__r2), "+&d" (__r1), "+&d" (__r0) \ - : "d" (y0), "i" (1) : "cc", "0" ); \ - asm volatile( \ - " alr %1,%2\n" \ - " brc 12,0f\n" \ - " ahi %0,1\n" \ - "0:" \ - : "+&d" (__r2), "+&d" (__r1) \ - : "d" (y1) : "cc"); \ - (r2) = __r2; \ - (r1) = __r1; \ - (r0) = __r0; \ -}) - -#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) ({ \ - unsigned int __r2 = (x2) - (y2); \ - unsigned int __r1 = (x1); \ - unsigned int __r0 = (x0); \ - asm volatile( \ - " slr %2,%3\n" \ - " brc 3,0f\n" \ - " lhi 0,1\n" \ - " slr %1,0\n" \ - " brc 3,0f\n" \ - " slr %0,0\n" \ - "0:" \ - : "+&d" (__r2), "+&d" (__r1), "+&d" (__r0) \ - : "d" (y0) : "cc", "0"); \ - asm volatile( \ - " slr %1,%2\n" \ - " brc 3,0f\n" \ - " ahi %0,-1\n" \ - "0:" \ - : "+&d" (__r2), "+&d" (__r1) \ - : "d" (y1) : "cc"); \ - (r2) = __r2; \ - (r1) = __r1; \ - (r0) = __r0; \ -}) - -#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) __FP_FRAC_SUB_3(x2,x1,x0,x2,x1,x0,y2,y1,y0) - -/* Obtain the current rounding mode. */ -#define FP_ROUNDMODE mode - -/* Exception flags. */ -#define FP_EX_INVALID 0x800000 -#define FP_EX_DIVZERO 0x400000 -#define FP_EX_OVERFLOW 0x200000 -#define FP_EX_UNDERFLOW 0x100000 -#define FP_EX_INEXACT 0x080000 - -/* We write the results always */ -#define FP_INHIBIT_RESULTS 0 - -#endif diff --git a/arch/s390/include/asm/sfp-util.h b/arch/s390/include/asm/sfp-util.h deleted file mode 100644 index c8b7cf9d6279..000000000000 --- a/arch/s390/include/asm/sfp-util.h +++ /dev/null @@ -1,67 +0,0 @@ -#include -#include -#include -#include - -#define add_ssaaaa(sh, sl, ah, al, bh, bl) ({ \ - unsigned int __sh = (ah); \ - unsigned int __sl = (al); \ - asm volatile( \ - " alr %1,%3\n" \ - " brc 12,0f\n" \ - " ahi %0,1\n" \ - "0: alr %0,%2" \ - : "+&d" (__sh), "+d" (__sl) \ - : "d" (bh), "d" (bl) : "cc"); \ - (sh) = __sh; \ - (sl) = __sl; \ -}) - -#define sub_ddmmss(sh, sl, ah, al, bh, bl) ({ \ - unsigned int __sh = (ah); \ - unsigned int __sl = (al); \ - asm volatile( \ - " slr %1,%3\n" \ - " brc 3,0f\n" \ - " ahi %0,-1\n" \ - "0: slr %0,%2" \ - : "+&d" (__sh), "+d" (__sl) \ - : "d" (bh), "d" (bl) : "cc"); \ - (sh) = __sh; \ - (sl) = __sl; \ -}) - -/* a umul b = a mul b + (a>=2<<31) ? b<<32:0 + (b>=2<<31) ? a<<32:0 */ -#define umul_ppmm(wh, wl, u, v) ({ \ - unsigned int __wh = u; \ - unsigned int __wl = v; \ - asm volatile( \ - " ltr 1,%0\n" \ - " mr 0,%1\n" \ - " jnm 0f\n" \ - " alr 0,%1\n" \ - "0: ltr %1,%1\n" \ - " jnm 1f\n" \ - " alr 0,%0\n" \ - "1: lr %0,0\n" \ - " lr %1,1\n" \ - : "+d" (__wh), "+d" (__wl) \ - : : "0", "1", "cc"); \ - wh = __wh; \ - wl = __wl; \ -}) - -#define udiv_qrnnd(q, r, n1, n0, d) \ - do { unsigned long __n; \ - unsigned int __r, __d; \ - __n = ((unsigned long)(n1) << 32) + n0; \ - __d = (d); \ - (q) = __n / __d; \ - (r) = __n % __d; \ - } while (0) - -#define UDIV_NEEDS_NORMALIZATION 0 - -#define abort() BUG() - -#define __BYTE_ORDER __BIG_ENDIAN diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 8cb9bfdd3ea8..43446fa2a4e5 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3-59-g8ed1b From 93dd49d0028c66be9f0404d23dd6817b876cc6dc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 10 Jun 2016 10:40:06 +0200 Subject: s390/oprofile: remove hardware sampler support Remove hardware sampler support from oprofile module. The oprofile user space utilty has been switched to use the kernel perf interface, for which we also provide hardware sampling support. In addition the hardware sampling support is also slightly broken: it supports only 16 bits for the pid and therefore would generate wrong results on machines which have a pid >64k. Also the pt_regs structure which was passed to oprofile common code cannot necessarily be used to generate sane backtraces, since the task(s) in question may run while the samples are fed to oprofile. So the result would be more or less random. However given that the only user space tools switched to the perf interface already four years ago the hardware sampler code seems to be unused code, and therefore it should be reasonable to remove it. The timer based oprofile support continues to work. Signed-off-by: Heiko Carstens Acked-by: Andreas Arnez Acked-by: Andreas Krebbel Acked-by: Robert Richter Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- Documentation/kernel-parameters.txt | 2 - arch/s390/oprofile/Makefile | 1 - arch/s390/oprofile/hwsampler.c | 1178 ----------------------------------- arch/s390/oprofile/hwsampler.h | 63 -- arch/s390/oprofile/init.c | 489 --------------- arch/s390/oprofile/op_counter.h | 21 - 6 files changed, 1754 deletions(-) delete mode 100644 arch/s390/oprofile/hwsampler.c delete mode 100644 arch/s390/oprofile/hwsampler.h delete mode 100644 arch/s390/oprofile/op_counter.h diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 82b42c958d1c..68b0c022861a 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2788,8 +2788,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. timer: [X86] Force use of architectural NMI timer mode (see also oprofile.timer for generic hr timer mode) - [s390] Force legacy basic mode sampling - (report cpu_type "timer") oops=panic Always panic on oopses. Default is to just kill the process, but there is a small probability of diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile index 496e4a7ee00e..e9dd41b0b8d3 100644 --- a/arch/s390/oprofile/Makefile +++ b/arch/s390/oprofile/Makefile @@ -7,4 +7,3 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ timer_int.o ) oprofile-y := $(DRIVER_OBJS) init.o -oprofile-y += hwsampler.o diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c deleted file mode 100644 index ff9b4eb34589..000000000000 --- a/arch/s390/oprofile/hwsampler.c +++ /dev/null @@ -1,1178 +0,0 @@ -/* - * Copyright IBM Corp. 2010 - * Author: Heinz Graalfs - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "hwsampler.h" -#include "op_counter.h" - -#define MAX_NUM_SDB 511 -#define MIN_NUM_SDB 1 - -DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); - -struct hws_execute_parms { - void *buffer; - signed int rc; -}; - -DEFINE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); -EXPORT_PER_CPU_SYMBOL(sampler_cpu_buffer); - -static DEFINE_MUTEX(hws_sem); -static DEFINE_MUTEX(hws_sem_oom); - -static unsigned char hws_flush_all; -static unsigned int hws_oom; -static unsigned int hws_alert; -static struct workqueue_struct *hws_wq; - -static unsigned int hws_state; -enum { - HWS_INIT = 1, - HWS_DEALLOCATED, - HWS_STOPPED, - HWS_STARTED, - HWS_STOPPING }; - -/* set to 1 if called by kernel during memory allocation */ -static unsigned char oom_killer_was_active; -/* size of SDBT and SDB as of allocate API */ -static unsigned long num_sdbt = 100; -static unsigned long num_sdb = 511; -/* sampling interval (machine cycles) */ -static unsigned long interval; - -static unsigned long min_sampler_rate; -static unsigned long max_sampler_rate; - -static void execute_qsi(void *parms) -{ - struct hws_execute_parms *ep = parms; - - ep->rc = qsi(ep->buffer); -} - -static void execute_ssctl(void *parms) -{ - struct hws_execute_parms *ep = parms; - - ep->rc = lsctl(ep->buffer); -} - -static int smp_ctl_ssctl_stop(int cpu) -{ - int rc; - struct hws_execute_parms ep; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - cb->ssctl.es = 0; - cb->ssctl.cs = 0; - - ep.buffer = &cb->ssctl; - smp_call_function_single(cpu, execute_ssctl, &ep, 1); - rc = ep.rc; - if (rc) { - printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); - dump_stack(); - } - - ep.buffer = &cb->qsi; - smp_call_function_single(cpu, execute_qsi, &ep, 1); - - if (cb->qsi.es || cb->qsi.cs) { - printk(KERN_EMERG "CPUMF sampling did not stop properly.\n"); - dump_stack(); - } - - return rc; -} - -static int smp_ctl_ssctl_deactivate(int cpu) -{ - int rc; - struct hws_execute_parms ep; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - cb->ssctl.es = 1; - cb->ssctl.cs = 0; - - ep.buffer = &cb->ssctl; - smp_call_function_single(cpu, execute_ssctl, &ep, 1); - rc = ep.rc; - if (rc) - printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); - - ep.buffer = &cb->qsi; - smp_call_function_single(cpu, execute_qsi, &ep, 1); - - if (cb->qsi.cs) - printk(KERN_EMERG "CPUMF sampling was not set inactive.\n"); - - return rc; -} - -static int smp_ctl_ssctl_enable_activate(int cpu, unsigned long interval) -{ - int rc; - struct hws_execute_parms ep; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - cb->ssctl.h = 1; - cb->ssctl.tear = cb->first_sdbt; - cb->ssctl.dear = *(unsigned long *) cb->first_sdbt; - cb->ssctl.interval = interval; - cb->ssctl.es = 1; - cb->ssctl.cs = 1; - - ep.buffer = &cb->ssctl; - smp_call_function_single(cpu, execute_ssctl, &ep, 1); - rc = ep.rc; - if (rc) - printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); - - ep.buffer = &cb->qsi; - smp_call_function_single(cpu, execute_qsi, &ep, 1); - if (ep.rc) - printk(KERN_ERR "hwsampler: CPU %d CPUMF QSI failed.\n", cpu); - - return rc; -} - -static int smp_ctl_qsi(int cpu) -{ - struct hws_execute_parms ep; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - ep.buffer = &cb->qsi; - smp_call_function_single(cpu, execute_qsi, &ep, 1); - - return ep.rc; -} - -static void hws_ext_handler(struct ext_code ext_code, - unsigned int param32, unsigned long param64) -{ - struct hws_cpu_buffer *cb = this_cpu_ptr(&sampler_cpu_buffer); - - if (!(param32 & CPU_MF_INT_SF_MASK)) - return; - - if (!hws_alert) - return; - - inc_irq_stat(IRQEXT_CMS); - atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32); - - if (hws_wq) - queue_work(hws_wq, &cb->worker); -} - -static void worker(struct work_struct *work); - -static void add_samples_to_oprofile(unsigned cpu, unsigned long *, - unsigned long *dear); - -static void init_all_cpu_buffers(void) -{ - int cpu; - struct hws_cpu_buffer *cb; - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - memset(cb, 0, sizeof(struct hws_cpu_buffer)); - } -} - -static void prepare_cpu_buffers(void) -{ - struct hws_cpu_buffer *cb; - int cpu; - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - atomic_set(&cb->ext_params, 0); - cb->worker_entry = 0; - cb->sample_overflow = 0; - cb->req_alert = 0; - cb->incorrect_sdbt_entry = 0; - cb->invalid_entry_address = 0; - cb->loss_of_sample_data = 0; - cb->sample_auth_change_alert = 0; - cb->finish = 0; - cb->oom = 0; - cb->stop_mode = 0; - } -} - -/* - * allocate_sdbt() - allocate sampler memory - * @cpu: the cpu for which sampler memory is allocated - * - * A 4K page is allocated for each requested SDBT. - * A maximum of 511 4K pages are allocated for the SDBs in each of the SDBTs. - * Set ALERT_REQ mask in each SDBs trailer. - * Returns zero if successful, <0 otherwise. - */ -static int allocate_sdbt(int cpu) -{ - int j, k, rc; - unsigned long *sdbt; - unsigned long sdb; - unsigned long *tail; - unsigned long *trailer; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - if (cb->first_sdbt) - return -EINVAL; - - sdbt = NULL; - tail = sdbt; - - for (j = 0; j < num_sdbt; j++) { - sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL); - - mutex_lock(&hws_sem_oom); - /* OOM killer might have been activated */ - barrier(); - if (oom_killer_was_active || !sdbt) { - if (sdbt) - free_page((unsigned long)sdbt); - - goto allocate_sdbt_error; - } - if (cb->first_sdbt == 0) - cb->first_sdbt = (unsigned long)sdbt; - - /* link current page to tail of chain */ - if (tail) - *tail = (unsigned long)(void *)sdbt + 1; - - mutex_unlock(&hws_sem_oom); - - for (k = 0; k < num_sdb; k++) { - /* get and set SDB page */ - sdb = get_zeroed_page(GFP_KERNEL); - - mutex_lock(&hws_sem_oom); - /* OOM killer might have been activated */ - barrier(); - if (oom_killer_was_active || !sdb) { - if (sdb) - free_page(sdb); - - goto allocate_sdbt_error; - } - *sdbt = sdb; - trailer = trailer_entry_ptr(*sdbt); - *trailer = SDB_TE_ALERT_REQ_MASK; - sdbt++; - mutex_unlock(&hws_sem_oom); - } - tail = sdbt; - } - mutex_lock(&hws_sem_oom); - if (oom_killer_was_active) - goto allocate_sdbt_error; - - rc = 0; - if (tail) - *tail = (unsigned long) - ((void *)cb->first_sdbt) + 1; - -allocate_sdbt_exit: - mutex_unlock(&hws_sem_oom); - return rc; - -allocate_sdbt_error: - rc = -ENOMEM; - goto allocate_sdbt_exit; -} - -/* - * deallocate_sdbt() - deallocate all sampler memory - * - * For each online CPU all SDBT trees are deallocated. - * Returns the number of freed pages. - */ -static int deallocate_sdbt(void) -{ - int cpu; - int counter; - - counter = 0; - - for_each_online_cpu(cpu) { - unsigned long start; - unsigned long sdbt; - unsigned long *curr; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - if (!cb->first_sdbt) - continue; - - sdbt = cb->first_sdbt; - curr = (unsigned long *) sdbt; - start = sdbt; - - /* we'll free the SDBT after all SDBs are processed... */ - while (1) { - if (!*curr || !sdbt) - break; - - /* watch for link entry reset if found */ - if (is_link_entry(curr)) { - curr = get_next_sdbt(curr); - if (sdbt) - free_page(sdbt); - - /* we are done if we reach the start */ - if ((unsigned long) curr == start) - break; - else - sdbt = (unsigned long) curr; - } else { - /* process SDB pointer */ - if (*curr) { - free_page(*curr); - curr++; - } - } - counter++; - } - cb->first_sdbt = 0; - } - return counter; -} - -static int start_sampling(int cpu) -{ - int rc; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - rc = smp_ctl_ssctl_enable_activate(cpu, interval); - if (rc) { - printk(KERN_INFO "hwsampler: CPU %d ssctl failed.\n", cpu); - goto start_exit; - } - - rc = -EINVAL; - if (!cb->qsi.es) { - printk(KERN_INFO "hwsampler: CPU %d ssctl not enabled.\n", cpu); - goto start_exit; - } - - if (!cb->qsi.cs) { - printk(KERN_INFO "hwsampler: CPU %d ssctl not active.\n", cpu); - goto start_exit; - } - - printk(KERN_INFO - "hwsampler: CPU %d, CPUMF Sampling started, interval %lu.\n", - cpu, interval); - - rc = 0; - -start_exit: - return rc; -} - -static int stop_sampling(int cpu) -{ - unsigned long v; - int rc; - struct hws_cpu_buffer *cb; - - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - - cb = &per_cpu(sampler_cpu_buffer, cpu); - if (!rc && !cb->qsi.es) - printk(KERN_INFO "hwsampler: CPU %d, already stopped.\n", cpu); - - rc = smp_ctl_ssctl_stop(cpu); - if (rc) { - printk(KERN_INFO "hwsampler: CPU %d, ssctl stop error %d.\n", - cpu, rc); - goto stop_exit; - } - - printk(KERN_INFO "hwsampler: CPU %d, CPUMF Sampling stopped.\n", cpu); - -stop_exit: - v = cb->req_alert; - if (v) - printk(KERN_ERR "hwsampler: CPU %d CPUMF Request alert," - " count=%lu.\n", cpu, v); - - v = cb->loss_of_sample_data; - if (v) - printk(KERN_ERR "hwsampler: CPU %d CPUMF Loss of sample data," - " count=%lu.\n", cpu, v); - - v = cb->invalid_entry_address; - if (v) - printk(KERN_ERR "hwsampler: CPU %d CPUMF Invalid entry address," - " count=%lu.\n", cpu, v); - - v = cb->incorrect_sdbt_entry; - if (v) - printk(KERN_ERR - "hwsampler: CPU %d CPUMF Incorrect SDBT address," - " count=%lu.\n", cpu, v); - - v = cb->sample_auth_change_alert; - if (v) - printk(KERN_ERR - "hwsampler: CPU %d CPUMF Sample authorization change," - " count=%lu.\n", cpu, v); - - return rc; -} - -static int check_hardware_prerequisites(void) -{ - if (!test_facility(68)) - return -EOPNOTSUPP; - return 0; -} -/* - * hws_oom_callback() - the OOM callback function - * - * In case the callback is invoked during memory allocation for the - * hw sampler, all obtained memory is deallocated and a flag is set - * so main sampler memory allocation can exit with a failure code. - * In case the callback is invoked during sampling the hw sampler - * is deactivated for all CPUs. - */ -static int hws_oom_callback(struct notifier_block *nfb, - unsigned long dummy, void *parm) -{ - unsigned long *freed; - int cpu; - struct hws_cpu_buffer *cb; - - freed = parm; - - mutex_lock(&hws_sem_oom); - - if (hws_state == HWS_DEALLOCATED) { - /* during memory allocation */ - if (oom_killer_was_active == 0) { - oom_killer_was_active = 1; - *freed += deallocate_sdbt(); - } - } else { - int i; - cpu = get_cpu(); - cb = &per_cpu(sampler_cpu_buffer, cpu); - - if (!cb->oom) { - for_each_online_cpu(i) { - smp_ctl_ssctl_deactivate(i); - cb->oom = 1; - } - cb->finish = 1; - - printk(KERN_INFO - "hwsampler: CPU %d, OOM notify during CPUMF Sampling.\n", - cpu); - } - } - - mutex_unlock(&hws_sem_oom); - - return NOTIFY_OK; -} - -static struct notifier_block hws_oom_notifier = { - .notifier_call = hws_oom_callback -}; - -static int hws_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - /* We do not have sampler space available for all possible CPUs. - All CPUs should be online when hw sampling is activated. */ - return (hws_state <= HWS_DEALLOCATED) ? NOTIFY_OK : NOTIFY_BAD; -} - -static struct notifier_block hws_cpu_notifier = { - .notifier_call = hws_cpu_callback -}; - -/** - * hwsampler_deactivate() - set hardware sampling temporarily inactive - * @cpu: specifies the CPU to be set inactive. - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_deactivate(unsigned int cpu) -{ - /* - * Deactivate hw sampling temporarily and flush the buffer - * by pushing all the pending samples to oprofile buffer. - * - * This function can be called under one of the following conditions: - * Memory unmap, task is exiting. - */ - int rc; - struct hws_cpu_buffer *cb; - - rc = 0; - mutex_lock(&hws_sem); - - cb = &per_cpu(sampler_cpu_buffer, cpu); - if (hws_state == HWS_STARTED) { - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (cb->qsi.cs) { - rc = smp_ctl_ssctl_deactivate(cpu); - if (rc) { - printk(KERN_INFO - "hwsampler: CPU %d, CPUMF Deactivation failed.\n", cpu); - cb->finish = 1; - hws_state = HWS_STOPPING; - } else { - hws_flush_all = 1; - /* Add work to queue to read pending samples.*/ - queue_work_on(cpu, hws_wq, &cb->worker); - } - } - } - mutex_unlock(&hws_sem); - - if (hws_wq) - flush_workqueue(hws_wq); - - return rc; -} - -/** - * hwsampler_activate() - activate/resume hardware sampling which was deactivated - * @cpu: specifies the CPU to be set active. - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_activate(unsigned int cpu) -{ - /* - * Re-activate hw sampling. This should be called in pair with - * hwsampler_deactivate(). - */ - int rc; - struct hws_cpu_buffer *cb; - - rc = 0; - mutex_lock(&hws_sem); - - cb = &per_cpu(sampler_cpu_buffer, cpu); - if (hws_state == HWS_STARTED) { - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (!cb->qsi.cs) { - hws_flush_all = 0; - rc = smp_ctl_ssctl_enable_activate(cpu, interval); - if (rc) { - printk(KERN_ERR - "CPU %d, CPUMF activate sampling failed.\n", - cpu); - } - } - } - - mutex_unlock(&hws_sem); - - return rc; -} - -static int check_qsi_on_setup(void) -{ - int rc; - unsigned int cpu; - struct hws_cpu_buffer *cb; - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (rc) - return -EOPNOTSUPP; - - if (!cb->qsi.as) { - printk(KERN_INFO "hwsampler: CPUMF sampling is not authorized.\n"); - return -EINVAL; - } - - if (cb->qsi.es) { - printk(KERN_WARNING "hwsampler: CPUMF is still enabled.\n"); - rc = smp_ctl_ssctl_stop(cpu); - if (rc) - return -EINVAL; - - printk(KERN_INFO - "CPU %d, CPUMF Sampling stopped now.\n", cpu); - } - } - return 0; -} - -static int check_qsi_on_start(void) -{ - unsigned int cpu; - int rc; - struct hws_cpu_buffer *cb; - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - - if (!cb->qsi.as) - return -EINVAL; - - if (cb->qsi.es) - return -EINVAL; - - if (cb->qsi.cs) - return -EINVAL; - } - return 0; -} - -static void worker_on_start(unsigned int cpu) -{ - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - cb->worker_entry = cb->first_sdbt; -} - -static int worker_check_error(unsigned int cpu, int ext_params) -{ - int rc; - unsigned long *sdbt; - struct hws_cpu_buffer *cb; - - rc = 0; - cb = &per_cpu(sampler_cpu_buffer, cpu); - sdbt = (unsigned long *) cb->worker_entry; - - if (!sdbt || !*sdbt) - return -EINVAL; - - if (ext_params & CPU_MF_INT_SF_PRA) - cb->req_alert++; - - if (ext_params & CPU_MF_INT_SF_LSDA) - cb->loss_of_sample_data++; - - if (ext_params & CPU_MF_INT_SF_IAE) { - cb->invalid_entry_address++; - rc = -EINVAL; - } - - if (ext_params & CPU_MF_INT_SF_ISE) { - cb->incorrect_sdbt_entry++; - rc = -EINVAL; - } - - if (ext_params & CPU_MF_INT_SF_SACA) { - cb->sample_auth_change_alert++; - rc = -EINVAL; - } - - return rc; -} - -static void worker_on_finish(unsigned int cpu) -{ - int rc, i; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - if (cb->finish) { - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (cb->qsi.es) { - printk(KERN_INFO - "hwsampler: CPU %d, CPUMF Stop/Deactivate sampling.\n", - cpu); - rc = smp_ctl_ssctl_stop(cpu); - if (rc) - printk(KERN_INFO - "hwsampler: CPU %d, CPUMF Deactivation failed.\n", - cpu); - - for_each_online_cpu(i) { - if (i == cpu) - continue; - if (!cb->finish) { - cb->finish = 1; - queue_work_on(i, hws_wq, - &cb->worker); - } - } - } - } -} - -static void worker_on_interrupt(unsigned int cpu) -{ - unsigned long *sdbt; - unsigned char done; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - sdbt = (unsigned long *) cb->worker_entry; - - done = 0; - /* do not proceed if stop was entered, - * forget the buffers not yet processed */ - while (!done && !cb->stop_mode) { - unsigned long *trailer; - struct hws_trailer_entry *te; - unsigned long *dear = 0; - - trailer = trailer_entry_ptr(*sdbt); - /* leave loop if no more work to do */ - if (!(*trailer & SDB_TE_BUFFER_FULL_MASK)) { - done = 1; - if (!hws_flush_all) - continue; - } - - te = (struct hws_trailer_entry *)trailer; - cb->sample_overflow += te->overflow; - - add_samples_to_oprofile(cpu, sdbt, dear); - - /* reset trailer */ - xchg((unsigned char *) te, 0x40); - - /* advance to next sdb slot in current sdbt */ - sdbt++; - /* in case link bit is set use address w/o link bit */ - if (is_link_entry(sdbt)) - sdbt = get_next_sdbt(sdbt); - - cb->worker_entry = (unsigned long)sdbt; - } -} - -static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, - unsigned long *dear) -{ - struct hws_basic_entry *sample_data_ptr; - unsigned long *trailer; - - trailer = trailer_entry_ptr(*sdbt); - if (dear) { - if (dear > trailer) - return; - trailer = dear; - } - - sample_data_ptr = (struct hws_basic_entry *)(*sdbt); - - while ((unsigned long *)sample_data_ptr < trailer) { - struct pt_regs *regs = NULL; - struct task_struct *tsk = NULL; - - /* - * Check sampling mode, 1 indicates basic (=customer) sampling - * mode. - */ - if (sample_data_ptr->def != 1) { - /* sample slot is not yet written */ - break; - } else { - /* make sure we don't use it twice, - * the next time the sampler will set it again */ - sample_data_ptr->def = 0; - } - - /* Get pt_regs. */ - if (sample_data_ptr->P == 1) { - /* userspace sample */ - unsigned int pid = sample_data_ptr->prim_asn; - if (!counter_config.user) - goto skip_sample; - rcu_read_lock(); - tsk = pid_task(find_vpid(pid), PIDTYPE_PID); - if (tsk) - regs = task_pt_regs(tsk); - rcu_read_unlock(); - } else { - /* kernelspace sample */ - if (!counter_config.kernel) - goto skip_sample; - regs = task_pt_regs(current); - } - - mutex_lock(&hws_sem); - oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0, - !sample_data_ptr->P, tsk); - mutex_unlock(&hws_sem); - skip_sample: - sample_data_ptr++; - } -} - -static void worker(struct work_struct *work) -{ - unsigned int cpu; - int ext_params; - struct hws_cpu_buffer *cb; - - cb = container_of(work, struct hws_cpu_buffer, worker); - cpu = smp_processor_id(); - ext_params = atomic_xchg(&cb->ext_params, 0); - - if (!cb->worker_entry) - worker_on_start(cpu); - - if (worker_check_error(cpu, ext_params)) - return; - - if (!cb->finish) - worker_on_interrupt(cpu); - - if (cb->finish) - worker_on_finish(cpu); -} - -/** - * hwsampler_allocate() - allocate memory for the hardware sampler - * @sdbt: number of SDBTs per online CPU (must be > 0) - * @sdb: number of SDBs per SDBT (minimum 1, maximum 511) - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_allocate(unsigned long sdbt, unsigned long sdb) -{ - int cpu, rc; - mutex_lock(&hws_sem); - - rc = -EINVAL; - if (hws_state != HWS_DEALLOCATED) - goto allocate_exit; - - if (sdbt < 1) - goto allocate_exit; - - if (sdb > MAX_NUM_SDB || sdb < MIN_NUM_SDB) - goto allocate_exit; - - num_sdbt = sdbt; - num_sdb = sdb; - - oom_killer_was_active = 0; - register_oom_notifier(&hws_oom_notifier); - - for_each_online_cpu(cpu) { - if (allocate_sdbt(cpu)) { - unregister_oom_notifier(&hws_oom_notifier); - goto allocate_error; - } - } - unregister_oom_notifier(&hws_oom_notifier); - if (oom_killer_was_active) - goto allocate_error; - - hws_state = HWS_STOPPED; - rc = 0; - -allocate_exit: - mutex_unlock(&hws_sem); - return rc; - -allocate_error: - rc = -ENOMEM; - printk(KERN_ERR "hwsampler: CPUMF Memory allocation failed.\n"); - goto allocate_exit; -} - -/** - * hwsampler_deallocate() - deallocate hardware sampler memory - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_deallocate(void) -{ - int rc; - - mutex_lock(&hws_sem); - - rc = -EINVAL; - if (hws_state != HWS_STOPPED) - goto deallocate_exit; - - irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); - hws_alert = 0; - deallocate_sdbt(); - - hws_state = HWS_DEALLOCATED; - rc = 0; - -deallocate_exit: - mutex_unlock(&hws_sem); - - return rc; -} - -unsigned long hwsampler_query_min_interval(void) -{ - return min_sampler_rate; -} - -unsigned long hwsampler_query_max_interval(void) -{ - return max_sampler_rate; -} - -unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu) -{ - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - return cb->sample_overflow; -} - -int hwsampler_setup(void) -{ - int rc; - int cpu; - struct hws_cpu_buffer *cb; - - mutex_lock(&hws_sem); - - rc = -EINVAL; - if (hws_state) - goto setup_exit; - - hws_state = HWS_INIT; - - init_all_cpu_buffers(); - - rc = check_hardware_prerequisites(); - if (rc) - goto setup_exit; - - rc = check_qsi_on_setup(); - if (rc) - goto setup_exit; - - rc = -EINVAL; - hws_wq = create_workqueue("hwsampler"); - if (!hws_wq) - goto setup_exit; - - register_cpu_notifier(&hws_cpu_notifier); - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - INIT_WORK(&cb->worker, worker); - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (min_sampler_rate != cb->qsi.min_sampl_rate) { - if (min_sampler_rate) { - printk(KERN_WARNING - "hwsampler: different min sampler rate values.\n"); - if (min_sampler_rate < cb->qsi.min_sampl_rate) - min_sampler_rate = - cb->qsi.min_sampl_rate; - } else - min_sampler_rate = cb->qsi.min_sampl_rate; - } - if (max_sampler_rate != cb->qsi.max_sampl_rate) { - if (max_sampler_rate) { - printk(KERN_WARNING - "hwsampler: different max sampler rate values.\n"); - if (max_sampler_rate > cb->qsi.max_sampl_rate) - max_sampler_rate = - cb->qsi.max_sampl_rate; - } else - max_sampler_rate = cb->qsi.max_sampl_rate; - } - } - register_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler); - - hws_state = HWS_DEALLOCATED; - rc = 0; - -setup_exit: - mutex_unlock(&hws_sem); - return rc; -} - -int hwsampler_shutdown(void) -{ - int rc; - - mutex_lock(&hws_sem); - - rc = -EINVAL; - if (hws_state == HWS_DEALLOCATED || hws_state == HWS_STOPPED) { - mutex_unlock(&hws_sem); - - if (hws_wq) - flush_workqueue(hws_wq); - - mutex_lock(&hws_sem); - - if (hws_state == HWS_STOPPED) { - irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); - hws_alert = 0; - deallocate_sdbt(); - } - if (hws_wq) { - destroy_workqueue(hws_wq); - hws_wq = NULL; - } - - unregister_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler); - hws_state = HWS_INIT; - rc = 0; - } - mutex_unlock(&hws_sem); - - unregister_cpu_notifier(&hws_cpu_notifier); - - return rc; -} - -/** - * hwsampler_start_all() - start hardware sampling on all online CPUs - * @rate: specifies the used interval when samples are taken - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_start_all(unsigned long rate) -{ - int rc, cpu; - - mutex_lock(&hws_sem); - - hws_oom = 0; - - rc = -EINVAL; - if (hws_state != HWS_STOPPED) - goto start_all_exit; - - interval = rate; - - /* fail if rate is not valid */ - if (interval < min_sampler_rate || interval > max_sampler_rate) - goto start_all_exit; - - rc = check_qsi_on_start(); - if (rc) - goto start_all_exit; - - prepare_cpu_buffers(); - - for_each_online_cpu(cpu) { - rc = start_sampling(cpu); - if (rc) - break; - } - if (rc) { - for_each_online_cpu(cpu) { - stop_sampling(cpu); - } - goto start_all_exit; - } - hws_state = HWS_STARTED; - rc = 0; - -start_all_exit: - mutex_unlock(&hws_sem); - - if (rc) - return rc; - - register_oom_notifier(&hws_oom_notifier); - hws_oom = 1; - hws_flush_all = 0; - /* now let them in, 1407 CPUMF external interrupts */ - hws_alert = 1; - irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); - - return 0; -} - -/** - * hwsampler_stop_all() - stop hardware sampling on all online CPUs - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_stop_all(void) -{ - int tmp_rc, rc, cpu; - struct hws_cpu_buffer *cb; - - mutex_lock(&hws_sem); - - rc = 0; - if (hws_state == HWS_INIT) { - mutex_unlock(&hws_sem); - return 0; - } - hws_state = HWS_STOPPING; - mutex_unlock(&hws_sem); - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - cb->stop_mode = 1; - tmp_rc = stop_sampling(cpu); - if (tmp_rc) - rc = tmp_rc; - } - - if (hws_wq) - flush_workqueue(hws_wq); - - mutex_lock(&hws_sem); - if (hws_oom) { - unregister_oom_notifier(&hws_oom_notifier); - hws_oom = 0; - } - hws_state = HWS_STOPPED; - mutex_unlock(&hws_sem); - - return rc; -} diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h deleted file mode 100644 index a483d06f2fa7..000000000000 --- a/arch/s390/oprofile/hwsampler.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * CPUMF HW sampler functions and internal structures - * - * Copyright IBM Corp. 2010 - * Author(s): Heinz Graalfs - */ - -#ifndef HWSAMPLER_H_ -#define HWSAMPLER_H_ - -#include -#include - -struct hws_ssctl_request_block /* SET SAMPLING CONTROLS req block */ -{ /* bytes 0 - 7 Bit(s) */ - unsigned int s:1; /* 0: maximum buffer indicator */ - unsigned int h:1; /* 1: part. level reserved for VM use*/ - unsigned long b2_53:52; /* 2-53: zeros */ - unsigned int es:1; /* 54: sampling enable control */ - unsigned int b55_61:7; /* 55-61: - zeros */ - unsigned int cs:1; /* 62: sampling activation control */ - unsigned int b63:1; /* 63: zero */ - unsigned long interval; /* 8-15: sampling interval */ - unsigned long tear; /* 16-23: TEAR contents */ - unsigned long dear; /* 24-31: DEAR contents */ - /* 32-63: */ - unsigned long rsvrd1; /* reserved */ - unsigned long rsvrd2; /* reserved */ - unsigned long rsvrd3; /* reserved */ - unsigned long rsvrd4; /* reserved */ -}; - -struct hws_cpu_buffer { - unsigned long first_sdbt; /* @ of 1st SDB-Table for this CP*/ - unsigned long worker_entry; - unsigned long sample_overflow; /* taken from SDB ... */ - struct hws_qsi_info_block qsi; - struct hws_ssctl_request_block ssctl; - struct work_struct worker; - atomic_t ext_params; - unsigned long req_alert; - unsigned long loss_of_sample_data; - unsigned long invalid_entry_address; - unsigned long incorrect_sdbt_entry; - unsigned long sample_auth_change_alert; - unsigned int finish:1; - unsigned int oom:1; - unsigned int stop_mode:1; -}; - -int hwsampler_setup(void); -int hwsampler_shutdown(void); -int hwsampler_allocate(unsigned long sdbt, unsigned long sdb); -int hwsampler_deallocate(void); -unsigned long hwsampler_query_min_interval(void); -unsigned long hwsampler_query_max_interval(void); -int hwsampler_start_all(unsigned long interval); -int hwsampler_stop_all(void); -int hwsampler_deactivate(unsigned int cpu); -int hwsampler_activate(unsigned int cpu); -unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu); - -#endif /*HWSAMPLER_H_*/ diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 791935a65800..16f4c3960b87 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -10,488 +10,8 @@ */ #include -#include #include -#include -#include -#include #include -#include - -#include "../../../drivers/oprofile/oprof.h" - -#include "hwsampler.h" -#include "op_counter.h" - -#define DEFAULT_INTERVAL 4127518 - -#define DEFAULT_SDBT_BLOCKS 1 -#define DEFAULT_SDB_BLOCKS 511 - -static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL; -static unsigned long oprofile_min_interval; -static unsigned long oprofile_max_interval; - -static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; -static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; - -static int hwsampler_enabled; -static int hwsampler_running; /* start_mutex must be held to change */ -static int hwsampler_available; - -static struct oprofile_operations timer_ops; - -struct op_counter_config counter_config; - -enum __force_cpu_type { - reserved = 0, /* do not force */ - timer, -}; -static int force_cpu_type; - -static int set_cpu_type(const char *str, struct kernel_param *kp) -{ - if (!strcmp(str, "timer")) { - force_cpu_type = timer; - printk(KERN_INFO "oprofile: forcing timer to be returned " - "as cpu type\n"); - } else { - force_cpu_type = 0; - } - - return 0; -} -module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); -MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling" - "(report cpu_type \"timer\""); - -static int __oprofile_hwsampler_start(void) -{ - int retval; - - retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks); - if (retval) - return retval; - - retval = hwsampler_start_all(oprofile_hw_interval); - if (retval) - hwsampler_deallocate(); - - return retval; -} - -static int oprofile_hwsampler_start(void) -{ - int retval; - - hwsampler_running = hwsampler_enabled; - - if (!hwsampler_running) - return timer_ops.start(); - - retval = perf_reserve_sampling(); - if (retval) - return retval; - - retval = __oprofile_hwsampler_start(); - if (retval) - perf_release_sampling(); - - return retval; -} - -static void oprofile_hwsampler_stop(void) -{ - if (!hwsampler_running) { - timer_ops.stop(); - return; - } - - hwsampler_stop_all(); - hwsampler_deallocate(); - perf_release_sampling(); - return; -} - -/* - * File ops used for: - * /dev/oprofile/0/enabled - * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer) - */ - -static ssize_t hwsampler_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset); -} - -static ssize_t hwsampler_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - - if (val != 0 && val != 1) - return -EINVAL; - - if (oprofile_started) - /* - * save to do without locking as we set - * hwsampler_running in start() when start_mutex is - * held - */ - return -EBUSY; - - hwsampler_enabled = val; - - return count; -} - -static const struct file_operations hwsampler_fops = { - .read = hwsampler_read, - .write = hwsampler_write, -}; - -/* - * File ops used for: - * /dev/oprofile/0/count - * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer) - * - * Make sure that the value is within the hardware range. - */ - -static ssize_t hw_interval_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(oprofile_hw_interval, buf, - count, offset); -} - -static ssize_t hw_interval_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - if (val < oprofile_min_interval) - oprofile_hw_interval = oprofile_min_interval; - else if (val > oprofile_max_interval) - oprofile_hw_interval = oprofile_max_interval; - else - oprofile_hw_interval = val; - - return count; -} - -static const struct file_operations hw_interval_fops = { - .read = hw_interval_read, - .write = hw_interval_write, -}; - -/* - * File ops used for: - * /dev/oprofile/0/event - * Only a single event with number 0 is supported with this counter. - * - * /dev/oprofile/0/unit_mask - * This is a dummy file needed by the user space tools. - * No value other than 0 is accepted or returned. - */ - -static ssize_t hwsampler_zero_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(0, buf, count, offset); -} - -static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - if (val != 0) - return -EINVAL; - return count; -} - -static const struct file_operations zero_fops = { - .read = hwsampler_zero_read, - .write = hwsampler_zero_write, -}; - -/* /dev/oprofile/0/kernel file ops. */ - -static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(counter_config.kernel, - buf, count, offset); -} - -static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - - if (val != 0 && val != 1) - return -EINVAL; - - counter_config.kernel = val; - - return count; -} - -static const struct file_operations kernel_fops = { - .read = hwsampler_kernel_read, - .write = hwsampler_kernel_write, -}; - -/* /dev/oprofile/0/user file ops. */ - -static ssize_t hwsampler_user_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(counter_config.user, - buf, count, offset); -} - -static ssize_t hwsampler_user_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - - if (val != 0 && val != 1) - return -EINVAL; - - counter_config.user = val; - - return count; -} - -static const struct file_operations user_fops = { - .read = hwsampler_user_read, - .write = hwsampler_user_write, -}; - - -/* - * File ops used for: /dev/oprofile/timer/enabled - * The value always has to be the inverted value of hwsampler_enabled. So - * no separate variable is created. That way we do not need locking. - */ - -static ssize_t timer_enabled_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset); -} - -static ssize_t timer_enabled_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - - if (val != 0 && val != 1) - return -EINVAL; - - /* Timer cannot be disabled without having hardware sampling. */ - if (val == 0 && !hwsampler_available) - return -EINVAL; - - if (oprofile_started) - /* - * save to do without locking as we set - * hwsampler_running in start() when start_mutex is - * held - */ - return -EBUSY; - - hwsampler_enabled = !val; - - return count; -} - -static const struct file_operations timer_enabled_fops = { - .read = timer_enabled_read, - .write = timer_enabled_write, -}; - - -static int oprofile_create_hwsampling_files(struct dentry *root) -{ - struct dentry *dir; - - dir = oprofilefs_mkdir(root, "timer"); - if (!dir) - return -EINVAL; - - oprofilefs_create_file(dir, "enabled", &timer_enabled_fops); - - if (!hwsampler_available) - return 0; - - /* reinitialize default values */ - hwsampler_enabled = 1; - counter_config.kernel = 1; - counter_config.user = 1; - - if (!force_cpu_type) { - /* - * Create the counter file system. A single virtual - * counter is created which can be used to - * enable/disable hardware sampling dynamically from - * user space. The user space will configure a single - * counter with a single event. The value of 'event' - * and 'unit_mask' are not evaluated by the kernel code - * and can only be set to 0. - */ - - dir = oprofilefs_mkdir(root, "0"); - if (!dir) - return -EINVAL; - - oprofilefs_create_file(dir, "enabled", &hwsampler_fops); - oprofilefs_create_file(dir, "event", &zero_fops); - oprofilefs_create_file(dir, "count", &hw_interval_fops); - oprofilefs_create_file(dir, "unit_mask", &zero_fops); - oprofilefs_create_file(dir, "kernel", &kernel_fops); - oprofilefs_create_file(dir, "user", &user_fops); - oprofilefs_create_ulong(dir, "hw_sdbt_blocks", - &oprofile_sdbt_blocks); - - } else { - /* - * Hardware sampling can be used but the cpu_type is - * forced to timer in order to deal with legacy user - * space tools. The /dev/oprofile/hwsampling fs is - * provided in that case. - */ - dir = oprofilefs_mkdir(root, "hwsampling"); - if (!dir) - return -EINVAL; - - oprofilefs_create_file(dir, "hwsampler", - &hwsampler_fops); - oprofilefs_create_file(dir, "hw_interval", - &hw_interval_fops); - oprofilefs_create_ro_ulong(dir, "hw_min_interval", - &oprofile_min_interval); - oprofilefs_create_ro_ulong(dir, "hw_max_interval", - &oprofile_max_interval); - oprofilefs_create_ulong(dir, "hw_sdbt_blocks", - &oprofile_sdbt_blocks); - } - return 0; -} - -static int oprofile_hwsampler_init(struct oprofile_operations *ops) -{ - /* - * Initialize the timer mode infrastructure as well in order - * to be able to switch back dynamically. oprofile_timer_init - * is not supposed to fail. - */ - if (oprofile_timer_init(ops)) - BUG(); - - memcpy(&timer_ops, ops, sizeof(timer_ops)); - ops->create_files = oprofile_create_hwsampling_files; - - /* - * If the user space tools do not support newer cpu types, - * the force_cpu_type module parameter - * can be used to always return \"timer\" as cpu type. - */ - if (force_cpu_type != timer) { - struct cpuid id; - - get_cpu_id (&id); - - switch (id.machine) { - case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; - case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; - case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break; - case 0x2964: case 0x2965: ops->cpu_type = "s390/z13"; break; - default: return -ENODEV; - } - } - - if (hwsampler_setup()) - return -ENODEV; - - /* - * Query the range for the sampling interval from the - * hardware. - */ - oprofile_min_interval = hwsampler_query_min_interval(); - if (oprofile_min_interval == 0) - return -ENODEV; - oprofile_max_interval = hwsampler_query_max_interval(); - if (oprofile_max_interval == 0) - return -ENODEV; - - /* The initial value should be sane */ - if (oprofile_hw_interval < oprofile_min_interval) - oprofile_hw_interval = oprofile_min_interval; - if (oprofile_hw_interval > oprofile_max_interval) - oprofile_hw_interval = oprofile_max_interval; - - printk(KERN_INFO "oprofile: System z hardware sampling " - "facility found.\n"); - - ops->start = oprofile_hwsampler_start; - ops->stop = oprofile_hwsampler_stop; - - return 0; -} - -static void oprofile_hwsampler_exit(void) -{ - hwsampler_shutdown(); -} static int __s390_backtrace(void *data, unsigned long address) { @@ -514,18 +34,9 @@ static void s390_backtrace(struct pt_regs *regs, unsigned int depth) int __init oprofile_arch_init(struct oprofile_operations *ops) { ops->backtrace = s390_backtrace; - - /* - * -ENODEV is not reported to the caller. The module itself - * will use the timer mode sampling as fallback and this is - * always available. - */ - hwsampler_available = oprofile_hwsampler_init(ops) == 0; - return 0; } void oprofile_arch_exit(void) { - oprofile_hwsampler_exit(); } diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h deleted file mode 100644 index 61b2531eef17..000000000000 --- a/arch/s390/oprofile/op_counter.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright IBM Corp. 2011 - * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) - * - * @remark Copyright 2011 OProfile authors - */ - -#ifndef OP_COUNTER_H -#define OP_COUNTER_H - -struct op_counter_config { - /* `enabled' maps to the hwsampler_file variable. */ - /* `count' maps to the oprofile_hw_interval variable. */ - /* `event' and `unit_mask' are unused. */ - unsigned long kernel; - unsigned long user; -}; - -extern struct op_counter_config counter_config; - -#endif /* OP_COUNTER_H */ -- cgit v1.2.3-59-g8ed1b From 2764196f45be5efe1c9e71bf766ef6291ee86339 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 11 Jun 2016 11:08:10 +0200 Subject: s390/perf: remove perf_release/reserver_sampling functions Now that the oprofile sampling code is gone there is only one user of the sampling facility left. Therefore the reserve and release functions can be removed. Signed-off-by: Heiko Carstens Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/perf_event.h | 12 ------------ arch/s390/kernel/perf_cpum_sf.c | 5 ----- arch/s390/kernel/perf_event.c | 30 ------------------------------ 3 files changed, 47 deletions(-) diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 1f7ff85c5e4c..c64c0befd3f3 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -86,16 +86,4 @@ struct sf_raw_sample { u8 padding[]; /* Padding to next multiple of 8 */ } __packed; -/* Perf hardware reserve and release functions */ -#ifdef CONFIG_PERF_EVENTS -int perf_reserve_sampling(void); -void perf_release_sampling(void); -#else /* CONFIG_PERF_EVENTS */ -static inline int perf_reserve_sampling(void) -{ - return 0; -} -static inline void perf_release_sampling(void) {} -#endif /* CONFIG_PERF_EVENTS */ - #endif /* _ASM_S390_PERF_EVENT_H */ diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index a8e832166417..9ea26dface38 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -601,17 +601,12 @@ static void release_pmc_hardware(void) irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); on_each_cpu(setup_pmc_cpu, &flags, 1); - perf_release_sampling(); } static int reserve_pmc_hardware(void) { int flags = PMC_INIT; - int err; - err = perf_reserve_sampling(); - if (err) - return err; on_each_cpu(setup_pmc_cpu, &flags, 1); if (flags & PMC_FAILURE) { release_pmc_hardware(); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 87035fa58bbe..17431f63de00 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -248,33 +248,3 @@ ssize_t cpumf_events_sysfs_show(struct device *dev, return sprintf(page, "event=0x%04llx,name=%s\n", pmu_attr->id, attr->attr.name); } - -/* Reserve/release functions for sharing perf hardware */ -static DEFINE_SPINLOCK(perf_hw_owner_lock); -static void *perf_sampling_owner; - -int perf_reserve_sampling(void) -{ - int err; - - err = 0; - spin_lock(&perf_hw_owner_lock); - if (perf_sampling_owner) { - pr_warn("The sampling facility is already reserved by %p\n", - perf_sampling_owner); - err = -EBUSY; - } else - perf_sampling_owner = __builtin_return_address(0); - spin_unlock(&perf_hw_owner_lock); - return err; -} -EXPORT_SYMBOL(perf_reserve_sampling); - -void perf_release_sampling(void) -{ - spin_lock(&perf_hw_owner_lock); - WARN_ON(!perf_sampling_owner); - perf_sampling_owner = NULL; - spin_unlock(&perf_hw_owner_lock); -} -EXPORT_SYMBOL(perf_release_sampling); -- cgit v1.2.3-59-g8ed1b From 109ab954713bd07c96f65ceddab4886069a110ab Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 21 Jun 2016 11:05:05 +0200 Subject: s390/cpuinfo: rename cpu field to cpu number The cpu field name within /proc/cpuinfo has a conflict with the powerpc and sparc output where it contains the cpu model name. So rename the field name to cpu number which shouldn't generate any conflicts. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/processor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 06ca71b7ec0e..6cf24b471b0c 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -143,7 +143,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) show_cpu_summary(m, v); if (!machine_has_cpu_mhz) return 0; - seq_printf(m, "\ncpu : %ld\n", n); + seq_printf(m, "\ncpu number : %ld\n", n); show_cpu_mhz(m, n); return 0; } -- cgit v1.2.3-59-g8ed1b From dc4aace160165bd15400a79f9f5ab97405278f48 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jun 2016 10:35:20 +0200 Subject: s390/uaccess: fix __put_get_user_asm define The __put_get_user_asm defines an inline assmembly which makes use of the asm register construct. The parameters passed to that define may also contain function calls. It is a gcc restriction that between register asm statements and the use of any such annotated variables function calls may clobber the register / variable contents. Or in other words: gcc would generate broken code. This can be achieved e.g. with the following code: get_user(x, func() ? a : b); where the call of func would clobber register zero which is used by the __put_get_user_asm define. To avoid this add two static inline functions which don't have these side effects. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/uaccess.h | 61 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 2ea7f3208cd2..9b49cf1daa8f 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -151,8 +151,65 @@ unsigned long __must_check __copy_to_user(void __user *to, const void *from, __rc; \ }) -#define __put_user_fn(x, ptr, size) __put_get_user_asm(ptr, x, size, 0x810000UL) -#define __get_user_fn(x, ptr, size) __put_get_user_asm(x, ptr, size, 0x81UL) +static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) +{ + unsigned long spec = 0x810000UL; + int rc; + + switch (size) { + case 1: + rc = __put_get_user_asm((unsigned char __user *)ptr, + (unsigned char *)x, + size, spec); + break; + case 2: + rc = __put_get_user_asm((unsigned short __user *)ptr, + (unsigned short *)x, + size, spec); + break; + case 4: + rc = __put_get_user_asm((unsigned int __user *)ptr, + (unsigned int *)x, + size, spec); + break; + case 8: + rc = __put_get_user_asm((unsigned long __user *)ptr, + (unsigned long *)x, + size, spec); + break; + }; + return rc; +} + +static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) +{ + unsigned long spec = 0x81UL; + int rc; + + switch (size) { + case 1: + rc = __put_get_user_asm((unsigned char *)x, + (unsigned char __user *)ptr, + size, spec); + break; + case 2: + rc = __put_get_user_asm((unsigned short *)x, + (unsigned short __user *)ptr, + size, spec); + break; + case 4: + rc = __put_get_user_asm((unsigned int *)x, + (unsigned int __user *)ptr, + size, spec); + break; + case 8: + rc = __put_get_user_asm((unsigned long *)x, + (unsigned long __user *)ptr, + size, spec); + break; + }; + return rc; +} #else /* CONFIG_HAVE_MARCH_Z10_FEATURES */ -- cgit v1.2.3-59-g8ed1b From db7f5eef3dc0866320a4a92c38c58d6fb8dfdd34 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jun 2016 13:07:42 +0200 Subject: s390/lib: use basic blocks for inline assemblies Use only simple inline assemblies which consist of a single basic block if the register asm construct is being used. Otherwise gcc would generate broken code if the compiler option --sanitize-coverage=trace-pc would be used. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/lib/string.c | 50 ++++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index b647d5ff0ad9..e390bbb16443 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -236,6 +236,26 @@ char * strrchr(const char * s, int c) } EXPORT_SYMBOL(strrchr); +static inline int clcle(const char *s1, unsigned long l1, + const char *s2, unsigned long l2, + int *diff) +{ + register unsigned long r2 asm("2") = (unsigned long) s1; + register unsigned long r3 asm("3") = (unsigned long) l2; + register unsigned long r4 asm("4") = (unsigned long) s2; + register unsigned long r5 asm("5") = (unsigned long) l2; + int cc; + + asm volatile ("0: clcle %1,%3,0\n" + " jo 0b\n" + " ipm %0\n" + " srl %0,28" + : "=&d" (cc), "+a" (r2), "+a" (r3), + "+a" (r4), "+a" (r5) : : "cc"); + *diff = *(char *)r2 - *(char *)r4; + return cc; +} + /** * strstr - Find the first substring in a %NUL terminated string * @s1: The string to be searched @@ -250,18 +270,9 @@ char * strstr(const char * s1,const char * s2) return (char *) s1; l1 = __strend(s1) - s1; while (l1-- >= l2) { - register unsigned long r2 asm("2") = (unsigned long) s1; - register unsigned long r3 asm("3") = (unsigned long) l2; - register unsigned long r4 asm("4") = (unsigned long) s2; - register unsigned long r5 asm("5") = (unsigned long) l2; - int cc; - - asm volatile ("0: clcle %1,%3,0\n" - " jo 0b\n" - " ipm %0\n" - " srl %0,28" - : "=&d" (cc), "+a" (r2), "+a" (r3), - "+a" (r4), "+a" (r5) : : "cc" ); + int cc, dummy; + + cc = clcle(s1, l1, s2, l2, &dummy); if (!cc) return (char *) s1; s1++; @@ -302,20 +313,11 @@ EXPORT_SYMBOL(memchr); */ int memcmp(const void *cs, const void *ct, size_t n) { - register unsigned long r2 asm("2") = (unsigned long) cs; - register unsigned long r3 asm("3") = (unsigned long) n; - register unsigned long r4 asm("4") = (unsigned long) ct; - register unsigned long r5 asm("5") = (unsigned long) n; - int ret; + int ret, diff; - asm volatile ("0: clcle %1,%3,0\n" - " jo 0b\n" - " ipm %0\n" - " srl %0,28" - : "=&d" (ret), "+a" (r2), "+a" (r3), "+a" (r4), "+a" (r5) - : : "cc" ); + ret = clcle(cs, n, ct, n, &diff); if (ret) - ret = *(char *) r2 - *(char *) r4; + ret = diff; return ret; } EXPORT_SYMBOL(memcmp); -- cgit v1.2.3-59-g8ed1b From 931641c639cea024d4462d79ad9716406bba7b19 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jun 2016 13:31:55 +0200 Subject: s390/mm: use basic block for essa inline assembly Use only simple inline assemblies which consist of a single basic block if the register asm construct is being used. Otherwise gcc would generate broken code if the compiler option --sanitize-coverage=trace-pc would be used. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/page-states.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index a90d45e9dfb0..3330ea124eec 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -34,20 +34,25 @@ static int __init cmma(char *str) } __setup("cmma=", cmma); -void __init cmma_init(void) +static inline int cmma_test_essa(void) { register unsigned long tmp asm("0") = 0; register int rc asm("1") = -EOPNOTSUPP; - if (!cmma_flag) - return; asm volatile( " .insn rrf,0xb9ab0000,%1,%1,0,0\n" "0: la %0,0\n" "1:\n" EX_TABLE(0b,1b) : "+&d" (rc), "+&d" (tmp)); - if (rc) + return rc; +} + +void __init cmma_init(void) +{ + if (!cmma_flag) + return; + if (cmma_test_essa()) cmma_flag = 0; } -- cgit v1.2.3-59-g8ed1b From 11d376730384a07568b56ca8d43989d25e585760 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jun 2016 14:00:27 +0200 Subject: s390/crypto: use basic blocks for ap bus inline assemblies Use only simple inline assemblies which consist of a single basic block if the register asm construct is being used. Otherwise gcc would generate broken code if the compiler option --sanitize-coverage=trace-pc would be used. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 77 ++++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 28 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 327255da115a..4feb27215ab6 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -169,6 +169,19 @@ static int ap_configuration_available(void) return test_facility(12); } +static inline struct ap_queue_status +__pqap_tapq(ap_qid_t qid, unsigned long *info) +{ + register unsigned long reg0 asm ("0") = qid; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm ("2") = 0UL; + + asm volatile(".long 0xb2af0000" /* PQAP(TAPQ) */ + : "+d" (reg0), "=d" (reg1), "+d" (reg2) : : "cc"); + *info = reg2; + return reg1; +} + /** * ap_test_queue(): Test adjunct processor queue. * @qid: The AP queue number @@ -179,17 +192,15 @@ static int ap_configuration_available(void) static inline struct ap_queue_status ap_test_queue(ap_qid_t qid, unsigned long *info) { - register unsigned long reg0 asm ("0") = qid; - register struct ap_queue_status reg1 asm ("1"); - register unsigned long reg2 asm ("2") = 0UL; + struct ap_queue_status aqs; + unsigned long _info; if (test_facility(15)) - reg0 |= 1UL << 23; /* set APFT T bit*/ - asm volatile(".long 0xb2af0000" /* PQAP(TAPQ) */ - : "+d" (reg0), "=d" (reg1), "+d" (reg2) : : "cc"); + qid |= 1UL << 23; /* set APFT T bit*/ + aqs = __pqap_tapq(qid, &_info); if (info) - *info = reg2; - return reg1; + *info = _info; + return aqs; } /** @@ -237,14 +248,12 @@ ap_queue_interruption_control(ap_qid_t qid, void *ind) * * Returns 0 on success, or -EOPNOTSUPP. */ -static inline int ap_query_configuration(void) +static inline int __ap_query_configuration(void) { register unsigned long reg0 asm ("0") = 0x04000000UL; register unsigned long reg1 asm ("1") = -EINVAL; register void *reg2 asm ("2") = (void *) ap_configuration; - if (!ap_configuration) - return -EOPNOTSUPP; asm volatile( ".long 0xb2af0000\n" /* PQAP(QCI) */ "0: la %1,0\n" @@ -257,6 +266,13 @@ static inline int ap_query_configuration(void) return reg1; } +static inline int ap_query_configuration(void) +{ + if (!ap_configuration) + return -EOPNOTSUPP; + return __ap_query_configuration(); +} + /** * ap_init_configuration(): Allocate and query configuration array. */ @@ -346,6 +362,26 @@ static int ap_queue_enable_interruption(struct ap_device *ap_dev, void *ind) } } +static inline struct ap_queue_status +__nqap(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length) +{ + typedef struct { char _[length]; } msgblock; + register unsigned long reg0 asm ("0") = qid | 0x40000000UL; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm ("2") = (unsigned long) msg; + register unsigned long reg3 asm ("3") = (unsigned long) length; + register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32); + register unsigned long reg5 asm ("5") = psmid & 0xffffffff; + + asm volatile ( + "0: .long 0xb2ad0042\n" /* NQAP */ + " brc 2,0b" + : "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3) + : "d" (reg4), "d" (reg5), "m" (*(msgblock *) msg) + : "cc"); + return reg1; +} + /** * __ap_send(): Send message to adjunct processor queue. * @qid: The AP queue number @@ -363,24 +399,9 @@ static inline struct ap_queue_status __ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length, unsigned int special) { - typedef struct { char _[length]; } msgblock; - register unsigned long reg0 asm ("0") = qid | 0x40000000UL; - register struct ap_queue_status reg1 asm ("1"); - register unsigned long reg2 asm ("2") = (unsigned long) msg; - register unsigned long reg3 asm ("3") = (unsigned long) length; - register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32); - register unsigned long reg5 asm ("5") = psmid & 0xffffffff; - if (special == 1) - reg0 |= 0x400000UL; - - asm volatile ( - "0: .long 0xb2ad0042\n" /* NQAP */ - " brc 2,0b" - : "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3) - : "d" (reg4), "d" (reg5), "m" (*(msgblock *) msg) - : "cc" ); - return reg1; + qid |= 0x400000UL; + return __nqap(qid, psmid, msg, length); } int ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length) -- cgit v1.2.3-59-g8ed1b From eb090ad2ad5c7d0b4264a54f371683f2b2de1f9d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jun 2016 14:03:00 +0200 Subject: s390/iucv: use basic blocks for iucv inline assemblies Use only simple inline assemblies which consist of a single basic block if the register asm construct is being used. Otherwise gcc would generate broken code if the compiler option --sanitize-coverage=trace-pc would be used. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- net/iucv/iucv.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 7eaa000c9258..88a2a3ba4212 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -320,21 +320,29 @@ static union iucv_param *iucv_param_irq[NR_CPUS]; * * Returns the result of the CP IUCV call. */ -static inline int iucv_call_b2f0(int command, union iucv_param *parm) +static inline int __iucv_call_b2f0(int command, union iucv_param *parm) { register unsigned long reg0 asm ("0"); register unsigned long reg1 asm ("1"); int ccode; reg0 = command; - reg1 = virt_to_phys(parm); + reg1 = (unsigned long)parm; asm volatile( " .long 0xb2f01000\n" " ipm %0\n" " srl %0,28\n" : "=d" (ccode), "=m" (*parm), "+d" (reg0), "+a" (reg1) : "m" (*parm) : "cc"); - return (ccode == 1) ? parm->ctrl.iprcode : ccode; + return ccode; +} + +static inline int iucv_call_b2f0(int command, union iucv_param *parm) +{ + int ccode; + + ccode = __iucv_call_b2f0(command, parm); + return ccode == 1 ? parm->ctrl.iprcode : ccode; } /** @@ -345,16 +353,12 @@ static inline int iucv_call_b2f0(int command, union iucv_param *parm) * Returns the maximum number of connections or -EPERM is IUCV is not * available. */ -static int iucv_query_maxconn(void) +static int __iucv_query_maxconn(void *param, unsigned long *max_pathid) { register unsigned long reg0 asm ("0"); register unsigned long reg1 asm ("1"); - void *param; int ccode; - param = kzalloc(sizeof(union iucv_param), GFP_KERNEL|GFP_DMA); - if (!param) - return -ENOMEM; reg0 = IUCV_QUERY; reg1 = (unsigned long) param; asm volatile ( @@ -362,8 +366,22 @@ static int iucv_query_maxconn(void) " ipm %0\n" " srl %0,28\n" : "=d" (ccode), "+d" (reg0), "+d" (reg1) : : "cc"); + *max_pathid = reg1; + return ccode; +} + +static int iucv_query_maxconn(void) +{ + unsigned long max_pathid; + void *param; + int ccode; + + param = kzalloc(sizeof(union iucv_param), GFP_KERNEL | GFP_DMA); + if (!param) + return -ENOMEM; + ccode = __iucv_query_maxconn(param, &max_pathid); if (ccode == 0) - iucv_max_pathid = reg1; + iucv_max_pathid = max_pathid; kfree(param); return ccode ? -EPERM : 0; } -- cgit v1.2.3-59-g8ed1b From 7b411ac6b755b211423dfd37c59a7dad5a5e0bf1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jun 2016 14:03:25 +0200 Subject: s390/pci: use basic blocks for pci inline assemblies Use only simple inline assemblies which consist of a single basic block if the register asm construct is being used. Otherwise gcc would generate broken code if the compiler option --sanitize-coverage=trace-pc would be used. Signed-off-by: Heiko Carstens Acked-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_insn.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 10ca15dcab11..fa8d7d4b9751 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -99,7 +99,7 @@ void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc) } /* PCI Load */ -static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) +static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status) { register u64 __req asm("2") = req; register u64 __offset asm("3") = offset; @@ -116,6 +116,16 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) : "d" (__offset) : "cc"); *status = __req >> 24 & 0xff; + *data = __data; + return cc; +} + +static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) +{ + u64 __data; + int cc; + + cc = ____pcilg(&__data, req, offset, status); if (!cc) *data = __data; -- cgit v1.2.3-59-g8ed1b From 7b4ff87cbf3d2530d6362bb08cc61635894f596c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jun 2016 14:03:38 +0200 Subject: s390/cio: use basic blocks for cmf inline assemblies Use only simple inline assemblies which consist of a single basic block if the register asm construct is being used. Otherwise gcc would generate broken code if the compiler option --sanitize-coverage=trace-pc would be used. Signed-off-by: Heiko Carstens Acked-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/cmf.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index b2afad5a5682..f18c83bacb09 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -164,6 +164,9 @@ static inline u64 time_to_avg_nsec(u32 value, u32 count) return ret; } +#define CMF_OFF 0 +#define CMF_ON 2 + /* * Activate or deactivate the channel monitor. When area is NULL, * the monitor is deactivated. The channel monitor needs to @@ -176,7 +179,7 @@ static inline void cmf_activate(void *area, unsigned int onoff) register long __gpr1 asm("1"); __gpr2 = area; - __gpr1 = onoff ? 2 : 0; + __gpr1 = onoff; /* activate channel measurement */ asm("schm" : : "d" (__gpr2), "d" (__gpr1) ); } @@ -587,7 +590,7 @@ static int alloc_cmb(struct ccw_device *cdev) /* everything ok */ memset(mem, 0, size); cmb_area.mem = mem; - cmf_activate(cmb_area.mem, 1); + cmf_activate(cmb_area.mem, CMF_ON); } } @@ -621,7 +624,7 @@ static void free_cmb(struct ccw_device *cdev) if (list_empty(&cmb_area.list)) { ssize_t size; size = sizeof(struct cmb) * cmb_area.num_channels; - cmf_activate(NULL, 0); + cmf_activate(NULL, CMF_OFF); free_pages((unsigned long)cmb_area.mem, get_order(size)); cmb_area.mem = NULL; } @@ -830,7 +833,7 @@ static int alloc_cmbe(struct ccw_device *cdev) /* activate global measurement if this is the first channel */ if (list_empty(&cmb_area.list)) - cmf_activate(NULL, 1); + cmf_activate(NULL, CMF_ON); list_add_tail(&cdev->private->cmb_list, &cmb_area.list); spin_unlock_irq(cdev->ccwlock); @@ -867,7 +870,7 @@ static void free_cmbe(struct ccw_device *cdev) /* deactivate global measurement if this is the last channel */ list_del_init(&cdev->private->cmb_list); if (list_empty(&cmb_area.list)) - cmf_activate(NULL, 0); + cmf_activate(NULL, CMF_OFF); spin_unlock_irq(cdev->ccwlock); spin_unlock(&cmb_area.lock); } @@ -1321,7 +1324,7 @@ void cmf_reactivate(void) { spin_lock(&cmb_area.lock); if (!list_empty(&cmb_area.list)) - cmf_activate(cmb_area.mem, 1); + cmf_activate(cmb_area.mem, CMF_ON); spin_unlock(&cmb_area.lock); } -- cgit v1.2.3-59-g8ed1b From a4d9b97cc3c215a2dd6e841ee203db7b7e73b3c9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jun 2016 14:03:52 +0200 Subject: s390/cio: use basic blocks for i/o inline assemblies Use only simple inline assemblies which consist of a single basic block if the register asm construct is being used. Otherwise gcc would generate broken code if the compiler option --sanitize-coverage=trace-pc would be used. Signed-off-by: Heiko Carstens Acked-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/ioasm.c | 91 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 82 insertions(+), 9 deletions(-) diff --git a/drivers/s390/cio/ioasm.c b/drivers/s390/cio/ioasm.c index 98984818618f..8225da619014 100644 --- a/drivers/s390/cio/ioasm.c +++ b/drivers/s390/cio/ioasm.c @@ -12,7 +12,7 @@ #include "orb.h" #include "cio.h" -int stsch(struct subchannel_id schid, struct schib *addr) +static inline int __stsch(struct subchannel_id schid, struct schib *addr) { register struct subchannel_id reg1 asm ("1") = schid; int ccode = -EIO; @@ -26,13 +26,21 @@ int stsch(struct subchannel_id schid, struct schib *addr) : "+d" (ccode), "=m" (*addr) : "d" (reg1), "a" (addr) : "cc"); + return ccode; +} + +int stsch(struct subchannel_id schid, struct schib *addr) +{ + int ccode; + + ccode = __stsch(schid, addr); trace_s390_cio_stsch(schid, addr, ccode); return ccode; } EXPORT_SYMBOL(stsch); -int msch(struct subchannel_id schid, struct schib *addr) +static inline int __msch(struct subchannel_id schid, struct schib *addr) { register struct subchannel_id reg1 asm ("1") = schid; int ccode = -EIO; @@ -46,12 +54,20 @@ int msch(struct subchannel_id schid, struct schib *addr) : "+d" (ccode) : "d" (reg1), "a" (addr), "m" (*addr) : "cc"); + return ccode; +} + +int msch(struct subchannel_id schid, struct schib *addr) +{ + int ccode; + + ccode = __msch(schid, addr); trace_s390_cio_msch(schid, addr, ccode); return ccode; } -int tsch(struct subchannel_id schid, struct irb *addr) +static inline int __tsch(struct subchannel_id schid, struct irb *addr) { register struct subchannel_id reg1 asm ("1") = schid; int ccode; @@ -63,12 +79,20 @@ int tsch(struct subchannel_id schid, struct irb *addr) : "=d" (ccode), "=m" (*addr) : "d" (reg1), "a" (addr) : "cc"); + return ccode; +} + +int tsch(struct subchannel_id schid, struct irb *addr) +{ + int ccode; + + ccode = __tsch(schid, addr); trace_s390_cio_tsch(schid, addr, ccode); return ccode; } -int ssch(struct subchannel_id schid, union orb *addr) +static inline int __ssch(struct subchannel_id schid, union orb *addr) { register struct subchannel_id reg1 asm("1") = schid; int ccode = -EIO; @@ -82,13 +106,21 @@ int ssch(struct subchannel_id schid, union orb *addr) : "+d" (ccode) : "d" (reg1), "a" (addr), "m" (*addr) : "cc", "memory"); + return ccode; +} + +int ssch(struct subchannel_id schid, union orb *addr) +{ + int ccode; + + ccode = __ssch(schid, addr); trace_s390_cio_ssch(schid, addr, ccode); return ccode; } EXPORT_SYMBOL(ssch); -int csch(struct subchannel_id schid) +static inline int __csch(struct subchannel_id schid) { register struct subchannel_id reg1 asm("1") = schid; int ccode; @@ -100,6 +132,14 @@ int csch(struct subchannel_id schid) : "=d" (ccode) : "d" (reg1) : "cc"); + return ccode; +} + +int csch(struct subchannel_id schid) +{ + int ccode; + + ccode = __csch(schid); trace_s390_cio_csch(schid, ccode); return ccode; @@ -140,7 +180,7 @@ int chsc(void *chsc_area) } EXPORT_SYMBOL(chsc); -int rchp(struct chp_id chpid) +static inline int __rchp(struct chp_id chpid) { register struct chp_id reg1 asm ("1") = chpid; int ccode; @@ -151,12 +191,20 @@ int rchp(struct chp_id chpid) " ipm %0\n" " srl %0,28" : "=d" (ccode) : "d" (reg1) : "cc"); + return ccode; +} + +int rchp(struct chp_id chpid) +{ + int ccode; + + ccode = __rchp(chpid); trace_s390_cio_rchp(chpid, ccode); return ccode; } -int rsch(struct subchannel_id schid) +static inline int __rsch(struct subchannel_id schid) { register struct subchannel_id reg1 asm("1") = schid; int ccode; @@ -168,12 +216,21 @@ int rsch(struct subchannel_id schid) : "=d" (ccode) : "d" (reg1) : "cc", "memory"); + + return ccode; +} + +int rsch(struct subchannel_id schid) +{ + int ccode; + + ccode = __rsch(schid); trace_s390_cio_rsch(schid, ccode); return ccode; } -int hsch(struct subchannel_id schid) +static inline int __hsch(struct subchannel_id schid) { register struct subchannel_id reg1 asm("1") = schid; int ccode; @@ -185,12 +242,20 @@ int hsch(struct subchannel_id schid) : "=d" (ccode) : "d" (reg1) : "cc"); + return ccode; +} + +int hsch(struct subchannel_id schid) +{ + int ccode; + + ccode = __hsch(schid); trace_s390_cio_hsch(schid, ccode); return ccode; } -int xsch(struct subchannel_id schid) +static inline int __xsch(struct subchannel_id schid) { register struct subchannel_id reg1 asm("1") = schid; int ccode; @@ -202,6 +267,14 @@ int xsch(struct subchannel_id schid) : "=d" (ccode) : "d" (reg1) : "cc"); + return ccode; +} + +int xsch(struct subchannel_id schid) +{ + int ccode; + + ccode = __xsch(schid); trace_s390_cio_xsch(schid, ccode); return ccode; -- cgit v1.2.3-59-g8ed1b From 80a60f6ef19a0d4d06a55065f69fa3dbbbac8bcc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jun 2016 14:04:17 +0200 Subject: s390/smp: use basic blocks for sigp inline assemblies Use only simple inline assemblies which consist of a single basic block if the register asm construct is being used. Otherwise gcc would generate broken code if the compiler option --sanitize-coverage=trace-pc would be used. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/sigp.h | 17 ++++++++++++++--- arch/s390/kernel/smp.c | 8 +------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index 1c8f33fca356..72df5f2de6b0 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -37,8 +37,8 @@ #ifndef __ASSEMBLY__ -static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm, - u32 *status) +static inline int ____pcpu_sigp(u16 addr, u8 order, unsigned long parm, + u32 *status) { register unsigned long reg1 asm ("1") = parm; int cc; @@ -48,8 +48,19 @@ static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm, " ipm %0\n" " srl %0,28\n" : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc"); + *status = reg1; + return cc; +} + +static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm, + u32 *status) +{ + u32 _status; + int cc; + + cc = ____pcpu_sigp(addr, order, parm, &_status); if (status && cc == 1) - *status = reg1; + *status = _status; return cc; } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 830537432493..5c8f7caf9f31 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -318,17 +318,11 @@ static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), */ static int pcpu_set_smt(unsigned int mtid) { - register unsigned long reg1 asm ("1") = (unsigned long) mtid; int cc; if (smp_cpu_mtid == mtid) return 0; - asm volatile( - " sigp %1,0,%2 # sigp set multi-threading\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (cc) : "d" (reg1), "K" (SIGP_SET_MULTI_THREADING) - : "cc"); + cc = __pcpu_sigp(0, SIGP_SET_MULTI_THREADING, mtid, NULL); if (cc == 0) { smp_cpu_mtid = mtid; smp_cpu_mt_shift = 0; -- cgit v1.2.3-59-g8ed1b From 2c79813a1f459dd702f9b324a96b0849dd02a6a4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jun 2016 14:05:54 +0200 Subject: s390/sysinfo: use basic block for stsi inline assembly Use only simple inline assemblies which consist of a single basic block if the register asm construct is being used. Otherwise gcc would generate broken code if the compiler option --sanitize-coverage=trace-pc would be used. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/sysinfo.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 96d81502e599..050b8d067d3b 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -20,13 +20,7 @@ int topology_max_mnest; -/* - * stsi - store system information - * - * Returns the current configuration level if function code 0 was specified. - * Otherwise returns 0 on success or a negative value on error. - */ -int stsi(void *sysinfo, int fc, int sel1, int sel2) +static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl) { register int r0 asm("0") = (fc << 28) | sel1; register int r1 asm("1") = sel2; @@ -41,9 +35,24 @@ int stsi(void *sysinfo, int fc, int sel1, int sel2) : "+d" (r0), "+d" (rc) : "d" (r1), "a" (sysinfo), "K" (-EOPNOTSUPP) : "cc", "memory"); + *lvl = ((unsigned int) r0) >> 28; + return rc; +} + +/* + * stsi - store system information + * + * Returns the current configuration level if function code 0 was specified. + * Otherwise returns 0 on success or a negative value on error. + */ +int stsi(void *sysinfo, int fc, int sel1, int sel2) +{ + int lvl, rc; + + rc = __stsi(sysinfo, fc, sel1, sel2, &lvl); if (rc) return rc; - return fc ? 0 : ((unsigned int) r0) >> 28; + return fc ? 0 : lvl; } EXPORT_SYMBOL(stsi); -- cgit v1.2.3-59-g8ed1b From e030c1125eab9eb822a7a271ed233a77ebfa2629 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jun 2016 14:06:09 +0200 Subject: s390/hypfs: use basic block for diag inline assembly Use only simple inline assemblies which consist of a single basic block if the register asm construct is being used. Otherwise gcc would generate broken code if the compiler option --sanitize-coverage=trace-pc would be used. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/hypfs_diag.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 27779f0743ac..67d43a0eabb4 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -337,9 +337,9 @@ static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr) /* Diagnose 204 functions */ -static inline int __diag204(unsigned long subcode, unsigned long size, void *addr) +static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr) { - register unsigned long _subcode asm("0") = subcode; + register unsigned long _subcode asm("0") = *subcode; register unsigned long _size asm("1") = size; asm volatile( @@ -347,15 +347,17 @@ static inline int __diag204(unsigned long subcode, unsigned long size, void *add "0: nopr %%r7\n" EX_TABLE(0b,0b) : "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory"); - if (_subcode) - return -1; + *subcode = _subcode; return _size; } static int diag204(unsigned long subcode, unsigned long size, void *addr) { diag_stat_inc(DIAG_STAT_X204); - return __diag204(subcode, size, addr); + size = __diag204(&subcode, size, addr); + if (subcode) + return -1; + return size; } /* -- cgit v1.2.3-59-g8ed1b From e238c15e5252d2d6eb5c43c1da617fcad8e60420 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jun 2016 14:06:54 +0200 Subject: s390/cpumf: use basic block for ecctr inline assembly Use only simple inline assemblies which consist of a single basic block if the register asm construct is being used. Otherwise gcc would generate broken code if the compiler option --sanitize-coverage=trace-pc would be used. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mf.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 9dd04b9e9782..03516476127b 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -169,16 +169,27 @@ static inline int lcctl(u64 ctl) } /* Extract CPU counter */ -static inline int ecctr(u64 ctr, u64 *val) +static inline int __ecctr(u64 ctr, u64 *content) { - register u64 content asm("4") = 0; + register u64 _content asm("4") = 0; int cc; asm volatile ( " .insn rre,0xb2e40000,%0,%2\n" " ipm %1\n" " srl %1,28\n" - : "=d" (content), "=d" (cc) : "d" (ctr) : "cc"); + : "=d" (_content), "=d" (cc) : "d" (ctr) : "cc"); + *content = _content; + return cc; +} + +/* Extract CPU counter */ +static inline int ecctr(u64 ctr, u64 *val) +{ + u64 content; + int cc; + + cc = __ecctr(ctr, &content); if (!cc) *val = content; return cc; -- cgit v1.2.3-59-g8ed1b From 907fa061cc24f7092db4fcb7938b9cb6ea7dd15e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jun 2016 14:08:32 +0200 Subject: s390: enable kcov support Now that hopefully all inline assemblies have been converted to single basic blocks we can enable kcov on s390. Note that this patch does not disable as many files on s390 like the x86 variant does. Right now I didn't see a reason to do that, however additional files or directories can be excluded at any time. The runtime overhead seems to be quite high. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 + arch/s390/boot/compressed/Makefile | 2 ++ arch/s390/kernel/Makefile | 3 +++ arch/s390/kernel/vdso32/Makefile | 2 ++ arch/s390/kernel/vdso64/Makefile | 2 ++ 5 files changed, 10 insertions(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ac963903d54f..9e607bf2d640 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -72,6 +72,7 @@ config S390 select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_KCOV select ARCH_HAS_SG_CHAIN select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_INLINE_READ_LOCK diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 1dd210347e12..98ec652cc332 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -4,6 +4,8 @@ # create a compressed vmlinux image from the original vmlinux # +KCOV_INSTRUMENT := n + targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 targets += misc.o piggy.o sizes.h head.o diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 8d1419120bb7..f37be37edd3a 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -2,6 +2,9 @@ # Makefile for the linux kernel. # +KCOV_INSTRUMENT_early.o := n +KCOV_INSTRUMENT_sclp.o := n + ifdef CONFIG_FUNCTION_TRACER # Don't trace early setup code and tracing code CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE) diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index f9c459586649..68145456fee2 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -1,5 +1,7 @@ # List of files in the vdso, has to be asm only for now +KCOV_INSTRUMENT := n + obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o # Build rules diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 058659c1b8cf..0b0fd22c869a 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -1,5 +1,7 @@ # List of files in the vdso, has to be asm only for now +KCOV_INSTRUMENT := n + obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o # Build rules -- cgit v1.2.3-59-g8ed1b From 43799597dc78358d31fdad86a6480054c519bacc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Jun 2016 10:02:38 +0200 Subject: s390: remove pointless load within __switch_to Remove a leftover from the code that transferred a couple of TIF bits from the previous task to the next task. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 2d47f9cfcb36..9a9a35657860 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -175,7 +175,6 @@ ENTRY(__switch_to) stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task lgr %r1,%r2 aghi %r1,__TASK_thread # thread_struct of prev task - lg %r4,__TASK_thread_info(%r2) # get thread_info of prev lg %r5,__TASK_thread_info(%r3) # get thread_info of next stg %r15,__THREAD_ksp(%r1) # store kernel stack of prev lgr %r1,%r3 -- cgit v1.2.3-59-g8ed1b From c3bdf2e11a10ea804d69a6d1ba1e8a061efb0fa8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 27 Jun 2016 07:57:21 +0200 Subject: s390: stack address vs thread_info Avoid using the address of a process' thread_info structure as the kernel stack address. This will break as soon as the thread_info structure will be removed from the stack, and in addition it makes the code a bit more understandable. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/kprobes.h | 4 ++-- arch/s390/kernel/dumpstack.c | 12 ++++-------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index b47ad3b642cc..591e5a5279b0 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -43,9 +43,9 @@ typedef u16 kprobe_opcode_t; #define MAX_INSN_SIZE 0x0003 #define MAX_STACK_SIZE 64 #define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \ - (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \ + (((unsigned long)task_stack_page(current)) + THREAD_SIZE - (ADDR))) \ ? (MAX_STACK_SIZE) \ - : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) + : (((unsigned long)task_stack_page(current)) + THREAD_SIZE - (ADDR))) #define kretprobe_blacklist_size 0 diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 69f9908ac44c..6693383bc01b 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -78,14 +78,10 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task, sp = __dump_trace(func, data, sp, S390_lowcore.async_stack + frame_size - ASYNC_SIZE, S390_lowcore.async_stack + frame_size); - if (task) - __dump_trace(func, data, sp, - (unsigned long)task_stack_page(task), - (unsigned long)task_stack_page(task) + THREAD_SIZE); - else - __dump_trace(func, data, sp, - S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE); + task = task ?: current; + __dump_trace(func, data, sp, + (unsigned long)task_stack_page(task), + (unsigned long)task_stack_page(task) + THREAD_SIZE); } EXPORT_SYMBOL_GPL(dump_trace); -- cgit v1.2.3-59-g8ed1b From 25d539c9a958faf092bc1b8736ebc30b49faeebc Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 28 Jun 2016 13:05:04 +0200 Subject: s390/ptrace: clarify bits in the per_struct The bits single_step and instruction_fetch lost their meaning with git commit 5e9a26928f550157 "[S390] ptrace cleanup". Clarify the comment for these two bits. Signed-off-by: Martin Schwidefsky --- arch/s390/include/uapi/asm/ptrace.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h index a150f4fabe43..77630c74f13b 100644 --- a/arch/s390/include/uapi/asm/ptrace.h +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -359,9 +359,9 @@ typedef struct per_cr_bits bits; } control_regs; /* - * Use these flags instead of setting em_instruction_fetch - * directly they are used so that single stepping can be - * switched on & off while not affecting other tracing + * The single_step and instruction_fetch bits are obsolete, + * the kernel always sets them to zero. To enable single + * stepping use ptrace(PTRACE_SINGLESTEP) instead. */ unsigned single_step : 1; unsigned instruction_fetch : 1; -- cgit v1.2.3-59-g8ed1b From 10f4954ae67b3be6dac3cdfb1e0057c3779542be Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 27 Jun 2016 13:19:22 +0200 Subject: s390/cpuinfo: show maximum thread id Expose the maximum thread id with /proc/cpuinfo. With the new line the output looks like this: vendor_id : IBM/S390 bogomips per cpu: 20325.00 max thread id : 1 With this new interface it is possible to always tell the correct number of cpu threads potentially being used by the kernel. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/processor.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 6cf24b471b0c..81d0808085e6 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -104,6 +104,7 @@ static void show_cpu_summary(struct seq_file *m, void *v) "bogomips per cpu: %lu.%02lu\n", num_online_cpus(), loops_per_jiffy/(500000/HZ), (loops_per_jiffy/(5000/HZ))%100); + seq_printf(m, "max thread id : %d\n", smp_cpu_mtid); seq_puts(m, "features\t: "); for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) if (hwcap_str[i] && (elf_hwcap & (1UL << i))) -- cgit v1.2.3-59-g8ed1b From 46210c440c7b2d32a8ee4e1f0248d0a0b4ad9fa5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 30 Jun 2016 12:40:25 +0200 Subject: s390: have unique symbol for __switch_to address After linking there are several symbols for the same address that the __switch_to symbol points to. E.g.: 000000000089b9c0 T __kprobes_text_start 000000000089b9c0 T __lock_text_end 000000000089b9c0 T __lock_text_start 000000000089b9c0 T __sched_text_end 000000000089b9c0 T __switch_to When disassembling with "objdump -d" this results in a missing __switch_to function. It would be named __kprobes_text_start instead. To unconfuse objdump add a nop in front of the kprobes text section. That way __switch_to appears again. Obviously this solution is sort of a hack, since it also depends on link order if this works or not. However it is the best I can come up with for now. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 9a9a35657860..c51650a1ed16 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -163,6 +163,16 @@ _PIF_WORK = (_PIF_PER_TRAP) .endm .section .kprobes.text, "ax" +.Ldummy: + /* + * This nop exists only in order to avoid that __switch_to starts at + * the beginning of the kprobes text section. In that case we would + * have several symbols at the same address. E.g. objdump would take + * an arbitrary symbol name when disassembling this code. + * With the added nop in between the __switch_to symbol is unique + * again. + */ + nop 0 /* * Scheduler resume function, called by switch_to -- cgit v1.2.3-59-g8ed1b From d08de8e2d86744f91d9d5d57c56ca2b6e33bf6ec Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 4 Jul 2016 14:47:01 +0200 Subject: s390/mm: add support for 2GB hugepages This adds support for 2GB hugetlbfs pages on s390. Reviewed-by: Martin Schwidefsky Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/hugetlb.h | 5 +- arch/s390/include/asm/page.h | 1 + arch/s390/include/asm/pgtable.h | 71 +++++++++++++++------- arch/s390/mm/gmap.c | 3 + arch/s390/mm/gup.c | 45 +++++++++++++- arch/s390/mm/hugetlbpage.c | 129 ++++++++++++++++++++++++++++------------ arch/s390/mm/pgtable.c | 39 ++++++++++++ mm/hugetlb.c | 4 +- 8 files changed, 233 insertions(+), 64 deletions(-) diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index d9be7c0c1291..4c7fac75090e 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -41,7 +41,10 @@ static inline int prepare_hugepage_range(struct file *file, static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY; + if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pte_val(*ptep) = _REGION3_ENTRY_EMPTY; + else + pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY; } static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 42fd5fea42ee..b2146c4119b2 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -21,6 +21,7 @@ #define HPAGE_SIZE (1UL << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) +#define HUGE_MAX_HSTATE 2 #define ARCH_HAS_SETCLEAR_HUGE_PTE #define ARCH_HAS_HUGE_PTE_TYPE diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 37e1aa9bf84c..ea1533e07271 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -306,6 +306,9 @@ static inline int is_module_addr(void *addr) #define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */ #endif +#define _REGION_ENTRY_BITS 0xfffffffffffff227UL +#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe27UL + /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL #define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL @@ -573,7 +576,7 @@ static inline int pud_none(pud_t pud) { if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) return 0; - return (pud_val(pud) & _REGION_ENTRY_INVALID) != 0UL; + return pud_val(pud) == _REGION3_ENTRY_EMPTY; } static inline int pud_large(pud_t pud) @@ -593,17 +596,25 @@ static inline unsigned long pud_pfn(pud_t pud) return (pud_val(pud) & origin_mask) >> PAGE_SHIFT; } +static inline int pmd_large(pmd_t pmd) +{ + return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; +} + +static inline int pmd_bad(pmd_t pmd) +{ + if (pmd_large(pmd)) + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0; + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; +} + static inline int pud_bad(pud_t pud) { - /* - * With dynamic page table levels the pud can be a region table - * entry or a segment table entry. Check for the bit that are - * invalid for either table entry. - */ - unsigned long mask = - ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID & - ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; - return (pud_val(pud) & mask) != 0; + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) + return pmd_bad(__pmd(pud_val(pud))); + if (pud_large(pud)) + return (pud_val(pud) & ~_REGION_ENTRY_BITS_LARGE) != 0; + return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0; } static inline int pmd_present(pmd_t pmd) @@ -616,11 +627,6 @@ static inline int pmd_none(pmd_t pmd) return pmd_val(pmd) == _SEGMENT_ENTRY_INVALID; } -static inline int pmd_large(pmd_t pmd) -{ - return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; -} - static inline unsigned long pmd_pfn(pmd_t pmd) { unsigned long origin_mask; @@ -631,13 +637,6 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT; } -static inline int pmd_bad(pmd_t pmd) -{ - if (pmd_large(pmd)) - return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0; - return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; -} - #define __HAVE_ARCH_PMD_WRITE static inline int pmd_write(pmd_t pmd) { @@ -1081,6 +1080,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pte_page(x) pfn_to_page(pte_pfn(x)) #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) +#define pud_page(pud) pfn_to_page(pud_pfn(pud)) /* Find an entry in the lowest level page table.. */ #define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr)) @@ -1238,6 +1238,19 @@ static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp) : "cc" ); } +static inline void __pudp_idte(unsigned long address, pud_t *pudp) +{ + unsigned long r3o; + + r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t); + r3o |= _ASCE_TYPE_REGION3; + asm volatile( + " .insn rrf,0xb98e0000,%2,%3,0,0" + : "=m" (*pudp) + : "m" (*pudp), "a" (r3o), "a" ((address & PUD_MASK)) + : "cc"); +} + static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp) { unsigned long sto; @@ -1250,8 +1263,22 @@ static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp) : "cc" ); } +static inline void __pudp_idte_local(unsigned long address, pud_t *pudp) +{ + unsigned long r3o; + + r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t); + r3o |= _ASCE_TYPE_REGION3; + asm volatile( + " .insn rrf,0xb98e0000,%2,%3,0,1" + : "=m" (*pudp) + : "m" (*pudp), "a" (r3o), "a" ((address & PUD_MASK)) + : "cc"); +} + pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t); pmd_t pmdp_xchg_lazy(struct mm_struct *, unsigned long, pmd_t *, pmd_t); +pud_t pudp_xchg_direct(struct mm_struct *, unsigned long, pud_t *, pud_t); #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index cace818d86eb..69466f6055c2 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -430,6 +430,9 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) VM_BUG_ON(pgd_none(*pgd)); pud = pud_offset(pgd, vmaddr); VM_BUG_ON(pud_none(*pud)); + /* large puds cannot yet be handled */ + if (pud_large(*pud)) + return -EFAULT; pmd = pmd_offset(pud, vmaddr); VM_BUG_ON(pmd_none(*pmd)); /* large pmds cannot yet be handled */ diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index a8a6765f1a51..adb0c34bf431 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -128,6 +128,44 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, return 1; } +static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + struct page *head, *page; + unsigned long mask; + int refs; + + mask = (write ? _REGION_ENTRY_PROTECT : 0) | _REGION_ENTRY_INVALID; + if ((pud_val(pud) & mask) != 0) + return 0; + VM_BUG_ON(!pfn_valid(pud_pfn(pud))); + + refs = 0; + head = pud_page(pud); + page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + do { + VM_BUG_ON_PAGE(compound_head(page) != head, page); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + + if (unlikely(pud_val(pud) != pud_val(*pudp))) { + *nr -= refs; + while (refs--) + put_page(head); + return 0; + } + + return 1; +} + static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { @@ -144,7 +182,12 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, next = pud_addr_end(addr, end); if (pud_none(pud)) return 0; - if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr)) + if (unlikely(pud_large(pud))) { + if (!gup_huge_pud(pudp, pud, addr, next, write, pages, + nr)) + return 0; + } else if (!gup_pmd_range(pudp, pud, addr, next, write, pages, + nr)) return 0; } while (pudp++, addr = next, addr != end); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 1b5e8983f4f3..e19d853883be 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -1,19 +1,22 @@ /* * IBM System z Huge TLB Page Support for Kernel. * - * Copyright IBM Corp. 2007 + * Copyright IBM Corp. 2007,2016 * Author(s): Gerald Schaefer */ +#define KMSG_COMPONENT "hugetlb" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include -static inline pmd_t __pte_to_pmd(pte_t pte) +static inline unsigned long __pte_to_rste(pte_t pte) { - pmd_t pmd; + unsigned long rste; /* - * Convert encoding pte bits pmd bits + * Convert encoding pte bits pmd / pud bits * lIR.uswrdy.p dy..R...I...wr * empty 010.000000.0 -> 00..0...1...00 * prot-none, clean, old 111.000000.1 -> 00..1...1...00 @@ -33,25 +36,31 @@ static inline pmd_t __pte_to_pmd(pte_t pte) * u unused, l large */ if (pte_present(pte)) { - pmd_val(pmd) = pte_val(pte) & PAGE_MASK; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_READ) >> 4; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_WRITE) >> 4; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_INVALID) >> 5; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT); - pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13; + rste = pte_val(pte) & PAGE_MASK; + rste |= (pte_val(pte) & _PAGE_READ) >> 4; + rste |= (pte_val(pte) & _PAGE_WRITE) >> 4; + rste |= (pte_val(pte) & _PAGE_INVALID) >> 5; + rste |= (pte_val(pte) & _PAGE_PROTECT); + rste |= (pte_val(pte) & _PAGE_DIRTY) << 10; + rste |= (pte_val(pte) & _PAGE_YOUNG) << 10; + rste |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13; } else - pmd_val(pmd) = _SEGMENT_ENTRY_INVALID; - return pmd; + rste = _SEGMENT_ENTRY_INVALID; + return rste; } -static inline pte_t __pmd_to_pte(pmd_t pmd) +static inline pte_t __rste_to_pte(unsigned long rste) { + int present; pte_t pte; + if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + present = pud_present(__pud(rste)); + else + present = pmd_present(__pmd(rste)); + /* - * Convert encoding pmd bits pte bits + * Convert encoding pmd / pud bits pte bits * dy..R...I...wr lIR.uswrdy.p * empty 00..0...1...00 -> 010.000000.0 * prot-none, clean, old 00..1...1...00 -> 111.000000.1 @@ -70,16 +79,16 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) * SW-bits: p present, y young, d dirty, r read, w write, s special, * u unused, l large */ - if (pmd_present(pmd)) { - pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE; + if (present) { + pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE; pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_READ) << 4; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT); - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_READ) << 4; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_WRITE) << 4; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_INVALID) << 5; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_PROTECT); + pte_val(pte) |= (rste & _SEGMENT_ENTRY_DIRTY) >> 10; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_YOUNG) >> 10; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13; } else pte_val(pte) = _PAGE_INVALID; return pte; @@ -88,27 +97,33 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { - pmd_t pmd = __pte_to_pmd(pte); - - pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; - *(pmd_t *) ptep = pmd; + unsigned long rste = __pte_to_rste(pte); + + /* Set correct table type for 2G hugepages */ + if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE; + else + rste |= _SEGMENT_ENTRY_LARGE; + pte_val(*ptep) = rste; } pte_t huge_ptep_get(pte_t *ptep) { - pmd_t pmd = *(pmd_t *) ptep; - - return __pmd_to_pte(pmd); + return __rste_to_pte(pte_val(*ptep)); } pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { + pte_t pte = huge_ptep_get(ptep); pmd_t *pmdp = (pmd_t *) ptep; - pmd_t old; + pud_t *pudp = (pud_t *) ptep; - old = pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); - return __pmd_to_pte(old); + if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY)); + else + pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); + return pte; } pte_t *huge_pte_alloc(struct mm_struct *mm, @@ -120,8 +135,12 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pgdp = pgd_offset(mm, addr); pudp = pud_alloc(mm, pgdp, addr); - if (pudp) - pmdp = pmd_alloc(mm, pudp, addr); + if (pudp) { + if (sz == PUD_SIZE) + return (pte_t *) pudp; + else if (sz == PMD_SIZE) + pmdp = pmd_alloc(mm, pudp, addr); + } return (pte_t *) pmdp; } @@ -134,8 +153,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) pgdp = pgd_offset(mm, addr); if (pgd_present(*pgdp)) { pudp = pud_offset(pgdp, addr); - if (pud_present(*pudp)) + if (pud_present(*pudp)) { + if (pud_large(*pudp)) + return (pte_t *) pudp; pmdp = pmd_offset(pudp, addr); + } } return (pte_t *) pmdp; } @@ -147,5 +169,34 @@ int pmd_huge(pmd_t pmd) int pud_huge(pud_t pud) { - return 0; + return pud_large(pud); +} + +struct page * +follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int flags) +{ + if (flags & FOLL_GET) + return NULL; + + return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); +} + +static __init int setup_hugepagesz(char *opt) +{ + unsigned long size; + char *string = opt; + + size = memparse(opt, &opt); + if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) { + hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); + } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) { + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + } else { + pr_err("hugepagesz= specifies an unsupported page size %s\n", + string); + return 0; + } + return 1; } +__setup("hugepagesz=", setup_hugepagesz); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 74f8f2a8a4e8..b98d1a152d46 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -352,6 +352,45 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(pmdp_xchg_lazy); +static inline pud_t pudp_flush_direct(struct mm_struct *mm, + unsigned long addr, pud_t *pudp) +{ + pud_t old; + + old = *pudp; + if (pud_val(old) & _REGION_ENTRY_INVALID) + return old; + if (!MACHINE_HAS_IDTE) { + /* + * Invalid bit position is the same for pmd and pud, so we can + * re-use _pmd_csp() here + */ + __pmdp_csp((pmd_t *) pudp); + return old; + } + atomic_inc(&mm->context.flush_count); + if (MACHINE_HAS_TLB_LC && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) + __pudp_idte_local(addr, pudp); + else + __pudp_idte(addr, pudp); + atomic_dec(&mm->context.flush_count); + return old; +} + +pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t new) +{ + pud_t old; + + preempt_disable(); + old = pudp_flush_direct(mm, addr, pudp); + *pudp = new; + preempt_enable(); + return old; +} +EXPORT_SYMBOL(pudp_xchg_direct); + #ifdef CONFIG_TRANSPARENT_HUGEPAGE void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 388c2bb9b55c..7f0fa87cc66d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1022,7 +1022,9 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) ((node = hstate_next_node_to_free(hs, mask)) || 1); \ nr_nodes--) -#if defined(CONFIG_X86_64) && ((defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)) +#if (defined(CONFIG_X86_64) || defined(CONFIG_S390)) && \ + ((defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || \ + defined(CONFIG_CMA)) static void destroy_compound_gigantic_page(struct page *page, unsigned int order) { -- cgit v1.2.3-59-g8ed1b From f045402984404ddc11016358411e445192919047 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 7 Jul 2016 10:44:10 +0200 Subject: s390/mm: fix gmap tlb flush issues __tlb_flush_asce() should never be used if multiple asce belong to a mm. As this function changes mm logic determining if local or global tlb flushes will be neded, we might end up flushing only the gmap asce on all CPUs and a follow up mm asce flushes will only flush on the local CPU, although that asce ran on multiple CPUs. The missing tlb flushes will provoke strange faults in user space and even low address protections in user space, crashing the kernel. Fixes: 1b948d6caec4 ("s390/mm,tlb: optimize TLB flushing for zEC12") Cc: stable@vger.kernel.org # 3.15+ Reported-by: Sascha Silbe Acked-by: Martin Schwidefsky Signed-off-by: David Hildenbrand Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/tlbflush.h | 3 ++- arch/s390/mm/gmap.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index e72cea7a4bfe..1a691ef740cf 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -74,7 +74,8 @@ static inline void __tlb_flush_full(struct mm_struct *mm) } /* - * Flush TLB entries for a specific ASCE on all CPUs. + * Flush TLB entries for a specific ASCE on all CPUs. Should never be used + * when more than one asce (e.g. gmap) ran on this mm. */ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) { diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 69466f6055c2..063c721ec0dc 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -85,7 +85,7 @@ EXPORT_SYMBOL_GPL(gmap_alloc); static void gmap_flush_tlb(struct gmap *gmap) { if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, gmap->asce); + __tlb_flush_idte(gmap->asce); else __tlb_flush_global(); } @@ -124,7 +124,7 @@ void gmap_free(struct gmap *gmap) /* Flush tlb. */ if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, gmap->asce); + __tlb_flush_idte(gmap->asce); else __tlb_flush_global(); -- cgit v1.2.3-59-g8ed1b From 8f50af49f564d4e57c03903ca374109bdc270ea9 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Thu, 7 Jul 2016 07:52:38 +0200 Subject: s390/console: Make preferred console handling more consistent Use the same code structure when determining preferred consoles for Linux running as KVM guest as with Linux running in LPAR and z/VM guest: - Extend the console_mode variable to cover vt220 and hvc consoles - Determine sensible console defaults in conmode_default() - Remove KVM-special handling in set_preferred_console() Ensure that the sclp line mode console is also registered when the vt220 console was selected to not change existing behavior that someone might be relying on. As an externally visible change, KVM guest users can now select the 3270 or 3215 console devices using the conmode= kernel parameter, provided that support for the corresponding driver was compiled into the kernel. Signed-off-by: Peter Oberparleiter Signed-off-by: Jing Liu Reviewed-by: Cornelia Huck Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/setup.h | 4 ++++ arch/s390/kernel/setup.c | 22 ++++++++++++++-------- drivers/s390/char/sclp_con.c | 3 ++- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index c0f0efbb6ab5..5e8d57e1cc5e 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -86,9 +86,13 @@ extern char vmpoff_cmd[]; #define CONSOLE_IS_SCLP (console_mode == 1) #define CONSOLE_IS_3215 (console_mode == 2) #define CONSOLE_IS_3270 (console_mode == 3) +#define CONSOLE_IS_VT220 (console_mode == 4) +#define CONSOLE_IS_HVC (console_mode == 5) #define SET_CONSOLE_SCLP do { console_mode = 1; } while (0) #define SET_CONSOLE_3215 do { console_mode = 2; } while (0) #define SET_CONSOLE_3270 do { console_mode = 3; } while (0) +#define SET_CONSOLE_VT220 do { console_mode = 4; } while (0) +#define SET_CONSOLE_HVC do { console_mode = 5; } while (0) #define NSS_NAME_SIZE 8 extern char kernel_nss_name[]; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 00e38f0c75df..ba5f456edaa9 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -130,17 +130,14 @@ __setup("condev=", condev_setup); static void __init set_preferred_console(void) { - if (MACHINE_IS_KVM) { - if (sclp.has_vt220) - add_preferred_console("ttyS", 1, NULL); - else if (sclp.has_linemode) - add_preferred_console("ttyS", 0, NULL); - else - add_preferred_console("hvc", 0, NULL); - } else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) + if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) add_preferred_console("ttyS", 0, NULL); else if (CONSOLE_IS_3270) add_preferred_console("tty3270", 0, NULL); + else if (CONSOLE_IS_VT220) + add_preferred_console("ttyS", 1, NULL); + else if (CONSOLE_IS_HVC) + add_preferred_console("hvc", 0, NULL); } static int __init conmode_setup(char *str) @@ -206,6 +203,15 @@ static void __init conmode_default(void) SET_CONSOLE_SCLP; #endif } + } else if (MACHINE_IS_KVM) { + if (sclp.has_vt220 && + config_enabled(CONFIG_SCLP_VT220_CONSOLE)) + SET_CONSOLE_VT220; + else if (sclp.has_linemode && + config_enabled(CONFIG_SCLP_CONSOLE)) + SET_CONSOLE_SCLP; + else + SET_CONSOLE_HVC; } else { #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) SET_CONSOLE_SCLP; diff --git a/drivers/s390/char/sclp_con.c b/drivers/s390/char/sclp_con.c index 5880def98fc1..6037bc87e767 100644 --- a/drivers/s390/char/sclp_con.c +++ b/drivers/s390/char/sclp_con.c @@ -319,7 +319,8 @@ sclp_console_init(void) int i; int rc; - if (!CONSOLE_IS_SCLP) + /* SCLP consoles are handled together */ + if (!(CONSOLE_IS_SCLP || CONSOLE_IS_VT220)) return 0; rc = sclp_rw_init(); if (rc) -- cgit v1.2.3-59-g8ed1b From 0f5d050ceaa31b2229102211d60c149f920df3aa Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 12 Jul 2016 19:57:57 +0200 Subject: s390/cio: allow to reset channel measurement block Prior to commit 1bc6664bdfb949bc69a08113801e7d6acbf6bc3f a call to enable_cmf for a device for which channel measurement was already enabled resulted in a reset of the measurement data. What looked like bugs at the time (a 2nd allocation was triggered but failed, reset was called regardless of previous failures, and errors have not been reported to userspace) was actually something at least one userspace tool depended on. Restore that behavior in a sane way. Fixes: 1bc6664bdfb ("s390/cio: use device_lock during cmb activation") Cc: stable@vger.kernel.org #v4.4+ Signed-off-by: Sebastian Ott Reviewed-by: Cornelia Huck Reviewed-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/cmf.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index f18c83bacb09..268aa23afa01 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -756,6 +756,17 @@ static void reset_cmb(struct ccw_device *cdev) cmf_generic_reset(cdev); } +static int cmf_enabled(struct ccw_device *cdev) +{ + int enabled; + + spin_lock_irq(cdev->ccwlock); + enabled = !!cdev->private->cmb; + spin_unlock_irq(cdev->ccwlock); + + return enabled; +} + static struct attribute_group cmf_attr_group; static struct cmb_operations cmbops_basic = { @@ -1156,13 +1167,8 @@ static ssize_t cmb_enable_show(struct device *dev, char *buf) { struct ccw_device *cdev = to_ccwdev(dev); - int enabled; - spin_lock_irq(cdev->ccwlock); - enabled = !!cdev->private->cmb; - spin_unlock_irq(cdev->ccwlock); - - return sprintf(buf, "%d\n", enabled); + return sprintf(buf, "%d\n", cmf_enabled(cdev)); } static ssize_t cmb_enable_store(struct device *dev, @@ -1202,15 +1208,20 @@ int ccw_set_cmf(struct ccw_device *cdev, int enable) * @cdev: The ccw device to be enabled * * Returns %0 for success or a negative error value. - * + * Note: If this is called on a device for which channel measurement is already + * enabled a reset of the measurement data is triggered. * Context: * non-atomic */ int enable_cmf(struct ccw_device *cdev) { - int ret; + int ret = 0; device_lock(&cdev->dev); + if (cmf_enabled(cdev)) { + cmbops->reset(cdev); + goto out_unlock; + } get_device(&cdev->dev); ret = cmbops->alloc(cdev); if (ret) @@ -1229,7 +1240,7 @@ int enable_cmf(struct ccw_device *cdev) out: if (ret) put_device(&cdev->dev); - +out_unlock: device_unlock(&cdev->dev); return ret; } -- cgit v1.2.3-59-g8ed1b From 388b74d3a9f76abd349fe0b7ed8c58f88647111d Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 23 Jun 2016 11:09:26 +0200 Subject: s390/cio/device_ops: fix kernel doc Fix an incorrect kernel doc comment in device_ops.c. Also provide proper function markups while at it. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device_ops.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index a69f702a2fcc..877d9f601e63 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -97,7 +97,7 @@ void ccw_device_clear_options(struct ccw_device *cdev, unsigned long flags) } /** - * ccw_device_is_pathgroup - determine if paths to this device are grouped + * ccw_device_is_pathgroup() - determine if paths to this device are grouped * @cdev: ccw device * * Return non-zero if there is a path group, zero otherwise. @@ -109,7 +109,7 @@ int ccw_device_is_pathgroup(struct ccw_device *cdev) EXPORT_SYMBOL(ccw_device_is_pathgroup); /** - * ccw_device_is_multipath - determine if device is operating in multipath mode + * ccw_device_is_multipath() - determine if device is operating in multipath mode * @cdev: ccw device * * Return non-zero if device is operating in multipath mode, zero otherwise. @@ -457,7 +457,7 @@ __u8 ccw_device_get_path_mask(struct ccw_device *cdev) } /** - * chp_get_chp_desc - return newly allocated channel-path descriptor + * ccw_device_get_chp_desc() - return newly allocated channel-path descriptor * @cdev: device to obtain the descriptor for * @chp_idx: index of the channel path * @@ -477,7 +477,7 @@ struct channel_path_desc *ccw_device_get_chp_desc(struct ccw_device *cdev, } /** - * ccw_device_get_id - obtain a ccw device id + * ccw_device_get_id() - obtain a ccw device id * @cdev: device to obtain the id for * @dev_id: where to fill in the values */ @@ -488,7 +488,7 @@ void ccw_device_get_id(struct ccw_device *cdev, struct ccw_dev_id *dev_id) EXPORT_SYMBOL(ccw_device_get_id); /** - * ccw_device_tm_start_key - perform start function + * ccw_device_tm_start_key() - perform start function * @cdev: ccw device on which to perform the start function * @tcw: transport-command word to be started * @intparm: user defined parameter to be passed to the interrupt handler @@ -533,7 +533,7 @@ int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw, EXPORT_SYMBOL(ccw_device_tm_start_key); /** - * ccw_device_tm_start_timeout_key - perform start function + * ccw_device_tm_start_timeout_key() - perform start function * @cdev: ccw device on which to perform the start function * @tcw: transport-command word to be started * @intparm: user defined parameter to be passed to the interrupt handler @@ -559,7 +559,7 @@ int ccw_device_tm_start_timeout_key(struct ccw_device *cdev, struct tcw *tcw, EXPORT_SYMBOL(ccw_device_tm_start_timeout_key); /** - * ccw_device_tm_start - perform start function + * ccw_device_tm_start() - perform start function * @cdev: ccw device on which to perform the start function * @tcw: transport-command word to be started * @intparm: user defined parameter to be passed to the interrupt handler @@ -577,7 +577,7 @@ int ccw_device_tm_start(struct ccw_device *cdev, struct tcw *tcw, EXPORT_SYMBOL(ccw_device_tm_start); /** - * ccw_device_tm_start_timeout - perform start function + * ccw_device_tm_start_timeout() - perform start function * @cdev: ccw device on which to perform the start function * @tcw: transport-command word to be started * @intparm: user defined parameter to be passed to the interrupt handler @@ -596,7 +596,7 @@ int ccw_device_tm_start_timeout(struct ccw_device *cdev, struct tcw *tcw, EXPORT_SYMBOL(ccw_device_tm_start_timeout); /** - * ccw_device_get_mdc - accumulate max data count + * ccw_device_get_mdc() - accumulate max data count * @cdev: ccw device for which the max data count is accumulated * @mask: mask of paths to use * @@ -642,7 +642,7 @@ int ccw_device_get_mdc(struct ccw_device *cdev, u8 mask) EXPORT_SYMBOL(ccw_device_get_mdc); /** - * ccw_device_tm_intrg - perform interrogate function + * ccw_device_tm_intrg() - perform interrogate function * @cdev: ccw device on which to perform the interrogate function * * Perform an interrogate function on the given ccw device. Return zero on @@ -664,7 +664,7 @@ int ccw_device_tm_intrg(struct ccw_device *cdev) EXPORT_SYMBOL(ccw_device_tm_intrg); /** - * ccw_device_get_schid - obtain a subchannel id + * ccw_device_get_schid() - obtain a subchannel id * @cdev: device to obtain the id for * @schid: where to fill in the values */ -- cgit v1.2.3-59-g8ed1b From 6228c2a51edccf3e01fc4db065aac91b4cafdc76 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 21 Jun 2016 12:33:30 +0200 Subject: s390/chsc: fix ioctl CHSC_INFO_CU command Via CHSC_INFO_CU we ought to provide userspace with control unit configuration data. Due to an erroneous request code we trigger the wrong chsc command. Fix this copy and paste error. Signed-off-by: Sebastian Ott Reviewed-by: Cornelia Huck Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chsc_sch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c index b6f12c2bb114..735052ecd3e5 100644 --- a/drivers/s390/cio/chsc_sch.c +++ b/drivers/s390/cio/chsc_sch.c @@ -552,7 +552,7 @@ static int chsc_ioctl_info_cu(void __user *user_cd) goto out_free; } scucd_area->request.length = 0x0010; - scucd_area->request.code = 0x0028; + scucd_area->request.code = 0x0026; scucd_area->m = cd->m; scucd_area->fmt1 = cd->fmt; scucd_area->cssid = cd->cssid; -- cgit v1.2.3-59-g8ed1b From 0b601373778c770d7536b1d1761c3b2c06e4eb24 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 14 Jul 2016 11:30:37 +0200 Subject: s390/cio: make fmt1 channel path descriptor optional Not all machines / hypervisors support the chsc commands to fetch the fmt1 descriptor. When these commands fail the channel path would currently not be available to linux. Since users of these descriptors can already deal with invalid data make fetching it optional. The only data that is mandatory for us is the fmt0 channel path descriptor. Also make the return code for missing facilities in chsc_get_channel_measurement_chars consistent to other functions. Signed-off-by: Sebastian Ott Reviewed-by: Cornelia Huck Reviewed-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chp.c | 11 +++++++---- drivers/s390/cio/chsc.c | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index 50597f9522fe..d3b72eab2b8f 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -428,11 +428,14 @@ int chp_update_desc(struct channel_path *chp) if (rc) return rc; - rc = chsc_determine_fmt1_channel_path_desc(chp->chpid, &chp->desc_fmt1); - if (rc) - return rc; + /* + * Fetching the following data is optional. Not all machines or + * hypervisors implement the required chsc commands. + */ + chsc_determine_fmt1_channel_path_desc(chp->chpid, &chp->desc_fmt1); + chsc_get_channel_measurement_chars(chp); - return chsc_get_channel_measurement_chars(chp); + return 0; } /** diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 452193f7298c..39bb2ea7b78f 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -1020,7 +1020,7 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp) chp->cmg = -1; if (!css_chsc_characteristics.scmc || !css_chsc_characteristics.secm) - return 0; + return -EINVAL; spin_lock_irq(&chsc_page_lock); memset(chsc_page, 0, PAGE_SIZE); -- cgit v1.2.3-59-g8ed1b From f9773768f10c6af77778df67d6050778de42f120 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 21 Jun 2016 13:59:08 +0200 Subject: s390/chsc: sanitize fmt check for chp_desc determination When fetching channel path descriptors we've only evaluated the rfmt parameter which could lead us to trigger the chsc even though the machine doesn't support the specific format or to not trigger the chsc and report a failure to userspace even though the machine would've supported it. Improve these checks and change the parameters of the in-kernel user to be less confusing. Signed-off-by: Sebastian Ott Reviewed-by: Peter Oberparleiter Reviewed-by: Cornelia Huck Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chsc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 39bb2ea7b78f..4fe9531fb128 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -907,7 +907,8 @@ int chsc_determine_channel_path_desc(struct chp_id chpid, int fmt, int rfmt, struct chsc_scpd *scpd_area; int ccode, ret; - if ((rfmt == 1) && !css_general_characteristics.fcs) + if ((rfmt == 1 || rfmt == 0) && c == 1 && + !css_general_characteristics.fcs) return -EINVAL; if ((rfmt == 2) && !css_general_characteristics.cib) return -EINVAL; @@ -966,7 +967,7 @@ int chsc_determine_fmt1_channel_path_desc(struct chp_id chpid, spin_lock_irqsave(&chsc_page_lock, flags); scpd_area = chsc_page; - ret = chsc_determine_channel_path_desc(chpid, 0, 0, 1, 0, scpd_area); + ret = chsc_determine_channel_path_desc(chpid, 0, 1, 1, 0, scpd_area); if (ret) goto out; chsc_resp = (void *)&scpd_area->response; -- cgit v1.2.3-59-g8ed1b From 687cb7f21695469626f683d709d931ad41b2ca68 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 21 Jun 2016 16:26:25 +0200 Subject: s390/chsc: improve channel path descriptor determination When we fetch channel path descriptors via chsc we use a suboptimal struct chsc_scpd and adjust that by casting the response to a generic chsc_response_struct. Simplify the code by improving struct chsc_scpd. Signed-off-by: Sebastian Ott Reviewed-by: Cornelia Huck Reviewed-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chsc.c | 10 ++++------ drivers/s390/cio/chsc.h | 5 +++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 4fe9531fb128..940e725bde1e 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -940,7 +940,6 @@ EXPORT_SYMBOL_GPL(chsc_determine_channel_path_desc); int chsc_determine_base_channel_path_desc(struct chp_id chpid, struct channel_path_desc *desc) { - struct chsc_response_struct *chsc_resp; struct chsc_scpd *scpd_area; unsigned long flags; int ret; @@ -950,8 +949,8 @@ int chsc_determine_base_channel_path_desc(struct chp_id chpid, ret = chsc_determine_channel_path_desc(chpid, 0, 0, 0, 0, scpd_area); if (ret) goto out; - chsc_resp = (void *)&scpd_area->response; - memcpy(desc, &chsc_resp->data, sizeof(*desc)); + + memcpy(desc, scpd_area->data, sizeof(*desc)); out: spin_unlock_irqrestore(&chsc_page_lock, flags); return ret; @@ -960,7 +959,6 @@ out: int chsc_determine_fmt1_channel_path_desc(struct chp_id chpid, struct channel_path_desc_fmt1 *desc) { - struct chsc_response_struct *chsc_resp; struct chsc_scpd *scpd_area; unsigned long flags; int ret; @@ -970,8 +968,8 @@ int chsc_determine_fmt1_channel_path_desc(struct chp_id chpid, ret = chsc_determine_channel_path_desc(chpid, 0, 1, 1, 0, scpd_area); if (ret) goto out; - chsc_resp = (void *)&scpd_area->response; - memcpy(desc, &chsc_resp->data, sizeof(*desc)); + + memcpy(desc, scpd_area->data, sizeof(*desc)); out: spin_unlock_irqrestore(&chsc_page_lock, flags); return ret; diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h index 0de134c3a204..67c87b6e63ec 100644 --- a/drivers/s390/cio/chsc.h +++ b/drivers/s390/cio/chsc.h @@ -112,8 +112,9 @@ struct chsc_scpd { u32 last_chpid:8; u32 zeroes1; struct chsc_header response; - u8 data[PAGE_SIZE - 20]; -} __attribute__ ((packed)); + u32:32; + u8 data[0]; +} __packed; struct chsc_sda_area { struct chsc_header request; -- cgit v1.2.3-59-g8ed1b From 7b1058bc59ac24eb01e259749e8655fb884aa711 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Sat, 16 Jul 2016 14:18:34 +0530 Subject: s390/cio/chp : Remove deprecated create_singlethread_workqueue The workqueue "chp_wq" is involved in performing pending configure tasks for channel paths. It has a single work item(&cfg_work) and hence doesn't require ordering. Also, it is not being used on a memory reclaim path. Hence, the singlethreaded workqueue has been replaced with the use of system_wq. System workqueues have been able to handle high level of concurrency for a long time now and hence it's not required to have a singlethreaded workqueue just to gain concurrency. Unlike a dedicated per-cpu workqueue created with create_singlethread_workqueue(), system_wq allows multiple work items to overlap executions even on the same CPU; however, a per-cpu workqueue doesn't have any CPU locality or global ordering guarantee unless the target CPU is explicitly specified and thus the increase of local concurrency shouldn't make any difference. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chp.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index d3b72eab2b8f..e96aced58627 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -47,8 +47,6 @@ static DEFINE_MUTEX(info_lock); /* Time after which channel-path status may be outdated. */ static unsigned long chp_info_expires; -/* Workqueue to perform pending configure tasks. */ -static struct workqueue_struct *chp_wq; static struct work_struct cfg_work; /* Wait queue for configure completion events. */ @@ -717,7 +715,7 @@ static void cfg_func(struct work_struct *work) wake_up_interruptible(&cfg_wait_queue); return; } - queue_work(chp_wq, &cfg_work); + schedule_work(&cfg_work); } /** @@ -735,7 +733,7 @@ void chp_cfg_schedule(struct chp_id chpid, int configure) cfg_set_task(chpid, configure ? cfg_configure : cfg_deconfigure); cfg_busy = 1; mutex_unlock(&cfg_lock); - queue_work(chp_wq, &cfg_work); + schedule_work(&cfg_work); } /** @@ -769,11 +767,6 @@ static int __init chp_init(void) ret = crw_register_handler(CRW_RSC_CPATH, chp_process_crw); if (ret) return ret; - chp_wq = create_singlethread_workqueue("cio_chp"); - if (!chp_wq) { - crw_unregister_handler(CRW_RSC_CPATH); - return -ENOMEM; - } INIT_WORK(&cfg_work, cfg_func); init_waitqueue_head(&cfg_wait_queue); if (info_update()) -- cgit v1.2.3-59-g8ed1b From 61282affd2b877cdc86df1f212b2b1a17317bf57 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 18 Jul 2016 09:02:56 +0200 Subject: s390/smp: clean up a condition I can never remember precedence rules. Let's add some parenthesis so this code is more clear. Signed-off-by: Dan Carpenter Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 5c8f7caf9f31..35531fe1c5ea 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -887,7 +887,7 @@ void __init smp_fill_possible_mask(void) sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1; sclp_max = min(smp_max_threads, sclp_max); - sclp_max = sclp.max_cores * sclp_max ?: nr_cpu_ids; + sclp_max = (sclp.max_cores * sclp_max) ?: nr_cpu_ids; possible = setup_possible_cpus ?: nr_cpu_ids; possible = min(possible, sclp_max); for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++) -- cgit v1.2.3-59-g8ed1b From 64a40c84001e55001a4d80496b6b56e4d04e4360 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 18 Jul 2016 09:00:00 +0200 Subject: s390/pci: Delete an unnecessary check before the function call "pci_dev_put" The pci_dev_put() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_event.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index fb2a9a560fdc..c2b27ad8e94d 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -145,8 +145,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) default: break; } - if (pdev) - pci_dev_put(pdev); + pci_dev_put(pdev); } void zpci_event_availability(void *data) -- cgit v1.2.3-59-g8ed1b