From d5b2c4cd0bbb78e6e59e36312ac0c296d379b9b7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 15 Feb 2021 20:47:08 +0100 Subject: s390/opcodes: rename selhhhr to selfhr Provide correct mnemonic for selfhr. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/tools/opcodes.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt index 46d8ed96cf06..0e207c46e8da 100644 --- a/arch/s390/tools/opcodes.txt +++ b/arch/s390/tools/opcodes.txt @@ -597,7 +597,7 @@ b9b3 cu42 RRE_RR b9bd trtre RRF_U0RR b9be srstu RRE_RR b9bf trte RRF_U0RR -b9c0 selhhhr RRF_RURR +b9c0 selfhr RRF_RURR b9c8 ahhhr RRF_R0RR2 b9c9 shhhr RRF_R0RR2 b9ca alhhhr RRF_R0RR2 -- cgit v1.2.3-59-g8ed1b From 86c827b39ebb200c65c01d2ed490ee15874efe71 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 12 Feb 2021 07:43:16 +0100 Subject: s390/mm: make pXd_deref() macros return a pointer This update fixes semantics of pXd_deref macros which are expected to return a CPU-addressable pointer. Signed-off-by: Alexander Gordeev Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pgtable.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 794746a32806..40abab9daa66 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1219,8 +1219,8 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) -#define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN) -#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) +#define p4d_deref(pud) ((unsigned long)__va(p4d_val(pud) & _REGION_ENTRY_ORIGIN)) +#define pgd_deref(pgd) ((unsigned long)__va(pgd_val(pgd) & _REGION_ENTRY_ORIGIN)) static inline unsigned long pmd_deref(pmd_t pmd) { @@ -1229,12 +1229,12 @@ static inline unsigned long pmd_deref(pmd_t pmd) origin_mask = _SEGMENT_ENTRY_ORIGIN; if (pmd_large(pmd)) origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE; - return pmd_val(pmd) & origin_mask; + return (unsigned long)__va(pmd_val(pmd) & origin_mask); } static inline unsigned long pmd_pfn(pmd_t pmd) { - return pmd_deref(pmd) >> PAGE_SHIFT; + return __pa(pmd_deref(pmd)) >> PAGE_SHIFT; } static inline unsigned long pud_deref(pud_t pud) @@ -1244,12 +1244,12 @@ static inline unsigned long pud_deref(pud_t pud) origin_mask = _REGION_ENTRY_ORIGIN; if (pud_large(pud)) origin_mask = _REGION3_ENTRY_ORIGIN_LARGE; - return pud_val(pud) & origin_mask; + return (unsigned long)__va(pud_val(pud) & origin_mask); } static inline unsigned long pud_pfn(pud_t pud) { - return pud_deref(pud) >> PAGE_SHIFT; + return __pa(pud_deref(pud)) >> PAGE_SHIFT; } /* -- cgit v1.2.3-59-g8ed1b From 0f3bf303fb628ed09ae288c94a84ecc075355755 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 12 Feb 2021 07:43:17 +0100 Subject: s390/mm: fix invalid __pa() usage in pfn_pXd() macros There is little sense in applying __pa() to a physical address, but that what pfn_pXd() macros do. Signed-off-by: Alexander Gordeev Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 40abab9daa66..29c7ecd5ad1d 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1329,7 +1329,7 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end) } #define gup_fast_permitted gup_fast_permitted -#define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot)) +#define pfn_pte(pfn, pgprot) mk_pte_phys(((pfn) << PAGE_SHIFT), (pgprot)) #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) #define pte_page(x) pfn_to_page(pte_pfn(x)) @@ -1636,7 +1636,7 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, } #define pmdp_collapse_flush pmdp_collapse_flush -#define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot)) +#define pfn_pmd(pfn, pgprot) mk_pmd_phys(((pfn) << PAGE_SHIFT), (pgprot)) #define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) static inline int pmd_trans_huge(pmd_t pmd) -- cgit v1.2.3-59-g8ed1b From 2a444fdc24a860ed0ca016045913ebc2fa09a66e Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 12 Feb 2021 07:43:18 +0100 Subject: s390/mm: fix phys vs virt confusion in pgtable allocation routines The physical address of page tables is passed around and used as virtual address in various locations. Signed-off-by: Alexander Gordeev Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pgalloc.h | 2 +- arch/s390/mm/pgalloc.c | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index d1297d6bbdcf..6b187cd72251 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -135,7 +135,7 @@ static inline void pmd_populate(struct mm_struct *mm, #define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte) #define pmd_pgtable(pmd) \ - (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE) + ((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)) /* * page table entry allocation/free routines. diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 4e87c819ddea..781965f7210e 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -58,7 +58,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm) if (!page) return NULL; arch_set_page_dat(page, 2); - return (unsigned long *) page_to_phys(page); + return (unsigned long *) page_to_virt(page); } void crst_table_free(struct mm_struct *mm, unsigned long *table) @@ -161,7 +161,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm) page = alloc_page(GFP_KERNEL); if (page) { - table = (u64 *)page_to_phys(page); + table = (u64 *)page_to_virt(page); memset64(table, _PAGE_INVALID, PTRS_PER_PTE); memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE); } @@ -194,7 +194,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) mask = atomic_read(&page->_refcount) >> 24; mask = (mask | (mask >> 4)) & 3; if (mask != 3) { - table = (unsigned long *) page_to_phys(page); + table = (unsigned long *) page_to_virt(page); bit = mask & 1; /* =1 -> second 2K */ if (bit) table += PTRS_PER_PTE; @@ -217,7 +217,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) } arch_set_page_dat(page, 0); /* Initialize page table */ - table = (unsigned long *) page_to_phys(page); + table = (unsigned long *) page_to_virt(page); if (mm_alloc_pgste(mm)) { /* Return 4K page table with PGSTEs */ atomic_xor_bits(&page->_refcount, 3 << 24); @@ -239,10 +239,10 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) struct page *page; unsigned int bit, mask; - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + page = virt_to_page(table); if (!mm_alloc_pgste(mm)) { /* Free 2K page table fragment of a 4K page */ - bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); + bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); spin_lock_bh(&mm->context.lock); mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24)); mask >>= 24; @@ -269,14 +269,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, unsigned int bit, mask; mm = tlb->mm; - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + page = virt_to_page(table); if (mm_alloc_pgste(mm)) { gmap_unlink(mm, table, vmaddr); - table = (unsigned long *) (__pa(table) | 3); + table = (unsigned long *) ((unsigned long)table | 3); tlb_remove_table(tlb, table); return; } - bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); + bit = ((unsigned long) table & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); spin_lock_bh(&mm->context.lock); mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24)); mask >>= 24; @@ -285,7 +285,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, else list_del(&page->lru); spin_unlock_bh(&mm->context.lock); - table = (unsigned long *) (__pa(table) | (1U << bit)); + table = (unsigned long *) ((unsigned long) table | (1U << bit)); tlb_remove_table(tlb, table); } @@ -293,7 +293,7 @@ void __tlb_remove_table(void *_table) { unsigned int mask = (unsigned long) _table & 3; void *table = (void *)((unsigned long) _table ^ mask); - struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + struct page *page = virt_to_page(table); switch (mask) { case 0: /* pmd, pud, or p4d */ -- cgit v1.2.3-59-g8ed1b From 4c86d2f51a0b2f8f7793129660f1232ec01d562b Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 12 Feb 2021 07:43:19 +0100 Subject: s390/mm: fix phys vs virt confusion in vmem_*() functions family Due to historical reasons vmem_*() functions misuse or ignore the notion of physical vs virtual addresses difference. Signed-off-by: Alexander Gordeev Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/vmem.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 01f3a5f58e64..77afa28876a1 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -26,14 +26,14 @@ static void __ref *vmem_alloc_pages(unsigned int order) if (slab_is_available()) return (void *)__get_free_pages(GFP_KERNEL, order); - return (void *) memblock_phys_alloc(size, size); + return memblock_alloc(size, size); } static void vmem_free_pages(unsigned long addr, int order) { /* We don't expect boot memory to be removed ever. */ if (!slab_is_available() || - WARN_ON_ONCE(PageReserved(phys_to_page(addr)))) + WARN_ON_ONCE(PageReserved(virt_to_page(addr)))) return; free_pages(addr, order); } @@ -56,7 +56,7 @@ pte_t __ref *vmem_pte_alloc(void) if (slab_is_available()) pte = (pte_t *) page_table_alloc(&init_mm); else - pte = (pte_t *) memblock_phys_alloc(size, size); + pte = (pte_t *) memblock_alloc(size, size); if (!pte) return NULL; memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); @@ -84,7 +84,7 @@ static void vmemmap_flush_unused_sub_pmd(void) { if (!unused_sub_pmd_start) return; - memset(__va(unused_sub_pmd_start), PAGE_UNUSED, + memset((void *)unused_sub_pmd_start, PAGE_UNUSED, ALIGN(unused_sub_pmd_start, PMD_SIZE) - unused_sub_pmd_start); unused_sub_pmd_start = 0; } @@ -97,7 +97,7 @@ static void vmemmap_mark_sub_pmd_used(unsigned long start, unsigned long end) * getting removed (just in case the memmap never gets initialized, * e.g., because the memory block never gets onlined). */ - memset(__va(start), 0, sizeof(struct page)); + memset((void *)start, 0, sizeof(struct page)); } static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end) @@ -118,7 +118,7 @@ static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end) static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) { - void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); + unsigned long page = ALIGN_DOWN(start, PMD_SIZE); vmemmap_flush_unused_sub_pmd(); @@ -127,7 +127,7 @@ static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) /* Mark the unused parts of the new memmap page PAGE_UNUSED. */ if (!IS_ALIGNED(start, PMD_SIZE)) - memset(page, PAGE_UNUSED, start - __pa(page)); + memset((void *)page, PAGE_UNUSED, start - page); /* * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of * consecutive sections. Remember for the last added PMD the last @@ -140,11 +140,11 @@ static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) /* Returns true if the PMD is completely unused and can be freed. */ static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end) { - void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); + unsigned long page = ALIGN_DOWN(start, PMD_SIZE); vmemmap_flush_unused_sub_pmd(); - memset(__va(start), PAGE_UNUSED, end - start); - return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE); + memset((void *)start, PAGE_UNUSED, end - start); + return !memchr_inv((void *)page, PAGE_UNUSED, PMD_SIZE); } /* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */ @@ -165,7 +165,7 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, if (pte_none(*pte)) continue; if (!direct) - vmem_free_pages(pfn_to_phys(pte_pfn(*pte)), 0); + vmem_free_pages((unsigned long) pfn_to_virt(pte_pfn(*pte)), 0); pte_clear(&init_mm, addr, pte); } else if (pte_none(*pte)) { if (!direct) { @@ -175,7 +175,7 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, goto out; pte_val(*pte) = __pa(new_page) | prot; } else { - pte_val(*pte) = addr | prot; + pte_val(*pte) = __pa(addr) | prot; } } else { continue; @@ -200,7 +200,7 @@ static void try_free_pte_table(pmd_t *pmd, unsigned long start) if (!pte_none(*pte)) return; } - vmem_pte_free(__va(pmd_deref(*pmd))); + vmem_pte_free((unsigned long *) pmd_deref(*pmd)); pmd_clear(pmd); } @@ -241,7 +241,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, IS_ALIGNED(next, PMD_SIZE) && MACHINE_HAS_EDAT1 && addr && direct && !debug_pagealloc_enabled()) { - pmd_val(*pmd) = addr | prot; + pmd_val(*pmd) = __pa(addr) | prot; pages++; continue; } else if (!direct && MACHINE_HAS_EDAT1) { @@ -337,7 +337,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, IS_ALIGNED(next, PUD_SIZE) && MACHINE_HAS_EDAT2 && addr && direct && !debug_pagealloc_enabled()) { - pud_val(*pud) = addr | prot; + pud_val(*pud) = __pa(addr) | prot; pages++; continue; } -- cgit v1.2.3-59-g8ed1b From 588a079ebd6297dc225c8bba717a71a8065e1a4b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 17 Feb 2021 07:10:46 +0100 Subject: s390/smp: consolidate locking for smp_rescan() Move locking to __smp_rescan() instead of duplicating it to all call sites. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/smp.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index e299892440b6..9fa85c927a0c 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -780,6 +780,8 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) u16 core_id; int nr, i; + get_online_cpus(); + mutex_lock(&smp_cpu_state_mutex); nr = 0; cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); /* @@ -800,6 +802,8 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) configured = i < info->configured; nr += smp_add_core(&info->core[i], &avail, configured, early); } + mutex_unlock(&smp_cpu_state_mutex); + put_online_cpus(); return nr; } @@ -847,9 +851,7 @@ void __init smp_detect_cpus(void) pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus); /* Add CPUs present at boot */ - get_online_cpus(); __smp_rescan_cpus(info, true); - put_online_cpus(); memblock_free_early((unsigned long)info, sizeof(*info)); } @@ -1178,11 +1180,7 @@ int __ref smp_rescan_cpus(void) if (!info) return -ENOMEM; smp_get_core_info(info, 0); - get_online_cpus(); - mutex_lock(&smp_cpu_state_mutex); nr = __smp_rescan_cpus(info, false); - mutex_unlock(&smp_cpu_state_mutex); - put_online_cpus(); kfree(info); if (nr) topology_schedule_update(); -- cgit v1.2.3-59-g8ed1b From 62c8dca9e194326802b43c60763f856d782b225c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 17 Feb 2021 07:13:02 +0100 Subject: s390/smp: __smp_rescan_cpus() - move cpumask away from stack Avoid a potentially large stack frame and overflow by making "cpumask_t avail" a static variable. There is no concurrent access due to the existing locking. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 9fa85c927a0c..3d838b7bbb3b 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -775,7 +775,7 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) { struct sclp_core_entry *core; - cpumask_t avail; + static cpumask_t avail; bool configured; u16 core_id; int nr, i; -- cgit v1.2.3-59-g8ed1b From f213e5502d2e68f141a4e3faadffd03000224192 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 17 Feb 2021 07:19:09 +0100 Subject: s390/smp: smp_emergency_stop() - move cpumask away from stack Make "cpumask_t cpumask" a static variable to avoid a potential large stack frame. Also protect against potential concurrent callers by introducing a local lock. Note: smp_emergency_stop() gets only called with irqs and machine checks disabled, therefore a cpu local deadlock is not possible. For concurrent callers the first cpu which enters the critical section wins and will stop all other cpus. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/smp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 3d838b7bbb3b..7f30d954519b 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -434,10 +434,12 @@ void notrace smp_yield_cpu(int cpu) */ void notrace smp_emergency_stop(void) { - cpumask_t cpumask; + static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; + static cpumask_t cpumask; u64 end; int cpu; + arch_spin_lock(&lock); cpumask_copy(&cpumask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &cpumask); @@ -458,6 +460,7 @@ void notrace smp_emergency_stop(void) break; cpu_relax(); } + arch_spin_unlock(&lock); } NOKPROBE_SYMBOL(smp_emergency_stop); -- cgit v1.2.3-59-g8ed1b From da6d2c289dbe8871f1977bf7c348309d37b867b0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 17 Feb 2021 08:15:00 +0100 Subject: s390/topology: move cpumasks away from stack Make cpumasks static variables to avoid potential large stack frames. There shouldn't be any concurrent callers since all current callers are serialized with the cpu hotplug lock. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/topology.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index ca47141a5be9..e7ce447651b9 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -62,16 +62,16 @@ static struct mask_info drawer_info; struct cpu_topology_s390 cpu_topology[NR_CPUS]; EXPORT_SYMBOL_GPL(cpu_topology); -static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) +static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int cpu) { - cpumask_t mask; + static cpumask_t mask; cpumask_copy(&mask, cpumask_of(cpu)); switch (topology_mode) { case TOPOLOGY_MODE_HW: while (info) { if (cpumask_test_cpu(cpu, &info->mask)) { - mask = info->mask; + cpumask_copy(&mask, &info->mask); break; } info = info->next; @@ -89,23 +89,24 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) break; } cpumask_and(&mask, &mask, cpu_online_mask); - return mask; + cpumask_copy(dst, &mask); } -static cpumask_t cpu_thread_map(unsigned int cpu) +static void cpu_thread_map(cpumask_t *dst, unsigned int cpu) { - cpumask_t mask; + static cpumask_t mask; int i; cpumask_copy(&mask, cpumask_of(cpu)); if (topology_mode != TOPOLOGY_MODE_HW) - return mask; + goto out; cpu -= cpu % (smp_cpu_mtid + 1); for (i = 0; i <= smp_cpu_mtid; i++) if (cpu_present(cpu + i)) cpumask_set_cpu(cpu + i, &mask); cpumask_and(&mask, &mask, cpu_online_mask); - return mask; +out: + cpumask_copy(dst, &mask); } #define TOPOLOGY_CORE_BITS 64 @@ -250,10 +251,10 @@ void update_cpu_masks(void) for_each_possible_cpu(cpu) { topo = &cpu_topology[cpu]; - topo->thread_mask = cpu_thread_map(cpu); - topo->core_mask = cpu_group_map(&socket_info, cpu); - topo->book_mask = cpu_group_map(&book_info, cpu); - topo->drawer_mask = cpu_group_map(&drawer_info, cpu); + cpu_thread_map(&topo->thread_mask, cpu); + cpu_group_map(&topo->core_mask, &socket_info, cpu); + cpu_group_map(&topo->book_mask, &book_info, cpu); + cpu_group_map(&topo->drawer_mask, &drawer_info, cpu); topo->booted_cores = 0; if (topology_mode != TOPOLOGY_MODE_HW) { id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu; -- cgit v1.2.3-59-g8ed1b From 55f03123f60985815e57cc9d1b29dea48e32e709 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 19 Feb 2021 23:32:56 +0100 Subject: s390/smp: implement arch_irq_work_raise() The immediate need to have this is to have bpf_send_signal() send the signal ASAP instead of during the next hrtimer interrupt. However, it should also improve irq_work_queue() latencies in general, as well as get s390 out of the lame architectures list [1]. [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/irq_work.c?h=v5.11#n45 Signed-off-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/irq_work.h | 12 ++++++++++++ arch/s390/kernel/smp.c | 11 +++++++++++ 2 files changed, 23 insertions(+) create mode 100644 arch/s390/include/asm/irq_work.h diff --git a/arch/s390/include/asm/irq_work.h b/arch/s390/include/asm/irq_work.h new file mode 100644 index 000000000000..603783766d0a --- /dev/null +++ b/arch/s390/include/asm/irq_work.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_IRQ_WORK_H +#define _ASM_S390_IRQ_WORK_H + +static inline bool arch_irq_work_has_interrupt(void) +{ + return true; +} + +void arch_irq_work_raise(void); + +#endif /* _ASM_S390_IRQ_WORK_H */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 7f30d954519b..58c8afa3da65 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -62,6 +63,7 @@ enum { ec_call_function_single, ec_stop_cpu, ec_mcck_pending, + ec_irq_work, }; enum { @@ -508,6 +510,8 @@ static void smp_handle_ext_call(void) generic_smp_call_function_single_interrupt(); if (test_bit(ec_mcck_pending, &bits)) __s390_handle_mcck(); + if (test_bit(ec_irq_work, &bits)) + irq_work_run(); } static void do_ext_call_interrupt(struct ext_code ext_code, @@ -540,6 +544,13 @@ void smp_send_reschedule(int cpu) pcpu_ec_call(pcpu_devices + cpu, ec_schedule); } +#ifdef CONFIG_IRQ_WORK +void arch_irq_work_raise(void) +{ + pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work); +} +#endif + /* * parameter area for the set/clear control bit callbacks */ -- cgit v1.2.3-59-g8ed1b From 182f709c5cff683e6732d04c78e328de0532284f Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Tue, 16 Feb 2021 12:06:45 +0100 Subject: virtio/s390: implement virtio-ccw revision 2 correctly CCW_CMD_READ_STATUS was introduced with revision 2 of virtio-ccw, and drivers should only rely on it being implemented when they negotiated at least that revision with the device. However, virtio_ccw_get_status() issued READ_STATUS for any device operating at least at revision 1. If the device accepts READ_STATUS regardless of the negotiated revision (which some implementations like QEMU do, even though the spec currently does not allow it), everything works as intended. While a device rejecting the command should also be handled gracefully, we will not be able to see any changes the device makes to the status, such as setting NEEDS_RESET or setting the status to zero after a completed reset. We negotiated the revision to at most 1, as we never bumped the maximum revision; let's do that now and properly send READ_STATUS only if we are operating at least at revision 2. Cc: stable@vger.kernel.org Fixes: 7d3ce5ab9430 ("virtio/s390: support READ_STATUS command for virtio-ccw") Reviewed-by: Halil Pasic Signed-off-by: Cornelia Huck Signed-off-by: Vasily Gorbik Link: https://lore.kernel.org/r/20210216110645.1087321-1-cohuck@redhat.com Signed-off-by: Vasily Gorbik --- drivers/s390/virtio/virtio_ccw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 5730572b52cd..54e686dca6de 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -117,7 +117,7 @@ struct virtio_rev_info { }; /* the highest virtio-ccw revision we support */ -#define VIRTIO_CCW_REV_MAX 1 +#define VIRTIO_CCW_REV_MAX 2 struct virtio_ccw_vq_info { struct virtqueue *vq; @@ -952,7 +952,7 @@ static u8 virtio_ccw_get_status(struct virtio_device *vdev) u8 old_status = vcdev->dma_area->status; struct ccw1 *ccw; - if (vcdev->revision < 1) + if (vcdev->revision < 2) return vcdev->dma_area->status; ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw)); -- cgit v1.2.3-59-g8ed1b From cf6acb8bdb1d829b85a4daa2944bf9e71c93f4b9 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 22 Feb 2021 18:01:54 +0100 Subject: s390/cpumf: Add support for complete counter set extraction Add support to the CPU Measurement counter facility device driver to extract complete counter sets per CPU and per counter set from user space. This includes a new device named /dev/hwctr and support for the device driver functions open, close and ioctl. Other functions are not supported. The ioctl command supports 3 subcommands: S390_HWCTR_START: enables counter sets on a list of CPUs. S390_HWCTR_STOP: disables counter sets on a list of CPUs. S390_HWCTR_READ: reads counter sets on a list of CPUs. The ioctl(..., S390_HWCTR_READ, ...) is the only subcommand which returns data. It requires member data_bytes to be positive and indicates the maximum amount of data available to store counter set data. The other ioctl() subcommands do not use this member and it should be set to zero. The S390_HWCTR_READ subcommand returns the following data: The cpuset data is flattened using the following scheme, stored in member data: 0x0 0x8 0xc 0x10 0x10 0x18 0x20 0x28 0xU-1 +---------+-----+---------+-----+---------+-----+-----+------+------+ | no_cpus | cpu | no_sets | set | no_cnts | cv1 | cv2 | .... | cv_n | +---------+-----+---------+-----+---------+-----+-----+------+------+ 0xU 0xU+4 0xU+8 0xU+10 0xV-1 +-----+---------+-----+-----+------+------+ | set | no_cnts | cv1 | cv2 | .... | cv_n | +-----+---------+-----+-----+------+------+ 0xV 0xV+4 0xV+8 0xV+c +-----+---------+-----+---------+-----+-----+------+------+ | cpu | no_sets | set | no_cnts | cv1 | cv2 | .... | cv_n | +-----+---------+-----+---------+-----+-----+------+------+ U and V denote arbitrary hexadezimal addresses. The first integer represents the number of CPUs data was extracted from. This is followed by CPU number and number of counter sets extracted. Both are two integer values. This is followed by the set identifer and number of counters extracted. Both are two integer values. This is followed by the counter values, each element is eight bytes in size. The S390_HWCTR_READ ioctl subcommand is also limited to one call per minute. This ensures that an application does not read out the counter sets too often and reduces the overall CPU performance. The complete counter set extraction is an expensive operation. Reviewed-by: Sumanth Korikkar Signed-off-by: Thomas Richter Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/uapi/asm/perf_cpum_cf_diag.h | 51 +++ arch/s390/kernel/perf_cpum_cf_diag.c | 548 +++++++++++++++++++++++-- include/linux/cpuhotplug.h | 1 + 3 files changed, 577 insertions(+), 23 deletions(-) create mode 100644 arch/s390/include/uapi/asm/perf_cpum_cf_diag.h diff --git a/arch/s390/include/uapi/asm/perf_cpum_cf_diag.h b/arch/s390/include/uapi/asm/perf_cpum_cf_diag.h new file mode 100644 index 000000000000..3d8284b95f87 --- /dev/null +++ b/arch/s390/include/uapi/asm/perf_cpum_cf_diag.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright IBM Corp. 2021 + * Interface implementation for communication with the CPU Measurement + * counter facility device driver. + * + * Author(s): Thomas Richter + * + * Define for ioctl() commands to communicate with the CPU Measurement + * counter facility device driver. + */ + +#ifndef _PERF_CPUM_CF_DIAG_H +#define _PERF_CPUM_CF_DIAG_H + +#include +#include + +#define S390_HWCTR_DEVICE "hwctr" +#define S390_HWCTR_START_VERSION 1 + +struct s390_ctrset_start { /* Set CPUs to operate on */ + __u64 version; /* Version of interface */ + __u64 data_bytes; /* # of bytes required */ + __u64 cpumask_len; /* Length of CPU mask in bytes */ + __u64 *cpumask; /* Pointer to CPU mask */ + __u64 counter_sets; /* Bit mask of counter sets to get */ +}; + +struct s390_ctrset_setdata { /* Counter set data */ + __u32 set; /* Counter set number */ + __u32 no_cnts; /* # of counters stored in cv[] */ + __u64 cv[0]; /* Counter values (variable length) */ +}; + +struct s390_ctrset_cpudata { /* Counter set data per CPU */ + __u32 cpu_nr; /* CPU number */ + __u32 no_sets; /* # of counters sets in data[] */ + struct s390_ctrset_setdata data[0]; +}; + +struct s390_ctrset_read { /* Structure to get all ctr sets */ + __u64 no_cpus; /* Total # of CPUs data taken from */ + struct s390_ctrset_cpudata data[0]; +}; + +#define S390_HWCTR_MAGIC 'C' /* Random magic # for ioctls */ +#define S390_HWCTR_START _IOWR(S390_HWCTR_MAGIC, 1, struct s390_ctrset_start) +#define S390_HWCTR_STOP _IO(S390_HWCTR_MAGIC, 2) +#define S390_HWCTR_READ _IOWR(S390_HWCTR_MAGIC, 3, struct s390_ctrset_read) +#endif diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index b5c86fb70d63..db4877bbb9aa 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -2,7 +2,7 @@ /* * Performance event support for s390x - CPU-measurement Counter Sets * - * Copyright IBM Corp. 2019 + * Copyright IBM Corp. 2019, 2021 * Author(s): Hendrik Brueckner * Thomas Richer */ @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include @@ -24,15 +26,20 @@ #include #include -#define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ +#include +#define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ +#define CF_DIAG_MIN_INTERVAL 60 /* Minimum counter set read */ + /* interval in seconds */ +static unsigned long cf_diag_interval = CF_DIAG_MIN_INTERVAL; static unsigned int cf_diag_cpu_speed; static debug_info_t *cf_diag_dbg; -struct cf_diag_csd { /* Counter set data per CPU */ +struct cf_diag_csd { /* Counter set data per CPU */ size_t used; /* Bytes used in data/start */ unsigned char start[PAGE_SIZE]; /* Counter set at event start */ unsigned char data[PAGE_SIZE]; /* Counter set at event delete */ + unsigned int sets; /* # Counter set saved in data */ }; static DEFINE_PER_CPU(struct cf_diag_csd, cf_diag_csd); @@ -178,18 +185,35 @@ static void cf_diag_disable(struct pmu *pmu) /* Number of perf events counting hardware events */ static atomic_t cf_diag_events = ATOMIC_INIT(0); +/* Used to avoid races in calling reserve/release_cpumf_hardware */ +static DEFINE_MUTEX(cf_diag_reserve_mutex); /* Release the PMU if event is the last perf event */ static void cf_diag_perf_event_destroy(struct perf_event *event) { debug_sprintf_event(cf_diag_dbg, 5, "%s event %p cpu %d cf_diag_events %d\n", - __func__, event, event->cpu, + __func__, event, smp_processor_id(), atomic_read(&cf_diag_events)); if (atomic_dec_return(&cf_diag_events) == 0) __kernel_cpumcf_end(); } +static int get_authctrsets(void) +{ + struct cpu_cf_events *cpuhw; + unsigned long auth = 0; + enum cpumf_ctr_set i; + + cpuhw = &get_cpu_var(cpu_cf_events); + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i]) + auth |= cpumf_ctr_ctl[i]; + } + put_cpu_var(cpu_cf_events); + return auth; +} + /* Setup the event. Test for authorized counter sets and only include counter * sets which are authorized at the time of the setup. Including unauthorized * counter sets result in specification exception (and panic). @@ -197,15 +221,12 @@ static void cf_diag_perf_event_destroy(struct perf_event *event) static int __hw_perf_event_init(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; - struct cpu_cf_events *cpuhw; - enum cpumf_ctr_set i; int err = 0; debug_sprintf_event(cf_diag_dbg, 5, "%s event %p cpu %d\n", __func__, event, event->cpu); event->hw.config = attr->config; - event->hw.config_base = 0; /* Add all authorized counter sets to config_base. The * the hardware init function is either called per-cpu or just once @@ -215,11 +236,7 @@ static int __hw_perf_event_init(struct perf_event *event) * Checking the authorization on any CPU is fine as the hardware * applies the same authorization settings to all CPUs. */ - cpuhw = &get_cpu_var(cpu_cf_events); - for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) - if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i]) - event->hw.config_base |= cpumf_ctr_ctl[i]; - put_cpu_var(cpu_cf_events); + event->hw.config_base = get_authctrsets(); /* No authorized counter sets, nothing to count/sample */ if (!event->hw.config_base) { @@ -237,6 +254,25 @@ out: return err; } +/* Return 0 if the CPU-measurement counter facility is currently free + * and an error otherwise. + */ +static int cf_diag_perf_event_inuse(void) +{ + int err = 0; + + if (!atomic_inc_not_zero(&cf_diag_events)) { + mutex_lock(&cf_diag_reserve_mutex); + if (atomic_read(&cf_diag_events) == 0 && + __kernel_cpumcf_begin()) + err = -EBUSY; + else + err = atomic_inc_return(&cf_diag_events); + mutex_unlock(&cf_diag_reserve_mutex); + } + return err; +} + static int cf_diag_event_init(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; @@ -264,13 +300,9 @@ static int cf_diag_event_init(struct perf_event *event) } /* Initialize for using the CPU-measurement counter facility */ - if (atomic_inc_return(&cf_diag_events) == 1) { - if (__kernel_cpumcf_begin()) { - atomic_dec(&cf_diag_events); - err = -EBUSY; - goto out; - } - } + err = cf_diag_perf_event_inuse(); + if (err < 0) + goto out; event->destroy = cf_diag_perf_event_destroy; err = __hw_perf_event_init(event); @@ -599,6 +631,8 @@ static void cf_diag_del(struct perf_event *event, int flags) cpuhw->flags &= ~PMU_F_IN_USE; } +/* Default counter set events and format attribute groups */ + CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG); static struct attribute *cf_diag_events_attr[] = { @@ -663,6 +697,452 @@ static void cf_diag_get_cpu_speed(void) } } +/* Code to create device and file I/O operations */ +static atomic_t ctrset_opencnt = ATOMIC_INIT(0); /* Excl. access */ + +static int cf_diag_open(struct inode *inode, struct file *file) +{ + int err = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (atomic_xchg(&ctrset_opencnt, 1)) + return -EBUSY; + + /* Avoid concurrent access with perf_event_open() system call */ + mutex_lock(&cf_diag_reserve_mutex); + if (atomic_read(&cf_diag_events) || __kernel_cpumcf_begin()) + err = -EBUSY; + mutex_unlock(&cf_diag_reserve_mutex); + if (err) { + atomic_set(&ctrset_opencnt, 0); + return err; + } + file->private_data = NULL; + debug_sprintf_event(cf_diag_dbg, 2, "%s\n", __func__); + /* nonseekable_open() never fails */ + return nonseekable_open(inode, file); +} + +/* Variables for ioctl() interface support */ +static DEFINE_MUTEX(cf_diag_ctrset_mutex); +static struct cf_diag_ctrset { + unsigned long ctrset; /* Bit mask of counter set to read */ + cpumask_t mask; /* CPU mask to read from */ + time64_t lastread; /* Epoch counter set last read */ +} cf_diag_ctrset; + +static void cf_diag_ctrset_clear(void) +{ + cpumask_clear(&cf_diag_ctrset.mask); + cf_diag_ctrset.ctrset = 0; +} + +static void cf_diag_release_cpu(void *p) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + + debug_sprintf_event(cf_diag_dbg, 3, "%s cpu %d\n", __func__, + smp_processor_id()); + lcctl(0); /* Reset counter sets */ + cpuhw->state = 0; /* Save state in CPU hardware state */ +} + +/* Release function is also called when application gets terminated without + * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command. + * Since only one application is allowed to open the device, simple stop all + * CPU counter sets. + */ +static int cf_diag_release(struct inode *inode, struct file *file) +{ + on_each_cpu(cf_diag_release_cpu, NULL, 1); + cf_diag_ctrset_clear(); + atomic_set(&ctrset_opencnt, 0); + __kernel_cpumcf_end(); + debug_sprintf_event(cf_diag_dbg, 2, "%s\n", __func__); + return 0; +} + +struct cf_diag_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */ + unsigned int sets; /* Counter set bit mask */ + atomic_t cpus_ack; /* # CPUs successfully executed func */ +}; + +static int cf_diag_all_copy(unsigned long arg, cpumask_t *mask) +{ + struct s390_ctrset_read __user *ctrset_read; + unsigned int cpu, cpus, rc; + void __user *uptr; + + ctrset_read = (struct s390_ctrset_read __user *)arg; + uptr = ctrset_read->data; + for_each_cpu(cpu, mask) { + struct cf_diag_csd *csd = per_cpu_ptr(&cf_diag_csd, cpu); + struct s390_ctrset_cpudata __user *ctrset_cpudata; + + ctrset_cpudata = uptr; + debug_sprintf_event(cf_diag_dbg, 5, "%s cpu %d used %zd\n", + __func__, cpu, csd->used); + rc = put_user(cpu, &ctrset_cpudata->cpu_nr); + rc |= put_user(csd->sets, &ctrset_cpudata->no_sets); + rc |= copy_to_user(ctrset_cpudata->data, csd->data, csd->used); + if (rc) + return -EFAULT; + uptr += sizeof(struct s390_ctrset_cpudata) + csd->used; + cond_resched(); + } + cpus = cpumask_weight(mask); + if (put_user(cpus, &ctrset_read->no_cpus)) + return -EFAULT; + debug_sprintf_event(cf_diag_dbg, 5, "%s copied %ld\n", + __func__, uptr - (void __user *)ctrset_read->data); + return 0; +} + +static size_t cf_diag_cpuset_read(struct s390_ctrset_setdata *p, int ctrset, + int ctrset_size, size_t room) +{ + size_t need = 0; + int rc = -1; + + need = sizeof(*p) + sizeof(u64) * ctrset_size; + debug_sprintf_event(cf_diag_dbg, 5, + "%s room %zd need %zd set %#x set_size %d\n", + __func__, room, need, ctrset, ctrset_size); + if (need <= room) { + p->set = cpumf_ctr_ctl[ctrset]; + p->no_cnts = ctrset_size; + rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv); + if (rc == 3) /* Nothing stored */ + need = 0; + } + debug_sprintf_event(cf_diag_dbg, 5, "%s need %zd rc %d\n", __func__, + need, rc); + return need; +} + +/* Read all counter sets. Since the perf_event_open() system call with + * event cpum_cf_diag/.../ is blocked when this interface is active, reuse + * the perf_event_open() data buffer to store the counter sets. + */ +static void cf_diag_cpu_read(void *parm) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cf_diag_csd *csd = this_cpu_ptr(&cf_diag_csd); + struct cf_diag_call_on_cpu_parm *p = parm; + int set, set_size; + size_t space; + + debug_sprintf_event(cf_diag_dbg, 5, + "%s new %#x flags %#x state %#llx\n", + __func__, p->sets, cpuhw->flags, + cpuhw->state); + /* No data saved yet */ + csd->used = 0; + csd->sets = 0; + memset(csd->data, 0, sizeof(csd->data)); + + /* Scan the counter sets */ + for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) { + struct s390_ctrset_setdata *sp = (void *)csd->data + csd->used; + + if (!(p->sets & cpumf_ctr_ctl[set])) + continue; /* Counter set not in list */ + set_size = cf_diag_ctrset_size(set, &cpuhw->info); + space = sizeof(csd->data) - csd->used; + space = cf_diag_cpuset_read(sp, set, set_size, space); + if (space) { + csd->used += space; + csd->sets += 1; + } + debug_sprintf_event(cf_diag_dbg, 5, "%s sp %px space %zd\n", + __func__, sp, space); + } + debug_sprintf_event(cf_diag_dbg, 5, "%s sets %d used %zd\n", __func__, + csd->sets, csd->used); +} + +static int cf_diag_all_read(unsigned long arg) +{ + struct cf_diag_call_on_cpu_parm p; + cpumask_var_t mask; + time64_t now; + int rc = 0; + + debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__); + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + now = ktime_get_seconds(); + if (cf_diag_ctrset.lastread + cf_diag_interval > now) { + debug_sprintf_event(cf_diag_dbg, 5, "%s now %lld " + " lastread %lld\n", __func__, now, + cf_diag_ctrset.lastread); + rc = -EAGAIN; + goto out; + } else { + cf_diag_ctrset.lastread = now; + } + p.sets = cf_diag_ctrset.ctrset; + cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask); + on_each_cpu_mask(mask, cf_diag_cpu_read, &p, 1); + rc = cf_diag_all_copy(arg, mask); +out: + free_cpumask_var(mask); + debug_sprintf_event(cf_diag_dbg, 5, "%s rc %d\n", __func__, rc); + return rc; +} + +/* Stop all counter sets via ioctl interface */ +static void cf_diag_ioctl_off(void *parm) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cf_diag_call_on_cpu_parm *p = parm; + int rc; + + debug_sprintf_event(cf_diag_dbg, 5, + "%s new %#x flags %#x state %#llx\n", + __func__, p->sets, cpuhw->flags, + cpuhw->state); + + ctr_set_multiple_disable(&cpuhw->state, p->sets); + ctr_set_multiple_stop(&cpuhw->state, p->sets); + rc = lcctl(cpuhw->state); /* Stop counter sets */ + if (!cpuhw->state) + cpuhw->flags &= ~PMU_F_IN_USE; + debug_sprintf_event(cf_diag_dbg, 5, + "%s rc %d flags %#x state %#llx\n", __func__, + rc, cpuhw->flags, cpuhw->state); +} + +/* Start counter sets on particular CPU */ +static void cf_diag_ioctl_on(void *parm) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cf_diag_call_on_cpu_parm *p = parm; + int rc; + + debug_sprintf_event(cf_diag_dbg, 5, + "%s new %#x flags %#x state %#llx\n", + __func__, p->sets, cpuhw->flags, + cpuhw->state); + + if (!(cpuhw->flags & PMU_F_IN_USE)) + cpuhw->state = 0; + cpuhw->flags |= PMU_F_IN_USE; + rc = lcctl(cpuhw->state); /* Reset unused counter sets */ + ctr_set_multiple_enable(&cpuhw->state, p->sets); + ctr_set_multiple_start(&cpuhw->state, p->sets); + rc |= lcctl(cpuhw->state); /* Start counter sets */ + if (!rc) + atomic_inc(&p->cpus_ack); + debug_sprintf_event(cf_diag_dbg, 5, "%s rc %d state %#llx\n", + __func__, rc, cpuhw->state); +} + +static int cf_diag_all_stop(void) +{ + struct cf_diag_call_on_cpu_parm p = { + .sets = cf_diag_ctrset.ctrset, + }; + cpumask_var_t mask; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask); + on_each_cpu_mask(mask, cf_diag_ioctl_off, &p, 1); + free_cpumask_var(mask); + return 0; +} + +static int cf_diag_all_start(void) +{ + struct cf_diag_call_on_cpu_parm p = { + .sets = cf_diag_ctrset.ctrset, + .cpus_ack = ATOMIC_INIT(0), + }; + cpumask_var_t mask; + int rc = 0; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask); + on_each_cpu_mask(mask, cf_diag_ioctl_on, &p, 1); + if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) { + on_each_cpu_mask(mask, cf_diag_ioctl_off, &p, 1); + rc = -EIO; + } + free_cpumask_var(mask); + return rc; +} + +/* Return the maximum required space for all possible CPUs in case one + * CPU will be onlined during the START, READ, STOP cycles. + * To find out the size of the counter sets, any one CPU will do. They + * all have the same counter sets. + */ +static size_t cf_diag_needspace(unsigned int sets) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + size_t bytes = 0; + int i; + + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + if (!(sets & cpumf_ctr_ctl[i])) + continue; + bytes += cf_diag_ctrset_size(i, &cpuhw->info) * sizeof(u64) + + sizeof(((struct s390_ctrset_setdata *)0)->set) + + sizeof(((struct s390_ctrset_setdata *)0)->no_cnts); + } + bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids * + (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) + + sizeof(((struct s390_ctrset_cpudata *)0)->no_sets)); + debug_sprintf_event(cf_diag_dbg, 5, "%s bytes %ld\n", __func__, + bytes); + return bytes; +} + +static long cf_diag_ioctl_read(unsigned long arg) +{ + struct s390_ctrset_read read; + int ret = 0; + + debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__); + if (copy_from_user(&read, (char __user *)arg, sizeof(read))) + return -EFAULT; + ret = cf_diag_all_read(arg); + debug_sprintf_event(cf_diag_dbg, 5, "%s ret %d\n", __func__, ret); + return ret; +} + +static long cf_diag_ioctl_stop(void) +{ + int ret; + + debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__); + ret = cf_diag_all_stop(); + cf_diag_ctrset_clear(); + debug_sprintf_event(cf_diag_dbg, 5, "%s ret %d\n", __func__, ret); + return ret; +} + +static long cf_diag_ioctl_start(unsigned long arg) +{ + struct s390_ctrset_start __user *ustart; + struct s390_ctrset_start start; + void __user *umask; + unsigned int len; + int ret = 0; + size_t need; + + if (cf_diag_ctrset.ctrset) + return -EBUSY; + ustart = (struct s390_ctrset_start __user *)arg; + if (copy_from_user(&start, ustart, sizeof(start))) + return -EFAULT; + if (start.version != S390_HWCTR_START_VERSION) + return -EINVAL; + if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] | + cpumf_ctr_ctl[CPUMF_CTR_SET_USER] | + cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] | + cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] | + cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG])) + return -EINVAL; /* Invalid counter set */ + if (!start.counter_sets) + return -EINVAL; /* No counter set at all? */ + cpumask_clear(&cf_diag_ctrset.mask); + len = min_t(u64, start.cpumask_len, cpumask_size()); + umask = (void __user *)start.cpumask; + if (copy_from_user(&cf_diag_ctrset.mask, umask, len)) + return -EFAULT; + if (cpumask_empty(&cf_diag_ctrset.mask)) + return -EINVAL; + need = cf_diag_needspace(start.counter_sets); + if (put_user(need, &ustart->data_bytes)) + ret = -EFAULT; + if (ret) + goto out; + cf_diag_ctrset.ctrset = start.counter_sets; + ret = cf_diag_all_start(); +out: + if (ret) + cf_diag_ctrset_clear(); + debug_sprintf_event(cf_diag_dbg, 2, "%s sets %#lx need %ld ret %d\n", + __func__, cf_diag_ctrset.ctrset, need, ret); + return ret; +} + +static long cf_diag_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int ret; + + debug_sprintf_event(cf_diag_dbg, 2, "%s cmd %#x arg %lx\n", __func__, + cmd, arg); + get_online_cpus(); + mutex_lock(&cf_diag_ctrset_mutex); + switch (cmd) { + case S390_HWCTR_START: + ret = cf_diag_ioctl_start(arg); + break; + case S390_HWCTR_STOP: + ret = cf_diag_ioctl_stop(); + break; + case S390_HWCTR_READ: + ret = cf_diag_ioctl_read(arg); + break; + default: + ret = -ENOTTY; + break; + } + mutex_unlock(&cf_diag_ctrset_mutex); + put_online_cpus(); + debug_sprintf_event(cf_diag_dbg, 2, "%s ret %d\n", __func__, ret); + return ret; +} + +static const struct file_operations cf_diag_fops = { + .owner = THIS_MODULE, + .open = cf_diag_open, + .release = cf_diag_release, + .unlocked_ioctl = cf_diag_ioctl, + .compat_ioctl = cf_diag_ioctl, + .llseek = no_llseek +}; + +static struct miscdevice cf_diag_dev = { + .name = S390_HWCTR_DEVICE, + .minor = MISC_DYNAMIC_MINOR, + .fops = &cf_diag_fops, +}; + +static int cf_diag_online_cpu(unsigned int cpu) +{ + struct cf_diag_call_on_cpu_parm p; + + mutex_lock(&cf_diag_ctrset_mutex); + if (!cf_diag_ctrset.ctrset) + goto out; + p.sets = cf_diag_ctrset.ctrset; + cf_diag_ioctl_on(&p); +out: + mutex_unlock(&cf_diag_ctrset_mutex); + return 0; +} + +static int cf_diag_offline_cpu(unsigned int cpu) +{ + struct cf_diag_call_on_cpu_parm p; + + mutex_lock(&cf_diag_ctrset_mutex); + if (!cf_diag_ctrset.ctrset) + goto out; + p.sets = cf_diag_ctrset.ctrset; + cf_diag_ioctl_off(&p); +out: + mutex_unlock(&cf_diag_ctrset_mutex); + return 0; +} + /* Initialize the counter set PMU to generate complete counter set data as * event raw data. This relies on the CPU Measurement Counter Facility device * already being loaded and initialized. @@ -685,21 +1165,43 @@ static int __init cf_diag_init(void) return -ENOMEM; } + rc = misc_register(&cf_diag_dev); + if (rc) { + pr_err("Registration of /dev/" S390_HWCTR_DEVICE + "failed rc=%d\n", rc); + goto out; + } + /* Setup s390dbf facility */ cf_diag_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128); if (!cf_diag_dbg) { pr_err("Registration of s390dbf(cpum_cf_diag) failed\n"); - return -ENOMEM; + rc = -ENOMEM; + goto out_dbf; } debug_register_view(cf_diag_dbg, &debug_sprintf_view); rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1); if (rc) { - debug_unregister_view(cf_diag_dbg, &debug_sprintf_view); - debug_unregister(cf_diag_dbg); pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n", rc); + goto out_perf; } + rc = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_S390_CFD_ONLINE, + "perf/s390/cfd:online", + cf_diag_online_cpu, cf_diag_offline_cpu); + if (!rc) + goto out; + + pr_err("Registration of CPUHP_AP_PERF_S390_CFD_ONLINE failed rc=%i\n", + rc); + perf_pmu_unregister(&cf_diag); +out_perf: + debug_unregister_view(cf_diag_dbg, &debug_sprintf_view); + debug_unregister(cf_diag_dbg); +out_dbf: + misc_deregister(&cf_diag_dev); +out: return rc; } -arch_initcall(cf_diag_init); +device_initcall(cf_diag_init); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index ee09a39627d6..9129ba231423 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -168,6 +168,7 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_CQM_ONLINE, CPUHP_AP_PERF_X86_CSTATE_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE, + CPUHP_AP_PERF_S390_CFD_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, CPUHP_AP_PERF_ARM_CCN_ONLINE, -- cgit v1.2.3-59-g8ed1b