diff options
Diffstat (limited to 'arch')
27 files changed, 378 insertions, 130 deletions
diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h index 4e6e88a6b2f4..2244a94ed9c9 100644 --- a/arch/arm/include/asm/bug.h +++ b/arch/arm/include/asm/bug.h @@ -37,7 +37,7 @@ do { \ ".pushsection .rodata.str, \"aMS\", %progbits, 1\n" \ "2:\t.asciz " #__file "\n" \ ".popsection\n" \ - ".pushsection __bug_table,\"a\"\n" \ + ".pushsection __bug_table,\"aw\"\n" \ ".align 2\n" \ "3:\t.word 1b, 2b\n" \ "\t.hword " #__line ", 0\n" \ diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index d69bebf697e7..74504b154256 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -116,7 +116,7 @@ struct cpu_cache_fns { void (*dma_unmap_area)(const void *, size_t, int); void (*dma_flush_range)(const void *, const void *); -}; +} __no_randomize_layout; /* * Select the calling method diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h index 366448eb0fb7..a02a57186f56 100644 --- a/arch/arm64/include/asm/bug.h +++ b/arch/arm64/include/asm/bug.h @@ -36,7 +36,7 @@ #ifdef CONFIG_GENERIC_BUG #define __BUG_ENTRY(flags) \ - ".pushsection __bug_table,\"a\"\n\t" \ + ".pushsection __bug_table,\"aw\"\n\t" \ ".align 2\n\t" \ "0: .long 1f - 0b\n\t" \ _BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ diff --git a/arch/blackfin/include/asm/bug.h b/arch/blackfin/include/asm/bug.h index 8d9b1eba89c4..76b2e82ee730 100644 --- a/arch/blackfin/include/asm/bug.h +++ b/arch/blackfin/include/asm/bug.h @@ -21,7 +21,7 @@ #define _BUG_OR_WARN(flags) \ asm volatile( \ "1: .hword %0\n" \ - " .section __bug_table,\"a\",@progbits\n" \ + " .section __bug_table,\"aw\",@progbits\n" \ "2: .long 1b\n" \ " .long %1\n" \ " .short %2\n" \ @@ -38,7 +38,7 @@ #define _BUG_OR_WARN(flags) \ asm volatile( \ "1: .hword %0\n" \ - " .section __bug_table,\"a\",@progbits\n" \ + " .section __bug_table,\"aw\",@progbits\n" \ "2: .long 1b\n" \ " .short %1\n" \ " .org 2b + %2\n" \ diff --git a/arch/blackfin/include/asm/flat.h b/arch/blackfin/include/asm/flat.h index 296d7f56fbfd..f1d6ba7afbf2 100644 --- a/arch/blackfin/include/asm/flat.h +++ b/arch/blackfin/include/asm/flat.h @@ -44,8 +44,7 @@ flat_get_relocate_addr (unsigned long relval) return relval & 0x03ffffff; /* Mask out top 6 bits */ } -static inline int flat_set_persistent(unsigned long relval, - unsigned long *persistent) +static inline int flat_set_persistent(u32 relval, u32 *persistent) { int type = (relval >> 26) & 7; if (type == 3) { diff --git a/arch/blackfin/kernel/flat.c b/arch/blackfin/kernel/flat.c index d29ab6a2e909..8ebc54daaa8e 100644 --- a/arch/blackfin/kernel/flat.c +++ b/arch/blackfin/kernel/flat.c @@ -32,7 +32,7 @@ unsigned long bfin_get_addr_from_rp(u32 *ptr, break; case FLAT_BFIN_RELOC_TYPE_32_BIT: - pr_debug("*ptr = %lx", get_unaligned(ptr)); + pr_debug("*ptr = %x", get_unaligned(ptr)); val = get_unaligned(ptr); break; @@ -77,7 +77,7 @@ void bfin_put_addr_at_rp(u32 *ptr, u32 addr, u32 relval) case FLAT_BFIN_RELOC_TYPE_32_BIT: put_unaligned(addr, ptr); - pr_debug("new ptr =%lx", get_unaligned(ptr)); + pr_debug("new ptr =%x", get_unaligned(ptr)); break; } } diff --git a/arch/h8300/include/asm/flat.h b/arch/h8300/include/asm/flat.h index 18d024251738..7e0bd6fa1532 100644 --- a/arch/h8300/include/asm/flat.h +++ b/arch/h8300/include/asm/flat.h @@ -24,7 +24,7 @@ static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, u32 *addr, u32 *persistent) { u32 val = get_unaligned((__force u32 *)rp); - if (!(flags & FLAT_FLAG_GOTPIC) + if (!(flags & FLAT_FLAG_GOTPIC)) val &= 0x00ffffff; *addr = val; return 0; diff --git a/arch/m68k/include/asm/flat.h b/arch/m68k/include/asm/flat.h index 48b62790fe70..b2a41f5b3890 100644 --- a/arch/m68k/include/asm/flat.h +++ b/arch/m68k/include/asm/flat.h @@ -30,8 +30,7 @@ static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel) } #define flat_get_relocate_addr(rel) (rel) -static inline int flat_set_persistent(unsigned long relval, - unsigned long *persistent) +static inline int flat_set_persistent(u32 relval, u32 *persistent) { return 0; } diff --git a/arch/mn10300/include/asm/bug.h b/arch/mn10300/include/asm/bug.h index aa6a38886391..811414fb002d 100644 --- a/arch/mn10300/include/asm/bug.h +++ b/arch/mn10300/include/asm/bug.h @@ -21,7 +21,7 @@ do { \ asm volatile( \ " syscall 15 \n" \ "0: \n" \ - " .section __bug_table,\"a\" \n" \ + " .section __bug_table,\"aw\" \n" \ " .long 0b,%0,%1 \n" \ " .previous \n" \ : \ diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index d2742273a685..07ea467f22fc 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -27,7 +27,7 @@ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ - "\t.pushsection __bug_table,\"a\"\n" \ + "\t.pushsection __bug_table,\"aw\"\n" \ "2:\t" ASM_WORD_INSN "1b, %c0\n" \ "\t.short %c1, %c2\n" \ "\t.org 2b+%c3\n" \ @@ -50,7 +50,7 @@ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ - "\t.pushsection __bug_table,\"a\"\n" \ + "\t.pushsection __bug_table,\"aw\"\n" \ "2:\t" ASM_WORD_INSN "1b, %c0\n" \ "\t.short %c1, %c2\n" \ "\t.org 2b+%c3\n" \ @@ -64,7 +64,7 @@ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ - "\t.pushsection __bug_table,\"a\"\n" \ + "\t.pushsection __bug_table,\"aw\"\n" \ "2:\t" ASM_WORD_INSN "1b\n" \ "\t.short %c0\n" \ "\t.org 2b+%c1\n" \ diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 0151af6c2a50..87fcc1948817 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -18,7 +18,7 @@ #include <asm/asm-offsets.h> #ifdef CONFIG_DEBUG_BUGVERBOSE .macro EMIT_BUG_ENTRY addr,file,line,flags - .section __bug_table,"a" + .section __bug_table,"aw" 5001: PPC_LONG \addr, 5002f .short \line, \flags .org 5001b+BUG_ENTRY_SIZE @@ -29,7 +29,7 @@ .endm #else .macro EMIT_BUG_ENTRY addr,file,line,flags - .section __bug_table,"a" + .section __bug_table,"aw" 5001: PPC_LONG \addr .short \flags .org 5001b+BUG_ENTRY_SIZE @@ -42,14 +42,14 @@ sizeof(struct bug_entry), respectively */ #ifdef CONFIG_DEBUG_BUGVERBOSE #define _EMIT_BUG_ENTRY \ - ".section __bug_table,\"a\"\n" \ + ".section __bug_table,\"aw\"\n" \ "2:\t" PPC_LONG "1b, %0\n" \ "\t.short %1, %2\n" \ ".org 2b+%3\n" \ ".previous\n" #else #define _EMIT_BUG_ENTRY \ - ".section __bug_table,\"a\"\n" \ + ".section __bug_table,\"aw\"\n" \ "2:\t" PPC_LONG "1b\n" \ "\t.short %2\n" \ ".org 2b+%3\n" \ diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h index 1bbd9dbfe4e0..ce9cc123988b 100644 --- a/arch/s390/include/asm/bug.h +++ b/arch/s390/include/asm/bug.h @@ -14,7 +14,7 @@ ".section .rodata.str,\"aMS\",@progbits,1\n" \ "2: .asciz \""__FILE__"\"\n" \ ".previous\n" \ - ".section __bug_table,\"a\"\n" \ + ".section __bug_table,\"aw\"\n" \ "3: .long 1b-3b,2b-3b\n" \ " .short %0,%1\n" \ " .org 3b+%2\n" \ @@ -30,7 +30,7 @@ asm volatile( \ "0: j 0b+2\n" \ "1:\n" \ - ".section __bug_table,\"a\"\n" \ + ".section __bug_table,\"aw\"\n" \ "2: .long 1b-2b\n" \ " .short %0\n" \ " .org 2b+%1\n" \ diff --git a/arch/sh/include/asm/bug.h b/arch/sh/include/asm/bug.h index c9828f785ca0..5b5086367639 100644 --- a/arch/sh/include/asm/bug.h +++ b/arch/sh/include/asm/bug.h @@ -24,14 +24,14 @@ */ #ifdef CONFIG_DEBUG_BUGVERBOSE #define _EMIT_BUG_ENTRY \ - "\t.pushsection __bug_table,\"a\"\n" \ + "\t.pushsection __bug_table,\"aw\"\n" \ "2:\t.long 1b, %O1\n" \ "\t.short %O2, %O3\n" \ "\t.org 2b+%O4\n" \ "\t.popsection\n" #else #define _EMIT_BUG_ENTRY \ - "\t.pushsection __bug_table,\"a\"\n" \ + "\t.pushsection __bug_table,\"aw\"\n" \ "2:\t.long 1b\n" \ "\t.short %O3\n" \ "\t.org 2b+%O4\n" \ diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h index ec9c04de3664..ff05992dae7a 100644 --- a/arch/sparc/include/asm/trap_block.h +++ b/arch/sparc/include/asm/trap_block.h @@ -54,6 +54,7 @@ extern struct trap_per_cpu trap_block[NR_CPUS]; void init_cur_cpu_trap(struct thread_info *); void setup_tba(void); extern int ncpus_probed; +extern u64 cpu_mondo_counter[NR_CPUS]; unsigned long real_hard_smp_processor_id(void); diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 24f21c726dfa..f10e2f712394 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -673,12 +673,14 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, static int dma_4v_supported(struct device *dev, u64 device_mask) { struct iommu *iommu = dev->archdata.iommu; - u64 dma_addr_mask; + u64 dma_addr_mask = iommu->dma_addr_mask; - if (device_mask > DMA_BIT_MASK(32) && iommu->atu) - dma_addr_mask = iommu->atu->dma_addr_mask; - else - dma_addr_mask = iommu->dma_addr_mask; + if (device_mask > DMA_BIT_MASK(32)) { + if (iommu->atu) + dma_addr_mask = iommu->atu->dma_addr_mask; + else + return 0; + } if ((device_mask & dma_addr_mask) == dma_addr_mask) return 1; diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index fdf31040a7dc..3218bc43302e 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -622,22 +622,48 @@ retry: } } -/* Multi-cpu list version. */ +#define CPU_MONDO_COUNTER(cpuid) (cpu_mondo_counter[cpuid]) +#define MONDO_USEC_WAIT_MIN 2 +#define MONDO_USEC_WAIT_MAX 100 +#define MONDO_RETRY_LIMIT 500000 + +/* Multi-cpu list version. + * + * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'. + * Sometimes not all cpus receive the mondo, requiring us to re-send + * the mondo until all cpus have received, or cpus are truly stuck + * unable to receive mondo, and we timeout. + * Occasionally a target cpu strand is borrowed briefly by hypervisor to + * perform guest service, such as PCIe error handling. Consider the + * service time, 1 second overall wait is reasonable for 1 cpu. + * Here two in-between mondo check wait time are defined: 2 usec for + * single cpu quick turn around and up to 100usec for large cpu count. + * Deliver mondo to large number of cpus could take longer, we adjusts + * the retry count as long as target cpus are making forward progress. + */ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) { - int retries, this_cpu, prev_sent, i, saw_cpu_error; + int this_cpu, tot_cpus, prev_sent, i, rem; + int usec_wait, retries, tot_retries; + u16 first_cpu = 0xffff; + unsigned long xc_rcvd = 0; unsigned long status; + int ecpuerror_id = 0; + int enocpu_id = 0; u16 *cpu_list; + u16 cpu; this_cpu = smp_processor_id(); - cpu_list = __va(tb->cpu_list_pa); - - saw_cpu_error = 0; - retries = 0; + usec_wait = cnt * MONDO_USEC_WAIT_MIN; + if (usec_wait > MONDO_USEC_WAIT_MAX) + usec_wait = MONDO_USEC_WAIT_MAX; + retries = tot_retries = 0; + tot_cpus = cnt; prev_sent = 0; + do { - int forward_progress, n_sent; + int n_sent, mondo_delivered, target_cpu_busy; status = sun4v_cpu_mondo_send(cnt, tb->cpu_list_pa, @@ -645,94 +671,113 @@ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) /* HV_EOK means all cpus received the xcall, we're done. */ if (likely(status == HV_EOK)) - break; + goto xcall_done; + + /* If not these non-fatal errors, panic */ + if (unlikely((status != HV_EWOULDBLOCK) && + (status != HV_ECPUERROR) && + (status != HV_ENOCPU))) + goto fatal_errors; /* First, see if we made any forward progress. * + * Go through the cpu_list, count the target cpus that have + * received our mondo (n_sent), and those that did not (rem). + * Re-pack cpu_list with the cpus remain to be retried in the + * front - this simplifies tracking the truly stalled cpus. + * * The hypervisor indicates successful sends by setting * cpu list entries to the value 0xffff. + * + * EWOULDBLOCK means some target cpus did not receive the + * mondo and retry usually helps. + * + * ECPUERROR means at least one target cpu is in error state, + * it's usually safe to skip the faulty cpu and retry. + * + * ENOCPU means one of the target cpu doesn't belong to the + * domain, perhaps offlined which is unexpected, but not + * fatal and it's okay to skip the offlined cpu. */ + rem = 0; n_sent = 0; for (i = 0; i < cnt; i++) { - if (likely(cpu_list[i] == 0xffff)) + cpu = cpu_list[i]; + if (likely(cpu == 0xffff)) { n_sent++; + } else if ((status == HV_ECPUERROR) && + (sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) { + ecpuerror_id = cpu + 1; + } else if (status == HV_ENOCPU && !cpu_online(cpu)) { + enocpu_id = cpu + 1; + } else { + cpu_list[rem++] = cpu; + } } - forward_progress = 0; - if (n_sent > prev_sent) - forward_progress = 1; + /* No cpu remained, we're done. */ + if (rem == 0) + break; - prev_sent = n_sent; + /* Otherwise, update the cpu count for retry. */ + cnt = rem; - /* If we get a HV_ECPUERROR, then one or more of the cpus - * in the list are in error state. Use the cpu_state() - * hypervisor call to find out which cpus are in error state. + /* Record the overall number of mondos received by the + * first of the remaining cpus. */ - if (unlikely(status == HV_ECPUERROR)) { - for (i = 0; i < cnt; i++) { - long err; - u16 cpu; + if (first_cpu != cpu_list[0]) { + first_cpu = cpu_list[0]; + xc_rcvd = CPU_MONDO_COUNTER(first_cpu); + } - cpu = cpu_list[i]; - if (cpu == 0xffff) - continue; + /* Was any mondo delivered successfully? */ + mondo_delivered = (n_sent > prev_sent); + prev_sent = n_sent; - err = sun4v_cpu_state(cpu); - if (err == HV_CPU_STATE_ERROR) { - saw_cpu_error = (cpu + 1); - cpu_list[i] = 0xffff; - } - } - } else if (unlikely(status != HV_EWOULDBLOCK)) - goto fatal_mondo_error; + /* or, was any target cpu busy processing other mondos? */ + target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu)); + xc_rcvd = CPU_MONDO_COUNTER(first_cpu); - /* Don't bother rewriting the CPU list, just leave the - * 0xffff and non-0xffff entries in there and the - * hypervisor will do the right thing. - * - * Only advance timeout state if we didn't make any - * forward progress. + /* Retry count is for no progress. If we're making progress, + * reset the retry count. */ - if (unlikely(!forward_progress)) { - if (unlikely(++retries > 10000)) - goto fatal_mondo_timeout; - - /* Delay a little bit to let other cpus catch up - * on their cpu mondo queue work. - */ - udelay(2 * cnt); + if (likely(mondo_delivered || target_cpu_busy)) { + tot_retries += retries; + retries = 0; + } else if (unlikely(retries > MONDO_RETRY_LIMIT)) { + goto fatal_mondo_timeout; } - } while (1); - if (unlikely(saw_cpu_error)) - goto fatal_mondo_cpu_error; + /* Delay a little bit to let other cpus catch up on + * their cpu mondo queue work. + */ + if (!mondo_delivered) + udelay(usec_wait); - return; + retries++; + } while (1); -fatal_mondo_cpu_error: - printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus " - "(including %d) were in error state\n", - this_cpu, saw_cpu_error - 1); +xcall_done: + if (unlikely(ecpuerror_id > 0)) { + pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n", + this_cpu, ecpuerror_id - 1); + } else if (unlikely(enocpu_id > 0)) { + pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n", + this_cpu, enocpu_id - 1); + } return; +fatal_errors: + /* fatal errors include bad alignment, etc */ + pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n", + this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa); + panic("Unexpected SUN4V mondo error %lu\n", status); + fatal_mondo_timeout: - printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward " - " progress after %d retries.\n", - this_cpu, retries); - goto dump_cpu_list_and_out; - -fatal_mondo_error: - printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n", - this_cpu, status); - printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) " - "mondo_block_pa(%lx)\n", - this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa); - -dump_cpu_list_and_out: - printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu); - for (i = 0; i < cnt; i++) - printk("%u ", cpu_list[i]); - printk("]\n"); + /* some cpus being non-responsive to the cpu mondo */ + pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n", + this_cpu, first_cpu, (tot_retries + retries), tot_cpus); + panic("SUN4V mondo timeout panic\n"); } static void (*xcall_deliver_impl)(struct trap_per_cpu *, int); diff --git a/arch/sparc/kernel/sun4v_ivec.S b/arch/sparc/kernel/sun4v_ivec.S index 559bc5e9c199..34631995859a 100644 --- a/arch/sparc/kernel/sun4v_ivec.S +++ b/arch/sparc/kernel/sun4v_ivec.S @@ -26,6 +26,21 @@ sun4v_cpu_mondo: ldxa [%g0] ASI_SCRATCHPAD, %g4 sub %g4, TRAP_PER_CPU_FAULT_INFO, %g4 + /* Get smp_processor_id() into %g3 */ + sethi %hi(trap_block), %g5 + or %g5, %lo(trap_block), %g5 + sub %g4, %g5, %g3 + srlx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 + + /* Increment cpu_mondo_counter[smp_processor_id()] */ + sethi %hi(cpu_mondo_counter), %g5 + or %g5, %lo(cpu_mondo_counter), %g5 + sllx %g3, 3, %g3 + add %g5, %g3, %g5 + ldx [%g5], %g3 + add %g3, 1, %g3 + stx %g3, [%g5] + /* Get CPU mondo queue base phys address into %g7. */ ldx [%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7 diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 196ee5eb4d48..ad31af1dd726 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2733,6 +2733,7 @@ void do_getpsr(struct pt_regs *regs) } } +u64 cpu_mondo_counter[NR_CPUS] = {0}; struct trap_per_cpu trap_block[NR_CPUS]; EXPORT_SYMBOL(trap_block); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index aa62437d1aa1..98b0f0729527 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -1708,6 +1708,120 @@ static __initconst const u64 glm_hw_cache_extra_regs }, }; +static __initconst const u64 glp_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0380, /* ICACHE.ACCESSES */ + [C(RESULT_MISS)] = 0x0280, /* ICACHE.MISSES */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */ + [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */ + [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ + [C(RESULT_MISS)] = 0xe08, /* DTLB_LOAD_MISSES.WALK_COMPLETED */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ + [C(RESULT_MISS)] = 0xe49, /* DTLB_STORE_MISSES.WALK_COMPLETED */ + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x00c0, /* INST_RETIRED.ANY_P */ + [C(RESULT_MISS)] = 0x0481, /* ITLB.MISS */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [C(RESULT_MISS)] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, +}; + +static __initconst const u64 glp_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = GLM_DEMAND_READ| + GLM_LLC_ACCESS, + [C(RESULT_MISS)] = GLM_DEMAND_READ| + GLM_LLC_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = GLM_DEMAND_WRITE| + GLM_LLC_ACCESS, + [C(RESULT_MISS)] = GLM_DEMAND_WRITE| + GLM_LLC_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, +}; + #define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */ #define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */ #define KNL_MCDRAM_LOCAL BIT_ULL(21) @@ -3016,6 +3130,9 @@ static int hsw_hw_config(struct perf_event *event) return 0; } +static struct event_constraint counter0_constraint = + INTEL_ALL_EVENT_CONSTRAINT(0, 0x1); + static struct event_constraint counter2_constraint = EVENT_CONSTRAINT(0, 0x4, 0); @@ -3037,6 +3154,21 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx, return c; } +static struct event_constraint * +glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c; + + /* :ppp means to do reduced skid PEBS which is PMC0 only. */ + if (event->attr.precise_ip == 3) + return &counter0_constraint; + + c = intel_get_event_constraints(cpuc, idx, event); + + return c; +} + /* * Broadwell: * @@ -3265,10 +3397,8 @@ static void intel_pmu_cpu_dying(int cpu) static void intel_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) { - if (x86_pmu.pebs_active) - intel_pmu_pebs_sched_task(ctx, sched_in); - if (x86_pmu.lbr_nr) - intel_pmu_lbr_sched_task(ctx, sched_in); + intel_pmu_pebs_sched_task(ctx, sched_in); + intel_pmu_lbr_sched_task(ctx, sched_in); } PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); @@ -3838,6 +3968,32 @@ __init int intel_pmu_init(void) pr_cont("Goldmont events, "); break; + case INTEL_FAM6_ATOM_GEMINI_LAKE: + memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + + intel_pmu_lbr_init_skl(); + + x86_pmu.event_constraints = intel_slm_event_constraints; + x86_pmu.pebs_constraints = intel_glp_pebs_event_constraints; + x86_pmu.extra_regs = intel_glm_extra_regs; + /* + * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS + * for precise cycles. + */ + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.lbr_pt_coexist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.get_event_constraints = glp_get_event_constraints; + x86_pmu.cpu_events = glm_events_attrs; + /* Goldmont Plus has 4-wide pipeline */ + event_attr_td_total_slots_scale_glm.event_str = "4"; + pr_cont("Goldmont plus events, "); + break; + case INTEL_FAM6_WESTMERE: case INTEL_FAM6_WESTMERE_EP: case INTEL_FAM6_WESTMERE_EX: diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 238ae3248ba5..4cf100ff2a37 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -40,16 +40,16 @@ * Model specific counters: * MSR_CORE_C1_RES: CORE C1 Residency Counter * perf code: 0x00 - * Available model: SLM,AMT + * Available model: SLM,AMT,GLM * Scope: Core (each processor core has a MSR) * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 - * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM * Scope: Core * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW - * SKL,KNL + * SKL,KNL,GLM * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 @@ -57,16 +57,17 @@ * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 - * Available model: SNB,IVB,HSW,BDW,SKL,KNL + * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL + * GLM * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW - * SKL,KNL + * SKL,KNL,GLM * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 @@ -82,7 +83,7 @@ * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 - * Available model: HSW ULT only + * Available model: HSW ULT, GLM * Scope: Package (physical package) * */ @@ -504,6 +505,17 @@ static const struct cstate_model knl_cstates __initconst = { }; +static const struct cstate_model glm_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | + BIT(PERF_CSTATE_CORE_C3_RES) | + BIT(PERF_CSTATE_CORE_C6_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | + BIT(PERF_CSTATE_PKG_C10_RES), +}; + #define X86_CSTATES_MODEL(model, states) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) } @@ -546,6 +558,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates), X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates), + + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index c6d23ffe422d..a322fed5f8ed 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -606,12 +606,6 @@ static inline void intel_pmu_drain_pebs_buffer(void) x86_pmu.drain_pebs(®s); } -void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in) -{ - if (!sched_in) - intel_pmu_drain_pebs_buffer(); -} - /* * PEBS */ @@ -651,6 +645,12 @@ struct event_constraint intel_glm_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_glp_pebs_event_constraints[] = { + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), + EVENT_CONSTRAINT_END +}; + struct event_constraint intel_nehalem_pebs_event_constraints[] = { INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */ INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ @@ -816,6 +816,14 @@ static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc) return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs); } +void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!sched_in && pebs_needs_sched_cb(cpuc)) + intel_pmu_drain_pebs_buffer(); +} + static inline void pebs_update_threshold(struct cpu_hw_events *cpuc) { struct debug_store *ds = cpuc->ds; @@ -889,6 +897,8 @@ void intel_pmu_pebs_enable(struct perf_event *event) if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { ds->pebs_event_reset[hwc->idx] = (u64)(-hwc->sample_period) & x86_pmu.cntval_mask; + } else { + ds->pebs_event_reset[hwc->idx] = 0; } } diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index eb261656a320..955457a30197 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -380,8 +380,12 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct x86_perf_task_context *task_ctx; + if (!cpuc->lbr_users) + return; + /* * If LBR callstack feature is enabled and the stack was saved when * the task was scheduled out, restore the stack. Otherwise flush diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 53728eea1bed..476aec3a4cab 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -879,6 +879,8 @@ extern struct event_constraint intel_slm_pebs_event_constraints[]; extern struct event_constraint intel_glm_pebs_event_constraints[]; +extern struct event_constraint intel_glp_pebs_event_constraints[]; + extern struct event_constraint intel_nehalem_pebs_event_constraints[]; extern struct event_constraint intel_westmere_pebs_event_constraints[]; diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 39e702d90cdb..aa6b2023d8f8 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -35,7 +35,7 @@ #define _BUG_FLAGS(ins, flags) \ do { \ asm volatile("1:\t" ins "\n" \ - ".pushsection __bug_table,\"a\"\n" \ + ".pushsection __bug_table,\"aw\"\n" \ "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \ "\t" __BUG_REL(%c0) "\t# bug_entry::file\n" \ "\t.word %c1" "\t# bug_entry::line\n" \ @@ -52,7 +52,7 @@ do { \ #define _BUG_FLAGS(ins, flags) \ do { \ asm volatile("1:\t" ins "\n" \ - ".pushsection __bug_table,\"a\"\n" \ + ".pushsection __bug_table,\"aw\"\n" \ "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \ "\t.word %c0" "\t# bug_entry::flags\n" \ "\t.org 2b+%c1\n" \ diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 34b984c60790..6cf65437b5e5 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -52,10 +52,10 @@ typedef u8 kprobe_opcode_t; #define flush_insn_slot(p) do { } while (0) /* optinsn template addresses */ -extern __visible kprobe_opcode_t optprobe_template_entry; -extern __visible kprobe_opcode_t optprobe_template_val; -extern __visible kprobe_opcode_t optprobe_template_call; -extern __visible kprobe_opcode_t optprobe_template_end; +extern __visible kprobe_opcode_t optprobe_template_entry[]; +extern __visible kprobe_opcode_t optprobe_template_val[]; +extern __visible kprobe_opcode_t optprobe_template_call[]; +extern __visible kprobe_opcode_t optprobe_template_end[]; #define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE) #define MAX_OPTINSN_SIZE \ (((unsigned long)&optprobe_template_end - \ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index cb976bab6299..9ffc36bfe4cd 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -84,7 +84,7 @@ struct pv_init_ops { */ unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, unsigned long addr, unsigned len); -}; +} __no_randomize_layout; struct pv_lazy_ops { @@ -92,12 +92,12 @@ struct pv_lazy_ops { void (*enter)(void); void (*leave)(void); void (*flush)(void); -}; +} __no_randomize_layout; struct pv_time_ops { unsigned long long (*sched_clock)(void); unsigned long long (*steal_clock)(int cpu); -}; +} __no_randomize_layout; struct pv_cpu_ops { /* hooks for various privileged instructions */ @@ -176,7 +176,7 @@ struct pv_cpu_ops { void (*start_context_switch)(struct task_struct *prev); void (*end_context_switch)(struct task_struct *next); -}; +} __no_randomize_layout; struct pv_irq_ops { /* @@ -199,7 +199,7 @@ struct pv_irq_ops { #ifdef CONFIG_X86_64 void (*adjust_exception_frame)(void); #endif -}; +} __no_randomize_layout; struct pv_mmu_ops { unsigned long (*read_cr2)(void); @@ -305,7 +305,7 @@ struct pv_mmu_ops { an mfn. We can tell which is which from the index. */ void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags); -}; +} __no_randomize_layout; struct arch_spinlock; #ifdef CONFIG_SMP @@ -322,7 +322,7 @@ struct pv_lock_ops { void (*kick)(int cpu); struct paravirt_callee_save vcpu_is_preempted; -}; +} __no_randomize_layout; /* This contains all the paravirt structures: we get a convenient * number for each function using the offset which we use to indicate @@ -334,7 +334,7 @@ struct paravirt_patch_template { struct pv_irq_ops pv_irq_ops; struct pv_mmu_ops pv_mmu_ops; struct pv_lock_ops pv_lock_ops; -}; +} __no_randomize_layout; extern struct pv_info pv_info; extern struct pv_init_ops pv_init_ops; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6a79547e8ee0..028245e1c42b 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -129,7 +129,7 @@ struct cpuinfo_x86 { /* Index into per_cpu list: */ u16 cpu_index; u32 microcode; -}; +} __randomize_layout; struct cpuid_regs { u32 eax, ebx, ecx, edx; |