Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--  arch/x86/kernel/cpu/Makefile                            |   1
-rw-r--r--  arch/x86/kernel/cpu/amd.c                               |  19
-rw-r--r--  arch/x86/kernel/cpu/common.c                            |  32
-rw-r--r--  arch/x86/kernel/cpu/intel.c                             |   6
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c                   |  26
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c                        | 145
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p5.c                         |   9
-rw-r--r--  arch/x86/kernel/cpu/mcheck/winchip.c                    |   8
-rw-r--r--  arch/x86/kernel/cpu/microcode/core.c                    |   2
-rw-r--r--  arch/x86/kernel/cpu/mkcapflags.sh                       |   2
-rw-r--r--  arch/x86/kernel/cpu/mshyperv.c                          |   1
-rw-r--r--  arch/x86/kernel/cpu/mtrr/cyrix.c                        |   6
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c                      |   6
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c                        |  76
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h                        |   2
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c                  |   1
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c               |   4
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_rapl.c             |  46
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c           |   9
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.h           |  20
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c     |  17
21 files changed, 250 insertions(+), 188 deletions(-)
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index e27b49d7c922..80091ae54c2b 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -66,3 +66,4 @@ targets += capflags.c
$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
$(call if_changed,mkcapflags)
endif
+clean-files += capflags.c
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 15c5df92f74e..a220239cea65 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -869,3 +869,22 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
return false;
}
+
+void set_dr_addr_mask(unsigned long mask, int dr)
+{
+ if (!cpu_has_bpext)
+ return;
+
+ switch (dr) {
+ case 0:
+ wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0);
+ break;
+ case 1:
+ case 2:
+ case 3:
+ wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
+ break;
+ default:
+ break;
+ }
+}
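
(Aside, not part of the patch: a minimal sketch of how the new set_dr_addr_mask() helper might be driven. The function name, the loop and the 0x1f mask value below are illustrative assumptions, not taken from this series.)

/* Illustrative sketch only: program, then clear, an address mask per DR. */
static void example_program_bp_masks(void)
{
        unsigned long mask = 0x1f;      /* assumed: ignore the low 5 address bits */
        int dr;

        for (dr = 0; dr < 4; dr++)
                set_dr_addr_mask(mask, dr);     /* DR0 -> MSR_F16H_DR0_ADDR_MASK,
                                                 * DR1..DR3 -> the DR1 MSR block */

        /* ... breakpoints now match anywhere in the masked range ... */

        for (dr = 0; dr < 4; dr++)
                set_dr_addr_mask(0, dr);        /* restore exact-address matching */
}
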
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c6049650c093..b5c8ff5e9dfc 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -19,6 +19,7 @@
#include <asm/archrandom.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
+#include <asm/tlbflush.h>
#include <asm/debugreg.h>
#include <asm/sections.h>
#include <asm/vsyscall.h>
@@ -278,7 +279,7 @@ __setup("nosmep", setup_disable_smep);
static __always_inline void setup_smep(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_SMEP))
- set_in_cr4(X86_CR4_SMEP);
+ cr4_set_bits(X86_CR4_SMEP);
}
static __init int setup_disable_smap(char *arg)
@@ -298,9 +299,9 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_SMAP)) {
#ifdef CONFIG_X86_SMAP
- set_in_cr4(X86_CR4_SMAP);
+ cr4_set_bits(X86_CR4_SMAP);
#else
- clear_in_cr4(X86_CR4_SMAP);
+ cr4_clear_bits(X86_CR4_SMAP);
#endif
}
}
@@ -491,17 +492,18 @@ u16 __read_mostly tlb_lld_2m[NR_INFO];
u16 __read_mostly tlb_lld_4m[NR_INFO];
u16 __read_mostly tlb_lld_1g[NR_INFO];
-void cpu_detect_tlb(struct cpuinfo_x86 *c)
+static void cpu_detect_tlb(struct cpuinfo_x86 *c)
{
if (this_cpu->c_detect_tlb)
this_cpu->c_detect_tlb(c);
- printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n"
- "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n",
+ pr_info("Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n",
tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
- tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
- tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
- tlb_lld_1g[ENTRIES]);
+ tlb_lli_4m[ENTRIES]);
+
+ pr_info("Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n",
+ tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES],
+ tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]);
}
void detect_ht(struct cpuinfo_x86 *c)
@@ -1294,6 +1296,12 @@ void cpu_init(void)
wait_for_master_cpu(cpu);
/*
+ * Initialize the CR4 shadow before doing anything that could
+ * try to read it.
+ */
+ cr4_init_shadow();
+
+ /*
* Load microcode on this cpu if a valid microcode is available.
* This is early microcode loading procedure.
*/
@@ -1312,7 +1320,7 @@ void cpu_init(void)
pr_debug("Initializing CPU#%d\n", cpu);
- clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+ cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
/*
* Initialize the per-CPU GDT with the boot GDT,
@@ -1332,7 +1340,7 @@ void cpu_init(void)
barrier();
x86_configure_nx();
- enable_x2apic();
+ x2apic_setup();
/*
* set up and load the per-CPU TSS
@@ -1393,7 +1401,7 @@ void cpu_init(void)
printk(KERN_INFO "Initializing CPU#%d\n", cpu);
if (cpu_feature_enabled(X86_FEATURE_VME) || cpu_has_tsc || cpu_has_de)
- clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+ cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
load_current_idt();
switch_to_new_gdt(cpu);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 9cc6b6f25f42..94d7dcb12145 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -487,10 +487,8 @@ static void init_intel(struct cpuinfo_x86 *c)
rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
if ((epb & 0xF) == ENERGY_PERF_BIAS_PERFORMANCE) {
- printk_once(KERN_WARNING "ENERGY_PERF_BIAS:"
- " Set to 'normal', was 'performance'\n"
- "ENERGY_PERF_BIAS: View and update with"
- " x86_energy_perf_policy(8)\n");
+ pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
+ pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index c7035073dfc1..659643376dbf 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -952,20 +952,18 @@ static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
int type, char *buf)
{
- ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
- int n = 0;
-
- if (len > 1) {
- const struct cpumask *mask;
-
- mask = to_cpumask(this_leaf->shared_cpu_map);
- n = type ?
- cpulist_scnprintf(buf, len-2, mask) :
- cpumask_scnprintf(buf, len-2, mask);
- buf[n++] = '\n';
- buf[n] = '\0';
- }
- return n;
+ const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
+ int ret;
+
+ if (type)
+ ret = scnprintf(buf, PAGE_SIZE - 1, "%*pbl",
+ cpumask_pr_args(mask));
+ else
+ ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb",
+ cpumask_pr_args(mask));
+ buf[ret++] = '\n';
+ buf[ret] = '\0';
+ return ret;
}
static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
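
(Aside, not part of the patch: the %*pb/%*pbl conversions used above take a bit count plus a bitmap pointer, which cpumask_pr_args() supplies, and print the mask either as hex words or as a CPU range list. A minimal sketch, assuming the online mask as input:)

/* Illustrative sketch only: the two cpumask output formats. */
char buf[64];
const struct cpumask *mask = cpu_online_mask;

scnprintf(buf, sizeof(buf), "%*pb", cpumask_pr_args(mask));     /* e.g. "ff"  */
scnprintf(buf, sizeof(buf), "%*pbl", cpumask_pr_args(mask));    /* e.g. "0-7" */
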
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d2c611699cd9..3c036cb4a370 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -43,6 +43,8 @@
#include <linux/export.h>
#include <asm/processor.h>
+#include <asm/traps.h>
+#include <asm/tlbflush.h>
#include <asm/mce.h>
#include <asm/msr.h>
@@ -115,7 +117,7 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
* CPU/chipset specific EDAC code can register a notifier call here to print
* MCE errors in a human-readable form.
*/
-ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+static ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
/* Do initial initialization of a struct mce */
void mce_setup(struct mce *m)
@@ -150,14 +152,11 @@ static struct mce_log mcelog = {
void mce_log(struct mce *mce)
{
unsigned next, entry;
- int ret = 0;
/* Emit the trace record: */
trace_mce_record(mce);
- ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
- if (ret == NOTIFY_STOP)
- return;
+ atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
mce->finished = 0;
wmb();
@@ -311,7 +310,7 @@ static void wait_for_panic(void)
panic("Panicing machine check CPU died");
}
-static void mce_panic(char *msg, struct mce *final, char *exp)
+static void mce_panic(const char *msg, struct mce *final, char *exp)
{
int i, apei_err = 0;
@@ -529,7 +528,7 @@ static void mce_schedule_work(void)
schedule_work(this_cpu_ptr(&mce_work));
}
-DEFINE_PER_CPU(struct irq_work, mce_irq_work);
+static DEFINE_PER_CPU(struct irq_work, mce_irq_work);
static void mce_irq_work_cb(struct irq_work *entry)
{
@@ -735,7 +734,7 @@ static atomic_t mce_callin;
/*
* Check if a timeout waiting for other CPUs happened.
*/
-static int mce_timed_out(u64 *t)
+static int mce_timed_out(u64 *t, const char *msg)
{
/*
* The others already did panic for some reason.
@@ -750,8 +749,7 @@ static int mce_timed_out(u64 *t)
goto out;
if ((s64)*t < SPINUNIT) {
if (mca_cfg.tolerant <= 1)
- mce_panic("Timeout synchronizing machine check over CPUs",
- NULL, NULL);
+ mce_panic(msg, NULL, NULL);
cpu_missing = 1;
return 1;
}
@@ -867,7 +865,8 @@ static int mce_start(int *no_way_out)
* Wait for everyone.
*/
while (atomic_read(&mce_callin) != cpus) {
- if (mce_timed_out(&timeout)) {
+ if (mce_timed_out(&timeout,
+ "Timeout: Not all CPUs entered broadcast exception handler")) {
atomic_set(&global_nwo, 0);
return -1;
}
@@ -892,7 +891,8 @@ static int mce_start(int *no_way_out)
* only seen by one CPU before cleared, avoiding duplicates.
*/
while (atomic_read(&mce_executing) < order) {
- if (mce_timed_out(&timeout)) {
+ if (mce_timed_out(&timeout,
+ "Timeout: Subject CPUs unable to finish machine check processing")) {
atomic_set(&global_nwo, 0);
return -1;
}
@@ -936,7 +936,8 @@ static int mce_end(int order)
* loops.
*/
while (atomic_read(&mce_executing) <= cpus) {
- if (mce_timed_out(&timeout))
+ if (mce_timed_out(&timeout,
+ "Timeout: Monarch CPU unable to finish machine check processing"))
goto reset;
ndelay(SPINUNIT);
}
@@ -949,7 +950,8 @@ static int mce_end(int order)
* Subject: Wait for Monarch to finish.
*/
while (atomic_read(&mce_executing) != 0) {
- if (mce_timed_out(&timeout))
+ if (mce_timed_out(&timeout,
+ "Timeout: Monarch CPU did not finish machine check processing"))
goto reset;
ndelay(SPINUNIT);
}
@@ -1003,51 +1005,6 @@ static void mce_clear_state(unsigned long *toclear)
}
/*
- * Need to save faulting physical address associated with a process
- * in the machine check handler some place where we can grab it back
- * later in mce_notify_process()
- */
-#define MCE_INFO_MAX 16
-
-struct mce_info {
- atomic_t inuse;
- struct task_struct *t;
- __u64 paddr;
- int restartable;
-} mce_info[MCE_INFO_MAX];
-
-static void mce_save_info(__u64 addr, int c)
-{
- struct mce_info *mi;
-
- for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) {
- if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) {
- mi->t = current;
- mi->paddr = addr;
- mi->restartable = c;
- return;
- }
- }
-
- mce_panic("Too many concurrent recoverable errors", NULL, NULL);
-}
-
-static struct mce_info *mce_find_info(void)
-{
- struct mce_info *mi;
-
- for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++)
- if (atomic_read(&mi->inuse) && mi->t == current)
- return mi;
- return NULL;
-}
-
-static void mce_clear_info(struct mce_info *mi)
-{
- atomic_set(&mi->inuse, 0);
-}
-
-/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
*
@@ -1063,6 +1020,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
{
struct mca_config *cfg = &mca_cfg;
struct mce m, *final;
+ enum ctx_state prev_state;
int i;
int worst = 0;
int severity;
@@ -1084,6 +1042,10 @@ void do_machine_check(struct pt_regs *regs, long error_code)
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
char *msg = "Unknown";
+ u64 recover_paddr = ~0ull;
+ int flags = MF_ACTION_REQUIRED;
+
+ prev_state = ist_enter(regs);
this_cpu_inc(mce_exception_count);
@@ -1203,9 +1165,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
if (no_way_out)
mce_panic("Fatal machine check on current CPU", &m, msg);
if (worst == MCE_AR_SEVERITY) {
- /* schedule action before return to userland */
- mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV);
- set_thread_flag(TIF_MCE_NOTIFY);
+ recover_paddr = m.addr;
+ if (!(m.mcgstatus & MCG_STATUS_RIPV))
+ flags |= MF_MUST_KILL;
} else if (kill_it) {
force_sig(SIGBUS, current);
}
@@ -1216,6 +1178,27 @@ void do_machine_check(struct pt_regs *regs, long error_code)
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
out:
sync_core();
+
+ if (recover_paddr == ~0ull)
+ goto done;
+
+ pr_err("Uncorrected hardware memory error in user-access at %llx",
+ recover_paddr);
+ /*
+ * We must call memory_failure() here even if the current process is
+ * doomed. We still need to mark the page as poisoned and alert any
+ * other users of the page.
+ */
+ ist_begin_non_atomic(regs);
+ local_irq_enable();
+ if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) {
+ pr_err("Memory error not recovered");
+ force_sig(SIGBUS, current);
+ }
+ local_irq_disable();
+ ist_end_non_atomic();
+done:
+ ist_exit(regs, prev_state);
}
EXPORT_SYMBOL_GPL(do_machine_check);
@@ -1233,42 +1216,6 @@ int memory_failure(unsigned long pfn, int vector, int flags)
#endif
/*
- * Called in process context that interrupted by MCE and marked with
- * TIF_MCE_NOTIFY, just before returning to erroneous userland.
- * This code is allowed to sleep.
- * Attempt possible recovery such as calling the high level VM handler to
- * process any corrupted pages, and kill/signal current process if required.
- * Action required errors are handled here.
- */
-void mce_notify_process(void)
-{
- unsigned long pfn;
- struct mce_info *mi = mce_find_info();
- int flags = MF_ACTION_REQUIRED;
-
- if (!mi)
- mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL);
- pfn = mi->paddr >> PAGE_SHIFT;
-
- clear_thread_flag(TIF_MCE_NOTIFY);
-
- pr_err("Uncorrected hardware memory error in user-access at %llx",
- mi->paddr);
- /*
- * We must call memory_failure() here even if the current process is
- * doomed. We still need to mark the page as poisoned and alert any
- * other users of the page.
- */
- if (!mi->restartable)
- flags |= MF_MUST_KILL;
- if (memory_failure(pfn, MCE_VECTOR, flags) < 0) {
- pr_err("Memory error not recovered");
- force_sig(SIGBUS, current);
- }
- mce_clear_info(mi);
-}
-
-/*
* Action optional processing happens here (picking up
* from the list of faulting pages that do_machine_check()
* placed into the "ring").
@@ -1503,7 +1450,7 @@ static void __mcheck_cpu_init_generic(void)
bitmap_fill(all_banks, MAX_NR_BANKS);
machine_check_poll(MCP_UC | m_fl, &all_banks);
- set_in_cr4(X86_CR4_MCE);
+ cr4_set_bits(X86_CR4_MCE);
rdmsrl(MSR_IA32_MCG_CAP, cap);
if (cap & MCG_CTL_P)
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index a3042989398c..737b0ad4e61a 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -8,6 +8,8 @@
#include <linux/smp.h>
#include <asm/processor.h>
+#include <asm/traps.h>
+#include <asm/tlbflush.h>
#include <asm/mce.h>
#include <asm/msr.h>
@@ -17,8 +19,11 @@ int mce_p5_enabled __read_mostly;
/* Machine check handler for Pentium class Intel CPUs: */
static void pentium_machine_check(struct pt_regs *regs, long error_code)
{
+ enum ctx_state prev_state;
u32 loaddr, hi, lotype;
+ prev_state = ist_enter(regs);
+
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
@@ -33,6 +38,8 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
}
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
+
+ ist_exit(regs, prev_state);
}
/* Set up machine check reporting for processors with Intel style MCE: */
@@ -59,7 +66,7 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
"Intel old style machine check architecture supported.\n");
/* Enable MCE: */
- set_in_cr4(X86_CR4_MCE);
+ cr4_set_bits(X86_CR4_MCE);
printk(KERN_INFO
"Intel old style machine check reporting enabled on CPU#%d.\n",
smp_processor_id());
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 7dc5564d0cdf..44f138296fbe 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -7,14 +7,20 @@
#include <linux/types.h>
#include <asm/processor.h>
+#include <asm/traps.h>
+#include <asm/tlbflush.h>
#include <asm/mce.h>
#include <asm/msr.h>
/* Machine check handler for WinChip C6: */
static void winchip_machine_check(struct pt_regs *regs, long error_code)
{
+ enum ctx_state prev_state = ist_enter(regs);
+
printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
+
+ ist_exit(regs, prev_state);
}
/* Set up machine check reporting on the Winchip C6 series */
@@ -31,7 +37,7 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
lo &= ~(1<<4); /* Enable MCE */
wrmsr(MSR_IDT_FCR1, lo, hi);
- set_in_cr4(X86_CR4_MCE);
+ cr4_set_bits(X86_CR4_MCE);
printk(KERN_INFO
"Winchip machine check reporting enabled on CPU#0.\n");
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 15c29096136b..36a83617eb21 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -552,7 +552,7 @@ static int __init microcode_init(void)
int error;
if (paravirt_enabled() || dis_ucode_ldr)
- return 0;
+ return -EINVAL;
if (c->x86_vendor == X86_VENDOR_INTEL)
microcode_ops = init_intel_microcode();
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index e2b22df964cd..36d99a337b49 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -28,7 +28,7 @@ function dump_array()
# If the /* comment */ starts with a quote string, grab that.
VALUE="$(echo "$i" | sed -n 's@.*/\* *\("[^"]*"\).*\*/@\1@p')"
[ -z "$VALUE" ] && VALUE="\"$NAME\""
- [ "$VALUE" == '""' ] && continue
+ [ "$VALUE" = '""' ] && continue
# Name is uppercase, VALUE is all lowercase
VALUE="$(echo "$VALUE" | tr A-Z a-z)"
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index a450373e8e91..939155ffdece 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -107,6 +107,7 @@ static struct clocksource hyperv_cs = {
.rating = 400, /* use this when running on Hyperv*/
.read = read_hv_clock,
.mask = CLOCKSOURCE_MASK(64),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
static void __init ms_hyperv_init_platform(void)
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 9e451b0876b5..f8c81ba0b465 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -138,8 +138,8 @@ static void prepare_set(void)
/* Save value of CR4 and clear Page Global Enable (bit 7) */
if (cpu_has_pge) {
- cr4 = read_cr4();
- write_cr4(cr4 & ~X86_CR4_PGE);
+ cr4 = __read_cr4();
+ __write_cr4(cr4 & ~X86_CR4_PGE);
}
/*
@@ -171,7 +171,7 @@ static void post_set(void)
/* Restore value of CR4 */
if (cpu_has_pge)
- write_cr4(cr4);
+ __write_cr4(cr4);
}
static void cyrix_set_arr(unsigned int reg, unsigned long base,
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 0e25a1bc5ab5..7d74f7b3c6ba 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -678,8 +678,8 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
/* Save value of CR4 and clear Page Global Enable (bit 7) */
if (cpu_has_pge) {
- cr4 = read_cr4();
- write_cr4(cr4 & ~X86_CR4_PGE);
+ cr4 = __read_cr4();
+ __write_cr4(cr4 & ~X86_CR4_PGE);
}
/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
@@ -708,7 +708,7 @@ static void post_set(void) __releases(set_atomicity_lock)
/* Restore value of CR4 */
if (cpu_has_pge)
- write_cr4(cr4);
+ __write_cr4(cr4);
raw_spin_unlock(&set_atomicity_lock);
}
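
(Aside, not part of the patch: the series leaves two styles of CR4 access. Bit-level, sticky changes go through the new per-CPU shadow via cr4_set_bits()/cr4_clear_bits(), while whole-register save/restore sequences such as the MTRR code above use the raw __read_cr4()/__write_cr4() accessors. A condensed sketch of both, restating what the hunks do:)

/* Illustrative sketch only: shadowed vs. raw CR4 access. */
unsigned long cr4;

cr4_set_bits(X86_CR4_MCE);              /* sticky feature bit, shadow-tracked  */

cr4 = __read_cr4();                     /* raw save ...                        */
__write_cr4(cr4 & ~X86_CR4_PGE);        /* ... drop PGE so the TLB flush works */
__write_cr4(cr4);                       /* ... and restore the saved value     */
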
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 143e5f5dc855..b71a7f86d68a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,6 +31,8 @@
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/alternative.h>
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
#include <asm/timer.h>
#include <asm/desc.h>
#include <asm/ldt.h>
@@ -43,6 +45,8 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
.enabled = 1,
};
+struct static_key rdpmc_always_available = STATIC_KEY_INIT_FALSE;
+
u64 __read_mostly hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -1327,8 +1331,6 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
break;
case CPU_STARTING:
- if (x86_pmu.attr_rdpmc)
- set_in_cr4(X86_CR4_PCE);
if (x86_pmu.cpu_starting)
x86_pmu.cpu_starting(cpu);
break;
@@ -1804,14 +1806,44 @@ static int x86_pmu_event_init(struct perf_event *event)
event->destroy(event);
}
+ if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
+ event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
+
return err;
}
+static void refresh_pce(void *ignored)
+{
+ if (current->mm)
+ load_mm_cr4(current->mm);
+}
+
+static void x86_pmu_event_mapped(struct perf_event *event)
+{
+ if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+ return;
+
+ if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
+ on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+}
+
+static void x86_pmu_event_unmapped(struct perf_event *event)
+{
+ if (!current->mm)
+ return;
+
+ if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+ return;
+
+ if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
+ on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+}
+
static int x86_pmu_event_idx(struct perf_event *event)
{
int idx = event->hw.idx;
- if (!x86_pmu.attr_rdpmc)
+ if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return 0;
if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
@@ -1829,16 +1861,6 @@ static ssize_t get_attr_rdpmc(struct device *cdev,
return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
}
-static void change_rdpmc(void *info)
-{
- bool enable = !!(unsigned long)info;
-
- if (enable)
- set_in_cr4(X86_CR4_PCE);
- else
- clear_in_cr4(X86_CR4_PCE);
-}
-
static ssize_t set_attr_rdpmc(struct device *cdev,
struct device_attribute *attr,
const char *buf, size_t count)
@@ -1850,14 +1872,27 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
if (ret)
return ret;
+ if (val > 2)
+ return -EINVAL;
+
if (x86_pmu.attr_rdpmc_broken)
return -ENOTSUPP;
- if (!!val != !!x86_pmu.attr_rdpmc) {
- x86_pmu.attr_rdpmc = !!val;
- on_each_cpu(change_rdpmc, (void *)val, 1);
+ if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) {
+ /*
+ * Changing into or out of always available, aka
+ * perf-event-bypassing mode. This path is extremely slow,
+ * but only root can trigger it, so it's okay.
+ */
+ if (val == 2)
+ static_key_slow_inc(&rdpmc_always_available);
+ else
+ static_key_slow_dec(&rdpmc_always_available);
+ on_each_cpu(refresh_pce, NULL, 1);
}
+ x86_pmu.attr_rdpmc = val;
+
return count;
}
@@ -1900,6 +1935,9 @@ static struct pmu pmu = {
.event_init = x86_pmu_event_init,
+ .event_mapped = x86_pmu_event_mapped,
+ .event_unmapped = x86_pmu_event_unmapped,
+
.add = x86_pmu_add,
.del = x86_pmu_del,
.start = x86_pmu_start,
@@ -1914,13 +1952,15 @@ static struct pmu pmu = {
.flush_branch_stack = x86_pmu_flush_branch_stack,
};
-void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
+void arch_perf_update_userpage(struct perf_event *event,
+ struct perf_event_mmap_page *userpg, u64 now)
{
struct cyc2ns_data *data;
userpg->cap_user_time = 0;
userpg->cap_user_time_zero = 0;
- userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
+ userpg->cap_user_rdpmc =
+ !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
userpg->pmc_width = x86_pmu.cntval_bits;
if (!sched_clock_stable())
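
(Aside, not part of the patch: cap_user_rdpmc is now reported per event, so a self-monitoring task should check that flag in its mmap'ed perf_event_mmap_page before issuing rdpmc. A hedged user-space sketch of the read loop follows; it is simplified from the documented pattern and omits sign-extension by pmc_width and the time-conversion fields.)

#include <linux/perf_event.h>
#include <stdint.h>

/* Illustrative user-space sketch only -- error handling omitted. */
static inline uint64_t rdpmc(uint32_t counter)
{
        uint32_t lo, hi;

        asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
        return lo | ((uint64_t)hi << 32);
}

static uint64_t read_self_counter(struct perf_event_mmap_page *pc)
{
        uint32_t seq, idx;
        uint64_t count;

        do {
                seq = pc->lock;
                asm volatile("" ::: "memory");
                count = pc->offset;
                idx = pc->index;                /* 0 means rdpmc is not usable */
                if (pc->cap_user_rdpmc && idx)
                        count += rdpmc(idx - 1);
                asm volatile("" ::: "memory");
        } while (pc->lock != seq);

        return count;
}
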
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 4e6cdb0ddc70..df525d2be1e8 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -71,6 +71,8 @@ struct event_constraint {
#define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */
#define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */
#define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_RDPMC_ALLOWED 0x40 /* grant rdpmc permission */
+
struct amd_nb {
int nb_id; /* NorthBridge id */
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 944bf019b74f..498b6d967138 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2431,6 +2431,7 @@ __init int intel_pmu_init(void)
break;
case 55: /* 22nm Atom "Silvermont" */
+ case 76: /* 14nm Atom "Airmont" */
case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 3c895d480cd7..073983398364 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -568,8 +568,8 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
};
struct event_constraint intel_slm_pebs_event_constraints[] = {
- /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1),
/* Allow all events as PEBS with no flags */
INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
EVENT_CONSTRAINT_END
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 673f930c700f..c4bb8b8e5017 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -103,6 +103,13 @@ static struct kobj_attribute format_attr_##_var = \
#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */
+#define RAPL_EVENT_ATTR_STR(_name, v, str) \
+static struct perf_pmu_events_attr event_attr_##v = { \
+ .attr = __ATTR(_name, 0444, rapl_sysfs_show, NULL), \
+ .id = 0, \
+ .event_str = str, \
+};
+
struct rapl_pmu {
spinlock_t lock;
int hw_unit; /* 1/2^hw_unit Joule */
@@ -135,7 +142,7 @@ static inline u64 rapl_scale(u64 v)
* or use ldexp(count, -32).
* Watts = Joules/Time delta
*/
- return v << (32 - __this_cpu_read(rapl_pmu->hw_unit));
+ return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit);
}
static u64 rapl_event_update(struct perf_event *event)
@@ -379,23 +386,36 @@ static struct attribute_group rapl_pmu_attr_group = {
.attrs = rapl_pmu_attrs,
};
-EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
-EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
-EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
-EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04");
+static ssize_t rapl_sysfs_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr = \
+ container_of(attr, struct perf_pmu_events_attr, attr);
+
+ if (pmu_attr->event_str)
+ return sprintf(page, "%s", pmu_attr->event_str);
+
+ return 0;
+}
+
+RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
+RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
+RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
+RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04");
-EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
-EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules");
-EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules");
-EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules");
/*
* we compute in 0.23 nJ increments regardless of MSR
*/
-EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
-EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10");
-EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10");
-EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10");
static struct attribute *rapl_events_srv_attr[] = {
EVENT_PTR(rapl_cores),
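
(Aside, not part of the patch: each RAPL_EVENT_ATTR_STR() invocation above simply defines a perf_pmu_events_attr whose show routine is the new rapl_sysfs_show(), so the strings end up as sysfs event aliases; user space then typically sees them as power/energy-cores/ and friends. Roughly, the first invocation expands to the structure below.)

/* Illustrative expansion only -- approximate, not exact preprocessor output. */
static struct perf_pmu_events_attr event_attr_rapl_cores = {
        .attr           = __ATTR(energy-cores, 0444, rapl_sysfs_show, NULL),
        .id             = 0,
        .event_str      = "event=0x01",
};
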
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 10b8d3eaaf15..c635b8b49e93 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -840,7 +840,6 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
box->phys_id = phys_id;
box->pci_dev = pdev;
box->pmu = pmu;
- uncore_box_init(box);
pci_set_drvdata(pdev, box);
raw_spin_lock(&uncore_box_lock);
@@ -1004,10 +1003,8 @@ static int uncore_cpu_starting(int cpu)
pmu = &type->pmus[j];
box = *per_cpu_ptr(pmu->box, cpu);
/* called by uncore_cpu_init? */
- if (box && box->phys_id >= 0) {
- uncore_box_init(box);
+ if (box && box->phys_id >= 0)
continue;
- }
for_each_online_cpu(k) {
exist = *per_cpu_ptr(pmu->box, k);
@@ -1023,10 +1020,8 @@ static int uncore_cpu_starting(int cpu)
}
}
- if (box) {
+ if (box)
box->phys_id = phys_id;
- uncore_box_init(box);
- }
}
}
return 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 18eb78bbdd10..6c8c1e7e69d8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -17,7 +17,7 @@
#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff)
#define UNCORE_PCI_DEV_IDX(data) (data & 0xff)
#define UNCORE_EXTRA_PCI_DEV 0xff
-#define UNCORE_EXTRA_PCI_DEV_MAX 2
+#define UNCORE_EXTRA_PCI_DEV_MAX 3
/* support up to 8 sockets */
#define UNCORE_SOCKET_MAX 8
@@ -257,6 +257,14 @@ static inline int uncore_num_counters(struct intel_uncore_box *box)
return box->pmu->type->num_counters;
}
+static inline void uncore_box_init(struct intel_uncore_box *box)
+{
+ if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
+ if (box->pmu->type->ops->init_box)
+ box->pmu->type->ops->init_box(box);
+ }
+}
+
static inline void uncore_disable_box(struct intel_uncore_box *box)
{
if (box->pmu->type->ops->disable_box)
@@ -265,6 +273,8 @@ static inline void uncore_disable_box(struct intel_uncore_box *box)
static inline void uncore_enable_box(struct intel_uncore_box *box)
{
+ uncore_box_init(box);
+
if (box->pmu->type->ops->enable_box)
box->pmu->type->ops->enable_box(box);
}
@@ -287,14 +297,6 @@ static inline u64 uncore_read_counter(struct intel_uncore_box *box,
return box->pmu->type->ops->read_counter(box, event);
}
-static inline void uncore_box_init(struct intel_uncore_box *box)
-{
- if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
- if (box->pmu->type->ops->init_box)
- box->pmu->type->ops->init_box(box);
- }
-}
-
static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
{
return (box->phys_id < 0);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
index 745b158e9a65..21af6149edf2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -891,6 +891,7 @@ void snbep_uncore_cpu_init(void)
enum {
SNBEP_PCI_QPI_PORT0_FILTER,
SNBEP_PCI_QPI_PORT1_FILTER,
+ HSWEP_PCI_PCU_3,
};
static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event)
@@ -2026,6 +2027,17 @@ void hswep_uncore_cpu_init(void)
{
if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+
+ /* Detect 6-8 core systems with only two SBOXes */
+ if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) {
+ u32 capid4;
+
+ pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3],
+ 0x94, &capid4);
+ if (((capid4 >> 6) & 0x3) == 0)
+ hswep_uncore_sbox.num_boxes = 2;
+ }
+
uncore_msr_uncores = hswep_msr_uncores;
}
@@ -2287,6 +2299,11 @@ static DEFINE_PCI_DEVICE_TABLE(hswep_uncore_pci_ids) = {
.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
SNBEP_PCI_QPI_PORT1_FILTER),
},
+ { /* PCU.3 (for Capability registers) */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ HSWEP_PCI_PCU_3),
+ },
{ /* end: all zeroes */ }
};