aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arc/Makefile2
-rw-r--r--arch/arc/kernel/stacktrace.c2
-rw-r--r--arch/arm/kvm/arm.c1
-rw-r--r--arch/arm64/include/asm/cputype.h2
-rw-r--r--arch/arm64/include/asm/ptrace.h2
-rw-r--r--arch/arm64/kernel/asm-offsets.c1
-rw-r--r--arch/arm64/kernel/cpu_errata.c6
-rw-r--r--arch/arm64/kernel/entry.S19
-rw-r--r--arch/arm64/mm/fault.c3
-rw-r--r--arch/mips/include/asm/pgtable.h10
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h1
-rw-r--r--arch/powerpc/kernel/eeh_driver.c2
-rw-r--r--arch/powerpc/kernel/pci_64.c1
-rw-r--r--arch/powerpc/kernel/process.c10
-rw-r--r--arch/powerpc/kernel/tm.S61
-rw-r--r--arch/powerpc/mm/hash_utils_64.c4
-rw-r--r--arch/powerpc/mm/pgtable-radix.c5
-rw-r--r--arch/s390/include/asm/fpu/api.h2
-rw-r--r--arch/s390/kernel/ipl.c7
-rw-r--r--arch/x86/events/core.c11
-rw-r--r--arch/x86/events/intel/Makefile4
-rw-r--r--arch/x86/events/intel/core.c29
-rw-r--r--arch/x86/include/asm/pvclock.h25
-rw-r--r--arch/x86/kernel/amd_nb.c4
-rw-r--r--arch/x86/kernel/pvclock.c11
-rw-r--r--arch/x86/kvm/lapic.c3
-rw-r--r--arch/x86/kvm/vmx.c23
-rw-r--r--arch/x86/kvm/x86.c6
-rw-r--r--arch/x86/kvm/x86.h7
-rw-r--r--arch/x86/pci/acpi.c1
-rw-r--r--arch/x86/power/hibernate_64.c97
-rw-r--r--arch/x86/power/hibernate_asm_64.S55
32 files changed, 291 insertions, 126 deletions
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index d4df6be66d58..85814e74677d 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -66,8 +66,6 @@ endif
endif
-cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables
-
# By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok
ifeq ($(atleast_gcc48),y)
cflags-$(CONFIG_ARC_DW2_UNWIND) += -gdwarf-2
diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index e0efff15a5ae..b9192a653b7e 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -142,7 +142,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
* prelogue is setup (callee regs saved and then fp set and not other
* way around
*/
- pr_warn("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
+ pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
return 0;
#endif
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 893941ec98dc..f1bde7c4e736 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -263,6 +263,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
kvm_timer_vcpu_terminate(vcpu);
kvm_vgic_vcpu_destroy(vcpu);
kvm_pmu_vcpu_destroy(vcpu);
+ kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
}
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 87e1985f3be8..9d9fd4b9a72e 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -80,12 +80,14 @@
#define APM_CPU_PART_POTENZA 0x000
#define CAVIUM_CPU_PART_THUNDERX 0x0A1
+#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2
#define BRCM_CPU_PART_VULCAN 0x516
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
+#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index a307eb6e7fa8..7f94755089e2 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -117,6 +117,8 @@ struct pt_regs {
};
u64 orig_x0;
u64 syscallno;
+ u64 orig_addr_limit;
+ u64 unused; // maintain 16 byte alignment
};
#define arch_has_single_step() (1)
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index f8e5d47f0880..2f4ba774488a 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -60,6 +60,7 @@ int main(void)
DEFINE(S_PC, offsetof(struct pt_regs, pc));
DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0));
DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno));
+ DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit));
DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
BLANK();
DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter));
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index d42789499f17..af716b65110d 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -98,6 +98,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
MIDR_RANGE(MIDR_THUNDERX, 0x00,
(1 << MIDR_VARIANT_SHIFT) | 1),
},
+ {
+ /* Cavium ThunderX, T81 pass 1.0 */
+ .desc = "Cavium erratum 27456",
+ .capability = ARM64_WORKAROUND_CAVIUM_27456,
+ MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00),
+ },
#endif
{
}
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 12e8d2bcb3f9..6c3b7345a6c4 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -28,6 +28,7 @@
#include <asm/errno.h>
#include <asm/esr.h>
#include <asm/irq.h>
+#include <asm/memory.h>
#include <asm/thread_info.h>
#include <asm/unistd.h>
@@ -97,7 +98,14 @@
mov x29, xzr // fp pointed to user-space
.else
add x21, sp, #S_FRAME_SIZE
- .endif
+ get_thread_info tsk
+ /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */
+ ldr x20, [tsk, #TI_ADDR_LIMIT]
+ str x20, [sp, #S_ORIG_ADDR_LIMIT]
+ mov x20, #TASK_SIZE_64
+ str x20, [tsk, #TI_ADDR_LIMIT]
+ ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO)
+ .endif /* \el == 0 */
mrs x22, elr_el1
mrs x23, spsr_el1
stp lr, x21, [sp, #S_LR]
@@ -128,6 +136,14 @@
.endm
.macro kernel_exit, el
+ .if \el != 0
+ /* Restore the task's original addr_limit. */
+ ldr x20, [sp, #S_ORIG_ADDR_LIMIT]
+ str x20, [tsk, #TI_ADDR_LIMIT]
+
+ /* No need to restore UAO, it will be restored from SPSR_EL1 */
+ .endif
+
ldp x21, x22, [sp, #S_PC] // load ELR, SPSR
.if \el == 0
ct_user_enter
@@ -406,7 +422,6 @@ el1_irq:
bl trace_hardirqs_off
#endif
- get_thread_info tsk
irq_handler
#ifdef CONFIG_PREEMPT
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 013e2cbe7924..b1166d1e5955 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -280,7 +280,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
}
if (permission_fault(esr) && (addr < USER_DS)) {
- if (get_fs() == KERNEL_DS)
+ /* regs->orig_addr_limit may be 0 if we entered from EL0 */
+ if (regs->orig_addr_limit == KERNEL_DS)
die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
if (!search_exception_tables(regs->pc))
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index a6b611f1da43..f53816744d60 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -24,7 +24,7 @@ struct mm_struct;
struct vm_area_struct;
#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_NO_READ | \
- _CACHE_CACHABLE_NONCOHERENT)
+ _page_cachable_default)
#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_WRITE | \
_page_cachable_default)
#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_NO_EXEC | \
@@ -476,7 +476,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
pte.pte_low &= (_PAGE_MODIFIED | _PAGE_ACCESSED | _PFNX_MASK);
pte.pte_high &= (_PFN_MASK | _CACHE_MASK);
pte.pte_low |= pgprot_val(newprot) & ~_PFNX_MASK;
- pte.pte_high |= pgprot_val(newprot) & ~_PFN_MASK;
+ pte.pte_high |= pgprot_val(newprot) & ~(_PFN_MASK | _CACHE_MASK);
return pte;
}
#elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
@@ -491,7 +491,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#else
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
- return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+ return __pte((pte_val(pte) & _PAGE_CHG_MASK) |
+ (pgprot_val(newprot) & ~_PAGE_CHG_MASK));
}
#endif
@@ -632,7 +633,8 @@ static inline struct page *pmd_page(pmd_t pmd)
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
- pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) | pgprot_val(newprot);
+ pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) |
+ (pgprot_val(newprot) & ~_PAGE_CHG_MASK);
return pmd;
}
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 88a5ecaa157b..ab84c89c9e98 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -230,6 +230,7 @@ extern unsigned long __kernel_virt_size;
#define KERN_VIRT_SIZE __kernel_virt_size
extern struct page *vmemmap;
extern unsigned long ioremap_bot;
+extern unsigned long pci_io_base;
#endif /* __ASSEMBLY__ */
#include <asm/book3s/64/hash.h>
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index b5f73cb5eeb6..d70101e1e25c 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -647,7 +647,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
pci_unlock_rescan_remove();
}
} else if (frozen_bus) {
- eeh_pe_dev_traverse(pe, eeh_rmv_device, &rmv_data);
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
}
/*
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 3759df52bd67..a5ae49a2dcc4 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -47,7 +47,6 @@ static int __init pcibios_init(void)
printk(KERN_INFO "PCI: Probing PCI hardware\n");
- pci_io_base = ISA_IO_BASE;
/* For now, override phys_mem_access_prot. If we need it,g
* later, we may move that initialization to each ppc_md
*/
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index e2f12cbcade9..0b93893424f5 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1505,6 +1505,16 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
current->thread.regs = regs - 1;
}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /*
+ * Clear any transactional state, we're exec()ing. The cause is
+ * not important as there will never be a recheckpoint so it's not
+ * user visible.
+ */
+ if (MSR_TM_SUSPENDED(mfmsr()))
+ tm_reclaim_current(0);
+#endif
+
memset(regs->gpr, 0, sizeof(regs->gpr));
regs->ctr = 0;
regs->link = 0;
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index bf8f34a58670..b7019b559ddb 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -110,17 +110,11 @@ _GLOBAL(tm_reclaim)
std r3, STK_PARAM(R3)(r1)
SAVE_NVGPRS(r1)
- /* We need to setup MSR for VSX register save instructions. Here we
- * also clear the MSR RI since when we do the treclaim, we won't have a
- * valid kernel pointer for a while. We clear RI here as it avoids
- * adding another mtmsr closer to the treclaim. This makes the region
- * maked as non-recoverable wider than it needs to be but it saves on
- * inserting another mtmsrd later.
- */
+ /* We need to setup MSR for VSX register save instructions. */
mfmsr r14
mr r15, r14
ori r15, r15, MSR_FP
- li r16, MSR_RI
+ li r16, 0
ori r16, r16, MSR_EE /* IRQs hard off */
andc r15, r15, r16
oris r15, r15, MSR_VEC@h
@@ -176,7 +170,17 @@ dont_backup_fp:
1: tdeqi r6, 0
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
- /* The moment we treclaim, ALL of our GPRs will switch
+ /* Clear MSR RI since we are about to change r1, EE is already off. */
+ li r4, 0
+ mtmsrd r4, 1
+
+ /*
+ * BE CAREFUL HERE:
+ * At this point we can't take an SLB miss since we have MSR_RI
+ * off. Load only to/from the stack/paca which are in SLB bolted regions
+ * until we turn MSR RI back on.
+ *
+ * The moment we treclaim, ALL of our GPRs will switch
* to user register state. (FPRs, CCR etc. also!)
* Use an sprg and a tm_scratch in the PACA to shuffle.
*/
@@ -197,6 +201,11 @@ dont_backup_fp:
/* Store the PPR in r11 and reset to decent value */
std r11, GPR11(r1) /* Temporary stash */
+
+ /* Reset MSR RI so we can take SLB faults again */
+ li r11, MSR_RI
+ mtmsrd r11, 1
+
mfspr r11, SPRN_PPR
HMT_MEDIUM
@@ -397,11 +406,6 @@ restore_gprs:
ld r5, THREAD_TM_DSCR(r3)
ld r6, THREAD_TM_PPR(r3)
- /* Clear the MSR RI since we are about to change R1. EE is already off
- */
- li r4, 0
- mtmsrd r4, 1
-
REST_GPR(0, r7) /* GPR0 */
REST_2GPRS(2, r7) /* GPR2-3 */
REST_GPR(4, r7) /* GPR4 */
@@ -439,10 +443,33 @@ restore_gprs:
ld r6, _CCR(r7)
mtcr r6
- REST_GPR(1, r7) /* GPR1 */
- REST_GPR(5, r7) /* GPR5-7 */
REST_GPR(6, r7)
- ld r7, GPR7(r7)
+
+ /*
+ * Store r1 and r5 on the stack so that we can access them
+ * after we clear MSR RI.
+ */
+
+ REST_GPR(5, r7)
+ std r5, -8(r1)
+ ld r5, GPR1(r7)
+ std r5, -16(r1)
+
+ REST_GPR(7, r7)
+
+ /* Clear MSR RI since we are about to change r1. EE is already off */
+ li r5, 0
+ mtmsrd r5, 1
+
+ /*
+ * BE CAREFUL HERE:
+ * At this point we can't take an SLB miss since we have MSR_RI
+ * off. Load only to/from the stack/paca which are in SLB bolted regions
+ * until we turn MSR RI back on.
+ */
+
+ ld r5, -8(r1)
+ ld r1, -16(r1)
/* Commit register state as checkpointed state: */
TRECHKPT
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 5b22ba0b58bc..2971ea18c768 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -922,6 +922,10 @@ void __init hash__early_init_mmu(void)
vmemmap = (struct page *)H_VMEMMAP_BASE;
ioremap_bot = IOREMAP_BASE;
+#ifdef CONFIG_PCI
+ pci_io_base = ISA_IO_BASE;
+#endif
+
/* Initialize the MMU Hash table and create the linear mapping
* of memory. Has to be done before SLB initialization as this is
* currently where the page size encoding is obtained.
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index e58707deef5c..7931e1496f0d 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -328,6 +328,11 @@ void __init radix__early_init_mmu(void)
__vmalloc_end = RADIX_VMALLOC_END;
vmemmap = (struct page *)RADIX_VMEMMAP_BASE;
ioremap_bot = IOREMAP_BASE;
+
+#ifdef CONFIG_PCI
+ pci_io_base = ISA_IO_BASE;
+#endif
+
/*
* For now radix also use the same frag size
*/
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
index 5e04f3cbd320..8ae236b0f80b 100644
--- a/arch/s390/include/asm/fpu/api.h
+++ b/arch/s390/include/asm/fpu/api.h
@@ -22,7 +22,7 @@ static inline int test_fp_ctl(u32 fpc)
" la %0,0\n"
"1:\n"
EX_TABLE(0b,1b)
- : "=d" (rc), "=d" (orig_fpc)
+ : "=d" (rc), "=&d" (orig_fpc)
: "d" (fpc), "0" (-EINVAL));
return rc;
}
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index f20abdb5630a..d14069d4b88d 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2064,12 +2064,5 @@ void s390_reset_system(void)
S390_lowcore.program_new_psw.addr =
(unsigned long) s390_base_pgm_handler;
- /*
- * Clear subchannel ID and number to signal new kernel that no CCW or
- * SCSI IPL has been done (for kexec and kdump)
- */
- S390_lowcore.subchannel_id = 0;
- S390_lowcore.subchannel_nr = 0;
-
do_reset_calls();
}
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 33787ee817f0..26ced536005a 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2319,7 +2319,7 @@ void
perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
struct stack_frame frame;
- const void __user *fp;
+ const unsigned long __user *fp;
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
/* TODO: We don't support guest os callchain now */
@@ -2332,7 +2332,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
return;
- fp = (void __user *)regs->bp;
+ fp = (unsigned long __user *)regs->bp;
perf_callchain_store(entry, regs->ip);
@@ -2345,16 +2345,17 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
pagefault_disable();
while (entry->nr < entry->max_stack) {
unsigned long bytes;
+
frame.next_frame = NULL;
frame.return_address = 0;
- if (!access_ok(VERIFY_READ, fp, 16))
+ if (!access_ok(VERIFY_READ, fp, sizeof(*fp) * 2))
break;
- bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8);
+ bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp));
if (bytes != 0)
break;
- bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8);
+ bytes = __copy_from_user_nmi(&frame.return_address, fp + 1, sizeof(*fp));
if (bytes != 0)
break;
diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile
index 3660b2cf245a..06c2baa51814 100644
--- a/arch/x86/events/intel/Makefile
+++ b/arch/x86/events/intel/Makefile
@@ -1,8 +1,8 @@
obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o cqm.o
obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o
obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o
-obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl.o
-intel-rapl-objs := rapl.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl-perf.o
+intel-rapl-perf-objs := rapl.o
obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o
intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o
obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 7c666958a625..9b4f9d3ce465 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -115,6 +115,10 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+ /*
+ * When HT is off these events can only run on the bottom 4 counters
+ * When HT is on, they are impacted by the HT bug and require EXCL access
+ */
INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -139,6 +143,10 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+ /*
+ * When HT is off these events can only run on the bottom 4 counters
+ * When HT is on, they are impacted by the HT bug and require EXCL access
+ */
INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -182,6 +190,16 @@ struct event_constraint intel_skl_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
+
+ /*
+ * when HT is off, these can only run on the bottom 4 counters
+ */
+ INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xc6, 0xf), /* FRONTEND_RETIRED.* */
+
EVENT_CONSTRAINT_END
};
@@ -250,6 +268,10 @@ static struct event_constraint intel_hsw_event_constraints[] = {
/* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),
+ /*
+ * When HT is off these events can only run on the bottom 4 counters
+ * When HT is on, they are impacted by the HT bug and require EXCL access
+ */
INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -264,6 +286,13 @@ struct event_constraint intel_bdw_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
+ /*
+ * when HT is off, these can only run on the bottom 4 counters
+ */
+ INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */
EVENT_CONSTRAINT_END
};
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index fdcc04020636..7c1c89598688 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -69,29 +69,22 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
}
static __always_inline
-u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src)
-{
- u64 delta = rdtsc_ordered() - src->tsc_timestamp;
- return pvclock_scale_delta(delta, src->tsc_to_system_mul,
- src->tsc_shift);
-}
-
-static __always_inline
unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
cycle_t *cycles, u8 *flags)
{
unsigned version;
- cycle_t ret, offset;
- u8 ret_flags;
+ cycle_t offset;
+ u64 delta;
version = src->version;
+ /* Make the latest version visible */
+ smp_rmb();
- offset = pvclock_get_nsec_offset(src);
- ret = src->system_time + offset;
- ret_flags = src->flags;
-
- *cycles = ret;
- *flags = ret_flags;
+ delta = rdtsc_ordered() - src->tsc_timestamp;
+ offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
+ src->tsc_shift);
+ *cycles = src->system_time + offset;
+ *flags = src->flags;
return version;
}
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index a147e676fc7b..e991d5c8bb3a 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -71,8 +71,8 @@ int amd_cache_northbridges(void)
while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL)
i++;
- if (i == 0)
- return 0;
+ if (!i)
+ return -ENODEV;
nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL);
if (!nb)
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 99bfc025111d..06c58ce46762 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -61,11 +61,16 @@ void pvclock_resume(void)
u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
{
unsigned version;
- cycle_t ret;
u8 flags;
do {
- version = __pvclock_read_cycles(src, &ret, &flags);
+ version = src->version;
+ /* Make the latest version visible */
+ smp_rmb();
+
+ flags = src->flags;
+ /* Make sure that the version double-check is last. */
+ smp_rmb();
} while ((src->version & 1) || version != src->version);
return flags & valid_flags;
@@ -80,6 +85,8 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
do {
version = __pvclock_read_cycles(src, &ret, &flags);
+ /* Make sure that the version double-check is last. */
+ smp_rmb();
} while ((src->version & 1) || version != src->version);
if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index bbb5b283ff63..a397200281c1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1310,7 +1310,8 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
/* __delay is delay_tsc whenever the hardware has TSC, thus always. */
if (guest_tsc < tsc_deadline)
- __delay(tsc_deadline - guest_tsc);
+ __delay(min(tsc_deadline - guest_tsc,
+ nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
}
static void start_apic_timer(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 003618e324ce..64a79f271276 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6671,7 +6671,13 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
/* Checks for #GP/#SS exceptions. */
exn = false;
- if (is_protmode(vcpu)) {
+ if (is_long_mode(vcpu)) {
+ /* Long mode: #GP(0)/#SS(0) if the memory address is in a
+ * non-canonical form. This is the only check on the memory
+ * destination for long mode!
+ */
+ exn = is_noncanonical_address(*ret);
+ } else if (is_protmode(vcpu)) {
/* Protected mode: apply checks for segment validity in the
* following order:
* - segment type check (#GP(0) may be thrown)
@@ -6688,17 +6694,10 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
* execute-only code segment
*/
exn = ((s.type & 0xa) == 8);
- }
- if (exn) {
- kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
- return 1;
- }
- if (is_long_mode(vcpu)) {
- /* Long mode: #GP(0)/#SS(0) if the memory address is in a
- * non-canonical form. This is an only check for long mode.
- */
- exn = is_noncanonical_address(*ret);
- } else if (is_protmode(vcpu)) {
+ if (exn) {
+ kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+ return 1;
+ }
/* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
*/
exn = (s.unusable != 0);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 902d9da12392..7da5dd2057a9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1244,12 +1244,6 @@ static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
static unsigned long max_tsc_khz;
-static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
-{
- return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
- vcpu->arch.virtual_tsc_shift);
-}
-
static u32 adjust_tsc_khz(u32 khz, s32 ppm)
{
u64 v = (u64)khz * (1000000 + ppm);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 7ce3634ab5fe..a82ca466b62e 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -2,6 +2,7 @@
#define ARCH_X86_KVM_X86_H
#include <linux/kvm_host.h>
+#include <asm/pvclock.h>
#include "kvm_cache_regs.h"
#define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL
@@ -195,6 +196,12 @@ extern unsigned int lapic_timer_advance_ns;
extern struct static_key kvm_no_apic_vcpu;
+static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
+{
+ return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
+ vcpu->arch.virtual_tsc_shift);
+}
+
/* Same "calling convention" as do_div:
* - divide (n << 32) by base
* - put result in n
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index b2a4e2a61f6b..3cd69832d7f4 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -396,6 +396,7 @@ int __init pci_acpi_init(void)
return -ENODEV;
printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
+ acpi_irq_penalty_init();
pcibios_enable_irq = acpi_pci_irq_enable;
pcibios_disable_irq = acpi_pci_irq_disable;
x86_init.pci.init_irq = x86_init_noop;
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 009947d419a6..f2b5e6a5cf95 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -19,6 +19,7 @@
#include <asm/mtrr.h>
#include <asm/sections.h>
#include <asm/suspend.h>
+#include <asm/tlbflush.h>
/* Defined in hibernate_asm_64.S */
extern asmlinkage __visible int restore_image(void);
@@ -28,6 +29,7 @@ extern asmlinkage __visible int restore_image(void);
* kernel's text (this value is passed in the image header).
*/
unsigned long restore_jump_address __visible;
+unsigned long jump_address_phys;
/*
* Value of the cr3 register from before the hibernation (this value is passed
@@ -37,7 +39,43 @@ unsigned long restore_cr3 __visible;
pgd_t *temp_level4_pgt __visible;
-void *relocated_restore_code __visible;
+unsigned long relocated_restore_code __visible;
+
+static int set_up_temporary_text_mapping(void)
+{
+ pmd_t *pmd;
+ pud_t *pud;
+
+ /*
+ * The new mapping only has to cover the page containing the image
+ * kernel's entry point (jump_address_phys), because the switch over to
+ * it is carried out by relocated code running from a page allocated
+ * specifically for this purpose and covered by the identity mapping, so
+ * the temporary kernel text mapping is only needed for the final jump.
+ * Moreover, in that mapping the virtual address of the image kernel's
+ * entry point must be the same as its virtual address in the image
+ * kernel (restore_jump_address), so the image kernel's
+ * restore_registers() code doesn't find itself in a different area of
+ * the virtual address space after switching over to the original page
+ * tables used by the image kernel.
+ */
+ pud = (pud_t *)get_safe_page(GFP_ATOMIC);
+ if (!pud)
+ return -ENOMEM;
+
+ pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
+ if (!pmd)
+ return -ENOMEM;
+
+ set_pmd(pmd + pmd_index(restore_jump_address),
+ __pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
+ set_pud(pud + pud_index(restore_jump_address),
+ __pud(__pa(pmd) | _KERNPG_TABLE));
+ set_pgd(temp_level4_pgt + pgd_index(restore_jump_address),
+ __pgd(__pa(pud) | _KERNPG_TABLE));
+
+ return 0;
+}
static void *alloc_pgt_page(void *context)
{
@@ -59,9 +97,10 @@ static int set_up_temporary_mappings(void)
if (!temp_level4_pgt)
return -ENOMEM;
- /* It is safe to reuse the original kernel mapping */
- set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
- init_level4_pgt[pgd_index(__START_KERNEL_map)]);
+ /* Prepare a temporary mapping for the kernel text */
+ result = set_up_temporary_text_mapping();
+ if (result)
+ return result;
/* Set up the direct mapping from scratch */
for (i = 0; i < nr_pfn_mapped; i++) {
@@ -78,19 +117,50 @@ static int set_up_temporary_mappings(void)
return 0;
}
+static int relocate_restore_code(void)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+
+ relocated_restore_code = get_safe_page(GFP_ATOMIC);
+ if (!relocated_restore_code)
+ return -ENOMEM;
+
+ memcpy((void *)relocated_restore_code, &core_restore_code, PAGE_SIZE);
+
+ /* Make the page containing the relocated code executable */
+ pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code);
+ pud = pud_offset(pgd, relocated_restore_code);
+ if (pud_large(*pud)) {
+ set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX));
+ } else {
+ pmd_t *pmd = pmd_offset(pud, relocated_restore_code);
+
+ if (pmd_large(*pmd)) {
+ set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
+ } else {
+ pte_t *pte = pte_offset_kernel(pmd, relocated_restore_code);
+
+ set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
+ }
+ }
+ __flush_tlb_all();
+
+ return 0;
+}
+
int swsusp_arch_resume(void)
{
int error;
/* We have got enough memory and from now on we cannot recover */
- if ((error = set_up_temporary_mappings()))
+ error = set_up_temporary_mappings();
+ if (error)
return error;
- relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
- if (!relocated_restore_code)
- return -ENOMEM;
- memcpy(relocated_restore_code, &core_restore_code,
- &restore_registers - &core_restore_code);
+ error = relocate_restore_code();
+ if (error)
+ return error;
restore_image();
return 0;
@@ -109,11 +179,12 @@ int pfn_is_nosave(unsigned long pfn)
struct restore_data_record {
unsigned long jump_address;
+ unsigned long jump_address_phys;
unsigned long cr3;
unsigned long magic;
};
-#define RESTORE_MAGIC 0x0123456789ABCDEFUL
+#define RESTORE_MAGIC 0x123456789ABCDEF0UL
/**
* arch_hibernation_header_save - populate the architecture specific part
@@ -126,7 +197,8 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
if (max_size < sizeof(struct restore_data_record))
return -EOVERFLOW;
- rdr->jump_address = restore_jump_address;
+ rdr->jump_address = (unsigned long)&restore_registers;
+ rdr->jump_address_phys = __pa_symbol(&restore_registers);
rdr->cr3 = restore_cr3;
rdr->magic = RESTORE_MAGIC;
return 0;
@@ -142,6 +214,7 @@ int arch_hibernation_header_restore(void *addr)
struct restore_data_record *rdr = addr;
restore_jump_address = rdr->jump_address;
+ jump_address_phys = rdr->jump_address_phys;
restore_cr3 = rdr->cr3;
return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
}
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index 4400a43b9e28..3177c2bc26f6 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -44,9 +44,6 @@ ENTRY(swsusp_arch_suspend)
pushfq
popq pt_regs_flags(%rax)
- /* save the address of restore_registers */
- movq $restore_registers, %rax
- movq %rax, restore_jump_address(%rip)
/* save cr3 */
movq %cr3, %rax
movq %rax, restore_cr3(%rip)
@@ -57,31 +54,34 @@ ENTRY(swsusp_arch_suspend)
ENDPROC(swsusp_arch_suspend)
ENTRY(restore_image)
- /* switch to temporary page tables */
- movq $__PAGE_OFFSET, %rdx
- movq temp_level4_pgt(%rip), %rax
- subq %rdx, %rax
- movq %rax, %cr3
- /* Flush TLB */
- movq mmu_cr4_features(%rip), %rax
- movq %rax, %rdx
- andq $~(X86_CR4_PGE), %rdx
- movq %rdx, %cr4; # turn off PGE
- movq %cr3, %rcx; # flush TLB
- movq %rcx, %cr3;
- movq %rax, %cr4; # turn PGE back on
-
/* prepare to jump to the image kernel */
- movq restore_jump_address(%rip), %rax
- movq restore_cr3(%rip), %rbx
+ movq restore_jump_address(%rip), %r8
+ movq restore_cr3(%rip), %r9
+
+ /* prepare to switch to temporary page tables */
+ movq temp_level4_pgt(%rip), %rax
+ movq mmu_cr4_features(%rip), %rbx
/* prepare to copy image data to their original locations */
movq restore_pblist(%rip), %rdx
+
+ /* jump to relocated restore code */
movq relocated_restore_code(%rip), %rcx
jmpq *%rcx
/* code below has been relocated to a safe page */
ENTRY(core_restore_code)
+ /* switch to temporary page tables */
+ movq $__PAGE_OFFSET, %rcx
+ subq %rcx, %rax
+ movq %rax, %cr3
+ /* flush TLB */
+ movq %rbx, %rcx
+ andq $~(X86_CR4_PGE), %rcx
+ movq %rcx, %cr4; # turn off PGE
+ movq %cr3, %rcx; # flush TLB
+ movq %rcx, %cr3;
+ movq %rbx, %cr4; # turn PGE back on
.Lloop:
testq %rdx, %rdx
jz .Ldone
@@ -96,24 +96,17 @@ ENTRY(core_restore_code)
/* progress to the next pbe */
movq pbe_next(%rdx), %rdx
jmp .Lloop
+
.Ldone:
/* jump to the restore_registers address from the image header */
- jmpq *%rax
- /*
- * NOTE: This assumes that the boot kernel's text mapping covers the
- * image kernel's page containing restore_registers and the address of
- * this page is the same as in the image kernel's text mapping (it
- * should always be true, because the text mapping is linear, starting
- * from 0, and is supposed to cover the entire kernel text for every
- * kernel).
- *
- * code below belongs to the image kernel
- */
+ jmpq *%r8
+ /* code below belongs to the image kernel */
+ .align PAGE_SIZE
ENTRY(restore_registers)
FRAME_BEGIN
/* go back to the original page tables */
- movq %rbx, %cr3
+ movq %r9, %cr3
/* Flush TLB, including "global" things (vmalloc) */
movq mmu_cr4_features(%rip), %rax