Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 10
-rw-r--r--  arch/x86/Makefile | 5
-rw-r--r--  arch/x86/boot/compressed/Makefile | 9
-rw-r--r--  arch/x86/boot/compressed/eboot.c | 14
-rw-r--r--  arch/x86/boot/compressed/head_32.S | 14
-rw-r--r--  arch/x86/boot/compressed/head_64.S | 22
-rw-r--r--  arch/x86/boot/header.S | 26
-rw-r--r--  arch/x86/boot/tools/build.c | 39
-rw-r--r--  arch/x86/ia32/ia32_aout.c | 35
-rw-r--r--  arch/x86/ia32/ia32_signal.c | 4
-rw-r--r--  arch/x86/ia32/sys_ia32.c | 23
-rw-r--r--  arch/x86/include/asm/apic.h | 23
-rw-r--r--  arch/x86/include/asm/apicdef.h | 2
-rw-r--r--  arch/x86/include/asm/atomic64_32.h | 10
-rw-r--r--  arch/x86/include/asm/boot.h | 2
-rw-r--r--  arch/x86/include/asm/compat.h | 2
-rw-r--r--  arch/x86/include/asm/current.h | 2
-rw-r--r--  arch/x86/include/asm/desc.h | 1
-rw-r--r--  arch/x86/include/asm/fpu-internal.h | 6
-rw-r--r--  arch/x86/include/asm/ftrace.h | 3
-rw-r--r--  arch/x86/include/asm/hardirq.h | 9
-rw-r--r--  arch/x86/include/asm/ia32.h | 6
-rw-r--r--  arch/x86/include/asm/io_apic.h | 35
-rw-r--r--  arch/x86/include/asm/irq_regs.h | 4
-rw-r--r--  arch/x86/include/asm/irq_remapping.h | 118
-rw-r--r--  arch/x86/include/asm/kdebug.h | 1
-rw-r--r--  arch/x86/include/asm/kvm_para.h | 3
-rw-r--r--  arch/x86/include/asm/mmu_context.h | 12
-rw-r--r--  arch/x86/include/asm/msr-index.h | 5
-rw-r--r--  arch/x86/include/asm/nmi.h | 22
-rw-r--r--  arch/x86/include/asm/page_32_types.h | 4
-rw-r--r--  arch/x86/include/asm/page_64_types.h | 4
-rw-r--r--  arch/x86/include/asm/percpu.h | 24
-rw-r--r--  arch/x86/include/asm/perf_event.h | 12
-rw-r--r--  arch/x86/include/asm/posix_types.h | 6
-rw-r--r--  arch/x86/include/asm/processor.h | 2
-rw-r--r--  arch/x86/include/asm/sigcontext.h | 2
-rw-r--r--  arch/x86/include/asm/siginfo.h | 8
-rw-r--r--  arch/x86/include/asm/smp.h | 15
-rw-r--r--  arch/x86/include/asm/spinlock.h | 2
-rw-r--r--  arch/x86/include/asm/stackprotector.h | 4
-rw-r--r--  arch/x86/include/asm/stat.h | 21
-rw-r--r--  arch/x86/include/asm/syscall.h | 27
-rw-r--r--  arch/x86/include/asm/thread_info.h | 23
-rw-r--r--  arch/x86/include/asm/tlbflush.h | 4
-rw-r--r--  arch/x86/include/asm/topology.h | 38
-rw-r--r--  arch/x86/include/asm/unistd.h | 6
-rw-r--r--  arch/x86/include/asm/word-at-a-time.h | 33
-rw-r--r--  arch/x86/include/asm/x86_init.h | 10
-rw-r--r--  arch/x86/kernel/Makefile | 2
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 2
-rw-r--r--  arch/x86/kernel/acpi/sleep.c | 4
-rw-r--r--  arch/x86/kernel/acpi/sleep.h | 4
-rw-r--r--  arch/x86/kernel/acpi/wakeup_32.S | 4
-rw-r--r--  arch/x86/kernel/acpi/wakeup_64.S | 4
-rw-r--r--  arch/x86/kernel/apic/apic.c | 76
-rw-r--r--  arch/x86/kernel/apic/apic_flat_64.c | 2
-rw-r--r--  arch/x86/kernel/apic/apic_noop.c | 1
-rw-r--r--  arch/x86/kernel/apic/apic_numachip.c | 8
-rw-r--r--  arch/x86/kernel/apic/bigsmp_32.c | 1
-rw-r--r--  arch/x86/kernel/apic/es7000_32.c | 2
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 383
-rw-r--r--  arch/x86/kernel/apic/numaq_32.c | 1
-rw-r--r--  arch/x86/kernel/apic/probe_32.c | 1
-rw-r--r--  arch/x86/kernel/apic/summit_32.c | 1
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c | 1
-rw-r--r--  arch/x86/kernel/apic/x2apic_phys.c | 7
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 1
-rw-r--r--  arch/x86/kernel/apm_32.c | 2
-rw-r--r--  arch/x86/kernel/check.c | 20
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 29
-rw-r--r--  arch/x86/kernel/cpu/common.c | 11
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 12
-rw-r--r--  arch/x86/kernel/cpu/match.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 18
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 7
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 11
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd_ibs.c | 570
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 4
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 6
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c | 6
-rw-r--r--  arch/x86/kernel/dumpstack.c | 23
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 2
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 2
-rw-r--r--  arch/x86/kernel/ftrace.c | 500
-rw-r--r--  arch/x86/kernel/i387.c | 3
-rw-r--r--  arch/x86/kernel/init_task.c | 42
-rw-r--r--  arch/x86/kernel/irq_32.c | 8
-rw-r--r--  arch/x86/kernel/kprobes.c | 4
-rw-r--r--  arch/x86/kernel/kvm.c | 9
-rw-r--r--  arch/x86/kernel/microcode_amd.c | 12
-rw-r--r--  arch/x86/kernel/microcode_core.c | 19
-rw-r--r--  arch/x86/kernel/microcode_intel.c | 14
-rw-r--r--  arch/x86/kernel/nmi.c | 95
-rw-r--r--  arch/x86/kernel/nmi_selftest.c | 13
-rw-r--r--  arch/x86/kernel/paravirt.c | 12
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c | 8
-rw-r--r--  arch/x86/kernel/process.c | 50
-rw-r--r--  arch/x86/kernel/process_32.c | 2
-rw-r--r--  arch/x86/kernel/process_64.c | 11
-rw-r--r--  arch/x86/kernel/ptrace.c | 7
-rw-r--r--  arch/x86/kernel/setup.c | 10
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 14
-rw-r--r--  arch/x86/kernel/smpboot.c | 191
-rw-r--r--  arch/x86/kernel/traps.c | 8
-rw-r--r--  arch/x86/kernel/x86_init.c | 9
-rw-r--r--  arch/x86/kvm/pmu.c | 18
-rw-r--r--  arch/x86/kvm/vmx.c | 5
-rw-r--r--  arch/x86/kvm/x86.c | 9
-rw-r--r--  arch/x86/lib/insn.c | 53
-rw-r--r--  arch/x86/lib/usercopy.c | 20
-rw-r--r--  arch/x86/mm/numa_emulation.c | 8
-rw-r--r--  arch/x86/mm/tlb.c | 10
-rw-r--r--  arch/x86/pci/acpi.c | 128
-rw-r--r--  arch/x86/pci/amd_bus.c | 91
-rw-r--r--  arch/x86/pci/broadcom_bus.c | 12
-rw-r--r--  arch/x86/pci/bus_numa.c | 69
-rw-r--r--  arch/x86/pci/bus_numa.h | 18
-rw-r--r--  arch/x86/pci/common.c | 43
-rw-r--r--  arch/x86/pci/fixup.c | 17
-rw-r--r--  arch/x86/pci/i386.c | 2
-rw-r--r--  arch/x86/platform/geode/net5501.c | 2
-rw-r--r--  arch/x86/platform/mrst/mrst.c | 4
-rw-r--r--  arch/x86/platform/visws/visws_quirks.c | 2
-rw-r--r--  arch/x86/tools/.gitignore | 1
-rw-r--r--  arch/x86/tools/Makefile | 4
-rw-r--r--  arch/x86/tools/relocs.c (renamed from arch/x86/boot/compressed/relocs.c) | 244
-rw-r--r--  arch/x86/um/asm/elf.h | 42
-rw-r--r--  arch/x86/um/asm/ptrace.h | 34
-rw-r--r--  arch/x86/um/asm/ptrace_32.h | 23
-rw-r--r--  arch/x86/um/asm/ptrace_64.h | 26
-rw-r--r--  arch/x86/um/shared/sysdep/ptrace.h | 67
-rw-r--r--  arch/x86/um/shared/sysdep/ptrace_32.h | 92
-rw-r--r--  arch/x86/um/shared/sysdep/ptrace_64.h | 101
-rw-r--r--  arch/x86/um/signal.c | 29
-rw-r--r--  arch/x86/um/sys_call_table_64.c | 1
-rw-r--r--  arch/x86/um/syscalls_32.c | 12
-rw-r--r--  arch/x86/um/sysrq_32.c | 8
-rw-r--r--  arch/x86/um/sysrq_64.c | 8
-rw-r--r--  arch/x86/um/tls_32.c | 2
-rw-r--r--  arch/x86/xen/Makefile | 2
-rw-r--r--  arch/x86/xen/apic.c | 33
-rw-r--r--  arch/x86/xen/enlighten.c | 48
-rw-r--r--  arch/x86/xen/mmu.c | 11
-rw-r--r--  arch/x86/xen/smp.c | 34
-rw-r--r--  arch/x86/xen/xen-asm.S | 2
-rw-r--r--  arch/x86/xen/xen-ops.h | 4
147 files changed, 2561 insertions, 1761 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2f925ccb3e5b..21ea6d28d71f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -40,7 +40,6 @@ config X86
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_FP_TEST
select HAVE_FUNCTION_TRACE_MCOUNT_TEST
- select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_KVM
select HAVE_ARCH_KGDB
@@ -77,11 +76,13 @@ config X86
select GENERIC_CLOCKEVENTS_MIN_ADJUST
select IRQ_FORCED_THREADING
select USE_GENERIC_SMP_HELPERS if SMP
- select HAVE_BPF_JIT if (X86_64 && NET)
+ select HAVE_BPF_JIT if X86_64
select CLKEVT_I8253
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_IOMAP
- select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC
+ select DCACHE_WORD_ACCESS
+ select GENERIC_SMP_IDLE_THREAD
+ select HAVE_ARCH_SECCOMP_FILTER
select BUILDTIME_EXTABLE_SORT
config INSTRUCTION_DECODER
@@ -161,9 +162,6 @@ config RWSEM_GENERIC_SPINLOCK
config RWSEM_XCHGADD_ALGORITHM
def_bool X86_XADD
-config ARCH_HAS_CPU_IDLE_WAIT
- def_bool y
-
config GENERIC_CALIBRATE_DELAY
def_bool y
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 41a7237606a3..dc611a40a336 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -134,6 +134,9 @@ KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
KBUILD_CFLAGS += $(mflags-y)
KBUILD_AFLAGS += $(mflags-y)
+archscripts:
+ $(Q)$(MAKE) $(build)=arch/x86/tools relocs
+
###
# Syscall table generation
@@ -146,7 +149,6 @@ archheaders:
head-y := arch/x86/kernel/head_$(BITS).o
head-y += arch/x86/kernel/head$(BITS).o
head-y += arch/x86/kernel/head.o
-head-y += arch/x86/kernel/init_task.o
libs-y += arch/x86/lib/
@@ -203,6 +205,7 @@ archclean:
$(Q)rm -rf $(objtree)/arch/i386
$(Q)rm -rf $(objtree)/arch/x86_64
$(Q)$(MAKE) $(clean)=$(boot)
+ $(Q)$(MAKE) $(clean)=arch/x86/tools
define archhelp
echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index fd55a2ff3ad8..e398bb5d63bb 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -40,13 +40,12 @@ OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
-targets += vmlinux.bin.all vmlinux.relocs relocs
-hostprogs-$(CONFIG_X86_NEED_RELOCS) += relocs
-
+targets += vmlinux.bin.all vmlinux.relocs
+CMD_RELOCS = arch/x86/tools/relocs
quiet_cmd_relocs = RELOCS $@
- cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $<
-$(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE
+ cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $<
+$(obj)/vmlinux.relocs: vmlinux FORCE
$(call if_changed,relocs)
vmlinux.bin.all-y := $(obj)/vmlinux.bin
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 0cdfc0d2315e..2c14e76bb4c7 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -904,11 +904,19 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
memset(boot_params, 0x0, 0x4000);
- /* Copy first two sectors to boot_params */
- memcpy(boot_params, image->image_base, 1024);
-
hdr = &boot_params->hdr;
+ /* Copy the second sector to boot_params */
+ memcpy(&hdr->jump, image->image_base + 512, 512);
+
+ /*
+ * Fill out some of the header fields ourselves because the
+ * EFI firmware loader doesn't load the first sector.
+ */
+ hdr->root_flags = 1;
+ hdr->vid_mode = 0xffff;
+ hdr->boot_flag = 0xAA55;
+
/*
* The EFI firmware loader could have placed the kernel image
* anywhere in memory, but the kernel has various restrictions
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index a0559930a180..c85e3ac99bba 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -33,6 +33,9 @@
__HEAD
ENTRY(startup_32)
#ifdef CONFIG_EFI_STUB
+ jmp preferred_addr
+
+ .balign 0x10
/*
* We don't need the return address, so set up the stack so
* efi_main() can find its arguments.
@@ -41,12 +44,17 @@ ENTRY(startup_32)
call efi_main
cmpl $0, %eax
- je preferred_addr
movl %eax, %esi
- call 1f
+ jne 2f
1:
+ /* EFI init failed, so hang. */
+ hlt
+ jmp 1b
+2:
+ call 3f
+3:
popl %eax
- subl $1b, %eax
+ subl $3b, %eax
subl BP_pref_address(%esi), %eax
add BP_code32_start(%esi), %eax
leal preferred_addr(%eax), %eax
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 558d76ce23bc..87e03a13d8e3 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -200,18 +200,28 @@ ENTRY(startup_64)
* entire text+data+bss and hopefully all of memory.
*/
#ifdef CONFIG_EFI_STUB
- pushq %rsi
+ /*
+ * The entry point for the PE/COFF executable is 0x210, so only
+ * legacy boot loaders will execute this jmp.
+ */
+ jmp preferred_addr
+
+ .org 0x210
mov %rcx, %rdi
mov %rdx, %rsi
call efi_main
- popq %rsi
- cmpq $0,%rax
- je preferred_addr
movq %rax,%rsi
- call 1f
+ cmpq $0,%rax
+ jne 2f
1:
+ /* EFI init failed, so hang. */
+ hlt
+ jmp 1b
+2:
+ call 3f
+3:
popq %rax
- subq $1b, %rax
+ subq $3b, %rax
subq BP_pref_address(%rsi), %rax
add BP_code32_start(%esi), %eax
leaq preferred_addr(%rax), %rax
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index f1bbeeb09148..8bbea6aa40d9 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -147,7 +147,7 @@ optional_header:
# Filled in by build.c
.long 0x0000 # AddressOfEntryPoint
- .long 0x0000 # BaseOfCode
+ .long 0x0200 # BaseOfCode
#ifdef CONFIG_X86_32
.long 0 # data
#endif
@@ -189,7 +189,7 @@ extra_header_fields:
.quad 0 # SizeOfHeapCommit
#endif
.long 0 # LoaderFlags
- .long 0x1 # NumberOfRvaAndSizes
+ .long 0x6 # NumberOfRvaAndSizes
.quad 0 # ExportTable
.quad 0 # ImportTable
@@ -217,18 +217,17 @@ section_table:
#
# The EFI application loader requires a relocation section
- # because EFI applications are relocatable and not having
- # this section seems to confuse it. But since we don't need
- # the loader to fixup any relocs for us just fill it with a
- # single dummy reloc.
+ # because EFI applications must be relocatable. But since
+ # we don't need the loader to fixup any relocs for us, we
+ # just create an empty (zero-length) .reloc section header.
#
.ascii ".reloc"
.byte 0
.byte 0
- .long reloc_end - reloc_start
- .long reloc_start
- .long reloc_end - reloc_start # SizeOfRawData
- .long reloc_start # PointerToRawData
+ .long 0
+ .long 0
+ .long 0 # SizeOfRawData
+ .long 0 # PointerToRawData
.long 0 # PointerToRelocations
.long 0 # PointerToLineNumbers
.word 0 # NumberOfRelocations
@@ -469,10 +468,3 @@ setup_corrupt:
.data
dummy: .long 0
-
- .section .reloc
-reloc_start:
- .long dummy - reloc_start
- .long 10
- .word 0
-reloc_end:
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index ed549767a231..3f61f6e2b46f 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -198,35 +198,60 @@ int main(int argc, char ** argv)
pe_header = get_unaligned_le32(&buf[0x3c]);
- /* Size of code */
- put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]);
-
/* Size of image */
put_unaligned_le32(file_sz, &buf[pe_header + 0x50]);
+ /*
+ * Subtract the size of the first section (512 bytes) which
+ * includes the header and .reloc section. The remaining size
+ * is that of the .text section.
+ */
+ file_sz -= 512;
+
+ /* Size of code */
+ put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]);
+
#ifdef CONFIG_X86_32
- /* Address of entry point */
- put_unaligned_le32(i, &buf[pe_header + 0x28]);
+ /*
+ * Address of entry point.
+ *
+ * The EFI stub entry point is +16 bytes from the start of
+ * the .text section.
+ */
+ put_unaligned_le32(i + 16, &buf[pe_header + 0x28]);
/* .text size */
put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]);
+ /* .text vma */
+ put_unaligned_le32(0x200, &buf[pe_header + 0xb4]);
+
/* .text size of initialised data */
put_unaligned_le32(file_sz, &buf[pe_header + 0xb8]);
+
+ /* .text file offset */
+ put_unaligned_le32(0x200, &buf[pe_header + 0xbc]);
#else
/*
* Address of entry point. startup_32 is at the beginning and
* the 64-bit entry point (startup_64) is always 512 bytes
- * after.
+ * after. The EFI stub entry point is 16 bytes after that, as
+ * the first instruction allows legacy loaders to jump over
+ * the EFI stub initialisation.
*/
- put_unaligned_le32(i + 512, &buf[pe_header + 0x28]);
+ put_unaligned_le32(i + 528, &buf[pe_header + 0x28]);
/* .text size */
put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]);
+ /* .text vma */
+ put_unaligned_le32(0x200, &buf[pe_header + 0xc4]);
+
/* .text size of initialised data */
put_unaligned_le32(file_sz, &buf[pe_header + 0xc8]);
+ /* .text file offset */
+ put_unaligned_le32(0x200, &buf[pe_header + 0xcc]);
#endif /* CONFIG_X86_32 */
#endif /* CONFIG_EFI_STUB */
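
A note on the mechanics: build.c finds the PE optional header through the e_lfanew pointer at offset 0x3c, then patches fixed offsets within it. A minimal sketch of that pattern, using build.c's own get/put_unaligned_le32 helpers (patch_entry_point and text_start are illustrative names, not part of the patch):

	/* Patch AddressOfEntryPoint in the PE optional header. */
	static void patch_entry_point(u8 *buf, u32 text_start)
	{
		u32 pe_header = get_unaligned_le32(&buf[0x3c]); /* e_lfanew */

		/*
		 * +16 skips the jmp emitted for legacy loaders; on 64-bit
		 * the stub entry is at +528 (512 for startup_64, plus 16).
		 */
		put_unaligned_le32(text_start + 16, &buf[pe_header + 0x28]);
	}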
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index d511d951a052..07b3a68d2d29 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -119,9 +119,7 @@ static void set_brk(unsigned long start, unsigned long end)
end = PAGE_ALIGN(end);
if (end <= start)
return;
- down_write(&current->mm->mmap_sem);
- do_brk(start, end - start);
- up_write(&current->mm->mmap_sem);
+ vm_brk(start, end - start);
}
#ifdef CORE_DUMP
@@ -296,8 +294,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
/* OK, This is the point of no return */
set_personality(PER_LINUX);
- set_thread_flag(TIF_IA32);
- current->mm->context.ia32_compat = 1;
+ set_personality_ia32(false);
setup_new_exec(bprm);
@@ -332,9 +329,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
pos = 32;
map_size = ex.a_text+ex.a_data;
- down_write(&current->mm->mmap_sem);
- error = do_brk(text_addr & PAGE_MASK, map_size);
- up_write(&current->mm->mmap_sem);
+ error = vm_brk(text_addr & PAGE_MASK, map_size);
if (error != (text_addr & PAGE_MASK)) {
send_sig(SIGKILL, current, 0);
@@ -373,9 +368,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) {
loff_t pos = fd_offset;
- down_write(&current->mm->mmap_sem);
- do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
- up_write(&current->mm->mmap_sem);
+ vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
bprm->file->f_op->read(bprm->file,
(char __user *)N_TXTADDR(ex),
ex.a_text+ex.a_data, &pos);
@@ -385,26 +378,22 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
goto beyond_if;
}
- down_write(&current->mm->mmap_sem);
- error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
+ error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
PROT_READ | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
MAP_EXECUTABLE | MAP_32BIT,
fd_offset);
- up_write(&current->mm->mmap_sem);
if (error != N_TXTADDR(ex)) {
send_sig(SIGKILL, current, 0);
return error;
}
- down_write(&current->mm->mmap_sem);
- error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
+ error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
MAP_EXECUTABLE | MAP_32BIT,
fd_offset + ex.a_text);
- up_write(&current->mm->mmap_sem);
if (error != N_DATADDR(ex)) {
send_sig(SIGKILL, current, 0);
return error;
@@ -476,9 +465,7 @@ static int load_aout_library(struct file *file)
error_time = jiffies;
}
#endif
- down_write(&current->mm->mmap_sem);
- do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
- up_write(&current->mm->mmap_sem);
+ vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
file->f_op->read(file, (char __user *)start_addr,
ex.a_text + ex.a_data, &pos);
@@ -490,12 +477,10 @@ static int load_aout_library(struct file *file)
goto out;
}
/* Now use mmap to map the library into memory. */
- down_write(&current->mm->mmap_sem);
- error = do_mmap(file, start_addr, ex.a_text + ex.a_data,
+ error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT,
N_TXTOFF(ex));
- up_write(&current->mm->mmap_sem);
retval = error;
if (error != start_addr)
goto out;
@@ -503,9 +488,7 @@ static int load_aout_library(struct file *file)
len = PAGE_ALIGN(ex.a_text + ex.a_data);
bss = ex.a_text + ex.a_data + ex.a_bss;
if (bss > len) {
- down_write(&current->mm->mmap_sem);
- error = do_brk(start_addr + len, bss - len);
- up_write(&current->mm->mmap_sem);
+ error = vm_brk(start_addr + len, bss - len);
retval = error;
if (error != start_addr + len)
goto out;
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index a69245ba27e3..0b3f2354f6aa 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -67,6 +67,10 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
switch (from->si_code >> 16) {
case __SI_FAULT >> 16:
break;
+ case __SI_SYS >> 16:
+ put_user_ex(from->si_syscall, &to->si_syscall);
+ put_user_ex(from->si_arch, &to->si_arch);
+ break;
case __SI_CHLD >> 16:
if (ia32) {
put_user_ex(from->si_utime, &to->si_utime);
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index aec2202a596c..edca9c0a79cc 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -287,11 +287,6 @@ asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act,
return ret;
}
-asmlinkage long sys32_alarm(unsigned int seconds)
-{
- return alarm_setitimer(seconds);
-}
-
asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr,
int options)
{
@@ -300,11 +295,6 @@ asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr,
/* 32-bit timeval and related flotsam. */
-asmlinkage long sys32_sysfs(int option, u32 arg1, u32 arg2)
-{
- return sys_sysfs(option, arg1, arg2);
-}
-
asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid,
struct compat_timespec __user *interval)
{
@@ -375,19 +365,6 @@ asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
}
-asmlinkage long sys32_personality(unsigned long personality)
-{
- int ret;
-
- if (personality(current->personality) == PER_LINUX32 &&
- personality == PER_LINUX)
- personality = PER_LINUX32;
- ret = sys_personality(personality);
- if (ret == PER_LINUX32)
- ret = PER_LINUX;
- return ret;
-}
-
asmlinkage long sys32_sendfile(int out_fd, int in_fd,
compat_off_t __user *offset, s32 count)
{
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index d85410171260..eaff4790ed96 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -138,6 +138,11 @@ static inline void native_apic_msr_write(u32 reg, u32 v)
wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0);
}
+static inline void native_apic_msr_eoi_write(u32 reg, u32 v)
+{
+ wrmsr(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0);
+}
+
static inline u32 native_apic_msr_read(u32 reg)
{
u64 msr;
@@ -351,6 +356,14 @@ struct apic {
/* apic ops */
u32 (*read)(u32 reg);
void (*write)(u32 reg, u32 v);
+ /*
+ * ->eoi_write() has the same signature as ->write().
+ *
+ * Drivers can support both ->eoi_write() and ->write() by passing the same
+ * callback value. The kernel can override ->eoi_write() and fall
+ * back on ->write() for EOI.
+ */
+ void (*eoi_write)(u32 reg, u32 v);
u64 (*icr_read)(void);
void (*icr_write)(u32 low, u32 high);
void (*wait_icr_idle)(void);
@@ -426,6 +439,11 @@ static inline void apic_write(u32 reg, u32 val)
apic->write(reg, val);
}
+static inline void apic_eoi(void)
+{
+ apic->eoi_write(APIC_EOI, APIC_EOI_ACK);
+}
+
static inline u64 apic_icr_read(void)
{
return apic->icr_read();
@@ -450,6 +468,7 @@ static inline u32 safe_apic_wait_icr_idle(void)
static inline u32 apic_read(u32 reg) { return 0; }
static inline void apic_write(u32 reg, u32 val) { }
+static inline void apic_eoi(void) { }
static inline u64 apic_icr_read(void) { return 0; }
static inline void apic_icr_write(u32 low, u32 high) { }
static inline void apic_wait_icr_idle(void) { }
@@ -463,9 +482,7 @@ static inline void ack_APIC_irq(void)
* ack_APIC_irq() actually gets compiled as a single instruction
* ... yummie.
*/
-
- /* Docs say use 0 for future compatibility */
- apic_write(APIC_EOI, 0);
+ apic_eoi();
}
static inline unsigned default_get_apic_id(unsigned long x)
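
A driver with no accelerated EOI path can point ->eoi_write at the same routine as ->write, while the MSR-based x2APIC path gets the dedicated native_apic_msr_eoi_write() above. A sketch of the two wirings (the struct names are illustrative; only the relevant callbacks are shown):

	/* x2APIC-style driver: dedicated MSR EOI write. */
	static struct apic apic_msr_example = {
		.read		= native_apic_msr_read,
		.write		= native_apic_msr_write,
		.eoi_write	= native_apic_msr_eoi_write,
	};

	/* MMIO-style driver: EOI is an ordinary register write. */
	static struct apic apic_mem_example = {
		.read		= native_apic_mem_read,
		.write		= native_apic_mem_write,
		.eoi_write	= native_apic_mem_write,	/* same callback */
	};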
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 134bba00df09..c46bb99d5fb2 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -37,7 +37,7 @@
#define APIC_ARBPRI_MASK 0xFFu
#define APIC_PROCPRI 0xA0
#define APIC_EOI 0xB0
-#define APIC_EIO_ACK 0x0
+#define APIC_EOI_ACK 0x0 /* Docs say 0 for future compat. */
#define APIC_RRR 0xC0
#define APIC_LDR 0xD0
#define APIC_LDR_MASK (0xFFu << 24)
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 198119910da5..b154de75c90c 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -63,7 +63,7 @@ ATOMIC64_DECL(add_unless);
/**
* atomic64_cmpxchg - cmpxchg atomic64 variable
- * @p: pointer to type atomic64_t
+ * @v: pointer to type atomic64_t
* @o: expected value
* @n: new value
*
@@ -98,7 +98,7 @@ static inline long long atomic64_xchg(atomic64_t *v, long long n)
/**
* atomic64_set - set atomic64 variable
* @v: pointer to type atomic64_t
- * @n: value to assign
+ * @i: value to assign
*
* Atomically sets the value of @v to @n.
*/
@@ -200,7 +200,7 @@ static inline long long atomic64_sub(long long i, atomic64_t *v)
* atomic64_sub_and_test - subtract value from variable and test result
* @i: integer value to subtract
* @v: pointer to type atomic64_t
- *
+ *
* Atomically subtracts @i from @v and returns
* true if the result is zero, or false for all
* other cases.
@@ -224,9 +224,9 @@ static inline void atomic64_inc(atomic64_t *v)
/**
* atomic64_dec - decrement atomic64 variable
- * @ptr: pointer to type atomic64_t
+ * @v: pointer to type atomic64_t
*
- * Atomically decrements @ptr by 1.
+ * Atomically decrements @v by 1.
*/
static inline void atomic64_dec(atomic64_t *v)
{
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 5e1a2eef3e7c..b13fe63bdc59 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -19,7 +19,7 @@
#ifdef CONFIG_X86_64
#define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT
#else
-#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_ORDER)
+#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_SIZE_ORDER)
#endif
#define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index d6805798d6fc..fedf32b73e65 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -229,7 +229,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
sp = task_pt_regs(current)->sp;
} else {
/* -128 for the x32 ABI redzone */
- sp = percpu_read(old_rsp) - 128;
+ sp = this_cpu_read(old_rsp) - 128;
}
return (void __user *)round_down(sp - len, 16);
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 4d447b732d82..9476c04ee635 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -11,7 +11,7 @@ DECLARE_PER_CPU(struct task_struct *, current_task);
static __always_inline struct task_struct *get_current(void)
{
- return percpu_read_stable(current_task);
+ return this_cpu_read_stable(current_task);
}
#define current get_current()
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index e95822d683f4..8bf1c06070d5 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -6,6 +6,7 @@
#include <asm/mmu.h>
#include <linux/smp.h>
+#include <linux/percpu.h>
static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info)
{
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 4fa88154e4de..75f4c6d6a331 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -290,14 +290,14 @@ static inline int __thread_has_fpu(struct task_struct *tsk)
static inline void __thread_clear_has_fpu(struct task_struct *tsk)
{
tsk->thread.fpu.has_fpu = 0;
- percpu_write(fpu_owner_task, NULL);
+ this_cpu_write(fpu_owner_task, NULL);
}
/* Must be paired with a 'clts' before! */
static inline void __thread_set_has_fpu(struct task_struct *tsk)
{
tsk->thread.fpu.has_fpu = 1;
- percpu_write(fpu_owner_task, tsk);
+ this_cpu_write(fpu_owner_task, tsk);
}
/*
@@ -344,7 +344,7 @@ typedef struct { int preload; } fpu_switch_t;
*/
static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
{
- return new == percpu_read_stable(fpu_owner_task) &&
+ return new == this_cpu_read_stable(fpu_owner_task) &&
cpu == new->thread.fpu.last_cpu;
}
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 268c783ab1c0..18d9005d9e4f 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -34,6 +34,7 @@
#ifndef __ASSEMBLY__
extern void mcount(void);
+extern int modifying_ftrace_code;
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
@@ -50,6 +51,8 @@ struct dyn_arch_ftrace {
/* No extra data needed for x86 */
};
+int ftrace_int3_handler(struct pt_regs *regs);
+
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 382f75d735f3..d3895dbf4ddb 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -35,14 +35,15 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
#define __ARCH_IRQ_STAT
-#define inc_irq_stat(member) percpu_inc(irq_stat.member)
+#define inc_irq_stat(member) this_cpu_inc(irq_stat.member)
-#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)
+#define local_softirq_pending() this_cpu_read(irq_stat.__softirq_pending)
#define __ARCH_SET_SOFTIRQ_PENDING
-#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x))
-#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x))
+#define set_softirq_pending(x) \
+ this_cpu_write(irq_stat.__softirq_pending, (x))
+#define or_softirq_pending(x) this_cpu_or(irq_stat.__softirq_pending, (x))
extern void ack_bad_irq(unsigned int irq);
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index ee52760549f0..b04cbdb138cd 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -144,6 +144,12 @@ typedef struct compat_siginfo {
int _band; /* POLL_IN, POLL_OUT, POLL_MSG */
int _fd;
} _sigpoll;
+
+ struct {
+ unsigned int _call_addr; /* calling insn */
+ int _syscall; /* triggering system call number */
+ unsigned int _arch; /* AUDIT_ARCH_* of syscall */
+ } _sigsys;
} _sifields;
} compat_siginfo_t;
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 2c4943de5150..73d8c5398ea9 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -5,7 +5,7 @@
#include <asm/mpspec.h>
#include <asm/apicdef.h>
#include <asm/irq_vectors.h>
-
+#include <asm/x86_init.h>
/*
* Intel IO-APIC support for SMP and UP systems.
*
@@ -21,15 +21,6 @@
#define IO_APIC_REDIR_LEVEL_TRIGGER (1 << 15)
#define IO_APIC_REDIR_MASKED (1 << 16)
-struct io_apic_ops {
- void (*init) (void);
- unsigned int (*read) (unsigned int apic, unsigned int reg);
- void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
- void (*modify)(unsigned int apic, unsigned int reg, unsigned int value);
-};
-
-void __init set_io_apic_ops(const struct io_apic_ops *);
-
/*
* The structure of the IO-APIC:
*/
@@ -156,7 +147,6 @@ struct io_apic_irq_attr;
extern int io_apic_set_pci_routing(struct device *dev, int irq,
struct io_apic_irq_attr *irq_attr);
void setup_IO_APIC_irq_extra(u32 gsi);
-extern void ioapic_and_gsi_init(void);
extern void ioapic_insert_resources(void);
int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
@@ -185,12 +175,29 @@ extern void mp_save_irq(struct mpc_intsrc *m);
extern void disable_ioapic_support(void);
+extern void __init native_io_apic_init_mappings(void);
+extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg);
+extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val);
+extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+ return x86_io_apic_ops.read(apic, reg);
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+ x86_io_apic_ops.write(apic, reg, value);
+}
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+{
+ x86_io_apic_ops.modify(apic, reg, value);
+}
#else /* !CONFIG_X86_IO_APIC */
#define io_apic_assign_pci_irqs 0
#define setup_ioapic_ids_from_mpc x86_init_noop
static const int timer_through_8259 = 0;
-static inline void ioapic_and_gsi_init(void) { }
static inline void ioapic_insert_resources(void) { }
#define gsi_top (NR_IRQS_LEGACY)
static inline int mp_find_ioapic(u32 gsi) { return 0; }
@@ -212,6 +219,10 @@ static inline int restore_ioapic_entries(void)
static inline void mp_save_irq(struct mpc_intsrc *m) { };
static inline void disable_ioapic_support(void) { }
+#define native_io_apic_init_mappings NULL
+#define native_io_apic_read NULL
+#define native_io_apic_write NULL
+#define native_io_apic_modify NULL
#endif
#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h
index 77843225b7ea..d82250b1debb 100644
--- a/arch/x86/include/asm/irq_regs.h
+++ b/arch/x86/include/asm/irq_regs.h
@@ -15,7 +15,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs);
static inline struct pt_regs *get_irq_regs(void)
{
- return percpu_read(irq_regs);
+ return this_cpu_read(irq_regs);
}
static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
@@ -23,7 +23,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
struct pt_regs *old_regs;
old_regs = get_irq_regs();
- percpu_write(irq_regs, new_regs);
+ this_cpu_write(irq_regs, new_regs);
return old_regs;
}
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 47d99934580f..5fb9bbbd2f14 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -1,45 +1,101 @@
-#ifndef _ASM_X86_IRQ_REMAPPING_H
-#define _ASM_X86_IRQ_REMAPPING_H
+/*
+ * Copyright (C) 2012 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * This header file contains the interface of the interrupt remapping code to
+ * the x86 interrupt management code.
+ */
-#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
+#ifndef __X86_IRQ_REMAPPING_H
+#define __X86_IRQ_REMAPPING_H
+
+#include <asm/io_apic.h>
#ifdef CONFIG_IRQ_REMAP
-static void irq_remap_modify_chip_defaults(struct irq_chip *chip);
-static inline void prepare_irte(struct irte *irte, int vector,
- unsigned int dest)
+
+extern int irq_remapping_enabled;
+
+extern void setup_irq_remapping_ops(void);
+extern int irq_remapping_supported(void);
+extern int irq_remapping_prepare(void);
+extern int irq_remapping_enable(void);
+extern void irq_remapping_disable(void);
+extern int irq_remapping_reenable(int);
+extern int irq_remap_enable_fault_handling(void);
+extern int setup_ioapic_remapped_entry(int irq,
+ struct IO_APIC_route_entry *entry,
+ unsigned int destination,
+ int vector,
+ struct io_apic_irq_attr *attr);
+extern int set_remapped_irq_affinity(struct irq_data *data,
+ const struct cpumask *mask,
+ bool force);
+extern void free_remapped_irq(int irq);
+extern void compose_remapped_msi_msg(struct pci_dev *pdev,
+ unsigned int irq, unsigned int dest,
+ struct msi_msg *msg, u8 hpet_id);
+extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
+extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
+ int index, int sub_handle);
+extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
+
+#else /* CONFIG_IRQ_REMAP */
+
+#define irq_remapping_enabled 0
+
+static inline void setup_irq_remapping_ops(void) { }
+static inline int irq_remapping_supported(void) { return 0; }
+static inline int irq_remapping_prepare(void) { return -ENODEV; }
+static inline int irq_remapping_enable(void) { return -ENODEV; }
+static inline void irq_remapping_disable(void) { }
+static inline int irq_remapping_reenable(int eim) { return -ENODEV; }
+static inline int irq_remap_enable_fault_handling(void) { return -ENODEV; }
+static inline int setup_ioapic_remapped_entry(int irq,
+ struct IO_APIC_route_entry *entry,
+ unsigned int destination,
+ int vector,
+ struct io_apic_irq_attr *attr)
+{
+ return -ENODEV;
+}
+static inline int set_remapped_irq_affinity(struct irq_data *data,
+ const struct cpumask *mask,
+ bool force)
{
- memset(irte, 0, sizeof(*irte));
-
- irte->present = 1;
- irte->dst_mode = apic->irq_dest_mode;
- /*
- * Trigger mode in the IRTE will always be edge, and for IO-APIC, the
- * actual level or edge trigger will be setup in the IO-APIC
- * RTE. This will help simplify level triggered irq migration.
- * For more details, see the comments (in io_apic.c) explainig IO-APIC
- * irq migration in the presence of interrupt-remapping.
- */
- irte->trigger_mode = 0;
- irte->dlvry_mode = apic->irq_delivery_mode;
- irte->vector = vector;
- irte->dest_id = IRTE_DEST(dest);
- irte->redir_hint = 1;
+ return 0;
}
-static inline bool irq_remapped(struct irq_cfg *cfg)
+static inline void free_remapped_irq(int irq) { }
+static inline void compose_remapped_msi_msg(struct pci_dev *pdev,
+ unsigned int irq, unsigned int dest,
+ struct msi_msg *msg, u8 hpet_id)
{
- return cfg->irq_2_iommu.iommu != NULL;
}
-#else
-static void prepare_irte(struct irte *irte, int vector, unsigned int dest)
+static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
{
+ return -ENODEV;
}
-static inline bool irq_remapped(struct irq_cfg *cfg)
+static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
+ int index, int sub_handle)
{
- return false;
+ return -ENODEV;
}
-static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
+static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
{
+ return -ENODEV;
}
-#endif
+#endif /* CONFIG_IRQ_REMAP */
-#endif /* _ASM_X86_IRQ_REMAPPING_H */
+#endif /* __X86_IRQ_REMAPPING_H */
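
Since the !CONFIG_IRQ_REMAP stubs return -ENODEV (or do nothing), callers can be written without #ifdef guards. A sketch of the calling pattern the new interface allows (the function below is illustrative, not part of the patch):

	/* Compiles and fails gracefully when IRQ_REMAP is disabled. */
	static int __init try_enable_irq_remapping(void)
	{
		if (!irq_remapping_supported())
			return -ENODEV;
		if (irq_remapping_prepare())
			return -ENODEV;
		return irq_remapping_enable();
	}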
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index d73f1571bde7..2c37aadcbc35 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -24,7 +24,6 @@ enum die_val {
extern void printk_address(unsigned long address, int reliable);
extern void die(const char *, struct pt_regs *,long);
extern int __must_check __die(const char *, struct pt_regs *, long);
-extern void show_registers(struct pt_regs *regs);
extern void show_trace(struct task_struct *t, struct pt_regs *regs,
unsigned long *sp, unsigned long bp);
extern void __show_regs(struct pt_regs *regs, int all);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 734c3767cfac..183922e13de1 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -170,6 +170,9 @@ static inline int kvm_para_available(void)
unsigned int eax, ebx, ecx, edx;
char signature[13];
+ if (boot_cpu_data.cpuid_level < 0)
+ return 0; /* So we don't blow up on old processors */
+
cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
memcpy(signature + 0, &ebx, 4);
memcpy(signature + 4, &ecx, 4);
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 69021528b43c..cdbf36776106 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -25,8 +25,8 @@ void destroy_context(struct mm_struct *mm);
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
#ifdef CONFIG_SMP
- if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
- percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
+ if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
#endif
}
@@ -37,8 +37,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
if (likely(prev != next)) {
#ifdef CONFIG_SMP
- percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
- percpu_write(cpu_tlbstate.active_mm, next);
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ this_cpu_write(cpu_tlbstate.active_mm, next);
#endif
cpumask_set_cpu(cpu, mm_cpumask(next));
@@ -56,8 +56,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
}
#ifdef CONFIG_SMP
else {
- percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
- BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) {
/* We were in lazy tlb mode and leave_mm disabled
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ccb805966f68..957ec87385af 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -134,6 +134,8 @@
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
+#define MSR_AMD64_IBSFETCH_REG_COUNT 3
+#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
#define MSR_AMD64_IBSOPCTL 0xc0011033
#define MSR_AMD64_IBSOPRIP 0xc0011034
#define MSR_AMD64_IBSOPDATA 0xc0011035
@@ -141,8 +143,11 @@
#define MSR_AMD64_IBSOPDATA3 0xc0011037
#define MSR_AMD64_IBSDCLINAD 0xc0011038
#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
+#define MSR_AMD64_IBSOP_REG_COUNT 7
+#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
#define MSR_AMD64_IBSCTL 0xc001103a
#define MSR_AMD64_IBSBRTARGET 0xc001103b
+#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
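
The new *_REG_COUNT constants describe how many consecutively numbered MSRs make up an IBS fetch or op sample (starting at MSR_AMD64_IBSFETCHCTL and MSR_AMD64_IBSOPCTL respectively), so a profiler can read a sample window in a loop. A sketch (buf is a hypothetical buffer; rdmsrl() as in <asm/msr.h>):

	u64 buf[MSR_AMD64_IBSFETCH_REG_COUNT];
	int i;

	/* Read the consecutive IBS fetch MSRs into the sample buffer. */
	for (i = 0; i < MSR_AMD64_IBSFETCH_REG_COUNT; i++)
		rdmsrl(MSR_AMD64_IBSFETCHCTL + i, buf[i]);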
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index fd3f9f18cf3f..0e3793b821ef 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -27,6 +27,8 @@ void arch_trigger_all_cpu_backtrace(void);
enum {
NMI_LOCAL=0,
NMI_UNKNOWN,
+ NMI_SERR,
+ NMI_IO_CHECK,
NMI_MAX
};
@@ -35,8 +37,24 @@ enum {
typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *);
-int register_nmi_handler(unsigned int, nmi_handler_t, unsigned long,
- const char *);
+struct nmiaction {
+ struct list_head list;
+ nmi_handler_t handler;
+ unsigned long flags;
+ const char *name;
+};
+
+#define register_nmi_handler(t, fn, fg, n) \
+({ \
+ static struct nmiaction fn##_na = { \
+ .handler = (fn), \
+ .name = (n), \
+ .flags = (fg), \
+ }; \
+ __register_nmi_handler((t), &fn##_na); \
+})
+
+int __register_nmi_handler(unsigned int, struct nmiaction *);
void unregister_nmi_handler(unsigned int, const char *);
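
register_nmi_handler() now instantiates a static struct nmiaction at the call site and passes it to __register_nmi_handler(), so registration needs no allocation. A usage sketch (handler and names are illustrative; NMI_DONE/NMI_HANDLED are the usual return codes):

	static int example_nmi(unsigned int type, struct pt_regs *regs)
	{
		/* Return NMI_HANDLED only if this NMI was ours. */
		return NMI_DONE;
	}

	static int __init example_init(void)
	{
		/* Expands to a static struct nmiaction named example_nmi_na. */
		return register_nmi_handler(NMI_LOCAL, example_nmi, 0, "example");
	}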
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index ade619ff9e2a..ef17af013475 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -15,8 +15,8 @@
*/
#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
-#define THREAD_ORDER 1
-#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
+#define THREAD_SIZE_ORDER 1
+#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
#define STACKFAULT_STACK 0
#define DOUBLEFAULT_STACK 1
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 7639dbf5d223..320f7bb95f76 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -1,8 +1,8 @@
#ifndef _ASM_X86_PAGE_64_DEFS_H
#define _ASM_X86_PAGE_64_DEFS_H
-#define THREAD_ORDER 1
-#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
+#define THREAD_SIZE_ORDER 1
+#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
#define CURRENT_MASK (~(THREAD_SIZE - 1))
#define EXCEPTION_STACK_ORDER 0
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 7a11910a63c4..d9b8e3f7f42a 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -46,7 +46,7 @@
#ifdef CONFIG_SMP
#define __percpu_prefix "%%"__stringify(__percpu_seg)":"
-#define __my_cpu_offset percpu_read(this_cpu_off)
+#define __my_cpu_offset this_cpu_read(this_cpu_off)
/*
* Compared to the generic __my_cpu_offset version, the following
@@ -351,23 +351,15 @@ do { \
})
/*
- * percpu_read() makes gcc load the percpu variable every time it is
- * accessed while percpu_read_stable() allows the value to be cached.
- * percpu_read_stable() is more efficient and can be used if its value
+ * this_cpu_read() makes gcc load the percpu variable every time it is
+ * accessed while this_cpu_read_stable() allows the value to be cached.
+ * this_cpu_read_stable() is more efficient and can be used if its value
* is guaranteed to be valid across cpus. The current users include
* get_current() and get_thread_info() both of which are actually
* per-thread variables implemented as per-cpu variables and thus
* stable for the duration of the respective task.
*/
-#define percpu_read(var) percpu_from_op("mov", var, "m" (var))
-#define percpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var)))
-#define percpu_write(var, val) percpu_to_op("mov", var, val)
-#define percpu_add(var, val) percpu_add_op(var, val)
-#define percpu_sub(var, val) percpu_add_op(var, -(val))
-#define percpu_and(var, val) percpu_to_op("and", var, val)
-#define percpu_or(var, val) percpu_to_op("or", var, val)
-#define percpu_xor(var, val) percpu_to_op("xor", var, val)
-#define percpu_inc(var) percpu_unary_op("inc", var)
+#define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var)))
#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
@@ -512,7 +504,11 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
{
unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
- return ((1UL << (nr % BITS_PER_LONG)) & percpu_read(*a)) != 0;
+#ifdef CONFIG_X86_64
+ return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_8(*a)) != 0;
+#else
+ return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_4(*a)) != 0;
+#endif
}
static inline int x86_this_cpu_variable_test_bit(int nr,
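
The removed percpu_*() macros are replaced throughout this series by the generic this_cpu_*() operations; the conversion is mechanical. A sketch of the before/after shape (cpu_number and irq_regs are real per-CPU variables; the old forms appear as comments):

	int cpu;

	cpu = this_cpu_read(cpu_number);	/* was: percpu_read(cpu_number) */
	this_cpu_write(irq_regs, NULL);		/* was: percpu_write(irq_regs, NULL) */
	this_cpu_inc(irq_stat.irq_call_count);	/* was: percpu_inc(...) */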
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 2291895b1836..588f52ea810e 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -158,6 +158,7 @@ struct x86_pmu_capability {
#define IBS_CAPS_OPCNT (1U<<4)
#define IBS_CAPS_BRNTRGT (1U<<5)
#define IBS_CAPS_OPCNTEXT (1U<<6)
+#define IBS_CAPS_RIPINVALIDCHK (1U<<7)
#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
| IBS_CAPS_FETCHSAM \
@@ -170,21 +171,28 @@ struct x86_pmu_capability {
#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8)
#define IBSCTL_LVT_OFFSET_MASK 0x0F
-/* IbsFetchCtl bits/masks */
+/* ibs fetch bits/masks */
#define IBS_FETCH_RAND_EN (1ULL<<57)
#define IBS_FETCH_VAL (1ULL<<49)
#define IBS_FETCH_ENABLE (1ULL<<48)
#define IBS_FETCH_CNT 0xFFFF0000ULL
#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
-/* IbsOpCtl bits */
+/* ibs op bits/masks */
+/* lower 4 bits of the current count are ignored: */
+#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32)
#define IBS_OP_CNT_CTL (1ULL<<19)
#define IBS_OP_VAL (1ULL<<18)
#define IBS_OP_ENABLE (1ULL<<17)
#define IBS_OP_MAX_CNT 0x0000FFFFULL
#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
+#define IBS_RIP_INVALID (1ULL<<38)
+#ifdef CONFIG_X86_LOCAL_APIC
extern u32 get_ibs_caps(void);
+#else
+static inline u32 get_ibs_caps(void) { return 0; }
+#endif
#ifdef CONFIG_PERF_EVENTS
extern void perf_events_lapic_init(void);
diff --git a/arch/x86/include/asm/posix_types.h b/arch/x86/include/asm/posix_types.h
index 3427b7798dbc..7ef7c3020e5c 100644
--- a/arch/x86/include/asm/posix_types.h
+++ b/arch/x86/include/asm/posix_types.h
@@ -7,9 +7,9 @@
#else
# ifdef __i386__
# include "posix_types_32.h"
-# elif defined(__LP64__)
-# include "posix_types_64.h"
-# else
+# elif defined(__ILP32__)
# include "posix_types_x32.h"
+# else
+# include "posix_types_64.h"
# endif
#endif
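
The dispatch keys on __ILP32__ rather than __LP64__ because gcc defines __ILP32__ (together with __x86_64__) for -mx32, while plain -m64 defines __LP64__; testing the x32 case explicitly makes 64-bit the fallback. The same three-way test, as a compile-time sketch:

	#if defined(__i386__)
	# define X86_ABI "i386"
	#elif defined(__ILP32__)
	# define X86_ABI "x32"	/* 64-bit insns, 32-bit longs/pointers */
	#else
	# define X86_ABI "x86-64"
	#endif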
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 4fa7dcceb6c0..ccbb1ea99ccb 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -974,8 +974,6 @@ extern bool cpu_has_amd_erratum(const int *);
#define cpu_has_amd_erratum(x) (false)
#endif /* CONFIG_CPU_SUP_AMD */
-void cpu_idle_wait(void);
-
extern unsigned long arch_align_stack(unsigned long sp);
extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h
index 4a085383af27..5ca71c065eef 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -257,7 +257,7 @@ struct sigcontext {
__u64 oldmask;
__u64 cr2;
struct _fpstate __user *fpstate; /* zero when no FPU context */
-#ifndef __LP64__
+#ifdef __ILP32__
__u32 __fpstate_pad;
#endif
__u64 reserved1[8];
diff --git a/arch/x86/include/asm/siginfo.h b/arch/x86/include/asm/siginfo.h
index fc1aa5535646..34c47b3341c0 100644
--- a/arch/x86/include/asm/siginfo.h
+++ b/arch/x86/include/asm/siginfo.h
@@ -2,7 +2,13 @@
#define _ASM_X86_SIGINFO_H
#ifdef __x86_64__
-# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int))
+# ifdef __ILP32__ /* x32 */
+typedef long long __kernel_si_clock_t __attribute__((aligned(4)));
+# define __ARCH_SI_CLOCK_T __kernel_si_clock_t
+# define __ARCH_SI_ATTRIBUTES __attribute__((aligned(8)))
+# else /* x86-64 */
+# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int))
+# endif
#endif
#include <asm-generic/siginfo.h>
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 0434c400287c..f48394513c37 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -62,6 +62,8 @@ DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid);
/* Static state in head.S used to set up a CPU */
extern unsigned long stack_start; /* Initial stack pointer address */
+struct task_struct;
+
struct smp_ops {
void (*smp_prepare_boot_cpu)(void);
void (*smp_prepare_cpus)(unsigned max_cpus);
@@ -70,7 +72,7 @@ struct smp_ops {
void (*stop_other_cpus)(int wait);
void (*smp_send_reschedule)(int cpu);
- int (*cpu_up)(unsigned cpu);
+ int (*cpu_up)(unsigned cpu, struct task_struct *tidle);
int (*cpu_disable)(void);
void (*cpu_die)(unsigned int cpu);
void (*play_dead)(void);
@@ -113,9 +115,9 @@ static inline void smp_cpus_done(unsigned int max_cpus)
smp_ops.smp_cpus_done(max_cpus);
}
-static inline int __cpu_up(unsigned int cpu)
+static inline int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
- return smp_ops.cpu_up(cpu);
+ return smp_ops.cpu_up(cpu, tidle);
}
static inline int __cpu_disable(void)
@@ -152,7 +154,7 @@ void cpu_disable_common(void);
void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
void native_smp_cpus_done(unsigned int max_cpus);
-int native_cpu_up(unsigned int cpunum);
+int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_disable(void);
void native_cpu_die(unsigned int cpu);
void native_play_dead(void);
@@ -162,6 +164,7 @@ int wbinvd_on_all_cpus(void);
void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
+void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
void smp_store_cpu_info(int id);
#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
@@ -188,11 +191,11 @@ extern unsigned disabled_cpus __cpuinitdata;
* from the initial startup. We map APIC_BASE very early in page_setup(),
* so this is correct in the x86 case.
*/
-#define raw_smp_processor_id() (percpu_read(cpu_number))
+#define raw_smp_processor_id() (this_cpu_read(cpu_number))
extern int safe_smp_processor_id(void);
#elif defined(CONFIG_X86_64_SMP)
-#define raw_smp_processor_id() (percpu_read(cpu_number))
+#define raw_smp_processor_id() (this_cpu_read(cpu_number))
#define stack_smp_processor_id() \
({ \
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 76bfa2cf301d..b315a33867f2 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -20,10 +20,8 @@
#ifdef CONFIG_X86_32
# define LOCK_PTR_REG "a"
-# define REG_PTR_MODE "k"
#else
# define LOCK_PTR_REG "D"
-# define REG_PTR_MODE "q"
#endif
#if defined(CONFIG_X86_32) && \
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index b5d9533d2c38..6a998598f172 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -75,9 +75,9 @@ static __always_inline void boot_init_stack_canary(void)
current->stack_canary = canary;
#ifdef CONFIG_X86_64
- percpu_write(irq_stack_union.stack_canary, canary);
+ this_cpu_write(irq_stack_union.stack_canary, canary);
#else
- percpu_write(stack_canary.canary, canary);
+ this_cpu_write(stack_canary.canary, canary);
#endif
}
diff --git a/arch/x86/include/asm/stat.h b/arch/x86/include/asm/stat.h
index e0b1d9bbcbc6..7b3ddc348585 100644
--- a/arch/x86/include/asm/stat.h
+++ b/arch/x86/include/asm/stat.h
@@ -25,6 +25,12 @@ struct stat {
unsigned long __unused5;
};
+/* We don't need to memset the whole thing just to initialize the padding */
+#define INIT_STRUCT_STAT_PADDING(st) do { \
+ st.__unused4 = 0; \
+ st.__unused5 = 0; \
+} while (0)
+
#define STAT64_HAS_BROKEN_ST_INO 1
/* This matches struct stat64 in glibc2.1, hence the absolutely
@@ -63,6 +69,12 @@ struct stat64 {
unsigned long long st_ino;
};
+/* We don't need to memset the whole thing just to initialize the padding */
+#define INIT_STRUCT_STAT64_PADDING(st) do { \
+ memset(&st.__pad0, 0, sizeof(st.__pad0)); \
+ memset(&st.__pad3, 0, sizeof(st.__pad3)); \
+} while (0)
+
#else /* __i386__ */
struct stat {
@@ -87,6 +99,15 @@ struct stat {
unsigned long st_ctime_nsec;
long __unused[3];
};
+
+/* We don't need to memset the whole thing just to initialize the padding */
+#define INIT_STRUCT_STAT_PADDING(st) do { \
+ st.__pad0 = 0; \
+ st.__unused[0] = 0; \
+ st.__unused[1] = 0; \
+ st.__unused[2] = 0; \
+} while (0)
+
#endif
/* for 32bit emulation and 32 bit kernels */
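
These macros let stat-filling code clear only the padding instead of memset()ing the whole structure before assigning every real field. A sketch of the intended call pattern, loosely modeled on fs/stat.c's cp_new_stat() (field fills abbreviated):

	struct stat tmp;

	INIT_STRUCT_STAT_PADDING(tmp);	/* zero only the pad/unused fields */
	tmp.st_ino = stat->ino;		/* ...then assign every real field... */
	tmp.st_mode = stat->mode;
	/* ... */
	return copy_to_user(statbuf, &tmp, sizeof(tmp)) ? -EFAULT : 0;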
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 386b78686c4d..1ace47b62592 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -13,9 +13,11 @@
#ifndef _ASM_X86_SYSCALL_H
#define _ASM_X86_SYSCALL_H
+#include <linux/audit.h>
#include <linux/sched.h>
#include <linux/err.h>
#include <asm/asm-offsets.h> /* For NR_syscalls */
+#include <asm/thread_info.h> /* for TS_COMPAT */
#include <asm/unistd.h>
extern const unsigned long sys_call_table[];
@@ -88,6 +90,12 @@ static inline void syscall_set_arguments(struct task_struct *task,
memcpy(&regs->bx + i, args, n * sizeof(args[0]));
}
+static inline int syscall_get_arch(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ return AUDIT_ARCH_I386;
+}
+
#else /* CONFIG_X86_64 */
static inline void syscall_get_arguments(struct task_struct *task,
@@ -212,6 +220,25 @@ static inline void syscall_set_arguments(struct task_struct *task,
}
}
+static inline int syscall_get_arch(struct task_struct *task,
+ struct pt_regs *regs)
+{
+#ifdef CONFIG_IA32_EMULATION
+ /*
+ * TS_COMPAT is set for 32-bit syscall entry and then
+ * remains set until we return to user mode.
+ *
+ * TIF_IA32 tasks should always have TS_COMPAT set at
+ * system call time.
+ *
+ * x32 tasks should be considered AUDIT_ARCH_X86_64.
+ */
+ if (task_thread_info(task)->status & TS_COMPAT)
+ return AUDIT_ARCH_I386;
+#endif
+ /* Both x32 and x86_64 are considered "64-bit". */
+ return AUDIT_ARCH_X86_64;
+}
#endif /* CONFIG_X86_32 */
#endif /* _ASM_X86_SYSCALL_H */
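
syscall_get_arch() gives audit and seccomp a single way to classify the ABI of the current syscall entry. A sketch of a consumer (illustrative; the real users are the audit/seccomp paths):

	/* Pass only native 64-bit syscalls; x32 also reports X86_64. */
	static bool syscall_is_native_64(struct task_struct *task,
					 struct pt_regs *regs)
	{
		return syscall_get_arch(task, regs) == AUDIT_ARCH_X86_64;
	}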
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ad6df8ccd715..3c9aebc00d39 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -155,24 +155,6 @@ struct thread_info {
#define PREEMPT_ACTIVE 0x10000000
-/* thread information allocation */
-#ifdef CONFIG_DEBUG_STACK_USAGE
-#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
-#else
-#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK)
-#endif
-
-#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
-
-#define alloc_thread_info_node(tsk, node) \
-({ \
- struct page *page = alloc_pages_node(node, THREAD_FLAGS, \
- THREAD_ORDER); \
- struct thread_info *ret = page ? page_address(page) : NULL; \
- \
- ret; \
-})
-
#ifdef CONFIG_X86_32
#define STACK_WARN (THREAD_SIZE/8)
@@ -222,7 +204,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
static inline struct thread_info *current_thread_info(void)
{
struct thread_info *ti;
- ti = (void *)(percpu_read_stable(kernel_stack) +
+ ti = (void *)(this_cpu_read_stable(kernel_stack) +
KERNEL_STACK_OFFSET - THREAD_SIZE);
return ti;
}
@@ -282,8 +264,7 @@ static inline bool is_ia32_task(void)
#ifndef __ASSEMBLY__
extern void arch_task_cache_init(void);
-extern void free_thread_info(struct thread_info *ti);
extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
-#define arch_task_cache_init arch_task_cache_init
+extern void arch_release_task_struct(struct task_struct *tsk);
#endif
#endif /* _ASM_X86_THREAD_INFO_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index c0e108e08079..1620d23f14d7 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -156,8 +156,8 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
static inline void reset_lazy_tlbstate(void)
{
- percpu_write(cpu_tlbstate.state, 0);
- percpu_write(cpu_tlbstate.active_mm, &init_mm);
+ this_cpu_write(cpu_tlbstate.state, 0);
+ this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
}
#endif /* SMP */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index b9676ae37ada..095b21507b6a 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -92,44 +92,6 @@ extern void setup_node_to_cpumask_map(void);
#define pcibus_to_node(bus) __pcibus_to_node(bus)
-#ifdef CONFIG_X86_32
-# define SD_CACHE_NICE_TRIES 1
-# define SD_IDLE_IDX 1
-#else
-# define SD_CACHE_NICE_TRIES 2
-# define SD_IDLE_IDX 2
-#endif
-
-/* sched_domains SD_NODE_INIT for NUMA machines */
-#define SD_NODE_INIT (struct sched_domain) { \
- .min_interval = 8, \
- .max_interval = 32, \
- .busy_factor = 32, \
- .imbalance_pct = 125, \
- .cache_nice_tries = SD_CACHE_NICE_TRIES, \
- .busy_idx = 3, \
- .idle_idx = SD_IDLE_IDX, \
- .newidle_idx = 0, \
- .wake_idx = 0, \
- .forkexec_idx = 0, \
- \
- .flags = 1*SD_LOAD_BALANCE \
- | 1*SD_BALANCE_NEWIDLE \
- | 1*SD_BALANCE_EXEC \
- | 1*SD_BALANCE_FORK \
- | 0*SD_BALANCE_WAKE \
- | 1*SD_WAKE_AFFINE \
- | 0*SD_PREFER_LOCAL \
- | 0*SD_SHARE_CPUPOWER \
- | 0*SD_POWERSAVINGS_BALANCE \
- | 0*SD_SHARE_PKG_RESOURCES \
- | 1*SD_SERIALIZE \
- | 0*SD_PREFER_SIBLING \
- , \
- .last_balance = jiffies, \
- .balance_interval = 1, \
-}
-
extern int __node_distance(int, int);
#define node_distance(a, b) __node_distance(a, b)
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 37cdc9d99bb1..4437001d8e3d 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -63,10 +63,10 @@
#else
# ifdef __i386__
# include <asm/unistd_32.h>
-# elif defined(__LP64__)
-# include <asm/unistd_64.h>
-# else
+# elif defined(__ILP32__)
# include <asm/unistd_x32.h>
+# else
+# include <asm/unistd_64.h>
# endif
#endif
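The reordered test keys off the compiler's data model: -m64 defines __LP64__, -mx32 defines __ILP32__ (together with __x86_64__), and -m32 defines __i386__. Testing __ILP32__ and defaulting to the 64-bit table makes x32 the explicitly-detected case, so a toolchain that defines neither macro falls back to the native 64-bit ABI. Roughly, the selection behaves like this probe (illustrative only):

#ifdef __i386__
# define SYSCALL_TABLE "unistd_32.h"
#elif defined(__ILP32__)
# define SYSCALL_TABLE "unistd_x32.h"	/* gcc -mx32 */
#else
# define SYSCALL_TABLE "unistd_64.h"	/* gcc -m64 */
#endif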
diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h
index 6fe6767b7124..e58f03b206c3 100644
--- a/arch/x86/include/asm/word-at-a-time.h
+++ b/arch/x86/include/asm/word-at-a-time.h
@@ -43,4 +43,37 @@ static inline unsigned long has_zero(unsigned long a)
return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80);
}
+/*
+ * Load an unaligned word from kernel space.
+ *
+ * In the (very unlikely) case of the word being a page-crosser
+ * and the next page not being mapped, take the exception and
+ * return zeroes in the non-existing part.
+ */
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+ unsigned long ret, dummy;
+
+ asm(
+ "1:\tmov %2,%0\n"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3:\t"
+ "lea %2,%1\n\t"
+ "and %3,%1\n\t"
+ "mov (%1),%0\n\t"
+ "leal %2,%%ecx\n\t"
+ "andl %4,%%ecx\n\t"
+ "shll $3,%%ecx\n\t"
+ "shr %%cl,%0\n\t"
+ "jmp 2b\n"
+ ".previous\n"
+ _ASM_EXTABLE(1b, 3b)
+ :"=&r" (ret),"=&c" (dummy)
+ :"m" (*(unsigned long *)addr),
+ "i" (-sizeof(unsigned long)),
+ "i" (sizeof(unsigned long)-1));
+ return ret;
+}
+
#endif /* _ASM_WORD_AT_A_TIME_H */
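A typical caller scans a string one word at a time, using has_zero() from earlier in this header to spot the terminating NUL and load_unaligned_zeropad() to read safely up to (and past) a page boundary. A hypothetical little-endian sketch, not the kernel's actual user:

static inline size_t wordwise_strlen(const char *s)
{
	const unsigned long *p = (const unsigned long *)s;
	unsigned long mask;

	/* has_zero() sets 0x80 in each byte at or after the first NUL */
	while (!(mask = has_zero(load_unaligned_zeropad(p))))
		p++;

	/* on little-endian, the lowest set bit locates the first NUL */
	return (const char *)p - s + (__builtin_ctzl(mask) >> 3);
}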
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index baaca8defec8..c090af10ac7d 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -188,13 +188,19 @@ struct x86_msi_ops {
void (*restore_msi_irqs)(struct pci_dev *dev, int irq);
};
+struct x86_io_apic_ops {
+ void (*init) (void);
+ unsigned int (*read) (unsigned int apic, unsigned int reg);
+ void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
+ void (*modify)(unsigned int apic, unsigned int reg, unsigned int value);
+};
+
extern struct x86_init_ops x86_init;
extern struct x86_cpuinit_ops x86_cpuinit;
extern struct x86_platform_ops x86_platform;
extern struct x86_msi_ops x86_msi;
-
+extern struct x86_io_apic_ops x86_io_apic_ops;
extern void x86_init_noop(void);
extern void x86_init_uint_noop(unsigned int unused);
-extern void x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node);
#endif
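Like x86_msi before it, the new x86_io_apic_ops table is a hook point for paravirtualized platforms: a guest can replace the register accessors so IO-APIC accesses trap to the hypervisor instead of touching MMIO. A hypothetical override (names are illustrative):

static unsigned int myplat_io_apic_read(unsigned int apic, unsigned int reg)
{
	/* route the access through the hypervisor instead of MMIO */
	return 0;
}

static void __init myplat_arch_setup(void)
{
	x86_io_apic_ops.read = myplat_io_apic_read;
}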
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 532d2e090e6f..56ebd1f98447 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -2,7 +2,7 @@
# Makefile for the linux kernel.
#
-extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinux.lds
+extra-y := head_$(BITS).o head$(BITS).o head.o vmlinux.lds
CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index a415b1f44365..7c439fe4941b 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -593,7 +593,7 @@ void __init acpi_set_irq_model_ioapic(void)
#ifdef CONFIG_ACPI_HOTPLUG_CPU
#include <acpi/processor.h>
-static void __cpuinitdata acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
+static void __cpuinit acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
{
#ifdef CONFIG_ACPI_NUMA
int nid;
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 103b6ab368d3..146a49c763a4 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -24,6 +24,10 @@ unsigned long acpi_realmode_flags;
static char temp_stack[4096];
#endif
+asmlinkage void acpi_enter_s3(void)
+{
+ acpi_enter_sleep_state(3, wake_sleep_flags);
+}
/**
* acpi_suspend_lowlevel - save kernel state
*
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index 416d4be13fef..d68677a2a010 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -3,12 +3,16 @@
*/
#include <asm/trampoline.h>
+#include <linux/linkage.h>
extern unsigned long saved_video_mode;
extern long saved_magic;
extern int wakeup_pmode_return;
+extern u8 wake_sleep_flags;
+extern asmlinkage void acpi_enter_s3(void);
+
extern unsigned long acpi_copy_wakeup_routine(unsigned long);
extern void wakeup_long64(void);
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index 13ab720573e3..72610839f03b 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -74,9 +74,7 @@ restore_registers:
ENTRY(do_suspend_lowlevel)
call save_processor_state
call save_registers
- pushl $3
- call acpi_enter_sleep_state
- addl $4, %esp
+ call acpi_enter_s3
# In case of S3 failure, we'll emerge here. Jump
# to ret_point to recover
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 8ea5164cbd04..014d1d28c397 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -71,9 +71,7 @@ ENTRY(do_suspend_lowlevel)
movq %rsi, saved_rsi
addq $8, %rsp
- movl $3, %edi
- xorl %eax, %eax
- call acpi_enter_sleep_state
+ call acpi_enter_s3
/* in case something went wrong, restore the machine status and go on */
jmp resume_point
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 11544d8f1e97..39a222e094af 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -35,6 +35,7 @@
#include <linux/smp.h>
#include <linux/mm.h>
+#include <asm/irq_remapping.h>
#include <asm/perf_event.h>
#include <asm/x86_init.h>
#include <asm/pgalloc.h>
@@ -1325,11 +1326,13 @@ void __cpuinit setup_local_APIC(void)
acked);
break;
}
- if (cpu_has_tsc) {
- rdtscll(ntsc);
- max_loops = (cpu_khz << 10) - (ntsc - tsc);
- } else
- max_loops--;
+ if (queued) {
+ if (cpu_has_tsc) {
+ rdtscll(ntsc);
+ max_loops = (cpu_khz << 10) - (ntsc - tsc);
+ } else
+ max_loops--;
+ }
} while (queued && max_loops > 0);
WARN_ON(max_loops <= 0);
@@ -1441,8 +1444,8 @@ void __init bsp_end_local_APIC_setup(void)
* Now that local APIC setup is completed for BP, configure the fault
* handling for interrupt remapping.
*/
- if (intr_remapping_enabled)
- enable_drhd_fault_handling();
+ if (irq_remapping_enabled)
+ irq_remap_enable_fault_handling();
}
@@ -1517,7 +1520,7 @@ void enable_x2apic(void)
int __init enable_IR(void)
{
#ifdef CONFIG_IRQ_REMAP
- if (!intr_remapping_supported()) {
+ if (!irq_remapping_supported()) {
pr_debug("intr-remapping not supported\n");
return -1;
}
@@ -1528,7 +1531,7 @@ int __init enable_IR(void)
return -1;
}
- return enable_intr_remapping();
+ return irq_remapping_enable();
#endif
return -1;
}
@@ -1537,10 +1540,13 @@ void __init enable_IR_x2apic(void)
{
unsigned long flags;
int ret, x2apic_enabled = 0;
- int dmar_table_init_ret;
+ int hardware_init_ret;
+
+ /* Make sure irq_remap_ops are initialized */
+ setup_irq_remapping_ops();
- dmar_table_init_ret = dmar_table_init();
- if (dmar_table_init_ret && !x2apic_supported())
+ hardware_init_ret = irq_remapping_prepare();
+ if (hardware_init_ret && !x2apic_supported())
return;
ret = save_ioapic_entries();
@@ -1556,7 +1562,7 @@ void __init enable_IR_x2apic(void)
if (x2apic_preenabled && nox2apic)
disable_x2apic();
- if (dmar_table_init_ret)
+ if (hardware_init_ret)
ret = -1;
else
ret = enable_IR();
@@ -1637,9 +1643,11 @@ static int __init apic_verify(void)
mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
/* The BIOS may have set up the APIC at some other address */
- rdmsr(MSR_IA32_APICBASE, l, h);
- if (l & MSR_IA32_APICBASE_ENABLE)
- mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+ if (boot_cpu_data.x86 >= 6) {
+ rdmsr(MSR_IA32_APICBASE, l, h);
+ if (l & MSR_IA32_APICBASE_ENABLE)
+ mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+ }
pr_info("Found and enabled local APIC!\n");
return 0;
@@ -1657,13 +1665,15 @@ int __init apic_force_enable(unsigned long addr)
* MSR. This can only be done in software for Intel P6 or later
* and AMD K7 (Model > 1) or later.
*/
- rdmsr(MSR_IA32_APICBASE, l, h);
- if (!(l & MSR_IA32_APICBASE_ENABLE)) {
- pr_info("Local APIC disabled by BIOS -- reenabling.\n");
- l &= ~MSR_IA32_APICBASE_BASE;
- l |= MSR_IA32_APICBASE_ENABLE | addr;
- wrmsr(MSR_IA32_APICBASE, l, h);
- enabled_via_apicbase = 1;
+ if (boot_cpu_data.x86 >= 6) {
+ rdmsr(MSR_IA32_APICBASE, l, h);
+ if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+ pr_info("Local APIC disabled by BIOS -- reenabling.\n");
+ l &= ~MSR_IA32_APICBASE_BASE;
+ l |= MSR_IA32_APICBASE_ENABLE | addr;
+ wrmsr(MSR_IA32_APICBASE, l, h);
+ enabled_via_apicbase = 1;
+ }
}
return apic_verify();
}
@@ -2172,8 +2182,8 @@ static int lapic_suspend(void)
local_irq_save(flags);
disable_local_APIC();
- if (intr_remapping_enabled)
- disable_intr_remapping();
+ if (irq_remapping_enabled)
+ irq_remapping_disable();
local_irq_restore(flags);
return 0;
@@ -2189,7 +2199,7 @@ static void lapic_resume(void)
return;
local_irq_save(flags);
- if (intr_remapping_enabled) {
+ if (irq_remapping_enabled) {
/*
* IO-APIC and PIC have their own resume routines.
* We just mask them here to make sure the interrupt
@@ -2209,10 +2219,12 @@ static void lapic_resume(void)
* FIXME! This will be wrong if we ever support suspend on
* SMP! We'll need to do this as part of the CPU restore!
*/
- rdmsr(MSR_IA32_APICBASE, l, h);
- l &= ~MSR_IA32_APICBASE_BASE;
- l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
- wrmsr(MSR_IA32_APICBASE, l, h);
+ if (boot_cpu_data.x86 >= 6) {
+ rdmsr(MSR_IA32_APICBASE, l, h);
+ l &= ~MSR_IA32_APICBASE_BASE;
+ l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
+ wrmsr(MSR_IA32_APICBASE, l, h);
+ }
}
maxlvt = lapic_get_maxlvt();
@@ -2239,8 +2251,8 @@ static void lapic_resume(void)
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
- if (intr_remapping_enabled)
- reenable_intr_remapping(x2apic_mode);
+ if (irq_remapping_enabled)
+ irq_remapping_reenable(x2apic_mode);
local_irq_restore(flags);
}
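Taken together, the renames in this file route all interrupt-remapping calls through a neutral irq_remapping layer instead of the Intel VT-d functions. Schematically, the boot path now looks like this (condensed from the hunks above, error handling trimmed):

void __init enable_IR_x2apic_sketch(void)
{
	int hardware_init_ret;

	setup_irq_remapping_ops();			/* pick the backend */
	hardware_init_ret = irq_remapping_prepare();	/* was dmar_table_init() */
	if (hardware_init_ret && !x2apic_supported())
		return;

	/* ... save IO-APIC entries, mask 8259 and IO-APIC ... */

	enable_IR();	/* irq_remapping_supported() + irq_remapping_enable() */
}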
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 359b6899a36c..0e881c46e8c8 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -227,6 +227,7 @@ static struct apic apic_flat = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
+ .eoi_write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
@@ -386,6 +387,7 @@ static struct apic apic_physflat = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
+ .eoi_write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 634ae6cdd5c9..a6e4c6e06c08 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -181,6 +181,7 @@ struct apic apic_noop = {
.read = noop_apic_read,
.write = noop_apic_write,
+ .eoi_write = noop_apic_write,
.icr_read = noop_apic_icr_read,
.icr_write = noop_apic_icr_write,
.wait_icr_idle = noop_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 899803e03214..6ec6d5d297c3 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -207,8 +207,11 @@ static void __init map_csrs(void)
static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
{
- c->phys_proc_id = node;
- per_cpu(cpu_llc_id, smp_processor_id()) = node;
+
+ if (c->phys_proc_id != node) {
+ c->phys_proc_id = node;
+ per_cpu(cpu_llc_id, smp_processor_id()) = node;
+ }
}
static int __init numachip_system_init(void)
@@ -292,6 +295,7 @@ static struct apic apic_numachip __refconst = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
+ .eoi_write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 0cdec7065aff..31fbdbfbf960 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -248,6 +248,7 @@ static struct apic apic_bigsmp = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
+ .eoi_write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index e42d1d3b9134..db4ab1be3c79 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -678,6 +678,7 @@ static struct apic __refdata apic_es7000_cluster = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
+ .eoi_write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
@@ -742,6 +743,7 @@ static struct apic __refdata apic_es7000 = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
+ .eoi_write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e88300d8e80a..ffdc152e507d 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -68,23 +68,21 @@
#define for_each_irq_pin(entry, head) \
for (entry = head; entry; entry = entry->next)
-static void __init __ioapic_init_mappings(void);
-
-static unsigned int __io_apic_read (unsigned int apic, unsigned int reg);
-static void __io_apic_write (unsigned int apic, unsigned int reg, unsigned int val);
-static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
-
-static struct io_apic_ops io_apic_ops = {
- .init = __ioapic_init_mappings,
- .read = __io_apic_read,
- .write = __io_apic_write,
- .modify = __io_apic_modify,
-};
-
-void __init set_io_apic_ops(const struct io_apic_ops *ops)
+#ifdef CONFIG_IRQ_REMAP
+static void irq_remap_modify_chip_defaults(struct irq_chip *chip);
+static inline bool irq_remapped(struct irq_cfg *cfg)
+{
+ return cfg->irq_2_iommu.iommu != NULL;
+}
+#else
+static inline bool irq_remapped(struct irq_cfg *cfg)
+{
+ return false;
+}
+static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
{
- io_apic_ops = *ops;
}
+#endif
/*
* Is the SiS APIC rmw bug present ?
@@ -313,21 +311,6 @@ static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
irq_free_desc(at);
}
-static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
-{
- return io_apic_ops.read(apic, reg);
-}
-
-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
-{
- io_apic_ops.write(apic, reg, value);
-}
-
-static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
-{
- io_apic_ops.modify(apic, reg, value);
-}
-
struct io_apic {
unsigned int index;
@@ -349,14 +332,14 @@ static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
writel(vector, &io_apic->eoi);
}
-static unsigned int __io_apic_read(unsigned int apic, unsigned int reg)
+unsigned int native_io_apic_read(unsigned int apic, unsigned int reg)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
writel(reg, &io_apic->index);
return readl(&io_apic->data);
}
-static void __io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
@@ -370,7 +353,7 @@ static void __io_apic_write(unsigned int apic, unsigned int reg, unsigned int va
*
* Older SiS APIC requires we rewrite the index register
*/
-static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
@@ -379,29 +362,6 @@ static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int v
writel(value, &io_apic->data);
}
-static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
-{
- struct irq_pin_list *entry;
- unsigned long flags;
-
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- for_each_irq_pin(entry, cfg->irq_2_pin) {
- unsigned int reg;
- int pin;
-
- pin = entry->pin;
- reg = io_apic_read(entry->apic, 0x10 + pin*2);
- /* Is the remote IRR bit set? */
- if (reg & IO_APIC_REDIR_REMOTE_IRR) {
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
- return true;
- }
- }
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return false;
-}
-
union entry_union {
struct { u32 w1, w2; };
struct IO_APIC_route_entry entry;
@@ -1361,77 +1321,13 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
fasteoi ? "fasteoi" : "edge");
}
-
-static int setup_ir_ioapic_entry(int irq,
- struct IR_IO_APIC_route_entry *entry,
- unsigned int destination, int vector,
- struct io_apic_irq_attr *attr)
-{
- int index;
- struct irte irte;
- int ioapic_id = mpc_ioapic_id(attr->ioapic);
- struct intel_iommu *iommu = map_ioapic_to_ir(ioapic_id);
-
- if (!iommu) {
- pr_warn("No mapping iommu for ioapic %d\n", ioapic_id);
- return -ENODEV;
- }
-
- index = alloc_irte(iommu, irq, 1);
- if (index < 0) {
- pr_warn("Failed to allocate IRTE for ioapic %d\n", ioapic_id);
- return -ENOMEM;
- }
-
- prepare_irte(&irte, vector, destination);
-
- /* Set source-id of interrupt request */
- set_ioapic_sid(&irte, ioapic_id);
-
- modify_irte(irq, &irte);
-
- apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: "
- "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d "
- "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
- "Avail:%X Vector:%02X Dest:%08X "
- "SID:%04X SQ:%X SVT:%X)\n",
- attr->ioapic, irte.present, irte.fpd, irte.dst_mode,
- irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
- irte.avail, irte.vector, irte.dest_id,
- irte.sid, irte.sq, irte.svt);
-
- memset(entry, 0, sizeof(*entry));
-
- entry->index2 = (index >> 15) & 0x1;
- entry->zero = 0;
- entry->format = 1;
- entry->index = (index & 0x7fff);
- /*
- * IO-APIC RTE will be configured with virtual vector.
- * irq handler will do the explicit EOI to the io-apic.
- */
- entry->vector = attr->ioapic_pin;
- entry->mask = 0; /* enable IRQ */
- entry->trigger = attr->trigger;
- entry->polarity = attr->polarity;
-
- /* Mask level triggered irqs.
- * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
- */
- if (attr->trigger)
- entry->mask = 1;
-
- return 0;
-}
-
static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
unsigned int destination, int vector,
struct io_apic_irq_attr *attr)
{
- if (intr_remapping_enabled)
- return setup_ir_ioapic_entry(irq,
- (struct IR_IO_APIC_route_entry *)entry,
- destination, vector, attr);
+ if (irq_remapping_enabled)
+ return setup_ioapic_remapped_entry(irq, entry, destination,
+ vector, attr);
memset(entry, 0, sizeof(*entry));
@@ -1588,7 +1484,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
{
struct IO_APIC_route_entry entry;
- if (intr_remapping_enabled)
+ if (irq_remapping_enabled)
return;
memset(&entry, 0, sizeof(entry));
@@ -1674,7 +1570,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
printk(KERN_DEBUG ".... IRQ redirection table:\n");
- if (intr_remapping_enabled) {
+ if (irq_remapping_enabled) {
printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR"
" Pol Stat Indx2 Zero Vect:\n");
} else {
@@ -1683,7 +1579,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
}
for (i = 0; i <= reg_01.bits.entries; i++) {
- if (intr_remapping_enabled) {
+ if (irq_remapping_enabled) {
struct IO_APIC_route_entry entry;
struct IR_IO_APIC_route_entry *ir_entry;
@@ -2050,7 +1946,7 @@ void disable_IO_APIC(void)
* IOAPIC RTE as well as interrupt-remapping table entry).
* As this gets called during crash dump, keep this simple for now.
*/
- if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) {
+ if (ioapic_i8259.pin != -1 && !irq_remapping_enabled) {
struct IO_APIC_route_entry entry;
memset(&entry, 0, sizeof(entry));
@@ -2074,7 +1970,7 @@ void disable_IO_APIC(void)
* Use virtual wire A mode when interrupt remapping is enabled.
*/
if (cpu_has_apic || apic_from_smp_config())
- disconnect_bsp_APIC(!intr_remapping_enabled &&
+ disconnect_bsp_APIC(!irq_remapping_enabled &&
ioapic_i8259.pin != -1);
}
@@ -2390,71 +2286,6 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
return ret;
}
-#ifdef CONFIG_IRQ_REMAP
-
-/*
- * Migrate the IO-APIC irq in the presence of intr-remapping.
- *
- * For both level and edge triggered, irq migration is a simple atomic
- * update(of vector and cpu destination) of IRTE and flush the hardware cache.
- *
- * For level triggered, we eliminate the io-apic RTE modification (with the
- * updated vector information), by using a virtual vector (io-apic pin number).
- * Real vector that is used for interrupting cpu will be coming from
- * the interrupt-remapping table entry.
- *
- * As the migration is a simple atomic update of IRTE, the same mechanism
- * is used to migrate MSI irq's in the presence of interrupt-remapping.
- */
-static int
-ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
- bool force)
-{
- struct irq_cfg *cfg = data->chip_data;
- unsigned int dest, irq = data->irq;
- struct irte irte;
-
- if (!cpumask_intersects(mask, cpu_online_mask))
- return -EINVAL;
-
- if (get_irte(irq, &irte))
- return -EBUSY;
-
- if (assign_irq_vector(irq, cfg, mask))
- return -EBUSY;
-
- dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
-
- irte.vector = cfg->vector;
- irte.dest_id = IRTE_DEST(dest);
-
- /*
- * Atomically updates the IRTE with the new destination, vector
- * and flushes the interrupt entry cache.
- */
- modify_irte(irq, &irte);
-
- /*
- * After this point, all the interrupts will start arriving
- * at the new destination. So, time to cleanup the previous
- * vector allocation.
- */
- if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
-
- cpumask_copy(data->affinity, mask);
- return 0;
-}
-
-#else
-static inline int
-ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
- bool force)
-{
- return 0;
-}
-#endif
-
asmlinkage void smp_irq_move_cleanup_interrupt(void)
{
unsigned vector, me;
@@ -2552,6 +2383,29 @@ static void ack_apic_edge(struct irq_data *data)
atomic_t irq_mis_count;
#ifdef CONFIG_GENERIC_PENDING_IRQ
+static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
+{
+ struct irq_pin_list *entry;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
+ unsigned int reg;
+ int pin;
+
+ pin = entry->pin;
+ reg = io_apic_read(entry->apic, 0x10 + pin*2);
+ /* Is the remote IRR bit set? */
+ if (reg & IO_APIC_REDIR_REMOTE_IRR) {
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ return true;
+ }
+ }
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return false;
+}
+
static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
{
/* If we are moving the irq we need to mask it */
@@ -2699,7 +2553,7 @@ static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
chip->irq_eoi = ir_ack_apic_level;
#ifdef CONFIG_SMP
- chip->irq_set_affinity = ir_ioapic_set_affinity;
+ chip->irq_set_affinity = set_remapped_irq_affinity;
#endif
}
#endif /* CONFIG_IRQ_REMAP */
@@ -2912,7 +2766,7 @@ static inline void __init check_timer(void)
* 8259A.
*/
if (pin1 == -1) {
- if (intr_remapping_enabled)
+ if (irq_remapping_enabled)
panic("BIOS bug: timer not connected to IO-APIC");
pin1 = pin2;
apic1 = apic2;
@@ -2945,7 +2799,7 @@ static inline void __init check_timer(void)
clear_IO_APIC_pin(0, pin1);
goto out;
}
- if (intr_remapping_enabled)
+ if (irq_remapping_enabled)
panic("timer doesn't work through Interrupt-remapped IO-APIC");
local_irq_disable();
clear_IO_APIC_pin(apic1, pin1);
@@ -3169,7 +3023,7 @@ void destroy_irq(unsigned int irq)
irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
if (irq_remapped(cfg))
- free_irte(irq);
+ free_remapped_irq(irq);
raw_spin_lock_irqsave(&vector_lock, flags);
__clear_irq_vector(irq, cfg);
raw_spin_unlock_irqrestore(&vector_lock, flags);
@@ -3198,54 +3052,34 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
if (irq_remapped(cfg)) {
- struct irte irte;
- int ir_index;
- u16 sub_handle;
-
- ir_index = map_irq_to_irte_handle(irq, &sub_handle);
- BUG_ON(ir_index == -1);
-
- prepare_irte(&irte, cfg->vector, dest);
-
- /* Set source-id of interrupt request */
- if (pdev)
- set_msi_sid(&irte, pdev);
- else
- set_hpet_sid(&irte, hpet_id);
-
- modify_irte(irq, &irte);
+ compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id);
+ return err;
+ }
+ if (x2apic_enabled())
+ msg->address_hi = MSI_ADDR_BASE_HI |
+ MSI_ADDR_EXT_DEST_ID(dest);
+ else
msg->address_hi = MSI_ADDR_BASE_HI;
- msg->data = sub_handle;
- msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
- MSI_ADDR_IR_SHV |
- MSI_ADDR_IR_INDEX1(ir_index) |
- MSI_ADDR_IR_INDEX2(ir_index);
- } else {
- if (x2apic_enabled())
- msg->address_hi = MSI_ADDR_BASE_HI |
- MSI_ADDR_EXT_DEST_ID(dest);
- else
- msg->address_hi = MSI_ADDR_BASE_HI;
- msg->address_lo =
- MSI_ADDR_BASE_LO |
- ((apic->irq_dest_mode == 0) ?
- MSI_ADDR_DEST_MODE_PHYSICAL:
- MSI_ADDR_DEST_MODE_LOGICAL) |
- ((apic->irq_delivery_mode != dest_LowestPrio) ?
- MSI_ADDR_REDIRECTION_CPU:
- MSI_ADDR_REDIRECTION_LOWPRI) |
- MSI_ADDR_DEST_ID(dest);
+ msg->address_lo =
+ MSI_ADDR_BASE_LO |
+ ((apic->irq_dest_mode == 0) ?
+ MSI_ADDR_DEST_MODE_PHYSICAL:
+ MSI_ADDR_DEST_MODE_LOGICAL) |
+ ((apic->irq_delivery_mode != dest_LowestPrio) ?
+ MSI_ADDR_REDIRECTION_CPU:
+ MSI_ADDR_REDIRECTION_LOWPRI) |
+ MSI_ADDR_DEST_ID(dest);
+
+ msg->data =
+ MSI_DATA_TRIGGER_EDGE |
+ MSI_DATA_LEVEL_ASSERT |
+ ((apic->irq_delivery_mode != dest_LowestPrio) ?
+ MSI_DATA_DELIVERY_FIXED:
+ MSI_DATA_DELIVERY_LOWPRI) |
+ MSI_DATA_VECTOR(cfg->vector);
- msg->data =
- MSI_DATA_TRIGGER_EDGE |
- MSI_DATA_LEVEL_ASSERT |
- ((apic->irq_delivery_mode != dest_LowestPrio) ?
- MSI_DATA_DELIVERY_FIXED:
- MSI_DATA_DELIVERY_LOWPRI) |
- MSI_DATA_VECTOR(cfg->vector);
- }
return err;
}
@@ -3288,33 +3122,6 @@ static struct irq_chip msi_chip = {
.irq_retrigger = ioapic_retrigger_irq,
};
-/*
- * Map the PCI dev to the corresponding remapping hardware unit
- * and allocate 'nvec' consecutive interrupt-remapping table entries
- * in it.
- */
-static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
-{
- struct intel_iommu *iommu;
- int index;
-
- iommu = map_dev_to_ir(dev);
- if (!iommu) {
- printk(KERN_ERR
- "Unable to map PCI %s to iommu\n", pci_name(dev));
- return -ENOENT;
- }
-
- index = alloc_irte(iommu, irq, nvec);
- if (index < 0) {
- printk(KERN_ERR
- "Unable to allocate %d IRTE for PCI %s\n", nvec,
- pci_name(dev));
- return -ENOSPC;
- }
- return index;
-}
-
static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
{
struct irq_chip *chip = &msi_chip;
@@ -3345,7 +3152,6 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
int node, ret, sub_handle, index = 0;
unsigned int irq, irq_want;
struct msi_desc *msidesc;
- struct intel_iommu *iommu = NULL;
/* x86 doesn't support multiple MSI yet */
if (type == PCI_CAP_ID_MSI && nvec > 1)
@@ -3359,7 +3165,7 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
if (irq == 0)
return -1;
irq_want = irq + 1;
- if (!intr_remapping_enabled)
+ if (!irq_remapping_enabled)
goto no_ir;
if (!sub_handle) {
@@ -3367,23 +3173,16 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
* allocate the consecutive block of IRTE's
* for 'nvec'
*/
- index = msi_alloc_irte(dev, irq, nvec);
+ index = msi_alloc_remapped_irq(dev, irq, nvec);
if (index < 0) {
ret = index;
goto error;
}
} else {
- iommu = map_dev_to_ir(dev);
- if (!iommu) {
- ret = -ENOENT;
+ ret = msi_setup_remapped_irq(dev, irq, index,
+ sub_handle);
+ if (ret < 0)
goto error;
- }
- /*
- * setup the mapping between the irq and the IRTE
- * base index, the sub_handle pointing to the
- * appropriate interrupt remap table entry.
- */
- set_irte_irq(irq, iommu, index, sub_handle);
}
no_ir:
ret = setup_msi_irq(dev, msidesc, irq);
@@ -3501,15 +3300,8 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
struct msi_msg msg;
int ret;
- if (intr_remapping_enabled) {
- struct intel_iommu *iommu = map_hpet_to_ir(id);
- int index;
-
- if (!iommu)
- return -1;
-
- index = alloc_irte(iommu, irq, 1);
- if (index < 0)
+ if (irq_remapping_enabled) {
+ if (!setup_hpet_msi_remapped(irq, id))
return -1;
}
@@ -3888,8 +3680,8 @@ void __init setup_ioapic_dest(void)
else
mask = apic->target_cpus();
- if (intr_remapping_enabled)
- ir_ioapic_set_affinity(idata, mask, false);
+ if (irq_remapping_enabled)
+ set_remapped_irq_affinity(idata, mask, false);
else
ioapic_set_affinity(idata, mask, false);
}
@@ -3931,12 +3723,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
return res;
}
-void __init ioapic_and_gsi_init(void)
-{
- io_apic_ops.init();
-}
-
-static void __init __ioapic_init_mappings(void)
+void __init native_io_apic_init_mappings(void)
{
unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
struct resource *ioapic_res;
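The io_apic_read()/io_apic_write()/io_apic_modify() wrappers deleted above presumably reappear as inlines that dispatch through the new x86_io_apic_ops table, with the native_io_apic_*() functions as the defaults. A sketch of the expected shape (placement in <asm/io_apic.h> is an assumption):

static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
{
	return x86_io_apic_ops.read(apic, reg);
}

static inline void io_apic_write(unsigned int apic, unsigned int reg,
				 unsigned int value)
{
	x86_io_apic_ops.write(apic, reg, value);
}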
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 00d2422ca7c9..f00a68cca37a 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -530,6 +530,7 @@ static struct apic __refdata apic_numaq = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
+ .eoi_write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index ff2c1b9aac4d..1b291da09e60 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -142,6 +142,7 @@ static struct apic apic_default = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
+ .eoi_write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index fea000b27f07..659897c00755 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -546,6 +546,7 @@ static struct apic apic_summit = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
+ .eoi_write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 48f3103b3c93..ff35cff0e1a7 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -260,6 +260,7 @@ static struct apic apic_x2apic_cluster = {
.read = native_apic_msr_read,
.write = native_apic_msr_write,
+ .eoi_write = native_apic_msr_eoi_write,
.icr_read = native_x2apic_icr_read,
.icr_write = native_x2apic_icr_write,
.wait_icr_idle = native_x2apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 8a778db45e3a..c17e982db275 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -24,6 +24,12 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
if (x2apic_phys)
return x2apic_enabled();
+ else if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
+ (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) &&
+ x2apic_enabled()) {
+ printk(KERN_DEBUG "System requires x2apic physical mode\n");
+ return 1;
+ }
else
return 0;
}
@@ -166,6 +172,7 @@ static struct apic apic_x2apic_phys = {
.read = native_apic_msr_read,
.write = native_apic_msr_write,
+ .eoi_write = native_apic_msr_eoi_write,
.icr_read = native_x2apic_icr_read,
.icr_write = native_x2apic_icr_write,
.wait_icr_idle = native_x2apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 87bfa69e216e..c6d03f7a4401 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -404,6 +404,7 @@ static struct apic __refdata apic_x2apic_uv_x = {
.read = native_apic_msr_read,
.write = native_apic_msr_write,
+ .eoi_write = native_apic_msr_eoi_write,
.icr_read = native_x2apic_icr_read,
.icr_write = native_x2apic_icr_write,
.wait_icr_idle = native_x2apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 459e78cbf61e..07b0c0db466c 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -2401,7 +2401,7 @@ static void __exit apm_exit(void)
* (pm_idle), Wait for all processors to update cached/local
* copies of pm_idle before proceeding.
*/
- cpu_idle_wait();
+ kick_all_cpus_sync();
}
if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0)
&& (apm_info.connection_version > 0x0100)) {
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 5da1269e8ddc..e2dbcb7dabdd 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -27,21 +27,29 @@ static int num_scan_areas;
static __init int set_corruption_check(char *arg)
{
- char *end;
+ ssize_t ret;
+ unsigned long val;
- memory_corruption_check = simple_strtol(arg, &end, 10);
+ ret = kstrtoul(arg, 10, &val);
+ if (ret)
+ return ret;
- return (*end == 0) ? 0 : -EINVAL;
+ memory_corruption_check = val;
+ return 0;
}
early_param("memory_corruption_check", set_corruption_check);
static __init int set_corruption_check_period(char *arg)
{
- char *end;
+ ssize_t ret;
+ unsigned long val;
- corruption_check_period = simple_strtoul(arg, &end, 10);
+ ret = kstrtoul(arg, 10, &val);
+ if (ret)
+ return ret;
- return (*end == 0) ? 0 : -EINVAL;
+ corruption_check_period = val;
+ return 0;
}
early_param("memory_corruption_check_period", set_corruption_check_period);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 0a44b90602b0..146bb6218eec 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -26,7 +26,8 @@
* contact AMD for precise details and a CPU swap.
*
* See http://www.multimania.com/poulot/k6bug.html
- * http://www.amd.com/K6/k6docs/revgd.html
+ * and section 2.6.2 of "AMD-K6 Processor Revision Guide - Model 6"
+ * (Publication # 21266 Issue Date: August 1998)
*
* The following test is erm.. interesting. AMD neglected to up
* the chip setting when fixing the bug but they also tweaked some
@@ -94,7 +95,6 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
"system stability may be impaired when more than 32 MB are used.\n");
else
printk(KERN_CONT "probably OK (after B9730xxxx).\n");
- printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
}
/* K6 with old style WHCR */
@@ -353,10 +353,11 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
node = per_cpu(cpu_llc_id, cpu);
/*
- * If core numbers are inconsistent, it's likely a multi-fabric platform,
- * so invoke platform-specific handler
+ * On a multi-fabric platform (e.g. Numascale NumaChip), a
+ * platform-specific handler needs to be called to fix up some
+ * of the CPU's IDs.
*/
- if (c->phys_proc_id != node)
+ if (x86_cpuinit.fixup_cpu_id)
x86_cpuinit.fixup_cpu_id(c, node);
if (!node_online(node)) {
@@ -579,6 +580,24 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
}
}
+ /* re-enable TopologyExtensions if switched off by BIOS */
+ if ((c->x86 == 0x15) &&
+ (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
+ !cpu_has(c, X86_FEATURE_TOPOEXT)) {
+ u64 val;
+
+ if (!rdmsrl_amd_safe(0xc0011005, &val)) {
+ val |= 1ULL << 54;
+ wrmsrl_amd_safe(0xc0011005, val);
+ rdmsrl(0xc0011005, val);
+ if (val & (1ULL << 54)) {
+ set_cpu_cap(c, X86_FEATURE_TOPOEXT);
+ printk(KERN_INFO FW_INFO "CPU: Re-enabling "
+ "disabled Topology Extensions Support\n");
+ }
+ }
+ }
+
cpu_detect_cache_sizes(c);
/* Multi core CPU? */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 67e258362a3d..82f29e70d058 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1163,15 +1163,6 @@ static void dbg_restore_debug_regs(void)
#endif /* ! CONFIG_KGDB */
/*
- * Prints an error where the NUMA and configured core-number mismatch and the
- * platform didn't override this to fix it up
- */
-void __cpuinit x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node)
-{
- pr_err("NUMA core number %d differs from configured core number %d\n", node, c->phys_proc_id);
-}
-
-/*
* cpu_init() initializes state that is per-CPU. Some data is already
* initialized (naturally) in the bootstrap process, such as the GDT
* and IDT. We reload them nevertheless, this function acts as a
@@ -1194,7 +1185,7 @@ void __cpuinit cpu_init(void)
oist = &per_cpu(orig_ist, cpu);
#ifdef CONFIG_NUMA
- if (cpu != 0 && percpu_read(numa_node) == 0 &&
+ if (cpu != 0 && this_cpu_read(numa_node) == 0 &&
early_cpu_to_node(cpu) != NUMA_NO_NODE)
set_numa_node(early_cpu_to_node(cpu));
#endif
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 73d08ed98a64..9a7c90d80bc4 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -433,14 +433,14 @@ int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot,
/* check if @slot is already used or the index is already disabled */
ret = amd_get_l3_disable_slot(nb, slot);
if (ret >= 0)
- return -EINVAL;
+ return -EEXIST;
if (index > nb->l3_cache.indices)
return -EINVAL;
/* check whether the other slot has disabled the same index already */
if (index == amd_get_l3_disable_slot(nb, !slot))
- return -EINVAL;
+ return -EEXIST;
amd_l3_disable_index(nb, cpu, slot, index);
@@ -468,8 +468,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val);
if (err) {
if (err == -EEXIST)
- printk(KERN_WARNING "L3 disable slot %d in use!\n",
- slot);
+ pr_warning("L3 slot %d in use/index already disabled!\n",
+ slot);
return err;
}
return count;
@@ -615,14 +615,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
new_l2 = this_leaf.size/1024;
num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
index_msb = get_count_order(num_threads_sharing);
- l2_id = c->apicid >> index_msb;
+ l2_id = c->apicid & ~((1 << index_msb) - 1);
break;
case 3:
new_l3 = this_leaf.size/1024;
num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
index_msb = get_count_order(
num_threads_sharing);
- l3_id = c->apicid >> index_msb;
+ l3_id = c->apicid & ~((1 << index_msb) - 1);
break;
default:
break;
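The masking change keeps cache IDs in the APIC-ID number space instead of shifting them down. Worked example: with num_threads_sharing = 4 (so index_msb = 2) and apicid 0x1b, the old expression gave 0x1b >> 2 = 0x6, while the new one gives 0x1b & ~0x3 = 0x18, i.e. the apicid with the shared-sibling bits cleared.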
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index 5502b289341b..36565373af87 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -23,7 +23,7 @@
* %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor)
*
* Arrays used to match for this should also be declared using
- * MODULE_DEVICE_TABLE(x86_cpu, ...)
+ * MODULE_DEVICE_TABLE(x86cpu, ...)
*
* This always matches against the boot cpu, assuming models and features are
* consistent over all CPUs.
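A driver match table against this mechanism would then be declared as below (hypothetical driver; the table-name argument is the one the corrected comment documents):

static const struct x86_cpu_id mydrv_cpu_ids[] = {
	{ X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_AES },
	{}	/* terminator */
};
MODULE_DEVICE_TABLE(x86cpu, mydrv_cpu_ids);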
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d086a09c087d..297edb1b1fb3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -583,7 +583,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
struct mce m;
int i;
- percpu_inc(mce_poll_count);
+ this_cpu_inc(mce_poll_count);
mce_gather_info(&m, NULL);
@@ -945,9 +945,10 @@ struct mce_info {
atomic_t inuse;
struct task_struct *t;
__u64 paddr;
+ int restartable;
} mce_info[MCE_INFO_MAX];
-static void mce_save_info(__u64 addr)
+static void mce_save_info(__u64 addr, int c)
{
struct mce_info *mi;
@@ -955,6 +956,7 @@ static void mce_save_info(__u64 addr)
if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) {
mi->t = current;
mi->paddr = addr;
+ mi->restartable = c;
return;
}
}
@@ -1015,7 +1017,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
atomic_inc(&mce_entry);
- percpu_inc(mce_exception_count);
+ this_cpu_inc(mce_exception_count);
if (!banks)
goto out;
@@ -1130,7 +1132,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
mce_panic("Fatal machine check on current CPU", &m, msg);
if (worst == MCE_AR_SEVERITY) {
/* schedule action before return to userland */
- mce_save_info(m.addr);
+ mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV);
set_thread_flag(TIF_MCE_NOTIFY);
} else if (kill_it) {
force_sig(SIGBUS, current);
@@ -1179,7 +1181,13 @@ void mce_notify_process(void)
pr_err("Uncorrected hardware memory error in user-access at %llx",
mi->paddr);
- if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) {
+ /*
+ * We must call memory_failure() here even if the current process is
+ * doomed. We still need to mark the page as poisoned and alert any
+ * other users of the page.
+ */
+ if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 ||
+ mi->restartable == 0) {
pr_err("Memory error not recovered");
force_sig(SIGBUS, current);
}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index bb8e03407e18..e049d6da0183 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -484,9 +484,6 @@ static int __x86_pmu_event_init(struct perf_event *event)
/* mark unused */
event->hw.extra_reg.idx = EXTRA_REG_NONE;
-
- /* mark not used */
- event->hw.extra_reg.idx = EXTRA_REG_NONE;
event->hw.branch_reg.idx = EXTRA_REG_NONE;
return x86_pmu.hw_config(event);
@@ -1186,8 +1183,6 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
int idx, handled = 0;
u64 val;
- perf_sample_data_init(&data, 0);
-
cpuc = &__get_cpu_var(cpu_hw_events);
/*
@@ -1222,7 +1217,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
* event overflow
*/
handled++;
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
if (!x86_perf_event_set_period(event))
continue;
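The calls above suggest perf_sample_data_init() gained a period parameter, so callers no longer initialize the record and assign data.period separately. The new signature, inferred from these call sites (a sketch, not taken from this diff):

static inline void perf_sample_data_init(struct perf_sample_data *data,
					 u64 addr, u64 period)
{
	/* ... zero/default the record as before ... */
	data->addr = addr;
	data->period = period;
}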
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 95e7fe1c5f0b..65652265fffd 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -134,8 +134,13 @@ static u64 amd_pmu_event_map(int hw_event)
static int amd_pmu_hw_config(struct perf_event *event)
{
- int ret = x86_pmu_hw_config(event);
+ int ret;
+ /* pass precise event sampling to ibs: */
+ if (event->attr.precise_ip && get_ibs_caps())
+ return -ENOENT;
+
+ ret = x86_pmu_hw_config(event);
if (ret)
return ret;
@@ -205,10 +210,8 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
* when we come here
*/
for (i = 0; i < x86_pmu.num_counters; i++) {
- if (nb->owners[i] == event) {
- cmpxchg(nb->owners+i, event, NULL);
+ if (cmpxchg(nb->owners + i, event, NULL) == event)
break;
- }
}
}
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 3b8a2d30d14e..da9bcdcd9856 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -9,6 +9,7 @@
#include <linux/perf_event.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/ptrace.h>
#include <asm/apic.h>
@@ -16,36 +17,591 @@ static u32 ibs_caps;
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
-static struct pmu perf_ibs;
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+
+#include <asm/nmi.h>
+
+#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
+#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
+
+enum ibs_states {
+ IBS_ENABLED = 0,
+ IBS_STARTED = 1,
+ IBS_STOPPING = 2,
+
+ IBS_MAX_STATES,
+};
+
+struct cpu_perf_ibs {
+ struct perf_event *event;
+ unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)];
+};
+
+struct perf_ibs {
+ struct pmu pmu;
+ unsigned int msr;
+ u64 config_mask;
+ u64 cnt_mask;
+ u64 enable_mask;
+ u64 valid_mask;
+ u64 max_period;
+ unsigned long offset_mask[1];
+ int offset_max;
+ struct cpu_perf_ibs __percpu *pcpu;
+ u64 (*get_count)(u64 config);
+};
+
+struct perf_ibs_data {
+ u32 size;
+ union {
+ u32 data[0]; /* data buffer starts here */
+ u32 caps;
+ };
+ u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
+};
+
+static int
+perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
+{
+ s64 left = local64_read(&hwc->period_left);
+ s64 period = hwc->sample_period;
+ int overflow = 0;
+
+ /*
+ * If we are way outside a reasonable range then just skip forward:
+ */
+ if (unlikely(left <= -period)) {
+ left = period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ overflow = 1;
+ }
+
+ if (unlikely(left < (s64)min)) {
+ left += period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ overflow = 1;
+ }
+
+ /*
+ * If the hw period that triggers the sw overflow is too short
+ * we might hit the irq handler. This biases the results.
+ * Thus we shorten the next-to-last period and set the last
+ * period to the max period.
+ */
+ if (left > max) {
+ left -= max;
+ if (left > max)
+ left = max;
+ else if (left < min)
+ left = min;
+ }
+
+ *hw_period = (u64)left;
+
+ return overflow;
+}
+
+static int
+perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int shift = 64 - width;
+ u64 prev_raw_count;
+ u64 delta;
+
+ /*
+ * Careful: an NMI might modify the previous event value.
+ *
+ * Our tactic to handle this is to first atomically read and
+ * exchange a new raw count - then add that new-prev delta
+ * count to the generic event atomically:
+ */
+ prev_raw_count = local64_read(&hwc->prev_count);
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count)
+ return 0;
+
+ /*
+ * Now we have the new raw value and have updated the prev
+ * timestamp already. We can now calculate the elapsed delta
+ * (event-)time and add that to the generic event.
+ *
+ * Careful, not all hw sign-extends above the physical width
+ * of the count.
+ */
+ delta = (new_raw_count << shift) - (prev_raw_count << shift);
+ delta >>= shift;
+
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
+
+ return 1;
+}
+
+static struct perf_ibs perf_ibs_fetch;
+static struct perf_ibs perf_ibs_op;
+
+static struct perf_ibs *get_ibs_pmu(int type)
+{
+ if (perf_ibs_fetch.pmu.type == type)
+ return &perf_ibs_fetch;
+ if (perf_ibs_op.pmu.type == type)
+ return &perf_ibs_op;
+ return NULL;
+}
+
+/*
+ * Use IBS for precise event sampling:
+ *
+ * perf record -a -e cpu-cycles:p ... # use ibs op, counting cycles
+ * perf record -a -e r076:p ... # same as -e cpu-cycles:p
+ * perf record -a -e r0C1:p ... # use ibs op counting micro-ops
+ *
+ * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
+ * MSRC001_1033) is used to select either cycle or micro-ops counting
+ * mode.
+ *
+ * The rip of IBS samples has skid 0. Thus, IBS supports precise
+ * levels 1 and 2, and PERF_EFLAGS_EXACT is set. In rare cases the
+ * rip is invalid when IBS was not able to record the rip correctly.
+ * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
+ *
+ */
+static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
+{
+ switch (event->attr.precise_ip) {
+ case 0:
+ return -ENOENT;
+ case 1:
+ case 2:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ switch (event->attr.type) {
+ case PERF_TYPE_HARDWARE:
+ switch (event->attr.config) {
+ case PERF_COUNT_HW_CPU_CYCLES:
+ *config = 0;
+ return 0;
+ }
+ break;
+ case PERF_TYPE_RAW:
+ switch (event->attr.config) {
+ case 0x0076:
+ *config = 0;
+ return 0;
+ case 0x00C1:
+ *config = IBS_OP_CNT_CTL;
+ return 0;
+ }
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ return -EOPNOTSUPP;
+}
static int perf_ibs_init(struct perf_event *event)
{
- if (perf_ibs.type != event->attr.type)
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs;
+ u64 max_cnt, config;
+ int ret;
+
+ perf_ibs = get_ibs_pmu(event->attr.type);
+ if (perf_ibs) {
+ config = event->attr.config;
+ } else {
+ perf_ibs = &perf_ibs_op;
+ ret = perf_ibs_precise_event(event, &config);
+ if (ret)
+ return ret;
+ }
+
+ if (event->pmu != &perf_ibs->pmu)
return -ENOENT;
+
+ if (config & ~perf_ibs->config_mask)
+ return -EINVAL;
+
+ if (hwc->sample_period) {
+ if (config & perf_ibs->cnt_mask)
+ /* raw max_cnt may not be set */
+ return -EINVAL;
+ if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
+ /*
+ * the lower 4 bits cannot be set in the IBS max cnt,
+ * but we allow it in case we adjust the
+ * sample period to set a frequency.
+ */
+ return -EINVAL;
+ hwc->sample_period &= ~0x0FULL;
+ if (!hwc->sample_period)
+ hwc->sample_period = 0x10;
+ } else {
+ max_cnt = config & perf_ibs->cnt_mask;
+ config &= ~perf_ibs->cnt_mask;
+ event->attr.sample_period = max_cnt << 4;
+ hwc->sample_period = event->attr.sample_period;
+ }
+
+ if (!hwc->sample_period)
+ return -EINVAL;
+
+ /*
+ * If we modify hwc->sample_period, we also need to update
+ * hwc->last_period and hwc->period_left.
+ */
+ hwc->last_period = hwc->sample_period;
+ local64_set(&hwc->period_left, hwc->sample_period);
+
+ hwc->config_base = perf_ibs->msr;
+ hwc->config = config;
+
return 0;
}
+static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
+ struct hw_perf_event *hwc, u64 *period)
+{
+ int overflow;
+
+ /* ignore lower 4 bits in min count: */
+ overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
+ local64_set(&hwc->prev_count, 0);
+
+ return overflow;
+}
+
+static u64 get_ibs_fetch_count(u64 config)
+{
+ return (config & IBS_FETCH_CNT) >> 12;
+}
+
+static u64 get_ibs_op_count(u64 config)
+{
+ u64 count = 0;
+
+ if (config & IBS_OP_VAL)
+ count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
+
+ if (ibs_caps & IBS_CAPS_RDWROPCNT)
+ count += (config & IBS_OP_CUR_CNT) >> 32;
+
+ return count;
+}
+
+static void
+perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
+ u64 *config)
+{
+ u64 count = perf_ibs->get_count(*config);
+
+ /*
+ * Set width to 64 since we do not overflow on max width but
+ * instead on max count. In perf_ibs_set_period() we clear
+ * prev count manually on overflow.
+ */
+ while (!perf_event_try_update(event, count, 64)) {
+ rdmsrl(event->hw.config_base, *config);
+ count = perf_ibs->get_count(*config);
+ }
+}
+
+static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
+ struct hw_perf_event *hwc, u64 config)
+{
+ wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
+}
+
+/*
+ * Erratum #420 Instruction-Based Sampling Engine May Generate
+ * Interrupt that Cannot Be Cleared:
+ *
+ * Must clear counter mask first, then clear the enable bit. See
+ * Revision Guide for AMD Family 10h Processors, Publication #41322.
+ */
+static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
+ struct hw_perf_event *hwc, u64 config)
+{
+ config &= ~perf_ibs->cnt_mask;
+ wrmsrl(hwc->config_base, config);
+ config &= ~perf_ibs->enable_mask;
+ wrmsrl(hwc->config_base, config);
+}
+
+/*
+ * We cannot restore the ibs pmu state, so we always need to update
+ * the event while stopping it and then reset the state when starting
+ * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags
+ * in perf_ibs_start()/perf_ibs_stop() and instead always do both.
+ */
+static void perf_ibs_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+ u64 period;
+
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+ hwc->state = 0;
+
+ perf_ibs_set_period(perf_ibs, hwc, &period);
+ set_bit(IBS_STARTED, pcpu->state);
+ perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+ perf_event_update_userpage(event);
+}
+
+static void perf_ibs_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+ u64 config;
+ int stopping;
+
+ stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
+
+ if (!stopping && (hwc->state & PERF_HES_UPTODATE))
+ return;
+
+ rdmsrl(hwc->config_base, config);
+
+ if (stopping) {
+ set_bit(IBS_STOPPING, pcpu->state);
+ perf_ibs_disable_event(perf_ibs, hwc, config);
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+ }
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ /*
+ * Clear the valid bit so rollovers are not counted on update;
+ * rollovers are only accounted for in the irq handler.
+ */
+ config &= ~perf_ibs->valid_mask;
+
+ perf_ibs_event_update(perf_ibs, event, &config);
+ hwc->state |= PERF_HES_UPTODATE;
+}
+
static int perf_ibs_add(struct perf_event *event, int flags)
{
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+ if (test_and_set_bit(IBS_ENABLED, pcpu->state))
+ return -ENOSPC;
+
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ pcpu->event = event;
+
+ if (flags & PERF_EF_START)
+ perf_ibs_start(event, PERF_EF_RELOAD);
+
return 0;
}
static void perf_ibs_del(struct perf_event *event, int flags)
{
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+ if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
+ return;
+
+ perf_ibs_stop(event, PERF_EF_UPDATE);
+
+ pcpu->event = NULL;
+
+ perf_event_update_userpage(event);
}
-static struct pmu perf_ibs = {
- .event_init= perf_ibs_init,
- .add= perf_ibs_add,
- .del= perf_ibs_del,
+static void perf_ibs_read(struct perf_event *event) { }
+
+static struct perf_ibs perf_ibs_fetch = {
+ .pmu = {
+ .task_ctx_nr = perf_invalid_context,
+
+ .event_init = perf_ibs_init,
+ .add = perf_ibs_add,
+ .del = perf_ibs_del,
+ .start = perf_ibs_start,
+ .stop = perf_ibs_stop,
+ .read = perf_ibs_read,
+ },
+ .msr = MSR_AMD64_IBSFETCHCTL,
+ .config_mask = IBS_FETCH_CONFIG_MASK,
+ .cnt_mask = IBS_FETCH_MAX_CNT,
+ .enable_mask = IBS_FETCH_ENABLE,
+ .valid_mask = IBS_FETCH_VAL,
+ .max_period = IBS_FETCH_MAX_CNT << 4,
+ .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
+ .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
+
+ .get_count = get_ibs_fetch_count,
};
+static struct perf_ibs perf_ibs_op = {
+ .pmu = {
+ .task_ctx_nr = perf_invalid_context,
+
+ .event_init = perf_ibs_init,
+ .add = perf_ibs_add,
+ .del = perf_ibs_del,
+ .start = perf_ibs_start,
+ .stop = perf_ibs_stop,
+ .read = perf_ibs_read,
+ },
+ .msr = MSR_AMD64_IBSOPCTL,
+ .config_mask = IBS_OP_CONFIG_MASK,
+ .cnt_mask = IBS_OP_MAX_CNT,
+ .enable_mask = IBS_OP_ENABLE,
+ .valid_mask = IBS_OP_VAL,
+ .max_period = IBS_OP_MAX_CNT << 4,
+ .offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
+ .offset_max = MSR_AMD64_IBSOP_REG_COUNT,
+
+ .get_count = get_ibs_op_count,
+};
+
+static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
+{
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+ struct perf_event *event = pcpu->event;
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+ struct pt_regs regs;
+ struct perf_ibs_data ibs_data;
+ int offset, size, check_rip, offset_max, throttle = 0;
+ unsigned int msr;
+ u64 *buf, *config, period;
+
+ if (!test_bit(IBS_STARTED, pcpu->state)) {
+ /*
+ * Catch spurious interrupts after stopping IBS: after
+ * disabling IBS there could still be incoming NMIs
+ * with samples that even have the valid bit cleared.
+ * Mark all these NMIs as handled.
+ */
+ return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
+ }
+
+ msr = hwc->config_base;
+ buf = ibs_data.regs;
+ rdmsrl(msr, *buf);
+ if (!(*buf++ & perf_ibs->valid_mask))
+ return 0;
+
+ config = &ibs_data.regs[0];
+ perf_ibs_event_update(perf_ibs, event, config);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!perf_ibs_set_period(perf_ibs, hwc, &period))
+ goto out; /* no sw counter overflow */
+
+ ibs_data.caps = ibs_caps;
+ size = 1;
+ offset = 1;
+ check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
+ if (event->attr.sample_type & PERF_SAMPLE_RAW)
+ offset_max = perf_ibs->offset_max;
+ else if (check_rip)
+ offset_max = 2;
+ else
+ offset_max = 1;
+ do {
+ rdmsrl(msr + offset, *buf++);
+ size++;
+ offset = find_next_bit(perf_ibs->offset_mask,
+ perf_ibs->offset_max,
+ offset + 1);
+ } while (offset < offset_max);
+ ibs_data.size = sizeof(u64) * size;
+
+ regs = *iregs;
+ if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
+ regs.flags &= ~PERF_EFLAGS_EXACT;
+ } else {
+ instruction_pointer_set(&regs, ibs_data.regs[1]);
+ regs.flags |= PERF_EFLAGS_EXACT;
+ }
+
+ if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ raw.size = sizeof(u32) + ibs_data.size;
+ raw.data = ibs_data.data;
+ data.raw = &raw;
+ }
+
+ throttle = perf_event_overflow(event, &data, &regs);
+out:
+ if (throttle)
+ perf_ibs_disable_event(perf_ibs, hwc, *config);
+ else
+ perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+ perf_event_update_userpage(event);
+
+ return 1;
+}
+
+static int __kprobes
+perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
+{
+ int handled = 0;
+
+ handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
+ handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
+
+ if (handled)
+ inc_irq_stat(apic_perf_irqs);
+
+ return handled;
+}
+
+static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
+{
+ struct cpu_perf_ibs __percpu *pcpu;
+ int ret;
+
+ pcpu = alloc_percpu(struct cpu_perf_ibs);
+ if (!pcpu)
+ return -ENOMEM;
+
+ perf_ibs->pcpu = pcpu;
+
+ ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
+ if (ret) {
+ perf_ibs->pcpu = NULL;
+ free_percpu(pcpu);
+ }
+
+ return ret;
+}
+
static __init int perf_event_ibs_init(void)
{
if (!ibs_caps)
return -ENODEV; /* ibs not supported by the cpu */
- perf_pmu_register(&perf_ibs, "ibs", -1);
+ perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
+ if (ibs_caps & IBS_CAPS_OPCNT)
+ perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
+ perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+ register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
return 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 26b3e2fef104..166546ec6aef 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1027,8 +1027,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
u64 status;
int handled;
- perf_sample_data_init(&data, 0);
-
cpuc = &__get_cpu_var(cpu_hw_events);
/*
@@ -1082,7 +1080,7 @@ again:
if (!intel_pmu_save_and_restart(event))
continue;
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 7f64df19e7dd..5a3edc27f6e5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -316,8 +316,7 @@ int intel_pmu_drain_bts_buffer(void)
ds->bts_index = ds->bts_buffer_base;
- perf_sample_data_init(&data, 0);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
regs.ip = 0;
/*
@@ -564,8 +563,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
if (!intel_pmu_save_and_restart(event))
return;
- perf_sample_data_init(&data, 0);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
/*
* We use the interrupt regs as a base because the PEBS record
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index a2dfacfd7103..47124a73dd73 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1005,8 +1005,6 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
int idx, handled = 0;
u64 val;
- perf_sample_data_init(&data, 0);
-
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
@@ -1034,10 +1032,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
handled += overflow;
/* event overflow for sure */
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, hwc->last_period);
if (!x86_perf_event_set_period(event))
continue;
+
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 1b81839b6c88..571246d81edf 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -271,7 +271,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
return 1;
- show_registers(regs);
+ show_regs(regs);
#ifdef CONFIG_X86_32
if (user_mode_vm(regs)) {
sp = regs->sp;
@@ -311,16 +311,33 @@ void die(const char *str, struct pt_regs *regs, long err)
static int __init kstack_setup(char *s)
{
+ ssize_t ret;
+ unsigned long val;
+
if (!s)
return -EINVAL;
- kstack_depth_to_print = simple_strtoul(s, NULL, 0);
+
+ ret = kstrtoul(s, 0, &val);
+ if (ret)
+ return ret;
+ kstack_depth_to_print = val;
return 0;
}
early_param("kstack", kstack_setup);
static int __init code_bytes_setup(char *s)
{
- code_bytes = simple_strtoul(s, NULL, 0);
+ ssize_t ret;
+ unsigned long val;
+
+ if (!s)
+ return -EINVAL;
+
+ ret = kstrtoul(s, 0, &val);
+ if (ret)
+ return ret;
+
+ code_bytes = val;
if (code_bytes > 8192)
code_bytes = 8192;
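/*
 * [Editorial note, not part of the patch] The conversion idiom used in
 * this and the following hunks: simple_strtoul() cannot report bad
 * input (it silently stops at the first non-digit), whereas kstrtoul()
 * rejects it with an error code.  A minimal sketch, with a purely
 * hypothetical "example" parameter:
 */
static unsigned long example_param __initdata;

static int __init example_setup(char *s)
{
	unsigned long val;
	int ret;

	if (!s)
		return -EINVAL;

	ret = kstrtoul(s, 0, &val);	/* base 0 auto-detects 0x/0 prefixes */
	if (ret)
		return ret;

	example_param = val;
	return 0;
}
early_param("example", example_setup);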
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 88ec9129271d..e0b1d783daab 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -82,7 +82,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
}
-void show_registers(struct pt_regs *regs)
+void show_regs(struct pt_regs *regs)
{
int i;
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 17107bd6e1f0..791b76122aa8 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -245,7 +245,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
-void show_registers(struct pt_regs *regs)
+void show_regs(struct pt_regs *regs)
{
int i;
unsigned long sp;
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c9a281f272fd..32ff36596ab1 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -24,40 +24,21 @@
#include <trace/syscall.h>
#include <asm/cacheflush.h>
+#include <asm/kprobes.h>
#include <asm/ftrace.h>
#include <asm/nops.h>
-#include <asm/nmi.h>
-
#ifdef CONFIG_DYNAMIC_FTRACE
-/*
- * modifying_code is set to notify NMIs that they need to use
- * memory barriers when entering or exiting. But we don't want
- * to burden NMIs with unnecessary memory barriers when code
- * modification is not being done (which is most of the time).
- *
- * A mutex is already held when ftrace_arch_code_modify_prepare
- * and post_process are called. No locks need to be taken here.
- *
- * Stop machine will make sure currently running NMIs are done
- * and new NMIs will see the updated variable before we need
- * to worry about NMIs doing memory barriers.
- */
-static int modifying_code __read_mostly;
-static DEFINE_PER_CPU(int, save_modifying_code);
-
int ftrace_arch_code_modify_prepare(void)
{
set_kernel_text_rw();
set_all_modules_text_rw();
- modifying_code = 1;
return 0;
}
int ftrace_arch_code_modify_post_process(void)
{
- modifying_code = 0;
set_all_modules_text_ro();
set_kernel_text_ro();
return 0;
@@ -90,134 +71,6 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
return calc.code;
}
-/*
- * Modifying code must take extra care. On an SMP machine, if
- * the code being modified is also being executed on another CPU
- * that CPU will have undefined results and possibly take a GPF.
- * We use kstop_machine to stop other CPUS from exectuing code.
- * But this does not stop NMIs from happening. We still need
- * to protect against that. We separate out the modification of
- * the code to take care of this.
- *
- * Two buffers are added: An IP buffer and a "code" buffer.
- *
- * 1) Put the instruction pointer into the IP buffer
- * and the new code into the "code" buffer.
- * 2) Wait for any running NMIs to finish and set a flag that says
- * we are modifying code, it is done in an atomic operation.
- * 3) Write the code
- * 4) clear the flag.
- * 5) Wait for any running NMIs to finish.
- *
- * If an NMI is executed, the first thing it does is to call
- * "ftrace_nmi_enter". This will check if the flag is set to write
- * and if it is, it will write what is in the IP and "code" buffers.
- *
- * The trick is, it does not matter if everyone is writing the same
- * content to the code location. Also, if a CPU is executing code
- * it is OK to write to that code location if the contents being written
- * are the same as what exists.
- */
-
-#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */
-static atomic_t nmi_running = ATOMIC_INIT(0);
-static int mod_code_status; /* holds return value of text write */
-static void *mod_code_ip; /* holds the IP to write to */
-static const void *mod_code_newcode; /* holds the text to write to the IP */
-
-static unsigned nmi_wait_count;
-static atomic_t nmi_update_count = ATOMIC_INIT(0);
-
-int ftrace_arch_read_dyn_info(char *buf, int size)
-{
- int r;
-
- r = snprintf(buf, size, "%u %u",
- nmi_wait_count,
- atomic_read(&nmi_update_count));
- return r;
-}
-
-static void clear_mod_flag(void)
-{
- int old = atomic_read(&nmi_running);
-
- for (;;) {
- int new = old & ~MOD_CODE_WRITE_FLAG;
-
- if (old == new)
- break;
-
- old = atomic_cmpxchg(&nmi_running, old, new);
- }
-}
-
-static void ftrace_mod_code(void)
-{
- /*
- * Yes, more than one CPU process can be writing to mod_code_status.
- * (and the code itself)
- * But if one were to fail, then they all should, and if one were
- * to succeed, then they all should.
- */
- mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
- MCOUNT_INSN_SIZE);
-
- /* if we fail, then kill any new writers */
- if (mod_code_status)
- clear_mod_flag();
-}
-
-void ftrace_nmi_enter(void)
-{
- __this_cpu_write(save_modifying_code, modifying_code);
-
- if (!__this_cpu_read(save_modifying_code))
- return;
-
- if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
- smp_rmb();
- ftrace_mod_code();
- atomic_inc(&nmi_update_count);
- }
- /* Must have previous changes seen before executions */
- smp_mb();
-}
-
-void ftrace_nmi_exit(void)
-{
- if (!__this_cpu_read(save_modifying_code))
- return;
-
- /* Finish all executions before clearing nmi_running */
- smp_mb();
- atomic_dec(&nmi_running);
-}
-
-static void wait_for_nmi_and_set_mod_flag(void)
-{
- if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
- return;
-
- do {
- cpu_relax();
- } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
-
- nmi_wait_count++;
-}
-
-static void wait_for_nmi(void)
-{
- if (!atomic_read(&nmi_running))
- return;
-
- do {
- cpu_relax();
- } while (atomic_read(&nmi_running));
-
- nmi_wait_count++;
-}
-
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
@@ -238,26 +91,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code)
if (within(ip, (unsigned long)_text, (unsigned long)_etext))
ip = (unsigned long)__va(__pa(ip));
- mod_code_ip = (void *)ip;
- mod_code_newcode = new_code;
-
- /* The buffers need to be visible before we let NMIs write them */
- smp_mb();
-
- wait_for_nmi_and_set_mod_flag();
-
- /* Make sure all running NMIs have finished before we write the code */
- smp_mb();
-
- ftrace_mod_code();
-
- /* Make sure the write happens before clearing the bit */
- smp_mb();
-
- clear_mod_flag();
- wait_for_nmi();
-
- return mod_code_status;
+ return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
}
static const unsigned char *ftrace_nop_replace(void)
@@ -334,6 +168,336 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
return ret;
}
+int modifying_ftrace_code __read_mostly;
+
+/*
+ * A breakpoint was added to the code address we are about to
+ * modify, and this is the handler that will just skip over it.
+ * We are either changing a nop into a trace call, or a trace
+ * call to a nop. While the change is taking place, we treat
+ * it just like it was a nop.
+ */
+int ftrace_int3_handler(struct pt_regs *regs)
+{
+ if (WARN_ON_ONCE(!regs))
+ return 0;
+
+ if (!ftrace_location(regs->ip - 1))
+ return 0;
+
+ regs->ip += MCOUNT_INSN_SIZE - 1;
+
+ return 1;
+}
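/*
 * [Editorial illustration, not part of the patch] An mcount call site
 * is MCOUNT_INSN_SIZE == 5 bytes on x86:
 *
 *   e8 xx xx xx xx      call ftrace_caller      (or a 5-byte nop)
 *
 * While it is being rewritten, byte 0 holds int3 (0xcc).  A CPU that
 * hits the trap reports regs->ip == site + 1, so ftrace_location() is
 * checked at regs->ip - 1, and advancing by MCOUNT_INSN_SIZE - 1 == 4
 * lands on the next instruction: the half-patched site behaves exactly
 * like a nop for the duration of the update.
 */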
+
+static int ftrace_write(unsigned long ip, const char *val, int size)
+{
+ /*
+ * On x86_64, kernel text mappings are mapped read-only with
+ * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
+ * of the kernel text mapping to modify the kernel text.
+ *
+ * For 32bit kernels, these mappings are the same and we can use
+ * kernel identity mapping to modify code.
+ */
+ if (within(ip, (unsigned long)_text, (unsigned long)_etext))
+ ip = (unsigned long)__va(__pa(ip));
+
+ return probe_kernel_write((void *)ip, val, size);
+}
+
+static int add_break(unsigned long ip, const char *old)
+{
+ unsigned char replaced[MCOUNT_INSN_SIZE];
+ unsigned char brk = BREAKPOINT_INSTRUCTION;
+
+ if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ /* Make sure it is what we expect it to be */
+ if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
+ return -EINVAL;
+
+ if (ftrace_write(ip, &brk, 1))
+ return -EPERM;
+
+ return 0;
+}
+
+static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned const char *old;
+ unsigned long ip = rec->ip;
+
+ old = ftrace_call_replace(ip, addr);
+
+ return add_break(rec->ip, old);
+}
+
+
+static int add_brk_on_nop(struct dyn_ftrace *rec)
+{
+ unsigned const char *old;
+
+ old = ftrace_nop_replace();
+
+ return add_break(rec->ip, old);
+}
+
+static int add_breakpoints(struct dyn_ftrace *rec, int enable)
+{
+ unsigned long ftrace_addr;
+ int ret;
+
+ ret = ftrace_test_record(rec, enable);
+
+ ftrace_addr = (unsigned long)FTRACE_ADDR;
+
+ switch (ret) {
+ case FTRACE_UPDATE_IGNORE:
+ return 0;
+
+ case FTRACE_UPDATE_MAKE_CALL:
+ /* converting nop to call */
+ return add_brk_on_nop(rec);
+
+ case FTRACE_UPDATE_MAKE_NOP:
+ /* converting a call to a nop */
+ return add_brk_on_call(rec, ftrace_addr);
+ }
+ return 0;
+}
+
+/*
+ * On error, we need to remove breakpoints. This needs to
+ * be done carefully. If the address does not currently have a
+ * breakpoint, we know we are done. Otherwise, we look at the
+ * remaining 4 bytes of the instruction. If it matches a nop
+ * we replace the breakpoint with the nop. Otherwise we replace
+ * it with the call instruction.
+ */
+static int remove_breakpoint(struct dyn_ftrace *rec)
+{
+ unsigned char ins[MCOUNT_INSN_SIZE];
+ unsigned char brk = BREAKPOINT_INSTRUCTION;
+ const unsigned char *nop;
+ unsigned long ftrace_addr;
+ unsigned long ip = rec->ip;
+
+ /* If we fail the read, just give up */
+ if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ /* If this does not have a breakpoint, we are done */
+ if (ins[0] != brk)
+ return -1;
+
+ nop = ftrace_nop_replace();
+
+ /*
+ * If the last 4 bytes of the instruction do not match
+ * a nop, then we assume that this is a call to ftrace_addr.
+ */
+ if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
+ /*
+ * For extra paranoia, we check if the breakpoint is on
+ * a call that would actually jump to the ftrace_addr.
+ * If not, don't touch the breakpoint; we might just create
+ * a disaster.
+ */
+ ftrace_addr = (unsigned long)FTRACE_ADDR;
+ nop = ftrace_call_replace(ip, ftrace_addr);
+
+ if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
+ return -EINVAL;
+ }
+
+ return probe_kernel_write((void *)ip, &nop[0], 1);
+}
+
+static int add_update_code(unsigned long ip, unsigned const char *new)
+{
+ /* skip breakpoint */
+ ip++;
+ new++;
+ if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1))
+ return -EPERM;
+ return 0;
+}
+
+static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long ip = rec->ip;
+ unsigned const char *new;
+
+ new = ftrace_call_replace(ip, addr);
+ return add_update_code(ip, new);
+}
+
+static int add_update_nop(struct dyn_ftrace *rec)
+{
+ unsigned long ip = rec->ip;
+ unsigned const char *new;
+
+ new = ftrace_nop_replace();
+ return add_update_code(ip, new);
+}
+
+static int add_update(struct dyn_ftrace *rec, int enable)
+{
+ unsigned long ftrace_addr;
+ int ret;
+
+ ret = ftrace_test_record(rec, enable);
+
+ ftrace_addr = (unsigned long)FTRACE_ADDR;
+
+ switch (ret) {
+ case FTRACE_UPDATE_IGNORE:
+ return 0;
+
+ case FTRACE_UPDATE_MAKE_CALL:
+ /* converting nop to call */
+ return add_update_call(rec, ftrace_addr);
+
+ case FTRACE_UPDATE_MAKE_NOP:
+ /* converting a call to a nop */
+ return add_update_nop(rec);
+ }
+
+ return 0;
+}
+
+static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long ip = rec->ip;
+ unsigned const char *new;
+
+ new = ftrace_call_replace(ip, addr);
+
+ if (ftrace_write(ip, new, 1))
+ return -EPERM;
+
+ return 0;
+}
+
+static int finish_update_nop(struct dyn_ftrace *rec)
+{
+ unsigned long ip = rec->ip;
+ unsigned const char *new;
+
+ new = ftrace_nop_replace();
+
+ if (ftrace_write(ip, new, 1))
+ return -EPERM;
+ return 0;
+}
+
+static int finish_update(struct dyn_ftrace *rec, int enable)
+{
+ unsigned long ftrace_addr;
+ int ret;
+
+ ret = ftrace_update_record(rec, enable);
+
+ ftrace_addr = (unsigned long)FTRACE_ADDR;
+
+ switch (ret) {
+ case FTRACE_UPDATE_IGNORE:
+ return 0;
+
+ case FTRACE_UPDATE_MAKE_CALL:
+ /* converting nop to call */
+ return finish_update_call(rec, ftrace_addr);
+
+ case FTRACE_UPDATE_MAKE_NOP:
+ /* converting a call to a nop */
+ return finish_update_nop(rec);
+ }
+
+ return 0;
+}
+
+static void do_sync_core(void *data)
+{
+ sync_core();
+}
+
+static void run_sync(void)
+{
+ int enable_irqs = irqs_disabled();
+
+ /* We may be called with interrupts disabled (on bootup). */
+ if (enable_irqs)
+ local_irq_enable();
+ on_each_cpu(do_sync_core, NULL, 1);
+ if (enable_irqs)
+ local_irq_disable();
+}
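/*
 * [Editorial note, not part of the patch] sync_core() executes a
 * serializing instruction (CPUID) on each CPU; the cross-modifying
 * code rules require every CPU to serialize before it may safely
 * execute the freshly written bytes, hence one IPI round per phase.
 */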
+
+void ftrace_replace_code(int enable)
+{
+ struct ftrace_rec_iter *iter;
+ struct dyn_ftrace *rec;
+ const char *report = "adding breakpoints";
+ int count = 0;
+ int ret;
+
+ for_ftrace_rec_iter(iter) {
+ rec = ftrace_rec_iter_record(iter);
+
+ ret = add_breakpoints(rec, enable);
+ if (ret)
+ goto remove_breakpoints;
+ count++;
+ }
+
+ run_sync();
+
+ report = "updating code";
+
+ for_ftrace_rec_iter(iter) {
+ rec = ftrace_rec_iter_record(iter);
+
+ ret = add_update(rec, enable);
+ if (ret)
+ goto remove_breakpoints;
+ }
+
+ run_sync();
+
+ report = "removing breakpoints";
+
+ for_ftrace_rec_iter(iter) {
+ rec = ftrace_rec_iter_record(iter);
+
+ ret = finish_update(rec, enable);
+ if (ret)
+ goto remove_breakpoints;
+ }
+
+ run_sync();
+
+ return;
+
+ remove_breakpoints:
+ ftrace_bug(ret, rec ? rec->ip : 0);
+ printk(KERN_WARNING "Failed on %s (%d):\n", report, count);
+ for_ftrace_rec_iter(iter) {
+ rec = ftrace_rec_iter_record(iter);
+ remove_breakpoint(rec);
+ }
+}
+
+void arch_ftrace_update_code(int command)
+{
+ modifying_ftrace_code++;
+
+ ftrace_modify_all_code(command);
+
+ modifying_ftrace_code--;
+}
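/*
 * [Editorial summary, not part of the patch] Putting the pieces
 * together, one ftrace update now runs as:
 *
 *   1. add_breakpoints():  int3 on byte 0 of every site, run_sync()
 *   2. add_update():       rewrite bytes 1..4 of each site, run_sync()
 *   3. finish_update():    replace the int3 with byte 0, run_sync()
 *
 * do_int3() consults modifying_ftrace_code and lets ftrace_int3_handler()
 * skip trapped sites, so no CPU ever executes a torn instruction.
 */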
+
int __init ftrace_dyn_arch_init(void *data)
{
/* The return code is returned via data */
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 7734bcbb5a3a..f250431fb505 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -88,7 +88,7 @@ void kernel_fpu_begin(void)
__thread_clear_has_fpu(me);
/* We do 'stts()' in kernel_fpu_end() */
} else {
- percpu_write(fpu_owner_task, NULL);
+ this_cpu_write(fpu_owner_task, NULL);
clts();
}
}
@@ -235,6 +235,7 @@ int init_fpu(struct task_struct *tsk)
if (tsk_used_math(tsk)) {
if (HAVE_HWFP && tsk == current)
unlazy_fpu(tsk);
+ tsk->thread.fpu.last_cpu = ~0;
return 0;
}
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
deleted file mode 100644
index 43e9ccf44947..000000000000
--- a/arch/x86/kernel/init_task.c
+++ /dev/null
@@ -1,42 +0,0 @@
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/init_task.h>
-#include <linux/fs.h>
-#include <linux/mqueue.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-
-static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
-static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-
-/*
- * Initial thread structure.
- *
- * We need to make sure that this is THREAD_SIZE aligned due to the
- * way process stacks are handled. This is done by having a special
- * "init_task" linker map entry..
- */
-union thread_union init_thread_union __init_task_data =
- { INIT_THREAD_INFO(init_task) };
-
-/*
- * Initial task structure.
- *
- * All other task structs will be allocated on slabs in fork.c
- */
-struct task_struct init_task = INIT_TASK(init_task);
-EXPORT_SYMBOL(init_task);
-
-/*
- * per-CPU TSS segments. Threads are completely 'soft' on Linux,
- * no more per-task TSS's. The TSS size is kept cacheline-aligned
- * so they are allowed to end up in the .data..cacheline_aligned
- * section. Since TSS's are completely CPU-local, we want them
- * on exact cacheline boundaries, to eliminate cacheline ping-pong.
- */
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
-
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 58b7f27cb3e9..344faf8d0d62 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -127,8 +127,8 @@ void __cpuinit irq_ctx_init(int cpu)
return;
irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
- THREAD_FLAGS,
- THREAD_ORDER));
+ THREADINFO_GFP,
+ THREAD_SIZE_ORDER));
memset(&irqctx->tinfo, 0, sizeof(struct thread_info));
irqctx->tinfo.cpu = cpu;
irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
@@ -137,8 +137,8 @@ void __cpuinit irq_ctx_init(int cpu)
per_cpu(hardirq_ctx, cpu) = irqctx;
irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
- THREAD_FLAGS,
- THREAD_ORDER));
+ THREADINFO_GFP,
+ THREAD_SIZE_ORDER));
memset(&irqctx->tinfo, 0, sizeof(struct thread_info));
irqctx->tinfo.cpu = cpu;
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index e213fc8408d2..e2f751efb7b1 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1037,9 +1037,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
"current sp %p does not match saved sp %p\n",
stack_addr(regs), kcb->jprobe_saved_sp);
printk(KERN_ERR "Saved registers for jprobe %p\n", jp);
- show_registers(saved_regs);
+ show_regs(saved_regs);
printk(KERN_ERR "Current registers\n");
- show_registers(regs);
+ show_regs(regs);
BUG();
}
*regs = kcb->jprobe_saved_regs;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b8ba6e4a27e4..e554e5ad2fe8 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -79,7 +79,6 @@ struct kvm_task_sleep_node {
u32 token;
int cpu;
bool halted;
- struct mm_struct *mm;
};
static struct kvm_task_sleep_head {
@@ -126,9 +125,7 @@ void kvm_async_pf_task_wait(u32 token)
n.token = token;
n.cpu = smp_processor_id();
- n.mm = current->active_mm;
n.halted = idle || preempt_count() > 1;
- atomic_inc(&n.mm->mm_count);
init_waitqueue_head(&n.wq);
hlist_add_head(&n.link, &b->list);
spin_unlock(&b->lock);
@@ -161,9 +158,6 @@ EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
static void apf_task_wake_one(struct kvm_task_sleep_node *n)
{
hlist_del_init(&n->link);
- if (!n->mm)
- return;
- mmdrop(n->mm);
if (n->halted)
smp_send_reschedule(n->cpu);
else if (waitqueue_active(&n->wq))
@@ -207,7 +201,7 @@ again:
* async PF was not yet handled.
* Add dummy entry for the token.
*/
- n = kmalloc(sizeof(*n), GFP_ATOMIC);
+ n = kzalloc(sizeof(*n), GFP_ATOMIC);
if (!n) {
/*
* Allocation failed! Busy wait while other cpu
@@ -219,7 +213,6 @@ again:
}
n->token = token;
n->cpu = smp_processor_id();
- n->mm = NULL;
init_waitqueue_head(&n->wq);
hlist_add_head(&n->link, &b->list);
} else
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 73465aab28f8..8a2ce8fd41c0 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -82,11 +82,6 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
- if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
- pr_warning("CPU%d: family %d not supported\n", cpu, c->x86);
- return -1;
- }
-
csig->rev = c->microcode;
pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
@@ -380,6 +375,13 @@ static struct microcode_ops microcode_amd_ops = {
struct microcode_ops * __init init_amd_microcode(void)
{
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
+ pr_warning("AMD CPU family 0x%x not supported\n", c->x86);
+ return NULL;
+ }
+
patch = (void *)get_zeroed_page(GFP_KERNEL);
if (!patch)
return NULL;
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 87a0f8688301..fbdfc6917180 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -299,12 +299,11 @@ static ssize_t reload_store(struct device *dev,
{
unsigned long val;
int cpu = dev->id;
- int ret = 0;
- char *end;
+ ssize_t ret = 0;
- val = simple_strtoul(buf, &end, 0);
- if (end == buf)
- return -EINVAL;
+ ret = kstrtoul(buf, 0, &val);
+ if (ret)
+ return ret;
if (val == 1) {
get_online_cpus();
@@ -419,10 +418,8 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif)
if (err)
return err;
- if (microcode_init_cpu(cpu) == UCODE_ERROR) {
- sysfs_remove_group(&dev->kobj, &mc_attr_group);
+ if (microcode_init_cpu(cpu) == UCODE_ERROR)
return -EINVAL;
- }
return err;
}
@@ -528,11 +525,11 @@ static int __init microcode_init(void)
microcode_ops = init_intel_microcode();
else if (c->x86_vendor == X86_VENDOR_AMD)
microcode_ops = init_amd_microcode();
-
- if (!microcode_ops) {
+ else
pr_err("no support for this CPU vendor\n");
+
+ if (!microcode_ops)
return -ENODEV;
- }
microcode_pdev = platform_device_register_simple("microcode", -1,
NULL, 0);
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 3ca42d0e43a2..0327e2b3c408 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -147,12 +147,6 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
memset(csig, 0, sizeof(*csig));
- if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
- cpu_has(c, X86_FEATURE_IA64)) {
- pr_err("CPU%d not a capable Intel processor\n", cpu_num);
- return -1;
- }
-
csig->sig = cpuid_eax(0x00000001);
if ((c->x86_model >= 5) || (c->x86 > 6)) {
@@ -463,6 +457,14 @@ static struct microcode_ops microcode_intel_ops = {
struct microcode_ops * __init init_intel_microcode(void)
{
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
+ cpu_has(c, X86_FEATURE_IA64)) {
+ pr_err("Intel CPU family 0x%x not supported\n", c->x86);
+ return NULL;
+ }
+
return &microcode_intel_ops;
}
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 47acaf319165..bffdfd48c1f2 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -31,14 +31,6 @@
#include <asm/nmi.h>
#include <asm/x86_init.h>
-#define NMI_MAX_NAMELEN 16
-struct nmiaction {
- struct list_head list;
- nmi_handler_t handler;
- unsigned int flags;
- char *name;
-};
-
struct nmi_desc {
spinlock_t lock;
struct list_head head;
@@ -54,6 +46,14 @@ static struct nmi_desc nmi_desc[NMI_MAX] =
.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
.head = LIST_HEAD_INIT(nmi_desc[1].head),
},
+ {
+ .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
+ .head = LIST_HEAD_INIT(nmi_desc[2].head),
+ },
+ {
+ .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
+ .head = LIST_HEAD_INIT(nmi_desc[3].head),
+ },
};
@@ -84,7 +84,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
#define nmi_to_desc(type) (&nmi_desc[type])
-static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
+static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
{
struct nmi_desc *desc = nmi_to_desc(type);
struct nmiaction *a;
@@ -107,11 +107,14 @@ static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs,
return handled;
}
-static int __setup_nmi(unsigned int type, struct nmiaction *action)
+int __register_nmi_handler(unsigned int type, struct nmiaction *action)
{
struct nmi_desc *desc = nmi_to_desc(type);
unsigned long flags;
+ if (!action->handler)
+ return -EINVAL;
+
spin_lock_irqsave(&desc->lock, flags);
/*
@@ -120,6 +123,8 @@ static int __setup_nmi(unsigned int type, struct nmiaction *action)
* to manage expectations
*/
WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
+ WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
+ WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));
/*
* some handlers need to be executed first otherwise a fake
@@ -133,8 +138,9 @@ static int __setup_nmi(unsigned int type, struct nmiaction *action)
spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
+EXPORT_SYMBOL(__register_nmi_handler);
-static struct nmiaction *__free_nmi(unsigned int type, const char *name)
+void unregister_nmi_handler(unsigned int type, const char *name)
{
struct nmi_desc *desc = nmi_to_desc(type);
struct nmiaction *n;
@@ -157,61 +163,16 @@ static struct nmiaction *__free_nmi(unsigned int type, const char *name)
spin_unlock_irqrestore(&desc->lock, flags);
synchronize_rcu();
- return (n);
}
-
-int register_nmi_handler(unsigned int type, nmi_handler_t handler,
- unsigned long nmiflags, const char *devname)
-{
- struct nmiaction *action;
- int retval = -ENOMEM;
-
- if (!handler)
- return -EINVAL;
-
- action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL);
- if (!action)
- goto fail_action;
-
- action->handler = handler;
- action->flags = nmiflags;
- action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL);
- if (!action->name)
- goto fail_action_name;
-
- retval = __setup_nmi(type, action);
-
- if (retval)
- goto fail_setup_nmi;
-
- return retval;
-
-fail_setup_nmi:
- kfree(action->name);
-fail_action_name:
- kfree(action);
-fail_action:
-
- return retval;
-}
-EXPORT_SYMBOL_GPL(register_nmi_handler);
-
-void unregister_nmi_handler(unsigned int type, const char *name)
-{
- struct nmiaction *a;
-
- a = __free_nmi(type, name);
- if (a) {
- kfree(a->name);
- kfree(a);
- }
-}
-
EXPORT_SYMBOL_GPL(unregister_nmi_handler);
-static notrace __kprobes void
+static __kprobes void
pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
+ /* check to see if anyone registered against these types of errors */
+ if (nmi_handle(NMI_SERR, regs, false))
+ return;
+
pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
reason, smp_processor_id());
@@ -236,15 +197,19 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
outb(reason, NMI_REASON_PORT);
}
-static notrace __kprobes void
+static __kprobes void
io_check_error(unsigned char reason, struct pt_regs *regs)
{
unsigned long i;
+ /* check to see if anyone registered against these types of errors */
+ if (nmi_handle(NMI_IO_CHECK, regs, false))
+ return;
+
pr_emerg(
"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
reason, smp_processor_id());
- show_registers(regs);
+ show_regs(regs);
if (panic_on_io_nmi)
panic("NMI IOCK error: Not continuing");
@@ -263,7 +228,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
outb(reason, NMI_REASON_PORT);
}
-static notrace __kprobes void
+static __kprobes void
unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
{
int handled;
@@ -305,7 +270,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
static DEFINE_PER_CPU(bool, swallow_nmi);
static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
-static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
+static __kprobes void default_do_nmi(struct pt_regs *regs)
{
unsigned char reason = 0;
int handled;
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index 2c39dcd510fa..e31bf8d5c4d2 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -13,6 +13,7 @@
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <linux/init.h>
+#include <linux/percpu.h>
#include <asm/apic.h>
#include <asm/nmi.h>
@@ -117,15 +118,15 @@ static void __init dotest(void (*testcase_fn)(void), int expected)
unexpected_testcase_failures++;
if (nmi_fail == FAILURE)
- printk("FAILED |");
+ printk(KERN_CONT "FAILED |");
else if (nmi_fail == TIMEOUT)
- printk("TIMEOUT|");
+ printk(KERN_CONT "TIMEOUT|");
else
- printk("ERROR |");
+ printk(KERN_CONT "ERROR |");
dump_stack();
} else {
testcase_successes++;
- printk(" ok |");
+ printk(KERN_CONT " ok |");
}
testcase_total++;
@@ -150,10 +151,10 @@ void __init nmi_selftest(void)
print_testname("remote IPI");
dotest(remote_ipi, SUCCESS);
- printk("\n");
+ printk(KERN_CONT "\n");
print_testname("local IPI");
dotest(local_ipi, SUCCESS);
- printk("\n");
+ printk(KERN_CONT "\n");
cleanup_nmi_testsuite();
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index ab137605e694..9ce885996fd7 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -241,16 +241,16 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA
static inline void enter_lazy(enum paravirt_lazy_mode mode)
{
- BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
+ BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
- percpu_write(paravirt_lazy_mode, mode);
+ this_cpu_write(paravirt_lazy_mode, mode);
}
static void leave_lazy(enum paravirt_lazy_mode mode)
{
- BUG_ON(percpu_read(paravirt_lazy_mode) != mode);
+ BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode);
- percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
+ this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
}
void paravirt_enter_lazy_mmu(void)
@@ -267,7 +267,7 @@ void paravirt_start_context_switch(struct task_struct *prev)
{
BUG_ON(preemptible());
- if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
+ if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
arch_leave_lazy_mmu_mode();
set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
}
@@ -289,7 +289,7 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
if (in_interrupt())
return PARAVIRT_LAZY_NONE;
- return percpu_read(paravirt_lazy_mode);
+ return this_cpu_read(paravirt_lazy_mode);
}
void arch_flush_lazy_mmu_mode(void)
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index d0b2fb9ccbb1..b72838bae64a 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1480,8 +1480,9 @@ cleanup:
static int __init calgary_parse_options(char *p)
{
unsigned int bridge;
+ unsigned long val;
size_t len;
- char* endp;
+ ssize_t ret;
while (*p) {
if (!strncmp(p, "64k", 3))
@@ -1512,10 +1513,11 @@ static int __init calgary_parse_options(char *p)
++p;
if (*p == '\0')
break;
- bridge = simple_strtoul(p, &endp, 0);
- if (p == endp)
+ ret = kstrtoul(p, 0, &val);
+ if (ret)
break;
+ bridge = val;
if (bridge < MAX_PHB_BUS_NUM) {
printk(KERN_INFO "Calgary: disabling "
"translation for PHB %#x\n", bridge);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 1d92a5ab6e8b..f2e8c0f951d6 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -27,6 +27,15 @@
#include <asm/debugreg.h>
#include <asm/nmi.h>
+/*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's. The TSS size is kept cacheline-aligned
+ * so they are allowed to end up in the .data..cacheline_aligned
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */
+DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
+
#ifdef CONFIG_X86_64
static DEFINE_PER_CPU(unsigned char, is_idle);
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -67,10 +76,9 @@ void free_thread_xstate(struct task_struct *tsk)
fpu_free(&tsk->thread.fpu);
}
-void free_thread_info(struct thread_info *ti)
+void arch_release_task_struct(struct task_struct *tsk)
{
- free_thread_xstate(ti->task);
- free_pages((unsigned long)ti, THREAD_ORDER);
+ free_thread_xstate(tsk);
}
void arch_task_cache_init(void)
@@ -105,12 +113,6 @@ void exit_thread(void)
}
}
-void show_regs(struct pt_regs *regs)
-{
- show_registers(regs);
- show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), 0);
-}
-
void show_regs_common(void)
{
const char *vendor, *product, *board;
@@ -377,7 +379,7 @@ static inline void play_dead(void)
#ifdef CONFIG_X86_64
void enter_idle(void)
{
- percpu_write(is_idle, 1);
+ this_cpu_write(is_idle, 1);
atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}
@@ -516,26 +518,6 @@ void stop_this_cpu(void *dummy)
}
}
-static void do_nothing(void *unused)
-{
-}
-
-/*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
- *
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
- */
-void cpu_idle_wait(void)
-{
- smp_mb();
- /* kick all the CPUs so that they exit out of pm_idle */
- smp_call_function(do_nothing, NULL, 1);
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
@@ -594,9 +576,17 @@ int mwait_usable(const struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
+ /* Use mwait if idle=mwait boot option is given */
if (boot_option_idle_override == IDLE_FORCE_MWAIT)
return 1;
+ /*
+ * Any idle= boot option other than idle=mwait means that we must not
+ * use mwait. E.g. idle=halt, idle=poll or idle=nomwait.
+ */
+ if (boot_option_idle_override != IDLE_NO_OVERRIDE)
+ return 0;
+
if (c->cpuid_level < MWAIT_INFO)
return 0;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index ae6847303e26..01d8d40ccaf6 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -302,7 +302,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
switch_fpu_finish(next_p, fpu);
- percpu_write(current_task, next_p);
+ this_cpu_write(current_task, next_p);
return prev_p;
}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 733ca39f367e..28e810255a0a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -237,7 +237,7 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
current->thread.usersp = new_sp;
regs->ip = new_ip;
regs->sp = new_sp;
- percpu_write(old_rsp, new_sp);
+ this_cpu_write(old_rsp, new_sp);
regs->cs = _cs;
regs->ss = _ss;
regs->flags = X86_EFLAGS_IF;
@@ -359,11 +359,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Switch the PDA and FPU contexts.
*/
- prev->usersp = percpu_read(old_rsp);
- percpu_write(old_rsp, next->usersp);
- percpu_write(current_task, next_p);
+ prev->usersp = this_cpu_read(old_rsp);
+ this_cpu_write(old_rsp, next->usersp);
+ this_cpu_write(current_task, next_p);
- percpu_write(kernel_stack,
+ this_cpu_write(kernel_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE - KERNEL_STACK_OFFSET);
@@ -423,6 +423,7 @@ void set_personality_ia32(bool x32)
current_thread_info()->status |= TS_COMPAT;
}
}
+EXPORT_SYMBOL_GPL(set_personality_ia32);
unsigned long get_wchan(struct task_struct *p)
{
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 685845cf16e0..13b1990c7c58 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1480,7 +1480,11 @@ long syscall_trace_enter(struct pt_regs *regs)
regs->flags |= X86_EFLAGS_TF;
/* do the secure computing check first */
- secure_computing(regs->orig_ax);
+ if (secure_computing(regs->orig_ax)) {
+ /* seccomp failures shouldn't expose any additional code. */
+ ret = -1L;
+ goto out;
+ }
if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
ret = -1L;
@@ -1505,6 +1509,7 @@ long syscall_trace_enter(struct pt_regs *regs)
regs->dx, regs->r10);
#endif
+out:
return ret ?: regs->orig_ax;
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 1a2901562059..9b4204e06665 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -393,10 +393,9 @@ static void __init reserve_initrd(void)
initrd_start = 0;
if (ramdisk_size >= (end_of_lowmem>>1)) {
- memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
- printk(KERN_ERR "initrd too large to handle, "
- "disabling initrd\n");
- return;
+ panic("initrd too large to handle, "
+ "disabling initrd (%lld needed, %lld available)\n",
+ ramdisk_size, end_of_lowmem>>1);
}
printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image,
@@ -1012,7 +1011,8 @@ void __init setup_arch(char **cmdline_p)
init_cpu_to_node();
init_apic_mappings();
- ioapic_and_gsi_init();
+ if (x86_io_apic_ops.init)
+ x86_io_apic_ops.init();
kvm_guest_init();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 71f4727da373..5a98aa272184 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -185,10 +185,22 @@ void __init setup_per_cpu_areas(void)
#endif
rc = -EINVAL;
if (pcpu_chosen_fc != PCPU_FC_PAGE) {
- const size_t atom_size = cpu_has_pse ? PMD_SIZE : PAGE_SIZE;
const size_t dyn_size = PERCPU_MODULE_RESERVE +
PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE;
+ size_t atom_size;
+ /*
+ * On 64bit, use PMD_SIZE for atom_size so that embedded
+ * percpu areas are aligned to PMD. This, in the future,
+ * can also allow using PMD mappings in vmalloc area. Use
+ * PAGE_SIZE on 32bit as vmalloc space is highly contended
+ * and large vmalloc area allocs can easily fail.
+ */
+#ifdef CONFIG_X86_64
+ atom_size = PMD_SIZE;
+#else
+ atom_size = PAGE_SIZE;
+#endif
rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
dyn_size, atom_size,
pcpu_cpu_distance,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6e1e406038c2..433529e29be4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -76,20 +76,8 @@
/* State of each CPU */
DEFINE_PER_CPU(int, cpu_state) = { 0 };
-/* Store all idle threads, this can be reused instead of creating
-* a new thread. Also avoids complicated thread destroy functionality
-* for idle threads.
-*/
#ifdef CONFIG_HOTPLUG_CPU
/*
- * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is
- * removed after init for !CONFIG_HOTPLUG_CPU.
- */
-static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
-#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
-#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
-
-/*
* We need this for trampoline_base protection from concurrent accesses when
* off- and onlining cores wildly.
*/
@@ -97,20 +85,16 @@ static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
void cpu_hotplug_driver_lock(void)
{
- mutex_lock(&x86_cpu_hotplug_driver_mutex);
+ mutex_lock(&x86_cpu_hotplug_driver_mutex);
}
void cpu_hotplug_driver_unlock(void)
{
- mutex_unlock(&x86_cpu_hotplug_driver_mutex);
+ mutex_unlock(&x86_cpu_hotplug_driver_mutex);
}
ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; }
ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; }
-#else
-static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
-#define get_idle_for_cpu(x) (idle_thread_array[(x)])
-#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p))
#endif
/* Number of siblings per CPU package */
@@ -315,59 +299,90 @@ void __cpuinit smp_store_cpu_info(int id)
identify_secondary_cpu(c);
}
-static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
+static bool __cpuinit
+topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
{
- cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
- cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
- cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
- cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
- cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2));
- cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1));
+ int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+ return !WARN_ONCE(cpu_to_node(cpu1) != cpu_to_node(cpu2),
+ "sched: CPU #%d's %s-sibling CPU #%d is not on the same node! "
+ "[node: %d != %d]. Ignoring dependency.\n",
+ cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
}
+#define link_mask(_m, c1, c2) \
+do { \
+ cpumask_set_cpu((c1), cpu_##_m##_mask(c2)); \
+ cpumask_set_cpu((c2), cpu_##_m##_mask(c1)); \
+} while (0)
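/*
 * [Editorial example, not part of the patch] link_mask() pastes the
 * mask name, so e.g. link_mask(llc_shared, 2, 5) expands to:
 *
 *   cpumask_set_cpu(2, cpu_llc_shared_mask(5));
 *   cpumask_set_cpu(5, cpu_llc_shared_mask(2));
 */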
+
+static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+ if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
+ int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+ if (c->phys_proc_id == o->phys_proc_id &&
+ per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) &&
+ c->compute_unit_id == o->compute_unit_id)
+ return topology_sane(c, o, "smt");
+
+ } else if (c->phys_proc_id == o->phys_proc_id &&
+ c->cpu_core_id == o->cpu_core_id) {
+ return topology_sane(c, o, "smt");
+ }
+
+ return false;
+}
+
+static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+ int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+ if (per_cpu(cpu_llc_id, cpu1) != BAD_APICID &&
+ per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2))
+ return topology_sane(c, o, "llc");
+
+ return false;
+}
+
+static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+ if (c->phys_proc_id == o->phys_proc_id)
+ return topology_sane(c, o, "mc");
+
+ return false;
+}
void __cpuinit set_cpu_sibling_map(int cpu)
{
- int i;
+ bool has_mc = boot_cpu_data.x86_max_cores > 1;
+ bool has_smt = smp_num_siblings > 1;
struct cpuinfo_x86 *c = &cpu_data(cpu);
+ struct cpuinfo_x86 *o;
+ int i;
cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
- if (smp_num_siblings > 1) {
- for_each_cpu(i, cpu_sibling_setup_mask) {
- struct cpuinfo_x86 *o = &cpu_data(i);
-
- if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
- if (c->phys_proc_id == o->phys_proc_id &&
- per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) &&
- c->compute_unit_id == o->compute_unit_id)
- link_thread_siblings(cpu, i);
- } else if (c->phys_proc_id == o->phys_proc_id &&
- c->cpu_core_id == o->cpu_core_id) {
- link_thread_siblings(cpu, i);
- }
- }
- } else {
+ if (!has_smt && !has_mc) {
cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
- }
-
- cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
-
- if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
- cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
+ cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
+ cpumask_set_cpu(cpu, cpu_core_mask(cpu));
c->booted_cores = 1;
return;
}
for_each_cpu(i, cpu_sibling_setup_mask) {
- if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
- per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
- cpumask_set_cpu(i, cpu_llc_shared_mask(cpu));
- cpumask_set_cpu(cpu, cpu_llc_shared_mask(i));
- }
- if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
- cpumask_set_cpu(i, cpu_core_mask(cpu));
- cpumask_set_cpu(cpu, cpu_core_mask(i));
+ o = &cpu_data(i);
+
+ if ((i == cpu) || (has_smt && match_smt(c, o)))
+ link_mask(sibling, cpu, i);
+
+ if ((i == cpu) || (has_mc && match_llc(c, o)))
+ link_mask(llc_shared, cpu, i);
+
+ if ((i == cpu) || (has_mc && match_mc(c, o))) {
+ link_mask(core, cpu, i);
+
/*
* Does this new cpu bring up a new core?
*/
@@ -398,8 +413,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
* For perf, we return last level cache shared map.
* And for power savings, we return cpu_core_map
*/
- if ((sched_mc_power_savings || sched_smt_power_savings) &&
- !(cpu_has(c, X86_FEATURE_AMD_DCM)))
+ if (!(cpu_has(c, X86_FEATURE_AMD_DCM)))
return cpu_core_mask(cpu);
else
return cpu_llc_shared_mask(cpu);
@@ -618,22 +632,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
return (send_status | accept_status);
}
-struct create_idle {
- struct work_struct work;
- struct task_struct *idle;
- struct completion done;
- int cpu;
-};
-
-static void __cpuinit do_fork_idle(struct work_struct *work)
-{
- struct create_idle *c_idle =
- container_of(work, struct create_idle, work);
-
- c_idle->idle = fork_idle(c_idle->cpu);
- complete(&c_idle->done);
-}
-
/* reduce the number of lines printed when booting a large cpu count system */
static void __cpuinit announce_cpu(int cpu, int apicid)
{
@@ -660,58 +658,31 @@ static void __cpuinit announce_cpu(int cpu, int apicid)
* Returns zero if CPU booted OK, else error code from
* ->wakeup_secondary_cpu.
*/
-static int __cpuinit do_boot_cpu(int apicid, int cpu)
+static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
{
unsigned long boot_error = 0;
unsigned long start_ip;
int timeout;
- struct create_idle c_idle = {
- .cpu = cpu,
- .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
- };
-
- INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
alternatives_smp_switch(1);
- c_idle.idle = get_idle_for_cpu(cpu);
-
- /*
- * We can't use kernel_thread since we must avoid to
- * reschedule the child.
- */
- if (c_idle.idle) {
- c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *)
- (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1);
- init_idle(c_idle.idle, cpu);
- goto do_rest;
- }
-
- schedule_work(&c_idle.work);
- wait_for_completion(&c_idle.done);
+ idle->thread.sp = (unsigned long) (((struct pt_regs *)
+ (THREAD_SIZE + task_stack_page(idle))) - 1);
+ per_cpu(current_task, cpu) = idle;
- if (IS_ERR(c_idle.idle)) {
- printk("failed fork for CPU %d\n", cpu);
- destroy_work_on_stack(&c_idle.work);
- return PTR_ERR(c_idle.idle);
- }
-
- set_idle_for_cpu(cpu, c_idle.idle);
-do_rest:
- per_cpu(current_task, cpu) = c_idle.idle;
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
#else
- clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
+ clear_tsk_thread_flag(idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
per_cpu(kernel_stack, cpu) =
- (unsigned long)task_stack_page(c_idle.idle) -
+ (unsigned long)task_stack_page(idle) -
KERNEL_STACK_OFFSET + THREAD_SIZE;
#endif
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
initial_code = (unsigned long)start_secondary;
- stack_start = c_idle.idle->thread.sp;
+ stack_start = idle->thread.sp;
/* start_ip had better be page-aligned! */
start_ip = trampoline_address();
@@ -813,12 +784,10 @@ do_rest:
*/
smpboot_restore_warm_reset_vector();
}
-
- destroy_work_on_stack(&c_idle.work);
return boot_error;
}
-int __cpuinit native_cpu_up(unsigned int cpu)
+int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int apicid = apic->cpu_present_to_apicid(cpu);
unsigned long flags;
@@ -851,7 +820,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
- err = do_boot_cpu(apicid, cpu);
+ err = do_boot_cpu(apicid, cpu, tidle);
if (err) {
pr_debug("do_boot_cpu failed %d\n", err);
return -EIO;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ff9281f16029..92d5756d85fc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -50,6 +50,7 @@
#include <asm/processor.h>
#include <asm/debugreg.h>
#include <linux/atomic.h>
+#include <asm/ftrace.h>
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/i387.h>
@@ -303,8 +304,13 @@ gp_in_kernel:
}
/* May run on IST stack. */
-dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
+dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
{
+#ifdef CONFIG_DYNAMIC_FTRACE
+ /* ftrace must be first, everything else may cause a recursive crash */
+ if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs))
+ return;
+#endif
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
SIGTRAP) == NOTIFY_STOP)
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index e9f265fd79ae..35c5e543f550 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -18,6 +18,7 @@
#include <asm/e820.h>
#include <asm/time.h>
#include <asm/irq.h>
+#include <asm/io_apic.h>
#include <asm/pat.h>
#include <asm/tsc.h>
#include <asm/iommu.h>
@@ -93,7 +94,6 @@ struct x86_init_ops x86_init __initdata = {
struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
.early_percpu_clock_init = x86_init_noop,
.setup_percpu_clockev = setup_secondary_APIC_clock,
- .fixup_cpu_id = x86_default_fixup_cpu_id,
};
static void default_nmi_init(void) { };
@@ -120,3 +120,10 @@ struct x86_msi_ops x86_msi = {
.teardown_msi_irqs = default_teardown_msi_irqs,
.restore_msi_irqs = default_restore_msi_irqs,
};
+
+struct x86_io_apic_ops x86_io_apic_ops = {
+ .init = native_io_apic_init_mappings,
+ .read = native_io_apic_read,
+ .write = native_io_apic_write,
+ .modify = native_io_apic_modify,
+};
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 173df38dbda5..2e88438ffd83 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -459,17 +459,17 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1);
if (pmu->version == 1) {
- pmu->global_ctrl = (1 << pmu->nr_arch_gp_counters) - 1;
- return;
+ pmu->nr_arch_fixed_counters = 0;
+ } else {
+ pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f),
+ X86_PMC_MAX_FIXED);
+ pmu->counter_bitmask[KVM_PMC_FIXED] =
+ ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;
}
- pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f),
- X86_PMC_MAX_FIXED);
- pmu->counter_bitmask[KVM_PMC_FIXED] =
- ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;
- pmu->global_ctrl_mask = ~(((1 << pmu->nr_arch_gp_counters) - 1)
- | (((1ull << pmu->nr_arch_fixed_counters) - 1)
- << X86_PMC_IDX_FIXED));
+ pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
+ (((1ull << pmu->nr_arch_fixed_counters) - 1) << X86_PMC_IDX_FIXED);
+ pmu->global_ctrl_mask = ~pmu->global_ctrl;
}
void kvm_pmu_init(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ad85adfef843..4ff0ab9bc3c8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2210,9 +2210,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
msr = find_msr_entry(vmx, msr_index);
if (msr) {
msr->data = data;
- if (msr - vmx->guest_msrs < vmx->save_nmsrs)
+ if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
+ preempt_disable();
kvm_set_shared_msr(msr->index, msr->data,
msr->mask);
+ preempt_enable();
+ }
break;
}
ret = kvm_set_msr_common(vcpu, msr_index, data);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4044ce0bf7c1..185a2b823a2d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6336,13 +6336,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (npages && !old.rmap) {
unsigned long userspace_addr;
- down_write(&current->mm->mmap_sem);
- userspace_addr = do_mmap(NULL, 0,
+ userspace_addr = vm_mmap(NULL, 0,
npages * PAGE_SIZE,
PROT_READ | PROT_WRITE,
map_flags,
0);
- up_write(&current->mm->mmap_sem);
if (IS_ERR((void *)userspace_addr))
return PTR_ERR((void *)userspace_addr);
@@ -6366,10 +6364,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
int ret;
- down_write(&current->mm->mmap_sem);
- ret = do_munmap(current->mm, old.userspace_addr,
+ ret = vm_munmap(old.userspace_addr,
old.npages * PAGE_SIZE);
- up_write(&current->mm->mmap_sem);
if (ret < 0)
printk(KERN_WARNING
"kvm_vm_ioctl_set_memory_region: "
@@ -6585,6 +6581,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
kvm_inject_page_fault(vcpu, &fault);
}
vcpu->arch.apf.halted = false;
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 25feb1ae71c5..b1e6c4b2e8eb 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -379,8 +379,8 @@ err_out:
return;
}
-/* Decode moffset16/32/64 */
-static void __get_moffset(struct insn *insn)
+/* Decode moffset16/32/64. Return 0 if failed */
+static int __get_moffset(struct insn *insn)
{
switch (insn->addr_bytes) {
case 2:
@@ -397,15 +397,19 @@ static void __get_moffset(struct insn *insn)
insn->moffset2.value = get_next(int, insn);
insn->moffset2.nbytes = 4;
break;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
}
insn->moffset1.got = insn->moffset2.got = 1;
+ return 1;
+
err_out:
- return;
+ return 0;
}
-/* Decode imm v32(Iz) */
-static void __get_immv32(struct insn *insn)
+/* Decode imm v32(Iz). Return 0 on failure */
+static int __get_immv32(struct insn *insn)
{
switch (insn->opnd_bytes) {
case 2:
@@ -417,14 +421,18 @@ static void __get_immv32(struct insn *insn)
insn->immediate.value = get_next(int, insn);
insn->immediate.nbytes = 4;
break;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
}
+ return 1;
+
err_out:
- return;
+ return 0;
}
-/* Decode imm v64(Iv/Ov) */
-static void __get_immv(struct insn *insn)
+/* Decode imm v64(Iv/Ov). Return 0 on failure */
+static int __get_immv(struct insn *insn)
{
switch (insn->opnd_bytes) {
case 2:
@@ -441,15 +449,18 @@ static void __get_immv(struct insn *insn)
insn->immediate2.value = get_next(int, insn);
insn->immediate2.nbytes = 4;
break;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
}
insn->immediate1.got = insn->immediate2.got = 1;
+ return 1;
err_out:
- return;
+ return 0;
}
/* Decode ptr16:16/32(Ap) */
-static void __get_immptr(struct insn *insn)
+static int __get_immptr(struct insn *insn)
{
switch (insn->opnd_bytes) {
case 2:
@@ -462,14 +473,17 @@ static void __get_immptr(struct insn *insn)
break;
case 8:
/* ptr16:64 does not exist (no segment) */
- return;
+ return 0;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
}
insn->immediate2.value = get_next(unsigned short, insn);
insn->immediate2.nbytes = 2;
insn->immediate1.got = insn->immediate2.got = 1;
+ return 1;
err_out:
- return;
+ return 0;
}
/**
@@ -489,7 +503,8 @@ void insn_get_immediate(struct insn *insn)
insn_get_displacement(insn);
if (inat_has_moffset(insn->attr)) {
- __get_moffset(insn);
+ if (!__get_moffset(insn))
+ goto err_out;
goto done;
}
@@ -517,16 +532,20 @@ void insn_get_immediate(struct insn *insn)
insn->immediate2.nbytes = 4;
break;
case INAT_IMM_PTR:
- __get_immptr(insn);
+ if (!__get_immptr(insn))
+ goto err_out;
break;
case INAT_IMM_VWORD32:
- __get_immv32(insn);
+ if (!__get_immv32(insn))
+ goto err_out;
break;
case INAT_IMM_VWORD:
- __get_immv(insn);
+ if (!__get_immv(insn))
+ goto err_out;
break;
default:
- break;
+ /* Here, insn must have an immediate, but decoding failed */
+ goto err_out;
}
if (inat_has_second_immediate(insn->attr)) {
insn->immediate2.value = get_next(char, insn);
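
The insn.c conversion gives each __get_* helper an int return so insn_get_immediate() can abort cleanly when opnd_bytes holds a size the decoder does not handle, instead of leaving fields silently unset. A toy decoder showing the same shape; the struct and field names here are made up:

    #include <stdio.h>

    struct toy_insn { int opnd_bytes; long imm; int got; };

    /* Return 1 on success, 0 if opnd_bytes is not a size we can decode. */
    static int get_imm(struct toy_insn *insn, const unsigned char *p)
    {
        switch (insn->opnd_bytes) {
        case 2:
            insn->imm = (short)(p[0] | p[1] << 8);
            break;
        case 4:
            insn->imm = (int)(p[0] | p[1] << 8 | p[2] << 16 | (long)p[3] << 24);
            break;
        default:        /* unknown size: report failure to the caller */
            return 0;
        }
        insn->got = 1;
        return 1;
    }

    int main(void)
    {
        unsigned char bytes[4] = { 0x78, 0x56, 0x34, 0x12 };
        struct toy_insn insn = { .opnd_bytes = 4 };

        if (!get_imm(&insn, bytes))
            return 1;       /* mirrors the goto err_out path */
        printf("imm = %#lx\n", insn.imm);
        return 0;
    }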
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index d6ae30bbd7bb..2e4e4b02c37a 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -44,13 +44,6 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
}
EXPORT_SYMBOL_GPL(copy_from_user_nmi);
-static inline unsigned long count_bytes(unsigned long mask)
-{
- mask = (mask - 1) & ~mask;
- mask >>= 7;
- return count_masked_bytes(mask);
-}
-
/*
* Do a strncpy, return length of string without final '\0'.
* 'count' is the user-supplied count (return 'count' if we
@@ -69,16 +62,19 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, long
max = count;
while (max >= sizeof(unsigned long)) {
- unsigned long c;
+ unsigned long c, mask;
/* Fall back to byte-at-a-time if we get a page fault */
if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
break;
- /* This can write a few bytes past the NUL character, but that's ok */
+ mask = has_zero(c);
+ if (mask) {
+ mask = (mask - 1) & ~mask;
+ mask >>= 7;
+ *(unsigned long *)(dst+res) = c & mask;
+ return res + count_masked_bytes(mask);
+ }
*(unsigned long *)(dst+res) = c;
- c = has_zero(c);
- if (c)
- return res + count_bytes(c);
res += sizeof(unsigned long);
max -= sizeof(unsigned long);
}
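
The rewritten word-at-a-time loop computes has_zero(c) once, turns the result into a byte mask covering everything before the NUL, and stores c & mask, so the copy never writes past the terminator. A runnable little-endian demonstration, with has_zero() and count_masked_bytes() written out the way the 64-bit little-endian word-at-a-time helpers of this era define them:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    #define ONEBYTES 0x0101010101010101ull
    #define HIGHBITS 0x8080808080808080ull

    /* 0x80 in each byte of c that is zero (little-endian trick). */
    static uint64_t has_zero(uint64_t c)
    {
        return (c - ONEBYTES) & ~c & HIGHBITS;
    }

    /* Bytes before the first zero byte: the multiply maps 2^(8k)-1 to k. */
    static long count_masked_bytes(uint64_t mask)
    {
        return mask * 0x0001020304050608ull >> 56;
    }

    int main(void)
    {
        uint64_t c, mask;

        memcpy(&c, "ab\0zzzzz", 8);       /* word with a NUL at byte 2 */
        mask = has_zero(c);
        if (mask) {
            mask = (mask - 1) & ~mask;    /* 0x800000 -> 0x7fffff */
            mask >>= 7;                   /* -> 0xffff: bytes before the NUL */
            printf("copied word: %#llx, len %ld\n",
                   (unsigned long long)(c & mask), count_masked_bytes(mask));
        }
        return 0;
    }

Here the stored word keeps only "ab" (0x6261) and the reported length is 2, exactly what the new early-return path computes.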
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index 53489ff6bf82..871dd8868170 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -339,9 +339,11 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
} else {
unsigned long n;
- n = simple_strtoul(emu_cmdline, NULL, 0);
+ n = simple_strtoul(emu_cmdline, &emu_cmdline, 0);
ret = split_nodes_interleave(&ei, &pi, 0, max_addr, n);
}
+ if (*emu_cmdline == ':')
+ emu_cmdline++;
if (ret < 0)
goto no_emu;
@@ -418,7 +420,9 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
int physj = emu_nid_to_phys[j];
int dist;
- if (physi >= numa_dist_cnt || physj >= numa_dist_cnt)
+ if (get_option(&emu_cmdline, &dist) == 2)
+ ;
+ else if (physi >= numa_dist_cnt || physj >= numa_dist_cnt)
dist = physi == physj ?
LOCAL_DISTANCE : REMOTE_DISTANCE;
else
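
Passing &emu_cmdline back to simple_strtoul() makes parsing advance past the node count, so the code can then consume an optional ':' and read per-node-pair distances with get_option(), which returns 2 when the integer it parsed is followed by a comma. The same cursor-style parsing in userspace with strtoul(), over a made-up option string:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        char *cur = "4:10,20";  /* hypothetical numa=fake= payload */
        unsigned long nodes, dist;

        nodes = strtoul(cur, &cur, 0);   /* cur now points at ':' */
        if (*cur == ':')
            cur++;

        printf("nodes=%lu distances:", nodes);
        while (*cur) {
            dist = strtoul(cur, &cur, 0);
            printf(" %lu", dist);
            if (*cur == ',')             /* get_option()'s "returns 2" case */
                cur++;
        }
        printf("\n");
        return 0;
    }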
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index d6c0418c3e47..3804471db104 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -61,10 +61,10 @@ static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset);
*/
void leave_mm(int cpu)
{
- if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+ if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
BUG();
cpumask_clear_cpu(cpu,
- mm_cpumask(percpu_read(cpu_tlbstate.active_mm)));
+ mm_cpumask(this_cpu_read(cpu_tlbstate.active_mm)));
load_cr3(swapper_pg_dir);
}
EXPORT_SYMBOL_GPL(leave_mm);
@@ -152,8 +152,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
* BUG();
*/
- if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
- if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
+ if (f->flush_mm == this_cpu_read(cpu_tlbstate.active_mm)) {
+ if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
if (f->flush_va == TLB_FLUSH_ALL)
local_flush_tlb();
else
@@ -322,7 +322,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
static void do_flush_tlb_all(void *info)
{
__flush_tlb_all();
- if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
+ if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
leave_mm(smp_processor_id());
}
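
percpu_read() was an x86-private accessor; this_cpu_read() is the generic per-CPU read used kernel-wide (on x86 it still compiles down to a %gs-relative load). As a rough illustration only, not the real implementation, the idea is one slot per CPU indexed by the current CPU id:

    #include <stdio.h>

    #define NR_CPUS 4

    /* Stand-in for a DEFINE_PER_CPU variable: one slot per CPU. */
    static int tlb_state[NR_CPUS];

    static int cur_cpu;                          /* pretend scheduler state */
    static int smp_processor_id(void) { return cur_cpu; }

    /* this_cpu_read(var) conceptually reads the current CPU's slot. */
    #define this_cpu_read(var) ((var)[smp_processor_id()])

    int main(void)
    {
        tlb_state[0] = 1;   /* say, TLBSTATE_OK on CPU 0 */
        tlb_state[1] = 2;   /* TLBSTATE_LAZY on CPU 1 */

        cur_cpu = 0;
        printf("cpu0 state=%d\n", this_cpu_read(tlb_state));
        cur_cpu = 1;
        printf("cpu1 state=%d\n", this_cpu_read(tlb_state));
        return 0;
    }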
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index ed2835e148b5..fc09c2754e08 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -9,11 +9,11 @@
struct pci_root_info {
struct acpi_device *bridge;
- char *name;
+ char name[16];
unsigned int res_num;
struct resource *res;
- struct list_head *resources;
int busnum;
+ struct pci_sysdata sd;
};
static bool pci_use_crs = true;
@@ -245,13 +245,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
return AE_OK;
}
-static bool resource_contains(struct resource *res, resource_size_t point)
-{
- if (res->start <= point && point <= res->end)
- return true;
- return false;
-}
-
static void coalesce_windows(struct pci_root_info *info, unsigned long type)
{
int i, j;
@@ -272,10 +265,7 @@ static void coalesce_windows(struct pci_root_info *info, unsigned long type)
* our resources no longer match the ACPI _CRS, but
* the kernel resource tree doesn't allow overlaps.
*/
- if (resource_contains(res1, res2->start) ||
- resource_contains(res1, res2->end) ||
- resource_contains(res2, res1->start) ||
- resource_contains(res2, res1->end)) {
+ if (resource_overlaps(res1, res2)) {
res1->start = min(res1->start, res2->start);
res1->end = max(res1->end, res2->end);
dev_info(&info->bridge->dev,
@@ -287,7 +277,8 @@ static void coalesce_windows(struct pci_root_info *info, unsigned long type)
}
}
-static void add_resources(struct pci_root_info *info)
+static void add_resources(struct pci_root_info *info,
+ struct list_head *resources)
{
int i;
struct resource *res, *root, *conflict;
@@ -311,53 +302,74 @@ static void add_resources(struct pci_root_info *info)
"ignoring host bridge window %pR (conflicts with %s %pR)\n",
res, conflict->name, conflict);
else
- pci_add_resource(info->resources, res);
+ pci_add_resource(resources, res);
}
}
+static void free_pci_root_info_res(struct pci_root_info *info)
+{
+ kfree(info->res);
+ info->res = NULL;
+ info->res_num = 0;
+}
+
+static void __release_pci_root_info(struct pci_root_info *info)
+{
+ int i;
+ struct resource *res;
+
+ for (i = 0; i < info->res_num; i++) {
+ res = &info->res[i];
+
+ if (!res->parent)
+ continue;
+
+ if (!(res->flags & (IORESOURCE_MEM | IORESOURCE_IO)))
+ continue;
+
+ release_resource(res);
+ }
+
+ free_pci_root_info_res(info);
+
+ kfree(info);
+}
+static void release_pci_root_info(struct pci_host_bridge *bridge)
+{
+ struct pci_root_info *info = bridge->release_data;
+
+ __release_pci_root_info(info);
+}
+
static void
-get_current_resources(struct acpi_device *device, int busnum,
- int domain, struct list_head *resources)
+probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device,
+ int busnum, int domain)
{
- struct pci_root_info info;
size_t size;
- info.bridge = device;
- info.res_num = 0;
- info.resources = resources;
+ info->bridge = device;
+ info->res_num = 0;
acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource,
- &info);
- if (!info.res_num)
+ info);
+ if (!info->res_num)
return;
- size = sizeof(*info.res) * info.res_num;
- info.res = kmalloc(size, GFP_KERNEL);
- if (!info.res)
+ size = sizeof(*info->res) * info->res_num;
+ info->res_num = 0;
+ info->res = kmalloc(size, GFP_KERNEL);
+ if (!info->res)
return;
- info.name = kasprintf(GFP_KERNEL, "PCI Bus %04x:%02x", domain, busnum);
- if (!info.name)
- goto name_alloc_fail;
+ sprintf(info->name, "PCI Bus %04x:%02x", domain, busnum);
- info.res_num = 0;
acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource,
- &info);
-
- if (pci_use_crs) {
- add_resources(&info);
-
- return;
- }
-
- kfree(info.name);
-
-name_alloc_fail:
- kfree(info.res);
+ info);
}
struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
{
struct acpi_device *device = root->device;
+ struct pci_root_info *info = NULL;
int domain = root->segment;
int busnum = root->secondary.start;
LIST_HEAD(resources);
@@ -389,17 +401,14 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
if (node != -1 && !node_online(node))
node = -1;
- /* Allocate per-root-bus (not per bus) arch-specific data.
- * TODO: leak; this memory is never freed.
- * It's arguable whether it's worth the trouble to care.
- */
- sd = kzalloc(sizeof(*sd), GFP_KERNEL);
- if (!sd) {
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info) {
printk(KERN_WARNING "pci_bus %04x:%02x: "
"ignored (out of memory)\n", domain, busnum);
return NULL;
}
+ sd = &info->sd;
sd->domain = domain;
sd->node = node;
/*
@@ -413,22 +422,32 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
* be replaced by sd.
*/
memcpy(bus->sysdata, sd, sizeof(*sd));
- kfree(sd);
+ kfree(info);
} else {
- get_current_resources(device, busnum, domain, &resources);
+ probe_pci_root_info(info, device, busnum, domain);
/*
* _CRS with no apertures is normal, so only fall back to
* defaults or native bridge info if we're ignoring _CRS.
*/
- if (!pci_use_crs)
+ if (pci_use_crs)
+ add_resources(info, &resources);
+ else {
+ free_pci_root_info_res(info);
x86_pci_root_bus_resources(busnum, &resources);
+ }
+
bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd,
&resources);
- if (bus)
+ if (bus) {
bus->subordinate = pci_scan_child_bus(bus);
- else
+ pci_set_host_bridge_release(
+ to_pci_host_bridge(bus->bridge),
+ release_pci_root_info, info);
+ } else {
pci_free_resource_list(&resources);
+ __release_pci_root_info(info);
+ }
}
/* After the PCI-E bus has been walked and all devices discovered,
@@ -445,9 +464,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
}
}
- if (!bus)
- kfree(sd);
-
if (bus && node != -1) {
#ifdef CONFIG_ACPI_NUMA
if (pxm >= 0)
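
The four resource_contains() endpoint checks reduce to resource_overlaps(), the standard closed-interval intersection test: two ranges overlap exactly when each starts no later than the other ends. A runnable check, with a local struct standing in for struct resource:

    #include <stdio.h>
    #include <stdbool.h>

    struct res { unsigned long start, end; };   /* closed interval [start, end] */

    /* Overlap iff each interval starts no later than the other ends. */
    static bool resource_overlaps(struct res *r1, struct res *r2)
    {
        return r1->start <= r2->end && r2->start <= r1->end;
    }

    int main(void)
    {
        struct res a = { 0x1000, 0x1fff };
        struct res b = { 0x1800, 0x2fff };   /* overlaps a */
        struct res c = { 0x3000, 0x3fff };   /* disjoint from a */

        printf("a,b overlap: %d\n", resource_overlaps(&a, &b));
        printf("a,c overlap: %d\n", resource_overlaps(&a, &c));

        /* Coalescing, as in the hunk: grow a to the union of a and b. */
        if (resource_overlaps(&a, &b)) {
            a.start = a.start < b.start ? a.start : b.start;
            a.end   = a.end   > b.end   ? a.end   : b.end;
        }
        printf("merged: [%#lx, %#lx]\n", a.start, a.end);
        return 0;
    }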
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 0567df3890e1..5aed49bff058 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -32,6 +32,27 @@ static struct pci_hostbridge_probe pci_probes[] __initdata = {
#define RANGE_NUM 16
+static struct pci_root_info __init *find_pci_root_info(int node, int link)
+{
+ struct pci_root_info *info;
+
+ /* find the position */
+ list_for_each_entry(info, &pci_root_infos, list)
+ if (info->node == node && info->link == link)
+ return info;
+
+ return NULL;
+}
+
+static void __init set_mp_bus_range_to_node(int min_bus, int max_bus, int node)
+{
+#ifdef CONFIG_NUMA
+ int j;
+
+ for (j = min_bus; j <= max_bus; j++)
+ set_mp_bus_to_node(j, node);
+#endif
+}
/**
* early_fill_mp_bus_to_node()
* called before pcibios_scan_root and pci_scan_bus
@@ -41,7 +62,6 @@ static struct pci_hostbridge_probe pci_probes[] __initdata = {
static int __init early_fill_mp_bus_info(void)
{
int i;
- int j;
unsigned bus;
unsigned slot;
int node;
@@ -50,7 +70,6 @@ static int __init early_fill_mp_bus_info(void)
int def_link;
struct pci_root_info *info;
u32 reg;
- struct resource *res;
u64 start;
u64 end;
struct range range[RANGE_NUM];
@@ -86,7 +105,6 @@ static int __init early_fill_mp_bus_info(void)
if (!found)
return 0;
- pci_root_num = 0;
for (i = 0; i < 4; i++) {
int min_bus;
int max_bus;
@@ -99,19 +117,11 @@ static int __init early_fill_mp_bus_info(void)
min_bus = (reg >> 16) & 0xff;
max_bus = (reg >> 24) & 0xff;
node = (reg >> 4) & 0x07;
-#ifdef CONFIG_NUMA
- for (j = min_bus; j <= max_bus; j++)
- set_mp_bus_to_node(j, node);
-#endif
+ set_mp_bus_range_to_node(min_bus, max_bus, node);
link = (reg >> 8) & 0x03;
- info = &pci_root_info[pci_root_num];
- info->bus_min = min_bus;
- info->bus_max = max_bus;
- info->node = node;
- info->link = link;
+ info = alloc_pci_root_info(min_bus, max_bus, node, link);
sprintf(info->name, "PCI Bus #%02x", min_bus);
- pci_root_num++;
}
/* get the default node and link for left over res */
@@ -134,16 +144,10 @@ static int __init early_fill_mp_bus_info(void)
link = (reg >> 4) & 0x03;
end = (reg & 0xfff000) | 0xfff;
- /* find the position */
- for (j = 0; j < pci_root_num; j++) {
- info = &pci_root_info[j];
- if (info->node == node && info->link == link)
- break;
- }
- if (j == pci_root_num)
+ info = find_pci_root_info(node, link);
+ if (!info)
continue; /* not found */
- info = &pci_root_info[j];
printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n",
node, link, start, end);
@@ -155,13 +159,8 @@ static int __init early_fill_mp_bus_info(void)
}
/* add left over io port range to def node/link, [0, 0xffff] */
/* find the position */
- for (j = 0; j < pci_root_num; j++) {
- info = &pci_root_info[j];
- if (info->node == def_node && info->link == def_link)
- break;
- }
- if (j < pci_root_num) {
- info = &pci_root_info[j];
+ info = find_pci_root_info(def_node, def_link);
+ if (info) {
for (i = 0; i < RANGE_NUM; i++) {
if (!range[i].end)
continue;
@@ -214,16 +213,10 @@ static int __init early_fill_mp_bus_info(void)
end <<= 8;
end |= 0xffff;
- /* find the position */
- for (j = 0; j < pci_root_num; j++) {
- info = &pci_root_info[j];
- if (info->node == node && info->link == link)
- break;
- }
- if (j == pci_root_num)
- continue; /* not found */
+ info = find_pci_root_info(node, link);
- info = &pci_root_info[j];
+ if (!info)
+ continue;
printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]",
node, link, start, end);
@@ -291,14 +284,8 @@ static int __init early_fill_mp_bus_info(void)
* add left over mmio range to def node/link?
* that is tricky; just record the range from start_min to 4G
*/
- for (j = 0; j < pci_root_num; j++) {
- info = &pci_root_info[j];
- if (info->node == def_node && info->link == def_link)
- break;
- }
- if (j < pci_root_num) {
- info = &pci_root_info[j];
-
+ info = find_pci_root_info(def_node, def_link);
+ if (info) {
for (i = 0; i < RANGE_NUM; i++) {
if (!range[i].end)
continue;
@@ -309,20 +296,16 @@ static int __init early_fill_mp_bus_info(void)
}
}
- for (i = 0; i < pci_root_num; i++) {
- int res_num;
+ list_for_each_entry(info, &pci_root_infos, list) {
int busnum;
+ struct pci_root_res *root_res;
- info = &pci_root_info[i];
- res_num = info->res_num;
busnum = info->bus_min;
printk(KERN_DEBUG "bus: [%02x, %02x] on node %x link %x\n",
info->bus_min, info->bus_max, info->node, info->link);
- for (j = 0; j < res_num; j++) {
- res = &info->res[j];
- printk(KERN_DEBUG "bus: %02x index %x %pR\n",
- busnum, j, res);
- }
+ list_for_each_entry(root_res, &info->resources, list)
+ printk(KERN_DEBUG "bus: %02x %pR\n",
+ busnum, &root_res->res);
}
return 0;
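
The fixed pci_root_info[] array and its three open-coded index scans become a kmalloc'd list walked with list_for_each_entry(), with find_pci_root_info() as the single lookup. A self-contained GNU C sketch of that lookup-by-key pattern; the list macros below are pared-down re-implementations of the kernel's list.h, just enough to run:

    #include <stdio.h>
    #include <stdlib.h>
    #include <stddef.h>

    struct list_head { struct list_head *next, *prev; };

    #define LIST_HEAD_INIT(n) { &(n), &(n) }
    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))
    #define list_for_each_entry(pos, head, member)                       \
        for (pos = container_of((head)->next, typeof(*pos), member);     \
             &pos->member != (head);                                     \
             pos = container_of(pos->member.next, typeof(*pos), member))

    static void list_add_tail(struct list_head *new, struct list_head *head)
    {
        new->prev = head->prev; new->next = head;
        head->prev->next = new; head->prev = new;
    }

    struct pci_root_info { struct list_head list; int node, link; };

    static struct list_head pci_root_infos = LIST_HEAD_INIT(pci_root_infos);

    /* The lookup the patch factors out of three open-coded scans. */
    static struct pci_root_info *find_pci_root_info(int node, int link)
    {
        struct pci_root_info *info;

        list_for_each_entry(info, &pci_root_infos, list)
            if (info->node == node && info->link == link)
                return info;
        return NULL;
    }

    int main(void)
    {
        struct pci_root_info *a = calloc(1, sizeof(*a));
        a->node = 1; a->link = 2;
        list_add_tail(&a->list, &pci_root_infos);

        printf("found:   %p\n", (void *)find_pci_root_info(1, 2));
        printf("missing: %p\n", (void *)find_pci_root_info(0, 0));
        return 0;
    }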
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c
index f3a7c569a403..614392ced7d6 100644
--- a/arch/x86/pci/broadcom_bus.c
+++ b/arch/x86/pci/broadcom_bus.c
@@ -22,19 +22,15 @@
static void __init cnb20le_res(u8 bus, u8 slot, u8 func)
{
struct pci_root_info *info;
+ struct pci_root_res *root_res;
struct resource res;
u16 word1, word2;
u8 fbus, lbus;
- int i;
-
- info = &pci_root_info[pci_root_num];
- pci_root_num++;
/* read the PCI bus numbers */
fbus = read_pci_config_byte(bus, slot, func, 0x44);
lbus = read_pci_config_byte(bus, slot, func, 0x45);
- info->bus_min = fbus;
- info->bus_max = lbus;
+ info = alloc_pci_root_info(fbus, lbus, 0, 0);
/*
* Add the legacy IDE ports on bus 0
@@ -86,8 +82,8 @@ static void __init cnb20le_res(u8 bus, u8 slot, u8 func)
res.flags = IORESOURCE_BUS;
printk(KERN_INFO "CNB20LE PCI Host Bridge (domain 0000 %pR)\n", &res);
- for (i = 0; i < info->res_num; i++)
- printk(KERN_INFO "host bridge window %pR\n", &info->res[i]);
+ list_for_each_entry(root_res, &info->resources, list)
+ printk(KERN_INFO "host bridge window %pR\n", &root_res->res);
}
static int __init broadcom_postcore_init(void)
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c
index fd3f65510e9d..306579f7d0fd 100644
--- a/arch/x86/pci/bus_numa.c
+++ b/arch/x86/pci/bus_numa.c
@@ -4,35 +4,38 @@
#include "bus_numa.h"
-int pci_root_num;
-struct pci_root_info pci_root_info[PCI_ROOT_NR];
+LIST_HEAD(pci_root_infos);
-void x86_pci_root_bus_resources(int bus, struct list_head *resources)
+static struct pci_root_info *x86_find_pci_root_info(int bus)
{
- int i;
- int j;
struct pci_root_info *info;
- if (!pci_root_num)
- goto default_resources;
+ if (list_empty(&pci_root_infos))
+ return NULL;
- for (i = 0; i < pci_root_num; i++) {
- if (pci_root_info[i].bus_min == bus)
- break;
- }
+ list_for_each_entry(info, &pci_root_infos, list)
+ if (info->bus_min == bus)
+ return info;
+
+ return NULL;
+}
- if (i == pci_root_num)
+void x86_pci_root_bus_resources(int bus, struct list_head *resources)
+{
+ struct pci_root_info *info = x86_find_pci_root_info(bus);
+ struct pci_root_res *root_res;
+
+ if (!info)
goto default_resources;
printk(KERN_DEBUG "PCI: root bus %02x: hardware-probed resources\n",
bus);
- info = &pci_root_info[i];
- for (j = 0; j < info->res_num; j++) {
+ list_for_each_entry(root_res, &info->resources, list) {
struct resource *res;
struct resource *root;
- res = &info->res[j];
+ res = &root_res->res;
pci_add_resource(resources, res);
if (res->flags & IORESOURCE_IO)
root = &ioport_resource;
@@ -53,11 +56,32 @@ default_resources:
pci_add_resource(resources, &iomem_resource);
}
+struct pci_root_info __init *alloc_pci_root_info(int bus_min, int bus_max,
+ int node, int link)
+{
+ struct pci_root_info *info;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+
+ if (!info)
+ return info;
+
+ INIT_LIST_HEAD(&info->resources);
+ info->bus_min = bus_min;
+ info->bus_max = bus_max;
+ info->node = node;
+ info->link = link;
+
+ list_add_tail(&info->list, &pci_root_infos);
+
+ return info;
+}
+
void __devinit update_res(struct pci_root_info *info, resource_size_t start,
resource_size_t end, unsigned long flags, int merge)
{
- int i;
struct resource *res;
+ struct pci_root_res *root_res;
if (start > end)
return;
@@ -69,11 +93,11 @@ void __devinit update_res(struct pci_root_info *info, resource_size_t start,
goto addit;
/* try to merge it with old one */
- for (i = 0; i < info->res_num; i++) {
+ list_for_each_entry(root_res, &info->resources, list) {
resource_size_t final_start, final_end;
resource_size_t common_start, common_end;
- res = &info->res[i];
+ res = &root_res->res;
if (res->flags != flags)
continue;
@@ -93,14 +117,15 @@ void __devinit update_res(struct pci_root_info *info, resource_size_t start,
addit:
/* need to add that */
- if (info->res_num >= RES_NUM)
+ root_res = kzalloc(sizeof(*root_res), GFP_KERNEL);
+ if (!root_res)
return;
- res = &info->res[info->res_num];
+ res = &root_res->res;
res->name = info->name;
res->flags = flags;
res->start = start;
res->end = end;
- res->child = NULL;
- info->res_num++;
+
+ list_add_tail(&root_res->list, &info->resources);
}
diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h
index 804a4b40c31a..226a466b2b2b 100644
--- a/arch/x86/pci/bus_numa.h
+++ b/arch/x86/pci/bus_numa.h
@@ -4,22 +4,24 @@
* a transparent sub bus will use entries from 3 on to store extras from the
* root, so we need to make sure we have enough slots there.
*/
-#define RES_NUM 16
+struct pci_root_res {
+ struct list_head list;
+ struct resource res;
+};
+
struct pci_root_info {
+ struct list_head list;
char name[12];
- unsigned int res_num;
- struct resource res[RES_NUM];
+ struct list_head resources;
int bus_min;
int bus_max;
int node;
int link;
};
-/* 4 at this time, it may become to 32 */
-#define PCI_ROOT_NR 4
-extern int pci_root_num;
-extern struct pci_root_info pci_root_info[PCI_ROOT_NR];
-
+extern struct list_head pci_root_infos;
+struct pci_root_info *alloc_pci_root_info(int bus_min, int bus_max,
+ int node, int link);
extern void update_res(struct pci_root_info *info, resource_size_t start,
resource_size_t end, unsigned long flags, int merge);
#endif
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 323481e06ef8..0ad990a20d4a 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -11,6 +11,7 @@
#include <linux/dmi.h>
#include <linux/slab.h>
+#include <asm-generic/pci-bridge.h>
#include <asm/acpi.h>
#include <asm/segment.h>
#include <asm/io.h>
@@ -229,6 +230,14 @@ static int __devinit assign_all_busses(const struct dmi_system_id *d)
}
#endif
+static int __devinit set_scan_all(const struct dmi_system_id *d)
+{
+ printk(KERN_INFO "PCI: %s detected, enabling pci=pcie_scan_all\n",
+ d->ident);
+ pci_add_flags(PCI_SCAN_ALL_PCIE_DEVS);
+ return 0;
+}
+
static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = {
#ifdef __i386__
/*
@@ -420,6 +429,13 @@ static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL585 G2"),
},
},
+ {
+ .callback = set_scan_all,
+ .ident = "Stratus/NEC ftServer",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ftServer"),
+ },
+ },
{}
};
@@ -430,9 +446,7 @@ void __init dmi_check_pciprobe(void)
struct pci_bus * __devinit pcibios_scan_root(int busnum)
{
- LIST_HEAD(resources);
struct pci_bus *bus = NULL;
- struct pci_sysdata *sd;
while ((bus = pci_find_next_bus(bus)) != NULL) {
if (bus->number == busnum) {
@@ -441,28 +455,10 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
}
}
- /* Allocate per-root-bus (not per bus) arch-specific data.
- * TODO: leak; this memory is never freed.
- * It's arguable whether it's worth the trouble to care.
- */
- sd = kzalloc(sizeof(*sd), GFP_KERNEL);
- if (!sd) {
- printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum);
- return NULL;
- }
-
- sd->node = get_mp_bus_to_node(busnum);
-
- printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum);
- x86_pci_root_bus_resources(busnum, &resources);
- bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, sd, &resources);
- if (!bus) {
- pci_free_resource_list(&resources);
- kfree(sd);
- }
-
- return bus;
+ return pci_scan_bus_on_node(busnum, &pci_root_ops,
+ get_mp_bus_to_node(busnum));
}
+
void __init pcibios_set_cache_line_size(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -656,6 +652,7 @@ struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops,
}
sd->node = node;
x86_pci_root_bus_resources(busno, &resources);
+ printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busno);
bus = pci_scan_root_bus(NULL, busno, ops, sd, &resources);
if (!bus) {
pci_free_resource_list(&resources);
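
set_scan_all() plugs into the DMI quirk table, a sentinel-terminated array of matcher plus callback entries scanned at boot. A toy version of the table-driven dispatch; the matching here is a plain substring test standing in for DMI_MATCH():

    #include <stdio.h>
    #include <string.h>

    struct quirk {
        int (*callback)(const struct quirk *q);
        const char *ident;
        const char *vendor_match;   /* stands in for DMI_MATCH() entries */
    };

    static int scan_all_pcie;

    static int set_scan_all(const struct quirk *q)
    {
        printf("PCI: %s detected, enabling pcie_scan_all\n", q->ident);
        scan_all_pcie = 1;
        return 0;
    }

    static const struct quirk quirk_table[] = {
        { set_scan_all, "Stratus/NEC ftServer", "ftServer" },
        { NULL, NULL, NULL }        /* table terminator, as in dmi_system_id */
    };

    int main(void)
    {
        const char *sys_vendor = "ftServer";    /* pretend DMI string */
        const struct quirk *q;

        for (q = quirk_table; q->callback; q++)
            if (strstr(sys_vendor, q->vendor_match))
                q->callback(q);

        printf("scan_all_pcie=%d\n", scan_all_pcie);
        return 0;
    }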
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index d0e6e403b4f6..5dd467bd6121 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -519,3 +519,20 @@ static void sb600_disable_hpet_bar(struct pci_dev *dev)
}
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar);
+
+/*
+ * Twinhead H12Y needs us to block out a region, otherwise we map devices
+ * there and any access kills the box.
+ *
+ * See: https://bugzilla.kernel.org/show_bug.cgi?id=10231
+ *
+ * Match off the LPC and svid/sdid (older kernels lose the bridge subvendor)
+ */
+static void __devinit twinhead_reserve_killing_zone(struct pci_dev *dev)
+{
+ if (dev->subsystem_vendor == 0x14FF && dev->subsystem_device == 0xA003) {
+ pr_info("Reserving memory on Twinhead H12Y\n");
+ request_mem_region(0xFFB00000, 0x100000, "twinhead");
+ }
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone);
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 831971e731f7..dd8ca6f7223b 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -57,7 +57,7 @@ static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev)
{
struct pcibios_fwaddrmap *map;
- WARN_ON(!spin_is_locked(&pcibios_fwaddrmap_lock));
+ WARN_ON_SMP(!spin_is_locked(&pcibios_fwaddrmap_lock));
list_for_each_entry(map, &pcibios_fwaddrmappings, list)
if (map->dev == dev)
diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c
index 66d377e334f7..646e3b5b4bb6 100644
--- a/arch/x86/platform/geode/net5501.c
+++ b/arch/x86/platform/geode/net5501.c
@@ -63,7 +63,7 @@ static struct gpio_led net5501_leds[] = {
.name = "net5501:1",
.gpio = 6,
.default_trigger = "default-on",
- .active_low = 1,
+ .active_low = 0,
},
};
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index e0a37233c0af..e31bcd8f2eee 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -805,7 +805,7 @@ void intel_scu_devices_create(void)
} else
i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1);
}
- intel_scu_notifier_post(SCU_AVAILABLE, 0L);
+ intel_scu_notifier_post(SCU_AVAILABLE, NULL);
}
EXPORT_SYMBOL_GPL(intel_scu_devices_create);
@@ -814,7 +814,7 @@ void intel_scu_devices_destroy(void)
{
int i;
- intel_scu_notifier_post(SCU_DOWN, 0L);
+ intel_scu_notifier_post(SCU_DOWN, NULL);
for (i = 0; i < ipc_next_dev; i++)
platform_device_del(ipc_devs[i]);
diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c
index c7abf13a213f..94d8a39332ec 100644
--- a/arch/x86/platform/visws/visws_quirks.c
+++ b/arch/x86/platform/visws/visws_quirks.c
@@ -445,7 +445,7 @@ static void ack_cobalt_irq(struct irq_data *data)
spin_lock_irqsave(&cobalt_lock, flags);
disable_cobalt_irq(data);
- apic_write(APIC_EOI, APIC_EIO_ACK);
+ apic_write(APIC_EOI, APIC_EOI_ACK);
spin_unlock_irqrestore(&cobalt_lock, flags);
}
diff --git a/arch/x86/tools/.gitignore b/arch/x86/tools/.gitignore
new file mode 100644
index 000000000000..be0ed065249b
--- /dev/null
+++ b/arch/x86/tools/.gitignore
@@ -0,0 +1 @@
+relocs
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
index d511aa97533a..733057b435b0 100644
--- a/arch/x86/tools/Makefile
+++ b/arch/x86/tools/Makefile
@@ -36,3 +36,7 @@ HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x
$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
$(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
+
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include
+hostprogs-y += relocs
+relocs: $(obj)/relocs
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/tools/relocs.c
index d3c0b0277666..b43cfcd9bf40 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -18,6 +18,8 @@ static void die(char *fmt, ...);
static Elf32_Ehdr ehdr;
static unsigned long reloc_count, reloc_idx;
static unsigned long *relocs;
+static unsigned long reloc16_count, reloc16_idx;
+static unsigned long *relocs16;
struct section {
Elf32_Shdr shdr;
@@ -28,52 +30,86 @@ struct section {
};
static struct section *secs;
+enum symtype {
+ S_ABS,
+ S_REL,
+ S_SEG,
+ S_LIN,
+ S_NSYMTYPES
+};
+
+static const char * const sym_regex_kernel[S_NSYMTYPES] = {
/*
 * The following symbols have been audited. Their values are constant and do
 * not change if bzImage is loaded at a different physical address than
 * the address for which it has been compiled. Don't warn the user about
 * absolute relocations present w.r.t. these symbols.
*/
-static const char abs_sym_regex[] =
+ [S_ABS] =
"^(xen_irq_disable_direct_reloc$|"
"xen_save_fl_direct_reloc$|"
"VDSO|"
- "__crc_)";
-static regex_t abs_sym_regex_c;
-static int is_abs_reloc(const char *sym_name)
-{
- return !regexec(&abs_sym_regex_c, sym_name, 0, NULL, 0);
-}
+ "__crc_)",
/*
* These symbols are known to be relative, even if the linker marks them
 * as absolute (typically defined outside any section in the linker script).
*/
-static const char rel_sym_regex[] =
- "^_end$";
-static regex_t rel_sym_regex_c;
-static int is_rel_reloc(const char *sym_name)
+ [S_REL] =
+ "^(__init_(begin|end)|"
+ "__x86_cpu_dev_(start|end)|"
+ "(__parainstructions|__alt_instructions)(|_end)|"
+ "(__iommu_table|__apicdrivers|__smp_locks)(|_end)|"
+ "_end)$"
+};
+
+
+static const char * const sym_regex_realmode[S_NSYMTYPES] = {
+/*
+ * These are 16-bit segment symbols when compiling 16-bit code.
+ */
+ [S_SEG] =
+ "^real_mode_seg$",
+
+/*
+ * These are offsets belonging to segments, as opposed to linear addresses,
+ * when compiling 16-bit code.
+ */
+ [S_LIN] =
+ "^pa_",
+};
+
+static const char * const *sym_regex;
+
+static regex_t sym_regex_c[S_NSYMTYPES];
+static int is_reloc(enum symtype type, const char *sym_name)
{
- return !regexec(&rel_sym_regex_c, sym_name, 0, NULL, 0);
+ return sym_regex[type] &&
+ !regexec(&sym_regex_c[type], sym_name, 0, NULL, 0);
}
-static void regex_init(void)
+static void regex_init(int use_real_mode)
{
char errbuf[128];
int err;
-
- err = regcomp(&abs_sym_regex_c, abs_sym_regex,
- REG_EXTENDED|REG_NOSUB);
- if (err) {
- regerror(err, &abs_sym_regex_c, errbuf, sizeof errbuf);
- die("%s", errbuf);
- }
+ int i;
+
+ if (use_real_mode)
+ sym_regex = sym_regex_realmode;
+ else
+ sym_regex = sym_regex_kernel;
- err = regcomp(&rel_sym_regex_c, rel_sym_regex,
- REG_EXTENDED|REG_NOSUB);
- if (err) {
- regerror(err, &rel_sym_regex_c, errbuf, sizeof errbuf);
- die("%s", errbuf);
+ for (i = 0; i < S_NSYMTYPES; i++) {
+ if (!sym_regex[i])
+ continue;
+
+ err = regcomp(&sym_regex_c[i], sym_regex[i],
+ REG_EXTENDED|REG_NOSUB);
+
+ if (err) {
+ regerror(err, &sym_regex_c[i], errbuf, sizeof errbuf);
+ die("%s", errbuf);
+ }
}
}
@@ -154,6 +190,10 @@ static const char *rel_type(unsigned type)
REL_TYPE(R_386_RELATIVE),
REL_TYPE(R_386_GOTOFF),
REL_TYPE(R_386_GOTPC),
+ REL_TYPE(R_386_8),
+ REL_TYPE(R_386_PC8),
+ REL_TYPE(R_386_16),
+ REL_TYPE(R_386_PC16),
#undef REL_TYPE
};
const char *name = "unknown type rel type name";
@@ -189,7 +229,7 @@ static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym)
name = sym_strtab + sym->st_name;
}
else {
- name = sec_name(secs[sym->st_shndx].shdr.sh_name);
+ name = sec_name(sym->st_shndx);
}
return name;
}
@@ -403,13 +443,11 @@ static void print_absolute_symbols(void)
for (i = 0; i < ehdr.e_shnum; i++) {
struct section *sec = &secs[i];
char *sym_strtab;
- Elf32_Sym *sh_symtab;
int j;
if (sec->shdr.sh_type != SHT_SYMTAB) {
continue;
}
- sh_symtab = sec->symtab;
sym_strtab = sec->link->strtab;
for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) {
Elf32_Sym *sym;
@@ -474,7 +512,7 @@ static void print_absolute_relocs(void)
* Before warning check if this absolute symbol
* relocation is harmless.
*/
- if (is_abs_reloc(name) || is_rel_reloc(name))
+ if (is_reloc(S_ABS, name) || is_reloc(S_REL, name))
continue;
if (!printed) {
@@ -498,7 +536,8 @@ static void print_absolute_relocs(void)
printf("\n");
}
-static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
+static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym),
+ int use_real_mode)
{
int i;
/* Walk through the relocations */
@@ -523,30 +562,67 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
Elf32_Rel *rel;
Elf32_Sym *sym;
unsigned r_type;
+ const char *symname;
+ int shn_abs;
+
rel = &sec->reltab[j];
sym = &sh_symtab[ELF32_R_SYM(rel->r_info)];
r_type = ELF32_R_TYPE(rel->r_info);
- /* Don't visit relocations to absolute symbols */
- if (sym->st_shndx == SHN_ABS &&
- !is_rel_reloc(sym_name(sym_strtab, sym))) {
- continue;
- }
+
+ shn_abs = sym->st_shndx == SHN_ABS;
+
switch (r_type) {
case R_386_NONE:
case R_386_PC32:
+ case R_386_PC16:
+ case R_386_PC8:
/*
* NONE can be ignored and PC relative
* relocations don't need to be adjusted.
*/
break;
+
+ case R_386_16:
+ symname = sym_name(sym_strtab, sym);
+ if (!use_real_mode)
+ goto bad;
+ if (shn_abs) {
+ if (is_reloc(S_ABS, symname))
+ break;
+ else if (!is_reloc(S_SEG, symname))
+ goto bad;
+ } else {
+ if (is_reloc(S_LIN, symname))
+ goto bad;
+ else
+ break;
+ }
+ visit(rel, sym);
+ break;
+
case R_386_32:
- /* Visit relocations that need to be adjusted */
+ symname = sym_name(sym_strtab, sym);
+ if (shn_abs) {
+ if (is_reloc(S_ABS, symname))
+ break;
+ else if (!is_reloc(S_REL, symname))
+ goto bad;
+ } else {
+ if (use_real_mode &&
+ !is_reloc(S_LIN, symname))
+ break;
+ }
visit(rel, sym);
break;
default:
die("Unsupported relocation type: %s (%d)\n",
rel_type(r_type), r_type);
break;
+ bad:
+ symname = sym_name(sym_strtab, sym);
+ die("Invalid %s %s relocation: %s\n",
+ shn_abs ? "absolute" : "relative",
+ rel_type(r_type), symname);
}
}
}
@@ -554,13 +630,19 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
static void count_reloc(Elf32_Rel *rel, Elf32_Sym *sym)
{
- reloc_count += 1;
+ if (ELF32_R_TYPE(rel->r_info) == R_386_16)
+ reloc16_count++;
+ else
+ reloc_count++;
}
static void collect_reloc(Elf32_Rel *rel, Elf32_Sym *sym)
{
/* Remember the address that needs to be adjusted. */
- relocs[reloc_idx++] = rel->r_offset;
+ if (ELF32_R_TYPE(rel->r_info) == R_386_16)
+ relocs16[reloc16_idx++] = rel->r_offset;
+ else
+ relocs[reloc_idx++] = rel->r_offset;
}
static int cmp_relocs(const void *va, const void *vb)
@@ -570,23 +652,41 @@ static int cmp_relocs(const void *va, const void *vb)
return (*a == *b)? 0 : (*a > *b)? 1 : -1;
}
-static void emit_relocs(int as_text)
+static int write32(unsigned int v, FILE *f)
+{
+ unsigned char buf[4];
+
+ put_unaligned_le32(v, buf);
+ return fwrite(buf, 1, 4, f) == 4 ? 0 : -1;
+}
+
+static void emit_relocs(int as_text, int use_real_mode)
{
int i;
/* Count how many relocations I have and allocate space for them. */
reloc_count = 0;
- walk_relocs(count_reloc);
+ walk_relocs(count_reloc, use_real_mode);
relocs = malloc(reloc_count * sizeof(relocs[0]));
if (!relocs) {
die("malloc of %d entries for relocs failed\n",
reloc_count);
}
+
+ relocs16 = malloc(reloc16_count * sizeof(relocs[0]));
+ if (!relocs16) {
+ die("malloc of %d entries for relocs16 failed\n",
+ reloc16_count);
+ }
/* Collect up the relocations */
reloc_idx = 0;
- walk_relocs(collect_reloc);
+ walk_relocs(collect_reloc, use_real_mode);
+
+ if (reloc16_count && !use_real_mode)
+ die("Segment relocations found but --realmode not specified\n");
/* Order the relocations for more efficient processing */
qsort(relocs, reloc_count, sizeof(relocs[0]), cmp_relocs);
+ qsort(relocs16, reloc16_count, sizeof(relocs16[0]), cmp_relocs);
/* Print the relocations */
if (as_text) {
@@ -595,58 +695,83 @@ static void emit_relocs(int as_text)
*/
printf(".section \".data.reloc\",\"a\"\n");
printf(".balign 4\n");
- for (i = 0; i < reloc_count; i++) {
- printf("\t .long 0x%08lx\n", relocs[i]);
+ if (use_real_mode) {
+ printf("\t.long %lu\n", reloc16_count);
+ for (i = 0; i < reloc16_count; i++)
+ printf("\t.long 0x%08lx\n", relocs16[i]);
+ printf("\t.long %lu\n", reloc_count);
+ for (i = 0; i < reloc_count; i++) {
+ printf("\t.long 0x%08lx\n", relocs[i]);
+ }
+ } else {
+ /* Print a stop */
+ printf("\t.long 0x%08lx\n", (unsigned long)0);
+ for (i = 0; i < reloc_count; i++) {
+ printf("\t.long 0x%08lx\n", relocs[i]);
+ }
}
+
printf("\n");
}
else {
- unsigned char buf[4];
- /* Print a stop */
- fwrite("\0\0\0\0", 4, 1, stdout);
- /* Now print each relocation */
- for (i = 0; i < reloc_count; i++) {
- put_unaligned_le32(relocs[i], buf);
- fwrite(buf, 4, 1, stdout);
+ if (use_real_mode) {
+ write32(reloc16_count, stdout);
+ for (i = 0; i < reloc16_count; i++)
+ write32(relocs16[i], stdout);
+ write32(reloc_count, stdout);
+
+ /* Now print each relocation */
+ for (i = 0; i < reloc_count; i++)
+ write32(relocs[i], stdout);
+ } else {
+ /* Print a stop */
+ write32(0, stdout);
+
+ /* Now print each relocation */
+ for (i = 0; i < reloc_count; i++) {
+ write32(relocs[i], stdout);
+ }
}
}
}
static void usage(void)
{
- die("relocs [--abs-syms |--abs-relocs | --text] vmlinux\n");
+ die("relocs [--abs-syms|--abs-relocs|--text|--realmode] vmlinux\n");
}
int main(int argc, char **argv)
{
int show_absolute_syms, show_absolute_relocs;
- int as_text;
+ int as_text, use_real_mode;
const char *fname;
FILE *fp;
int i;
- regex_init();
-
show_absolute_syms = 0;
show_absolute_relocs = 0;
as_text = 0;
+ use_real_mode = 0;
fname = NULL;
for (i = 1; i < argc; i++) {
char *arg = argv[i];
if (*arg == '-') {
- if (strcmp(argv[1], "--abs-syms") == 0) {
+ if (strcmp(arg, "--abs-syms") == 0) {
show_absolute_syms = 1;
continue;
}
-
- if (strcmp(argv[1], "--abs-relocs") == 0) {
+ if (strcmp(arg, "--abs-relocs") == 0) {
show_absolute_relocs = 1;
continue;
}
- else if (strcmp(argv[1], "--text") == 0) {
+ if (strcmp(arg, "--text") == 0) {
as_text = 1;
continue;
}
+ if (strcmp(arg, "--realmode") == 0) {
+ use_real_mode = 1;
+ continue;
+ }
}
else if (!fname) {
fname = arg;
@@ -657,6 +782,7 @@ int main(int argc, char **argv)
if (!fname) {
usage();
}
+ regex_init(use_real_mode);
fp = fopen(fname, "r");
if (!fp) {
die("Cannot open %s: %s\n",
@@ -675,6 +801,6 @@ int main(int argc, char **argv)
print_absolute_relocs();
return 0;
}
- emit_relocs(as_text);
+ emit_relocs(as_text, use_real_mode);
return 0;
}
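
relocs now classifies symbols through per-type regex tables (sym_regex_kernel vs. sym_regex_realmode), compiled once in regex_init() and queried via is_reloc(). The compile-once, match-many shape is easy to reproduce against POSIX regex.h; the patterns below are an abbreviated subset of the table above:

    #include <stdio.h>
    #include <regex.h>

    enum symtype { S_ABS, S_REL, S_NSYMTYPES };

    static const char * const sym_regex[S_NSYMTYPES] = {
        [S_ABS] = "^(VDSO|__crc_)",
        [S_REL] = "^(__init_(begin|end)|_end)$",
    };

    static regex_t sym_regex_c[S_NSYMTYPES];

    static int is_reloc(enum symtype type, const char *name)
    {
        return sym_regex[type] &&
               !regexec(&sym_regex_c[type], name, 0, NULL, 0);
    }

    int main(void)
    {
        int i;

        for (i = 0; i < S_NSYMTYPES; i++)
            if (sym_regex[i] &&
                regcomp(&sym_regex_c[i], sym_regex[i], REG_EXTENDED|REG_NOSUB))
                return 1;

        printf("_end       -> S_REL? %d\n", is_reloc(S_REL, "_end"));
        printf("__crc_foo  -> S_ABS? %d\n", is_reloc(S_ABS, "__crc_foo"));
        printf("random_sym -> S_REL? %d\n", is_reloc(S_REL, "random_sym"));
        return 0;
    }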
diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h
index f3b0633b69a1..0e07adc8cbe4 100644
--- a/arch/x86/um/asm/elf.h
+++ b/arch/x86/um/asm/elf.h
@@ -34,25 +34,25 @@
#define ELF_ARCH EM_386
#define ELF_PLAT_INIT(regs, load_addr) do { \
- PT_REGS_EBX(regs) = 0; \
- PT_REGS_ECX(regs) = 0; \
- PT_REGS_EDX(regs) = 0; \
- PT_REGS_ESI(regs) = 0; \
- PT_REGS_EDI(regs) = 0; \
- PT_REGS_EBP(regs) = 0; \
- PT_REGS_EAX(regs) = 0; \
+ PT_REGS_BX(regs) = 0; \
+ PT_REGS_CX(regs) = 0; \
+ PT_REGS_DX(regs) = 0; \
+ PT_REGS_SI(regs) = 0; \
+ PT_REGS_DI(regs) = 0; \
+ PT_REGS_BP(regs) = 0; \
+ PT_REGS_AX(regs) = 0; \
} while (0)
/* Shamelessly stolen from include/asm-i386/elf.h */
#define ELF_CORE_COPY_REGS(pr_reg, regs) do { \
- pr_reg[0] = PT_REGS_EBX(regs); \
- pr_reg[1] = PT_REGS_ECX(regs); \
- pr_reg[2] = PT_REGS_EDX(regs); \
- pr_reg[3] = PT_REGS_ESI(regs); \
- pr_reg[4] = PT_REGS_EDI(regs); \
- pr_reg[5] = PT_REGS_EBP(regs); \
- pr_reg[6] = PT_REGS_EAX(regs); \
+ pr_reg[0] = PT_REGS_BX(regs); \
+ pr_reg[1] = PT_REGS_CX(regs); \
+ pr_reg[2] = PT_REGS_DX(regs); \
+ pr_reg[3] = PT_REGS_SI(regs); \
+ pr_reg[4] = PT_REGS_DI(regs); \
+ pr_reg[5] = PT_REGS_BP(regs); \
+ pr_reg[6] = PT_REGS_AX(regs); \
pr_reg[7] = PT_REGS_DS(regs); \
pr_reg[8] = PT_REGS_ES(regs); \
/* fake once used fs and gs selectors? */ \
@@ -130,13 +130,13 @@ do { \
#define ELF_ARCH EM_X86_64
#define ELF_PLAT_INIT(regs, load_addr) do { \
- PT_REGS_RBX(regs) = 0; \
- PT_REGS_RCX(regs) = 0; \
- PT_REGS_RDX(regs) = 0; \
- PT_REGS_RSI(regs) = 0; \
- PT_REGS_RDI(regs) = 0; \
- PT_REGS_RBP(regs) = 0; \
- PT_REGS_RAX(regs) = 0; \
+ PT_REGS_BX(regs) = 0; \
+ PT_REGS_CX(regs) = 0; \
+ PT_REGS_DX(regs) = 0; \
+ PT_REGS_SI(regs) = 0; \
+ PT_REGS_DI(regs) = 0; \
+ PT_REGS_BP(regs) = 0; \
+ PT_REGS_AX(regs) = 0; \
PT_REGS_R8(regs) = 0; \
PT_REGS_R9(regs) = 0; \
PT_REGS_R10(regs) = 0; \
diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h
index c8aca8c501b0..950dfb7b8417 100644
--- a/arch/x86/um/asm/ptrace.h
+++ b/arch/x86/um/asm/ptrace.h
@@ -1,5 +1,39 @@
+#ifndef __UM_X86_PTRACE_H
+#define __UM_X86_PTRACE_H
+
#ifdef CONFIG_X86_32
# include "ptrace_32.h"
#else
# include "ptrace_64.h"
#endif
+
+#define PT_REGS_AX(r) UPT_AX(&(r)->regs)
+#define PT_REGS_BX(r) UPT_BX(&(r)->regs)
+#define PT_REGS_CX(r) UPT_CX(&(r)->regs)
+#define PT_REGS_DX(r) UPT_DX(&(r)->regs)
+
+#define PT_REGS_SI(r) UPT_SI(&(r)->regs)
+#define PT_REGS_DI(r) UPT_DI(&(r)->regs)
+#define PT_REGS_BP(r) UPT_BP(&(r)->regs)
+#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs)
+
+#define PT_REGS_CS(r) UPT_CS(&(r)->regs)
+#define PT_REGS_SS(r) UPT_SS(&(r)->regs)
+#define PT_REGS_DS(r) UPT_DS(&(r)->regs)
+#define PT_REGS_ES(r) UPT_ES(&(r)->regs)
+
+#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_AX(r)
+#define PT_REGS_SYSCALL_RET(r) PT_REGS_AX(r)
+
+#define PT_FIX_EXEC_STACK(sp) do ; while(0)
+
+#define profile_pc(regs) PT_REGS_IP(regs)
+
+#define UPT_RESTART_SYSCALL(r) (UPT_IP(r) -= 2)
+#define UPT_SET_SYSCALL_RETURN(r, res) (UPT_AX(r) = (res))
+
+static inline long regs_return_value(struct uml_pt_regs *regs)
+{
+ return UPT_AX(regs);
+}
+#endif /* __UM_X86_PTRACE_H */
diff --git a/arch/x86/um/asm/ptrace_32.h b/arch/x86/um/asm/ptrace_32.h
index 5d2a59112537..2cf225351b65 100644
--- a/arch/x86/um/asm/ptrace_32.h
+++ b/arch/x86/um/asm/ptrace_32.h
@@ -11,29 +11,6 @@
#include "linux/compiler.h"
#include "asm/ptrace-generic.h"
-#define PT_REGS_EAX(r) UPT_EAX(&(r)->regs)
-#define PT_REGS_EBX(r) UPT_EBX(&(r)->regs)
-#define PT_REGS_ECX(r) UPT_ECX(&(r)->regs)
-#define PT_REGS_EDX(r) UPT_EDX(&(r)->regs)
-#define PT_REGS_ESI(r) UPT_ESI(&(r)->regs)
-#define PT_REGS_EDI(r) UPT_EDI(&(r)->regs)
-#define PT_REGS_EBP(r) UPT_EBP(&(r)->regs)
-
-#define PT_REGS_CS(r) UPT_CS(&(r)->regs)
-#define PT_REGS_SS(r) UPT_SS(&(r)->regs)
-#define PT_REGS_DS(r) UPT_DS(&(r)->regs)
-#define PT_REGS_ES(r) UPT_ES(&(r)->regs)
-#define PT_REGS_FS(r) UPT_FS(&(r)->regs)
-#define PT_REGS_GS(r) UPT_GS(&(r)->regs)
-
-#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs)
-
-#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_EAX(r)
-#define PT_REGS_SYSCALL_RET(r) PT_REGS_EAX(r)
-#define PT_FIX_EXEC_STACK(sp) do ; while(0)
-
-#define profile_pc(regs) PT_REGS_IP(regs)
-
#define user_mode(r) UPT_IS_USER(&(r)->regs)
/*
diff --git a/arch/x86/um/asm/ptrace_64.h b/arch/x86/um/asm/ptrace_64.h
index 706a0d80545c..ea7bff394320 100644
--- a/arch/x86/um/asm/ptrace_64.h
+++ b/arch/x86/um/asm/ptrace_64.h
@@ -15,13 +15,6 @@
#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64
-#define PT_REGS_RBX(r) UPT_RBX(&(r)->regs)
-#define PT_REGS_RCX(r) UPT_RCX(&(r)->regs)
-#define PT_REGS_RDX(r) UPT_RDX(&(r)->regs)
-#define PT_REGS_RSI(r) UPT_RSI(&(r)->regs)
-#define PT_REGS_RDI(r) UPT_RDI(&(r)->regs)
-#define PT_REGS_RBP(r) UPT_RBP(&(r)->regs)
-#define PT_REGS_RAX(r) UPT_RAX(&(r)->regs)
#define PT_REGS_R8(r) UPT_R8(&(r)->regs)
#define PT_REGS_R9(r) UPT_R9(&(r)->regs)
#define PT_REGS_R10(r) UPT_R10(&(r)->regs)
@@ -31,27 +24,8 @@
#define PT_REGS_R14(r) UPT_R14(&(r)->regs)
#define PT_REGS_R15(r) UPT_R15(&(r)->regs)
-#define PT_REGS_FS(r) UPT_FS(&(r)->regs)
-#define PT_REGS_GS(r) UPT_GS(&(r)->regs)
-#define PT_REGS_DS(r) UPT_DS(&(r)->regs)
-#define PT_REGS_ES(r) UPT_ES(&(r)->regs)
-#define PT_REGS_SS(r) UPT_SS(&(r)->regs)
-#define PT_REGS_CS(r) UPT_CS(&(r)->regs)
-
-#define PT_REGS_ORIG_RAX(r) UPT_ORIG_RAX(&(r)->regs)
-#define PT_REGS_RIP(r) UPT_IP(&(r)->regs)
-#define PT_REGS_SP(r) UPT_SP(&(r)->regs)
-
-#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs)
-
/* XXX */
#define user_mode(r) UPT_IS_USER(&(r)->regs)
-#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_RAX(r)
-#define PT_REGS_SYSCALL_RET(r) PT_REGS_RAX(r)
-
-#define PT_FIX_EXEC_STACK(sp) do ; while(0)
-
-#define profile_pc(regs) PT_REGS_IP(regs)
struct user_desc;
diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h
index 2bbe1ec2d96a..6ce2d76eb908 100644
--- a/arch/x86/um/shared/sysdep/ptrace.h
+++ b/arch/x86/um/shared/sysdep/ptrace.h
@@ -1,15 +1,74 @@
#ifndef __SYSDEP_X86_PTRACE_H
#define __SYSDEP_X86_PTRACE_H
+#include <generated/user_constants.h>
+#include "sysdep/faultinfo.h"
+
+#define MAX_REG_OFFSET (UM_FRAME_SIZE)
+#define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long))
+
+#define REGS_IP(r) ((r)[HOST_IP])
+#define REGS_SP(r) ((r)[HOST_SP])
+#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS])
+#define REGS_AX(r) ((r)[HOST_AX])
+#define REGS_BX(r) ((r)[HOST_BX])
+#define REGS_CX(r) ((r)[HOST_CX])
+#define REGS_DX(r) ((r)[HOST_DX])
+#define REGS_SI(r) ((r)[HOST_SI])
+#define REGS_DI(r) ((r)[HOST_DI])
+#define REGS_BP(r) ((r)[HOST_BP])
+#define REGS_CS(r) ((r)[HOST_CS])
+#define REGS_SS(r) ((r)[HOST_SS])
+#define REGS_DS(r) ((r)[HOST_DS])
+#define REGS_ES(r) ((r)[HOST_ES])
+
+#define UPT_IP(r) REGS_IP((r)->gp)
+#define UPT_SP(r) REGS_SP((r)->gp)
+#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp)
+#define UPT_AX(r) REGS_AX((r)->gp)
+#define UPT_BX(r) REGS_BX((r)->gp)
+#define UPT_CX(r) REGS_CX((r)->gp)
+#define UPT_DX(r) REGS_DX((r)->gp)
+#define UPT_SI(r) REGS_SI((r)->gp)
+#define UPT_DI(r) REGS_DI((r)->gp)
+#define UPT_BP(r) REGS_BP((r)->gp)
+#define UPT_CS(r) REGS_CS((r)->gp)
+#define UPT_SS(r) REGS_SS((r)->gp)
+#define UPT_DS(r) REGS_DS((r)->gp)
+#define UPT_ES(r) REGS_ES((r)->gp)
+
#ifdef __i386__
#include "ptrace_32.h"
#else
#include "ptrace_64.h"
#endif
-static inline long regs_return_value(struct uml_pt_regs *regs)
-{
- return UPT_SYSCALL_RET(regs);
-}
+struct syscall_args {
+ unsigned long args[6];
+};
+
+#define SYSCALL_ARGS(r) ((struct syscall_args) \
+ { .args = { UPT_SYSCALL_ARG1(r), \
+ UPT_SYSCALL_ARG2(r), \
+ UPT_SYSCALL_ARG3(r), \
+ UPT_SYSCALL_ARG4(r), \
+ UPT_SYSCALL_ARG5(r), \
+ UPT_SYSCALL_ARG6(r) } } )
+
+struct uml_pt_regs {
+ unsigned long gp[MAX_REG_NR];
+ unsigned long fp[MAX_FP_NR];
+ struct faultinfo faultinfo;
+ long syscall;
+ int is_user;
+};
+
+#define EMPTY_UML_PT_REGS { }
+
+#define UPT_SYSCALL_NR(r) ((r)->syscall)
+#define UPT_FAULTINFO(r) (&(r)->faultinfo)
+#define UPT_IS_USER(r) ((r)->is_user)
+
+extern int user_context(unsigned long sp);
#endif /* __SYSDEP_X86_PTRACE_H */
diff --git a/arch/x86/um/shared/sysdep/ptrace_32.h b/arch/x86/um/shared/sysdep/ptrace_32.h
index befd1df32ed0..b94a108de1dc 100644
--- a/arch/x86/um/shared/sysdep/ptrace_32.h
+++ b/arch/x86/um/shared/sysdep/ptrace_32.h
@@ -6,11 +6,7 @@
#ifndef __SYSDEP_I386_PTRACE_H
#define __SYSDEP_I386_PTRACE_H
-#include <generated/user_constants.h>
-#include "sysdep/faultinfo.h"
-
-#define MAX_REG_NR (UM_FRAME_SIZE / sizeof(unsigned long))
-#define MAX_REG_OFFSET (UM_FRAME_SIZE)
+#define MAX_FP_NR HOST_FPX_SIZE
static inline void update_debugregs(int seq) {}
@@ -24,90 +20,16 @@ void set_using_sysemu(int value);
int get_using_sysemu(void);
extern int sysemu_supported;
-#define REGS_IP(r) ((r)[HOST_IP])
-#define REGS_SP(r) ((r)[HOST_SP])
-#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS])
-#define REGS_EAX(r) ((r)[HOST_AX])
-#define REGS_EBX(r) ((r)[HOST_BX])
-#define REGS_ECX(r) ((r)[HOST_CX])
-#define REGS_EDX(r) ((r)[HOST_DX])
-#define REGS_ESI(r) ((r)[HOST_SI])
-#define REGS_EDI(r) ((r)[HOST_DI])
-#define REGS_EBP(r) ((r)[HOST_BP])
-#define REGS_CS(r) ((r)[HOST_CS])
-#define REGS_SS(r) ((r)[HOST_SS])
-#define REGS_DS(r) ((r)[HOST_DS])
-#define REGS_ES(r) ((r)[HOST_ES])
-#define REGS_FS(r) ((r)[HOST_FS])
-#define REGS_GS(r) ((r)[HOST_GS])
-
-#define REGS_SET_SYSCALL_RETURN(r, res) REGS_EAX(r) = (res)
-
-#define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
-#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r))
-
#ifndef PTRACE_SYSEMU_SINGLESTEP
#define PTRACE_SYSEMU_SINGLESTEP 32
#endif
-struct uml_pt_regs {
- unsigned long gp[MAX_REG_NR];
- unsigned long fp[HOST_FPX_SIZE];
- struct faultinfo faultinfo;
- long syscall;
- int is_user;
-};
-
-#define EMPTY_UML_PT_REGS { }
-
-#define UPT_IP(r) REGS_IP((r)->gp)
-#define UPT_SP(r) REGS_SP((r)->gp)
-#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp)
-#define UPT_EAX(r) REGS_EAX((r)->gp)
-#define UPT_EBX(r) REGS_EBX((r)->gp)
-#define UPT_ECX(r) REGS_ECX((r)->gp)
-#define UPT_EDX(r) REGS_EDX((r)->gp)
-#define UPT_ESI(r) REGS_ESI((r)->gp)
-#define UPT_EDI(r) REGS_EDI((r)->gp)
-#define UPT_EBP(r) REGS_EBP((r)->gp)
-#define UPT_ORIG_EAX(r) ((r)->syscall)
-#define UPT_CS(r) REGS_CS((r)->gp)
-#define UPT_SS(r) REGS_SS((r)->gp)
-#define UPT_DS(r) REGS_DS((r)->gp)
-#define UPT_ES(r) REGS_ES((r)->gp)
-#define UPT_FS(r) REGS_FS((r)->gp)
-#define UPT_GS(r) REGS_GS((r)->gp)
-
-#define UPT_SYSCALL_ARG1(r) UPT_EBX(r)
-#define UPT_SYSCALL_ARG2(r) UPT_ECX(r)
-#define UPT_SYSCALL_ARG3(r) UPT_EDX(r)
-#define UPT_SYSCALL_ARG4(r) UPT_ESI(r)
-#define UPT_SYSCALL_ARG5(r) UPT_EDI(r)
-#define UPT_SYSCALL_ARG6(r) UPT_EBP(r)
-
-extern int user_context(unsigned long sp);
-
-#define UPT_IS_USER(r) ((r)->is_user)
-
-struct syscall_args {
- unsigned long args[6];
-};
-
-#define SYSCALL_ARGS(r) ((struct syscall_args) \
- { .args = { UPT_SYSCALL_ARG1(r), \
- UPT_SYSCALL_ARG2(r), \
- UPT_SYSCALL_ARG3(r), \
- UPT_SYSCALL_ARG4(r), \
- UPT_SYSCALL_ARG5(r), \
- UPT_SYSCALL_ARG6(r) } } )
-
-#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp)
-
-#define UPT_ORIG_SYSCALL(r) UPT_EAX(r)
-#define UPT_SYSCALL_NR(r) UPT_ORIG_EAX(r)
-#define UPT_SYSCALL_RET(r) UPT_EAX(r)
-
-#define UPT_FAULTINFO(r) (&(r)->faultinfo)
+#define UPT_SYSCALL_ARG1(r) UPT_BX(r)
+#define UPT_SYSCALL_ARG2(r) UPT_CX(r)
+#define UPT_SYSCALL_ARG3(r) UPT_DX(r)
+#define UPT_SYSCALL_ARG4(r) UPT_SI(r)
+#define UPT_SYSCALL_ARG5(r) UPT_DI(r)
+#define UPT_SYSCALL_ARG6(r) UPT_BP(r)
extern void arch_init_registers(int pid);
diff --git a/arch/x86/um/shared/sysdep/ptrace_64.h b/arch/x86/um/shared/sysdep/ptrace_64.h
index 031edc53ac57..919789f1071e 100644
--- a/arch/x86/um/shared/sysdep/ptrace_64.h
+++ b/arch/x86/um/shared/sysdep/ptrace_64.h
@@ -8,22 +8,8 @@
#ifndef __SYSDEP_X86_64_PTRACE_H
#define __SYSDEP_X86_64_PTRACE_H
-#include <generated/user_constants.h>
-#include "sysdep/faultinfo.h"
+#define MAX_FP_NR HOST_FP_SIZE
-#define MAX_REG_OFFSET (UM_FRAME_SIZE)
-#define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long))
-
-#define REGS_IP(r) ((r)[HOST_IP])
-#define REGS_SP(r) ((r)[HOST_SP])
-
-#define REGS_RBX(r) ((r)[HOST_BX])
-#define REGS_RCX(r) ((r)[HOST_CX])
-#define REGS_RDX(r) ((r)[HOST_DX])
-#define REGS_RSI(r) ((r)[HOST_SI])
-#define REGS_RDI(r) ((r)[HOST_DI])
-#define REGS_RBP(r) ((r)[HOST_BP])
-#define REGS_RAX(r) ((r)[HOST_AX])
#define REGS_R8(r) ((r)[HOST_R8])
#define REGS_R9(r) ((r)[HOST_R9])
#define REGS_R10(r) ((r)[HOST_R10])
@@ -32,9 +18,6 @@
#define REGS_R13(r) ((r)[HOST_R13])
#define REGS_R14(r) ((r)[HOST_R14])
#define REGS_R15(r) ((r)[HOST_R15])
-#define REGS_CS(r) ((r)[HOST_CS])
-#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS])
-#define REGS_SS(r) ((r)[HOST_SS])
#define HOST_FS_BASE 21
#define HOST_GS_BASE 22
@@ -58,45 +41,6 @@
#define GS (HOST_GS * sizeof(long))
#endif
-#define REGS_FS_BASE(r) ((r)[HOST_FS_BASE])
-#define REGS_GS_BASE(r) ((r)[HOST_GS_BASE])
-#define REGS_DS(r) ((r)[HOST_DS])
-#define REGS_ES(r) ((r)[HOST_ES])
-#define REGS_FS(r) ((r)[HOST_FS])
-#define REGS_GS(r) ((r)[HOST_GS])
-
-#define REGS_ORIG_RAX(r) ((r)[HOST_ORIG_AX])
-
-#define REGS_SET_SYSCALL_RETURN(r, res) REGS_RAX(r) = (res)
-
-#define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
-#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r))
-
-#define REGS_FAULT_ADDR(r) ((r)->fault_addr)
-
-#define REGS_FAULT_WRITE(r) FAULT_WRITE((r)->fault_type)
-
-#define REGS_TRAP(r) ((r)->trap_type)
-
-#define REGS_ERR(r) ((r)->fault_type)
-
-struct uml_pt_regs {
- unsigned long gp[MAX_REG_NR];
- unsigned long fp[HOST_FP_SIZE];
- struct faultinfo faultinfo;
- long syscall;
- int is_user;
-};
-
-#define EMPTY_UML_PT_REGS { }
-
-#define UPT_RBX(r) REGS_RBX((r)->gp)
-#define UPT_RCX(r) REGS_RCX((r)->gp)
-#define UPT_RDX(r) REGS_RDX((r)->gp)
-#define UPT_RSI(r) REGS_RSI((r)->gp)
-#define UPT_RDI(r) REGS_RDI((r)->gp)
-#define UPT_RBP(r) REGS_RBP((r)->gp)
-#define UPT_RAX(r) REGS_RAX((r)->gp)
#define UPT_R8(r) REGS_R8((r)->gp)
#define UPT_R9(r) REGS_R9((r)->gp)
#define UPT_R10(r) REGS_R10((r)->gp)
@@ -105,51 +49,14 @@ struct uml_pt_regs {
#define UPT_R13(r) REGS_R13((r)->gp)
#define UPT_R14(r) REGS_R14((r)->gp)
#define UPT_R15(r) REGS_R15((r)->gp)
-#define UPT_CS(r) REGS_CS((r)->gp)
-#define UPT_FS_BASE(r) REGS_FS_BASE((r)->gp)
-#define UPT_FS(r) REGS_FS((r)->gp)
-#define UPT_GS_BASE(r) REGS_GS_BASE((r)->gp)
-#define UPT_GS(r) REGS_GS((r)->gp)
-#define UPT_DS(r) REGS_DS((r)->gp)
-#define UPT_ES(r) REGS_ES((r)->gp)
-#define UPT_CS(r) REGS_CS((r)->gp)
-#define UPT_SS(r) REGS_SS((r)->gp)
-#define UPT_ORIG_RAX(r) REGS_ORIG_RAX((r)->gp)
-
-#define UPT_IP(r) REGS_IP((r)->gp)
-#define UPT_SP(r) REGS_SP((r)->gp)
-
-#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp)
-#define UPT_SYSCALL_NR(r) ((r)->syscall)
-#define UPT_SYSCALL_RET(r) UPT_RAX(r)
-
-extern int user_context(unsigned long sp);
-#define UPT_IS_USER(r) ((r)->is_user)
-
-#define UPT_SYSCALL_ARG1(r) UPT_RDI(r)
-#define UPT_SYSCALL_ARG2(r) UPT_RSI(r)
-#define UPT_SYSCALL_ARG3(r) UPT_RDX(r)
+#define UPT_SYSCALL_ARG1(r) UPT_DI(r)
+#define UPT_SYSCALL_ARG2(r) UPT_SI(r)
+#define UPT_SYSCALL_ARG3(r) UPT_DX(r)
#define UPT_SYSCALL_ARG4(r) UPT_R10(r)
#define UPT_SYSCALL_ARG5(r) UPT_R8(r)
#define UPT_SYSCALL_ARG6(r) UPT_R9(r)
-struct syscall_args {
- unsigned long args[6];
-};
-
-#define SYSCALL_ARGS(r) ((struct syscall_args) \
- { .args = { UPT_SYSCALL_ARG1(r), \
- UPT_SYSCALL_ARG2(r), \
- UPT_SYSCALL_ARG3(r), \
- UPT_SYSCALL_ARG4(r), \
- UPT_SYSCALL_ARG5(r), \
- UPT_SYSCALL_ARG6(r) } } )
-
-#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp)
-
-#define UPT_FAULTINFO(r) (&(r)->faultinfo)
-
static inline void arch_init_registers(int pid)
{
}
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 4883b9546016..bb0fb03b9f85 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -156,6 +156,9 @@ static int copy_sc_from_user(struct pt_regs *regs,
struct sigcontext sc;
int err, pid;
+ /* Always make any pending restarted system calls return -EINTR */
+ current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
err = copy_from_user(&sc, from, sizeof(sc));
if (err)
return err;
@@ -410,9 +413,9 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
PT_REGS_SP(regs) = (unsigned long) frame;
PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
- PT_REGS_EAX(regs) = (unsigned long) sig;
- PT_REGS_EDX(regs) = (unsigned long) 0;
- PT_REGS_ECX(regs) = (unsigned long) 0;
+ PT_REGS_AX(regs) = (unsigned long) sig;
+ PT_REGS_DX(regs) = (unsigned long) 0;
+ PT_REGS_CX(regs) = (unsigned long) 0;
if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
ptrace_notify(SIGTRAP);
@@ -460,9 +463,9 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
PT_REGS_SP(regs) = (unsigned long) frame;
PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
- PT_REGS_EAX(regs) = (unsigned long) sig;
- PT_REGS_EDX(regs) = (unsigned long) &frame->info;
- PT_REGS_ECX(regs) = (unsigned long) &frame->uc;
+ PT_REGS_AX(regs) = (unsigned long) sig;
+ PT_REGS_DX(regs) = (unsigned long) &frame->info;
+ PT_REGS_CX(regs) = (unsigned long) &frame->uc;
if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
ptrace_notify(SIGTRAP);
@@ -541,8 +544,8 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
set->sig[0]);
err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate);
if (sizeof(*set) == 16) {
- __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
- __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
+ err |= __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
+ err |= __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
}
else
err |= __copy_to_user(&frame->uc.uc_sigmask, set,
@@ -570,17 +573,17 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
}
PT_REGS_SP(regs) = (unsigned long) frame;
- PT_REGS_RDI(regs) = sig;
+ PT_REGS_DI(regs) = sig;
/* In case the signal handler was declared without prototypes */
- PT_REGS_RAX(regs) = 0;
+ PT_REGS_AX(regs) = 0;
/*
* This also works for non-SA_SIGINFO handlers because they expect the
* next argument after the signal number on the stack.
*/
- PT_REGS_RSI(regs) = (unsigned long) &frame->info;
- PT_REGS_RDX(regs) = (unsigned long) &frame->uc;
- PT_REGS_RIP(regs) = (unsigned long) ka->sa.sa_handler;
+ PT_REGS_SI(regs) = (unsigned long) &frame->info;
+ PT_REGS_DX(regs) = (unsigned long) &frame->uc;
+ PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
out:
return err;
}
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index 9924776f4265..170bd926a69c 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -31,7 +31,6 @@
#define stub_fork sys_fork
#define stub_vfork sys_vfork
#define stub_execve sys_execve
-#define stub_rt_sigsuspend sys_rt_sigsuspend
#define stub_sigaltstack sys_sigaltstack
#define stub_rt_sigreturn sys_rt_sigreturn
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
index 70ca357393b8..b853e8600b9d 100644
--- a/arch/x86/um/syscalls_32.c
+++ b/arch/x86/um/syscalls_32.c
@@ -44,10 +44,10 @@ long sys_sigaction(int sig, const struct old_sigaction __user *act,
old_sigset_t mask;
if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
__get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
- __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
+ __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) ||
+ __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
+ __get_user(mask, &act->sa_mask))
return -EFAULT;
- __get_user(new_ka.sa.sa_flags, &act->sa_flags);
- __get_user(mask, &act->sa_mask);
siginitset(&new_ka.sa.sa_mask, mask);
}
@@ -56,10 +56,10 @@ long sys_sigaction(int sig, const struct old_sigaction __user *act,
if (!ret && oact) {
if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
__put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
- __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
+ __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) ||
+ __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
+ __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
return -EFAULT;
- __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
- __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
}
return ret;
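
The sys_sigaction() hunks fold every __get_user()/__put_user() into one short-circuiting condition, so a fault on any field now reaches the -EFAULT return instead of being ignored as it was for sa_flags and sa_mask. The shape of the idiom, sketched with a hypothetical read_field() in place of __get_user():

    #include <errno.h>
    #include <stdio.h>

    static int read_field(const long *src, long *dst)
    {
            *dst = *src;            /* the real copy-in can fault */
            return 0;
    }

    static int copy_old_action(const long *u_handler, const long *u_flags,
                               long *handler, long *flags)
    {
            if (read_field(u_handler, handler) ||
                read_field(u_flags, flags))
                    return -EFAULT; /* first failure short-circuits the rest */
            return 0;
    }

    int main(void)
    {
            long uh = 1, uf = 2, h, f;

            printf("ret = %d\n", copy_old_action(&uh, &uf, &h, &f));
            return 0;
    }
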
diff --git a/arch/x86/um/sysrq_32.c b/arch/x86/um/sysrq_32.c
index 171b3e9dc867..2d5cc51e9bef 100644
--- a/arch/x86/um/sysrq_32.c
+++ b/arch/x86/um/sysrq_32.c
@@ -23,12 +23,10 @@ void show_regs(struct pt_regs *regs)
printk(" EFLAGS: %08lx\n %s\n", PT_REGS_EFLAGS(regs),
print_tainted());
printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
- PT_REGS_EAX(regs), PT_REGS_EBX(regs),
- PT_REGS_ECX(regs),
- PT_REGS_EDX(regs));
+ PT_REGS_AX(regs), PT_REGS_BX(regs),
+ PT_REGS_CX(regs), PT_REGS_DX(regs));
printk("ESI: %08lx EDI: %08lx EBP: %08lx",
- PT_REGS_ESI(regs), PT_REGS_EDI(regs),
- PT_REGS_EBP(regs));
+ PT_REGS_SI(regs), PT_REGS_DI(regs), PT_REGS_BP(regs));
printk(" DS: %04lx ES: %04lx\n",
0xffff & PT_REGS_DS(regs),
0xffff & PT_REGS_ES(regs));
diff --git a/arch/x86/um/sysrq_64.c b/arch/x86/um/sysrq_64.c
index e8913436d7dc..08258f179969 100644
--- a/arch/x86/um/sysrq_64.c
+++ b/arch/x86/um/sysrq_64.c
@@ -19,15 +19,15 @@ void __show_regs(struct pt_regs *regs)
printk(KERN_INFO "Pid: %d, comm: %.20s %s %s\n", task_pid_nr(current),
current->comm, print_tainted(), init_utsname()->release);
printk(KERN_INFO "RIP: %04lx:[<%016lx>]\n", PT_REGS_CS(regs) & 0xffff,
- PT_REGS_RIP(regs));
+ PT_REGS_IP(regs));
printk(KERN_INFO "RSP: %016lx EFLAGS: %08lx\n", PT_REGS_SP(regs),
PT_REGS_EFLAGS(regs));
printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
- PT_REGS_RAX(regs), PT_REGS_RBX(regs), PT_REGS_RCX(regs));
+ PT_REGS_AX(regs), PT_REGS_BX(regs), PT_REGS_CX(regs));
printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
- PT_REGS_RDX(regs), PT_REGS_RSI(regs), PT_REGS_RDI(regs));
+ PT_REGS_DX(regs), PT_REGS_SI(regs), PT_REGS_DI(regs));
printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
- PT_REGS_RBP(regs), PT_REGS_R8(regs), PT_REGS_R9(regs));
+ PT_REGS_BP(regs), PT_REGS_R8(regs), PT_REGS_R9(regs));
printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
PT_REGS_R10(regs), PT_REGS_R11(regs), PT_REGS_R12(regs));
printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c
index c6c7131e563b..baba84f8ecb8 100644
--- a/arch/x86/um/tls_32.c
+++ b/arch/x86/um/tls_32.c
@@ -219,7 +219,7 @@ int arch_copy_tls(struct task_struct *new)
int idx, ret = -EFAULT;
if (copy_from_user(&info,
- (void __user *) UPT_ESI(&new->thread.regs.regs),
+ (void __user *) UPT_SI(&new->thread.regs.regs),
sizeof(info)))
goto out;
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index add2c2d729ce..96ab2c09cb68 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -20,5 +20,5 @@ obj-$(CONFIG_EVENT_TRACING) += trace.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
-obj-$(CONFIG_XEN_DOM0) += vga.o
+obj-$(CONFIG_XEN_DOM0) += apic.o vga.o
obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
new file mode 100644
index 000000000000..ec57bd3818a4
--- /dev/null
+++ b/arch/x86/xen/apic.c
@@ -0,0 +1,33 @@
+#include <linux/init.h>
+
+#include <asm/x86_init.h>
+#include <asm/apic.h>
+#include <asm/xen/hypercall.h>
+
+#include <xen/xen.h>
+#include <xen/interface/physdev.h>
+
+unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
+{
+ struct physdev_apic apic_op;
+ int ret;
+
+ apic_op.apic_physbase = mpc_ioapic_addr(apic);
+ apic_op.reg = reg;
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
+ if (!ret)
+ return apic_op.value;
+
+ /* fall back to returning emulated IO_APIC values */
+ if (reg == 0x1)
+ return 0x00170020;
+ else if (reg == 0x0)
+ return apic << 24;
+
+ return 0xfd;
+}
+
+void __init xen_init_apic(void)
+{
+ x86_io_apic_ops.read = xen_io_apic_read;
+}
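
The fallback constants in xen_io_apic_read() follow the standard IO-APIC register layout: register 0x0 carries the APIC ID in its top byte, and register 0x1 packs the version into bits 0-7 and the highest redirection-entry index into bits 16-23, so 0x00170020 reads back as version 0x20 with 24 redirection entries; 0xfd matches the fill pattern the old fake_ioapic_mapping page used (see the mmu.c hunk below). A standalone sketch decoding those fields:

    /* Decode the emulated IO-APIC values returned above. */
    #include <stdio.h>

    int main(void)
    {
            unsigned int id_reg  = 2u << 24;        /* reg 0x0 for apic == 2 */
            unsigned int ver_reg = 0x00170020;      /* reg 0x1 */

            printf("id: %u\n", (id_reg >> 24) & 0xf);               /* 2 */
            printf("version: 0x%02x\n", ver_reg & 0xff);            /* 0x20 */
            printf("entries: %u\n", ((ver_reg >> 16) & 0xff) + 1);  /* 24 */
            return 0;
    }
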
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 4f51bebac02c..c0f5facdb10c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -63,6 +63,7 @@
#include <asm/stackprotector.h>
#include <asm/hypervisor.h>
#include <asm/mwait.h>
+#include <asm/pci_x86.h>
#ifdef CONFIG_ACPI
#include <linux/acpi.h>
@@ -261,7 +262,8 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
static bool __init xen_check_mwait(void)
{
-#ifdef CONFIG_ACPI
+#if defined(CONFIG_ACPI) && !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR) && \
+ !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR_MODULE)
struct xen_platform_op op = {
.cmd = XENPF_set_processor_pminfo,
.u.set_pminfo.id = -1,
@@ -349,7 +351,6 @@ static void __init xen_init_cpuid_mask(void)
/* Xen will set CR4.OSXSAVE if supported and not disabled by force */
if ((cx & xsave_mask) != xsave_mask)
cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */
-
if (xen_check_mwait())
cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32));
}
@@ -809,9 +810,40 @@ static void xen_io_delay(void)
}
#ifdef CONFIG_X86_LOCAL_APIC
+static unsigned long xen_set_apic_id(unsigned int x)
+{
+ WARN_ON(1);
+ return x;
+}
+static unsigned int xen_get_apic_id(unsigned long x)
+{
+ return ((x)>>24) & 0xFFu;
+}
static u32 xen_apic_read(u32 reg)
{
- return 0;
+ struct xen_platform_op op = {
+ .cmd = XENPF_get_cpuinfo,
+ .interface_version = XENPF_INTERFACE_VERSION,
+ .u.pcpu_info.xen_cpuid = 0,
+ };
+ int ret = 0;
+
+ /* Shouldn't need this, as the APIC is turned off for PV and we
+ * only get called on the boot processor. But just in case. */
+ if (!xen_initial_domain() || smp_processor_id())
+ return 0;
+
+ if (reg == APIC_LVR)
+ return 0x10;
+
+ if (reg != APIC_ID)
+ return 0;
+
+ ret = HYPERVISOR_dom0_op(&op);
+ if (ret)
+ return 0;
+
+ return op.u.pcpu_info.apic_id << 24;
}
static void xen_apic_write(u32 reg, u32 val)
@@ -849,6 +881,8 @@ static void set_xen_basic_apic_ops(void)
apic->icr_write = xen_apic_icr_write;
apic->wait_icr_idle = xen_apic_wait_icr_idle;
apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle;
+ apic->set_apic_id = xen_set_apic_id;
+ apic->get_apic_id = xen_get_apic_id;
}
#endif
@@ -1362,11 +1396,15 @@ asmlinkage void __init xen_start_kernel(void)
xen_start_info->console.domU.mfn = 0;
xen_start_info->console.domU.evtchn = 0;
+ xen_init_apic();
+
/* Make sure ACS will be enabled */
pci_request_acs();
}
-
-
+#ifdef CONFIG_PCI
+ /* PCI BIOS service won't work from a PV guest. */
+ pci_probe &= ~PCI_PROBE_BIOS;
+#endif
xen_raw_console_write("about to get started...\n");
xen_setup_runstate_info(0);
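
xen_apic_read() above only emulates two registers: APIC_LVR reports version 0x10, and APIC_ID returns the physical APIC ID fetched via XENPF_get_cpuinfo, shifted into bits 24-31 where the xAPIC ID register keeps it; that is exactly what the new xen_get_apic_id() masks back out. A standalone sketch of the round trip:

    /* Round-trip of the APIC-ID encoding used above: xen_apic_read()
     * shifts the ID into the top byte, xen_get_apic_id() extracts it. */
    #include <assert.h>

    static unsigned long encode_apic_id(unsigned int id)
    {
            return (unsigned long)id << 24;
    }

    static unsigned int get_apic_id(unsigned long x)
    {
            return (x >> 24) & 0xFFu;
    }

    int main(void)
    {
            assert(get_apic_id(encode_apic_id(5)) == 5);
            return 0;
    }
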
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index b8e279479a6b..3506cd4f9a43 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -353,8 +353,13 @@ static pteval_t pte_mfn_to_pfn(pteval_t val)
{
if (val & _PAGE_PRESENT) {
unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
+ unsigned long pfn = mfn_to_pfn(mfn);
+
pteval_t flags = val & PTE_FLAGS_MASK;
- val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags;
+ if (unlikely(pfn == ~0))
+ val = flags & ~_PAGE_PRESENT;
+ else
+ val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
}
return val;
@@ -1859,7 +1864,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
#endif /* CONFIG_X86_64 */
static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
-static unsigned char fake_ioapic_mapping[PAGE_SIZE] __page_aligned_bss;
static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
{
@@ -1900,7 +1904,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
* We just don't map the IO APIC - all access is via
* hypercalls. Keep the address in the pte for reference.
*/
- pte = pfn_pte(PFN_DOWN(__pa(fake_ioapic_mapping)), PAGE_KERNEL);
+ pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
break;
#endif
@@ -2065,7 +2069,6 @@ void __init xen_init_mmu_ops(void)
pv_mmu_ops = xen_mmu_ops;
memset(dummy_mapping, 0xff, PAGE_SIZE);
- memset(fake_ioapic_mapping, 0xfd, PAGE_SIZE);
}
/* Protected by xen_reservation_lock. */
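
The pte_mfn_to_pfn() hunk above handles machine frames with no backing pseudo-physical frame: mfn_to_pfn() signals a failed lookup by returning ~0, and rather than packing that bogus value into the PTE the code now clears _PAGE_PRESENT so Linux treats the entry as not present. A simplified standalone rendering of that logic (the constants and the always-failing mfn_to_pfn() below are stand-ins, not the kernel's):

    /* Simplified model of the translation-failure path: a failed
     * MFN-to-PFN lookup clears the present bit instead of storing ~0. */
    #include <assert.h>

    #define PAGE_SHIFT      12
    #define PAGE_PRESENT    0x1UL
    #define FLAGS_MASK      0xfffUL

    static unsigned long mfn_to_pfn(unsigned long mfn)
    {
            return ~0UL;            /* pretend the lookup failed */
    }

    static unsigned long pte_mfn_to_pfn(unsigned long val)
    {
            if (val & PAGE_PRESENT) {
                    unsigned long pfn = mfn_to_pfn(val >> PAGE_SHIFT);
                    unsigned long flags = val & FLAGS_MASK;

                    if (pfn == ~0UL)
                            val = flags & ~PAGE_PRESENT;
                    else
                            val = (pfn << PAGE_SHIFT) | flags;
            }
            return val;
    }

    int main(void)
    {
            unsigned long pte = (42UL << PAGE_SHIFT) | PAGE_PRESENT;

            assert(!(pte_mfn_to_pfn(pte) & PAGE_PRESENT));
            return 0;
    }
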
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 5fac6919b957..3700945ed0d5 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -178,6 +178,7 @@ static void __init xen_fill_possible_map(void)
static void __init xen_filter_cpu_maps(void)
{
int i, rc;
+ unsigned int subtract = 0;
if (!xen_initial_domain())
return;
@@ -192,8 +193,22 @@ static void __init xen_filter_cpu_maps(void)
} else {
set_cpu_possible(i, false);
set_cpu_present(i, false);
+ subtract++;
}
}
+#ifdef CONFIG_HOTPLUG_CPU
+ /* This is akin to using 'nr_cpus' on the Linux command line:
+ * with 'dom0_max_vcpus=X' we can only have up to X VCPUs,
+ * while nr_cpu_ids may be greater than X. That is normally
+ * not a problem, except when CPU hotplugging is involved -
+ * then the guest might be given more than X CPUs, which
+ * cannot work, as there is no hypercall to expand the
+ * maximum number of VCPUs an already running guest has.
+ * So cap nr_cpu_ids at X. */
+ if (subtract)
+ nr_cpu_ids = nr_cpu_ids - subtract;
+#endif
+
}
static void __init xen_smp_prepare_boot_cpu(void)
@@ -250,18 +265,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
set_cpu_possible(cpu, false);
}
- for_each_possible_cpu (cpu) {
- struct task_struct *idle;
-
- if (cpu == 0)
- continue;
-
- idle = fork_idle(cpu);
- if (IS_ERR(idle))
- panic("failed fork for CPU %d", cpu);
-
+ for_each_possible_cpu(cpu)
set_cpu_present(cpu, true);
- }
}
static int __cpuinit
@@ -331,9 +336,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
return 0;
}
-static int __cpuinit xen_cpu_up(unsigned int cpu)
+static int __cpuinit xen_cpu_up(unsigned int cpu, struct task_struct *idle)
{
- struct task_struct *idle = idle_task(cpu);
int rc;
per_cpu(current_task, cpu) = idle;
@@ -547,10 +551,10 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
xen_init_lock_cpu(0);
}
-static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
+static int __cpuinit xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int rc;
- rc = native_cpu_up(cpu);
+ rc = native_cpu_up(cpu, tidle);
WARN_ON (xen_smp_intr_init(cpu));
return rc;
}
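
The xen_cpu_up()/xen_hvm_cpu_up() signature change above matches the generic hotplug rework this series builds on: common code now selects the idle task once and passes it down to the architecture hook, so neither fork_idle() nor idle_task() is called from the Xen paths anymore. An illustrative model of the new calling convention (all names below are stand-ins, not the actual generic-code symbols):

    /* Illustrative model: core code owns the idle task and threads it
     * through to the arch bring-up hook, mirroring cpu_up(cpu, idle). */
    #include <stdio.h>

    struct task_struct { int pid; };

    static struct task_struct *idle_for(unsigned int cpu)
    {
            static struct task_struct idle = { .pid = 0 };
            return &idle;           /* chosen once, by common code */
    }

    static int arch_cpu_up(unsigned int cpu, struct task_struct *idle)
    {
            printf("cpu %u: idle pid %d\n", cpu, idle->pid);
            return 0;               /* the arch hook no longer forks its own idle */
    }

    int main(void)
    {
            unsigned int cpu = 1;

            return arch_cpu_up(cpu, idle_for(cpu));
    }
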
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 79d7362ad6d1..3e45aa000718 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -96,7 +96,7 @@ ENTRY(xen_restore_fl_direct)
/* check for unmasked and pending */
cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
- jz 1f
+ jnz 1f
2: call check_events
1:
ENDPATCH(xen_restore_fl_direct)
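
The one-byte fix above inverts a branch that had the test backwards: struct vcpu_info keeps evtchn_upcall_pending in the low byte and evtchn_upcall_mask in the byte after it, so the 16-bit compare against 0x0001 is true exactly when an event is pending and delivery is unmasked - the only case in which check_events must run. With jz the call was skipped in precisely that case; jnz skips it in every other case. A standalone C rendering of the test:

    /* C model of the fixed test: pending in the low byte, mask in the
     * high byte; 0x0001 means "unmasked with an event pending". */
    #include <assert.h>
    #include <stdint.h>

    static int should_check_events(uint8_t pending, uint8_t mask)
    {
            uint16_t word = (uint16_t)((mask << 8) | pending);

            return word == 0x0001;  /* jnz 1f skips the call otherwise */
    }

    int main(void)
    {
            assert(should_check_events(1, 0));      /* pending + unmasked: check */
            assert(!should_check_events(1, 1));     /* masked: skip */
            assert(!should_check_events(0, 0));     /* nothing pending: skip */
            return 0;
    }
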
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index b095739ccd4c..45c0c0667bd9 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -92,11 +92,15 @@ struct dom0_vga_console_info;
#ifdef CONFIG_XEN_DOM0
void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size);
+void __init xen_init_apic(void);
#else
static inline void __init xen_init_vga(const struct dom0_vga_console_info *info,
size_t size)
{
}
+static inline void __init xen_init_apic(void)
+{
+}
#endif
/* Declare an asm function, along with symbols needed to make it