aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2007-05-05 14:55:20 -0700
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-05-05 14:55:20 -0700
commitea62ccd00fd0b6720b033adfc9984f31130ce195 (patch)
tree9837b797b2466fffcb0af96c388b06eae9c3df18
parentFix compile of tmscsim SCSI driver (diff)
parent[PATCH] i386: Don't delete cpu_devs data to identify different x86 types in late_initcall (diff)
downloadlinux-dev-ea62ccd00fd0b6720b033adfc9984f31130ce195.tar.xz
linux-dev-ea62ccd00fd0b6720b033adfc9984f31130ce195.zip
Merge branch 'for-linus' of git://one.firstfloor.org/home/andi/git/linux-2.6
* 'for-linus' of git://one.firstfloor.org/home/andi/git/linux-2.6: (231 commits) [PATCH] i386: Don't delete cpu_devs data to identify different x86 types in late_initcall [PATCH] i386: type may be unused [PATCH] i386: Some additional chipset register values validation. [PATCH] i386: Add missing !X86_PAE dependincy to the 2G/2G split. [PATCH] x86-64: Don't exclude asm-offsets.c in Documentation/dontdiff [PATCH] i386: avoid redundant preempt_disable in __unlazy_fpu [PATCH] i386: white space fixes in i387.h [PATCH] i386: Drop noisy e820 debugging printks [PATCH] x86-64: Fix allnoconfig error in genapic_flat.c [PATCH] x86-64: Shut up warnings for vfat compat ioctls on other file systems [PATCH] x86-64: Share identical video.S between i386 and x86-64 [PATCH] x86-64: Remove CONFIG_REORDER [PATCH] x86-64: Print type and size correctly for unknown compat ioctls [PATCH] i386: Remove copy_*_user BUG_ONs for (size < 0) [PATCH] i386: Little cleanups in smpboot.c [PATCH] x86-64: Don't enable NUMA for a single node in K8 NUMA scanning [PATCH] x86: Use RDTSCP for synchronous get_cycles if possible [PATCH] i386: Add X86_FEATURE_RDTSCP [PATCH] i386: Implement X86_FEATURE_SYNC_RDTSC on i386 [PATCH] i386: Implement alternative_io for i386 ... Fix up trivial conflict in include/linux/highmem.h manually. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--CREDITS5
-rw-r--r--Documentation/dontdiff4
-rw-r--r--Documentation/i386/boot.txt23
-rw-r--r--Documentation/kernel-parameters.txt21
-rw-r--r--Documentation/x86_64/boot-options.txt14
-rw-r--r--Documentation/x86_64/fake-numa-for-cpusets66
-rw-r--r--Documentation/x86_64/machinecheck7
-rw-r--r--MAINTAINERS24
-rw-r--r--Makefile2
-rw-r--r--arch/alpha/boot/misc.c2
-rw-r--r--arch/alpha/kernel/vmlinux.lds.S2
-rw-r--r--arch/arm/boot/compressed/misc.c2
-rw-r--r--arch/arm/kernel/vmlinux.lds.S2
-rw-r--r--arch/arm26/boot/compressed/misc.c2
-rw-r--r--arch/cris/arch-v32/vmlinux.lds.S1
-rw-r--r--arch/frv/kernel/vmlinux.lds.S1
-rw-r--r--arch/i386/Kconfig38
-rw-r--r--arch/i386/Kconfig.cpu35
-rw-r--r--arch/i386/Kconfig.debug10
-rw-r--r--arch/i386/Makefile2
-rw-r--r--arch/i386/Makefile.cpu9
-rw-r--r--arch/i386/boot/Makefile4
-rw-r--r--arch/i386/boot/compressed/misc.c2
-rw-r--r--arch/i386/boot/setup.S24
-rw-r--r--arch/i386/defconfig73
-rw-r--r--arch/i386/kernel/Makefile4
-rw-r--r--arch/i386/kernel/acpi/boot.c2
-rw-r--r--arch/i386/kernel/acpi/earlyquirk.c21
-rw-r--r--arch/i386/kernel/alternative.c102
-rw-r--r--arch/i386/kernel/apic.c22
-rw-r--r--arch/i386/kernel/apm.c10
-rw-r--r--arch/i386/kernel/asm-offsets.c18
-rw-r--r--arch/i386/kernel/cpu/Makefile4
-rw-r--r--arch/i386/kernel/cpu/amd.c15
-rw-r--r--arch/i386/kernel/cpu/bugs.c191
-rw-r--r--arch/i386/kernel/cpu/centaur.c10
-rw-r--r--arch/i386/kernel/cpu/common.c217
-rw-r--r--arch/i386/kernel/cpu/cyrix.c21
-rw-r--r--arch/i386/kernel/cpu/intel.c4
-rw-r--r--arch/i386/kernel/cpu/mcheck/k7.c13
-rw-r--r--arch/i386/kernel/cpu/mcheck/mce.c3
-rw-r--r--arch/i386/kernel/cpu/mcheck/p4.c16
-rw-r--r--arch/i386/kernel/cpu/mtrr/generic.c101
-rw-r--r--arch/i386/kernel/cpu/mtrr/main.c11
-rw-r--r--arch/i386/kernel/cpu/nexgen.c10
-rw-r--r--arch/i386/kernel/cpu/perfctr-watchdog.c658
-rw-r--r--arch/i386/kernel/cpu/proc.c3
-rw-r--r--arch/i386/kernel/cpu/rise.c9
-rw-r--r--arch/i386/kernel/cpu/transmeta.c10
-rw-r--r--arch/i386/kernel/cpu/umc.c10
-rw-r--r--arch/i386/kernel/doublefault.c29
-rw-r--r--arch/i386/kernel/e820.c64
-rw-r--r--arch/i386/kernel/efi.c16
-rw-r--r--arch/i386/kernel/entry.S23
-rw-r--r--arch/i386/kernel/head.S118
-rw-r--r--arch/i386/kernel/i386_ksyms.c2
-rw-r--r--arch/i386/kernel/io_apic.c38
-rw-r--r--arch/i386/kernel/ioport.c3
-rw-r--r--arch/i386/kernel/irq.c3
-rw-r--r--arch/i386/kernel/mpparse.c2
-rw-r--r--arch/i386/kernel/nmi.c832
-rw-r--r--arch/i386/kernel/paravirt.c522
-rw-r--r--arch/i386/kernel/process.c37
-rw-r--r--arch/i386/kernel/quirks.c69
-rw-r--r--arch/i386/kernel/reboot.c55
-rw-r--r--arch/i386/kernel/reboot_fixups.c2
-rw-r--r--arch/i386/kernel/smp.c304
-rw-r--r--arch/i386/kernel/smpboot.c146
-rw-r--r--arch/i386/kernel/sysenter.c269
-rw-r--r--arch/i386/kernel/time.c2
-rw-r--r--arch/i386/kernel/trampoline.S12
-rw-r--r--arch/i386/kernel/traps.c32
-rw-r--r--arch/i386/kernel/tsc.c13
-rw-r--r--arch/i386/kernel/verify_cpu.S65
-rw-r--r--arch/i386/kernel/vmi.c131
-rw-r--r--arch/i386/kernel/vmiclock.c318
-rw-r--r--arch/i386/kernel/vmitime.c482
-rw-r--r--arch/i386/kernel/vmlinux.lds.S24
-rw-r--r--arch/i386/kernel/vsyscall.lds.S4
-rw-r--r--arch/i386/lib/bitops.c4
-rw-r--r--arch/i386/lib/checksum.S69
-rw-r--r--arch/i386/lib/getuser.S26
-rw-r--r--arch/i386/lib/putuser.S39
-rw-r--r--arch/i386/lib/usercopy.c7
-rw-r--r--arch/i386/mach-generic/bigsmp.c2
-rw-r--r--arch/i386/mach-generic/es7000.c41
-rw-r--r--arch/i386/mach-voyager/voyager_smp.c16
-rw-r--r--arch/i386/mm/fault.c60
-rw-r--r--arch/i386/mm/highmem.c10
-rw-r--r--arch/i386/mm/init.c196
-rw-r--r--arch/i386/mm/pageattr.c6
-rw-r--r--arch/i386/mm/pgtable.c94
-rw-r--r--arch/i386/oprofile/nmi_int.c4
-rw-r--r--arch/i386/pci/init.c2
-rw-r--r--arch/i386/pci/mmconfig-shared.c25
-rw-r--r--arch/i386/power/cpu.c1
-rw-r--r--arch/i386/power/suspend.c14
-rw-r--r--arch/m32r/kernel/vmlinux.lds.S2
-rw-r--r--arch/mips/kernel/vmlinux.lds.S2
-rw-r--r--arch/parisc/kernel/vmlinux.lds.S2
-rw-r--r--arch/powerpc/kernel/Makefile1
-rw-r--r--arch/powerpc/kernel/setup_64.c4
-rw-r--r--arch/powerpc/kernel/suspend.c24
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S6
-rw-r--r--arch/ppc/kernel/vmlinux.lds.S2
-rw-r--r--arch/s390/kernel/vmlinux.lds.S2
-rw-r--r--arch/sh/kernel/vmlinux.lds.S2
-rw-r--r--arch/sh64/kernel/vmlinux.lds.S2
-rw-r--r--arch/sparc/kernel/vmlinux.lds.S2
-rw-r--r--arch/sparc64/kernel/smp.c6
-rw-r--r--arch/um/defconfig1
-rw-r--r--arch/x86_64/Kconfig61
-rw-r--r--arch/x86_64/Makefile4
-rw-r--r--arch/x86_64/boot/Makefile2
-rw-r--r--arch/x86_64/boot/compressed/Makefile12
-rw-r--r--arch/x86_64/boot/compressed/head.S339
-rw-r--r--arch/x86_64/boot/compressed/misc.c247
-rw-r--r--arch/x86_64/boot/compressed/vmlinux.lds44
-rw-r--r--arch/x86_64/boot/compressed/vmlinux.scr9
-rw-r--r--arch/x86_64/boot/setup.S85
-rw-r--r--arch/x86_64/boot/video.S2043
-rw-r--r--arch/x86_64/defconfig183
-rw-r--r--arch/x86_64/ia32/ia32_binfmt.c10
-rw-r--r--arch/x86_64/ia32/ia32entry.S4
-rw-r--r--arch/x86_64/ia32/syscall32.c1
-rw-r--r--arch/x86_64/kernel/Makefile7
-rw-r--r--arch/x86_64/kernel/acpi/sleep.c24
-rw-r--r--arch/x86_64/kernel/acpi/wakeup.S286
-rw-r--r--arch/x86_64/kernel/aperture.c5
-rw-r--r--arch/x86_64/kernel/apic.c35
-rw-r--r--arch/x86_64/kernel/asm-offsets.c10
-rw-r--r--arch/x86_64/kernel/bugs.c21
-rw-r--r--arch/x86_64/kernel/e820.c5
-rw-r--r--arch/x86_64/kernel/early-quirks.c13
-rw-r--r--arch/x86_64/kernel/early_printk.c5
-rw-r--r--arch/x86_64/kernel/entry.S5
-rw-r--r--arch/x86_64/kernel/functionlist1284
-rw-r--r--arch/x86_64/kernel/genapic.c104
-rw-r--r--arch/x86_64/kernel/genapic_cluster.c137
-rw-r--r--arch/x86_64/kernel/genapic_flat.c25
-rw-r--r--arch/x86_64/kernel/head.S340
-rw-r--r--arch/x86_64/kernel/head64.c41
-rw-r--r--arch/x86_64/kernel/io_apic.c31
-rw-r--r--arch/x86_64/kernel/ioport.c1
-rw-r--r--arch/x86_64/kernel/machine_kexec.c14
-rw-r--r--arch/x86_64/kernel/mce.c32
-rw-r--r--arch/x86_64/kernel/mpparse.c2
-rw-r--r--arch/x86_64/kernel/nmi.c678
-rw-r--r--arch/x86_64/kernel/pci-calgary.c2
-rw-r--r--arch/x86_64/kernel/pci-gart.c2
-rw-r--r--arch/x86_64/kernel/pci-nommu.c2
-rw-r--r--arch/x86_64/kernel/pci-swiotlb.c2
-rw-r--r--arch/x86_64/kernel/process.c12
-rw-r--r--arch/x86_64/kernel/setup.c35
-rw-r--r--arch/x86_64/kernel/setup64.c5
-rw-r--r--arch/x86_64/kernel/signal.c6
-rw-r--r--arch/x86_64/kernel/smp.c30
-rw-r--r--arch/x86_64/kernel/smpboot.c47
-rw-r--r--arch/x86_64/kernel/suspend.c19
-rw-r--r--arch/x86_64/kernel/suspend_asm.S7
-rw-r--r--arch/x86_64/kernel/syscall.c1
-rw-r--r--arch/x86_64/kernel/time.c71
-rw-r--r--arch/x86_64/kernel/trampoline.S123
-rw-r--r--arch/x86_64/kernel/traps.c34
-rw-r--r--arch/x86_64/kernel/tsc.c17
-rw-r--r--arch/x86_64/kernel/tsc_sync.c4
-rw-r--r--arch/x86_64/kernel/verify_cpu.S119
-rw-r--r--arch/x86_64/kernel/vmlinux.lds.S20
-rw-r--r--arch/x86_64/kernel/vsyscall.c68
-rw-r--r--arch/x86_64/mm/fault.c5
-rw-r--r--arch/x86_64/mm/init.c172
-rw-r--r--arch/x86_64/mm/k8topology.c9
-rw-r--r--arch/x86_64/mm/numa.c306
-rw-r--r--arch/x86_64/mm/pageattr.c32
-rw-r--r--arch/x86_64/mm/srat.c8
-rw-r--r--arch/xtensa/kernel/vmlinux.lds.S2
-rw-r--r--drivers/acpi/processor_idle.c4
-rw-r--r--drivers/char/agp/amd64-agp.c13
-rw-r--r--drivers/video/console/vgacon.c3
-rw-r--r--fs/compat.c5
-rw-r--r--fs/compat_ioctl.c19
-rw-r--r--fs/proc/vmcore.c2
-rw-r--r--include/asm-alpha/mmu_context.h1
-rw-r--r--include/asm-alpha/percpu.h14
-rw-r--r--include/asm-arm/mmu_context.h1
-rw-r--r--include/asm-arm26/mmu_context.h2
-rw-r--r--include/asm-avr32/mmu_context.h1
-rw-r--r--include/asm-cris/mmu_context.h2
-rw-r--r--include/asm-frv/mmu_context.h1
-rw-r--r--include/asm-generic/mm_hooks.h18
-rw-r--r--include/asm-generic/percpu.h1
-rw-r--r--include/asm-generic/vmlinux.lds.h2
-rw-r--r--include/asm-h8300/mmu_context.h1
-rw-r--r--include/asm-i386/Kbuild2
-rw-r--r--include/asm-i386/alternative.h34
-rw-r--r--include/asm-i386/apic.h9
-rw-r--r--include/asm-i386/bugs.h194
-rw-r--r--include/asm-i386/cpufeature.h13
-rw-r--r--include/asm-i386/current.h5
-rw-r--r--include/asm-i386/desc.h95
-rw-r--r--include/asm-i386/e820.h1
-rw-r--r--include/asm-i386/elf.h28
-rw-r--r--include/asm-i386/fixmap.h11
-rw-r--r--include/asm-i386/genapic.h6
-rw-r--r--include/asm-i386/highmem.h6
-rw-r--r--include/asm-i386/hpet.h2
-rw-r--r--include/asm-i386/i387.h17
-rw-r--r--include/asm-i386/io.h15
-rw-r--r--include/asm-i386/irq.h2
-rw-r--r--include/asm-i386/irq_regs.h12
-rw-r--r--include/asm-i386/irqflags.h64
-rw-r--r--include/asm-i386/kexec.h8
-rw-r--r--include/asm-i386/mach-bigsmp/mach_apic.h2
-rw-r--r--include/asm-i386/mach-default/mach_apic.h2
-rw-r--r--include/asm-i386/mach-es7000/mach_apic.h9
-rw-r--r--include/asm-i386/mach-es7000/mach_mpparse.h32
-rw-r--r--include/asm-i386/mach-generic/mach_apic.h2
-rw-r--r--include/asm-i386/mach-numaq/mach_apic.h2
-rw-r--r--include/asm-i386/mach-summit/mach_apic.h2
-rw-r--r--include/asm-i386/mach-summit/mach_mpparse.h4
-rw-r--r--include/asm-i386/mach-visws/mach_apic.h2
-rw-r--r--include/asm-i386/mmu_context.h17
-rw-r--r--include/asm-i386/module.h2
-rw-r--r--include/asm-i386/msr-index.h278
-rw-r--r--include/asm-i386/msr.h400
-rw-r--r--include/asm-i386/mtrr.h4
-rw-r--r--include/asm-i386/nmi.h8
-rw-r--r--include/asm-i386/page.h81
-rw-r--r--include/asm-i386/paravirt.h957
-rw-r--r--include/asm-i386/pda.h100
-rw-r--r--include/asm-i386/percpu.h136
-rw-r--r--include/asm-i386/pgalloc.h1
-rw-r--r--include/asm-i386/pgtable-2level-defs.h2
-rw-r--r--include/asm-i386/pgtable-2level.h37
-rw-r--r--include/asm-i386/pgtable-3level-defs.h6
-rw-r--r--include/asm-i386/pgtable-3level.h69
-rw-r--r--include/asm-i386/pgtable.h62
-rw-r--r--include/asm-i386/processor-flags.h91
-rw-r--r--include/asm-i386/processor.h187
-rw-r--r--include/asm-i386/reboot.h20
-rw-r--r--include/asm-i386/reboot_fixups.h (renamed from include/linux/reboot_fixups.h)4
-rw-r--r--include/asm-i386/required-features.h34
-rw-r--r--include/asm-i386/segment.h10
-rw-r--r--include/asm-i386/smp.h64
-rw-r--r--include/asm-i386/system.h139
-rw-r--r--include/asm-i386/timer.h2
-rw-r--r--include/asm-i386/tlbflush.h19
-rw-r--r--include/asm-i386/tsc.h15
-rw-r--r--include/asm-i386/uaccess.h14
-rw-r--r--include/asm-i386/vmi_time.h18
-rw-r--r--include/asm-ia64/mmu_context.h1
-rw-r--r--include/asm-m32r/mmu_context.h1
-rw-r--r--include/asm-m68k/mmu_context.h1
-rw-r--r--include/asm-m68knommu/mmu_context.h1
-rw-r--r--include/asm-mips/mmu_context.h1
-rw-r--r--include/asm-parisc/mmu_context.h1
-rw-r--r--include/asm-powerpc/mmu_context.h1
-rw-r--r--include/asm-ppc/mmu_context.h1
-rw-r--r--include/asm-s390/mmu_context.h2
-rw-r--r--include/asm-sh/mmu_context.h1
-rw-r--r--include/asm-sh64/mmu_context.h2
-rw-r--r--include/asm-sparc/mmu_context.h2
-rw-r--r--include/asm-sparc64/mmu_context.h1
-rw-r--r--include/asm-sparc64/percpu.h10
-rw-r--r--include/asm-um/mmu_context.h2
-rw-r--r--include/asm-v850/mmu_context.h2
-rw-r--r--include/asm-x86_64/Kbuild4
-rw-r--r--include/asm-x86_64/alternative.h5
-rw-r--r--include/asm-x86_64/apic.h10
-rw-r--r--include/asm-x86_64/bugs.h30
-rw-r--r--include/asm-x86_64/const.h20
-rw-r--r--include/asm-x86_64/desc.h21
-rw-r--r--include/asm-x86_64/dma-mapping.h2
-rw-r--r--include/asm-x86_64/fixmap.h1
-rw-r--r--include/asm-x86_64/genapic.h4
-rw-r--r--include/asm-x86_64/ipi.h61
-rw-r--r--include/asm-x86_64/irqflags.h9
-rw-r--r--include/asm-x86_64/mmu_context.h1
-rw-r--r--include/asm-x86_64/mmzone.h2
-rw-r--r--include/asm-x86_64/msr-index.h1
-rw-r--r--include/asm-x86_64/msr.h274
-rw-r--r--include/asm-x86_64/mtrr.h12
-rw-r--r--include/asm-x86_64/nmi.h9
-rw-r--r--include/asm-x86_64/page.h39
-rw-r--r--include/asm-x86_64/percpu.h10
-rw-r--r--include/asm-x86_64/pgalloc.h15
-rw-r--r--include/asm-x86_64/pgtable.h42
-rw-r--r--include/asm-x86_64/processor-flags.h1
-rw-r--r--include/asm-x86_64/processor.h55
-rw-r--r--include/asm-x86_64/proto.h15
-rw-r--r--include/asm-x86_64/segment.h2
-rw-r--r--include/asm-x86_64/smp.h4
-rw-r--r--include/asm-x86_64/suspend.h13
-rw-r--r--include/asm-x86_64/system.h7
-rw-r--r--include/asm-x86_64/timex.h2
-rw-r--r--include/asm-x86_64/tlbflush.h33
-rw-r--r--include/asm-x86_64/unistd.h3
-rw-r--r--include/asm-xtensa/mmu_context.h1
-rw-r--r--include/linux/bootmem.h4
-rw-r--r--include/linux/cpufreq.h19
-rw-r--r--include/linux/crash_dump.h8
-rw-r--r--include/linux/elf.h17
-rw-r--r--include/linux/elfnote.h4
-rw-r--r--include/linux/highmem.h5
-rw-r--r--include/linux/init.h7
-rw-r--r--include/linux/percpu.h9
-rw-r--r--include/linux/poison.h3
-rw-r--r--init/main.c8
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/module.c10
-rw-r--r--kernel/power/power.h5
-rw-r--r--kernel/power/snapshot.c11
-rw-r--r--kernel/power/swap.c42
-rw-r--r--lib/inflate.c129
-rw-r--r--mm/highmem.c9
-rw-r--r--mm/mmap.c4
-rw-r--r--mm/slab.c7
-rw-r--r--mm/vmalloc.c14
-rw-r--r--scripts/mod/modpost.c1
319 files changed, 7564 insertions, 10400 deletions
diff --git a/CREDITS b/CREDITS
index d7140309e06d..c5f819bacda3 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1745,8 +1745,9 @@ S: D-64295
S: Germany
N: Andi Kleen
-E: ak@muc.de
-D: network hacker, syncookies
+E: andi@firstfloor.org
+U: http://www.halobates.de
+D: network, x86, NUMA, various hacks
S: Schwalbenstr. 96
S: 85551 Ottobrunn
S: Germany
diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index 63c2d0c55aa2..64e9f6c4826b 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -55,8 +55,8 @@ aic7*seq.h*
aicasm
aicdb.h*
asm
-asm-offsets.*
-asm_offsets.*
+asm-offsets.h
+asm_offsets.h
autoconf.h*
bbootsect
bin2c
diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt
index 38fe1f03fb14..6498666ea330 100644
--- a/Documentation/i386/boot.txt
+++ b/Documentation/i386/boot.txt
@@ -2,7 +2,7 @@
----------------------------
H. Peter Anvin <hpa@zytor.com>
- Last update 2007-01-26
+ Last update 2007-03-06
On the i386 platform, the Linux kernel uses a rather complicated boot
convention. This has evolved partially due to historical aspects, as
@@ -35,9 +35,13 @@ Protocol 2.03: (Kernel 2.4.18-pre1) Explicitly makes the highest possible
initrd address available to the bootloader.
Protocol 2.04: (Kernel 2.6.14) Extend the syssize field to four bytes.
+
Protocol 2.05: (Kernel 2.6.20) Make protected mode kernel relocatable.
Introduce relocatable_kernel and kernel_alignment fields.
+Protocol 2.06: (Kernel 2.6.22) Added a field that contains the size of
+ the boot command line
+
**** MEMORY LAYOUT
@@ -133,6 +137,8 @@ Offset Proto Name Meaning
022C/4 2.03+ initrd_addr_max Highest legal initrd address
0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel
0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not
+0235/3 N/A pad2 Unused
+0238/4 2.06+ cmdline_size Maximum size of the kernel command line
(1) For backwards compatibility, if the setup_sects field contains 0, the
real value is 4.
@@ -233,6 +239,12 @@ filled out, however:
if your ramdisk is exactly 131072 bytes long and this field is
0x37FFFFFF, you can start your ramdisk at 0x37FE0000.)
+ cmdline_size:
+ The maximum size of the command line without the terminating
+ zero. This means that the command line can contain at most
+ cmdline_size characters. With protocol version 2.05 and
+ earlier, the maximum size was 255.
+
**** THE KERNEL COMMAND LINE
@@ -241,11 +253,10 @@ loader to communicate with the kernel. Some of its options are also
relevant to the boot loader itself, see "special command line options"
below.
-The kernel command line is a null-terminated string currently up to
-255 characters long, plus the final null. A string that is too long
-will be automatically truncated by the kernel, a boot loader may allow
-a longer command line to be passed to permit future kernels to extend
-this limit.
+The kernel command line is a null-terminated string. The maximum
+length can be retrieved from the field cmdline_size. Before protocol
+version 2.06, the maximum was 255 characters. A string that is too
+long will be automatically truncated by the kernel.
If the boot protocol version is 2.02 or later, the address of the
kernel command line is given by the header field cmd_line_ptr (see
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 84c3bd05c639..38d7db3262c7 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -64,6 +64,7 @@ parameter is applicable:
GENERIC_TIME The generic timeofday code is enabled.
NFS Appropriate NFS support is enabled.
OSS OSS sound support is enabled.
+ PV_OPS A paravirtualized kernel
PARIDE The ParIDE subsystem is enabled.
PARISC The PA-RISC architecture is enabled.
PCI PCI bus support is enabled.
@@ -695,8 +696,15 @@ and is between 256 and 4096 characters. It is defined in the file
idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed
See Documentation/ide.txt.
- idle= [HW]
- Format: idle=poll or idle=halt
+ idle= [X86]
+ Format: idle=poll or idle=mwait
+ Poll forces a polling idle loop that can slightly improves the performance
+ of waking up a idle CPU, but will use a lot of power and make the system
+ run hot. Not recommended.
+ idle=mwait. On systems which support MONITOR/MWAIT but the kernel chose
+ to not use it because it doesn't save as much power as a normal idle
+ loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same
+ as idle=poll.
ignore_loglevel [KNL]
Ignore loglevel setting - this will print /all/
@@ -1157,6 +1165,11 @@ and is between 256 and 4096 characters. It is defined in the file
nomce [IA-32] Machine Check Exception
+ noreplace-paravirt [IA-32,PV_OPS] Don't patch paravirt_ops
+
+ noreplace-smp [IA-32,SMP] Don't replace SMP instructions
+ with UP alternatives
+
noresidual [PPC] Don't use residual data on PReP machines.
noresume [SWSUSP] Disables resume and restores original swap
@@ -1562,6 +1575,9 @@ and is between 256 and 4096 characters. It is defined in the file
smart2= [HW]
Format: <io1>[,<io2>[,...,<io8>]]
+ smp-alt-once [IA-32,SMP] On a hotplug CPU system, only
+ attempt to substitute SMP alternatives once at boot.
+
snd-ad1816a= [HW,ALSA]
snd-ad1848= [HW,ALSA]
@@ -1820,6 +1836,7 @@ and is between 256 and 4096 characters. It is defined in the file
[USBHID] The interval which mice are to be polled at.
vdso= [IA-32,SH]
+ vdso=2: enable compat VDSO (default with COMPAT_VDSO)
vdso=1: enable VDSO (default)
vdso=0: disable VDSO mapping
diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index 85f51e5a749f..6177d881983f 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -149,7 +149,19 @@ NUMA
numa=noacpi Don't parse the SRAT table for NUMA setup
- numa=fake=X Fake X nodes and ignore NUMA setup of the actual machine.
+ numa=fake=CMDLINE
+ If a number, fakes CMDLINE nodes and ignores NUMA setup of the
+ actual machine. Otherwise, system memory is configured
+ depending on the sizes and coefficients listed. For example:
+ numa=fake=2*512,1024,4*256,*128
+ gives two 512M nodes, a 1024M node, four 256M nodes, and the
+ rest split into 128M chunks. If the last character of CMDLINE
+ is a *, the remaining memory is divided up equally among its
+ coefficient:
+ numa=fake=2*512,2*
+ gives two 512M nodes and the rest split into two nodes.
+ Otherwise, the remaining system RAM is allocated to an
+ additional node.
numa=hotadd=percent
Only allow hotadd memory to preallocate page structures upto
diff --git a/Documentation/x86_64/fake-numa-for-cpusets b/Documentation/x86_64/fake-numa-for-cpusets
new file mode 100644
index 000000000000..d1a985c5b00a
--- /dev/null
+++ b/Documentation/x86_64/fake-numa-for-cpusets
@@ -0,0 +1,66 @@
+Using numa=fake and CPUSets for Resource Management
+Written by David Rientjes <rientjes@cs.washington.edu>
+
+This document describes how the numa=fake x86_64 command-line option can be used
+in conjunction with cpusets for coarse memory management. Using this feature,
+you can create fake NUMA nodes that represent contiguous chunks of memory and
+assign them to cpusets and their attached tasks. This is a way of limiting the
+amount of system memory that are available to a certain class of tasks.
+
+For more information on the features of cpusets, see Documentation/cpusets.txt.
+There are a number of different configurations you can use for your needs. For
+more information on the numa=fake command line option and its various ways of
+configuring fake nodes, see Documentation/x86_64/boot-options.txt.
+
+For the purposes of this introduction, we'll assume a very primitive NUMA
+emulation setup of "numa=fake=4*512,". This will split our system memory into
+four equal chunks of 512M each that we can now use to assign to cpusets. As
+you become more familiar with using this combination for resource control,
+you'll determine a better setup to minimize the number of nodes you have to deal
+with.
+
+A machine may be split as follows with "numa=fake=4*512," as reported by dmesg:
+
+ Faking node 0 at 0000000000000000-0000000020000000 (512MB)
+ Faking node 1 at 0000000020000000-0000000040000000 (512MB)
+ Faking node 2 at 0000000040000000-0000000060000000 (512MB)
+ Faking node 3 at 0000000060000000-0000000080000000 (512MB)
+ ...
+ On node 0 totalpages: 130975
+ On node 1 totalpages: 131072
+ On node 2 totalpages: 131072
+ On node 3 totalpages: 131072
+
+Now following the instructions for mounting the cpusets filesystem from
+Documentation/cpusets.txt, you can assign fake nodes (i.e. contiguous memory
+address spaces) to individual cpusets:
+
+ [root@xroads /]# mkdir exampleset
+ [root@xroads /]# mount -t cpuset none exampleset
+ [root@xroads /]# mkdir exampleset/ddset
+ [root@xroads /]# cd exampleset/ddset
+ [root@xroads /exampleset/ddset]# echo 0-1 > cpus
+ [root@xroads /exampleset/ddset]# echo 0-1 > mems
+
+Now this cpuset, 'ddset', will only allowed access to fake nodes 0 and 1 for
+memory allocations (1G).
+
+You can now assign tasks to these cpusets to limit the memory resources
+available to them according to the fake nodes assigned as mems:
+
+ [root@xroads /exampleset/ddset]# echo $$ > tasks
+ [root@xroads /exampleset/ddset]# dd if=/dev/zero of=tmp bs=1024 count=1G
+ [1] 13425
+
+Notice the difference between the system memory usage as reported by
+/proc/meminfo between the restricted cpuset case above and the unrestricted
+case (i.e. running the same 'dd' command without assigning it to a fake NUMA
+cpuset):
+ Unrestricted Restricted
+ MemTotal: 3091900 kB 3091900 kB
+ MemFree: 42113 kB 1513236 kB
+
+This allows for coarse memory management for the tasks you assign to particular
+cpusets. Since cpusets can form a hierarchy, you can create some pretty
+interesting combinations of use-cases for various classes of tasks for your
+memory management needs.
diff --git a/Documentation/x86_64/machinecheck b/Documentation/x86_64/machinecheck
index 068a6d9904b9..feaeaf6f6e4d 100644
--- a/Documentation/x86_64/machinecheck
+++ b/Documentation/x86_64/machinecheck
@@ -36,7 +36,12 @@ between all CPUs.
check_interval
How often to poll for corrected machine check errors, in seconds
- (Note output is hexademical). Default 5 minutes.
+ (Note output is hexademical). Default 5 minutes. When the poller
+ finds MCEs it triggers an exponential speedup (poll more often) on
+ the polling interval. When the poller stops finding MCEs, it
+ triggers an exponential backoff (poll less often) on the polling
+ interval. The check_interval variable is both the initial and
+ maximum polling interval.
tolerant
Tolerance level. When a machine check exception occurs for a non
diff --git a/MAINTAINERS b/MAINTAINERS
index b36923e72ce8..0492dd88e12a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1617,7 +1617,7 @@ S: Maintained
HPET: x86_64
P: Andi Kleen and Vojtech Pavlik
-M: ak@muc.de and vojtech@suse.cz
+M: andi@firstfloor.org and vojtech@suse.cz
S: Maintained
HPET: ACPI hpet.c
@@ -2652,6 +2652,19 @@ T: git kernel.org:/pub/scm/linux/kernel/git/kyle/parisc-2.6.git
T: cvs cvs.parisc-linux.org:/var/cvs/linux-2.6
S: Maintained
+PARAVIRT_OPS INTERFACE
+P: Jeremy Fitzhardinge
+M: jeremy@xensource.com
+P: Chris Wright
+M: chrisw@sous-sol.org
+P: Zachary Amsden
+M: zach@vmware.com
+P: Rusty Russell
+M: rusty@rustcorp.com.au
+L: virtualization@lists.osdl.org
+L: linux-kernel@vger.kernel.org
+S: Supported
+
PC87360 HARDWARE MONITORING DRIVER
P: Jim Cromie
M: jim.cromie@gmail.com
@@ -3876,6 +3889,15 @@ M: eis@baty.hanse.de
L: linux-x25@vger.kernel.org
S: Maintained
+XEN HYPERVISOR INTERFACE
+P: Jeremy Fitzhardinge
+M: jeremy@xensource.com
+P: Chris Wright
+M: chrisw@sous-sol.org
+L: virtualization@lists.osdl.org
+L: xen-devel@lists.xensource.com
+S: Supported
+
XFS FILESYSTEM
P: Silicon Graphics Inc
P: Tim Shimmin, David Chatterton
diff --git a/Makefile b/Makefile
index d970cb16545a..387526b69d4f 100644
--- a/Makefile
+++ b/Makefile
@@ -491,7 +491,7 @@ endif
include $(srctree)/arch/$(ARCH)/Makefile
ifdef CONFIG_FRAME_POINTER
-CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,)
+CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
else
CFLAGS += -fomit-frame-pointer
endif
diff --git a/arch/alpha/boot/misc.c b/arch/alpha/boot/misc.c
index 1d65adf5691e..c00646b25f6e 100644
--- a/arch/alpha/boot/misc.c
+++ b/arch/alpha/boot/misc.c
@@ -98,7 +98,7 @@ extern int end;
static ulg free_mem_ptr;
static ulg free_mem_ptr_end;
-#define HEAP_SIZE 0x2000
+#define HEAP_SIZE 0x3000
#include "../../../lib/inflate.c"
diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S
index 4cc44bd33d33..cf1e6fc6c686 100644
--- a/arch/alpha/kernel/vmlinux.lds.S
+++ b/arch/alpha/kernel/vmlinux.lds.S
@@ -69,7 +69,7 @@ SECTIONS
. = ALIGN(8);
SECURITY_INIT
- . = ALIGN(64);
+ . = ALIGN(8192);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 283891c736c4..9b444022cb9b 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -239,7 +239,7 @@ extern int end;
static ulg free_mem_ptr;
static ulg free_mem_ptr_end;
-#define HEAP_SIZE 0x2000
+#define HEAP_SIZE 0x3000
#include "../../../../lib/inflate.c"
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index ddbdad48f5b2..d1a6a597ed9a 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -59,7 +59,7 @@ SECTIONS
usr/built-in.o(.init.ramfs)
__initramfs_end = .;
#endif
- . = ALIGN(64);
+ . = ALIGN(4096);
__per_cpu_start = .;
*(.data.percpu)
__per_cpu_end = .;
diff --git a/arch/arm26/boot/compressed/misc.c b/arch/arm26/boot/compressed/misc.c
index f17f50e5516f..0714d19c5776 100644
--- a/arch/arm26/boot/compressed/misc.c
+++ b/arch/arm26/boot/compressed/misc.c
@@ -182,7 +182,7 @@ extern int end;
static ulg free_mem_ptr;
static ulg free_mem_ptr_end;
-#define HEAP_SIZE 0x2000
+#define HEAP_SIZE 0x3000
#include "../../../../lib/inflate.c"
diff --git a/arch/cris/arch-v32/vmlinux.lds.S b/arch/cris/arch-v32/vmlinux.lds.S
index e124fcd766d5..dfa25e1542b9 100644
--- a/arch/cris/arch-v32/vmlinux.lds.S
+++ b/arch/cris/arch-v32/vmlinux.lds.S
@@ -91,6 +91,7 @@ SECTIONS
}
SECURITY_INIT
+ . = ALIGN (8192);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S
index 97910e016825..28eae9735ad6 100644
--- a/arch/frv/kernel/vmlinux.lds.S
+++ b/arch/frv/kernel/vmlinux.lds.S
@@ -57,6 +57,7 @@ SECTIONS
__alt_instructions_end = .;
.altinstr_replacement : { *(.altinstr_replacement) }
+ . = ALIGN(4096);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index bcf2fc408a1a..a9af760c7e5f 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -220,7 +220,7 @@ config PARAVIRT
config VMI
bool "VMI Paravirt-ops support"
- depends on PARAVIRT && !COMPAT_VDSO
+ depends on PARAVIRT
help
VMI provides a paravirtualized interface to the VMware ESX server
(it could be used by other hypervisors in theory too, but is not
@@ -571,6 +571,9 @@ choice
bool "3G/1G user/kernel split (for full 1G low memory)"
config VMSPLIT_2G
bool "2G/2G user/kernel split"
+ config VMSPLIT_2G_OPT
+ depends on !HIGHMEM
+ bool "2G/2G user/kernel split (for full 2G low memory)"
config VMSPLIT_1G
bool "1G/3G user/kernel split"
endchoice
@@ -578,7 +581,8 @@ endchoice
config PAGE_OFFSET
hex
default 0xB0000000 if VMSPLIT_3G_OPT
- default 0x78000000 if VMSPLIT_2G
+ default 0x80000000 if VMSPLIT_2G
+ default 0x78000000 if VMSPLIT_2G_OPT
default 0x40000000 if VMSPLIT_1G
default 0xC0000000
@@ -915,12 +919,9 @@ source kernel/power/Kconfig
source "drivers/acpi/Kconfig"
-menu "APM (Advanced Power Management) BIOS Support"
-depends on PM && !X86_VISWS
-
-config APM
+menuconfig APM
tristate "APM (Advanced Power Management) BIOS support"
- depends on PM
+ depends on PM && !X86_VISWS
---help---
APM is a BIOS specification for saving power using several different
techniques. This is mostly useful for battery powered laptops with
@@ -977,9 +978,10 @@ config APM
To compile this driver as a module, choose M here: the
module will be called apm.
+if APM
+
config APM_IGNORE_USER_SUSPEND
bool "Ignore USER SUSPEND"
- depends on APM
help
This option will ignore USER SUSPEND requests. On machines with a
compliant APM BIOS, you want to say N. However, on the NEC Versa M
@@ -987,7 +989,6 @@ config APM_IGNORE_USER_SUSPEND
config APM_DO_ENABLE
bool "Enable PM at boot time"
- depends on APM
---help---
Enable APM features at boot time. From page 36 of the APM BIOS
specification: "When disabled, the APM BIOS does not automatically
@@ -1005,7 +1006,6 @@ config APM_DO_ENABLE
config APM_CPU_IDLE
bool "Make CPU Idle calls when idle"
- depends on APM
help
Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop.
On some machines, this can activate improved power savings, such as
@@ -1017,7 +1017,6 @@ config APM_CPU_IDLE
config APM_DISPLAY_BLANK
bool "Enable console blanking using APM"
- depends on APM
help
Enable console blanking using the APM. Some laptops can use this to
turn off the LCD backlight when the screen blanker of the Linux
@@ -1029,22 +1028,8 @@ config APM_DISPLAY_BLANK
backlight at all, or it might print a lot of errors to the console,
especially if you are using gpm.
-config APM_RTC_IS_GMT
- bool "RTC stores time in GMT"
- depends on APM
- help
- Say Y here if your RTC (Real Time Clock a.k.a. hardware clock)
- stores the time in GMT (Greenwich Mean Time). Say N if your RTC
- stores localtime.
-
- It is in fact recommended to store GMT in your RTC, because then you
- don't have to worry about daylight savings time changes. The only
- reason not to use GMT in your RTC is if you also run a broken OS
- that doesn't understand GMT.
-
config APM_ALLOW_INTS
bool "Allow interrupts during APM BIOS calls"
- depends on APM
help
Normally we disable external interrupts while we are making calls to
the APM BIOS as a measure to lessen the effects of a badly behaving
@@ -1055,13 +1040,12 @@ config APM_ALLOW_INTS
config APM_REAL_MODE_POWER_OFF
bool "Use real mode APM BIOS call to power off"
- depends on APM
help
Use real mode APM BIOS calls to switch off the computer. This is
a work-around for a number of buggy BIOSes. Switch this option on if
your computer crashes instead of powering off properly.
-endmenu
+endif # APM
source "arch/i386/kernel/cpu/cpufreq/Kconfig"
diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu
index b99c0e2a4e63..dce6124cb842 100644
--- a/arch/i386/Kconfig.cpu
+++ b/arch/i386/Kconfig.cpu
@@ -43,6 +43,7 @@ config M386
- "Geode GX/LX" For AMD Geode GX and LX processors.
- "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
- "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above).
+ - "VIA C7" for VIA C7.
If you don't know what to do, choose "386".
@@ -203,6 +204,12 @@ config MVIAC3_2
of SSE and tells gcc to treat the CPU as a 686.
Note, this kernel will not boot on older (pre model 9) C3s.
+config MVIAC7
+ bool "VIA C7"
+ help
+ Select this for a VIA C7. Selecting this uses the correct cache
+ shift and tells gcc to treat the CPU as a 686.
+
endchoice
config X86_GENERIC
@@ -231,16 +238,21 @@ config X86_L1_CACHE_SHIFT
default "7" if MPENTIUM4 || X86_GENERIC
default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2
+ default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7
+
+config X86_XADD
+ bool
+ depends on !M386
+ default y
config RWSEM_GENERIC_SPINLOCK
bool
- depends on M386
+ depends on !X86_XADD
default y
config RWSEM_XCHGADD_ALGORITHM
bool
- depends on !M386
+ depends on X86_XADD
default y
config ARCH_HAS_ILOG2_U32
@@ -297,7 +309,7 @@ config X86_ALIGNMENT_16
config X86_GOOD_APIC
bool
- depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2
+ depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2 || MVIAC7
default y
config X86_INTEL_USERCOPY
@@ -322,5 +334,18 @@ config X86_OOSTORE
config X86_TSC
bool
- depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ
+ depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ
default y
+
+# this should be set for all -march=.. options where the compiler
+# generates cmov.
+config X86_CMOV
+ bool
+ depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7)
+ default y
+
+config X86_MINIMUM_CPU_MODEL
+ int
+ default "4" if X86_XADD || X86_CMPXCHG || X86_BSWAP
+ default "0"
+
diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug
index 458bc1611933..b31c0802e1cc 100644
--- a/arch/i386/Kconfig.debug
+++ b/arch/i386/Kconfig.debug
@@ -85,14 +85,4 @@ config DOUBLEFAULT
option saves about 4k and might cause you much additional grey
hair.
-config DEBUG_PARAVIRT
- bool "Enable some paravirtualization debugging"
- default n
- depends on PARAVIRT && DEBUG_KERNEL
- help
- Currently deliberately clobbers regs which are allowed to be
- clobbered in inlined paravirt hooks, even in native mode.
- If turning this off solves a problem, then DISABLE_INTERRUPTS() or
- ENABLE_INTERRUPTS() is lying about what registers can be clobbered.
-
endmenu
diff --git a/arch/i386/Makefile b/arch/i386/Makefile
index bd28f9f9b4b7..6dc5e5d90fec 100644
--- a/arch/i386/Makefile
+++ b/arch/i386/Makefile
@@ -34,7 +34,7 @@ CHECKFLAGS += -D__i386__
CFLAGS += -pipe -msoft-float -mregparm=3 -freg-struct-return
# prevent gcc from keeping the stack 16 byte aligned
-CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
+CFLAGS += -mpreferred-stack-boundary=4
# CPU-specific tuning. Anything which can be shared with UML should go here.
include $(srctree)/arch/i386/Makefile.cpu
diff --git a/arch/i386/Makefile.cpu b/arch/i386/Makefile.cpu
index a32c031c90d7..e372b584e919 100644
--- a/arch/i386/Makefile.cpu
+++ b/arch/i386/Makefile.cpu
@@ -4,9 +4,9 @@
#-mtune exists since gcc 3.4
HAS_MTUNE := $(call cc-option-yn, -mtune=i386)
ifeq ($(HAS_MTUNE),y)
-tune = $(call cc-option,-mtune=$(1),)
+tune = $(call cc-option,-mtune=$(1),$(2))
else
-tune = $(call cc-option,-mcpu=$(1),)
+tune = $(call cc-option,-mcpu=$(1),$(2))
endif
align := $(cc-option-align)
@@ -32,7 +32,8 @@ cflags-$(CONFIG_MWINCHIP2) += $(call cc-option,-march=winchip2,-march=i586)
cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586)
cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
-cflags-$(CONFIG_MCORE2) += -march=i686 $(call cc-option,-mtune=core2,$(call cc-option,-mtune=generic,-mtune=i686))
+cflags-$(CONFIG_MVIAC7) += -march=i686
+cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
# AMD Elan support
cflags-$(CONFIG_X86_ELAN) += -march=i486
@@ -42,5 +43,5 @@ cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx
# add at the end to overwrite eventual tuning options from earlier
# cpu entries
-cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic)
+cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686))
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile
index e97946626064..bfbc32098a4a 100644
--- a/arch/i386/boot/Makefile
+++ b/arch/i386/boot/Makefile
@@ -36,9 +36,9 @@ HOSTCFLAGS_build.o := $(LINUXINCLUDE)
# ---------------------------------------------------------------------------
$(obj)/zImage: IMAGE_OFFSET := 0x1000
-$(obj)/zImage: EXTRA_AFLAGS := -traditional $(SVGA_MODE) $(RAMDISK)
+$(obj)/zImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK)
$(obj)/bzImage: IMAGE_OFFSET := 0x100000
-$(obj)/bzImage: EXTRA_AFLAGS := -traditional $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
+$(obj)/bzImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
$(obj)/bzImage: BUILDFLAGS := -b
quiet_cmd_image = BUILD $@
diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
index 1ce7017fd627..b28505c544c9 100644
--- a/arch/i386/boot/compressed/misc.c
+++ b/arch/i386/boot/compressed/misc.c
@@ -189,7 +189,7 @@ static void putstr(const char *);
static unsigned long free_mem_ptr;
static unsigned long free_mem_end_ptr;
-#define HEAP_SIZE 0x3000
+#define HEAP_SIZE 0x4000
static char *vidmem = (char *)0xb8000;
static int vidport;
diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S
index 06edf1c66242..f8b3b9cda2b1 100644
--- a/arch/i386/boot/setup.S
+++ b/arch/i386/boot/setup.S
@@ -52,6 +52,7 @@
#include <asm/boot.h>
#include <asm/e820.h>
#include <asm/page.h>
+#include <asm/setup.h>
/* Signature words to ensure LILO loaded us right */
#define SIG1 0xAA55
@@ -81,7 +82,7 @@ start:
# This is the setup header, and it must start at %cs:2 (old 0x9020:2)
.ascii "HdrS" # header signature
- .word 0x0205 # header version number (>= 0x0105)
+ .word 0x0206 # header version number (>= 0x0105)
# or else old loadlin-1.5 will fail)
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
start_sys_seg: .word SYSSEG
@@ -171,6 +172,10 @@ relocatable_kernel: .byte 0
pad2: .byte 0
pad3: .word 0
+cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
+ #added with boot protocol
+ #version 2.06
+
trampoline: call start_of_setup
.align 16
# The offset at this point is 0x240
@@ -297,7 +302,24 @@ good_sig:
loader_panic_mess: .string "Wrong loader, giving up..."
+# check minimum cpuid
+# we do this here because it is the last place we can actually
+# show a user visible error message. Later the video modus
+# might be already messed up.
loader_ok:
+ call verify_cpu
+ testl %eax,%eax
+ jz cpu_ok
+ lea cpu_panic_mess,%si
+ call prtstr
+1: jmp 1b
+
+cpu_panic_mess:
+ .asciz "PANIC: CPU too old for this kernel."
+
+#include "../kernel/verify_cpu.S"
+
+cpu_ok:
# Get memory size (extended mem, kB)
xorl %eax, %eax
diff --git a/arch/i386/defconfig b/arch/i386/defconfig
index c96911c37aea..9da84412a831 100644
--- a/arch/i386/defconfig
+++ b/arch/i386/defconfig
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.21-rc3
-# Wed Mar 7 15:29:47 2007
+# Linux kernel version: 2.6.21-git3
+# Tue May 1 07:30:51 2007
#
CONFIG_X86_32=y
CONFIG_GENERIC_TIME=y
@@ -108,9 +108,9 @@ CONFIG_DEFAULT_IOSCHED="anticipatory"
#
# Processor type and features
#
-# CONFIG_TICK_ONESHOT is not set
-# CONFIG_NO_HZ is not set
-# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_SMP=y
# CONFIG_X86_PC is not set
# CONFIG_X86_ELAN is not set
@@ -146,9 +146,11 @@ CONFIG_MPENTIUMIII=y
# CONFIG_MGEODE_LX is not set
# CONFIG_MCYRIXIII is not set
# CONFIG_MVIAC3_2 is not set
+# CONFIG_MVIAC7 is not set
CONFIG_X86_GENERIC=y
CONFIG_X86_CMPXCHG=y
CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_XADD=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
# CONFIG_ARCH_HAS_ILOG2_U32 is not set
# CONFIG_ARCH_HAS_ILOG2_U64 is not set
@@ -162,6 +164,8 @@ CONFIG_X86_GOOD_APIC=y
CONFIG_X86_INTEL_USERCOPY=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
CONFIG_X86_TSC=y
+CONFIG_X86_CMOV=y
+CONFIG_X86_MINIMUM_CPU_MODEL=4
CONFIG_HPET_TIMER=y
CONFIG_HPET_EMULATE_RTC=y
CONFIG_NR_CPUS=32
@@ -248,7 +252,6 @@ CONFIG_ACPI_FAN=y
CONFIG_ACPI_PROCESSOR=y
CONFIG_ACPI_THERMAL=y
# CONFIG_ACPI_ASUS is not set
-# CONFIG_ACPI_IBM is not set
# CONFIG_ACPI_TOSHIBA is not set
CONFIG_ACPI_BLACKLIST_YEAR=2001
CONFIG_ACPI_DEBUG=y
@@ -257,10 +260,7 @@ CONFIG_ACPI_POWER=y
CONFIG_ACPI_SYSTEM=y
CONFIG_X86_PM_TIMER=y
# CONFIG_ACPI_CONTAINER is not set
-
-#
-# APM (Advanced Power Management) BIOS Support
-#
+# CONFIG_ACPI_SBS is not set
# CONFIG_APM is not set
#
@@ -277,7 +277,7 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
-# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
#
# CPUFreq processor drivers
@@ -349,7 +349,6 @@ CONFIG_NET=y
#
# Networking options
#
-# CONFIG_NETDEBUG is not set
CONFIG_PACKET=y
# CONFIG_PACKET_MMAP is not set
CONFIG_UNIX=y
@@ -388,6 +387,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
CONFIG_IPV6=y
# CONFIG_IPV6_PRIVACY is not set
# CONFIG_IPV6_ROUTER_PREF is not set
+# CONFIG_IPV6_OPTIMISTIC_DAD is not set
# CONFIG_INET6_AH is not set
# CONFIG_INET6_ESP is not set
# CONFIG_INET6_IPCOMP is not set
@@ -443,6 +443,13 @@ CONFIG_IPV6_SIT=y
# CONFIG_HAMRADIO is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+
+#
+# Wireless
+#
+# CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_EXT is not set
# CONFIG_IEEE80211 is not set
#
@@ -463,10 +470,6 @@ CONFIG_FW_LOADER=y
# Connector - unified userspace <-> kernelspace linker
#
# CONFIG_CONNECTOR is not set
-
-#
-# Memory Technology Devices (MTD)
-#
# CONFIG_MTD is not set
#
@@ -513,6 +516,7 @@ CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
# CONFIG_SGI_IOC4 is not set
# CONFIG_TIFM_CORE is not set
# CONFIG_SONY_LAPTOP is not set
+# CONFIG_THINKPAD_ACPI is not set
#
# ATA/ATAPI/MFM/RLL support
@@ -548,7 +552,6 @@ CONFIG_BLK_DEV_IDEPCI=y
# CONFIG_BLK_DEV_RZ1000 is not set
CONFIG_BLK_DEV_IDEDMA_PCI=y
# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
-CONFIG_IDEDMA_PCI_AUTO=y
# CONFIG_IDEDMA_ONLYDISK is not set
# CONFIG_BLK_DEV_AEC62XX is not set
# CONFIG_BLK_DEV_ALI15X3 is not set
@@ -580,7 +583,6 @@ CONFIG_BLK_DEV_PIIX=y
# CONFIG_IDE_ARM is not set
CONFIG_BLK_DEV_IDEDMA=y
# CONFIG_IDEDMA_IVB is not set
-CONFIG_IDEDMA_AUTO=y
# CONFIG_BLK_DEV_HD is not set
#
@@ -669,6 +671,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0
# CONFIG_SCSI_DC390T is not set
# CONFIG_SCSI_NSP32 is not set
# CONFIG_SCSI_DEBUG is not set
+# CONFIG_SCSI_ESP_CORE is not set
# CONFIG_SCSI_SRP is not set
#
@@ -697,6 +700,7 @@ CONFIG_SATA_ACPI=y
# CONFIG_PATA_AMD is not set
# CONFIG_PATA_ARTOP is not set
# CONFIG_PATA_ATIIXP is not set
+# CONFIG_PATA_CMD640_PCI is not set
# CONFIG_PATA_CMD64X is not set
# CONFIG_PATA_CS5520 is not set
# CONFIG_PATA_CS5530 is not set
@@ -762,10 +766,9 @@ CONFIG_IEEE1394=y
# Subsystem Options
#
# CONFIG_IEEE1394_VERBOSEDEBUG is not set
-# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set
#
-# Device Drivers
+# Controllers
#
#
@@ -774,10 +777,11 @@ CONFIG_IEEE1394=y
CONFIG_IEEE1394_OHCI1394=y
#
-# Protocol Drivers
+# Protocols
#
# CONFIG_IEEE1394_VIDEO1394 is not set
# CONFIG_IEEE1394_SBP2 is not set
+# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set
# CONFIG_IEEE1394_ETH1394 is not set
# CONFIG_IEEE1394_DV1394 is not set
CONFIG_IEEE1394_RAWIO=y
@@ -820,7 +824,9 @@ CONFIG_MII=y
# CONFIG_HAPPYMEAL is not set
# CONFIG_SUNGEM is not set
# CONFIG_CASSINI is not set
-# CONFIG_NET_VENDOR_3COM is not set
+CONFIG_NET_VENDOR_3COM=y
+CONFIG_VORTEX=y
+# CONFIG_TYPHOON is not set
#
# Tulip family network device support
@@ -901,9 +907,10 @@ CONFIG_BNX2=y
# CONFIG_TR is not set
#
-# Wireless LAN (non-hamradio)
+# Wireless LAN
#
-# CONFIG_NET_RADIO is not set
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
#
# Wan interfaces
@@ -917,7 +924,6 @@ CONFIG_BNX2=y
# CONFIG_SHAPER is not set
CONFIG_NETCONSOLE=y
CONFIG_NETPOLL=y
-# CONFIG_NETPOLL_RX is not set
# CONFIG_NETPOLL_TRAP is not set
CONFIG_NET_POLL_CONTROLLER=y
@@ -1050,7 +1056,7 @@ CONFIG_MAX_RAW_DEVS=256
CONFIG_HPET=y
# CONFIG_HPET_RTC_IRQ is not set
CONFIG_HPET_MMAP=y
-CONFIG_HANGCHECK_TIMER=y
+# CONFIG_HANGCHECK_TIMER is not set
#
# TPM devices
@@ -1142,6 +1148,14 @@ CONFIG_HID=y
# CONFIG_HID_DEBUG is not set
#
+# USB Input Devices
+#
+CONFIG_USB_HID=y
+# CONFIG_USB_HIDINPUT_POWERBOOK is not set
+# CONFIG_HID_FF is not set
+# CONFIG_USB_HIDDEV is not set
+
+#
# USB support
#
CONFIG_USB_ARCH_HAS_HCD=y
@@ -1154,6 +1168,7 @@ CONFIG_USB=y
# Miscellaneous USB options
#
CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_DEVICE_CLASS is not set
# CONFIG_USB_DYNAMIC_MINORS is not set
# CONFIG_USB_SUSPEND is not set
# CONFIG_USB_OTG is not set
@@ -1204,10 +1219,6 @@ CONFIG_USB_STORAGE=y
#
# USB Input Devices
#
-CONFIG_USB_HID=y
-# CONFIG_USB_HIDINPUT_POWERBOOK is not set
-# CONFIG_HID_FF is not set
-# CONFIG_USB_HIDDEV is not set
# CONFIG_USB_AIPTEK is not set
# CONFIG_USB_WACOM is not set
# CONFIG_USB_ACECAD is not set
@@ -1528,7 +1539,7 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_LOG_BUF_SHIFT=18
CONFIG_DETECT_SOFTLOCKUP=y
# CONFIG_SCHEDSTATS is not set
-# CONFIG_TIMER_STATS is not set
+CONFIG_TIMER_STATS=y
# CONFIG_DEBUG_SLAB is not set
# CONFIG_DEBUG_RT_MUTEXES is not set
# CONFIG_RT_MUTEX_TESTER is not set
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 4ae3dcf1d2f0..4f98516b9f94 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -39,12 +39,10 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_HPET_TIMER) += hpet.o
obj-$(CONFIG_K8_NB) += k8.o
-obj-$(CONFIG_VMI) += vmi.o vmitime.o
+obj-$(CONFIG_VMI) += vmi.o vmiclock.o
obj-$(CONFIG_PARAVIRT) += paravirt.o
obj-y += pcspeaker.o
-EXTRA_AFLAGS := -traditional
-
obj-$(CONFIG_SCx200) += scx200.o
# vsyscall.o contains the vsyscall DSO images as __initdata.
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index 9ea5b8ecc7e1..280898b045b2 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -874,7 +874,7 @@ static void __init acpi_process_madt(void)
acpi_ioapic = 1;
smp_found_config = 1;
- clustered_apic_check();
+ setup_apic_routing();
}
}
if (error == -EINVAL) {
diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c
index 8f7efd38254d..23f78efc577d 100644
--- a/arch/i386/kernel/acpi/earlyquirk.c
+++ b/arch/i386/kernel/acpi/earlyquirk.c
@@ -10,7 +10,6 @@
#include <asm/pci-direct.h>
#include <asm/acpi.h>
#include <asm/apic.h>
-#include <asm/irq.h>
#ifdef CONFIG_ACPI
@@ -48,24 +47,6 @@ static int __init check_bridge(int vendor, int device)
return 0;
}
-static void check_intel(void)
-{
- u16 vendor, device;
-
- vendor = read_pci_config_16(0, 0, 0, PCI_VENDOR_ID);
-
- if (vendor != PCI_VENDOR_ID_INTEL)
- return;
-
- device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID);
-#ifdef CONFIG_SMP
- if (device == PCI_DEVICE_ID_INTEL_E7320_MCH ||
- device == PCI_DEVICE_ID_INTEL_E7520_MCH ||
- device == PCI_DEVICE_ID_INTEL_E7525_MCH)
- quirk_intel_irqbalance();
-#endif
-}
-
void __init check_acpi_pci(void)
{
int num, slot, func;
@@ -77,8 +58,6 @@ void __init check_acpi_pci(void)
if (!early_pci_allowed())
return;
- check_intel();
-
/* Poor man's PCI discovery */
for (num = 0; num < 32; num++) {
for (slot = 0; slot < 32; slot++) {
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index 426f59b0106b..e5cec6685cc5 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -5,6 +5,7 @@
#include <asm/alternative.h>
#include <asm/sections.h>
+static int noreplace_smp = 0;
static int smp_alt_once = 0;
static int debug_alternative = 0;
@@ -13,15 +14,33 @@ static int __init bootonly(char *str)
smp_alt_once = 1;
return 1;
}
+__setup("smp-alt-boot", bootonly);
+
static int __init debug_alt(char *str)
{
debug_alternative = 1;
return 1;
}
-
-__setup("smp-alt-boot", bootonly);
__setup("debug-alternative", debug_alt);
+static int __init setup_noreplace_smp(char *str)
+{
+ noreplace_smp = 1;
+ return 1;
+}
+__setup("noreplace-smp", setup_noreplace_smp);
+
+#ifdef CONFIG_PARAVIRT
+static int noreplace_paravirt = 0;
+
+static int __init setup_noreplace_paravirt(char *str)
+{
+ noreplace_paravirt = 1;
+ return 1;
+}
+__setup("noreplace-paravirt", setup_noreplace_paravirt);
+#endif
+
#define DPRINTK(fmt, args...) if (debug_alternative) \
printk(KERN_DEBUG fmt, args)
@@ -132,11 +151,8 @@ static void nop_out(void *insns, unsigned int len)
}
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
extern u8 *__smp_locks[], *__smp_locks_end[];
-extern u8 __smp_alt_begin[], __smp_alt_end[];
-
/* Replace instructions with better alternatives for this CPU type.
This runs before SMP is initialized to avoid SMP problems with
self modifying code. This implies that assymetric systems where
@@ -171,29 +187,6 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
#ifdef CONFIG_SMP
-static void alternatives_smp_save(struct alt_instr *start, struct alt_instr *end)
-{
- struct alt_instr *a;
-
- DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end);
- for (a = start; a < end; a++) {
- memcpy(a->replacement + a->replacementlen,
- a->instr,
- a->instrlen);
- }
-}
-
-static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end)
-{
- struct alt_instr *a;
-
- for (a = start; a < end; a++) {
- memcpy(a->instr,
- a->replacement + a->replacementlen,
- a->instrlen);
- }
-}
-
static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
u8 **ptr;
@@ -211,6 +204,9 @@ static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end
{
u8 **ptr;
+ if (noreplace_smp)
+ return;
+
for (ptr = start; ptr < end; ptr++) {
if (*ptr < text)
continue;
@@ -245,6 +241,9 @@ void alternatives_smp_module_add(struct module *mod, char *name,
struct smp_alt_module *smp;
unsigned long flags;
+ if (noreplace_smp)
+ return;
+
if (smp_alt_once) {
if (boot_cpu_has(X86_FEATURE_UP))
alternatives_smp_unlock(locks, locks_end,
@@ -279,7 +278,7 @@ void alternatives_smp_module_del(struct module *mod)
struct smp_alt_module *item;
unsigned long flags;
- if (smp_alt_once)
+ if (smp_alt_once || noreplace_smp)
return;
spin_lock_irqsave(&smp_alt, flags);
@@ -310,7 +309,7 @@ void alternatives_smp_switch(int smp)
return;
#endif
- if (smp_alt_once)
+ if (noreplace_smp || smp_alt_once)
return;
BUG_ON(!smp && (num_online_cpus() > 1));
@@ -319,8 +318,6 @@ void alternatives_smp_switch(int smp)
printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
- alternatives_smp_apply(__smp_alt_instructions,
- __smp_alt_instructions_end);
list_for_each_entry(mod, &smp_alt_modules, next)
alternatives_smp_lock(mod->locks, mod->locks_end,
mod->text, mod->text_end);
@@ -328,8 +325,6 @@ void alternatives_smp_switch(int smp)
printk(KERN_INFO "SMP alternatives: switching to UP code\n");
set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
- apply_alternatives(__smp_alt_instructions,
- __smp_alt_instructions_end);
list_for_each_entry(mod, &smp_alt_modules, next)
alternatives_smp_unlock(mod->locks, mod->locks_end,
mod->text, mod->text_end);
@@ -340,36 +335,31 @@ void alternatives_smp_switch(int smp)
#endif
#ifdef CONFIG_PARAVIRT
-void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
+void apply_paravirt(struct paravirt_patch_site *start,
+ struct paravirt_patch_site *end)
{
- struct paravirt_patch *p;
+ struct paravirt_patch_site *p;
+
+ if (noreplace_paravirt)
+ return;
for (p = start; p < end; p++) {
unsigned int used;
used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr,
p->len);
-#ifdef CONFIG_DEBUG_PARAVIRT
- {
- int i;
- /* Deliberately clobber regs using "not %reg" to find bugs. */
- for (i = 0; i < 3; i++) {
- if (p->len - used >= 2 && (p->clobbers & (1 << i))) {
- memcpy(p->instr + used, "\xf7\xd0", 2);
- p->instr[used+1] |= i;
- used += 2;
- }
- }
- }
-#endif
+
+ BUG_ON(used > p->len);
+
/* Pad the rest with nops */
nop_out(p->instr + used, p->len - used);
}
- /* Sync to be conservative, in case we patched following instructions */
+ /* Sync to be conservative, in case we patched following
+ * instructions */
sync_core();
}
-extern struct paravirt_patch __start_parainstructions[],
+extern struct paravirt_patch_site __start_parainstructions[],
__stop_parainstructions[];
#endif /* CONFIG_PARAVIRT */
@@ -396,23 +386,19 @@ void __init alternative_instructions(void)
printk(KERN_INFO "SMP alternatives: switching to UP code\n");
set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
- apply_alternatives(__smp_alt_instructions,
- __smp_alt_instructions_end);
alternatives_smp_unlock(__smp_locks, __smp_locks_end,
_text, _etext);
}
free_init_pages("SMP alternatives",
- (unsigned long)__smp_alt_begin,
- (unsigned long)__smp_alt_end);
+ __pa_symbol(&__smp_locks),
+ __pa_symbol(&__smp_locks_end));
} else {
- alternatives_smp_save(__smp_alt_instructions,
- __smp_alt_instructions_end);
alternatives_smp_module_add(NULL, "core kernel",
__smp_locks, __smp_locks_end,
_text, _etext);
alternatives_smp_switch(0);
}
#endif
- apply_paravirt(__start_parainstructions, __stop_parainstructions);
+ apply_paravirt(__parainstructions, __parainstructions_end);
local_irq_restore(flags);
}
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 93aa911646ad..aca054cc0552 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -129,6 +129,28 @@ static int modern_apic(void)
return lapic_get_version() >= 0x14;
}
+void apic_wait_icr_idle(void)
+{
+ while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
+ cpu_relax();
+}
+
+unsigned long safe_apic_wait_icr_idle(void)
+{
+ unsigned long send_status;
+ int timeout;
+
+ timeout = 0;
+ do {
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ if (!send_status)
+ break;
+ udelay(100);
+ } while (timeout++ < 1000);
+
+ return send_status;
+}
+
/**
* enable_NMI_through_LVT0 - enable NMI through local vector table 0
*/
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 064bbf2861f4..367ff1d930cb 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -233,11 +233,10 @@
#include <asm/desc.h>
#include <asm/i8253.h>
#include <asm/paravirt.h>
+#include <asm/reboot.h>
#include "io_ports.h"
-extern void machine_real_restart(unsigned char *, int);
-
#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
extern int (*console_blank_hook)(int);
#endif
@@ -384,13 +383,6 @@ static int ignore_sys_suspend;
static int ignore_normal_resume;
static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL;
-#ifdef CONFIG_APM_RTC_IS_GMT
-# define clock_cmos_diff 0
-# define got_clock_diff 1
-#else
-static long clock_cmos_diff;
-static int got_clock_diff;
-#endif
static int debug __read_mostly;
static int smp __read_mostly;
static int apm_disabled = -1;
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
index c37535163bfc..27a776c9044d 100644
--- a/arch/i386/kernel/asm-offsets.c
+++ b/arch/i386/kernel/asm-offsets.c
@@ -11,11 +11,11 @@
#include <linux/suspend.h>
#include <asm/ucontext.h>
#include "sigframe.h"
+#include <asm/pgtable.h>
#include <asm/fixmap.h>
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/elf.h>
-#include <asm/pda.h>
#define DEFINE(sym, val) \
asm volatile("\n->" #sym " %0 " #val : : "i" (val))
@@ -25,6 +25,9 @@
#define OFFSET(sym, str, mem) \
DEFINE(sym, offsetof(struct str, mem));
+/* workaround for a warning with -Wmissing-prototypes */
+void foo(void);
+
void foo(void)
{
OFFSET(SIGCONTEXT_eax, sigcontext, eax);
@@ -90,17 +93,18 @@ void foo(void)
OFFSET(pbe_next, pbe, next);
/* Offset from the sysenter stack to tss.esp0 */
- DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) -
+ DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, x86_tss.esp0) -
sizeof(struct tss_struct));
DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
- DEFINE(VDSO_PRELINK, VDSO_PRELINK);
+ DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT);
+ DEFINE(PTRS_PER_PTE, PTRS_PER_PTE);
+ DEFINE(PTRS_PER_PMD, PTRS_PER_PMD);
+ DEFINE(PTRS_PER_PGD, PTRS_PER_PGD);
- OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
+ DEFINE(VDSO_PRELINK_asm, VDSO_PRELINK);
- BLANK();
- OFFSET(PDA_cpu, i386_pda, cpu_number);
- OFFSET(PDA_pcurrent, i386_pda, pcurrent);
+ OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
#ifdef CONFIG_PARAVIRT
BLANK();
diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile
index 010aecfffbc1..74f27a463db0 100644
--- a/arch/i386/kernel/cpu/Makefile
+++ b/arch/i386/kernel/cpu/Makefile
@@ -2,7 +2,7 @@
# Makefile for x86-compatible CPU details and quirks
#
-obj-y := common.o proc.o
+obj-y := common.o proc.o bugs.o
obj-y += amd.o
obj-y += cyrix.o
@@ -17,3 +17,5 @@ obj-$(CONFIG_X86_MCE) += mcheck/
obj-$(CONFIG_MTRR) += mtrr/
obj-$(CONFIG_CPU_FREQ) += cpufreq/
+
+obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 2d47db482972..4fec702afd7e 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -53,6 +53,8 @@ static __cpuinit int amd_apic_timer_broken(void)
return 0;
}
+int force_mwait __cpuinitdata;
+
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
{
u32 l, h;
@@ -275,6 +277,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
if (amd_apic_timer_broken())
set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability);
+
+ if (c->x86 == 0x10 && !force_mwait)
+ clear_bit(X86_FEATURE_MWAIT, c->x86_capability);
}
static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
@@ -314,13 +319,3 @@ int __init amd_init_cpu(void)
cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev;
return 0;
}
-
-//early_arch_initcall(amd_init_cpu);
-
-static int __init amd_exit_cpu(void)
-{
- cpu_devs[X86_VENDOR_AMD] = NULL;
- return 0;
-}
-
-late_initcall(amd_exit_cpu);
diff --git a/arch/i386/kernel/cpu/bugs.c b/arch/i386/kernel/cpu/bugs.c
new file mode 100644
index 000000000000..54428a2500f3
--- /dev/null
+++ b/arch/i386/kernel/cpu/bugs.c
@@ -0,0 +1,191 @@
+/*
+ * arch/i386/cpu/bugs.c
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Cyrix stuff, June 1998 by:
+ * - Rafael R. Reilova (moved everything from head.S),
+ * <rreilova@ececs.uc.edu>
+ * - Channing Corn (tests & fixes),
+ * - Andrew D. Balsa (code cleanup).
+ */
+#include <linux/init.h>
+#include <linux/utsname.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/msr.h>
+#include <asm/paravirt.h>
+#include <asm/alternative.h>
+
+static int __init no_halt(char *s)
+{
+ boot_cpu_data.hlt_works_ok = 0;
+ return 1;
+}
+
+__setup("no-hlt", no_halt);
+
+static int __init mca_pentium(char *s)
+{
+ mca_pentium_flag = 1;
+ return 1;
+}
+
+__setup("mca-pentium", mca_pentium);
+
+static int __init no_387(char *s)
+{
+ boot_cpu_data.hard_math = 0;
+ write_cr0(0xE | read_cr0());
+ return 1;
+}
+
+__setup("no387", no_387);
+
+static double __initdata x = 4195835.0;
+static double __initdata y = 3145727.0;
+
+/*
+ * This used to check for exceptions..
+ * However, it turns out that to support that,
+ * the XMM trap handlers basically had to
+ * be buggy. So let's have a correct XMM trap
+ * handler, and forget about printing out
+ * some status at boot.
+ *
+ * We should really only care about bugs here
+ * anyway. Not features.
+ */
+static void __init check_fpu(void)
+{
+ if (!boot_cpu_data.hard_math) {
+#ifndef CONFIG_MATH_EMULATION
+ printk(KERN_EMERG "No coprocessor found and no math emulation present.\n");
+ printk(KERN_EMERG "Giving up.\n");
+ for (;;) ;
+#endif
+ return;
+ }
+
+/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */
+ /* Test for the divl bug.. */
+ __asm__("fninit\n\t"
+ "fldl %1\n\t"
+ "fdivl %2\n\t"
+ "fmull %2\n\t"
+ "fldl %1\n\t"
+ "fsubp %%st,%%st(1)\n\t"
+ "fistpl %0\n\t"
+ "fwait\n\t"
+ "fninit"
+ : "=m" (*&boot_cpu_data.fdiv_bug)
+ : "m" (*&x), "m" (*&y));
+ if (boot_cpu_data.fdiv_bug)
+ printk("Hmm, FPU with FDIV bug.\n");
+}
+
+static void __init check_hlt(void)
+{
+ if (paravirt_enabled())
+ return;
+
+ printk(KERN_INFO "Checking 'hlt' instruction... ");
+ if (!boot_cpu_data.hlt_works_ok) {
+ printk("disabled\n");
+ return;
+ }
+ halt();
+ halt();
+ halt();
+ halt();
+ printk("OK.\n");
+}
+
+/*
+ * Most 386 processors have a bug where a POPAD can lock the
+ * machine even from user space.
+ */
+
+static void __init check_popad(void)
+{
+#ifndef CONFIG_X86_POPAD_OK
+ int res, inp = (int) &res;
+
+ printk(KERN_INFO "Checking for popad bug... ");
+ __asm__ __volatile__(
+ "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
+ : "=&a" (res)
+ : "d" (inp)
+ : "ecx", "edi" );
+ /* If this fails, it means that any user program may lock the CPU hard. Too bad. */
+ if (res != 12345678) printk( "Buggy.\n" );
+ else printk( "OK.\n" );
+#endif
+}
+
+/*
+ * Check whether we are able to run this kernel safely on SMP.
+ *
+ * - In order to run on a i386, we need to be compiled for i386
+ * (for due to lack of "invlpg" and working WP on a i386)
+ * - In order to run on anything without a TSC, we need to be
+ * compiled for a i486.
+ * - In order to support the local APIC on a buggy Pentium machine,
+ * we need to be compiled with CONFIG_X86_GOOD_APIC disabled,
+ * which happens implicitly if compiled for a Pentium or lower
+ * (unless an advanced selection of CPU features is used) as an
+ * otherwise config implies a properly working local APIC without
+ * the need to do extra reads from the APIC.
+*/
+
+static void __init check_config(void)
+{
+/*
+ * We'd better not be a i386 if we're configured to use some
+ * i486+ only features! (WP works in supervisor mode and the
+ * new "invlpg" and "bswap" instructions)
+ */
+#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP)
+ if (boot_cpu_data.x86 == 3)
+ panic("Kernel requires i486+ for 'invlpg' and other features");
+#endif
+
+/*
+ * If we configured ourselves for a TSC, we'd better have one!
+ */
+#ifdef CONFIG_X86_TSC
+ if (!cpu_has_tsc && !tsc_disable)
+ panic("Kernel compiled for Pentium+, requires TSC feature!");
+#endif
+
+/*
+ * If we were told we had a good local APIC, check for buggy Pentia,
+ * i.e. all B steppings and the C2 stepping of P54C when using their
+ * integrated APIC (see 11AP erratum in "Pentium Processor
+ * Specification Update").
+ */
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC)
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL
+ && cpu_has_apic
+ && boot_cpu_data.x86 == 5
+ && boot_cpu_data.x86_model == 2
+ && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11))
+ panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!");
+#endif
+}
+
+
+void __init check_bugs(void)
+{
+ identify_boot_cpu();
+#ifndef CONFIG_SMP
+ printk("CPU: ");
+ print_cpu_info(&boot_cpu_data);
+#endif
+ check_config();
+ check_fpu();
+ check_hlt();
+ check_popad();
+ init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
+ alternative_instructions();
+}
diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c
index 8c25047975c0..473eac883c7b 100644
--- a/arch/i386/kernel/cpu/centaur.c
+++ b/arch/i386/kernel/cpu/centaur.c
@@ -469,13 +469,3 @@ int __init centaur_init_cpu(void)
cpu_devs[X86_VENDOR_CENTAUR] = &centaur_cpu_dev;
return 0;
}
-
-//early_arch_initcall(centaur_init_cpu);
-
-static int __init centaur_exit_cpu(void)
-{
- cpu_devs[X86_VENDOR_CENTAUR] = NULL;
- return 0;
-}
-
-late_initcall(centaur_exit_cpu);
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index dcbbd0a8bfc2..794d593c47eb 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -18,15 +18,37 @@
#include <asm/apic.h>
#include <mach_apic.h>
#endif
-#include <asm/pda.h>
#include "cpu.h"
-DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
-EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
+DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
+ [GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 },
+ [GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 },
+ [GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 },
+ [GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 },
+ /*
+ * Segments used for calling PnP BIOS have byte granularity.
+ * They code segments and data segments have fixed 64k limits,
+ * the transfer segment sizes are set at run time.
+ */
+ [GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
+ [GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */
+ [GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */
+ [GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */
+ [GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */
+ /*
+ * The APM segments have byte granularity and their bases
+ * are set at run time. All have 64k limits.
+ */
+ [GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
+ /* 16-bit code */
+ [GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 },
+ [GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */
-struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(_cpu_pda);
+ [GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 },
+ [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 },
+} };
+EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
static int cachesize_override __cpuinitdata = -1;
static int disable_x86_fxsr __cpuinitdata;
@@ -368,7 +390,7 @@ __setup("serialnumber", x86_serial_nr_setup);
/*
* This does the hard work of actually picking apart the CPU stuff...
*/
-void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
{
int i;
@@ -479,15 +501,22 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
/* Init Machine Check Exception if available. */
mcheck_init(c);
+}
- if (c == &boot_cpu_data)
- sysenter_setup();
+void __init identify_boot_cpu(void)
+{
+ identify_cpu(&boot_cpu_data);
+ sysenter_setup();
enable_sep_cpu();
+ mtrr_bp_init();
+}
- if (c == &boot_cpu_data)
- mtrr_bp_init();
- else
- mtrr_ap_init();
+void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
+{
+ BUG_ON(c == &boot_cpu_data);
+ identify_cpu(c);
+ enable_sep_cpu();
+ mtrr_ap_init();
}
#ifdef CONFIG_X86_HT
@@ -601,129 +630,36 @@ void __init early_cpu_init(void)
#endif
}
-/* Make sure %gs is initialized properly in idle threads */
+/* Make sure %fs is initialized properly in idle threads */
struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
{
memset(regs, 0, sizeof(struct pt_regs));
- regs->xfs = __KERNEL_PDA;
+ regs->xfs = __KERNEL_PERCPU;
return regs;
}
-static __cpuinit int alloc_gdt(int cpu)
+/* Current gdt points %fs at the "master" per-cpu area: after this,
+ * it's on the real one. */
+void switch_to_new_gdt(void)
{
- struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
- struct desc_struct *gdt;
- struct i386_pda *pda;
-
- gdt = (struct desc_struct *)cpu_gdt_descr->address;
- pda = cpu_pda(cpu);
-
- /*
- * This is a horrible hack to allocate the GDT. The problem
- * is that cpu_init() is called really early for the boot CPU
- * (and hence needs bootmem) but much later for the secondary
- * CPUs, when bootmem will have gone away
- */
- if (NODE_DATA(0)->bdata->node_bootmem_map) {
- BUG_ON(gdt != NULL || pda != NULL);
-
- gdt = alloc_bootmem_pages(PAGE_SIZE);
- pda = alloc_bootmem(sizeof(*pda));
- /* alloc_bootmem(_pages) panics on failure, so no check */
-
- memset(gdt, 0, PAGE_SIZE);
- memset(pda, 0, sizeof(*pda));
- } else {
- /* GDT and PDA might already have been allocated if
- this is a CPU hotplug re-insertion. */
- if (gdt == NULL)
- gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
-
- if (pda == NULL)
- pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu));
-
- if (unlikely(!gdt || !pda)) {
- free_pages((unsigned long)gdt, 0);
- kfree(pda);
- return 0;
- }
- }
-
- cpu_gdt_descr->address = (unsigned long)gdt;
- cpu_pda(cpu) = pda;
-
- return 1;
-}
+ struct Xgt_desc_struct gdt_descr;
-/* Initial PDA used by boot CPU */
-struct i386_pda boot_pda = {
- ._pda = &boot_pda,
- .cpu_number = 0,
- .pcurrent = &init_task,
-};
-
-static inline void set_kernel_fs(void)
-{
- /* Set %fs for this CPU's PDA. Memory clobber is to create a
- barrier with respect to any PDA operations, so the compiler
- doesn't move any before here. */
- asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory");
+ gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
+ gdt_descr.size = GDT_SIZE - 1;
+ load_gdt(&gdt_descr);
+ asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
}
-/* Initialize the CPU's GDT and PDA. The boot CPU does this for
- itself, but secondaries find this done for them. */
-__cpuinit int init_gdt(int cpu, struct task_struct *idle)
-{
- struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
- struct desc_struct *gdt;
- struct i386_pda *pda;
-
- /* For non-boot CPUs, the GDT and PDA should already have been
- allocated. */
- if (!alloc_gdt(cpu)) {
- printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu);
- return 0;
- }
-
- gdt = (struct desc_struct *)cpu_gdt_descr->address;
- pda = cpu_pda(cpu);
-
- BUG_ON(gdt == NULL || pda == NULL);
-
- /*
- * Initialize the per-CPU GDT with the boot GDT,
- * and set up the GDT descriptor:
- */
- memcpy(gdt, cpu_gdt_table, GDT_SIZE);
- cpu_gdt_descr->size = GDT_SIZE - 1;
-
- pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a,
- (u32 *)&gdt[GDT_ENTRY_PDA].b,
- (unsigned long)pda, sizeof(*pda) - 1,
- 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */
-
- memset(pda, 0, sizeof(*pda));
- pda->_pda = pda;
- pda->cpu_number = cpu;
- pda->pcurrent = idle;
-
- return 1;
-}
-
-void __cpuinit cpu_set_gdt(int cpu)
-{
- struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
-
- /* Reinit these anyway, even if they've already been done (on
- the boot CPU, this will transition from the boot gdt+pda to
- the real ones). */
- load_gdt(cpu_gdt_descr);
- set_kernel_fs();
-}
-
-/* Common CPU init for both boot and secondary CPUs */
-static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
+/*
+ * cpu_init() initializes state that is per-CPU. Some data is already
+ * initialized (naturally) in the bootstrap process, such as the GDT
+ * and IDT. We reload them nevertheless, this function acts as a
+ * 'CPU state barrier', nothing should get across.
+ */
+void __cpuinit cpu_init(void)
{
+ int cpu = smp_processor_id();
+ struct task_struct *curr = current;
struct tss_struct * t = &per_cpu(init_tss, cpu);
struct thread_struct *thread = &curr->thread;
@@ -744,6 +680,7 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
}
load_idt(&idt_descr);
+ switch_to_new_gdt();
/*
* Set up and load the per-CPU TSS and LDT
@@ -783,38 +720,6 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
mxcsr_feature_mask_init();
}
-/* Entrypoint to initialize secondary CPU */
-void __cpuinit secondary_cpu_init(void)
-{
- int cpu = smp_processor_id();
- struct task_struct *curr = current;
-
- _cpu_init(cpu, curr);
-}
-
-/*
- * cpu_init() initializes state that is per-CPU. Some data is already
- * initialized (naturally) in the bootstrap process, such as the GDT
- * and IDT. We reload them nevertheless, this function acts as a
- * 'CPU state barrier', nothing should get across.
- */
-void __cpuinit cpu_init(void)
-{
- int cpu = smp_processor_id();
- struct task_struct *curr = current;
-
- /* Set up the real GDT and PDA, so we can transition from the
- boot versions. */
- if (!init_gdt(cpu, curr)) {
- /* failed to allocate something; not much we can do... */
- for (;;)
- local_irq_enable();
- }
-
- cpu_set_gdt(cpu);
- _cpu_init(cpu, curr);
-}
-
#ifdef CONFIG_HOTPLUG_CPU
void __cpuinit cpu_uninit(void)
{
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
index de27bd07bc9c..0b8411a864fb 100644
--- a/arch/i386/kernel/cpu/cyrix.c
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -279,7 +279,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
*/
if (vendor == PCI_VENDOR_ID_CYRIX &&
(device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520))
- pit_latch_buggy = 1;
+ mark_tsc_unstable("cyrix 5510/5520 detected");
}
#endif
c->x86_cache_size=16; /* Yep 16K integrated cache thats it */
@@ -448,16 +448,6 @@ int __init cyrix_init_cpu(void)
return 0;
}
-//early_arch_initcall(cyrix_init_cpu);
-
-static int __init cyrix_exit_cpu(void)
-{
- cpu_devs[X86_VENDOR_CYRIX] = NULL;
- return 0;
-}
-
-late_initcall(cyrix_exit_cpu);
-
static struct cpu_dev nsc_cpu_dev __cpuinitdata = {
.c_vendor = "NSC",
.c_ident = { "Geode by NSC" },
@@ -470,12 +460,3 @@ int __init nsc_init_cpu(void)
return 0;
}
-//early_arch_initcall(nsc_init_cpu);
-
-static int __init nsc_exit_cpu(void)
-{
- cpu_devs[X86_VENDOR_NSC] = NULL;
- return 0;
-}
-
-late_initcall(nsc_exit_cpu);
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 56fe26584957..dc4e08147b1f 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -188,8 +188,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
}
#endif
- if (c->x86 == 15)
+ if (c->x86 == 15) {
set_bit(X86_FEATURE_P4, c->x86_capability);
+ set_bit(X86_FEATURE_SYNC_RDTSC, c->x86_capability);
+ }
if (c->x86 == 6)
set_bit(X86_FEATURE_P3, c->x86_capability);
if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c
index b0862af595aa..f9fa4142551e 100644
--- a/arch/i386/kernel/cpu/mcheck/k7.c
+++ b/arch/i386/kernel/cpu/mcheck/k7.c
@@ -75,6 +75,9 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
machine_check_vector = k7_machine_check;
wmb();
+ if (!cpu_has(c, X86_FEATURE_MCE))
+ return;
+
printk (KERN_INFO "Intel machine check architecture supported.\n");
rdmsr (MSR_IA32_MCG_CAP, l, h);
if (l & (1<<8)) /* Control register present ? */
@@ -82,9 +85,13 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
nr_mce_banks = l & 0xff;
/* Clear status for MC index 0 separately, we don't touch CTL,
- * as some Athlons cause spurious MCEs when its enabled. */
- wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
- for (i=1; i<nr_mce_banks; i++) {
+ * as some K7 Athlons cause spurious MCEs when its enabled. */
+ if (boot_cpu_data.x86 == 6) {
+ wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
+ i = 1;
+ } else
+ i = 0;
+ for (; i<nr_mce_banks; i++) {
wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
}
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c
index 4f10c62d180c..56cd485b127c 100644
--- a/arch/i386/kernel/cpu/mcheck/mce.c
+++ b/arch/i386/kernel/cpu/mcheck/mce.c
@@ -38,8 +38,7 @@ void mcheck_init(struct cpuinfo_x86 *c)
switch (c->x86_vendor) {
case X86_VENDOR_AMD:
- if (c->x86==6 || c->x86==15)
- amd_mcheck_init(c);
+ amd_mcheck_init(c);
break;
case X86_VENDOR_INTEL:
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
index 504434a46011..1509edfb2313 100644
--- a/arch/i386/kernel/cpu/mcheck/p4.c
+++ b/arch/i386/kernel/cpu/mcheck/p4.c
@@ -124,13 +124,10 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
-static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
+static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
{
u32 h;
- if (mce_num_extended_msrs == 0)
- goto done;
-
rdmsr (MSR_IA32_MCG_EAX, r->eax, h);
rdmsr (MSR_IA32_MCG_EBX, r->ebx, h);
rdmsr (MSR_IA32_MCG_ECX, r->ecx, h);
@@ -141,12 +138,6 @@ static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
rdmsr (MSR_IA32_MCG_ESP, r->esp, h);
rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h);
rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
-
- /* can we rely on kmalloc to do a dynamic
- * allocation for the reserved registers?
- */
-done:
- return mce_num_extended_msrs;
}
static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
@@ -155,7 +146,6 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
u32 alow, ahigh, high, low;
u32 mcgstl, mcgsth;
int i;
- struct intel_mce_extended_msrs dbg;
rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
if (mcgstl & (1<<0)) /* Recoverable ? */
@@ -164,7 +154,9 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
smp_processor_id(), mcgsth, mcgstl);
- if (intel_get_extended_msrs(&dbg)) {
+ if (mce_num_extended_msrs > 0) {
+ struct intel_mce_extended_msrs dbg;
+ intel_get_extended_msrs(&dbg);
printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n",
smp_processor_id(), dbg.eip, dbg.eflags);
printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n",
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index f77fc53db654..5367e32e0403 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -20,13 +20,25 @@ struct mtrr_state {
mtrr_type def_type;
};
+struct fixed_range_block {
+ int base_msr; /* start address of an MTRR block */
+ int ranges; /* number of MTRRs in this block */
+};
+
+static struct fixed_range_block fixed_range_blocks[] = {
+ { MTRRfix64K_00000_MSR, 1 }, /* one 64k MTRR */
+ { MTRRfix16K_80000_MSR, 2 }, /* two 16k MTRRs */
+ { MTRRfix4K_C0000_MSR, 8 }, /* eight 4k MTRRs */
+ {}
+};
+
static unsigned long smp_changes_mask;
static struct mtrr_state mtrr_state = {};
#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "mtrr."
-static __initdata int mtrr_show;
+static int mtrr_show;
module_param_named(show, mtrr_show, bool, 0);
/* Get the MSR pair relating to a var range */
@@ -37,7 +49,7 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
}
-static void __init
+static void
get_fixed_ranges(mtrr_type * frs)
{
unsigned int *p = (unsigned int *) frs;
@@ -51,12 +63,18 @@ get_fixed_ranges(mtrr_type * frs)
rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]);
}
-static void __init print_fixed(unsigned base, unsigned step, const mtrr_type*types)
+void mtrr_save_fixed_ranges(void *info)
+{
+ get_fixed_ranges(mtrr_state.fixed_ranges);
+}
+
+static void __cpuinit print_fixed(unsigned base, unsigned step, const mtrr_type*types)
{
unsigned i;
for (i = 0; i < 8; ++i, ++types, base += step)
- printk(KERN_INFO "MTRR %05X-%05X %s\n", base, base + step - 1, mtrr_attrib_to_str(*types));
+ printk(KERN_INFO "MTRR %05X-%05X %s\n",
+ base, base + step - 1, mtrr_attrib_to_str(*types));
}
/* Grab all of the MTRR state for this CPU into *state */
@@ -147,6 +165,44 @@ void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
smp_processor_id(), msr, a, b);
}
+/**
+ * Enable and allow read/write of extended fixed-range MTRR bits on K8 CPUs
+ * see AMD publication no. 24593, chapter 3.2.1 for more information
+ */
+static inline void k8_enable_fixed_iorrs(void)
+{
+ unsigned lo, hi;
+
+ rdmsr(MSR_K8_SYSCFG, lo, hi);
+ mtrr_wrmsr(MSR_K8_SYSCFG, lo
+ | K8_MTRRFIXRANGE_DRAM_ENABLE
+ | K8_MTRRFIXRANGE_DRAM_MODIFY, hi);
+}
+
+/**
+ * Checks and updates an fixed-range MTRR if it differs from the value it
+ * should have. If K8 extenstions are wanted, update the K8 SYSCFG MSR also.
+ * see AMD publication no. 24593, chapter 7.8.1, page 233 for more information
+ * \param msr MSR address of the MTTR which should be checked and updated
+ * \param changed pointer which indicates whether the MTRR needed to be changed
+ * \param msrwords pointer to the MSR values which the MSR should have
+ */
+static void set_fixed_range(int msr, int * changed, unsigned int * msrwords)
+{
+ unsigned lo, hi;
+
+ rdmsr(msr, lo, hi);
+
+ if (lo != msrwords[0] || hi != msrwords[1]) {
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+ boot_cpu_data.x86 == 15 &&
+ ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK))
+ k8_enable_fixed_iorrs();
+ mtrr_wrmsr(msr, msrwords[0], msrwords[1]);
+ *changed = TRUE;
+ }
+}
+
int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
/* [SUMMARY] Get a free MTRR.
<base> The starting (base) address of the region.
@@ -196,36 +252,21 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
*type = base_lo & 0xff;
}
+/**
+ * Checks and updates the fixed-range MTRRs if they differ from the saved set
+ * \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges()
+ */
static int set_fixed_ranges(mtrr_type * frs)
{
- unsigned int *p = (unsigned int *) frs;
+ unsigned long long *saved = (unsigned long long *) frs;
int changed = FALSE;
- int i;
- unsigned int lo, hi;
+ int block=-1, range;
- rdmsr(MTRRfix64K_00000_MSR, lo, hi);
- if (p[0] != lo || p[1] != hi) {
- mtrr_wrmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
- changed = TRUE;
- }
+ while (fixed_range_blocks[++block].ranges)
+ for (range=0; range < fixed_range_blocks[block].ranges; range++)
+ set_fixed_range(fixed_range_blocks[block].base_msr + range,
+ &changed, (unsigned int *) saved++);
- for (i = 0; i < 2; i++) {
- rdmsr(MTRRfix16K_80000_MSR + i, lo, hi);
- if (p[2 + i * 2] != lo || p[3 + i * 2] != hi) {
- mtrr_wrmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2],
- p[3 + i * 2]);
- changed = TRUE;
- }
- }
-
- for (i = 0; i < 8; i++) {
- rdmsr(MTRRfix4K_C0000_MSR + i, lo, hi);
- if (p[6 + i * 2] != lo || p[7 + i * 2] != hi) {
- mtrr_wrmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2],
- p[7 + i * 2]);
- changed = TRUE;
- }
- }
return changed;
}
@@ -428,7 +469,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, unsigned i
}
}
- if (base + size < 0x100) {
+ if (base < 0x100) {
printk(KERN_WARNING "mtrr: cannot set region below 1 MiB (0x%lx000,0x%lx000)\n",
base, size);
return -EINVAL;
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 0acfb6a5a220..02a2f39e5e0a 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -729,6 +729,17 @@ void mtrr_ap_init(void)
local_irq_restore(flags);
}
+/**
+ * Save current fixed-range MTRR state of the BSP
+ */
+void mtrr_save_state(void)
+{
+ if (smp_processor_id() == 0)
+ mtrr_save_fixed_ranges(NULL);
+ else
+ smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1);
+}
+
static int __init mtrr_init_finialize(void)
{
if (!mtrr_if)
diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c
index 8bf23cc80c63..961fbe1a748f 100644
--- a/arch/i386/kernel/cpu/nexgen.c
+++ b/arch/i386/kernel/cpu/nexgen.c
@@ -58,13 +58,3 @@ int __init nexgen_init_cpu(void)
cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev;
return 0;
}
-
-//early_arch_initcall(nexgen_init_cpu);
-
-static int __init nexgen_exit_cpu(void)
-{
- cpu_devs[X86_VENDOR_NEXGEN] = NULL;
- return 0;
-}
-
-late_initcall(nexgen_exit_cpu);
diff --git a/arch/i386/kernel/cpu/perfctr-watchdog.c b/arch/i386/kernel/cpu/perfctr-watchdog.c
new file mode 100644
index 000000000000..2b04c8f1db62
--- /dev/null
+++ b/arch/i386/kernel/cpu/perfctr-watchdog.c
@@ -0,0 +1,658 @@
+/* local apic based NMI watchdog for various CPUs.
+ This file also handles reservation of performance counters for coordination
+ with other users (like oprofile).
+
+ Note that these events normally don't tick when the CPU idles. This means
+ the frequency varies with CPU load.
+
+ Original code for K7/P6 written by Keith Owens */
+
+#include <linux/percpu.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/smp.h>
+#include <linux/nmi.h>
+#include <asm/apic.h>
+#include <asm/intel_arch_perfmon.h>
+
+struct nmi_watchdog_ctlblk {
+ unsigned int cccr_msr;
+ unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
+ unsigned int evntsel_msr; /* the MSR to select the events to handle */
+};
+
+/* Interface defining a CPU specific perfctr watchdog */
+struct wd_ops {
+ int (*reserve)(void);
+ void (*unreserve)(void);
+ int (*setup)(unsigned nmi_hz);
+ void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
+ void (*stop)(void *);
+ unsigned perfctr;
+ unsigned evntsel;
+ u64 checkbit;
+};
+
+static struct wd_ops *wd_ops;
+
+/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
+ * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
+ */
+#define NMI_MAX_COUNTER_BITS 66
+
+/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
+ * evtsel_nmi_owner tracks the ownership of the event selection
+ * - different performance counters/ event selection may be reserved for
+ * different subsystems this reservation system just tries to coordinate
+ * things a little
+ */
+static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
+static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
+
+static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
+
+/* converts an msr to an appropriate reservation bit */
+static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
+{
+ return wd_ops ? msr - wd_ops->perfctr : 0;
+}
+
+/* converts an msr to an appropriate reservation bit */
+/* returns the bit offset of the event selection register */
+static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
+{
+ return wd_ops ? msr - wd_ops->evntsel : 0;
+}
+
+/* checks for a bit availability (hack for oprofile) */
+int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
+{
+ BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+ return (!test_bit(counter, perfctr_nmi_owner));
+}
+
+/* checks the an msr for availability */
+int avail_to_resrv_perfctr_nmi(unsigned int msr)
+{
+ unsigned int counter;
+
+ counter = nmi_perfctr_msr_to_bit(msr);
+ BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+ return (!test_bit(counter, perfctr_nmi_owner));
+}
+
+int reserve_perfctr_nmi(unsigned int msr)
+{
+ unsigned int counter;
+
+ counter = nmi_perfctr_msr_to_bit(msr);
+ BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+ if (!test_and_set_bit(counter, perfctr_nmi_owner))
+ return 1;
+ return 0;
+}
+
+void release_perfctr_nmi(unsigned int msr)
+{
+ unsigned int counter;
+
+ counter = nmi_perfctr_msr_to_bit(msr);
+ BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+ clear_bit(counter, perfctr_nmi_owner);
+}
+
+int reserve_evntsel_nmi(unsigned int msr)
+{
+ unsigned int counter;
+
+ counter = nmi_evntsel_msr_to_bit(msr);
+ BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+ if (!test_and_set_bit(counter, evntsel_nmi_owner))
+ return 1;
+ return 0;
+}
+
+void release_evntsel_nmi(unsigned int msr)
+{
+ unsigned int counter;
+
+ counter = nmi_evntsel_msr_to_bit(msr);
+ BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+ clear_bit(counter, evntsel_nmi_owner);
+}
+
+EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
+EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
+EXPORT_SYMBOL(reserve_perfctr_nmi);
+EXPORT_SYMBOL(release_perfctr_nmi);
+EXPORT_SYMBOL(reserve_evntsel_nmi);
+EXPORT_SYMBOL(release_evntsel_nmi);
+
+void disable_lapic_nmi_watchdog(void)
+{
+ BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
+
+ if (atomic_read(&nmi_active) <= 0)
+ return;
+
+ on_each_cpu(wd_ops->stop, NULL, 0, 1);
+ wd_ops->unreserve();
+
+ BUG_ON(atomic_read(&nmi_active) != 0);
+}
+
+void enable_lapic_nmi_watchdog(void)
+{
+ BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
+
+ /* are we already enabled */
+ if (atomic_read(&nmi_active) != 0)
+ return;
+
+ /* are we lapic aware */
+ if (!wd_ops)
+ return;
+ if (!wd_ops->reserve()) {
+ printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
+ return;
+ }
+
+ on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+ touch_nmi_watchdog();
+}
+
+/*
+ * Activate the NMI watchdog via the local APIC.
+ */
+
+static unsigned int adjust_for_32bit_ctr(unsigned int hz)
+{
+ u64 counter_val;
+ unsigned int retval = hz;
+
+ /*
+ * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
+ * are writable, with higher bits sign extending from bit 31.
+ * So, we can only program the counter with 31 bit values and
+ * 32nd bit should be 1, for 33.. to be 1.
+ * Find the appropriate nmi_hz
+ */
+ counter_val = (u64)cpu_khz * 1000;
+ do_div(counter_val, retval);
+ if (counter_val > 0x7fffffffULL) {
+ u64 count = (u64)cpu_khz * 1000;
+ do_div(count, 0x7fffffffUL);
+ retval = count + 1;
+ }
+ return retval;
+}
+
+static void
+write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz)
+{
+ u64 count = (u64)cpu_khz * 1000;
+
+ do_div(count, nmi_hz);
+ if(descr)
+ Dprintk("setting %s to -0x%08Lx\n", descr, count);
+ wrmsrl(perfctr_msr, 0 - count);
+}
+
+static void write_watchdog_counter32(unsigned int perfctr_msr,
+ const char *descr, unsigned nmi_hz)
+{
+ u64 count = (u64)cpu_khz * 1000;
+
+ do_div(count, nmi_hz);
+ if(descr)
+ Dprintk("setting %s to -0x%08Lx\n", descr, count);
+ wrmsr(perfctr_msr, (u32)(-count), 0);
+}
+
+/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface
+ nicely stable so there is not much variety */
+
+#define K7_EVNTSEL_ENABLE (1 << 22)
+#define K7_EVNTSEL_INT (1 << 20)
+#define K7_EVNTSEL_OS (1 << 17)
+#define K7_EVNTSEL_USR (1 << 16)
+#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
+#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+
+static int setup_k7_watchdog(unsigned nmi_hz)
+{
+ unsigned int perfctr_msr, evntsel_msr;
+ unsigned int evntsel;
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+ perfctr_msr = MSR_K7_PERFCTR0;
+ evntsel_msr = MSR_K7_EVNTSEL0;
+
+ wrmsrl(perfctr_msr, 0UL);
+
+ evntsel = K7_EVNTSEL_INT
+ | K7_EVNTSEL_OS
+ | K7_EVNTSEL_USR
+ | K7_NMI_EVENT;
+
+ /* setup the timer */
+ wrmsr(evntsel_msr, evntsel, 0);
+ write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ evntsel |= K7_EVNTSEL_ENABLE;
+ wrmsr(evntsel_msr, evntsel, 0);
+
+ wd->perfctr_msr = perfctr_msr;
+ wd->evntsel_msr = evntsel_msr;
+ wd->cccr_msr = 0; //unused
+ return 1;
+}
+
+static void single_msr_stop_watchdog(void *arg)
+{
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+ wrmsr(wd->evntsel_msr, 0, 0);
+}
+
+static int single_msr_reserve(void)
+{
+ if (!reserve_perfctr_nmi(wd_ops->perfctr))
+ return 0;
+
+ if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
+ release_perfctr_nmi(wd_ops->perfctr);
+ return 0;
+ }
+ return 1;
+}
+
+static void single_msr_unreserve(void)
+{
+ release_evntsel_nmi(wd_ops->perfctr);
+ release_perfctr_nmi(wd_ops->evntsel);
+}
+
+static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+{
+ /* start the cycle over again */
+ write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
+}
+
+static struct wd_ops k7_wd_ops = {
+ .reserve = single_msr_reserve,
+ .unreserve = single_msr_unreserve,
+ .setup = setup_k7_watchdog,
+ .rearm = single_msr_rearm,
+ .stop = single_msr_stop_watchdog,
+ .perfctr = MSR_K7_PERFCTR0,
+ .evntsel = MSR_K7_EVNTSEL0,
+ .checkbit = 1ULL<<63,
+};
+
+/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */
+
+#define P6_EVNTSEL0_ENABLE (1 << 22)
+#define P6_EVNTSEL_INT (1 << 20)
+#define P6_EVNTSEL_OS (1 << 17)
+#define P6_EVNTSEL_USR (1 << 16)
+#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
+#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
+
+static int setup_p6_watchdog(unsigned nmi_hz)
+{
+ unsigned int perfctr_msr, evntsel_msr;
+ unsigned int evntsel;
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+ perfctr_msr = MSR_P6_PERFCTR0;
+ evntsel_msr = MSR_P6_EVNTSEL0;
+
+ wrmsrl(perfctr_msr, 0UL);
+
+ evntsel = P6_EVNTSEL_INT
+ | P6_EVNTSEL_OS
+ | P6_EVNTSEL_USR
+ | P6_NMI_EVENT;
+
+ /* setup the timer */
+ wrmsr(evntsel_msr, evntsel, 0);
+ nmi_hz = adjust_for_32bit_ctr(nmi_hz);
+ write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ evntsel |= P6_EVNTSEL0_ENABLE;
+ wrmsr(evntsel_msr, evntsel, 0);
+
+ wd->perfctr_msr = perfctr_msr;
+ wd->evntsel_msr = evntsel_msr;
+ wd->cccr_msr = 0; //unused
+ return 1;
+}
+
+static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+{
+ /* P6 based Pentium M need to re-unmask
+ * the apic vector but it doesn't hurt
+ * other P6 variant.
+ * ArchPerfom/Core Duo also needs this */
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ /* P6/ARCH_PERFMON has 32 bit counter write */
+ write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz);
+}
+
+static struct wd_ops p6_wd_ops = {
+ .reserve = single_msr_reserve,
+ .unreserve = single_msr_unreserve,
+ .setup = setup_p6_watchdog,
+ .rearm = p6_rearm,
+ .stop = single_msr_stop_watchdog,
+ .perfctr = MSR_P6_PERFCTR0,
+ .evntsel = MSR_P6_EVNTSEL0,
+ .checkbit = 1ULL<<39,
+};
+
+/* Intel P4 performance counters. By far the most complicated of all. */
+
+#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
+#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
+#define P4_ESCR_OS (1<<3)
+#define P4_ESCR_USR (1<<2)
+#define P4_CCCR_OVF_PMI0 (1<<26)
+#define P4_CCCR_OVF_PMI1 (1<<27)
+#define P4_CCCR_THRESHOLD(N) ((N)<<20)
+#define P4_CCCR_COMPLEMENT (1<<19)
+#define P4_CCCR_COMPARE (1<<18)
+#define P4_CCCR_REQUIRED (3<<16)
+#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
+#define P4_CCCR_ENABLE (1<<12)
+#define P4_CCCR_OVF (1<<31)
+
+/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
+ CRU_ESCR0 (with any non-null event selector) through a complemented
+ max threshold. [IA32-Vol3, Section 14.9.9] */
+
+static int setup_p4_watchdog(unsigned nmi_hz)
+{
+ unsigned int perfctr_msr, evntsel_msr, cccr_msr;
+ unsigned int evntsel, cccr_val;
+ unsigned int misc_enable, dummy;
+ unsigned int ht_num;
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+ rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
+ if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
+ return 0;
+
+#ifdef CONFIG_SMP
+ /* detect which hyperthread we are on */
+ if (smp_num_siblings == 2) {
+ unsigned int ebx, apicid;
+
+ ebx = cpuid_ebx(1);
+ apicid = (ebx >> 24) & 0xff;
+ ht_num = apicid & 1;
+ } else
+#endif
+ ht_num = 0;
+
+ /* performance counters are shared resources
+ * assign each hyperthread its own set
+ * (re-use the ESCR0 register, seems safe
+ * and keeps the cccr_val the same)
+ */
+ if (!ht_num) {
+ /* logical cpu 0 */
+ perfctr_msr = MSR_P4_IQ_PERFCTR0;
+ evntsel_msr = MSR_P4_CRU_ESCR0;
+ cccr_msr = MSR_P4_IQ_CCCR0;
+ cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
+ } else {
+ /* logical cpu 1 */
+ perfctr_msr = MSR_P4_IQ_PERFCTR1;
+ evntsel_msr = MSR_P4_CRU_ESCR0;
+ cccr_msr = MSR_P4_IQ_CCCR1;
+ cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
+ }
+
+ evntsel = P4_ESCR_EVENT_SELECT(0x3F)
+ | P4_ESCR_OS
+ | P4_ESCR_USR;
+
+ cccr_val |= P4_CCCR_THRESHOLD(15)
+ | P4_CCCR_COMPLEMENT
+ | P4_CCCR_COMPARE
+ | P4_CCCR_REQUIRED;
+
+ wrmsr(evntsel_msr, evntsel, 0);
+ wrmsr(cccr_msr, cccr_val, 0);
+ write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ cccr_val |= P4_CCCR_ENABLE;
+ wrmsr(cccr_msr, cccr_val, 0);
+ wd->perfctr_msr = perfctr_msr;
+ wd->evntsel_msr = evntsel_msr;
+ wd->cccr_msr = cccr_msr;
+ return 1;
+}
+
+static void stop_p4_watchdog(void *arg)
+{
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+ wrmsr(wd->cccr_msr, 0, 0);
+ wrmsr(wd->evntsel_msr, 0, 0);
+}
+
+static int p4_reserve(void)
+{
+ if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
+ return 0;
+#ifdef CONFIG_SMP
+ if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
+ goto fail1;
+#endif
+ if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
+ goto fail2;
+ /* RED-PEN why is ESCR1 not reserved here? */
+ return 1;
+ fail2:
+#ifdef CONFIG_SMP
+ if (smp_num_siblings > 1)
+ release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
+ fail1:
+#endif
+ release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
+ return 0;
+}
+
+static void p4_unreserve(void)
+{
+#ifdef CONFIG_SMP
+ if (smp_num_siblings > 1)
+ release_evntsel_nmi(MSR_P4_IQ_PERFCTR1);
+#endif
+ release_evntsel_nmi(MSR_P4_IQ_PERFCTR0);
+ release_perfctr_nmi(MSR_P4_CRU_ESCR0);
+}
+
+static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+{
+ unsigned dummy;
+ /*
+ * P4 quirks:
+ * - An overflown perfctr will assert its interrupt
+ * until the OVF flag in its CCCR is cleared.
+ * - LVTPC is masked on interrupt and must be
+ * unmasked by the LVTPC handler.
+ */
+ rdmsrl(wd->cccr_msr, dummy);
+ dummy &= ~P4_CCCR_OVF;
+ wrmsrl(wd->cccr_msr, dummy);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ /* start the cycle over again */
+ write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
+}
+
+static struct wd_ops p4_wd_ops = {
+ .reserve = p4_reserve,
+ .unreserve = p4_unreserve,
+ .setup = setup_p4_watchdog,
+ .rearm = p4_rearm,
+ .stop = stop_p4_watchdog,
+ /* RED-PEN this is wrong for the other sibling */
+ .perfctr = MSR_P4_BPU_PERFCTR0,
+ .evntsel = MSR_P4_BSU_ESCR0,
+ .checkbit = 1ULL<<39,
+};
+
+/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully
+ all future Intel CPUs. */
+
+#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
+static int setup_intel_arch_watchdog(unsigned nmi_hz)
+{
+ unsigned int ebx;
+ union cpuid10_eax eax;
+ unsigned int unused;
+ unsigned int perfctr_msr, evntsel_msr;
+ unsigned int evntsel;
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * Unhalted Core Cycles Event or not.
+ * NOTE: Corresponding bit = 0 in ebx indicates event present.
+ */
+ cpuid(10, &(eax.full), &ebx, &unused, &unused);
+ if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+ (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+ return 0;
+
+ perfctr_msr = MSR_ARCH_PERFMON_PERFCTR1;
+ evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL1;
+
+ wrmsrl(perfctr_msr, 0UL);
+
+ evntsel = ARCH_PERFMON_EVENTSEL_INT
+ | ARCH_PERFMON_EVENTSEL_OS
+ | ARCH_PERFMON_EVENTSEL_USR
+ | ARCH_PERFMON_NMI_EVENT_SEL
+ | ARCH_PERFMON_NMI_EVENT_UMASK;
+
+ /* setup the timer */
+ wrmsr(evntsel_msr, evntsel, 0);
+ nmi_hz = adjust_for_32bit_ctr(nmi_hz);
+ write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsr(evntsel_msr, evntsel, 0);
+
+ wd->perfctr_msr = perfctr_msr;
+ wd->evntsel_msr = evntsel_msr;
+ wd->cccr_msr = 0; //unused
+ wd_ops->checkbit = 1ULL << (eax.split.bit_width - 1);
+ return 1;
+}
+
+static struct wd_ops intel_arch_wd_ops = {
+ .reserve = single_msr_reserve,
+ .unreserve = single_msr_unreserve,
+ .setup = setup_intel_arch_watchdog,
+ .rearm = p6_rearm,
+ .stop = single_msr_stop_watchdog,
+ .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
+ .evntsel = MSR_ARCH_PERFMON_EVENTSEL0,
+};
+
+static void probe_nmi_watchdog(void)
+{
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
+ boot_cpu_data.x86 != 16)
+ return;
+ wd_ops = &k7_wd_ops;
+ break;
+ case X86_VENDOR_INTEL:
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ wd_ops = &intel_arch_wd_ops;
+ break;
+ }
+ switch (boot_cpu_data.x86) {
+ case 6:
+ if (boot_cpu_data.x86_model > 0xd)
+ return;
+
+ wd_ops = &p6_wd_ops;
+ break;
+ case 15:
+ if (boot_cpu_data.x86_model > 0x4)
+ return;
+
+ wd_ops = &p4_wd_ops;
+ break;
+ default:
+ return;
+ }
+ break;
+ }
+}
+
+/* Interface to nmi.c */
+
+int lapic_watchdog_init(unsigned nmi_hz)
+{
+ if (!wd_ops) {
+ probe_nmi_watchdog();
+ if (!wd_ops)
+ return -1;
+ }
+
+ if (!(wd_ops->setup(nmi_hz))) {
+ printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
+ raw_smp_processor_id());
+ return -1;
+ }
+
+ return 0;
+}
+
+void lapic_watchdog_stop(void)
+{
+ if (wd_ops)
+ wd_ops->stop(NULL);
+}
+
+unsigned lapic_adjust_nmi_hz(unsigned hz)
+{
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+ if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
+ wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
+ hz = adjust_for_32bit_ctr(hz);
+ return hz;
+}
+
+int lapic_wd_event(unsigned nmi_hz)
+{
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+ u64 ctr;
+ rdmsrl(wd->perfctr_msr, ctr);
+ if (ctr & wd_ops->checkbit) { /* perfctr still running? */
+ return 0;
+ }
+ wd_ops->rearm(wd, nmi_hz);
+ return 1;
+}
+
+int lapic_watchdog_ok(void)
+{
+ return wd_ops != NULL;
+}
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
index 47e3ebbfb28d..89d91e6cc972 100644
--- a/arch/i386/kernel/cpu/proc.c
+++ b/arch/i386/kernel/cpu/proc.c
@@ -72,8 +72,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
"stc",
"100mhzsteps",
"hwpstate",
- NULL,
- NULL, /* constant_tsc - moved to flags */
+ "", /* constant_tsc - moved to flags */
/* nothing */
};
struct cpuinfo_x86 *c = v;
diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c
index 9317f7414989..50076f22e90f 100644
--- a/arch/i386/kernel/cpu/rise.c
+++ b/arch/i386/kernel/cpu/rise.c
@@ -50,12 +50,3 @@ int __init rise_init_cpu(void)
return 0;
}
-//early_arch_initcall(rise_init_cpu);
-
-static int __init rise_exit_cpu(void)
-{
- cpu_devs[X86_VENDOR_RISE] = NULL;
- return 0;
-}
-
-late_initcall(rise_exit_cpu);
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c
index 5678d46863c6..6471a5a13202 100644
--- a/arch/i386/kernel/cpu/transmeta.c
+++ b/arch/i386/kernel/cpu/transmeta.c
@@ -112,13 +112,3 @@ int __init transmeta_init_cpu(void)
cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev;
return 0;
}
-
-//early_arch_initcall(transmeta_init_cpu);
-
-static int __init transmeta_exit_cpu(void)
-{
- cpu_devs[X86_VENDOR_TRANSMETA] = NULL;
- return 0;
-}
-
-late_initcall(transmeta_exit_cpu);
diff --git a/arch/i386/kernel/cpu/umc.c b/arch/i386/kernel/cpu/umc.c
index 1bf3f87e9c5b..a7a4e75bdcd7 100644
--- a/arch/i386/kernel/cpu/umc.c
+++ b/arch/i386/kernel/cpu/umc.c
@@ -24,13 +24,3 @@ int __init umc_init_cpu(void)
cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev;
return 0;
}
-
-//early_arch_initcall(umc_init_cpu);
-
-static int __init umc_exit_cpu(void)
-{
- cpu_devs[X86_VENDOR_UMC] = NULL;
- return 0;
-}
-
-late_initcall(umc_exit_cpu);
diff --git a/arch/i386/kernel/doublefault.c b/arch/i386/kernel/doublefault.c
index b4d14c2eb345..265c5597efb0 100644
--- a/arch/i386/kernel/doublefault.c
+++ b/arch/i386/kernel/doublefault.c
@@ -33,7 +33,7 @@ static void doublefault_fn(void)
printk("double fault, tss at %08lx\n", tss);
if (ptr_ok(tss)) {
- struct tss_struct *t = (struct tss_struct *)tss;
+ struct i386_hw_tss *t = (struct i386_hw_tss *)tss;
printk("eip = %08lx, esp = %08lx\n", t->eip, t->esp);
@@ -49,18 +49,21 @@ static void doublefault_fn(void)
}
struct tss_struct doublefault_tss __cacheline_aligned = {
- .esp0 = STACK_START,
- .ss0 = __KERNEL_DS,
- .ldt = 0,
- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+ .x86_tss = {
+ .esp0 = STACK_START,
+ .ss0 = __KERNEL_DS,
+ .ldt = 0,
+ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
- .eip = (unsigned long) doublefault_fn,
- .eflags = X86_EFLAGS_SF | 0x2, /* 0x2 bit is always set */
- .esp = STACK_START,
- .es = __USER_DS,
- .cs = __KERNEL_CS,
- .ss = __KERNEL_DS,
- .ds = __USER_DS,
+ .eip = (unsigned long) doublefault_fn,
+ /* 0x2 bit is always set */
+ .eflags = X86_EFLAGS_SF | 0x2,
+ .esp = STACK_START,
+ .es = __USER_DS,
+ .cs = __KERNEL_CS,
+ .ss = __KERNEL_DS,
+ .ds = __USER_DS,
- .__cr3 = __pa(swapper_pg_dir)
+ .__cr3 = __pa(swapper_pg_dir)
+ }
};
diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c
index 70f39560846a..9645bb51f76a 100644
--- a/arch/i386/kernel/e820.c
+++ b/arch/i386/kernel/e820.c
@@ -161,26 +161,27 @@ static struct resource standard_io_resources[] = { {
static int __init romsignature(const unsigned char *rom)
{
+ const unsigned short * const ptr = (const unsigned short *)rom;
unsigned short sig;
- return probe_kernel_address((const unsigned short *)rom, sig) == 0 &&
- sig == ROMSIGNATURE;
+ return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
}
-static int __init romchecksum(unsigned char *rom, unsigned long length)
+static int __init romchecksum(const unsigned char *rom, unsigned long length)
{
- unsigned char sum;
+ unsigned char sum, c;
- for (sum = 0; length; length--)
- sum += *rom++;
- return sum == 0;
+ for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
+ sum += c;
+ return !length && !sum;
}
static void __init probe_roms(void)
{
+ const unsigned char *rom;
unsigned long start, length, upper;
- unsigned char *rom;
- int i;
+ unsigned char c;
+ int i;
/* video rom */
upper = adapter_rom_resources[0].start;
@@ -191,8 +192,11 @@ static void __init probe_roms(void)
video_rom_resource.start = start;
+ if (probe_kernel_address(rom + 2, c) != 0)
+ continue;
+
/* 0 < length <= 0x7f * 512, historically */
- length = rom[2] * 512;
+ length = c * 512;
/* if checksum okay, trust length byte */
if (length && romchecksum(rom, length))
@@ -226,8 +230,11 @@ static void __init probe_roms(void)
if (!romsignature(rom))
continue;
+ if (probe_kernel_address(rom + 2, c) != 0)
+ continue;
+
/* 0 < length <= 0x7f * 512, historically */
- length = rom[2] * 512;
+ length = c * 512;
/* but accept any length that fits if checksum okay */
if (!length || start + length > upper || !romchecksum(rom, length))
@@ -386,10 +393,8 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
____________________33__
______________________4_
*/
- printk("sanitize start\n");
/* if there's only one memory region, don't bother */
if (*pnr_map < 2) {
- printk("sanitize bail 0\n");
return -1;
}
@@ -398,7 +403,6 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
/* bail out if we find any unreasonable addresses in bios map */
for (i=0; i<old_nr; i++)
if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
- printk("sanitize bail 1\n");
return -1;
}
@@ -494,7 +498,6 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
*pnr_map = new_nr;
- printk("sanitize end\n");
return 0;
}
@@ -525,7 +528,6 @@ int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
unsigned long long size = biosmap->size;
unsigned long long end = start + size;
unsigned long type = biosmap->type;
- printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type);
/* Overflow in 64 bits? Ignore the memory map. */
if (start > end)
@@ -536,17 +538,11 @@ int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
* Not right. Fix it up.
*/
if (type == E820_RAM) {
- printk("copy_e820_map() type is E820_RAM\n");
if (start < 0x100000ULL && end > 0xA0000ULL) {
- printk("copy_e820_map() lies in range...\n");
- if (start < 0xA0000ULL) {
- printk("copy_e820_map() start < 0xA0000ULL\n");
+ if (start < 0xA0000ULL)
add_memory_region(start, 0xA0000ULL-start, type);
- }
- if (end <= 0x100000ULL) {
- printk("copy_e820_map() end <= 0x100000ULL\n");
+ if (end <= 0x100000ULL)
continue;
- }
start = 0x100000ULL;
size = end - start;
}
@@ -818,6 +814,26 @@ void __init limit_regions(unsigned long long size)
print_memory_map("limit_regions endfunc");
}
+/*
+ * This function checks if any part of the range <start,end> is mapped
+ * with type.
+ */
+int
+e820_any_mapped(u64 start, u64 end, unsigned type)
+{
+ int i;
+ for (i = 0; i < e820.nr_map; i++) {
+ const struct e820entry *ei = &e820.map[i];
+ if (type && ei->type != type)
+ continue;
+ if (ei->addr >= end || ei->addr + ei->size <= start)
+ continue;
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(e820_any_mapped);
+
/*
* This function checks if the entire range <start,end> is mapped with type.
*
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
index 8f9c624ace6f..dd9e7faafa7c 100644
--- a/arch/i386/kernel/efi.c
+++ b/arch/i386/kernel/efi.c
@@ -69,13 +69,11 @@ static void efi_call_phys_prelog(void) __acquires(efi_rt_lock)
{
unsigned long cr4;
unsigned long temp;
- struct Xgt_desc_struct *cpu_gdt_descr;
+ struct Xgt_desc_struct gdt_descr;
spin_lock(&efi_rt_lock);
local_irq_save(efi_rt_eflags);
- cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0);
-
/*
* If I don't have PSE, I should just duplicate two entries in page
* directory. If I have PSE, I just need to duplicate one entry in
@@ -105,17 +103,19 @@ static void efi_call_phys_prelog(void) __acquires(efi_rt_lock)
*/
local_flush_tlb();
- cpu_gdt_descr->address = __pa(cpu_gdt_descr->address);
- load_gdt(cpu_gdt_descr);
+ gdt_descr.address = __pa(get_cpu_gdt_table(0));
+ gdt_descr.size = GDT_SIZE - 1;
+ load_gdt(&gdt_descr);
}
static void efi_call_phys_epilog(void) __releases(efi_rt_lock)
{
unsigned long cr4;
- struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0);
+ struct Xgt_desc_struct gdt_descr;
- cpu_gdt_descr->address = (unsigned long)__va(cpu_gdt_descr->address);
- load_gdt(cpu_gdt_descr);
+ gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
+ gdt_descr.size = GDT_SIZE - 1;
+ load_gdt(&gdt_descr);
cr4 = read_cr4();
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 18bddcb8e9e8..b1f16ee65e4d 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -15,7 +15,7 @@
* I changed all the .align's to 4 (16 byte alignment), as that's faster
* on a 486.
*
- * Stack layout in 'ret_from_system_call':
+ * Stack layout in 'syscall_exit':
* ptrace needs to have all regs on the stack.
* if the order here is changed, it needs to be
* updated in fork.c:copy_process, signal.c:do_signal,
@@ -132,7 +132,7 @@ VM_MASK = 0x00020000
movl $(__USER_DS), %edx; \
movl %edx, %ds; \
movl %edx, %es; \
- movl $(__KERNEL_PDA), %edx; \
+ movl $(__KERNEL_PERCPU), %edx; \
movl %edx, %fs
#define RESTORE_INT_REGS \
@@ -305,16 +305,12 @@ sysenter_past_esp:
pushl $(__USER_CS)
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET cs, 0*/
-#ifndef CONFIG_COMPAT_VDSO
/*
* Push current_thread_info()->sysenter_return to the stack.
* A tiny bit of offset fixup is necessary - 4*4 means the 4 words
* pushed above; +8 corresponds to copy_thread's esp0 setting.
*/
pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
-#else
- pushl $SYSENTER_RETURN
-#endif
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET eip, 0
@@ -342,7 +338,7 @@ sysenter_past_esp:
jae syscall_badsys
call *sys_call_table(,%eax,4)
movl %eax,PT_EAX(%esp)
- DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX)
+ DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx
@@ -560,9 +556,7 @@ END(syscall_badsys)
#define FIXUP_ESPFIX_STACK \
/* since we are on a wrong stack, we cant make it a C code :( */ \
- movl %fs:PDA_cpu, %ebx; \
- PER_CPU(cpu_gdt_descr, %ebx); \
- movl GDS_address(%ebx), %ebx; \
+ PER_CPU(gdt_page, %ebx); \
GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
addl %esp, %eax; \
pushl $__KERNEL_DS; \
@@ -635,7 +629,7 @@ ENTRY(name) \
SAVE_ALL; \
TRACE_IRQS_OFF \
movl %esp,%eax; \
- call smp_/**/name; \
+ call smp_##name; \
jmp ret_from_intr; \
CFI_ENDPROC; \
ENDPROC(name)
@@ -643,11 +637,6 @@ ENDPROC(name)
/* The include is where all of the SMP etc. interrupts come from */
#include "entry_arch.h"
-/* This alternate entry is needed because we hijack the apic LVTT */
-#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC)
-BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR)
-#endif
-
KPROBE_ENTRY(page_fault)
RING0_EC_FRAME
pushl $do_page_fault
@@ -686,7 +675,7 @@ error_code:
pushl %fs
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET fs, 0*/
- movl $(__KERNEL_PDA), %ecx
+ movl $(__KERNEL_PERCPU), %ecx
movl %ecx, %fs
UNWIND_ESPFIX_STACK
popl %ecx
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index 3fa7f9389afe..9b10af65faaa 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -34,17 +34,32 @@
/*
* This is how much memory *in addition to the memory covered up to
- * and including _end* we need mapped initially. We need one bit for
- * each possible page, but only in low memory, which means
- * 2^32/4096/8 = 128K worst case (4G/4G split.)
+ * and including _end* we need mapped initially.
+ * We need:
+ * - one bit for each possible page, but only in low memory, which means
+ * 2^32/4096/8 = 128K worst case (4G/4G split.)
+ * - enough space to map all low memory, which means
+ * (2^32/4096) / 1024 pages (worst case, non PAE)
+ * (2^32/4096) / 512 + 4 pages (worst case for PAE)
+ * - a few pages for allocator use before the kernel pagetable has
+ * been set up
*
* Modulo rounding, each megabyte assigned here requires a kilobyte of
* memory, which is currently unreclaimed.
*
* This should be a multiple of a page.
*/
-#define INIT_MAP_BEYOND_END (128*1024)
+LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
+#if PTRS_PER_PMD > 1
+PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
+#else
+PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
+#endif
+BOOTBITMAP_SIZE = LOW_PAGES / 8
+ALLOCATOR_SLOP = 4
+
+INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
/*
* 32-bit kernel entrypoint; only used by the boot CPU. On entry,
@@ -147,8 +162,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
/*
* Non-boot CPU entry point; entered from trampoline.S
* We can't lgdt here, because lgdt itself uses a data segment, but
- * we know the trampoline has already loaded the boot_gdt_table GDT
- * for us.
+ * we know the trampoline has already loaded the boot_gdt for us.
*
* If cpu hotplug is not supported then this code can go in init section
* which will be freed later
@@ -318,12 +332,12 @@ is386: movl $2,%ecx # set MP
movl %eax,%cr0
call check_x87
- call setup_pda
lgdt early_gdt_descr
lidt idt_descr
ljmp $(__KERNEL_CS),$1f
1: movl $(__KERNEL_DS),%eax # reload all the segment registers
movl %eax,%ss # after changing gdt.
+ movl %eax,%fs # gets reset once there's real percpu
movl $(__USER_DS),%eax # DS/ES contains default USER segment
movl %eax,%ds
@@ -333,16 +347,17 @@ is386: movl $2,%ecx # set MP
movl %eax,%gs
lldt %ax
- movl $(__KERNEL_PDA),%eax
- mov %eax,%fs
-
cld # gcc2 wants the direction flag cleared at all times
pushl $0 # fake return address for unwinder
#ifdef CONFIG_SMP
movb ready, %cl
movb $1, ready
cmpb $0,%cl # the first CPU calls start_kernel
- jne initialize_secondary # all other CPUs call initialize_secondary
+ je 1f
+ movl $(__KERNEL_PERCPU), %eax
+ movl %eax,%fs # set this cpu's percpu
+ jmp initialize_secondary # all other CPUs call initialize_secondary
+1:
#endif /* CONFIG_SMP */
jmp start_kernel
@@ -366,23 +381,6 @@ check_x87:
ret
/*
- * Point the GDT at this CPU's PDA. On boot this will be
- * cpu_gdt_table and boot_pda; for secondary CPUs, these will be
- * that CPU's GDT and PDA.
- */
-ENTRY(setup_pda)
- /* get the PDA pointer */
- movl start_pda, %eax
-
- /* slot the PDA address into the GDT */
- mov early_gdt_descr+2, %ecx
- mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */
- shr $16, %eax
- mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */
- mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */
- ret
-
-/*
* setup_idt
*
* sets up a idt with 256 entries pointing to
@@ -554,9 +552,6 @@ ENTRY(empty_zero_page)
* This starts the data section.
*/
.data
-ENTRY(start_pda)
- .long boot_pda
-
ENTRY(stack_start)
.long init_thread_union+THREAD_SIZE
.long __BOOT_DS
@@ -588,7 +583,7 @@ fault_msg:
.word 0 # 32 bit align gdt_desc.address
boot_gdt_descr:
.word __BOOT_DS+7
- .long boot_gdt_table - __PAGE_OFFSET
+ .long boot_gdt - __PAGE_OFFSET
.word 0 # 32-bit align idt_desc.address
idt_descr:
@@ -599,67 +594,14 @@ idt_descr:
.word 0 # 32 bit align gdt_desc.address
ENTRY(early_gdt_descr)
.word GDT_ENTRIES*8-1
- .long cpu_gdt_table
+ .long per_cpu__gdt_page /* Overwritten for secondary CPUs */
/*
- * The boot_gdt_table must mirror the equivalent in setup.S and is
+ * The boot_gdt must mirror the equivalent in setup.S and is
* used only for booting.
*/
.align L1_CACHE_BYTES
-ENTRY(boot_gdt_table)
+ENTRY(boot_gdt)
.fill GDT_ENTRY_BOOT_CS,8,0
.quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */
.quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */
-
-/*
- * The Global Descriptor Table contains 28 quadwords, per-CPU.
- */
- .align L1_CACHE_BYTES
-ENTRY(cpu_gdt_table)
- .quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x0000000000000000 /* 0x0b reserved */
- .quad 0x0000000000000000 /* 0x13 reserved */
- .quad 0x0000000000000000 /* 0x1b reserved */
- .quad 0x0000000000000000 /* 0x20 unused */
- .quad 0x0000000000000000 /* 0x28 unused */
- .quad 0x0000000000000000 /* 0x33 TLS entry 1 */
- .quad 0x0000000000000000 /* 0x3b TLS entry 2 */
- .quad 0x0000000000000000 /* 0x43 TLS entry 3 */
- .quad 0x0000000000000000 /* 0x4b reserved */
- .quad 0x0000000000000000 /* 0x53 reserved */
- .quad 0x0000000000000000 /* 0x5b reserved */
-
- .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
- .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
- .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */
- .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */
-
- .quad 0x0000000000000000 /* 0x80 TSS descriptor */
- .quad 0x0000000000000000 /* 0x88 LDT descriptor */
-
- /*
- * Segments used for calling PnP BIOS have byte granularity.
- * They code segments and data segments have fixed 64k limits,
- * the transfer segment sizes are set at run time.
- */
- .quad 0x00409a000000ffff /* 0x90 32-bit code */
- .quad 0x00009a000000ffff /* 0x98 16-bit code */
- .quad 0x000092000000ffff /* 0xa0 16-bit data */
- .quad 0x0000920000000000 /* 0xa8 16-bit data */
- .quad 0x0000920000000000 /* 0xb0 16-bit data */
-
- /*
- * The APM segments have byte granularity and their bases
- * are set at run time. All have 64k limits.
- */
- .quad 0x00409a000000ffff /* 0xb8 APM CS code */
- .quad 0x00009a000000ffff /* 0xc0 APM CS 16 code (16 bit) */
- .quad 0x004092000000ffff /* 0xc8 APM DS data */
-
- .quad 0x00c0920000000000 /* 0xd0 - ESPFIX SS */
- .quad 0x00cf92000000ffff /* 0xd8 - PDA */
- .quad 0x0000000000000000 /* 0xe0 - unused */
- .quad 0x0000000000000000 /* 0xe8 - unused */
- .quad 0x0000000000000000 /* 0xf0 - unused */
- .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */
-
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
index 4afe26e86260..e3d4b73bfdb0 100644
--- a/arch/i386/kernel/i386_ksyms.c
+++ b/arch/i386/kernel/i386_ksyms.c
@@ -28,5 +28,3 @@ EXPORT_SYMBOL(__read_lock_failed);
#endif
EXPORT_SYMBOL(csum_partial);
-
-EXPORT_SYMBOL(_proxy_pda);
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 89d85d244926..1b623cda3a64 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -35,6 +35,7 @@
#include <linux/msi.h>
#include <linux/htirq.h>
#include <linux/freezer.h>
+#include <linux/kthread.h>
#include <asm/io.h>
#include <asm/smp.h>
@@ -661,8 +662,6 @@ static int balanced_irq(void *unused)
unsigned long prev_balance_time = jiffies;
long time_remaining = balanced_irq_interval;
- daemonize("kirqd");
-
/* push everything to CPU 0 to give us a starting point. */
for (i = 0 ; i < NR_IRQS ; i++) {
irq_desc[i].pending_mask = cpumask_of_cpu(0);
@@ -722,10 +721,9 @@ static int __init balanced_irq_init(void)
}
printk(KERN_INFO "Starting balanced_irq\n");
- if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0)
+ if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
return 0;
- else
- printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
+ printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
failed:
for_each_possible_cpu(i) {
kfree(irq_cpu_data[i].irq_delta);
@@ -1403,10 +1401,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
enable_8259A_irq(0);
}
-static inline void UNEXPECTED_IO_APIC(void)
-{
-}
-
void __init print_IO_APIC(void)
{
int apic, i;
@@ -1446,34 +1440,12 @@ void __init print_IO_APIC(void)
printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
- if (reg_00.bits.ID >= get_physical_broadcast())
- UNEXPECTED_IO_APIC();
- if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
- UNEXPECTED_IO_APIC();
printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
- if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
- (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
- (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
- (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
- (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
- (reg_01.bits.entries != 0x2E) &&
- (reg_01.bits.entries != 0x3F)
- )
- UNEXPECTED_IO_APIC();
printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
- if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
- (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
- (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
- (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
- (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
- )
- UNEXPECTED_IO_APIC();
- if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
- UNEXPECTED_IO_APIC();
/*
* Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
@@ -1483,8 +1455,6 @@ void __init print_IO_APIC(void)
if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
- if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
- UNEXPECTED_IO_APIC();
}
/*
@@ -1496,8 +1466,6 @@ void __init print_IO_APIC(void)
reg_03.raw != reg_01.raw) {
printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
- if (reg_03.bits.__reserved_1)
- UNEXPECTED_IO_APIC();
}
printk(KERN_DEBUG ".... IRQ redirection table:\n");
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c
index 498e8bc197d5..d1e42e0dbe67 100644
--- a/arch/i386/kernel/ioport.c
+++ b/arch/i386/kernel/ioport.c
@@ -16,6 +16,7 @@
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/thread_info.h>
+#include <linux/syscalls.h>
/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
@@ -113,7 +114,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
* Reset the owner so that a process switch will not set
* tss->io_bitmap_base to IO_BITMAP_OFFSET.
*/
- tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
+ tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
tss->io_bitmap_owner = NULL;
put_cpu();
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 8db8d514c9c0..d2daf672f4a2 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -24,6 +24,9 @@
DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
EXPORT_PER_CPU_SYMBOL(irq_stat);
+DEFINE_PER_CPU(struct pt_regs *, irq_regs);
+EXPORT_PER_CPU_SYMBOL(irq_regs);
+
/*
* 'what should we do if we get a hw irq event on an illegal vector'.
* each architecture has to answer this themselves.
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c
index 4f5983c98669..0952eccd8f28 100644
--- a/arch/i386/kernel/mpparse.c
+++ b/arch/i386/kernel/mpparse.c
@@ -477,7 +477,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
}
++mpc_record;
}
- clustered_apic_check();
+ setup_apic_routing();
if (!num_processors)
printk(KERN_ERR "SMP mptable: no processors registered!\n");
return num_processors;
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 84c3497efb60..33cf2f3c444f 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -20,7 +20,6 @@
#include <linux/sysdev.h>
#include <linux/sysctl.h>
#include <linux/percpu.h>
-#include <linux/dmi.h>
#include <linux/kprobes.h>
#include <linux/cpumask.h>
#include <linux/kernel_stat.h>
@@ -28,30 +27,14 @@
#include <asm/smp.h>
#include <asm/nmi.h>
#include <asm/kdebug.h>
-#include <asm/intel_arch_perfmon.h>
#include "mach_traps.h"
int unknown_nmi_panic;
int nmi_watchdog_enabled;
-/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
- * evtsel_nmi_owner tracks the ownership of the event selection
- * - different performance counters/ event selection may be reserved for
- * different subsystems this reservation system just tries to coordinate
- * things a little
- */
-
-/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
- * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
- */
-#define NMI_MAX_COUNTER_BITS 66
-#define NMI_MAX_COUNTER_LONGS BITS_TO_LONGS(NMI_MAX_COUNTER_BITS)
-
-static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner[NMI_MAX_COUNTER_LONGS]);
-static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[NMI_MAX_COUNTER_LONGS]);
-
static cpumask_t backtrace_mask = CPU_MASK_NONE;
+
/* nmi_active:
* >0: the lapic NMI watchdog is active, but can be disabled
* <0: the lapic NMI watchdog has not been set up, and cannot
@@ -63,206 +46,11 @@ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
unsigned int nmi_watchdog = NMI_DEFAULT;
static unsigned int nmi_hz = HZ;
-struct nmi_watchdog_ctlblk {
- int enabled;
- u64 check_bit;
- unsigned int cccr_msr;
- unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
- unsigned int evntsel_msr; /* the MSR to select the events to handle */
-};
-static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
+static DEFINE_PER_CPU(short, wd_enabled);
/* local prototypes */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
-extern void show_registers(struct pt_regs *regs);
-extern int unknown_nmi_panic;
-
-/* converts an msr to an appropriate reservation bit */
-static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
-{
- /* returns the bit offset of the performance counter register */
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- return (msr - MSR_K7_PERFCTR0);
- case X86_VENDOR_INTEL:
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
- return (msr - MSR_ARCH_PERFMON_PERFCTR0);
-
- switch (boot_cpu_data.x86) {
- case 6:
- return (msr - MSR_P6_PERFCTR0);
- case 15:
- return (msr - MSR_P4_BPU_PERFCTR0);
- }
- }
- return 0;
-}
-
-/* converts an msr to an appropriate reservation bit */
-static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
-{
- /* returns the bit offset of the event selection register */
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- return (msr - MSR_K7_EVNTSEL0);
- case X86_VENDOR_INTEL:
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
- return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
-
- switch (boot_cpu_data.x86) {
- case 6:
- return (msr - MSR_P6_EVNTSEL0);
- case 15:
- return (msr - MSR_P4_BSU_ESCR0);
- }
- }
- return 0;
-}
-
-/* checks for a bit availability (hack for oprofile) */
-int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
-{
- int cpu;
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
- for_each_possible_cpu (cpu) {
- if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
- return 0;
- }
- return 1;
-}
-
-/* checks the an msr for availability */
-int avail_to_resrv_perfctr_nmi(unsigned int msr)
-{
- unsigned int counter;
- int cpu;
-
- counter = nmi_perfctr_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
- for_each_possible_cpu (cpu) {
- if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
- return 0;
- }
- return 1;
-}
-
-static int __reserve_perfctr_nmi(int cpu, unsigned int msr)
-{
- unsigned int counter;
- if (cpu < 0)
- cpu = smp_processor_id();
-
- counter = nmi_perfctr_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
- if (!test_and_set_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
- return 1;
- return 0;
-}
-
-static void __release_perfctr_nmi(int cpu, unsigned int msr)
-{
- unsigned int counter;
- if (cpu < 0)
- cpu = smp_processor_id();
-
- counter = nmi_perfctr_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
- clear_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]);
-}
-
-int reserve_perfctr_nmi(unsigned int msr)
-{
- int cpu, i;
- for_each_possible_cpu (cpu) {
- if (!__reserve_perfctr_nmi(cpu, msr)) {
- for_each_possible_cpu (i) {
- if (i >= cpu)
- break;
- __release_perfctr_nmi(i, msr);
- }
- return 0;
- }
- }
- return 1;
-}
-
-void release_perfctr_nmi(unsigned int msr)
-{
- int cpu;
- for_each_possible_cpu (cpu) {
- __release_perfctr_nmi(cpu, msr);
- }
-}
-
-int __reserve_evntsel_nmi(int cpu, unsigned int msr)
-{
- unsigned int counter;
- if (cpu < 0)
- cpu = smp_processor_id();
-
- counter = nmi_evntsel_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
- if (!test_and_set_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]))
- return 1;
- return 0;
-}
-
-static void __release_evntsel_nmi(int cpu, unsigned int msr)
-{
- unsigned int counter;
- if (cpu < 0)
- cpu = smp_processor_id();
-
- counter = nmi_evntsel_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
- clear_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]);
-}
-
-int reserve_evntsel_nmi(unsigned int msr)
-{
- int cpu, i;
- for_each_possible_cpu (cpu) {
- if (!__reserve_evntsel_nmi(cpu, msr)) {
- for_each_possible_cpu (i) {
- if (i >= cpu)
- break;
- __release_evntsel_nmi(i, msr);
- }
- return 0;
- }
- }
- return 1;
-}
-
-void release_evntsel_nmi(unsigned int msr)
-{
- int cpu;
- for_each_possible_cpu (cpu) {
- __release_evntsel_nmi(cpu, msr);
- }
-}
-
-static __cpuinit inline int nmi_known_cpu(void)
-{
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)
- || (boot_cpu_data.x86 == 16));
- case X86_VENDOR_INTEL:
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
- return 1;
- else
- return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
- }
- return 0;
-}
-
static int endflag __initdata = 0;
#ifdef CONFIG_SMP
@@ -284,28 +72,6 @@ static __init void nmi_cpu_busy(void *data)
}
#endif
-static unsigned int adjust_for_32bit_ctr(unsigned int hz)
-{
- u64 counter_val;
- unsigned int retval = hz;
-
- /*
- * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
- * are writable, with higher bits sign extending from bit 31.
- * So, we can only program the counter with 31 bit values and
- * 32nd bit should be 1, for 33.. to be 1.
- * Find the appropriate nmi_hz
- */
- counter_val = (u64)cpu_khz * 1000;
- do_div(counter_val, retval);
- if (counter_val > 0x7fffffffULL) {
- u64 count = (u64)cpu_khz * 1000;
- do_div(count, 0x7fffffffUL);
- retval = count + 1;
- }
- return retval;
-}
-
static int __init check_nmi_watchdog(void)
{
unsigned int *prev_nmi_count;
@@ -338,14 +104,14 @@ static int __init check_nmi_watchdog(void)
if (!cpu_isset(cpu, cpu_callin_map))
continue;
#endif
- if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
+ if (!per_cpu(wd_enabled, cpu))
continue;
if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
cpu,
prev_nmi_count[cpu],
nmi_count(cpu));
- per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
+ per_cpu(wd_enabled, cpu) = 0;
atomic_dec(&nmi_active);
}
}
@@ -359,16 +125,8 @@ static int __init check_nmi_watchdog(void)
/* now that we know it works we can reduce NMI frequency to
something more reasonable; makes a difference in some configs */
- if (nmi_watchdog == NMI_LOCAL_APIC) {
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- nmi_hz = 1;
-
- if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
- wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
- nmi_hz = adjust_for_32bit_ctr(nmi_hz);
- }
- }
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ nmi_hz = lapic_adjust_nmi_hz(1);
kfree(prev_nmi_count);
return 0;
@@ -391,85 +149,8 @@ static int __init setup_nmi_watchdog(char *str)
__setup("nmi_watchdog=", setup_nmi_watchdog);
-static void disable_lapic_nmi_watchdog(void)
-{
- BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
-
- if (atomic_read(&nmi_active) <= 0)
- return;
-
- on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
-
- BUG_ON(atomic_read(&nmi_active) != 0);
-}
-
-static void enable_lapic_nmi_watchdog(void)
-{
- BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
-
- /* are we already enabled */
- if (atomic_read(&nmi_active) != 0)
- return;
-
- /* are we lapic aware */
- if (nmi_known_cpu() <= 0)
- return;
- on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
- touch_nmi_watchdog();
-}
-
-void disable_timer_nmi_watchdog(void)
-{
- BUG_ON(nmi_watchdog != NMI_IO_APIC);
-
- if (atomic_read(&nmi_active) <= 0)
- return;
-
- disable_irq(0);
- on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
-
- BUG_ON(atomic_read(&nmi_active) != 0);
-}
-
-void enable_timer_nmi_watchdog(void)
-{
- BUG_ON(nmi_watchdog != NMI_IO_APIC);
-
- if (atomic_read(&nmi_active) == 0) {
- touch_nmi_watchdog();
- on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
- enable_irq(0);
- }
-}
-
-static void __acpi_nmi_disable(void *__unused)
-{
- apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
-}
-
-/*
- * Disable timer based NMIs on all CPUs:
- */
-void acpi_nmi_disable(void)
-{
- if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
- on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
-}
-
-static void __acpi_nmi_enable(void *__unused)
-{
- apic_write_around(APIC_LVT0, APIC_DM_NMI);
-}
-
-/*
- * Enable timer based NMIs on all CPUs:
- */
-void acpi_nmi_enable(void)
-{
- if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
- on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
-}
+/* Suspend/resume support */
#ifdef CONFIG_PM
@@ -516,7 +197,7 @@ static int __init init_lapic_nmi_sysfs(void)
if (nmi_watchdog != NMI_LOCAL_APIC)
return 0;
- if ( atomic_read(&nmi_active) < 0 )
+ if (atomic_read(&nmi_active) < 0)
return 0;
error = sysdev_class_register(&nmi_sysclass);
@@ -529,433 +210,69 @@ late_initcall(init_lapic_nmi_sysfs);
#endif /* CONFIG_PM */
-/*
- * Activate the NMI watchdog via the local APIC.
- * Original code written by Keith Owens.
- */
-
-static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
-{
- u64 count = (u64)cpu_khz * 1000;
-
- do_div(count, nmi_hz);
- if(descr)
- Dprintk("setting %s to -0x%08Lx\n", descr, count);
- wrmsrl(perfctr_msr, 0 - count);
-}
-
-static void write_watchdog_counter32(unsigned int perfctr_msr,
- const char *descr)
-{
- u64 count = (u64)cpu_khz * 1000;
-
- do_div(count, nmi_hz);
- if(descr)
- Dprintk("setting %s to -0x%08Lx\n", descr, count);
- wrmsr(perfctr_msr, (u32)(-count), 0);
-}
-
-/* Note that these events don't tick when the CPU idles. This means
- the frequency varies with CPU load. */
-
-#define K7_EVNTSEL_ENABLE (1 << 22)
-#define K7_EVNTSEL_INT (1 << 20)
-#define K7_EVNTSEL_OS (1 << 17)
-#define K7_EVNTSEL_USR (1 << 16)
-#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
-#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
-
-static int setup_k7_watchdog(void)
-{
- unsigned int perfctr_msr, evntsel_msr;
- unsigned int evntsel;
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- perfctr_msr = MSR_K7_PERFCTR0;
- evntsel_msr = MSR_K7_EVNTSEL0;
- if (!__reserve_perfctr_nmi(-1, perfctr_msr))
- goto fail;
-
- if (!__reserve_evntsel_nmi(-1, evntsel_msr))
- goto fail1;
-
- wrmsrl(perfctr_msr, 0UL);
-
- evntsel = K7_EVNTSEL_INT
- | K7_EVNTSEL_OS
- | K7_EVNTSEL_USR
- | K7_NMI_EVENT;
-
- /* setup the timer */
- wrmsr(evntsel_msr, evntsel, 0);
- write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= K7_EVNTSEL_ENABLE;
- wrmsr(evntsel_msr, evntsel, 0);
-
- wd->perfctr_msr = perfctr_msr;
- wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = 0; //unused
- wd->check_bit = 1ULL<<63;
- return 1;
-fail1:
- __release_perfctr_nmi(-1, perfctr_msr);
-fail:
- return 0;
-}
-
-static void stop_k7_watchdog(void)
-{
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- wrmsr(wd->evntsel_msr, 0, 0);
-
- __release_evntsel_nmi(-1, wd->evntsel_msr);
- __release_perfctr_nmi(-1, wd->perfctr_msr);
-}
-
-#define P6_EVNTSEL0_ENABLE (1 << 22)
-#define P6_EVNTSEL_INT (1 << 20)
-#define P6_EVNTSEL_OS (1 << 17)
-#define P6_EVNTSEL_USR (1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
-#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
-
-static int setup_p6_watchdog(void)
-{
- unsigned int perfctr_msr, evntsel_msr;
- unsigned int evntsel;
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- perfctr_msr = MSR_P6_PERFCTR0;
- evntsel_msr = MSR_P6_EVNTSEL0;
- if (!__reserve_perfctr_nmi(-1, perfctr_msr))
- goto fail;
-
- if (!__reserve_evntsel_nmi(-1, evntsel_msr))
- goto fail1;
-
- wrmsrl(perfctr_msr, 0UL);
-
- evntsel = P6_EVNTSEL_INT
- | P6_EVNTSEL_OS
- | P6_EVNTSEL_USR
- | P6_NMI_EVENT;
-
- /* setup the timer */
- wrmsr(evntsel_msr, evntsel, 0);
- nmi_hz = adjust_for_32bit_ctr(nmi_hz);
- write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0");
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= P6_EVNTSEL0_ENABLE;
- wrmsr(evntsel_msr, evntsel, 0);
-
- wd->perfctr_msr = perfctr_msr;
- wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = 0; //unused
- wd->check_bit = 1ULL<<39;
- return 1;
-fail1:
- __release_perfctr_nmi(-1, perfctr_msr);
-fail:
- return 0;
-}
-
-static void stop_p6_watchdog(void)
-{
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- wrmsr(wd->evntsel_msr, 0, 0);
-
- __release_evntsel_nmi(-1, wd->evntsel_msr);
- __release_perfctr_nmi(-1, wd->perfctr_msr);
-}
-
-/* Note that these events don't tick when the CPU idles. This means
- the frequency varies with CPU load. */
-
-#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
-#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
-#define P4_ESCR_OS (1<<3)
-#define P4_ESCR_USR (1<<2)
-#define P4_CCCR_OVF_PMI0 (1<<26)
-#define P4_CCCR_OVF_PMI1 (1<<27)
-#define P4_CCCR_THRESHOLD(N) ((N)<<20)
-#define P4_CCCR_COMPLEMENT (1<<19)
-#define P4_CCCR_COMPARE (1<<18)
-#define P4_CCCR_REQUIRED (3<<16)
-#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
-#define P4_CCCR_ENABLE (1<<12)
-#define P4_CCCR_OVF (1<<31)
-/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
- CRU_ESCR0 (with any non-null event selector) through a complemented
- max threshold. [IA32-Vol3, Section 14.9.9] */
-
-static int setup_p4_watchdog(void)
+static void __acpi_nmi_enable(void *__unused)
{
- unsigned int perfctr_msr, evntsel_msr, cccr_msr;
- unsigned int evntsel, cccr_val;
- unsigned int misc_enable, dummy;
- unsigned int ht_num;
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
- if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
- return 0;
-
-#ifdef CONFIG_SMP
- /* detect which hyperthread we are on */
- if (smp_num_siblings == 2) {
- unsigned int ebx, apicid;
-
- ebx = cpuid_ebx(1);
- apicid = (ebx >> 24) & 0xff;
- ht_num = apicid & 1;
- } else
-#endif
- ht_num = 0;
-
- /* performance counters are shared resources
- * assign each hyperthread its own set
- * (re-use the ESCR0 register, seems safe
- * and keeps the cccr_val the same)
- */
- if (!ht_num) {
- /* logical cpu 0 */
- perfctr_msr = MSR_P4_IQ_PERFCTR0;
- evntsel_msr = MSR_P4_CRU_ESCR0;
- cccr_msr = MSR_P4_IQ_CCCR0;
- cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
- } else {
- /* logical cpu 1 */
- perfctr_msr = MSR_P4_IQ_PERFCTR1;
- evntsel_msr = MSR_P4_CRU_ESCR0;
- cccr_msr = MSR_P4_IQ_CCCR1;
- cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
- }
-
- if (!__reserve_perfctr_nmi(-1, perfctr_msr))
- goto fail;
-
- if (!__reserve_evntsel_nmi(-1, evntsel_msr))
- goto fail1;
-
- evntsel = P4_ESCR_EVENT_SELECT(0x3F)
- | P4_ESCR_OS
- | P4_ESCR_USR;
-
- cccr_val |= P4_CCCR_THRESHOLD(15)
- | P4_CCCR_COMPLEMENT
- | P4_CCCR_COMPARE
- | P4_CCCR_REQUIRED;
-
- wrmsr(evntsel_msr, evntsel, 0);
- wrmsr(cccr_msr, cccr_val, 0);
- write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- cccr_val |= P4_CCCR_ENABLE;
- wrmsr(cccr_msr, cccr_val, 0);
- wd->perfctr_msr = perfctr_msr;
- wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = cccr_msr;
- wd->check_bit = 1ULL<<39;
- return 1;
-fail1:
- __release_perfctr_nmi(-1, perfctr_msr);
-fail:
- return 0;
+ apic_write_around(APIC_LVT0, APIC_DM_NMI);
}
-static void stop_p4_watchdog(void)
+/*
+ * Enable timer based NMIs on all CPUs:
+ */
+void acpi_nmi_enable(void)
{
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- wrmsr(wd->cccr_msr, 0, 0);
- wrmsr(wd->evntsel_msr, 0, 0);
-
- __release_evntsel_nmi(-1, wd->evntsel_msr);
- __release_perfctr_nmi(-1, wd->perfctr_msr);
+ if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
+ on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
}
-#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
-#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
-
-static int setup_intel_arch_watchdog(void)
+static void __acpi_nmi_disable(void *__unused)
{
- unsigned int ebx;
- union cpuid10_eax eax;
- unsigned int unused;
- unsigned int perfctr_msr, evntsel_msr;
- unsigned int evntsel;
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- /*
- * Check whether the Architectural PerfMon supports
- * Unhalted Core Cycles Event or not.
- * NOTE: Corresponding bit = 0 in ebx indicates event present.
- */
- cpuid(10, &(eax.full), &ebx, &unused, &unused);
- if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
- (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
- goto fail;
-
- perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
- evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
-
- if (!__reserve_perfctr_nmi(-1, perfctr_msr))
- goto fail;
-
- if (!__reserve_evntsel_nmi(-1, evntsel_msr))
- goto fail1;
-
- wrmsrl(perfctr_msr, 0UL);
-
- evntsel = ARCH_PERFMON_EVENTSEL_INT
- | ARCH_PERFMON_EVENTSEL_OS
- | ARCH_PERFMON_EVENTSEL_USR
- | ARCH_PERFMON_NMI_EVENT_SEL
- | ARCH_PERFMON_NMI_EVENT_UMASK;
-
- /* setup the timer */
- wrmsr(evntsel_msr, evntsel, 0);
- nmi_hz = adjust_for_32bit_ctr(nmi_hz);
- write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0");
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
- wrmsr(evntsel_msr, evntsel, 0);
-
- wd->perfctr_msr = perfctr_msr;
- wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = 0; //unused
- wd->check_bit = 1ULL << (eax.split.bit_width - 1);
- return 1;
-fail1:
- __release_perfctr_nmi(-1, perfctr_msr);
-fail:
- return 0;
+ apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
}
-static void stop_intel_arch_watchdog(void)
+/*
+ * Disable timer based NMIs on all CPUs:
+ */
+void acpi_nmi_disable(void)
{
- unsigned int ebx;
- union cpuid10_eax eax;
- unsigned int unused;
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- /*
- * Check whether the Architectural PerfMon supports
- * Unhalted Core Cycles Event or not.
- * NOTE: Corresponding bit = 0 in ebx indicates event present.
- */
- cpuid(10, &(eax.full), &ebx, &unused, &unused);
- if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
- (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
- return;
-
- wrmsr(wd->evntsel_msr, 0, 0);
- __release_evntsel_nmi(-1, wd->evntsel_msr);
- __release_perfctr_nmi(-1, wd->perfctr_msr);
+ if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
+ on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
}
void setup_apic_nmi_watchdog (void *unused)
{
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- /* only support LOCAL and IO APICs for now */
- if ((nmi_watchdog != NMI_LOCAL_APIC) &&
- (nmi_watchdog != NMI_IO_APIC))
- return;
-
- if (wd->enabled == 1)
- return;
+ if (__get_cpu_var(wd_enabled))
+ return;
/* cheap hack to support suspend/resume */
/* if cpu0 is not active neither should the other cpus */
if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
return;
- if (nmi_watchdog == NMI_LOCAL_APIC) {
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
- boot_cpu_data.x86 != 16)
- return;
- if (!setup_k7_watchdog())
- return;
- break;
- case X86_VENDOR_INTEL:
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
- if (!setup_intel_arch_watchdog())
- return;
- break;
- }
- switch (boot_cpu_data.x86) {
- case 6:
- if (boot_cpu_data.x86_model > 0xd)
- return;
-
- if (!setup_p6_watchdog())
- return;
- break;
- case 15:
- if (boot_cpu_data.x86_model > 0x4)
- return;
-
- if (!setup_p4_watchdog())
- return;
- break;
- default:
- return;
- }
- break;
- default:
+ switch (nmi_watchdog) {
+ case NMI_LOCAL_APIC:
+ __get_cpu_var(wd_enabled) = 1; /* enable it before to avoid race with handler */
+ if (lapic_watchdog_init(nmi_hz) < 0) {
+ __get_cpu_var(wd_enabled) = 0;
return;
}
+ /* FALL THROUGH */
+ case NMI_IO_APIC:
+ __get_cpu_var(wd_enabled) = 1;
+ atomic_inc(&nmi_active);
}
- wd->enabled = 1;
- atomic_inc(&nmi_active);
}
void stop_apic_nmi_watchdog(void *unused)
{
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
/* only support LOCAL and IO APICs for now */
if ((nmi_watchdog != NMI_LOCAL_APIC) &&
(nmi_watchdog != NMI_IO_APIC))
return;
-
- if (wd->enabled == 0)
+ if (__get_cpu_var(wd_enabled) == 0)
return;
-
- if (nmi_watchdog == NMI_LOCAL_APIC) {
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- stop_k7_watchdog();
- break;
- case X86_VENDOR_INTEL:
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
- stop_intel_arch_watchdog();
- break;
- }
- switch (boot_cpu_data.x86) {
- case 6:
- if (boot_cpu_data.x86_model > 0xd)
- break;
- stop_p6_watchdog();
- break;
- case 15:
- if (boot_cpu_data.x86_model > 0x4)
- break;
- stop_p4_watchdog();
- break;
- }
- break;
- default:
- return;
- }
- }
- wd->enabled = 0;
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ lapic_watchdog_stop();
+ __get_cpu_var(wd_enabled) = 0;
atomic_dec(&nmi_active);
}
@@ -1011,8 +328,6 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
unsigned int sum;
int touched = 0;
int cpu = smp_processor_id();
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
- u64 dummy;
int rc=0;
/* check for other users first */
@@ -1055,53 +370,20 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
alert_counter[cpu] = 0;
}
/* see if the nmi watchdog went off */
- if (wd->enabled) {
- if (nmi_watchdog == NMI_LOCAL_APIC) {
- rdmsrl(wd->perfctr_msr, dummy);
- if (dummy & wd->check_bit){
- /* this wasn't a watchdog timer interrupt */
- goto done;
- }
-
- /* only Intel P4 uses the cccr msr */
- if (wd->cccr_msr != 0) {
- /*
- * P4 quirks:
- * - An overflown perfctr will assert its interrupt
- * until the OVF flag in its CCCR is cleared.
- * - LVTPC is masked on interrupt and must be
- * unmasked by the LVTPC handler.
- */
- rdmsrl(wd->cccr_msr, dummy);
- dummy &= ~P4_CCCR_OVF;
- wrmsrl(wd->cccr_msr, dummy);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- /* start the cycle over again */
- write_watchdog_counter(wd->perfctr_msr, NULL);
- }
- else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
- wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
- /* P6 based Pentium M need to re-unmask
- * the apic vector but it doesn't hurt
- * other P6 variant.
- * ArchPerfom/Core Duo also needs this */
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- /* P6/ARCH_PERFMON has 32 bit counter write */
- write_watchdog_counter32(wd->perfctr_msr, NULL);
- } else {
- /* start the cycle over again */
- write_watchdog_counter(wd->perfctr_msr, NULL);
- }
- rc = 1;
- } else if (nmi_watchdog == NMI_IO_APIC) {
- /* don't know how to accurately check for this.
- * just assume it was a watchdog timer interrupt
- * This matches the old behaviour.
- */
- rc = 1;
- }
+ if (!__get_cpu_var(wd_enabled))
+ return rc;
+ switch (nmi_watchdog) {
+ case NMI_LOCAL_APIC:
+ rc |= lapic_wd_event(nmi_hz);
+ break;
+ case NMI_IO_APIC:
+ /* don't know how to accurately check for this.
+ * just assume it was a watchdog timer interrupt
+ * This matches the old behaviour.
+ */
+ rc = 1;
+ break;
}
-done:
return rc;
}
@@ -1146,7 +428,7 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
}
if (nmi_watchdog == NMI_DEFAULT) {
- if (nmi_known_cpu() > 0)
+ if (lapic_watchdog_ok())
nmi_watchdog = NMI_LOCAL_APIC;
else
nmi_watchdog = NMI_IO_APIC;
@@ -1182,11 +464,3 @@ void __trigger_all_cpu_backtrace(void)
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
-EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
-EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
-EXPORT_SYMBOL(reserve_perfctr_nmi);
-EXPORT_SYMBOL(release_perfctr_nmi);
-EXPORT_SYMBOL(reserve_evntsel_nmi);
-EXPORT_SYMBOL(release_evntsel_nmi);
-EXPORT_SYMBOL(disable_timer_nmi_watchdog);
-EXPORT_SYMBOL(enable_timer_nmi_watchdog);
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index 2ec331e03fa9..5c10f376bce1 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -20,6 +20,7 @@
#include <linux/efi.h>
#include <linux/bcd.h>
#include <linux/start_kernel.h>
+#include <linux/highmem.h>
#include <asm/bug.h>
#include <asm/paravirt.h>
@@ -35,7 +36,7 @@
#include <asm/timer.h>
/* nop stub */
-static void native_nop(void)
+void _paravirt_nop(void)
{
}
@@ -54,331 +55,148 @@ char *memory_setup(void)
#define DEF_NATIVE(name, code) \
extern const char start_##name[], end_##name[]; \
asm("start_" #name ": " code "; end_" #name ":")
-DEF_NATIVE(cli, "cli");
-DEF_NATIVE(sti, "sti");
-DEF_NATIVE(popf, "push %eax; popf");
-DEF_NATIVE(pushf, "pushf; pop %eax");
-DEF_NATIVE(pushf_cli, "pushf; pop %eax; cli");
+
+DEF_NATIVE(irq_disable, "cli");
+DEF_NATIVE(irq_enable, "sti");
+DEF_NATIVE(restore_fl, "push %eax; popf");
+DEF_NATIVE(save_fl, "pushf; pop %eax");
DEF_NATIVE(iret, "iret");
-DEF_NATIVE(sti_sysexit, "sti; sysexit");
+DEF_NATIVE(irq_enable_sysexit, "sti; sysexit");
+DEF_NATIVE(read_cr2, "mov %cr2, %eax");
+DEF_NATIVE(write_cr3, "mov %eax, %cr3");
+DEF_NATIVE(read_cr3, "mov %cr3, %eax");
+DEF_NATIVE(clts, "clts");
+DEF_NATIVE(read_tsc, "rdtsc");
-static const struct native_insns
-{
- const char *start, *end;
-} native_insns[] = {
- [PARAVIRT_IRQ_DISABLE] = { start_cli, end_cli },
- [PARAVIRT_IRQ_ENABLE] = { start_sti, end_sti },
- [PARAVIRT_RESTORE_FLAGS] = { start_popf, end_popf },
- [PARAVIRT_SAVE_FLAGS] = { start_pushf, end_pushf },
- [PARAVIRT_SAVE_FLAGS_IRQ_DISABLE] = { start_pushf_cli, end_pushf_cli },
- [PARAVIRT_INTERRUPT_RETURN] = { start_iret, end_iret },
- [PARAVIRT_STI_SYSEXIT] = { start_sti_sysexit, end_sti_sysexit },
-};
+DEF_NATIVE(ud2a, "ud2a");
static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len)
{
- unsigned int insn_len;
-
- /* Don't touch it if we don't have a replacement */
- if (type >= ARRAY_SIZE(native_insns) || !native_insns[type].start)
- return len;
-
- insn_len = native_insns[type].end - native_insns[type].start;
-
- /* Similarly if we can't fit replacement. */
- if (len < insn_len)
- return len;
+ const unsigned char *start, *end;
+ unsigned ret;
+
+ switch(type) {
+#define SITE(x) case PARAVIRT_PATCH(x): start = start_##x; end = end_##x; goto patch_site
+ SITE(irq_disable);
+ SITE(irq_enable);
+ SITE(restore_fl);
+ SITE(save_fl);
+ SITE(iret);
+ SITE(irq_enable_sysexit);
+ SITE(read_cr2);
+ SITE(read_cr3);
+ SITE(write_cr3);
+ SITE(clts);
+ SITE(read_tsc);
+#undef SITE
+
+ patch_site:
+ ret = paravirt_patch_insns(insns, len, start, end);
+ break;
- memcpy(insns, native_insns[type].start, insn_len);
- return insn_len;
-}
+ case PARAVIRT_PATCH(make_pgd):
+ case PARAVIRT_PATCH(make_pte):
+ case PARAVIRT_PATCH(pgd_val):
+ case PARAVIRT_PATCH(pte_val):
+#ifdef CONFIG_X86_PAE
+ case PARAVIRT_PATCH(make_pmd):
+ case PARAVIRT_PATCH(pmd_val):
+#endif
+ /* These functions end up returning exactly what
+ they're passed, in the same registers. */
+ ret = paravirt_patch_nop();
+ break;
-static unsigned long native_get_debugreg(int regno)
-{
- unsigned long val = 0; /* Damn you, gcc! */
-
- switch (regno) {
- case 0:
- asm("movl %%db0, %0" :"=r" (val)); break;
- case 1:
- asm("movl %%db1, %0" :"=r" (val)); break;
- case 2:
- asm("movl %%db2, %0" :"=r" (val)); break;
- case 3:
- asm("movl %%db3, %0" :"=r" (val)); break;
- case 6:
- asm("movl %%db6, %0" :"=r" (val)); break;
- case 7:
- asm("movl %%db7, %0" :"=r" (val)); break;
default:
- BUG();
- }
- return val;
-}
-
-static void native_set_debugreg(int regno, unsigned long value)
-{
- switch (regno) {
- case 0:
- asm("movl %0,%%db0" : /* no output */ :"r" (value));
- break;
- case 1:
- asm("movl %0,%%db1" : /* no output */ :"r" (value));
- break;
- case 2:
- asm("movl %0,%%db2" : /* no output */ :"r" (value));
+ ret = paravirt_patch_default(type, clobbers, insns, len);
break;
- case 3:
- asm("movl %0,%%db3" : /* no output */ :"r" (value));
- break;
- case 6:
- asm("movl %0,%%db6" : /* no output */ :"r" (value));
- break;
- case 7:
- asm("movl %0,%%db7" : /* no output */ :"r" (value));
- break;
- default:
- BUG();
}
-}
-
-void init_IRQ(void)
-{
- paravirt_ops.init_IRQ();
-}
-
-static void native_clts(void)
-{
- asm volatile ("clts");
-}
-
-static unsigned long native_read_cr0(void)
-{
- unsigned long val;
- asm volatile("movl %%cr0,%0\n\t" :"=r" (val));
- return val;
-}
-
-static void native_write_cr0(unsigned long val)
-{
- asm volatile("movl %0,%%cr0": :"r" (val));
-}
-
-static unsigned long native_read_cr2(void)
-{
- unsigned long val;
- asm volatile("movl %%cr2,%0\n\t" :"=r" (val));
- return val;
-}
-
-static void native_write_cr2(unsigned long val)
-{
- asm volatile("movl %0,%%cr2": :"r" (val));
-}
-
-static unsigned long native_read_cr3(void)
-{
- unsigned long val;
- asm volatile("movl %%cr3,%0\n\t" :"=r" (val));
- return val;
-}
-
-static void native_write_cr3(unsigned long val)
-{
- asm volatile("movl %0,%%cr3": :"r" (val));
-}
-
-static unsigned long native_read_cr4(void)
-{
- unsigned long val;
- asm volatile("movl %%cr4,%0\n\t" :"=r" (val));
- return val;
-}
-
-static unsigned long native_read_cr4_safe(void)
-{
- unsigned long val;
- /* This could fault if %cr4 does not exist */
- asm("1: movl %%cr4, %0 \n"
- "2: \n"
- ".section __ex_table,\"a\" \n"
- ".long 1b,2b \n"
- ".previous \n"
- : "=r" (val): "0" (0));
- return val;
-}
-
-static void native_write_cr4(unsigned long val)
-{
- asm volatile("movl %0,%%cr4": :"r" (val));
-}
-
-static unsigned long native_save_fl(void)
-{
- unsigned long f;
- asm volatile("pushfl ; popl %0":"=g" (f): /* no input */);
- return f;
-}
-
-static void native_restore_fl(unsigned long f)
-{
- asm volatile("pushl %0 ; popfl": /* no output */
- :"g" (f)
- :"memory", "cc");
-}
-
-static void native_irq_disable(void)
-{
- asm volatile("cli": : :"memory");
-}
-
-static void native_irq_enable(void)
-{
- asm volatile("sti": : :"memory");
-}
-
-static void native_safe_halt(void)
-{
- asm volatile("sti; hlt": : :"memory");
-}
-static void native_halt(void)
-{
- asm volatile("hlt": : :"memory");
+ return ret;
}
-static void native_wbinvd(void)
+unsigned paravirt_patch_nop(void)
{
- asm volatile("wbinvd": : :"memory");
+ return 0;
}
-static unsigned long long native_read_msr(unsigned int msr, int *err)
+unsigned paravirt_patch_ignore(unsigned len)
{
- unsigned long long val;
-
- asm volatile("2: rdmsr ; xorl %0,%0\n"
- "1:\n\t"
- ".section .fixup,\"ax\"\n\t"
- "3: movl %3,%0 ; jmp 1b\n\t"
- ".previous\n\t"
- ".section __ex_table,\"a\"\n"
- " .align 4\n\t"
- " .long 2b,3b\n\t"
- ".previous"
- : "=r" (*err), "=A" (val)
- : "c" (msr), "i" (-EFAULT));
-
- return val;
+ return len;
}
-static int native_write_msr(unsigned int msr, unsigned long long val)
+unsigned paravirt_patch_call(void *target, u16 tgt_clobbers,
+ void *site, u16 site_clobbers,
+ unsigned len)
{
- int err;
- asm volatile("2: wrmsr ; xorl %0,%0\n"
- "1:\n\t"
- ".section .fixup,\"ax\"\n\t"
- "3: movl %4,%0 ; jmp 1b\n\t"
- ".previous\n\t"
- ".section __ex_table,\"a\"\n"
- " .align 4\n\t"
- " .long 2b,3b\n\t"
- ".previous"
- : "=a" (err)
- : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)),
- "i" (-EFAULT));
- return err;
-}
+ unsigned char *call = site;
+ unsigned long delta = (unsigned long)target - (unsigned long)(call+5);
-static unsigned long long native_read_tsc(void)
-{
- unsigned long long val;
- asm volatile("rdtsc" : "=A" (val));
- return val;
-}
+ if (tgt_clobbers & ~site_clobbers)
+ return len; /* target would clobber too much for this site */
+ if (len < 5)
+ return len; /* call too long for patch site */
-static unsigned long long native_read_pmc(void)
-{
- unsigned long long val;
- asm volatile("rdpmc" : "=A" (val));
- return val;
-}
+ *call++ = 0xe8; /* call */
+ *(unsigned long *)call = delta;
-static void native_load_tr_desc(void)
-{
- asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+ return 5;
}
-static void native_load_gdt(const struct Xgt_desc_struct *dtr)
+unsigned paravirt_patch_jmp(void *target, void *site, unsigned len)
{
- asm volatile("lgdt %0"::"m" (*dtr));
-}
+ unsigned char *jmp = site;
+ unsigned long delta = (unsigned long)target - (unsigned long)(jmp+5);
-static void native_load_idt(const struct Xgt_desc_struct *dtr)
-{
- asm volatile("lidt %0"::"m" (*dtr));
-}
+ if (len < 5)
+ return len; /* call too long for patch site */
-static void native_store_gdt(struct Xgt_desc_struct *dtr)
-{
- asm ("sgdt %0":"=m" (*dtr));
-}
+ *jmp++ = 0xe9; /* jmp */
+ *(unsigned long *)jmp = delta;
-static void native_store_idt(struct Xgt_desc_struct *dtr)
-{
- asm ("sidt %0":"=m" (*dtr));
+ return 5;
}
-static unsigned long native_store_tr(void)
+unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len)
{
- unsigned long tr;
- asm ("str %0":"=r" (tr));
- return tr;
-}
+ void *opfunc = *((void **)&paravirt_ops + type);
+ unsigned ret;
-static void native_load_tls(struct thread_struct *t, unsigned int cpu)
-{
-#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
- C(0); C(1); C(2);
-#undef C
-}
+ if (opfunc == NULL)
+ /* If there's no function, patch it with a ud2a (BUG) */
+ ret = paravirt_patch_insns(site, len, start_ud2a, end_ud2a);
+ else if (opfunc == paravirt_nop)
+ /* If the operation is a nop, then nop the callsite */
+ ret = paravirt_patch_nop();
+ else if (type == PARAVIRT_PATCH(iret) ||
+ type == PARAVIRT_PATCH(irq_enable_sysexit))
+ /* If operation requires a jmp, then jmp */
+ ret = paravirt_patch_jmp(opfunc, site, len);
+ else
+ /* Otherwise call the function; assume target could
+ clobber any caller-save reg */
+ ret = paravirt_patch_call(opfunc, CLBR_ANY,
+ site, clobbers, len);
-static inline void native_write_dt_entry(void *dt, int entry, u32 entry_low, u32 entry_high)
-{
- u32 *lp = (u32 *)((char *)dt + entry*8);
- lp[0] = entry_low;
- lp[1] = entry_high;
+ return ret;
}
-static void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high)
+unsigned paravirt_patch_insns(void *site, unsigned len,
+ const char *start, const char *end)
{
- native_write_dt_entry(dt, entrynum, low, high);
-}
+ unsigned insn_len = end - start;
-static void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high)
-{
- native_write_dt_entry(dt, entrynum, low, high);
-}
-
-static void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high)
-{
- native_write_dt_entry(dt, entrynum, low, high);
-}
+ if (insn_len > len || start == NULL)
+ insn_len = len;
+ else
+ memcpy(site, start, insn_len);
-static void native_load_esp0(struct tss_struct *tss,
- struct thread_struct *thread)
-{
- tss->esp0 = thread->esp0;
-
- /* This can only happen when SEP is enabled, no need to test "SEP"arately */
- if (unlikely(tss->ss1 != thread->sysenter_cs)) {
- tss->ss1 = thread->sysenter_cs;
- wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
- }
+ return insn_len;
}
-static void native_io_delay(void)
+void init_IRQ(void)
{
- asm volatile("outb %al,$0x80");
+ paravirt_ops.init_IRQ();
}
static void native_flush_tlb(void)
@@ -395,83 +213,11 @@ static void native_flush_tlb_global(void)
__native_flush_tlb_global();
}
-static void native_flush_tlb_single(u32 addr)
+static void native_flush_tlb_single(unsigned long addr)
{
__native_flush_tlb_single(addr);
}
-#ifndef CONFIG_X86_PAE
-static void native_set_pte(pte_t *ptep, pte_t pteval)
-{
- *ptep = pteval;
-}
-
-static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval)
-{
- *ptep = pteval;
-}
-
-static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
-{
- *pmdp = pmdval;
-}
-
-#else /* CONFIG_X86_PAE */
-
-static void native_set_pte(pte_t *ptep, pte_t pte)
-{
- ptep->pte_high = pte.pte_high;
- smp_wmb();
- ptep->pte_low = pte.pte_low;
-}
-
-static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte)
-{
- ptep->pte_high = pte.pte_high;
- smp_wmb();
- ptep->pte_low = pte.pte_low;
-}
-
-static void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
-{
- ptep->pte_low = 0;
- smp_wmb();
- ptep->pte_high = pte.pte_high;
- smp_wmb();
- ptep->pte_low = pte.pte_low;
-}
-
-static void native_set_pte_atomic(pte_t *ptep, pte_t pteval)
-{
- set_64bit((unsigned long long *)ptep,pte_val(pteval));
-}
-
-static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
-{
- set_64bit((unsigned long long *)pmdp,pmd_val(pmdval));
-}
-
-static void native_set_pud(pud_t *pudp, pud_t pudval)
-{
- *pudp = pudval;
-}
-
-static void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
- ptep->pte_low = 0;
- smp_wmb();
- ptep->pte_high = 0;
-}
-
-static void native_pmd_clear(pmd_t *pmd)
-{
- u32 *tmp = (u32 *)pmd;
- *tmp = 0;
- smp_wmb();
- *(tmp + 1) = 0;
-}
-#endif /* CONFIG_X86_PAE */
-
/* These are in entry.S */
extern void native_iret(void);
extern void native_irq_enable_sysexit(void);
@@ -487,10 +233,11 @@ struct paravirt_ops paravirt_ops = {
.name = "bare hardware",
.paravirt_enabled = 0,
.kernel_rpl = 0,
+ .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
.patch = native_patch,
.banner = default_banner,
- .arch_setup = native_nop,
+ .arch_setup = paravirt_nop,
.memory_setup = machine_specific_memory_setup,
.get_wallclock = native_get_wallclock,
.set_wallclock = native_set_wallclock,
@@ -517,8 +264,8 @@ struct paravirt_ops paravirt_ops = {
.safe_halt = native_safe_halt,
.halt = native_halt,
.wbinvd = native_wbinvd,
- .read_msr = native_read_msr,
- .write_msr = native_write_msr,
+ .read_msr = native_read_msr_safe,
+ .write_msr = native_write_msr_safe,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
.get_scheduled_cycles = native_read_tsc,
@@ -531,9 +278,9 @@ struct paravirt_ops paravirt_ops = {
.store_idt = native_store_idt,
.store_tr = native_store_tr,
.load_tls = native_load_tls,
- .write_ldt_entry = native_write_ldt_entry,
- .write_gdt_entry = native_write_gdt_entry,
- .write_idt_entry = native_write_idt_entry,
+ .write_ldt_entry = write_dt_entry,
+ .write_gdt_entry = write_dt_entry,
+ .write_idt_entry = write_dt_entry,
.load_esp0 = native_load_esp0,
.set_iopl_mask = native_set_iopl_mask,
@@ -545,44 +292,57 @@ struct paravirt_ops paravirt_ops = {
.apic_read = native_apic_read,
.setup_boot_clock = setup_boot_APIC_clock,
.setup_secondary_clock = setup_secondary_APIC_clock,
+ .startup_ipi_hook = paravirt_nop,
#endif
- .set_lazy_mode = (void *)native_nop,
+ .set_lazy_mode = paravirt_nop,
+
+ .pagetable_setup_start = native_pagetable_setup_start,
+ .pagetable_setup_done = native_pagetable_setup_done,
.flush_tlb_user = native_flush_tlb,
.flush_tlb_kernel = native_flush_tlb_global,
.flush_tlb_single = native_flush_tlb_single,
+ .flush_tlb_others = native_flush_tlb_others,
- .map_pt_hook = (void *)native_nop,
-
- .alloc_pt = (void *)native_nop,
- .alloc_pd = (void *)native_nop,
- .alloc_pd_clone = (void *)native_nop,
- .release_pt = (void *)native_nop,
- .release_pd = (void *)native_nop,
+ .alloc_pt = paravirt_nop,
+ .alloc_pd = paravirt_nop,
+ .alloc_pd_clone = paravirt_nop,
+ .release_pt = paravirt_nop,
+ .release_pd = paravirt_nop,
.set_pte = native_set_pte,
.set_pte_at = native_set_pte_at,
.set_pmd = native_set_pmd,
- .pte_update = (void *)native_nop,
- .pte_update_defer = (void *)native_nop,
+ .pte_update = paravirt_nop,
+ .pte_update_defer = paravirt_nop,
+
+#ifdef CONFIG_HIGHPTE
+ .kmap_atomic_pte = kmap_atomic,
+#endif
+
#ifdef CONFIG_X86_PAE
.set_pte_atomic = native_set_pte_atomic,
.set_pte_present = native_set_pte_present,
.set_pud = native_set_pud,
.pte_clear = native_pte_clear,
.pmd_clear = native_pmd_clear,
+
+ .pmd_val = native_pmd_val,
+ .make_pmd = native_make_pmd,
#endif
+ .pte_val = native_pte_val,
+ .pgd_val = native_pgd_val,
+
+ .make_pte = native_make_pte,
+ .make_pgd = native_make_pgd,
+
.irq_enable_sysexit = native_irq_enable_sysexit,
.iret = native_iret,
- .startup_ipi_hook = (void *)native_nop,
+ .dup_mmap = paravirt_nop,
+ .exit_mmap = paravirt_nop,
+ .activate_mm = paravirt_nop,
};
-/*
- * NOTE: CONFIG_PARAVIRT is experimental and the paravirt_ops
- * semantics are subject to change. Hence we only do this
- * internal-only export of this, until it gets sorted out and
- * all lowlevel CPU ops used by modules are separately exported.
- */
-EXPORT_SYMBOL_GPL(paravirt_ops);
+EXPORT_SYMBOL(paravirt_ops);
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 393a67d5d943..61999479b7a4 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -39,6 +39,7 @@
#include <linux/random.h>
#include <linux/personality.h>
#include <linux/tick.h>
+#include <linux/percpu.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -57,7 +58,6 @@
#include <asm/tlbflush.h>
#include <asm/cpu.h>
-#include <asm/pda.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -66,6 +66,12 @@ static int hlt_counter;
unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);
+DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
+
+DEFINE_PER_CPU(int, cpu_number);
+EXPORT_PER_CPU_SYMBOL(cpu_number);
+
/*
* Return saved PC of a blocked thread.
*/
@@ -272,25 +278,24 @@ void __devinit select_idle_routine(const struct cpuinfo_x86 *c)
}
}
-static int __init idle_setup (char *str)
+static int __init idle_setup(char *str)
{
- if (!strncmp(str, "poll", 4)) {
+ if (!strcmp(str, "poll")) {
printk("using polling idle threads.\n");
pm_idle = poll_idle;
#ifdef CONFIG_X86_SMP
if (smp_num_siblings > 1)
printk("WARNING: polling idle and HT enabled, performance may degrade.\n");
#endif
- } else if (!strncmp(str, "halt", 4)) {
- printk("using halt in idle threads.\n");
- pm_idle = default_idle;
- }
+ } else if (!strcmp(str, "mwait"))
+ force_mwait = 1;
+ else
+ return -1;
boot_option_idle_override = 1;
- return 1;
+ return 0;
}
-
-__setup("idle=", idle_setup);
+early_param("idle", idle_setup);
void show_regs(struct pt_regs * regs)
{
@@ -343,7 +348,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
regs.xds = __USER_DS;
regs.xes = __USER_DS;
- regs.xfs = __KERNEL_PDA;
+ regs.xfs = __KERNEL_PERCPU;
regs.orig_eax = -1;
regs.eip = (unsigned long) kernel_thread_helper;
regs.xcs = __KERNEL_CS | get_kernel_rpl();
@@ -376,7 +381,7 @@ void exit_thread(void)
t->io_bitmap_max = 0;
tss->io_bitmap_owner = NULL;
tss->io_bitmap_max = 0;
- tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
+ tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
put_cpu();
}
}
@@ -555,7 +560,7 @@ static noinline void __switch_to_xtra(struct task_struct *next_p,
* Disable the bitmap via an invalid offset. We still cache
* the previous bitmap owner and the IO bitmap contents:
*/
- tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
+ tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
return;
}
@@ -565,7 +570,7 @@ static noinline void __switch_to_xtra(struct task_struct *next_p,
* matches the next task, we dont have to do anything but
* to set a valid offset in the TSS:
*/
- tss->io_bitmap_base = IO_BITMAP_OFFSET;
+ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
return;
}
/*
@@ -577,7 +582,7 @@ static noinline void __switch_to_xtra(struct task_struct *next_p,
* redundant copies when the currently switched task does not
* perform any I/O during its timeslice.
*/
- tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
+ tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
}
/*
@@ -712,7 +717,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
if (prev->gs | next->gs)
loadsegment(gs, next->gs);
- write_pda(pcurrent, next_p);
+ x86_write_percpu(current_task, next_p);
return prev_p;
}
diff --git a/arch/i386/kernel/quirks.c b/arch/i386/kernel/quirks.c
index 34874c398b44..9f6ab1789bb0 100644
--- a/arch/i386/kernel/quirks.c
+++ b/arch/i386/kernel/quirks.c
@@ -3,12 +3,10 @@
*/
#include <linux/pci.h>
#include <linux/irq.h>
-#include <asm/pci-direct.h>
-#include <asm/genapic.h>
-#include <asm/cpu.h>
#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
-static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev)
+
+static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
{
u8 config, rev;
u32 word;
@@ -16,12 +14,14 @@ static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev)
/* BIOS may enable hardware IRQ balancing for
* E7520/E7320/E7525(revision ID 0x9 and below)
* based platforms.
- * For those platforms, make sure that the genapic is set to 'flat'
+ * Disable SW irqbalance/affinity on those platforms.
*/
pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
if (rev > 0x9)
return;
+ printk(KERN_INFO "Intel E7520/7320/7525 detected.");
+
/* enable access to config space*/
pci_read_config_byte(dev, 0xf4, &config);
pci_write_config_byte(dev, 0xf4, config|0x2);
@@ -30,44 +30,6 @@ static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev)
raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
if (!(word & (1 << 13))) {
-#ifdef CONFIG_X86_64
- if (genapic != &apic_flat)
- panic("APIC mode must be flat on this system\n");
-#elif defined(CONFIG_X86_GENERICARCH)
- if (genapic != &apic_default)
- panic("APIC mode must be default(flat) on this system. Use apic=default\n");
-#endif
- }
-
- /* put back the original value for config space*/
- if (!(config & 0x2))
- pci_write_config_byte(dev, 0xf4, config);
-}
-
-void __init quirk_intel_irqbalance(void)
-{
- u8 config, rev;
- u32 word;
-
- /* BIOS may enable hardware IRQ balancing for
- * E7520/E7320/E7525(revision ID 0x9 and below)
- * based platforms.
- * Disable SW irqbalance/affinity on those platforms.
- */
- rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION);
- if (rev > 0x9)
- return;
-
- printk(KERN_INFO "Intel E7520/7320/7525 detected.");
-
- /* enable access to config space */
- config = read_pci_config_byte(0, 0, 0, 0xf4);
- write_pci_config_byte(0, 0, 0, 0xf4, config|0x2);
-
- /* read xTPR register */
- word = read_pci_config_16(0, 0, 0x40, 0x4c);
-
- if (!(word & (1 << 13))) {
printk(KERN_INFO "Disabling irq balancing and affinity\n");
#ifdef CONFIG_IRQBALANCE
irqbalance_disable("");
@@ -76,24 +38,13 @@ void __init quirk_intel_irqbalance(void)
#ifdef CONFIG_PROC_FS
no_irq_affinity = 1;
#endif
-#ifdef CONFIG_HOTPLUG_CPU
- printk(KERN_INFO "Disabling cpu hotplug control\n");
- enable_cpu_hotplug = 0;
-#endif
-#ifdef CONFIG_X86_64
- /* force the genapic selection to flat mode so that
- * interrupts can be redirected to more than one CPU.
- */
- genapic_force = &apic_flat;
-#endif
}
- /* put back the original value for config space */
+ /* put back the original value for config space*/
if (!(config & 0x2))
- write_pci_config_byte(0, 0, 0, 0xf4, config);
+ pci_write_config_byte(dev, 0xf4, config);
}
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance);
-
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance);
#endif
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
index 3514b4153f7f..50dfc65319cd 100644
--- a/arch/i386/kernel/reboot.c
+++ b/arch/i386/kernel/reboot.c
@@ -17,7 +17,8 @@
#include <asm/apic.h>
#include <asm/desc.h>
#include "mach_reboot.h"
-#include <linux/reboot_fixups.h>
+#include <asm/reboot_fixups.h>
+#include <asm/reboot.h>
/*
* Power off function, if any
@@ -197,8 +198,6 @@ static unsigned char jump_to_bios [] =
*/
void machine_real_restart(unsigned char *code, int length)
{
- unsigned long flags;
-
local_irq_disable();
/* Write zero to CMOS register number 0x0f, which the BIOS POST
@@ -211,9 +210,9 @@ void machine_real_restart(unsigned char *code, int length)
safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.)
*/
- spin_lock_irqsave(&rtc_lock, flags);
+ spin_lock(&rtc_lock);
CMOS_WRITE(0x00, 0x8f);
- spin_unlock_irqrestore(&rtc_lock, flags);
+ spin_unlock(&rtc_lock);
/* Remap the kernel at virtual address zero, as well as offset zero
from the kernel segment. This assumes the kernel segment starts at
@@ -280,7 +279,7 @@ void machine_real_restart(unsigned char *code, int length)
EXPORT_SYMBOL(machine_real_restart);
#endif
-void machine_shutdown(void)
+static void native_machine_shutdown(void)
{
#ifdef CONFIG_SMP
int reboot_cpu_id;
@@ -316,7 +315,11 @@ void machine_shutdown(void)
#endif
}
-void machine_emergency_restart(void)
+void __attribute__((weak)) mach_reboot_fixups(void)
+{
+}
+
+static void native_machine_emergency_restart(void)
{
if (!reboot_thru_bios) {
if (efi_enabled) {
@@ -340,17 +343,17 @@ void machine_emergency_restart(void)
machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
}
-void machine_restart(char * __unused)
+static void native_machine_restart(char * __unused)
{
machine_shutdown();
machine_emergency_restart();
}
-void machine_halt(void)
+static void native_machine_halt(void)
{
}
-void machine_power_off(void)
+static void native_machine_power_off(void)
{
if (pm_power_off) {
machine_shutdown();
@@ -359,3 +362,35 @@ void machine_power_off(void)
}
+struct machine_ops machine_ops = {
+ .power_off = native_machine_power_off,
+ .shutdown = native_machine_shutdown,
+ .emergency_restart = native_machine_emergency_restart,
+ .restart = native_machine_restart,
+ .halt = native_machine_halt,
+};
+
+void machine_power_off(void)
+{
+ machine_ops.power_off();
+}
+
+void machine_shutdown(void)
+{
+ machine_ops.shutdown();
+}
+
+void machine_emergency_restart(void)
+{
+ machine_ops.emergency_restart();
+}
+
+void machine_restart(char *cmd)
+{
+ machine_ops.restart(cmd);
+}
+
+void machine_halt(void)
+{
+ machine_ops.halt();
+}
diff --git a/arch/i386/kernel/reboot_fixups.c b/arch/i386/kernel/reboot_fixups.c
index 99aab41a05b0..2d78d918340f 100644
--- a/arch/i386/kernel/reboot_fixups.c
+++ b/arch/i386/kernel/reboot_fixups.c
@@ -10,7 +10,7 @@
#include <asm/delay.h>
#include <linux/pci.h>
-#include <linux/reboot_fixups.h>
+#include <asm/reboot_fixups.h>
static void cs5530a_warm_reset(struct pci_dev *dev)
{
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 0e8977871b1f..89a45a9ddcd4 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -165,20 +165,20 @@ void fastcall send_IPI_self(int vector)
}
/*
- * This is only used on smaller machines.
+ * This is used to send an IPI with no shorthand notation (the destination is
+ * specified in bits 56 to 63 of the ICR).
*/
-void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
+static inline void __send_IPI_dest_field(unsigned long mask, int vector)
{
- unsigned long mask = cpus_addr(cpumask)[0];
unsigned long cfg;
- unsigned long flags;
- local_irq_save(flags);
- WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
/*
* Wait for idle.
*/
- apic_wait_icr_idle();
+ if (unlikely(vector == NMI_VECTOR))
+ safe_apic_wait_icr_idle();
+ else
+ apic_wait_icr_idle();
/*
* prepare target chip field
@@ -195,13 +195,25 @@ void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
* Send the IPI. The write to APIC_ICR fires this off.
*/
apic_write_around(APIC_ICR, cfg);
+}
+
+/*
+ * This is only used on smaller machines.
+ */
+void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
+{
+ unsigned long mask = cpus_addr(cpumask)[0];
+ unsigned long flags;
+ local_irq_save(flags);
+ WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
+ __send_IPI_dest_field(mask, vector);
local_irq_restore(flags);
}
void send_IPI_mask_sequence(cpumask_t mask, int vector)
{
- unsigned long cfg, flags;
+ unsigned long flags;
unsigned int query_cpu;
/*
@@ -211,30 +223,10 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
*/
local_irq_save(flags);
-
for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) {
if (cpu_isset(query_cpu, mask)) {
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- /*
- * prepare target chip field
- */
- cfg = __prepare_ICR2(cpu_to_logical_apicid(query_cpu));
- apic_write_around(APIC_ICR2, cfg);
-
- /*
- * program the ICR
- */
- cfg = __prepare_ICR(0, vector);
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- apic_write_around(APIC_ICR, cfg);
+ __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
+ vector);
}
}
local_irq_restore(flags);
@@ -256,7 +248,6 @@ static cpumask_t flush_cpumask;
static struct mm_struct * flush_mm;
static unsigned long flush_va;
static DEFINE_SPINLOCK(tlbstate_lock);
-#define FLUSH_ALL 0xffffffff
/*
* We cannot call mmdrop() because we are in interrupt context,
@@ -338,7 +329,7 @@ fastcall void smp_invalidate_interrupt(struct pt_regs *regs)
if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
- if (flush_va == FLUSH_ALL)
+ if (flush_va == TLB_FLUSH_ALL)
local_flush_tlb();
else
__flush_tlb_one(flush_va);
@@ -353,9 +344,11 @@ out:
put_cpu_no_resched();
}
-static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
- unsigned long va)
+void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
+ unsigned long va)
{
+ cpumask_t cpumask = *cpumaskp;
+
/*
* A couple of (to be removed) sanity checks:
*
@@ -366,10 +359,12 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
BUG_ON(cpu_isset(smp_processor_id(), cpumask));
BUG_ON(!mm);
+#ifdef CONFIG_HOTPLUG_CPU
/* If a CPU which we ran on has gone down, OK. */
cpus_and(cpumask, cpumask, cpu_online_map);
- if (cpus_empty(cpumask))
+ if (unlikely(cpus_empty(cpumask)))
return;
+#endif
/*
* i'm not happy about this global shared spinlock in the
@@ -380,17 +375,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
flush_mm = mm;
flush_va = va;
-#if NR_CPUS <= BITS_PER_LONG
- atomic_set_mask(cpumask, &flush_cpumask);
-#else
- {
- int k;
- unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
- unsigned long *cpu_mask = (unsigned long *)&cpumask;
- for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
- atomic_set_mask(cpu_mask[k], &flush_mask[k]);
- }
-#endif
+ cpus_or(flush_cpumask, cpumask, flush_cpumask);
/*
* We have to send the IPI only to
* CPUs affected.
@@ -417,7 +402,7 @@ void flush_tlb_current_task(void)
local_flush_tlb();
if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+ flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
preempt_enable();
}
@@ -436,7 +421,7 @@ void flush_tlb_mm (struct mm_struct * mm)
leave_mm(smp_processor_id());
}
if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+ flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
preempt_enable();
}
@@ -483,7 +468,7 @@ void flush_tlb_all(void)
* it goes straight through and wastes no time serializing
* anything. Worst case is that we lose a reschedule ...
*/
-void smp_send_reschedule(int cpu)
+void native_smp_send_reschedule(int cpu)
{
WARN_ON(cpu_is_offline(cpu));
send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
@@ -515,36 +500,78 @@ void unlock_ipi_call_lock(void)
static struct call_data_struct *call_data;
+static void __smp_call_function(void (*func) (void *info), void *info,
+ int nonatomic, int wait)
+{
+ struct call_data_struct data;
+ int cpus = num_online_cpus() - 1;
+
+ if (!cpus)
+ return;
+
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+ data.wait = wait;
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+ call_data = &data;
+ mb();
+
+ /* Send a message to all other CPUs and wait for them to respond */
+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+ cpu_relax();
+
+ if (wait)
+ while (atomic_read(&data.finished) != cpus)
+ cpu_relax();
+}
+
+
/**
- * smp_call_function(): Run a function on all other CPUs.
+ * smp_call_function_mask(): Run a function on a set of other CPUs.
+ * @mask: The set of cpus to run on. Must not include the current cpu.
* @func: The function to run. This must be fast and non-blocking.
* @info: An arbitrary pointer to pass to the function.
- * @nonatomic: currently unused.
* @wait: If true, wait (atomically) until function has completed on other CPUs.
*
- * Returns 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
*
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler.
*/
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
- int wait)
+int native_smp_call_function_mask(cpumask_t mask,
+ void (*func)(void *), void *info,
+ int wait)
{
struct call_data_struct data;
+ cpumask_t allbutself;
int cpus;
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON(irqs_disabled());
+
/* Holding any lock stops cpus from going down. */
spin_lock(&call_lock);
- cpus = num_online_cpus() - 1;
+
+ allbutself = cpu_online_map;
+ cpu_clear(smp_processor_id(), allbutself);
+
+ cpus_and(mask, mask, allbutself);
+ cpus = cpus_weight(mask);
+
if (!cpus) {
spin_unlock(&call_lock);
return 0;
}
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
data.func = func;
data.info = info;
atomic_set(&data.started, 0);
@@ -554,9 +581,12 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
call_data = &data;
mb();
-
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+
+ /* Send a message to other CPUs */
+ if (cpus_equal(mask, allbutself))
+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+ else
+ send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
/* Wait for response */
while (atomic_read(&data.started) != cpus)
@@ -569,15 +599,68 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
return 0;
}
+
+/**
+ * smp_call_function(): Run a function on all other CPUs.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @nonatomic: Unused.
+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
+ int wait)
+{
+ return smp_call_function_mask(cpu_online_map, func, info, wait);
+}
EXPORT_SYMBOL(smp_call_function);
+/**
+ * smp_call_function_single - Run a function on another CPU
+ * @cpu: The target CPU. Cannot be the calling CPU.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @nonatomic: Unused.
+ * @wait: If true, wait until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
+ */
+int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
+ int nonatomic, int wait)
+{
+ /* prevent preemption and reschedule on another processor */
+ int ret;
+ int me = get_cpu();
+ if (cpu == me) {
+ WARN_ON(1);
+ put_cpu();
+ return -EBUSY;
+ }
+
+ ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
+
+ put_cpu();
+ return ret;
+}
+EXPORT_SYMBOL(smp_call_function_single);
+
static void stop_this_cpu (void * dummy)
{
+ local_irq_disable();
/*
* Remove this CPU:
*/
cpu_clear(smp_processor_id(), cpu_online_map);
- local_irq_disable();
disable_local_APIC();
if (cpu_data[smp_processor_id()].hlt_works_ok)
for(;;) halt();
@@ -588,13 +671,18 @@ static void stop_this_cpu (void * dummy)
* this function calls the 'stop' function on all other CPUs in the system.
*/
-void smp_send_stop(void)
+void native_smp_send_stop(void)
{
- smp_call_function(stop_this_cpu, NULL, 1, 0);
+ /* Don't deadlock on the call lock in panic */
+ int nolock = !spin_trylock(&call_lock);
+ unsigned long flags;
- local_irq_disable();
+ local_irq_save(flags);
+ __smp_call_function(stop_this_cpu, NULL, 0, 0);
+ if (!nolock)
+ spin_unlock(&call_lock);
disable_local_APIC();
- local_irq_enable();
+ local_irq_restore(flags);
}
/*
@@ -633,77 +721,6 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs)
}
}
-/*
- * this function sends a 'generic call function' IPI to one other CPU
- * in the system.
- *
- * cpu is a standard Linux logical CPU number.
- */
-static void
-__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait)
-{
- struct call_data_struct data;
- int cpus = 1;
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- wmb();
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (!wait)
- return;
-
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
-}
-
-/*
- * smp_call_function_single - Run a function on another CPU
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Currently unused.
- * @wait: If true, wait until function has completed on other CPUs.
- *
- * Retrurns 0 on success, else a negative status code.
- *
- * Does not return until the remote CPU is nearly ready to execute <func>
- * or is or has executed.
- */
-
-int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait)
-{
- /* prevent preemption and reschedule on another processor */
- int me = get_cpu();
- if (cpu == me) {
- WARN_ON(1);
- put_cpu();
- return -EBUSY;
- }
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- spin_lock_bh(&call_lock);
- __smp_call_function_single(cpu, func, info, nonatomic, wait);
- spin_unlock_bh(&call_lock);
- put_cpu();
- return 0;
-}
-EXPORT_SYMBOL(smp_call_function_single);
-
static int convert_apicid_to_cpu(int apic_id)
{
int i;
@@ -730,3 +747,14 @@ int safe_smp_processor_id(void)
return cpuid >= 0 ? cpuid : 0;
}
+
+struct smp_ops smp_ops = {
+ .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
+ .smp_prepare_cpus = native_smp_prepare_cpus,
+ .cpu_up = native_cpu_up,
+ .smp_cpus_done = native_smp_cpus_done,
+
+ .smp_send_stop = native_smp_send_stop,
+ .smp_send_reschedule = native_smp_send_reschedule,
+ .smp_call_function_mask = native_smp_call_function_mask,
+};
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 4ff55e675576..a4b7ad283f49 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -53,13 +53,12 @@
#include <asm/desc.h>
#include <asm/arch_hooks.h>
#include <asm/nmi.h>
-#include <asm/pda.h>
-#include <asm/genapic.h>
#include <mach_apic.h>
#include <mach_wakecpu.h>
#include <smpboot_hooks.h>
#include <asm/vmi.h>
+#include <asm/mtrr.h>
/* Set if we find a B stepping CPU */
static int __devinitdata smp_b_stepping;
@@ -100,6 +99,9 @@ EXPORT_SYMBOL(x86_cpu_to_apicid);
u8 apicid_2_node[MAX_APICID];
+DEFINE_PER_CPU(unsigned long, this_cpu_off);
+EXPORT_PER_CPU_SYMBOL(this_cpu_off);
+
/*
* Trampoline 80x86 program as an array.
*/
@@ -156,7 +158,7 @@ static void __cpuinit smp_store_cpu_info(int id)
*c = boot_cpu_data;
if (id!=0)
- identify_cpu(c);
+ identify_secondary_cpu(c);
/*
* Mask B, Pentium, but not Pentium MMX
*/
@@ -379,14 +381,14 @@ set_cpu_sibling_map(int cpu)
static void __cpuinit start_secondary(void *unused)
{
/*
- * Don't put *anything* before secondary_cpu_init(), SMP
- * booting is too fragile that we want to limit the
- * things done here to the most necessary things.
+ * Don't put *anything* before cpu_init(), SMP booting is too
+ * fragile that we want to limit the things done here to the
+ * most necessary things.
*/
#ifdef CONFIG_VMI
vmi_bringup();
#endif
- secondary_cpu_init();
+ cpu_init();
preempt_disable();
smp_callin();
while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
@@ -441,12 +443,6 @@ static void __cpuinit start_secondary(void *unused)
void __devinit initialize_secondary(void)
{
/*
- * switch to the per CPU GDT we already set up
- * in do_boot_cpu()
- */
- cpu_set_gdt(current_thread_info()->cpu);
-
- /*
* We don't actually need to load the full TSS,
* basically just the stack pointer and the eip.
*/
@@ -463,7 +459,6 @@ extern struct {
void * esp;
unsigned short ss;
} stack_start;
-extern struct i386_pda *start_pda;
#ifdef CONFIG_NUMA
@@ -521,12 +516,12 @@ static void unmap_cpu_to_logical_apicid(int cpu)
unmap_cpu_to_node(cpu);
}
-#if APIC_DEBUG
static inline void __inquire_remote_apic(int apicid)
{
int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
char *names[] = { "ID", "VERSION", "SPIV" };
- int timeout, status;
+ int timeout;
+ unsigned long status;
printk("Inquiring remote APIC #%d...\n", apicid);
@@ -536,7 +531,9 @@ static inline void __inquire_remote_apic(int apicid)
/*
* Wait for idle.
*/
- apic_wait_icr_idle();
+ status = safe_apic_wait_icr_idle();
+ if (status)
+ printk("a previous APIC delivery may have failed\n");
apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
@@ -550,14 +547,13 @@ static inline void __inquire_remote_apic(int apicid)
switch (status) {
case APIC_ICR_RR_VALID:
status = apic_read(APIC_RRR);
- printk("%08x\n", status);
+ printk("%lx\n", status);
break;
default:
printk("failed\n");
}
}
}
-#endif
#ifdef WAKE_SECONDARY_VIA_NMI
/*
@@ -568,8 +564,8 @@ static inline void __inquire_remote_apic(int apicid)
static int __devinit
wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
{
- unsigned long send_status = 0, accept_status = 0;
- int timeout, maxlvt;
+ unsigned long send_status, accept_status = 0;
+ int maxlvt;
/* Target chip */
apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
@@ -579,12 +575,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
+ send_status = safe_apic_wait_icr_idle();
/*
* Give the other CPU some time to accept the IPI.
@@ -614,8 +605,8 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
static int __devinit
wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
{
- unsigned long send_status = 0, accept_status = 0;
- int maxlvt, timeout, num_starts, j;
+ unsigned long send_status, accept_status = 0;
+ int maxlvt, num_starts, j;
/*
* Be paranoid about clearing APIC errors.
@@ -640,12 +631,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
| APIC_DM_INIT);
Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
+ send_status = safe_apic_wait_icr_idle();
mdelay(10);
@@ -658,12 +644,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
+ send_status = safe_apic_wait_icr_idle();
atomic_set(&init_deasserted, 1);
@@ -719,12 +700,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
Dprintk("Startup point 1.\n");
Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
+ send_status = safe_apic_wait_icr_idle();
/*
* Give the other CPU some time to accept the IPI.
@@ -788,6 +764,25 @@ static inline struct task_struct * alloc_idle_task(int cpu)
#define alloc_idle_task(cpu) fork_idle(cpu)
#endif
+/* Initialize the CPU's GDT. This is either the boot CPU doing itself
+ (still using the master per-cpu area), or a CPU doing it for a
+ secondary which will soon come up. */
+static __cpuinit void init_gdt(int cpu)
+{
+ struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+
+ pack_descriptor((u32 *)&gdt[GDT_ENTRY_PERCPU].a,
+ (u32 *)&gdt[GDT_ENTRY_PERCPU].b,
+ __per_cpu_offset[cpu], 0xFFFFF,
+ 0x80 | DESCTYPE_S | 0x2, 0x8);
+
+ per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
+ per_cpu(cpu_number, cpu) = cpu;
+}
+
+/* Defined in head.S */
+extern struct Xgt_desc_struct early_gdt_descr;
+
static int __cpuinit do_boot_cpu(int apicid, int cpu)
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@ -802,6 +797,12 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
unsigned short nmi_high = 0, nmi_low = 0;
/*
+ * Save current MTRR state in case it was changed since early boot
+ * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
+ */
+ mtrr_save_state();
+
+ /*
* We can't use kernel_thread since we must avoid to
* reschedule the child.
*/
@@ -809,13 +810,9 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
if (IS_ERR(idle))
panic("failed fork for CPU %d", cpu);
- /* Pre-allocate and initialize the CPU's GDT and PDA so it
- doesn't have to do any memory allocation during the
- delicate CPU-bringup phase. */
- if (!init_gdt(cpu, idle)) {
- printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu);
- return -1; /* ? */
- }
+ init_gdt(cpu);
+ per_cpu(current_task, cpu) = idle;
+ early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
idle->thread.eip = (unsigned long) start_secondary;
/* start_eip had better be page-aligned! */
@@ -941,7 +938,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
DECLARE_COMPLETION_ONSTACK(done);
struct warm_boot_cpu_info info;
int apicid, ret;
- struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
apicid = x86_cpu_to_apicid[cpu];
if (apicid == BAD_APICID) {
@@ -949,18 +945,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
goto exit;
}
- /*
- * the CPU isn't initialized at boot time, allocate gdt table here.
- * cpu_init will initialize it
- */
- if (!cpu_gdt_descr->address) {
- cpu_gdt_descr->address = get_zeroed_page(GFP_KERNEL);
- if (!cpu_gdt_descr->address)
- printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
- ret = -ENOMEM;
- goto exit;
- }
-
info.complete = &done;
info.apicid = apicid;
info.cpu = cpu;
@@ -1173,7 +1157,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
/* These are wrappers to interface to the new boot process. Someone
who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
-void __init smp_prepare_cpus(unsigned int max_cpus)
+void __init native_smp_prepare_cpus(unsigned int max_cpus)
{
smp_commenced_mask = cpumask_of_cpu(0);
cpu_callin_map = cpumask_of_cpu(0);
@@ -1181,13 +1165,18 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
smp_boot_cpus(max_cpus);
}
-void __devinit smp_prepare_boot_cpu(void)
+void __init native_smp_prepare_boot_cpu(void)
{
- cpu_set(smp_processor_id(), cpu_online_map);
- cpu_set(smp_processor_id(), cpu_callout_map);
- cpu_set(smp_processor_id(), cpu_present_map);
- cpu_set(smp_processor_id(), cpu_possible_map);
- per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
+ unsigned int cpu = smp_processor_id();
+
+ init_gdt(cpu);
+ switch_to_new_gdt();
+
+ cpu_set(cpu, cpu_online_map);
+ cpu_set(cpu, cpu_callout_map);
+ cpu_set(cpu, cpu_present_map);
+ cpu_set(cpu, cpu_possible_map);
+ __get_cpu_var(cpu_state) = CPU_ONLINE;
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -1277,7 +1266,7 @@ void __cpu_die(unsigned int cpu)
}
#endif /* CONFIG_HOTPLUG_CPU */
-int __cpuinit __cpu_up(unsigned int cpu)
+int __cpuinit native_cpu_up(unsigned int cpu)
{
unsigned long flags;
#ifdef CONFIG_HOTPLUG_CPU
@@ -1319,15 +1308,10 @@ int __cpuinit __cpu_up(unsigned int cpu)
touch_nmi_watchdog();
}
-#ifdef CONFIG_X86_GENERICARCH
- if (num_online_cpus() > 8 && genapic == &apic_default)
- panic("Default flat APIC routing can't be used with > 8 cpus\n");
-#endif
-
return 0;
}
-void __init smp_cpus_done(unsigned int max_cpus)
+void __init native_smp_cpus_done(unsigned int max_cpus)
{
#ifdef CONFIG_X86_IO_APIC
setup_ioapic_dest();
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 13ca54a85a1c..ff4ee6f3326b 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -22,16 +22,26 @@
#include <asm/msr.h>
#include <asm/pgtable.h>
#include <asm/unistd.h>
+#include <asm/elf.h>
+#include <asm/tlbflush.h>
+
+enum {
+ VDSO_DISABLED = 0,
+ VDSO_ENABLED = 1,
+ VDSO_COMPAT = 2,
+};
+
+#ifdef CONFIG_COMPAT_VDSO
+#define VDSO_DEFAULT VDSO_COMPAT
+#else
+#define VDSO_DEFAULT VDSO_ENABLED
+#endif
/*
* Should the kernel map a VDSO page into processes and pass its
* address down to glibc upon exec()?
*/
-#ifdef CONFIG_PARAVIRT
-unsigned int __read_mostly vdso_enabled = 0;
-#else
-unsigned int __read_mostly vdso_enabled = 1;
-#endif
+unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT;
EXPORT_SYMBOL_GPL(vdso_enabled);
@@ -46,6 +56,123 @@ __setup("vdso=", vdso_setup);
extern asmlinkage void sysenter_entry(void);
+static __init void reloc_symtab(Elf32_Ehdr *ehdr,
+ unsigned offset, unsigned size)
+{
+ Elf32_Sym *sym = (void *)ehdr + offset;
+ unsigned nsym = size / sizeof(*sym);
+ unsigned i;
+
+ for(i = 0; i < nsym; i++, sym++) {
+ if (sym->st_shndx == SHN_UNDEF ||
+ sym->st_shndx == SHN_ABS)
+ continue; /* skip */
+
+ if (sym->st_shndx > SHN_LORESERVE) {
+ printk(KERN_INFO "VDSO: unexpected st_shndx %x\n",
+ sym->st_shndx);
+ continue;
+ }
+
+ switch(ELF_ST_TYPE(sym->st_info)) {
+ case STT_OBJECT:
+ case STT_FUNC:
+ case STT_SECTION:
+ case STT_FILE:
+ sym->st_value += VDSO_HIGH_BASE;
+ }
+ }
+}
+
+static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset)
+{
+ Elf32_Dyn *dyn = (void *)ehdr + offset;
+
+ for(; dyn->d_tag != DT_NULL; dyn++)
+ switch(dyn->d_tag) {
+ case DT_PLTGOT:
+ case DT_HASH:
+ case DT_STRTAB:
+ case DT_SYMTAB:
+ case DT_RELA:
+ case DT_INIT:
+ case DT_FINI:
+ case DT_REL:
+ case DT_DEBUG:
+ case DT_JMPREL:
+ case DT_VERSYM:
+ case DT_VERDEF:
+ case DT_VERNEED:
+ case DT_ADDRRNGLO ... DT_ADDRRNGHI:
+ /* definitely pointers needing relocation */
+ dyn->d_un.d_ptr += VDSO_HIGH_BASE;
+ break;
+
+ case DT_ENCODING ... OLD_DT_LOOS-1:
+ case DT_LOOS ... DT_HIOS-1:
+ /* Tags above DT_ENCODING are pointers if
+ they're even */
+ if (dyn->d_tag >= DT_ENCODING &&
+ (dyn->d_tag & 1) == 0)
+ dyn->d_un.d_ptr += VDSO_HIGH_BASE;
+ break;
+
+ case DT_VERDEFNUM:
+ case DT_VERNEEDNUM:
+ case DT_FLAGS_1:
+ case DT_RELACOUNT:
+ case DT_RELCOUNT:
+ case DT_VALRNGLO ... DT_VALRNGHI:
+ /* definitely not pointers */
+ break;
+
+ case OLD_DT_LOOS ... DT_LOOS-1:
+ case DT_HIOS ... DT_VALRNGLO-1:
+ default:
+ if (dyn->d_tag > DT_ENCODING)
+ printk(KERN_INFO "VDSO: unexpected DT_tag %x\n",
+ dyn->d_tag);
+ break;
+ }
+}
+
+static __init void relocate_vdso(Elf32_Ehdr *ehdr)
+{
+ Elf32_Phdr *phdr;
+ Elf32_Shdr *shdr;
+ int i;
+
+ BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 ||
+ !elf_check_arch(ehdr) ||
+ ehdr->e_type != ET_DYN);
+
+ ehdr->e_entry += VDSO_HIGH_BASE;
+
+ /* rebase phdrs */
+ phdr = (void *)ehdr + ehdr->e_phoff;
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ phdr[i].p_vaddr += VDSO_HIGH_BASE;
+
+ /* relocate dynamic stuff */
+ if (phdr[i].p_type == PT_DYNAMIC)
+ reloc_dyn(ehdr, phdr[i].p_offset);
+ }
+
+ /* rebase sections */
+ shdr = (void *)ehdr + ehdr->e_shoff;
+ for(i = 0; i < ehdr->e_shnum; i++) {
+ if (!(shdr[i].sh_flags & SHF_ALLOC))
+ continue;
+
+ shdr[i].sh_addr += VDSO_HIGH_BASE;
+
+ if (shdr[i].sh_type == SHT_SYMTAB ||
+ shdr[i].sh_type == SHT_DYNSYM)
+ reloc_symtab(ehdr, shdr[i].sh_offset,
+ shdr[i].sh_size);
+ }
+}
+
void enable_sep_cpu(void)
{
int cpu = get_cpu();
@@ -56,14 +183,33 @@ void enable_sep_cpu(void)
return;
}
- tss->ss1 = __KERNEL_CS;
- tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
+ tss->x86_tss.ss1 = __KERNEL_CS;
+ tss->x86_tss.esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
- wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0);
+ wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.esp1, 0);
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
put_cpu();
}
+static struct vm_area_struct gate_vma;
+
+static int __init gate_vma_init(void)
+{
+ gate_vma.vm_mm = NULL;
+ gate_vma.vm_start = FIXADDR_USER_START;
+ gate_vma.vm_end = FIXADDR_USER_END;
+ gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
+ gate_vma.vm_page_prot = __P101;
+ /*
+ * Make sure the vDSO gets into every core dump.
+ * Dumping its contents makes post-mortem fully interpretable later
+ * without matching up the same kernel and hardware config to see
+ * what PC values meant.
+ */
+ gate_vma.vm_flags |= VM_ALWAYSDUMP;
+ return 0;
+}
+
/*
* These symbols are defined by vsyscall.o to mark the bounds
* of the ELF DSO images included therein.
@@ -72,31 +218,48 @@ extern const char vsyscall_int80_start, vsyscall_int80_end;
extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
static struct page *syscall_pages[1];
+static void map_compat_vdso(int map)
+{
+ static int vdso_mapped;
+
+ if (map == vdso_mapped)
+ return;
+
+ vdso_mapped = map;
+
+ __set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT,
+ map ? PAGE_READONLY_EXEC : PAGE_NONE);
+
+ /* flush stray tlbs */
+ flush_tlb_all();
+}
+
int __init sysenter_setup(void)
{
void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
+ const void *vsyscall;
+ size_t vsyscall_len;
+
syscall_pages[0] = virt_to_page(syscall_page);
-#ifdef CONFIG_COMPAT_VDSO
- __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC);
+ gate_vma_init();
+
printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
-#endif
if (!boot_cpu_has(X86_FEATURE_SEP)) {
- memcpy(syscall_page,
- &vsyscall_int80_start,
- &vsyscall_int80_end - &vsyscall_int80_start);
- return 0;
+ vsyscall = &vsyscall_int80_start;
+ vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start;
+ } else {
+ vsyscall = &vsyscall_sysenter_start;
+ vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start;
}
- memcpy(syscall_page,
- &vsyscall_sysenter_start,
- &vsyscall_sysenter_end - &vsyscall_sysenter_start);
+ memcpy(syscall_page, vsyscall, vsyscall_len);
+ relocate_vdso(syscall_page);
return 0;
}
-#ifndef CONFIG_COMPAT_VDSO
/* Defined in vsyscall-sysenter.S */
extern void SYSENTER_RETURN;
@@ -105,36 +268,52 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
{
struct mm_struct *mm = current->mm;
unsigned long addr;
- int ret;
+ int ret = 0;
+ bool compat;
down_write(&mm->mmap_sem);
- addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
- if (IS_ERR_VALUE(addr)) {
- ret = addr;
- goto up_fail;
- }
- /*
- * MAYWRITE to allow gdb to COW and set breakpoints
- *
- * Make sure the vDSO gets into every core dump.
- * Dumping its contents makes post-mortem fully interpretable later
- * without matching up the same kernel and hardware config to see
- * what PC values meant.
- */
- ret = install_special_mapping(mm, addr, PAGE_SIZE,
- VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
- VM_ALWAYSDUMP,
- syscall_pages);
- if (ret)
- goto up_fail;
+ /* Test compat mode once here, in case someone
+ changes it via sysctl */
+ compat = (vdso_enabled == VDSO_COMPAT);
+
+ map_compat_vdso(compat);
+
+ if (compat)
+ addr = VDSO_HIGH_BASE;
+ else {
+ addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+ if (IS_ERR_VALUE(addr)) {
+ ret = addr;
+ goto up_fail;
+ }
+
+ /*
+ * MAYWRITE to allow gdb to COW and set breakpoints
+ *
+ * Make sure the vDSO gets into every core dump.
+ * Dumping its contents makes post-mortem fully
+ * interpretable later without matching up the same
+ * kernel and hardware config to see what PC values
+ * meant.
+ */
+ ret = install_special_mapping(mm, addr, PAGE_SIZE,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+ VM_ALWAYSDUMP,
+ syscall_pages);
+
+ if (ret)
+ goto up_fail;
+ }
current->mm->context.vdso = (void *)addr;
current_thread_info()->sysenter_return =
- (void *)VDSO_SYM(&SYSENTER_RETURN);
-up_fail:
+ (void *)VDSO_SYM(&SYSENTER_RETURN);
+
+ up_fail:
up_write(&mm->mmap_sem);
+
return ret;
}
@@ -147,6 +326,11 @@ const char *arch_vma_name(struct vm_area_struct *vma)
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
+ struct mm_struct *mm = tsk->mm;
+
+ /* Check to see if this task was created in compat vdso mode */
+ if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
+ return &gate_vma;
return NULL;
}
@@ -159,4 +343,3 @@ int in_gate_area_no_task(unsigned long addr)
{
return 0;
}
-#endif
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 94e5cb091104..a665df61f08c 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -70,8 +70,6 @@
#include <asm/i8259.h>
-int pit_latch_buggy; /* extern */
-
#include "do_timer.h"
unsigned int cpu_khz; /* Detected as we calibrate the TSC */
diff --git a/arch/i386/kernel/trampoline.S b/arch/i386/kernel/trampoline.S
index 2f1814c5cfd7..f62815f8d06a 100644
--- a/arch/i386/kernel/trampoline.S
+++ b/arch/i386/kernel/trampoline.S
@@ -29,7 +29,7 @@
*
* TYPE VALUE
* R_386_32 startup_32_smp
- * R_386_32 boot_gdt_table
+ * R_386_32 boot_gdt
*/
#include <linux/linkage.h>
@@ -62,8 +62,8 @@ r_base = .
* to 32 bit.
*/
- lidtl boot_idt - r_base # load idt with 0, 0
- lgdtl boot_gdt - r_base # load gdt with whatever is appropriate
+ lidtl boot_idt_descr - r_base # load idt with 0, 0
+ lgdtl boot_gdt_descr - r_base # load gdt with whatever is appropriate
xor %ax, %ax
inc %ax # protected mode (PE) bit
@@ -73,11 +73,11 @@ r_base = .
# These need to be in the same 64K segment as the above;
# hence we don't use the boot_gdt_descr defined in head.S
-boot_gdt:
+boot_gdt_descr:
.word __BOOT_DS + 7 # gdt limit
- .long boot_gdt_table-__PAGE_OFFSET # gdt base
+ .long boot_gdt - __PAGE_OFFSET # gdt base
-boot_idt:
+boot_idt_descr:
.word 0 # idt limit = 0
.long 0 # idt base = 0L
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index af0d3f70a817..f21b41e7770c 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -476,8 +476,6 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
siginfo_t *info)
{
struct task_struct *tsk = current;
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = trapnr;
if (regs->eflags & VM_MASK) {
if (vm86)
@@ -489,6 +487,18 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
goto kernel_trap;
trap_signal: {
+ /*
+ * We want error_code and trap_no set for userspace faults and
+ * kernelspace faults which result in die(), but not
+ * kernelspace faults which are fixed up. die() gives the
+ * process no chance to handle the signal and notice the
+ * kernel fault information, so that won't result in polluting
+ * the information about previously queued, but not yet
+ * delivered, faults. See also do_general_protection below.
+ */
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = trapnr;
+
if (info)
force_sig_info(signr, info, tsk);
else
@@ -497,8 +507,11 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
}
kernel_trap: {
- if (!fixup_exception(regs))
+ if (!fixup_exception(regs)) {
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = trapnr;
die(str, regs, error_code);
+ }
return;
}
@@ -583,7 +596,7 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs,
* and we set the offset field correctly. Then we let the CPU to
* restart the faulting instruction.
*/
- if (tss->io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
+ if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
thread->io_bitmap_ptr) {
memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
thread->io_bitmap_max);
@@ -596,16 +609,13 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs,
thread->io_bitmap_max, 0xff,
tss->io_bitmap_max - thread->io_bitmap_max);
tss->io_bitmap_max = thread->io_bitmap_max;
- tss->io_bitmap_base = IO_BITMAP_OFFSET;
+ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
tss->io_bitmap_owner = thread;
put_cpu();
return;
}
put_cpu();
- current->thread.error_code = error_code;
- current->thread.trap_no = 13;
-
if (regs->eflags & VM_MASK)
goto gp_in_vm86;
@@ -624,6 +634,8 @@ gp_in_vm86:
gp_in_kernel:
if (!fixup_exception(regs)) {
+ current->thread.error_code = error_code;
+ current->thread.trap_no = 13;
if (notify_die(DIE_GPF, "general protection fault", regs,
error_code, 13, SIGSEGV) == NOTIFY_STOP)
return;
@@ -1018,9 +1030,7 @@ fastcall void do_spurious_interrupt_bug(struct pt_regs * regs,
fastcall unsigned long patch_espfix_desc(unsigned long uesp,
unsigned long kesp)
{
- int cpu = smp_processor_id();
- struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
- struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address;
+ struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt;
unsigned long base = (kesp - uesp) & -THREAD_SIZE;
unsigned long new_kesp = kesp - base;
unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 6cb8f5336732..f64b81f3033b 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -200,13 +200,10 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
{
struct cpufreq_freqs *freq = data;
- if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
- write_seqlock_irq(&xtime_lock);
-
if (!ref_freq) {
if (!freq->old){
ref_freq = freq->new;
- goto end;
+ return 0;
}
ref_freq = freq->old;
loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
@@ -233,13 +230,10 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
* TSC based sched_clock turns
* to junk w/ cpufreq
*/
- mark_tsc_unstable();
+ mark_tsc_unstable("cpufreq changes");
}
}
}
-end:
- if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
- write_sequnlock_irq(&xtime_lock);
return 0;
}
@@ -281,11 +275,12 @@ static struct clocksource clocksource_tsc = {
CLOCK_SOURCE_MUST_VERIFY,
};
-void mark_tsc_unstable(void)
+void mark_tsc_unstable(char *reason)
{
if (!tsc_unstable) {
tsc_unstable = 1;
tsc_enabled = 0;
+ printk("Marking TSC unstable due to: %s.\n", reason);
/* Can be called before registration */
if (clocksource_tsc.mult)
clocksource_change_rating(&clocksource_tsc, 0);
diff --git a/arch/i386/kernel/verify_cpu.S b/arch/i386/kernel/verify_cpu.S
new file mode 100644
index 000000000000..e51a8695d54e
--- /dev/null
+++ b/arch/i386/kernel/verify_cpu.S
@@ -0,0 +1,65 @@
+/* Check if CPU has some minimum CPUID bits
+ This runs in 16bit mode so that the caller can still use the BIOS
+ to output errors on the screen */
+#include <asm/cpufeature.h>
+
+verify_cpu:
+ pushfl # Save caller passed flags
+ pushl $0 # Kill any dangerous flags
+ popfl
+
+#if CONFIG_X86_MINIMUM_CPU_MODEL >= 4
+ pushfl
+ orl $(1<<18),(%esp) # try setting AC
+ popfl
+ pushfl
+ popl %eax
+ testl $(1<<18),%eax
+ jz bad
+#endif
+#if REQUIRED_MASK1 != 0
+ pushfl # standard way to check for cpuid
+ popl %eax
+ movl %eax,%ebx
+ xorl $0x200000,%eax
+ pushl %eax
+ popfl
+ pushfl
+ popl %eax
+ cmpl %eax,%ebx
+ pushfl # standard way to check for cpuid
+ popl %eax
+ movl %eax,%ebx
+ xorl $0x200000,%eax
+ pushl %eax
+ popfl
+ pushfl
+ popl %eax
+ cmpl %eax,%ebx
+ jz bad # REQUIRED_MASK1 != 0 requires CPUID
+
+ movl $0x0,%eax # See if cpuid 1 is implemented
+ cpuid
+ cmpl $0x1,%eax
+ jb bad # no cpuid 1
+
+ movl $0x1,%eax # Does the cpu have what it takes
+ cpuid
+
+#if CONFIG_X86_MINIMUM_CPU_MODEL > 4
+#error add proper model checking here
+#endif
+
+ andl $REQUIRED_MASK1,%edx
+ xorl $REQUIRED_MASK1,%edx
+ jnz bad
+#endif /* REQUIRED_MASK1 */
+
+ popfl
+ xor %eax,%eax
+ ret
+
+bad:
+ popfl
+ movl $1,%eax
+ ret
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c
index 697a70e8c0c9..c8726c424b35 100644
--- a/arch/i386/kernel/vmi.c
+++ b/arch/i386/kernel/vmi.c
@@ -26,6 +26,7 @@
#include <linux/cpu.h>
#include <linux/bootmem.h>
#include <linux/mm.h>
+#include <linux/highmem.h>
#include <asm/vmi.h>
#include <asm/io.h>
#include <asm/fixmap.h>
@@ -56,7 +57,7 @@ static int disable_noidle;
static int disable_vmi_timer;
/* Cached VMI operations */
-struct {
+static struct {
void (*cpuid)(void /* non-c */);
void (*_set_ldt)(u32 selector);
void (*set_tr)(u32 selector);
@@ -65,16 +66,15 @@ struct {
void (*release_page)(u32, u32);
void (*set_pte)(pte_t, pte_t *, unsigned);
void (*update_pte)(pte_t *, unsigned);
- void (*set_linear_mapping)(int, u32, u32, u32);
- void (*flush_tlb)(int);
+ void (*set_linear_mapping)(int, void *, u32, u32);
+ void (*_flush_tlb)(int);
void (*set_initial_ap_state)(int, int);
void (*halt)(void);
void (*set_lazy_mode)(int mode);
} vmi_ops;
-/* XXX move this to alternative.h */
-extern struct paravirt_patch __start_parainstructions[],
- __stop_parainstructions[];
+/* Cached VMI operations */
+struct vmi_timer_ops vmi_timer_ops;
/*
* VMI patching routines.
@@ -83,11 +83,6 @@ extern struct paravirt_patch __start_parainstructions[],
#define MNEM_JMP 0xe9
#define MNEM_RET 0xc3
-static char irq_save_disable_callout[] = {
- MNEM_CALL, 0, 0, 0, 0,
- MNEM_CALL, 0, 0, 0, 0,
- MNEM_RET
-};
#define IRQ_PATCH_INT_MASK 0
#define IRQ_PATCH_DISABLE 5
@@ -135,33 +130,17 @@ static unsigned patch_internal(int call, unsigned len, void *insns)
static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned len)
{
switch (type) {
- case PARAVIRT_IRQ_DISABLE:
+ case PARAVIRT_PATCH(irq_disable):
return patch_internal(VMI_CALL_DisableInterrupts, len, insns);
- case PARAVIRT_IRQ_ENABLE:
+ case PARAVIRT_PATCH(irq_enable):
return patch_internal(VMI_CALL_EnableInterrupts, len, insns);
- case PARAVIRT_RESTORE_FLAGS:
+ case PARAVIRT_PATCH(restore_fl):
return patch_internal(VMI_CALL_SetInterruptMask, len, insns);
- case PARAVIRT_SAVE_FLAGS:
+ case PARAVIRT_PATCH(save_fl):
return patch_internal(VMI_CALL_GetInterruptMask, len, insns);
- case PARAVIRT_SAVE_FLAGS_IRQ_DISABLE:
- if (len >= 10) {
- patch_internal(VMI_CALL_GetInterruptMask, len, insns);
- patch_internal(VMI_CALL_DisableInterrupts, len-5, insns+5);
- return 10;
- } else {
- /*
- * You bastards didn't leave enough room to
- * patch save_flags_irq_disable inline. Patch
- * to a helper
- */
- BUG_ON(len < 5);
- *(char *)insns = MNEM_CALL;
- patch_offset(insns, irq_save_disable_callout);
- return 5;
- }
- case PARAVIRT_INTERRUPT_RETURN:
+ case PARAVIRT_PATCH(iret):
return patch_internal(VMI_CALL_IRET, len, insns);
- case PARAVIRT_STI_SYSEXIT:
+ case PARAVIRT_PATCH(irq_enable_sysexit):
return patch_internal(VMI_CALL_SYSEXIT, len, insns);
default:
break;
@@ -230,24 +209,24 @@ static void vmi_set_tr(void)
static void vmi_load_esp0(struct tss_struct *tss,
struct thread_struct *thread)
{
- tss->esp0 = thread->esp0;
+ tss->x86_tss.esp0 = thread->esp0;
/* This can only happen when SEP is enabled, no need to test "SEP"arately */
- if (unlikely(tss->ss1 != thread->sysenter_cs)) {
- tss->ss1 = thread->sysenter_cs;
+ if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
+ tss->x86_tss.ss1 = thread->sysenter_cs;
wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
}
- vmi_ops.set_kernel_stack(__KERNEL_DS, tss->esp0);
+ vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.esp0);
}
static void vmi_flush_tlb_user(void)
{
- vmi_ops.flush_tlb(VMI_FLUSH_TLB);
+ vmi_ops._flush_tlb(VMI_FLUSH_TLB);
}
static void vmi_flush_tlb_kernel(void)
{
- vmi_ops.flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL);
+ vmi_ops._flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL);
}
/* Stub to do nothing at all; used for delays and unimplemented calls */
@@ -255,18 +234,6 @@ static void vmi_nop(void)
{
}
-/* For NO_IDLE_HZ, we stop the clock when halting the kernel */
-static fastcall void vmi_safe_halt(void)
-{
- int idle = vmi_stop_hz_timer();
- vmi_ops.halt();
- if (idle) {
- local_irq_disable();
- vmi_account_time_restart_hz_timer();
- local_irq_enable();
- }
-}
-
#ifdef CONFIG_DEBUG_PAGE_TYPE
#ifdef CONFIG_X86_PAE
@@ -370,8 +337,11 @@ static void vmi_check_page_type(u32 pfn, int type)
#define vmi_check_page_type(p,t) do { } while (0)
#endif
-static void vmi_map_pt_hook(int type, pte_t *va, u32 pfn)
+#ifdef CONFIG_HIGHPTE
+static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
{
+ void *va = kmap_atomic(page, type);
+
/*
* Internally, the VMI ROM must map virtual addresses to physical
* addresses for processing MMU updates. By the time MMU updates
@@ -385,8 +355,11 @@ static void vmi_map_pt_hook(int type, pte_t *va, u32 pfn)
* args: SLOT VA COUNT PFN
*/
BUG_ON(type != KM_PTE0 && type != KM_PTE1);
- vmi_ops.set_linear_mapping((type - KM_PTE0)+1, (u32)va, 1, pfn);
+ vmi_ops.set_linear_mapping((type - KM_PTE0)+1, va, 1, page_to_pfn(page));
+
+ return va;
}
+#endif
static void vmi_allocate_pt(u32 pfn)
{
@@ -443,13 +416,13 @@ static void vmi_release_pd(u32 pfn)
((level) | (is_current_as(mm, user) ? \
(VMI_PAGE_DEFER | VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0))
-static void vmi_update_pte(struct mm_struct *mm, u32 addr, pte_t *ptep)
+static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
}
-static void vmi_update_pte_defer(struct mm_struct *mm, u32 addr, pte_t *ptep)
+static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0));
@@ -462,7 +435,7 @@ static void vmi_set_pte(pte_t *ptep, pte_t pte)
vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT);
}
-static void vmi_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte)
+static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
{
vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
@@ -516,7 +489,7 @@ static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
}
-void vmi_pmd_clear(pmd_t *pmd)
+static void vmi_pmd_clear(pmd_t *pmd)
{
const pte_t pte = { 0 };
vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD);
@@ -525,8 +498,6 @@ void vmi_pmd_clear(pmd_t *pmd)
#endif
#ifdef CONFIG_SMP
-extern void setup_pda(void);
-
static void __devinit
vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
unsigned long start_esp)
@@ -551,13 +522,11 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
ap.ds = __USER_DS;
ap.es = __USER_DS;
- ap.fs = __KERNEL_PDA;
+ ap.fs = __KERNEL_PERCPU;
ap.gs = 0;
ap.eflags = 0;
- setup_pda();
-
#ifdef CONFIG_X86_PAE
/* efer should match BSP efer. */
if (cpu_has_nx) {
@@ -575,9 +544,9 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
}
#endif
-static void vmi_set_lazy_mode(int mode)
+static void vmi_set_lazy_mode(enum paravirt_lazy_mode mode)
{
- static DEFINE_PER_CPU(int, lazy_mode);
+ static DEFINE_PER_CPU(enum paravirt_lazy_mode, lazy_mode);
if (!vmi_ops.set_lazy_mode)
return;
@@ -685,7 +654,7 @@ void vmi_bringup(void)
{
/* We must establish the lowmem mapping for MMU ops to work */
if (vmi_ops.set_linear_mapping)
- vmi_ops.set_linear_mapping(0, __PAGE_OFFSET, max_low_pfn, 0);
+ vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0);
}
/*
@@ -740,7 +709,6 @@ do { \
} \
} while (0)
-
/*
* Activate the VMI interface and switch into paravirtualized mode
*/
@@ -796,12 +764,6 @@ static inline int __init activate_vmi(void)
para_fill(irq_disable, DisableInterrupts);
para_fill(irq_enable, EnableInterrupts);
- /* irq_save_disable !!! sheer pain */
- patch_offset(&irq_save_disable_callout[IRQ_PATCH_INT_MASK],
- (char *)paravirt_ops.save_fl);
- patch_offset(&irq_save_disable_callout[IRQ_PATCH_DISABLE],
- (char *)paravirt_ops.irq_disable);
-
para_fill(wbinvd, WBINVD);
para_fill(read_tsc, RDTSC);
@@ -831,8 +793,8 @@ static inline int __init activate_vmi(void)
para_wrap(set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode);
/* user and kernel flush are just handled with different flags to FlushTLB */
- para_wrap(flush_tlb_user, vmi_flush_tlb_user, flush_tlb, FlushTLB);
- para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, flush_tlb, FlushTLB);
+ para_wrap(flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB);
+ para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB);
para_fill(flush_tlb_single, InvalPage);
/*
@@ -878,8 +840,13 @@ static inline int __init activate_vmi(void)
paravirt_ops.release_pt = vmi_release_pt;
paravirt_ops.release_pd = vmi_release_pd;
}
- para_wrap(map_pt_hook, vmi_map_pt_hook, set_linear_mapping,
- SetLinearMapping);
+
+ /* Set linear is needed in all cases */
+ vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
+#ifdef CONFIG_HIGHPTE
+ if (vmi_ops.set_linear_mapping)
+ paravirt_ops.kmap_atomic_pte = vmi_kmap_atomic_pte;
+#endif
/*
* These MUST always be patched. Don't support indirect jumps
@@ -920,8 +887,8 @@ static inline int __init activate_vmi(void)
paravirt_ops.get_wallclock = vmi_get_wallclock;
paravirt_ops.set_wallclock = vmi_set_wallclock;
#ifdef CONFIG_X86_LOCAL_APIC
- paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm;
- paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm;
+ paravirt_ops.setup_boot_clock = vmi_time_bsp_init;
+ paravirt_ops.setup_secondary_clock = vmi_time_ap_init;
#endif
paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles;
paravirt_ops.get_cpu_khz = vmi_cpu_khz;
@@ -933,11 +900,7 @@ static inline int __init activate_vmi(void)
disable_vmi_timer = 1;
}
- /* No idle HZ mode only works if VMI timer and no idle is enabled */
- if (disable_noidle || disable_vmi_timer)
- para_fill(safe_halt, Halt);
- else
- para_wrap(safe_halt, vmi_safe_halt, halt, Halt);
+ para_fill(safe_halt, Halt);
/*
* Alternative instruction rewriting doesn't happen soon enough
@@ -945,7 +908,7 @@ static inline int __init activate_vmi(void)
* to do this before IRQs get reenabled. Fortunately, it is
* idempotent.
*/
- apply_paravirt(__start_parainstructions, __stop_parainstructions);
+ apply_paravirt(__parainstructions, __parainstructions_end);
vmi_bringup();
diff --git a/arch/i386/kernel/vmiclock.c b/arch/i386/kernel/vmiclock.c
new file mode 100644
index 000000000000..26a37f8a8762
--- /dev/null
+++ b/arch/i386/kernel/vmiclock.c
@@ -0,0 +1,318 @@
+/*
+ * VMI paravirtual timer support routines.
+ *
+ * Copyright (C) 2007, VMware, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/cpumask.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+
+#include <asm/vmi.h>
+#include <asm/vmi_time.h>
+#include <asm/arch_hooks.h>
+#include <asm/apicdef.h>
+#include <asm/apic.h>
+#include <asm/timer.h>
+
+#include <irq_vectors.h>
+#include "io_ports.h"
+
+#define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
+#define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
+
+static DEFINE_PER_CPU(struct clock_event_device, local_events);
+
+static inline u32 vmi_counter(u32 flags)
+{
+ /* Given VMI_ONESHOT or VMI_PERIODIC, return the corresponding
+ * cycle counter. */
+ return flags & VMI_ALARM_COUNTER_MASK;
+}
+
+/* paravirt_ops.get_wallclock = vmi_get_wallclock */
+unsigned long vmi_get_wallclock(void)
+{
+ unsigned long long wallclock;
+ wallclock = vmi_timer_ops.get_wallclock(); // nsec
+ (void)do_div(wallclock, 1000000000); // sec
+
+ return wallclock;
+}
+
+/* paravirt_ops.set_wallclock = vmi_set_wallclock */
+int vmi_set_wallclock(unsigned long now)
+{
+ return 0;
+}
+
+/* paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles */
+unsigned long long vmi_get_sched_cycles(void)
+{
+ return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
+}
+
+/* paravirt_ops.get_cpu_khz = vmi_cpu_khz */
+unsigned long vmi_cpu_khz(void)
+{
+ unsigned long long khz;
+ khz = vmi_timer_ops.get_cycle_frequency();
+ (void)do_div(khz, 1000);
+ return khz;
+}
+
+static inline unsigned int vmi_get_timer_vector(void)
+{
+#ifdef CONFIG_X86_IO_APIC
+ return FIRST_DEVICE_VECTOR;
+#else
+ return FIRST_EXTERNAL_VECTOR;
+#endif
+}
+
+/** vmi clockchip */
+#ifdef CONFIG_X86_LOCAL_APIC
+static unsigned int startup_timer_irq(unsigned int irq)
+{
+ unsigned long val = apic_read(APIC_LVTT);
+ apic_write(APIC_LVTT, vmi_get_timer_vector());
+
+ return (val & APIC_SEND_PENDING);
+}
+
+static void mask_timer_irq(unsigned int irq)
+{
+ unsigned long val = apic_read(APIC_LVTT);
+ apic_write(APIC_LVTT, val | APIC_LVT_MASKED);
+}
+
+static void unmask_timer_irq(unsigned int irq)
+{
+ unsigned long val = apic_read(APIC_LVTT);
+ apic_write(APIC_LVTT, val & ~APIC_LVT_MASKED);
+}
+
+static void ack_timer_irq(unsigned int irq)
+{
+ ack_APIC_irq();
+}
+
+static struct irq_chip vmi_chip __read_mostly = {
+ .name = "VMI-LOCAL",
+ .startup = startup_timer_irq,
+ .mask = mask_timer_irq,
+ .unmask = unmask_timer_irq,
+ .ack = ack_timer_irq
+};
+#endif
+
+/** vmi clockevent */
+#define VMI_ALARM_WIRED_IRQ0 0x00000000
+#define VMI_ALARM_WIRED_LVTT 0x00010000
+static int vmi_wiring = VMI_ALARM_WIRED_IRQ0;
+
+static inline int vmi_get_alarm_wiring(void)
+{
+ return vmi_wiring;
+}
+
+static void vmi_timer_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *evt)
+{
+ cycle_t now, cycles_per_hz;
+ BUG_ON(!irqs_disabled());
+
+ switch (mode) {
+ case CLOCK_EVT_MODE_ONESHOT:
+ break;
+ case CLOCK_EVT_MODE_PERIODIC:
+ cycles_per_hz = vmi_timer_ops.get_cycle_frequency();
+ (void)do_div(cycles_per_hz, HZ);
+ now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_PERIODIC));
+ vmi_timer_ops.set_alarm(VMI_PERIODIC, now, cycles_per_hz);
+ break;
+ case CLOCK_EVT_MODE_UNUSED:
+ case CLOCK_EVT_MODE_SHUTDOWN:
+ switch (evt->mode) {
+ case CLOCK_EVT_MODE_ONESHOT:
+ vmi_timer_ops.cancel_alarm(VMI_ONESHOT);
+ break;
+ case CLOCK_EVT_MODE_PERIODIC:
+ vmi_timer_ops.cancel_alarm(VMI_PERIODIC);
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+static int vmi_timer_next_event(unsigned long delta,
+ struct clock_event_device *evt)
+{
+ /* Unfortunately, set_next_event interface only passes relative
+ * expiry, but we want absolute expiry. It'd be better if were
+ * were passed an aboslute expiry, since a bunch of time may
+ * have been stolen between the time the delta is computed and
+ * when we set the alarm below. */
+ cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT));
+
+ BUG_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
+ vmi_timer_ops.set_alarm(VMI_ONESHOT, now + delta, 0);
+ return 0;
+}
+
+static struct clock_event_device vmi_clockevent = {
+ .name = "vmi-timer",
+ .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
+ .shift = 22,
+ .set_mode = vmi_timer_set_mode,
+ .set_next_event = vmi_timer_next_event,
+ .rating = 1000,
+ .irq = 0,
+};
+
+static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
+{
+ struct clock_event_device *evt = &__get_cpu_var(local_events);
+ evt->event_handler(evt);
+ return IRQ_HANDLED;
+}
+
+static struct irqaction vmi_clock_action = {
+ .name = "vmi-timer",
+ .handler = vmi_timer_interrupt,
+ .flags = IRQF_DISABLED | IRQF_NOBALANCING,
+ .mask = CPU_MASK_ALL,
+};
+
+static void __devinit vmi_time_init_clockevent(void)
+{
+ cycle_t cycles_per_msec;
+ struct clock_event_device *evt;
+
+ int cpu = smp_processor_id();
+ evt = &__get_cpu_var(local_events);
+
+ /* Use cycles_per_msec since div_sc params are 32-bits. */
+ cycles_per_msec = vmi_timer_ops.get_cycle_frequency();
+ (void)do_div(cycles_per_msec, 1000);
+
+ memcpy(evt, &vmi_clockevent, sizeof(*evt));
+ /* Must pick .shift such that .mult fits in 32-bits. Choosing
+ * .shift to be 22 allows 2^(32-22) cycles per nano-seconds
+ * before overflow. */
+ evt->mult = div_sc(cycles_per_msec, NSEC_PER_MSEC, evt->shift);
+ /* Upper bound is clockevent's use of ulong for cycle deltas. */
+ evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt);
+ evt->min_delta_ns = clockevent_delta2ns(1, evt);
+ evt->cpumask = cpumask_of_cpu(cpu);
+
+ printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n",
+ evt->name, evt->mult, evt->shift);
+ clockevents_register_device(evt);
+}
+
+void __init vmi_time_init(void)
+{
+ /* Disable PIT: BIOSes start PIT CH0 with 18.2hz peridic. */
+ outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
+
+ vmi_time_init_clockevent();
+ setup_irq(0, &vmi_clock_action);
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+void __devinit vmi_time_bsp_init(void)
+{
+ /*
+ * On APIC systems, we want local timers to fire on each cpu. We do
+ * this by programming LVTT to deliver timer events to the IRQ handler
+ * for IRQ-0, since we can't re-use the APIC local timer handler
+ * without interfering with that code.
+ */
+ clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
+ local_irq_disable();
+#ifdef CONFIG_X86_SMP
+ /*
+ * XXX handle_percpu_irq only defined for SMP; we need to switch over
+ * to using it, since this is a local interrupt, which each CPU must
+ * handle individually without locking out or dropping simultaneous
+ * local timers on other CPUs. We also don't want to trigger the
+ * quirk workaround code for interrupts which gets invoked from
+ * handle_percpu_irq via eoi, so we use our own IRQ chip.
+ */
+ set_irq_chip_and_handler_name(0, &vmi_chip, handle_percpu_irq, "lvtt");
+#else
+ set_irq_chip_and_handler_name(0, &vmi_chip, handle_edge_irq, "lvtt");
+#endif
+ vmi_wiring = VMI_ALARM_WIRED_LVTT;
+ apic_write(APIC_LVTT, vmi_get_timer_vector());
+ local_irq_enable();
+ clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
+}
+
+void __devinit vmi_time_ap_init(void)
+{
+ vmi_time_init_clockevent();
+ apic_write(APIC_LVTT, vmi_get_timer_vector());
+}
+#endif
+
+/** vmi clocksource */
+
+static cycle_t read_real_cycles(void)
+{
+ return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
+}
+
+static struct clocksource clocksource_vmi = {
+ .name = "vmi-timer",
+ .rating = 450,
+ .read = read_real_cycles,
+ .mask = CLOCKSOURCE_MASK(64),
+ .mult = 0, /* to be set */
+ .shift = 22,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static int __init init_vmi_clocksource(void)
+{
+ cycle_t cycles_per_msec;
+
+ if (!vmi_timer_ops.get_cycle_frequency)
+ return 0;
+ /* Use khz2mult rather than hz2mult since hz arg is only 32-bits. */
+ cycles_per_msec = vmi_timer_ops.get_cycle_frequency();
+ (void)do_div(cycles_per_msec, 1000);
+
+ /* Note that clocksource.{mult, shift} converts in the opposite direction
+ * as clockevents. */
+ clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
+ clocksource_vmi.shift);
+
+ printk(KERN_WARNING "vmi: registering clock source khz=%lld\n", cycles_per_msec);
+ return clocksource_register(&clocksource_vmi);
+
+}
+module_init(init_vmi_clocksource);
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c
deleted file mode 100644
index 9dfb17739b67..000000000000
--- a/arch/i386/kernel/vmitime.c
+++ /dev/null
@@ -1,482 +0,0 @@
-/*
- * VMI paravirtual timer support routines.
- *
- * Copyright (C) 2005, VMware, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to dhecht@vmware.com
- *
- */
-
-/*
- * Portions of this code from arch/i386/kernel/timers/timer_tsc.c.
- * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c.
- * See comments there for proper credits.
- */
-
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/jiffies.h>
-#include <linux/interrupt.h>
-#include <linux/kernel_stat.h>
-#include <linux/rcupdate.h>
-#include <linux/clocksource.h>
-
-#include <asm/timer.h>
-#include <asm/io.h>
-#include <asm/apic.h>
-#include <asm/div64.h>
-#include <asm/timer.h>
-#include <asm/desc.h>
-
-#include <asm/vmi.h>
-#include <asm/vmi_time.h>
-
-#include <mach_timer.h>
-#include <io_ports.h>
-
-#ifdef CONFIG_X86_LOCAL_APIC
-#define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT
-#else
-#define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0
-#endif
-
-/* Cached VMI operations */
-struct vmi_timer_ops vmi_timer_ops;
-
-#ifdef CONFIG_NO_IDLE_HZ
-
-/* /proc/sys/kernel/hz_timer state. */
-int sysctl_hz_timer;
-
-/* Some stats */
-static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs);
-static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies);
-static DEFINE_PER_CPU(unsigned long, idle_start_jiffies);
-
-#endif /* CONFIG_NO_IDLE_HZ */
-
-/* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */
-static int alarm_hz = CONFIG_VMI_ALARM_HZ;
-
-/* Cache of the value get_cycle_frequency / HZ. */
-static signed long long cycles_per_jiffy;
-
-/* Cache of the value get_cycle_frequency / alarm_hz. */
-static signed long long cycles_per_alarm;
-
-/* The number of cycles accounted for by the 'jiffies'/'xtime' count.
- * Protected by xtime_lock. */
-static unsigned long long real_cycles_accounted_system;
-
-/* The number of cycles accounted for by update_process_times(), per cpu. */
-static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu);
-
-/* The number of stolen cycles accounted, per cpu. */
-static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu);
-
-/* Clock source. */
-static cycle_t read_real_cycles(void)
-{
- return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
-}
-
-static cycle_t read_available_cycles(void)
-{
- return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
-}
-
-#if 0
-static cycle_t read_stolen_cycles(void)
-{
- return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN);
-}
-#endif /* 0 */
-
-static struct clocksource clocksource_vmi = {
- .name = "vmi-timer",
- .rating = 450,
- .read = read_real_cycles,
- .mask = CLOCKSOURCE_MASK(64),
- .mult = 0, /* to be set */
- .shift = 22,
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
-
-/* Timer interrupt handler. */
-static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id);
-
-static struct irqaction vmi_timer_irq = {
- .handler = vmi_timer_interrupt,
- .flags = IRQF_DISABLED,
- .mask = CPU_MASK_NONE,
- .name = "VMI-alarm",
-};
-
-/* Alarm rate */
-static int __init vmi_timer_alarm_rate_setup(char* str)
-{
- int alarm_rate;
- if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) {
- alarm_hz = alarm_rate;
- printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz);
- }
- return 1;
-}
-__setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup);
-
-
-/* Initialization */
-static void vmi_get_wallclock_ts(struct timespec *ts)
-{
- unsigned long long wallclock;
- wallclock = vmi_timer_ops.get_wallclock(); // nsec units
- ts->tv_nsec = do_div(wallclock, 1000000000);
- ts->tv_sec = wallclock;
-}
-
-unsigned long vmi_get_wallclock(void)
-{
- struct timespec ts;
- vmi_get_wallclock_ts(&ts);
- return ts.tv_sec;
-}
-
-int vmi_set_wallclock(unsigned long now)
-{
- return -1;
-}
-
-unsigned long long vmi_get_sched_cycles(void)
-{
- return read_available_cycles();
-}
-
-unsigned long vmi_cpu_khz(void)
-{
- unsigned long long khz;
-
- khz = vmi_timer_ops.get_cycle_frequency();
- (void)do_div(khz, 1000);
- return khz;
-}
-
-void __init vmi_time_init(void)
-{
- unsigned long long cycles_per_sec, cycles_per_msec;
- unsigned long flags;
-
- local_irq_save(flags);
- setup_irq(0, &vmi_timer_irq);
-#ifdef CONFIG_X86_LOCAL_APIC
- set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt);
-#endif
-
- real_cycles_accounted_system = read_real_cycles();
- per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles();
-
- cycles_per_sec = vmi_timer_ops.get_cycle_frequency();
- cycles_per_jiffy = cycles_per_sec;
- (void)do_div(cycles_per_jiffy, HZ);
- cycles_per_alarm = cycles_per_sec;
- (void)do_div(cycles_per_alarm, alarm_hz);
- cycles_per_msec = cycles_per_sec;
- (void)do_div(cycles_per_msec, 1000);
-
- printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;"
- "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy,
- cycles_per_alarm);
-
- clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
- clocksource_vmi.shift);
- if (clocksource_register(&clocksource_vmi))
- printk(KERN_WARNING "Error registering VMITIME clocksource.");
-
- /* Disable PIT. */
- outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
-
- /* schedule the alarm. do this in phase with process_times_cycles_accounted_cpu
- * reduce the latency calling update_process_times. */
- vmi_timer_ops.set_alarm(
- VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
- per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
- cycles_per_alarm);
-
- local_irq_restore(flags);
-}
-
-#ifdef CONFIG_X86_LOCAL_APIC
-
-void __init vmi_timer_setup_boot_alarm(void)
-{
- local_irq_disable();
-
- /* Route the interrupt to the correct vector. */
- apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
-
- /* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */
- vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
- vmi_timer_ops.set_alarm(
- VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
- per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
- cycles_per_alarm);
- local_irq_enable();
-}
-
-/* Initialize the time accounting variables for an AP on an SMP system.
- * Also, set the local alarm for the AP. */
-void __devinit vmi_timer_setup_secondary_alarm(void)
-{
- int cpu = smp_processor_id();
-
- /* Route the interrupt to the correct vector. */
- apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
-
- per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles();
-
- vmi_timer_ops.set_alarm(
- VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
- per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
- cycles_per_alarm);
-}
-
-#endif
-
-/* Update system wide (real) time accounting (e.g. jiffies, xtime). */
-static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
-{
- long long cycles_not_accounted;
-
- write_seqlock(&xtime_lock);
-
- cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
- while (cycles_not_accounted >= cycles_per_jiffy) {
- /* systems wide jiffies. */
- do_timer(1);
-
- cycles_not_accounted -= cycles_per_jiffy;
- real_cycles_accounted_system += cycles_per_jiffy;
- }
-
- write_sequnlock(&xtime_lock);
-}
-
-/* Update per-cpu process times. */
-static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu,
- unsigned long long cur_process_times_cycles)
-{
- long long cycles_not_accounted;
- cycles_not_accounted = cur_process_times_cycles -
- per_cpu(process_times_cycles_accounted_cpu, cpu);
-
- while (cycles_not_accounted >= cycles_per_jiffy) {
- /* Account time to the current process. This includes
- * calling into the scheduler to decrement the timeslice
- * and possibly reschedule.*/
- update_process_times(user_mode(regs));
- /* XXX handle /proc/profile multiplier. */
- profile_tick(CPU_PROFILING);
-
- cycles_not_accounted -= cycles_per_jiffy;
- per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
- }
-}
-
-#ifdef CONFIG_NO_IDLE_HZ
-/* Update per-cpu idle times. Used when a no-hz halt is ended. */
-static void vmi_account_no_hz_idle_cycles(int cpu,
- unsigned long long cur_process_times_cycles)
-{
- long long cycles_not_accounted;
- unsigned long no_idle_hz_jiffies = 0;
-
- cycles_not_accounted = cur_process_times_cycles -
- per_cpu(process_times_cycles_accounted_cpu, cpu);
-
- while (cycles_not_accounted >= cycles_per_jiffy) {
- no_idle_hz_jiffies++;
- cycles_not_accounted -= cycles_per_jiffy;
- per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
- }
- /* Account time to the idle process. */
- account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies));
-}
-#endif
-
-/* Update per-cpu stolen time. */
-static void vmi_account_stolen_cycles(int cpu,
- unsigned long long cur_real_cycles,
- unsigned long long cur_avail_cycles)
-{
- long long stolen_cycles_not_accounted;
- unsigned long stolen_jiffies = 0;
-
- if (cur_real_cycles < cur_avail_cycles)
- return;
-
- stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles -
- per_cpu(stolen_cycles_accounted_cpu, cpu);
-
- while (stolen_cycles_not_accounted >= cycles_per_jiffy) {
- stolen_jiffies++;
- stolen_cycles_not_accounted -= cycles_per_jiffy;
- per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
- }
- /* HACK: pass NULL to force time onto cpustat->steal. */
- account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies));
-}
-
-/* Body of either IRQ0 interrupt handler (UP no local-APIC) or
- * local-APIC LVTT interrupt handler (UP & local-APIC or SMP). */
-static void vmi_local_timer_interrupt(int cpu)
-{
- unsigned long long cur_real_cycles, cur_process_times_cycles;
-
- cur_real_cycles = read_real_cycles();
- cur_process_times_cycles = read_available_cycles();
- /* Update system wide (real) time state (xtime, jiffies). */
- vmi_account_real_cycles(cur_real_cycles);
- /* Update per-cpu process times. */
- vmi_account_process_times_cycles(get_irq_regs(), cpu, cur_process_times_cycles);
- /* Update time stolen from this cpu by the hypervisor. */
- vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles);
-}
-
-#ifdef CONFIG_NO_IDLE_HZ
-
-/* Must be called only from idle loop, with interrupts disabled. */
-int vmi_stop_hz_timer(void)
-{
- /* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */
-
- unsigned long seq, next;
- unsigned long long real_cycles_expiry;
- int cpu = smp_processor_id();
-
- BUG_ON(!irqs_disabled());
- if (sysctl_hz_timer != 0)
- return 0;
-
- cpu_set(cpu, nohz_cpu_mask);
- smp_mb();
-
- if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
- (next = next_timer_interrupt(),
- time_before_eq(next, jiffies + HZ/CONFIG_VMI_ALARM_HZ))) {
- cpu_clear(cpu, nohz_cpu_mask);
- return 0;
- }
-
- /* Convert jiffies to the real cycle counter. */
- do {
- seq = read_seqbegin(&xtime_lock);
- real_cycles_expiry = real_cycles_accounted_system +
- (long)(next - jiffies) * cycles_per_jiffy;
- } while (read_seqretry(&xtime_lock, seq));
-
- /* This cpu is going idle. Disable the periodic alarm. */
- vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
- per_cpu(idle_start_jiffies, cpu) = jiffies;
- /* Set the real time alarm to expire at the next event. */
- vmi_timer_ops.set_alarm(
- VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
- real_cycles_expiry, 0);
- return 1;
-}
-
-static void vmi_reenable_hz_timer(int cpu)
-{
- /* For /proc/vmi/info idle_hz stat. */
- per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu);
- per_cpu(vmi_idle_no_hz_irqs, cpu)++;
-
- /* Don't bother explicitly cancelling the one-shot alarm -- at
- * worse we will receive a spurious timer interrupt. */
- vmi_timer_ops.set_alarm(
- VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
- per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
- cycles_per_alarm);
- /* Indicate this cpu is no longer nohz idle. */
- cpu_clear(cpu, nohz_cpu_mask);
-}
-
-/* Called from interrupt handlers when (local) HZ timer is disabled. */
-void vmi_account_time_restart_hz_timer(void)
-{
- unsigned long long cur_real_cycles, cur_process_times_cycles;
- int cpu = smp_processor_id();
-
- BUG_ON(!irqs_disabled());
- /* Account the time during which the HZ timer was disabled. */
- cur_real_cycles = read_real_cycles();
- cur_process_times_cycles = read_available_cycles();
- /* Update system wide (real) time state (xtime, jiffies). */
- vmi_account_real_cycles(cur_real_cycles);
- /* Update per-cpu idle times. */
- vmi_account_no_hz_idle_cycles(cpu, cur_process_times_cycles);
- /* Update time stolen from this cpu by the hypervisor. */
- vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles);
- /* Reenable the hz timer. */
- vmi_reenable_hz_timer(cpu);
-}
-
-#endif /* CONFIG_NO_IDLE_HZ */
-
-/* UP (and no local-APIC) VMI-timer alarm interrupt handler.
- * Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after
- * APIC setup and setup_boot_vmi_alarm() is called. */
-static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
-{
- vmi_local_timer_interrupt(smp_processor_id());
- return IRQ_HANDLED;
-}
-
-#ifdef CONFIG_X86_LOCAL_APIC
-
-/* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector.
- * Also used in UP when CONFIG_X86_LOCAL_APIC.
- * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt. */
-void smp_apic_vmi_timer_interrupt(struct pt_regs *regs)
-{
- struct pt_regs *old_regs = set_irq_regs(regs);
- int cpu = smp_processor_id();
-
- /*
- * the NMI deadlock-detector uses this.
- */
- per_cpu(irq_stat,cpu).apic_timer_irqs++;
-
- /*
- * NOTE! We'd better ACK the irq immediately,
- * because timer handling can be slow.
- */
- ack_APIC_irq();
-
- /*
- * update_process_times() expects us to have done irq_enter().
- * Besides, if we don't timer interrupts ignore the global
- * interrupt lock, which is the WrongThing (tm) to do.
- */
- irq_enter();
- vmi_local_timer_interrupt(cpu);
- irq_exit();
- set_irq_regs(old_regs);
-}
-
-#endif /* CONFIG_X86_LOCAL_APIC */
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 6f38f818380b..23e8614edeee 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -26,12 +26,11 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
OUTPUT_ARCH(i386)
ENTRY(phys_startup_32)
jiffies = jiffies_64;
-_proxy_pda = 1;
PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
data PT_LOAD FLAGS(7); /* RWE */
- note PT_NOTE FLAGS(4); /* R__ */
+ note PT_NOTE FLAGS(0); /* ___ */
}
SECTIONS
{
@@ -61,8 +60,6 @@ SECTIONS
__stop___ex_table = .;
}
- RODATA
-
BUG_TABLE
. = ALIGN(4);
@@ -72,6 +69,8 @@ SECTIONS
__tracedata_end = .;
}
+ RODATA
+
/* writeable */
. = ALIGN(4096);
.data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
@@ -117,22 +116,11 @@ SECTIONS
/* might get freed after init */
. = ALIGN(4096);
- .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) {
- __smp_alt_begin = .;
- __smp_alt_instructions = .;
- *(.smp_altinstructions)
- __smp_alt_instructions_end = .;
- }
- . = ALIGN(4);
.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
__smp_locks = .;
*(.smp_locks)
__smp_locks_end = .;
}
- .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) {
- *(.smp_altinstr_replacement)
- __smp_alt_end = .;
- }
/* will be freed after init
* Following ALIGN() is required to make sure no other data falls on the
* same page where __smp_alt_end is pointing as that page might be freed
@@ -178,9 +166,9 @@ SECTIONS
}
. = ALIGN(4);
.parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
- __start_parainstructions = .;
+ __parainstructions = .;
*(.parainstructions)
- __stop_parainstructions = .;
+ __parainstructions_end = .;
}
/* .exit.text is discard at runtime, not link time, to deal with references
from .altinstructions and .eh_frame */
@@ -194,7 +182,7 @@ SECTIONS
__initramfs_end = .;
}
#endif
- . = ALIGN(L1_CACHE_BYTES);
+ . = ALIGN(4096);
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
__per_cpu_start = .;
*(.data.percpu)
diff --git a/arch/i386/kernel/vsyscall.lds.S b/arch/i386/kernel/vsyscall.lds.S
index f66cd11adb72..4a8b0ed9b8fb 100644
--- a/arch/i386/kernel/vsyscall.lds.S
+++ b/arch/i386/kernel/vsyscall.lds.S
@@ -7,7 +7,7 @@
SECTIONS
{
- . = VDSO_PRELINK + SIZEOF_HEADERS;
+ . = VDSO_PRELINK_asm + SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
@@ -21,7 +21,7 @@ SECTIONS
For the layouts to match, we need to skip more than enough
space for the dynamic symbol table et al. If this amount
is insufficient, ld -shared will barf. Just increase it here. */
- . = VDSO_PRELINK + 0x400;
+ . = VDSO_PRELINK_asm + 0x400;
.text : { *(.text) } :text =0x90909090
.note : { *(.note.*) } :text :note
diff --git a/arch/i386/lib/bitops.c b/arch/i386/lib/bitops.c
index 97db3853dc82..afd0045595d4 100644
--- a/arch/i386/lib/bitops.c
+++ b/arch/i386/lib/bitops.c
@@ -43,7 +43,7 @@ EXPORT_SYMBOL(find_next_bit);
*/
int find_next_zero_bit(const unsigned long *addr, int size, int offset)
{
- unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
+ const unsigned long *p = addr + (offset >> 5);
int set = 0, bit = offset & 31, res;
if (bit) {
@@ -64,7 +64,7 @@ int find_next_zero_bit(const unsigned long *addr, int size, int offset)
/*
* No zero yet, search remaining full bytes for a zero
*/
- res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr));
+ res = find_first_zero_bit(p, size - 32 * (p - addr));
return (offset + set + res);
}
EXPORT_SYMBOL(find_next_zero_bit);
diff --git a/arch/i386/lib/checksum.S b/arch/i386/lib/checksum.S
index 75ffd02654fc..adbccd0bbb78 100644
--- a/arch/i386/lib/checksum.S
+++ b/arch/i386/lib/checksum.S
@@ -25,6 +25,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
#include <asm/errno.h>
/*
@@ -36,8 +38,6 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
*/
.text
-.align 4
-.globl csum_partial
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
@@ -48,9 +48,14 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
* Fortunately, it is easy to convert 2-byte alignment to 4-byte
* alignment for the unrolled loop.
*/
-csum_partial:
+ENTRY(csum_partial)
+ CFI_STARTPROC
pushl %esi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET esi, 0
pushl %ebx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebx, 0
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
movl 12(%esp),%esi # Function arg: unsigned char *buff
@@ -128,16 +133,27 @@ csum_partial:
roll $8, %eax
8:
popl %ebx
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE ebx
popl %esi
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE esi
ret
+ CFI_ENDPROC
+ENDPROC(csum_partial)
#else
/* Version for PentiumII/PPro */
-csum_partial:
+ENTRY(csum_partial)
+ CFI_STARTPROC
pushl %esi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET esi, 0
pushl %ebx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebx, 0
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
movl 12(%esp),%esi # Function arg: const unsigned char *buf
@@ -245,8 +261,14 @@ csum_partial:
roll $8, %eax
90:
popl %ebx
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE ebx
popl %esi
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE esi
ret
+ CFI_ENDPROC
+ENDPROC(csum_partial)
#endif
@@ -278,19 +300,24 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
.long 9999b, 6002f ; \
.previous
-.align 4
-.globl csum_partial_copy_generic
-
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
#define ARGBASE 16
#define FP 12
-csum_partial_copy_generic:
+ENTRY(csum_partial_copy_generic)
+ CFI_STARTPROC
subl $4,%esp
+ CFI_ADJUST_CFA_OFFSET 4
pushl %edi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edi, 0
pushl %esi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET esi, 0
pushl %ebx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebx, 0
movl ARGBASE+16(%esp),%eax # sum
movl ARGBASE+12(%esp),%ecx # len
movl ARGBASE+4(%esp),%esi # src
@@ -400,10 +427,19 @@ DST( movb %cl, (%edi) )
.previous
popl %ebx
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE ebx
popl %esi
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE esi
popl %edi
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE edi
popl %ecx # equivalent to addl $4,%esp
+ CFI_ADJUST_CFA_OFFSET -4
ret
+ CFI_ENDPROC
+ENDPROC(csum_partial_copy_generic)
#else
@@ -421,10 +457,17 @@ DST( movb %cl, (%edi) )
#define ARGBASE 12
-csum_partial_copy_generic:
+ENTRY(csum_partial_copy_generic)
+ CFI_STARTPROC
pushl %ebx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebx, 0
pushl %edi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edi, 0
pushl %esi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET esi, 0
movl ARGBASE+4(%esp),%esi #src
movl ARGBASE+8(%esp),%edi #dst
movl ARGBASE+12(%esp),%ecx #len
@@ -485,9 +528,17 @@ DST( movb %dl, (%edi) )
.previous
popl %esi
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE esi
popl %edi
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE edi
popl %ebx
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE ebx
ret
+ CFI_ENDPROC
+ENDPROC(csum_partial_copy_generic)
#undef ROUND
#undef ROUND1
diff --git a/arch/i386/lib/getuser.S b/arch/i386/lib/getuser.S
index 62d7f178a326..6d84b53f12a2 100644
--- a/arch/i386/lib/getuser.S
+++ b/arch/i386/lib/getuser.S
@@ -8,6 +8,8 @@
* return an error value in addition to the "real"
* return value.
*/
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
#include <asm/thread_info.h>
@@ -24,19 +26,19 @@
*/
.text
-.align 4
-.globl __get_user_1
-__get_user_1:
+ENTRY(__get_user_1)
+ CFI_STARTPROC
GET_THREAD_INFO(%edx)
cmpl TI_addr_limit(%edx),%eax
jae bad_get_user
1: movzbl (%eax),%edx
xorl %eax,%eax
ret
+ CFI_ENDPROC
+ENDPROC(__get_user_1)
-.align 4
-.globl __get_user_2
-__get_user_2:
+ENTRY(__get_user_2)
+ CFI_STARTPROC
addl $1,%eax
jc bad_get_user
GET_THREAD_INFO(%edx)
@@ -45,10 +47,11 @@ __get_user_2:
2: movzwl -1(%eax),%edx
xorl %eax,%eax
ret
+ CFI_ENDPROC
+ENDPROC(__get_user_2)
-.align 4
-.globl __get_user_4
-__get_user_4:
+ENTRY(__get_user_4)
+ CFI_STARTPROC
addl $3,%eax
jc bad_get_user
GET_THREAD_INFO(%edx)
@@ -57,11 +60,16 @@ __get_user_4:
3: movl -3(%eax),%edx
xorl %eax,%eax
ret
+ CFI_ENDPROC
+ENDPROC(__get_user_4)
bad_get_user:
+ CFI_STARTPROC
xorl %edx,%edx
movl $-14,%eax
ret
+ CFI_ENDPROC
+END(bad_get_user)
.section __ex_table,"a"
.long 1b,bad_get_user
diff --git a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
index a32d9f570f48..f58fba109d18 100644
--- a/arch/i386/lib/putuser.S
+++ b/arch/i386/lib/putuser.S
@@ -8,6 +8,8 @@
* return an error value in addition to the "real"
* return value.
*/
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
#include <asm/thread_info.h>
@@ -23,23 +25,28 @@
* as they get called from within inline assembly.
*/
-#define ENTER pushl %ebx ; GET_THREAD_INFO(%ebx)
-#define EXIT popl %ebx ; ret
+#define ENTER CFI_STARTPROC ; \
+ pushl %ebx ; \
+ CFI_ADJUST_CFA_OFFSET 4 ; \
+ CFI_REL_OFFSET ebx, 0 ; \
+ GET_THREAD_INFO(%ebx)
+#define EXIT popl %ebx ; \
+ CFI_ADJUST_CFA_OFFSET -4 ; \
+ CFI_RESTORE ebx ; \
+ ret ; \
+ CFI_ENDPROC
.text
-.align 4
-.globl __put_user_1
-__put_user_1:
+ENTRY(__put_user_1)
ENTER
cmpl TI_addr_limit(%ebx),%ecx
jae bad_put_user
1: movb %al,(%ecx)
xorl %eax,%eax
EXIT
+ENDPROC(__put_user_1)
-.align 4
-.globl __put_user_2
-__put_user_2:
+ENTRY(__put_user_2)
ENTER
movl TI_addr_limit(%ebx),%ebx
subl $1,%ebx
@@ -48,10 +55,9 @@ __put_user_2:
2: movw %ax,(%ecx)
xorl %eax,%eax
EXIT
+ENDPROC(__put_user_2)
-.align 4
-.globl __put_user_4
-__put_user_4:
+ENTRY(__put_user_4)
ENTER
movl TI_addr_limit(%ebx),%ebx
subl $3,%ebx
@@ -60,10 +66,9 @@ __put_user_4:
3: movl %eax,(%ecx)
xorl %eax,%eax
EXIT
+ENDPROC(__put_user_4)
-.align 4
-.globl __put_user_8
-__put_user_8:
+ENTRY(__put_user_8)
ENTER
movl TI_addr_limit(%ebx),%ebx
subl $7,%ebx
@@ -73,10 +78,16 @@ __put_user_8:
5: movl %edx,4(%ecx)
xorl %eax,%eax
EXIT
+ENDPROC(__put_user_8)
bad_put_user:
+ CFI_STARTPROC simple
+ CFI_DEF_CFA esp, 2*4
+ CFI_OFFSET eip, -1*4
+ CFI_OFFSET ebx, -2*4
movl $-14,%eax
EXIT
+END(bad_put_user)
.section __ex_table,"a"
.long 1b,bad_put_user
diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c
index 086b3726862a..9f38b12b4af1 100644
--- a/arch/i386/lib/usercopy.c
+++ b/arch/i386/lib/usercopy.c
@@ -716,7 +716,6 @@ do { \
unsigned long __copy_to_user_ll(void __user *to, const void *from,
unsigned long n)
{
- BUG_ON((long) n < 0);
#ifndef CONFIG_X86_WP_WORKS_OK
if (unlikely(boot_cpu_data.wp_works_ok == 0) &&
((unsigned long )to) < TASK_SIZE) {
@@ -785,7 +784,6 @@ EXPORT_SYMBOL(__copy_to_user_ll);
unsigned long __copy_from_user_ll(void *to, const void __user *from,
unsigned long n)
{
- BUG_ON((long)n < 0);
if (movsl_is_ok(to, from, n))
__copy_user_zeroing(to, from, n);
else
@@ -797,7 +795,6 @@ EXPORT_SYMBOL(__copy_from_user_ll);
unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from,
unsigned long n)
{
- BUG_ON((long)n < 0);
if (movsl_is_ok(to, from, n))
__copy_user(to, from, n);
else
@@ -810,7 +807,6 @@ EXPORT_SYMBOL(__copy_from_user_ll_nozero);
unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
unsigned long n)
{
- BUG_ON((long)n < 0);
#ifdef CONFIG_X86_INTEL_USERCOPY
if ( n > 64 && cpu_has_xmm2)
n = __copy_user_zeroing_intel_nocache(to, from, n);
@@ -825,7 +821,6 @@ unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
unsigned long n)
{
- BUG_ON((long)n < 0);
#ifdef CONFIG_X86_INTEL_USERCOPY
if ( n > 64 && cpu_has_xmm2)
n = __copy_user_intel_nocache(to, from, n);
@@ -853,7 +848,6 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
unsigned long
copy_to_user(void __user *to, const void *from, unsigned long n)
{
- BUG_ON((long) n < 0);
if (access_ok(VERIFY_WRITE, to, n))
n = __copy_to_user(to, from, n);
return n;
@@ -879,7 +873,6 @@ EXPORT_SYMBOL(copy_to_user);
unsigned long
copy_from_user(void *to, const void __user *from, unsigned long n)
{
- BUG_ON((long) n < 0);
if (access_ok(VERIFY_READ, from, n))
n = __copy_from_user(to, from, n);
else
diff --git a/arch/i386/mach-generic/bigsmp.c b/arch/i386/mach-generic/bigsmp.c
index 8a210fa915b5..e932d3485ae2 100644
--- a/arch/i386/mach-generic/bigsmp.c
+++ b/arch/i386/mach-generic/bigsmp.c
@@ -45,7 +45,7 @@ static struct dmi_system_id __initdata bigsmp_dmi_table[] = {
};
-static int probe_bigsmp(void)
+static int __init probe_bigsmp(void)
{
if (def_to_bigsmp)
dmi_bigsmp = 1;
diff --git a/arch/i386/mach-generic/es7000.c b/arch/i386/mach-generic/es7000.c
index b8963a5a3b25..b47f951c0ec2 100644
--- a/arch/i386/mach-generic/es7000.c
+++ b/arch/i386/mach-generic/es7000.c
@@ -25,4 +25,45 @@ static int probe_es7000(void)
return 0;
}
+extern void es7000_sw_apic(void);
+static void __init enable_apic_mode(void)
+{
+ es7000_sw_apic();
+ return;
+}
+
+static __init int mps_oem_check(struct mp_config_table *mpc, char *oem,
+ char *productid)
+{
+ if (mpc->mpc_oemptr) {
+ struct mp_config_oemtable *oem_table =
+ (struct mp_config_oemtable *)mpc->mpc_oemptr;
+ if (!strncmp(oem, "UNISYS", 6))
+ return parse_unisys_oem((char *)oem_table);
+ }
+ return 0;
+}
+
+#ifdef CONFIG_ACPI
+/* Hook from generic ACPI tables.c */
+static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ unsigned long oem_addr;
+ if (!find_unisys_acpi_oem_table(&oem_addr)) {
+ if (es7000_check_dsdt())
+ return parse_unisys_oem((char *)oem_addr);
+ else {
+ setup_unisys();
+ return 1;
+ }
+ }
+ return 0;
+}
+#else
+static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ return 0;
+}
+#endif
+
struct genapic apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c
index fe0ed393294c..1a5e448a29c7 100644
--- a/arch/i386/mach-voyager/voyager_smp.c
+++ b/arch/i386/mach-voyager/voyager_smp.c
@@ -573,15 +573,7 @@ do_boot_cpu(__u8 cpu)
/* init_tasks (in sched.c) is indexed logically */
stack_start.esp = (void *) idle->thread.esp;
- /* Pre-allocate and initialize the CPU's GDT and PDA so it
- doesn't have to do any memory allocation during the
- delicate CPU-bringup phase. */
- if (!init_gdt(cpu, idle)) {
- printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu);
- cpucount--;
- return;
- }
-
+ init_gdt(cpu, idle);
irq_ctx_init(cpu);
/* Note: Don't modify initial ss override */
@@ -749,12 +741,6 @@ initialize_secondary(void)
#endif
/*
- * switch to the per CPU GDT we already set up
- * in do_boot_cpu()
- */
- cpu_set_gdt(current_thread_info()->cpu);
-
- /*
* We don't actually need to load the full TSS,
* basically just the stack pointer and the eip.
*/
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index b8c4e259fc8b..f534c29e80b2 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -20,6 +20,7 @@
#include <linux/tty.h>
#include <linux/vt_kern.h> /* For unblank_screen() */
#include <linux/highmem.h>
+#include <linux/bootmem.h> /* for max_low_pfn */
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
@@ -301,7 +302,6 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
struct mm_struct *mm;
struct vm_area_struct * vma;
unsigned long address;
- unsigned long page;
int write, si_code;
/* get the address */
@@ -510,7 +510,9 @@ no_context:
bust_spinlocks(1);
if (oops_may_print()) {
- #ifdef CONFIG_X86_PAE
+ __typeof__(pte_val(__pte(0))) page;
+
+#ifdef CONFIG_X86_PAE
if (error_code & 16) {
pte_t *pte = lookup_address(address);
@@ -519,7 +521,7 @@ no_context:
"NX-protected page - exploit attempt? "
"(uid: %d)\n", current->uid);
}
- #endif
+#endif
if (address < PAGE_SIZE)
printk(KERN_ALERT "BUG: unable to handle kernel NULL "
"pointer dereference");
@@ -529,25 +531,38 @@ no_context:
printk(" at virtual address %08lx\n",address);
printk(KERN_ALERT " printing eip:\n");
printk("%08lx\n", regs->eip);
- }
- page = read_cr3();
- page = ((unsigned long *) __va(page))[address >> 22];
- if (oops_may_print())
+
+ page = read_cr3();
+ page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
+#ifdef CONFIG_X86_PAE
+ printk(KERN_ALERT "*pdpt = %016Lx\n", page);
+ if ((page >> PAGE_SHIFT) < max_low_pfn
+ && page & _PAGE_PRESENT) {
+ page &= PAGE_MASK;
+ page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
+ & (PTRS_PER_PMD - 1)];
+ printk(KERN_ALERT "*pde = %016Lx\n", page);
+ page &= ~_PAGE_NX;
+ }
+#else
printk(KERN_ALERT "*pde = %08lx\n", page);
- /*
- * We must not directly access the pte in the highpte
- * case, the page table might be allocated in highmem.
- * And lets rather not kmap-atomic the pte, just in case
- * it's allocated already.
- */
-#ifndef CONFIG_HIGHPTE
- if ((page & 1) && oops_may_print()) {
- page &= PAGE_MASK;
- address &= 0x003ff000;
- page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
- printk(KERN_ALERT "*pte = %08lx\n", page);
- }
#endif
+
+ /*
+ * We must not directly access the pte in the highpte
+ * case if the page table is located in highmem.
+ * And let's rather not kmap-atomic the pte, just in case
+ * it's allocated already.
+ */
+ if ((page >> PAGE_SHIFT) < max_low_pfn
+ && (page & _PAGE_PRESENT)) {
+ page &= PAGE_MASK;
+ page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
+ & (PTRS_PER_PTE - 1)];
+ printk(KERN_ALERT "*pte = %0*Lx\n", sizeof(page)*2, (u64)page);
+ }
+ }
+
tsk->thread.cr2 = address;
tsk->thread.trap_no = 14;
tsk->thread.error_code = error_code;
@@ -588,7 +603,6 @@ do_sigbus:
force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
}
-#ifndef CONFIG_X86_PAE
void vmalloc_sync_all(void)
{
/*
@@ -601,6 +615,9 @@ void vmalloc_sync_all(void)
static unsigned long start = TASK_SIZE;
unsigned long address;
+ if (SHARED_KERNEL_PMD)
+ return;
+
BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
if (!test_bit(pgd_index(address), insync)) {
@@ -623,4 +640,3 @@ void vmalloc_sync_all(void)
start = address + PGDIR_SIZE;
}
}
-#endif
diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c
index ac70d09df7ee..ad8d86cc683e 100644
--- a/arch/i386/mm/highmem.c
+++ b/arch/i386/mm/highmem.c
@@ -26,7 +26,7 @@ void kunmap(struct page *page)
* However when holding an atomic kmap is is not legal to sleep, so atomic
* kmaps are appropriate for short, tight code paths only.
*/
-void *kmap_atomic(struct page *page, enum km_type type)
+void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
{
enum fixed_addresses idx;
unsigned long vaddr;
@@ -41,12 +41,17 @@ void *kmap_atomic(struct page *page, enum km_type type)
return page_address(page);
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
- set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
+ set_pte(kmap_pte-idx, mk_pte(page, prot));
arch_flush_lazy_mmu_mode();
return (void*) vaddr;
}
+void *kmap_atomic(struct page *page, enum km_type type)
+{
+ return kmap_atomic_prot(page, type, kmap_prot);
+}
+
void kunmap_atomic(void *kvaddr, enum km_type type)
{
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
@@ -67,6 +72,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
#endif
}
+ arch_flush_lazy_mmu_mode();
pagefault_enable();
}
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index ae436882af7a..dbe16f63a566 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -22,6 +22,7 @@
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
+#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
@@ -42,6 +43,7 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
+#include <asm/paravirt.h>
unsigned int __VMALLOC_RESERVE = 128 << 20;
@@ -61,17 +63,18 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
pmd_t *pmd_table;
#ifdef CONFIG_X86_PAE
- pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
- paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
- set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
- pud = pud_offset(pgd, 0);
- if (pmd_table != pmd_offset(pud, 0))
- BUG();
-#else
+ if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
+ pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+
+ paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
+ set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
+ pud = pud_offset(pgd, 0);
+ if (pmd_table != pmd_offset(pud, 0))
+ BUG();
+ }
+#endif
pud = pud_offset(pgd, 0);
pmd_table = pmd_offset(pud, 0);
-#endif
-
return pmd_table;
}
@@ -81,14 +84,12 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
*/
static pte_t * __init one_page_table_init(pmd_t *pmd)
{
- if (pmd_none(*pmd)) {
+ if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+
paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
- if (page_table != pte_offset_kernel(pmd, 0))
- BUG();
-
- return page_table;
+ BUG_ON(page_table != pte_offset_kernel(pmd, 0));
}
return pte_offset_kernel(pmd, 0);
@@ -108,7 +109,6 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
{
pgd_t *pgd;
- pud_t *pud;
pmd_t *pmd;
int pgd_idx, pmd_idx;
unsigned long vaddr;
@@ -119,13 +119,10 @@ static void __init page_table_range_init (unsigned long start, unsigned long end
pgd = pgd_base + pgd_idx;
for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
- if (pgd_none(*pgd))
- one_md_table_init(pgd);
- pud = pud_offset(pgd, vaddr);
- pmd = pmd_offset(pud, vaddr);
+ pmd = one_md_table_init(pgd);
+ pmd = pmd + pmd_index(vaddr);
for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
- if (pmd_none(*pmd))
- one_page_table_init(pmd);
+ one_page_table_init(pmd);
vaddr += PMD_SIZE;
}
@@ -167,20 +164,22 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
/* Map with big pages if possible, otherwise create normal page tables. */
if (cpu_has_pse) {
unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
-
if (is_kernel_text(address) || is_kernel_text(address2))
set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
else
set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
+
pfn += PTRS_PER_PTE;
} else {
pte = one_page_table_init(pmd);
- for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
- if (is_kernel_text(address))
- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
- else
- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
+ for (pte_ofs = 0;
+ pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
+ pte++, pfn++, pte_ofs++, address += PAGE_SIZE) {
+ if (is_kernel_text(address))
+ set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+ else
+ set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
}
}
}
@@ -337,24 +336,78 @@ extern void __init remap_numa_kva(void);
#define remap_numa_kva() do {} while (0)
#endif
-static void __init pagetable_init (void)
+void __init native_pagetable_setup_start(pgd_t *base)
{
- unsigned long vaddr;
- pgd_t *pgd_base = swapper_pg_dir;
-
#ifdef CONFIG_X86_PAE
int i;
- /* Init entries of the first-level page table to the zero page */
- for (i = 0; i < PTRS_PER_PGD; i++)
- set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
+
+ /*
+ * Init entries of the first-level page table to the
+ * zero page, if they haven't already been set up.
+ *
+ * In a normal native boot, we'll be running on a
+ * pagetable rooted in swapper_pg_dir, but not in PAE
+ * mode, so this will end up clobbering the mappings
+ * for the lower 24Mbytes of the address space,
+ * without affecting the kernel address space.
+ */
+ for (i = 0; i < USER_PTRS_PER_PGD; i++)
+ set_pgd(&base[i],
+ __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
+
+ /* Make sure kernel address space is empty so that a pagetable
+ will be allocated for it. */
+ memset(&base[USER_PTRS_PER_PGD], 0,
+ KERNEL_PGD_PTRS * sizeof(pgd_t));
#else
paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT);
#endif
+}
+
+void __init native_pagetable_setup_done(pgd_t *base)
+{
+#ifdef CONFIG_X86_PAE
+ /*
+ * Add low memory identity-mappings - SMP needs it when
+ * starting up on an AP from real-mode. In the non-PAE
+ * case we already have these mappings through head.S.
+ * All user-space mappings are explicitly cleared after
+ * SMP startup.
+ */
+ set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
+#endif
+}
+
+/*
+ * Build a proper pagetable for the kernel mappings. Up until this
+ * point, we've been running on some set of pagetables constructed by
+ * the boot process.
+ *
+ * If we're booting on native hardware, this will be a pagetable
+ * constructed in arch/i386/kernel/head.S, and not running in PAE mode
+ * (even if we'll end up running in PAE). The root of the pagetable
+ * will be swapper_pg_dir.
+ *
+ * If we're booting paravirtualized under a hypervisor, then there are
+ * more options: we may already be running PAE, and the pagetable may
+ * or may not be based in swapper_pg_dir. In any case,
+ * paravirt_pagetable_setup_start() will set up swapper_pg_dir
+ * appropriately for the rest of the initialization to work.
+ *
+ * In general, pagetable_init() assumes that the pagetable may already
+ * be partially populated, and so it avoids stomping on any existing
+ * mappings.
+ */
+static void __init pagetable_init (void)
+{
+ unsigned long vaddr, end;
+ pgd_t *pgd_base = swapper_pg_dir;
+
+ paravirt_pagetable_setup_start(pgd_base);
/* Enable PSE if available */
- if (cpu_has_pse) {
+ if (cpu_has_pse)
set_in_cr4(X86_CR4_PSE);
- }
/* Enable PGE if available */
if (cpu_has_pge) {
@@ -371,20 +424,12 @@ static void __init pagetable_init (void)
* created - mappings will be set by set_fixmap():
*/
vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
- page_table_range_init(vaddr, 0, pgd_base);
+ end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
+ page_table_range_init(vaddr, end, pgd_base);
permanent_kmaps_init(pgd_base);
-#ifdef CONFIG_X86_PAE
- /*
- * Add low memory identity-mappings - SMP needs it when
- * starting up on an AP from real-mode. In the non-PAE
- * case we already have these mappings through head.S.
- * All user-space mappings are explicitly cleared after
- * SMP startup.
- */
- set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]);
-#endif
+ paravirt_pagetable_setup_done(pgd_base);
}
#if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)
@@ -700,6 +745,8 @@ struct kmem_cache *pmd_cache;
void __init pgtable_cache_init(void)
{
+ size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t);
+
if (PTRS_PER_PMD > 1) {
pmd_cache = kmem_cache_create("pmd",
PTRS_PER_PMD*sizeof(pmd_t),
@@ -709,13 +756,23 @@ void __init pgtable_cache_init(void)
NULL);
if (!pmd_cache)
panic("pgtable_cache_init(): cannot create pmd cache");
+
+ if (!SHARED_KERNEL_PMD) {
+ /* If we're in PAE mode and have a non-shared
+ kernel pmd, then the pgd size must be a
+ page size. This is because the pgd_list
+ links through the page structure, so there
+ can only be one pgd per page for this to
+ work. */
+ pgd_size = PAGE_SIZE;
+ }
}
pgd_cache = kmem_cache_create("pgd",
- PTRS_PER_PGD*sizeof(pgd_t),
- PTRS_PER_PGD*sizeof(pgd_t),
+ pgd_size,
+ pgd_size,
0,
pgd_ctor,
- PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
+ (!SHARED_KERNEL_PMD) ? pgd_dtor : NULL);
if (!pgd_cache)
panic("pgtable_cache_init(): Cannot create pgd cache");
}
@@ -751,13 +808,25 @@ static int noinline do_test_wp_bit(void)
void mark_rodata_ro(void)
{
- unsigned long addr = (unsigned long)__start_rodata;
+ unsigned long start = PFN_ALIGN(_text);
+ unsigned long size = PFN_ALIGN(_etext) - start;
- for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
- change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO);
+#ifdef CONFIG_HOTPLUG_CPU
+ /* It must still be possible to apply SMP alternatives. */
+ if (num_possible_cpus() <= 1)
+#endif
+ {
+ change_page_attr(virt_to_page(start),
+ size >> PAGE_SHIFT, PAGE_KERNEL_RX);
+ printk("Write protecting the kernel text: %luk\n", size >> 10);
+ }
- printk("Write protecting the kernel read-only data: %uk\n",
- (__end_rodata - __start_rodata) >> 10);
+ start += size;
+ size = (unsigned long)__end_rodata - start;
+ change_page_attr(virt_to_page(start),
+ size >> PAGE_SHIFT, PAGE_KERNEL_RO);
+ printk("Write protecting the kernel read-only data: %luk\n",
+ size >> 10);
/*
* change_page_attr() requires a global_flush_tlb() call after it.
@@ -774,26 +843,27 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
unsigned long addr;
for (addr = begin; addr < end; addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
- free_page(addr);
+ struct page *page = pfn_to_page(addr >> PAGE_SHIFT);
+ ClearPageReserved(page);
+ init_page_count(page);
+ memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE);
+ __free_page(page);
totalram_pages++;
}
- printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+ printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
}
void free_initmem(void)
{
free_init_pages("unused kernel memory",
- (unsigned long)(&__init_begin),
- (unsigned long)(&__init_end));
+ __pa_symbol(&__init_begin),
+ __pa_symbol(&__init_end));
}
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- free_init_pages("initrd memory", start, end);
+ free_init_pages("initrd memory", __pa(start), __pa(end));
}
#endif
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index 412ebbd8adb0..47bd477c8ecc 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -91,7 +91,7 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
unsigned long flags;
set_pte_atomic(kpte, pte); /* change init_mm */
- if (PTRS_PER_PMD > 1)
+ if (SHARED_KERNEL_PMD)
return;
spin_lock_irqsave(&pgd_lock, flags);
@@ -142,7 +142,7 @@ __change_page_attr(struct page *page, pgprot_t prot)
return -EINVAL;
kpte_page = virt_to_page(kpte);
if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
- if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
+ if (!pte_huge(*kpte)) {
set_pte_atomic(kpte, mk_pte(page, prot));
} else {
pgprot_t ref_prot;
@@ -158,7 +158,7 @@ __change_page_attr(struct page *page, pgprot_t prot)
kpte_page = split;
}
page_private(kpte_page)++;
- } else if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
+ } else if (!pte_huge(*kpte)) {
set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
BUG_ON(page_private(kpte_page) == 0);
page_private(kpte_page)--;
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index fa0cfbd551e1..9a96c1647428 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -144,10 +144,8 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
}
static int fixmaps;
-#ifndef CONFIG_COMPAT_VDSO
unsigned long __FIXADDR_TOP = 0xfffff000;
EXPORT_SYMBOL(__FIXADDR_TOP);
-#endif
void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
{
@@ -173,12 +171,8 @@ void reserve_top_address(unsigned long reserve)
BUG_ON(fixmaps > 0);
printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
(int)-reserve);
-#ifdef CONFIG_COMPAT_VDSO
- BUG_ON(reserve != 0);
-#else
__FIXADDR_TOP = -reserve - PAGE_SIZE;
__VMALLOC_RESERVE += reserve;
-#endif
}
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
@@ -238,42 +232,92 @@ static inline void pgd_list_del(pgd_t *pgd)
set_page_private(next, (unsigned long)pprev);
}
+#if (PTRS_PER_PMD == 1)
+/* Non-PAE pgd constructor */
void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
{
unsigned long flags;
- if (PTRS_PER_PMD == 1) {
- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
- spin_lock_irqsave(&pgd_lock, flags);
- }
+ /* !PAE, no pagetable sharing */
+ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ /* must happen under lock */
clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
KERNEL_PGD_PTRS);
-
- if (PTRS_PER_PMD > 1)
- return;
-
- /* must happen under lock */
paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
- __pa(swapper_pg_dir) >> PAGE_SHIFT,
- USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD);
-
+ __pa(swapper_pg_dir) >> PAGE_SHIFT,
+ USER_PTRS_PER_PGD,
+ KERNEL_PGD_PTRS);
pgd_list_add(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
}
+#else /* PTRS_PER_PMD > 1 */
+/* PAE pgd constructor */
+void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
+{
+ /* PAE, kernel PMD may be shared */
+
+ if (SHARED_KERNEL_PMD) {
+ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
+ swapper_pg_dir + USER_PTRS_PER_PGD,
+ KERNEL_PGD_PTRS);
+ } else {
+ unsigned long flags;
+
+ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+ spin_lock_irqsave(&pgd_lock, flags);
+ pgd_list_add(pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ }
+}
+#endif /* PTRS_PER_PMD */
-/* never called when PTRS_PER_PMD > 1 */
void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
{
unsigned long flags; /* can be called from interrupt context */
+ BUG_ON(SHARED_KERNEL_PMD);
+
paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
spin_lock_irqsave(&pgd_lock, flags);
pgd_list_del(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
}
+#define UNSHARED_PTRS_PER_PGD \
+ (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
+
+/* If we allocate a pmd for part of the kernel address space, then
+ make sure its initialized with the appropriate kernel mappings.
+ Otherwise use a cached zeroed pmd. */
+static pmd_t *pmd_cache_alloc(int idx)
+{
+ pmd_t *pmd;
+
+ if (idx >= USER_PTRS_PER_PGD) {
+ pmd = (pmd_t *)__get_free_page(GFP_KERNEL);
+
+ if (pmd)
+ memcpy(pmd,
+ (void *)pgd_page_vaddr(swapper_pg_dir[idx]),
+ sizeof(pmd_t) * PTRS_PER_PMD);
+ } else
+ pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+
+ return pmd;
+}
+
+static void pmd_cache_free(pmd_t *pmd, int idx)
+{
+ if (idx >= USER_PTRS_PER_PGD)
+ free_page((unsigned long)pmd);
+ else
+ kmem_cache_free(pmd_cache, pmd);
+}
+
pgd_t *pgd_alloc(struct mm_struct *mm)
{
int i;
@@ -282,10 +326,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
if (PTRS_PER_PMD == 1 || !pgd)
return pgd;
- for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
- pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+ for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
+ pmd_t *pmd = pmd_cache_alloc(i);
+
if (!pmd)
goto out_oom;
+
paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
}
@@ -296,7 +342,7 @@ out_oom:
pgd_t pgdent = pgd[i];
void* pmd = (void *)__va(pgd_val(pgdent)-1);
paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
- kmem_cache_free(pmd_cache, pmd);
+ pmd_cache_free(pmd, i);
}
kmem_cache_free(pgd_cache, pgd);
return NULL;
@@ -308,11 +354,11 @@ void pgd_free(pgd_t *pgd)
/* in the PAE case user pgd entries are overwritten before usage */
if (PTRS_PER_PMD > 1)
- for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+ for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
pgd_t pgdent = pgd[i];
void* pmd = (void *)__va(pgd_val(pgdent)-1);
paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
- kmem_cache_free(pmd_cache, pmd);
+ pmd_cache_free(pmd, i);
}
/* in the non-PAE case, free_pgtables() clears user pgd entries */
kmem_cache_free(pgd_cache, pgd);
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c
index 8fda7be9dd4d..695f737516ae 100644
--- a/arch/i386/oprofile/nmi_int.c
+++ b/arch/i386/oprofile/nmi_int.c
@@ -414,6 +414,10 @@ int __init op_nmi_init(struct oprofile_operations *ops)
user space an consistent name. */
cpu_type = "x86-64/hammer";
break;
+ case 0x10:
+ model = &op_athlon_spec;
+ cpu_type = "x86-64/family10";
+ break;
}
break;
diff --git a/arch/i386/pci/init.c b/arch/i386/pci/init.c
index b21b6da8ab1d..1cf11af96de2 100644
--- a/arch/i386/pci/init.c
+++ b/arch/i386/pci/init.c
@@ -6,7 +6,7 @@
in the right sequence from here. */
static __init int pci_access_init(void)
{
- int type = 0;
+ int type __attribute__((unused)) = 0;
#ifdef CONFIG_PCI_DIRECT
type = pci_direct_probe();
diff --git a/arch/i386/pci/mmconfig-shared.c b/arch/i386/pci/mmconfig-shared.c
index 747d8c63b0c4..c7cabeed4d7b 100644
--- a/arch/i386/pci/mmconfig-shared.c
+++ b/arch/i386/pci/mmconfig-shared.c
@@ -60,14 +60,19 @@ static const char __init *pci_mmcfg_e7520(void)
u32 win;
pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win);
- pci_mmcfg_config_num = 1;
- pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL);
- if (!pci_mmcfg_config)
- return NULL;
- pci_mmcfg_config[0].address = (win & 0xf000) << 16;
- pci_mmcfg_config[0].pci_segment = 0;
- pci_mmcfg_config[0].start_bus_number = 0;
- pci_mmcfg_config[0].end_bus_number = 255;
+ win = win & 0xf000;
+ if(win == 0x0000 || win == 0xf000)
+ pci_mmcfg_config_num = 0;
+ else {
+ pci_mmcfg_config_num = 1;
+ pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL);
+ if (!pci_mmcfg_config)
+ return NULL;
+ pci_mmcfg_config[0].address = win << 16;
+ pci_mmcfg_config[0].pci_segment = 0;
+ pci_mmcfg_config[0].start_bus_number = 0;
+ pci_mmcfg_config[0].end_bus_number = 255;
+ }
return "Intel Corporation E7520 Memory Controller Hub";
}
@@ -108,6 +113,10 @@ static const char __init *pci_mmcfg_intel_945(void)
if ((pciexbar & mask) & 0x0fffffffU)
pci_mmcfg_config_num = 0;
+ /* Don't hit the APIC registers and their friends */
+ if ((pciexbar & mask) >= 0xf0000000U)
+ pci_mmcfg_config_num = 0;
+
if (pci_mmcfg_config_num) {
pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL);
if (!pci_mmcfg_config)
diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c
index 2c15500f8713..998fd3ec0d68 100644
--- a/arch/i386/power/cpu.c
+++ b/arch/i386/power/cpu.c
@@ -21,6 +21,7 @@ unsigned long saved_context_eflags;
void __save_processor_state(struct saved_context *ctxt)
{
+ mtrr_save_fixed_ranges(NULL);
kernel_fpu_begin();
/*
diff --git a/arch/i386/power/suspend.c b/arch/i386/power/suspend.c
index db5e98d2eb73..a0020b913f31 100644
--- a/arch/i386/power/suspend.c
+++ b/arch/i386/power/suspend.c
@@ -16,6 +16,9 @@
/* Defined in arch/i386/power/swsusp.S */
extern int restore_image(void);
+/* References to section boundaries */
+extern const void __nosave_begin, __nosave_end;
+
/* Pointer to the temporary resume page tables */
pgd_t *resume_pg_dir;
@@ -156,3 +159,14 @@ int swsusp_arch_resume(void)
restore_image();
return 0;
}
+
+/*
+ * pfn_is_nosave - check if given pfn is in the 'nosave' section
+ */
+
+int pfn_is_nosave(unsigned long pfn)
+{
+ unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
+ unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
+ return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
+}
diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S
index 439cc257cd1d..6c73bca3f478 100644
--- a/arch/m32r/kernel/vmlinux.lds.S
+++ b/arch/m32r/kernel/vmlinux.lds.S
@@ -110,7 +110,7 @@ SECTIONS
__initramfs_end = .;
#endif
- . = ALIGN(32);
+ . = ALIGN(4096);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index c76b793310c2..043f637e3d10 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -119,7 +119,7 @@ SECTIONS
.init.ramfs : { *(.init.ramfs) }
__initramfs_end = .;
#endif
- . = ALIGN(32);
+ . = ALIGN(_PAGE_SIZE);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index 2a8253358c6c..c74585990598 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -181,7 +181,7 @@ SECTIONS
.init.ramfs : { *(.init.ramfs) }
__initramfs_end = .;
#endif
- . = ALIGN(32);
+ . = ALIGN(ASM_PAGE_SIZE);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index e0fa80eca366..aa693d0f151a 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o
obj-$(CONFIG_TAU) += tau_6xx.o
obj32-$(CONFIG_SOFTWARE_SUSPEND) += swsusp_32.o
+obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o
obj32-$(CONFIG_MODULES) += module_32.o
ifeq ($(CONFIG_PPC_MERGE),y)
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 22083ce3cc30..6018178708a5 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -582,14 +582,14 @@ void __init setup_per_cpu_areas(void)
char *ptr;
/* Copy section for each CPU (we discard the original) */
- size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
+ size = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE);
#ifdef CONFIG_MODULES
if (size < PERCPU_ENOUGH_ROOM)
size = PERCPU_ENOUGH_ROOM;
#endif
for_each_possible_cpu(i) {
- ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size);
+ ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size);
if (!ptr)
panic("Cannot allocate cpu data for CPU %d\n", i);
diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c
new file mode 100644
index 000000000000..8cee57107541
--- /dev/null
+++ b/arch/powerpc/kernel/suspend.c
@@ -0,0 +1,24 @@
+/*
+ * Suspend support specific for power.
+ *
+ * Distribute under GPLv2
+ *
+ * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
+ */
+
+#include <asm/page.h>
+
+/* References to section boundaries */
+extern const void __nosave_begin, __nosave_end;
+
+/*
+ * pfn_is_nosave - check if given pfn is in the 'nosave' section
+ */
+
+int pfn_is_nosave(unsigned long pfn)
+{
+ unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
+ unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
+ return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
+}
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 7eefeb4a30e7..132067313147 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -139,11 +139,7 @@ SECTIONS
__initramfs_end = .;
}
#endif
-#ifdef CONFIG_PPC32
- . = ALIGN(32);
-#else
- . = ALIGN(128);
-#endif
+ . = ALIGN(PAGE_SIZE);
.data.percpu : {
__per_cpu_start = .;
*(.data.percpu)
diff --git a/arch/ppc/kernel/vmlinux.lds.S b/arch/ppc/kernel/vmlinux.lds.S
index a0625562a44b..44cd128fb719 100644
--- a/arch/ppc/kernel/vmlinux.lds.S
+++ b/arch/ppc/kernel/vmlinux.lds.S
@@ -130,7 +130,7 @@ SECTIONS
__ftr_fixup : { *(__ftr_fixup) }
__stop___ftr_fixup = .;
- . = ALIGN(32);
+ . = ALIGN(4096);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 418f6426a949..e9d3432aba60 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -107,7 +107,7 @@ SECTIONS
. = ALIGN(2);
__initramfs_end = .;
#endif
- . = ALIGN(256);
+ . = ALIGN(4096);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S
index 78a6c09875b2..2f606d0ce1f6 100644
--- a/arch/sh/kernel/vmlinux.lds.S
+++ b/arch/sh/kernel/vmlinux.lds.S
@@ -54,7 +54,7 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
.data.page_aligned : { *(.data.page_aligned) }
- . = ALIGN(L1_CACHE_BYTES);
+ . = ALIGN(PAGE_SIZE);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
diff --git a/arch/sh64/kernel/vmlinux.lds.S b/arch/sh64/kernel/vmlinux.lds.S
index a59c5e998131..4f9616f39830 100644
--- a/arch/sh64/kernel/vmlinux.lds.S
+++ b/arch/sh64/kernel/vmlinux.lds.S
@@ -85,7 +85,7 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
.data.page_aligned : C_PHYS(.data.page_aligned) { *(.data.page_aligned) }
- . = ALIGN(L1_CACHE_BYTES);
+ . = ALIGN(PAGE_SIZE);
__per_cpu_start = .;
.data.percpu : C_PHYS(.data.percpu) { *(.data.percpu) }
__per_cpu_end = . ;
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index e5c24e0521de..f0bb6e60e620 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -65,7 +65,7 @@ SECTIONS
__initramfs_end = .;
#endif
- . = ALIGN(32);
+ . = ALIGN(4096);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index d4f0a70f4845..1fac215252e4 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -1343,11 +1343,11 @@ void __init setup_per_cpu_areas(void)
/* Copy section for each CPU (we discard the original) */
goal = PERCPU_ENOUGH_ROOM;
- __per_cpu_shift = 0;
- for (size = 1UL; size < goal; size <<= 1UL)
+ __per_cpu_shift = PAGE_SHIFT;
+ for (size = PAGE_SIZE; size < goal; size <<= 1UL)
__per_cpu_shift++;
- ptr = alloc_bootmem(size * NR_CPUS);
+ ptr = alloc_bootmem_pages(size * NR_CPUS);
__per_cpu_base = ptr - __per_cpu_start;
diff --git a/arch/um/defconfig b/arch/um/defconfig
index 780cc0a4a128..f938fa822146 100644
--- a/arch/um/defconfig
+++ b/arch/um/defconfig
@@ -41,6 +41,7 @@ CONFIG_M686=y
# CONFIG_MGEODE_LX is not set
# CONFIG_MCYRIXIII is not set
# CONFIG_MVIAC3_2 is not set
+# CONFIG_MVIAC7 is not set
# CONFIG_X86_GENERIC is not set
CONFIG_X86_CMPXCHG=y
CONFIG_X86_XADD=y
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index e9b4f058a49a..145bb824b2a8 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -415,13 +415,13 @@ config OUT_OF_LINE_PFN_TO_PAGE
depends on DISCONTIGMEM
config NR_CPUS
- int "Maximum number of CPUs (2-256)"
+ int "Maximum number of CPUs (2-255)"
range 2 255
depends on SMP
default "8"
help
This allows you to specify the maximum number of CPUs which this
- kernel will support. Current maximum is 256 CPUs due to
+ kernel will support. Current maximum is 255 CPUs due to
APIC addressing limits. Less depending on the hardware.
This is purely to save memory - each supported CPU requires
@@ -565,23 +565,56 @@ config CRASH_DUMP
PHYSICAL_START.
For more details see Documentation/kdump/kdump.txt
+config RELOCATABLE
+ bool "Build a relocatable kernel(EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ help
+ Builds a relocatable kernel. This enables loading and running
+ a kernel binary from a different physical address than it has
+ been compiled for.
+
+ One use is for the kexec on panic case where the recovery kernel
+ must live at a different physical address than the primary
+ kernel.
+
+ Note: If CONFIG_RELOCATABLE=y, then kernel run from the address
+ it has been loaded at and compile time physical address
+ (CONFIG_PHYSICAL_START) is ignored.
+
config PHYSICAL_START
hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
- default "0x1000000" if CRASH_DUMP
default "0x200000"
help
- This gives the physical address where the kernel is loaded. Normally
- for regular kernels this value is 0x200000 (2MB). But in the case
- of kexec on panic the fail safe kernel needs to run at a different
- address than the panic-ed kernel. This option is used to set the load
- address for kernels used to capture crash dump on being kexec'ed
- after panic. The default value for crash dump kernels is
- 0x1000000 (16MB). This can also be set based on the "X" value as
+ This gives the physical address where the kernel is loaded. It
+ should be aligned to 2MB boundary.
+
+ If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then
+ bzImage will decompress itself to above physical address and
+ run from there. Otherwise, bzImage will run from the address where
+ it has been loaded by the boot loader and will ignore above physical
+ address.
+
+ In normal kdump cases one does not have to set/change this option
+ as now bzImage can be compiled as a completely relocatable image
+ (CONFIG_RELOCATABLE=y) and be used to load and run from a different
+ address. This option is mainly useful for the folks who don't want
+ to use a bzImage for capturing the crash dump and want to use a
+ vmlinux instead.
+
+ So if you are using bzImage for capturing the crash dump, leave
+ the value here unchanged to 0x200000 and set CONFIG_RELOCATABLE=y.
+ Otherwise if you plan to use vmlinux for capturing the crash dump
+ change this value to start of the reserved region (Typically 16MB
+ 0x1000000). In other words, it can be set based on the "X" value as
specified in the "crashkernel=YM@XM" command line boot parameter
passed to the panic-ed kernel. Typically this parameter is set as
crashkernel=64M@16M. Please take a look at
Documentation/kdump/kdump.txt for more details about crash dumps.
+ Usage of bzImage for capturing the crash dump is advantageous as
+ one does not have to build two kernels. Same kernel can be used
+ as production kernel and capture kernel.
+
Don't change this unless you know what you are doing.
config SECCOMP
@@ -627,14 +660,6 @@ config CC_STACKPROTECTOR_ALL
source kernel/Kconfig.hz
-config REORDER
- bool "Function reordering"
- default n
- help
- This option enables the toolchain to reorder functions for a more
- optimal TLB usage. If you have pretty much any version of binutils,
- this can increase your kernel build time by roughly one minute.
-
config K8_NB
def_bool y
depends on AGP_AMD64 || IOMMU || (PCI && NUMA)
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 2941a915d4ef..29617ae3926d 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -40,10 +40,6 @@ cflags-y += -m64
cflags-y += -mno-red-zone
cflags-y += -mcmodel=kernel
cflags-y += -pipe
-cflags-kernel-$(CONFIG_REORDER) += -ffunction-sections
-# this makes reading assembly source easier, but produces worse code
-# actually it makes the kernel smaller too.
-cflags-y += -fno-reorder-blocks
cflags-y += -Wno-sign-compare
cflags-y += -fno-asynchronous-unwind-tables
ifneq ($(CONFIG_DEBUG_INFO),y)
diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile
index deb063e7762d..ee6f6505f95f 100644
--- a/arch/x86_64/boot/Makefile
+++ b/arch/x86_64/boot/Makefile
@@ -36,7 +36,7 @@ subdir- := compressed/ #Let make clean descend in compressed/
# ---------------------------------------------------------------------------
$(obj)/bzImage: IMAGE_OFFSET := 0x100000
-$(obj)/bzImage: EXTRA_AFLAGS := -traditional $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
+$(obj)/bzImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
$(obj)/bzImage: BUILDFLAGS := -b
quiet_cmd_image = BUILD $@
diff --git a/arch/x86_64/boot/compressed/Makefile b/arch/x86_64/boot/compressed/Makefile
index e70fa6e1da08..705a3e33d7e1 100644
--- a/arch/x86_64/boot/compressed/Makefile
+++ b/arch/x86_64/boot/compressed/Makefile
@@ -8,16 +8,14 @@
targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o
EXTRA_AFLAGS := -traditional
-AFLAGS := $(subst -m64,-m32,$(AFLAGS))
# cannot use EXTRA_CFLAGS because base CFLAGS contains -mkernel which conflicts with
# -m32
-CFLAGS := -m32 -D__KERNEL__ -Iinclude -O2 -fno-strict-aliasing
-LDFLAGS := -m elf_i386
+CFLAGS := -m64 -D__KERNEL__ -Iinclude -O2 -fno-strict-aliasing -fPIC -mcmodel=small -fno-builtin
+LDFLAGS := -m elf_x86_64
-LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup_32 -m elf_i386
-
-$(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
+LDFLAGS_vmlinux := -T
+$(obj)/vmlinux: $(src)/vmlinux.lds $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
$(call if_changed,ld)
@:
@@ -27,7 +25,7 @@ $(obj)/vmlinux.bin: vmlinux FORCE
$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
$(call if_changed,gzip)
-LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
+LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
$(call if_changed,ld)
diff --git a/arch/x86_64/boot/compressed/head.S b/arch/x86_64/boot/compressed/head.S
index 6f55565e4d42..f9d5692a0106 100644
--- a/arch/x86_64/boot/compressed/head.S
+++ b/arch/x86_64/boot/compressed/head.S
@@ -26,116 +26,279 @@
#include <linux/linkage.h>
#include <asm/segment.h>
+#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/msr.h>
+.section ".text.head"
.code32
.globl startup_32
-
+
startup_32:
cld
cli
- movl $(__KERNEL_DS),%eax
- movl %eax,%ds
- movl %eax,%es
- movl %eax,%fs
- movl %eax,%gs
-
- lss stack_start,%esp
- xorl %eax,%eax
-1: incl %eax # check that A20 really IS enabled
- movl %eax,0x000000 # loop forever if it isn't
- cmpl %eax,0x100000
- je 1b
+ movl $(__KERNEL_DS), %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %ss
+
+/* Calculate the delta between where we were compiled to run
+ * at and where we were actually loaded at. This can only be done
+ * with a short local call on x86. Nothing else will tell us what
+ * address we are running at. The reserved chunk of the real-mode
+ * data at 0x34-0x3f are used as the stack for this calculation.
+ * Only 4 bytes are needed.
+ */
+ leal 0x40(%esi), %esp
+ call 1f
+1: popl %ebp
+ subl $1b, %ebp
+
+/* setup a stack and make sure cpu supports long mode. */
+ movl $user_stack_end, %eax
+ addl %ebp, %eax
+ movl %eax, %esp
+
+ call verify_cpu
+ testl %eax, %eax
+ jnz no_longmode
+
+/* Compute the delta between where we were compiled to run at
+ * and where the code will actually run at.
+ */
+/* %ebp contains the address we are loaded at by the boot loader and %ebx
+ * contains the address where we should move the kernel image temporarily
+ * for safe in-place decompression.
+ */
+
+#ifdef CONFIG_RELOCATABLE
+ movl %ebp, %ebx
+ addl $(LARGE_PAGE_SIZE -1), %ebx
+ andl $LARGE_PAGE_MASK, %ebx
+#else
+ movl $CONFIG_PHYSICAL_START, %ebx
+#endif
+
+ /* Replace the compressed data size with the uncompressed size */
+ subl input_len(%ebp), %ebx
+ movl output_len(%ebp), %eax
+ addl %eax, %ebx
+ /* Add 8 bytes for every 32K input block */
+ shrl $12, %eax
+ addl %eax, %ebx
+ /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
+ addl $(32768 + 18 + 4095), %ebx
+ andl $~4095, %ebx
/*
- * Initialize eflags. Some BIOS's leave bits like NT set. This would
- * confuse the debugger if this code is traced.
- * XXX - best to initialize before switching to protected mode.
+ * Prepare for entering 64 bit mode
*/
- pushl $0
- popfl
+
+ /* Load new GDT with the 64bit segments using 32bit descriptor */
+ leal gdt(%ebp), %eax
+ movl %eax, gdt+2(%ebp)
+ lgdt gdt(%ebp)
+
+ /* Enable PAE mode */
+ xorl %eax, %eax
+ orl $(1 << 5), %eax
+ movl %eax, %cr4
+
+ /*
+ * Build early 4G boot pagetable
+ */
+ /* Initialize Page tables to 0*/
+ leal pgtable(%ebx), %edi
+ xorl %eax, %eax
+ movl $((4096*6)/4), %ecx
+ rep stosl
+
+ /* Build Level 4 */
+ leal pgtable + 0(%ebx), %edi
+ leal 0x1007 (%edi), %eax
+ movl %eax, 0(%edi)
+
+ /* Build Level 3 */
+ leal pgtable + 0x1000(%ebx), %edi
+ leal 0x1007(%edi), %eax
+ movl $4, %ecx
+1: movl %eax, 0x00(%edi)
+ addl $0x00001000, %eax
+ addl $8, %edi
+ decl %ecx
+ jnz 1b
+
+ /* Build Level 2 */
+ leal pgtable + 0x2000(%ebx), %edi
+ movl $0x00000183, %eax
+ movl $2048, %ecx
+1: movl %eax, 0(%edi)
+ addl $0x00200000, %eax
+ addl $8, %edi
+ decl %ecx
+ jnz 1b
+
+ /* Enable the boot page tables */
+ leal pgtable(%ebx), %eax
+ movl %eax, %cr3
+
+ /* Enable Long mode in EFER (Extended Feature Enable Register) */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ wrmsr
+
+ /* Setup for the jump to 64bit mode
+ *
+ * When the jump is performend we will be in long mode but
+ * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
+ * (and in turn EFER.LMA = 1). To jump into 64bit mode we use
+ * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
+ * We place all of the values on our mini stack so lret can
+ * used to perform that far jump.
+ */
+ pushl $__KERNEL_CS
+ leal startup_64(%ebp), %eax
+ pushl %eax
+
+ /* Enter paged protected Mode, activating Long Mode */
+ movl $0x80000001, %eax /* Enable Paging and Protected mode */
+ movl %eax, %cr0
+
+ /* Jump from 32bit compatibility mode into 64bit mode. */
+ lret
+
+no_longmode:
+ /* This isn't an x86-64 CPU so hang */
+1:
+ hlt
+ jmp 1b
+
+#include "../../kernel/verify_cpu.S"
+
+ /* Be careful here startup_64 needs to be at a predictable
+ * address so I can export it in an ELF header. Bootloaders
+ * should look at the ELF header to find this address, as
+ * it may change in the future.
+ */
+ .code64
+ .org 0x200
+ENTRY(startup_64)
+ /* We come here either from startup_32 or directly from a
+ * 64bit bootloader. If we come here from a bootloader we depend on
+ * an identity mapped page table being provied that maps our
+ * entire text+data+bss and hopefully all of memory.
+ */
+
+ /* Setup data segments. */
+ xorl %eax, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %ss
+
+ /* Compute the decompressed kernel start address. It is where
+ * we were loaded at aligned to a 2M boundary. %rbp contains the
+ * decompressed kernel start address.
+ *
+ * If it is a relocatable kernel then decompress and run the kernel
+ * from load address aligned to 2MB addr, otherwise decompress and
+ * run the kernel from CONFIG_PHYSICAL_START
+ */
+
+ /* Start with the delta to where the kernel will run at. */
+#ifdef CONFIG_RELOCATABLE
+ leaq startup_32(%rip) /* - $startup_32 */, %rbp
+ addq $(LARGE_PAGE_SIZE - 1), %rbp
+ andq $LARGE_PAGE_MASK, %rbp
+ movq %rbp, %rbx
+#else
+ movq $CONFIG_PHYSICAL_START, %rbp
+ movq %rbp, %rbx
+#endif
+
+ /* Replace the compressed data size with the uncompressed size */
+ movl input_len(%rip), %eax
+ subq %rax, %rbx
+ movl output_len(%rip), %eax
+ addq %rax, %rbx
+ /* Add 8 bytes for every 32K input block */
+ shrq $12, %rax
+ addq %rax, %rbx
+ /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
+ addq $(32768 + 18 + 4095), %rbx
+ andq $~4095, %rbx
+
+/* Copy the compressed kernel to the end of our buffer
+ * where decompression in place becomes safe.
+ */
+ leaq _end(%rip), %r8
+ leaq _end(%rbx), %r9
+ movq $_end /* - $startup_32 */, %rcx
+1: subq $8, %r8
+ subq $8, %r9
+ movq 0(%r8), %rax
+ movq %rax, 0(%r9)
+ subq $8, %rcx
+ jnz 1b
+
+/*
+ * Jump to the relocated address.
+ */
+ leaq relocated(%rbx), %rax
+ jmp *%rax
+
+.section ".text"
+relocated:
+
/*
* Clear BSS
*/
- xorl %eax,%eax
- movl $_edata,%edi
- movl $_end,%ecx
- subl %edi,%ecx
+ xorq %rax, %rax
+ leaq _edata(%rbx), %rdi
+ leaq _end(%rbx), %rcx
+ subq %rdi, %rcx
cld
rep
stosb
+
+ /* Setup the stack */
+ leaq user_stack_end(%rip), %rsp
+
+ /* zero EFLAGS after setting rsp */
+ pushq $0
+ popfq
+
/*
* Do the decompression, and jump to the new kernel..
*/
- subl $16,%esp # place for structure on the stack
- movl %esp,%eax
- pushl %esi # real mode pointer as second arg
- pushl %eax # address of structure as first arg
- call decompress_kernel
- orl %eax,%eax
- jnz 3f
- addl $8,%esp
- xorl %ebx,%ebx
- ljmp $(__KERNEL_CS), $__PHYSICAL_START
+ pushq %rsi # Save the real mode argument
+ movq %rsi, %rdi # real mode address
+ leaq _heap(%rip), %rsi # _heap
+ leaq input_data(%rip), %rdx # input_data
+ movl input_len(%rip), %eax
+ movq %rax, %rcx # input_len
+ movq %rbp, %r8 # output
+ call decompress_kernel
+ popq %rsi
-/*
- * We come here, if we were loaded high.
- * We need to move the move-in-place routine down to 0x1000
- * and then start it with the buffer addresses in registers,
- * which we got from the stack.
- */
-3:
- movl %esi,%ebx
- movl $move_routine_start,%esi
- movl $0x1000,%edi
- movl $move_routine_end,%ecx
- subl %esi,%ecx
- addl $3,%ecx
- shrl $2,%ecx
- cld
- rep
- movsl
-
- popl %esi # discard the address
- addl $4,%esp # real mode pointer
- popl %esi # low_buffer_start
- popl %ecx # lcount
- popl %edx # high_buffer_start
- popl %eax # hcount
- movl $__PHYSICAL_START,%edi
- cli # make sure we don't get interrupted
- ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine
/*
- * Routine (template) for moving the decompressed kernel in place,
- * if we were high loaded. This _must_ PIC-code !
+ * Jump to the decompressed kernel.
*/
-move_routine_start:
- movl %ecx,%ebp
- shrl $2,%ecx
- rep
- movsl
- movl %ebp,%ecx
- andl $3,%ecx
- rep
- movsb
- movl %edx,%esi
- movl %eax,%ecx # NOTE: rep movsb won't move if %ecx == 0
- addl $3,%ecx
- shrl $2,%ecx
- rep
- movsl
- movl %ebx,%esi # Restore setup pointer
- xorl %ebx,%ebx
- ljmp $(__KERNEL_CS), $__PHYSICAL_START
-move_routine_end:
+ jmp *%rbp
-
-/* Stack for uncompression */
- .align 32
-user_stack:
+ .data
+gdt:
+ .word gdt_end - gdt
+ .long gdt
+ .word 0
+ .quad 0x0000000000000000 /* NULL descriptor */
+ .quad 0x00af9a000000ffff /* __KERNEL_CS */
+ .quad 0x00cf92000000ffff /* __KERNEL_DS */
+gdt_end:
+ .bss
+/* Stack for uncompression */
+ .balign 4
+user_stack:
.fill 4096,4,0
-stack_start:
- .long user_stack+4096
- .word __KERNEL_DS
-
+user_stack_end:
diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c
index 3755b2e394d0..f932b0e89096 100644
--- a/arch/x86_64/boot/compressed/misc.c
+++ b/arch/x86_64/boot/compressed/misc.c
@@ -9,10 +9,95 @@
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
*/
+#define _LINUX_STRING_H_ 1
+#define __LINUX_BITMAP_H 1
+
+#include <linux/linkage.h>
#include <linux/screen_info.h>
#include <asm/io.h>
#include <asm/page.h>
+/* WARNING!!
+ * This code is compiled with -fPIC and it is relocated dynamically
+ * at run time, but no relocation processing is performed.
+ * This means that it is not safe to place pointers in static structures.
+ */
+
+/*
+ * Getting to provable safe in place decompression is hard.
+ * Worst case behaviours need to be analized.
+ * Background information:
+ *
+ * The file layout is:
+ * magic[2]
+ * method[1]
+ * flags[1]
+ * timestamp[4]
+ * extraflags[1]
+ * os[1]
+ * compressed data blocks[N]
+ * crc[4] orig_len[4]
+ *
+ * resulting in 18 bytes of non compressed data overhead.
+ *
+ * Files divided into blocks
+ * 1 bit (last block flag)
+ * 2 bits (block type)
+ *
+ * 1 block occurs every 32K -1 bytes or when there 50% compression has been achieved.
+ * The smallest block type encoding is always used.
+ *
+ * stored:
+ * 32 bits length in bytes.
+ *
+ * fixed:
+ * magic fixed tree.
+ * symbols.
+ *
+ * dynamic:
+ * dynamic tree encoding.
+ * symbols.
+ *
+ *
+ * The buffer for decompression in place is the length of the
+ * uncompressed data, plus a small amount extra to keep the algorithm safe.
+ * The compressed data is placed at the end of the buffer. The output
+ * pointer is placed at the start of the buffer and the input pointer
+ * is placed where the compressed data starts. Problems will occur
+ * when the output pointer overruns the input pointer.
+ *
+ * The output pointer can only overrun the input pointer if the input
+ * pointer is moving faster than the output pointer. A condition only
+ * triggered by data whose compressed form is larger than the uncompressed
+ * form.
+ *
+ * The worst case at the block level is a growth of the compressed data
+ * of 5 bytes per 32767 bytes.
+ *
+ * The worst case internal to a compressed block is very hard to figure.
+ * The worst case can at least be boundined by having one bit that represents
+ * 32764 bytes and then all of the rest of the bytes representing the very
+ * very last byte.
+ *
+ * All of which is enough to compute an amount of extra data that is required
+ * to be safe. To avoid problems at the block level allocating 5 extra bytes
+ * per 32767 bytes of data is sufficient. To avoind problems internal to a block
+ * adding an extra 32767 bytes (the worst case uncompressed block size) is
+ * sufficient, to ensure that in the worst case the decompressed data for
+ * block will stop the byte before the compressed data for a block begins.
+ * To avoid problems with the compressed data's meta information an extra 18
+ * bytes are needed. Leading to the formula:
+ *
+ * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size.
+ *
+ * Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
+ * Adding 32768 instead of 32767 just makes for round numbers.
+ * Adding the decompressor_size is necessary as it musht live after all
+ * of the data as well. Last I measured the decompressor is about 14K.
+ * 10K of actuall data and 4K of bss.
+ *
+ */
+
/*
* gzip declarations
*/
@@ -28,15 +113,20 @@ typedef unsigned char uch;
typedef unsigned short ush;
typedef unsigned long ulg;
-#define WSIZE 0x8000 /* Window size must be at least 32k, */
- /* and a power of two */
+#define WSIZE 0x80000000 /* Window size must be at least 32k,
+ * and a power of two
+ * We don't actually have a window just
+ * a huge output buffer so I report
+ * a 2G windows size, as that should
+ * always be larger than our output buffer.
+ */
-static uch *inbuf; /* input buffer */
-static uch window[WSIZE]; /* Sliding window buffer */
+static uch *inbuf; /* input buffer */
+static uch *window; /* Sliding window buffer, (and final output buffer) */
-static unsigned insize = 0; /* valid bytes in inbuf */
-static unsigned inptr = 0; /* index of next byte to be processed in inbuf */
-static unsigned outcnt = 0; /* bytes in output buffer */
+static unsigned insize; /* valid bytes in inbuf */
+static unsigned inptr; /* index of next byte to be processed in inbuf */
+static unsigned outcnt; /* bytes in output buffer */
/* gzip flag byte */
#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */
@@ -87,8 +177,6 @@ extern unsigned char input_data[];
extern int input_len;
static long bytes_out = 0;
-static uch *output_data;
-static unsigned long output_ptr = 0;
static void *malloc(int size);
static void free(void *where);
@@ -98,17 +186,10 @@ static void *memcpy(void *dest, const void *src, unsigned n);
static void putstr(const char *);
-extern int end;
-static long free_mem_ptr = (long)&end;
+static long free_mem_ptr;
static long free_mem_end_ptr;
-#define INPLACE_MOVE_ROUTINE 0x1000
-#define LOW_BUFFER_START 0x2000
-#define LOW_BUFFER_MAX 0x90000
-#define HEAP_SIZE 0x3000
-static unsigned int low_buffer_end, low_buffer_size;
-static int high_loaded =0;
-static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;
+#define HEAP_SIZE 0x7000
static char *vidmem = (char *)0xb8000;
static int vidport;
@@ -218,58 +299,31 @@ static void* memcpy(void* dest, const void* src, unsigned n)
*/
static int fill_inbuf(void)
{
- if (insize != 0) {
- error("ran out of input data");
- }
-
- inbuf = input_data;
- insize = input_len;
- inptr = 1;
- return inbuf[0];
+ error("ran out of input data");
+ return 0;
}
/* ===========================================================================
* Write the output window window[0..outcnt-1] and update crc and bytes_out.
* (Used for the decompressed data only.)
*/
-static void flush_window_low(void)
-{
- ulg c = crc; /* temporary variable */
- unsigned n;
- uch *in, *out, ch;
-
- in = window;
- out = &output_data[output_ptr];
- for (n = 0; n < outcnt; n++) {
- ch = *out++ = *in++;
- c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
- }
- crc = c;
- bytes_out += (ulg)outcnt;
- output_ptr += (ulg)outcnt;
- outcnt = 0;
-}
-
-static void flush_window_high(void)
-{
- ulg c = crc; /* temporary variable */
- unsigned n;
- uch *in, ch;
- in = window;
- for (n = 0; n < outcnt; n++) {
- ch = *output_data++ = *in++;
- if ((ulg)output_data == low_buffer_end) output_data=high_buffer_start;
- c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
- }
- crc = c;
- bytes_out += (ulg)outcnt;
- outcnt = 0;
-}
-
static void flush_window(void)
{
- if (high_loaded) flush_window_high();
- else flush_window_low();
+ /* With my window equal to my output buffer
+ * I only need to compute the crc here.
+ */
+ ulg c = crc; /* temporary variable */
+ unsigned n;
+ uch *in, ch;
+
+ in = window;
+ for (n = 0; n < outcnt; n++) {
+ ch = *in++;
+ c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
+ }
+ crc = c;
+ bytes_out += (ulg)outcnt;
+ outcnt = 0;
}
static void error(char *x)
@@ -281,57 +335,8 @@ static void error(char *x)
while(1); /* Halt */
}
-static void setup_normal_output_buffer(void)
-{
-#ifdef STANDARD_MEMORY_BIOS_CALL
- if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory");
-#else
- if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
-#endif
- output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
- free_mem_end_ptr = (long)real_mode;
-}
-
-struct moveparams {
- uch *low_buffer_start; int lcount;
- uch *high_buffer_start; int hcount;
-};
-
-static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
-{
- high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE);
-#ifdef STANDARD_MEMORY_BIOS_CALL
- if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
-#else
- if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
-#endif
- mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
- low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
- ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
- low_buffer_size = low_buffer_end - LOW_BUFFER_START;
- high_loaded = 1;
- free_mem_end_ptr = (long)high_buffer_start;
- if ( (__PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) {
- high_buffer_start = (uch *)(__PHYSICAL_START + low_buffer_size);
- mv->hcount = 0; /* say: we need not to move high_buffer */
- }
- else mv->hcount = -1;
- mv->high_buffer_start = high_buffer_start;
-}
-
-static void close_output_buffer_if_we_run_high(struct moveparams *mv)
-{
- if (bytes_out > low_buffer_size) {
- mv->lcount = low_buffer_size;
- if (mv->hcount)
- mv->hcount = bytes_out - low_buffer_size;
- } else {
- mv->lcount = bytes_out;
- mv->hcount = 0;
- }
-}
-
-int decompress_kernel(struct moveparams *mv, void *rmode)
+asmlinkage void decompress_kernel(void *rmode, unsigned long heap,
+ uch *input_data, unsigned long input_len, uch *output)
{
real_mode = rmode;
@@ -346,13 +351,21 @@ int decompress_kernel(struct moveparams *mv, void *rmode)
lines = RM_SCREEN_INFO.orig_video_lines;
cols = RM_SCREEN_INFO.orig_video_cols;
- if (free_mem_ptr < 0x100000) setup_normal_output_buffer();
- else setup_output_buffer_if_we_run_high(mv);
+ window = output; /* Output buffer (Normally at 1M) */
+ free_mem_ptr = heap; /* Heap */
+ free_mem_end_ptr = heap + HEAP_SIZE;
+ inbuf = input_data; /* Input buffer */
+ insize = input_len;
+ inptr = 0;
+
+ if ((ulg)output & (__KERNEL_ALIGN - 1))
+ error("Destination address not 2M aligned");
+ if ((ulg)output >= 0xffffffffffUL)
+ error("Destination address too large");
makecrc();
putstr(".\nDecompressing Linux...");
gunzip();
putstr("done.\nBooting the kernel.\n");
- if (high_loaded) close_output_buffer_if_we_run_high(mv);
- return high_loaded;
+ return;
}
diff --git a/arch/x86_64/boot/compressed/vmlinux.lds b/arch/x86_64/boot/compressed/vmlinux.lds
new file mode 100644
index 000000000000..94c13e557fb4
--- /dev/null
+++ b/arch/x86_64/boot/compressed/vmlinux.lds
@@ -0,0 +1,44 @@
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(startup_64)
+SECTIONS
+{
+ /* Be careful parts of head.S assume startup_32 is at
+ * address 0.
+ */
+ . = 0;
+ .text : {
+ _head = . ;
+ *(.text.head)
+ _ehead = . ;
+ *(.text.compressed)
+ _text = .; /* Text */
+ *(.text)
+ *(.text.*)
+ _etext = . ;
+ }
+ .rodata : {
+ _rodata = . ;
+ *(.rodata) /* read-only data */
+ *(.rodata.*)
+ _erodata = . ;
+ }
+ .data : {
+ _data = . ;
+ *(.data)
+ *(.data.*)
+ _edata = . ;
+ }
+ .bss : {
+ _bss = . ;
+ *(.bss)
+ *(.bss.*)
+ *(COMMON)
+ . = ALIGN(8);
+ _end = . ;
+ . = ALIGN(4096);
+ pgtable = . ;
+ . = . + 4096 * 6;
+ _heap = .;
+ }
+}
diff --git a/arch/x86_64/boot/compressed/vmlinux.scr b/arch/x86_64/boot/compressed/vmlinux.scr
index 1ed9d791f863..bd1429ce193e 100644
--- a/arch/x86_64/boot/compressed/vmlinux.scr
+++ b/arch/x86_64/boot/compressed/vmlinux.scr
@@ -1,9 +1,10 @@
SECTIONS
{
- .data : {
+ .text.compressed : {
input_len = .;
- LONG(input_data_end - input_data) input_data = .;
- *(.data)
- input_data_end = .;
+ LONG(input_data_end - input_data) input_data = .;
+ *(.data)
+ output_len = . - 4;
+ input_data_end = .;
}
}
diff --git a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S
index 770940cc0108..e9e33f949697 100644
--- a/arch/x86_64/boot/setup.S
+++ b/arch/x86_64/boot/setup.S
@@ -51,6 +51,7 @@
#include <asm/boot.h>
#include <asm/e820.h>
#include <asm/page.h>
+#include <asm/setup.h>
/* Signature words to ensure LILO loaded us right */
#define SIG1 0xAA55
@@ -80,7 +81,7 @@ start:
# This is the setup header, and it must start at %cs:2 (old 0x9020:2)
.ascii "HdrS" # header signature
- .word 0x0204 # header version number (>= 0x0105)
+ .word 0x0206 # header version number (>= 0x0105)
# or else old loadlin-1.5 will fail)
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
start_sys_seg: .word SYSSEG
@@ -155,7 +156,20 @@ cmd_line_ptr: .long 0 # (Header version 0x0202 or later)
# low memory 0x10000 or higher.
ramdisk_max: .long 0xffffffff
-
+kernel_alignment: .long 0x200000 # physical addr alignment required for
+ # protected mode relocatable kernel
+#ifdef CONFIG_RELOCATABLE
+relocatable_kernel: .byte 1
+#else
+relocatable_kernel: .byte 0
+#endif
+pad2: .byte 0
+pad3: .word 0
+
+cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
+ #added with boot protocol
+ #version 2.06
+
trampoline: call start_of_setup
.align 16
# The offset at this point is 0x240
@@ -290,64 +304,10 @@ loader_ok:
movw %cs,%ax
movw %ax,%ds
- /* minimum CPUID flags for x86-64 */
- /* see http://www.x86-64.org/lists/discuss/msg02971.html */
-#define SSE_MASK ((1<<25)|(1<<26))
-#define REQUIRED_MASK1 ((1<<0)|(1<<3)|(1<<4)|(1<<5)|(1<<6)|(1<<8)|\
- (1<<13)|(1<<15)|(1<<24))
-#define REQUIRED_MASK2 (1<<29)
-
- pushfl /* standard way to check for cpuid */
- popl %eax
- movl %eax,%ebx
- xorl $0x200000,%eax
- pushl %eax
- popfl
- pushfl
- popl %eax
- cmpl %eax,%ebx
- jz no_longmode /* cpu has no cpuid */
- movl $0x0,%eax
- cpuid
- cmpl $0x1,%eax
- jb no_longmode /* no cpuid 1 */
- xor %di,%di
- cmpl $0x68747541,%ebx /* AuthenticAMD */
- jnz noamd
- cmpl $0x69746e65,%edx
- jnz noamd
- cmpl $0x444d4163,%ecx
- jnz noamd
- mov $1,%di /* cpu is from AMD */
-noamd:
- movl $0x1,%eax
- cpuid
- andl $REQUIRED_MASK1,%edx
- xorl $REQUIRED_MASK1,%edx
- jnz no_longmode
- movl $0x80000000,%eax
- cpuid
- cmpl $0x80000001,%eax
- jb no_longmode /* no extended cpuid */
- movl $0x80000001,%eax
- cpuid
- andl $REQUIRED_MASK2,%edx
- xorl $REQUIRED_MASK2,%edx
- jnz no_longmode
-sse_test:
- movl $1,%eax
- cpuid
- andl $SSE_MASK,%edx
- cmpl $SSE_MASK,%edx
- je sse_ok
- test %di,%di
- jz no_longmode /* only try to force SSE on AMD */
- movl $0xc0010015,%ecx /* HWCR */
- rdmsr
- btr $15,%eax /* enable SSE */
- wrmsr
- xor %di,%di /* don't loop */
- jmp sse_test /* try again */
+ call verify_cpu
+ testl %eax,%eax
+ jz sse_ok
+
no_longmode:
call beep
lea long_mode_panic,%si
@@ -357,7 +317,8 @@ no_longmode_loop:
long_mode_panic:
.string "Your CPU does not support long mode. Use a 32bit distribution."
.byte 0
-
+
+#include "../kernel/verify_cpu.S"
sse_ok:
popw %ds
@@ -846,7 +807,7 @@ gdt_48:
# Include video setup & detection code
-#include "video.S"
+#include "../../i386/boot/video.S"
# Setup signature -- must be last
setup_sig1: .word SIG1
diff --git a/arch/x86_64/boot/video.S b/arch/x86_64/boot/video.S
deleted file mode 100644
index 6090516c9c7f..000000000000
--- a/arch/x86_64/boot/video.S
+++ /dev/null
@@ -1,2043 +0,0 @@
-/* video.S
- *
- * Display adapter & video mode setup, version 2.13 (14-May-99)
- *
- * Copyright (C) 1995 -- 1998 Martin Mares <mj@ucw.cz>
- * Based on the original setup.S code (C) Linus Torvalds and Mats Anderson
- *
- * Rewritten to use GNU 'as' by Chris Noe <stiker@northlink.com> May 1999
- *
- * For further information, look at Documentation/svga.txt.
- *
- */
-
-/* Enable autodetection of SVGA adapters and modes. */
-#undef CONFIG_VIDEO_SVGA
-
-/* Enable autodetection of VESA modes */
-#define CONFIG_VIDEO_VESA
-
-/* Enable compacting of mode table */
-#define CONFIG_VIDEO_COMPACT
-
-/* Retain screen contents when switching modes */
-#define CONFIG_VIDEO_RETAIN
-
-/* Enable local mode list */
-#undef CONFIG_VIDEO_LOCAL
-
-/* Force 400 scan lines for standard modes (hack to fix bad BIOS behaviour */
-#undef CONFIG_VIDEO_400_HACK
-
-/* Hack that lets you force specific BIOS mode ID and specific dimensions */
-#undef CONFIG_VIDEO_GFX_HACK
-#define VIDEO_GFX_BIOS_AX 0x4f02 /* 800x600 on ThinkPad */
-#define VIDEO_GFX_BIOS_BX 0x0102
-#define VIDEO_GFX_DUMMY_RESOLUTION 0x6425 /* 100x37 */
-
-/* This code uses an extended set of video mode numbers. These include:
- * Aliases for standard modes
- * NORMAL_VGA (-1)
- * EXTENDED_VGA (-2)
- * ASK_VGA (-3)
- * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
- * of compatibility when extending the table. These are between 0x00 and 0xff.
- */
-#define VIDEO_FIRST_MENU 0x0000
-
-/* Standard BIOS video modes (BIOS number + 0x0100) */
-#define VIDEO_FIRST_BIOS 0x0100
-
-/* VESA BIOS video modes (VESA number + 0x0200) */
-#define VIDEO_FIRST_VESA 0x0200
-
-/* Video7 special modes (BIOS number + 0x0900) */
-#define VIDEO_FIRST_V7 0x0900
-
-/* Special video modes */
-#define VIDEO_FIRST_SPECIAL 0x0f00
-#define VIDEO_80x25 0x0f00
-#define VIDEO_8POINT 0x0f01
-#define VIDEO_80x43 0x0f02
-#define VIDEO_80x28 0x0f03
-#define VIDEO_CURRENT_MODE 0x0f04
-#define VIDEO_80x30 0x0f05
-#define VIDEO_80x34 0x0f06
-#define VIDEO_80x60 0x0f07
-#define VIDEO_GFX_HACK 0x0f08
-#define VIDEO_LAST_SPECIAL 0x0f09
-
-/* Video modes given by resolution */
-#define VIDEO_FIRST_RESOLUTION 0x1000
-
-/* The "recalculate timings" flag */
-#define VIDEO_RECALC 0x8000
-
-/* Positions of various video parameters passed to the kernel */
-/* (see also include/linux/tty.h) */
-#define PARAM_CURSOR_POS 0x00
-#define PARAM_VIDEO_PAGE 0x04
-#define PARAM_VIDEO_MODE 0x06
-#define PARAM_VIDEO_COLS 0x07
-#define PARAM_VIDEO_EGA_BX 0x0a
-#define PARAM_VIDEO_LINES 0x0e
-#define PARAM_HAVE_VGA 0x0f
-#define PARAM_FONT_POINTS 0x10
-
-#define PARAM_LFB_WIDTH 0x12
-#define PARAM_LFB_HEIGHT 0x14
-#define PARAM_LFB_DEPTH 0x16
-#define PARAM_LFB_BASE 0x18
-#define PARAM_LFB_SIZE 0x1c
-#define PARAM_LFB_LINELENGTH 0x24
-#define PARAM_LFB_COLORS 0x26
-#define PARAM_VESAPM_SEG 0x2e
-#define PARAM_VESAPM_OFF 0x30
-#define PARAM_LFB_PAGES 0x32
-#define PARAM_VESA_ATTRIB 0x34
-#define PARAM_CAPABILITIES 0x36
-
-/* Define DO_STORE according to CONFIG_VIDEO_RETAIN */
-#ifdef CONFIG_VIDEO_RETAIN
-#define DO_STORE call store_screen
-#else
-#define DO_STORE
-#endif /* CONFIG_VIDEO_RETAIN */
-
-# This is the main entry point called by setup.S
-# %ds *must* be pointing to the bootsector
-video: pushw %ds # We use different segments
- pushw %ds # FS contains original DS
- popw %fs
- pushw %cs # DS is equal to CS
- popw %ds
- pushw %cs # ES is equal to CS
- popw %es
- xorw %ax, %ax
- movw %ax, %gs # GS is zero
- cld
- call basic_detect # Basic adapter type testing (EGA/VGA/MDA/CGA)
-#ifdef CONFIG_VIDEO_SELECT
- movw %fs:(0x01fa), %ax # User selected video mode
- cmpw $ASK_VGA, %ax # Bring up the menu
- jz vid2
-
- call mode_set # Set the mode
- jc vid1
-
- leaw badmdt, %si # Invalid mode ID
- call prtstr
-vid2: call mode_menu
-vid1:
-#ifdef CONFIG_VIDEO_RETAIN
- call restore_screen # Restore screen contents
-#endif /* CONFIG_VIDEO_RETAIN */
- call store_edid
-#endif /* CONFIG_VIDEO_SELECT */
- call mode_params # Store mode parameters
- popw %ds # Restore original DS
- ret
-
-# Detect if we have CGA, MDA, EGA or VGA and pass it to the kernel.
-basic_detect:
- movb $0, %fs:(PARAM_HAVE_VGA)
- movb $0x12, %ah # Check EGA/VGA
- movb $0x10, %bl
- int $0x10
- movw %bx, %fs:(PARAM_VIDEO_EGA_BX) # Identifies EGA to the kernel
- cmpb $0x10, %bl # No, it's a CGA/MDA/HGA card.
- je basret
-
- incb adapter
- movw $0x1a00, %ax # Check EGA or VGA?
- int $0x10
- cmpb $0x1a, %al # 1a means VGA...
- jne basret # anything else is EGA.
-
- incb %fs:(PARAM_HAVE_VGA) # We've detected a VGA
- incb adapter
-basret: ret
-
-# Store the video mode parameters for later usage by the kernel.
-# This is done by asking the BIOS except for the rows/columns
-# parameters in the default 80x25 mode -- these are set directly,
-# because some very obscure BIOSes supply insane values.
-mode_params:
-#ifdef CONFIG_VIDEO_SELECT
- cmpb $0, graphic_mode
- jnz mopar_gr
-#endif
- movb $0x03, %ah # Read cursor position
- xorb %bh, %bh
- int $0x10
- movw %dx, %fs:(PARAM_CURSOR_POS)
- movb $0x0f, %ah # Read page/mode/width
- int $0x10
- movw %bx, %fs:(PARAM_VIDEO_PAGE)
- movw %ax, %fs:(PARAM_VIDEO_MODE) # Video mode and screen width
- cmpb $0x7, %al # MDA/HGA => segment differs
- jnz mopar0
-
- movw $0xb000, video_segment
-mopar0: movw %gs:(0x485), %ax # Font size
- movw %ax, %fs:(PARAM_FONT_POINTS) # (valid only on EGA/VGA)
- movw force_size, %ax # Forced size?
- orw %ax, %ax
- jz mopar1
-
- movb %ah, %fs:(PARAM_VIDEO_COLS)
- movb %al, %fs:(PARAM_VIDEO_LINES)
- ret
-
-mopar1: movb $25, %al
- cmpb $0, adapter # If we are on CGA/MDA/HGA, the
- jz mopar2 # screen must have 25 lines.
-
- movb %gs:(0x484), %al # On EGA/VGA, use the EGA+ BIOS
- incb %al # location of max lines.
-mopar2: movb %al, %fs:(PARAM_VIDEO_LINES)
- ret
-
-#ifdef CONFIG_VIDEO_SELECT
-# Fetching of VESA frame buffer parameters
-mopar_gr:
- leaw modelist+1024, %di
- movb $0x23, %fs:(PARAM_HAVE_VGA)
- movw 16(%di), %ax
- movw %ax, %fs:(PARAM_LFB_LINELENGTH)
- movw 18(%di), %ax
- movw %ax, %fs:(PARAM_LFB_WIDTH)
- movw 20(%di), %ax
- movw %ax, %fs:(PARAM_LFB_HEIGHT)
- movb 25(%di), %al
- movb $0, %ah
- movw %ax, %fs:(PARAM_LFB_DEPTH)
- movb 29(%di), %al
- movb $0, %ah
- movw %ax, %fs:(PARAM_LFB_PAGES)
- movl 40(%di), %eax
- movl %eax, %fs:(PARAM_LFB_BASE)
- movl 31(%di), %eax
- movl %eax, %fs:(PARAM_LFB_COLORS)
- movl 35(%di), %eax
- movl %eax, %fs:(PARAM_LFB_COLORS+4)
- movw 0(%di), %ax
- movw %ax, %fs:(PARAM_VESA_ATTRIB)
-
-# get video mem size
- leaw modelist+1024, %di
- movw $0x4f00, %ax
- int $0x10
- xorl %eax, %eax
- movw 18(%di), %ax
- movl %eax, %fs:(PARAM_LFB_SIZE)
-
-# store mode capabilities
- movl 10(%di), %eax
- movl %eax, %fs:(PARAM_CAPABILITIES)
-
-# switching the DAC to 8-bit is for <= 8 bpp only
- movw %fs:(PARAM_LFB_DEPTH), %ax
- cmpw $8, %ax
- jg dac_done
-
-# get DAC switching capability
- xorl %eax, %eax
- movb 10(%di), %al
- testb $1, %al
- jz dac_set
-
-# attempt to switch DAC to 8-bit
- movw $0x4f08, %ax
- movw $0x0800, %bx
- int $0x10
- cmpw $0x004f, %ax
- jne dac_set
- movb %bh, dac_size # store actual DAC size
-
-dac_set:
-# set color size to DAC size
- movb dac_size, %al
- movb %al, %fs:(PARAM_LFB_COLORS+0)
- movb %al, %fs:(PARAM_LFB_COLORS+2)
- movb %al, %fs:(PARAM_LFB_COLORS+4)
- movb %al, %fs:(PARAM_LFB_COLORS+6)
-
-# set color offsets to 0
- movb $0, %fs:(PARAM_LFB_COLORS+1)
- movb $0, %fs:(PARAM_LFB_COLORS+3)
- movb $0, %fs:(PARAM_LFB_COLORS+5)
- movb $0, %fs:(PARAM_LFB_COLORS+7)
-
-dac_done:
-# get protected mode interface informations
- movw $0x4f0a, %ax
- xorw %bx, %bx
- xorw %di, %di
- int $0x10
- cmp $0x004f, %ax
- jnz no_pm
-
- movw %es, %fs:(PARAM_VESAPM_SEG)
- movw %di, %fs:(PARAM_VESAPM_OFF)
-no_pm: ret
-
-# The video mode menu
-mode_menu:
- leaw keymsg, %si # "Return/Space/Timeout" message
- call prtstr
- call flush
-nokey: call getkt
-
- cmpb $0x0d, %al # ENTER ?
- je listm # yes - manual mode selection
-
- cmpb $0x20, %al # SPACE ?
- je defmd1 # no - repeat
-
- call beep
- jmp nokey
-
-defmd1: ret # No mode chosen? Default 80x25
-
-listm: call mode_table # List mode table
-listm0: leaw name_bann, %si # Print adapter name
- call prtstr
- movw card_name, %si
- orw %si, %si
- jnz an2
-
- movb adapter, %al
- leaw old_name, %si
- orb %al, %al
- jz an1
-
- leaw ega_name, %si
- decb %al
- jz an1
-
- leaw vga_name, %si
- jmp an1
-
-an2: call prtstr
- leaw svga_name, %si
-an1: call prtstr
- leaw listhdr, %si # Table header
- call prtstr
- movb $0x30, %dl # DL holds mode number
- leaw modelist, %si
-lm1: cmpw $ASK_VGA, (%si) # End?
- jz lm2
-
- movb %dl, %al # Menu selection number
- call prtchr
- call prtsp2
- lodsw
- call prthw # Mode ID
- call prtsp2
- movb 0x1(%si), %al
- call prtdec # Rows
- movb $0x78, %al # the letter 'x'
- call prtchr
- lodsw
- call prtdec # Columns
- movb $0x0d, %al # New line
- call prtchr
- movb $0x0a, %al
- call prtchr
- incb %dl # Next character
- cmpb $0x3a, %dl
- jnz lm1
-
- movb $0x61, %dl
- jmp lm1
-
-lm2: leaw prompt, %si # Mode prompt
- call prtstr
- leaw edit_buf, %di # Editor buffer
-lm3: call getkey
- cmpb $0x0d, %al # Enter?
- jz lment
-
- cmpb $0x08, %al # Backspace?
- jz lmbs
-
- cmpb $0x20, %al # Printable?
- jc lm3
-
- cmpw $edit_buf+4, %di # Enough space?
- jz lm3
-
- stosb
- call prtchr
- jmp lm3
-
-lmbs: cmpw $edit_buf, %di # Backspace
- jz lm3
-
- decw %di
- movb $0x08, %al
- call prtchr
- call prtspc
- movb $0x08, %al
- call prtchr
- jmp lm3
-
-lment: movb $0, (%di)
- leaw crlft, %si
- call prtstr
- leaw edit_buf, %si
- cmpb $0, (%si) # Empty string = default mode
- jz lmdef
-
- cmpb $0, 1(%si) # One character = menu selection
- jz mnusel
-
- cmpw $0x6373, (%si) # "scan" => mode scanning
- jnz lmhx
-
- cmpw $0x6e61, 2(%si)
- jz lmscan
-
-lmhx: xorw %bx, %bx # Else => mode ID in hex
-lmhex: lodsb
- orb %al, %al
- jz lmuse1
-
- subb $0x30, %al
- jc lmbad
-
- cmpb $10, %al
- jc lmhx1
-
- subb $7, %al
- andb $0xdf, %al
- cmpb $10, %al
- jc lmbad
-
- cmpb $16, %al
- jnc lmbad
-
-lmhx1: shlw $4, %bx
- orb %al, %bl
- jmp lmhex
-
-lmuse1: movw %bx, %ax
- jmp lmuse
-
-mnusel: lodsb # Menu selection
- xorb %ah, %ah
- subb $0x30, %al
- jc lmbad
-
- cmpb $10, %al
- jc lmuse
-
- cmpb $0x61-0x30, %al
- jc lmbad
-
- subb $0x61-0x30-10, %al
- cmpb $36, %al
- jnc lmbad
-
-lmuse: call mode_set
- jc lmdef
-
-lmbad: leaw unknt, %si
- call prtstr
- jmp lm2
-lmscan: cmpb $0, adapter # Scanning only on EGA/VGA
- jz lmbad
-
- movw $0, mt_end # Scanning of modes is
- movb $1, scanning # done as new autodetection.
- call mode_table
- jmp listm0
-lmdef: ret
-
-# Additional parts of mode_set... (relative jumps, you know)
-setv7: # Video7 extended modes
- DO_STORE
- subb $VIDEO_FIRST_V7>>8, %bh
- movw $0x6f05, %ax
- int $0x10
- stc
- ret
-
-_setrec: jmp setrec # Ugly...
-_set_80x25: jmp set_80x25
-
-# Aliases for backward compatibility.
-setalias:
- movw $VIDEO_80x25, %ax
- incw %bx
- jz mode_set
-
- movb $VIDEO_8POINT-VIDEO_FIRST_SPECIAL, %al
- incw %bx
- jnz setbad # Fall-through!
-
-# Setting of user mode (AX=mode ID) => CF=success
-mode_set:
- movw %ax, %fs:(0x01fa) # Store mode for use in acpi_wakeup.S
- movw %ax, %bx
- cmpb $0xff, %ah
- jz setalias
-
- testb $VIDEO_RECALC>>8, %ah
- jnz _setrec
-
- cmpb $VIDEO_FIRST_RESOLUTION>>8, %ah
- jnc setres
-
- cmpb $VIDEO_FIRST_SPECIAL>>8, %ah
- jz setspc
-
- cmpb $VIDEO_FIRST_V7>>8, %ah
- jz setv7
-
- cmpb $VIDEO_FIRST_VESA>>8, %ah
- jnc check_vesa
-
- orb %ah, %ah
- jz setmenu
-
- decb %ah
- jz setbios
-
-setbad: clc
- movb $0, do_restore # The screen needn't be restored
- ret
-
-setvesa:
- DO_STORE
- subb $VIDEO_FIRST_VESA>>8, %bh
- movw $0x4f02, %ax # VESA BIOS mode set call
- int $0x10
- cmpw $0x004f, %ax # AL=4f if implemented
- jnz setbad # AH=0 if OK
-
- stc
- ret
-
-setbios:
- DO_STORE
- int $0x10 # Standard BIOS mode set call
- pushw %bx
- movb $0x0f, %ah # Check if really set
- int $0x10
- popw %bx
- cmpb %bl, %al
- jnz setbad
-
- stc
- ret
-
-setspc: xorb %bh, %bh # Set special mode
- cmpb $VIDEO_LAST_SPECIAL-VIDEO_FIRST_SPECIAL, %bl
- jnc setbad
-
- addw %bx, %bx
- jmp *spec_inits(%bx)
-
-setmenu:
- orb %al, %al # 80x25 is an exception
- jz _set_80x25
-
- pushw %bx # Set mode chosen from menu
- call mode_table # Build the mode table
- popw %ax
- shlw $2, %ax
- addw %ax, %si
- cmpw %di, %si
- jnc setbad
-
- movw (%si), %ax # Fetch mode ID
-_m_s: jmp mode_set
-
-setres: pushw %bx # Set mode chosen by resolution
- call mode_table
- popw %bx
- xchgb %bl, %bh
-setr1: lodsw
- cmpw $ASK_VGA, %ax # End of the list?
- jz setbad
-
- lodsw
- cmpw %bx, %ax
- jnz setr1
-
- movw -4(%si), %ax # Fetch mode ID
- jmp _m_s
-
-check_vesa:
-#ifdef CONFIG_FIRMWARE_EDID
- leaw modelist+1024, %di
- movw $0x4f00, %ax
- int $0x10
- cmpw $0x004f, %ax
- jnz setbad
-
- movw 4(%di), %ax
- movw %ax, vbe_version
-#endif
- leaw modelist+1024, %di
- subb $VIDEO_FIRST_VESA>>8, %bh
- movw %bx, %cx # Get mode information structure
- movw $0x4f01, %ax
- int $0x10
- addb $VIDEO_FIRST_VESA>>8, %bh
- cmpw $0x004f, %ax
- jnz setbad
-
- movb (%di), %al # Check capabilities.
- andb $0x19, %al
- cmpb $0x09, %al
- jz setvesa # This is a text mode
-
- movb (%di), %al # Check capabilities.
- andb $0x99, %al
- cmpb $0x99, %al
- jnz _setbad # Doh! No linear frame buffer.
-
- subb $VIDEO_FIRST_VESA>>8, %bh
- orw $0x4000, %bx # Use linear frame buffer
- movw $0x4f02, %ax # VESA BIOS mode set call
- int $0x10
- cmpw $0x004f, %ax # AL=4f if implemented
- jnz _setbad # AH=0 if OK
-
- movb $1, graphic_mode # flag graphic mode
- movb $0, do_restore # no screen restore
- stc
- ret
-
-_setbad: jmp setbad # Ugly...
-
-# Recalculate vertical display end registers -- this fixes various
-# inconsistencies of extended modes on many adapters. Called when
-# the VIDEO_RECALC flag is set in the mode ID.
-
-setrec: subb $VIDEO_RECALC>>8, %ah # Set the base mode
- call mode_set
- jnc rct3
-
- movw %gs:(0x485), %ax # Font size in pixels
- movb %gs:(0x484), %bl # Number of rows
- incb %bl
- mulb %bl # Number of visible
- decw %ax # scan lines - 1
- movw $0x3d4, %dx
- movw %ax, %bx
- movb $0x12, %al # Lower 8 bits
- movb %bl, %ah
- outw %ax, %dx
- movb $0x07, %al # Bits 8 and 9 in the overflow register
- call inidx
- xchgb %al, %ah
- andb $0xbd, %ah
- shrb %bh
- jnc rct1
- orb $0x02, %ah
-rct1: shrb %bh
- jnc rct2
- orb $0x40, %ah
-rct2: movb $0x07, %al
- outw %ax, %dx
- stc
-rct3: ret
-
-# Table of routines for setting of the special modes.
-spec_inits:
- .word set_80x25
- .word set_8pixel
- .word set_80x43
- .word set_80x28
- .word set_current
- .word set_80x30
- .word set_80x34
- .word set_80x60
- .word set_gfx
-
-# Set the 80x25 mode. If already set, do nothing.
-set_80x25:
- movw $0x5019, force_size # Override possibly broken BIOS
-use_80x25:
-#ifdef CONFIG_VIDEO_400_HACK
- movw $0x1202, %ax # Force 400 scan lines
- movb $0x30, %bl
- int $0x10
-#else
- movb $0x0f, %ah # Get current mode ID
- int $0x10
- cmpw $0x5007, %ax # Mode 7 (80x25 mono) is the only one available
- jz st80 # on CGA/MDA/HGA and is also available on EGAM
-
- cmpw $0x5003, %ax # Unknown mode, force 80x25 color
- jnz force3
-
-st80: cmpb $0, adapter # CGA/MDA/HGA => mode 3/7 is always 80x25
- jz set80
-
- movb %gs:(0x0484), %al # This is EGA+ -- beware of 80x50 etc.
- orb %al, %al # Some buggy BIOS'es set 0 rows
- jz set80
-
- cmpb $24, %al # It's hopefully correct
- jz set80
-#endif /* CONFIG_VIDEO_400_HACK */
-force3: DO_STORE
- movw $0x0003, %ax # Forced set
- int $0x10
-set80: stc
- ret
-
-# Set the 80x50/80x43 8-pixel mode. Simple BIOS calls.
-set_8pixel:
- DO_STORE
- call use_80x25 # The base is 80x25
-set_8pt:
- movw $0x1112, %ax # Use 8x8 font
- xorb %bl, %bl
- int $0x10
- movw $0x1200, %ax # Use alternate print screen
- movb $0x20, %bl
- int $0x10
- movw $0x1201, %ax # Turn off cursor emulation
- movb $0x34, %bl
- int $0x10
- movb $0x01, %ah # Define cursor scan lines 6-7
- movw $0x0607, %cx
- int $0x10
-set_current:
- stc
- ret
-
-# Set the 80x28 mode. This mode works on all VGA's, because it's a standard
-# 80x25 mode with 14-point fonts instead of 16-point.
-set_80x28:
- DO_STORE
- call use_80x25 # The base is 80x25
-set14: movw $0x1111, %ax # Use 9x14 font
- xorb %bl, %bl
- int $0x10
- movb $0x01, %ah # Define cursor scan lines 11-12
- movw $0x0b0c, %cx
- int $0x10
- stc
- ret
-
-# Set the 80x43 mode. This mode is works on all VGA's.
-# It's a 350-scanline mode with 8-pixel font.
-set_80x43:
- DO_STORE
- movw $0x1201, %ax # Set 350 scans
- movb $0x30, %bl
- int $0x10
- movw $0x0003, %ax # Reset video mode
- int $0x10
- jmp set_8pt # Use 8-pixel font
-
-# Set the 80x30 mode (all VGA's). 480 scanlines, 16-pixel font.
-set_80x30:
- call use_80x25 # Start with real 80x25
- DO_STORE
- movw $0x3cc, %dx # Get CRTC port
- inb %dx, %al
- movb $0xd4, %dl
- rorb %al # Mono or color?
- jc set48a
-
- movb $0xb4, %dl
-set48a: movw $0x0c11, %ax # Vertical sync end (also unlocks CR0-7)
- call outidx
- movw $0x0b06, %ax # Vertical total
- call outidx
- movw $0x3e07, %ax # (Vertical) overflow
- call outidx
- movw $0xea10, %ax # Vertical sync start
- call outidx
- movw $0xdf12, %ax # Vertical display end
- call outidx
- movw $0xe715, %ax # Vertical blank start
- call outidx
- movw $0x0416, %ax # Vertical blank end
- call outidx
- pushw %dx
- movb $0xcc, %dl # Misc output register (read)
- inb %dx, %al
- movb $0xc2, %dl # (write)
- andb $0x0d, %al # Preserve clock select bits and color bit
- orb $0xe2, %al # Set correct sync polarity
- outb %al, %dx
- popw %dx
- movw $0x501e, force_size
- stc # That's all.
- ret
-
-# Set the 80x34 mode (all VGA's). 480 scans, 14-pixel font.
-set_80x34:
- call set_80x30 # Set 480 scans
- call set14 # And 14-pt font
- movw $0xdb12, %ax # VGA vertical display end
- movw $0x5022, force_size
-setvde: call outidx
- stc
- ret
-
-# Set the 80x60 mode (all VGA's). 480 scans, 8-pixel font.
-set_80x60:
- call set_80x30 # Set 480 scans
- call set_8pt # And 8-pt font
- movw $0xdf12, %ax # VGA vertical display end
- movw $0x503c, force_size
- jmp setvde
-
-# Special hack for ThinkPad graphics
-set_gfx:
-#ifdef CONFIG_VIDEO_GFX_HACK
- movw $VIDEO_GFX_BIOS_AX, %ax
- movw $VIDEO_GFX_BIOS_BX, %bx
- int $0x10
- movw $VIDEO_GFX_DUMMY_RESOLUTION, force_size
- stc
-#endif
- ret
-
-#ifdef CONFIG_VIDEO_RETAIN
-
-# Store screen contents to temporary buffer.
-store_screen:
- cmpb $0, do_restore # Already stored?
- jnz stsr
-
- testb $CAN_USE_HEAP, loadflags # Have we space for storing?
- jz stsr
-
- pushw %ax
- pushw %bx
- pushw force_size # Don't force specific size
- movw $0, force_size
- call mode_params # Obtain params of current mode
- popw force_size
- movb %fs:(PARAM_VIDEO_LINES), %ah
- movb %fs:(PARAM_VIDEO_COLS), %al
- movw %ax, %bx # BX=dimensions
- mulb %ah
- movw %ax, %cx # CX=number of characters
- addw %ax, %ax # Calculate image size
- addw $modelist+1024+4, %ax
- cmpw heap_end_ptr, %ax
- jnc sts1 # Unfortunately, out of memory
-
- movw %fs:(PARAM_CURSOR_POS), %ax # Store mode params
- leaw modelist+1024, %di
- stosw
- movw %bx, %ax
- stosw
- pushw %ds # Store the screen
- movw video_segment, %ds
- xorw %si, %si
- rep
- movsw
- popw %ds
- incb do_restore # Screen will be restored later
-sts1: popw %bx
- popw %ax
-stsr: ret
-
-# Restore screen contents from temporary buffer.
-restore_screen:
- cmpb $0, do_restore # Has the screen been stored?
- jz res1
-
- call mode_params # Get parameters of current mode
- movb %fs:(PARAM_VIDEO_LINES), %cl
- movb %fs:(PARAM_VIDEO_COLS), %ch
- leaw modelist+1024, %si # Screen buffer
- lodsw # Set cursor position
- movw %ax, %dx
- cmpb %cl, %dh
- jc res2
-
- movb %cl, %dh
- decb %dh
-res2: cmpb %ch, %dl
- jc res3
-
- movb %ch, %dl
- decb %dl
-res3: movb $0x02, %ah
- movb $0x00, %bh
- int $0x10
- lodsw # Display size
- movb %ah, %dl # DL=number of lines
- movb $0, %ah # BX=phys. length of orig. line
- movw %ax, %bx
- cmpb %cl, %dl # Too many?
- jc res4
-
- pushw %ax
- movb %dl, %al
- subb %cl, %al
- mulb %bl
- addw %ax, %si
- addw %ax, %si
- popw %ax
- movb %cl, %dl
-res4: cmpb %ch, %al # Too wide?
- jc res5
-
- movb %ch, %al # AX=width of src. line
-res5: movb $0, %cl
- xchgb %ch, %cl
- movw %cx, %bp # BP=width of dest. line
- pushw %es
- movw video_segment, %es
- xorw %di, %di # Move the data
- addw %bx, %bx # Convert BX and BP to _bytes_
- addw %bp, %bp
-res6: pushw %si
- pushw %di
- movw %ax, %cx
- rep
- movsw
- popw %di
- popw %si
- addw %bp, %di
- addw %bx, %si
- decb %dl
- jnz res6
-
- popw %es # Done
-res1: ret
-#endif /* CONFIG_VIDEO_RETAIN */
-
-# Write to indexed VGA register (AL=index, AH=data, DX=index reg. port)
-outidx: outb %al, %dx
- pushw %ax
- movb %ah, %al
- incw %dx
- outb %al, %dx
- decw %dx
- popw %ax
- ret
-
-# Build the table of video modes (stored after the setup.S code at the
-# `modelist' label. Each video mode record looks like:
-# .word MODE-ID (our special mode ID (see above))
-# .byte rows (number of rows)
-# .byte columns (number of columns)
-# Returns address of the end of the table in DI, the end is marked
-# with a ASK_VGA ID.
-mode_table:
- movw mt_end, %di # Already filled?
- orw %di, %di
- jnz mtab1x
-
- leaw modelist, %di # Store standard modes:
- movl $VIDEO_80x25 + 0x50190000, %eax # The 80x25 mode (ALL)
- stosl
- movb adapter, %al # CGA/MDA/HGA -- no more modes
- orb %al, %al
- jz mtabe
-
- decb %al
- jnz mtabv
-
- movl $VIDEO_8POINT + 0x502b0000, %eax # The 80x43 EGA mode
- stosl
- jmp mtabe
-
-mtab1x: jmp mtab1
-
-mtabv: leaw vga_modes, %si # All modes for std VGA
- movw $vga_modes_end-vga_modes, %cx
- rep # I'm unable to use movsw as I don't know how to store a half
- movsb # of the expression above to cx without using explicit shr.
-
- cmpb $0, scanning # Mode scan requested?
- jz mscan1
-
- call mode_scan
-mscan1:
-
-#ifdef CONFIG_VIDEO_LOCAL
- call local_modes
-#endif /* CONFIG_VIDEO_LOCAL */
-
-#ifdef CONFIG_VIDEO_VESA
- call vesa_modes # Detect VESA VGA modes
-#endif /* CONFIG_VIDEO_VESA */
-
-#ifdef CONFIG_VIDEO_SVGA
- cmpb $0, scanning # Bypass when scanning
- jnz mscan2
-
- call svga_modes # Detect SVGA cards & modes
-mscan2:
-#endif /* CONFIG_VIDEO_SVGA */
-
-mtabe:
-
-#ifdef CONFIG_VIDEO_COMPACT
- leaw modelist, %si
- movw %di, %dx
- movw %si, %di
-cmt1: cmpw %dx, %si # Scan all modes
- jz cmt2
-
- leaw modelist, %bx # Find in previous entries
- movw 2(%si), %cx
-cmt3: cmpw %bx, %si
- jz cmt4
-
- cmpw 2(%bx), %cx # Found => don't copy this entry
- jz cmt5
-
- addw $4, %bx
- jmp cmt3
-
-cmt4: movsl # Copy entry
- jmp cmt1
-
-cmt5: addw $4, %si # Skip entry
- jmp cmt1
-
-cmt2:
-#endif /* CONFIG_VIDEO_COMPACT */
-
- movw $ASK_VGA, (%di) # End marker
- movw %di, mt_end
-mtab1: leaw modelist, %si # SI=mode list, DI=list end
-ret0: ret
-
-# Modes usable on all standard VGAs
-vga_modes:
- .word VIDEO_8POINT
- .word 0x5032 # 80x50
- .word VIDEO_80x43
- .word 0x502b # 80x43
- .word VIDEO_80x28
- .word 0x501c # 80x28
- .word VIDEO_80x30
- .word 0x501e # 80x30
- .word VIDEO_80x34
- .word 0x5022 # 80x34
- .word VIDEO_80x60
- .word 0x503c # 80x60
-#ifdef CONFIG_VIDEO_GFX_HACK
- .word VIDEO_GFX_HACK
- .word VIDEO_GFX_DUMMY_RESOLUTION
-#endif
-
-vga_modes_end:
-# Detect VESA modes.
-
-#ifdef CONFIG_VIDEO_VESA
-vesa_modes:
- cmpb $2, adapter # VGA only
- jnz ret0
-
- movw %di, %bp # BP=original mode table end
- addw $0x200, %di # Buffer space
- movw $0x4f00, %ax # VESA Get card info call
- int $0x10
- movw %bp, %di
- cmpw $0x004f, %ax # Successful?
- jnz ret0
-
- cmpw $0x4556, 0x200(%di)
- jnz ret0
-
- cmpw $0x4153, 0x202(%di)
- jnz ret0
-
- movw $vesa_name, card_name # Set name to "VESA VGA"
- pushw %gs
- lgsw 0x20e(%di), %si # GS:SI=mode list
- movw $128, %cx # Iteration limit
-vesa1:
-# gas version 2.9.1, using BFD version 2.9.1.0.23 buggers the next inst.
-# XXX: lodsw %gs:(%si), %ax # Get next mode in the list
- gs; lodsw
- cmpw $0xffff, %ax # End of the table?
- jz vesar
-
- cmpw $0x0080, %ax # Check validity of mode ID
- jc vesa2
-
- orb %ah, %ah # Valid IDs: 0x0000-0x007f/0x0100-0x07ff
- jz vesan # Certain BIOSes report 0x80-0xff!
-
- cmpw $0x0800, %ax
- jnc vesae
-
-vesa2: pushw %cx
- movw %ax, %cx # Get mode information structure
- movw $0x4f01, %ax
- int $0x10
- movw %cx, %bx # BX=mode number
- addb $VIDEO_FIRST_VESA>>8, %bh
- popw %cx
- cmpw $0x004f, %ax
- jnz vesan # Don't report errors (buggy BIOSES)
-
- movb (%di), %al # Check capabilities. We require
- andb $0x19, %al # a color text mode.
- cmpb $0x09, %al
- jnz vesan
-
- cmpw $0xb800, 8(%di) # Standard video memory address required
- jnz vesan
-
- testb $2, (%di) # Mode characteristics supplied?
- movw %bx, (%di) # Store mode number
- jz vesa3
-
- xorw %dx, %dx
- movw 0x12(%di), %bx # Width
- orb %bh, %bh
- jnz vesan
-
- movb %bl, 0x3(%di)
- movw 0x14(%di), %ax # Height
- orb %ah, %ah
- jnz vesan
-
- movb %al, 2(%di)
- mulb %bl
- cmpw $8193, %ax # Small enough for Linux console driver?
- jnc vesan
-
- jmp vesaok
-
-vesa3: subw $0x8108, %bx # This mode has no detailed info specified,
- jc vesan # so it must be a standard VESA mode.
-
- cmpw $5, %bx
- jnc vesan
-
- movw vesa_text_mode_table(%bx), %ax
- movw %ax, 2(%di)
-vesaok: addw $4, %di # The mode is valid. Store it.
-vesan: loop vesa1 # Next mode. Limit exceeded => error
-vesae: leaw vesaer, %si
- call prtstr
- movw %bp, %di # Discard already found modes.
-vesar: popw %gs
- ret
-
-# Dimensions of standard VESA text modes
-vesa_text_mode_table:
- .byte 60, 80 # 0108
- .byte 25, 132 # 0109
- .byte 43, 132 # 010A
- .byte 50, 132 # 010B
- .byte 60, 132 # 010C
-#endif /* CONFIG_VIDEO_VESA */
-
-# Scan for video modes. A bit dirty, but should work.
-mode_scan:
- movw $0x0100, %cx # Start with mode 0
-scm1: movb $0, %ah # Test the mode
- movb %cl, %al
- int $0x10
- movb $0x0f, %ah
- int $0x10
- cmpb %cl, %al
- jnz scm2 # Mode not set
-
- movw $0x3c0, %dx # Test if it's a text mode
- movb $0x10, %al # Mode bits
- call inidx
- andb $0x03, %al
- jnz scm2
-
- movb $0xce, %dl # Another set of mode bits
- movb $0x06, %al
- call inidx
- shrb %al
- jc scm2
-
- movb $0xd4, %dl # Cursor location
- movb $0x0f, %al
- call inidx
- orb %al, %al
- jnz scm2
-
- movw %cx, %ax # Ok, store the mode
- stosw
- movb %gs:(0x484), %al # Number of rows
- incb %al
- stosb
- movw %gs:(0x44a), %ax # Number of columns
- stosb
-scm2: incb %cl
- jns scm1
-
- movw $0x0003, %ax # Return back to mode 3
- int $0x10
- ret
-
-tstidx: outw %ax, %dx # OUT DX,AX and inidx
-inidx: outb %al, %dx # Read from indexed VGA register
- incw %dx # AL=index, DX=index reg port -> AL=data
- inb %dx, %al
- decw %dx
- ret
-
-# Try to detect type of SVGA card and supply (usually approximate) video
-# mode table for it.
-
-#ifdef CONFIG_VIDEO_SVGA
-svga_modes:
- leaw svga_table, %si # Test all known SVGA adapters
-dosvga: lodsw
- movw %ax, %bp # Default mode table
- orw %ax, %ax
- jz didsv1
-
- lodsw # Pointer to test routine
- pushw %si
- pushw %di
- pushw %es
- movw $0xc000, %bx
- movw %bx, %es
- call *%ax # Call test routine
- popw %es
- popw %di
- popw %si
- orw %bp, %bp
- jz dosvga
-
- movw %bp, %si # Found, copy the modes
- movb svga_prefix, %ah
-cpsvga: lodsb
- orb %al, %al
- jz didsv
-
- stosw
- movsw
- jmp cpsvga
-
-didsv: movw %si, card_name # Store pointer to card name
-didsv1: ret
-
-# Table of all known SVGA cards. For each card, we store a pointer to
-# a table of video modes supported by the card and a pointer to a routine
-# used for testing of presence of the card. The video mode table is always
-# followed by the name of the card or the chipset.
-svga_table:
- .word ati_md, ati_test
- .word oak_md, oak_test
- .word paradise_md, paradise_test
- .word realtek_md, realtek_test
- .word s3_md, s3_test
- .word chips_md, chips_test
- .word video7_md, video7_test
- .word cirrus5_md, cirrus5_test
- .word cirrus6_md, cirrus6_test
- .word cirrus1_md, cirrus1_test
- .word ahead_md, ahead_test
- .word everex_md, everex_test
- .word genoa_md, genoa_test
- .word trident_md, trident_test
- .word tseng_md, tseng_test
- .word 0
-
-# Test routines and mode tables:
-
-# S3 - The test algorithm was taken from the SuperProbe package
-# for XFree86 1.2.1. Report bugs to Christoph.Niemann@linux.org
-s3_test:
- movw $0x0f35, %cx # we store some constants in cl/ch
- movw $0x03d4, %dx
- movb $0x38, %al
- call inidx
- movb %al, %bh # store current CRT-register 0x38
- movw $0x0038, %ax
- call outidx # disable writing to special regs
- movb %cl, %al # check whether we can write special reg 0x35
- call inidx
- movb %al, %bl # save the current value of CRT reg 0x35
- andb $0xf0, %al # clear bits 0-3
- movb %al, %ah
- movb %cl, %al # and write it to CRT reg 0x35
- call outidx
- call inidx # now read it back
- andb %ch, %al # clear the upper 4 bits
- jz s3_2 # the first test failed. But we have a
-
- movb %bl, %ah # second chance
- movb %cl, %al
- call outidx
- jmp s3_1 # do the other tests
-
-s3_2: movw %cx, %ax # load ah with 0xf and al with 0x35
- orb %bl, %ah # set the upper 4 bits of ah with the orig value
- call outidx # write ...
- call inidx # ... and reread
- andb %cl, %al # turn off the upper 4 bits
- pushw %ax
- movb %bl, %ah # restore old value in register 0x35
- movb %cl, %al
- call outidx
- popw %ax
- cmpb %ch, %al # setting lower 4 bits was successful => bad
- je no_s3 # writing is allowed => this is not an S3
-
-s3_1: movw $0x4838, %ax # allow writing to special regs by putting
- call outidx # magic number into CRT-register 0x38
- movb %cl, %al # check whether we can write special reg 0x35
- call inidx
- movb %al, %bl
- andb $0xf0, %al
- movb %al, %ah
- movb %cl, %al
- call outidx
- call inidx
- andb %ch, %al
- jnz no_s3 # no, we can't write => no S3
-
- movw %cx, %ax
- orb %bl, %ah
- call outidx
- call inidx
- andb %ch, %al
- pushw %ax
- movb %bl, %ah # restore old value in register 0x35
- movb %cl, %al
- call outidx
- popw %ax
- cmpb %ch, %al
- jne no_s31 # writing not possible => no S3
- movb $0x30, %al
- call inidx # now get the S3 id ...
- leaw idS3, %di
- movw $0x10, %cx
- repne
- scasb
- je no_s31
-
- movb %bh, %ah
- movb $0x38, %al
- jmp s3rest
-
-no_s3: movb $0x35, %al # restore CRT register 0x35
- movb %bl, %ah
- call outidx
-no_s31: xorw %bp, %bp # Detection failed
-s3rest: movb %bh, %ah
- movb $0x38, %al # restore old value of CRT register 0x38
- jmp outidx
-
-idS3: .byte 0x81, 0x82, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95
- .byte 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa8, 0xb0
-
-s3_md: .byte 0x54, 0x2b, 0x84
- .byte 0x55, 0x19, 0x84
- .byte 0
- .ascii "S3"
- .byte 0
-
-# ATI cards.
-ati_test:
- leaw idati, %si
- movw $0x31, %di
- movw $0x09, %cx
- repe
- cmpsb
- je atiok
-
- xorw %bp, %bp
-atiok: ret
-
-idati: .ascii "761295520"
-
-ati_md: .byte 0x23, 0x19, 0x84
- .byte 0x33, 0x2c, 0x84
- .byte 0x22, 0x1e, 0x64
- .byte 0x21, 0x19, 0x64
- .byte 0x58, 0x21, 0x50
- .byte 0x5b, 0x1e, 0x50
- .byte 0
- .ascii "ATI"
- .byte 0
-
-# AHEAD
-ahead_test:
- movw $0x200f, %ax
- movw $0x3ce, %dx
- outw %ax, %dx
- incw %dx
- inb %dx, %al
- cmpb $0x20, %al
- je isahed
-
- cmpb $0x21, %al
- je isahed
-
- xorw %bp, %bp
-isahed: ret
-
-ahead_md:
- .byte 0x22, 0x2c, 0x84
- .byte 0x23, 0x19, 0x84
- .byte 0x24, 0x1c, 0x84
- .byte 0x2f, 0x32, 0xa0
- .byte 0x32, 0x22, 0x50
- .byte 0x34, 0x42, 0x50
- .byte 0
- .ascii "Ahead"
- .byte 0
-
-# Chips & Tech.
-chips_test:
- movw $0x3c3, %dx
- inb %dx, %al
- orb $0x10, %al
- outb %al, %dx
- movw $0x104, %dx
- inb %dx, %al
- movb %al, %bl
- movw $0x3c3, %dx
- inb %dx, %al
- andb $0xef, %al
- outb %al, %dx
- cmpb $0xa5, %bl
- je cantok
-
- xorw %bp, %bp
-cantok: ret
-
-chips_md:
- .byte 0x60, 0x19, 0x84
- .byte 0x61, 0x32, 0x84
- .byte 0
- .ascii "Chips & Technologies"
- .byte 0
-
-# Cirrus Logic 5X0
-cirrus1_test:
- movw $0x3d4, %dx
- movb $0x0c, %al
- outb %al, %dx
- incw %dx
- inb %dx, %al
- movb %al, %bl
- xorb %al, %al
- outb %al, %dx
- decw %dx
- movb $0x1f, %al
- outb %al, %dx
- incw %dx
- inb %dx, %al
- movb %al, %bh
- xorb %ah, %ah
- shlb $4, %al
- movw %ax, %cx
- movb %bh, %al
- shrb $4, %al
- addw %ax, %cx
- shlw $8, %cx
- addw $6, %cx
- movw %cx, %ax
- movw $0x3c4, %dx
- outw %ax, %dx
- incw %dx
- inb %dx, %al
- andb %al, %al
- jnz nocirr
-
- movb %bh, %al
- outb %al, %dx
- inb %dx, %al
- cmpb $0x01, %al
- je iscirr
-
-nocirr: xorw %bp, %bp
-iscirr: movw $0x3d4, %dx
- movb %bl, %al
- xorb %ah, %ah
- shlw $8, %ax
- addw $0x0c, %ax
- outw %ax, %dx
- ret
-
-cirrus1_md:
- .byte 0x1f, 0x19, 0x84
- .byte 0x20, 0x2c, 0x84
- .byte 0x22, 0x1e, 0x84
- .byte 0x31, 0x25, 0x64
- .byte 0
- .ascii "Cirrus Logic 5X0"
- .byte 0
-
-# Cirrus Logic 54XX
-cirrus5_test:
- movw $0x3c4, %dx
- movb $6, %al
- call inidx
- movb %al, %bl # BL=backup
- movw $6, %ax
- call tstidx
- cmpb $0x0f, %al
- jne c5fail
-
- movw $0x1206, %ax
- call tstidx
- cmpb $0x12, %al
- jne c5fail
-
- movb $0x1e, %al
- call inidx
- movb %al, %bh
- movb %bh, %ah
- andb $0xc0, %ah
- movb $0x1e, %al
- call tstidx
- andb $0x3f, %al
- jne c5xx
-
- movb $0x1e, %al
- movb %bh, %ah
- orb $0x3f, %ah
- call tstidx
- xorb $0x3f, %al
- andb $0x3f, %al
-c5xx: pushf
- movb $0x1e, %al
- movb %bh, %ah
- outw %ax, %dx
- popf
- je c5done
-
-c5fail: xorw %bp, %bp
-c5done: movb $6, %al
- movb %bl, %ah
- outw %ax, %dx
- ret
-
-cirrus5_md:
- .byte 0x14, 0x19, 0x84
- .byte 0x54, 0x2b, 0x84
- .byte 0
- .ascii "Cirrus Logic 54XX"
- .byte 0
-
-# Cirrus Logic 64XX -- no known extra modes, but must be identified, because
-# it's misidentified by the Ahead test.
-cirrus6_test:
- movw $0x3ce, %dx
- movb $0x0a, %al
- call inidx
- movb %al, %bl # BL=backup
- movw $0xce0a, %ax
- call tstidx
- orb %al, %al
- jne c2fail
-
- movw $0xec0a, %ax
- call tstidx
- cmpb $0x01, %al
- jne c2fail
-
- movb $0xaa, %al
- call inidx # 4X, 5X, 7X and 8X are valid 64XX chip ID's.
- shrb $4, %al
- subb $4, %al
- jz c6done
-
- decb %al
- jz c6done
-
- subb $2, %al
- jz c6done
-
- decb %al
- jz c6done
-
-c2fail: xorw %bp, %bp
-c6done: movb $0x0a, %al
- movb %bl, %ah
- outw %ax, %dx
- ret
-
-cirrus6_md:
- .byte 0
- .ascii "Cirrus Logic 64XX"
- .byte 0
-
-# Everex / Trident
-everex_test:
- movw $0x7000, %ax
- xorw %bx, %bx
- int $0x10
- cmpb $0x70, %al
- jne noevrx
-
- shrw $4, %dx
- cmpw $0x678, %dx
- je evtrid
-
- cmpw $0x236, %dx
- jne evrxok
-
-evtrid: leaw trident_md, %bp
-evrxok: ret
-
-noevrx: xorw %bp, %bp
- ret
-
-everex_md:
- .byte 0x03, 0x22, 0x50
- .byte 0x04, 0x3c, 0x50
- .byte 0x07, 0x2b, 0x64
- .byte 0x08, 0x4b, 0x64
- .byte 0x0a, 0x19, 0x84
- .byte 0x0b, 0x2c, 0x84
- .byte 0x16, 0x1e, 0x50
- .byte 0x18, 0x1b, 0x64
- .byte 0x21, 0x40, 0xa0
- .byte 0x40, 0x1e, 0x84
- .byte 0
- .ascii "Everex/Trident"
- .byte 0
-
-# Genoa.
-genoa_test:
- leaw idgenoa, %si # Check Genoa 'clues'
- xorw %ax, %ax
- movb %es:(0x37), %al
- movw %ax, %di
- movw $0x04, %cx
- decw %si
- decw %di
-l1: incw %si
- incw %di
- movb (%si), %al
- testb %al, %al
- jz l2
-
- cmpb %es:(%di), %al
-l2: loope l1
- orw %cx, %cx
- je isgen
-
- xorw %bp, %bp
-isgen: ret
-
-idgenoa: .byte 0x77, 0x00, 0x99, 0x66
-
-genoa_md:
- .byte 0x58, 0x20, 0x50
- .byte 0x5a, 0x2a, 0x64
- .byte 0x60, 0x19, 0x84
- .byte 0x61, 0x1d, 0x84
- .byte 0x62, 0x20, 0x84
- .byte 0x63, 0x2c, 0x84
- .byte 0x64, 0x3c, 0x84
- .byte 0x6b, 0x4f, 0x64
- .byte 0x72, 0x3c, 0x50
- .byte 0x74, 0x42, 0x50
- .byte 0x78, 0x4b, 0x64
- .byte 0
- .ascii "Genoa"
- .byte 0
-
-# OAK
-oak_test:
- leaw idoakvga, %si
- movw $0x08, %di
- movw $0x08, %cx
- repe
- cmpsb
- je isoak
-
- xorw %bp, %bp
-isoak: ret
-
-idoakvga: .ascii "OAK VGA "
-
-oak_md: .byte 0x4e, 0x3c, 0x50
- .byte 0x4f, 0x3c, 0x84
- .byte 0x50, 0x19, 0x84
- .byte 0x51, 0x2b, 0x84
- .byte 0
- .ascii "OAK"
- .byte 0
-
-# WD Paradise.
-paradise_test:
- leaw idparadise, %si
- movw $0x7d, %di
- movw $0x04, %cx
- repe
- cmpsb
- je ispara
-
- xorw %bp, %bp
-ispara: ret
-
-idparadise: .ascii "VGA="
-
-paradise_md:
- .byte 0x41, 0x22, 0x50
- .byte 0x47, 0x1c, 0x84
- .byte 0x55, 0x19, 0x84
- .byte 0x54, 0x2c, 0x84
- .byte 0
- .ascii "Paradise"
- .byte 0
-
-# Trident.
-trident_test:
- movw $0x3c4, %dx
- movb $0x0e, %al
- outb %al, %dx
- incw %dx
- inb %dx, %al
- xchgb %al, %ah
- xorb %al, %al
- outb %al, %dx
- inb %dx, %al
- xchgb %ah, %al
- movb %al, %bl # Strange thing ... in the book this wasn't
- andb $0x02, %bl # necessary but it worked on my card which
- jz setb2 # is a trident. Without it the screen goes
- # blurred ...
- andb $0xfd, %al
- jmp clrb2
-
-setb2: orb $0x02, %al
-clrb2: outb %al, %dx
- andb $0x0f, %ah
- cmpb $0x02, %ah
- je istrid
-
- xorw %bp, %bp
-istrid: ret
-
-trident_md:
- .byte 0x50, 0x1e, 0x50
- .byte 0x51, 0x2b, 0x50
- .byte 0x52, 0x3c, 0x50
- .byte 0x57, 0x19, 0x84
- .byte 0x58, 0x1e, 0x84
- .byte 0x59, 0x2b, 0x84
- .byte 0x5a, 0x3c, 0x84
- .byte 0
- .ascii "Trident"
- .byte 0
-
-# Tseng.
-tseng_test:
- movw $0x3cd, %dx
- inb %dx, %al # Could things be this simple ! :-)
- movb %al, %bl
- movb $0x55, %al
- outb %al, %dx
- inb %dx, %al
- movb %al, %ah
- movb %bl, %al
- outb %al, %dx
- cmpb $0x55, %ah
- je istsen
-
-isnot: xorw %bp, %bp
-istsen: ret
-
-tseng_md:
- .byte 0x26, 0x3c, 0x50
- .byte 0x2a, 0x28, 0x64
- .byte 0x23, 0x19, 0x84
- .byte 0x24, 0x1c, 0x84
- .byte 0x22, 0x2c, 0x84
- .byte 0x21, 0x3c, 0x84
- .byte 0
- .ascii "Tseng"
- .byte 0
-
-# Video7.
-video7_test:
- movw $0x3cc, %dx
- inb %dx, %al
- movw $0x3b4, %dx
- andb $0x01, %al
- jz even7
-
- movw $0x3d4, %dx
-even7: movb $0x0c, %al
- outb %al, %dx
- incw %dx
- inb %dx, %al
- movb %al, %bl
- movb $0x55, %al
- outb %al, %dx
- inb %dx, %al
- decw %dx
- movb $0x1f, %al
- outb %al, %dx
- incw %dx
- inb %dx, %al
- movb %al, %bh
- decw %dx
- movb $0x0c, %al
- outb %al, %dx
- incw %dx
- movb %bl, %al
- outb %al, %dx
- movb $0x55, %al
- xorb $0xea, %al
- cmpb %bh, %al
- jne isnot
-
- movb $VIDEO_FIRST_V7>>8, svga_prefix # Use special mode switching
- ret
-
-video7_md:
- .byte 0x40, 0x2b, 0x50
- .byte 0x43, 0x3c, 0x50
- .byte 0x44, 0x3c, 0x64
- .byte 0x41, 0x19, 0x84
- .byte 0x42, 0x2c, 0x84
- .byte 0x45, 0x1c, 0x84
- .byte 0
- .ascii "Video 7"
- .byte 0
-
-# Realtek VGA
-realtek_test:
- leaw idrtvga, %si
- movw $0x45, %di
- movw $0x0b, %cx
- repe
- cmpsb
- je isrt
-
- xorw %bp, %bp
-isrt: ret
-
-idrtvga: .ascii "REALTEK VGA"
-
-realtek_md:
- .byte 0x1a, 0x3c, 0x50
- .byte 0x1b, 0x19, 0x84
- .byte 0x1c, 0x1e, 0x84
- .byte 0x1d, 0x2b, 0x84
- .byte 0x1e, 0x3c, 0x84
- .byte 0
- .ascii "REALTEK"
- .byte 0
-
-#endif /* CONFIG_VIDEO_SVGA */
-
-# User-defined local mode table (VGA only)
-#ifdef CONFIG_VIDEO_LOCAL
-local_modes:
- leaw local_mode_table, %si
-locm1: lodsw
- orw %ax, %ax
- jz locm2
-
- stosw
- movsw
- jmp locm1
-
-locm2: ret
-
-# This is the table of local video modes which can be supplied manually
-# by the user. Each entry consists of mode ID (word) and dimensions
-# (byte for column count and another byte for row count). These modes
-# are placed before all SVGA and VESA modes and override them if table
-# compacting is enabled. The table must end with a zero word followed
-# by NUL-terminated video adapter name.
-local_mode_table:
- .word 0x0100 # Example: 40x25
- .byte 25,40
- .word 0
- .ascii "Local"
- .byte 0
-#endif /* CONFIG_VIDEO_LOCAL */
-
-# Read a key and return the ASCII code in al, scan code in ah
-getkey: xorb %ah, %ah
- int $0x16
- ret
-
-# Read a key with a timeout of 30 seconds.
-# The hardware clock is used to get the time.
-getkt: call gettime
- addb $30, %al # Wait 30 seconds
- cmpb $60, %al
- jl lminute
-
- subb $60, %al
-lminute:
- movb %al, %cl
-again: movb $0x01, %ah
- int $0x16
- jnz getkey # key pressed, so get it
-
- call gettime
- cmpb %cl, %al
- jne again
-
- movb $0x20, %al # timeout, return `space'
- ret
-
-# Flush the keyboard buffer
-flush: movb $0x01, %ah
- int $0x16
- jz empty
-
- xorb %ah, %ah
- int $0x16
- jmp flush
-
-empty: ret
-
-# Print hexadecimal number.
-prthw: pushw %ax
- movb %ah, %al
- call prthb
- popw %ax
-prthb: pushw %ax
- shrb $4, %al
- call prthn
- popw %ax
- andb $0x0f, %al
-prthn: cmpb $0x0a, %al
- jc prth1
-
- addb $0x07, %al
-prth1: addb $0x30, %al
- jmp prtchr
-
-# Print decimal number in al
-prtdec: pushw %ax
- pushw %cx
- xorb %ah, %ah
- movb $0x0a, %cl
- idivb %cl
- cmpb $0x09, %al
- jbe lt100
-
- call prtdec
- jmp skip10
-
-lt100: addb $0x30, %al
- call prtchr
-skip10: movb %ah, %al
- addb $0x30, %al
- call prtchr
- popw %cx
- popw %ax
- ret
-
-store_edid:
-#ifdef CONFIG_FIRMWARE_EDID
- pushw %es # just save all registers
- pushw %ax
- pushw %bx
- pushw %cx
- pushw %dx
- pushw %di
-
- pushw %fs
- popw %es
-
- movl $0x13131313, %eax # memset block with 0x13
- movw $32, %cx
- movw $0x140, %di
- cld
- rep
- stosl
-
- cmpw $0x0200, vbe_version # only do EDID on >= VBE2.0
- jl no_edid
-
- pushw %es # save ES
- xorw %di, %di # Report Capability
- pushw %di
- popw %es # ES:DI must be 0:0
- movw $0x4f15, %ax
- xorw %bx, %bx
- xorw %cx, %cx
- int $0x10
- popw %es # restore ES
-
- cmpb $0x00, %ah # call successful
- jne no_edid
-
- cmpb $0x4f, %al # function supported
- jne no_edid
-
- movw $0x4f15, %ax # do VBE/DDC
- movw $0x01, %bx
- movw $0x00, %cx
- movw $0x01, %dx
- movw $0x140, %di
- int $0x10
-
-no_edid:
- popw %di # restore all registers
- popw %dx
- popw %cx
- popw %bx
- popw %ax
- popw %es
-#endif
- ret
-
-# VIDEO_SELECT-only variables
-mt_end: .word 0 # End of video mode table if built
-edit_buf: .space 6 # Line editor buffer
-card_name: .word 0 # Pointer to adapter name
-scanning: .byte 0 # Performing mode scan
-do_restore: .byte 0 # Screen contents altered during mode change
-svga_prefix: .byte VIDEO_FIRST_BIOS>>8 # Default prefix for BIOS modes
-graphic_mode: .byte 0 # Graphic mode with a linear frame buffer
-dac_size: .byte 6 # DAC bit depth
-vbe_version: .word 0 # VBE bios version
-
-# Status messages
-keymsg: .ascii "Press <RETURN> to see video modes available, "
- .ascii "<SPACE> to continue or wait 30 secs"
- .byte 0x0d, 0x0a, 0
-
-listhdr: .byte 0x0d, 0x0a
- .ascii "Mode: COLSxROWS:"
-
-crlft: .byte 0x0d, 0x0a, 0
-
-prompt: .byte 0x0d, 0x0a
- .asciz "Enter mode number or `scan': "
-
-unknt: .asciz "Unknown mode ID. Try again."
-
-badmdt: .ascii "You passed an undefined mode number."
- .byte 0x0d, 0x0a, 0
-
-vesaer: .ascii "Error: Scanning of VESA modes failed. Please "
- .ascii "report to <mj@ucw.cz>."
- .byte 0x0d, 0x0a, 0
-
-old_name: .asciz "CGA/MDA/HGA"
-
-ega_name: .asciz "EGA"
-
-svga_name: .ascii " "
-
-vga_name: .asciz "VGA"
-
-vesa_name: .asciz "VESA"
-
-name_bann: .asciz "Video adapter: "
-#endif /* CONFIG_VIDEO_SELECT */
-
-# Other variables:
-adapter: .byte 0 # Video adapter: 0=CGA/MDA/HGA,1=EGA,2=VGA
-video_segment: .word 0xb800 # Video memory segment
-force_size: .word 0 # Use this size instead of the one in BIOS vars
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index b26378815b91..941a7e3aa5fb 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.21-rc3
-# Wed Mar 7 15:29:47 2007
+# Linux kernel version: 2.6.21-git3
+# Tue May 1 07:30:48 2007
#
CONFIG_X86_64=y
CONFIG_64BIT=y
@@ -118,11 +118,11 @@ CONFIG_X86_PC=y
# CONFIG_X86_VSMP is not set
# CONFIG_MK8 is not set
# CONFIG_MPSC is not set
-# CONFIG_MCORE2 is not set
-CONFIG_GENERIC_CPU=y
-CONFIG_X86_L1_CACHE_BYTES=128
-CONFIG_X86_L1_CACHE_SHIFT=7
-CONFIG_X86_INTERNODE_CACHE_BYTES=128
+CONFIG_MCORE2=y
+# CONFIG_GENERIC_CPU is not set
+CONFIG_X86_L1_CACHE_BYTES=64
+CONFIG_X86_L1_CACHE_SHIFT=6
+CONFIG_X86_INTERNODE_CACHE_BYTES=64
CONFIG_X86_TSC=y
CONFIG_X86_GOOD_APIC=y
# CONFIG_MICROCODE is not set
@@ -174,6 +174,7 @@ CONFIG_X86_MCE_INTEL=y
CONFIG_X86_MCE_AMD=y
# CONFIG_KEXEC is not set
# CONFIG_CRASH_DUMP is not set
+# CONFIG_RELOCATABLE is not set
CONFIG_PHYSICAL_START=0x200000
CONFIG_SECCOMP=y
# CONFIG_CC_STACKPROTECTOR is not set
@@ -182,7 +183,6 @@ CONFIG_HZ_250=y
# CONFIG_HZ_300 is not set
# CONFIG_HZ_1000 is not set
CONFIG_HZ=250
-# CONFIG_REORDER is not set
CONFIG_K8_NB=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
@@ -218,7 +218,6 @@ CONFIG_ACPI_HOTPLUG_CPU=y
CONFIG_ACPI_THERMAL=y
CONFIG_ACPI_NUMA=y
# CONFIG_ACPI_ASUS is not set
-# CONFIG_ACPI_IBM is not set
# CONFIG_ACPI_TOSHIBA is not set
CONFIG_ACPI_BLACKLIST_YEAR=0
# CONFIG_ACPI_DEBUG is not set
@@ -243,7 +242,7 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
-# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
#
# CPUFreq processor drivers
@@ -299,7 +298,6 @@ CONFIG_NET=y
#
# Networking options
#
-# CONFIG_NETDEBUG is not set
CONFIG_PACKET=y
# CONFIG_PACKET_MMAP is not set
CONFIG_UNIX=y
@@ -334,6 +332,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
CONFIG_IPV6=y
# CONFIG_IPV6_PRIVACY is not set
# CONFIG_IPV6_ROUTER_PREF is not set
+# CONFIG_IPV6_OPTIMISTIC_DAD is not set
# CONFIG_INET6_AH is not set
# CONFIG_INET6_ESP is not set
# CONFIG_INET6_IPCOMP is not set
@@ -389,6 +388,13 @@ CONFIG_IPV6_SIT=y
# CONFIG_HAMRADIO is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+
+#
+# Wireless
+#
+# CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_EXT is not set
# CONFIG_IEEE80211 is not set
#
@@ -409,10 +415,6 @@ CONFIG_FW_LOADER=y
# Connector - unified userspace <-> kernelspace linker
#
# CONFIG_CONNECTOR is not set
-
-#
-# Memory Technology Devices (MTD)
-#
# CONFIG_MTD is not set
#
@@ -459,6 +461,7 @@ CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
# CONFIG_SGI_IOC4 is not set
# CONFIG_TIFM_CORE is not set
# CONFIG_SONY_LAPTOP is not set
+# CONFIG_THINKPAD_ACPI is not set
#
# ATA/ATAPI/MFM/RLL support
@@ -494,7 +497,6 @@ CONFIG_BLK_DEV_IDEPCI=y
# CONFIG_BLK_DEV_RZ1000 is not set
CONFIG_BLK_DEV_IDEDMA_PCI=y
# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
-CONFIG_IDEDMA_PCI_AUTO=y
# CONFIG_IDEDMA_ONLYDISK is not set
# CONFIG_BLK_DEV_AEC62XX is not set
# CONFIG_BLK_DEV_ALI15X3 is not set
@@ -525,7 +527,6 @@ CONFIG_BLK_DEV_PDC202XX_NEW=y
# CONFIG_IDE_ARM is not set
CONFIG_BLK_DEV_IDEDMA=y
# CONFIG_IDEDMA_IVB is not set
-CONFIG_IDEDMA_AUTO=y
# CONFIG_BLK_DEV_HD is not set
#
@@ -584,11 +585,9 @@ CONFIG_AIC79XX_DEBUG_MASK=0
# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
# CONFIG_SCSI_AIC94XX is not set
# CONFIG_SCSI_ARCMSR is not set
-CONFIG_MEGARAID_NEWGEN=y
-CONFIG_MEGARAID_MM=y
-CONFIG_MEGARAID_MAILBOX=y
+# CONFIG_MEGARAID_NEWGEN is not set
# CONFIG_MEGARAID_LEGACY is not set
-CONFIG_MEGARAID_SAS=y
+# CONFIG_MEGARAID_SAS is not set
# CONFIG_SCSI_HPTIOP is not set
# CONFIG_SCSI_BUSLOGIC is not set
# CONFIG_SCSI_DMX3191D is not set
@@ -608,6 +607,7 @@ CONFIG_MEGARAID_SAS=y
# CONFIG_SCSI_DC395x is not set
# CONFIG_SCSI_DC390T is not set
# CONFIG_SCSI_DEBUG is not set
+# CONFIG_SCSI_ESP_CORE is not set
# CONFIG_SCSI_SRP is not set
#
@@ -636,6 +636,7 @@ CONFIG_SATA_ACPI=y
# CONFIG_PATA_AMD is not set
# CONFIG_PATA_ARTOP is not set
# CONFIG_PATA_ATIIXP is not set
+# CONFIG_PATA_CMD640_PCI is not set
# CONFIG_PATA_CMD64X is not set
# CONFIG_PATA_CS5520 is not set
# CONFIG_PATA_CS5530 is not set
@@ -687,7 +688,7 @@ CONFIG_BLK_DEV_DM=y
CONFIG_FUSION=y
CONFIG_FUSION_SPI=y
# CONFIG_FUSION_FC is not set
-CONFIG_FUSION_SAS=y
+# CONFIG_FUSION_SAS is not set
CONFIG_FUSION_MAX_SGE=128
# CONFIG_FUSION_CTL is not set
@@ -700,19 +701,22 @@ CONFIG_IEEE1394=y
# Subsystem Options
#
# CONFIG_IEEE1394_VERBOSEDEBUG is not set
-# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set
#
-# Device Drivers
+# Controllers
+#
+
+#
+# Texas Instruments PCILynx requires I2C
#
-# CONFIG_IEEE1394_PCILYNX is not set
CONFIG_IEEE1394_OHCI1394=y
#
-# Protocol Drivers
+# Protocols
#
# CONFIG_IEEE1394_VIDEO1394 is not set
# CONFIG_IEEE1394_SBP2 is not set
+# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set
# CONFIG_IEEE1394_ETH1394 is not set
# CONFIG_IEEE1394_DV1394 is not set
CONFIG_IEEE1394_RAWIO=y
@@ -775,7 +779,8 @@ CONFIG_TULIP=y
# CONFIG_HP100 is not set
CONFIG_NET_PCI=y
# CONFIG_PCNET32 is not set
-# CONFIG_AMD8111_ETH is not set
+CONFIG_AMD8111_ETH=y
+# CONFIG_AMD8111E_NAPI is not set
# CONFIG_ADAPTEC_STARFIRE is not set
CONFIG_B44=y
CONFIG_FORCEDETH=y
@@ -837,9 +842,10 @@ CONFIG_S2IO=m
# CONFIG_TR is not set
#
-# Wireless LAN (non-hamradio)
+# Wireless LAN
#
-# CONFIG_NET_RADIO is not set
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
#
# Wan interfaces
@@ -853,7 +859,6 @@ CONFIG_S2IO=m
# CONFIG_SHAPER is not set
CONFIG_NETCONSOLE=y
CONFIG_NETPOLL=y
-# CONFIG_NETPOLL_RX is not set
# CONFIG_NETPOLL_TRAP is not set
CONFIG_NET_POLL_CONTROLLER=y
@@ -987,57 +992,7 @@ CONFIG_HPET_MMAP=y
#
# I2C support
#
-CONFIG_I2C=m
-CONFIG_I2C_CHARDEV=m
-
-#
-# I2C Algorithms
-#
-# CONFIG_I2C_ALGOBIT is not set
-# CONFIG_I2C_ALGOPCF is not set
-# CONFIG_I2C_ALGOPCA is not set
-
-#
-# I2C Hardware Bus support
-#
-# CONFIG_I2C_ALI1535 is not set
-# CONFIG_I2C_ALI1563 is not set
-# CONFIG_I2C_ALI15X3 is not set
-# CONFIG_I2C_AMD756 is not set
-# CONFIG_I2C_AMD8111 is not set
-# CONFIG_I2C_I801 is not set
-# CONFIG_I2C_I810 is not set
-# CONFIG_I2C_PIIX4 is not set
-CONFIG_I2C_ISA=m
-# CONFIG_I2C_NFORCE2 is not set
-# CONFIG_I2C_OCORES is not set
-# CONFIG_I2C_PARPORT_LIGHT is not set
-# CONFIG_I2C_PASEMI is not set
-# CONFIG_I2C_PROSAVAGE is not set
-# CONFIG_I2C_SAVAGE4 is not set
-# CONFIG_I2C_SIS5595 is not set
-# CONFIG_I2C_SIS630 is not set
-# CONFIG_I2C_SIS96X is not set
-# CONFIG_I2C_STUB is not set
-# CONFIG_I2C_VIA is not set
-# CONFIG_I2C_VIAPRO is not set
-# CONFIG_I2C_VOODOO3 is not set
-# CONFIG_I2C_PCA_ISA is not set
-
-#
-# Miscellaneous I2C Chip support
-#
-# CONFIG_SENSORS_DS1337 is not set
-# CONFIG_SENSORS_DS1374 is not set
-# CONFIG_SENSORS_EEPROM is not set
-# CONFIG_SENSORS_PCF8574 is not set
-# CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_PCF8591 is not set
-# CONFIG_SENSORS_MAX6875 is not set
-# CONFIG_I2C_DEBUG_CORE is not set
-# CONFIG_I2C_DEBUG_ALGO is not set
-# CONFIG_I2C_DEBUG_BUS is not set
-# CONFIG_I2C_DEBUG_CHIP is not set
+# CONFIG_I2C is not set
#
# SPI support
@@ -1053,54 +1008,8 @@ CONFIG_I2C_ISA=m
#
# Hardware Monitoring support
#
-CONFIG_HWMON=y
+# CONFIG_HWMON is not set
# CONFIG_HWMON_VID is not set
-# CONFIG_SENSORS_ABITUGURU is not set
-# CONFIG_SENSORS_ADM1021 is not set
-# CONFIG_SENSORS_ADM1025 is not set
-# CONFIG_SENSORS_ADM1026 is not set
-# CONFIG_SENSORS_ADM1029 is not set
-# CONFIG_SENSORS_ADM1031 is not set
-# CONFIG_SENSORS_ADM9240 is not set
-# CONFIG_SENSORS_K8TEMP is not set
-# CONFIG_SENSORS_ASB100 is not set
-# CONFIG_SENSORS_ATXP1 is not set
-# CONFIG_SENSORS_DS1621 is not set
-# CONFIG_SENSORS_F71805F is not set
-# CONFIG_SENSORS_FSCHER is not set
-# CONFIG_SENSORS_FSCPOS is not set
-# CONFIG_SENSORS_GL518SM is not set
-# CONFIG_SENSORS_GL520SM is not set
-# CONFIG_SENSORS_IT87 is not set
-# CONFIG_SENSORS_LM63 is not set
-# CONFIG_SENSORS_LM75 is not set
-# CONFIG_SENSORS_LM77 is not set
-# CONFIG_SENSORS_LM78 is not set
-# CONFIG_SENSORS_LM80 is not set
-# CONFIG_SENSORS_LM83 is not set
-# CONFIG_SENSORS_LM85 is not set
-# CONFIG_SENSORS_LM87 is not set
-# CONFIG_SENSORS_LM90 is not set
-# CONFIG_SENSORS_LM92 is not set
-# CONFIG_SENSORS_MAX1619 is not set
-# CONFIG_SENSORS_PC87360 is not set
-# CONFIG_SENSORS_PC87427 is not set
-# CONFIG_SENSORS_SIS5595 is not set
-# CONFIG_SENSORS_SMSC47M1 is not set
-# CONFIG_SENSORS_SMSC47M192 is not set
-CONFIG_SENSORS_SMSC47B397=m
-# CONFIG_SENSORS_VIA686A is not set
-# CONFIG_SENSORS_VT1211 is not set
-# CONFIG_SENSORS_VT8231 is not set
-# CONFIG_SENSORS_W83781D is not set
-# CONFIG_SENSORS_W83791D is not set
-# CONFIG_SENSORS_W83792D is not set
-# CONFIG_SENSORS_W83793 is not set
-# CONFIG_SENSORS_W83L785TS is not set
-# CONFIG_SENSORS_W83627HF is not set
-# CONFIG_SENSORS_W83627EHF is not set
-# CONFIG_SENSORS_HDAPS is not set
-# CONFIG_HWMON_DEBUG_CHIP is not set
#
# Multifunction device drivers
@@ -1147,8 +1056,9 @@ CONFIG_SOUND=y
# Open Sound System
#
CONFIG_SOUND_PRIME=y
-# CONFIG_OBSOLETE_OSS is not set
+CONFIG_OBSOLETE_OSS=y
# CONFIG_SOUND_BT878 is not set
+# CONFIG_SOUND_ES1371 is not set
CONFIG_SOUND_ICH=y
# CONFIG_SOUND_TRIDENT is not set
# CONFIG_SOUND_MSNDCLAS is not set
@@ -1163,6 +1073,14 @@ CONFIG_HID=y
# CONFIG_HID_DEBUG is not set
#
+# USB Input Devices
+#
+CONFIG_USB_HID=y
+# CONFIG_USB_HIDINPUT_POWERBOOK is not set
+# CONFIG_HID_FF is not set
+# CONFIG_USB_HIDDEV is not set
+
+#
# USB support
#
CONFIG_USB_ARCH_HAS_HCD=y
@@ -1175,6 +1093,7 @@ CONFIG_USB=y
# Miscellaneous USB options
#
CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_DEVICE_CLASS is not set
# CONFIG_USB_DYNAMIC_MINORS is not set
# CONFIG_USB_SUSPEND is not set
# CONFIG_USB_OTG is not set
@@ -1225,10 +1144,6 @@ CONFIG_USB_STORAGE=y
#
# USB Input Devices
#
-CONFIG_USB_HID=y
-# CONFIG_USB_HIDINPUT_POWERBOOK is not set
-# CONFIG_HID_FF is not set
-# CONFIG_USB_HIDDEV is not set
# CONFIG_USB_AIPTEK is not set
# CONFIG_USB_WACOM is not set
# CONFIG_USB_ACECAD is not set
@@ -1556,7 +1471,7 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_LOG_BUF_SHIFT=18
CONFIG_DETECT_SOFTLOCKUP=y
# CONFIG_SCHEDSTATS is not set
-# CONFIG_TIMER_STATS is not set
+CONFIG_TIMER_STATS=y
# CONFIG_DEBUG_SLAB is not set
# CONFIG_DEBUG_RT_MUTEXES is not set
# CONFIG_RT_MUTEX_TESTER is not set
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index 071100ea1251..185399baaf6d 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -5,6 +5,11 @@
* This tricks binfmt_elf.c into loading 32bit binaries using lots
* of ugly preprocessor tricks. Talk about very very poor man's inheritance.
*/
+#define __ASM_X86_64_ELF_H 1
+
+#undef ELF_CLASS
+#define ELF_CLASS ELFCLASS32
+
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/rwsem.h>
@@ -50,9 +55,6 @@ struct elf_phdr;
#undef ELF_ARCH
#define ELF_ARCH EM_386
-#undef ELF_CLASS
-#define ELF_CLASS ELFCLASS32
-
#define ELF_DATA ELFDATA2LSB
#define USE_ELF_CORE_DUMP 1
@@ -136,7 +138,7 @@ struct elf_prpsinfo
#define user user32
-#define __ASM_X86_64_ELF_H 1
+#undef elf_read_implies_exec
#define elf_read_implies_exec(ex, executable_stack) (executable_stack != EXSTACK_DISABLE_X)
//#include <asm/ia32.h>
#include <linux/elf.h>
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index 796df6992f62..c48087db6f75 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -481,11 +481,7 @@ ia32_sys_call_table:
.quad sys_symlink
.quad sys_lstat
.quad sys_readlink /* 85 */
-#ifdef CONFIG_IA32_AOUT
.quad sys_uselib
-#else
- .quad quiet_ni_syscall
-#endif
.quad sys_swapon
.quad sys_reboot
.quad compat_sys_old_readdir
diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
index 568ff0df89e7..fc4419ff0355 100644
--- a/arch/x86_64/ia32/syscall32.c
+++ b/arch/x86_64/ia32/syscall32.c
@@ -13,6 +13,7 @@
#include <asm/proto.h>
#include <asm/tlbflush.h>
#include <asm/ia32_unistd.h>
+#include <asm/vsyscall32.h>
extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index bb47e86f3d02..4d94c51803d8 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -8,7 +8,8 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
x8664_ksyms.o i387.o syscall.o vsyscall.o \
setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
- pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o
+ pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o bugs.o \
+ perfctr-watchdog.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o
@@ -21,8 +22,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o tsc_sync.o
obj-y += apic.o nmi.o
-obj-y += io_apic.o mpparse.o \
- genapic.o genapic_cluster.o genapic_flat.o
+obj-y += io_apic.o mpparse.o genapic.o genapic_flat.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_PM) += suspend.o
@@ -58,3 +58,4 @@ i8237-y += ../../i386/kernel/i8237.o
msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
alternative-y += ../../i386/kernel/alternative.o
pcspeaker-y += ../../i386/kernel/pcspeaker.o
+perfctr-watchdog-y += ../../i386/kernel/cpu/perfctr-watchdog.o
diff --git a/arch/x86_64/kernel/acpi/sleep.c b/arch/x86_64/kernel/acpi/sleep.c
index e1548fbe95ae..195b7034a148 100644
--- a/arch/x86_64/kernel/acpi/sleep.c
+++ b/arch/x86_64/kernel/acpi/sleep.c
@@ -60,19 +60,6 @@ extern char wakeup_start, wakeup_end;
extern unsigned long acpi_copy_wakeup_routine(unsigned long);
-static pgd_t low_ptr;
-
-static void init_low_mapping(void)
-{
- pgd_t *slot0 = pgd_offset(current->mm, 0UL);
- low_ptr = *slot0;
- /* FIXME: We're playing with the current task's page tables here, which
- * is potentially dangerous on SMP systems.
- */
- set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET));
- local_flush_tlb();
-}
-
/**
* acpi_save_state_mem - save kernel state
*
@@ -81,8 +68,6 @@ static void init_low_mapping(void)
*/
int acpi_save_state_mem(void)
{
- init_low_mapping();
-
memcpy((void *)acpi_wakeup_address, &wakeup_start,
&wakeup_end - &wakeup_start);
acpi_copy_wakeup_routine(acpi_wakeup_address);
@@ -95,8 +80,6 @@ int acpi_save_state_mem(void)
*/
void acpi_restore_state_mem(void)
{
- set_pgd(pgd_offset(current->mm, 0UL), low_ptr);
- local_flush_tlb();
}
/**
@@ -109,10 +92,11 @@ void acpi_restore_state_mem(void)
*/
void __init acpi_reserve_bootmem(void)
{
- acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
- if ((&wakeup_end - &wakeup_start) > PAGE_SIZE)
+ acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2);
+ if ((&wakeup_end - &wakeup_start) > (PAGE_SIZE*2))
printk(KERN_CRIT
- "ACPI: Wakeup code way too big, will crash on attempt to suspend\n");
+ "ACPI: Wakeup code way too big, will crash on attempt"
+ " to suspend\n");
}
static int __init acpi_sleep_setup(char *str)
diff --git a/arch/x86_64/kernel/acpi/wakeup.S b/arch/x86_64/kernel/acpi/wakeup.S
index 185faa911db5..8550a6ffa275 100644
--- a/arch/x86_64/kernel/acpi/wakeup.S
+++ b/arch/x86_64/kernel/acpi/wakeup.S
@@ -1,6 +1,7 @@
.text
#include <linux/linkage.h>
#include <asm/segment.h>
+#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/msr.h>
@@ -30,22 +31,28 @@ wakeup_code:
cld
# setup data segment
movw %cs, %ax
- movw %ax, %ds # Make ds:0 point to wakeup_start
+ movw %ax, %ds # Make ds:0 point to wakeup_start
movw %ax, %ss
- mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board
+ # Private stack is needed for ASUS board
+ mov $(wakeup_stack - wakeup_code), %sp
- pushl $0 # Kill any dangerous flags
+ pushl $0 # Kill any dangerous flags
popfl
movl real_magic - wakeup_code, %eax
cmpl $0x12345678, %eax
jne bogus_real_magic
+ call verify_cpu # Verify the cpu supports long
+ # mode
+ testl %eax, %eax
+ jnz no_longmode
+
testl $1, video_flags - wakeup_code
jz 1f
lcall $0xc000,$3
movw %cs, %ax
- movw %ax, %ds # Bios might have played with that
+ movw %ax, %ds # Bios might have played with that
movw %ax, %ss
1:
@@ -61,12 +68,15 @@ wakeup_code:
movb $0xa2, %al ; outb %al, $0x80
- lidt %ds:idt_48a - wakeup_code
- xorl %eax, %eax
- movw %ds, %ax # (Convert %ds:gdt to a linear ptr)
- shll $4, %eax
- addl $(gdta - wakeup_code), %eax
- movl %eax, gdt_48a +2 - wakeup_code
+ mov %ds, %ax # Find 32bit wakeup_code addr
+ movzx %ax, %esi # (Convert %ds:gdt to a liner ptr)
+ shll $4, %esi
+ # Fix up the vectors
+ addl %esi, wakeup_32_vector - wakeup_code
+ addl %esi, wakeup_long64_vector - wakeup_code
+ addl %esi, gdt_48a + 2 - wakeup_code # Fixup the gdt pointer
+
+ lidtl %ds:idt_48a - wakeup_code
lgdtl %ds:gdt_48a - wakeup_code # load gdt with whatever is
# appropriate
@@ -75,86 +85,63 @@ wakeup_code:
jmp 1f
1:
- .byte 0x66, 0xea # prefix + jmpi-opcode
- .long wakeup_32 - __START_KERNEL_map
- .word __KERNEL_CS
+ ljmpl *(wakeup_32_vector - wakeup_code)
+
+ .balign 4
+wakeup_32_vector:
+ .long wakeup_32 - wakeup_code
+ .word __KERNEL32_CS, 0
.code32
wakeup_32:
# Running in this code, but at low address; paging is not yet turned on.
movb $0xa5, %al ; outb %al, $0x80
- /* Check if extended functions are implemented */
- movl $0x80000000, %eax
- cpuid
- cmpl $0x80000000, %eax
- jbe bogus_cpu
- wbinvd
- mov $0x80000001, %eax
- cpuid
- btl $29, %edx
- jnc bogus_cpu
- movl %edx,%edi
-
- movw $__KERNEL_DS, %ax
- movw %ax, %ds
- movw %ax, %es
- movw %ax, %fs
- movw %ax, %gs
-
- movw $__KERNEL_DS, %ax
- movw %ax, %ss
+ movl $__KERNEL_DS, %eax
+ movl %eax, %ds
- mov $(wakeup_stack - __START_KERNEL_map), %esp
- movl saved_magic - __START_KERNEL_map, %eax
- cmpl $0x9abcdef0, %eax
- jne bogus_32_magic
+ movw $0x0e00 + 'i', %ds:(0xb8012)
+ movb $0xa8, %al ; outb %al, $0x80;
/*
* Prepare for entering 64bits mode
*/
- /* Enable PAE mode and PGE */
+ /* Enable PAE */
xorl %eax, %eax
btsl $5, %eax
- btsl $7, %eax
movl %eax, %cr4
/* Setup early boot stage 4 level pagetables */
- movl $(wakeup_level4_pgt - __START_KERNEL_map), %eax
+ leal (wakeup_level4_pgt - wakeup_code)(%esi), %eax
movl %eax, %cr3
- /* Setup EFER (Extended Feature Enable Register) */
- movl $MSR_EFER, %ecx
- rdmsr
- /* Fool rdmsr and reset %eax to avoid dependences */
- xorl %eax, %eax
+ /* Check if nx is implemented */
+ movl $0x80000001, %eax
+ cpuid
+ movl %edx,%edi
+
/* Enable Long Mode */
+ xorl %eax, %eax
btsl $_EFER_LME, %eax
- /* Enable System Call */
- btsl $_EFER_SCE, %eax
- /* No Execute supported? */
+ /* No Execute supported? */
btl $20,%edi
jnc 1f
btsl $_EFER_NX, %eax
-1:
/* Make changes effective */
+1: movl $MSR_EFER, %ecx
+ xorl %edx, %edx
wrmsr
- wbinvd
xorl %eax, %eax
btsl $31, %eax /* Enable paging and in turn activate Long Mode */
btsl $0, %eax /* Enable protected mode */
- btsl $1, %eax /* Enable MP */
- btsl $4, %eax /* Enable ET */
- btsl $5, %eax /* Enable NE */
- btsl $16, %eax /* Enable WP */
- btsl $18, %eax /* Enable AM */
/* Make changes effective */
movl %eax, %cr0
+
/* At this point:
CR4.PAE must be 1
CS.L must be 0
@@ -162,11 +149,6 @@ wakeup_32:
Next instruction must be a branch
This must be on identity-mapped page
*/
- jmp reach_compatibility_mode
-reach_compatibility_mode:
- movw $0x0e00 + 'i', %ds:(0xb8012)
- movb $0xa8, %al ; outb %al, $0x80;
-
/*
* At this point we're in long mode but in 32bit compatibility mode
* with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
@@ -174,24 +156,19 @@ reach_compatibility_mode:
* the new gdt/idt that has __KERNEL_CS with CS.L = 1.
*/
- movw $0x0e00 + 'n', %ds:(0xb8014)
- movb $0xa9, %al ; outb %al, $0x80
-
- /* Load new GDT with the 64bit segment using 32bit descriptor */
- movl $(pGDT32 - __START_KERNEL_map), %eax
- lgdt (%eax)
-
- movl $(wakeup_jumpvector - __START_KERNEL_map), %eax
/* Finally jump in 64bit mode */
- ljmp *(%eax)
+ ljmp *(wakeup_long64_vector - wakeup_code)(%esi)
-wakeup_jumpvector:
- .long wakeup_long64 - __START_KERNEL_map
- .word __KERNEL_CS
+ .balign 4
+wakeup_long64_vector:
+ .long wakeup_long64 - wakeup_code
+ .word __KERNEL_CS, 0
.code64
- /* Hooray, we are in Long 64-bit mode (but still running in low memory) */
+ /* Hooray, we are in Long 64-bit mode (but still running in
+ * low memory)
+ */
wakeup_long64:
/*
* We must switch to a new descriptor in kernel space for the GDT
@@ -199,7 +176,15 @@ wakeup_long64:
* addresses where we're currently running on. We have to do that here
* because in 32bit we couldn't load a 64bit linear address.
*/
- lgdt cpu_gdt_descr - __START_KERNEL_map
+ lgdt cpu_gdt_descr
+
+ movw $0x0e00 + 'n', %ds:(0xb8014)
+ movb $0xa9, %al ; outb %al, $0x80
+
+ movq saved_magic, %rax
+ movq $0x123456789abcdef0, %rdx
+ cmpq %rdx, %rax
+ jne bogus_64_magic
movw $0x0e00 + 'u', %ds:(0xb8016)
@@ -211,75 +196,58 @@ wakeup_long64:
movw %ax, %es
movw %ax, %fs
movw %ax, %gs
- movq saved_esp, %rsp
+ movq saved_rsp, %rsp
movw $0x0e00 + 'x', %ds:(0xb8018)
- movq saved_ebx, %rbx
- movq saved_edi, %rdi
- movq saved_esi, %rsi
- movq saved_ebp, %rbp
+ movq saved_rbx, %rbx
+ movq saved_rdi, %rdi
+ movq saved_rsi, %rsi
+ movq saved_rbp, %rbp
movw $0x0e00 + '!', %ds:(0xb801a)
- movq saved_eip, %rax
+ movq saved_rip, %rax
jmp *%rax
.code32
.align 64
gdta:
+ /* Its good to keep gdt in sync with one in trampoline.S */
.word 0, 0, 0, 0 # dummy
-
- .word 0, 0, 0, 0 # unused
-
- .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
- .word 0 # base address = 0
- .word 0x9B00 # code read/exec. ??? Why I need 0x9B00 (as opposed to 0x9A00 in order for this to work?)
- .word 0x00CF # granularity = 4096, 386
- # (+5th nibble of limit)
-
- .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
- .word 0 # base address = 0
- .word 0x9200 # data read/write
- .word 0x00CF # granularity = 4096, 386
- # (+5th nibble of limit)
-# this is 64bit descriptor for code
- .word 0xFFFF
- .word 0
- .word 0x9A00 # code read/exec
- .word 0x00AF # as above, but it is long mode and with D=0
+ /* ??? Why I need the accessed bit set in order for this to work? */
+ .quad 0x00cf9b000000ffff # __KERNEL32_CS
+ .quad 0x00af9b000000ffff # __KERNEL_CS
+ .quad 0x00cf93000000ffff # __KERNEL_DS
idt_48a:
.word 0 # idt limit = 0
.word 0, 0 # idt base = 0L
gdt_48a:
- .word 0x8000 # gdt limit=2048,
+ .word 0x800 # gdt limit=2048,
# 256 GDT entries
- .word 0, 0 # gdt base (filled in later)
-
+ .long gdta - wakeup_code # gdt base (relocated in later)
-real_save_gdt: .word 0
- .quad 0
real_magic: .quad 0
video_mode: .quad 0
video_flags: .quad 0
+.code16
bogus_real_magic:
- movb $0xba,%al ; outb %al,$0x80
+ movb $0xba,%al ; outb %al,$0x80
jmp bogus_real_magic
-bogus_32_magic:
+.code64
+bogus_64_magic:
movb $0xb3,%al ; outb %al,$0x80
- jmp bogus_32_magic
+ jmp bogus_64_magic
-bogus_31_magic:
- movb $0xb1,%al ; outb %al,$0x80
- jmp bogus_31_magic
-
-bogus_cpu:
- movb $0xbc,%al ; outb %al,$0x80
- jmp bogus_cpu
+.code16
+no_longmode:
+ movb $0xbc,%al ; outb %al,$0x80
+ jmp no_longmode
+#include "../verify_cpu.S"
/* This code uses an extended set of video mode numbers. These include:
* Aliases for standard modes
@@ -301,6 +269,7 @@ bogus_cpu:
#define VIDEO_FIRST_V7 0x0900
# Setting of user mode (AX=mode ID) => CF=success
+.code16
mode_seta:
movw %ax, %bx
#if 0
@@ -346,21 +315,18 @@ check_vesaa:
_setbada: jmp setbada
- .code64
-bogus_magic:
- movw $0x0e00 + 'B', %ds:(0xb8018)
- jmp bogus_magic
-
-bogus_magic2:
- movw $0x0e00 + '2', %ds:(0xb8018)
- jmp bogus_magic2
-
-
wakeup_stack_begin: # Stack grows down
.org 0xff0
wakeup_stack: # Just below end of page
+.org 0x1000
+ENTRY(wakeup_level4_pgt)
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ .fill 510,8,0
+ /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+ .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
+
ENTRY(wakeup_end)
##
@@ -373,28 +339,11 @@ ENTRY(wakeup_end)
#
# Returned address is location of code in low memory (past data and stack)
#
+ .code64
ENTRY(acpi_copy_wakeup_routine)
pushq %rax
- pushq %rcx
pushq %rdx
- sgdt saved_gdt
- sidt saved_idt
- sldt saved_ldt
- str saved_tss
-
- movq %cr3, %rdx
- movq %rdx, saved_cr3
- movq %cr4, %rdx
- movq %rdx, saved_cr4
- movq %cr0, %rdx
- movq %rdx, saved_cr0
- sgdt real_save_gdt - wakeup_start (,%rdi)
- movl $MSR_EFER, %ecx
- rdmsr
- movl %eax, saved_efer
- movl %edx, saved_efer2
-
movl saved_video_mode, %edx
movl %edx, video_mode - wakeup_start (,%rdi)
movl acpi_video_flags, %edx
@@ -403,21 +352,13 @@ ENTRY(acpi_copy_wakeup_routine)
movq $0x123456789abcdef0, %rdx
movq %rdx, saved_magic
- movl saved_magic - __START_KERNEL_map, %eax
- cmpl $0x9abcdef0, %eax
- jne bogus_32_magic
-
- # make sure %cr4 is set correctly (features, etc)
- movl saved_cr4 - __START_KERNEL_map, %eax
- movq %rax, %cr4
+ movq saved_magic, %rax
+ movq $0x123456789abcdef0, %rdx
+ cmpq %rdx, %rax
+ jne bogus_64_magic
- movl saved_cr0 - __START_KERNEL_map, %eax
- movq %rax, %cr0
- jmp 1f # Flush pipelines
-1:
# restore the regs we used
popq %rdx
- popq %rcx
popq %rax
ENTRY(do_suspend_lowlevel_s4bios)
ret
@@ -450,13 +391,13 @@ do_suspend_lowlevel:
movq %r15, saved_context_r15(%rip)
pushfq ; popq saved_context_eflags(%rip)
- movq $.L97, saved_eip(%rip)
+ movq $.L97, saved_rip(%rip)
- movq %rsp,saved_esp
- movq %rbp,saved_ebp
- movq %rbx,saved_ebx
- movq %rdi,saved_edi
- movq %rsi,saved_esi
+ movq %rsp,saved_rsp
+ movq %rbp,saved_rbp
+ movq %rbx,saved_rbx
+ movq %rdi,saved_rdi
+ movq %rsi,saved_rsi
addq $8, %rsp
movl $3, %edi
@@ -503,25 +444,12 @@ do_suspend_lowlevel:
.data
ALIGN
-ENTRY(saved_ebp) .quad 0
-ENTRY(saved_esi) .quad 0
-ENTRY(saved_edi) .quad 0
-ENTRY(saved_ebx) .quad 0
+ENTRY(saved_rbp) .quad 0
+ENTRY(saved_rsi) .quad 0
+ENTRY(saved_rdi) .quad 0
+ENTRY(saved_rbx) .quad 0
-ENTRY(saved_eip) .quad 0
-ENTRY(saved_esp) .quad 0
+ENTRY(saved_rip) .quad 0
+ENTRY(saved_rsp) .quad 0
ENTRY(saved_magic) .quad 0
-
-ALIGN
-# saved registers
-saved_gdt: .quad 0,0
-saved_idt: .quad 0,0
-saved_ldt: .quad 0
-saved_tss: .quad 0
-
-saved_cr0: .quad 0
-saved_cr3: .quad 0
-saved_cr4: .quad 0
-saved_efer: .quad 0
-saved_efer2: .quad 0
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index b487396c4c5b..a52af5820592 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -51,7 +51,6 @@ static void __init insert_aperture_resource(u32 aper_base, u32 aper_size)
static u32 __init allocate_aperture(void)
{
- pg_data_t *nd0 = NODE_DATA(0);
u32 aper_size;
void *p;
@@ -65,12 +64,12 @@ static u32 __init allocate_aperture(void)
* Unfortunately we cannot move it up because that would make the
* IOMMU useless.
*/
- p = __alloc_bootmem_node(nd0, aper_size, aper_size, 0);
+ p = __alloc_bootmem_nopanic(aper_size, aper_size, 0);
if (!p || __pa(p)+aper_size > 0xffffffff) {
printk("Cannot allocate aperture memory hole (%p,%uK)\n",
p, aper_size>>10);
if (p)
- free_bootmem_node(nd0, __pa(p), aper_size);
+ free_bootmem(__pa(p), aper_size);
return 0;
}
printk("Mapping aperture over %d KB of RAM @ %lx\n",
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index bd3e45d47c37..d198f7d82e5a 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -68,6 +68,28 @@ int using_apic_timer __read_mostly = 0;
static void apic_pm_activate(void);
+void apic_wait_icr_idle(void)
+{
+ while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
+ cpu_relax();
+}
+
+unsigned int safe_apic_wait_icr_idle(void)
+{
+ unsigned int send_status;
+ int timeout;
+
+ timeout = 0;
+ do {
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ if (!send_status)
+ break;
+ udelay(100);
+ } while (timeout++ < 1000);
+
+ return send_status;
+}
+
void enable_NMI_through_LVT0 (void * dummy)
{
unsigned int v;
@@ -817,14 +839,15 @@ static void setup_APIC_timer(unsigned int clocks)
static int __init calibrate_APIC_clock(void)
{
- int apic, apic_start, tsc, tsc_start;
+ unsigned apic, apic_start;
+ unsigned long tsc, tsc_start;
int result;
/*
* Put whatever arbitrary (but long enough) timeout
* value into the APIC clock, we just want to get the
* counter running for calibration.
*/
- __setup_APIC_LVTT(1000000000);
+ __setup_APIC_LVTT(4000000000);
apic_start = apic_read(APIC_TMCCT);
#ifdef CONFIG_X86_PM_TIMER
@@ -835,15 +858,15 @@ static int __init calibrate_APIC_clock(void)
} else
#endif
{
- rdtscl(tsc_start);
+ rdtscll(tsc_start);
do {
apic = apic_read(APIC_TMCCT);
- rdtscl(tsc);
+ rdtscll(tsc);
} while ((tsc - tsc_start) < TICK_COUNT &&
- (apic - apic_start) < TICK_COUNT);
+ (apic_start - apic) < TICK_COUNT);
- result = (apic_start - apic) * 1000L * cpu_khz /
+ result = (apic_start - apic) * 1000L * tsc_khz /
(tsc - tsc_start);
}
printk("result %d\n", result);
diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c
index 96687e2beb2c..778953bc636c 100644
--- a/arch/x86_64/kernel/asm-offsets.c
+++ b/arch/x86_64/kernel/asm-offsets.c
@@ -21,6 +21,14 @@
#define BLANK() asm volatile("\n->" : : )
+#define __NO_STUBS 1
+#undef __SYSCALL
+#undef _ASM_X86_64_UNISTD_H_
+#define __SYSCALL(nr, sym) [nr] = 1,
+static char syscalls[] = {
+#include <asm/unistd.h>
+};
+
int main(void)
{
#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
@@ -71,5 +79,7 @@ int main(void)
DEFINE(TSS_ist, offsetof(struct tss_struct, ist));
BLANK();
DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
+ BLANK();
+ DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
return 0;
}
diff --git a/arch/x86_64/kernel/bugs.c b/arch/x86_64/kernel/bugs.c
new file mode 100644
index 000000000000..12b585b5345d
--- /dev/null
+++ b/arch/x86_64/kernel/bugs.c
@@ -0,0 +1,21 @@
+/*
+ * arch/x86_64/kernel/bugs.c
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ * Copyright (C) 2000 SuSE
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <asm/alternative.h>
+#include <asm/processor.h>
+
+void __init check_bugs(void)
+{
+ identify_cpu(&boot_cpu_data);
+#if !defined(CONFIG_SMP)
+ printk("CPU: ");
+ print_cpu_info(&boot_cpu_data);
+#endif
+ alternative_instructions();
+}
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index a490fabfcf47..be8965427a93 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -25,7 +25,7 @@
#include <asm/bootsetup.h>
#include <asm/sections.h>
-struct e820map e820 __initdata;
+struct e820map e820;
/*
* PFN of last memory page.
@@ -98,7 +98,7 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
* This function checks if any part of the range <start,end> is mapped
* with type.
*/
-int __meminit
+int
e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
{
int i;
@@ -112,6 +112,7 @@ e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
}
return 0;
}
+EXPORT_SYMBOL_GPL(e820_any_mapped);
/*
* This function checks if the entire range <start,end> is mapped with type.
diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c
index fede55a53995..990d9c218a5d 100644
--- a/arch/x86_64/kernel/early-quirks.c
+++ b/arch/x86_64/kernel/early-quirks.c
@@ -71,18 +71,6 @@ static void __init ati_bugs(void)
}
}
-static void intel_bugs(void)
-{
- u16 device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID);
-
-#ifdef CONFIG_SMP
- if (device == PCI_DEVICE_ID_INTEL_E7320_MCH ||
- device == PCI_DEVICE_ID_INTEL_E7520_MCH ||
- device == PCI_DEVICE_ID_INTEL_E7525_MCH)
- quirk_intel_irqbalance();
-#endif
-}
-
struct chipset {
u16 vendor;
void (*f)(void);
@@ -92,7 +80,6 @@ static struct chipset early_qrk[] __initdata = {
{ PCI_VENDOR_ID_NVIDIA, nvidia_bugs },
{ PCI_VENDOR_ID_VIA, via_bugs },
{ PCI_VENDOR_ID_ATI, ati_bugs },
- { PCI_VENDOR_ID_INTEL, intel_bugs},
{}
};
diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c
index 47b6d90349da..92213d2b7c11 100644
--- a/arch/x86_64/kernel/early_printk.c
+++ b/arch/x86_64/kernel/early_printk.c
@@ -11,11 +11,10 @@
#ifdef __i386__
#include <asm/setup.h>
-#define VGABASE (__ISA_IO_base + 0xb8000)
#else
#include <asm/bootsetup.h>
-#define VGABASE ((void __iomem *)0xffffffff800b8000UL)
#endif
+#define VGABASE (__ISA_IO_base + 0xb8000)
static int max_ypos = 25, max_xpos = 80;
static int current_ypos = 25, current_xpos = 0;
@@ -176,7 +175,7 @@ static noinline long simnow(long cmd, long a, long b, long c)
return ret;
}
-void __init simnow_init(char *str)
+static void __init simnow_init(char *str)
{
char *fn = "klog";
if (*str == '=')
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index ed4350ced3d0..fa984b53e7e6 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -701,6 +701,7 @@ END(spurious_interrupt)
CFI_ADJUST_CFA_OFFSET 8
pushq %rax /* push real oldrax to the rdi slot */
CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rax,0
leaq \sym(%rip),%rax
jmp error_entry
CFI_ENDPROC
@@ -710,6 +711,7 @@ END(spurious_interrupt)
XCPT_FRAME
pushq %rax
CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rax,0
leaq \sym(%rip),%rax
jmp error_entry
CFI_ENDPROC
@@ -817,6 +819,7 @@ paranoid_schedule\trace:
*/
KPROBE_ENTRY(error_entry)
_frame RDI
+ CFI_REL_OFFSET rax,0
/* rdi slot contains rax, oldrax contains error code */
cld
subq $14*8,%rsp
@@ -824,6 +827,7 @@ KPROBE_ENTRY(error_entry)
movq %rsi,13*8(%rsp)
CFI_REL_OFFSET rsi,RSI
movq 14*8(%rsp),%rsi /* load rax from rdi slot */
+ CFI_REGISTER rax,rsi
movq %rdx,12*8(%rsp)
CFI_REL_OFFSET rdx,RDX
movq %rcx,11*8(%rsp)
@@ -857,6 +861,7 @@ error_swapgs:
swapgs
error_sti:
movq %rdi,RDI(%rsp)
+ CFI_REL_OFFSET rdi,RDI
movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi /* get error code */
movq $-1,ORIG_RAX(%rsp)
diff --git a/arch/x86_64/kernel/functionlist b/arch/x86_64/kernel/functionlist
deleted file mode 100644
index 7ae18ec12454..000000000000
--- a/arch/x86_64/kernel/functionlist
+++ /dev/null
@@ -1,1284 +0,0 @@
-*(.text.flush_thread)
-*(.text.check_poison_obj)
-*(.text.copy_page)
-*(.text.__set_personality)
-*(.text.gart_map_sg)
-*(.text.kmem_cache_free)
-*(.text.find_get_page)
-*(.text._raw_spin_lock)
-*(.text.ide_outb)
-*(.text.unmap_vmas)
-*(.text.copy_page_range)
-*(.text.kprobe_handler)
-*(.text.__handle_mm_fault)
-*(.text.__d_lookup)
-*(.text.copy_user_generic)
-*(.text.__link_path_walk)
-*(.text.get_page_from_freelist)
-*(.text.kmem_cache_alloc)
-*(.text.drive_cmd_intr)
-*(.text.ia32_setup_sigcontext)
-*(.text.huge_pte_offset)
-*(.text.do_page_fault)
-*(.text.page_remove_rmap)
-*(.text.release_pages)
-*(.text.ide_end_request)
-*(.text.__mutex_lock_slowpath)
-*(.text.__find_get_block)
-*(.text.kfree)
-*(.text.vfs_read)
-*(.text._raw_spin_unlock)
-*(.text.free_hot_cold_page)
-*(.text.fget_light)
-*(.text.schedule)
-*(.text.memcmp)
-*(.text.touch_atime)
-*(.text.__might_sleep)
-*(.text.__down_read_trylock)
-*(.text.arch_pick_mmap_layout)
-*(.text.find_vma)
-*(.text.__make_request)
-*(.text.do_generic_mapping_read)
-*(.text.mutex_lock_interruptible)
-*(.text.__generic_file_aio_read)
-*(.text._atomic_dec_and_lock)
-*(.text.__wake_up_bit)
-*(.text.add_to_page_cache)
-*(.text.cache_alloc_debugcheck_after)
-*(.text.vm_normal_page)
-*(.text.mutex_debug_check_no_locks_freed)
-*(.text.net_rx_action)
-*(.text.__find_first_zero_bit)
-*(.text.put_page)
-*(.text._raw_read_lock)
-*(.text.__delay)
-*(.text.dnotify_parent)
-*(.text.do_path_lookup)
-*(.text.do_sync_read)
-*(.text.do_lookup)
-*(.text.bit_waitqueue)
-*(.text.file_read_actor)
-*(.text.strncpy_from_user)
-*(.text.__pagevec_lru_add_active)
-*(.text.fget)
-*(.text.dput)
-*(.text.__strnlen_user)
-*(.text.inotify_inode_queue_event)
-*(.text.rw_verify_area)
-*(.text.ide_intr)
-*(.text.inotify_dentry_parent_queue_event)
-*(.text.permission)
-*(.text.memscan)
-*(.text.hpet_rtc_interrupt)
-*(.text.do_mmap_pgoff)
-*(.text.current_fs_time)
-*(.text.vfs_getattr)
-*(.text.kmem_flagcheck)
-*(.text.mark_page_accessed)
-*(.text.free_pages_and_swap_cache)
-*(.text.generic_fillattr)
-*(.text.__block_prepare_write)
-*(.text.__set_page_dirty_nobuffers)
-*(.text.link_path_walk)
-*(.text.find_get_pages_tag)
-*(.text.ide_do_request)
-*(.text.__alloc_pages)
-*(.text.generic_permission)
-*(.text.mod_page_state_offset)
-*(.text.free_pgd_range)
-*(.text.generic_file_buffered_write)
-*(.text.number)
-*(.text.ide_do_rw_disk)
-*(.text.__brelse)
-*(.text.__mod_page_state_offset)
-*(.text.rotate_reclaimable_page)
-*(.text.find_vma_prepare)
-*(.text.find_vma_prev)
-*(.text.lru_cache_add_active)
-*(.text.__kmalloc_track_caller)
-*(.text.smp_invalidate_interrupt)
-*(.text.handle_IRQ_event)
-*(.text.__find_get_block_slow)
-*(.text.do_wp_page)
-*(.text.do_select)
-*(.text.set_user_nice)
-*(.text.sys_read)
-*(.text.do_munmap)
-*(.text.csum_partial)
-*(.text.__do_softirq)
-*(.text.may_open)
-*(.text.getname)
-*(.text.get_empty_filp)
-*(.text.__fput)
-*(.text.remove_mapping)
-*(.text.filp_ctor)
-*(.text.poison_obj)
-*(.text.unmap_region)
-*(.text.test_set_page_writeback)
-*(.text.__do_page_cache_readahead)
-*(.text.sock_def_readable)
-*(.text.ide_outl)
-*(.text.shrink_zone)
-*(.text.rb_insert_color)
-*(.text.get_request)
-*(.text.sys_pread64)
-*(.text.spin_bug)
-*(.text.ide_outsl)
-*(.text.mask_and_ack_8259A)
-*(.text.filemap_nopage)
-*(.text.page_add_file_rmap)
-*(.text.find_lock_page)
-*(.text.tcp_poll)
-*(.text.__mark_inode_dirty)
-*(.text.file_ra_state_init)
-*(.text.generic_file_llseek)
-*(.text.__pagevec_lru_add)
-*(.text.page_cache_readahead)
-*(.text.n_tty_receive_buf)
-*(.text.zonelist_policy)
-*(.text.vma_adjust)
-*(.text.test_clear_page_dirty)
-*(.text.sync_buffer)
-*(.text.do_exit)
-*(.text.__bitmap_weight)
-*(.text.alloc_pages_current)
-*(.text.get_unused_fd)
-*(.text.zone_watermark_ok)
-*(.text.cpuset_update_task_memory_state)
-*(.text.__bitmap_empty)
-*(.text.sys_munmap)
-*(.text.__inode_dir_notify)
-*(.text.__generic_file_aio_write_nolock)
-*(.text.__pte_alloc)
-*(.text.sys_select)
-*(.text.vm_acct_memory)
-*(.text.vfs_write)
-*(.text.__lru_add_drain)
-*(.text.prio_tree_insert)
-*(.text.generic_file_aio_read)
-*(.text.vma_merge)
-*(.text.block_write_full_page)
-*(.text.__page_set_anon_rmap)
-*(.text.apic_timer_interrupt)
-*(.text.release_console_sem)
-*(.text.sys_write)
-*(.text.sys_brk)
-*(.text.dup_mm)
-*(.text.read_current_timer)
-*(.text.ll_rw_block)
-*(.text.blk_rq_map_sg)
-*(.text.dbg_userword)
-*(.text.__block_commit_write)
-*(.text.cache_grow)
-*(.text.copy_strings)
-*(.text.release_task)
-*(.text.do_sync_write)
-*(.text.unlock_page)
-*(.text.load_elf_binary)
-*(.text.__follow_mount)
-*(.text.__getblk)
-*(.text.do_sys_open)
-*(.text.current_kernel_time)
-*(.text.call_rcu)
-*(.text.write_chan)
-*(.text.vsnprintf)
-*(.text.dummy_inode_setsecurity)
-*(.text.submit_bh)
-*(.text.poll_freewait)
-*(.text.bio_alloc_bioset)
-*(.text.skb_clone)
-*(.text.page_waitqueue)
-*(.text.__mutex_lock_interruptible_slowpath)
-*(.text.get_index)
-*(.text.csum_partial_copy_generic)
-*(.text.bad_range)
-*(.text.remove_vma)
-*(.text.cp_new_stat)
-*(.text.alloc_arraycache)
-*(.text.test_clear_page_writeback)
-*(.text.strsep)
-*(.text.open_namei)
-*(.text._raw_read_unlock)
-*(.text.get_vma_policy)
-*(.text.__down_write_trylock)
-*(.text.find_get_pages)
-*(.text.tcp_rcv_established)
-*(.text.generic_make_request)
-*(.text.__block_write_full_page)
-*(.text.cfq_set_request)
-*(.text.sys_inotify_init)
-*(.text.split_vma)
-*(.text.__mod_timer)
-*(.text.get_options)
-*(.text.vma_link)
-*(.text.mpage_writepages)
-*(.text.truncate_complete_page)
-*(.text.tcp_recvmsg)
-*(.text.sigprocmask)
-*(.text.filemap_populate)
-*(.text.sys_close)
-*(.text.inotify_dev_queue_event)
-*(.text.do_task_stat)
-*(.text.__dentry_open)
-*(.text.unlink_file_vma)
-*(.text.__pollwait)
-*(.text.packet_rcv_spkt)
-*(.text.drop_buffers)
-*(.text.free_pgtables)
-*(.text.generic_file_direct_write)
-*(.text.copy_process)
-*(.text.netif_receive_skb)
-*(.text.dnotify_flush)
-*(.text.print_bad_pte)
-*(.text.anon_vma_unlink)
-*(.text.sys_mprotect)
-*(.text.sync_sb_inodes)
-*(.text.find_inode_fast)
-*(.text.dummy_inode_readlink)
-*(.text.putname)
-*(.text.init_smp_flush)
-*(.text.dbg_redzone2)
-*(.text.sk_run_filter)
-*(.text.may_expand_vm)
-*(.text.generic_file_aio_write)
-*(.text.find_next_zero_bit)
-*(.text.file_kill)
-*(.text.audit_getname)
-*(.text.arch_unmap_area_topdown)
-*(.text.alloc_page_vma)
-*(.text.tcp_transmit_skb)
-*(.text.rb_next)
-*(.text.dbg_redzone1)
-*(.text.generic_file_mmap)
-*(.text.vfs_fstat)
-*(.text.sys_time)
-*(.text.page_lock_anon_vma)
-*(.text.get_unmapped_area)
-*(.text.remote_llseek)
-*(.text.__up_read)
-*(.text.fd_install)
-*(.text.eventpoll_init_file)
-*(.text.dma_alloc_coherent)
-*(.text.create_empty_buffers)
-*(.text.__mutex_unlock_slowpath)
-*(.text.dup_fd)
-*(.text.d_alloc)
-*(.text.tty_ldisc_try)
-*(.text.sys_stime)
-*(.text.__rb_rotate_right)
-*(.text.d_validate)
-*(.text.rb_erase)
-*(.text.path_release)
-*(.text.memmove)
-*(.text.invalidate_complete_page)
-*(.text.clear_inode)
-*(.text.cache_estimate)
-*(.text.alloc_buffer_head)
-*(.text.smp_call_function_interrupt)
-*(.text.flush_tlb_others)
-*(.text.file_move)
-*(.text.balance_dirty_pages_ratelimited)
-*(.text.vma_prio_tree_add)
-*(.text.timespec_trunc)
-*(.text.mempool_alloc)
-*(.text.iget_locked)
-*(.text.d_alloc_root)
-*(.text.cpuset_populate_dir)
-*(.text.anon_vma_prepare)
-*(.text.sys_newstat)
-*(.text.alloc_page_interleave)
-*(.text.__path_lookup_intent_open)
-*(.text.__pagevec_free)
-*(.text.inode_init_once)
-*(.text.free_vfsmnt)
-*(.text.__user_walk_fd)
-*(.text.cfq_idle_slice_timer)
-*(.text.sys_mmap)
-*(.text.sys_llseek)
-*(.text.prio_tree_remove)
-*(.text.filp_close)
-*(.text.file_permission)
-*(.text.vma_prio_tree_remove)
-*(.text.tcp_ack)
-*(.text.nameidata_to_filp)
-*(.text.sys_lseek)
-*(.text.percpu_counter_mod)
-*(.text.igrab)
-*(.text.__bread)
-*(.text.alloc_inode)
-*(.text.filldir)
-*(.text.__rb_rotate_left)
-*(.text.irq_affinity_write_proc)
-*(.text.init_request_from_bio)
-*(.text.find_or_create_page)
-*(.text.tty_poll)
-*(.text.tcp_sendmsg)
-*(.text.ide_wait_stat)
-*(.text.free_buffer_head)
-*(.text.flush_signal_handlers)
-*(.text.tcp_v4_rcv)
-*(.text.nr_blockdev_pages)
-*(.text.locks_remove_flock)
-*(.text.__iowrite32_copy)
-*(.text.do_filp_open)
-*(.text.try_to_release_page)
-*(.text.page_add_new_anon_rmap)
-*(.text.kmem_cache_size)
-*(.text.eth_type_trans)
-*(.text.try_to_free_buffers)
-*(.text.schedule_tail)
-*(.text.proc_lookup)
-*(.text.no_llseek)
-*(.text.kfree_skbmem)
-*(.text.do_wait)
-*(.text.do_mpage_readpage)
-*(.text.vfs_stat_fd)
-*(.text.tty_write)
-*(.text.705)
-*(.text.sync_page)
-*(.text.__remove_shared_vm_struct)
-*(.text.__kfree_skb)
-*(.text.sock_poll)
-*(.text.get_request_wait)
-*(.text.do_sigaction)
-*(.text.do_brk)
-*(.text.tcp_event_data_recv)
-*(.text.read_chan)
-*(.text.pipe_writev)
-*(.text.__emul_lookup_dentry)
-*(.text.rtc_get_rtc_time)
-*(.text.print_objinfo)
-*(.text.file_update_time)
-*(.text.do_signal)
-*(.text.disable_8259A_irq)
-*(.text.blk_queue_bounce)
-*(.text.__anon_vma_link)
-*(.text.__vma_link)
-*(.text.vfs_rename)
-*(.text.sys_newlstat)
-*(.text.sys_newfstat)
-*(.text.sys_mknod)
-*(.text.__show_regs)
-*(.text.iput)
-*(.text.get_signal_to_deliver)
-*(.text.flush_tlb_page)
-*(.text.debug_mutex_wake_waiter)
-*(.text.copy_thread)
-*(.text.clear_page_dirty_for_io)
-*(.text.buffer_io_error)
-*(.text.vfs_permission)
-*(.text.truncate_inode_pages_range)
-*(.text.sys_recvfrom)
-*(.text.remove_suid)
-*(.text.mark_buffer_dirty)
-*(.text.local_bh_enable)
-*(.text.get_zeroed_page)
-*(.text.get_vmalloc_info)
-*(.text.flush_old_exec)
-*(.text.dummy_inode_permission)
-*(.text.__bio_add_page)
-*(.text.prio_tree_replace)
-*(.text.notify_change)
-*(.text.mntput_no_expire)
-*(.text.fput)
-*(.text.__end_that_request_first)
-*(.text.wake_up_bit)
-*(.text.unuse_mm)
-*(.text.shrink_icache_memory)
-*(.text.sched_balance_self)
-*(.text.__pmd_alloc)
-*(.text.pipe_poll)
-*(.text.normal_poll)
-*(.text.__free_pages)
-*(.text.follow_mount)
-*(.text.cdrom_start_packet_command)
-*(.text.blk_recount_segments)
-*(.text.bio_put)
-*(.text.__alloc_skb)
-*(.text.__wake_up)
-*(.text.vm_stat_account)
-*(.text.sys_fcntl)
-*(.text.sys_fadvise64)
-*(.text._raw_write_unlock)
-*(.text.__pud_alloc)
-*(.text.alloc_page_buffers)
-*(.text.vfs_llseek)
-*(.text.sockfd_lookup)
-*(.text._raw_write_lock)
-*(.text.put_compound_page)
-*(.text.prune_dcache)
-*(.text.pipe_readv)
-*(.text.mempool_free)
-*(.text.make_ahead_window)
-*(.text.lru_add_drain)
-*(.text.constant_test_bit)
-*(.text.__clear_user)
-*(.text.arch_unmap_area)
-*(.text.anon_vma_link)
-*(.text.sys_chroot)
-*(.text.setup_arg_pages)
-*(.text.radix_tree_preload)
-*(.text.init_rwsem)
-*(.text.generic_osync_inode)
-*(.text.generic_delete_inode)
-*(.text.do_sys_poll)
-*(.text.dev_queue_xmit)
-*(.text.default_llseek)
-*(.text.__writeback_single_inode)
-*(.text.vfs_ioctl)
-*(.text.__up_write)
-*(.text.unix_poll)
-*(.text.sys_rt_sigprocmask)
-*(.text.sock_recvmsg)
-*(.text.recalc_bh_state)
-*(.text.__put_unused_fd)
-*(.text.process_backlog)
-*(.text.locks_remove_posix)
-*(.text.lease_modify)
-*(.text.expand_files)
-*(.text.end_buffer_read_nobh)
-*(.text.d_splice_alias)
-*(.text.debug_mutex_init_waiter)
-*(.text.copy_from_user)
-*(.text.cap_vm_enough_memory)
-*(.text.show_vfsmnt)
-*(.text.release_sock)
-*(.text.pfifo_fast_enqueue)
-*(.text.half_md4_transform)
-*(.text.fs_may_remount_ro)
-*(.text.do_fork)
-*(.text.copy_hugetlb_page_range)
-*(.text.cache_free_debugcheck)
-*(.text.__tcp_select_window)
-*(.text.task_handoff_register)
-*(.text.sys_open)
-*(.text.strlcpy)
-*(.text.skb_copy_datagram_iovec)
-*(.text.set_up_list3s)
-*(.text.release_open_intent)
-*(.text.qdisc_restart)
-*(.text.n_tty_chars_in_buffer)
-*(.text.inode_change_ok)
-*(.text.__downgrade_write)
-*(.text.debug_mutex_unlock)
-*(.text.add_timer_randomness)
-*(.text.sock_common_recvmsg)
-*(.text.set_bh_page)
-*(.text.printk_lock)
-*(.text.path_release_on_umount)
-*(.text.ip_output)
-*(.text.ide_build_dmatable)
-*(.text.__get_user_8)
-*(.text.end_buffer_read_sync)
-*(.text.__d_path)
-*(.text.d_move)
-*(.text.del_timer)
-*(.text.constant_test_bit)
-*(.text.blockable_page_cache_readahead)
-*(.text.tty_read)
-*(.text.sys_readlink)
-*(.text.sys_faccessat)
-*(.text.read_swap_cache_async)
-*(.text.pty_write_room)
-*(.text.page_address_in_vma)
-*(.text.kthread)
-*(.text.cfq_exit_io_context)
-*(.text.__tcp_push_pending_frames)
-*(.text.sys_pipe)
-*(.text.submit_bio)
-*(.text.pid_revalidate)
-*(.text.page_referenced_file)
-*(.text.lock_sock)
-*(.text.get_page_state_node)
-*(.text.generic_block_bmap)
-*(.text.do_setitimer)
-*(.text.dev_queue_xmit_nit)
-*(.text.copy_from_read_buf)
-*(.text.__const_udelay)
-*(.text.console_conditional_schedule)
-*(.text.wake_up_new_task)
-*(.text.wait_for_completion_interruptible)
-*(.text.tcp_rcv_rtt_update)
-*(.text.sys_mlockall)
-*(.text.set_fs_altroot)
-*(.text.schedule_timeout)
-*(.text.nr_free_pagecache_pages)
-*(.text.nf_iterate)
-*(.text.mapping_tagged)
-*(.text.ip_queue_xmit)
-*(.text.ip_local_deliver)
-*(.text.follow_page)
-*(.text.elf_map)
-*(.text.dummy_file_permission)
-*(.text.dispose_list)
-*(.text.dentry_open)
-*(.text.dentry_iput)
-*(.text.bio_alloc)
-*(.text.wait_on_page_bit)
-*(.text.vfs_readdir)
-*(.text.vfs_lstat)
-*(.text.seq_escape)
-*(.text.__posix_lock_file)
-*(.text.mm_release)
-*(.text.kref_put)
-*(.text.ip_rcv)
-*(.text.__iget)
-*(.text.free_pages)
-*(.text.find_mergeable_anon_vma)
-*(.text.find_extend_vma)
-*(.text.dummy_inode_listsecurity)
-*(.text.bio_add_page)
-*(.text.__vm_enough_memory)
-*(.text.vfs_stat)
-*(.text.tty_paranoia_check)
-*(.text.tcp_read_sock)
-*(.text.tcp_data_queue)
-*(.text.sys_uname)
-*(.text.sys_renameat)
-*(.text.__strncpy_from_user)
-*(.text.__mutex_init)
-*(.text.__lookup_hash)
-*(.text.kref_get)
-*(.text.ip_route_input)
-*(.text.__insert_inode_hash)
-*(.text.do_sock_write)
-*(.text.blk_done_softirq)
-*(.text.__wake_up_sync)
-*(.text.__vma_link_rb)
-*(.text.tty_ioctl)
-*(.text.tracesys)
-*(.text.sys_getdents)
-*(.text.sys_dup)
-*(.text.stub_execve)
-*(.text.sha_transform)
-*(.text.radix_tree_tag_clear)
-*(.text.put_unused_fd)
-*(.text.put_files_struct)
-*(.text.mpage_readpages)
-*(.text.may_delete)
-*(.text.kmem_cache_create)
-*(.text.ip_mc_output)
-*(.text.interleave_nodes)
-*(.text.groups_search)
-*(.text.generic_drop_inode)
-*(.text.generic_commit_write)
-*(.text.fcntl_setlk)
-*(.text.exit_mmap)
-*(.text.end_page_writeback)
-*(.text.__d_rehash)
-*(.text.debug_mutex_free_waiter)
-*(.text.csum_ipv6_magic)
-*(.text.count)
-*(.text.cleanup_rbuf)
-*(.text.check_spinlock_acquired_node)
-*(.text.can_vma_merge_after)
-*(.text.bio_endio)
-*(.text.alloc_pidmap)
-*(.text.write_ldt)
-*(.text.vmtruncate_range)
-*(.text.vfs_create)
-*(.text.__user_walk)
-*(.text.update_send_head)
-*(.text.unmap_underlying_metadata)
-*(.text.tty_ldisc_deref)
-*(.text.tcp_setsockopt)
-*(.text.tcp_send_ack)
-*(.text.sys_pause)
-*(.text.sys_gettimeofday)
-*(.text.sync_dirty_buffer)
-*(.text.strncmp)
-*(.text.release_posix_timer)
-*(.text.proc_file_read)
-*(.text.prepare_to_wait)
-*(.text.locks_mandatory_locked)
-*(.text.interruptible_sleep_on_timeout)
-*(.text.inode_sub_bytes)
-*(.text.in_group_p)
-*(.text.hrtimer_try_to_cancel)
-*(.text.filldir64)
-*(.text.fasync_helper)
-*(.text.dummy_sb_pivotroot)
-*(.text.d_lookup)
-*(.text.d_instantiate)
-*(.text.__d_find_alias)
-*(.text.cpu_idle_wait)
-*(.text.cond_resched_lock)
-*(.text.chown_common)
-*(.text.blk_congestion_wait)
-*(.text.activate_page)
-*(.text.unlock_buffer)
-*(.text.tty_wakeup)
-*(.text.tcp_v4_do_rcv)
-*(.text.tcp_current_mss)
-*(.text.sys_openat)
-*(.text.sys_fchdir)
-*(.text.strnlen_user)
-*(.text.strnlen)
-*(.text.strchr)
-*(.text.sock_common_getsockopt)
-*(.text.skb_checksum)
-*(.text.remove_wait_queue)
-*(.text.rb_replace_node)
-*(.text.radix_tree_node_ctor)
-*(.text.pty_chars_in_buffer)
-*(.text.profile_hit)
-*(.text.prio_tree_left)
-*(.text.pgd_clear_bad)
-*(.text.pfifo_fast_dequeue)
-*(.text.page_referenced)
-*(.text.open_exec)
-*(.text.mmput)
-*(.text.mm_init)
-*(.text.__ide_dma_off_quietly)
-*(.text.ide_dma_intr)
-*(.text.hrtimer_start)
-*(.text.get_io_context)
-*(.text.__get_free_pages)
-*(.text.find_first_zero_bit)
-*(.text.file_free_rcu)
-*(.text.dummy_socket_sendmsg)
-*(.text.do_unlinkat)
-*(.text.do_arch_prctl)
-*(.text.destroy_inode)
-*(.text.can_vma_merge_before)
-*(.text.block_sync_page)
-*(.text.block_prepare_write)
-*(.text.bio_init)
-*(.text.arch_ptrace)
-*(.text.wake_up_inode)
-*(.text.wait_on_retry_sync_kiocb)
-*(.text.vma_prio_tree_next)
-*(.text.tcp_rcv_space_adjust)
-*(.text.__tcp_ack_snd_check)
-*(.text.sys_utime)
-*(.text.sys_recvmsg)
-*(.text.sys_mremap)
-*(.text.sys_bdflush)
-*(.text.sleep_on)
-*(.text.set_page_dirty_lock)
-*(.text.seq_path)
-*(.text.schedule_timeout_interruptible)
-*(.text.sched_fork)
-*(.text.rt_run_flush)
-*(.text.profile_munmap)
-*(.text.prepare_binprm)
-*(.text.__pagevec_release_nonlru)
-*(.text.m_show)
-*(.text.lookup_mnt)
-*(.text.__lookup_mnt)
-*(.text.lock_timer_base)
-*(.text.is_subdir)
-*(.text.invalidate_bh_lru)
-*(.text.init_buffer_head)
-*(.text.ifind_fast)
-*(.text.ide_dma_start)
-*(.text.__get_page_state)
-*(.text.flock_to_posix_lock)
-*(.text.__find_symbol)
-*(.text.do_futex)
-*(.text.do_execve)
-*(.text.dirty_writeback_centisecs_handler)
-*(.text.dev_watchdog)
-*(.text.can_share_swap_page)
-*(.text.blkdev_put)
-*(.text.bio_get_nr_vecs)
-*(.text.xfrm_compile_policy)
-*(.text.vma_prio_tree_insert)
-*(.text.vfs_lstat_fd)
-*(.text.__user_path_lookup_open)
-*(.text.thread_return)
-*(.text.tcp_send_delayed_ack)
-*(.text.sock_def_error_report)
-*(.text.shrink_slab)
-*(.text.serial_out)
-*(.text.seq_read)
-*(.text.secure_ip_id)
-*(.text.search_binary_handler)
-*(.text.proc_pid_unhash)
-*(.text.pagevec_lookup)
-*(.text.new_inode)
-*(.text.memcpy_toiovec)
-*(.text.locks_free_lock)
-*(.text.__lock_page)
-*(.text.__lock_buffer)
-*(.text.load_module)
-*(.text.is_bad_inode)
-*(.text.invalidate_inode_buffers)
-*(.text.insert_vm_struct)
-*(.text.inode_setattr)
-*(.text.inode_add_bytes)
-*(.text.ide_read_24)
-*(.text.ide_get_error_location)
-*(.text.ide_do_drive_cmd)
-*(.text.get_locked_pte)
-*(.text.get_filesystem_list)
-*(.text.generic_file_open)
-*(.text.follow_down)
-*(.text.find_next_bit)
-*(.text.__find_first_bit)
-*(.text.exit_mm)
-*(.text.exec_keys)
-*(.text.end_buffer_write_sync)
-*(.text.end_bio_bh_io_sync)
-*(.text.dummy_socket_shutdown)
-*(.text.d_rehash)
-*(.text.d_path)
-*(.text.do_ioctl)
-*(.text.dget_locked)
-*(.text.copy_thread_group_keys)
-*(.text.cdrom_end_request)
-*(.text.cap_bprm_apply_creds)
-*(.text.blk_rq_bio_prep)
-*(.text.__bitmap_intersects)
-*(.text.bio_phys_segments)
-*(.text.bio_free)
-*(.text.arch_get_unmapped_area_topdown)
-*(.text.writeback_in_progress)
-*(.text.vfs_follow_link)
-*(.text.tcp_rcv_state_process)
-*(.text.tcp_check_space)
-*(.text.sys_stat)
-*(.text.sys_rt_sigreturn)
-*(.text.sys_rt_sigaction)
-*(.text.sys_remap_file_pages)
-*(.text.sys_pwrite64)
-*(.text.sys_fchownat)
-*(.text.sys_fchmodat)
-*(.text.strncat)
-*(.text.strlcat)
-*(.text.strcmp)
-*(.text.steal_locks)
-*(.text.sock_create)
-*(.text.sk_stream_rfree)
-*(.text.sk_stream_mem_schedule)
-*(.text.skip_atoi)
-*(.text.sk_alloc)
-*(.text.show_stat)
-*(.text.set_fs_pwd)
-*(.text.set_binfmt)
-*(.text.pty_unthrottle)
-*(.text.proc_symlink)
-*(.text.pipe_release)
-*(.text.pageout)
-*(.text.n_tty_write_wakeup)
-*(.text.n_tty_ioctl)
-*(.text.nr_free_zone_pages)
-*(.text.migration_thread)
-*(.text.mempool_free_slab)
-*(.text.meminfo_read_proc)
-*(.text.max_sane_readahead)
-*(.text.lru_cache_add)
-*(.text.kill_fasync)
-*(.text.kernel_read)
-*(.text.invalidate_mapping_pages)
-*(.text.inode_has_buffers)
-*(.text.init_once)
-*(.text.inet_sendmsg)
-*(.text.idedisk_issue_flush)
-*(.text.generic_file_write)
-*(.text.free_more_memory)
-*(.text.__free_fdtable)
-*(.text.filp_dtor)
-*(.text.exit_sem)
-*(.text.exit_itimers)
-*(.text.error_interrupt)
-*(.text.end_buffer_async_write)
-*(.text.eligible_child)
-*(.text.elf_map)
-*(.text.dump_task_regs)
-*(.text.dummy_task_setscheduler)
-*(.text.dummy_socket_accept)
-*(.text.dummy_file_free_security)
-*(.text.__down_read)
-*(.text.do_sock_read)
-*(.text.do_sigaltstack)
-*(.text.do_mremap)
-*(.text.current_io_context)
-*(.text.cpu_swap_callback)
-*(.text.copy_vma)
-*(.text.cap_bprm_set_security)
-*(.text.blk_insert_request)
-*(.text.bio_map_kern_endio)
-*(.text.bio_hw_segments)
-*(.text.bictcp_cong_avoid)
-*(.text.add_interrupt_randomness)
-*(.text.wait_for_completion)
-*(.text.version_read_proc)
-*(.text.unix_write_space)
-*(.text.tty_ldisc_ref_wait)
-*(.text.tty_ldisc_put)
-*(.text.try_to_wake_up)
-*(.text.tcp_v4_tw_remember_stamp)
-*(.text.tcp_try_undo_dsack)
-*(.text.tcp_may_send_now)
-*(.text.sys_waitid)
-*(.text.sys_sched_getparam)
-*(.text.sys_getppid)
-*(.text.sys_getcwd)
-*(.text.sys_dup2)
-*(.text.sys_chmod)
-*(.text.sys_chdir)
-*(.text.sprintf)
-*(.text.sock_wfree)
-*(.text.sock_aio_write)
-*(.text.skb_drop_fraglist)
-*(.text.skb_dequeue)
-*(.text.set_close_on_exec)
-*(.text.set_brk)
-*(.text.seq_puts)
-*(.text.SELECT_DRIVE)
-*(.text.sched_exec)
-*(.text.return_EIO)
-*(.text.remove_from_page_cache)
-*(.text.rcu_start_batch)
-*(.text.__put_task_struct)
-*(.text.proc_pid_readdir)
-*(.text.proc_get_inode)
-*(.text.prepare_to_wait_exclusive)
-*(.text.pipe_wait)
-*(.text.pipe_new)
-*(.text.pdflush_operation)
-*(.text.__pagevec_release)
-*(.text.pagevec_lookup_tag)
-*(.text.packet_rcv)
-*(.text.n_tty_set_room)
-*(.text.nr_free_pages)
-*(.text.__net_timestamp)
-*(.text.mpage_end_io_read)
-*(.text.mod_timer)
-*(.text.__memcpy)
-*(.text.mb_cache_shrink_fn)
-*(.text.lock_rename)
-*(.text.kstrdup)
-*(.text.is_ignored)
-*(.text.int_very_careful)
-*(.text.inotify_inode_is_dead)
-*(.text.inotify_get_cookie)
-*(.text.inode_get_bytes)
-*(.text.init_timer)
-*(.text.init_dev)
-*(.text.inet_getname)
-*(.text.ide_map_sg)
-*(.text.__ide_dma_end)
-*(.text.hrtimer_get_remaining)
-*(.text.get_task_mm)
-*(.text.get_random_int)
-*(.text.free_pipe_info)
-*(.text.filemap_write_and_wait_range)
-*(.text.exit_thread)
-*(.text.enter_idle)
-*(.text.end_that_request_first)
-*(.text.end_8259A_irq)
-*(.text.dummy_file_alloc_security)
-*(.text.do_group_exit)
-*(.text.debug_mutex_init)
-*(.text.cpuset_exit)
-*(.text.cpu_idle)
-*(.text.copy_semundo)
-*(.text.copy_files)
-*(.text.chrdev_open)
-*(.text.cdrom_transfer_packet_command)
-*(.text.cdrom_mode_sense)
-*(.text.blk_phys_contig_segment)
-*(.text.blk_get_queue)
-*(.text.bio_split)
-*(.text.audit_alloc)
-*(.text.anon_pipe_buf_release)
-*(.text.add_wait_queue_exclusive)
-*(.text.add_wait_queue)
-*(.text.acct_process)
-*(.text.account)
-*(.text.zeromap_page_range)
-*(.text.yield)
-*(.text.writeback_acquire)
-*(.text.worker_thread)
-*(.text.wait_on_page_writeback_range)
-*(.text.__wait_on_buffer)
-*(.text.vscnprintf)
-*(.text.vmalloc_to_pfn)
-*(.text.vgacon_save_screen)
-*(.text.vfs_unlink)
-*(.text.vfs_rmdir)
-*(.text.unregister_md_personality)
-*(.text.unlock_new_inode)
-*(.text.unix_stream_sendmsg)
-*(.text.unix_stream_recvmsg)
-*(.text.unhash_process)
-*(.text.udp_v4_lookup_longway)
-*(.text.tty_ldisc_flush)
-*(.text.tty_ldisc_enable)
-*(.text.tty_hung_up_p)
-*(.text.tty_buffer_free_all)
-*(.text.tso_fragment)
-*(.text.try_to_del_timer_sync)
-*(.text.tcp_v4_err)
-*(.text.tcp_unhash)
-*(.text.tcp_seq_next)
-*(.text.tcp_select_initial_window)
-*(.text.tcp_sacktag_write_queue)
-*(.text.tcp_cwnd_validate)
-*(.text.sys_vhangup)
-*(.text.sys_uselib)
-*(.text.sys_symlink)
-*(.text.sys_signal)
-*(.text.sys_poll)
-*(.text.sys_mount)
-*(.text.sys_kill)
-*(.text.sys_ioctl)
-*(.text.sys_inotify_add_watch)
-*(.text.sys_getuid)
-*(.text.sys_getrlimit)
-*(.text.sys_getitimer)
-*(.text.sys_getgroups)
-*(.text.sys_ftruncate)
-*(.text.sysfs_lookup)
-*(.text.sys_exit_group)
-*(.text.stub_fork)
-*(.text.sscanf)
-*(.text.sock_map_fd)
-*(.text.sock_get_timestamp)
-*(.text.__sock_create)
-*(.text.smp_call_function_single)
-*(.text.sk_stop_timer)
-*(.text.skb_copy_and_csum_datagram)
-*(.text.__skb_checksum_complete)
-*(.text.single_next)
-*(.text.sigqueue_alloc)
-*(.text.shrink_dcache_parent)
-*(.text.select_idle_routine)
-*(.text.run_workqueue)
-*(.text.run_local_timers)
-*(.text.remove_inode_hash)
-*(.text.remove_dquot_ref)
-*(.text.register_binfmt)
-*(.text.read_cache_pages)
-*(.text.rb_last)
-*(.text.pty_open)
-*(.text.proc_root_readdir)
-*(.text.proc_pid_flush)
-*(.text.proc_pident_lookup)
-*(.text.proc_fill_super)
-*(.text.proc_exe_link)
-*(.text.posix_locks_deadlock)
-*(.text.pipe_iov_copy_from_user)
-*(.text.opost)
-*(.text.nf_register_hook)
-*(.text.netif_rx_ni)
-*(.text.m_start)
-*(.text.mpage_writepage)
-*(.text.mm_alloc)
-*(.text.memory_open)
-*(.text.mark_buffer_async_write)
-*(.text.lru_add_drain_all)
-*(.text.locks_init_lock)
-*(.text.locks_delete_lock)
-*(.text.lock_hrtimer_base)
-*(.text.load_script)
-*(.text.__kill_fasync)
-*(.text.ip_mc_sf_allow)
-*(.text.__ioremap)
-*(.text.int_with_check)
-*(.text.int_sqrt)
-*(.text.install_thread_keyring)
-*(.text.init_page_buffers)
-*(.text.inet_sock_destruct)
-*(.text.idle_notifier_register)
-*(.text.ide_execute_command)
-*(.text.ide_end_drive_cmd)
-*(.text.__ide_dma_host_on)
-*(.text.hrtimer_run_queues)
-*(.text.hpet_mask_rtc_irq_bit)
-*(.text.__get_zone_counts)
-*(.text.get_zone_counts)
-*(.text.get_write_access)
-*(.text.get_fs_struct)
-*(.text.get_dirty_limits)
-*(.text.generic_readlink)
-*(.text.free_hot_page)
-*(.text.finish_wait)
-*(.text.find_inode)
-*(.text.find_first_bit)
-*(.text.__filemap_fdatawrite_range)
-*(.text.__filemap_copy_from_user_iovec)
-*(.text.exit_aio)
-*(.text.elv_set_request)
-*(.text.elv_former_request)
-*(.text.dup_namespace)
-*(.text.dupfd)
-*(.text.dummy_socket_getsockopt)
-*(.text.dummy_sb_post_mountroot)
-*(.text.dummy_quotactl)
-*(.text.dummy_inode_rename)
-*(.text.__do_SAK)
-*(.text.do_pipe)
-*(.text.do_fsync)
-*(.text.d_instantiate_unique)
-*(.text.d_find_alias)
-*(.text.deny_write_access)
-*(.text.dentry_unhash)
-*(.text.d_delete)
-*(.text.datagram_poll)
-*(.text.cpuset_fork)
-*(.text.cpuid_read)
-*(.text.copy_namespace)
-*(.text.cond_resched)
-*(.text.check_version)
-*(.text.__change_page_attr)
-*(.text.cfq_slab_kill)
-*(.text.cfq_completed_request)
-*(.text.cdrom_pc_intr)
-*(.text.cdrom_decode_status)
-*(.text.cap_capset_check)
-*(.text.blk_put_request)
-*(.text.bio_fs_destructor)
-*(.text.bictcp_min_cwnd)
-*(.text.alloc_chrdev_region)
-*(.text.add_element)
-*(.text.acct_update_integrals)
-*(.text.write_boundary_block)
-*(.text.writeback_release)
-*(.text.writeback_inodes)
-*(.text.wake_up_state)
-*(.text.__wake_up_locked)
-*(.text.wake_futex)
-*(.text.wait_task_inactive)
-*(.text.__wait_on_freeing_inode)
-*(.text.wait_noreap_copyout)
-*(.text.vmstat_start)
-*(.text.vgacon_do_font_op)
-*(.text.vfs_readv)
-*(.text.vfs_quota_sync)
-*(.text.update_queue)
-*(.text.unshare_files)
-*(.text.unmap_vm_area)
-*(.text.unix_socketpair)
-*(.text.unix_release_sock)
-*(.text.unix_detach_fds)
-*(.text.unix_create1)
-*(.text.unix_bind)
-*(.text.udp_sendmsg)
-*(.text.udp_rcv)
-*(.text.udp_queue_rcv_skb)
-*(.text.uart_write)
-*(.text.uart_startup)
-*(.text.uart_open)
-*(.text.tty_vhangup)
-*(.text.tty_termios_baud_rate)
-*(.text.tty_release)
-*(.text.tty_ldisc_ref)
-*(.text.throttle_vm_writeout)
-*(.text.058)
-*(.text.tcp_xmit_probe_skb)
-*(.text.tcp_v4_send_check)
-*(.text.tcp_v4_destroy_sock)
-*(.text.tcp_sync_mss)
-*(.text.tcp_snd_test)
-*(.text.tcp_slow_start)
-*(.text.tcp_send_fin)
-*(.text.tcp_rtt_estimator)
-*(.text.tcp_parse_options)
-*(.text.tcp_ioctl)
-*(.text.tcp_init_tso_segs)
-*(.text.tcp_init_cwnd)
-*(.text.tcp_getsockopt)
-*(.text.tcp_fin)
-*(.text.tcp_connect)
-*(.text.tcp_cong_avoid)
-*(.text.__tcp_checksum_complete_user)
-*(.text.task_dumpable)
-*(.text.sys_wait4)
-*(.text.sys_utimes)
-*(.text.sys_symlinkat)
-*(.text.sys_socketpair)
-*(.text.sys_rmdir)
-*(.text.sys_readahead)
-*(.text.sys_nanosleep)
-*(.text.sys_linkat)
-*(.text.sys_fstat)
-*(.text.sysfs_readdir)
-*(.text.sys_execve)
-*(.text.sysenter_tracesys)
-*(.text.sys_chown)
-*(.text.stub_clone)
-*(.text.strrchr)
-*(.text.strncpy)
-*(.text.stopmachine_set_state)
-*(.text.sock_sendmsg)
-*(.text.sock_release)
-*(.text.sock_fasync)
-*(.text.sock_close)
-*(.text.sk_stream_write_space)
-*(.text.sk_reset_timer)
-*(.text.skb_split)
-*(.text.skb_recv_datagram)
-*(.text.skb_queue_tail)
-*(.text.sk_attach_filter)
-*(.text.si_swapinfo)
-*(.text.simple_strtoll)
-*(.text.set_termios)
-*(.text.set_task_comm)
-*(.text.set_shrinker)
-*(.text.set_normalized_timespec)
-*(.text.set_brk)
-*(.text.serial_in)
-*(.text.seq_printf)
-*(.text.secure_dccp_sequence_number)
-*(.text.rwlock_bug)
-*(.text.rt_hash_code)
-*(.text.__rta_fill)
-*(.text.__request_resource)
-*(.text.relocate_new_kernel)
-*(.text.release_thread)
-*(.text.release_mem)
-*(.text.rb_prev)
-*(.text.rb_first)
-*(.text.random_poll)
-*(.text.__put_super_and_need_restart)
-*(.text.pty_write)
-*(.text.ptrace_stop)
-*(.text.proc_self_readlink)
-*(.text.proc_root_lookup)
-*(.text.proc_root_link)
-*(.text.proc_pid_make_inode)
-*(.text.proc_pid_attr_write)
-*(.text.proc_lookupfd)
-*(.text.proc_delete_inode)
-*(.text.posix_same_owner)
-*(.text.posix_block_lock)
-*(.text.poll_initwait)
-*(.text.pipe_write)
-*(.text.pipe_read_fasync)
-*(.text.pipe_ioctl)
-*(.text.pdflush)
-*(.text.pci_user_read_config_dword)
-*(.text.page_readlink)
-*(.text.null_lseek)
-*(.text.nf_hook_slow)
-*(.text.netlink_sock_destruct)
-*(.text.netlink_broadcast)
-*(.text.neigh_resolve_output)
-*(.text.name_to_int)
-*(.text.mwait_idle)
-*(.text.mutex_trylock)
-*(.text.mutex_debug_check_no_locks_held)
-*(.text.m_stop)
-*(.text.mpage_end_io_write)
-*(.text.mpage_alloc)
-*(.text.move_page_tables)
-*(.text.mounts_open)
-*(.text.__memset)
-*(.text.memcpy_fromiovec)
-*(.text.make_8259A_irq)
-*(.text.lookup_user_key_possessed)
-*(.text.lookup_create)
-*(.text.locks_insert_lock)
-*(.text.locks_alloc_lock)
-*(.text.kthread_should_stop)
-*(.text.kswapd)
-*(.text.kobject_uevent)
-*(.text.kobject_get_path)
-*(.text.kobject_get)
-*(.text.klist_children_put)
-*(.text.__ip_route_output_key)
-*(.text.ip_flush_pending_frames)
-*(.text.ip_compute_csum)
-*(.text.ip_append_data)
-*(.text.ioc_set_batching)
-*(.text.invalidate_inode_pages)
-*(.text.__invalidate_device)
-*(.text.install_arg_page)
-*(.text.in_sched_functions)
-*(.text.inotify_unmount_inodes)
-*(.text.init_once)
-*(.text.init_cdrom_command)
-*(.text.inet_stream_connect)
-*(.text.inet_sk_rebuild_header)
-*(.text.inet_csk_addr2sockaddr)
-*(.text.inet_create)
-*(.text.ifind)
-*(.text.ide_setup_dma)
-*(.text.ide_outsw)
-*(.text.ide_fixstring)
-*(.text.ide_dma_setup)
-*(.text.ide_cdrom_packet)
-*(.text.ide_cd_put)
-*(.text.ide_build_sglist)
-*(.text.i8259A_shutdown)
-*(.text.hung_up_tty_ioctl)
-*(.text.hrtimer_nanosleep)
-*(.text.hrtimer_init)
-*(.text.hrtimer_cancel)
-*(.text.hash_futex)
-*(.text.group_send_sig_info)
-*(.text.grab_cache_page_nowait)
-*(.text.get_wchan)
-*(.text.get_stack)
-*(.text.get_page_state)
-*(.text.getnstimeofday)
-*(.text.get_node)
-*(.text.get_kprobe)
-*(.text.generic_unplug_device)
-*(.text.free_task)
-*(.text.frag_show)
-*(.text.find_next_zero_string)
-*(.text.filp_open)
-*(.text.fillonedir)
-*(.text.exit_io_context)
-*(.text.exit_idle)
-*(.text.exact_lock)
-*(.text.eth_header)
-*(.text.dummy_unregister_security)
-*(.text.dummy_socket_post_create)
-*(.text.dummy_socket_listen)
-*(.text.dummy_quota_on)
-*(.text.dummy_inode_follow_link)
-*(.text.dummy_file_receive)
-*(.text.dummy_file_mprotect)
-*(.text.dummy_file_lock)
-*(.text.dummy_file_ioctl)
-*(.text.dummy_bprm_post_apply_creds)
-*(.text.do_writepages)
-*(.text.__down_interruptible)
-*(.text.do_notify_resume)
-*(.text.do_acct_process)
-*(.text.del_timer_sync)
-*(.text.default_rebuild_header)
-*(.text.d_callback)
-*(.text.dcache_readdir)
-*(.text.ctrl_dumpfamily)
-*(.text.cpuset_rmdir)
-*(.text.copy_strings_kernel)
-*(.text.con_write_room)
-*(.text.complete_all)
-*(.text.collect_sigign_sigcatch)
-*(.text.clear_user)
-*(.text.check_unthrottle)
-*(.text.cdrom_release)
-*(.text.cdrom_newpc_intr)
-*(.text.cdrom_ioctl)
-*(.text.cdrom_check_status)
-*(.text.cdev_put)
-*(.text.cdev_add)
-*(.text.cap_ptrace)
-*(.text.cap_bprm_secureexec)
-*(.text.cache_alloc_refill)
-*(.text.bmap)
-*(.text.blk_run_queue)
-*(.text.blk_queue_dma_alignment)
-*(.text.blk_ordered_req_seq)
-*(.text.blk_backing_dev_unplug)
-*(.text.__bitmap_subset)
-*(.text.__bitmap_and)
-*(.text.bio_unmap_user)
-*(.text.__bforget)
-*(.text.bd_forget)
-*(.text.bad_pipe_w)
-*(.text.bad_get_user)
-*(.text.audit_free)
-*(.text.anon_vma_ctor)
-*(.text.anon_pipe_buf_map)
-*(.text.alloc_sock_iocb)
-*(.text.alloc_fdset)
-*(.text.aio_kick_handler)
-*(.text.__add_entropy_words)
-*(.text.add_disk_randomness)
diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index 0b3603adf56d..47496a40e84f 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -11,120 +11,54 @@
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/string.h>
+#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/init.h>
-#include <linux/module.h>
#include <asm/smp.h>
#include <asm/ipi.h>
+#include <asm/genapic.h>
-#if defined(CONFIG_ACPI)
+#ifdef CONFIG_ACPI
#include <acpi/acpi_bus.h>
#endif
/* which logical CPU number maps to which CPU (physical APIC ID) */
-u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
+u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly
+ = { [0 ... NR_CPUS-1] = BAD_APICID };
EXPORT_SYMBOL(x86_cpu_to_apicid);
-u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-extern struct genapic apic_cluster;
-extern struct genapic apic_flat;
-extern struct genapic apic_physflat;
+u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-struct genapic *genapic = &apic_flat;
-struct genapic *genapic_force;
+struct genapic __read_mostly *genapic = &apic_flat;
/*
* Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
*/
-void __init clustered_apic_check(void)
+void __init setup_apic_routing(void)
{
- long i;
- u8 clusters, max_cluster;
- u8 id;
- u8 cluster_cnt[NUM_APIC_CLUSTERS];
- int max_apic = 0;
-
- /* genapic selection can be forced because of certain quirks.
- */
- if (genapic_force) {
- genapic = genapic_force;
- goto print;
- }
-
-#if defined(CONFIG_ACPI)
+#ifdef CONFIG_ACPI
/*
- * Some x86_64 machines use physical APIC mode regardless of how many
- * procs/clusters are present (x86_64 ES7000 is an example).
+ * Quirk: some x86_64 machines can only use physical APIC mode
+ * regardless of how many processors are present (x86_64 ES7000
+ * is an example).
*/
- if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID)
- if (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) {
- genapic = &apic_cluster;
- goto print;
- }
-#endif
-
- memset(cluster_cnt, 0, sizeof(cluster_cnt));
- for (i = 0; i < NR_CPUS; i++) {
- id = bios_cpu_apicid[i];
- if (id == BAD_APICID)
- continue;
- if (id > max_apic)
- max_apic = id;
- cluster_cnt[APIC_CLUSTERID(id)]++;
- }
-
- /* Don't use clustered mode on AMD platforms. */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
+ (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
genapic = &apic_physflat;
-#ifndef CONFIG_HOTPLUG_CPU
- /* In the CPU hotplug case we cannot use broadcast mode
- because that opens a race when a CPU is removed.
- Stay at physflat mode in this case.
- It is bad to do this unconditionally though. Once
- we have ACPI platform support for CPU hotplug
- we should detect hotplug capablity from ACPI tables and
- only do this when really needed. -AK */
- if (max_apic <= 8)
- genapic = &apic_flat;
+ else
#endif
- goto print;
- }
- clusters = 0;
- max_cluster = 0;
-
- for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
- if (cluster_cnt[i] > 0) {
- ++clusters;
- if (cluster_cnt[i] > max_cluster)
- max_cluster = cluster_cnt[i];
- }
- }
-
- /*
- * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode,
- * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical
- * else physical mode.
- * (We don't use lowest priority delivery + HW APIC IRQ steering, so
- * can ignore the clustered logical case and go straight to physical.)
- */
- if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) {
-#ifdef CONFIG_HOTPLUG_CPU
- /* Don't use APIC shortcuts in CPU hotplug to avoid races */
- genapic = &apic_physflat;
-#else
+ if (cpus_weight(cpu_possible_map) <= 8)
genapic = &apic_flat;
-#endif
- } else
- genapic = &apic_cluster;
+ else
+ genapic = &apic_physflat;
-print:
printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
}
-/* Same for both flat and clustered. */
+/* Same for both flat and physical. */
void send_IPI_self(int vector)
{
diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c
deleted file mode 100644
index 73d76308b955..000000000000
--- a/arch/x86_64/kernel/genapic_cluster.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright 2004 James Cleverdon, IBM.
- * Subject to the GNU Public License, v.2
- *
- * Clustered APIC subarch code. Up to 255 CPUs, physical delivery.
- * (A more realistic maximum is around 230 CPUs.)
- *
- * Hacked for x86-64 by James Cleverdon from i386 architecture code by
- * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
- * James Cleverdon.
- */
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <asm/smp.h>
-#include <asm/ipi.h>
-
-
-/*
- * Set up the logical destination ID.
- *
- * Intel recommends to set DFR, LDR and TPR before enabling
- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116). So here it goes...
- */
-static void cluster_init_apic_ldr(void)
-{
- unsigned long val, id;
- long i, count;
- u8 lid;
- u8 my_id = hard_smp_processor_id();
- u8 my_cluster = APIC_CLUSTER(my_id);
-
- /* Create logical APIC IDs by counting CPUs already in cluster. */
- for (count = 0, i = NR_CPUS; --i >= 0; ) {
- lid = x86_cpu_to_log_apicid[i];
- if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
- ++count;
- }
- /*
- * We only have a 4 wide bitmap in cluster mode. There's no way
- * to get above 60 CPUs and still give each one it's own bit.
- * But, we're using physical IRQ delivery, so we don't care.
- * Use bit 3 for the 4th through Nth CPU in each cluster.
- */
- if (count >= XAPIC_DEST_CPUS_SHIFT)
- count = 3;
- id = my_cluster | (1UL << count);
- x86_cpu_to_log_apicid[smp_processor_id()] = id;
- apic_write(APIC_DFR, APIC_DFR_CLUSTER);
- val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
- val |= SET_APIC_LOGICAL_ID(id);
- apic_write(APIC_LDR, val);
-}
-
-/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
-
-static cpumask_t cluster_target_cpus(void)
-{
- return cpumask_of_cpu(0);
-}
-
-static cpumask_t cluster_vector_allocation_domain(int cpu)
-{
- cpumask_t domain = CPU_MASK_NONE;
- cpu_set(cpu, domain);
- return domain;
-}
-
-static void cluster_send_IPI_mask(cpumask_t mask, int vector)
-{
- send_IPI_mask_sequence(mask, vector);
-}
-
-static void cluster_send_IPI_allbutself(int vector)
-{
- cpumask_t mask = cpu_online_map;
-
- cpu_clear(smp_processor_id(), mask);
-
- if (!cpus_empty(mask))
- cluster_send_IPI_mask(mask, vector);
-}
-
-static void cluster_send_IPI_all(int vector)
-{
- cluster_send_IPI_mask(cpu_online_map, vector);
-}
-
-static int cluster_apic_id_registered(void)
-{
- return 1;
-}
-
-static unsigned int cluster_cpu_mask_to_apicid(cpumask_t cpumask)
-{
- int cpu;
-
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- cpu = first_cpu(cpumask);
- if ((unsigned)cpu < NR_CPUS)
- return x86_cpu_to_apicid[cpu];
- else
- return BAD_APICID;
-}
-
-/* cpuid returns the value latched in the HW at reset, not the APIC ID
- * register's value. For any box whose BIOS changes APIC IDs, like
- * clustered APIC systems, we must use hard_smp_processor_id.
- *
- * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
- */
-static unsigned int phys_pkg_id(int index_msb)
-{
- return hard_smp_processor_id() >> index_msb;
-}
-
-struct genapic apic_cluster = {
- .name = "clustered",
- .int_delivery_mode = dest_Fixed,
- .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
- .target_cpus = cluster_target_cpus,
- .vector_allocation_domain = cluster_vector_allocation_domain,
- .apic_id_registered = cluster_apic_id_registered,
- .init_apic_ldr = cluster_init_apic_ldr,
- .send_IPI_all = cluster_send_IPI_all,
- .send_IPI_allbutself = cluster_send_IPI_allbutself,
- .send_IPI_mask = cluster_send_IPI_mask,
- .cpu_mask_to_apicid = cluster_cpu_mask_to_apicid,
- .phys_pkg_id = phys_pkg_id,
-};
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index 7c01db8fa9d1..ecb01eefdd27 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -8,6 +8,7 @@
* Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
* James Cleverdon.
*/
+#include <linux/errno.h>
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/string.h>
@@ -16,6 +17,7 @@
#include <linux/init.h>
#include <asm/smp.h>
#include <asm/ipi.h>
+#include <asm/genapic.h>
static cpumask_t flat_target_cpus(void)
{
@@ -60,31 +62,10 @@ static void flat_init_apic_ldr(void)
static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
{
unsigned long mask = cpus_addr(cpumask)[0];
- unsigned long cfg;
unsigned long flags;
local_irq_save(flags);
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- /*
- * prepare target chip field
- */
- cfg = __prepare_ICR2(mask);
- apic_write(APIC_ICR2, cfg);
-
- /*
- * program the ICR
- */
- cfg = __prepare_ICR(0, vector, APIC_DEST_LOGICAL);
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- apic_write(APIC_ICR, cfg);
+ __send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL);
local_irq_restore(flags);
}
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index 598a4d0351fc..1fab487dee86 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -5,6 +5,7 @@
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
* Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
* Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
+ * Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com>
*/
@@ -13,97 +14,131 @@
#include <linux/init.h>
#include <asm/desc.h>
#include <asm/segment.h>
+#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/msr.h>
#include <asm/cache.h>
-
+
/* we are not able to switch in one step to the final KERNEL ADRESS SPACE
- * because we need identity-mapped pages on setup so define __START_KERNEL to
- * 0x100000 for this stage
- *
+ * because we need identity-mapped pages.
+ *
*/
.text
.section .bootstrap.text
- .code32
- .globl startup_32
-/* %bx: 1 if coming from smp trampoline on secondary cpu */
-startup_32:
-
+ .code64
+ .globl startup_64
+startup_64:
+
/*
- * At this point the CPU runs in 32bit protected mode (CS.D = 1) with
- * paging disabled and the point of this file is to switch to 64bit
- * long mode with a kernel mapping for kerneland to jump into the
- * kernel virtual addresses.
- * There is no stack until we set one up.
+ * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
+ * and someone has loaded an identity mapped page table
+ * for us. These identity mapped page tables map all of the
+ * kernel pages and possibly all of memory.
+ *
+ * %esi holds a physical pointer to real_mode_data.
+ *
+ * We come here either directly from a 64bit bootloader, or from
+ * arch/x86_64/boot/compressed/head.S.
+ *
+ * We only come here initially at boot nothing else comes here.
+ *
+ * Since we may be loaded at an address different from what we were
+ * compiled to run at we first fixup the physical addresses in our page
+ * tables and then reload them.
*/
- /* Initialize the %ds segment register */
- movl $__KERNEL_DS,%eax
- movl %eax,%ds
-
- /* Load new GDT with the 64bit segments using 32bit descriptor */
- lgdt pGDT32 - __START_KERNEL_map
-
- /* If the CPU doesn't support CPUID this will double fault.
- * Unfortunately it is hard to check for CPUID without a stack.
+ /* Compute the delta between the address I am compiled to run at and the
+ * address I am actually running at.
*/
-
- /* Check if extended functions are implemented */
- movl $0x80000000, %eax
- cpuid
- cmpl $0x80000000, %eax
- jbe no_long_mode
- /* Check if long mode is implemented */
- mov $0x80000001, %eax
- cpuid
- btl $29, %edx
- jnc no_long_mode
-
- /*
- * Prepare for entering 64bits mode
+ leaq _text(%rip), %rbp
+ subq $_text - __START_KERNEL_map, %rbp
+
+ /* Is the address not 2M aligned? */
+ movq %rbp, %rax
+ andl $~LARGE_PAGE_MASK, %eax
+ testl %eax, %eax
+ jnz bad_address
+
+ /* Is the address too large? */
+ leaq _text(%rip), %rdx
+ movq $PGDIR_SIZE, %rax
+ cmpq %rax, %rdx
+ jae bad_address
+
+ /* Fixup the physical addresses in the page table
*/
+ addq %rbp, init_level4_pgt + 0(%rip)
+ addq %rbp, init_level4_pgt + (258*8)(%rip)
+ addq %rbp, init_level4_pgt + (511*8)(%rip)
+
+ addq %rbp, level3_ident_pgt + 0(%rip)
+ addq %rbp, level3_kernel_pgt + (510*8)(%rip)
+
+ /* Add an Identity mapping if I am above 1G */
+ leaq _text(%rip), %rdi
+ andq $LARGE_PAGE_MASK, %rdi
+
+ movq %rdi, %rax
+ shrq $PUD_SHIFT, %rax
+ andq $(PTRS_PER_PUD - 1), %rax
+ jz ident_complete
+
+ leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
+ leaq level3_ident_pgt(%rip), %rbx
+ movq %rdx, 0(%rbx, %rax, 8)
+
+ movq %rdi, %rax
+ shrq $PMD_SHIFT, %rax
+ andq $(PTRS_PER_PMD - 1), %rax
+ leaq __PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx
+ leaq level2_spare_pgt(%rip), %rbx
+ movq %rdx, 0(%rbx, %rax, 8)
+ident_complete:
+
+ /* Fixup the kernel text+data virtual addresses
+ */
+ leaq level2_kernel_pgt(%rip), %rdi
+ leaq 4096(%rdi), %r8
+ /* See if it is a valid page table entry */
+1: testq $1, 0(%rdi)
+ jz 2f
+ addq %rbp, 0(%rdi)
+ /* Go to the next page */
+2: addq $8, %rdi
+ cmp %r8, %rdi
+ jne 1b
+
+ /* Fixup phys_base */
+ addq %rbp, phys_base(%rip)
- /* Enable PAE mode */
- xorl %eax, %eax
- btsl $5, %eax
- movl %eax, %cr4
-
- /* Setup early boot stage 4 level pagetables */
- movl $(boot_level4_pgt - __START_KERNEL_map), %eax
- movl %eax, %cr3
-
- /* Setup EFER (Extended Feature Enable Register) */
- movl $MSR_EFER, %ecx
- rdmsr
-
- /* Enable Long Mode */
- btsl $_EFER_LME, %eax
-
- /* Make changes effective */
- wrmsr
+#ifdef CONFIG_SMP
+ addq %rbp, trampoline_level4_pgt + 0(%rip)
+ addq %rbp, trampoline_level4_pgt + (511*8)(%rip)
+#endif
+#ifdef CONFIG_ACPI_SLEEP
+ addq %rbp, wakeup_level4_pgt + 0(%rip)
+ addq %rbp, wakeup_level4_pgt + (511*8)(%rip)
+#endif
- xorl %eax, %eax
- btsl $31, %eax /* Enable paging and in turn activate Long Mode */
- btsl $0, %eax /* Enable protected mode */
- /* Make changes effective */
- movl %eax, %cr0
- /*
- * At this point we're in long mode but in 32bit compatibility mode
- * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
- * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use
- * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
+ /* Due to ENTRY(), sometimes the empty space gets filled with
+ * zeros. Better take a jmp than relying on empty space being
+ * filled with 0x90 (nop)
*/
- ljmp $__KERNEL_CS, $(startup_64 - __START_KERNEL_map)
-
- .code64
- .org 0x100
- .globl startup_64
-startup_64:
- /* We come here either from startup_32
- * or directly from a 64bit bootloader.
- * Since we may have come directly from a bootloader we
- * reload the page tables here.
+ jmp secondary_startup_64
+ENTRY(secondary_startup_64)
+ /*
+ * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
+ * and someone has loaded a mapped page table.
+ *
+ * %esi holds a physical pointer to real_mode_data.
+ *
+ * We come here either from startup_64 (using physical addresses)
+ * or from trampoline.S (using virtual addresses).
+ *
+ * Using virtual addresses from trampoline.S removes the need
+ * to have any identity mapped pages in the kernel page table
+ * after the boot processor executes this code.
*/
/* Enable PAE mode and PGE */
@@ -113,9 +148,15 @@ startup_64:
movq %rax, %cr4
/* Setup early boot stage 4 level pagetables. */
- movq $(boot_level4_pgt - __START_KERNEL_map), %rax
+ movq $(init_level4_pgt - __START_KERNEL_map), %rax
+ addq phys_base(%rip), %rax
movq %rax, %cr3
+ /* Ensure I am executing from virtual addresses */
+ movq $1f, %rax
+ jmp *%rax
+1:
+
/* Check if nx is implemented */
movl $0x80000001, %eax
cpuid
@@ -124,17 +165,11 @@ startup_64:
/* Setup EFER (Extended Feature Enable Register) */
movl $MSR_EFER, %ecx
rdmsr
-
- /* Enable System Call */
- btsl $_EFER_SCE, %eax
-
- /* No Execute supported? */
- btl $20,%edi
+ btsl $_EFER_SCE, %eax /* Enable System Call */
+ btl $20,%edi /* No Execute supported? */
jnc 1f
btsl $_EFER_NX, %eax
-1:
- /* Make changes effective */
- wrmsr
+1: wrmsr /* Make changes effective */
/* Setup cr0 */
#define CR0_PM 1 /* protected mode */
@@ -161,7 +196,7 @@ startup_64:
* addresses where we're currently running on. We have to do that here
* because in 32bit we couldn't load a 64bit linear address.
*/
- lgdt cpu_gdt_descr
+ lgdt cpu_gdt_descr(%rip)
/* set up data segments. actually 0 would do too */
movl $__KERNEL_DS,%eax
@@ -212,6 +247,9 @@ initial_code:
init_rsp:
.quad init_thread_union+THREAD_SIZE-8
+bad_address:
+ jmp bad_address
+
ENTRY(early_idt_handler)
cmpl $2,early_recursion_flag(%rip)
jz 1f
@@ -240,110 +278,66 @@ early_idt_msg:
early_idt_ripmsg:
.asciz "RIP %s\n"
-.code32
-ENTRY(no_long_mode)
- /* This isn't an x86-64 CPU so hang */
-1:
- jmp 1b
-
-.org 0xf00
- .globl pGDT32
-pGDT32:
- .word gdt_end-cpu_gdt_table-1
- .long cpu_gdt_table-__START_KERNEL_map
-
-.org 0xf10
-ljumpvector:
- .long startup_64-__START_KERNEL_map
- .word __KERNEL_CS
+.balign PAGE_SIZE
-ENTRY(stext)
-ENTRY(_stext)
-
- $page = 0
#define NEXT_PAGE(name) \
- $page = $page + 1; \
- .org $page * 0x1000; \
- phys_/**/name = $page * 0x1000 + __PHYSICAL_START; \
+ .balign PAGE_SIZE; \
ENTRY(name)
+/* Automate the creation of 1 to 1 mapping pmd entries */
+#define PMDS(START, PERM, COUNT) \
+ i = 0 ; \
+ .rept (COUNT) ; \
+ .quad (START) + (i << 21) + (PERM) ; \
+ i = i + 1 ; \
+ .endr
+
+ /*
+ * This default setting generates an ident mapping at address 0x100000
+ * and a mapping for the kernel that precisely maps virtual address
+ * 0xffffffff80000000 to physical address 0x000000. (always using
+ * 2Mbyte large pages provided by PAE mode)
+ */
NEXT_PAGE(init_level4_pgt)
- /* This gets initialized in x86_64_start_kernel */
- .fill 512,8,0
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ .fill 257,8,0
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ .fill 252,8,0
+ /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
NEXT_PAGE(level3_ident_pgt)
- .quad phys_level2_ident_pgt | 0x007
+ .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
.fill 511,8,0
NEXT_PAGE(level3_kernel_pgt)
.fill 510,8,0
/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
- .quad phys_level2_kernel_pgt | 0x007
+ .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
.fill 1,8,0
NEXT_PAGE(level2_ident_pgt)
- /* 40MB for bootup. */
- i = 0
- .rept 20
- .quad i << 21 | 0x083
- i = i + 1
- .endr
- /* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */
- .globl temp_boot_pmds
-temp_boot_pmds:
- .fill 492,8,0
-
+ /* Since I easily can, map the first 1G.
+ * Don't set NX because code runs from these pages.
+ */
+ PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
+
NEXT_PAGE(level2_kernel_pgt)
/* 40MB kernel mapping. The kernel code cannot be bigger than that.
When you change this change KERNEL_TEXT_SIZE in page.h too. */
/* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
- i = 0
- .rept 20
- .quad i << 21 | 0x183
- i = i + 1
- .endr
+ PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL,
+ KERNEL_TEXT_SIZE/PMD_SIZE)
/* Module mapping starts here */
- .fill 492,8,0
+ .fill (PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0
-NEXT_PAGE(level3_physmem_pgt)
- .quad phys_level2_kernel_pgt | 0x007 /* so that __va works even before pagetable_init */
- .fill 511,8,0
+NEXT_PAGE(level2_spare_pgt)
+ .fill 512,8,0
+#undef PMDS
#undef NEXT_PAGE
.data
-
-#ifdef CONFIG_ACPI_SLEEP
- .align PAGE_SIZE
-ENTRY(wakeup_level4_pgt)
- .quad phys_level3_ident_pgt | 0x007
- .fill 255,8,0
- .quad phys_level3_physmem_pgt | 0x007
- .fill 254,8,0
- /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
- .quad phys_level3_kernel_pgt | 0x007
-#endif
-
-#ifndef CONFIG_HOTPLUG_CPU
- __INITDATA
-#endif
- /*
- * This default setting generates an ident mapping at address 0x100000
- * and a mapping for the kernel that precisely maps virtual address
- * 0xffffffff80000000 to physical address 0x000000. (always using
- * 2Mbyte large pages provided by PAE mode)
- */
- .align PAGE_SIZE
-ENTRY(boot_level4_pgt)
- .quad phys_level3_ident_pgt | 0x007
- .fill 255,8,0
- .quad phys_level3_physmem_pgt | 0x007
- .fill 254,8,0
- /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
- .quad phys_level3_kernel_pgt | 0x007
-
- .data
-
.align 16
.globl cpu_gdt_descr
cpu_gdt_descr:
@@ -357,6 +351,10 @@ gdt:
.endr
#endif
+ENTRY(phys_base)
+ /* This must match the first entry in level2_kernel_pgt */
+ .quad 0x0000000000000000
+
/* We need valid kernel segments for data and code in long mode too
* IRET will check the segment types kkeil 2000/10/28
* Also sysret mandates a special GDT layout
@@ -370,13 +368,13 @@ gdt:
ENTRY(cpu_gdt_table)
.quad 0x0000000000000000 /* NULL descriptor */
+ .quad 0x00cf9b000000ffff /* __KERNEL32_CS */
+ .quad 0x00af9b000000ffff /* __KERNEL_CS */
+ .quad 0x00cf93000000ffff /* __KERNEL_DS */
+ .quad 0x00cffb000000ffff /* __USER32_CS */
+ .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */
+ .quad 0x00affb000000ffff /* __USER_CS */
.quad 0x0 /* unused */
- .quad 0x00af9a000000ffff /* __KERNEL_CS */
- .quad 0x00cf92000000ffff /* __KERNEL_DS */
- .quad 0x00cffa000000ffff /* __USER32_CS */
- .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */
- .quad 0x00affa000000ffff /* __USER_CS */
- .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
.quad 0,0 /* TSS */
.quad 0,0 /* LDT */
.quad 0,0,0 /* three TLS descriptors */
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index 5f197b0a330a..213d90e04755 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -18,8 +18,16 @@
#include <asm/setup.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
#include <asm/sections.h>
+static void __init zap_identity_mappings(void)
+{
+ pgd_t *pgd = pgd_offset_k(0UL);
+ pgd_clear(pgd);
+ __flush_tlb();
+}
+
/* Don't add a printk in there. printk relies on the PDA which is not initialized
yet. */
static void __init clear_bss(void)
@@ -29,25 +37,24 @@ static void __init clear_bss(void)
}
#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
-#define OLD_CL_MAGIC_ADDR 0x90020
+#define OLD_CL_MAGIC_ADDR 0x20
#define OLD_CL_MAGIC 0xA33F
-#define OLD_CL_BASE_ADDR 0x90000
-#define OLD_CL_OFFSET 0x90022
+#define OLD_CL_OFFSET 0x22
static void __init copy_bootdata(char *real_mode_data)
{
- int new_data;
+ unsigned long new_data;
char * command_line;
memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE);
- new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
+ new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER);
if (!new_data) {
- if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
+ if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) {
return;
}
- new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
+ new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET);
}
- command_line = (char *) ((u64)(new_data));
+ command_line = __va(new_data);
memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
}
@@ -55,26 +62,30 @@ void __init x86_64_start_kernel(char * real_mode_data)
{
int i;
+ /*
+ * Make sure kernel is aligned to 2MB address. Catching it at compile
+ * time is better. Change your config file and compile the kernel
+ * for a 2MB aligned address (CONFIG_PHYSICAL_START)
+ */
+ BUILD_BUG_ON(CONFIG_PHYSICAL_START & (__KERNEL_ALIGN - 1));
+
/* clear bss before set_intr_gate with early_idt_handler */
clear_bss();
+ /* Make NULL pointers segfault */
+ zap_identity_mappings();
+
for (i = 0; i < IDT_ENTRIES; i++)
set_intr_gate(i, early_idt_handler);
asm volatile("lidt %0" :: "m" (idt_descr));
early_printk("Kernel alive\n");
- /*
- * switch to init_level4_pgt from boot_level4_pgt
- */
- memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t));
- asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
-
for (i = 0; i < NR_CPUS; i++)
cpu_pda(i) = &boot_cpu_pda[i];
pda_init(0);
- copy_bootdata(real_mode_data);
+ copy_bootdata(__va(real_mode_data));
#ifdef CONFIG_SMP
cpu_set(0, cpu_online_map);
#endif
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index b7d2b76b92d4..2a2df14dab7e 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -907,10 +907,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
enable_8259A_irq(0);
}
-void __init UNEXPECTED_IO_APIC(void)
-{
-}
-
void __apicdebuginit print_IO_APIC(void)
{
int apic, i;
@@ -946,40 +942,16 @@ void __apicdebuginit print_IO_APIC(void)
printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
- if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
- UNEXPECTED_IO_APIC();
printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
- if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
- (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
- (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
- (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
- (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
- (reg_01.bits.entries != 0x2E) &&
- (reg_01.bits.entries != 0x3F) &&
- (reg_01.bits.entries != 0x03)
- )
- UNEXPECTED_IO_APIC();
printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
- if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
- (reg_01.bits.version != 0x02) && /* 82801BA IO-APICs (ICH2) */
- (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
- (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
- (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
- (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
- )
- UNEXPECTED_IO_APIC();
- if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
- UNEXPECTED_IO_APIC();
if (reg_01.bits.version >= 0x10) {
printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
- if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
- UNEXPECTED_IO_APIC();
}
printk(KERN_DEBUG ".... IRQ redirection table:\n");
@@ -1407,8 +1379,7 @@ static void irq_complete_move(unsigned int irq)
vector = ~get_irq_regs()->orig_rax;
me = smp_processor_id();
- if ((vector == cfg->vector) &&
- cpu_isset(smp_processor_id(), cfg->domain)) {
+ if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
cpumask_t cleanup_mask;
cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
diff --git a/arch/x86_64/kernel/ioport.c b/arch/x86_64/kernel/ioport.c
index 745b1f0f494e..387d347b0e07 100644
--- a/arch/x86_64/kernel/ioport.c
+++ b/arch/x86_64/kernel/ioport.c
@@ -16,6 +16,7 @@
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/thread_info.h>
+#include <linux/syscalls.h>
/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c
index 0497e3bd5bff..a8bb33c1a8f2 100644
--- a/arch/x86_64/kernel/machine_kexec.c
+++ b/arch/x86_64/kernel/machine_kexec.c
@@ -191,19 +191,19 @@ NORET_TYPE void machine_kexec(struct kimage *image)
page_list[PA_CONTROL_PAGE] = __pa(control_page);
page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
- page_list[PA_PGD] = __pa(kexec_pgd);
+ page_list[PA_PGD] = __pa_symbol(&kexec_pgd);
page_list[VA_PGD] = (unsigned long)kexec_pgd;
- page_list[PA_PUD_0] = __pa(kexec_pud0);
+ page_list[PA_PUD_0] = __pa_symbol(&kexec_pud0);
page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
- page_list[PA_PMD_0] = __pa(kexec_pmd0);
+ page_list[PA_PMD_0] = __pa_symbol(&kexec_pmd0);
page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
- page_list[PA_PTE_0] = __pa(kexec_pte0);
+ page_list[PA_PTE_0] = __pa_symbol(&kexec_pte0);
page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
- page_list[PA_PUD_1] = __pa(kexec_pud1);
+ page_list[PA_PUD_1] = __pa_symbol(&kexec_pud1);
page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
- page_list[PA_PMD_1] = __pa(kexec_pmd1);
+ page_list[PA_PMD_1] = __pa_symbol(&kexec_pmd1);
page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
- page_list[PA_PTE_1] = __pa(kexec_pte1);
+ page_list[PA_PTE_1] = __pa_symbol(&kexec_pte1);
page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
page_list[PA_TABLE_PAGE] =
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 8011a8e1c7d4..fa2672682477 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -323,10 +323,13 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
#endif /* CONFIG_X86_MCE_INTEL */
/*
- * Periodic polling timer for "silent" machine check errors.
+ * Periodic polling timer for "silent" machine check errors. If the
+ * poller finds an MCE, poll 2x faster. When the poller finds no more
+ * errors, poll 2x slower (up to check_interval seconds).
*/
static int check_interval = 5 * 60; /* 5 minutes */
+static int next_interval; /* in jiffies */
static void mcheck_timer(struct work_struct *work);
static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);
@@ -339,7 +342,6 @@ static void mcheck_check_cpu(void *info)
static void mcheck_timer(struct work_struct *work)
{
on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
- schedule_delayed_work(&mcheck_work, check_interval * HZ);
/*
* It's ok to read stale data here for notify_user and
@@ -349,17 +351,30 @@ static void mcheck_timer(struct work_struct *work)
* writes.
*/
if (notify_user && console_logged) {
+ static unsigned long last_print;
+ unsigned long now = jiffies;
+
+ /* if we logged an MCE, reduce the polling interval */
+ next_interval = max(next_interval/2, HZ/100);
notify_user = 0;
clear_bit(0, &console_logged);
- printk(KERN_INFO "Machine check events logged\n");
+ if (time_after_eq(now, last_print + (check_interval*HZ))) {
+ last_print = now;
+ printk(KERN_INFO "Machine check events logged\n");
+ }
+ } else {
+ next_interval = min(next_interval*2, check_interval*HZ);
}
+
+ schedule_delayed_work(&mcheck_work, next_interval);
}
static __init int periodic_mcheck_init(void)
{
- if (check_interval)
- schedule_delayed_work(&mcheck_work, check_interval*HZ);
+ next_interval = check_interval * HZ;
+ if (next_interval)
+ schedule_delayed_work(&mcheck_work, next_interval);
return 0;
}
__initcall(periodic_mcheck_init);
@@ -597,12 +612,13 @@ static int mce_resume(struct sys_device *dev)
/* Reinit MCEs after user configuration changes */
static void mce_restart(void)
{
- if (check_interval)
+ if (next_interval)
cancel_delayed_work(&mcheck_work);
/* Timer race is harmless here */
on_each_cpu(mce_init, NULL, 1, 1);
- if (check_interval)
- schedule_delayed_work(&mcheck_work, check_interval*HZ);
+ next_interval = check_interval * HZ;
+ if (next_interval)
+ schedule_delayed_work(&mcheck_work, next_interval);
}
static struct sysdev_class mce_sysclass = {
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 455aa0b932f0..d0dc4891599b 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -300,7 +300,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
}
}
}
- clustered_apic_check();
+ setup_apic_routing();
if (!num_processors)
printk(KERN_ERR "MPTABLE: no processors registered!\n");
return num_processors;
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index dfab9f167366..6cd2b30e2ffc 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -27,28 +27,11 @@
#include <asm/proto.h>
#include <asm/kdebug.h>
#include <asm/mce.h>
-#include <asm/intel_arch_perfmon.h>
int unknown_nmi_panic;
int nmi_watchdog_enabled;
int panic_on_unrecovered_nmi;
-/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
- * evtsel_nmi_owner tracks the ownership of the event selection
- * - different performance counters/ event selection may be reserved for
- * different subsystems this reservation system just tries to coordinate
- * things a little
- */
-
-/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
- * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
- */
-#define NMI_MAX_COUNTER_BITS 66
-#define NMI_MAX_COUNTER_LONGS BITS_TO_LONGS(NMI_MAX_COUNTER_BITS)
-
-static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner[NMI_MAX_COUNTER_LONGS]);
-static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[NMI_MAX_COUNTER_LONGS]);
-
static cpumask_t backtrace_mask = CPU_MASK_NONE;
/* nmi_active:
@@ -63,191 +46,11 @@ int panic_on_timeout;
unsigned int nmi_watchdog = NMI_DEFAULT;
static unsigned int nmi_hz = HZ;
-struct nmi_watchdog_ctlblk {
- int enabled;
- u64 check_bit;
- unsigned int cccr_msr;
- unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
- unsigned int evntsel_msr; /* the MSR to select the events to handle */
-};
-static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
+static DEFINE_PER_CPU(short, wd_enabled);
/* local prototypes */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
-/* converts an msr to an appropriate reservation bit */
-static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
-{
- /* returns the bit offset of the performance counter register */
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- return (msr - MSR_K7_PERFCTR0);
- case X86_VENDOR_INTEL:
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
- return (msr - MSR_ARCH_PERFMON_PERFCTR0);
- else
- return (msr - MSR_P4_BPU_PERFCTR0);
- }
- return 0;
-}
-
-/* converts an msr to an appropriate reservation bit */
-static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
-{
- /* returns the bit offset of the event selection register */
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- return (msr - MSR_K7_EVNTSEL0);
- case X86_VENDOR_INTEL:
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
- return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
- else
- return (msr - MSR_P4_BSU_ESCR0);
- }
- return 0;
-}
-
-/* checks for a bit availability (hack for oprofile) */
-int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
-{
- int cpu;
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
- for_each_possible_cpu (cpu) {
- if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
- return 0;
- }
- return 1;
-}
-
-/* checks the an msr for availability */
-int avail_to_resrv_perfctr_nmi(unsigned int msr)
-{
- unsigned int counter;
- int cpu;
-
- counter = nmi_perfctr_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
- for_each_possible_cpu (cpu) {
- if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
- return 0;
- }
- return 1;
-}
-
-static int __reserve_perfctr_nmi(int cpu, unsigned int msr)
-{
- unsigned int counter;
- if (cpu < 0)
- cpu = smp_processor_id();
-
- counter = nmi_perfctr_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
- if (!test_and_set_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
- return 1;
- return 0;
-}
-
-static void __release_perfctr_nmi(int cpu, unsigned int msr)
-{
- unsigned int counter;
- if (cpu < 0)
- cpu = smp_processor_id();
-
- counter = nmi_perfctr_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
- clear_bit(counter, &per_cpu(perfctr_nmi_owner, cpu));
-}
-
-int reserve_perfctr_nmi(unsigned int msr)
-{
- int cpu, i;
- for_each_possible_cpu (cpu) {
- if (!__reserve_perfctr_nmi(cpu, msr)) {
- for_each_possible_cpu (i) {
- if (i >= cpu)
- break;
- __release_perfctr_nmi(i, msr);
- }
- return 0;
- }
- }
- return 1;
-}
-
-void release_perfctr_nmi(unsigned int msr)
-{
- int cpu;
- for_each_possible_cpu (cpu)
- __release_perfctr_nmi(cpu, msr);
-}
-
-int __reserve_evntsel_nmi(int cpu, unsigned int msr)
-{
- unsigned int counter;
- if (cpu < 0)
- cpu = smp_processor_id();
-
- counter = nmi_evntsel_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
- if (!test_and_set_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]))
- return 1;
- return 0;
-}
-
-static void __release_evntsel_nmi(int cpu, unsigned int msr)
-{
- unsigned int counter;
- if (cpu < 0)
- cpu = smp_processor_id();
-
- counter = nmi_evntsel_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
- clear_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]);
-}
-
-int reserve_evntsel_nmi(unsigned int msr)
-{
- int cpu, i;
- for_each_possible_cpu (cpu) {
- if (!__reserve_evntsel_nmi(cpu, msr)) {
- for_each_possible_cpu (i) {
- if (i >= cpu)
- break;
- __release_evntsel_nmi(i, msr);
- }
- return 0;
- }
- }
- return 1;
-}
-
-void release_evntsel_nmi(unsigned int msr)
-{
- int cpu;
- for_each_possible_cpu (cpu) {
- __release_evntsel_nmi(cpu, msr);
- }
-}
-
-static __cpuinit inline int nmi_known_cpu(void)
-{
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- return boot_cpu_data.x86 == 15 || boot_cpu_data.x86 == 16;
- case X86_VENDOR_INTEL:
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
- return 1;
- else
- return (boot_cpu_data.x86 == 15);
- }
- return 0;
-}
-
/* Run after command line and cpu_init init, but before all other checks */
void nmi_watchdog_default(void)
{
@@ -277,23 +80,6 @@ static __init void nmi_cpu_busy(void *data)
}
#endif
-static unsigned int adjust_for_32bit_ctr(unsigned int hz)
-{
- unsigned int retval = hz;
-
- /*
- * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
- * are writable, with higher bits sign extending from bit 31.
- * So, we can only program the counter with 31 bit values and
- * 32nd bit should be 1, for 33.. to be 1.
- * Find the appropriate nmi_hz
- */
- if ((((u64)cpu_khz * 1000) / retval) > 0x7fffffffULL) {
- retval = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1;
- }
- return retval;
-}
-
int __init check_nmi_watchdog (void)
{
int *counts;
@@ -322,14 +108,14 @@ int __init check_nmi_watchdog (void)
mdelay((20*1000)/nmi_hz); // wait 20 ticks
for_each_online_cpu(cpu) {
- if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
+ if (!per_cpu(wd_enabled, cpu))
continue;
if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) {
printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
cpu,
counts[cpu],
cpu_pda(cpu)->__nmi_count);
- per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
+ per_cpu(wd_enabled, cpu) = 0;
atomic_dec(&nmi_active);
}
}
@@ -344,13 +130,8 @@ int __init check_nmi_watchdog (void)
/* now that we know it works we can reduce NMI frequency to
something more reasonable; makes a difference in some configs */
- if (nmi_watchdog == NMI_LOCAL_APIC) {
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- nmi_hz = 1;
- if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0)
- nmi_hz = adjust_for_32bit_ctr(nmi_hz);
- }
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ nmi_hz = lapic_adjust_nmi_hz(1);
kfree(counts);
return 0;
@@ -379,57 +160,6 @@ int __init setup_nmi_watchdog(char *str)
__setup("nmi_watchdog=", setup_nmi_watchdog);
-static void disable_lapic_nmi_watchdog(void)
-{
- BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
-
- if (atomic_read(&nmi_active) <= 0)
- return;
-
- on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
-
- BUG_ON(atomic_read(&nmi_active) != 0);
-}
-
-static void enable_lapic_nmi_watchdog(void)
-{
- BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
-
- /* are we already enabled */
- if (atomic_read(&nmi_active) != 0)
- return;
-
- /* are we lapic aware */
- if (nmi_known_cpu() <= 0)
- return;
-
- on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
- touch_nmi_watchdog();
-}
-
-void disable_timer_nmi_watchdog(void)
-{
- BUG_ON(nmi_watchdog != NMI_IO_APIC);
-
- if (atomic_read(&nmi_active) <= 0)
- return;
-
- disable_irq(0);
- on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
-
- BUG_ON(atomic_read(&nmi_active) != 0);
-}
-
-void enable_timer_nmi_watchdog(void)
-{
- BUG_ON(nmi_watchdog != NMI_IO_APIC);
-
- if (atomic_read(&nmi_active) == 0) {
- touch_nmi_watchdog();
- on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
- enable_irq(0);
- }
-}
static void __acpi_nmi_disable(void *__unused)
{
@@ -515,275 +245,9 @@ late_initcall(init_lapic_nmi_sysfs);
#endif /* CONFIG_PM */
-/*
- * Activate the NMI watchdog via the local APIC.
- * Original code written by Keith Owens.
- */
-
-/* Note that these events don't tick when the CPU idles. This means
- the frequency varies with CPU load. */
-
-#define K7_EVNTSEL_ENABLE (1 << 22)
-#define K7_EVNTSEL_INT (1 << 20)
-#define K7_EVNTSEL_OS (1 << 17)
-#define K7_EVNTSEL_USR (1 << 16)
-#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
-#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
-
-static int setup_k7_watchdog(void)
-{
- unsigned int perfctr_msr, evntsel_msr;
- unsigned int evntsel;
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- perfctr_msr = MSR_K7_PERFCTR0;
- evntsel_msr = MSR_K7_EVNTSEL0;
- if (!__reserve_perfctr_nmi(-1, perfctr_msr))
- goto fail;
-
- if (!__reserve_evntsel_nmi(-1, evntsel_msr))
- goto fail1;
-
- /* Simulator may not support it */
- if (checking_wrmsrl(evntsel_msr, 0UL))
- goto fail2;
- wrmsrl(perfctr_msr, 0UL);
-
- evntsel = K7_EVNTSEL_INT
- | K7_EVNTSEL_OS
- | K7_EVNTSEL_USR
- | K7_NMI_EVENT;
-
- /* setup the timer */
- wrmsr(evntsel_msr, evntsel, 0);
- wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= K7_EVNTSEL_ENABLE;
- wrmsr(evntsel_msr, evntsel, 0);
-
- wd->perfctr_msr = perfctr_msr;
- wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = 0; //unused
- wd->check_bit = 1ULL<<63;
- return 1;
-fail2:
- __release_evntsel_nmi(-1, evntsel_msr);
-fail1:
- __release_perfctr_nmi(-1, perfctr_msr);
-fail:
- return 0;
-}
-
-static void stop_k7_watchdog(void)
-{
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- wrmsr(wd->evntsel_msr, 0, 0);
-
- __release_evntsel_nmi(-1, wd->evntsel_msr);
- __release_perfctr_nmi(-1, wd->perfctr_msr);
-}
-
-/* Note that these events don't tick when the CPU idles. This means
- the frequency varies with CPU load. */
-
-#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
-#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
-#define P4_ESCR_OS (1<<3)
-#define P4_ESCR_USR (1<<2)
-#define P4_CCCR_OVF_PMI0 (1<<26)
-#define P4_CCCR_OVF_PMI1 (1<<27)
-#define P4_CCCR_THRESHOLD(N) ((N)<<20)
-#define P4_CCCR_COMPLEMENT (1<<19)
-#define P4_CCCR_COMPARE (1<<18)
-#define P4_CCCR_REQUIRED (3<<16)
-#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
-#define P4_CCCR_ENABLE (1<<12)
-#define P4_CCCR_OVF (1<<31)
-/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
- CRU_ESCR0 (with any non-null event selector) through a complemented
- max threshold. [IA32-Vol3, Section 14.9.9] */
-
-static int setup_p4_watchdog(void)
-{
- unsigned int perfctr_msr, evntsel_msr, cccr_msr;
- unsigned int evntsel, cccr_val;
- unsigned int misc_enable, dummy;
- unsigned int ht_num;
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
- if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
- return 0;
-
-#ifdef CONFIG_SMP
- /* detect which hyperthread we are on */
- if (smp_num_siblings == 2) {
- unsigned int ebx, apicid;
-
- ebx = cpuid_ebx(1);
- apicid = (ebx >> 24) & 0xff;
- ht_num = apicid & 1;
- } else
-#endif
- ht_num = 0;
-
- /* performance counters are shared resources
- * assign each hyperthread its own set
- * (re-use the ESCR0 register, seems safe
- * and keeps the cccr_val the same)
- */
- if (!ht_num) {
- /* logical cpu 0 */
- perfctr_msr = MSR_P4_IQ_PERFCTR0;
- evntsel_msr = MSR_P4_CRU_ESCR0;
- cccr_msr = MSR_P4_IQ_CCCR0;
- cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
- } else {
- /* logical cpu 1 */
- perfctr_msr = MSR_P4_IQ_PERFCTR1;
- evntsel_msr = MSR_P4_CRU_ESCR0;
- cccr_msr = MSR_P4_IQ_CCCR1;
- cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
- }
-
- if (!__reserve_perfctr_nmi(-1, perfctr_msr))
- goto fail;
-
- if (!__reserve_evntsel_nmi(-1, evntsel_msr))
- goto fail1;
-
- evntsel = P4_ESCR_EVENT_SELECT(0x3F)
- | P4_ESCR_OS
- | P4_ESCR_USR;
-
- cccr_val |= P4_CCCR_THRESHOLD(15)
- | P4_CCCR_COMPLEMENT
- | P4_CCCR_COMPARE
- | P4_CCCR_REQUIRED;
-
- wrmsr(evntsel_msr, evntsel, 0);
- wrmsr(cccr_msr, cccr_val, 0);
- wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- cccr_val |= P4_CCCR_ENABLE;
- wrmsr(cccr_msr, cccr_val, 0);
-
- wd->perfctr_msr = perfctr_msr;
- wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = cccr_msr;
- wd->check_bit = 1ULL<<39;
- return 1;
-fail1:
- __release_perfctr_nmi(-1, perfctr_msr);
-fail:
- return 0;
-}
-
-static void stop_p4_watchdog(void)
-{
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- wrmsr(wd->cccr_msr, 0, 0);
- wrmsr(wd->evntsel_msr, 0, 0);
-
- __release_evntsel_nmi(-1, wd->evntsel_msr);
- __release_perfctr_nmi(-1, wd->perfctr_msr);
-}
-
-#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
-#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
-
-static int setup_intel_arch_watchdog(void)
-{
- unsigned int ebx;
- union cpuid10_eax eax;
- unsigned int unused;
- unsigned int perfctr_msr, evntsel_msr;
- unsigned int evntsel;
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- /*
- * Check whether the Architectural PerfMon supports
- * Unhalted Core Cycles Event or not.
- * NOTE: Corresponding bit = 0 in ebx indicates event present.
- */
- cpuid(10, &(eax.full), &ebx, &unused, &unused);
- if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
- (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
- goto fail;
-
- perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
- evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
-
- if (!__reserve_perfctr_nmi(-1, perfctr_msr))
- goto fail;
-
- if (!__reserve_evntsel_nmi(-1, evntsel_msr))
- goto fail1;
-
- wrmsrl(perfctr_msr, 0UL);
-
- evntsel = ARCH_PERFMON_EVENTSEL_INT
- | ARCH_PERFMON_EVENTSEL_OS
- | ARCH_PERFMON_EVENTSEL_USR
- | ARCH_PERFMON_NMI_EVENT_SEL
- | ARCH_PERFMON_NMI_EVENT_UMASK;
-
- /* setup the timer */
- wrmsr(evntsel_msr, evntsel, 0);
-
- nmi_hz = adjust_for_32bit_ctr(nmi_hz);
- wrmsr(perfctr_msr, (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0);
-
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
- wrmsr(evntsel_msr, evntsel, 0);
-
- wd->perfctr_msr = perfctr_msr;
- wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = 0; //unused
- wd->check_bit = 1ULL << (eax.split.bit_width - 1);
- return 1;
-fail1:
- __release_perfctr_nmi(-1, perfctr_msr);
-fail:
- return 0;
-}
-
-static void stop_intel_arch_watchdog(void)
-{
- unsigned int ebx;
- union cpuid10_eax eax;
- unsigned int unused;
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- /*
- * Check whether the Architectural PerfMon supports
- * Unhalted Core Cycles Event or not.
- * NOTE: Corresponding bit = 0 in ebx indicates event present.
- */
- cpuid(10, &(eax.full), &ebx, &unused, &unused);
- if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
- (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
- return;
-
- wrmsr(wd->evntsel_msr, 0, 0);
-
- __release_evntsel_nmi(-1, wd->evntsel_msr);
- __release_perfctr_nmi(-1, wd->perfctr_msr);
-}
-
void setup_apic_nmi_watchdog(void *unused)
{
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- /* only support LOCAL and IO APICs for now */
- if ((nmi_watchdog != NMI_LOCAL_APIC) &&
- (nmi_watchdog != NMI_IO_APIC))
- return;
-
- if (wd->enabled == 1)
+ if (__get_cpu_var(wd_enabled) == 1)
return;
/* cheap hack to support suspend/resume */
@@ -791,62 +255,31 @@ void setup_apic_nmi_watchdog(void *unused)
if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
return;
- if (nmi_watchdog == NMI_LOCAL_APIC) {
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
- return;
- if (!setup_k7_watchdog())
- return;
- break;
- case X86_VENDOR_INTEL:
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
- if (!setup_intel_arch_watchdog())
- return;
- break;
- }
- if (!setup_p4_watchdog())
- return;
- break;
- default:
+ switch (nmi_watchdog) {
+ case NMI_LOCAL_APIC:
+ __get_cpu_var(wd_enabled) = 1;
+ if (lapic_watchdog_init(nmi_hz) < 0) {
+ __get_cpu_var(wd_enabled) = 0;
return;
}
+ /* FALL THROUGH */
+ case NMI_IO_APIC:
+ __get_cpu_var(wd_enabled) = 1;
+ atomic_inc(&nmi_active);
}
- wd->enabled = 1;
- atomic_inc(&nmi_active);
}
void stop_apic_nmi_watchdog(void *unused)
{
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
/* only support LOCAL and IO APICs for now */
if ((nmi_watchdog != NMI_LOCAL_APIC) &&
(nmi_watchdog != NMI_IO_APIC))
return;
-
- if (wd->enabled == 0)
+ if (__get_cpu_var(wd_enabled) == 0)
return;
-
- if (nmi_watchdog == NMI_LOCAL_APIC) {
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
- return;
- stop_k7_watchdog();
- break;
- case X86_VENDOR_INTEL:
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
- stop_intel_arch_watchdog();
- break;
- }
- stop_p4_watchdog();
- break;
- default:
- return;
- }
- }
- wd->enabled = 0;
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ lapic_watchdog_stop();
+ __get_cpu_var(wd_enabled) = 0;
atomic_dec(&nmi_active);
}
@@ -885,9 +318,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
int sum;
int touched = 0;
int cpu = smp_processor_id();
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
- u64 dummy;
- int rc=0;
+ int rc = 0;
/* check for other users first */
if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
@@ -934,55 +365,20 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
}
/* see if the nmi watchdog went off */
- if (wd->enabled) {
- if (nmi_watchdog == NMI_LOCAL_APIC) {
- rdmsrl(wd->perfctr_msr, dummy);
- if (dummy & wd->check_bit){
- /* this wasn't a watchdog timer interrupt */
- goto done;
- }
-
- /* only Intel uses the cccr msr */
- if (wd->cccr_msr != 0) {
- /*
- * P4 quirks:
- * - An overflown perfctr will assert its interrupt
- * until the OVF flag in its CCCR is cleared.
- * - LVTPC is masked on interrupt and must be
- * unmasked by the LVTPC handler.
- */
- rdmsrl(wd->cccr_msr, dummy);
- dummy &= ~P4_CCCR_OVF;
- wrmsrl(wd->cccr_msr, dummy);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- /* start the cycle over again */
- wrmsrl(wd->perfctr_msr,
- -((u64)cpu_khz * 1000 / nmi_hz));
- } else if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
- /*
- * ArchPerfom/Core Duo needs to re-unmask
- * the apic vector
- */
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- /* ARCH_PERFMON has 32 bit counter writes */
- wrmsr(wd->perfctr_msr,
- (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0);
- } else {
- /* start the cycle over again */
- wrmsrl(wd->perfctr_msr,
- -((u64)cpu_khz * 1000 / nmi_hz));
- }
- rc = 1;
- } else if (nmi_watchdog == NMI_IO_APIC) {
- /* don't know how to accurately check for this.
- * just assume it was a watchdog timer interrupt
- * This matches the old behaviour.
- */
- rc = 1;
- } else
- printk(KERN_WARNING "Unknown enabled NMI hardware?!\n");
+ if (!__get_cpu_var(wd_enabled))
+ return rc;
+ switch (nmi_watchdog) {
+ case NMI_LOCAL_APIC:
+ rc |= lapic_wd_event(nmi_hz);
+ break;
+ case NMI_IO_APIC:
+ /* don't know how to accurately check for this.
+ * just assume it was a watchdog timer interrupt
+ * This matches the old behaviour.
+ */
+ rc = 1;
+ break;
}
-done:
return rc;
}
@@ -1067,12 +463,4 @@ void __trigger_all_cpu_backtrace(void)
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
-EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
-EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
-EXPORT_SYMBOL(reserve_perfctr_nmi);
-EXPORT_SYMBOL(release_perfctr_nmi);
-EXPORT_SYMBOL(reserve_evntsel_nmi);
-EXPORT_SYMBOL(release_evntsel_nmi);
-EXPORT_SYMBOL(disable_timer_nmi_watchdog);
-EXPORT_SYMBOL(enable_timer_nmi_watchdog);
EXPORT_SYMBOL(touch_nmi_watchdog);
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index 04480c3b68f5..5bd20b542c1e 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -507,7 +507,7 @@ error:
return ret;
}
-static struct dma_mapping_ops calgary_dma_ops = {
+static const struct dma_mapping_ops calgary_dma_ops = {
.alloc_coherent = calgary_alloc_coherent,
.map_single = calgary_map_single,
.unmap_single = calgary_unmap_single,
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 0bae862e9a55..0a762e10f2be 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -556,7 +556,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
extern int agp_amd64_init(void);
-static struct dma_mapping_ops gart_dma_ops = {
+static const struct dma_mapping_ops gart_dma_ops = {
.mapping_error = NULL,
.map_single = gart_map_single,
.map_simple = gart_map_simple,
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c
index df09ab05a1bd..6dade0c867cc 100644
--- a/arch/x86_64/kernel/pci-nommu.c
+++ b/arch/x86_64/kernel/pci-nommu.c
@@ -79,7 +79,7 @@ void nommu_unmap_sg(struct device *dev, struct scatterlist *sg,
{
}
-struct dma_mapping_ops nommu_dma_ops = {
+const struct dma_mapping_ops nommu_dma_ops = {
.map_single = nommu_map_single,
.unmap_single = nommu_unmap_single,
.map_sg = nommu_map_sg,
diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c
index eb18be5a6569..4b4569abc60c 100644
--- a/arch/x86_64/kernel/pci-swiotlb.c
+++ b/arch/x86_64/kernel/pci-swiotlb.c
@@ -12,7 +12,7 @@
int swiotlb __read_mostly;
EXPORT_SYMBOL(swiotlb);
-struct dma_mapping_ops swiotlb_dma_ops = {
+const struct dma_mapping_ops swiotlb_dma_ops = {
.mapping_error = swiotlb_dma_mapping_error,
.alloc_coherent = swiotlb_alloc_coherent,
.free_coherent = swiotlb_free_coherent,