aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig30
-rw-r--r--arch/x86/Kconfig.debug4
-rw-r--r--arch/x86/Makefile1
-rw-r--r--arch/x86/boot/Makefile18
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/boot/compressed/head_64.S8
-rw-r--r--arch/x86/boot/cpu.c26
-rw-r--r--arch/x86/boot/mkcpustr.c49
-rw-r--r--arch/x86/ia32/ia32entry.S4
-rw-r--r--arch/x86/kernel/Makefile6
-rw-r--r--arch/x86/kernel/acpi/boot.c40
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/common.c12
-rw-r--r--arch/x86/kernel/cpu/cpu.h9
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-lib.c5
-rw-r--r--arch/x86/kernel/cpu/cyrix.c2
-rw-r--r--arch/x86/kernel/cpu/feature_names.c83
-rw-r--r--arch/x86/kernel/cpu/intel.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/cyrix.c107
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c35
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h4
-rw-r--r--arch/x86/kernel/cpu/proc.c74
-rw-r--r--arch/x86/kernel/cpuid.c52
-rw-r--r--arch/x86/kernel/efi.c57
-rw-r--r--arch/x86/kernel/efi_64.c22
-rw-r--r--arch/x86/kernel/entry_64.S24
-rw-r--r--arch/x86/kernel/head_64.S19
-rw-r--r--arch/x86/kernel/ldt.c3
-rw-r--r--arch/x86/kernel/msr.c14
-rw-r--r--arch/x86/kernel/pci-calgary_64.c34
-rw-r--r--arch/x86/kernel/pci-gart_64.c53
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/ptrace.c25
-rw-r--r--arch/x86/kernel/quirks.c69
-rw-r--r--arch/x86/kernel/setup_64.c76
-rw-r--r--arch/x86/kernel/smpboot_32.c2
-rw-r--r--arch/x86/kernel/srat_32.c2
-rw-r--r--arch/x86/kernel/suspend_64.c8
-rw-r--r--arch/x86/kernel/syscall_table_32.S4
-rw-r--r--arch/x86/kernel/test_nx.c14
-rw-r--r--arch/x86/kernel/trampoline_32.S7
-rw-r--r--arch/x86/kernel/trampoline_64.S3
-rw-r--r--arch/x86/kernel/traps_32.c15
-rw-r--r--arch/x86/kernel/vmi_32.c6
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/x86.c8
-rw-r--r--arch/x86/lib/Makefile2
-rw-r--r--arch/x86/lib/bitops_32.c2
-rw-r--r--arch/x86/lib/bitops_64.c2
-rw-r--r--arch/x86/lib/bitstr_64.c28
-rw-r--r--arch/x86/lib/delay_32.c4
-rw-r--r--arch/x86/lib/delay_64.c4
-rw-r--r--arch/x86/lib/mmx_32.c31
-rw-r--r--arch/x86/lib/usercopy_32.c12
-rw-r--r--arch/x86/lib/usercopy_64.c12
-rw-r--r--arch/x86/mach-voyager/voyager_smp.c2
-rw-r--r--arch/x86/mm/fault.c62
-rw-r--r--arch/x86/mm/init_32.c16
-rw-r--r--arch/x86/mm/init_64.c58
-rw-r--r--arch/x86/mm/ioremap.c41
-rw-r--r--arch/x86/mm/numa_64.c7
-rw-r--r--arch/x86/mm/pageattr-test.c68
-rw-r--r--arch/x86/mm/pageattr.c389
-rw-r--r--arch/x86/mm/pgtable_32.c73
-rw-r--r--arch/x86/pci/fixup.c22
-rw-r--r--arch/x86/pci/i386.c2
-rw-r--r--arch/x86/pci/numa.c52
68 files changed, 989 insertions, 941 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 93e66678e158..e6728bd61cc1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -18,6 +18,8 @@ config X86_64
### Arch settings
config X86
def_bool y
+ select HAVE_OPROFILE
+ select HAVE_KPROBES
config GENERIC_LOCKBREAK
def_bool n
@@ -106,12 +108,16 @@ config GENERIC_TIME_VSYSCALL
config HAVE_SETUP_PER_CPU_AREA
def_bool X86_64
-config ARCH_SUPPORTS_OPROFILE
- bool
- default y
-
select HAVE_KVM
+config ARCH_HIBERNATION_POSSIBLE
+ def_bool y
+ depends on !SMP || !X86_VOYAGER
+
+config ARCH_SUSPEND_POSSIBLE
+ def_bool y
+ depends on !X86_VOYAGER
+
config ZONE_DMA32
bool
default X86_64
@@ -196,8 +202,7 @@ config SMP
Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
Management" code will be disabled if you say Y here.
- See also the <file:Documentation/smp.txt>,
- <file:Documentation/i386/IO-APIC.txt>,
+ See also <file:Documentation/i386/IO-APIC.txt>,
<file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
<http://www.tldp.org/docs.html#howto>.
@@ -301,6 +306,7 @@ config X86_RDC321X
select M486
select X86_REBOOTFIXUPS
select GENERIC_GPIO
+ select LEDS_CLASS
select LEDS_GPIO
help
This option is needed for RDC R-321x system-on-chip, also known
@@ -409,7 +415,7 @@ config HPET_TIMER
config HPET_EMULATE_RTC
def_bool y
- depends on HPET_TIMER && (RTC=y || RTC=m)
+ depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
# Mark as embedded because too many people got it wrong.
# The code disables itself when not needed.
@@ -459,6 +465,9 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
Calgary anyway, pass 'iommu=calgary' on the kernel command line.
If unsure, say Y.
+config IOMMU_HELPER
+ def_bool (CALGARY_IOMMU || GART_IOMMU)
+
# need this always selected by IOMMU for the VIA workaround
config SWIOTLB
bool
@@ -1369,11 +1378,6 @@ config PCI
your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or
VESA. If you have PCI, say Y, otherwise N.
- The PCI-HOWTO, available from
- <http://www.tldp.org/docs.html#howto>, contains valuable
- information about which PCI hardware does work under Linux and which
- doesn't.
-
choice
prompt "PCI access mode"
depends on X86_32 && PCI && !X86_VISWS
@@ -1594,8 +1598,6 @@ source "drivers/firmware/Kconfig"
source "fs/Kconfig"
-source "kernel/Kconfig.instrumentation"
-
source "arch/x86/Kconfig.debug"
source "security/Kconfig"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2e1e3af28c3a..fa555148823d 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -220,9 +220,9 @@ config DEBUG_BOOT_PARAMS
This option will cause struct boot_params to be exported via debugfs.
config CPA_DEBUG
- bool "CPA self test code"
+ bool "CPA self-test code"
depends on DEBUG_KERNEL
help
- Do change_page_attr self tests at boot.
+ Do change_page_attr() self-tests every 30 seconds.
endmenu
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 8978e98bed5b..364865b1b08d 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -92,7 +92,6 @@ KBUILD_AFLAGS += $(cfi) $(cfi-sigframe)
KBUILD_CFLAGS += $(cfi) $(cfi-sigframe)
LDFLAGS := -m elf_$(UTS_MACHINE)
-OBJCOPYFLAGS := -O binary -R .note -R .comment -S
# Speed up the build
KBUILD_CFLAGS += -pipe
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 349b81a39c40..f88458e83ef0 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -26,7 +26,7 @@ SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
#RAMDISK := -DRAMDISK=512
targets := vmlinux.bin setup.bin setup.elf zImage bzImage
-subdir- := compressed
+subdir- := compressed
setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
setup-y += header.o main.o mca.o memory.o pm.o pmjump.o
@@ -43,9 +43,17 @@ setup-y += video-vesa.o
setup-y += video-bios.o
targets += $(setup-y)
-hostprogs-y := tools/build
+hostprogs-y := mkcpustr tools/build
-HOSTCFLAGS_build.o := $(LINUXINCLUDE)
+HOST_EXTRACFLAGS += $(LINUXINCLUDE)
+
+$(obj)/cpu.o: $(obj)/cpustr.h
+
+quiet_cmd_cpustr = CPUSTR $@
+ cmd_cpustr = $(obj)/mkcpustr > $@
+targets += cpustr.h
+$(obj)/cpustr.h: $(obj)/mkcpustr FORCE
+ $(call if_changed,cpustr)
# ---------------------------------------------------------------------------
@@ -80,6 +88,7 @@ $(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \
$(call if_changed,image)
@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
+OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
$(call if_changed,objcopy)
@@ -90,7 +99,6 @@ $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
$(call if_changed,ld)
OBJCOPYFLAGS_setup.bin := -O binary
-
$(obj)/setup.bin: $(obj)/setup.elf FORCE
$(call if_changed,objcopy)
@@ -98,7 +106,7 @@ $(obj)/compressed/vmlinux: FORCE
$(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@
# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel
-FDARGS =
+FDARGS =
# Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel
FDINITRD =
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index fe24ceabd909..d2b9f3bb87c0 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -22,6 +22,7 @@ $(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $
$(call if_changed,ld)
@:
+OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 1ccb38a7f0d2..e8657b98c902 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -80,8 +80,8 @@ startup_32:
#ifdef CONFIG_RELOCATABLE
movl %ebp, %ebx
- addl $(LARGE_PAGE_SIZE -1), %ebx
- andl $LARGE_PAGE_MASK, %ebx
+ addl $(PMD_PAGE_SIZE -1), %ebx
+ andl $PMD_PAGE_MASK, %ebx
#else
movl $CONFIG_PHYSICAL_START, %ebx
#endif
@@ -220,8 +220,8 @@ ENTRY(startup_64)
/* Start with the delta to where the kernel will run at. */
#ifdef CONFIG_RELOCATABLE
leaq startup_32(%rip) /* - $startup_32 */, %rbp
- addq $(LARGE_PAGE_SIZE - 1), %rbp
- andq $LARGE_PAGE_MASK, %rbp
+ addq $(PMD_PAGE_SIZE - 1), %rbp
+ andq $PMD_PAGE_MASK, %rbp
movq %rbp, %rbx
#else
movq $CONFIG_PHYSICAL_START, %rbp
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 2a5c32da5852..00e19edd852c 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -1,7 +1,7 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
- * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2007-2008 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
@@ -9,7 +9,7 @@
* ----------------------------------------------------------------------- */
/*
- * arch/i386/boot/cpu.c
+ * arch/x86/boot/cpu.c
*
* Check for obligatory CPU features and abort if the features are not
* present.
@@ -19,6 +19,8 @@
#include "bitops.h"
#include <asm/cpufeature.h>
+#include "cpustr.h"
+
static char *cpu_name(int level)
{
static char buf[6];
@@ -35,6 +37,7 @@ int validate_cpu(void)
{
u32 *err_flags;
int cpu_level, req_level;
+ const unsigned char *msg_strs;
check_cpu(&cpu_level, &req_level, &err_flags);
@@ -51,13 +54,26 @@ int validate_cpu(void)
puts("This kernel requires the following features "
"not present on the CPU:\n");
+ msg_strs = (const unsigned char *)x86_cap_strs;
+
for (i = 0; i < NCAPINTS; i++) {
u32 e = err_flags[i];
for (j = 0; j < 32; j++) {
- if (e & 1)
- printf("%d:%d ", i, j);
-
+ int n = (i << 5)+j;
+ if (*msg_strs < n) {
+ /* Skip to the next string */
+ do {
+ msg_strs++;
+ } while (*msg_strs);
+ msg_strs++;
+ }
+ if (e & 1) {
+ if (*msg_strs == n && msg_strs[1])
+ printf("%s ", msg_strs+1);
+ else
+ printf("%d:%d ", i, j);
+ }
e >>= 1;
}
}
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
new file mode 100644
index 000000000000..bbe76953bae9
--- /dev/null
+++ b/arch/x86/boot/mkcpustr.c
@@ -0,0 +1,49 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 2008 rPath, Inc. - All Rights Reserved
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2 or (at your
+ * option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * This is a host program to preprocess the CPU strings into a
+ * compact format suitable for the setup code.
+ */
+
+#include <stdio.h>
+
+#include "../kernel/cpu/feature_names.c"
+
+/*
+ * Each generated entry starts with its capability bit number stored in a
+ * single byte ("\xNN"), so at most 8 capability words (8*32 = 256 bits)
+ * can be represented. NCAPINTS is the word count used to size
+ * x86_cap_flags[]; fail the build if it outgrows the one-byte index.
+ */
+#if NCAPINTS > 8
+# error "Need to adjust the boot code handling of CPUID strings"
+#endif
+
+/*
+ * Write to stdout a C definition of x86_cap_strs[]: a run of adjacent
+ * string literals, one per named capability bit, each consisting of a
+ * one-byte "\xNN" bit number followed by the flag name from
+ * x86_cap_flags[]. Always returns 0.
+ */
+int main(void)
+{
+ int i;
+ const char *str;
+
+ printf("static const char x86_cap_strs[] = \n");
+
+ for (i = 0; i < NCAPINTS*32; i++) {
+ str = x86_cap_flags[i];
+
+ if (i == NCAPINTS*32-1) {
+ /* The last entry must be unconditional; this
+ also consumes the compiler-added null character */
+ if (!str)
+ str = "";
+ printf("\t\"\\x%02x\"\"%s\"\n", i, str);
+ } else if (str) {
+ /* Named bit: the entry gets an explicit "\0" terminator and
+ is wrapped in #if so that it is only compiled in when the
+ bit is set in the matching REQUIRED_MASK<word>. Bits whose
+ name is NULL produce no output at all. */
+ printf("#if REQUIRED_MASK%d & (1 << %d)\n"
+ "\t\"\\x%02x\"\"%s\\0\"\n"
+ "#endif\n",
+ i >> 5, i & 31, i, str);
+ }
+ }
+ /* Terminate the generated definition. */
+ printf("\t;\n");
+ return 0;
+}
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 0db0a6291bbd..8022d3c695c0 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -722,7 +722,9 @@ ia32_sys_call_table:
.quad sys_epoll_pwait
.quad compat_sys_utimensat /* 320 */
.quad compat_sys_signalfd
- .quad compat_sys_timerfd
+ .quad sys_timerfd_create
.quad sys_eventfd
.quad sys32_fallocate
+ .quad compat_sys_timerfd_settime /* 325 */
+ .quad compat_sys_timerfd_gettime
ia32_syscall_end:
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 6f813009d44b..21dc1a061bf1 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -37,7 +37,8 @@ obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_PCI) += early-quirks.o
-obj-$(CONFIG_APM) += apm_32.o
+apm-y := apm_32.o
+obj-$(CONFIG_APM) += apm.o
obj-$(CONFIG_X86_SMP) += smp_$(BITS).o smpboot_$(BITS).o tsc_sync.o
obj-$(CONFIG_X86_32_SMP) += smpcommon_32.o
obj-$(CONFIG_X86_64_SMP) += smp_64.o smpboot_64.o tsc_sync.o
@@ -74,7 +75,8 @@ ifdef CONFIG_INPUT_PCSPKR
obj-y += pcspeaker.o
endif
-obj-$(CONFIG_SCx200) += scx200_32.o
+obj-$(CONFIG_SCx200) += scx200.o
+scx200-y += scx200_32.o
###
# 64 bit specific files
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d2a58431a074..fc8825d4b996 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -587,25 +587,6 @@ int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
EXPORT_SYMBOL(acpi_unregister_ioapic);
-static unsigned long __init
-acpi_scan_rsdp(unsigned long start, unsigned long length)
-{
- unsigned long offset = 0;
- unsigned long sig_len = sizeof("RSD PTR ") - 1;
-
- /*
- * Scan all 16-byte boundaries of the physical memory region for the
- * RSDP signature.
- */
- for (offset = 0; offset < length; offset += 16) {
- if (strncmp((char *)(phys_to_virt(start) + offset), "RSD PTR ", sig_len))
- continue;
- return (start + offset);
- }
-
- return 0;
-}
-
static int __init acpi_parse_sbf(struct acpi_table_header *table)
{
struct acpi_table_boot *sb;
@@ -748,27 +729,6 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
return 0;
}
-unsigned long __init acpi_find_rsdp(void)
-{
- unsigned long rsdp_phys = 0;
-
- if (efi_enabled) {
- if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
- return efi.acpi20;
- else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
- return efi.acpi;
- }
- /*
- * Scan memory looking for the RSDP signature. First search EBDA (low
- * memory) paragraphs and then search upper memory (E0000-FFFFF).
- */
- rsdp_phys = acpi_scan_rsdp(0, 0x400);
- if (!rsdp_phys)
- rsdp_phys = acpi_scan_rsdp(0xE0000, 0x20000);
-
- return rsdp_phys;
-}
-
#ifdef CONFIG_X86_LOCAL_APIC
/*
* Parse LAPIC entries in MADT
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index cfdb2f3bd763..a0c4d7c5dbd7 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -3,6 +3,7 @@
#
obj-y := intel_cacheinfo.o addon_cpuid_features.o
+obj-y += feature_names.o
obj-$(CONFIG_X86_32) += common.o proc.o bugs.o
obj-$(CONFIG_X86_32) += amd.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b7b2142b58e7..f86a3c4a2669 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -623,16 +623,6 @@ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
* They will insert themselves into the cpu_devs structure.
* Then, when cpu_init() is called, we can just iterate over that array.
*/
-
-extern int intel_cpu_init(void);
-extern int cyrix_init_cpu(void);
-extern int nsc_init_cpu(void);
-extern int amd_init_cpu(void);
-extern int centaur_init_cpu(void);
-extern int transmeta_init_cpu(void);
-extern int nexgen_init_cpu(void);
-extern int umc_init_cpu(void);
-
void __init early_cpu_init(void)
{
intel_cpu_init();
@@ -647,7 +637,7 @@ void __init early_cpu_init(void)
}
/* Make sure %fs is initialized properly in idle threads */
-struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
+struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
{
memset(regs, 0, sizeof(struct pt_regs));
regs->fs = __KERNEL_PERCPU;
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index ad6527a5beb1..e0b38c33d842 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -27,3 +27,12 @@ extern void display_cacheinfo(struct cpuinfo_x86 *c);
extern void early_init_intel(struct cpuinfo_x86 *c);
extern void early_init_amd(struct cpuinfo_x86 *c);
+/* Specific CPU type init functions */
+int intel_cpu_init(void);
+int amd_init_cpu(void);
+int cyrix_init_cpu(void);
+int nsc_init_cpu(void);
+int centaur_init_cpu(void);
+int transmeta_init_cpu(void);
+int nexgen_init_cpu(void);
+int umc_init_cpu(void);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index a0522735dd9d..5affe91ca1e5 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -827,7 +827,6 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
for (i = 0; i < data->acpi_data.state_count; i++) {
u32 index;
- u32 hi = 0, lo = 0;
index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
if (index > data->max_hw_pstate) {
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index 76c3ab0da468..98d4fdb7dc04 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -189,10 +189,7 @@ static unsigned int pentium4_get_frequency(void)
printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to <linux@brodo.de>\n");
/* Multiplier. */
- if (c->x86_model < 2)
- mult = msr_lo >> 27;
- else
- mult = msr_lo >> 24;
+ mult = msr_lo >> 24;
dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult));
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 404a6a2d4016..7139b0262703 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -83,8 +83,6 @@ static char cyrix_model_mult2[] __cpuinitdata = "12233445";
* FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP
*/
-extern void calibrate_delay(void) __init;
-
static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c)
{
unsigned long flags;
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
new file mode 100644
index 000000000000..ee975ac6bbcb
--- /dev/null
+++ b/arch/x86/kernel/cpu/feature_names.c
@@ -0,0 +1,83 @@
+/*
+ * Strings for the various x86 capability flags.
+ *
+ * This file must not contain any executable code.
+ */
+
+#include "asm/cpufeature.h"
+
+/*
+ * These flag bits must match the definitions in <asm/cpufeature.h>.
+ * NULL means this bit is undefined or reserved; either way it doesn't
+ * have meaning as far as Linux is concerned. Note that it's important
+ * to realize there is a difference between this table and CPUID -- if
+ * applications want to get the raw CPUID data, they should access
+ * /dev/cpu/<cpu_nr>/cpuid instead.
+ */
+const char * const x86_cap_flags[NCAPINTS*32] = {
+ /* Intel-defined */
+ "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
+ "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
+ "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
+ "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
+
+ /* AMD-defined */
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
+ NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
+ "3dnowext", "3dnow",
+
+ /* Transmeta-defined */
+ "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* Other (Linux-defined) */
+ "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
+ NULL, NULL, NULL, NULL,
+ "constant_tsc", "up", NULL, "arch_perfmon",
+ "pebs", "bts", NULL, NULL,
+ "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* Intel-defined (#2) */
+ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
+ "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
+ NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* VIA/Cyrix/Centaur-defined */
+ NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
+ "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* AMD-defined (#2) */
+ "lahf_lm", "cmp_legacy", "svm", "extapic",
+ "cr8_legacy", "abm", "sse4a", "misalignsse",
+ "3dnowprefetch", "osvw", "ibs", "sse5",
+ "skinit", "wdt", NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* Auxiliary (Linux-defined) */
+ "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+};
+
+const char *const x86_power_flags[32] = {
+ "ts", /* temperature sensor */
+ "fid", /* frequency id control */
+ "vid", /* voltage id control */
+ "ttp", /* thermal trip */
+ "tm",
+ "stc",
+ "100mhzsteps",
+ "hwpstate",
+ "", /* tsc invariant mapped to constant_tsc */
+ /* nothing */
+};
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d1c372b018db..fae31ce747bd 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -13,6 +13,7 @@
#include <asm/uaccess.h>
#include <asm/ptrace.h>
#include <asm/ds.h>
+#include <asm/bugs.h>
#include "cpu.h"
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 8e139c70f888..ff14c320040c 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -7,8 +7,6 @@
#include <asm/processor-flags.h>
#include "mtrr.h"
-int arr3_protected;
-
static void
cyrix_get_arr(unsigned int reg, unsigned long *base,
unsigned long *size, mtrr_type * type)
@@ -99,8 +97,6 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
case 4:
return replace_reg;
case 3:
- if (arr3_protected)
- break;
case 2:
case 1:
case 0:
@@ -115,8 +111,6 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
} else {
for (i = 0; i < 7; i++) {
cyrix_get_arr(i, &lbase, &lsize, &ltype);
- if ((i == 3) && arr3_protected)
- continue;
if (lsize == 0)
return i;
}
@@ -260,107 +254,6 @@ static void cyrix_set_all(void)
post_set();
}
-#if 0
-/*
- * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection
- * with the SMM (System Management Mode) mode. So we need the following:
- * Check whether SMI_LOCK (CCR3 bit 0) is set
- * if it is set, write a warning message: ARR3 cannot be changed!
- * (it cannot be changed until the next processor reset)
- * if it is reset, then we can change it, set all the needed bits:
- * - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset)
- * - disable access to SMM memory (CCR1 bit 2 reset)
- * - disable SMM mode (CCR1 bit 1 reset)
- * - disable write protection of ARR3 (CCR6 bit 1 reset)
- * - (maybe) disable ARR3
- * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set)
- */
-static void __init
-cyrix_arr_init(void)
-{
- struct set_mtrr_context ctxt;
- unsigned char ccr[7];
- int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 };
-#ifdef CONFIG_SMP
- int i;
-#endif
-
- /* flush cache and enable MAPEN */
- set_mtrr_prepare_save(&ctxt);
- set_mtrr_cache_disable(&ctxt);
-
- /* Save all CCRs locally */
- ccr[0] = getCx86(CX86_CCR0);
- ccr[1] = getCx86(CX86_CCR1);
- ccr[2] = getCx86(CX86_CCR2);
- ccr[3] = ctxt.ccr3;
- ccr[4] = getCx86(CX86_CCR4);
- ccr[5] = getCx86(CX86_CCR5);
- ccr[6] = getCx86(CX86_CCR6);
-
- if (ccr[3] & 1) {
- ccrc[3] = 1;
- arr3_protected = 1;
- } else {
- /* Disable SMM mode (bit 1), access to SMM memory (bit 2) and
- * access to SMM memory through ARR3 (bit 7).
- */
- if (ccr[1] & 0x80) {
- ccr[1] &= 0x7f;
- ccrc[1] |= 0x80;
- }
- if (ccr[1] & 0x04) {
- ccr[1] &= 0xfb;
- ccrc[1] |= 0x04;
- }
- if (ccr[1] & 0x02) {
- ccr[1] &= 0xfd;
- ccrc[1] |= 0x02;
- }
- arr3_protected = 0;
- if (ccr[6] & 0x02) {
- ccr[6] &= 0xfd;
- ccrc[6] = 1; /* Disable write protection of ARR3 */
- setCx86(CX86_CCR6, ccr[6]);
- }
- /* Disable ARR3. This is safe now that we disabled SMM. */
- /* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */
- }
- /* If we changed CCR1 in memory, change it in the processor, too. */
- if (ccrc[1])
- setCx86(CX86_CCR1, ccr[1]);
-
- /* Enable ARR usage by the processor */
- if (!(ccr[5] & 0x20)) {
- ccr[5] |= 0x20;
- ccrc[5] = 1;
- setCx86(CX86_CCR5, ccr[5]);
- }
-#ifdef CONFIG_SMP
- for (i = 0; i < 7; i++)
- ccr_state[i] = ccr[i];
- for (i = 0; i < 8; i++)
- cyrix_get_arr(i,
- &arr_state[i].base, &arr_state[i].size,
- &arr_state[i].type);
-#endif
-
- set_mtrr_done(&ctxt); /* flush cache and disable MAPEN */
-
- if (ccrc[5])
- printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled manually\n");
- if (ccrc[3])
- printk(KERN_INFO "mtrr: ARR3 cannot be changed\n");
-/*
- if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3 disabled\n");
- if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n");
- if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n");
-*/
- if (ccrc[6])
- printk(KERN_INFO "mtrr: ARR3 was write protected, unprotected\n");
-}
-#endif
-
static struct mtrr_ops cyrix_mtrr_ops = {
.vendor = X86_VENDOR_CYRIX,
// .init = cyrix_arr_init,
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 715919582657..b6e136f23d3d 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -59,12 +59,6 @@ struct mtrr_ops * mtrr_if = NULL;
static void set_mtrr(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type);
-#ifndef CONFIG_X86_64
-extern int arr3_protected;
-#else
-#define arr3_protected 0
-#endif
-
void set_mtrr_ops(struct mtrr_ops * ops)
{
if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
@@ -513,12 +507,6 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
printk(KERN_WARNING "mtrr: register: %d too big\n", reg);
goto out;
}
- if (is_cpu(CYRIX) && !use_intel()) {
- if ((reg == 3) && arr3_protected) {
- printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n");
- goto out;
- }
- }
mtrr_if->get(reg, &lbase, &lsize, &ltype);
if (lsize < 1) {
printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
@@ -566,10 +554,6 @@ EXPORT_SYMBOL(mtrr_del);
* These should be called implicitly, but we can't yet until all the initcall
* stuff is done...
*/
-extern void amd_init_mtrr(void);
-extern void cyrix_init_mtrr(void);
-extern void centaur_init_mtrr(void);
-
static void __init init_ifs(void)
{
#ifndef CONFIG_X86_64
@@ -675,7 +659,7 @@ static __init int amd_special_default_mtrr(void)
*/
int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
{
- unsigned long i, base, size, highest_addr = 0, def, dummy;
+ unsigned long i, base, size, highest_pfn = 0, def, dummy;
mtrr_type type;
u64 trim_start, trim_size;
@@ -698,28 +682,27 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
mtrr_if->get(i, &base, &size, &type);
if (type != MTRR_TYPE_WRBACK)
continue;
- base <<= PAGE_SHIFT;
- size <<= PAGE_SHIFT;
- if (highest_addr < base + size)
- highest_addr = base + size;
+ if (highest_pfn < base + size)
+ highest_pfn = base + size;
}
/* kvm/qemu doesn't have mtrr set right, don't trim them all */
- if (!highest_addr) {
+ if (!highest_pfn) {
printk(KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n");
WARN_ON(1);
return 0;
}
- if ((highest_addr >> PAGE_SHIFT) < end_pfn) {
+ if (highest_pfn < end_pfn) {
printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
- " all of memory, losing %LdMB of RAM.\n",
- (((u64)end_pfn << PAGE_SHIFT) - highest_addr) >> 20);
+ " all of memory, losing %luMB of RAM.\n",
+ (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT));
WARN_ON(1);
printk(KERN_INFO "update e820 for mtrr\n");
- trim_start = highest_addr;
+ trim_start = highest_pfn;
+ trim_start <<= PAGE_SHIFT;
trim_size = end_pfn;
trim_size <<= PAGE_SHIFT;
trim_size -= trim_start;
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index fb74a2c20814..2cc77eb6fea3 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -97,3 +97,7 @@ void mtrr_state_warn(void);
const char *mtrr_attrib_to_str(int x);
void mtrr_wrmsr(unsigned, unsigned, unsigned);
+/* CPU specific mtrr init functions */
+int amd_init_mtrr(void);
+int cyrix_init_mtrr(void);
+int centaur_init_mtrr(void);
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 028213260148..af11d31dce0a 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -10,80 +10,6 @@
*/
static int show_cpuinfo(struct seq_file *m, void *v)
{
- /*
- * These flag bits must match the definitions in <asm/cpufeature.h>.
- * NULL means this bit is undefined or reserved; either way it doesn't
- * have meaning as far as Linux is concerned. Note that it's important
- * to realize there is a difference between this table and CPUID -- if
- * applications want to get the raw CPUID data, they should access
- * /dev/cpu/<cpu_nr>/cpuid instead.
- */
- static const char * const x86_cap_flags[] = {
- /* Intel-defined */
- "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
- "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
- "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
- "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
-
- /* AMD-defined */
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
- NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
- "3dnowext", "3dnow",
-
- /* Transmeta-defined */
- "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Other (Linux-defined) */
- "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
- NULL, NULL, NULL, NULL,
- "constant_tsc", "up", NULL, "arch_perfmon",
- "pebs", "bts", NULL, "sync_rdtsc",
- "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Intel-defined (#2) */
- "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
- "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
- NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* VIA/Cyrix/Centaur-defined */
- NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
- "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* AMD-defined (#2) */
- "lahf_lm", "cmp_legacy", "svm", "extapic",
- "cr8_legacy", "abm", "sse4a", "misalignsse",
- "3dnowprefetch", "osvw", "ibs", "sse5",
- "skinit", "wdt", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Auxiliary (Linux-defined) */
- "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- };
- static const char * const x86_power_flags[] = {
- "ts", /* temperature sensor */
- "fid", /* frequency id control */
- "vid", /* voltage id control */
- "ttp", /* thermal trip */
- "tm",
- "stc",
- "100mhzsteps",
- "hwpstate",
- "", /* constant_tsc - moved to flags */
- /* nothing */
- };
struct cpuinfo_x86 *c = v;
int i, n = 0;
int fpu_exception;
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index a63432d800f9..288e7a6598ac 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
- *
- * Copyright 2000 H. Peter Anvin - All Rights Reserved
+ *
+ * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -17,6 +17,10 @@
* and then read in chunks of 16 bytes. A larger size means multiple
* reads of consecutive levels.
*
+ * The lower 32 bits of the file position are used as the incoming %eax,
+ * and the upper 32 bits of the file position as the incoming %ecx,
+ * the latter intended for "counting" eax levels like eax=4.
+ *
* This driver uses /dev/cpu/%d/cpuid where %d is the minor number, and on
* an SMP box will direct the access to CPU %d.
*/
@@ -43,35 +47,24 @@
static struct class *cpuid_class;
-struct cpuid_command {
- u32 reg;
- u32 *data;
+struct cpuid_regs {
+ u32 eax, ebx, ecx, edx;
};
static void cpuid_smp_cpuid(void *cmd_block)
{
- struct cpuid_command *cmd = cmd_block;
-
- cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2],
- &cmd->data[3]);
-}
-
-static inline void do_cpuid(int cpu, u32 reg, u32 * data)
-{
- struct cpuid_command cmd;
-
- cmd.reg = reg;
- cmd.data = data;
+ struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block;
- smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
+ cpuid_count(cmd->eax, cmd->ecx,
+ &cmd->eax, &cmd->ebx, &cmd->ecx, &cmd->edx);
}
static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
{
loff_t ret;
+ struct inode *inode = file->f_mapping->host;
- lock_kernel();
-
+ mutex_lock(&inode->i_mutex);
switch (orig) {
case 0:
file->f_pos = offset;
@@ -84,8 +77,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
default:
ret = -EINVAL;
}
-
- unlock_kernel();
+ mutex_unlock(&inode->i_mutex);
return ret;
}
@@ -93,19 +85,21 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
size_t count, loff_t * ppos)
{
char __user *tmp = buf;
- u32 data[4];
- u32 reg = *ppos;
+ struct cpuid_regs cmd;
int cpu = iminor(file->f_path.dentry->d_inode);
+ u64 pos = *ppos;
if (count % 16)
return -EINVAL; /* Invalid chunk size */
for (; count; count -= 16) {
- do_cpuid(cpu, reg, data);
- if (copy_to_user(tmp, &data, 16))
+ cmd.eax = pos;
+ cmd.ecx = pos >> 32;
+ smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
+ if (copy_to_user(tmp, &cmd, 16))
return -EFAULT;
tmp += 16;
- *ppos = reg++;
+ *ppos = ++pos;
}
return tmp - buf;
@@ -193,7 +187,7 @@ static int __init cpuid_init(void)
}
for_each_online_cpu(i) {
err = cpuid_device_create(i);
- if (err != 0)
+ if (err != 0)
goto out_class;
}
register_hotcpu_notifier(&cpuid_class_cpu_notifier);
@@ -208,7 +202,7 @@ out_class:
}
class_destroy(cpuid_class);
out_chrdev:
- unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
+ unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
out:
return err;
}
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 1411324a625c..32dd62b36ff7 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -379,11 +379,9 @@ void __init efi_init(void)
#endif
}
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
static void __init runtime_code_page_mkexec(void)
{
efi_memory_desc_t *md;
- unsigned long end;
void *p;
if (!(__supported_pte_mask & _PAGE_NX))
@@ -392,18 +390,13 @@ static void __init runtime_code_page_mkexec(void)
/* Make EFI runtime service code area executable */
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
md = p;
- end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
- if (md->type == EFI_RUNTIME_SERVICES_CODE &&
- (end >> PAGE_SHIFT) <= max_pfn_mapped) {
- set_memory_x(md->virt_addr, md->num_pages);
- set_memory_uc(md->virt_addr, md->num_pages);
- }
+
+ if (md->type != EFI_RUNTIME_SERVICES_CODE)
+ continue;
+
+ set_memory_x(md->virt_addr, md->num_pages);
}
- __flush_tlb_all();
}
-#else
-static inline void __init runtime_code_page_mkexec(void) { }
-#endif
/*
* This function will switch the EFI runtime services to virtual mode.
@@ -417,30 +410,40 @@ void __init efi_enter_virtual_mode(void)
{
efi_memory_desc_t *md;
efi_status_t status;
- unsigned long end;
- void *p;
+ unsigned long size;
+ u64 end, systab;
+ void *p, *va;
efi.systab = NULL;
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
md = p;
if (!(md->attribute & EFI_MEMORY_RUNTIME))
continue;
- end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
- if ((md->attribute & EFI_MEMORY_WB) &&
- ((end >> PAGE_SHIFT) <= max_pfn_mapped))
- md->virt_addr = (unsigned long)__va(md->phys_addr);
+
+ size = md->num_pages << EFI_PAGE_SHIFT;
+ end = md->phys_addr + size;
+
+ if ((end >> PAGE_SHIFT) <= max_pfn_mapped)
+ va = __va(md->phys_addr);
else
- md->virt_addr = (unsigned long)
- efi_ioremap(md->phys_addr,
- md->num_pages << EFI_PAGE_SHIFT);
- if (!md->virt_addr)
+ va = efi_ioremap(md->phys_addr, size);
+
+ md->virt_addr = (u64) (unsigned long) va;
+
+ if (!va) {
printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n",
(unsigned long long)md->phys_addr);
- if ((md->phys_addr <= (unsigned long)efi_phys.systab) &&
- ((unsigned long)efi_phys.systab < end))
- efi.systab = (efi_system_table_t *)(unsigned long)
- (md->virt_addr - md->phys_addr +
- (unsigned long)efi_phys.systab);
+ continue;
+ }
+
+ if (!(md->attribute & EFI_MEMORY_WB))
+ set_memory_uc(md->virt_addr, md->num_pages);
+
+ systab = (u64) (unsigned long) efi_phys.systab;
+ if (md->phys_addr <= systab && systab < end) {
+ systab += md->virt_addr - md->phys_addr;
+ efi.systab = (efi_system_table_t *) (unsigned long) systab;
+ }
}
BUG_ON(!efi.systab);
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 674f2379480f..09d5c2330934 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -54,10 +54,10 @@ static void __init early_mapping_set_exec(unsigned long start,
else
set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \
__supported_pte_mask));
- if (level == 4)
- start = (start + PMD_SIZE) & PMD_MASK;
- else
+ if (level == PG_LEVEL_4K)
start = (start + PAGE_SIZE) & PAGE_MASK;
+ else
+ start = (start + PMD_SIZE) & PMD_MASK;
}
}
@@ -109,23 +109,23 @@ void __init efi_reserve_bootmem(void)
memmap.nr_map * memmap.desc_size);
}
-void __iomem * __init efi_ioremap(unsigned long offset,
- unsigned long size)
+void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size)
{
static unsigned pages_mapped;
- unsigned long last_addr;
unsigned i, pages;
- last_addr = offset + size - 1;
- offset &= PAGE_MASK;
- pages = (PAGE_ALIGN(last_addr) - offset) >> PAGE_SHIFT;
+ /* phys_addr and size must be page aligned */
+ if ((phys_addr & ~PAGE_MASK) || (size & ~PAGE_MASK))
+ return NULL;
+
+ pages = size >> PAGE_SHIFT;
if (pages_mapped + pages > MAX_EFI_IO_PAGES)
return NULL;
for (i = 0; i < pages; i++) {
__set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped,
- offset, PAGE_KERNEL_EXEC_NOCACHE);
- offset += PAGE_SIZE;
+ phys_addr, PAGE_KERNEL);
+ phys_addr += PAGE_SIZE;
pages_mapped++;
}
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index bea8474744ff..c7341e81941c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -582,7 +582,6 @@ retint_restore_args: /* return to kernel space */
TRACE_IRQS_IRETQ
restore_args:
RESTORE_ARGS 0,8,0
-iret_label:
#ifdef CONFIG_PARAVIRT
INTERRUPT_RETURN
#endif
@@ -593,13 +592,22 @@ ENTRY(native_iret)
.quad native_iret, bad_iret
.previous
.section .fixup,"ax"
- /* force a signal here? this matches i386 behaviour */
- /* running with kernel gs */
bad_iret:
- movq $11,%rdi /* SIGSEGV */
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
- jmp do_exit
+ /*
+ * The iret traps when the %cs or %ss being restored is bogus.
+ * We've lost the original trap vector and error code.
+ * #GPF is the most likely one to get for an invalid selector.
+ * So pretend we completed the iret and took the #GPF in user mode.
+ *
+ * We are now running with the kernel GS after exception recovery.
+ * But error_entry expects us to have user GS to match the user %cs,
+ * so swap back.
+ */
+ pushq $0
+
+ SWAPGS
+ jmp general_protection
+
.previous
/* edi: workmask, edx: work */
@@ -911,7 +919,7 @@ error_kernelspace:
iret run with kernel gs again, so don't set the user space flag.
B stepping K8s sometimes report an truncated RIP for IRET
exceptions returning to compat mode. Check for these here too. */
- leaq iret_label(%rip),%rbp
+ leaq native_iret(%rip),%rbp
cmpq %rbp,RIP(%rsp)
je error_swapgs
movl %ebp,%ebp /* zero extend */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 1d5a7a361200..09b38d539b09 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -63,7 +63,7 @@ startup_64:
/* Is the address not 2M aligned? */
movq %rbp, %rax
- andl $~LARGE_PAGE_MASK, %eax
+ andl $~PMD_PAGE_MASK, %eax
testl %eax, %eax
jnz bad_address
@@ -88,7 +88,7 @@ startup_64:
/* Add an Identity mapping if I am above 1G */
leaq _text(%rip), %rdi
- andq $LARGE_PAGE_MASK, %rdi
+ andq $PMD_PAGE_MASK, %rdi
movq %rdi, %rax
shrq $PUD_SHIFT, %rax
@@ -250,18 +250,13 @@ ENTRY(secondary_startup_64)
lretq
/* SMP bootup changes these two */
-#ifndef CONFIG_HOTPLUG_CPU
- .pushsection .init.data
-#endif
+ __CPUINITDATA
.align 8
- .globl initial_code
-initial_code:
+ ENTRY(initial_code)
.quad x86_64_start_kernel
-#ifndef CONFIG_HOTPLUG_CPU
- .popsection
-#endif
- .globl init_rsp
-init_rsp:
+ __FINITDATA
+
+ ENTRY(init_rsp)
.quad init_thread_union+THREAD_SIZE-8
bad_address:
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 8a7660c8394a..0224c3637c73 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -35,7 +35,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
if (mincount <= pc->size)
return 0;
oldsize = pc->size;
- mincount = (mincount + 511) & (~511);
+ mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
+ (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
else
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index bd82850e6519..af51ea8400b2 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
- *
- * Copyright 2000 H. Peter Anvin - All Rights Reserved
+ *
+ * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -45,9 +45,10 @@ static struct class *msr_class;
static loff_t msr_seek(struct file *file, loff_t offset, int orig)
{
- loff_t ret = -EINVAL;
+ loff_t ret;
+ struct inode *inode = file->f_mapping->host;
- lock_kernel();
+ mutex_lock(&inode->i_mutex);
switch (orig) {
case 0:
file->f_pos = offset;
@@ -56,8 +57,11 @@ static loff_t msr_seek(struct file *file, loff_t offset, int orig)
case 1:
file->f_pos += offset;
ret = file->f_pos;
+ break;
+ default:
+ ret = -EINVAL;
}
- unlock_kernel();
+ mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 1fe7f043ebde..1b5464c2434f 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -35,6 +35,7 @@
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/scatterlist.h>
+#include <linux/iommu-helper.h>
#include <asm/gart.h>
#include <asm/calgary.h>
#include <asm/tce.h>
@@ -260,22 +261,28 @@ static void iommu_range_reserve(struct iommu_table *tbl,
spin_unlock_irqrestore(&tbl->it_lock, flags);
}
-static unsigned long iommu_range_alloc(struct iommu_table *tbl,
- unsigned int npages)
+static unsigned long iommu_range_alloc(struct device *dev,
+ struct iommu_table *tbl,
+ unsigned int npages)
{
unsigned long flags;
unsigned long offset;
+ unsigned long boundary_size;
+
+ boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+ PAGE_SIZE) >> PAGE_SHIFT;
BUG_ON(npages == 0);
spin_lock_irqsave(&tbl->it_lock, flags);
- offset = find_next_zero_string(tbl->it_map, tbl->it_hint,
- tbl->it_size, npages);
+ offset = iommu_area_alloc(tbl->it_map, tbl->it_size, tbl->it_hint,
+ npages, 0, boundary_size, 0);
if (offset == ~0UL) {
tbl->chip_ops->tce_cache_blast(tbl);
- offset = find_next_zero_string(tbl->it_map, 0,
- tbl->it_size, npages);
+
+ offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0,
+ npages, 0, boundary_size, 0);
if (offset == ~0UL) {
printk(KERN_WARNING "Calgary: IOMMU full.\n");
spin_unlock_irqrestore(&tbl->it_lock, flags);
@@ -286,7 +293,6 @@ static unsigned long iommu_range_alloc(struct iommu_table *tbl,
}
}
- set_bit_string(tbl->it_map, offset, npages);
tbl->it_hint = offset + npages;
BUG_ON(tbl->it_hint > tbl->it_size);
@@ -295,13 +301,13 @@ static unsigned long iommu_range_alloc(struct iommu_table *tbl,
return offset;
}
-static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *vaddr,
- unsigned int npages, int direction)
+static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
+ void *vaddr, unsigned int npages, int direction)
{
unsigned long entry;
dma_addr_t ret = bad_dma_address;
- entry = iommu_range_alloc(tbl, npages);
+ entry = iommu_range_alloc(dev, tbl, npages);
if (unlikely(entry == bad_dma_address))
goto error;
@@ -354,7 +360,7 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
badbit, tbl, dma_addr, entry, npages);
}
- __clear_bit_string(tbl->it_map, entry, npages);
+ iommu_area_free(tbl->it_map, entry, npages);
spin_unlock_irqrestore(&tbl->it_lock, flags);
}
@@ -438,7 +444,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
vaddr = (unsigned long) sg_virt(s);
npages = num_dma_pages(vaddr, s->length);
- entry = iommu_range_alloc(tbl, npages);
+ entry = iommu_range_alloc(dev, tbl, npages);
if (entry == bad_dma_address) {
/* makes sure unmap knows to stop */
s->dma_length = 0;
@@ -476,7 +482,7 @@ static dma_addr_t calgary_map_single(struct device *dev, void *vaddr,
npages = num_dma_pages(uaddr, size);
if (translation_enabled(tbl))
- dma_handle = iommu_alloc(tbl, vaddr, npages, direction);
+ dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction);
else
dma_handle = virt_to_bus(vaddr);
@@ -516,7 +522,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
if (translation_enabled(tbl)) {
/* set up tces to cover the allocated range */
- mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL);
+ mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
if (mapping == bad_dma_address)
goto free;
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 4d5cc7181982..65f6acb025c8 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -25,6 +25,7 @@
#include <linux/bitops.h>
#include <linux/kdebug.h>
#include <linux/scatterlist.h>
+#include <linux/iommu-helper.h>
#include <asm/atomic.h>
#include <asm/io.h>
#include <asm/mtrr.h>
@@ -82,17 +83,24 @@ AGPEXTERN __u32 *agp_gatt_table;
static unsigned long next_bit; /* protected by iommu_bitmap_lock */
static int need_flush; /* global flush state. set for each gart wrap */
-static unsigned long alloc_iommu(int size)
+static unsigned long alloc_iommu(struct device *dev, int size)
{
unsigned long offset, flags;
+ unsigned long boundary_size;
+ unsigned long base_index;
+
+ base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
+ PAGE_SIZE) >> PAGE_SHIFT;
+ boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+ PAGE_SIZE) >> PAGE_SHIFT;
spin_lock_irqsave(&iommu_bitmap_lock, flags);
- offset = find_next_zero_string(iommu_gart_bitmap, next_bit,
- iommu_pages, size);
+ offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
+ size, base_index, boundary_size, 0);
if (offset == -1) {
need_flush = 1;
- offset = find_next_zero_string(iommu_gart_bitmap, 0,
- iommu_pages, size);
+ offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
+ size, base_index, boundary_size, 0);
}
if (offset != -1) {
set_bit_string(iommu_gart_bitmap, offset, size);
@@ -114,7 +122,7 @@ static void free_iommu(unsigned long offset, int size)
unsigned long flags;
spin_lock_irqsave(&iommu_bitmap_lock, flags);
- __clear_bit_string(iommu_gart_bitmap, offset, size);
+ iommu_area_free(iommu_gart_bitmap, offset, size);
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}
@@ -235,7 +243,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
size_t size, int dir)
{
unsigned long npages = to_pages(phys_mem, size);
- unsigned long iommu_page = alloc_iommu(npages);
+ unsigned long iommu_page = alloc_iommu(dev, npages);
int i;
if (iommu_page == -1) {
@@ -355,10 +363,11 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
}
/* Map multiple scatterlist entries continuous into the first. */
-static int __dma_map_cont(struct scatterlist *start, int nelems,
- struct scatterlist *sout, unsigned long pages)
+static int __dma_map_cont(struct device *dev, struct scatterlist *start,
+ int nelems, struct scatterlist *sout,
+ unsigned long pages)
{
- unsigned long iommu_start = alloc_iommu(pages);
+ unsigned long iommu_start = alloc_iommu(dev, pages);
unsigned long iommu_page = iommu_start;
struct scatterlist *s;
int i;
@@ -394,8 +403,8 @@ static int __dma_map_cont(struct scatterlist *start, int nelems,
}
static inline int
-dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout,
- unsigned long pages, int need)
+dma_map_cont(struct device *dev, struct scatterlist *start, int nelems,
+ struct scatterlist *sout, unsigned long pages, int need)
{
if (!need) {
BUG_ON(nelems != 1);
@@ -403,7 +412,7 @@ dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout,
sout->dma_length = start->length;
return 0;
}
- return __dma_map_cont(start, nelems, sout, pages);
+ return __dma_map_cont(dev, start, nelems, sout, pages);
}
/*
@@ -416,6 +425,8 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
struct scatterlist *s, *ps, *start_sg, *sgmap;
int need = 0, nextneed, i, out, start;
unsigned long pages = 0;
+ unsigned int seg_size;
+ unsigned int max_seg_size;
if (nents == 0)
return 0;
@@ -426,6 +437,8 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
out = 0;
start = 0;
start_sg = sgmap = sg;
+ seg_size = 0;
+ max_seg_size = dma_get_max_seg_size(dev);
ps = NULL; /* shut up gcc */
for_each_sg(sg, s, nents, i) {
dma_addr_t addr = sg_phys(s);
@@ -443,11 +456,13 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
* offset.
*/
if (!iommu_merge || !nextneed || !need || s->offset ||
+ (s->length + seg_size > max_seg_size) ||
(ps->offset + ps->length) % PAGE_SIZE) {
- if (dma_map_cont(start_sg, i - start, sgmap,
- pages, need) < 0)
+ if (dma_map_cont(dev, start_sg, i - start,
+ sgmap, pages, need) < 0)
goto error;
out++;
+ seg_size = 0;
sgmap = sg_next(sgmap);
pages = 0;
start = i;
@@ -455,11 +470,12 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
}
}
+ seg_size += s->length;
need = nextneed;
pages += to_pages(s->offset, s->length);
ps = s;
}
- if (dma_map_cont(start_sg, i - start, sgmap, pages, need) < 0)
+ if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0)
goto error;
out++;
flush_gart();
@@ -501,7 +517,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
}
a = aper + iommu_size;
- iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a;
+ iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;
if (iommu_size < 64*1024*1024) {
printk(KERN_WARNING
@@ -731,7 +747,8 @@ void __init gart_iommu_init(void)
* the backing memory. The GART address is only used by PCI
* devices.
*/
- clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size);
+ set_memory_np((unsigned long)__va(iommu_bus_base),
+ iommu_size >> PAGE_SHIFT);
/*
* Try to workaround a bug (thanks to BenH)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 968371ab223a..dabdbeff1f77 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -251,7 +251,7 @@ void cpu_idle_wait(void)
* because it has nothing to do.
* Give all the remaining CPUS a kick.
*/
- smp_call_function_mask(map, do_nothing, 0, 0);
+ smp_call_function_mask(map, do_nothing, NULL, 0);
} while (!cpus_empty(map));
set_cpus_allowed(current, tmp);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 96286df1bb81..702c33efea84 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -103,9 +103,26 @@ static int set_segment_reg(struct task_struct *task,
if (invalid_selector(value))
return -EIO;
- if (offset != offsetof(struct user_regs_struct, gs))
+ /*
+ * For %cs and %ss we cannot permit a null selector.
+ * We can permit a bogus selector as long as it has USER_RPL.
+ * Null selectors are fine for other segment registers, but
+ * we will never get back to user mode with invalid %cs or %ss
+ * and will take the trap in iret instead. Much code relies
+ * on user_mode() to distinguish a user trap frame (which can
+ * safely use invalid selectors) from a kernel trap frame.
+ */
+ switch (offset) {
+ case offsetof(struct user_regs_struct, cs):
+ case offsetof(struct user_regs_struct, ss):
+ if (unlikely(value == 0))
+ return -EIO;
+
+ default:
*pt_regs_access(task_pt_regs(task), offset) = value;
- else {
+ break;
+
+ case offsetof(struct user_regs_struct, gs):
task->thread.gs = value;
if (task == current)
/*
@@ -227,12 +244,16 @@ static int set_segment_reg(struct task_struct *task,
* Can't actually change these in 64-bit mode.
*/
case offsetof(struct user_regs_struct,cs):
+ if (unlikely(value == 0))
+ return -EIO;
#ifdef CONFIG_IA32_EMULATION
if (test_tsk_thread_flag(task, TIF_IA32))
task_pt_regs(task)->cs = value;
#endif
break;
case offsetof(struct user_regs_struct,ss):
+ if (unlikely(value == 0))
+ return -EIO;
#ifdef CONFIG_IA32_EMULATION
if (test_tsk_thread_flag(task, TIF_IA32))
task_pt_regs(task)->ss = value;
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 150ba29a0d33..6ba33ca8715a 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -30,8 +30,8 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
if (!(word & (1 << 13))) {
- printk(KERN_INFO "Intel E7520/7320/7525 detected. "
- "Disabling irq balancing and affinity\n");
+ dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
+ "disabling irq balancing and affinity\n");
#ifdef CONFIG_IRQBALANCE
irqbalance_disable("");
#endif
@@ -104,14 +104,16 @@ static void ich_force_enable_hpet(struct pci_dev *dev)
pci_read_config_dword(dev, 0xF0, &rcba);
rcba &= 0xFFFFC000;
if (rcba == 0) {
- printk(KERN_DEBUG "RCBA disabled. Cannot force enable HPET\n");
+ dev_printk(KERN_DEBUG, &dev->dev, "RCBA disabled; "
+ "cannot force enable HPET\n");
return;
}
/* use bits 31:14, 16 kB aligned */
rcba_base = ioremap_nocache(rcba, 0x4000);
if (rcba_base == NULL) {
- printk(KERN_DEBUG "ioremap failed. Cannot force enable HPET\n");
+ dev_printk(KERN_DEBUG, &dev->dev, "ioremap failed; "
+ "cannot force enable HPET\n");
return;
}
@@ -122,8 +124,8 @@ static void ich_force_enable_hpet(struct pci_dev *dev)
/* HPET is enabled in HPTC. Just not reported by BIOS */
val = val & 0x3;
force_hpet_address = 0xFED00000 | (val << 12);
- printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
- force_hpet_address);
+ dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at "
+ "0x%lx\n", force_hpet_address);
iounmap(rcba_base);
return;
}
@@ -142,11 +144,12 @@ static void ich_force_enable_hpet(struct pci_dev *dev)
if (err) {
force_hpet_address = 0;
iounmap(rcba_base);
- printk(KERN_DEBUG "Failed to force enable HPET\n");
+ dev_printk(KERN_DEBUG, &dev->dev,
+ "Failed to force enable HPET\n");
} else {
force_hpet_resume_type = ICH_FORCE_HPET_RESUME;
- printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
- force_hpet_address);
+ dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at "
+ "0x%lx\n", force_hpet_address);
}
}
@@ -208,8 +211,8 @@ static void old_ich_force_enable_hpet(struct pci_dev *dev)
if (val & 0x4) {
val &= 0x3;
force_hpet_address = 0xFED00000 | (val << 12);
- printk(KERN_DEBUG "HPET at base address 0x%lx\n",
- force_hpet_address);
+ dev_printk(KERN_DEBUG, &dev->dev, "HPET at 0x%lx\n",
+ force_hpet_address);
return;
}
@@ -229,14 +232,14 @@ static void old_ich_force_enable_hpet(struct pci_dev *dev)
/* HPET is enabled in HPTC. Just not reported by BIOS */
val &= 0x3;
force_hpet_address = 0xFED00000 | (val << 12);
- printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
- force_hpet_address);
+ dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at "
+ "0x%lx\n", force_hpet_address);
cached_dev = dev;
force_hpet_resume_type = OLD_ICH_FORCE_HPET_RESUME;
return;
}
- printk(KERN_DEBUG "Failed to force enable HPET\n");
+ dev_printk(KERN_DEBUG, &dev->dev, "Failed to force enable HPET\n");
}
/*
@@ -294,8 +297,8 @@ static void vt8237_force_enable_hpet(struct pci_dev *dev)
*/
if (val & 0x80) {
force_hpet_address = (val & ~0x3ff);
- printk(KERN_DEBUG "HPET at base address 0x%lx\n",
- force_hpet_address);
+ dev_printk(KERN_DEBUG, &dev->dev, "HPET at 0x%lx\n",
+ force_hpet_address);
return;
}
@@ -309,14 +312,14 @@ static void vt8237_force_enable_hpet(struct pci_dev *dev)
pci_read_config_dword(dev, 0x68, &val);
if (val & 0x80) {
force_hpet_address = (val & ~0x3ff);
- printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
- force_hpet_address);
+ dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at "
+ "0x%lx\n", force_hpet_address);
cached_dev = dev;
force_hpet_resume_type = VT8237_FORCE_HPET_RESUME;
return;
}
- printk(KERN_DEBUG "Failed to force enable HPET\n");
+ dev_printk(KERN_DEBUG, &dev->dev, "Failed to force enable HPET\n");
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235,
@@ -344,7 +347,7 @@ static void nvidia_force_enable_hpet(struct pci_dev *dev)
pci_read_config_dword(dev, 0x44, &val);
force_hpet_address = val & 0xfffffffe;
force_hpet_resume_type = NVIDIA_FORCE_HPET_RESUME;
- printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
+ dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n",
force_hpet_address);
cached_dev = dev;
return;
@@ -377,19 +380,19 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0367,
void force_hpet_resume(void)
{
switch (force_hpet_resume_type) {
- case ICH_FORCE_HPET_RESUME:
- return ich_force_hpet_resume();
-
- case OLD_ICH_FORCE_HPET_RESUME:
- return old_ich_force_hpet_resume();
-
- case VT8237_FORCE_HPET_RESUME:
- return vt8237_force_hpet_resume();
-
- case NVIDIA_FORCE_HPET_RESUME:
- return nvidia_force_hpet_resume();
-
- default:
+ case ICH_FORCE_HPET_RESUME:
+ ich_force_hpet_resume();
+ return;
+ case OLD_ICH_FORCE_HPET_RESUME:
+ old_ich_force_hpet_resume();
+ return;
+ case VT8237_FORCE_HPET_RESUME:
+ vt8237_force_hpet_resume();
+ return;
+ case NVIDIA_FORCE_HPET_RESUME:
+ nvidia_force_hpet_resume();
+ return;
+ default:
break;
}
}
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 18df70c534b9..c8939dfddfba 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -1068,82 +1068,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
struct cpuinfo_x86 *c = v;
int cpu = 0, i;
- /*
- * These flag bits must match the definitions in <asm/cpufeature.h>.
- * NULL means this bit is undefined or reserved; either way it doesn't
- * have meaning as far as Linux is concerned. Note that it's important
- * to realize there is a difference between this table and CPUID -- if
- * applications want to get the raw CPUID data, they should access
- * /dev/cpu/<cpu_nr>/cpuid instead.
- */
- static const char *const x86_cap_flags[] = {
- /* Intel-defined */
- "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
- "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
- "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
- "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
-
- /* AMD-defined */
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
- NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
- "3dnowext", "3dnow",
-
- /* Transmeta-defined */
- "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Other (Linux-defined) */
- "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
- NULL, NULL, NULL, NULL,
- "constant_tsc", "up", NULL, "arch_perfmon",
- "pebs", "bts", NULL, "sync_rdtsc",
- "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Intel-defined (#2) */
- "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
- "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
- NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* VIA/Cyrix/Centaur-defined */
- NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
- "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* AMD-defined (#2) */
- "lahf_lm", "cmp_legacy", "svm", "extapic",
- "cr8_legacy", "abm", "sse4a", "misalignsse",
- "3dnowprefetch", "osvw", "ibs", "sse5",
- "skinit", "wdt", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Auxiliary (Linux-defined) */
- "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- };
- static const char *const x86_power_flags[] = {
- "ts", /* temperature sensor */
- "fid", /* frequency id control */
- "vid", /* voltage id control */
- "ttp", /* thermal trip */
- "tm",
- "stc",
- "100mhzsteps",
- "hwpstate",
- "", /* tsc invariant mapped to constant_tsc */
- /* nothing */
- };
-
-
#ifdef CONFIG_SMP
cpu = c->cpu_index;
#endif
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
index 5787a0c3e296..579b9b740c7c 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot_32.c
@@ -202,8 +202,6 @@ valid_k7:
;
}
-extern void calibrate_delay(void);
-
static atomic_t init_deasserted;
static void __cpuinit smp_callin(void)
diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/kernel/srat_32.c
index 2bf6903cb444..b72e61359c36 100644
--- a/arch/x86/kernel/srat_32.c
+++ b/arch/x86/kernel/srat_32.c
@@ -274,7 +274,7 @@ int __init get_memcfg_from_srat(void)
int tables = 0;
int i = 0;
- rsdp_address = acpi_find_rsdp();
+ rsdp_address = acpi_os_get_root_pointer();
if (!rsdp_address) {
printk("%s: System description tables not found\n",
__FUNCTION__);
diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/kernel/suspend_64.c
index 09199511c256..7ac7130022f1 100644
--- a/arch/x86/kernel/suspend_64.c
+++ b/arch/x86/kernel/suspend_64.c
@@ -140,7 +140,12 @@ static void fix_processor_context(void)
int cpu = smp_processor_id();
struct tss_struct *t = &per_cpu(init_tss, cpu);
- set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */
+ /*
+ * This just modifies memory; should not be necessary. But... This
+ * is necessary, because 386 hardware has concept of busy TSS or some
+ * similar stupidity.
+ */
+ set_tss_desc(cpu, t);
get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9;
@@ -160,7 +165,6 @@ static void fix_processor_context(void)
loaddebug(&current->thread, 6);
loaddebug(&current->thread, 7);
}
-
}
#ifdef CONFIG_HIBERNATION
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 8344c70adf61..adff5562f5fd 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -321,6 +321,8 @@ ENTRY(sys_call_table)
.long sys_epoll_pwait
.long sys_utimensat /* 320 */
.long sys_signalfd
- .long sys_timerfd
+ .long sys_timerfd_create
.long sys_eventfd
.long sys_fallocate
+ .long sys_timerfd_settime /* 325 */
+ .long sys_timerfd_gettime
diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
index ae0ef2e304c7..10b8a6f69f84 100644
--- a/arch/x86/kernel/test_nx.c
+++ b/arch/x86/kernel/test_nx.c
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/sort.h>
#include <asm/uaccess.h>
+#include <asm/asm.h>
extern int rodata_test_data;
@@ -89,16 +90,7 @@ static noinline int test_address(void *address)
"2: mov %[zero], %[rslt]\n"
" ret\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 8\n"
-#ifdef CONFIG_X86_32
- " .long 0b\n"
- " .long 2b\n"
-#else
- " .quad 0b\n"
- " .quad 2b\n"
-#endif
- ".previous\n"
+ _ASM_EXTABLE(0b,2b)
: [rslt] "=r" (result)
: [fake_code] "r" (address), [zero] "r" (0UL), "0" (result)
);
@@ -147,7 +139,6 @@ static int test_NX(void)
* Until then, don't run them to avoid too many people getting scared
* by the error message
*/
-#if 0
#ifdef CONFIG_DEBUG_RODATA
/* Test 3: Check if the .rodata section is executable */
@@ -160,6 +151,7 @@ static int test_NX(void)
}
#endif
+#if 0
/* Test 4: Check if the .data section of a module is executable */
if (test_address(&test_data)) {
printk(KERN_ERR "test_nx: .data section is executable\n");
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index 9bcc1c6aca3d..64580679861e 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -11,12 +11,7 @@
* trampoline page to make our stack and everything else
* is a mystery.
*
- * In fact we don't actually need a stack so we don't
- * set one up.
- *
- * We jump into the boot/compressed/head.S code. So you'd
- * better be running a compressed kernel image or you
- * won't get very far.
+ * We jump into arch/x86/kernel/head_32.S.
*
* On entry to trampoline_data, the processor is in real mode
* with 16-bit addressing and 16-bit data. CS has some value
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index e30b67c6a9f5..4aedd0bcee4c 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -10,9 +10,6 @@
* trampoline page to make our stack and everything else
* is a mystery.
*
- * In fact we don't actually need a stack so we don't
- * set one up.
- *
* On entry to trampoline_data, the processor is in real mode
* with 16-bit addressing and 16-bit data. CS has some value
* and IP is zero. Thus, data addresses need to be absolute
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 3cf72977d012..b22c01e05a18 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1176,17 +1176,12 @@ void __init trap_init(void)
#endif
set_trap_gate(19,&simd_coprocessor_error);
+ /*
+ * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
+ * Generate a build-time error if the alignment is wrong.
+ */
+ BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15);
if (cpu_has_fxsr) {
- /*
- * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
- * Generates a compile-time "error: zero width for bit-field" if
- * the alignment is wrong.
- */
- struct fxsrAlignAssert {
- int _:!(offsetof(struct task_struct,
- thread.i387.fxsave) & 15);
- };
-
printk(KERN_INFO "Enabling fast FPU save and restore... ");
set_in_cr4(X86_CR4_OSFXSR);
printk("done.\n");
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 4525bc2c2e19..12affe1f9bce 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -220,21 +220,21 @@ static void vmi_set_tr(void)
+/*
+ * Forward an IDT entry to vmi_ops.write_idt_entry. The descriptor *g is
+ * viewed as an array of u32 and its first two words are passed on.
+ */
static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
{
u32 *idt_entry = (u32 *)g;
- vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[2]);
+ vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[1]);
}
+/*
+ * Forward a GDT entry to vmi_ops.write_gdt_entry. The descriptor is
+ * viewed as an array of u32 and its first two words are passed on;
+ * the type argument is not used here.
+ */
static void vmi_write_gdt_entry(struct desc_struct *dt, int entry,
const void *desc, int type)
{
u32 *gdt_entry = (u32 *)desc;
- vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[2]);
+ vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[1]);
}
+/*
+ * Forward an LDT entry to vmi_ops.write_ldt_entry. The descriptor is
+ * viewed as an array of u32 and its first two words are passed on.
+ *
+ * This must use the LDT hook, not write_idt_entry: the idt and gdt
+ * wrappers above each call their own vmi_ops member.
+ */
static void vmi_write_ldt_entry(struct desc_struct *dt, int entry,
const void *desc)
{
u32 *ldt_entry = (u32 *)desc;
- vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[2]);
+ vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
}
static void vmi_load_sp0(struct tss_struct *tss,
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index c83e1c9b5129..41962e793c0f 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -53,5 +53,6 @@ config KVM_AMD
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
source drivers/lguest/Kconfig
+source drivers/virtio/Kconfig
endif # VIRTUALIZATION
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f94a0b89dff..cf5308148689 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1739,7 +1739,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
if (bytes == 8) {
gpa_t gpa;
struct page *page;
- char *addr;
+ char *kaddr;
u64 val;
down_read(&current->mm->mmap_sem);
@@ -1754,9 +1754,9 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
val = *(u64 *)new;
page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
- addr = kmap_atomic(page, KM_USER0);
- set_64bit((u64 *)(addr + offset_in_page(gpa)), val);
- kunmap_atomic(addr, KM_USER0);
+ kaddr = kmap_atomic(page, KM_USER0);
+ set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
+ kunmap_atomic(kaddr, KM_USER0);
kvm_release_page_dirty(page);
emul_write:
up_read(&current->mm->mmap_sem);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 4876182daf8a..25df1c1989fe 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -21,7 +21,7 @@ else
lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
lib-y += thunk_64.o clear_page_64.o copy_page_64.o
- lib-y += bitstr_64.o bitops_64.o
+ lib-y += bitops_64.o
lib-y += memmove_64.o memset_64.o
lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
endif
diff --git a/arch/x86/lib/bitops_32.c b/arch/x86/lib/bitops_32.c
index afd0045595d4..b65440459859 100644
--- a/arch/x86/lib/bitops_32.c
+++ b/arch/x86/lib/bitops_32.c
@@ -2,7 +2,7 @@
#include <linux/module.h>
/**
- * find_next_bit - find the first set bit in a memory region
+ * find_next_bit - find the next set bit in a memory region
* @addr: The address to base the search on
* @offset: The bitnumber to start searching at
* @size: The maximum size to search
diff --git a/arch/x86/lib/bitops_64.c b/arch/x86/lib/bitops_64.c
index 95b6d9639fba..0e8f491e6ccc 100644
--- a/arch/x86/lib/bitops_64.c
+++ b/arch/x86/lib/bitops_64.c
@@ -58,7 +58,7 @@ long find_first_zero_bit(const unsigned long * addr, unsigned long size)
}
/**
- * find_next_zero_bit - find the first zero bit in a memory region
+ * find_next_zero_bit - find the next zero bit in a memory region
* @addr: The address to base the search on
* @offset: The bitnumber to start searching at
* @size: The maximum size to search
diff --git a/arch/x86/lib/bitstr_64.c b/arch/x86/lib/bitstr_64.c
deleted file mode 100644
index 7445caf1b5de..000000000000
--- a/arch/x86/lib/bitstr_64.c
+++ /dev/null
@@ -1,28 +0,0 @@
-#include <linux/module.h>
-#include <linux/bitops.h>
-
-/* Find string of zero bits in a bitmap */
-unsigned long
-find_next_zero_string(unsigned long *bitmap, long start, long nbits, int len)
-{
- unsigned long n, end, i;
-
- again:
- n = find_next_zero_bit(bitmap, nbits, start);
- if (n == -1)
- return -1;
-
- /* could test bitsliced, but it's hardly worth it */
- end = n+len;
- if (end > nbits)
- return -1;
- for (i = n+1; i < end; i++) {
- if (test_bit(i, bitmap)) {
- start = i+1;
- goto again;
- }
- }
- return n;
-}
-
-EXPORT_SYMBOL(find_next_zero_string);
diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c
index aad9d95469dc..4535e6d147ad 100644
--- a/arch/x86/lib/delay_32.c
+++ b/arch/x86/lib/delay_32.c
@@ -12,8 +12,10 @@
#include <linux/module.h>
#include <linux/sched.h>
+#include <linux/timex.h>
#include <linux/preempt.h>
#include <linux/delay.h>
+#include <linux/init.h>
#include <asm/processor.h>
#include <asm/delay.h>
@@ -63,7 +65,7 @@ void use_tsc_delay(void)
delay_fn = delay_tsc;
}
-int read_current_timer(unsigned long *timer_val)
+int __devinit read_current_timer(unsigned long *timer_val)
{
if (delay_fn == delay_tsc) {
rdtscl(*timer_val);
diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c
index 45cdd3fbd91c..bbc610518516 100644
--- a/arch/x86/lib/delay_64.c
+++ b/arch/x86/lib/delay_64.c
@@ -10,8 +10,10 @@
#include <linux/module.h>
#include <linux/sched.h>
+#include <linux/timex.h>
#include <linux/preempt.h>
#include <linux/delay.h>
+#include <linux/init.h>
#include <asm/delay.h>
#include <asm/msr.h>
@@ -20,7 +22,7 @@
#include <asm/smp.h>
#endif
-int read_current_timer(unsigned long *timer_value)
+int __devinit read_current_timer(unsigned long *timer_value)
{
rdtscll(*timer_value);
return 0;
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
index 28084d2e8dd4..cc9b4a4450f3 100644
--- a/arch/x86/lib/mmx_32.c
+++ b/arch/x86/lib/mmx_32.c
@@ -4,6 +4,7 @@
#include <linux/hardirq.h>
#include <linux/module.h>
+#include <asm/asm.h>
#include <asm/i387.h>
@@ -50,10 +51,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b, 3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: : "r" (from) );
@@ -81,10 +79,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b, 3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: : "r" (from), "r" (to) : "memory");
from+=64;
to+=64;
@@ -181,10 +176,7 @@ static void fast_copy_page(void *to, void *from)
"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b, 3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: : "r" (from) );
for(i=0; i<(4096-320)/64; i++)
@@ -211,10 +203,7 @@ static void fast_copy_page(void *to, void *from)
"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b, 3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: : "r" (from), "r" (to) : "memory");
from+=64;
to+=64;
@@ -311,10 +300,7 @@ static void fast_copy_page(void *to, void *from)
"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b, 3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: : "r" (from) );
for(i=0; i<4096/64; i++)
@@ -341,10 +327,7 @@ static void fast_copy_page(void *to, void *from)
"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b, 3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: : "r" (from), "r" (to) : "memory");
from+=64;
to+=64;
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 9c4ffd5bedb2..e849b9998b0e 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -48,10 +48,7 @@ do { \
"3: movl %5,%0\n" \
" jmp 2b\n" \
".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,3b\n" \
- ".previous" \
+ _ASM_EXTABLE(0b,3b) \
: "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \
"=&D" (__d2) \
: "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
@@ -132,11 +129,8 @@ do { \
"3: lea 0(%2,%0,4),%0\n" \
" jmp 2b\n" \
".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,3b\n" \
- " .long 1b,2b\n" \
- ".previous" \
+ _ASM_EXTABLE(0b,3b) \
+ _ASM_EXTABLE(1b,2b) \
: "=&c"(size), "=&D" (__d0) \
: "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \
} while (0)
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 893d43f838cc..0c89d1bb0287 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -31,10 +31,7 @@ do { \
"3: movq %5,%0\n" \
" jmp 2b\n" \
".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 8\n" \
- " .quad 0b,3b\n" \
- ".previous" \
+ _ASM_EXTABLE(0b,3b) \
: "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \
"=&D" (__d2) \
: "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
@@ -87,11 +84,8 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
"3: lea 0(%[size1],%[size8],8),%[size8]\n"
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 8\n"
- " .quad 0b,3b\n"
- " .quad 1b,2b\n"
- ".previous"
+ _ASM_EXTABLE(0b,3b)
+ _ASM_EXTABLE(1b,2b)
: [size8] "=c"(size), [dst] "=&D" (__d0)
: [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
[zero] "r" (0UL), [eight] "r" (8UL));
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index dffa786f61fe..3cc8eb2f36a9 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -444,8 +444,6 @@ static __u32 __init setup_trampoline(void)
static void __init start_secondary(void *unused)
{
__u8 cpuid = hard_smp_processor_id();
- /* external functions not defined in the headers */
- extern void calibrate_delay(void);
cpu_init();
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e4440d0abf81..621afb6343dc 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -240,7 +240,8 @@ void dump_pagetable(unsigned long address)
pud = pud_offset(pgd, address);
if (bad_address(pud)) goto bad;
printk("PUD %lx ", pud_val(*pud));
- if (!pud_present(*pud)) goto ret;
+ if (!pud_present(*pud) || pud_large(*pud))
+ goto ret;
pmd = pmd_offset(pud, address);
if (bad_address(pmd)) goto bad;
@@ -427,6 +428,16 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
}
#endif
+/*
+ * Check whether the given entry permits the access recorded in
+ * error_code.
+ *
+ * Returns 0 for a write fault on a non-writable entry or an instruction
+ * fetch fault on a non-executable entry, 1 otherwise. spurious_fault()
+ * also passes large pud/pmd entries here, cast to pte_t *.
+ */
+static int spurious_fault_check(unsigned long error_code, pte_t *pte)
+{
+	int write_ok = !(error_code & PF_WRITE) || pte_write(*pte);
+
+	return write_ok && (!(error_code & PF_INSTR) || pte_exec(*pte));
+}
+
/*
* Handle a spurious fault caused by a stale TLB entry. This allows
* us to lazily refresh the TLB when increasing the permissions of a
@@ -456,20 +467,21 @@ static int spurious_fault(unsigned long address,
if (!pud_present(*pud))
return 0;
+ if (pud_large(*pud))
+ return spurious_fault_check(error_code, (pte_t *) pud);
+
pmd = pmd_offset(pud, address);
if (!pmd_present(*pmd))
return 0;
+ if (pmd_large(*pmd))
+ return spurious_fault_check(error_code, (pte_t *) pmd);
+
pte = pte_offset_kernel(pmd, address);
if (!pte_present(*pte))
return 0;
- if ((error_code & PF_WRITE) && !pte_write(*pte))
- return 0;
- if ((error_code & PF_INSTR) && !pte_exec(*pte))
- return 0;
-
- return 1;
+ return spurious_fault_check(error_code, pte);
}
/*
@@ -508,6 +520,10 @@ static int vmalloc_fault(unsigned long address)
pmd_t *pmd, *pmd_ref;
pte_t *pte, *pte_ref;
+ /* Make sure we are in vmalloc area */
+ if (!(address >= VMALLOC_START && address < VMALLOC_END))
+ return -1;
+
/* Copy kernel mappings over when needed. This can also
happen within a race in page table update. In the later
case just flush. */
@@ -603,6 +619,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
*/
#ifdef CONFIG_X86_32
if (unlikely(address >= TASK_SIZE)) {
+#else
+ if (unlikely(address >= TASK_SIZE64)) {
+#endif
if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
vmalloc_fault(address) >= 0)
return;
@@ -618,6 +637,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
goto bad_area_nosemaphore;
}
+
+#ifdef CONFIG_X86_32
/* It's safe to allow irq's after cr2 has been saved and the vmalloc
fault has been handled. */
if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
@@ -630,28 +651,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (in_atomic() || !mm)
goto bad_area_nosemaphore;
#else /* CONFIG_X86_64 */
- if (unlikely(address >= TASK_SIZE64)) {
- /*
- * Don't check for the module range here: its PML4
- * is always initialized because it's shared with the main
- * kernel text. Only vmalloc may need PML4 syncups.
- */
- if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
- ((address >= VMALLOC_START && address < VMALLOC_END))) {
- if (vmalloc_fault(address) >= 0)
- return;
- }
-
- /* Can handle a stale RO->RW TLB */
- if (spurious_fault(address, error_code))
- return;
-
- /*
- * Don't take the mm semaphore here. If we fixup a prefetch
- * fault we could otherwise deadlock.
- */
- goto bad_area_nosemaphore;
- }
if (likely(regs->flags & X86_EFLAGS_IF))
local_irq_enable();
@@ -959,11 +958,12 @@ void vmalloc_sync_all(void)
for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
if (!test_bit(pgd_index(address), insync)) {
const pgd_t *pgd_ref = pgd_offset_k(address);
+ unsigned long flags;
struct page *page;
if (pgd_none(*pgd_ref))
continue;
- spin_lock(&pgd_lock);
+ spin_lock_irqsave(&pgd_lock, flags);
list_for_each_entry(page, &pgd_list, lru) {
pgd_t *pgd;
pgd = (pgd_t *)page_address(page) + pgd_index(address);
@@ -972,7 +972,7 @@ void vmalloc_sync_all(void)
else
BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
}
- spin_unlock(&pgd_lock);
+ spin_unlock_irqrestore(&pgd_lock, flags);
set_bit(pgd_index(address), insync);
}
if (address == start)
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index da524fb22422..d1bc04006d16 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -31,6 +31,7 @@
#include <linux/initrd.h>
#include <linux/cpumask.h>
+#include <asm/asm.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -423,23 +424,23 @@ static void __init pagetable_init(void)
paravirt_pagetable_setup_done(pgd_base);
}
-#if defined(CONFIG_HIBERNATION) || defined(CONFIG_ACPI)
+#ifdef CONFIG_ACPI_SLEEP
/*
- * Swap suspend & friends need this for resume because things like the intel-agp
+ * ACPI suspend needs this for resume, because things like the intel-agp
* driver might have split up a kernel 4MB mapping.
*/
-char __nosavedata swsusp_pg_dir[PAGE_SIZE]
+char swsusp_pg_dir[PAGE_SIZE]
__attribute__ ((aligned(PAGE_SIZE)));
static inline void save_pg_dir(void)
{
memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
}
-#else
+#else /* !CONFIG_ACPI_SLEEP */
static inline void save_pg_dir(void)
{
}
-#endif
+#endif /* !CONFIG_ACPI_SLEEP */
void zap_low_mappings(void)
{
@@ -718,10 +719,7 @@ static noinline int do_test_wp_bit(void)
"1: movb %1, %0 \n"
" xorl %2, %2 \n"
"2: \n"
- ".section __ex_table, \"a\"\n"
- " .align 4 \n"
- " .long 1b, 2b \n"
- ".previous \n"
+ _ASM_EXTABLE(1b,2b)
:"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
"=q" (tmp_reg),
"=r" (flag)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index eabcaed76c28..9b61c75a2355 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -273,7 +273,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
int i = pmd_index(address);
for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
- unsigned long entry;
pmd_t *pmd = pmd_page + pmd_index(address);
if (address >= end) {
@@ -287,9 +286,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
if (pmd_val(*pmd))
continue;
- entry = __PAGE_KERNEL_LARGE|_PAGE_GLOBAL|address;
- entry &= __supported_pte_mask;
- set_pmd(pmd, __pmd(entry));
+ set_pte((pte_t *)pmd,
+ pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
}
}
@@ -435,49 +433,6 @@ void __init paging_init(void)
#endif
/*
- * Unmap a kernel mapping if it exists. This is useful to avoid
- * prefetches from the CPU leading to inconsistent cache lines.
- * address and size must be aligned to 2MB boundaries.
- * Does nothing when the mapping doesn't exist.
- */
-void __init clear_kernel_mapping(unsigned long address, unsigned long size)
-{
- unsigned long end = address + size;
-
- BUG_ON(address & ~LARGE_PAGE_MASK);
- BUG_ON(size & ~LARGE_PAGE_MASK);
-
- for (; address < end; address += LARGE_PAGE_SIZE) {
- pgd_t *pgd = pgd_offset_k(address);
- pud_t *pud;
- pmd_t *pmd;
-
- if (pgd_none(*pgd))
- continue;
-
- pud = pud_offset(pgd, address);
- if (pud_none(*pud))
- continue;
-
- pmd = pmd_offset(pud, address);
- if (!pmd || pmd_none(*pmd))
- continue;
-
- if (!(pmd_val(*pmd) & _PAGE_PSE)) {
- /*
- * Could handle this, but it should not happen
- * currently:
- */
- printk(KERN_ERR "clear_kernel_mapping: "
- "mapping has been split. will leak memory\n");
- pmd_ERROR(*pmd);
- }
- set_pmd(pmd, __pmd(0));
- }
- __flush_tlb_all();
-}
-
-/*
* Memory hotplug specific functions
*/
void online_page(struct page *page)
@@ -636,10 +591,17 @@ void mark_rodata_ro(void)
if (end <= start)
return;
- set_memory_ro(start, (end - start) >> PAGE_SHIFT);
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
(end - start) >> 10);
+ set_memory_ro(start, (end - start) >> PAGE_SHIFT);
+
+ /*
+ * The rodata section (but not the kernel text!) should also be
+ * not-executable.
+ */
+ start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+ set_memory_nx(start, (end - start) >> PAGE_SHIFT);
rodata_test();
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index c004d94608fd..ee6648fe6b15 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -70,25 +70,12 @@ int page_is_ram(unsigned long pagenr)
* Fix up the linear direct mapping of the kernel to avoid cache attribute
* conflicts.
*/
-static int ioremap_change_attr(unsigned long paddr, unsigned long size,
+static int ioremap_change_attr(unsigned long vaddr, unsigned long size,
enum ioremap_mode mode)
{
- unsigned long vaddr = (unsigned long)__va(paddr);
unsigned long nrpages = size >> PAGE_SHIFT;
- unsigned int level;
int err;
- /* No change for pages after the last mapping */
- if ((paddr + size - 1) >= (max_pfn_mapped << PAGE_SHIFT))
- return 0;
-
- /*
- * If there is no identity map for this address,
- * change_page_attr_addr is unnecessary
- */
- if (!lookup_address(vaddr, &level))
- return 0;
-
switch (mode) {
case IOR_MODE_UNCACHED:
default:
@@ -114,9 +101,8 @@ static int ioremap_change_attr(unsigned long paddr, unsigned long size,
static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
enum ioremap_mode mode)
{
- void __iomem *addr;
+ unsigned long pfn, offset, last_addr, vaddr;
struct vm_struct *area;
- unsigned long offset, last_addr;
pgprot_t prot;
/* Don't allow wraparound or zero size */
@@ -133,9 +119,10 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
/*
* Don't allow anybody to remap normal RAM that we're using..
*/
- for (offset = phys_addr >> PAGE_SHIFT; offset < max_pfn_mapped &&
- (offset << PAGE_SHIFT) < last_addr; offset++) {
- if (page_is_ram(offset))
+ for (pfn = phys_addr >> PAGE_SHIFT; pfn < max_pfn_mapped &&
+ (pfn << PAGE_SHIFT) < last_addr; pfn++) {
+ if (page_is_ram(pfn) && pfn_valid(pfn) &&
+ !PageReserved(pfn_to_page(pfn)))
return NULL;
}
@@ -163,19 +150,18 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
if (!area)
return NULL;
area->phys_addr = phys_addr;
- addr = (void __iomem *) area->addr;
- if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
- phys_addr, prot)) {
- remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
+ vaddr = (unsigned long) area->addr;
+ if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
+ remove_vm_area((void *)(vaddr & PAGE_MASK));
return NULL;
}
- if (ioremap_change_attr(phys_addr, size, mode) < 0) {
- vunmap(addr);
+ if (ioremap_change_attr(vaddr, size, mode) < 0) {
+ vunmap(area->addr);
return NULL;
}
- return (void __iomem *) (offset + (char __iomem *)addr);
+ return (void __iomem *) (vaddr + offset);
}
/**
@@ -254,9 +240,6 @@ void iounmap(volatile void __iomem *addr)
return;
}
- /* Reset the direct mapping. Can block */
- ioremap_change_attr(p->phys_addr, p->size, IOR_MODE_CACHED);
-
/* Finally remove it */
o = remove_vm_area((void *)addr);
BUG_ON(p != o || o == NULL);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index a920d09b9194..5a02bf4c91ec 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -202,6 +202,8 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
if (node_data[nodeid] == NULL)
return;
nodedata_phys = __pa(node_data[nodeid]);
+ printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
+ nodedata_phys + pgdat_size - 1);
memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
@@ -225,12 +227,15 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
return;
}
bootmap_start = __pa(bootmap);
- Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
bootmap_start >> PAGE_SHIFT,
start_pfn, end_pfn);
+ printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n",
+ bootmap_start, bootmap_start + bootmap_size - 1,
+ bootmap_pages);
+
free_bootmem_with_active_regions(nodeid, end);
reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 7573e786d2f2..ed8201600354 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -5,6 +5,7 @@
* and compares page tables forwards and afterwards.
*/
#include <linux/bootmem.h>
+#include <linux/kthread.h>
#include <linux/random.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -14,8 +15,13 @@
#include <asm/pgtable.h>
#include <asm/kdebug.h>
+/*
+ * Only print the results of the first pass:
+ */
+static __read_mostly int print = 1;
+
enum {
- NTEST = 4000,
+ NTEST = 400,
#ifdef CONFIG_X86_64
LPS = (1 << PMD_SHIFT),
#elif defined(CONFIG_X86_PAE)
@@ -31,7 +37,7 @@ struct split_state {
long min_exec, max_exec;
};
-static __init int print_split(struct split_state *s)
+static int print_split(struct split_state *s)
{
long i, expected, missed = 0;
int printed = 0;
@@ -82,10 +88,13 @@ static __init int print_split(struct split_state *s)
s->max_exec = addr;
}
}
- printk(KERN_INFO
- "CPA mapping 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n",
- s->spg, s->lpg, s->gpg, s->exec,
- s->min_exec != ~0UL ? s->min_exec : 0, s->max_exec, missed);
+ if (print) {
+ printk(KERN_INFO
+ " 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n",
+ s->spg, s->lpg, s->gpg, s->exec,
+ s->min_exec != ~0UL ? s->min_exec : 0,
+ s->max_exec, missed);
+ }
expected = (s->gpg*GPS + s->lpg*LPS)/PAGE_SIZE + s->spg + missed;
if (expected != i) {
@@ -96,11 +105,11 @@ static __init int print_split(struct split_state *s)
return err;
}
-static unsigned long __initdata addr[NTEST];
-static unsigned int __initdata len[NTEST];
+static unsigned long addr[NTEST];
+static unsigned int len[NTEST];
/* Change the global bit on random pages in the direct mapping */
-static __init int exercise_pageattr(void)
+static int pageattr_test(void)
{
struct split_state sa, sb, sc;
unsigned long *bm;
@@ -110,7 +119,8 @@ static __init int exercise_pageattr(void)
int i, k;
int err;
- printk(KERN_INFO "CPA exercising pageattr\n");
+ if (print)
+ printk(KERN_INFO "CPA self-test:\n");
bm = vmalloc((max_pfn_mapped + 7) / 8);
if (!bm) {
@@ -137,7 +147,8 @@ static __init int exercise_pageattr(void)
for (k = 0; k < len[i]; k++) {
pte = lookup_address(addr[i] + k*PAGE_SIZE, &level);
- if (!pte || pgprot_val(pte_pgprot(*pte)) == 0) {
+ if (!pte || pgprot_val(pte_pgprot(*pte)) == 0 ||
+ !(pte_val(*pte) & _PAGE_PRESENT)) {
addr[i] = 0;
break;
}
@@ -185,7 +196,6 @@ static __init int exercise_pageattr(void)
failed += print_split(&sb);
- printk(KERN_INFO "CPA reverting everything\n");
for (i = 0; i < NTEST; i++) {
if (!addr[i])
continue;
@@ -213,12 +223,40 @@ static __init int exercise_pageattr(void)
failed += print_split(&sc);
if (failed) {
- printk(KERN_ERR "CPA selftests NOT PASSED. Please report.\n");
+ printk(KERN_ERR "NOT PASSED. Please report.\n");
WARN_ON(1);
+ return -EINVAL;
} else {
- printk(KERN_INFO "CPA selftests PASSED\n");
+ if (print)
+ printk(KERN_INFO "ok.\n");
}
return 0;
}
-module_init(exercise_pageattr);
+
+/*
+ * Thread function of the CPA self-test: until the thread is asked to
+ * stop, sleep (interruptibly) for HZ*30 jiffies and then run one
+ * pageattr_test() pass. The loop ends on the first failing pass.
+ * "print" is counted down to zero so that only the first pass prints
+ * its results.
+ */
+static int do_pageattr_test(void *__unused)
+{
+	for (;;) {
+		if (kthread_should_stop())
+			return 0;
+
+		schedule_timeout_interruptible(HZ*30);
+		if (pageattr_test() < 0)
+			return 0;
+
+		if (print)
+			print--;
+	}
+}
+
+/*
+ * Init hook: start the "pageattr-test" kernel thread. A failure to
+ * create the thread only triggers a warning; 0 is returned either way.
+ */
+static int start_pageattr_test(void)
+{
+	struct task_struct *p;
+
+	/* kthread_run() creates the thread and wakes it up on success */
+	p = kthread_run(do_pageattr_test, NULL, "pageattr-test");
+	WARN_ON(IS_ERR(p));
+
+	return 0;
+}
+
+module_init(start_pageattr_test);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e297bd65e513..8493c855582b 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -16,6 +16,17 @@
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
+/*
+ * The current flushing context - we pass it instead of 5 arguments:
+ *
+ * mask_clr bits are cleared from, and mask_set bits are then set in,
+ * the protection bits of a mapping (see try_preserve_large_page()).
+ * numpages is a page count which try_preserve_large_page() clamps to
+ * the pages remaining in the large page that contains the address.
+ */
+struct cpa_data {
+ unsigned long vaddr; /* presumably the address being changed - TODO confirm */
+ pgprot_t mask_set;
+ pgprot_t mask_clr;
+ int numpages;
+ int flushtlb; /* presumably a "TLB flush needed" flag - TODO confirm */
+};
+
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
@@ -52,21 +63,23 @@ void clflush_cache_range(void *vaddr, unsigned int size)
+/*
+ * Flush callback that cpa_flush_all() runs on each CPU. arg carries the
+ * "cache" flag as an integer cast to a pointer. The TLB is always
+ * flushed; wbinvd() is issued only when the flag is non-zero and
+ * boot_cpu_data.x86_model >= 4.
+ */
static void __cpa_flush_all(void *arg)
{
+ unsigned long cache = (unsigned long)arg;
+
/*
* Flush all to work around Errata in early athlons regarding
* large page flushing.
*/
__flush_tlb_all();
- if (boot_cpu_data.x86_model >= 4)
+ if (cache && boot_cpu_data.x86_model >= 4)
wbinvd();
}
+/*
+ * Run __cpa_flush_all() on every CPU via on_each_cpu(), handing the
+ * cache flag over through the void * argument. Must not be called with
+ * interrupts disabled (BUG_ON below).
+ */
-static void cpa_flush_all(void)
+static void cpa_flush_all(unsigned long cache)
{
BUG_ON(irqs_disabled());
- on_each_cpu(__cpa_flush_all, NULL, 1, 1);
+ on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
}
static void __cpa_flush_range(void *arg)
@@ -79,7 +92,7 @@ static void __cpa_flush_range(void *arg)
__flush_tlb_all();
}
-static void cpa_flush_range(unsigned long start, int numpages)
+static void cpa_flush_range(unsigned long start, int numpages, int cache)
{
unsigned int i, level;
unsigned long addr;
@@ -89,6 +102,9 @@ static void cpa_flush_range(unsigned long start, int numpages)
on_each_cpu(__cpa_flush_range, NULL, 1, 1);
+ if (!cache)
+ return;
+
/*
* We only need to flush on one CPU,
* clflush is a MESI-coherent instruction that
@@ -101,11 +117,27 @@ static void cpa_flush_range(unsigned long start, int numpages)
/*
* Only flush present addresses:
*/
- if (pte && pte_present(*pte))
+ if (pte && (pte_val(*pte) & _PAGE_PRESENT))
clflush_cache_range((void *) addr, PAGE_SIZE);
}
}
+#define HIGH_MAP_START __START_KERNEL_map
+#define HIGH_MAP_END (__START_KERNEL_map + KERNEL_TEXT_SIZE)
+
+
+/*
+ * Translate a kernel virtual address into the matching address inside
+ * the x86-64 high kernel mapping (which starts at HIGH_MAP_START).
+ * Without CONFIG_X86_64 the address is returned unchanged.
+ */
+static unsigned long virt_to_highmap(void *address)
+{
+	unsigned long highmap = (unsigned long)address;
+
+#ifdef CONFIG_X86_64
+	highmap = __pa(highmap) + HIGH_MAP_START - phys_base;
+#endif
+	return highmap;
+}
+
/*
* Certain areas of memory on x86 require very specific protection flags,
* for example the BIOS area or kernel text. Callers don't always get this
@@ -129,19 +161,36 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
*/
if (within(address, (unsigned long)_text, (unsigned long)_etext))
pgprot_val(forbidden) |= _PAGE_NX;
+ /*
+ * Do the same for the x86-64 high kernel mapping
+ */
+ if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext)))
+ pgprot_val(forbidden) |= _PAGE_NX;
-#ifdef CONFIG_DEBUG_RODATA
/* The .rodata section needs to be read-only */
if (within(address, (unsigned long)__start_rodata,
(unsigned long)__end_rodata))
pgprot_val(forbidden) |= _PAGE_RW;
-#endif
+ /*
+ * Do the same for the x86-64 high kernel mapping
+ */
+ if (within(address, virt_to_highmap(__start_rodata),
+ virt_to_highmap(__end_rodata)))
+ pgprot_val(forbidden) |= _PAGE_RW;
prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
return prot;
}
+/*
+ * Look up the page table entry for a virtual address. Return a pointer
+ * to the entry and the level of the mapping.
+ *
+ * Note: We return the pud or pmd entry either when it is marked large
+ * or when its present bit is not set. Otherwise we would return a
+ * pointer to a nonexistent mapping.
+ */
pte_t *lookup_address(unsigned long address, int *level)
{
pgd_t *pgd = pgd_offset_k(address);
@@ -152,21 +201,31 @@ pte_t *lookup_address(unsigned long address, int *level)
if (pgd_none(*pgd))
return NULL;
+
pud = pud_offset(pgd, address);
if (pud_none(*pud))
return NULL;
+
+ *level = PG_LEVEL_1G;
+ if (pud_large(*pud) || !pud_present(*pud))
+ return (pte_t *)pud;
+
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd))
return NULL;
*level = PG_LEVEL_2M;
- if (pmd_large(*pmd))
+ if (pmd_large(*pmd) || !pmd_present(*pmd))
return (pte_t *)pmd;
*level = PG_LEVEL_4K;
+
return pte_offset_kernel(pmd, address);
}
+/*
+ * Set the new pmd in all the pgds we know about:
+ */
static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
/* change init_mm */
@@ -189,18 +248,103 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
#endif
}
+/*
+ * Try to apply the attribute change described by @cpa to the large page
+ * entry @kpte that maps @address, without splitting the large page.
+ *
+ * cpa->numpages is limited to the number of 4k pages between @address
+ * and the end of the large page. cpa->flushtlb is set when the large
+ * entry is rewritten.
+ *
+ * Returns:
+ *   0        nothing had to change, or the large entry was updated
+ *   1        the caller has to split the large page (also returned when
+ *            the lookup under pgd_lock no longer yields @kpte)
+ *   -EINVAL  the mapping level is neither 2M nor (on x86-64) 1G
+ */
+static int
+try_preserve_large_page(pte_t *kpte, unsigned long address,
+			struct cpa_data *cpa)
+{
+	unsigned long nextpage_addr, numpages, pmask, psize, flags;
+	pte_t new_pte, old_pte, *tmp;
+	pgprot_t old_prot, new_prot;
+	int level, do_split = 1;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	/*
+	 * Check for races, another CPU might have split this page
+	 * up already:
+	 */
+	tmp = lookup_address(address, &level);
+	if (tmp != kpte)
+		goto out_unlock;
+
+	switch (level) {
+	case PG_LEVEL_2M:
+		psize = PMD_PAGE_SIZE;
+		pmask = PMD_PAGE_MASK;
+		break;
+#ifdef CONFIG_X86_64
+	case PG_LEVEL_1G:
+		/*
+		 * A 1G mapping is a PUD level entry and spans
+		 * 1 << PUD_SHIFT bytes. With the PMD (2M) geometry the
+		 * "covers the full page" check below would pass for a
+		 * 2M aligned request of 2M or more and rewrite the
+		 * attributes of the whole 1G mapping.
+		 */
+		psize = 1UL << PUD_SHIFT;
+		pmask = ~(psize - 1);
+		break;
+#endif
+	default:
+		do_split = -EINVAL;
+		goto out_unlock;
+	}
+
+	/*
+	 * Calculate the number of pages, which fit into this large
+	 * page starting at address:
+	 */
+	nextpage_addr = (address + psize) & pmask;
+	numpages = (nextpage_addr - address) >> PAGE_SHIFT;
+	if (numpages < cpa->numpages)
+		cpa->numpages = numpages;
+
+	/*
+	 * We are safe now. Check whether the new pgprot is the same:
+	 */
+	old_pte = *kpte;
+	old_prot = new_prot = pte_pgprot(old_pte);
+
+	pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
+	pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
+	/*
+	 * NOTE(review): the static protections are evaluated for @address
+	 * only, not for every 4k page covered by the large page - confirm
+	 * that this is sufficient for all callers.
+	 */
+	new_prot = static_protections(new_prot, address);
+
+	/*
+	 * If there are no changes, return. cpa->numpages has been
+	 * updated above:
+	 */
+	if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
+		do_split = 0;
+		goto out_unlock;
+	}
+
+	/*
+	 * We need to change the attributes. Check, whether we can
+	 * change the large page in one go. We request a split, when
+	 * the address is not aligned or the number of pages is
+	 * smaller than the number of pages in the large page. Note
+	 * that we limited the number of possible pages already to
+	 * the number of pages in the large page.
+	 */
+	if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
+		/*
+		 * The address is aligned and the number of pages
+		 * covers the full page.
+		 */
+		new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
+		__set_pmd_pte(kpte, address, new_pte);
+		cpa->flushtlb = 1;
+		do_split = 0;
+	}
+
+out_unlock:
+	spin_unlock_irqrestore(&pgd_lock, flags);
+
+	return do_split;
+}
+
+
static int split_large_page(pte_t *kpte, unsigned long address)
{
- pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+ unsigned long flags, pfn, pfninc = 1;
gfp_t gfp_flags = GFP_KERNEL;
- unsigned long flags;
- unsigned long addr;
+ unsigned int i, level;
pte_t *pbase, *tmp;
+ pgprot_t ref_prot;
struct page *base;
- unsigned int i, level;
#ifdef CONFIG_DEBUG_PAGEALLOC
- gfp_flags = __GFP_HIGH | __GFP_NOFAIL | __GFP_NOWARN;
gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
#endif
base = alloc_pages(gfp_flags, 0);
@@ -213,30 +357,41 @@ static int split_large_page(pte_t *kpte, unsigned long address)
* up for us already:
*/
tmp = lookup_address(address, &level);
- if (tmp != kpte) {
- WARN_ON_ONCE(1);
+ if (tmp != kpte)
goto out_unlock;
- }
- address = __pa(address);
- addr = address & LARGE_PAGE_MASK;
pbase = (pte_t *)page_address(base);
#ifdef CONFIG_X86_32
paravirt_alloc_pt(&init_mm, page_to_pfn(base));
#endif
+ ref_prot = pte_pgprot(pte_clrhuge(*kpte));
- pgprot_val(ref_prot) &= ~_PAGE_NX;
- for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
- set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
+#ifdef CONFIG_X86_64
+ if (level == PG_LEVEL_1G) {
+ pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
+ pgprot_val(ref_prot) |= _PAGE_PSE;
+ }
+#endif
+
+ /*
+ * Get the target pfn from the original entry:
+ */
+ pfn = pte_pfn(*kpte);
+ for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
+ set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
/*
- * Install the new, split up pagetable. Important detail here:
+ * Install the new, split up pagetable. Important details here:
*
* On Intel the NX bit of all levels must be cleared to make a
* page executable. See section 4.13.2 of Intel 64 and IA-32
* Architectures Software Developer's Manual).
+ *
+ * Mark the entry present. The current mapping might be
+ * set to not present, which we preserved above.
*/
ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
+ pgprot_val(ref_prot) |= _PAGE_PRESENT;
__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
base = NULL;
@@ -249,18 +404,12 @@ out_unlock:
return 0;
}
-static int
-__change_page_attr(unsigned long address, unsigned long pfn,
- pgprot_t mask_set, pgprot_t mask_clr)
+static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
{
+ int level, do_split, err;
struct page *kpte_page;
- int level, err = 0;
pte_t *kpte;
-#ifdef CONFIG_X86_32
- BUG_ON(pfn > max_low_pfn);
-#endif
-
repeat:
kpte = lookup_address(address, &level);
if (!kpte)
@@ -271,23 +420,62 @@ repeat:
BUG_ON(PageCompound(kpte_page));
if (level == PG_LEVEL_4K) {
- pgprot_t new_prot = pte_pgprot(*kpte);
pte_t new_pte, old_pte = *kpte;
+ pgprot_t new_prot = pte_pgprot(old_pte);
+
+ if(!pte_val(old_pte)) {
+ printk(KERN_WARNING "CPA: called for zero pte. "
+ "vaddr = %lx cpa->vaddr = %lx\n", address,
+ cpa->vaddr);
+ WARN_ON(1);
+ return -EINVAL;
+ }
- pgprot_val(new_prot) &= ~pgprot_val(mask_clr);
- pgprot_val(new_prot) |= pgprot_val(mask_set);
+ pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
+ pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
new_prot = static_protections(new_prot, address);
- new_pte = pfn_pte(pfn, canon_pgprot(new_prot));
- BUG_ON(pte_pfn(new_pte) != pte_pfn(old_pte));
+ /*
+ * We need to keep the pfn from the existing PTE,
+ * after all we're only going to change its attributes
+ * not the memory it points to
+ */
+ new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
- set_pte_atomic(kpte, new_pte);
- } else {
- err = split_large_page(kpte, address);
- if (!err)
- goto repeat;
+ /*
+ * Do we really change anything ?
+ */
+ if (pte_val(old_pte) != pte_val(new_pte)) {
+ set_pte_atomic(kpte, new_pte);
+ cpa->flushtlb = 1;
+ }
+ cpa->numpages = 1;
+ return 0;
}
+
+ /*
+ * Check, whether we can keep the large page intact
+ * and just change the pte:
+ */
+ do_split = try_preserve_large_page(kpte, address, cpa);
+ /*
+ * When the range fits into the existing large page,
+ * return. cpa->numpages and cpa->flushtlb have been updated in
+ * try_preserve_large_page:
+ */
+ if (do_split <= 0)
+ return do_split;
+
+ /*
+ * We have to split the large page:
+ */
+ err = split_large_page(kpte, address);
+ if (!err) {
+ cpa->flushtlb = 1;
+ goto repeat;
+ }
+
return err;
}
@@ -304,19 +492,14 @@ repeat:
*
* Modules and drivers should use the set_memory_* APIs instead.
*/
-
-#define HIGH_MAP_START __START_KERNEL_map
-#define HIGH_MAP_END (__START_KERNEL_map + KERNEL_TEXT_SIZE)
-
-static int
-change_page_attr_addr(unsigned long address, pgprot_t mask_set,
- pgprot_t mask_clr)
+static int change_page_attr_addr(struct cpa_data *cpa)
{
- unsigned long phys_addr = __pa(address);
- unsigned long pfn = phys_addr >> PAGE_SHIFT;
int err;
+ unsigned long address = cpa->vaddr;
#ifdef CONFIG_X86_64
+ unsigned long phys_addr = __pa(address);
+
/*
* If we are inside the high mapped kernel range, then we
* fixup the low mapping first. __va() returns the virtual
@@ -326,7 +509,7 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set,
address = (unsigned long) __va(phys_addr);
#endif
- err = __change_page_attr(address, pfn, mask_set, mask_clr);
+ err = __change_page_attr(address, cpa);
if (err)
return err;
@@ -339,42 +522,89 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set,
/*
* Calc the high mapping address. See __phys_addr()
* for the non obvious details.
+ *
+ * Note that NX and other required permissions are
+ * checked in static_protections().
*/
address = phys_addr + HIGH_MAP_START - phys_base;
- /* Make sure the kernel mappings stay executable */
- pgprot_val(mask_clr) |= _PAGE_NX;
/*
* Our high aliases are imprecise, because we check
* everything between 0 and KERNEL_TEXT_SIZE, so do
* not propagate lookup failures back to users:
*/
- __change_page_attr(address, pfn, mask_set, mask_clr);
+ __change_page_attr(address, cpa);
}
#endif
return err;
}
-static int __change_page_attr_set_clr(unsigned long addr, int numpages,
- pgprot_t mask_set, pgprot_t mask_clr)
+static int __change_page_attr_set_clr(struct cpa_data *cpa)
{
- unsigned int i;
- int ret;
+ int ret, numpages = cpa->numpages;
- for (i = 0; i < numpages ; i++, addr += PAGE_SIZE) {
- ret = change_page_attr_addr(addr, mask_set, mask_clr);
+ while (numpages) {
+ /*
+ * Store the remaining nr of pages for the large page
+ * preservation check.
+ */
+ cpa->numpages = numpages;
+ ret = change_page_attr_addr(cpa);
if (ret)
return ret;
- }
+ /*
+ * Adjust the number of pages with the result of the
+ * CPA operation. Either a large page has been
+ * preserved or a single page update happened.
+ */
+ BUG_ON(cpa->numpages > numpages);
+ numpages -= cpa->numpages;
+ cpa->vaddr += cpa->numpages * PAGE_SIZE;
+ }
return 0;
}
+/*
+ * Mask out everything but the cache control bits (PAT, PAT_LARGE, PWT,
+ * PCD) of a pgprot. The result is non-zero when any of them is set.
+ */
+static inline int cache_attr(pgprot_t attr)
+{
+	int cache_bits = pgprot_val(attr) &
+		(_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
+
+	return cache_bits;
+}
+
static int change_page_attr_set_clr(unsigned long addr, int numpages,
pgprot_t mask_set, pgprot_t mask_clr)
{
- int ret = __change_page_attr_set_clr(addr, numpages, mask_set,
- mask_clr);
+ struct cpa_data cpa;
+ int ret, cache;
+
+ /*
+ * Check, if we are requested to change a not supported
+ * feature:
+ */
+ mask_set = canon_pgprot(mask_set);
+ mask_clr = canon_pgprot(mask_clr);
+ if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
+ return 0;
+
+ cpa.vaddr = addr;
+ cpa.numpages = numpages;
+ cpa.mask_set = mask_set;
+ cpa.mask_clr = mask_clr;
+ cpa.flushtlb = 0;
+
+ ret = __change_page_attr_set_clr(&cpa);
+
+ /*
+ * Check whether we really changed something:
+ */
+ if (!cpa.flushtlb)
+ return ret;
+
+ /*
+ * No need to flush, when we did not set any of the caching
+ * attributes:
+ */
+ cache = cache_attr(mask_set);
/*
* On success we use clflush, when the CPU supports it to
@@ -383,9 +613,9 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
* wbindv):
*/
if (!ret && cpu_has_clflush)
- cpa_flush_range(addr, numpages);
+ cpa_flush_range(addr, numpages, cache);
else
- cpa_flush_all();
+ cpa_flush_all(cache);
return ret;
}
@@ -489,37 +719,26 @@ int set_pages_rw(struct page *page, int numpages)
return set_memory_rw(addr, numpages);
}
-
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_CPA_DEBUG)
-static inline int __change_page_attr_set(unsigned long addr, int numpages,
- pgprot_t mask)
-{
- return __change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
-}
-
-static inline int __change_page_attr_clear(unsigned long addr, int numpages,
- pgprot_t mask)
-{
- return __change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
-}
-#endif
-
#ifdef CONFIG_DEBUG_PAGEALLOC
static int __set_pages_p(struct page *page, int numpages)
{
- unsigned long addr = (unsigned long)page_address(page);
+ struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+ .numpages = numpages,
+ .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+ .mask_clr = __pgprot(0)};
- return __change_page_attr_set(addr, numpages,
- __pgprot(_PAGE_PRESENT | _PAGE_RW));
+ return __change_page_attr_set_clr(&cpa);
}
static int __set_pages_np(struct page *page, int numpages)
{
- unsigned long addr = (unsigned long)page_address(page);
+ struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+ .numpages = numpages,
+ .mask_set = __pgprot(0),
+ .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
- return __change_page_attr_clear(addr, numpages,
- __pgprot(_PAGE_PRESENT));
+ return __change_page_attr_set_clr(&cpa);
}
void kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index cb3aa470249b..6c1914622a88 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -219,50 +219,39 @@ static inline void pgd_list_del(pgd_t *pgd)
list_del(&page->lru);
}
+#define UNSHARED_PTRS_PER_PGD \
+ (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
-
-#if (PTRS_PER_PMD == 1)
-/* Non-PAE pgd constructor */
-static void pgd_ctor(void *pgd)
+static void pgd_ctor(void *p)
{
+ pgd_t *pgd = p;
unsigned long flags;
- /* !PAE, no pagetable sharing */
+ /* Clear usermode parts of PGD */
memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
spin_lock_irqsave(&pgd_lock, flags);
- /* must happen under lock */
- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
- swapper_pg_dir + USER_PTRS_PER_PGD,
- KERNEL_PGD_PTRS);
- paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
- __pa(swapper_pg_dir) >> PAGE_SHIFT,
- USER_PTRS_PER_PGD,
- KERNEL_PGD_PTRS);
- pgd_list_add(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
-}
-#else /* PTRS_PER_PMD > 1 */
-/* PAE pgd constructor */
-static void pgd_ctor(void *pgd)
-{
- /* PAE, kernel PMD may be shared */
-
- if (SHARED_KERNEL_PMD) {
- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
+ /* If the pgd points to a shared pagetable level (either the
+ ptes in non-PAE, or shared PMD in PAE), then just copy the
+ references from swapper_pg_dir. */
+ if (PAGETABLE_LEVELS == 2 ||
+ (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
+ clone_pgd_range(pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
KERNEL_PGD_PTRS);
- } else {
- unsigned long flags;
+ paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
+ __pa(swapper_pg_dir) >> PAGE_SHIFT,
+ USER_PTRS_PER_PGD,
+ KERNEL_PGD_PTRS);
+ }
- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
- spin_lock_irqsave(&pgd_lock, flags);
+ /* list required to sync kernel mapping updates */
+ if (!SHARED_KERNEL_PMD)
pgd_list_add(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
- }
+
+ spin_unlock_irqrestore(&pgd_lock, flags);
}
-#endif /* PTRS_PER_PMD */
static void pgd_dtor(void *pgd)
{
@@ -276,9 +265,6 @@ static void pgd_dtor(void *pgd)
spin_unlock_irqrestore(&pgd_lock, flags);
}
-#define UNSHARED_PTRS_PER_PGD \
- (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
-
#ifdef CONFIG_X86_PAE
/*
* Mop up any pmd pages which may still be attached to the pgd.
@@ -286,7 +272,7 @@ static void pgd_dtor(void *pgd)
* preallocate which never got a corresponding vma will need to be
* freed manually.
*/
-static void pgd_mop_up_pmds(pgd_t *pgdp)
+static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
{
int i;
@@ -299,7 +285,7 @@ static void pgd_mop_up_pmds(pgd_t *pgdp)
pgdp[i] = native_make_pgd(0);
paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
- pmd_free(pmd);
+ pmd_free(mm, pmd);
}
}
}
@@ -327,7 +313,7 @@ static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
pmd_t *pmd = pmd_alloc_one(mm, addr);
if (!pmd) {
- pgd_mop_up_pmds(pgd);
+ pgd_mop_up_pmds(mm, pgd);
return 0;
}
@@ -347,7 +333,7 @@ static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
return 1;
}
-static void pgd_mop_up_pmds(pgd_t *pgd)
+static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
{
}
#endif /* CONFIG_X86_PAE */
@@ -366,9 +352,9 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
return pgd;
}
-void pgd_free(pgd_t *pgd)
+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
- pgd_mop_up_pmds(pgd);
+ pgd_mop_up_pmds(mm, pgd);
quicklist_free(0, pgd_dtor, pgd);
}
@@ -387,13 +373,6 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
- /* This is called just after the pmd has been detached from
- the pgd, which requires a full tlb flush to be recognized
- by the CPU. Rather than incurring multiple tlb flushes
- while the address space is being pulled down, make the tlb
- gathering machinery do a full flush when we're done. */
- tlb->fullmm = 1;
-
paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
tlb_remove_page(tlb, virt_to_page(pmd));
}
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index cb63007e20b2..74d30ff33c49 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -17,7 +17,7 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d)
int pxb, reg;
u8 busno, suba, subb;
- printk(KERN_WARNING "PCI: Searching for i450NX host bridges on %s\n", pci_name(d));
+ dev_warn(&d->dev, "Searching for i450NX host bridges\n");
reg = 0xd0;
for(pxb = 0; pxb < 2; pxb++) {
pci_read_config_byte(d, reg++, &busno);
@@ -41,7 +41,7 @@ static void __devinit pci_fixup_i450gx(struct pci_dev *d)
*/
u8 busno;
pci_read_config_byte(d, 0x4a, &busno);
- printk(KERN_INFO "PCI: i440KX/GX host bridge %s: secondary bus %02x\n", pci_name(d), busno);
+ dev_info(&d->dev, "i440KX/GX host bridge; secondary bus %02x\n", busno);
pci_scan_bus_with_sysdata(busno);
pcibios_last_bus = -1;
}
@@ -55,7 +55,7 @@ static void __devinit pci_fixup_umc_ide(struct pci_dev *d)
*/
int i;
- printk(KERN_WARNING "PCI: Fixing base address flags for device %s\n", pci_name(d));
+ dev_warn(&d->dev, "Fixing base address flags\n");
for(i = 0; i < 4; i++)
d->resource[i].flags |= PCI_BASE_ADDRESS_SPACE_IO;
}
@@ -68,7 +68,7 @@ static void __devinit pci_fixup_ncr53c810(struct pci_dev *d)
* Fix class to be PCI_CLASS_STORAGE_SCSI
*/
if (!d->class) {
- printk(KERN_WARNING "PCI: fixing NCR 53C810 class code for %s\n", pci_name(d));
+ dev_warn(&d->dev, "Fixing NCR 53C810 class code\n");
d->class = PCI_CLASS_STORAGE_SCSI << 8;
}
}
@@ -80,7 +80,7 @@ static void __devinit pci_fixup_latency(struct pci_dev *d)
* SiS 5597 and 5598 chipsets require latency timer set to
* at most 32 to avoid lockups.
*/
- DBG("PCI: Setting max latency to 32\n");
+ dev_dbg(&d->dev, "Setting max latency to 32\n");
pcibios_max_latency = 32;
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, pci_fixup_latency);
@@ -138,7 +138,7 @@ static void pci_fixup_via_northbridge_bug(struct pci_dev *d)
pci_read_config_byte(d, where, &v);
if (v & ~mask) {
- printk(KERN_WARNING "Disabling VIA memory write queue (PCI ID %04x, rev %02x): [%02x] %02x & %02x -> %02x\n", \
+ dev_warn(&d->dev, "Disabling VIA memory write queue (PCI ID %04x, rev %02x): [%02x] %02x & %02x -> %02x\n", \
d->device, d->revision, where, v, mask, v & mask);
v &= mask;
pci_write_config_byte(d, where, v);
@@ -200,7 +200,7 @@ static void pci_fixup_nforce2(struct pci_dev *dev)
* Apply fixup if needed, but don't touch disconnect state
*/
if ((val & 0x00FF0000) != 0x00010000) {
- printk(KERN_WARNING "PCI: nForce2 C1 Halt Disconnect fixup\n");
+ dev_warn(&dev->dev, "nForce2 C1 Halt Disconnect fixup\n");
pci_write_config_dword(dev, 0x6c, (val & 0xFF00FFFF) | 0x00010000);
}
}
@@ -348,7 +348,7 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev)
pci_read_config_word(pdev, PCI_COMMAND, &config);
if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) {
pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW;
- printk(KERN_DEBUG "Boot video device is %s\n", pci_name(pdev));
+ dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n");
}
}
DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video);
@@ -388,11 +388,11 @@ static void __devinit pci_fixup_msi_k8t_onboard_sound(struct pci_dev *dev)
/* verify the change for status output */
pci_read_config_byte(dev, 0x50, &val);
if (val & 0x40)
- printk(KERN_INFO "PCI: Detected MSI K8T Neo2-FIR, "
+ dev_info(&dev->dev, "Detected MSI K8T Neo2-FIR; "
"can't enable onboard soundcard!\n");
else
- printk(KERN_INFO "PCI: Detected MSI K8T Neo2-FIR, "
- "enabled onboard soundcard.\n");
+ dev_info(&dev->dev, "Detected MSI K8T Neo2-FIR; "
+ "enabled onboard soundcard\n");
}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237,
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 42ba0e2da1a0..103b9dff1213 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -72,7 +72,7 @@ pcibios_align_resource(void *data, struct resource *res,
}
}
}
-
+EXPORT_SYMBOL(pcibios_align_resource);
/*
* Handle resources of PCI devices. If the world were perfect, we could
diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c
index f5f165f69e0c..55270c26237c 100644
--- a/arch/x86/pci/numa.c
+++ b/arch/x86/pci/numa.c
@@ -5,36 +5,62 @@
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/nodemask.h>
+#include <mach_apic.h>
#include "pci.h"
+#define XQUAD_PORTIO_BASE 0xfe400000
+#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
+
#define BUS2QUAD(global) (mp_bus_id_to_node[global])
#define BUS2LOCAL(global) (mp_bus_id_to_local[global])
#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
+extern void *xquad_portio; /* Where the IO area was mapped */
+#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
+
#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \
(0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3))
+/*
+ * Write the CONF1 address for (bus, devfn, reg): through the mapped
+ * xquad port area of the bus's quad when xquad_portio is set, otherwise
+ * through the 0xCF8 I/O port.
+ */
+static void write_cf8(unsigned bus, unsigned devfn, unsigned reg)
+{
+	unsigned cf8 = PCI_CONF1_MQ_ADDRESS(bus, devfn, reg);
+
+	if (!xquad_portio) {
+		outl(cf8, 0xCF8);
+		return;
+	}
+	writel(cf8, XQUAD_PORT_ADDR(0xcf8, BUS2QUAD(bus)));
+}
+
static int pci_conf1_mq_read(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 *value)
{
unsigned long flags;
+ void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus));
if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255))
return -EINVAL;
spin_lock_irqsave(&pci_config_lock, flags);
- outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus));
+ write_cf8(bus, devfn, reg);
switch (len) {
case 1:
- *value = inb_quad(0xCFC + (reg & 3), BUS2QUAD(bus));
+ if (xquad_portio)
+ *value = readb(adr + (reg & 3));
+ else
+ *value = inb(0xCFC + (reg & 3));
break;
case 2:
- *value = inw_quad(0xCFC + (reg & 2), BUS2QUAD(bus));
+ if (xquad_portio)
+ *value = readw(adr + (reg & 2));
+ else
+ *value = inw(0xCFC + (reg & 2));
break;
case 4:
- *value = inl_quad(0xCFC, BUS2QUAD(bus));
+ if (xquad_portio)
+ *value = readl(adr);
+ else
+ *value = inl(0xCFC);
break;
}
@@ -47,23 +73,33 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 value)
{
unsigned long flags;
+ void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus));
if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255))
return -EINVAL;
spin_lock_irqsave(&pci_config_lock, flags);
- outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus));
+ write_cf8(bus, devfn, reg);
switch (len) {
case 1:
- outb_quad((u8)value, 0xCFC + (reg & 3), BUS2QUAD(bus));
+ if (xquad_portio)
+ writeb(value, adr + (reg & 3));
+ else
+ outb((u8)value, 0xCFC + (reg & 3));
break;
case 2:
- outw_quad((u16)value, 0xCFC + (reg & 2), BUS2QUAD(bus));
+ if (xquad_portio)
+ writew(value, adr + (reg & 2));
+ else
+ outw((u16)value, 0xCFC + (reg & 2));
break;
case 4:
- outl_quad((u32)value, 0xCFC, BUS2QUAD(bus));
+ if (xquad_portio)
+ writel(value, adr + reg);
+ else
+ outl((u32)value, 0xCFC);
break;
}