aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--arch/x86/Kbuild1
-rw-r--r--arch/x86/Kconfig11
-rw-r--r--arch/x86/Makefile_32.cpu13
-rw-r--r--arch/x86/ia32/sys_ia32.c1
-rw-r--r--arch/x86/include/asm/acpi.h3
-rw-r--r--arch/x86/include/asm/apic.h10
-rw-r--r--arch/x86/include/asm/fixmap.h4
-rw-r--r--arch/x86/include/asm/highmem.h11
-rw-r--r--arch/x86/include/asm/io.h13
-rw-r--r--arch/x86/include/asm/io_apic.h1
-rw-r--r--arch/x86/include/asm/iomap.h4
-rw-r--r--arch/x86/include/asm/irq.h2
-rw-r--r--arch/x86/include/asm/msr-index.h3
-rw-r--r--arch/x86/include/asm/paravirt.h10
-rw-r--r--arch/x86/include/asm/pci.h33
-rw-r--r--arch/x86/include/asm/pci_x86.h1
-rw-r--r--arch/x86/include/asm/perf_event.h19
-rw-r--r--arch/x86/include/asm/pgtable_32.h14
-rw-r--r--arch/x86/include/asm/pgtable_64.h2
-rw-r--r--arch/x86/include/asm/pvclock.h1
-rw-r--r--arch/x86/include/asm/smp.h9
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h25
-rw-r--r--arch/x86/include/asm/uv/uv_mmrs.h208
-rw-r--r--arch/x86/include/asm/x86_init.h9
-rw-r--r--arch/x86/include/asm/xen/hypercall.h17
-rw-r--r--arch/x86/include/asm/xen/interface.h6
-rw-r--r--arch/x86/include/asm/xen/interface_32.h5
-rw-r--r--arch/x86/include/asm/xen/interface_64.h13
-rw-r--r--arch/x86/include/asm/xen/page.h19
-rw-r--r--arch/x86/include/asm/xen/pci.h65
-rw-r--r--arch/x86/kernel/Makefile12
-rw-r--r--arch/x86/kernel/acpi/boot.c60
-rw-r--r--arch/x86/kernel/alternative.c71
-rw-r--r--arch/x86/kernel/apic/apic.c1
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c7
-rw-r--r--arch/x86/kernel/apic/io_apic.c11
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c61
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longrun.c4
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c1
-rw-r--r--arch/x86/kernel/cpu/perf_event.c46
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c216
-rw-r--r--arch/x86/kernel/cpuid.c1
-rw-r--r--arch/x86/kernel/crash_dump_32.c2
-rw-r--r--arch/x86/kernel/dumpstack_32.c6
-rw-r--r--arch/x86/kernel/dumpstack_64.c8
-rw-r--r--arch/x86/kernel/entry_32.S2
-rw-r--r--arch/x86/kernel/entry_64.S2
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/hw_breakpoint.c4
-rw-r--r--arch/x86/kernel/irq_32.c17
-rw-r--r--arch/x86/kernel/kgdb.c15
-rw-r--r--arch/x86/kernel/microcode_amd.c2
-rw-r--r--arch/x86/kernel/mmconf-fam10h_64.c71
-rw-r--r--arch/x86/kernel/msr.c1
-rw-r--r--arch/x86/kernel/ptrace.c17
-rw-r--r--arch/x86/kernel/pvclock.c43
-rw-r--r--arch/x86/kernel/reboot.c2
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/smp.c15
-rw-r--r--arch/x86/kernel/smpboot.c3
-rw-r--r--arch/x86/kernel/x86_init.c7
-rw-r--r--arch/x86/kvm/mmu.c9
-rw-r--r--arch/x86/kvm/svm.c2
-rw-r--r--arch/x86/kvm/vmx.c19
-rw-r--r--arch/x86/kvm/x86.c16
-rw-r--r--arch/x86/mm/fault.c63
-rw-r--r--arch/x86/mm/highmem_32.c76
-rw-r--r--arch/x86/mm/init_64.c1
-rw-r--r--arch/x86/mm/iomap_32.c43
-rw-r--r--arch/x86/mm/numa_64.c7
-rw-r--r--arch/x86/mm/tlb.c7
-rw-r--r--arch/x86/oprofile/nmi_int.c6
-rw-r--r--arch/x86/oprofile/op_model_amd.c146
-rw-r--r--arch/x86/pci/Makefile1
-rw-r--r--arch/x86/pci/acpi.c103
-rw-r--r--arch/x86/pci/common.c17
-rw-r--r--arch/x86/pci/i386.c19
-rw-r--r--arch/x86/pci/irq.c11
-rw-r--r--arch/x86/pci/mmconfig-shared.c4
-rw-r--r--arch/x86/pci/xen.c429
-rw-r--r--arch/x86/platform/Makefile8
-rw-r--r--arch/x86/platform/efi/Makefile1
-rw-r--r--arch/x86/platform/efi/efi.c (renamed from arch/x86/kernel/efi.c)0
-rw-r--r--arch/x86/platform/efi/efi_32.c (renamed from arch/x86/kernel/efi_32.c)0
-rw-r--r--arch/x86/platform/efi/efi_64.c (renamed from arch/x86/kernel/efi_64.c)0
-rw-r--r--arch/x86/platform/efi/efi_stub_32.S (renamed from arch/x86/kernel/efi_stub_32.S)0
-rw-r--r--arch/x86/platform/efi/efi_stub_64.S (renamed from arch/x86/kernel/efi_stub_64.S)0
-rw-r--r--arch/x86/platform/mrst/Makefile1
-rw-r--r--arch/x86/platform/mrst/mrst.c (renamed from arch/x86/kernel/mrst.c)0
-rw-r--r--arch/x86/platform/olpc/Makefile3
-rw-r--r--arch/x86/platform/olpc/olpc-xo1.c (renamed from arch/x86/kernel/olpc-xo1.c)0
-rw-r--r--arch/x86/platform/olpc/olpc.c (renamed from arch/x86/kernel/olpc.c)0
-rw-r--r--arch/x86/platform/olpc/olpc_ofw.c (renamed from arch/x86/kernel/olpc_ofw.c)0
-rw-r--r--arch/x86/platform/scx200/Makefile2
-rw-r--r--arch/x86/platform/scx200/scx200_32.c (renamed from arch/x86/kernel/scx200_32.c)0
-rw-r--r--arch/x86/platform/sfi/Makefile1
-rw-r--r--arch/x86/platform/sfi/sfi.c (renamed from arch/x86/kernel/sfi.c)0
-rw-r--r--arch/x86/platform/uv/Makefile1
-rw-r--r--arch/x86/platform/uv/bios_uv.c (renamed from arch/x86/kernel/bios_uv.c)0
-rw-r--r--arch/x86/platform/uv/tlb_uv.c (renamed from arch/x86/kernel/tlb_uv.c)15
-rw-r--r--arch/x86/platform/uv/uv_irq.c (renamed from arch/x86/kernel/uv_irq.c)0
-rw-r--r--arch/x86/platform/uv/uv_sysfs.c (renamed from arch/x86/kernel/uv_sysfs.c)0
-rw-r--r--arch/x86/platform/uv/uv_time.c (renamed from arch/x86/kernel/uv_time.c)4
-rw-r--r--arch/x86/platform/visws/Makefile1
-rw-r--r--arch/x86/platform/visws/visws_quirks.c (renamed from arch/x86/kernel/visws_quirks.c)0
-rw-r--r--arch/x86/xen/Kconfig21
-rw-r--r--arch/x86/xen/enlighten.c45
-rw-r--r--arch/x86/xen/mmu.c628
-rw-r--r--arch/x86/xen/mmu.h1
-rw-r--r--arch/x86/xen/pci-swiotlb-xen.c4
-rw-r--r--arch/x86/xen/platform-pci-unplug.c2
-rw-r--r--arch/x86/xen/setup.c156
-rw-r--r--arch/x86/xen/smp.c32
-rw-r--r--arch/x86/xen/suspend.c1
-rw-r--r--arch/x86/xen/time.c2
-rw-r--r--arch/x86/xen/xen-ops.h5
119 files changed, 2331 insertions, 834 deletions
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index ad8ec356fb36..0e103236b754 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -14,3 +14,4 @@ obj-y += crypto/
obj-y += vdso/
obj-$(CONFIG_IA32_EMULATION) += ia32/
+obj-y += platform/
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index dfabfefc21c4..e330da21b84f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1,6 +1,3 @@
-# x86 configuration
-mainmenu "Linux Kernel Configuration for x86"
-
# Select 32 or 64 bit
config 64BIT
bool "64-bit kernel" if ARCH = "x86"
@@ -24,7 +21,7 @@ config X86
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_IDE
select HAVE_OPROFILE
- select HAVE_PERF_EVENTS if (!M386 && !M486)
+ select HAVE_PERF_EVENTS
select HAVE_IRQ_WORK
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
@@ -347,6 +344,7 @@ endif
config X86_VSMP
bool "ScaleMP vSMP"
+ select PARAVIRT_GUEST
select PARAVIRT
depends on X86_64 && PCI
depends on X86_EXTENDED_PLATFORM
@@ -1895,6 +1893,11 @@ config PCI_OLPC
def_bool y
depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY)
+config PCI_XEN
+ def_bool y
+ depends on PCI && XEN
+ select SWIOTLB_XEN
+
config PCI_DOMAINS
def_bool y
depends on PCI
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index 1255d953c65d..f2ee1abb1df9 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -51,7 +51,18 @@ cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686))
# prologue (push %ebp, mov %esp, %ebp) which breaks the function graph
# tracer assumptions. For i686, generic, core2 this is set by the
# compiler anyway
-cflags-$(CONFIG_FUNCTION_GRAPH_TRACER) += $(call cc-option,-maccumulate-outgoing-args)
+ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y)
+ADD_ACCUMULATE_OUTGOING_ARGS := y
+endif
+
+# Work around to a bug with asm goto with first implementations of it
+# in gcc causing gcc to mess up the push and pop of the stack in some
+# uses of asm goto.
+ifeq ($(CONFIG_JUMP_LABEL), y)
+ADD_ACCUMULATE_OUTGOING_ARGS := y
+endif
+
+cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args)
# Bug fix for binutils: this option is required in order to keep
# binutils from generating NOPL instructions against our will.
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 849813f398e7..5852519b2d0f 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -28,7 +28,6 @@
#include <linux/syscalls.h>
#include <linux/times.h>
#include <linux/utsname.h>
-#include <linux/smp_lock.h>
#include <linux/mm.h>
#include <linux/uio.h>
#include <linux/poll.h>
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 92091de11113..55d106b5e31b 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -93,6 +93,9 @@ extern u8 acpi_sci_flags;
extern int acpi_sci_override_gsi;
void acpi_pic_sci_set_trigger(unsigned int, u16);
+extern int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
+ int trigger, int polarity);
+
static inline void disable_acpi(void)
{
acpi_disabled = 1;
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 286de34b0ed6..f6ce0bda3b98 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -141,13 +141,13 @@ static inline void native_apic_msr_write(u32 reg, u32 v)
static inline u32 native_apic_msr_read(u32 reg)
{
- u32 low, high;
+ u64 msr;
if (reg == APIC_DFR)
return -1;
- rdmsr(APIC_BASE_MSR + (reg >> 4), low, high);
- return low;
+ rdmsrl(APIC_BASE_MSR + (reg >> 4), msr);
+ return (u32)msr;
}
static inline void native_x2apic_wait_icr_idle(void)
@@ -181,12 +181,12 @@ extern void enable_x2apic(void);
extern void x2apic_icr_write(u32 low, u32 id);
static inline int x2apic_enabled(void)
{
- int msr, msr2;
+ u64 msr;
if (!cpu_has_x2apic)
return 0;
- rdmsr(MSR_IA32_APICBASE, msr, msr2);
+ rdmsrl(MSR_IA32_APICBASE, msr);
if (msr & X2APIC_ENABLE)
return 1;
return 0;
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 4d293dced62f..9479a037419f 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -216,8 +216,8 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr)
}
/* Return an pointer with offset calculated */
-static inline unsigned long __set_fixmap_offset(enum fixed_addresses idx,
- phys_addr_t phys, pgprot_t flags)
+static __always_inline unsigned long
+__set_fixmap_offset(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
{
__set_fixmap(idx, phys, flags);
return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1));
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index 8caac76ac324..3bd04022fd0c 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -59,11 +59,12 @@ extern void kunmap_high(struct page *page);
void *kmap(struct page *page);
void kunmap(struct page *page);
-void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
-void *kmap_atomic(struct page *page, enum km_type type);
-void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type);
-void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
-void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
+
+void *kmap_atomic_prot(struct page *page, pgprot_t prot);
+void *__kmap_atomic(struct page *page);
+void __kunmap_atomic(void *kvaddr);
+void *kmap_atomic_pfn(unsigned long pfn);
+void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
struct page *kmap_atomic_to_page(void *ptr);
#define flush_cache_kmaps() do { } while (0)
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index f0203f4791a8..072273082528 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -41,6 +41,8 @@
#include <asm-generic/int-ll64.h>
#include <asm/page.h>
+#include <xen/xen.h>
+
#define build_mmio_read(name, size, type, reg, barrier) \
static inline type name(const volatile void __iomem *addr) \
{ type ret; asm volatile("mov" size " %1,%0":reg (ret) \
@@ -351,6 +353,17 @@ extern void early_iounmap(void __iomem *addr, unsigned long size);
extern void fixup_early_ioremap(void);
extern bool is_early_ioremap_ptep(pte_t *ptep);
+#ifdef CONFIG_XEN
+struct bio_vec;
+
+extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
+ const struct bio_vec *vec2);
+
+#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
+ (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \
+ (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)))
+#endif /* CONFIG_XEN */
+
#define IO_SPACE_LIMIT 0xffff
#endif /* _ASM_X86_IO_H */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index c8be4566c3d2..a6b28d017c2f 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -169,6 +169,7 @@ extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
extern void probe_nr_irqs_gsi(void);
+extern int get_nr_irqs_gsi(void);
extern void setup_ioapic_ids_from_mpc(void);
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
index c4191b3b7056..363e33eb6ec1 100644
--- a/arch/x86/include/asm/iomap.h
+++ b/arch/x86/include/asm/iomap.h
@@ -27,10 +27,10 @@
#include <asm/tlbflush.h>
void __iomem *
-iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
+iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
void
-iounmap_atomic(void __iomem *kvaddr, enum km_type type);
+iounmap_atomic(void __iomem *kvaddr);
int
iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot);
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 0bf5b0083650..13b0ebaa512f 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -21,10 +21,8 @@ static inline int irq_canonicalize(int irq)
#ifdef CONFIG_X86_32
extern void irq_ctx_init(int cpu);
-extern void irq_ctx_exit(int cpu);
#else
# define irq_ctx_init(cpu) do { } while (0)
-# define irq_ctx_exit(cpu) do { } while (0)
#endif
#define __ARCH_HAS_DO_SOFTIRQ
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 83c4bb1d917d..6b89f5e86021 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -121,13 +121,14 @@
#define MSR_AMD64_IBSDCLINAD 0xc0011038
#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
#define MSR_AMD64_IBSCTL 0xc001103a
+#define MSR_AMD64_IBSBRTARGET 0xc001103b
/* Fam 10h MSRs */
#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
#define FAM10H_MMIO_CONF_ENABLE (1<<0)
#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf
#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
-#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff
+#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
#define FAM10H_MMIO_CONF_BASE_SHIFT 20
#define MSR_FAM10H_NODE_ID 0xc001100c
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 18e3b8a8709f..ef9975812c77 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -824,27 +824,27 @@ static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
#define __PV_IS_CALLEE_SAVE(func) \
((struct paravirt_callee_save) { func })
-static inline unsigned long arch_local_save_flags(void)
+static inline notrace unsigned long arch_local_save_flags(void)
{
return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
}
-static inline void arch_local_irq_restore(unsigned long f)
+static inline notrace void arch_local_irq_restore(unsigned long f)
{
PVOP_VCALLEE1(pv_irq_ops.restore_fl, f);
}
-static inline void arch_local_irq_disable(void)
+static inline notrace void arch_local_irq_disable(void)
{
PVOP_VCALLEE0(pv_irq_ops.irq_disable);
}
-static inline void arch_local_irq_enable(void)
+static inline notrace void arch_local_irq_enable(void)
{
PVOP_VCALLEE0(pv_irq_ops.irq_enable);
}
-static inline unsigned long arch_local_irq_save(void)
+static inline notrace unsigned long arch_local_irq_save(void)
{
unsigned long f;
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index d395540ff894..ca0437c714b2 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -7,6 +7,7 @@
#include <linux/string.h>
#include <asm/scatterlist.h>
#include <asm/io.h>
+#include <asm/x86_init.h>
#ifdef __KERNEL__
@@ -94,8 +95,36 @@ static inline void early_quirks(void) { }
extern void pci_iommu_alloc(void);
-/* MSI arch hook */
-#define arch_setup_msi_irqs arch_setup_msi_irqs
+#ifdef CONFIG_PCI_MSI
+/* MSI arch specific hooks */
+static inline int x86_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+ return x86_msi.setup_msi_irqs(dev, nvec, type);
+}
+
+static inline void x86_teardown_msi_irqs(struct pci_dev *dev)
+{
+ x86_msi.teardown_msi_irqs(dev);
+}
+
+static inline void x86_teardown_msi_irq(unsigned int irq)
+{
+ x86_msi.teardown_msi_irq(irq);
+}
+#define arch_setup_msi_irqs x86_setup_msi_irqs
+#define arch_teardown_msi_irqs x86_teardown_msi_irqs
+#define arch_teardown_msi_irq x86_teardown_msi_irq
+/* implemented in arch/x86/kernel/apic/io_apic. */
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
+void native_teardown_msi_irq(unsigned int irq);
+/* default to the implementation in drivers/lib/msi.c */
+#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
+void default_teardown_msi_irqs(struct pci_dev *dev);
+#else
+#define native_setup_msi_irqs NULL
+#define native_teardown_msi_irq NULL
+#define default_teardown_msi_irqs NULL
+#endif
#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 49c7219826f9..704526734bef 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -47,6 +47,7 @@ enum pci_bf_sort_state {
extern unsigned int pcibios_max_latency;
void pcibios_resource_survey(void);
+void pcibios_set_cache_line_size(void);
/* pci-pc.c */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 6e742cc4251b..550e26b1dbb3 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -111,17 +111,18 @@ union cpuid10_edx {
#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16)
/* IbsFetchCtl bits/masks */
-#define IBS_FETCH_RAND_EN (1ULL<<57)
-#define IBS_FETCH_VAL (1ULL<<49)
-#define IBS_FETCH_ENABLE (1ULL<<48)
-#define IBS_FETCH_CNT 0xFFFF0000ULL
-#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
+#define IBS_FETCH_RAND_EN (1ULL<<57)
+#define IBS_FETCH_VAL (1ULL<<49)
+#define IBS_FETCH_ENABLE (1ULL<<48)
+#define IBS_FETCH_CNT 0xFFFF0000ULL
+#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
/* IbsOpCtl bits */
-#define IBS_OP_CNT_CTL (1ULL<<19)
-#define IBS_OP_VAL (1ULL<<18)
-#define IBS_OP_ENABLE (1ULL<<17)
-#define IBS_OP_MAX_CNT 0x0000FFFFULL
+#define IBS_OP_CNT_CTL (1ULL<<19)
+#define IBS_OP_VAL (1ULL<<18)
+#define IBS_OP_ENABLE (1ULL<<17)
+#define IBS_OP_MAX_CNT 0x0000FFFFULL
+#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
#ifdef CONFIG_PERF_EVENTS
extern void init_hw_perf_events(void);
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 8abde9ec90bf..0c92113c4cb6 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -49,24 +49,14 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
#endif
#if defined(CONFIG_HIGHPTE)
-#define __KM_PTE \
- (in_nmi() ? KM_NMI_PTE : \
- in_irq() ? KM_IRQ_PTE : \
- KM_PTE0)
#define pte_offset_map(dir, address) \
- ((pte_t *)kmap_atomic(pmd_page(*(dir)), __KM_PTE) + \
+ ((pte_t *)kmap_atomic(pmd_page(*(dir))) + \
pte_index((address)))
-#define pte_offset_map_nested(dir, address) \
- ((pte_t *)kmap_atomic(pmd_page(*(dir)), KM_PTE1) + \
- pte_index((address)))
-#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE)
-#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
+#define pte_unmap(pte) kunmap_atomic((pte))
#else
#define pte_offset_map(dir, address) \
((pte_t *)page_address(pmd_page(*(dir))) + pte_index((address)))
-#define pte_offset_map_nested(dir, address) pte_offset_map((dir), (address))
#define pte_unmap(pte) do { } while (0)
-#define pte_unmap_nested(pte) do { } while (0)
#endif
/* Clear a kernel PTE and flush it from the TLB */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index f96ac9bedf75..f86da20347f2 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -127,9 +127,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
/* x86-64 always has all page tables mapped. */
#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
-#define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address))
#define pte_unmap(pte) ((void)(pte))/* NOP */
-#define pte_unmap_nested(pte) ((void)(pte)) /* NOP */
#define update_mmu_cache(vma, address, ptep) do { } while (0)
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 7f7e577a0e39..31d84acc1512 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -11,6 +11,7 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
struct pvclock_vcpu_time_info *vcpu,
struct timespec *ts);
+void pvclock_resume(void);
/*
* Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 4cfc90824068..4c2f63c7fc1b 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -50,7 +50,7 @@ struct smp_ops {
void (*smp_prepare_cpus)(unsigned max_cpus);
void (*smp_cpus_done)(unsigned max_cpus);
- void (*smp_send_stop)(void);
+ void (*stop_other_cpus)(int wait);
void (*smp_send_reschedule)(int cpu);
int (*cpu_up)(unsigned cpu);
@@ -73,7 +73,12 @@ extern struct smp_ops smp_ops;
static inline void smp_send_stop(void)
{
- smp_ops.smp_send_stop();
+ smp_ops.stop_other_cpus(0);
+}
+
+static inline void stop_other_cpus(void)
+{
+ smp_ops.stop_other_cpus(1);
}
static inline void smp_prepare_boot_cpu(void)
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index bf6b88ef8eeb..a501741c2335 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -5,7 +5,7 @@
*
* SGI UV architectural definitions
*
- * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
*/
#ifndef _ASM_X86_UV_UV_HUB_H
@@ -77,7 +77,8 @@
*
* 1111110000000000
* 5432109876543210
- * pppppppppplc0cch
+ * pppppppppplc0cch Nehalem-EX
+ * ppppppppplcc0cch Westmere-EX
* sssssssssss
*
* p = pnode bits
@@ -148,12 +149,25 @@ struct uv_hub_info_s {
unsigned char m_val;
unsigned char n_val;
struct uv_scir_s scir;
+ unsigned char apic_pnode_shift;
};
DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
#define uv_hub_info (&__get_cpu_var(__uv_hub_info))
#define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu))
+union uvh_apicid {
+ unsigned long v;
+ struct uvh_apicid_s {
+ unsigned long local_apic_mask : 24;
+ unsigned long local_apic_shift : 5;
+ unsigned long unused1 : 3;
+ unsigned long pnode_mask : 24;
+ unsigned long pnode_shift : 5;
+ unsigned long unused2 : 3;
+ } s;
+};
+
/*
* Local & Global MMR space macros.
* Note: macros are intended to be used ONLY by inline functions
@@ -182,8 +196,11 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
#define UV_GLOBAL_MMR64_PNODE_BITS(p) \
(((unsigned long)(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT)
+#define UVH_APICID 0x002D0E00L
#define UV_APIC_PNODE_SHIFT 6
+#define UV_APICID_HIBIT_MASK 0xffff0000
+
/* Local Bus from cpu's perspective */
#define LOCAL_BUS_BASE 0x1c00000
#define LOCAL_BUS_SIZE (4 * 1024 * 1024)
@@ -280,7 +297,7 @@ static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
*/
static inline int uv_apicid_to_pnode(int apicid)
{
- return (apicid >> UV_APIC_PNODE_SHIFT);
+ return (apicid >> uv_hub_info->apic_pnode_shift);
}
/*
@@ -476,8 +493,10 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
}
}
+extern unsigned int uv_apicid_hibits;
static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode)
{
+ apicid |= uv_apicid_hibits;
return (1UL << UVH_IPI_INT_SEND_SHFT) |
((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
(mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index b2f2d2e05cec..20cafeac7455 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,7 +5,7 @@
*
* SGI UV MMR definitions
*
- * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
*/
#ifndef _ASM_X86_UV_UV_MMRS_H
@@ -754,6 +754,23 @@ union uvh_lb_bau_sb_descriptor_base_u {
};
/* ========================================================================= */
+/* UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK */
+/* ========================================================================= */
+#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL
+#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x009f0
+
+#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0
+#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL
+
+union uvh_lb_target_physical_apic_id_mask_u {
+ unsigned long v;
+ struct uvh_lb_target_physical_apic_id_mask_s {
+ unsigned long bit_enables : 32; /* RW */
+ unsigned long rsvd_32_63 : 32; /* */
+ } s;
+};
+
+/* ========================================================================= */
/* UVH_NODE_ID */
/* ========================================================================= */
#define UVH_NODE_ID 0x0UL
@@ -806,6 +823,78 @@ union uvh_node_present_table_u {
};
/* ========================================================================= */
+/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR */
+/* ========================================================================= */
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
+
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_rh_gam_alias210_overlay_config_0_mmr_u {
+ unsigned long v;
+ struct uvh_rh_gam_alias210_overlay_config_0_mmr_s {
+ unsigned long rsvd_0_23: 24; /* */
+ unsigned long base : 8; /* RW */
+ unsigned long rsvd_32_47: 16; /* */
+ unsigned long m_alias : 5; /* RW */
+ unsigned long rsvd_53_62: 10; /* */
+ unsigned long enable : 1; /* RW */
+ } s;
+};
+
+/* ========================================================================= */
+/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR */
+/* ========================================================================= */
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
+
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_rh_gam_alias210_overlay_config_1_mmr_u {
+ unsigned long v;
+ struct uvh_rh_gam_alias210_overlay_config_1_mmr_s {
+ unsigned long rsvd_0_23: 24; /* */
+ unsigned long base : 8; /* RW */
+ unsigned long rsvd_32_47: 16; /* */
+ unsigned long m_alias : 5; /* RW */
+ unsigned long rsvd_53_62: 10; /* */
+ unsigned long enable : 1; /* RW */
+ } s;
+};
+
+/* ========================================================================= */
+/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR */
+/* ========================================================================= */
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
+
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
+
+union uvh_rh_gam_alias210_overlay_config_2_mmr_u {
+ unsigned long v;
+ struct uvh_rh_gam_alias210_overlay_config_2_mmr_s {
+ unsigned long rsvd_0_23: 24; /* */
+ unsigned long base : 8; /* RW */
+ unsigned long rsvd_32_47: 16; /* */
+ unsigned long m_alias : 5; /* RW */
+ unsigned long rsvd_53_62: 10; /* */
+ unsigned long enable : 1; /* RW */
+ } s;
+};
+
+/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */
/* ========================================================================= */
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
@@ -857,6 +946,29 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
};
/* ========================================================================= */
+/* UVH_RH_GAM_CONFIG_MMR */
+/* ========================================================================= */
+#define UVH_RH_GAM_CONFIG_MMR 0x1600000UL
+
+#define UVH_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
+#define UVH_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
+#define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
+#define UVH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
+#define UVH_RH_GAM_CONFIG_MMR_MMIOL_CFG_SHFT 12
+#define UVH_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL
+
+union uvh_rh_gam_config_mmr_u {
+ unsigned long v;
+ struct uvh_rh_gam_config_mmr_s {
+ unsigned long m_skt : 6; /* RW */
+ unsigned long n_skt : 4; /* RW */
+ unsigned long rsvd_10_11: 2; /* */
+ unsigned long mmiol_cfg : 1; /* RW */
+ unsigned long rsvd_13_63: 51; /* */
+ } s;
+};
+
+/* ========================================================================= */
/* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */
/* ========================================================================= */
#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
@@ -987,97 +1099,5 @@ union uvh_rtc1_int_config_u {
} s;
};
-/* ========================================================================= */
-/* UVH_SI_ADDR_MAP_CONFIG */
-/* ========================================================================= */
-#define UVH_SI_ADDR_MAP_CONFIG 0xc80000UL
-
-#define UVH_SI_ADDR_MAP_CONFIG_M_SKT_SHFT 0
-#define UVH_SI_ADDR_MAP_CONFIG_M_SKT_MASK 0x000000000000003fUL
-#define UVH_SI_ADDR_MAP_CONFIG_N_SKT_SHFT 8
-#define UVH_SI_ADDR_MAP_CONFIG_N_SKT_MASK 0x0000000000000f00UL
-
-union uvh_si_addr_map_config_u {
- unsigned long v;
- struct uvh_si_addr_map_config_s {
- unsigned long m_skt : 6; /* RW */
- unsigned long rsvd_6_7: 2; /* */
- unsigned long n_skt : 4; /* RW */
- unsigned long rsvd_12_63: 52; /* */
- } s;
-};
-
-/* ========================================================================= */
-/* UVH_SI_ALIAS0_OVERLAY_CONFIG */
-/* ========================================================================= */
-#define UVH_SI_ALIAS0_OVERLAY_CONFIG 0xc80008UL
-
-#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_SHFT 24
-#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
-#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_SHFT 48
-#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
-#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_SHFT 63
-#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
-
-union uvh_si_alias0_overlay_config_u {
- unsigned long v;
- struct uvh_si_alias0_overlay_config_s {
- unsigned long rsvd_0_23: 24; /* */
- unsigned long base : 8; /* RW */
- unsigned long rsvd_32_47: 16; /* */
- unsigned long m_alias : 5; /* RW */
- unsigned long rsvd_53_62: 10; /* */
- unsigned long enable : 1; /* RW */
- } s;
-};
-
-/* ========================================================================= */
-/* UVH_SI_ALIAS1_OVERLAY_CONFIG */
-/* ========================================================================= */
-#define UVH_SI_ALIAS1_OVERLAY_CONFIG 0xc80010UL
-
-#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_SHFT 24
-#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
-#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_SHFT 48
-#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
-#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_SHFT 63
-#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
-
-union uvh_si_alias1_overlay_config_u {
- unsigned long v;
- struct uvh_si_alias1_overlay_config_s {
- unsigned long rsvd_0_23: 24; /* */
- unsigned long base : 8; /* RW */
- unsigned long rsvd_32_47: 16; /* */
- unsigned long m_alias : 5; /* RW */
- unsigned long rsvd_53_62: 10; /* */
- unsigned long enable : 1; /* RW */
- } s;
-};
-
-/* ========================================================================= */
-/* UVH_SI_ALIAS2_OVERLAY_CONFIG */
-/* ========================================================================= */
-#define UVH_SI_ALIAS2_OVERLAY_CONFIG 0xc80018UL
-
-#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_SHFT 24
-#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
-#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_SHFT 48
-#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
-#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_SHFT 63
-#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
-
-union uvh_si_alias2_overlay_config_u {
- unsigned long v;
- struct uvh_si_alias2_overlay_config_s {
- unsigned long rsvd_0_23: 24; /* */
- unsigned long base : 8; /* RW */
- unsigned long rsvd_32_47: 16; /* */
- unsigned long m_alias : 5; /* RW */
- unsigned long rsvd_53_62: 10; /* */
- unsigned long enable : 1; /* RW */
- } s;
-};
-
-#endif /* _ASM_X86_UV_UV_MMRS_H */
+#endif /* __ASM_UV_MMRS_X86_H__ */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index baa579c8e038..64642ad019fb 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -154,9 +154,18 @@ struct x86_platform_ops {
int (*i8042_detect)(void);
};
+struct pci_dev;
+
+struct x86_msi_ops {
+ int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
+ void (*teardown_msi_irq)(unsigned int irq);
+ void (*teardown_msi_irqs)(struct pci_dev *dev);
+};
+
extern struct x86_init_ops x86_init;
extern struct x86_cpuinit_ops x86_cpuinit;
extern struct x86_platform_ops x86_platform;
+extern struct x86_msi_ops x86_msi;
extern void x86_init_noop(void);
extern void x86_init_uint_noop(unsigned int unused);
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 7fda040a76cd..a3c28ae4025b 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -200,6 +200,23 @@ extern struct { char _entry[32]; } hypercall_page[];
(type)__res; \
})
+static inline long
+privcmd_call(unsigned call,
+ unsigned long a1, unsigned long a2,
+ unsigned long a3, unsigned long a4,
+ unsigned long a5)
+{
+ __HYPERCALL_DECLS;
+ __HYPERCALL_5ARG(a1, a2, a3, a4, a5);
+
+ asm volatile("call *%[call]"
+ : __HYPERCALL_5PARAM
+ : [call] "a" (&hypercall_page[call])
+ : __HYPERCALL_CLOBBER5);
+
+ return (long)__res;
+}
+
static inline int
HYPERVISOR_set_trap_table(struct trap_info *table)
{
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index e8506c1f0c55..1c10c88ee4e1 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -61,9 +61,9 @@ DEFINE_GUEST_HANDLE(void);
#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
#endif
-#ifndef machine_to_phys_mapping
-#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
-#endif
+#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT)
/* Maximum number of virtual CPUs in multi-processor guests. */
#define MAX_VIRT_CPUS 32
diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h
index 42a7e004ae5c..8413688b2571 100644
--- a/arch/x86/include/asm/xen/interface_32.h
+++ b/arch/x86/include/asm/xen/interface_32.h
@@ -32,6 +32,11 @@
/* And the trap vector is... */
#define TRAP_INSTR "int $0x82"
+#define __MACH2PHYS_VIRT_START 0xF5800000
+#define __MACH2PHYS_VIRT_END 0xF6800000
+
+#define __MACH2PHYS_SHIFT 2
+
/*
* Virtual addresses beyond this are not modifiable by guest OSes. The
* machine->physical mapping table starts at this address, read-only.
diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h
index 100d2662b97c..839a4811cf98 100644
--- a/arch/x86/include/asm/xen/interface_64.h
+++ b/arch/x86/include/asm/xen/interface_64.h
@@ -39,18 +39,7 @@
#define __HYPERVISOR_VIRT_END 0xFFFF880000000000
#define __MACH2PHYS_VIRT_START 0xFFFF800000000000
#define __MACH2PHYS_VIRT_END 0xFFFF804000000000
-
-#ifndef HYPERVISOR_VIRT_START
-#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
-#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END)
-#endif
-
-#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
-#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END)
-#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
-#ifndef machine_to_phys_mapping
-#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
-#endif
+#define __MACH2PHYS_SHIFT 3
/*
* int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index bf5f7d32bd08..8760cc60a21c 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/pfn.h>
+#include <linux/mm.h>
#include <asm/uaccess.h>
#include <asm/page.h>
@@ -35,16 +36,25 @@ typedef struct xpaddr {
#define MAX_DOMAIN_PAGES \
((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
+extern unsigned long *machine_to_phys_mapping;
+extern unsigned int machine_to_phys_order;
extern unsigned long get_phys_to_machine(unsigned long pfn);
-extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
static inline unsigned long pfn_to_mfn(unsigned long pfn)
{
+ unsigned long mfn;
+
if (xen_feature(XENFEAT_auto_translated_physmap))
return pfn;
- return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT;
+ mfn = get_phys_to_machine(pfn);
+
+ if (mfn != INVALID_P2M_ENTRY)
+ mfn &= ~FOREIGN_FRAME_BIT;
+
+ return mfn;
}
static inline int phys_to_machine_mapping_valid(unsigned long pfn)
@@ -62,10 +72,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
if (xen_feature(XENFEAT_auto_translated_physmap))
return mfn;
-#if 0
if (unlikely((mfn >> machine_to_phys_order) != 0))
- return max_mapnr;
-#endif
+ return ~0;
pfn = 0;
/*
@@ -159,6 +167,7 @@ static inline pte_t __pte_ma(pteval_t x)
#define pgd_val_ma(x) ((x).pgd)
+void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid);
xmaddr_t arbitrary_virt_to_machine(void *address);
unsigned long arbitrary_virt_to_mfn(void *vaddr);
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
new file mode 100644
index 000000000000..2329b3eaf8d3
--- /dev/null
+++ b/arch/x86/include/asm/xen/pci.h
@@ -0,0 +1,65 @@
+#ifndef _ASM_X86_XEN_PCI_H
+#define _ASM_X86_XEN_PCI_H
+
+#if defined(CONFIG_PCI_XEN)
+extern int __init pci_xen_init(void);
+extern int __init pci_xen_hvm_init(void);
+#define pci_xen 1
+#else
+#define pci_xen 0
+#define pci_xen_init (0)
+static inline int pci_xen_hvm_init(void)
+{
+ return -1;
+}
+#endif
+#if defined(CONFIG_XEN_DOM0)
+void __init xen_setup_pirqs(void);
+#else
+static inline void __init xen_setup_pirqs(void)
+{
+}
+#endif
+
+#if defined(CONFIG_PCI_MSI)
+#if defined(CONFIG_PCI_XEN)
+/* The drivers/pci/xen-pcifront.c sets this structure to
+ * its own functions.
+ */
+struct xen_pci_frontend_ops {
+ int (*enable_msi)(struct pci_dev *dev, int **vectors);
+ void (*disable_msi)(struct pci_dev *dev);
+ int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec);
+ void (*disable_msix)(struct pci_dev *dev);
+};
+
+extern struct xen_pci_frontend_ops *xen_pci_frontend;
+
+static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev,
+ int **vectors)
+{
+ if (xen_pci_frontend && xen_pci_frontend->enable_msi)
+ return xen_pci_frontend->enable_msi(dev, vectors);
+ return -ENODEV;
+}
+static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev)
+{
+ if (xen_pci_frontend && xen_pci_frontend->disable_msi)
+ xen_pci_frontend->disable_msi(dev);
+}
+static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev,
+ int **vectors, int nvec)
+{
+ if (xen_pci_frontend && xen_pci_frontend->enable_msix)
+ return xen_pci_frontend->enable_msix(dev, vectors, nvec);
+ return -ENODEV;
+}
+static inline void xen_pci_frontend_disable_msix(struct pci_dev *dev)
+{
+ if (xen_pci_frontend && xen_pci_frontend->disable_msix)
+ xen_pci_frontend->disable_msix(dev);
+}
+#endif /* CONFIG_PCI_XEN */
+#endif /* CONFIG_PCI_MSI */
+
+#endif /* _ASM_X86_XEN_PCI_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 2c833d8c4141..9e13763b6092 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -36,7 +36,6 @@ obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o ldt.o dumpstack.o
obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
-obj-$(CONFIG_X86_VISWS) += visws_quirks.o
obj-$(CONFIG_X86_32) += probe_roms_32.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
@@ -58,7 +57,6 @@ obj-$(CONFIG_INTEL_TXT) += tboot.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
obj-y += acpi/
-obj-$(CONFIG_SFI) += sfi.o
obj-y += reboot.o
obj-$(CONFIG_MCA) += mca_32.o
obj-$(CONFIG_X86_MSR) += msr.o
@@ -82,7 +80,6 @@ obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_VM86) += vm86_32.o
@@ -104,14 +101,6 @@ obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
-obj-$(CONFIG_SCx200) += scx200.o
-scx200-y += scx200_32.o
-
-obj-$(CONFIG_OLPC) += olpc.o
-obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o
-obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o
-obj-$(CONFIG_X86_MRST) += mrst.o
-
microcode-y := microcode_core.o
microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
@@ -124,7 +113,6 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
- obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
obj-$(CONFIG_AUDIT) += audit_64.o
obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index c05872aa3ce0..71232b941b6c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -513,35 +513,62 @@ int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
return 0;
}
-/*
- * success: return IRQ number (>=0)
- * failure: return < 0
- */
-int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
+static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
+ int trigger, int polarity)
{
- unsigned int irq;
- unsigned int plat_gsi = gsi;
-
#ifdef CONFIG_PCI
/*
* Make sure all (legacy) PCI IRQs are set as level-triggered.
*/
- if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
- if (trigger == ACPI_LEVEL_SENSITIVE)
- eisa_set_level_irq(gsi);
- }
+ if (trigger == ACPI_LEVEL_SENSITIVE)
+ eisa_set_level_irq(gsi);
#endif
+ return gsi;
+}
+
+static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
+ int trigger, int polarity)
+{
#ifdef CONFIG_X86_IO_APIC
- if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
- plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity);
- }
+ gsi = mp_register_gsi(dev, gsi, trigger, polarity);
#endif
+
+ return gsi;
+}
+
+int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
+ int trigger, int polarity) = acpi_register_gsi_pic;
+
+/*
+ * success: return IRQ number (>=0)
+ * failure: return < 0
+ */
+int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
+{
+ unsigned int irq;
+ unsigned int plat_gsi = gsi;
+
+ plat_gsi = (*__acpi_register_gsi)(dev, gsi, trigger, polarity);
irq = gsi_to_irq(plat_gsi);
return irq;
}
+void __init acpi_set_irq_model_pic(void)
+{
+ acpi_irq_model = ACPI_IRQ_MODEL_PIC;
+ __acpi_register_gsi = acpi_register_gsi_pic;
+ acpi_ioapic = 0;
+}
+
+void __init acpi_set_irq_model_ioapic(void)
+{
+ acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
+ __acpi_register_gsi = acpi_register_gsi_ioapic;
+ acpi_ioapic = 1;
+}
+
/*
* ACPI based hotplug support for CPU
*/
@@ -1259,8 +1286,7 @@ static void __init acpi_process_madt(void)
*/
error = acpi_parse_madt_ioapic_entries();
if (!error) {
- acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
- acpi_ioapic = 1;
+ acpi_set_irq_model_ioapic();
smp_found_config = 1;
}
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a36bb90aef53..5079f24c955a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -638,71 +638,32 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
atomic_set(&stop_machine_first, 1);
wrote_text = 0;
/* Use __stop_machine() because the caller already got online_cpus. */
- __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+ __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
return addr;
}
#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
-unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
+#ifdef CONFIG_X86_64
+unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
+#else
+unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
+#endif
void __init arch_init_ideal_nop5(void)
{
- extern const unsigned char ftrace_test_p6nop[];
- extern const unsigned char ftrace_test_nop5[];
- extern const unsigned char ftrace_test_jmp[];
- int faulted = 0;
-
/*
- * There is no good nop for all x86 archs.
- * We will default to using the P6_NOP5, but first we
- * will test to make sure that the nop will actually
- * work on this CPU. If it faults, we will then
- * go to a lesser efficient 5 byte nop. If that fails
- * we then just use a jmp as our nop. This isn't the most
- * efficient nop, but we can not use a multi part nop
- * since we would then risk being preempted in the middle
- * of that nop, and if we enabled tracing then, it might
- * cause a system crash.
+ * There is no good nop for all x86 archs. This selection
+ * algorithm should be unified with the one in find_nop_table(),
+ * but this should be good enough for now.
*
- * TODO: check the cpuid to determine the best nop.
+ * For cases other than the ones below, use the safe (as in
+ * always functional) defaults above.
*/
- asm volatile (
- "ftrace_test_jmp:"
- "jmp ftrace_test_p6nop\n"
- "nop\n"
- "nop\n"
- "nop\n" /* 2 byte jmp + 3 bytes */
- "ftrace_test_p6nop:"
- P6_NOP5
- "jmp 1f\n"
- "ftrace_test_nop5:"
- ".byte 0x66,0x66,0x66,0x66,0x90\n"
- "1:"
- ".section .fixup, \"ax\"\n"
- "2: movl $1, %0\n"
- " jmp ftrace_test_nop5\n"
- "3: movl $2, %0\n"
- " jmp 1b\n"
- ".previous\n"
- _ASM_EXTABLE(ftrace_test_p6nop, 2b)
- _ASM_EXTABLE(ftrace_test_nop5, 3b)
- : "=r"(faulted) : "0" (faulted));
-
- switch (faulted) {
- case 0:
- pr_info("converting mcount calls to 0f 1f 44 00 00\n");
- memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
- break;
- case 1:
- pr_info("converting mcount calls to 66 66 66 66 90\n");
- memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
- break;
- case 2:
- pr_info("converting mcount calls to jmp . + 5\n");
- memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
- break;
- }
-
+#ifdef CONFIG_X86_64
+ /* Don't use these on 32 bits due to broken virtualizers */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ memcpy(ideal_nop5, p6_nops[5], 5);
+#endif
}
#endif
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 850657d1b0ed..3f838d537392 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -52,7 +52,6 @@
#include <asm/mce.h>
#include <asm/kvm_para.h>
#include <asm/tsc.h>
-#include <asm/atomic.h>
unsigned int num_processors;
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index cefd6942f0e9..62f6e1e55b90 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -17,15 +17,16 @@
#include <linux/nmi.h>
#include <linux/module.h>
-/* For reliability, we're prepared to waste bits here. */
-static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
-
u64 hw_nmi_get_sample_period(void)
{
return (u64)(cpu_khz) * 1000 * 60;
}
#ifdef ARCH_HAS_NMI_WATCHDOG
+
+/* For reliability, we're prepared to waste bits here. */
+static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
+
void arch_trigger_all_cpu_backtrace(void)
{
int i;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8ae808d110f4..7cc0a721f628 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3109,7 +3109,7 @@ void destroy_irq(unsigned int irq)
irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
- if (intr_remapping_enabled)
+ if (irq_remapped(cfg))
free_irte(irq);
raw_spin_lock_irqsave(&vector_lock, flags);
__clear_irq_vector(irq, cfg);
@@ -3331,7 +3331,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
return 0;
}
-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
int node, ret, sub_handle, index = 0;
unsigned int irq, irq_want;
@@ -3389,7 +3389,7 @@ error:
return ret;
}
-void arch_teardown_msi_irq(unsigned int irq)
+void native_teardown_msi_irq(unsigned int irq)
{
destroy_irq(irq);
}
@@ -3650,6 +3650,11 @@ void __init probe_nr_irqs_gsi(void)
printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
}
+int get_nr_irqs_gsi(void)
+{
+ return nr_irqs_gsi;
+}
+
#ifdef CONFIG_SPARSE_IRQ
int __init arch_probe_nr_irqs(void)
{
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index f744f54cb248..c1c52c341f40 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -5,7 +5,7 @@
*
* SGI UV APIC functions (note: not an Intel compatible APIC)
*
- * Copyright (C) 2007-2009 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
*/
#include <linux/cpumask.h>
#include <linux/hardirq.h>
@@ -41,8 +41,11 @@ DEFINE_PER_CPU(int, x2apic_extra_bits);
static enum uv_system_type uv_system_type;
static u64 gru_start_paddr, gru_end_paddr;
+static union uvh_apicid uvh_apicid;
int uv_min_hub_revision_id;
EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
+unsigned int uv_apicid_hibits;
+EXPORT_SYMBOL_GPL(uv_apicid_hibits);
static DEFINE_SPINLOCK(uv_nmi_lock);
static inline bool is_GRU_range(u64 start, u64 end)
@@ -70,12 +73,44 @@ static int early_get_nodeid(void)
return node_id.s.node_id;
}
+static void __init early_get_apic_pnode_shift(void)
+{
+ unsigned long *mmr;
+
+ mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_APICID, sizeof(*mmr));
+ uvh_apicid.v = *mmr;
+ early_iounmap(mmr, sizeof(*mmr));
+ if (!uvh_apicid.v)
+ /*
+ * Old bios, use default value
+ */
+ uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT;
+}
+
+/*
+ * Add an extra bit as dictated by bios to the destination apicid of
+ * interrupts potentially passing through the UV HUB. This prevents
+ * a deadlock between interrupts and IO port operations.
+ */
+static void __init uv_set_apicid_hibit(void)
+{
+ union uvh_lb_target_physical_apic_id_mask_u apicid_mask;
+ unsigned long *mmr;
+
+ mmr = early_ioremap(UV_LOCAL_MMR_BASE |
+ UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr));
+ apicid_mask.v = *mmr;
+ early_iounmap(mmr, sizeof(*mmr));
+ uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK;
+}
+
static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
int nodeid;
if (!strcmp(oem_id, "SGI")) {
nodeid = early_get_nodeid();
+ early_get_apic_pnode_shift();
x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
x86_platform.nmi_init = uv_nmi_init;
if (!strcmp(oem_table_id, "UVL"))
@@ -84,8 +119,9 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
uv_system_type = UV_X2APIC;
else if (!strcmp(oem_table_id, "UVH")) {
__get_cpu_var(x2apic_extra_bits) =
- nodeid << (UV_APIC_PNODE_SHIFT - 1);
+ nodeid << (uvh_apicid.s.pnode_shift - 1);
uv_system_type = UV_NON_UNIQUE_APIC;
+ uv_set_apicid_hibit();
return 1;
}
}
@@ -139,6 +175,7 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri
int pnode;
pnode = uv_apicid_to_pnode(phys_apicid);
+ phys_apicid |= uv_apicid_hibits;
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
@@ -220,7 +257,7 @@ static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
int cpu = cpumask_first(cpumask);
if ((unsigned)cpu < nr_cpu_ids)
- return per_cpu(x86_cpu_to_apicid, cpu);
+ return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
else
return BAD_APICID;
}
@@ -239,7 +276,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
if (cpumask_test_cpu(cpu, cpu_online_mask))
break;
}
- return per_cpu(x86_cpu_to_apicid, cpu);
+ return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
}
static unsigned int x2apic_get_apic_id(unsigned long x)
@@ -363,14 +400,14 @@ struct redir_addr {
#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
static __initdata struct redir_addr redir_addrs[] = {
- {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG},
- {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG},
- {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG},
+ {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR},
+ {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR},
+ {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR},
};
static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
{
- union uvh_si_alias0_overlay_config_u alias;
+ union uvh_rh_gam_alias210_overlay_config_2_mmr_u alias;
union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
int i;
@@ -644,7 +681,7 @@ void uv_nmi_init(void)
void __init uv_system_init(void)
{
- union uvh_si_addr_map_config_u m_n_config;
+ union uvh_rh_gam_config_mmr_u m_n_config;
union uvh_node_id_u node_id;
unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
@@ -654,7 +691,7 @@ void __init uv_system_init(void)
map_low_mmrs();
- m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
+ m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
m_val = m_n_config.s.m_skt;
n_val = m_n_config.s.n_skt;
mmr_base =
@@ -716,6 +753,10 @@ void __init uv_system_init(void)
int apicid = per_cpu(x86_cpu_to_apicid, cpu);
nid = cpu_to_node(cpu);
+ /*
+ * apic_pnode_shift must be set before calling uv_apicid_to_pnode();
+ */
+ uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
pnode = uv_apicid_to_pnode(apicid);
blade = boot_pnode_to_blade(pnode);
lcpu = uv_blade_info[blade].nr_possible_cpus;
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index cd8da247dda1..a2baafb2fe6d 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -701,6 +701,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
per_cpu(acfreq_data, policy->cpu) = NULL;
acpi_processor_unregister_performance(data->acpi_data,
policy->cpu);
+ kfree(data->freq_table);
kfree(data);
}
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
index 733093d60436..141abebc4516 100644
--- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -393,7 +393,7 @@ static struct cpufreq_driver nforce2_driver = {
* Detects nForce2 A2 and C1 stepping
*
*/
-static unsigned int nforce2_detect_chipset(void)
+static int nforce2_detect_chipset(void)
{
nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
PCI_DEVICE_ID_NVIDIA_NFORCE2,
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index fc09f142d94d..d9f51367666b 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -35,7 +35,7 @@ static unsigned int longrun_low_freq, longrun_high_freq;
* Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS
* and MSR_TMTA_LONGRUN_CTRL
*/
-static void __init longrun_get_policy(struct cpufreq_policy *policy)
+static void __cpuinit longrun_get_policy(struct cpufreq_policy *policy)
{
u32 msr_lo, msr_hi;
@@ -165,7 +165,7 @@ static unsigned int longrun_get(unsigned int cpu)
* TMTA rules:
* performance_pctg = (target_freq - low_freq)/(high_freq - low_freq)
*/
-static unsigned int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
+static int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
unsigned int *high_freq)
{
u32 msr_lo, msr_hi;
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 12cd823c8d03..17ad03366211 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -327,6 +327,7 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
+ l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}
static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index fe73c1844a9a..6d75b9145b13 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -49,7 +49,6 @@ static unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
{
unsigned long offset, addr = (unsigned long)from;
- int type = in_nmi() ? KM_NMI : KM_IRQ0;
unsigned long size, len = 0;
struct page *page;
void *map;
@@ -63,9 +62,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
offset = addr & (PAGE_SIZE - 1);
size = min(PAGE_SIZE - offset, n - len);
- map = kmap_atomic(page, type);
+ map = kmap_atomic(page);
memcpy(to, map+offset, size);
- kunmap_atomic(map, type);
+ kunmap_atomic(map);
put_page(page);
len += size;
@@ -238,6 +237,7 @@ struct x86_pmu {
* Intel DebugStore bits
*/
int bts, pebs;
+ int bts_active, pebs_active;
int pebs_record_size;
void (*drain_pebs)(struct pt_regs *regs);
struct event_constraint *pebs_constraints;
@@ -381,7 +381,21 @@ static void release_pmc_hardware(void) {}
#endif
-static int reserve_ds_buffers(void);
+static bool check_hw_exists(void)
+{
+ u64 val, val_new = 0;
+ int ret = 0;
+
+ val = 0xabcdUL;
+ ret |= checking_wrmsrl(x86_pmu.perfctr, val);
+ ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
+ if (ret || val != val_new)
+ return false;
+
+ return true;
+}
+
+static void reserve_ds_buffers(void);
static void release_ds_buffers(void);
static void hw_perf_event_destroy(struct perf_event *event)
@@ -478,7 +492,7 @@ static int x86_setup_perfctr(struct perf_event *event)
if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
(hwc->sample_period == 1)) {
/* BTS is not supported by this architecture. */
- if (!x86_pmu.bts)
+ if (!x86_pmu.bts_active)
return -EOPNOTSUPP;
/* BTS is currently only allowed for user-mode. */
@@ -497,12 +511,13 @@ static int x86_pmu_hw_config(struct perf_event *event)
int precise = 0;
/* Support for constant skid */
- if (x86_pmu.pebs)
+ if (x86_pmu.pebs_active) {
precise++;
- /* Support for IP fixup */
- if (x86_pmu.lbr_nr)
- precise++;
+ /* Support for IP fixup */
+ if (x86_pmu.lbr_nr)
+ precise++;
+ }
if (event->attr.precise_ip > precise)
return -EOPNOTSUPP;
@@ -544,11 +559,8 @@ static int __x86_pmu_event_init(struct perf_event *event)
if (atomic_read(&active_events) == 0) {
if (!reserve_pmc_hardware())
err = -EBUSY;
- else {
- err = reserve_ds_buffers();
- if (err)
- release_pmc_hardware();
- }
+ else
+ reserve_ds_buffers();
}
if (!err)
atomic_inc(&active_events);
@@ -1374,6 +1386,12 @@ void __init init_hw_perf_events(void)
pmu_check_apic();
+ /* sanity check that the hardware exists or is emulated */
+ if (!check_hw_exists()) {
+ pr_cont("Broken PMU hardware detected, software events only.\n");
+ return;
+ }
+
pr_cont("%s PMU driver.\n", x86_pmu.name);
if (x86_pmu.quirks)
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 46d58448c3af..e421b8cd6944 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -280,11 +280,11 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
struct amd_nb *nb;
int i;
- nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
+ nb = kmalloc_node(sizeof(struct amd_nb), GFP_KERNEL | __GFP_ZERO,
+ cpu_to_node(cpu));
if (!nb)
return NULL;
- memset(nb, 0, sizeof(*nb));
nb->nb_id = nb_id;
/*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 4977f9c400e5..b7dcd9f2b8a0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -74,6 +74,107 @@ static void fini_debug_store_on_cpu(int cpu)
wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}
+static int alloc_pebs_buffer(int cpu)
+{
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+ int node = cpu_to_node(cpu);
+ int max, thresh = 1; /* always use a single PEBS record */
+ void *buffer;
+
+ if (!x86_pmu.pebs)
+ return 0;
+
+ buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
+ if (unlikely(!buffer))
+ return -ENOMEM;
+
+ max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
+
+ ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+ ds->pebs_index = ds->pebs_buffer_base;
+ ds->pebs_absolute_maximum = ds->pebs_buffer_base +
+ max * x86_pmu.pebs_record_size;
+
+ ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
+ thresh * x86_pmu.pebs_record_size;
+
+ return 0;
+}
+
+static void release_pebs_buffer(int cpu)
+{
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+ if (!ds || !x86_pmu.pebs)
+ return;
+
+ kfree((void *)(unsigned long)ds->pebs_buffer_base);
+ ds->pebs_buffer_base = 0;
+}
+
+static int alloc_bts_buffer(int cpu)
+{
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+ int node = cpu_to_node(cpu);
+ int max, thresh;
+ void *buffer;
+
+ if (!x86_pmu.bts)
+ return 0;
+
+ buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
+ if (unlikely(!buffer))
+ return -ENOMEM;
+
+ max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
+ thresh = max / 16;
+
+ ds->bts_buffer_base = (u64)(unsigned long)buffer;
+ ds->bts_index = ds->bts_buffer_base;
+ ds->bts_absolute_maximum = ds->bts_buffer_base +
+ max * BTS_RECORD_SIZE;
+ ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
+ thresh * BTS_RECORD_SIZE;
+
+ return 0;
+}
+
+static void release_bts_buffer(int cpu)
+{
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+ if (!ds || !x86_pmu.bts)
+ return;
+
+ kfree((void *)(unsigned long)ds->bts_buffer_base);
+ ds->bts_buffer_base = 0;
+}
+
+static int alloc_ds_buffer(int cpu)
+{
+ int node = cpu_to_node(cpu);
+ struct debug_store *ds;
+
+ ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node);
+ if (unlikely(!ds))
+ return -ENOMEM;
+
+ per_cpu(cpu_hw_events, cpu).ds = ds;
+
+ return 0;
+}
+
+static void release_ds_buffer(int cpu)
+{
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+ if (!ds)
+ return;
+
+ per_cpu(cpu_hw_events, cpu).ds = NULL;
+ kfree(ds);
+}
+
static void release_ds_buffers(void)
{
int cpu;
@@ -82,93 +183,77 @@ static void release_ds_buffers(void)
return;
get_online_cpus();
-
for_each_online_cpu(cpu)
fini_debug_store_on_cpu(cpu);
for_each_possible_cpu(cpu) {
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
- if (!ds)
- continue;
-
- per_cpu(cpu_hw_events, cpu).ds = NULL;
-
- kfree((void *)(unsigned long)ds->pebs_buffer_base);
- kfree((void *)(unsigned long)ds->bts_buffer_base);
- kfree(ds);
+ release_pebs_buffer(cpu);
+ release_bts_buffer(cpu);
+ release_ds_buffer(cpu);
}
-
put_online_cpus();
}
-static int reserve_ds_buffers(void)
+static void reserve_ds_buffers(void)
{
- int cpu, err = 0;
+ int bts_err = 0, pebs_err = 0;
+ int cpu;
+
+ x86_pmu.bts_active = 0;
+ x86_pmu.pebs_active = 0;
if (!x86_pmu.bts && !x86_pmu.pebs)
- return 0;
+ return;
+
+ if (!x86_pmu.bts)
+ bts_err = 1;
+
+ if (!x86_pmu.pebs)
+ pebs_err = 1;
get_online_cpus();
for_each_possible_cpu(cpu) {
- struct debug_store *ds;
- void *buffer;
- int max, thresh;
+ if (alloc_ds_buffer(cpu)) {
+ bts_err = 1;
+ pebs_err = 1;
+ }
+
+ if (!bts_err && alloc_bts_buffer(cpu))
+ bts_err = 1;
- err = -ENOMEM;
- ds = kzalloc(sizeof(*ds), GFP_KERNEL);
- if (unlikely(!ds))
+ if (!pebs_err && alloc_pebs_buffer(cpu))
+ pebs_err = 1;
+
+ if (bts_err && pebs_err)
break;
- per_cpu(cpu_hw_events, cpu).ds = ds;
-
- if (x86_pmu.bts) {
- buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
- if (unlikely(!buffer))
- break;
-
- max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
- thresh = max / 16;
-
- ds->bts_buffer_base = (u64)(unsigned long)buffer;
- ds->bts_index = ds->bts_buffer_base;
- ds->bts_absolute_maximum = ds->bts_buffer_base +
- max * BTS_RECORD_SIZE;
- ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
- thresh * BTS_RECORD_SIZE;
- }
+ }
- if (x86_pmu.pebs) {
- buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
- if (unlikely(!buffer))
- break;
-
- max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
-
- ds->pebs_buffer_base = (u64)(unsigned long)buffer;
- ds->pebs_index = ds->pebs_buffer_base;
- ds->pebs_absolute_maximum = ds->pebs_buffer_base +
- max * x86_pmu.pebs_record_size;
- /*
- * Always use single record PEBS
- */
- ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
- x86_pmu.pebs_record_size;
- }
+ if (bts_err) {
+ for_each_possible_cpu(cpu)
+ release_bts_buffer(cpu);
+ }
- err = 0;
+ if (pebs_err) {
+ for_each_possible_cpu(cpu)
+ release_pebs_buffer(cpu);
}
- if (err)
- release_ds_buffers();
- else {
+ if (bts_err && pebs_err) {
+ for_each_possible_cpu(cpu)
+ release_ds_buffer(cpu);
+ } else {
+ if (x86_pmu.bts && !bts_err)
+ x86_pmu.bts_active = 1;
+
+ if (x86_pmu.pebs && !pebs_err)
+ x86_pmu.pebs_active = 1;
+
for_each_online_cpu(cpu)
init_debug_store_on_cpu(cpu);
}
put_online_cpus();
-
- return err;
}
/*
@@ -233,7 +318,7 @@ static int intel_pmu_drain_bts_buffer(void)
if (!event)
return 0;
- if (!ds)
+ if (!x86_pmu.bts_active)
return 0;
at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
@@ -503,7 +588,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
struct pebs_record_core *at, *top;
int n;
- if (!ds || !x86_pmu.pebs)
+ if (!x86_pmu.pebs_active)
return;
at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
@@ -545,7 +630,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
u64 status = 0;
int bit, n;
- if (!ds || !x86_pmu.pebs)
+ if (!x86_pmu.pebs_active)
return;
at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
@@ -630,9 +715,8 @@ static void intel_ds_init(void)
#else /* CONFIG_CPU_SUP_INTEL */
-static int reserve_ds_buffers(void)
+static void reserve_ds_buffers(void)
{
- return 0;
}
static void release_ds_buffers(void)
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 1b7b31ab7d86..212a6a42527c 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -33,7 +33,6 @@
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/smp.h>
-#include <linux/smp_lock.h>
#include <linux/major.h>
#include <linux/fs.h>
#include <linux/device.h>
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 67414550c3cc..d5cd13945d5a 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -61,7 +61,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
if (!is_crashed_pfn_valid(pfn))
return -EFAULT;
- vaddr = kmap_atomic_pfn(pfn, KM_PTE0);
+ vaddr = kmap_atomic_pfn(pfn);
if (!userbuf) {
memcpy(buf, (vaddr + offset), csize);
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 0f6376ffa2d9..1bc7f75a5bda 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -82,11 +82,11 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
if (kstack_end(stack))
break;
if (i && ((i % STACKSLOTS_PER_LINE) == 0))
- printk("\n%s", log_lvl);
- printk(" %08lx", *stack++);
+ printk(KERN_CONT "\n");
+ printk(KERN_CONT " %08lx", *stack++);
touch_nmi_watchdog();
}
- printk("\n");
+ printk(KERN_CONT "\n");
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 57a21f11c791..6a340485249a 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -265,20 +265,20 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
if (stack >= irq_stack && stack <= irq_stack_end) {
if (stack == irq_stack_end) {
stack = (unsigned long *) (irq_stack_end[-1]);
- printk(" <EOI> ");
+ printk(KERN_CONT " <EOI> ");
}
} else {
if (((long) stack & (THREAD_SIZE-1)) == 0)
break;
}
if (i && ((i % STACKSLOTS_PER_LINE) == 0))
- printk("\n%s", log_lvl);
- printk(" %016lx", *stack++);
+ printk(KERN_CONT "\n");
+ printk(KERN_CONT " %016lx", *stack++);
touch_nmi_watchdog();
}
preempt_enable();
- printk("\n");
+ printk(KERN_CONT "\n");
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 59e175e89599..591e60104278 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -395,7 +395,7 @@ sysenter_past_esp:
* A tiny bit of offset fixup is necessary - 4*4 means the 4 words
* pushed above; +8 corresponds to copy_thread's esp0 setting.
*/
- pushl_cfi (TI_sysenter_return-THREAD_SIZE_asm+8+4*4)(%esp)
+ pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp)
CFI_REL_OFFSET eip, 0
pushl_cfi %eax
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index fe2690d71c0c..e3ba417e8697 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -295,6 +295,7 @@ ENDPROC(native_usergs_sysret64)
.endm
/* save partial stack frame */
+ .pushsection .kprobes.text, "ax"
ENTRY(save_args)
XCPT_FRAME
cld
@@ -334,6 +335,7 @@ ENTRY(save_args)
ret
CFI_ENDPROC
END(save_args)
+ .popsection
ENTRY(save_rest)
PARTIAL_FRAME 1 REST_SKIP+8
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index aff0b3c27509..ae03cab4352e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -713,7 +713,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n,
switch (action & 0xf) {
case CPU_ONLINE:
- INIT_DELAYED_WORK_ON_STACK(&work.work, hpet_work);
+ INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work);
init_completion(&work.complete);
/* FIXME: add schedule_work_on() */
schedule_delayed_work_on(cpu, &work.work, 0);
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index ff15c9dcc25d..42c594254507 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -433,6 +433,10 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
dr6_p = (unsigned long *)ERR_PTR(args->err);
dr6 = *dr6_p;
+ /* If it's a single step, TRAP bits are random */
+ if (dr6 & DR_STEP)
+ return NOTIFY_DONE;
+
/* Do an early return if no trap bits are set in DR6 */
if ((dr6 & DR_TRAP_BITS) == 0)
return NOTIFY_DONE;
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 50fbbe60e507..96656f207751 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -17,6 +17,7 @@
#include <linux/delay.h>
#include <linux/uaccess.h>
#include <linux/percpu.h>
+#include <linux/mm.h>
#include <asm/apic.h>
@@ -60,9 +61,6 @@ union irq_ctx {
static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx);
static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx);
-static DEFINE_PER_CPU_MULTIPAGE_ALIGNED(union irq_ctx, hardirq_stack, THREAD_SIZE);
-static DEFINE_PER_CPU_MULTIPAGE_ALIGNED(union irq_ctx, softirq_stack, THREAD_SIZE);
-
static void call_on_stack(void *func, void *stack)
{
asm volatile("xchgl %%ebx,%%esp \n"
@@ -128,7 +126,9 @@ void __cpuinit irq_ctx_init(int cpu)
if (per_cpu(hardirq_ctx, cpu))
return;
- irqctx = &per_cpu(hardirq_stack, cpu);
+ irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
+ THREAD_FLAGS,
+ THREAD_ORDER));
irqctx->tinfo.task = NULL;
irqctx->tinfo.exec_domain = NULL;
irqctx->tinfo.cpu = cpu;
@@ -137,7 +137,9 @@ void __cpuinit irq_ctx_init(int cpu)
per_cpu(hardirq_ctx, cpu) = irqctx;
- irqctx = &per_cpu(softirq_stack, cpu);
+ irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
+ THREAD_FLAGS,
+ THREAD_ORDER));
irqctx->tinfo.task = NULL;
irqctx->tinfo.exec_domain = NULL;
irqctx->tinfo.cpu = cpu;
@@ -150,11 +152,6 @@ void __cpuinit irq_ctx_init(int cpu)
cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu));
}
-void irq_ctx_exit(int cpu)
-{
- per_cpu(hardirq_ctx, cpu) = NULL;
-}
-
asmlinkage void do_softirq(void)
{
unsigned long flags;
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index d81cfebb848f..cd21b654dec6 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -315,14 +315,18 @@ static void kgdb_remove_all_hw_break(void)
if (!breakinfo[i].enabled)
continue;
bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
- if (bp->attr.disabled == 1)
+ if (!bp->attr.disabled) {
+ arch_uninstall_hw_breakpoint(bp);
+ bp->attr.disabled = 1;
continue;
+ }
if (dbg_is_early)
early_dr7 &= ~encode_dr7(i, breakinfo[i].len,
breakinfo[i].type);
- else
- arch_uninstall_hw_breakpoint(bp);
- bp->attr.disabled = 1;
+ else if (hw_break_release_slot(i))
+ printk(KERN_ERR "KGDB: hw bpt remove failed %lx\n",
+ breakinfo[i].addr);
+ breakinfo[i].enabled = 0;
}
}
@@ -387,7 +391,7 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
* disable hardware debugging while it is processing gdb packets or
* handling exception.
*/
-void kgdb_disable_hw_debug(struct pt_regs *regs)
+static void kgdb_disable_hw_debug(struct pt_regs *regs)
{
int i;
int cpu = raw_smp_processor_id();
@@ -724,6 +728,7 @@ struct kgdb_arch arch_kgdb_ops = {
.flags = KGDB_HW_BREAKPOINT,
.set_hw_breakpoint = kgdb_set_hw_break,
.remove_hw_breakpoint = kgdb_remove_hw_break,
+ .disable_hw_break = kgdb_disable_hw_debug,
.remove_all_hw_break = kgdb_remove_all_hw_break,
.correct_hw_break = kgdb_correct_hw_break,
};
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index e1af7c055c7d..ce0cb4721c9a 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -212,7 +212,7 @@ static int install_equiv_cpu_table(const u8 *buf)
return 0;
}
- equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size);
+ equiv_cpu_table = vmalloc(size);
if (!equiv_cpu_table) {
pr_err("failed to allocate equivalent CPU table\n");
return 0;
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index 71825806cd44..ac861b8348e2 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -25,7 +25,6 @@ struct pci_hostbridge_probe {
};
static u64 __cpuinitdata fam10h_pci_mmconf_base;
-static int __cpuinitdata fam10h_pci_mmconf_base_status;
static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = {
{ 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 },
@@ -44,10 +43,12 @@ static int __cpuinit cmp_range(const void *x1, const void *x2)
return start1 - start2;
}
-/*[47:0] */
-/* need to avoid (0xfd<<32) and (0xfe<<32), ht used space */
+#define MMCONF_UNIT (1ULL << FAM10H_MMIO_CONF_BASE_SHIFT)
+#define MMCONF_MASK (~(MMCONF_UNIT - 1))
+#define MMCONF_SIZE (MMCONF_UNIT << 8)
+/* need to avoid (0xfd<<32), (0xfe<<32), and (0xff<<32), ht used space */
#define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32)
-#define BASE_VALID(b) ((b != (0xfdULL << 32)) && (b != (0xfeULL << 32)))
+#define BASE_VALID(b) ((b) + MMCONF_SIZE <= (0xfdULL<<32) || (b) >= (1ULL<<40))
static void __cpuinit get_fam10h_pci_mmconf_base(void)
{
int i;
@@ -64,12 +65,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
struct range range[8];
/* only try to get setting from BSP */
- /* -1 or 1 */
- if (fam10h_pci_mmconf_base_status)
+ if (fam10h_pci_mmconf_base)
return;
if (!early_pci_allowed())
- goto fail;
+ return;
found = 0;
for (i = 0; i < ARRAY_SIZE(pci_probes); i++) {
@@ -91,7 +91,7 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
}
if (!found)
- goto fail;
+ return;
/* SYS_CFG */
address = MSR_K8_SYSCFG;
@@ -99,16 +99,16 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
/* TOP_MEM2 is not enabled? */
if (!(val & (1<<21))) {
- tom2 = 0;
+ tom2 = 1ULL << 32;
} else {
/* TOP_MEM2 */
address = MSR_K8_TOP_MEM2;
rdmsrl(address, val);
- tom2 = val & (0xffffULL<<32);
+ tom2 = max(val & 0xffffff800000ULL, 1ULL << 32);
}
if (base <= tom2)
- base = tom2 + (1ULL<<32);
+ base = (tom2 + 2 * MMCONF_UNIT - 1) & MMCONF_MASK;
/*
* need to check if the range is in the high mmio range that is
@@ -123,11 +123,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
if (!(reg & 3))
continue;
- start = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/
+ start = (u64)(reg & 0xffffff00) << 8; /* 39:16 on 31:8*/
reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3));
- end = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/
+ end = ((u64)(reg & 0xffffff00) << 8) | 0xffff; /* 39:16 on 31:8*/
- if (!end)
+ if (end < tom2)
continue;
range[hi_mmio_num].start = start;
@@ -143,32 +143,27 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
if (range[hi_mmio_num - 1].end < base)
goto out;
- if (range[0].start > base)
+ if (range[0].start > base + MMCONF_SIZE)
goto out;
/* need to find one window */
- base = range[0].start - (1ULL << 32);
+ base = (range[0].start & MMCONF_MASK) - MMCONF_UNIT;
if ((base > tom2) && BASE_VALID(base))
goto out;
- base = range[hi_mmio_num - 1].end + (1ULL << 32);
- if ((base > tom2) && BASE_VALID(base))
+ base = (range[hi_mmio_num - 1].end + MMCONF_UNIT) & MMCONF_MASK;
+ if (BASE_VALID(base))
goto out;
/* need to find window between ranges */
- if (hi_mmio_num > 1)
- for (i = 0; i < hi_mmio_num - 1; i++) {
- if (range[i + 1].start > (range[i].end + (1ULL << 32))) {
- base = range[i].end + (1ULL << 32);
- if ((base > tom2) && BASE_VALID(base))
- goto out;
- }
+ for (i = 1; i < hi_mmio_num; i++) {
+ base = (range[i - 1].end + MMCONF_UNIT) & MMCONF_MASK;
+ val = range[i].start & MMCONF_MASK;
+ if (val >= base + MMCONF_SIZE && BASE_VALID(base))
+ goto out;
}
-
-fail:
- fam10h_pci_mmconf_base_status = -1;
return;
+
out:
fam10h_pci_mmconf_base = base;
- fam10h_pci_mmconf_base_status = 1;
}
void __cpuinit fam10h_check_enable_mmcfg(void)
@@ -190,11 +185,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void)
/* only trust the one handle 256 buses, if acpi=off */
if (!acpi_pci_disabled || busnbits >= 8) {
- u64 base;
- base = val & (0xffffULL << 32);
- if (fam10h_pci_mmconf_base_status <= 0) {
+ u64 base = val & MMCONF_MASK;
+
+ if (!fam10h_pci_mmconf_base) {
fam10h_pci_mmconf_base = base;
- fam10h_pci_mmconf_base_status = 1;
return;
} else if (fam10h_pci_mmconf_base == base)
return;
@@ -206,8 +200,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void)
* with 256 buses
*/
get_fam10h_pci_mmconf_base();
- if (fam10h_pci_mmconf_base_status <= 0)
+ if (!fam10h_pci_mmconf_base) {
+ pci_probe &= ~PCI_CHECK_ENABLE_AMD_MMCONF;
return;
+ }
printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n");
val &= ~((FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT) |
@@ -217,13 +213,13 @@ void __cpuinit fam10h_check_enable_mmcfg(void)
wrmsrl(address, val);
}
-static int __devinit set_check_enable_amd_mmconf(const struct dmi_system_id *d)
+static int __init set_check_enable_amd_mmconf(const struct dmi_system_id *d)
{
pci_probe |= PCI_CHECK_ENABLE_AMD_MMCONF;
return 0;
}
-static const struct dmi_system_id __cpuinitconst mmconf_dmi_table[] = {
+static const struct dmi_system_id __initconst mmconf_dmi_table[] = {
{
.callback = set_check_enable_amd_mmconf,
.ident = "Sun Microsystems Machine",
@@ -234,7 +230,8 @@ static const struct dmi_system_id __cpuinitconst mmconf_dmi_table[] = {
{}
};
-void __cpuinit check_enable_amd_mmconf_dmi(void)
+/* Called from a __cpuinit function, but only on the BSP. */
+void __ref check_enable_amd_mmconf_dmi(void)
{
dmi_check_system(mmconf_dmi_table);
}
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 7bf2dc4c8f70..12fcbe2c143e 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -30,7 +30,6 @@
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/smp.h>
-#include <linux/smp_lock.h>
#include <linux/major.h>
#include <linux/fs.h>
#include <linux/device.h>
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 70c4872cd8aa..45892dc4b72a 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -801,7 +801,8 @@ void ptrace_disable(struct task_struct *child)
static const struct user_regset_view user_x86_32_view; /* Initialized below. */
#endif
-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request,
+ unsigned long addr, unsigned long data)
{
int ret;
unsigned long __user *datap = (unsigned long __user *)data;
@@ -812,8 +813,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
unsigned long tmp;
ret = -EIO;
- if ((addr & (sizeof(data) - 1)) || addr < 0 ||
- addr >= sizeof(struct user))
+ if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user))
break;
tmp = 0; /* Default return condition */
@@ -830,8 +830,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
ret = -EIO;
- if ((addr & (sizeof(data) - 1)) || addr < 0 ||
- addr >= sizeof(struct user))
+ if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user))
break;
if (addr < sizeof(struct user_regs_struct))
@@ -888,17 +887,17 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
case PTRACE_GET_THREAD_AREA:
- if (addr < 0)
+ if ((int) addr < 0)
return -EIO;
ret = do_get_thread_area(child, addr,
- (struct user_desc __user *) data);
+ (struct user_desc __user *)data);
break;
case PTRACE_SET_THREAD_AREA:
- if (addr < 0)
+ if ((int) addr < 0)
return -EIO;
ret = do_set_thread_area(child, addr,
- (struct user_desc __user *) data, 0);
+ (struct user_desc __user *)data, 0);
break;
#endif
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index bab3b9e6f66d..42eb3300dfc6 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -41,44 +41,6 @@ void pvclock_set_flags(u8 flags)
valid_flags = flags;
}
-/*
- * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
- * yielding a 64-bit result.
- */
-static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
-{
- u64 product;
-#ifdef __i386__
- u32 tmp1, tmp2;
-#endif
-
- if (shift < 0)
- delta >>= -shift;
- else
- delta <<= shift;
-
-#ifdef __i386__
- __asm__ (
- "mul %5 ; "
- "mov %4,%%eax ; "
- "mov %%edx,%4 ; "
- "mul %5 ; "
- "xor %5,%5 ; "
- "add %4,%%eax ; "
- "adc %5,%%edx ; "
- : "=A" (product), "=r" (tmp1), "=r" (tmp2)
- : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
-#elif defined(__x86_64__)
- __asm__ (
- "mul %%rdx ; shrd $32,%%rdx,%%rax"
- : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
-#else
-#error implement me!
-#endif
-
- return product;
-}
-
static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
{
u64 delta = native_read_tsc() - shadow->tsc_timestamp;
@@ -121,6 +83,11 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
static atomic64_t last_value = ATOMIC64_INIT(0);
+void pvclock_resume(void)
+{
+ atomic64_set(&last_value, 0);
+}
+
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
{
struct pvclock_shadow_time shadow;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index f7f53dcd3e0a..c495aa8d4815 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -635,7 +635,7 @@ void native_machine_shutdown(void)
/* O.K Now that I'm on the appropriate processor,
* stop all of the others.
*/
- smp_send_stop();
+ stop_other_cpus();
#endif
lapic_shutdown();
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 95a32746fbf9..21c6746338af 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -769,6 +769,8 @@ void __init setup_arch(char **cmdline_p)
x86_init.oem.arch_setup();
+ resource_alloc_from_bottom = 0;
+ iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
setup_memory_map();
parse_setup_data();
/* update the e820_saved too */
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index d801210945d6..513deac7228d 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -159,10 +159,10 @@ asmlinkage void smp_reboot_interrupt(void)
irq_exit();
}
-static void native_smp_send_stop(void)
+static void native_stop_other_cpus(int wait)
{
unsigned long flags;
- unsigned long wait;
+ unsigned long timeout;
if (reboot_force)
return;
@@ -179,9 +179,12 @@ static void native_smp_send_stop(void)
if (num_online_cpus() > 1) {
apic->send_IPI_allbutself(REBOOT_VECTOR);
- /* Don't wait longer than a second */
- wait = USEC_PER_SEC;
- while (num_online_cpus() > 1 && wait--)
+ /*
+ * Don't wait longer than a second if the caller
+ * didn't ask us to wait.
+ */
+ timeout = USEC_PER_SEC;
+ while (num_online_cpus() > 1 && (wait || timeout--))
udelay(1);
}
@@ -227,7 +230,7 @@ struct smp_ops smp_ops = {
.smp_prepare_cpus = native_smp_prepare_cpus,
.smp_cpus_done = native_smp_cpus_done,
- .smp_send_stop = native_smp_send_stop,
+ .stop_other_cpus = native_stop_other_cpus,
.smp_send_reschedule = native_smp_send_reschedule,
.cpu_up = native_cpu_up,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6af118511b4a..083e99d1b7df 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -747,7 +747,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
};
- INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle);
+ INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
alternatives_smp_switch(1);
@@ -1373,7 +1373,6 @@ void play_dead_common(void)
{
idle_task_exit();
reset_lazy_tlbstate();
- irq_ctx_exit(raw_smp_processor_id());
c1e_remove_cpu(raw_smp_processor_id());
mb();
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index cd6da6bf3eca..ceb2911aa439 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -6,10 +6,12 @@
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/module.h>
+#include <linux/pci.h>
#include <asm/bios_ebda.h>
#include <asm/paravirt.h>
#include <asm/pci_x86.h>
+#include <asm/pci.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
#include <asm/apic.h>
@@ -99,3 +101,8 @@ struct x86_platform_ops x86_platform = {
};
EXPORT_SYMBOL_GPL(x86_platform);
+struct x86_msi_ops x86_msi = {
+ .setup_msi_irqs = native_setup_msi_irqs,
+ .teardown_msi_irq = native_teardown_msi_irq,
+ .teardown_msi_irqs = default_teardown_msi_irqs,
+};
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 908ea5464a51..fb8b376bf28c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -720,7 +720,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
}
}
-static void set_spte_track_bits(u64 *sptep, u64 new_spte)
+static int set_spte_track_bits(u64 *sptep, u64 new_spte)
{
pfn_t pfn;
u64 old_spte = *sptep;
@@ -731,19 +731,20 @@ static void set_spte_track_bits(u64 *sptep, u64 new_spte)
old_spte = __xchg_spte(sptep, new_spte);
if (!is_rmap_spte(old_spte))
- return;
+ return 0;
pfn = spte_to_pfn(old_spte);
if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
kvm_set_pfn_accessed(pfn);
if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask))
kvm_set_pfn_dirty(pfn);
+ return 1;
}
static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte)
{
- set_spte_track_bits(sptep, new_spte);
- rmap_remove(kvm, sptep);
+ if (set_spte_track_bits(sptep, new_spte))
+ rmap_remove(kvm, sptep);
}
static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 82e144a4e514..1ca12298ffc7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3395,6 +3395,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
load_host_msrs(vcpu);
+ kvm_load_ldt(ldt_selector);
loadsegment(fs, fs_selector);
#ifdef CONFIG_X86_64
load_gs_index(gs_selector);
@@ -3402,7 +3403,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
#else
loadsegment(gs, gs_selector);
#endif
- kvm_load_ldt(ldt_selector);
reload_tss(vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8da0e45ff7c9..ff21fdda0c53 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -821,10 +821,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
#endif
#ifdef CONFIG_X86_64
- if (is_long_mode(&vmx->vcpu)) {
- rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+ if (is_long_mode(&vmx->vcpu))
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
- }
#endif
for (i = 0; i < vmx->save_nmsrs; ++i)
kvm_set_shared_msr(vmx->guest_msrs[i].index,
@@ -839,23 +838,23 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
++vmx->vcpu.stat.host_state_reload;
vmx->host_state.loaded = 0;
- if (vmx->host_state.fs_reload_needed)
- loadsegment(fs, vmx->host_state.fs_sel);
+#ifdef CONFIG_X86_64
+ if (is_long_mode(&vmx->vcpu))
+ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+#endif
if (vmx->host_state.gs_ldt_reload_needed) {
kvm_load_ldt(vmx->host_state.ldt_sel);
#ifdef CONFIG_X86_64
load_gs_index(vmx->host_state.gs_sel);
- wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
#else
loadsegment(gs, vmx->host_state.gs_sel);
#endif
}
+ if (vmx->host_state.fs_reload_needed)
+ loadsegment(fs, vmx->host_state.fs_sel);
reload_tss();
#ifdef CONFIG_X86_64
- if (is_long_mode(&vmx->vcpu)) {
- rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
- wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
- }
+ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif
if (current_thread_info()->status & TS_USEDFPU)
clts();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2288ad829b32..cdac9e592aa5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2560,6 +2560,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
!kvm_exception_is_soft(vcpu->arch.exception.nr);
events->exception.nr = vcpu->arch.exception.nr;
events->exception.has_error_code = vcpu->arch.exception.has_error_code;
+ events->exception.pad = 0;
events->exception.error_code = vcpu->arch.exception.error_code;
events->interrupt.injected =
@@ -2573,12 +2574,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
events->nmi.injected = vcpu->arch.nmi_injected;
events->nmi.pending = vcpu->arch.nmi_pending;
events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
+ events->nmi.pad = 0;
events->sipi_vector = vcpu->arch.sipi_vector;
events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
| KVM_VCPUEVENT_VALID_SIPI_VECTOR
| KVM_VCPUEVENT_VALID_SHADOW);
+ memset(&events->reserved, 0, sizeof(events->reserved));
}
static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
@@ -2623,6 +2626,7 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
dbgregs->dr6 = vcpu->arch.dr6;
dbgregs->dr7 = vcpu->arch.dr7;
dbgregs->flags = 0;
+ memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
}
static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
@@ -3106,6 +3110,7 @@ static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
sizeof(ps->channels));
ps->flags = kvm->arch.vpit->pit_state.flags;
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+ memset(&ps->reserved, 0, sizeof(ps->reserved));
return r;
}
@@ -3169,10 +3174,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_memslots *slots, *old_slots;
unsigned long *dirty_bitmap;
- spin_lock(&kvm->mmu_lock);
- kvm_mmu_slot_remove_write_access(kvm, log->slot);
- spin_unlock(&kvm->mmu_lock);
-
r = -ENOMEM;
dirty_bitmap = vmalloc(n);
if (!dirty_bitmap)
@@ -3194,6 +3195,10 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
kfree(old_slots);
+ spin_lock(&kvm->mmu_lock);
+ kvm_mmu_slot_remove_write_access(kvm, log->slot);
+ spin_unlock(&kvm->mmu_lock);
+
r = -EFAULT;
if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) {
vfree(dirty_bitmap);
@@ -3486,6 +3491,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
local_irq_enable();
user_ns.flags = 0;
+ memset(&user_ns.pad, 0, sizeof(user_ns.pad));
r = -EFAULT;
if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
@@ -3972,8 +3978,10 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
return X86EMUL_CONTINUE;
if (kvm_x86_ops->has_wbinvd_exit()) {
+ preempt_disable();
smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
wbinvd_ipi, NULL, 1);
+ preempt_enable();
cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
}
wbinvd();
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 79b0b372d2d0..7d90ceb882a4 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -11,6 +11,7 @@
#include <linux/kprobes.h> /* __kprobes, ... */
#include <linux/mmiotrace.h> /* kmmio_handler, ... */
#include <linux/perf_event.h> /* perf_sw_event */
+#include <linux/hugetlb.h> /* hstate_index_to_shift */
#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
@@ -160,15 +161,20 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
static void
force_sig_info_fault(int si_signo, int si_code, unsigned long address,
- struct task_struct *tsk)
+ struct task_struct *tsk, int fault)
{
+ unsigned lsb = 0;
siginfo_t info;
info.si_signo = si_signo;
info.si_errno = 0;
info.si_code = si_code;
info.si_addr = (void __user *)address;
- info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
+ if (fault & VM_FAULT_HWPOISON_LARGE)
+ lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+ if (fault & VM_FAULT_HWPOISON)
+ lsb = PAGE_SHIFT;
+ info.si_addr_lsb = lsb;
force_sig_info(si_signo, &info, tsk);
}
@@ -722,7 +728,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
tsk->thread.error_code = error_code | (address >= TASK_SIZE);
tsk->thread.trap_no = 14;
- force_sig_info_fault(SIGSEGV, si_code, address, tsk);
+ force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0);
return;
}
@@ -807,14 +813,14 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
tsk->thread.trap_no = 14;
#ifdef CONFIG_MEMORY_FAILURE
- if (fault & VM_FAULT_HWPOISON) {
+ if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
printk(KERN_ERR
"MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
tsk->comm, tsk->pid, address);
code = BUS_MCEERR_AR;
}
#endif
- force_sig_info_fault(SIGBUS, code, address, tsk);
+ force_sig_info_fault(SIGBUS, code, address, tsk, fault);
}
static noinline void
@@ -824,7 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
if (fault & VM_FAULT_OOM) {
out_of_memory(regs, error_code, address);
} else {
- if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
+ if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
+ VM_FAULT_HWPOISON_LARGE))
do_sigbus(regs, error_code, address, fault);
else
BUG();
@@ -912,9 +919,9 @@ spurious_fault(unsigned long error_code, unsigned long address)
int show_unhandled_signals = 1;
static inline int
-access_error(unsigned long error_code, int write, struct vm_area_struct *vma)
+access_error(unsigned long error_code, struct vm_area_struct *vma)
{
- if (write) {
+ if (error_code & PF_WRITE) {
/* write, present and write, not present: */
if (unlikely(!(vma->vm_flags & VM_WRITE)))
return 1;
@@ -949,8 +956,10 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
struct task_struct *tsk;
unsigned long address;
struct mm_struct *mm;
- int write;
int fault;
+ int write = error_code & PF_WRITE;
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY |
+ (write ? FAULT_FLAG_WRITE : 0);
tsk = current;
mm = tsk->mm;
@@ -1061,6 +1070,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
bad_area_nosemaphore(regs, error_code, address);
return;
}
+retry:
down_read(&mm->mmap_sem);
} else {
/*
@@ -1104,9 +1114,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
* we can handle it..
*/
good_area:
- write = error_code & PF_WRITE;
-
- if (unlikely(access_error(error_code, write, vma))) {
+ if (unlikely(access_error(error_code, vma))) {
bad_area_access_error(regs, error_code, address);
return;
}
@@ -1116,21 +1124,34 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault:
*/
- fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
+ fault = handle_mm_fault(mm, vma, address, flags);
if (unlikely(fault & VM_FAULT_ERROR)) {
mm_fault_error(regs, error_code, address, fault);
return;
}
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
- regs, address);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
- regs, address);
+ /*
+ * Major/minor page fault accounting is only done on the
+ * initial attempt. If we go through a retry, it is extremely
+ * likely that the page will be found in page cache at that point.
+ */
+ if (flags & FAULT_FLAG_ALLOW_RETRY) {
+ if (fault & VM_FAULT_MAJOR) {
+ tsk->maj_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+ regs, address);
+ } else {
+ tsk->min_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+ regs, address);
+ }
+ if (fault & VM_FAULT_RETRY) {
+ /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
+ * of starvation. */
+ flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ goto retry;
+ }
}
check_v8086_mode(regs, address, tsk);
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 5e8fa12ef861..b49962662101 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -9,6 +9,7 @@ void *kmap(struct page *page)
return page_address(page);
return kmap_high(page);
}
+EXPORT_SYMBOL(kmap);
void kunmap(struct page *page)
{
@@ -18,6 +19,7 @@ void kunmap(struct page *page)
return;
kunmap_high(page);
}
+EXPORT_SYMBOL(kunmap);
/*
* kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
@@ -27,10 +29,10 @@ void kunmap(struct page *page)
* However when holding an atomic kmap it is not legal to sleep, so atomic
* kmaps are appropriate for short, tight code paths only.
*/
-void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
+void *kmap_atomic_prot(struct page *page, pgprot_t prot)
{
- enum fixed_addresses idx;
unsigned long vaddr;
+ int idx, type;
/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
pagefault_disable();
@@ -38,8 +40,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
if (!PageHighMem(page))
return page_address(page);
- debug_kmap_atomic(type);
-
+ type = kmap_atomic_idx_push();
idx = type + KM_TYPE_NR*smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
BUG_ON(!pte_none(*(kmap_pte-idx)));
@@ -47,44 +48,57 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
return (void *)vaddr;
}
+EXPORT_SYMBOL(kmap_atomic_prot);
+
+void *__kmap_atomic(struct page *page)
+{
+ return kmap_atomic_prot(page, kmap_prot);
+}
+EXPORT_SYMBOL(__kmap_atomic);
-void *kmap_atomic(struct page *page, enum km_type type)
+/*
+ * This is the same as kmap_atomic() but can map memory that doesn't
+ * have a struct page associated with it.
+ */
+void *kmap_atomic_pfn(unsigned long pfn)
{
- return kmap_atomic_prot(page, type, kmap_prot);
+ return kmap_atomic_prot_pfn(pfn, kmap_prot);
}
+EXPORT_SYMBOL_GPL(kmap_atomic_pfn);
-void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type)
+void __kunmap_atomic(void *kvaddr)
{
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
- enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
-
- /*
- * Force other mappings to Oops if they'll try to access this pte
- * without first remap it. Keeping stale mappings around is a bad idea
- * also, in case the page changes cacheability attributes or becomes
- * a protected page in a hypervisor.
- */
- if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
+
+ if (vaddr >= __fix_to_virt(FIX_KMAP_END) &&
+ vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) {
+ int idx, type;
+
+ type = kmap_atomic_idx();
+ idx = type + KM_TYPE_NR * smp_processor_id();
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+ WARN_ON_ONCE(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+#endif
+ /*
+ * Force other mappings to Oops if they'll try to access this
+ * pte without first remap it. Keeping stale mappings around
+ * is a bad idea also, in case the page changes cacheability
+ * attributes or becomes a protected page in a hypervisor.
+ */
kpte_clear_flush(kmap_pte-idx, vaddr);
- else {
+ kmap_atomic_idx_pop();
+ }
#ifdef CONFIG_DEBUG_HIGHMEM
+ else {
BUG_ON(vaddr < PAGE_OFFSET);
BUG_ON(vaddr >= (unsigned long)high_memory);
-#endif
}
+#endif
pagefault_enable();
}
-
-/*
- * This is the same as kmap_atomic() but can map memory that doesn't
- * have a struct page associated with it.
- */
-void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
-{
- return kmap_atomic_prot_pfn(pfn, type, kmap_prot);
-}
-EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */
+EXPORT_SYMBOL(__kunmap_atomic);
struct page *kmap_atomic_to_page(void *ptr)
{
@@ -98,12 +112,6 @@ struct page *kmap_atomic_to_page(void *ptr)
pte = kmap_pte - (idx - FIX_KMAP_BEGIN);
return pte_page(*pte);
}
-
-EXPORT_SYMBOL(kmap);
-EXPORT_SYMBOL(kunmap);
-EXPORT_SYMBOL(kmap_atomic);
-EXPORT_SYMBOL(kunmap_atomic_notypecheck);
-EXPORT_SYMBOL(kmap_atomic_prot);
EXPORT_SYMBOL(kmap_atomic_to_page);
void __init set_highmem_pages_init(void)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 84346200e783..71a59296af80 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -51,7 +51,6 @@
#include <asm/numa.h>
#include <asm/cacheflush.h>
#include <asm/init.h>
-#include <linux/bootmem.h>
static int __init parse_direct_gbpages_off(char *arg)
{
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 72fc70cf6184..7b179b499fa3 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -48,21 +48,20 @@ int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot)
}
EXPORT_SYMBOL_GPL(iomap_create_wc);
-void
-iomap_free(resource_size_t base, unsigned long size)
+void iomap_free(resource_size_t base, unsigned long size)
{
io_free_memtype(base, base + size);
}
EXPORT_SYMBOL_GPL(iomap_free);
-void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
+void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
{
- enum fixed_addresses idx;
unsigned long vaddr;
+ int idx, type;
pagefault_disable();
- debug_kmap_atomic(type);
+ type = kmap_atomic_idx_push();
idx = type + KM_TYPE_NR * smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
@@ -72,10 +71,10 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
}
/*
- * Map 'pfn' using fixed map 'type' and protections 'prot'
+ * Map 'pfn' using protections 'prot'
*/
void __iomem *
-iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
+iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
{
/*
* For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS.
@@ -86,24 +85,34 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC))
prot = PAGE_KERNEL_UC_MINUS;
- return (void __force __iomem *) kmap_atomic_prot_pfn(pfn, type, prot);
+ return (void __force __iomem *) kmap_atomic_prot_pfn(pfn, prot);
}
EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
void
-iounmap_atomic(void __iomem *kvaddr, enum km_type type)
+iounmap_atomic(void __iomem *kvaddr)
{
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
- enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
- /*
- * Force other mappings to Oops if they'll try to access this pte
- * without first remap it. Keeping stale mappings around is a bad idea
- * also, in case the page changes cacheability attributes or becomes
- * a protected page in a hypervisor.
- */
- if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
+ if (vaddr >= __fix_to_virt(FIX_KMAP_END) &&
+ vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) {
+ int idx, type;
+
+ type = kmap_atomic_idx();
+ idx = type + KM_TYPE_NR * smp_processor_id();
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+ WARN_ON_ONCE(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+#endif
+ /*
+ * Force other mappings to Oops if they'll try to access this
+ * pte without first remap it. Keeping stale mappings around
+ * is a bad idea also, in case the page changes cacheability
+ * attributes or becomes a protected page in a hypervisor.
+ */
kpte_clear_flush(kmap_pte-idx, vaddr);
+ kmap_atomic_idx_pop();
+ }
pagefault_enable();
}
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 60f498511dd6..7ffc9b727efd 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -178,11 +178,8 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
/* extend the search scope */
end = max_pfn_mapped << PAGE_SHIFT;
- if (end > (MAX_DMA32_PFN<<PAGE_SHIFT))
- start = MAX_DMA32_PFN<<PAGE_SHIFT;
- else
- start = MAX_DMA_PFN<<PAGE_SHIFT;
- mem = memblock_x86_find_in_range_node(nodeid, start, end, size, align);
+ start = MAX_DMA_PFN << PAGE_SHIFT;
+ mem = memblock_find_in_range(start, end, size, align);
if (mem != MEMBLOCK_ERROR)
return __va(mem);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 49358481c733..6acc724d5d8f 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -223,7 +223,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
static void __cpuinit calculate_tlb_offset(void)
{
- int cpu, node, nr_node_vecs;
+ int cpu, node, nr_node_vecs, idx = 0;
/*
* we are changing tlb_vector_offset for each CPU in runtime, but this
* will not cause inconsistency, as the write is atomic under X86. we
@@ -239,7 +239,7 @@ static void __cpuinit calculate_tlb_offset(void)
nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
for_each_online_node(node) {
- int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) *
+ int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) *
nr_node_vecs;
int cpu_offset = 0;
for_each_cpu(cpu, cpumask_of_node(node)) {
@@ -248,10 +248,11 @@ static void __cpuinit calculate_tlb_offset(void)
cpu_offset++;
cpu_offset = cpu_offset % nr_node_vecs;
}
+ idx++;
}
}
-static int tlb_cpuhp_notify(struct notifier_block *n,
+static int __cpuinit tlb_cpuhp_notify(struct notifier_block *n,
unsigned long action, void *hcpu)
{
switch (action & 0xf) {
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index bd1489c3ce09..4e8baad36d37 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -726,6 +726,12 @@ int __init op_nmi_init(struct oprofile_operations *ops)
case 0x11:
cpu_type = "x86-64/family11h";
break;
+ case 0x12:
+ cpu_type = "x86-64/family12h";
+ break;
+ case 0x14:
+ cpu_type = "x86-64/family14h";
+ break;
default:
return -ENODEV;
}
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 42fb46f83883..a011bcc0f943 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -48,17 +48,24 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS];
static u32 ibs_caps;
-struct op_ibs_config {
+struct ibs_config {
unsigned long op_enabled;
unsigned long fetch_enabled;
unsigned long max_cnt_fetch;
unsigned long max_cnt_op;
unsigned long rand_en;
unsigned long dispatched_ops;
+ unsigned long branch_target;
};
-static struct op_ibs_config ibs_config;
-static u64 ibs_op_ctl;
+struct ibs_state {
+ u64 ibs_op_ctl;
+ int branch_target;
+ unsigned long sample_size;
+};
+
+static struct ibs_config ibs_config;
+static struct ibs_state ibs_state;
/*
* IBS cpuid feature detection
@@ -71,8 +78,16 @@ static u64 ibs_op_ctl;
* bit 0 is used to indicate the existence of IBS.
*/
#define IBS_CAPS_AVAIL (1U<<0)
+#define IBS_CAPS_FETCHSAM (1U<<1)
+#define IBS_CAPS_OPSAM (1U<<2)
#define IBS_CAPS_RDWROPCNT (1U<<3)
#define IBS_CAPS_OPCNT (1U<<4)
+#define IBS_CAPS_BRNTRGT (1U<<5)
+#define IBS_CAPS_OPCNTEXT (1U<<6)
+
+#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
+ | IBS_CAPS_FETCHSAM \
+ | IBS_CAPS_OPSAM)
/*
* IBS APIC setup
@@ -99,12 +114,12 @@ static u32 get_ibs_caps(void)
/* check IBS cpuid feature flags */
max_level = cpuid_eax(0x80000000);
if (max_level < IBS_CPUID_FEATURES)
- return IBS_CAPS_AVAIL;
+ return IBS_CAPS_DEFAULT;
ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);
if (!(ibs_caps & IBS_CAPS_AVAIL))
/* cpuid flags not valid */
- return IBS_CAPS_AVAIL;
+ return IBS_CAPS_DEFAULT;
return ibs_caps;
}
@@ -197,8 +212,8 @@ op_amd_handle_ibs(struct pt_regs * const regs,
rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
if (ctl & IBS_OP_VAL) {
rdmsrl(MSR_AMD64_IBSOPRIP, val);
- oprofile_write_reserve(&entry, regs, val,
- IBS_OP_CODE, IBS_OP_SIZE);
+ oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE,
+ ibs_state.sample_size);
oprofile_add_data64(&entry, val);
rdmsrl(MSR_AMD64_IBSOPDATA, val);
oprofile_add_data64(&entry, val);
@@ -210,10 +225,14 @@ op_amd_handle_ibs(struct pt_regs * const regs,
oprofile_add_data64(&entry, val);
rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
oprofile_add_data64(&entry, val);
+ if (ibs_state.branch_target) {
+ rdmsrl(MSR_AMD64_IBSBRTARGET, val);
+ oprofile_add_data(&entry, (unsigned long)val);
+ }
oprofile_write_commit(&entry);
/* reenable the IRQ */
- ctl = op_amd_randomize_ibs_op(ibs_op_ctl);
+ ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
}
}
@@ -226,21 +245,32 @@ static inline void op_amd_start_ibs(void)
if (!ibs_caps)
return;
+ memset(&ibs_state, 0, sizeof(ibs_state));
+
+ /*
+ * Note: Since the max count settings may out of range we
+ * write back the actual used values so that userland can read
+ * it.
+ */
+
if (ibs_config.fetch_enabled) {
- val = (ibs_config.max_cnt_fetch >> 4) & IBS_FETCH_MAX_CNT;
+ val = ibs_config.max_cnt_fetch >> 4;
+ val = min(val, IBS_FETCH_MAX_CNT);
+ ibs_config.max_cnt_fetch = val << 4;
val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
val |= IBS_FETCH_ENABLE;
wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
}
if (ibs_config.op_enabled) {
- ibs_op_ctl = ibs_config.max_cnt_op >> 4;
+ val = ibs_config.max_cnt_op >> 4;
if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
/*
* IbsOpCurCnt not supported. See
* op_amd_randomize_ibs_op() for details.
*/
- ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL);
+ val = clamp(val, 0x0081ULL, 0xFF80ULL);
+ ibs_config.max_cnt_op = val << 4;
} else {
/*
* The start value is randomized with a
@@ -248,13 +278,24 @@ static inline void op_amd_start_ibs(void)
* with the half of the randomized range. Also
* avoid underflows.
*/
- ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET,
- IBS_OP_MAX_CNT);
+ val += IBS_RANDOM_MAXCNT_OFFSET;
+ if (ibs_caps & IBS_CAPS_OPCNTEXT)
+ val = min(val, IBS_OP_MAX_CNT_EXT);
+ else
+ val = min(val, IBS_OP_MAX_CNT);
+ ibs_config.max_cnt_op =
+ (val - IBS_RANDOM_MAXCNT_OFFSET) << 4;
+ }
+ val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);
+ val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
+ val |= IBS_OP_ENABLE;
+ ibs_state.ibs_op_ctl = val;
+ ibs_state.sample_size = IBS_OP_SIZE;
+ if (ibs_config.branch_target) {
+ ibs_state.branch_target = 1;
+ ibs_state.sample_size++;
}
- if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops)
- ibs_op_ctl |= IBS_OP_CNT_CTL;
- ibs_op_ctl |= IBS_OP_ENABLE;
- val = op_amd_randomize_ibs_op(ibs_op_ctl);
+ val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
wrmsrl(MSR_AMD64_IBSOPCTL, val);
}
}
@@ -281,29 +322,25 @@ static inline int eilvt_is_available(int offset)
static inline int ibs_eilvt_valid(void)
{
- u64 val;
int offset;
+ u64 val;
rdmsrl(MSR_AMD64_IBSCTL, val);
+ offset = val & IBSCTL_LVT_OFFSET_MASK;
+
if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
- pr_err(FW_BUG "cpu %d, invalid IBS "
- "interrupt offset %d (MSR%08X=0x%016llx)",
- smp_processor_id(), offset,
- MSR_AMD64_IBSCTL, val);
+ pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
+ smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
return 0;
}
- offset = val & IBSCTL_LVT_OFFSET_MASK;
-
- if (eilvt_is_available(offset))
- return !0;
-
- pr_err(FW_BUG "cpu %d, IBS interrupt offset %d "
- "not available (MSR%08X=0x%016llx)",
- smp_processor_id(), offset,
- MSR_AMD64_IBSCTL, val);
+ if (!eilvt_is_available(offset)) {
+ pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
+ smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
+ return 0;
+ }
- return 0;
+ return 1;
}
static inline int get_ibs_offset(void)
@@ -630,28 +667,33 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
/* model specific files */
/* setup some reasonable defaults */
+ memset(&ibs_config, 0, sizeof(ibs_config));
ibs_config.max_cnt_fetch = 250000;
- ibs_config.fetch_enabled = 0;
ibs_config.max_cnt_op = 250000;
- ibs_config.op_enabled = 0;
- ibs_config.dispatched_ops = 0;
-
- dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
- oprofilefs_create_ulong(sb, dir, "enable",
- &ibs_config.fetch_enabled);
- oprofilefs_create_ulong(sb, dir, "max_count",
- &ibs_config.max_cnt_fetch);
- oprofilefs_create_ulong(sb, dir, "rand_enable",
- &ibs_config.rand_en);
-
- dir = oprofilefs_mkdir(sb, root, "ibs_op");
- oprofilefs_create_ulong(sb, dir, "enable",
- &ibs_config.op_enabled);
- oprofilefs_create_ulong(sb, dir, "max_count",
- &ibs_config.max_cnt_op);
- if (ibs_caps & IBS_CAPS_OPCNT)
- oprofilefs_create_ulong(sb, dir, "dispatched_ops",
- &ibs_config.dispatched_ops);
+
+ if (ibs_caps & IBS_CAPS_FETCHSAM) {
+ dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
+ oprofilefs_create_ulong(sb, dir, "enable",
+ &ibs_config.fetch_enabled);
+ oprofilefs_create_ulong(sb, dir, "max_count",
+ &ibs_config.max_cnt_fetch);
+ oprofilefs_create_ulong(sb, dir, "rand_enable",
+ &ibs_config.rand_en);
+ }
+
+ if (ibs_caps & IBS_CAPS_OPSAM) {
+ dir = oprofilefs_mkdir(sb, root, "ibs_op");
+ oprofilefs_create_ulong(sb, dir, "enable",
+ &ibs_config.op_enabled);
+ oprofilefs_create_ulong(sb, dir, "max_count",
+ &ibs_config.max_cnt_op);
+ if (ibs_caps & IBS_CAPS_OPCNT)
+ oprofilefs_create_ulong(sb, dir, "dispatched_ops",
+ &ibs_config.dispatched_ops);
+ if (ibs_caps & IBS_CAPS_BRNTRGT)
+ oprofilefs_create_ulong(sb, dir, "branch_target",
+ &ibs_config.branch_target);
+ }
return 0;
}
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index a0207a7fdf39..effd96e33f16 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_PCI_BIOS) += pcbios.o
obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o
obj-$(CONFIG_PCI_DIRECT) += direct.o
obj-$(CONFIG_PCI_OLPC) += olpc.o
+obj-$(CONFIG_PCI_XEN) += xen.o
obj-y += fixup.o
obj-$(CONFIG_ACPI) += acpi.o
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 15466c096ba5..0972315c3860 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -138,7 +138,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
struct acpi_resource_address64 addr;
acpi_status status;
unsigned long flags;
- struct resource *root, *conflict;
u64 start, end;
status = resource_to_addr(acpi_res, &addr);
@@ -146,12 +145,10 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
return AE_OK;
if (addr.resource_type == ACPI_MEMORY_RANGE) {
- root = &iomem_resource;
flags = IORESOURCE_MEM;
if (addr.info.mem.caching == ACPI_PREFETCHABLE_MEMORY)
flags |= IORESOURCE_PREFETCH;
} else if (addr.resource_type == ACPI_IO_RANGE) {
- root = &ioport_resource;
flags = IORESOURCE_IO;
} else
return AE_OK;
@@ -172,25 +169,90 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
return AE_OK;
}
- conflict = insert_resource_conflict(root, res);
- if (conflict) {
- dev_err(&info->bridge->dev,
- "address space collision: host bridge window %pR "
- "conflicts with %s %pR\n",
- res, conflict->name, conflict);
- } else {
- pci_bus_add_resource(info->bus, res, 0);
- info->res_num++;
- if (addr.translation_offset)
- dev_info(&info->bridge->dev, "host bridge window %pR "
- "(PCI address [%#llx-%#llx])\n",
- res, res->start - addr.translation_offset,
- res->end - addr.translation_offset);
+ info->res_num++;
+ if (addr.translation_offset)
+ dev_info(&info->bridge->dev, "host bridge window %pR "
+ "(PCI address [%#llx-%#llx])\n",
+ res, res->start - addr.translation_offset,
+ res->end - addr.translation_offset);
+ else
+ dev_info(&info->bridge->dev, "host bridge window %pR\n", res);
+
+ return AE_OK;
+}
+
+static bool resource_contains(struct resource *res, resource_size_t point)
+{
+ if (res->start <= point && point <= res->end)
+ return true;
+ return false;
+}
+
+static void coalesce_windows(struct pci_root_info *info, int type)
+{
+ int i, j;
+ struct resource *res1, *res2;
+
+ for (i = 0; i < info->res_num; i++) {
+ res1 = &info->res[i];
+ if (!(res1->flags & type))
+ continue;
+
+ for (j = i + 1; j < info->res_num; j++) {
+ res2 = &info->res[j];
+ if (!(res2->flags & type))
+ continue;
+
+ /*
+ * I don't like throwing away windows because then
+ * our resources no longer match the ACPI _CRS, but
+ * the kernel resource tree doesn't allow overlaps.
+ */
+ if (resource_contains(res1, res2->start) ||
+ resource_contains(res1, res2->end) ||
+ resource_contains(res2, res1->start) ||
+ resource_contains(res2, res1->end)) {
+ res1->start = min(res1->start, res2->start);
+ res1->end = max(res1->end, res2->end);
+ dev_info(&info->bridge->dev,
+ "host bridge window expanded to %pR; %pR ignored\n",
+ res1, res2);
+ res2->flags = 0;
+ }
+ }
+ }
+}
+
+static void add_resources(struct pci_root_info *info)
+{
+ int i;
+ struct resource *res, *root, *conflict;
+
+ if (!pci_use_crs)
+ return;
+
+ coalesce_windows(info, IORESOURCE_MEM);
+ coalesce_windows(info, IORESOURCE_IO);
+
+ for (i = 0; i < info->res_num; i++) {
+ res = &info->res[i];
+
+ if (res->flags & IORESOURCE_MEM)
+ root = &iomem_resource;
+ else if (res->flags & IORESOURCE_IO)
+ root = &ioport_resource;
else
- dev_info(&info->bridge->dev,
- "host bridge window %pR\n", res);
+ continue;
+
+ conflict = insert_resource_conflict(root, res);
+ if (conflict)
+ dev_err(&info->bridge->dev,
+ "address space collision: host bridge window %pR "
+ "conflicts with %s %pR\n",
+ res, conflict->name, conflict);
+ else
+ pci_bus_add_resource(info->bus, res, 0);
}
- return AE_OK;
}
static void
@@ -224,6 +286,7 @@ get_current_resources(struct acpi_device *device, int busnum,
acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource,
&info);
+ add_resources(&info);
return;
name_alloc_fail:
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index a0772af64efb..f7c8a399978c 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -421,16 +421,10 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
return bus;
}
-
-int __init pcibios_init(void)
+void __init pcibios_set_cache_line_size(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
- if (!raw_pci_ops) {
- printk(KERN_WARNING "PCI: System does not support PCI\n");
- return 0;
- }
-
/*
* Set PCI cacheline size to that of the CPU if the CPU has reported it.
* (For older CPUs that don't support cpuid, we se it to 32 bytes
@@ -445,7 +439,16 @@ int __init pcibios_init(void)
pci_dfl_cache_line_size = 32 >> 2;
printk(KERN_DEBUG "PCI: Unknown cacheline size. Setting to 32 bytes\n");
}
+}
+
+int __init pcibios_init(void)
+{
+ if (!raw_pci_ops) {
+ printk(KERN_WARNING "PCI: System does not support PCI\n");
+ return 0;
+ }
+ pcibios_set_cache_line_size();
pcibios_resource_survey();
if (pci_bf_sort >= pci_force_bf)
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 55253095be84..c4bb261c106e 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -65,16 +65,21 @@ pcibios_align_resource(void *data, const struct resource *res,
resource_size_t size, resource_size_t align)
{
struct pci_dev *dev = data;
- resource_size_t start = res->start;
+ resource_size_t start = round_down(res->end - size + 1, align);
if (res->flags & IORESOURCE_IO) {
- if (skip_isa_ioresource_align(dev))
- return start;
- if (start & 0x300)
- start = (start + 0x3ff) & ~0x3ff;
+
+ /*
+ * If we're avoiding ISA aliases, the largest contiguous I/O
+ * port space is 256 bytes. Clearing bits 9 and 10 preserves
+ * all 256-byte and smaller alignments, so the result will
+ * still be correctly aligned.
+ */
+ if (!skip_isa_ioresource_align(dev))
+ start &= ~0x300;
} else if (res->flags & IORESOURCE_MEM) {
if (start < BIOS_END)
- start = BIOS_END;
+ start = res->end; /* fail; no space */
}
return start;
}
@@ -311,6 +316,8 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
*/
prot |= _PAGE_CACHE_UC_MINUS;
+ prot |= _PAGE_IOMAP; /* creating a mapping for IO */
+
vma->vm_page_prot = __pgprot(prot);
if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index f547ee05f715..9f9bfb705cf9 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -584,27 +584,28 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
case PCI_DEVICE_ID_INTEL_ICH9_3:
case PCI_DEVICE_ID_INTEL_ICH9_4:
case PCI_DEVICE_ID_INTEL_ICH9_5:
- case PCI_DEVICE_ID_INTEL_TOLAPAI_0:
+ case PCI_DEVICE_ID_INTEL_EP80579_0:
case PCI_DEVICE_ID_INTEL_ICH10_0:
case PCI_DEVICE_ID_INTEL_ICH10_1:
case PCI_DEVICE_ID_INTEL_ICH10_2:
case PCI_DEVICE_ID_INTEL_ICH10_3:
+ case PCI_DEVICE_ID_INTEL_PATSBURG_LPC:
r->name = "PIIX/ICH";
r->get = pirq_piix_get;
r->set = pirq_piix_set;
return 1;
}
- if ((device >= PCI_DEVICE_ID_INTEL_PCH_LPC_MIN) &&
- (device <= PCI_DEVICE_ID_INTEL_PCH_LPC_MAX)) {
+ if ((device >= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN) &&
+ (device <= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX)) {
r->name = "PIIX/ICH";
r->get = pirq_piix_get;
r->set = pirq_piix_set;
return 1;
}
- if ((device >= PCI_DEVICE_ID_INTEL_CPT_LPC_MIN) &&
- (device <= PCI_DEVICE_ID_INTEL_CPT_LPC_MAX)) {
+ if ((device >= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN) &&
+ (device <= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX)) {
r->name = "PIIX/ICH";
r->get = pirq_piix_get;
r->set = pirq_piix_set;
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index a918553ebc75..e282886616a0 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -65,7 +65,6 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start,
int end, u64 addr)
{
struct pci_mmcfg_region *new;
- int num_buses;
struct resource *res;
if (addr == 0)
@@ -82,10 +81,9 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start,
list_add_sorted(new);
- num_buses = end - start + 1;
res = &new->res;
res->start = addr + PCI_MMCFG_BUS_OFFSET(start);
- res->end = addr + PCI_MMCFG_BUS_OFFSET(num_buses) - 1;
+ res->end = addr + PCI_MMCFG_BUS_OFFSET(end + 1) - 1;
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
snprintf(new->name, PCI_MMCFG_RESOURCE_NAME_LEN,
"PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end);
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
new file mode 100644
index 000000000000..25cd4a07d09f
--- /dev/null
+++ b/arch/x86/pci/xen.c
@@ -0,0 +1,429 @@
+/*
+ * Xen PCI Frontend Stub - puts some "dummy" functions in to the Linux
+ * x86 PCI core to support the Xen PCI Frontend
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/acpi.h>
+
+#include <linux/io.h>
+#include <asm/io_apic.h>
+#include <asm/pci_x86.h>
+
+#include <asm/xen/hypervisor.h>
+
+#include <xen/features.h>
+#include <xen/events.h>
+#include <asm/xen/pci.h>
+
+#ifdef CONFIG_ACPI
+static int xen_hvm_register_pirq(u32 gsi, int triggering)
+{
+ int rc, irq;
+ struct physdev_map_pirq map_irq;
+ int shareable = 0;
+ char *name;
+
+ if (!xen_hvm_domain())
+ return -1;
+
+ map_irq.domid = DOMID_SELF;
+ map_irq.type = MAP_PIRQ_TYPE_GSI;
+ map_irq.index = gsi;
+ map_irq.pirq = -1;
+
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
+ if (rc) {
+ printk(KERN_WARNING "xen map irq failed %d\n", rc);
+ return -1;
+ }
+
+ if (triggering == ACPI_EDGE_SENSITIVE) {
+ shareable = 0;
+ name = "ioapic-edge";
+ } else {
+ shareable = 1;
+ name = "ioapic-level";
+ }
+
+ irq = xen_map_pirq_gsi(map_irq.pirq, gsi, shareable, name);
+
+ printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
+
+ return irq;
+}
+
+static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
+ int trigger, int polarity)
+{
+ return xen_hvm_register_pirq(gsi, trigger);
+}
+#endif
+
+#if defined(CONFIG_PCI_MSI)
+#include <linux/msi.h>
+#include <asm/msidef.h>
+
+struct xen_pci_frontend_ops *xen_pci_frontend;
+EXPORT_SYMBOL_GPL(xen_pci_frontend);
+
+#define XEN_PIRQ_MSI_DATA (MSI_DATA_TRIGGER_EDGE | \
+ MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0))
+
+static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
+ struct msi_msg *msg)
+{
+ /* We set vector == 0 to tell the hypervisor we don't care about it,
+ * but we want a pirq setup instead.
+ * We use the dest_id field to pass the pirq that we want. */
+ msg->address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(pirq);
+ msg->address_lo =
+ MSI_ADDR_BASE_LO |
+ MSI_ADDR_DEST_MODE_PHYSICAL |
+ MSI_ADDR_REDIRECTION_CPU |
+ MSI_ADDR_DEST_ID(pirq);
+
+ msg->data = XEN_PIRQ_MSI_DATA;
+}
+
+static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+ int irq, pirq, ret = 0;
+ struct msi_desc *msidesc;
+ struct msi_msg msg;
+
+ list_for_each_entry(msidesc, &dev->msi_list, list) {
+ __read_msi_msg(msidesc, &msg);
+ pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
+ ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
+ if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) {
+ xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
+ "msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ);
+ if (irq < 0)
+ goto error;
+ ret = set_irq_msi(irq, msidesc);
+ if (ret < 0)
+ goto error_while;
+ printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d"
+ " pirq=%d\n", irq, pirq);
+ return 0;
+ }
+ xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
+ "msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ));
+ if (irq < 0 || pirq < 0)
+ goto error;
+ printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq);
+ xen_msi_compose_msg(dev, pirq, &msg);
+ ret = set_irq_msi(irq, msidesc);
+ if (ret < 0)
+ goto error_while;
+ write_msi_msg(irq, &msg);
+ }
+ return 0;
+
+error_while:
+ unbind_from_irqhandler(irq, NULL);
+error:
+ if (ret == -ENODEV)
+ dev_err(&dev->dev, "Xen PCI frontend has not registered" \
+ " MSI/MSI-X support!\n");
+
+ return ret;
+}
+
+/*
+ * For MSI interrupts we have to use drivers/xen/event.s functions to
+ * allocate an irq_desc and setup the right */
+
+
+static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+ int irq, ret, i;
+ struct msi_desc *msidesc;
+ int *v;
+
+ v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL);
+ if (!v)
+ return -ENOMEM;
+
+ if (type == PCI_CAP_ID_MSIX)
+ ret = xen_pci_frontend_enable_msix(dev, &v, nvec);
+ else
+ ret = xen_pci_frontend_enable_msi(dev, &v);
+ if (ret)
+ goto error;
+ i = 0;
+ list_for_each_entry(msidesc, &dev->msi_list, list) {
+ irq = xen_allocate_pirq(v[i], 0, /* not sharable */
+ (type == PCI_CAP_ID_MSIX) ?
+ "pcifront-msi-x" : "pcifront-msi");
+ if (irq < 0) {
+ ret = -1;
+ goto free;
+ }
+
+ ret = set_irq_msi(irq, msidesc);
+ if (ret)
+ goto error_while;
+ i++;
+ }
+ kfree(v);
+ return 0;
+
+error_while:
+ unbind_from_irqhandler(irq, NULL);
+error:
+ if (ret == -ENODEV)
+ dev_err(&dev->dev, "Xen PCI frontend has not registered" \
+ " MSI/MSI-X support!\n");
+free:
+ kfree(v);
+ return ret;
+}
+
+static void xen_teardown_msi_irqs(struct pci_dev *dev)
+{
+ struct msi_desc *msidesc;
+
+ msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
+ if (msidesc->msi_attrib.is_msix)
+ xen_pci_frontend_disable_msix(dev);
+ else
+ xen_pci_frontend_disable_msi(dev);
+}
+
+static void xen_teardown_msi_irq(unsigned int irq)
+{
+ xen_destroy_irq(irq);
+}
+
+static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+ int irq, ret;
+ struct msi_desc *msidesc;
+
+ list_for_each_entry(msidesc, &dev->msi_list, list) {
+ irq = xen_create_msi_irq(dev, msidesc, type);
+ if (irq < 0)
+ return -1;
+
+ ret = set_irq_msi(irq, msidesc);
+ if (ret)
+ goto error;
+ }
+ return 0;
+
+error:
+ xen_destroy_irq(irq);
+ return ret;
+}
+#endif
+
+static int xen_pcifront_enable_irq(struct pci_dev *dev)
+{
+ int rc;
+ int share = 1;
+
+ dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq);
+
+ if (dev->irq < 0)
+ return -EINVAL;
+
+ if (dev->irq < NR_IRQS_LEGACY)
+ share = 0;
+
+ rc = xen_allocate_pirq(dev->irq, share, "pcifront");
+ if (rc < 0) {
+ dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n",
+ dev->irq, rc);
+ return rc;
+ }
+ return 0;
+}
+
+int __init pci_xen_init(void)
+{
+ if (!xen_pv_domain() || xen_initial_domain())
+ return -ENODEV;
+
+ printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n");
+
+ pcibios_set_cache_line_size();
+
+ pcibios_enable_irq = xen_pcifront_enable_irq;
+ pcibios_disable_irq = NULL;
+
+#ifdef CONFIG_ACPI
+ /* Keep ACPI out of the picture */
+ acpi_noirq = 1;
+#endif
+
+#ifdef CONFIG_PCI_MSI
+ x86_msi.setup_msi_irqs = xen_setup_msi_irqs;
+ x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
+ x86_msi.teardown_msi_irqs = xen_teardown_msi_irqs;
+#endif
+ return 0;
+}
+
+int __init pci_xen_hvm_init(void)
+{
+ if (!xen_feature(XENFEAT_hvm_pirqs))
+ return 0;
+
+#ifdef CONFIG_ACPI
+ /*
+ * We don't want to change the actual ACPI delivery model,
+ * just how GSIs get registered.
+ */
+ __acpi_register_gsi = acpi_register_gsi_xen_hvm;
+#endif
+
+#ifdef CONFIG_PCI_MSI
+ x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs;
+ x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
+#endif
+ return 0;
+}
+
+#ifdef CONFIG_XEN_DOM0
+static int xen_register_pirq(u32 gsi, int triggering)
+{
+ int rc, irq;
+ struct physdev_map_pirq map_irq;
+ int shareable = 0;
+ char *name;
+
+ if (!xen_pv_domain())
+ return -1;
+
+ if (triggering == ACPI_EDGE_SENSITIVE) {
+ shareable = 0;
+ name = "ioapic-edge";
+ } else {
+ shareable = 1;
+ name = "ioapic-level";
+ }
+
+ irq = xen_allocate_pirq(gsi, shareable, name);
+
+ printk(KERN_DEBUG "xen: --> irq=%d\n", irq);
+
+ if (irq < 0)
+ goto out;
+
+ map_irq.domid = DOMID_SELF;
+ map_irq.type = MAP_PIRQ_TYPE_GSI;
+ map_irq.index = gsi;
+ map_irq.pirq = irq;
+
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
+ if (rc) {
+ printk(KERN_WARNING "xen map irq failed %d\n", rc);
+ return -1;
+ }
+
+out:
+ return irq;
+}
+
+static int xen_register_gsi(u32 gsi, int triggering, int polarity)
+{
+ int rc, irq;
+ struct physdev_setup_gsi setup_gsi;
+
+ if (!xen_pv_domain())
+ return -1;
+
+ printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n",
+ gsi, triggering, polarity);
+
+ irq = xen_register_pirq(gsi, triggering);
+
+ setup_gsi.gsi = gsi;
+ setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1);
+ setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
+ if (rc == -EEXIST)
+ printk(KERN_INFO "Already setup the GSI :%d\n", gsi);
+ else if (rc) {
+ printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n",
+ gsi, rc);
+ }
+
+ return irq;
+}
+
+static __init void xen_setup_acpi_sci(void)
+{
+ int rc;
+ int trigger, polarity;
+ int gsi = acpi_sci_override_gsi;
+
+ if (!gsi)
+ return;
+
+ rc = acpi_get_override_irq(gsi, &trigger, &polarity);
+ if (rc) {
+ printk(KERN_WARNING "xen: acpi_get_override_irq failed for acpi"
+ " sci, rc=%d\n", rc);
+ return;
+ }
+ trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
+ polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
+
+ printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d "
+ "polarity=%d\n", gsi, trigger, polarity);
+
+ gsi = xen_register_gsi(gsi, trigger, polarity);
+ printk(KERN_INFO "xen: acpi sci %d\n", gsi);
+
+ return;
+}
+
+static int acpi_register_gsi_xen(struct device *dev, u32 gsi,
+ int trigger, int polarity)
+{
+ return xen_register_gsi(gsi, trigger, polarity);
+}
+
+static int __init pci_xen_initial_domain(void)
+{
+#ifdef CONFIG_PCI_MSI
+ x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;
+ x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
+#endif
+ xen_setup_acpi_sci();
+ __acpi_register_gsi = acpi_register_gsi_xen;
+
+ return 0;
+}
+
+void __init xen_setup_pirqs(void)
+{
+ int irq;
+
+ pci_xen_initial_domain();
+
+ if (0 == nr_ioapics) {
+ for (irq = 0; irq < NR_IRQS_LEGACY; irq++)
+ xen_allocate_pirq(irq, 0, "xt-pic");
+ return;
+ }
+
+ /* Pre-allocate legacy irqs */
+ for (irq = 0; irq < NR_IRQS_LEGACY; irq++) {
+ int trigger, polarity;
+
+ if (acpi_get_override_irq(irq, &trigger, &polarity) == -1)
+ continue;
+
+ xen_register_pirq(irq,
+ trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE);
+ }
+}
+#endif
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
new file mode 100644
index 000000000000..7bf70b812fa2
--- /dev/null
+++ b/arch/x86/platform/Makefile
@@ -0,0 +1,8 @@
+# Platform specific code goes here
+obj-y += efi/
+obj-y += mrst/
+obj-y += olpc/
+obj-y += scx200/
+obj-y += sfi/
+obj-y += visws/
+obj-y += uv/
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
new file mode 100644
index 000000000000..73b8be0f3675
--- /dev/null
+++ b/arch/x86/platform/efi/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
diff --git a/arch/x86/kernel/efi.c b/arch/x86/platform/efi/efi.c
index 0fe27d7c6258..0fe27d7c6258 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/platform/efi/efi.c
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 5cab48ee61a4..5cab48ee61a4 100644
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/platform/efi/efi_64.c
index ac0621a7ac3d..ac0621a7ac3d 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
diff --git a/arch/x86/kernel/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S
index fbe66e626c09..fbe66e626c09 100644
--- a/arch/x86/kernel/efi_stub_32.S
+++ b/arch/x86/platform/efi/efi_stub_32.S
diff --git a/arch/x86/kernel/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 4c07ccab8146..4c07ccab8146 100644
--- a/arch/x86/kernel/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile
new file mode 100644
index 000000000000..efbbc552fa95
--- /dev/null
+++ b/arch/x86/platform/mrst/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_X86_MRST) += mrst.o
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/platform/mrst/mrst.c
index 79ae68154e87..79ae68154e87 100644
--- a/arch/x86/kernel/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile
new file mode 100644
index 000000000000..c31b8fcb5a86
--- /dev/null
+++ b/arch/x86/platform/olpc/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_OLPC) += olpc.o
+obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o
+obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o
diff --git a/arch/x86/kernel/olpc-xo1.c b/arch/x86/platform/olpc/olpc-xo1.c
index f5442c03abc3..f5442c03abc3 100644
--- a/arch/x86/kernel/olpc-xo1.c
+++ b/arch/x86/platform/olpc/olpc-xo1.c
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/platform/olpc/olpc.c
index edaf3fe8dc5e..edaf3fe8dc5e 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
diff --git a/arch/x86/kernel/olpc_ofw.c b/arch/x86/platform/olpc/olpc_ofw.c
index 787320464379..787320464379 100644
--- a/arch/x86/kernel/olpc_ofw.c
+++ b/arch/x86/platform/olpc/olpc_ofw.c
diff --git a/arch/x86/platform/scx200/Makefile b/arch/x86/platform/scx200/Makefile
new file mode 100644
index 000000000000..762b4c7f4314
--- /dev/null
+++ b/arch/x86/platform/scx200/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_SCx200) += scx200.o
+scx200-y += scx200_32.o
diff --git a/arch/x86/kernel/scx200_32.c b/arch/x86/platform/scx200/scx200_32.c
index 7e004acbe526..7e004acbe526 100644
--- a/arch/x86/kernel/scx200_32.c
+++ b/arch/x86/platform/scx200/scx200_32.c
diff --git a/arch/x86/platform/sfi/Makefile b/arch/x86/platform/sfi/Makefile
new file mode 100644
index 000000000000..cc5db1168a5e
--- /dev/null
+++ b/arch/x86/platform/sfi/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SFI) += sfi.o
diff --git a/arch/x86/kernel/sfi.c b/arch/x86/platform/sfi/sfi.c
index dd4c281ffe57..dd4c281ffe57 100644
--- a/arch/x86/kernel/sfi.c
+++ b/arch/x86/platform/sfi/sfi.c
diff --git a/arch/x86/platform/uv/Makefile b/arch/x86/platform/uv/Makefile
new file mode 100644
index 000000000000..6c40995fefb8
--- /dev/null
+++ b/arch/x86/platform/uv/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 8bc57baaa9ad..8bc57baaa9ad 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 20ea20a39e2a..ba9caa808a9c 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1343,8 +1343,8 @@ uv_activation_descriptor_init(int node, int pnode)
* each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
* per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub
*/
- bau_desc = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)*
- UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
+ bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE
+ * UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
BUG_ON(!bau_desc);
pa = uv_gpa(bau_desc); /* need the real nasid*/
@@ -1402,9 +1402,9 @@ uv_payload_queue_init(int node, int pnode)
struct bau_payload_queue_entry *pqp_malloc;
struct bau_control *bcp;
- pqp = (struct bau_payload_queue_entry *) kmalloc_node(
- (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry),
- GFP_KERNEL, node);
+ pqp = kmalloc_node((DEST_Q_SIZE + 1)
+ * sizeof(struct bau_payload_queue_entry),
+ GFP_KERNEL, node);
BUG_ON(!pqp);
pqp_malloc = pqp;
@@ -1455,7 +1455,7 @@ static void __init uv_init_uvhub(int uvhub, int vector)
* the below initialization can't be in firmware because the
* messaging IRQ will be determined by the OS
*/
- apicid = uvhub_to_first_apicid(uvhub);
+ apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
((apicid << 32) | vector));
}
@@ -1520,8 +1520,7 @@ static void __init uv_init_per_cpu(int nuvhubs)
timeout_us = calculate_destination_timeout();
- uvhub_descs = (struct uvhub_desc *)
- kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
+ uvhub_descs = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc));
uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
for_each_present_cpu(cpu) {
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index 7b24460917d5..7b24460917d5 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c
index 309c70fb7759..309c70fb7759 100644
--- a/arch/x86/kernel/uv_sysfs.c
+++ b/arch/x86/platform/uv/uv_sysfs.c
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 56e421bc379b..9daf5d1af9f1 100644
--- a/arch/x86/kernel/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -89,6 +89,7 @@ static void uv_rtc_send_IPI(int cpu)
apicid = cpu_physical_id(cpu);
pnode = uv_apicid_to_pnode(apicid);
+ apicid |= uv_apicid_hibits;
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(apicid << UVH_IPI_INT_APIC_ID_SHFT) |
(X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT);
@@ -107,6 +108,7 @@ static int uv_intr_pending(int pnode)
static int uv_setup_intr(int cpu, u64 expires)
{
u64 val;
+ unsigned long apicid = cpu_physical_id(cpu) | uv_apicid_hibits;
int pnode = uv_cpu_to_pnode(cpu);
uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
@@ -117,7 +119,7 @@ static int uv_setup_intr(int cpu, u64 expires)
UVH_EVENT_OCCURRED0_RTC1_MASK);
val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
- ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
+ ((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
/* Set configuration */
uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val);
diff --git a/arch/x86/platform/visws/Makefile b/arch/x86/platform/visws/Makefile
new file mode 100644
index 000000000000..91bc17ab2fd5
--- /dev/null
+++ b/arch/x86/platform/visws/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_X86_VISWS) += visws_quirks.o
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c
index 3371bd053b89..3371bd053b89 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/platform/visws/visws_quirks.c
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 68128a1b401a..5b54892e4bc3 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -13,21 +13,28 @@ config XEN
kernel to boot in a paravirtualized environment under the
Xen hypervisor.
+config XEN_DOM0
+ def_bool y
+ depends on XEN && PCI_XEN && SWIOTLB_XEN
+ depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI
+
+# Dummy symbol since people have come to rely on the PRIVILEGED_GUEST
+# name in tools.
+config XEN_PRIVILEGED_GUEST
+ def_bool XEN_DOM0
+
config XEN_PVHVM
def_bool y
depends on XEN
depends on X86_LOCAL_APIC
config XEN_MAX_DOMAIN_MEMORY
- int "Maximum allowed size of a domain in gigabytes"
- default 8 if X86_32
- default 32 if X86_64
+ int
+ default 128
depends on XEN
help
- The pseudo-physical to machine address array is sized
- according to the maximum possible memory size of a Xen
- domain. This array uses 1 page per gigabyte, so there's no
- need to be too stingy here.
+ This only affects the sizing of some bss arrays, the unused
+ portions of which are freed.
config XEN_SAVE_RESTORE
bool
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 63b83ceebd1a..44dcad43989d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -46,6 +46,7 @@
#include <asm/paravirt.h>
#include <asm/apic.h>
#include <asm/page.h>
+#include <asm/xen/pci.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
#include <asm/fixmap.h>
@@ -59,7 +60,6 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/reboot.h>
-#include <asm/setup.h>
#include <asm/stackprotector.h>
#include <asm/hypervisor.h>
@@ -75,6 +75,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
enum xen_domain_type xen_domain_type = XEN_NATIVE;
EXPORT_SYMBOL_GPL(xen_domain_type);
+unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
+EXPORT_SYMBOL(machine_to_phys_mapping);
+unsigned int machine_to_phys_order;
+EXPORT_SYMBOL(machine_to_phys_order);
+
struct start_info *xen_start_info;
EXPORT_SYMBOL_GPL(xen_start_info);
@@ -136,9 +141,6 @@ static void xen_vcpu_setup(int cpu)
info.mfn = arbitrary_virt_to_mfn(vcpup);
info.offset = offset_in_page(vcpup);
- printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n",
- cpu, vcpup, info.mfn, info.offset);
-
/* Check to see if the hypervisor will put the vcpu_info
structure where we want it, which allows direct access via
a percpu-variable. */
@@ -152,9 +154,6 @@ static void xen_vcpu_setup(int cpu)
/* This cpu is using the registered vcpu info, even if
later ones fail to. */
per_cpu(xen_vcpu, cpu) = vcpup;
-
- printk(KERN_DEBUG "cpu %d using vcpu_info at %p\n",
- cpu, vcpup);
}
}
@@ -243,6 +242,7 @@ static __init void xen_init_cpuid_mask(void)
cpuid_leaf1_edx_mask =
~((1 << X86_FEATURE_MCE) | /* disable MCE */
(1 << X86_FEATURE_MCA) | /* disable MCA */
+ (1 << X86_FEATURE_MTRR) | /* disable MTRR */
(1 << X86_FEATURE_ACC)); /* thermal monitoring */
if (!xen_initial_domain())
@@ -836,6 +836,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
Xen console noise. */
break;
+ case MSR_IA32_CR_PAT:
+ if (smp_processor_id() == 0)
+ xen_set_pat(((u64)high << 32) | low);
+ break;
+
default:
ret = native_write_msr_safe(msr, low, high);
}
@@ -874,8 +879,6 @@ void xen_setup_vcpu_info_placement(void)
/* xen_vcpu_setup managed to place the vcpu_info within the
percpu area for all cpus, so make use of it */
if (have_vcpu_info_placement) {
- printk(KERN_INFO "Xen: using vcpu_info placement\n");
-
pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
@@ -1018,10 +1021,6 @@ static void xen_reboot(int reason)
{
struct sched_shutdown r = { .reason = reason };
-#ifdef CONFIG_SMP
- smp_send_stop();
-#endif
-
if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
BUG();
}
@@ -1092,6 +1091,8 @@ static void __init xen_setup_stackprotector(void)
/* First C function to be called on Xen boot */
asmlinkage void __init xen_start_kernel(void)
{
+ struct physdev_set_iopl set_iopl;
+ int rc;
pgd_t *pgd;
if (!xen_start_info)
@@ -1099,6 +1100,8 @@ asmlinkage void __init xen_start_kernel(void)
xen_domain_type = XEN_PV_DOMAIN;
+ xen_setup_machphys_mapping();
+
/* Install Xen paravirt ops */
pv_info = xen_info;
pv_init_ops = xen_init_ops;
@@ -1188,8 +1191,10 @@ asmlinkage void __init xen_start_kernel(void)
xen_raw_console_write("mapping kernel into physical memory\n");
pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
+ xen_ident_map_ISA();
- init_mm.pgd = pgd;
+ /* Allocate and initialize top and mid mfn levels for p2m structure */
+ xen_build_mfn_list_list();
/* keep using Xen gdt for now; no urgent need to change it */
@@ -1200,10 +1205,18 @@ asmlinkage void __init xen_start_kernel(void)
#else
pv_info.kernel_rpl = 0;
#endif
-
/* set the limit of our address space */
xen_reserve_top();
+ /* We used to do this in xen_arch_setup, but that is too late on AMD
+ * were early_cpu_init (run before ->arch_setup()) calls early_amd_init
+ * which pokes 0xcf8 port.
+ */
+ set_iopl.iopl = 1;
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
+ if (rc != 0)
+ xen_raw_printk("physdev_op failed %d\n", rc);
+
#ifdef CONFIG_X86_32
/* set up basic CPUID stuff */
cpu_detect(&new_cpu_data);
@@ -1223,6 +1236,8 @@ asmlinkage void __init xen_start_kernel(void)
add_preferred_console("xenboot", 0, NULL);
add_preferred_console("tty", 0, NULL);
add_preferred_console("hvc", 0, NULL);
+ if (pci_xen)
+ x86_init.pci.arch_init = pci_xen_init;
} else {
/* Make sure ACS will be enabled */
pci_request_acs();
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index f72d18c69221..44924e551fde 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -57,6 +57,7 @@
#include <asm/linkage.h>
#include <asm/page.h>
#include <asm/init.h>
+#include <asm/pat.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
@@ -140,7 +141,8 @@ static inline void check_zero(void)
* large enough to allocate page table pages to allocate the rest.
* Each page can map 2MB.
*/
-static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
+#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4)
+static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
#ifdef CONFIG_X86_64
/* l3 pud for userspace vsyscall mapping */
@@ -171,49 +173,182 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
*/
#define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)
+/*
+ * Xen leaves the responsibility for maintaining p2m mappings to the
+ * guests themselves, but it must also access and update the p2m array
+ * during suspend/resume when all the pages are reallocated.
+ *
+ * The p2m table is logically a flat array, but we implement it as a
+ * three-level tree to allow the address space to be sparse.
+ *
+ * Xen
+ * |
+ * p2m_top p2m_top_mfn
+ * / \ / \
+ * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn
+ * / \ / \ / /
+ * p2m p2m p2m p2m p2m p2m p2m ...
+ *
+ * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
+ *
+ * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the
+ * maximum representable pseudo-physical address space is:
+ * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
+ *
+ * P2M_PER_PAGE depends on the architecture, as a mfn is always
+ * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
+ * 512 and 1024 entries respectively.
+ */
-#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
-#define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
+unsigned long xen_max_p2m_pfn __read_mostly;
-/* Placeholder for holes in the address space */
-static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data =
- { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
+#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
+#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *))
+#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **))
- /* Array of pointers to pages containing p2m entries */
-static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data =
- { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
+#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
-/* Arrays of p2m arrays expressed in mfns used for save/restore */
-static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss;
+/* Placeholders for holes in the address space */
+static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
+static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
+static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE);
-static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
- __page_aligned_bss;
+static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
+static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
+static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
+
+RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
+RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
static inline unsigned p2m_top_index(unsigned long pfn)
{
- BUG_ON(pfn >= MAX_DOMAIN_PAGES);
- return pfn / P2M_ENTRIES_PER_PAGE;
+ BUG_ON(pfn >= MAX_P2M_PFN);
+ return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
+}
+
+static inline unsigned p2m_mid_index(unsigned long pfn)
+{
+ return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
}
static inline unsigned p2m_index(unsigned long pfn)
{
- return pfn % P2M_ENTRIES_PER_PAGE;
+ return pfn % P2M_PER_PAGE;
}
-/* Build the parallel p2m_top_mfn structures */
+static void p2m_top_init(unsigned long ***top)
+{
+ unsigned i;
+
+ for (i = 0; i < P2M_TOP_PER_PAGE; i++)
+ top[i] = p2m_mid_missing;
+}
+
+static void p2m_top_mfn_init(unsigned long *top)
+{
+ unsigned i;
+
+ for (i = 0; i < P2M_TOP_PER_PAGE; i++)
+ top[i] = virt_to_mfn(p2m_mid_missing_mfn);
+}
+
+static void p2m_top_mfn_p_init(unsigned long **top)
+{
+ unsigned i;
+
+ for (i = 0; i < P2M_TOP_PER_PAGE; i++)
+ top[i] = p2m_mid_missing_mfn;
+}
+
+static void p2m_mid_init(unsigned long **mid)
+{
+ unsigned i;
+
+ for (i = 0; i < P2M_MID_PER_PAGE; i++)
+ mid[i] = p2m_missing;
+}
+
+static void p2m_mid_mfn_init(unsigned long *mid)
+{
+ unsigned i;
+
+ for (i = 0; i < P2M_MID_PER_PAGE; i++)
+ mid[i] = virt_to_mfn(p2m_missing);
+}
+
+static void p2m_init(unsigned long *p2m)
+{
+ unsigned i;
+
+ for (i = 0; i < P2M_MID_PER_PAGE; i++)
+ p2m[i] = INVALID_P2M_ENTRY;
+}
+
+/*
+ * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
+ *
+ * This is called both at boot time, and after resuming from suspend:
+ * - At boot time we're called very early, and must use extend_brk()
+ * to allocate memory.
+ *
+ * - After resume we're called from within stop_machine, but the mfn
+ * tree should alreay be completely allocated.
+ */
void xen_build_mfn_list_list(void)
{
- unsigned pfn, idx;
+ unsigned long pfn;
- for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
- unsigned topidx = p2m_top_index(pfn);
+ /* Pre-initialize p2m_top_mfn to be completely missing */
+ if (p2m_top_mfn == NULL) {
+ p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_mid_mfn_init(p2m_mid_missing_mfn);
- p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
+ p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_top_mfn_p_init(p2m_top_mfn_p);
+
+ p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_top_mfn_init(p2m_top_mfn);
+ } else {
+ /* Reinitialise, mfn's all change after migration */
+ p2m_mid_mfn_init(p2m_mid_missing_mfn);
}
- for (idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
- unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
- p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
+ for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
+ unsigned topidx = p2m_top_index(pfn);
+ unsigned mididx = p2m_mid_index(pfn);
+ unsigned long **mid;
+ unsigned long *mid_mfn_p;
+
+ mid = p2m_top[topidx];
+ mid_mfn_p = p2m_top_mfn_p[topidx];
+
+ /* Don't bother allocating any mfn mid levels if
+ * they're just missing, just update the stored mfn,
+ * since all could have changed over a migrate.
+ */
+ if (mid == p2m_mid_missing) {
+ BUG_ON(mididx);
+ BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
+ p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
+ pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE;
+ continue;
+ }
+
+ if (mid_mfn_p == p2m_mid_missing_mfn) {
+ /*
+ * XXX boot-time only! We should never find
+ * missing parts of the mfn tree after
+ * runtime. extend_brk() will BUG if we call
+ * it too late.
+ */
+ mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_mid_mfn_init(mid_mfn_p);
+
+ p2m_top_mfn_p[topidx] = mid_mfn_p;
+ }
+
+ p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
+ mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]);
}
}
@@ -222,8 +357,8 @@ void xen_setup_mfn_list_list(void)
BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
- virt_to_mfn(p2m_top_mfn_list);
- HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages;
+ virt_to_mfn(p2m_top_mfn);
+ HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn;
}
/* Set up p2m_top to point to the domain-builder provided p2m pages */
@@ -231,98 +366,176 @@ void __init xen_build_dynamic_phys_to_machine(void)
{
unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
- unsigned pfn;
+ unsigned long pfn;
+
+ xen_max_p2m_pfn = max_pfn;
- for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
+ p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_init(p2m_missing);
+
+ p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_mid_init(p2m_mid_missing);
+
+ p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_top_init(p2m_top);
+
+ /*
+ * The domain builder gives us a pre-constructed p2m array in
+ * mfn_list for all the pages initially given to us, so we just
+ * need to graft that into our tree structure.
+ */
+ for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
unsigned topidx = p2m_top_index(pfn);
+ unsigned mididx = p2m_mid_index(pfn);
- p2m_top[topidx] = &mfn_list[pfn];
- }
+ if (p2m_top[topidx] == p2m_mid_missing) {
+ unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_mid_init(mid);
- xen_build_mfn_list_list();
+ p2m_top[topidx] = mid;
+ }
+
+ p2m_top[topidx][mididx] = &mfn_list[pfn];
+ }
}
unsigned long get_phys_to_machine(unsigned long pfn)
{
- unsigned topidx, idx;
+ unsigned topidx, mididx, idx;
- if (unlikely(pfn >= MAX_DOMAIN_PAGES))
+ if (unlikely(pfn >= MAX_P2M_PFN))
return INVALID_P2M_ENTRY;
topidx = p2m_top_index(pfn);
+ mididx = p2m_mid_index(pfn);
idx = p2m_index(pfn);
- return p2m_top[topidx][idx];
+
+ return p2m_top[topidx][mididx][idx];
}
EXPORT_SYMBOL_GPL(get_phys_to_machine);
-/* install a new p2m_top page */
-bool install_p2mtop_page(unsigned long pfn, unsigned long *p)
+static void *alloc_p2m_page(void)
{
- unsigned topidx = p2m_top_index(pfn);
- unsigned long **pfnp, *mfnp;
- unsigned i;
+ return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
+}
- pfnp = &p2m_top[topidx];
- mfnp = &p2m_top_mfn[topidx];
+static void free_p2m_page(void *p)
+{
+ free_page((unsigned long)p);
+}
+
+/*
+ * Fully allocate the p2m structure for a given pfn. We need to check
+ * that both the top and mid levels are allocated, and make sure the
+ * parallel mfn tree is kept in sync. We may race with other cpus, so
+ * the new pages are installed with cmpxchg; if we lose the race then
+ * simply free the page we allocated and use the one that's there.
+ */
+static bool alloc_p2m(unsigned long pfn)
+{
+ unsigned topidx, mididx;
+ unsigned long ***top_p, **mid;
+ unsigned long *top_mfn_p, *mid_mfn;
- for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
- p[i] = INVALID_P2M_ENTRY;
+ topidx = p2m_top_index(pfn);
+ mididx = p2m_mid_index(pfn);
- if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) {
- *mfnp = virt_to_mfn(p);
- return true;
+ top_p = &p2m_top[topidx];
+ mid = *top_p;
+
+ if (mid == p2m_mid_missing) {
+ /* Mid level is missing, allocate a new one */
+ mid = alloc_p2m_page();
+ if (!mid)
+ return false;
+
+ p2m_mid_init(mid);
+
+ if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
+ free_p2m_page(mid);
}
- return false;
-}
+ top_mfn_p = &p2m_top_mfn[topidx];
+ mid_mfn = p2m_top_mfn_p[topidx];
-static void alloc_p2m(unsigned long pfn)
-{
- unsigned long *p;
+ BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
+
+ if (mid_mfn == p2m_mid_missing_mfn) {
+ /* Separately check the mid mfn level */
+ unsigned long missing_mfn;
+ unsigned long mid_mfn_mfn;
+
+ mid_mfn = alloc_p2m_page();
+ if (!mid_mfn)
+ return false;
+
+ p2m_mid_mfn_init(mid_mfn);
+
+ missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
+ mid_mfn_mfn = virt_to_mfn(mid_mfn);
+ if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn)
+ free_p2m_page(mid_mfn);
+ else
+ p2m_top_mfn_p[topidx] = mid_mfn;
+ }
+
+ if (p2m_top[topidx][mididx] == p2m_missing) {
+ /* p2m leaf page is missing */
+ unsigned long *p2m;
+
+ p2m = alloc_p2m_page();
+ if (!p2m)
+ return false;
- p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
- BUG_ON(p == NULL);
+ p2m_init(p2m);
- if (!install_p2mtop_page(pfn, p))
- free_page((unsigned long)p);
+ if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
+ free_p2m_page(p2m);
+ else
+ mid_mfn[mididx] = virt_to_mfn(p2m);
+ }
+
+ return true;
}
/* Try to install p2m mapping; fail if intermediate bits missing */
bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
- unsigned topidx, idx;
+ unsigned topidx, mididx, idx;
- if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
+ if (unlikely(pfn >= MAX_P2M_PFN)) {
BUG_ON(mfn != INVALID_P2M_ENTRY);
return true;
}
topidx = p2m_top_index(pfn);
- if (p2m_top[topidx] == p2m_missing) {
- if (mfn == INVALID_P2M_ENTRY)
- return true;
- return false;
- }
-
+ mididx = p2m_mid_index(pfn);
idx = p2m_index(pfn);
- p2m_top[topidx][idx] = mfn;
+
+ if (p2m_top[topidx][mididx] == p2m_missing)
+ return mfn == INVALID_P2M_ENTRY;
+
+ p2m_top[topidx][mididx][idx] = mfn;
return true;
}
-void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
- return;
+ return true;
}
if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
- alloc_p2m(pfn);
+ if (!alloc_p2m(pfn))
+ return false;
if (!__set_phys_to_machine(pfn, mfn))
- BUG();
+ return false;
}
+
+ return true;
}
unsigned long arbitrary_virt_to_mfn(void *vaddr)
@@ -399,7 +612,7 @@ static bool xen_iomap_pte(pte_t pte)
return pte_flags(pte) & _PAGE_IOMAP;
}
-static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
+void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
{
struct multicall_space mcs;
struct mmu_update *u;
@@ -411,10 +624,16 @@ static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
u->ptr = arbitrary_virt_to_machine(ptep).maddr;
u->val = pte_val_ma(pteval);
- MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_IO);
+ MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid);
xen_mc_issue(PARAVIRT_LAZY_MMU);
}
+EXPORT_SYMBOL_GPL(xen_set_domain_pte);
+
+static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
+{
+ xen_set_domain_pte(ptep, pteval, DOMID_IO);
+}
static void xen_extend_mmu_update(const struct mmu_update *update)
{
@@ -561,7 +780,20 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
if (val & _PAGE_PRESENT) {
unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
pteval_t flags = val & PTE_FLAGS_MASK;
- val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
+ unsigned long mfn = pfn_to_mfn(pfn);
+
+ /*
+ * If there's no mfn for the pfn, then just create an
+ * empty non-present pte. Unfortunately this loses
+ * information about the original pfn, so
+ * pte_mfn_to_pfn is asymmetric.
+ */
+ if (unlikely(mfn == INVALID_P2M_ENTRY)) {
+ mfn = 0;
+ flags = 0;
+ }
+
+ val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
}
return val;
@@ -583,10 +815,18 @@ static pteval_t iomap_pte(pteval_t val)
pteval_t xen_pte_val(pte_t pte)
{
- if (xen_initial_domain() && (pte.pte & _PAGE_IOMAP))
- return pte.pte;
+ pteval_t pteval = pte.pte;
+
+ /* If this is a WC pte, convert back from Xen WC to Linux WC */
+ if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) {
+ WARN_ON(!pat_enabled);
+ pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
+ }
- return pte_mfn_to_pfn(pte.pte);
+ if (xen_initial_domain() && (pteval & _PAGE_IOMAP))
+ return pteval;
+
+ return pte_mfn_to_pfn(pteval);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
@@ -596,10 +836,48 @@ pgdval_t xen_pgd_val(pgd_t pgd)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
+/*
+ * Xen's PAT setup is part of its ABI, though I assume entries 6 & 7
+ * are reserved for now, to correspond to the Intel-reserved PAT
+ * types.
+ *
+ * We expect Linux's PAT set as follows:
+ *
+ * Idx PTE flags Linux Xen Default
+ * 0 WB WB WB
+ * 1 PWT WC WT WT
+ * 2 PCD UC- UC- UC-
+ * 3 PCD PWT UC UC UC
+ * 4 PAT WB WC WB
+ * 5 PAT PWT WC WP WT
+ * 6 PAT PCD UC- UC UC-
+ * 7 PAT PCD PWT UC UC UC
+ */
+
+void xen_set_pat(u64 pat)
+{
+ /* We expect Linux to use a PAT setting of
+ * UC UC- WC WB (ignoring the PAT flag) */
+ WARN_ON(pat != 0x0007010600070106ull);
+}
+
pte_t xen_make_pte(pteval_t pte)
{
phys_addr_t addr = (pte & PTE_PFN_MASK);
+ /* If Linux is trying to set a WC pte, then map to the Xen WC.
+ * If _PAGE_PAT is set, then it probably means it is really
+ * _PAGE_PSE, so avoid fiddling with the PAT mapping and hope
+ * things work out OK...
+ *
+ * (We should never see kernel mappings with _PAGE_PSE set,
+ * but we could see hugetlbfs mappings, I think.).
+ */
+ if (pat_enabled && !WARN_ON(pte & _PAGE_PAT)) {
+ if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT)
+ pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
+ }
+
/*
* Unprivileged domains are allowed to do IOMAPpings for
* PCI passthrough, but not map ISA space. The ISA
@@ -1697,6 +1975,7 @@ static void *m2v(phys_addr_t maddr)
return __ka(m2p(maddr));
}
+/* Set the page permissions on an identity-mapped pages */
static void set_page_prot(void *addr, pgprot_t prot)
{
unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
@@ -1712,6 +1991,9 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
unsigned ident_pte;
unsigned long pfn;
+ level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES,
+ PAGE_SIZE);
+
ident_pte = 0;
pfn = 0;
for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
@@ -1722,7 +2004,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
pte_page = m2v(pmd[pmdidx].pmd);
else {
/* Check for free pte pages */
- if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
+ if (ident_pte == LEVEL1_IDENT_ENTRIES)
break;
pte_page = &level1_ident_pgt[ident_pte];
@@ -1752,6 +2034,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
set_page_prot(pmd, PAGE_KERNEL_RO);
}
+void __init xen_setup_machphys_mapping(void)
+{
+ struct xen_machphys_mapping mapping;
+ unsigned long machine_to_phys_nr_ents;
+
+ if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
+ machine_to_phys_mapping = (unsigned long *)mapping.v_start;
+ machine_to_phys_nr_ents = mapping.max_mfn + 1;
+ } else {
+ machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
+ }
+ machine_to_phys_order = fls(machine_to_phys_nr_ents - 1);
+}
+
#ifdef CONFIG_X86_64
static void convert_pfn_mfn(void *v)
{
@@ -1837,45 +2133,88 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
return pgd;
}
#else /* !CONFIG_X86_64 */
-static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
+static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
+static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
+
+static __init void xen_write_cr3_init(unsigned long cr3)
+{
+ unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
+
+ BUG_ON(read_cr3() != __pa(initial_page_table));
+ BUG_ON(cr3 != __pa(swapper_pg_dir));
+
+ /*
+ * We are switching to swapper_pg_dir for the first time (from
+ * initial_page_table) and therefore need to mark that page
+ * read-only and then pin it.
+ *
+ * Xen disallows sharing of kernel PMDs for PAE
+ * guests. Therefore we must copy the kernel PMD from
+ * initial_page_table into a new kernel PMD to be used in
+ * swapper_pg_dir.
+ */
+ swapper_kernel_pmd =
+ extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
+ memcpy(swapper_kernel_pmd, initial_kernel_pmd,
+ sizeof(pmd_t) * PTRS_PER_PMD);
+ swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
+ __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
+ set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
+
+ set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
+ xen_write_cr3(cr3);
+ pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn);
+
+ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
+ PFN_DOWN(__pa(initial_page_table)));
+ set_page_prot(initial_page_table, PAGE_KERNEL);
+ set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
+
+ pv_mmu_ops.write_cr3 = &xen_write_cr3;
+}
__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
unsigned long max_pfn)
{
pmd_t *kernel_pmd;
+ initial_kernel_pmd =
+ extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
+
max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
xen_start_info->nr_pt_frames * PAGE_SIZE +
512*1024);
kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
- memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
+ memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
- xen_map_identity_early(level2_kernel_pgt, max_pfn);
+ xen_map_identity_early(initial_kernel_pmd, max_pfn);
- memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
- set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
- __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
+ memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
+ initial_page_table[KERNEL_PGD_BOUNDARY] =
+ __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
- set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
- set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
+ set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
+ set_page_prot(initial_page_table, PAGE_KERNEL_RO);
set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
- xen_write_cr3(__pa(swapper_pg_dir));
-
- pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
+ pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
+ PFN_DOWN(__pa(initial_page_table)));
+ xen_write_cr3(__pa(initial_page_table));
memblock_x86_reserve_range(__pa(xen_start_info->pt_base),
__pa(xen_start_info->pt_base +
xen_start_info->nr_pt_frames * PAGE_SIZE),
"XEN PAGETABLES");
- return swapper_pg_dir;
+ return initial_page_table;
}
#endif /* CONFIG_X86_64 */
+static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
+
static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
{
pte_t pte;
@@ -1896,15 +2235,28 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
#else
case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
#endif
-#ifdef CONFIG_X86_LOCAL_APIC
- case FIX_APIC_BASE: /* maps dummy local APIC */
-#endif
case FIX_TEXT_POKE0:
case FIX_TEXT_POKE1:
/* All local page mappings */
pte = pfn_pte(phys, prot);
break;
+#ifdef CONFIG_X86_LOCAL_APIC
+ case FIX_APIC_BASE: /* maps dummy local APIC */
+ pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
+ break;
+#endif
+
+#ifdef CONFIG_X86_IO_APIC
+ case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END:
+ /*
+ * We just don't map the IO APIC - all access is via
+ * hypercalls. Keep the address in the pte for reference.
+ */
+ pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
+ break;
+#endif
+
case FIX_PARAVIRT_BOOTMAP:
/* This is an MFN, but it isn't an IO mapping from the
IO domain */
@@ -1929,6 +2281,29 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
#endif
}
+__init void xen_ident_map_ISA(void)
+{
+ unsigned long pa;
+
+ /*
+ * If we're dom0, then linear map the ISA machine addresses into
+ * the kernel's address space.
+ */
+ if (!xen_initial_domain())
+ return;
+
+ xen_raw_printk("Xen: setup ISA identity maps\n");
+
+ for (pa = ISA_START_ADDRESS; pa < ISA_END_ADDRESS; pa += PAGE_SIZE) {
+ pte_t pte = mfn_pte(PFN_DOWN(pa), PAGE_KERNEL_IO);
+
+ if (HYPERVISOR_update_va_mapping(PAGE_OFFSET + pa, pte, 0))
+ BUG();
+ }
+
+ xen_flush_tlb();
+}
+
static __init void xen_post_allocator_init(void)
{
pv_mmu_ops.set_pte = xen_set_pte;
@@ -1968,7 +2343,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
.write_cr2 = xen_write_cr2,
.read_cr3 = xen_read_cr3,
+#ifdef CONFIG_X86_32
+ .write_cr3 = xen_write_cr3_init,
+#else
.write_cr3 = xen_write_cr3,
+#endif
.flush_tlb_user = xen_flush_tlb,
.flush_tlb_kernel = xen_flush_tlb,
@@ -2036,7 +2415,7 @@ void __init xen_init_mmu_ops(void)
x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
pv_mmu_ops = xen_mmu_ops;
- vmap_lazy_unmap = false;
+ memset(dummy_mapping, 0xff, PAGE_SIZE);
}
/* Protected by xen_reservation_lock. */
@@ -2269,6 +2648,73 @@ void __init xen_hvm_init_mmu_ops(void)
}
#endif
+#define REMAP_BATCH_SIZE 16
+
+struct remap_data {
+ unsigned long mfn;
+ pgprot_t prot;
+ struct mmu_update *mmu_update;
+};
+
+static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
+ unsigned long addr, void *data)
+{
+ struct remap_data *rmd = data;
+ pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
+
+ rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr;
+ rmd->mmu_update->val = pte_val_ma(pte);
+ rmd->mmu_update++;
+
+ return 0;
+}
+
+int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+ unsigned long addr,
+ unsigned long mfn, int nr,
+ pgprot_t prot, unsigned domid)
+{
+ struct remap_data rmd;
+ struct mmu_update mmu_update[REMAP_BATCH_SIZE];
+ int batch;
+ unsigned long range;
+ int err = 0;
+
+ prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
+
+ BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
+ (VM_PFNMAP | VM_RESERVED | VM_IO)));
+
+ rmd.mfn = mfn;
+ rmd.prot = prot;
+
+ while (nr) {
+ batch = min(REMAP_BATCH_SIZE, nr);
+ range = (unsigned long)batch << PAGE_SHIFT;
+
+ rmd.mmu_update = mmu_update;
+ err = apply_to_page_range(vma->vm_mm, addr, range,
+ remap_area_mfn_pte_fn, &rmd);
+ if (err)
+ goto out;
+
+ err = -EFAULT;
+ if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0)
+ goto out;
+
+ nr -= batch;
+ addr += range;
+ }
+
+ err = 0;
+out:
+
+ flush_tlb_all();
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
+
#ifdef CONFIG_XEN_DEBUG_FS
static struct dentry *d_mmu_debug;
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index fa938c4aa2f7..537bb9aab777 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -12,7 +12,6 @@ enum pt_level {
bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
-bool install_p2mtop_page(unsigned long pfn, unsigned long *p);
void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 22471001b74c..bfd0632fe65e 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -1,6 +1,7 @@
/* Glue code to lib/swiotlb-xen.c */
#include <linux/dma-mapping.h>
+#include <linux/pci.h>
#include <xen/swiotlb-xen.h>
#include <asm/xen/hypervisor.h>
@@ -55,6 +56,9 @@ void __init pci_xen_swiotlb_init(void)
if (xen_swiotlb) {
xen_swiotlb_init(1);
dma_ops = &xen_swiotlb_dma_ops;
+
+ /* Make sure ACS will be enabled */
+ pci_request_acs();
}
}
IOMMU_INIT_FINISH(pci_xen_swiotlb_detect,
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 0f456386cce5..25c52f94a27c 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -68,7 +68,7 @@ static int __init check_platform_magic(void)
return 0;
}
-void __init xen_unplug_emulated_devices(void)
+void xen_unplug_emulated_devices(void)
{
int r;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 9729c903404b..b5a7f928234b 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -18,10 +18,11 @@
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
+#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/callback.h>
-#include <xen/interface/physdev.h>
#include <xen/interface/memory.h>
+#include <xen/interface/physdev.h>
#include <xen/features.h>
#include "xen-ops.h"
@@ -34,6 +35,39 @@ extern void xen_sysenter_target(void);
extern void xen_syscall_target(void);
extern void xen_syscall32_target(void);
+/* Amount of extra memory space we add to the e820 ranges */
+phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
+
+/*
+ * The maximum amount of extra memory compared to the base size. The
+ * main scaling factor is the size of struct page. At extreme ratios
+ * of base:extra, all the base memory can be filled with page
+ * structures for the extra memory, leaving no space for anything
+ * else.
+ *
+ * 10x seems like a reasonable balance between scaling flexibility and
+ * leaving a practically usable system.
+ */
+#define EXTRA_MEM_RATIO (10)
+
+static __init void xen_add_extra_mem(unsigned long pages)
+{
+ u64 size = (u64)pages * PAGE_SIZE;
+ u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
+
+ if (!pages)
+ return;
+
+ e820_add_region(extra_start, size, E820_RAM);
+ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+
+ memblock_x86_reserve_range(extra_start, extra_start + size, "XEN EXTRA");
+
+ xen_extra_mem_size += size;
+
+ xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
+}
+
static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
phys_addr_t end_addr)
{
@@ -83,16 +117,18 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
const struct e820map *e820)
{
phys_addr_t max_addr = PFN_PHYS(max_pfn);
- phys_addr_t last_end = 0;
+ phys_addr_t last_end = ISA_END_ADDRESS;
unsigned long released = 0;
int i;
+ /* Free any unused memory above the low 1Mbyte. */
for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
phys_addr_t end = e820->map[i].addr;
end = min(max_addr, end);
- released += xen_release_chunk(last_end, end);
- last_end = e820->map[i].addr + e820->map[i].size;
+ if (last_end < end)
+ released += xen_release_chunk(last_end, end);
+ last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
}
if (last_end < max_addr)
@@ -105,21 +141,72 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
/**
* machine_specific_memory_setup - Hook for machine specific memory setup.
**/
-
char * __init xen_memory_setup(void)
{
+ static struct e820entry map[E820MAX] __initdata;
+
unsigned long max_pfn = xen_start_info->nr_pages;
+ unsigned long long mem_end;
+ int rc;
+ struct xen_memory_map memmap;
+ unsigned long extra_pages = 0;
+ unsigned long extra_limit;
+ int i;
+ int op;
max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
+ mem_end = PFN_PHYS(max_pfn);
+
+ memmap.nr_entries = E820MAX;
+ set_xen_guest_handle(memmap.buffer, map);
+
+ op = xen_initial_domain() ?
+ XENMEM_machine_memory_map :
+ XENMEM_memory_map;
+ rc = HYPERVISOR_memory_op(op, &memmap);
+ if (rc == -ENOSYS) {
+ BUG_ON(xen_initial_domain());
+ memmap.nr_entries = 1;
+ map[0].addr = 0ULL;
+ map[0].size = mem_end;
+ /* 8MB slack (to balance backend allocations). */
+ map[0].size += 8ULL << 20;
+ map[0].type = E820_RAM;
+ rc = 0;
+ }
+ BUG_ON(rc);
e820.nr_map = 0;
+ xen_extra_mem_start = mem_end;
+ for (i = 0; i < memmap.nr_entries; i++) {
+ unsigned long long end = map[i].addr + map[i].size;
+
+ if (map[i].type == E820_RAM && end > mem_end) {
+ /* RAM off the end - may be partially included */
+ u64 delta = min(map[i].size, end - mem_end);
- e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM);
+ map[i].size -= delta;
+ end -= delta;
+
+ extra_pages += PFN_DOWN(delta);
+ }
+
+ if (map[i].size > 0 && end > xen_extra_mem_start)
+ xen_extra_mem_start = end;
+
+ /* Add region if any remains */
+ if (map[i].size > 0)
+ e820_add_region(map[i].addr, map[i].size, map[i].type);
+ }
/*
- * Even though this is normal, usable memory under Xen, reserve
- * ISA memory anyway because too many things think they can poke
+ * In domU, the ISA region is normal, usable memory, but we
+ * reserve ISA memory anyway because too many things poke
* about in there.
+ *
+ * In Dom0, the host E820 information can leave gaps in the
+ * ISA range, which would cause us to release those pages. To
+ * avoid this, we unconditionally reserve them here.
*/
e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
E820_RESERVED);
@@ -136,23 +223,30 @@ char * __init xen_memory_setup(void)
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
- xen_return_unused_memory(xen_start_info->nr_pages, &e820);
+ extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
- return "Xen";
-}
+ /*
+ * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
+ * factor the base size. On non-highmem systems, the base
+ * size is the full initial memory allocation; on highmem it
+ * is limited to the max size of lowmem, so that it doesn't
+ * get completely filled.
+ *
+ * In principle there could be a problem in lowmem systems if
+ * the initial memory is also very large with respect to
+ * lowmem, but we won't try to deal with that here.
+ */
+ extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
+ max_pfn + extra_pages);
-static void xen_idle(void)
-{
- local_irq_disable();
-
- if (need_resched())
- local_irq_enable();
- else {
- current_thread_info()->status &= ~TS_POLLING;
- smp_mb__after_clear_bit();
- safe_halt();
- current_thread_info()->status |= TS_POLLING;
- }
+ if (extra_limit >= max_pfn)
+ extra_pages = extra_limit - max_pfn;
+ else
+ extra_pages = 0;
+
+ xen_add_extra_mem(extra_pages);
+
+ return "Xen";
}
/*
@@ -224,9 +318,6 @@ void __cpuinit xen_enable_syscall(void)
void __init xen_arch_setup(void)
{
- struct physdev_set_iopl set_iopl;
- int rc;
-
xen_panic_handler_init();
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
@@ -243,11 +334,6 @@ void __init xen_arch_setup(void)
xen_enable_sysenter();
xen_enable_syscall();
- set_iopl.iopl = 1;
- rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
- if (rc != 0)
- printk(KERN_INFO "physdev_op failed %d\n", rc);
-
#ifdef CONFIG_ACPI
if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
@@ -259,9 +345,11 @@ void __init xen_arch_setup(void)
MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
- pm_idle = xen_idle;
-
- paravirt_disable_iospace();
+ /* Set up idle, making sure it calls safe_halt() pvop */
+#ifdef CONFIG_X86_32
+ boot_cpu_data.hlt_works_ok = 1;
+#endif
+ pm_idle = default_idle;
fiddle_vdso();
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 25f232b18a82..72a4c7959045 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -28,6 +28,7 @@
#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>
+#include <xen/xen.h>
#include <xen/page.h>
#include <xen/events.h>
@@ -156,11 +157,35 @@ static void __init xen_fill_possible_map(void)
{
int i, rc;
+ if (xen_initial_domain())
+ return;
+
+ for (i = 0; i < nr_cpu_ids; i++) {
+ rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
+ if (rc >= 0) {
+ num_processors++;
+ set_cpu_possible(i, true);
+ }
+ }
+}
+
+static void __init xen_filter_cpu_maps(void)
+{
+ int i, rc;
+
+ if (!xen_initial_domain())
+ return;
+
+ num_processors = 0;
+ disabled_cpus = 0;
for (i = 0; i < nr_cpu_ids; i++) {
rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
if (rc >= 0) {
num_processors++;
set_cpu_possible(i, true);
+ } else {
+ set_cpu_possible(i, false);
+ set_cpu_present(i, false);
}
}
}
@@ -174,6 +199,7 @@ static void __init xen_smp_prepare_boot_cpu(void)
old memory can be recycled */
make_lowmem_page_readwrite(xen_initial_gdt);
+ xen_filter_cpu_maps();
xen_setup_vcpu_info_placement();
}
@@ -400,9 +426,9 @@ static void stop_self(void *v)
BUG();
}
-static void xen_smp_send_stop(void)
+static void xen_stop_other_cpus(int wait)
{
- smp_call_function(stop_self, NULL, 0);
+ smp_call_function(stop_self, NULL, wait);
}
static void xen_smp_send_reschedule(int cpu)
@@ -470,7 +496,7 @@ static const struct smp_ops xen_smp_ops __initdata = {
.cpu_disable = xen_cpu_disable,
.play_dead = xen_play_dead,
- .smp_send_stop = xen_smp_send_stop,
+ .stop_other_cpus = xen_stop_other_cpus,
.smp_send_reschedule = xen_smp_send_reschedule,
.send_call_func_ipi = xen_smp_send_call_function_ipi,
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 1d789d56877c..9bbd63a129b5 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -31,6 +31,7 @@ void xen_hvm_post_suspend(int suspend_cancelled)
int cpu;
xen_hvm_init_shared_info();
xen_callback_vector();
+ xen_unplug_emulated_devices();
if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
for_each_online_cpu(cpu) {
xen_setup_runstate_info(cpu);
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index b2bb5aa3b054..5da5e53fb94c 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -426,6 +426,8 @@ void xen_timer_resume(void)
{
int cpu;
+ pvclock_resume();
+
if (xen_clockevent != &xen_vcpuop_clockevent)
return;
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 7c8ab86163e9..9d41bf985757 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -30,6 +30,9 @@ void xen_setup_machphys_mapping(void);
pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
void xen_ident_map_ISA(void);
void xen_reserve_top(void);
+extern unsigned long xen_max_p2m_pfn;
+
+void xen_set_pat(u64);
char * __init xen_memory_setup(void);
void __init xen_arch_setup(void);
@@ -40,7 +43,7 @@ void xen_vcpu_restore(void);
void xen_callback_vector(void);
void xen_hvm_init_shared_info(void);
-void __init xen_unplug_emulated_devices(void);
+void xen_unplug_emulated_devices(void);
void __init xen_build_dynamic_phys_to_machine(void);