aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2013-04-24 20:33:46 +0200
committerThomas Gleixner <tglx@linutronix.de>2013-04-24 20:33:54 +0200
commit6402c7dc2a19c19bd8cdc7d80878b850da418942 (patch)
treecda2ea2df40442e2aa016119f3548cc504127ea8 /arch/x86
parenttimekeeping: Update tk->cycle_last in resume (diff)
parentLinux 3.9-rc8 (diff)
downloadlinux-dev-6402c7dc2a19c19bd8cdc7d80878b850da418942.tar.xz
linux-dev-6402c7dc2a19c19bd8cdc7d80878b850da418942.zip
Merge branch 'linus' into timers/core
Reason: Get upstream fixes before adding conflicting code. Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig3
-rw-r--r--arch/x86/boot/compressed/Makefile5
-rw-r--r--arch/x86/boot/compressed/eboot.c47
-rw-r--r--arch/x86/include/asm/efi.h7
-rw-r--r--arch/x86/include/asm/kprobes.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h4
-rw-r--r--arch/x86/include/asm/paravirt.h5
-rw-r--r--arch/x86/include/asm/paravirt_types.h2
-rw-r--r--arch/x86/include/asm/syscall.h4
-rw-r--r--arch/x86/include/asm/tlb.h2
-rw-r--r--arch/x86/include/asm/xen/hypercall.h4
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h1
-rw-r--r--arch/x86/include/uapi/asm/msr-index.h1
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c18
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c24
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c13
-rw-r--r--arch/x86/kernel/kprobes/core.c5
-rw-r--r--arch/x86/kernel/microcode_core_early.c38
-rw-r--r--arch/x86/kernel/microcode_intel_early.c30
-rw-r--r--arch/x86/kernel/paravirt.c25
-rw-r--r--arch/x86/kernel/setup.c45
-rw-r--r--arch/x86/kvm/lapic.c2
-rw-r--r--arch/x86/kvm/x86.c69
-rw-r--r--arch/x86/lguest/boot.c1
-rw-r--r--arch/x86/lib/usercopy_64.c4
-rw-r--r--arch/x86/mm/fault.c6
-rw-r--r--arch/x86/mm/pageattr-test.c2
-rw-r--r--arch/x86/mm/pageattr.c12
-rw-r--r--arch/x86/mm/pgtable.c7
-rw-r--r--arch/x86/platform/efi/efi.c168
-rw-r--r--arch/x86/power/cpu.c2
-rw-r--r--arch/x86/xen/mmu.c16
32 files changed, 442 insertions, 131 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 26bd79261532..9f74f523dfc6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -112,7 +112,7 @@ config X86
select GENERIC_STRNLEN_USER
select HAVE_CONTEXT_TRACKING if X86_64
select HAVE_IRQ_TIME_ACCOUNTING
- select HAVE_VIRT_TO_BUS
+ select VIRT_TO_BUS
select MODULES_USE_ELF_REL if X86_32
select MODULES_USE_ELF_RELA if X86_64
select CLONE_BACKWARDS if X86_32
@@ -1550,6 +1550,7 @@ config X86_SMAP
config EFI
bool "EFI runtime service support"
depends on ACPI
+ select UCS2_STRING
---help---
This enables the kernel to use EFI runtime services that are
available (such as the EFI variable services).
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 8a84501acb1b..5ef205c5f37b 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -4,7 +4,7 @@
# create a compressed vmlinux image from the original vmlinux
#
-targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo head_$(BITS).o misc.o string.o cmdline.o early_serial_console.o piggy.o
+targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo
KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
@@ -29,7 +29,6 @@ VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
$(obj)/piggy.o
$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
-$(obj)/efi_stub_$(BITS).o: KBUILD_CLFAGS += -fshort-wchar -mno-red-zone
ifeq ($(CONFIG_EFI_STUB), y)
VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
@@ -43,7 +42,7 @@ OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
-targets += vmlinux.bin.all vmlinux.relocs
+targets += $(patsubst $(obj)/%,%,$(VMLINUX_OBJS)) vmlinux.bin.all vmlinux.relocs
CMD_RELOCS = arch/x86/tools/relocs
quiet_cmd_relocs = RELOCS $@
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index c205035a6b96..8615f7581820 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -251,6 +251,51 @@ static void find_bits(unsigned long mask, u8 *pos, u8 *size)
*size = len;
}
+static efi_status_t setup_efi_vars(struct boot_params *params)
+{
+ struct setup_data *data;
+ struct efi_var_bootdata *efidata;
+ u64 store_size, remaining_size, var_size;
+ efi_status_t status;
+
+ if (!sys_table->runtime->query_variable_info)
+ return EFI_UNSUPPORTED;
+
+ data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
+
+ while (data && data->next)
+ data = (struct setup_data *)(unsigned long)data->next;
+
+ status = efi_call_phys4(sys_table->runtime->query_variable_info,
+ EFI_VARIABLE_NON_VOLATILE |
+ EFI_VARIABLE_BOOTSERVICE_ACCESS |
+ EFI_VARIABLE_RUNTIME_ACCESS, &store_size,
+ &remaining_size, &var_size);
+
+ if (status != EFI_SUCCESS)
+ return status;
+
+ status = efi_call_phys3(sys_table->boottime->allocate_pool,
+ EFI_LOADER_DATA, sizeof(*efidata), &efidata);
+
+ if (status != EFI_SUCCESS)
+ return status;
+
+ efidata->data.type = SETUP_EFI_VARS;
+ efidata->data.len = sizeof(struct efi_var_bootdata) -
+ sizeof(struct setup_data);
+ efidata->data.next = 0;
+ efidata->store_size = store_size;
+ efidata->remaining_size = remaining_size;
+ efidata->max_var_size = var_size;
+
+ if (data)
+ data->next = (unsigned long)efidata;
+ else
+ params->hdr.setup_data = (unsigned long)efidata;
+
+}
+
static efi_status_t setup_efi_pci(struct boot_params *params)
{
efi_pci_io_protocol *pci;
@@ -1157,6 +1202,8 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
setup_graphics(boot_params);
+ setup_efi_vars(boot_params);
+
setup_efi_pci(boot_params);
status = efi_call_phys3(sys_table->boottime->allocate_pool,
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 60c89f30c727..2fb5d5884e23 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -102,6 +102,13 @@ extern void efi_call_phys_epilog(void);
extern void efi_unmap_memmap(void);
extern void efi_memory_uc(u64 addr, unsigned long size);
+struct efi_var_bootdata {
+ struct setup_data data;
+ u64 store_size;
+ u64 remaining_size;
+ u64 max_var_size;
+};
+
#ifdef CONFIG_EFI
static inline bool efi_is_native(void)
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index d3ddd17405d0..5a6d2873f80e 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -77,6 +77,7 @@ struct arch_specific_insn {
* a post_handler or break_handler).
*/
int boostable;
+ bool if_modifier;
};
struct arch_optimized_insn {
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 635a74d22409..4979778cc7fb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -414,8 +414,8 @@ struct kvm_vcpu_arch {
gpa_t time;
struct pvclock_vcpu_time_info hv_clock;
unsigned int hw_tsc_khz;
- unsigned int time_offset;
- struct page *time_page;
+ struct gfn_to_hva_cache pv_time;
+ bool pv_time_enabled;
/* set guest stopped flag in pvclock flags field */
bool pvclock_set_guest_stopped_request;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 5edd1742cfd0..7361e47db79f 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -703,7 +703,10 @@ static inline void arch_leave_lazy_mmu_mode(void)
PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
}
-void arch_flush_lazy_mmu_mode(void);
+static inline void arch_flush_lazy_mmu_mode(void)
+{
+ PVOP_VCALL0(pv_mmu_ops.lazy_mode.flush);
+}
static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
phys_addr_t phys, pgprot_t flags)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 142236ed83af..b3b0ec1dac86 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -91,6 +91,7 @@ struct pv_lazy_ops {
/* Set deferred update mode, used for batching operations. */
void (*enter)(void);
void (*leave)(void);
+ void (*flush)(void);
};
struct pv_time_ops {
@@ -679,6 +680,7 @@ void paravirt_end_context_switch(struct task_struct *next);
void paravirt_enter_lazy_mmu(void);
void paravirt_leave_lazy_mmu(void);
+void paravirt_flush_lazy_mmu(void);
void _paravirt_nop(void);
u32 _paravirt_ident_32(u32);
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 1ace47b62592..2e188d68397c 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -29,13 +29,13 @@ extern const unsigned long sys_call_table[];
*/
static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
{
- return regs->orig_ax & __SYSCALL_MASK;
+ return regs->orig_ax;
}
static inline void syscall_rollback(struct task_struct *task,
struct pt_regs *regs)
{
- regs->ax = regs->orig_ax & __SYSCALL_MASK;
+ regs->ax = regs->orig_ax;
}
static inline long syscall_get_error(struct task_struct *task,
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 4fef20773b8f..c7797307fc2b 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -7,7 +7,7 @@
#define tlb_flush(tlb) \
{ \
- if (tlb->fullmm == 0) \
+ if (!tlb->fullmm && !tlb->need_flush_all) \
flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, 0UL); \
else \
flush_tlb_mm_range(tlb->mm, 0UL, TLB_FLUSH_ALL, 0UL); \
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index c20d1ce62dc6..e709884d0ef9 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -382,14 +382,14 @@ HYPERVISOR_console_io(int cmd, int count, char *str)
return _hypercall3(int, console_io, cmd, count, str);
}
-extern int __must_check HYPERVISOR_physdev_op_compat(int, void *);
+extern int __must_check xen_physdev_op_compat(int, void *);
static inline int
HYPERVISOR_physdev_op(int cmd, void *arg)
{
int rc = _hypercall2(int, physdev_op, cmd, arg);
if (unlikely(rc == -ENOSYS))
- rc = HYPERVISOR_physdev_op_compat(cmd, arg);
+ rc = xen_physdev_op_compat(cmd, arg);
return rc;
}
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index c15ddaf90710..08744242b8d2 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -6,6 +6,7 @@
#define SETUP_E820_EXT 1
#define SETUP_DTB 2
#define SETUP_PCI 3
+#define SETUP_EFI_VARS 4
/* ram_size flags */
#define RAMDISK_IMAGE_START_MASK 0x07FF
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 892ce40a7470..7a060f4b411f 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -44,6 +44,7 @@
#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
+#define MSR_PLATFORM_INFO 0x000000ce
#define MSR_MTRRcap 0x000000fe
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index a7d26d83fb70..8f4be53ea04b 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -35,13 +35,6 @@ static bool __init ms_hyperv_platform(void)
if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
return false;
- /*
- * Xen emulates Hyper-V to support enlightened Windows.
- * Check to see first if we are on a Xen Hypervisor.
- */
- if (xen_cpuid_base())
- return false;
-
cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
&eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]);
@@ -82,12 +75,6 @@ static void __init ms_hyperv_init_platform(void)
if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);
-#if IS_ENABLED(CONFIG_HYPERV)
- /*
- * Setup the IDT for hypervisor callback.
- */
- alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
-#endif
}
const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
@@ -103,6 +90,11 @@ static irq_handler_t vmbus_isr;
void hv_register_vmbus_handler(int irq, irq_handler_t handler)
{
+ /*
+ * Setup the IDT for hypervisor callback.
+ */
+ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
+
vmbus_irq = irq;
vmbus_isr = handler;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 529c8931fc02..cc45deb791b0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -101,6 +101,10 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
+ INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
+ INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+ INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
@@ -149,8 +153,14 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
};
static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
- INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
- INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+ INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
+ INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
+ EVENT_EXTRA_END
+};
+
+static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
+ INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
+ INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
EVENT_EXTRA_END
};
@@ -2093,7 +2103,10 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
- x86_pmu.extra_regs = intel_snb_extra_regs;
+ if (boot_cpu_data.x86_model == 45)
+ x86_pmu.extra_regs = intel_snbep_extra_regs;
+ else
+ x86_pmu.extra_regs = intel_snb_extra_regs;
/* all extra regs are per-cpu when HT is on */
x86_pmu.er_flags |= ERF_HAS_RSP_1;
x86_pmu.er_flags |= ERF_NO_HT_SHARING;
@@ -2119,7 +2132,10 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_ivb_event_constraints;
x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
- x86_pmu.extra_regs = intel_snb_extra_regs;
+ if (boot_cpu_data.x86_model == 62)
+ x86_pmu.extra_regs = intel_snbep_extra_regs;
+ else
+ x86_pmu.extra_regs = intel_snb_extra_regs;
/* all extra regs are per-cpu when HT is on */
x86_pmu.er_flags |= ERF_HAS_RSP_1;
x86_pmu.er_flags |= ERF_NO_HT_SHARING;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 826054a4f2ee..26830f3af0df 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -314,10 +314,11 @@ int intel_pmu_drain_bts_buffer(void)
if (top <= at)
return 0;
+ memset(&regs, 0, sizeof(regs));
+
ds->bts_index = ds->bts_buffer_base;
perf_sample_data_init(&data, 0, event->hw.last_period);
- regs.ip = 0;
/*
* Prepare a generic sample, i.e. fill in the invariant fields.
@@ -729,3 +730,13 @@ void intel_ds_init(void)
}
}
}
+
+void perf_restore_debug_store(void)
+{
+ struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
+
+ if (!x86_pmu.bts && !x86_pmu.pebs)
+ return;
+
+ wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
+}
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 3f06e6149981..7bfe318d3d8a 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -375,6 +375,9 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
else
p->ainsn.boostable = -1;
+ /* Check whether the instruction modifies Interrupt Flag or not */
+ p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn);
+
/* Also, displacement change doesn't affect the first byte */
p->opcode = p->ainsn.insn[0];
}
@@ -434,7 +437,7 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
__this_cpu_write(current_kprobe, p);
kcb->kprobe_saved_flags = kcb->kprobe_old_flags
= (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
- if (is_IF_modifier(p->ainsn.insn))
+ if (p->ainsn.if_modifier)
kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF;
}
diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c
index 577db8417d15..833d51d6ee06 100644
--- a/arch/x86/kernel/microcode_core_early.c
+++ b/arch/x86/kernel/microcode_core_early.c
@@ -45,9 +45,6 @@ static int __cpuinit x86_vendor(void)
u32 eax = 0x00000000;
u32 ebx, ecx = 0, edx;
- if (!have_cpuid_p())
- return X86_VENDOR_UNKNOWN;
-
native_cpuid(&eax, &ebx, &ecx, &edx);
if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
@@ -59,18 +56,45 @@ static int __cpuinit x86_vendor(void)
return X86_VENDOR_UNKNOWN;
}
+static int __cpuinit x86_family(void)
+{
+ u32 eax = 0x00000001;
+ u32 ebx, ecx = 0, edx;
+ int x86;
+
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+
+ x86 = (eax >> 8) & 0xf;
+ if (x86 == 15)
+ x86 += (eax >> 20) & 0xff;
+
+ return x86;
+}
+
void __init load_ucode_bsp(void)
{
- int vendor = x86_vendor();
+ int vendor, x86;
+
+ if (!have_cpuid_p())
+ return;
- if (vendor == X86_VENDOR_INTEL)
+ vendor = x86_vendor();
+ x86 = x86_family();
+
+ if (vendor == X86_VENDOR_INTEL && x86 >= 6)
load_ucode_intel_bsp();
}
void __cpuinit load_ucode_ap(void)
{
- int vendor = x86_vendor();
+ int vendor, x86;
+
+ if (!have_cpuid_p())
+ return;
+
+ vendor = x86_vendor();
+ x86 = x86_family();
- if (vendor == X86_VENDOR_INTEL)
+ if (vendor == X86_VENDOR_INTEL && x86 >= 6)
load_ucode_intel_ap();
}
diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c
index 7890bc838952..d893e8ed8ac9 100644
--- a/arch/x86/kernel/microcode_intel_early.c
+++ b/arch/x86/kernel/microcode_intel_early.c
@@ -90,13 +90,13 @@ microcode_phys(struct microcode_intel **mc_saved_tmp,
struct microcode_intel ***mc_saved;
mc_saved = (struct microcode_intel ***)
- __pa_symbol(&mc_saved_data->mc_saved);
+ __pa_nodebug(&mc_saved_data->mc_saved);
for (i = 0; i < mc_saved_data->mc_saved_count; i++) {
struct microcode_intel *p;
p = *(struct microcode_intel **)
- __pa(mc_saved_data->mc_saved + i);
- mc_saved_tmp[i] = (struct microcode_intel *)__pa(p);
+ __pa_nodebug(mc_saved_data->mc_saved + i);
+ mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p);
}
}
#endif
@@ -562,7 +562,7 @@ scan_microcode(unsigned long start, unsigned long end,
struct cpio_data cd;
long offset = 0;
#ifdef CONFIG_X86_32
- char *p = (char *)__pa_symbol(ucode_name);
+ char *p = (char *)__pa_nodebug(ucode_name);
#else
char *p = ucode_name;
#endif
@@ -630,8 +630,8 @@ static void __cpuinit print_ucode(struct ucode_cpu_info *uci)
if (mc_intel == NULL)
return;
- delay_ucode_info_p = (int *)__pa_symbol(&delay_ucode_info);
- current_mc_date_p = (int *)__pa_symbol(&current_mc_date);
+ delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info);
+ current_mc_date_p = (int *)__pa_nodebug(&current_mc_date);
*delay_ucode_info_p = 1;
*current_mc_date_p = mc_intel->hdr.date;
@@ -659,8 +659,8 @@ static inline void __cpuinit print_ucode(struct ucode_cpu_info *uci)
}
#endif
-static int apply_microcode_early(struct mc_saved_data *mc_saved_data,
- struct ucode_cpu_info *uci)
+static int __cpuinit apply_microcode_early(struct mc_saved_data *mc_saved_data,
+ struct ucode_cpu_info *uci)
{
struct microcode_intel *mc_intel;
unsigned int val[2];
@@ -741,15 +741,15 @@ load_ucode_intel_bsp(void)
#ifdef CONFIG_X86_32
struct boot_params *boot_params_p;
- boot_params_p = (struct boot_params *)__pa_symbol(&boot_params);
+ boot_params_p = (struct boot_params *)__pa_nodebug(&boot_params);
ramdisk_image = boot_params_p->hdr.ramdisk_image;
ramdisk_size = boot_params_p->hdr.ramdisk_size;
initrd_start_early = ramdisk_image;
initrd_end_early = initrd_start_early + ramdisk_size;
_load_ucode_intel_bsp(
- (struct mc_saved_data *)__pa_symbol(&mc_saved_data),
- (unsigned long *)__pa_symbol(&mc_saved_in_initrd),
+ (struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
+ (unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
initrd_start_early, initrd_end_early, &uci);
#else
ramdisk_image = boot_params.hdr.ramdisk_image;
@@ -772,10 +772,10 @@ void __cpuinit load_ucode_intel_ap(void)
unsigned long *initrd_start_p;
mc_saved_in_initrd_p =
- (unsigned long *)__pa_symbol(mc_saved_in_initrd);
- mc_saved_data_p = (struct mc_saved_data *)__pa_symbol(&mc_saved_data);
- initrd_start_p = (unsigned long *)__pa_symbol(&initrd_start);
- initrd_start_addr = (unsigned long)__pa_symbol(*initrd_start_p);
+ (unsigned long *)__pa_nodebug(mc_saved_in_initrd);
+ mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
+ initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
+ initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p);
#else
mc_saved_data_p = &mc_saved_data;
mc_saved_in_initrd_p = mc_saved_in_initrd;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 17fff18a1031..8bfb335f74bb 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -263,6 +263,18 @@ void paravirt_leave_lazy_mmu(void)
leave_lazy(PARAVIRT_LAZY_MMU);
}
+void paravirt_flush_lazy_mmu(void)
+{
+ preempt_disable();
+
+ if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
+ arch_leave_lazy_mmu_mode();
+ arch_enter_lazy_mmu_mode();
+ }
+
+ preempt_enable();
+}
+
void paravirt_start_context_switch(struct task_struct *prev)
{
BUG_ON(preemptible());
@@ -292,18 +304,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
return this_cpu_read(paravirt_lazy_mode);
}
-void arch_flush_lazy_mmu_mode(void)
-{
- preempt_disable();
-
- if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
- arch_leave_lazy_mmu_mode();
- arch_enter_lazy_mmu_mode();
- }
-
- preempt_enable();
-}
-
struct pv_info pv_info = {
.name = "bare hardware",
.paravirt_enabled = 0,
@@ -475,6 +475,7 @@ struct pv_mmu_ops pv_mmu_ops = {
.lazy_mode = {
.enter = paravirt_nop,
.leave = paravirt_nop,
+ .flush = paravirt_nop,
},
.set_fixmap = native_set_fixmap,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 90d8cc930f5e..fae9134a2de9 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -507,11 +507,14 @@ static void __init memblock_x86_reserve_range_setup_data(void)
/*
* Keep the crash kernel below this limit. On 32 bits earlier kernels
* would limit the kernel to the low 512 MiB due to mapping restrictions.
+ * On 64bit, old kexec-tools need to under 896MiB.
*/
#ifdef CONFIG_X86_32
-# define CRASH_KERNEL_ADDR_MAX (512 << 20)
+# define CRASH_KERNEL_ADDR_LOW_MAX (512 << 20)
+# define CRASH_KERNEL_ADDR_HIGH_MAX (512 << 20)
#else
-# define CRASH_KERNEL_ADDR_MAX MAXMEM
+# define CRASH_KERNEL_ADDR_LOW_MAX (896UL<<20)
+# define CRASH_KERNEL_ADDR_HIGH_MAX MAXMEM
#endif
static void __init reserve_crashkernel_low(void)
@@ -521,19 +524,35 @@ static void __init reserve_crashkernel_low(void)
unsigned long long low_base = 0, low_size = 0;
unsigned long total_low_mem;
unsigned long long base;
+ bool auto_set = false;
int ret;
total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT));
+ /* crashkernel=Y,low */
ret = parse_crashkernel_low(boot_command_line, total_low_mem,
&low_size, &base);
- if (ret != 0 || low_size <= 0)
- return;
+ if (ret != 0) {
+ /*
+ * two parts from lib/swiotlb.c:
+ * swiotlb size: user specified with swiotlb= or default.
+ * swiotlb overflow buffer: now is hardcoded to 32k.
+ * We round it to 8M for other buffers that
+ * may need to stay low too.
+ */
+ low_size = swiotlb_size_or_default() + (8UL<<20);
+ auto_set = true;
+ } else {
+ /* passed with crashkernel=0,low ? */
+ if (!low_size)
+ return;
+ }
low_base = memblock_find_in_range(low_size, (1ULL<<32),
low_size, alignment);
if (!low_base) {
- pr_info("crashkernel low reservation failed - No suitable area found.\n");
+ if (!auto_set)
+ pr_info("crashkernel low reservation failed - No suitable area found.\n");
return;
}
@@ -554,14 +573,22 @@ static void __init reserve_crashkernel(void)
const unsigned long long alignment = 16<<20; /* 16M */
unsigned long long total_mem;
unsigned long long crash_size, crash_base;
+ bool high = false;
int ret;
total_mem = memblock_phys_mem_size();
+ /* crashkernel=XM */
ret = parse_crashkernel(boot_command_line, total_mem,
&crash_size, &crash_base);
- if (ret != 0 || crash_size <= 0)
- return;
+ if (ret != 0 || crash_size <= 0) {
+ /* crashkernel=X,high */
+ ret = parse_crashkernel_high(boot_command_line, total_mem,
+ &crash_size, &crash_base);
+ if (ret != 0 || crash_size <= 0)
+ return;
+ high = true;
+ }
/* 0 means: find the address automatically */
if (crash_base <= 0) {
@@ -569,7 +596,9 @@ static void __init reserve_crashkernel(void)
* kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
*/
crash_base = memblock_find_in_range(alignment,
- CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
+ high ? CRASH_KERNEL_ADDR_HIGH_MAX :
+ CRASH_KERNEL_ADDR_LOW_MAX,
+ crash_size, alignment);
if (!crash_base) {
pr_info("crashkernel reservation failed - No suitable area found.\n");
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 02b51dd4e4ad..f77df1c5de6e 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1857,7 +1857,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
if (!pv_eoi_enabled(vcpu))
return 0;
return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
- addr);
+ addr, sizeof(u8));
}
void kvm_lapic_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f71500af1f81..e1721324c271 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1406,25 +1406,15 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
unsigned long flags, this_tsc_khz;
struct kvm_vcpu_arch *vcpu = &v->arch;
struct kvm_arch *ka = &v->kvm->arch;
- void *shared_kaddr;
s64 kernel_ns, max_kernel_ns;
u64 tsc_timestamp, host_tsc;
- struct pvclock_vcpu_time_info *guest_hv_clock;
+ struct pvclock_vcpu_time_info guest_hv_clock;
u8 pvclock_flags;
bool use_master_clock;
kernel_ns = 0;
host_tsc = 0;
- /* Keep irq disabled to prevent changes to the clock */
- local_irq_save(flags);
- this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
- if (unlikely(this_tsc_khz == 0)) {
- local_irq_restore(flags);
- kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
- return 1;
- }
-
/*
* If the host uses TSC clock, then passthrough TSC as stable
* to the guest.
@@ -1436,6 +1426,15 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
kernel_ns = ka->master_kernel_ns;
}
spin_unlock(&ka->pvclock_gtod_sync_lock);
+
+ /* Keep irq disabled to prevent changes to the clock */
+ local_irq_save(flags);
+ this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
+ if (unlikely(this_tsc_khz == 0)) {
+ local_irq_restore(flags);
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
+ return 1;
+ }
if (!use_master_clock) {
host_tsc = native_read_tsc();
kernel_ns = get_kernel_ns();
@@ -1463,7 +1462,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
local_irq_restore(flags);
- if (!vcpu->time_page)
+ if (!vcpu->pv_time_enabled)
return 0;
/*
@@ -1525,12 +1524,12 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
*/
vcpu->hv_clock.version += 2;
- shared_kaddr = kmap_atomic(vcpu->time_page);
-
- guest_hv_clock = shared_kaddr + vcpu->time_offset;
+ if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
+ &guest_hv_clock, sizeof(guest_hv_clock))))
+ return 0;
/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
- pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
+ pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
if (vcpu->pvclock_set_guest_stopped_request) {
pvclock_flags |= PVCLOCK_GUEST_STOPPED;
@@ -1543,12 +1542,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->hv_clock.flags = pvclock_flags;
- memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
- sizeof(vcpu->hv_clock));
-
- kunmap_atomic(shared_kaddr);
-
- mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
+ kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+ &vcpu->hv_clock,
+ sizeof(vcpu->hv_clock));
return 0;
}
@@ -1827,7 +1823,8 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
return 0;
}
- if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa))
+ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
+ sizeof(u32)))
return 1;
vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
@@ -1837,10 +1834,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
static void kvmclock_reset(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.time_page) {
- kvm_release_page_dirty(vcpu->arch.time_page);
- vcpu->arch.time_page = NULL;
- }
+ vcpu->arch.pv_time_enabled = false;
}
static void accumulate_steal_time(struct kvm_vcpu *vcpu)
@@ -1947,6 +1941,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_KVM_SYSTEM_TIME_NEW:
case MSR_KVM_SYSTEM_TIME: {
+ u64 gpa_offset;
kvmclock_reset(vcpu);
vcpu->arch.time = data;
@@ -1956,14 +1951,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!(data & 1))
break;
- /* ...but clean it before doing the actual write */
- vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
-
- vcpu->arch.time_page =
- gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
+ gpa_offset = data & ~(PAGE_MASK | 1);
- if (is_error_page(vcpu->arch.time_page))
- vcpu->arch.time_page = NULL;
+ if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
+ &vcpu->arch.pv_time, data & ~1ULL,
+ sizeof(struct pvclock_vcpu_time_info)))
+ vcpu->arch.pv_time_enabled = false;
+ else
+ vcpu->arch.pv_time_enabled = true;
break;
}
@@ -1980,7 +1975,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
- data & KVM_STEAL_VALID_BITS))
+ data & KVM_STEAL_VALID_BITS,
+ sizeof(struct kvm_steal_time)))
return 1;
vcpu->arch.st.msr_val = data;
@@ -2967,7 +2963,7 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
*/
static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
{
- if (!vcpu->arch.time_page)
+ if (!vcpu->arch.pv_time_enabled)
return -EINVAL;
vcpu->arch.pvclock_set_guest_stopped_request = true;
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -6718,6 +6714,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
goto fail_free_wbinvd_dirty_mask;
vcpu->arch.ia32_tsc_adjust_msr = 0x0;
+ vcpu->arch.pv_time_enabled = false;
kvm_async_pf_hash_reset(vcpu);
kvm_pmu_init(vcpu);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 1cbd89ca5569..7114c63f047d 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1334,6 +1334,7 @@ __init void lguest_init(void)
pv_mmu_ops.read_cr3 = lguest_read_cr3;
pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode;
+ pv_mmu_ops.lazy_mode.flush = paravirt_flush_lazy_mmu;
pv_mmu_ops.pte_update = lguest_pte_update;
pv_mmu_ops.pte_update_defer = lguest_pte_update;
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 05928aae911e..906fea315791 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -74,10 +74,10 @@ copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest)
char c;
unsigned zero_len;
- for (; len; --len) {
+ for (; len; --len, to++) {
if (__get_user_nocheck(c, from++, sizeof(char)))
break;
- if (__put_user_nocheck(c, to++, sizeof(char)))
+ if (__put_user_nocheck(c, to, sizeof(char)))
break;
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 2b97525246d4..0e883364abb5 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -378,10 +378,12 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
if (pgd_none(*pgd_ref))
return -1;
- if (pgd_none(*pgd))
+ if (pgd_none(*pgd)) {
set_pgd(pgd, *pgd_ref);
- else
+ arch_flush_lazy_mmu_mode();
+ } else {
BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+ }
/*
* Below here mismatches are bugs because these lower tables
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index b0086567271c..0e38951e65eb 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -68,7 +68,7 @@ static int print_split(struct split_state *s)
s->gpg++;
i += GPS/PAGE_SIZE;
} else if (level == PG_LEVEL_2M) {
- if (!(pte_val(*pte) & _PAGE_PSE)) {
+ if ((pte_val(*pte) & _PAGE_PRESENT) && !(pte_val(*pte) & _PAGE_PSE)) {
printk(KERN_ERR
"%lx level %d but not PSE %Lx\n",
addr, level, (u64)pte_val(*pte));
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 091934e1d0d9..fb4e73ec24d8 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -467,7 +467,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
* We are safe now. Check whether the new pgprot is the same:
*/
old_pte = *kpte;
- old_prot = new_prot = req_prot = pte_pgprot(old_pte);
+ old_prot = req_prot = pte_pgprot(old_pte);
pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
@@ -478,12 +478,12 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
* a non present pmd. The canon_pgprot will clear _PAGE_GLOBAL
* for the ancient hardware that doesn't support it.
*/
- if (pgprot_val(new_prot) & _PAGE_PRESENT)
- pgprot_val(new_prot) |= _PAGE_PSE | _PAGE_GLOBAL;
+ if (pgprot_val(req_prot) & _PAGE_PRESENT)
+ pgprot_val(req_prot) |= _PAGE_PSE | _PAGE_GLOBAL;
else
- pgprot_val(new_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL);
+ pgprot_val(req_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL);
- new_prot = canon_pgprot(new_prot);
+ req_prot = canon_pgprot(req_prot);
/*
* old_pte points to the large page base address. So we need
@@ -1413,6 +1413,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
* but that can deadlock->flush only current cpu:
*/
__flush_tlb_all();
+
+ arch_flush_lazy_mmu_mode();
}
#ifdef CONFIG_HIBERNATION
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 193350b51f90..17fda6a8b3c2 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -58,6 +58,13 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
+ /*
+ * NOTE! For PAE, any changes to the top page-directory-pointer-table
+ * entries need a full cr3 reload to flush.
+ */
+#ifdef CONFIG_X86_PAE
+ tlb->need_flush_all = 1;
+#endif
tlb_remove_page(tlb, virt_to_page(pmd));
}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 28d9efacc9b6..b55d174e5034 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -41,6 +41,7 @@
#include <linux/io.h>
#include <linux/reboot.h>
#include <linux/bcd.h>
+#include <linux/ucs2_string.h>
#include <asm/setup.h>
#include <asm/efi.h>
@@ -52,6 +53,13 @@
#define EFI_DEBUG 1
+/*
+ * There's some additional metadata associated with each
+ * variable. Intel's reference implementation is 60 bytes - bump that
+ * to account for potential alignment constraints
+ */
+#define VAR_METADATA_SIZE 64
+
struct efi __read_mostly efi = {
.mps = EFI_INVALID_TABLE_ADDR,
.acpi = EFI_INVALID_TABLE_ADDR,
@@ -70,6 +78,13 @@ struct efi_memory_map memmap;
static struct efi efi_phys __initdata;
static efi_system_table_t efi_systab __initdata;
+static u64 efi_var_store_size;
+static u64 efi_var_remaining_size;
+static u64 efi_var_max_var_size;
+static u64 boot_used_size;
+static u64 boot_var_size;
+static u64 active_size;
+
unsigned long x86_efi_facility;
/*
@@ -99,6 +114,15 @@ static int __init setup_add_efi_memmap(char *arg)
}
early_param("add_efi_memmap", setup_add_efi_memmap);
+static bool efi_no_storage_paranoia;
+
+static int __init setup_storage_paranoia(char *arg)
+{
+ efi_no_storage_paranoia = true;
+ return 0;
+}
+early_param("efi_no_storage_paranoia", setup_storage_paranoia);
+
static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
{
@@ -163,8 +187,53 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
efi_char16_t *name,
efi_guid_t *vendor)
{
- return efi_call_virt3(get_next_variable,
- name_size, name, vendor);
+ efi_status_t status;
+ static bool finished = false;
+ static u64 var_size;
+
+ status = efi_call_virt3(get_next_variable,
+ name_size, name, vendor);
+
+ if (status == EFI_NOT_FOUND) {
+ finished = true;
+ if (var_size < boot_used_size) {
+ boot_var_size = boot_used_size - var_size;
+ active_size += boot_var_size;
+ } else {
+ printk(KERN_WARNING FW_BUG "efi: Inconsistent initial sizes\n");
+ }
+ }
+
+ if (boot_used_size && !finished) {
+ unsigned long size;
+ u32 attr;
+ efi_status_t s;
+ void *tmp;
+
+ s = virt_efi_get_variable(name, vendor, &attr, &size, NULL);
+
+ if (s != EFI_BUFFER_TOO_SMALL || !size)
+ return status;
+
+ tmp = kmalloc(size, GFP_ATOMIC);
+
+ if (!tmp)
+ return status;
+
+ s = virt_efi_get_variable(name, vendor, &attr, &size, tmp);
+
+ if (s == EFI_SUCCESS && (attr & EFI_VARIABLE_NON_VOLATILE)) {
+ var_size += size;
+ var_size += ucs2_strsize(name, 1024);
+ active_size += size;
+ active_size += VAR_METADATA_SIZE;
+ active_size += ucs2_strsize(name, 1024);
+ }
+
+ kfree(tmp);
+ }
+
+ return status;
}
static efi_status_t virt_efi_set_variable(efi_char16_t *name,
@@ -173,9 +242,34 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name,
unsigned long data_size,
void *data)
{
- return efi_call_virt5(set_variable,
- name, vendor, attr,
- data_size, data);
+ efi_status_t status;
+ u32 orig_attr = 0;
+ unsigned long orig_size = 0;
+
+ status = virt_efi_get_variable(name, vendor, &orig_attr, &orig_size,
+ NULL);
+
+ if (status != EFI_BUFFER_TOO_SMALL)
+ orig_size = 0;
+
+ status = efi_call_virt5(set_variable,
+ name, vendor, attr,
+ data_size, data);
+
+ if (status == EFI_SUCCESS) {
+ if (orig_size) {
+ active_size -= orig_size;
+ active_size -= ucs2_strsize(name, 1024);
+ active_size -= VAR_METADATA_SIZE;
+ }
+ if (data_size) {
+ active_size += data_size;
+ active_size += ucs2_strsize(name, 1024);
+ active_size += VAR_METADATA_SIZE;
+ }
+ }
+
+ return status;
}
static efi_status_t virt_efi_query_variable_info(u32 attr,
@@ -690,6 +784,9 @@ void __init efi_init(void)
char vendor[100] = "unknown";
int i = 0;
void *tmp;
+ struct setup_data *data;
+ struct efi_var_bootdata *efi_var_data;
+ u64 pa_data;
#ifdef CONFIG_X86_32
if (boot_params.efi_info.efi_systab_hi ||
@@ -707,6 +804,22 @@ void __init efi_init(void)
if (efi_systab_init(efi_phys.systab))
return;
+ pa_data = boot_params.hdr.setup_data;
+ while (pa_data) {
+ data = early_ioremap(pa_data, sizeof(*efi_var_data));
+ if (data->type == SETUP_EFI_VARS) {
+ efi_var_data = (struct efi_var_bootdata *)data;
+
+ efi_var_store_size = efi_var_data->store_size;
+ efi_var_remaining_size = efi_var_data->remaining_size;
+ efi_var_max_var_size = efi_var_data->max_var_size;
+ }
+ pa_data = data->next;
+ early_iounmap(data, sizeof(*efi_var_data));
+ }
+
+ boot_used_size = efi_var_store_size - efi_var_remaining_size;
+
set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility);
/*
@@ -1007,3 +1120,48 @@ u64 efi_mem_attributes(unsigned long phys_addr)
}
return 0;
}
+
+/*
+ * Some firmware has serious problems when using more than 50% of the EFI
+ * variable store, i.e. it triggers bugs that can brick machines. Ensure that
+ * we never use more than this safe limit.
+ *
+ * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable
+ * store.
+ */
+efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
+{
+ efi_status_t status;
+ u64 storage_size, remaining_size, max_size;
+
+ status = efi.query_variable_info(attributes, &storage_size,
+ &remaining_size, &max_size);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ if (!max_size && remaining_size > size)
+ printk_once(KERN_ERR FW_BUG "Broken EFI implementation"
+ " is returning MaxVariableSize=0\n");
+ /*
+ * Some firmware implementations refuse to boot if there's insufficient
+ * space in the variable store. We account for that by refusing the
+ * write if permitting it would reduce the available space to under
+ * 50%. However, some firmware won't reclaim variable space until
+ * after the used (not merely the actively used) space drops below
+ * a threshold. We can approximate that case with the value calculated
+ * above. If both the firmware and our calculations indicate that the
+ * available space would drop below 50%, refuse the write.
+ */
+
+ if (!storage_size || size > remaining_size ||
+ (max_size && size > max_size))
+ return EFI_OUT_OF_RESOURCES;
+
+ if (!efi_no_storage_paranoia &&
+ ((active_size + size + VAR_METADATA_SIZE > storage_size / 2) &&
+ (remaining_size - size < storage_size / 2)))
+ return EFI_OUT_OF_RESOURCES;
+
+ return EFI_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(efi_query_variable_store);
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 120cee1c3f8d..3c68768d7a75 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -11,6 +11,7 @@
#include <linux/suspend.h>
#include <linux/export.h>
#include <linux/smp.h>
+#include <linux/perf_event.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
@@ -228,6 +229,7 @@ static void __restore_processor_state(struct saved_context *ctxt)
do_fpu_end();
x86_platform.restore_sched_clock_state();
mtrr_bp_restore();
+ perf_restore_debug_store();
}
/* Needed by apm.c */
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index e8e34938c57d..e006c18d288a 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1467,8 +1467,6 @@ static void __init xen_write_cr3_init(unsigned long cr3)
__xen_write_cr3(true, cr3);
xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
-
- pv_mmu_ops.write_cr3 = &xen_write_cr3;
}
#endif
@@ -1750,14 +1748,18 @@ static void *m2v(phys_addr_t maddr)
}
/* Set the page permissions on an identity-mapped pages */
-static void set_page_prot(void *addr, pgprot_t prot)
+static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags)
{
unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
pte_t pte = pfn_pte(pfn, prot);
- if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
+ if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
BUG();
}
+static void set_page_prot(void *addr, pgprot_t prot)
+{
+ return set_page_prot_flags(addr, prot, UVMF_NONE);
+}
#ifdef CONFIG_X86_32
static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
{
@@ -1841,12 +1843,12 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
unsigned long addr)
{
if (*pt_base == PFN_DOWN(__pa(addr))) {
- set_page_prot((void *)addr, PAGE_KERNEL);
+ set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG);
clear_page((void *)addr);
(*pt_base)++;
}
if (*pt_end == PFN_DOWN(__pa(addr))) {
- set_page_prot((void *)addr, PAGE_KERNEL);
+ set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG);
clear_page((void *)addr);
(*pt_end)--;
}
@@ -2122,6 +2124,7 @@ static void __init xen_post_allocator_init(void)
#endif
#ifdef CONFIG_X86_64
+ pv_mmu_ops.write_cr3 = &xen_write_cr3;
SetPagePinned(virt_to_page(level3_user_vsyscall));
#endif
xen_mark_init_mm_pinned();
@@ -2197,6 +2200,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.lazy_mode = {
.enter = paravirt_enter_lazy_mmu,
.leave = xen_leave_lazy_mmu,
+ .flush = paravirt_flush_lazy_mmu,
},
.set_fixmap = xen_set_fixmap,