aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/alternative.c5
-rw-r--r--arch/x86/kernel/apic/apic.c7
-rw-r--r--arch/x86/kernel/apic/io_apic.c11
-rw-r--r--arch/x86/kernel/apic/vector.c4
-rw-r--r--arch/x86/kernel/cpu/common.c4
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c12
-rw-r--r--arch/x86/kernel/cpu/perf_event.h1
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c22
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_bts.c1
-rw-r--r--arch/x86/kernel/cpu/perf_event_msr.c4
-rw-r--r--arch/x86/kernel/cpu/scattered.c2
-rw-r--r--arch/x86/kernel/crash.c7
-rw-r--r--arch/x86/kernel/irq_32.c19
-rw-r--r--arch/x86/kernel/irq_64.c2
-rw-r--r--arch/x86/kernel/ldt.c4
-rw-r--r--arch/x86/kernel/paravirt.c16
-rw-r--r--arch/x86/kernel/pci-dma.c7
-rw-r--r--arch/x86/kernel/process.c58
-rw-r--r--arch/x86/kernel/process_32.c28
-rw-r--r--arch/x86/kernel/process_64.c24
-rw-r--r--arch/x86/kernel/setup.c8
-rw-r--r--arch/x86/kernel/smpboot.c15
-rw-r--r--arch/x86/kernel/tsc.c17
-rw-r--r--arch/x86/kernel/vm86_32.c27
24 files changed, 198 insertions, 107 deletions
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index c42827eb86cf..25f909362b7a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -338,10 +338,15 @@ done:
static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
{
+ unsigned long flags;
+
if (instr[0] != 0x90)
return;
+ local_irq_save(flags);
add_nops(instr + (a->instrlen - a->padlen), a->padlen);
+ sync_core();
+ local_irq_restore(flags);
DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
instr, a->instrlen - a->padlen, a->padlen);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 3ca3e46aa405..24e94ce454e2 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -336,6 +336,13 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
apic_write(APIC_LVTT, lvtt_value);
if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
+ /*
+ * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
+ * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
+ * According to Intel, MFENCE can do the serialization here.
+ */
+ asm volatile("mfence" : : : "memory");
+
printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
return;
}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 38a76f826530..4f2821527014 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2522,6 +2522,7 @@ void __init setup_ioapic_dest(void)
int pin, ioapic, irq, irq_entry;
const struct cpumask *mask;
struct irq_data *idata;
+ struct irq_chip *chip;
if (skip_ioapic_setup == 1)
return;
@@ -2545,9 +2546,11 @@ void __init setup_ioapic_dest(void)
else
mask = apic->target_cpus();
- irq_set_affinity(irq, mask);
+ chip = irq_data_get_irq_chip(idata);
+ /* Might be lapic_chip for irq 0 */
+ if (chip->irq_set_affinity)
+ chip->irq_set_affinity(idata, mask, false);
}
-
}
#endif
@@ -2906,6 +2909,7 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
struct irq_data *irq_data;
struct mp_chip_data *data;
struct irq_alloc_info *info = arg;
+ unsigned long flags;
if (!info || nr_irqs > 1)
return -EINVAL;
@@ -2938,11 +2942,14 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
cfg = irqd_cfg(irq_data);
add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin);
+
+ local_irq_save(flags);
if (info->ioapic_entry)
mp_setup_entry(cfg, data, info->ioapic_entry);
mp_register_handler(virq, data->trigger);
if (virq < nr_legacy_irqs())
legacy_pic->mask(virq);
+ local_irq_restore(flags);
apic_printk(APIC_VERBOSE, KERN_DEBUG
"IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i Dest:%d)\n",
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 1bbd0fe2c806..836d11b92811 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -489,10 +489,8 @@ static int apic_set_affinity(struct irq_data *irq_data,
err = assign_irq_vector(irq, data, dest);
if (err) {
- struct irq_data *top = irq_get_irq_data(irq);
-
if (assign_irq_vector(irq, data,
- irq_data_get_affinity_mask(top)))
+ irq_data_get_affinity_mask(irq_data)))
pr_err("Failed to recover vector for irq %d\n", irq);
return err;
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 07ce52c22ec8..de22ea7ff82f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1110,10 +1110,10 @@ void print_cpu_info(struct cpuinfo_x86 *c)
else
printk(KERN_CONT "%d86", c->x86);
- printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model);
+ printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
if (c->x86_mask || c->cpuid_level >= 0)
- printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask);
+ printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask);
else
printk(KERN_CONT ")\n");
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 381c8b9b3a33..20e242ea1bc4 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -34,11 +34,10 @@
struct ms_hyperv_info ms_hyperv;
EXPORT_SYMBOL_GPL(ms_hyperv);
-static void (*hv_kexec_handler)(void);
-static void (*hv_crash_handler)(struct pt_regs *regs);
-
#if IS_ENABLED(CONFIG_HYPERV)
static void (*vmbus_handler)(void);
+static void (*hv_kexec_handler)(void);
+static void (*hv_crash_handler)(struct pt_regs *regs);
void hyperv_vector_handler(struct pt_regs *regs)
{
@@ -96,8 +95,8 @@ void hv_remove_crash_handler(void)
hv_crash_handler = NULL;
}
EXPORT_SYMBOL_GPL(hv_remove_crash_handler);
-#endif
+#ifdef CONFIG_KEXEC_CORE
static void hv_machine_shutdown(void)
{
if (kexec_in_progress && hv_kexec_handler)
@@ -111,7 +110,8 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs)
hv_crash_handler(regs);
native_machine_crash_shutdown(regs);
}
-
+#endif /* CONFIG_KEXEC_CORE */
+#endif /* CONFIG_HYPERV */
static uint32_t __init ms_hyperv_platform(void)
{
@@ -186,8 +186,10 @@ static void __init ms_hyperv_init_platform(void)
no_timer_check = 1;
#endif
+#if IS_ENABLED(CONFIG_HYPERV) && defined(CONFIG_KEXEC_CORE)
machine_ops.shutdown = hv_machine_shutdown;
machine_ops.crash_shutdown = hv_machine_crash_shutdown;
+#endif
mark_tsc_unstable("running on Hyper-V");
}
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 5edf6d868fc1..165be83a7fa4 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -47,6 +47,7 @@ enum extra_reg_type {
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
EXTRA_REG_LBR = 2, /* lbr_select */
EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */
+ EXTRA_REG_FE = 4, /* fe_* */
EXTRA_REG_MAX /* number of entries needed */
};
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index cd9b6d0b10bf..f63360be2238 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -205,6 +205,11 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+ /*
+ * Note the low 8 bits eventsel code is not a continuous field, containing
+ * some #GPing bits. These are masked out.
+ */
+ INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
EVENT_EXTRA_END
};
@@ -250,7 +255,7 @@ struct event_constraint intel_bdw_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
- INTEL_EVENT_CONSTRAINT(0xa3, 0x4), /* CYCLE_ACTIVITY.* */
+ INTEL_UEVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
EVENT_CONSTRAINT_END
};
@@ -2316,9 +2321,12 @@ static struct event_constraint *
intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
- struct event_constraint *c1 = cpuc->event_constraint[idx];
+ struct event_constraint *c1 = NULL;
struct event_constraint *c2;
+ if (idx >= 0) /* fake does < 0 */
+ c1 = cpuc->event_constraint[idx];
+
/*
* first time only
* - static constraint: no change across incremental scheduling calls
@@ -2888,6 +2896,8 @@ PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
PMU_FORMAT_ATTR(ldlat, "config1:0-15");
+PMU_FORMAT_ATTR(frontend, "config1:0-23");
+
static struct attribute *intel_arch3_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
@@ -2904,6 +2914,11 @@ static struct attribute *intel_arch3_formats_attr[] = {
NULL,
};
+static struct attribute *skl_format_attr[] = {
+ &format_attr_frontend.attr,
+ NULL,
+};
+
static __initconst const struct x86_pmu core_pmu = {
.name = "core",
.handle_irq = x86_pmu_handle_irq,
@@ -3513,7 +3528,8 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.cpu_events = hsw_events_attrs;
+ x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
+ skl_format_attr);
WARN_ON(!x86_pmu.format_attrs);
x86_pmu.cpu_events = hsw_events_attrs;
pr_cont("Skylake events, ");
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c
index 54690e885759..d1c0f254afbe 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_bts.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c
@@ -222,6 +222,7 @@ static void __bts_event_start(struct perf_event *event)
if (!buf || bts_buffer_is_full(buf, bts))
return;
+ event->hw.itrace_started = 1;
event->hw.state = 0;
if (!buf->snapshot)
diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/kernel/cpu/perf_event_msr.c
index 086b12eae794..f32ac13934f2 100644
--- a/arch/x86/kernel/cpu/perf_event_msr.c
+++ b/arch/x86/kernel/cpu/perf_event_msr.c
@@ -10,12 +10,12 @@ enum perf_msr_id {
PERF_MSR_EVENT_MAX,
};
-bool test_aperfmperf(int idx)
+static bool test_aperfmperf(int idx)
{
return boot_cpu_has(X86_FEATURE_APERFMPERF);
}
-bool test_intel(int idx)
+static bool test_intel(int idx)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
boot_cpu_data.x86 != 6)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 3d423a101fae..608fb26c7254 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -37,7 +37,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
{ X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 },
{ X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 },
{ X86_FEATURE_HWP, CR_EAX, 7, 0x00000006, 0 },
- { X86_FEATURE_HWP_NOITFY, CR_EAX, 8, 0x00000006, 0 },
+ { X86_FEATURE_HWP_NOTIFY, CR_EAX, 8, 0x00000006, 0 },
{ X86_FEATURE_HWP_ACT_WINDOW, CR_EAX, 9, 0x00000006, 0 },
{ X86_FEATURE_HWP_EPP, CR_EAX,10, 0x00000006, 0 },
{ X86_FEATURE_HWP_PKG_REQ, CR_EAX,11, 0x00000006, 0 },
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index e068d6683dba..74ca2fe7a0b3 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -185,10 +185,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
}
#ifdef CONFIG_KEXEC_FILE
-static int get_nr_ram_ranges_callback(unsigned long start_pfn,
- unsigned long nr_pfn, void *arg)
+static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg)
{
- int *nr_ranges = arg;
+ unsigned int *nr_ranges = arg;
(*nr_ranges)++;
return 0;
@@ -214,7 +213,7 @@ static void fill_up_crash_elf_data(struct crash_elf_data *ced,
ced->image = image;
- walk_system_ram_range(0, -1, &nr_ranges,
+ walk_system_ram_res(0, -1, &nr_ranges,
get_nr_ram_ranges_callback);
ced->max_nr_ranges = nr_ranges;
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index c80cf6699678..38da8f29a9c8 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -68,11 +68,10 @@ static inline void *current_stack(void)
return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
}
-static inline int
-execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
+static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
{
struct irq_stack *curstk, *irqstk;
- u32 *isp, *prev_esp, arg1, arg2;
+ u32 *isp, *prev_esp, arg1;
curstk = (struct irq_stack *) current_stack();
irqstk = __this_cpu_read(hardirq_stack);
@@ -98,8 +97,8 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
asm volatile("xchgl %%ebx,%%esp \n"
"call *%%edi \n"
"movl %%ebx,%%esp \n"
- : "=a" (arg1), "=d" (arg2), "=b" (isp)
- : "0" (irq), "1" (desc), "2" (isp),
+ : "=a" (arg1), "=b" (isp)
+ : "0" (desc), "1" (isp),
"D" (desc->handle_irq)
: "memory", "cc", "ecx");
return 1;
@@ -150,19 +149,15 @@ void do_softirq_own_stack(void)
bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
{
- unsigned int irq;
- int overflow;
-
- overflow = check_stack_overflow();
+ int overflow = check_stack_overflow();
if (IS_ERR_OR_NULL(desc))
return false;
- irq = irq_desc_get_irq(desc);
- if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
+ if (user_mode(regs) || !execute_on_irq_stack(overflow, desc)) {
if (unlikely(overflow))
print_stack_overflow();
- generic_handle_irq_desc(irq, desc);
+ generic_handle_irq_desc(desc);
}
return true;
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index ff16ccb918f2..c767cf2bc80a 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -75,6 +75,6 @@ bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
if (unlikely(IS_ERR_OR_NULL(desc)))
return false;
- generic_handle_irq_desc(irq_desc_get_irq(desc), desc);
+ generic_handle_irq_desc(desc);
return true;
}
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 2bcc0525f1c1..6acc9dd91f36 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -58,7 +58,7 @@ static struct ldt_struct *alloc_ldt_struct(int size)
if (alloc_size > PAGE_SIZE)
new_ldt->entries = vzalloc(alloc_size);
else
- new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL);
if (!new_ldt->entries) {
kfree(new_ldt);
@@ -95,7 +95,7 @@ static void free_ldt_struct(struct ldt_struct *ldt)
if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(ldt->entries);
else
- kfree(ldt->entries);
+ free_page((unsigned long)ldt->entries);
kfree(ldt);
}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index f68e48f5f6c2..c2130aef3f9d 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -41,10 +41,18 @@
#include <asm/timer.h>
#include <asm/special_insns.h>
-/* nop stub */
-void _paravirt_nop(void)
-{
-}
+/*
+ * nop stub, which must not clobber anything *including the stack* to
+ * avoid confusing the entry prologues.
+ */
+extern void _paravirt_nop(void);
+asm (".pushsection .entry.text, \"ax\"\n"
+ ".global _paravirt_nop\n"
+ "_paravirt_nop:\n\t"
+ "ret\n\t"
+ ".size _paravirt_nop, . - _paravirt_nop\n\t"
+ ".type _paravirt_nop, @function\n\t"
+ ".popsection");
/* identity function, which can be inlined */
u32 _paravirt_ident_32(u32 x)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 84b8ef82a159..cd99433b8ba1 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -131,11 +131,12 @@ void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
{
- *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp);
- *gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
-
if (!*dev)
*dev = &x86_dma_fallback_dev;
+
+ *gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+ *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp);
+
if (!is_device_dma_capable(*dev))
return false;
return true;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 6d0e62ae8516..9f7c21c22477 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -84,6 +84,9 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
memcpy(dst, src, arch_task_struct_size);
+#ifdef CONFIG_VM86
+ dst->thread.vm86 = NULL;
+#endif
return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
}
@@ -506,3 +509,58 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}
+/*
+ * Called from fs/proc with a reference on @p to find the function
+ * which called into schedule(). This needs to be done carefully
+ * because the task might wake up and we might look at a stack
+ * changing under us.
+ */
+unsigned long get_wchan(struct task_struct *p)
+{
+ unsigned long start, bottom, top, sp, fp, ip;
+ int count = 0;
+
+ if (!p || p == current || p->state == TASK_RUNNING)
+ return 0;
+
+ start = (unsigned long)task_stack_page(p);
+ if (!start)
+ return 0;
+
+ /*
+ * Layout of the stack page:
+ *
+ * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long)
+ * PADDING
+ * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
+ * stack
+ * ----------- bottom = start + sizeof(thread_info)
+ * thread_info
+ * ----------- start
+ *
+ * The tasks stack pointer points at the location where the
+ * framepointer is stored. The data on the stack is:
+ * ... IP FP ... IP FP
+ *
+ * We need to read FP and IP, so we need to adjust the upper
+ * bound by another unsigned long.
+ */
+ top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
+ top -= 2 * sizeof(unsigned long);
+ bottom = start + sizeof(struct thread_info);
+
+ sp = READ_ONCE(p->thread.sp);
+ if (sp < bottom || sp > top)
+ return 0;
+
+ fp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
+ do {
+ if (fp < bottom || fp > top)
+ return 0;
+ ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long)));
+ if (!in_sched_functions(ip))
+ return ip;
+ fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);
+ } while (count++ < 16 && p->state != TASK_RUNNING);
+ return 0;
+}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index c13df2c735f8..737527b40e5b 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -324,31 +324,3 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
return prev_p;
}
-
-#define top_esp (THREAD_SIZE - sizeof(unsigned long))
-#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long))
-
-unsigned long get_wchan(struct task_struct *p)
-{
- unsigned long bp, sp, ip;
- unsigned long stack_page;
- int count = 0;
- if (!p || p == current || p->state == TASK_RUNNING)
- return 0;
- stack_page = (unsigned long)task_stack_page(p);
- sp = p->thread.sp;
- if (!stack_page || sp < stack_page || sp > top_esp+stack_page)
- return 0;
- /* include/asm-i386/system.h:switch_to() pushes bp last. */
- bp = *(unsigned long *) sp;
- do {
- if (bp < stack_page || bp > top_ebp+stack_page)
- return 0;
- ip = *(unsigned long *) (bp+4);
- if (!in_sched_functions(ip))
- return ip;
- bp = *(unsigned long *) bp;
- } while (count++ < 16);
- return 0;
-}
-
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3c1bbcf12924..b35921a670b2 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -499,30 +499,6 @@ void set_personality_ia32(bool x32)
}
EXPORT_SYMBOL_GPL(set_personality_ia32);
-unsigned long get_wchan(struct task_struct *p)
-{
- unsigned long stack;
- u64 fp, ip;
- int count = 0;
-
- if (!p || p == current || p->state == TASK_RUNNING)
- return 0;
- stack = (unsigned long)task_stack_page(p);
- if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
- return 0;
- fp = *(u64 *)(p->thread.sp);
- do {
- if (fp < (unsigned long)stack ||
- fp >= (unsigned long)stack+THREAD_SIZE)
- return 0;
- ip = *(u64 *)(fp+8);
- if (!in_sched_functions(ip))
- return ip;
- fp = *(u64 *)fp;
- } while (count++ < 16);
- return 0;
-}
-
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
int ret = 0;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index fdb7f2a2d328..a3cccbfc5f77 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1173,6 +1173,14 @@ void __init setup_arch(char **cmdline_p)
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
KERNEL_PGD_PTRS);
+
+ /*
+ * sync back low identity map too. It is used for example
+ * in the 32-bit EFI stub.
+ */
+ clone_pgd_range(initial_page_table,
+ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ KERNEL_PGD_PTRS);
#endif
tboot_probe();
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e0c198e5f920..892ee2e5ecbc 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -509,7 +509,7 @@ void __inquire_remote_apic(int apicid)
*/
#define UDELAY_10MS_DEFAULT 10000
-static unsigned int init_udelay = UDELAY_10MS_DEFAULT;
+static unsigned int init_udelay = INT_MAX;
static int __init cpu_init_udelay(char *str)
{
@@ -522,13 +522,16 @@ early_param("cpu_init_udelay", cpu_init_udelay);
static void __init smp_quirk_init_udelay(void)
{
/* if cmdline changed it from default, leave it alone */
- if (init_udelay != UDELAY_10MS_DEFAULT)
+ if (init_udelay != INT_MAX)
return;
/* if modern processor, use no delay */
if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) ||
((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF)))
init_udelay = 0;
+
+ /* else, use legacy delay */
+ init_udelay = UDELAY_10MS_DEFAULT;
}
/*
@@ -657,7 +660,9 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
/*
* Give the other CPU some time to accept the IPI.
*/
- if (init_udelay)
+ if (init_udelay == 0)
+ udelay(10);
+ else
udelay(300);
pr_debug("Startup point 1\n");
@@ -668,7 +673,9 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
/*
* Give the other CPU some time to accept the IPI.
*/
- if (init_udelay)
+ if (init_udelay == 0)
+ udelay(10);
+ else
udelay(200);
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c8d52cb4cb6e..c3f7602cd038 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -21,6 +21,7 @@
#include <asm/hypervisor.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>
+#include <asm/geode.h>
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@@ -1013,15 +1014,17 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
static void __init check_system_tsc_reliable(void)
{
-#ifdef CONFIG_MGEODE_LX
- /* RTSC counts during suspend */
+#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
+ if (is_geode_lx()) {
+ /* RTSC counts during suspend */
#define RTSC_SUSP 0x100
- unsigned long res_low, res_high;
+ unsigned long res_low, res_high;
- rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
- /* Geode_LX - the OLPC CPU has a very reliable TSC */
- if (res_low & RTSC_SUSP)
- tsc_clocksource_reliable = 1;
+ rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
+ /* Geode_LX - the OLPC CPU has a very reliable TSC */
+ if (res_low & RTSC_SUSP)
+ tsc_clocksource_reliable = 1;
+ }
#endif
if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
tsc_clocksource_reliable = 1;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index abd8b856bd2b..524619351961 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -45,6 +45,7 @@
#include <linux/audit.h>
#include <linux/stddef.h>
#include <linux/slab.h>
+#include <linux/security.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -232,6 +233,32 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
struct pt_regs *regs = current_pt_regs();
unsigned long err = 0;
+ err = security_mmap_addr(0);
+ if (err) {
+ /*
+ * vm86 cannot virtualize the address space, so vm86 users
+ * need to manage the low 1MB themselves using mmap. Given
+ * that BIOS places important data in the first page, vm86
+ * is essentially useless if mmap_min_addr != 0. DOSEMU,
+ * for example, won't even bother trying to use vm86 if it
+ * can't map a page at virtual address 0.
+ *
+ * To reduce the available kernel attack surface, simply
+ * disallow vm86(old) for users who cannot mmap at va 0.
+ *
+ * The implementation of security_mmap_addr will allow
+ * suitably privileged users to map va 0 even if
+ * vm.mmap_min_addr is set above 0, and we want this
+ * behavior for vm86 as well, as it ensures that legacy
+ * tools like vbetool will not fail just because of
+ * vm.mmap_min_addr.
+ */
+ pr_info_once("Denied a call to vm86(old) from %s[%d] (uid: %d). Set the vm.mmap_min_addr sysctl to 0 and/or adjust LSM mmap_min_addr policy to enable vm86 if you are using a vm86-based DOS emulator.\n",
+ current->comm, task_pid_nr(current),
+ from_kuid_munged(&init_user_ns, current_uid()));
+ return -EPERM;
+ }
+
if (!vm86) {
if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL)))
return -ENOMEM;