From a21211672c9a1d730a39aa65d4a5b3414700adfb Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 23 Mar 2016 21:07:39 -0700 Subject: ACPI / processor: Request native thermal interrupt handling via _OSC There are several reports of freeze on enabling HWP (Hardware PStates) feature on Skylake-based systems by the Intel P-states driver. The root cause is identified as the HWP interrupts causing BIOS code to freeze. HWP interrupts use the thermal LVT which can be handled by Linux natively, but on the affected Skylake-based systems SMM will respond to it by default. This is a problem for several reasons: - On the affected systems the SMM thermal LVT handler is broken (it will crash when invoked) and a BIOS update is necessary to fix it. - With thermal interrupt handled in SMM we lose all of the reporting features of the arch/x86/kernel/cpu/mcheck/therm_throt driver. - Some thermal drivers like x86-package-temp depend on the thermal threshold interrupts signaled via the thermal LVT. - The HWP interrupts are useful for debugging and tuning performance (if the kernel can handle them). The native handling of thermal interrupts needs to be enabled because of that. This requires some way to tell SMM that the OS can handle thermal interrupts. That can be done by using _OSC/_PDC in processor scope very early during ACPI initialization. The meaning of _OSC/_PDC bit 12 in processor scope is whether or not the OS supports native handling of interrupts for Collaborative Processor Performance Control (CPPC) notifications. Since on HWP-capable systems CPPC is a firmware interface to HWP, setting this bit effectively tells the firmware that the OS will handle thermal interrupts natively going forward. For details on _OSC/_PDC refer to: http://www.intel.com/content/www/us/en/standards/processor-vendor-specific-acpi-specification.html To implement the _OSC/_PDC handshake as described, introduce a new function, acpi_early_processor_osc(), that walks the ACPI namespace looking for ACPI processor objects and invokes _OSC for them with bit 12 in the capabilities buffer set and terminates the namespace walk on the first success. Also modify intel_thermal_interrupt() to clear HWP status bits in the HWP_STATUS MSR to acknowledge HWP interrupts (which prevents them from firing continuously). Signed-off-by: Srinivas Pandruvada [ rjw: Subject & changelog, function rename ] Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/cpu/mcheck/therm_throt.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 2c5aaf8c2e2f..05538582a809 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -385,6 +385,9 @@ static void intel_thermal_interrupt(void) { __u64 msr_val; + if (static_cpu_has(X86_FEATURE_HWP)) + wrmsrl_safe(MSR_HWP_STATUS, 0); + rdmsrl(MSR_IA32_THERM_STATUS, msr_val); /* Check for violation of core thermal thresholds*/ -- cgit v1.2.3-59-g8ed1b From ee6825c80e870fff1a370c718ec77022ade0889b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 25 Mar 2016 15:52:34 +0100 Subject: x86/topology: Fix AMD core count It turns out AMD gets x86_max_cores wrong when there are compute units. The issue is that Linux assumes: nr_logical_cpus = nr_cores * nr_siblings But AMD reports its CU unit as 2 cores, but then sets num_smp_siblings to 2 as well. Boris: fixup ras/mce_amd_inj.c too, to compute the Node Base Core properly, according to the new nomenclature. Fixes: 1f12e32f4cd5 ("x86/topology: Create logical package id") Reported-by: Xiong Zhou Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Cc: Andreas Herrmann Cc: Andy Lutomirski Link: http://lkml.kernel.org/r/20160317095220.GO6344@twins.programming.kicks-ass.net Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/smp.h | 1 + arch/x86/kernel/cpu/amd.c | 8 ++++---- arch/x86/ras/mce_amd_inj.c | 3 ++- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 20a3de5cb3b0..66b057306f40 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -155,6 +155,7 @@ static inline int wbinvd_on_all_cpus(void) wbinvd(); return 0; } +#define smp_num_siblings 1 #endif /* CONFIG_SMP */ extern unsigned disabled_cpus; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 6e47e3a916f1..4d0087f94ee5 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -312,9 +312,9 @@ static void amd_get_topology(struct cpuinfo_x86 *c) node_id = ecx & 7; /* get compute unit information */ - smp_num_siblings = ((ebx >> 8) & 3) + 1; + cores_per_cu = smp_num_siblings = ((ebx >> 8) & 3) + 1; + c->x86_max_cores /= smp_num_siblings; c->compute_unit_id = ebx & 0xff; - cores_per_cu += ((ebx >> 8) & 3); } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; @@ -329,8 +329,8 @@ static void amd_get_topology(struct cpuinfo_x86 *c) u32 cus_per_node; set_cpu_cap(c, X86_FEATURE_AMD_DCM); - cores_per_node = c->x86_max_cores / nodes_per_socket; - cus_per_node = cores_per_node / cores_per_cu; + cus_per_node = c->x86_max_cores / nodes_per_socket; + cores_per_node = cus_per_node * cores_per_cu; /* store NodeID, use llc_shared_map to store sibling info */ per_cpu(cpu_llc_id, cpu) = node_id; diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c index 55d38cfa46c2..9e02dcaef683 100644 --- a/arch/x86/ras/mce_amd_inj.c +++ b/arch/x86/ras/mce_amd_inj.c @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -206,7 +207,7 @@ static u32 get_nbc_for_node(int node_id) struct cpuinfo_x86 *c = &boot_cpu_data; u32 cores_per_node; - cores_per_node = c->x86_max_cores / amd_get_nodes_per_socket(); + cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket(); return cores_per_node * node_id; } -- cgit v1.2.3-59-g8ed1b From 8196dab4fc159943df6baaac04973bb1accb7100 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 25 Mar 2016 15:52:36 +0100 Subject: x86/cpu: Get rid of compute_unit_id It is cpu_core_id anyway. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1458917557-8757-3-git-send-email-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/processor.h | 2 -- arch/x86/kernel/amd_nb.c | 6 ++---- arch/x86/kernel/cpu/amd.c | 10 +++------- arch/x86/kernel/smpboot.c | 2 +- 4 files changed, 6 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 983738ac014c..9264476f3d57 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -132,8 +132,6 @@ struct cpuinfo_x86 { u16 logical_proc_id; /* Core id: */ u16 cpu_core_id; - /* Compute unit id */ - u8 compute_unit_id; /* Index into per_cpu list: */ u16 cpu_index; u32 microcode; diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 29fa475ec518..a147e676fc7b 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -170,15 +170,13 @@ int amd_get_subcaches(int cpu) { struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link; unsigned int mask; - int cuid; if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) return 0; pci_read_config_dword(link, 0x1d4, &mask); - cuid = cpu_data(cpu).compute_unit_id; - return (mask >> (4 * cuid)) & 0xf; + return (mask >> (4 * cpu_data(cpu).cpu_core_id)) & 0xf; } int amd_set_subcaches(int cpu, unsigned long mask) @@ -204,7 +202,7 @@ int amd_set_subcaches(int cpu, unsigned long mask) pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000); } - cuid = cpu_data(cpu).compute_unit_id; + cuid = cpu_data(cpu).cpu_core_id; mask <<= 4 * cuid; mask |= (0xf ^ (1 << cuid)) << 26; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4d0087f94ee5..7b76eb67a9b3 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -300,7 +300,6 @@ static int nearby_node(int apicid) #ifdef CONFIG_SMP static void amd_get_topology(struct cpuinfo_x86 *c) { - u32 cores_per_cu = 1; u8 node_id; int cpu = smp_processor_id(); @@ -312,9 +311,9 @@ static void amd_get_topology(struct cpuinfo_x86 *c) node_id = ecx & 7; /* get compute unit information */ - cores_per_cu = smp_num_siblings = ((ebx >> 8) & 3) + 1; + smp_num_siblings = ((ebx >> 8) & 3) + 1; c->x86_max_cores /= smp_num_siblings; - c->compute_unit_id = ebx & 0xff; + c->cpu_core_id = ebx & 0xff; } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; @@ -325,19 +324,16 @@ static void amd_get_topology(struct cpuinfo_x86 *c) /* fixup multi-node processor information */ if (nodes_per_socket > 1) { - u32 cores_per_node; u32 cus_per_node; set_cpu_cap(c, X86_FEATURE_AMD_DCM); cus_per_node = c->x86_max_cores / nodes_per_socket; - cores_per_node = cus_per_node * cores_per_cu; /* store NodeID, use llc_shared_map to store sibling info */ per_cpu(cpu_llc_id, cpu) = node_id; /* core id has to be in the [0 .. cores_per_node - 1] range */ - c->cpu_core_id %= cores_per_node; - c->compute_unit_id %= cus_per_node; + c->cpu_core_id %= cus_per_node; } } #endif diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index b2c99f811c3f..a2065d3b3b39 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -422,7 +422,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) if (c->phys_proc_id == o->phys_proc_id && per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) && - c->compute_unit_id == o->compute_unit_id) + c->cpu_core_id == o->cpu_core_id) return topology_sane(c, o, "smt"); } else if (c->phys_proc_id == o->phys_proc_id && -- cgit v1.2.3-59-g8ed1b From 34a4cceb78e48c75d1b48b25352a3f3b2cc2b2da Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Fri, 25 Mar 2016 10:08:40 +0800 Subject: x86/cpu: Add advanced power management bits Bit 11 of CPUID 8000_0007 edx is processor feedback interface. Bit 12 of CPUID 8000_0007 edx is accumulated power. Print proper names in proc/cpuinfo Reported-and-tested-by: Borislav Petkov Signed-off-by: Huang Rui Cc: Tony Li Cc: Fenghua Yu Cc: Tony Luck Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Andy Lutomirski Cc: Fengguang Wu Cc: Sherry Hurwitz Cc: Borislav Petkov Cc: "Len Brown" Link: http://lkml.kernel.org/r/1458871720-3209-1-git-send-email-ray.huang@amd.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/powerflags.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c index 31f0f335ed22..1dd8294fd730 100644 --- a/arch/x86/kernel/cpu/powerflags.c +++ b/arch/x86/kernel/cpu/powerflags.c @@ -18,4 +18,6 @@ const char *const x86_power_flags[32] = { "", /* tsc invariant mapped to constant_tsc */ "cpb", /* core performance boost */ "eff_freq_ro", /* Readonly aperf/mperf */ + "proc_feedback", /* processor feedback interface */ + "acc_power", /* accumulated power mechanism */ }; -- cgit v1.2.3-59-g8ed1b From c4004b02f8e5b9ce357a0bb1641756cc86962664 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 6 Apr 2016 13:45:07 -0700 Subject: x86: remove the kernel code/data/bss resources from /proc/iomem Let's see if anybody even notices. I doubt anybody uses this, and it does expose addresses that should be randomized, so let's just remove the code. It's old and traditional, and it used to be cute, but we should have removed this long ago. If it turns out anybody notices and this breaks something, we'll have to revert this, and maybe we'll end up using other approaches instead (using %pK or similar). But removing unnecessary code is always the preferred option. Noted-by: Emrah Demir Signed-off-by: Linus Torvalds --- arch/x86/kernel/setup.c | 37 ------------------------------------- 1 file changed, 37 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2367ae07eb76..319b08a5b6ed 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -146,31 +146,6 @@ int default_check_phys_apicid_present(int phys_apicid) struct boot_params boot_params; -/* - * Machine setup.. - */ -static struct resource data_resource = { - .name = "Kernel data", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM -}; - -static struct resource code_resource = { - .name = "Kernel code", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM -}; - -static struct resource bss_resource = { - .name = "Kernel bss", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM -}; - - #ifdef CONFIG_X86_32 /* cpu data as detected by the assembly code in head.S */ struct cpuinfo_x86 new_cpu_data = { @@ -949,13 +924,6 @@ void __init setup_arch(char **cmdline_p) mpx_mm_init(&init_mm); - code_resource.start = __pa_symbol(_text); - code_resource.end = __pa_symbol(_etext)-1; - data_resource.start = __pa_symbol(_etext); - data_resource.end = __pa_symbol(_edata)-1; - bss_resource.start = __pa_symbol(__bss_start); - bss_resource.end = __pa_symbol(__bss_stop)-1; - #ifdef CONFIG_CMDLINE_BOOL #ifdef CONFIG_CMDLINE_OVERRIDE strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); @@ -1019,11 +987,6 @@ void __init setup_arch(char **cmdline_p) x86_init.resources.probe_roms(); - /* after parse_early_param, so could debug it */ - insert_resource(&iomem_resource, &code_resource); - insert_resource(&iomem_resource, &data_resource); - insert_resource(&iomem_resource, &bss_resource); - e820_add_kernel_range(); trim_bios_range(); #ifdef CONFIG_X86_32 -- cgit v1.2.3-59-g8ed1b From a3125494cff084b098c80bb36fbe2061ffed9d52 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 6 Apr 2016 10:05:16 +0200 Subject: x86/mce: Avoid using object after free in genpool When we loop over all queued machine check error records to pass them to the registered notifiers we use llist_for_each_entry(). But the loop calls gen_pool_free() for the entry in the body of the loop - and then the iterator looks at node->next after the free. Use llist_for_each_entry_safe() instead. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: Cc: Gong Chen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-edac Link: http://lkml.kernel.org/r/0205920@agluck-desk.sc.intel.com Link: http://lkml.kernel.org/r/1459929916-12852-4-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce-genpool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c index 0a850100c594..2658e2af74ec 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c +++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c @@ -29,7 +29,7 @@ static char gen_pool_buf[MCE_POOLSZ]; void mce_gen_pool_process(void) { struct llist_node *head; - struct mce_evt_llist *node; + struct mce_evt_llist *node, *tmp; struct mce *mce; head = llist_del_all(&mce_event_llist); @@ -37,7 +37,7 @@ void mce_gen_pool_process(void) return; head = llist_reverse_order(head); - llist_for_each_entry(node, head, llnode) { + llist_for_each_entry_safe(node, tmp, head, llnode) { mce = &node->mce; atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node)); -- cgit v1.2.3-59-g8ed1b From 4046d6e81f33b7ef50d6668b78076d54c5e066b6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 14 Apr 2016 11:18:57 -0700 Subject: Revert "x86: remove the kernel code/data/bss resources from /proc/iomem" This reverts commit c4004b02f8e5b9ce357a0bb1641756cc86962664. Sadly, my hope that nobody would actually use the special kernel entries in /proc/iomem were dashed by kexec. Which reads /proc/iomem explicitly to find the kernel base address. Nasty. Anyway, that means we can't do the sane and simple thing and just remove the entries, and we'll instead have to mask them out based on permissions. Reported-by: Zhengyu Zhang Reported-by: Dave Young Reported-by: Freeman Zhang Reported-by: Emrah Demir Reported-by: Baoquan He Signed-off-by: Linus Torvalds --- arch/x86/kernel/setup.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 319b08a5b6ed..2367ae07eb76 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -146,6 +146,31 @@ int default_check_phys_apicid_present(int phys_apicid) struct boot_params boot_params; +/* + * Machine setup.. + */ +static struct resource data_resource = { + .name = "Kernel data", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM +}; + +static struct resource code_resource = { + .name = "Kernel code", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM +}; + +static struct resource bss_resource = { + .name = "Kernel bss", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM +}; + + #ifdef CONFIG_X86_32 /* cpu data as detected by the assembly code in head.S */ struct cpuinfo_x86 new_cpu_data = { @@ -924,6 +949,13 @@ void __init setup_arch(char **cmdline_p) mpx_mm_init(&init_mm); + code_resource.start = __pa_symbol(_text); + code_resource.end = __pa_symbol(_etext)-1; + data_resource.start = __pa_symbol(_etext); + data_resource.end = __pa_symbol(_edata)-1; + bss_resource.start = __pa_symbol(__bss_start); + bss_resource.end = __pa_symbol(__bss_stop)-1; + #ifdef CONFIG_CMDLINE_BOOL #ifdef CONFIG_CMDLINE_OVERRIDE strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); @@ -987,6 +1019,11 @@ void __init setup_arch(char **cmdline_p) x86_init.resources.probe_roms(); + /* after parse_early_param, so could debug it */ + insert_resource(&iomem_resource, &code_resource); + insert_resource(&iomem_resource, &data_resource); + insert_resource(&iomem_resource, &bss_resource); + e820_add_kernel_range(); trim_bios_range(); #ifdef CONFIG_X86_32 -- cgit v1.2.3-59-g8ed1b From 1e2ae9ec072f3b7887f456426bc2cf23b80f661a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 15 Apr 2016 15:50:32 +0200 Subject: x86/hyperv: Avoid reporting bogus NMI status for Gen2 instances Generation2 instances don't support reporting the NMI status on port 0x61, read from there returns 'ff' and we end up reporting nonsensical PCI error (as there is no PCI bus in these instances) on all NMIs: NMI: PCI system error (SERR) for reason ff on CPU 0. Dazed and confused, but trying to continue Fix the issue by overriding x86_platform.get_nmi_reason. Use 'booted on EFI' flag to detect Gen2 instances. Signed-off-by: Vitaly Kuznetsov Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Cathy Avery Cc: Haiyang Zhang Cc: Jiri Olsa Cc: K. Y. Srinivasan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: devel@linuxdriverproject.org Link: http://lkml.kernel.org/r/1460728232-31433-1-git-send-email-vkuznets@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mshyperv.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 4e7c6933691c..10c11b4da31d 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -152,6 +152,11 @@ static struct clocksource hyperv_cs = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +static unsigned char hv_get_nmi_reason(void) +{ + return 0; +} + static void __init ms_hyperv_init_platform(void) { /* @@ -191,6 +196,13 @@ static void __init ms_hyperv_init_platform(void) machine_ops.crash_shutdown = hv_machine_crash_shutdown; #endif mark_tsc_unstable("running on Hyper-V"); + + /* + * Generation 2 instances don't support reading the NMI status from + * 0x61 port. + */ + if (efi_enabled(EFI_BOOT)) + x86_platform.get_nmi_reason = hv_get_nmi_reason; } const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { -- cgit v1.2.3-59-g8ed1b From e16d8a6cbb499c5c8bfe9330d3351b649bded4af Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Apr 2016 08:52:44 -0700 Subject: Revert "x86/mm/32: Set NX in __supported_pte_mask before enabling paging" This reverts commit 320d25b6a05f8b73c23fc21025d2906ecdd2d4fc. This change was problematic for a couple of reasons: 1. It missed a some entry points (Xen things and 64-bit native). 2. The entry it changed can be executed more than once. This isn't really a problem, but it conflated per-cpu state setup and global state setup. 3. It broke 64-bit non-NX. 64-bit non-NX worked the other way around from 32-bit -- __supported_pte_mask had NX set initially and was *cleared* in x86_configure_nx. With the patch applied, it never got cleared. Reported-and-tested-by: Meelis Roos Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/59bd15f7f4b56b633a611b7f70876c6d2ad01a98.1461685884.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 6 ------ arch/x86/mm/setup_nx.c | 5 +++-- 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 54cdbd2003fe..af1112980dd4 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -389,12 +389,6 @@ default_entry: /* Make changes effective */ wrmsr - /* - * And make sure that all the mappings we set up have NX set from - * the beginning. - */ - orl $(1 << (_PAGE_BIT_NX - 32)), pa(__supported_pte_mask + 4) - enable_paging: /* diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index 8bea84724a7d..f65a33f505b6 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c @@ -32,8 +32,9 @@ early_param("noexec", noexec_setup); void x86_configure_nx(void) { - /* If disable_nx is set, clear NX on all new mappings going forward. */ - if (disable_nx) + if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx) + __supported_pte_mask |= _PAGE_NX; + else __supported_pte_mask &= ~_PAGE_NX; } -- cgit v1.2.3-59-g8ed1b From 1bdb8970392a68489b469c3a330a1adb5ef61beb Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 27 Apr 2016 14:22:32 -0600 Subject: x86/apic: Handle zero vector gracefully in clear_vector_irq() If x86_vector_alloc_irq() fails x86_vector_free_irqs() is invoked to cleanup the already allocated vectors. This subsequently calls clear_vector_irq(). The failed irq has no vector assigned, which triggers the BUG_ON(!vector) in clear_vector_irq(). We cannot suppress the call to x86_vector_free_irqs() for the failed interrupt, because the other data related to this irq must be cleaned up as well. So calling clear_vector_irq() with vector == 0 is legitimate. Remove the BUG_ON and return if vector is zero, [ tglx: Massaged changelog ] Fixes: b5dc8e6c21e7 "x86/irq: Use hierarchical irqdomain to manage CPU interrupt vectors" Signed-off-by: Keith Busch Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index ad59d70bcb1a..ef495511f019 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -256,7 +256,8 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data) struct irq_desc *desc; int cpu, vector; - BUG_ON(!data->cfg.vector); + if (!data->cfg.vector) + return; vector = data->cfg.vector; for_each_cpu_and(cpu, data->domain, cpu_online_mask) -- cgit v1.2.3-59-g8ed1b From 08914f436bdd2ed60923f49cbc402307aba20fe4 Mon Sep 17 00:00:00 2001 From: Alex Thorlton Date: Wed, 4 May 2016 17:39:52 -0500 Subject: x86/platform/UV: Bring back the call to map_low_mmrs in uv_system_init A while back the following commit: d394f2d9d8e1 ("x86/platform/UV: Remove EFI memmap quirk for UV2+") changed uv_system_init() to only call map_low_mmrs() on older UV1 hardware, which requires EFI_OLD_MEMMAP to be set in order to boot. The recent changes to the EFI memory mapping code in: d2f7cbe7b26a ("x86/efi: Runtime services virtual mapping") exposed some issues with the fact that we were relying on the EFI memory mapping mechanisms to map in our MMRs for us, after commit d394f2d9d8e1. Rather than revert the entire commit and go back to forcing EFI_OLD_MEMMAP on all UVs, we're going to add the call to map_low_mmrs() back into uv_system_init(), and then fix up our EFI runtime calls to use the appropriate page table. For now, UV2+ will still need efi=old_map to boot, but there will be other changes soon that should eliminate the need for this. Signed-off-by: Alex Thorlton Cc: Matt Fleming Cc: Adam Buchbinder Cc: Len Brown Cc: Borislav Petkov Cc: Russ Anderson Cc: Dimitri Sivanich Link: http://lkml.kernel.org/r/1462401592-120735-1-git-send-email-athorlton@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 8f4942e2bcbb..d7ce96a7daca 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -891,9 +891,7 @@ void __init uv_system_init(void) } pr_info("UV: Found %s hub\n", hub); - /* We now only need to map the MMRs on UV1 */ - if (is_uv1_hub()) - map_low_mmrs(); + map_low_mmrs(); m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); m_val = m_n_config.s.m_skt; -- cgit v1.2.3-59-g8ed1b From c10fcb14c7afd6688c7b197a814358fecf244222 Mon Sep 17 00:00:00 2001 From: Wang YanQing Date: Thu, 5 May 2016 14:14:21 +0100 Subject: x86/sysfb_efi: Fix valid BAR address range check The code for checking whether a BAR address range is valid will break out of the loop when a start address of 0x0 is encountered. This behaviour is wrong since by breaking out of the loop we may miss the BAR that describes the EFI frame buffer in a later iteration. Because of this bug I can't use video=efifb: boot parameter to get efifb on my new ThinkPad E550 for my old linux system hard disk with 3.10 kernel. In 3.10, efifb is the only choice due to DRM/I915 not supporting the GPU. This patch also add a trivial optimization to break out after we find the frame buffer address range without testing later BARs. Signed-off-by: Wang YanQing [ Rewrote changelog. ] Signed-off-by: Matt Fleming Reviewed-by: Peter Jones Cc: Cc: Ard Biesheuvel Cc: David Herrmann Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tomi Valkeinen Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1462454061-21561-2-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/kernel/sysfb_efi.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c index b285d4e8c68e..5da924bbf0a0 100644 --- a/arch/x86/kernel/sysfb_efi.c +++ b/arch/x86/kernel/sysfb_efi.c @@ -106,14 +106,24 @@ static int __init efifb_set_system(const struct dmi_system_id *id) continue; for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { resource_size_t start, end; + unsigned long flags; + + flags = pci_resource_flags(dev, i); + if (!(flags & IORESOURCE_MEM)) + continue; + + if (flags & IORESOURCE_UNSET) + continue; + + if (pci_resource_len(dev, i) == 0) + continue; start = pci_resource_start(dev, i); - if (start == 0) - break; end = pci_resource_end(dev, i); if (screen_info.lfb_base >= start && screen_info.lfb_base < end) { found_bar = 1; + break; } } } -- cgit v1.2.3-59-g8ed1b From 886123fb3a8656699dff40afa0573df359abeb18 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Fri, 6 May 2016 11:33:39 +0800 Subject: x86/tsc: Read all ratio bits from MSR_PLATFORM_INFO Currently we read the tsc radio: ratio = (MSR_PLATFORM_INFO >> 8) & 0x1f; Thus we get bit 8-12 of MSR_PLATFORM_INFO, however according to the SDM (35.5), the ratio bits are bit 8-15. Ignoring the upper bits can result in an incorrect tsc ratio, which causes the TSC calibration and the Local APIC timer frequency to be incorrect. Fix this problem by masking 0xff instead. [ tglx: Massaged changelog ] Fixes: 7da7c1561366 "x86, tsc: Add static (MSR) TSC calibration on Intel Atom SoCs" Signed-off-by: Chen Yu Cc: "Rafael J. Wysocki" Cc: stable@vger.kernel.org Cc: Bin Gao Cc: Len Brown Link: http://lkml.kernel.org/r/1462505619-5516-1-git-send-email-yu.c.chen@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc_msr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 92ae6acac8a7..6aa0f4d9eea6 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -92,7 +92,7 @@ unsigned long try_msr_calibrate_tsc(void) if (freq_desc_tables[cpu_index].msr_plat) { rdmsr(MSR_PLATFORM_INFO, lo, hi); - ratio = (lo >> 8) & 0x1f; + ratio = (lo >> 8) & 0xff; } else { rdmsr(MSR_IA32_PERF_STATUS, lo, hi); ratio = (hi >> 8) & 0x1f; -- cgit v1.2.3-59-g8ed1b