From 2e5ad6b9c45d43cc4e7b8ac5ded1c55a7c4a3893 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 23 May 2012 12:53:11 -0400 Subject: xen/hvc: Collapse error logic. All of the error paths are doing the same logic. In which case we might as well collapse them in one path. CC: stable@kernel.org Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- drivers/tty/hvc/hvc_xen.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index d3d91dae065c..afc7fc27aa52 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -216,22 +216,16 @@ static int xen_hvm_console_init(void) return 0; r = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v); - if (r < 0) { - kfree(info); - return -ENODEV; - } + if (r < 0) + goto err; info->evtchn = v; hvm_get_parameter(HVM_PARAM_CONSOLE_PFN, &v); - if (r < 0) { - kfree(info); - return -ENODEV; - } + if (r < 0) + goto err; mfn = v; info->intf = ioremap(mfn << PAGE_SHIFT, PAGE_SIZE); - if (info->intf == NULL) { - kfree(info); - return -ENODEV; - } + if (info->intf == NULL) + goto err; info->vtermno = HVC_COOKIE; spin_lock(&xencons_lock); @@ -239,6 +233,9 @@ static int xen_hvm_console_init(void) spin_unlock(&xencons_lock); return 0; +err: + kfree(info); + return -ENODEV; } static int xen_pv_console_init(void) -- cgit v1.2.3-59-g8ed1b From a32c88b9386ce3df87f28dd46bdc3776cd6edf75 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 23 May 2012 12:55:38 -0400 Subject: xen/hvc: Fix error cases around HVM_PARAM_CONSOLE_PFN We weren't resetting the parameter to be passed in to a known default. Nor were we checking the return value of hvm_get_parameter. CC: stable@kernel.org Signed-off-by: Konrad Rzeszutek Wilk --- drivers/tty/hvc/hvc_xen.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index afc7fc27aa52..3277f0eec4a7 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -219,7 +219,8 @@ static int xen_hvm_console_init(void) if (r < 0) goto err; info->evtchn = v; - hvm_get_parameter(HVM_PARAM_CONSOLE_PFN, &v); + v = 0; + r = hvm_get_parameter(HVM_PARAM_CONSOLE_PFN, &v); if (r < 0) goto err; mfn = v; -- cgit v1.2.3-59-g8ed1b From 5842f5768599094758931b74190cdf93641a8e35 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 23 May 2012 12:56:59 -0400 Subject: xen/hvc: Check HVM_PARAM_CONSOLE_[EVTCHN|PFN] for correctness. We need to make sure that those parameters are setup to be correct. As such the value of 0 is deemed invalid and we find that we bail out. The hypervisor sets by default all of them to be zero and when the hypercall is done does a simple: a.value = d->arch.hvm_domain.params[a.index]; Which means that if the Xen toolstack forgot to setup the proper HVM_PARAM_CONSOLE_EVTCHN (or the PFN one), we would get the default value of 0 and use that. CC: stable@kernel.org Fixes-Oracle-Bug: 14091238 Signed-off-by: Konrad Rzeszutek Wilk --- drivers/tty/hvc/hvc_xen.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 3277f0eec4a7..944eaeb8e0cf 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -214,14 +214,19 @@ static int xen_hvm_console_init(void) /* already configured */ if (info->intf != NULL) return 0; - + /* + * If the toolstack (or the hypervisor) hasn't set these values, the + * default value is 0. Even though mfn = 0 and evtchn = 0 are + * theoretically correct values, in practice they never are and they + * mean that a legacy toolstack hasn't initialized the pv console correctly. + */ r = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v); - if (r < 0) + if (r < 0 || v == 0) goto err; info->evtchn = v; v = 0; r = hvm_get_parameter(HVM_PARAM_CONSOLE_PFN, &v); - if (r < 0) + if (r < 0 || v == 0) goto err; mfn = v; info->intf = ioremap(mfn << PAGE_SHIFT, PAGE_SIZE); -- cgit v1.2.3-59-g8ed1b From 5e152e6c4b0da84c66cad56597b42c4ecedb0448 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 23 May 2012 13:28:44 -0400 Subject: xen/events: Add WARN_ON when quick lookup found invalid type. All of the bind_XYZ_to_irq do a quick lookup to see if the event exists. And if it does, then the initialized IRQ number is returned instead of initializing a new IRQ number. This patch adds an extra logic to check that the type returned is proper one and that there is an IRQ handler setup for it. This patch has the benefit of being able to find drivers that are doing something naught. [v1: Enhanced based on Stefano's review] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index faae2f910ad2..af8b8fbd9d88 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -827,6 +827,9 @@ int bind_evtchn_to_irq(unsigned int evtchn) handle_edge_irq, "event"); xen_irq_info_evtchn_init(irq, evtchn); + } else { + struct irq_info *info = info_for_irq(irq); + WARN_ON(info == NULL || info->type != IRQT_EVTCHN); } out: @@ -862,6 +865,9 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) xen_irq_info_ipi_init(cpu, irq, evtchn, ipi); bind_evtchn_to_cpu(evtchn, cpu); + } else { + struct irq_info *info = info_for_irq(irq); + WARN_ON(info == NULL || info->type != IRQT_IPI); } out: @@ -939,6 +945,9 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu) xen_irq_info_virq_init(cpu, irq, evtchn, virq); bind_evtchn_to_cpu(evtchn, cpu); + } else { + struct irq_info *info = info_for_irq(irq); + WARN_ON(info == NULL || info->type != IRQT_VIRQ); } out: -- cgit v1.2.3-59-g8ed1b From 780dbcd0eedec6528d777b668019dabb36badf1a Mon Sep 17 00:00:00 2001 From: "Zhang, Yang Z" Date: Tue, 22 May 2012 08:40:16 +0000 Subject: xen/pci: Check for PCI bridge before using it. Some SR-IOV devices may use more than one bus number, but there is no real bridges because that have internal routing mechanism. So need to check whether the bridge is existing before using it. Signed-off-by: Yang Zhang Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c index b84bf0b6cc34..18fff88254eb 100644 --- a/drivers/xen/pci.c +++ b/drivers/xen/pci.c @@ -59,7 +59,7 @@ static int xen_add_device(struct device *dev) #ifdef CONFIG_ACPI handle = DEVICE_ACPI_HANDLE(&pci_dev->dev); - if (!handle) + if (!handle && pci_dev->bus->bridge) handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge); #ifdef CONFIG_PCI_IOV if (!handle && pci_dev->is_virtfn) -- cgit v1.2.3-59-g8ed1b From 58b7b53a36b0be8081fbfc91aeea24b83c20ca1b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 29 May 2012 12:36:43 -0400 Subject: xen/balloon: Subtract from xen_released_pages the count that is populated. We did not take into account that xen_released_pages would be used outside the initial E820 parsing code. As such we would did not subtract from xen_released_pages the count of pages that we had populated back (instead we just did a simple extra_pages = released - populated). The balloon driver uses xen_released_pages to set the initial current_pages count. If this is wrong (too low) then when a new (higher) target is set, the balloon driver will request too many pages from Xen." This fixes errors such as: (XEN) memory.c:133:d0 Could not allocate order=0 extent: id=0 memflags=0 (51 of 512) during bootup and free_memory : 0 where the free_memory should be 128. Acked-by: David Vrabel [v1: Per David's review made the git commit better] Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/setup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 3ebba0753d38..a4790bf22c59 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -371,7 +371,8 @@ char * __init xen_memory_setup(void) populated = xen_populate_chunk(map, memmap.nr_entries, max_pfn, &last_pfn, xen_released_pages); - extra_pages += (xen_released_pages - populated); + xen_released_pages -= populated; + extra_pages += xen_released_pages; if (last_pfn > max_pfn) { max_pfn = min(MAX_DOMAIN_PAGES, last_pfn); -- cgit v1.2.3-59-g8ed1b From 5e626254206a709c6e937f3dda69bf26c7344f6f Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 29 May 2012 13:07:31 +0200 Subject: xen/setup: filter APERFMPERF cpuid feature out Xen PV kernels allow access to the APERF/MPERF registers to read the effective frequency. Access to the MSRs is however redirected to the currently scheduled physical CPU, making consecutive read and compares unreliable. In addition each rdmsr traps into the hypervisor. So to avoid bogus readouts and expensive traps, disable the kernel internal feature flag for APERF/MPERF if running under Xen. This will a) remove the aperfmperf flag from /proc/cpuinfo b) not mislead the power scheduler (arch/x86/kernel/cpu/sched.c) to use the feature to improve scheduling (by default disabled) c) not mislead the cpufreq driver to use the MSRs This does not cover userland programs which access the MSRs via the device file interface, but this will be addressed separately. Signed-off-by: Andre Przywara Cc: stable@vger.kernel.org # v3.0+ Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index d1f9a0472d44..272ebd0ce326 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -208,6 +208,9 @@ static void __init xen_banner(void) xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); } +#define CPUID_THERM_POWER_LEAF 6 +#define APERFMPERF_PRESENT 0 + static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; @@ -241,6 +244,11 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, *dx = cpuid_leaf5_edx_val; return; + case CPUID_THERM_POWER_LEAF: + /* Disabling APERFMPERF for kernel usage */ + maskecx = ~(1 << APERFMPERF_PRESENT); + break; + case 0xb: /* Suppress extended topology stuff */ maskebx = 0; -- cgit v1.2.3-59-g8ed1b From b9e0d95c041ca2d7ad297ee37c2e9cfab67a188f Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 23 May 2012 18:57:20 +0100 Subject: xen: mark local pages as FOREIGN in the m2p_override When the frontend and the backend reside on the same domain, even if we add pages to the m2p_override, these pages will never be returned by mfn_to_pfn because the check "get_phys_to_machine(pfn) != mfn" will always fail, so the pfn of the frontend will be returned instead (resulting in a deadlock because the frontend pages are already locked). INFO: task qemu-system-i38:1085 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. qemu-system-i38 D ffff8800cfc137c0 0 1085 1 0x00000000 ffff8800c47ed898 0000000000000282 ffff8800be4596b0 00000000000137c0 ffff8800c47edfd8 ffff8800c47ec010 00000000000137c0 00000000000137c0 ffff8800c47edfd8 00000000000137c0 ffffffff82213020 ffff8800be4596b0 Call Trace: [] ? __lock_page+0x70/0x70 [] schedule+0x29/0x70 [] io_schedule+0x60/0x80 [] sleep_on_page+0xe/0x20 [] __wait_on_bit_lock+0x5a/0xc0 [] __lock_page+0x67/0x70 [] ? autoremove_wake_function+0x40/0x40 [] ? bio_add_page+0x36/0x40 [] set_page_dirty_lock+0x52/0x60 [] bio_set_pages_dirty+0x51/0x70 [] do_blockdev_direct_IO+0xb24/0xeb0 [] ? ext3_get_blocks_handle+0xe00/0xe00 [] __blockdev_direct_IO+0x55/0x60 [] ? ext3_get_blocks_handle+0xe00/0xe00 [] ext3_direct_IO+0xf8/0x390 [] ? ext3_get_blocks_handle+0xe00/0xe00 [] ? xen_mc_flush+0xb0/0x1b0 [] generic_file_aio_read+0x737/0x780 [] ? gnttab_map_refs+0x15b/0x1e0 [] ? find_get_pages+0x150/0x150 [] aio_rw_vect_retry+0x7c/0x1d0 [] ? lookup_ioctx+0x90/0x90 [] aio_run_iocb+0x66/0x1a0 [] do_io_submit+0x708/0xb90 [] sys_io_submit+0x10/0x20 [] system_call_fastpath+0x16/0x1b The explanation is in the comment within the code: We need to do this because the pages shared by the frontend (xen-blkfront) can be already locked (lock_page, called by do_read_cache_page); when the userspace backend tries to use them with direct_IO, mfn_to_pfn returns the pfn of the frontend, so do_blockdev_direct_IO is going to try to lock the same pages again resulting in a deadlock. A simplified call graph looks like this: pygrub QEMU ----------------------------------------------- do_read_cache_page io_submit | | lock_page ext3_direct_IO | bio_add_page | lock_page Internally the xen-blkback uses m2p_add_override to swizzle (temporarily) a 'struct page' to have a different MFN (so that it can point to another guest). It also can easily find out whether another pfn corresponding to the mfn exists in the m2p, and can set the FOREIGN bit in the p2m, making sure that mfn_to_pfn returns the pfn of the backend. This allows the backend to perform direct_IO on these pages, but as a side effect prevents the frontend from using get_user_pages_fast on them while they are being shared with the backend. Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/p2m.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index ffd08c414e91..64effdc6da94 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -706,6 +706,7 @@ int m2p_add_override(unsigned long mfn, struct page *page, unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; + int ret = 0; pfn = page_to_pfn(page); if (!PageHighMem(page)) { @@ -741,6 +742,24 @@ int m2p_add_override(unsigned long mfn, struct page *page, list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); spin_unlock_irqrestore(&m2p_override_lock, flags); + /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in + * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other + * pfn so that the following mfn_to_pfn(mfn) calls will return the + * pfn from the m2p_override (the backend pfn) instead. + * We need to do this because the pages shared by the frontend + * (xen-blkfront) can be already locked (lock_page, called by + * do_read_cache_page); when the userspace backend tries to use them + * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so + * do_blockdev_direct_IO is going to try to lock the same pages + * again resulting in a deadlock. + * As a side effect get_user_pages_fast might not be safe on the + * frontend pages while they are being shared with the backend, + * because mfn_to_pfn (that ends up being called by GUPF) will + * return the backend pfn rather than the frontend pfn. */ + ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); + if (ret == 0 && get_phys_to_machine(pfn) == mfn) + set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); + return 0; } EXPORT_SYMBOL_GPL(m2p_add_override); @@ -752,6 +771,7 @@ int m2p_remove_override(struct page *page, bool clear_pte) unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; + int ret = 0; pfn = page_to_pfn(page); mfn = get_phys_to_machine(pfn); @@ -821,6 +841,22 @@ int m2p_remove_override(struct page *page, bool clear_pte) } else set_phys_to_machine(pfn, page->index); + /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present + * somewhere in this domain, even before being added to the + * m2p_override (see comment above in m2p_add_override). + * If there are no other entries in the m2p_override corresponding + * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for + * the original pfn (the one shared by the frontend): the backend + * cannot do any IO on this page anymore because it has been + * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of + * the original pfn causes mfn_to_pfn(mfn) to return the frontend + * pfn again. */ + mfn &= ~FOREIGN_FRAME_BIT; + ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); + if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && + m2p_find_override(mfn) == NULL) + set_phys_to_machine(pfn, mfn); + return 0; } EXPORT_SYMBOL_GPL(m2p_remove_override); -- cgit v1.2.3-59-g8ed1b