Diffstat (limited to 'drivers/xen')
-rw-r--r--  drivers/xen/Kconfig                              91
-rw-r--r--  drivers/xen/Makefile                             13
-rw-r--r--  drivers/xen/balloon.c                           208
-rw-r--r--  drivers/xen/cpu_hotplug.c                         8
-rw-r--r--  drivers/xen/events/events_2l.c                   52
-rw-r--r--  drivers/xen/events/events_base.c                966
-rw-r--r--  drivers/xen/events/events_fifo.c                113
-rw-r--r--  drivers/xen/events/events_internal.h            114
-rw-r--r--  drivers/xen/evtchn.c                             81
-rw-r--r--  drivers/xen/features.c                           18
-rw-r--r--  drivers/xen/gntdev-common.h                       3
-rw-r--r--  drivers/xen/gntdev-dmabuf.c                      24
-rw-r--r--  drivers/xen/gntdev.c                            162
-rw-r--r--  drivers/xen/grant-table.c                       128
-rw-r--r--  drivers/xen/manage.c                              1
-rw-r--r--  drivers/xen/mem-reservation.c                    27
-rw-r--r--  drivers/xen/pci.c                                76
-rw-r--r--  drivers/xen/pcpu.c                               41
-rw-r--r--  drivers/xen/platform-pci.c                       10
-rw-r--r--  drivers/xen/preempt.c                            42
-rw-r--r--  drivers/xen/privcmd.c                            96
-rw-r--r--  drivers/xen/pvcalls-back.c                       87
-rw-r--r--  drivers/xen/pvcalls-front.c                      20
-rw-r--r--  drivers/xen/swiotlb-xen.c                       321
-rw-r--r--  drivers/xen/time.c                                5
-rw-r--r--  drivers/xen/unpopulated-alloc.c                 204
-rw-r--r--  drivers/xen/xen-acpi-cpuhotplug.c               446
-rw-r--r--  drivers/xen/xen-acpi-memhotplug.c               475
-rw-r--r--  drivers/xen/xen-acpi-processor.c                  9
-rw-r--r--  drivers/xen/xen-balloon.c                        28
-rw-r--r--  drivers/xen/xen-front-pgdir-shbuf.c              11
-rw-r--r--  drivers/xen/xen-pciback/Makefile                  7
-rw-r--r--  drivers/xen/xen-pciback/conf_space.c             18
-rw-r--r--  drivers/xen/xen-pciback/conf_space.h              8
-rw-r--r--  drivers/xen/xen-pciback/conf_space_capability.c   2
-rw-r--r--  drivers/xen/xen-pciback/conf_space_header.c      52
-rw-r--r--  drivers/xen/xen-pciback/conf_space_quirks.c       6
-rw-r--r--  drivers/xen/xen-pciback/pci_stub.c               58
-rw-r--r--  drivers/xen/xen-pciback/pciback.h                19
-rw-r--r--  drivers/xen/xen-pciback/pciback_ops.c           103
-rw-r--r--  drivers/xen/xen-pciback/vpci.c                   29
-rw-r--r--  drivers/xen/xen-pciback/xenbus.c                 45
-rw-r--r--  drivers/xen/xen-scsiback.c                      132
-rw-r--r--  drivers/xen/xen-stub.c                           90
-rw-r--r--  drivers/xen/xenbus/xenbus.h                       4
-rw-r--r--  drivers/xen/xenbus/xenbus_client.c              337
-rw-r--r--  drivers/xen/xenbus/xenbus_comms.c                 8
-rw-r--r--  drivers/xen/xenbus/xenbus_probe.c               219
-rw-r--r--  drivers/xen/xenbus/xenbus_probe_backend.c         8
-rw-r--r--  drivers/xen/xenbus/xenbus_probe_frontend.c       19
-rw-r--r--  drivers/xen/xenbus/xenbus_xs.c                   34
-rw-r--r--  drivers/xen/xlate_mmu.c                           4
52 files changed, 2591 insertions(+), 2491 deletions(-)
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 61212fc7f0c7..33e941e40082 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -13,12 +13,16 @@ config XEN_BALLOON
config XEN_BALLOON_MEMORY_HOTPLUG
bool "Memory hotplug support for Xen balloon driver"
depends on XEN_BALLOON && MEMORY_HOTPLUG
+ default y
help
Memory hotplug support for the Xen balloon driver allows expanding
the memory available to the system above the limit declared at
system startup. It is very useful on critical systems which
require long uptimes without rebooting.
+ It's also very useful for non-PV domains to obtain unpopulated physical
+ memory ranges to use in order to map foreign memory or grants.
+
Memory could be hotplugged in the following steps:
1) target domain: ensure that memory auto online policy is in
@@ -46,13 +50,11 @@ config XEN_BALLOON_MEMORY_HOTPLUG
SUBSYSTEM=="memory", ACTION=="add", RUN+="/bin/sh -c '[ -f /sys$devpath/state ] && echo online > /sys$devpath/state'"
-config XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
+config XEN_MEMORY_HOTPLUG_LIMIT
int "Hotplugged memory limit (in GiB) for a PV guest"
- default 512 if X86_64
- default 4 if X86_32
- range 0 64 if X86_32
+ default 512
depends on XEN_HAVE_PVMMU
- depends on XEN_BALLOON_MEMORY_HOTPLUG
+ depends on MEMORY_HOTPLUG
help
Maximum amount of memory (in GiB) that a PV guest can be
expanded to when using memory hotplug.
@@ -175,12 +177,38 @@ config XEN_GRANT_DMA_ALLOC
config SWIOTLB_XEN
def_bool y
+ depends on XEN_PV || ARM || ARM64
+ select DMA_OPS
select SWIOTLB
+config XEN_PCI_STUB
+ bool
+
+config XEN_PCIDEV_STUB
+ tristate "Xen PCI-device stub driver"
+ depends on PCI && !X86 && XEN
+ depends on XEN_BACKEND
+ select XEN_PCI_STUB
+ default m
+ help
+ The PCI device stub driver provides a limited version of the PCI
+ device backend driver without para-virtualized support for guests.
+ If you select this to be a module, you will need to make sure no
+ other driver has bound to the device(s) you want to make visible to
+ other guests.
+
+ The "hide" parameter (only applicable if backend driver is compiled
+ into the kernel) allows you to bind the PCI devices to this module
+ from the default device drivers. The argument is the list of PCI BDFs:
+ xen-pciback.hide=(03:00.0)(04:00.0)
+
+ If in doubt, say m.
+
config XEN_PCIDEV_BACKEND
tristate "Xen PCI-device backend driver"
depends on PCI && X86 && XEN
depends on XEN_BACKEND
+ select XEN_PCI_STUB
default m
help
The PCI device backend driver allows the kernel to export arbitrary
@@ -211,7 +239,7 @@ config XEN_PVCALLS_FRONTEND
implements them.
config XEN_PVCALLS_BACKEND
- bool "XEN PV Calls backend driver"
+ tristate "XEN PV Calls backend driver"
depends on INET && XEN && XEN_BACKEND
help
Experimental backend for the Xen PV Calls protocol
@@ -231,44 +259,19 @@ config XEN_SCSI_BACKEND
if guests need generic access to SCSI devices.
config XEN_PRIVCMD
- tristate
+ tristate "Xen hypercall passthrough driver"
depends on XEN
default m
-
-config XEN_STUB
- bool "Xen stub drivers"
- depends on XEN && X86_64 && BROKEN
- help
- Allow kernel to install stub drivers, to reserve space for Xen drivers,
- i.e. memory hotplug and cpu hotplug, and to block native drivers loaded,
- so that real Xen drivers can be modular.
-
- To enable Xen features like cpu and memory hotplug, select Y here.
-
-config XEN_ACPI_HOTPLUG_MEMORY
- tristate "Xen ACPI memory hotplug"
- depends on XEN_DOM0 && XEN_STUB && ACPI
help
- This is Xen ACPI memory hotplug.
-
- Currently Xen only support ACPI memory hot-add. If you want
- to hot-add memory at runtime (the hot-added memory cannot be
- removed until machine stop), select Y/M here, otherwise select N.
-
-config XEN_ACPI_HOTPLUG_CPU
- tristate "Xen ACPI cpu hotplug"
- depends on XEN_DOM0 && XEN_STUB && ACPI
- select ACPI_CONTAINER
- help
- Xen ACPI cpu enumerating and hotplugging
-
- For hotplugging, currently Xen only support ACPI cpu hotadd.
- If you want to hotadd cpu at runtime (the hotadded cpu cannot
- be removed until machine stop), select Y/M here.
+ The hypercall passthrough driver allows privileged user programs to
+ perform Xen hypercalls. This driver is normally required for systems
+ running as Dom0 to perform privileged operations, but in some
+ disaggregated Xen setups this driver might be needed for other
+ domains, too.
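
For orientation only, a minimal user-space sketch of what this option
enables (not part of the patch; it assumes the uapi header
<xen/privcmd.h> and uses the harmless XENVER_version hypercall as a
probe — constants copied from Xen's public ABI):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <xen/privcmd.h>

#define __HYPERVISOR_xen_version 17     /* from Xen's public headers */
#define XENVER_version 0                /* returns major << 16 | minor */

int main(void)
{
        struct privcmd_hypercall call = {
                .op = __HYPERVISOR_xen_version,
                .arg = { XENVER_version, 0 },
        };
        int fd = open("/dev/xen/privcmd", O_RDWR);
        long ret;

        if (fd < 0)
                return 1;
        /* The driver forwards this to Xen and returns Xen's result. */
        ret = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &call);
        if (ret >= 0)
                printf("Xen version %ld.%ld\n", ret >> 16, ret & 0xffff);
        close(fd);
        return 0;
}

Running this needs the privileges the help text describes; in an
unprivileged domain Xen simply refuses the hypercall.
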
config XEN_ACPI_PROCESSOR
tristate "Xen ACPI processor"
- depends on XEN && XEN_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ
+ depends on XEN && XEN_PV_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ
default m
help
This ACPI processor uploads Power Management information to the Xen
@@ -286,7 +289,7 @@ config XEN_ACPI_PROCESSOR
config XEN_MCE_LOG
bool "Xen platform mcelog"
- depends on XEN_DOM0 && X86_MCE
+ depends on XEN_PV_DOM0 && X86_MCE
help
Allow the kernel to fetch MCE errors from the Xen platform and
convert them into Linux mcelog format for the mcelog tools
@@ -322,4 +325,14 @@ config XEN_HAVE_VPMU
config XEN_FRONT_PGDIR_SHBUF
tristate
+config XEN_UNPOPULATED_ALLOC
+ bool "Use unpopulated memory ranges for guest mappings"
+ depends on X86 && ZONE_DEVICE
+ default XEN_BACKEND || XEN_GNTDEV || XEN_DOM0
+ help
+ Use unpopulated memory ranges in order to create mappings for guest
+ memory regions, including grant maps and foreign pages. This avoids
+ having to balloon out RAM regions in order to obtain physical memory
+ space to create such mappings.
+
endmenu
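
A rough kernel-side sketch of how a hypothetical driver reaches the
allocator behind XEN_UNPOPULATED_ALLOC; the two function signatures are
the ones introduced by this series in balloon.c/unpopulated-alloc.c
(declared via <xen/xen.h>), the surrounding driver code is illustrative:

#include <linux/kernel.h>
#include <linux/mm.h>
#include <xen/xen.h>

static struct page *example_pages[4];   /* hypothetical driver state */

static int example_get_mapping_space(void)
{
        int ret;

        /*
         * Backed by unpopulated ranges when XEN_UNPOPULATED_ALLOC=y,
         * by ballooned-out pages otherwise.
         */
        ret = xen_alloc_unpopulated_pages(ARRAY_SIZE(example_pages),
                                          example_pages);
        if (ret)
                return ret;

        /* ... map grants or foreign memory into these pages ... */

        xen_free_unpopulated_pages(ARRAY_SIZE(example_pages), example_pages);
        return 0;
}
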
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 0c4efa6fe450..5aae66e638a7 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,12 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
-obj-y += grant-table.o features.o balloon.o manage.o preempt.o time.o
+obj-y += grant-table.o features.o balloon.o manage.o time.o
obj-y += mem-reservation.o
obj-y += events/
obj-y += xenbus/
-nostackp := $(call cc-option, -fno-stack-protector)
-CFLAGS_features.o := $(nostackp)
+CFLAGS_features.o := -fno-stack-protector
dom0-$(CONFIG_ARM64) += arm-device.o
dom0-$(CONFIG_PCI) += pci.o
@@ -22,14 +21,11 @@ obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o
obj-$(CONFIG_XENFS) += xenfs/
obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
-obj-$(CONFIG_XEN_PVHVM) += platform-pci.o
+obj-$(CONFIG_XEN_PVHVM_GUEST) += platform-pci.o
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
obj-$(CONFIG_XEN_MCE_LOG) += mcelog.o
-obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
+obj-$(CONFIG_XEN_PCI_STUB) += xen-pciback/
obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o
-obj-$(CONFIG_XEN_STUB) += xen-stub.o
-obj-$(CONFIG_XEN_ACPI_HOTPLUG_MEMORY) += xen-acpi-memhotplug.o
-obj-$(CONFIG_XEN_ACPI_HOTPLUG_CPU) += xen-acpi-cpuhotplug.o
obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o
obj-$(CONFIG_XEN_EFI) += efi.o
obj-$(CONFIG_XEN_SCSI_BACKEND) += xen-scsiback.o
@@ -42,3 +38,4 @@ xen-gntdev-$(CONFIG_XEN_GNTDEV_DMABUF) += gntdev-dmabuf.o
xen-gntalloc-y := gntalloc.o
xen-privcmd-y := privcmd.o privcmd-buf.o
obj-$(CONFIG_XEN_FRONT_PGDIR_SHBUF) += xen-front-pgdir-shbuf.o
+obj-$(CONFIG_XEN_UNPOPULATED_ALLOC) += unpopulated-alloc.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 0c142bcab79d..ba2ea11e0d3d 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -43,6 +43,8 @@
#include <linux/sched.h>
#include <linux/cred.h>
#include <linux/errno.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/memblock.h>
#include <linux/pagemap.h>
@@ -56,10 +58,9 @@
#include <linux/percpu-defs.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
+#include <linux/moduleparam.h>
#include <asm/page.h>
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
@@ -73,9 +74,14 @@
#include <xen/page.h>
#include <xen/mem-reservation.h>
-static int xen_hotplug_unpopulated;
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "xen."
+
+static uint __read_mostly balloon_boot_timeout = 180;
+module_param(balloon_boot_timeout, uint, 0444);
#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+static int xen_hotplug_unpopulated;
static struct ctl_table balloon_table[] = {
{
@@ -108,6 +114,8 @@ static struct ctl_table xen_root[] = {
{ }
};
+#else
+#define xen_hotplug_unpopulated 0
#endif
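
For reference, the sysctl table above is what exports this knob at
runtime: on kernels with XEN_BALLOON_MEMORY_HOTPLUG enabled, writing 1
to /proc/sys/xen/balloon/hotplug_unpopulated (the path implied by the
xen_root/balloon_table registration shown here) makes the driver satisfy
page requests by hotplugging new memory instead of ballooning out
existing RAM, while the new #else branch hard-wires the value to 0.
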
/*
@@ -117,7 +125,7 @@ static struct ctl_table xen_root[] = {
#define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1)
/*
- * balloon_process() state:
+ * balloon_thread() state:
*
* BP_DONE: done or nothing to do,
* BP_WAIT: wait to be rescheduled,
@@ -125,13 +133,15 @@ static struct ctl_table xen_root[] = {
* BP_ECANCELED: error, balloon operation canceled.
*/
-enum bp_state {
+static enum bp_state {
BP_DONE,
BP_WAIT,
BP_EAGAIN,
BP_ECANCELED
-};
+} balloon_state = BP_DONE;
+/* Main waiting point for xen-balloon thread. */
+static DECLARE_WAIT_QUEUE_HEAD(balloon_thread_wq);
static DEFINE_MUTEX(balloon_mutex);
@@ -146,10 +156,6 @@ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(xen_pfn_t)];
static LIST_HEAD(ballooned_pages);
static DECLARE_WAIT_QUEUE_HEAD(balloon_wq);
-/* Main work function, always executed in process context. */
-static void balloon_process(struct work_struct *work);
-static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
-
/* When ballooning out (allocating memory to return to Xen) we don't really
want the kernel to try too hard since that can trigger the oom killer. */
#define GFP_BALLOON \
@@ -201,18 +207,15 @@ static struct page *balloon_next_page(struct page *page)
return list_entry(next, struct page, lru);
}
-static enum bp_state update_schedule(enum bp_state state)
+static void update_schedule(void)
{
- if (state == BP_WAIT)
- return BP_WAIT;
-
- if (state == BP_ECANCELED)
- return BP_ECANCELED;
+ if (balloon_state == BP_WAIT || balloon_state == BP_ECANCELED)
+ return;
- if (state == BP_DONE) {
+ if (balloon_state == BP_DONE) {
balloon_stats.schedule_delay = 1;
balloon_stats.retry_count = 1;
- return BP_DONE;
+ return;
}
++balloon_stats.retry_count;
@@ -221,7 +224,8 @@ static enum bp_state update_schedule(enum bp_state state)
balloon_stats.retry_count > balloon_stats.max_retry_count) {
balloon_stats.schedule_delay = 1;
balloon_stats.retry_count = 1;
- return BP_ECANCELED;
+ balloon_state = BP_ECANCELED;
+ return;
}
balloon_stats.schedule_delay <<= 1;
@@ -229,7 +233,7 @@ static enum bp_state update_schedule(enum bp_state state)
if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay)
balloon_stats.schedule_delay = balloon_stats.max_schedule_delay;
- return BP_EAGAIN;
+ balloon_state = BP_EAGAIN;
}
#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
@@ -267,20 +271,6 @@ static struct resource *additional_memory_resource(phys_addr_t size)
return NULL;
}
-#ifdef CONFIG_SPARSEMEM
- {
- unsigned long limit = 1UL << (MAX_PHYSMEM_BITS - PAGE_SHIFT);
- unsigned long pfn = res->start >> PAGE_SHIFT;
-
- if (pfn > limit) {
- pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n",
- pfn, limit);
- release_memory_resource(res);
- return NULL;
- }
- }
-#endif
-
return res;
}
@@ -347,7 +337,7 @@ static enum bp_state reserve_additional_memory(void)
mutex_unlock(&balloon_mutex);
/* add_memory_resource() requires the device_hotplug lock */
lock_device_hotplug();
- rc = add_memory_resource(nid, resource);
+ rc = add_memory_resource(nid, resource, MHP_MERGE_RESOURCE);
unlock_device_hotplug();
mutex_lock(&balloon_mutex);
@@ -382,7 +372,7 @@ static void xen_online_page(struct page *page, unsigned int order)
static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v)
{
if (val == MEM_ONLINE)
- schedule_delayed_work(&balloon_worker, 0);
+ wake_up(&balloon_thread_wq);
return NOTIFY_OK;
}
@@ -507,50 +497,79 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
}
/*
- * As this is a work item it is guaranteed to run as a single instance only.
+ * Stop waiting if either state is BP_DONE and ballooning action is
+ * needed, or if the credit has changed while state is not BP_DONE.
+ */
+static bool balloon_thread_cond(long credit)
+{
+ if (balloon_state == BP_DONE)
+ credit = 0;
+
+ return current_credit() != credit || kthread_should_stop();
+}
+
+/*
+ * As this is a kthread it is guaranteed to run as a single instance only.
* We may of course race updates of the target counts (which are protected
* by the balloon lock), or with changes to the Xen hard limit, but we will
* recover from these in time.
*/
-static void balloon_process(struct work_struct *work)
+static int balloon_thread(void *unused)
{
- enum bp_state state = BP_DONE;
long credit;
+ unsigned long timeout;
+
+ set_freezable();
+ for (;;) {
+ switch (balloon_state) {
+ case BP_DONE:
+ case BP_ECANCELED:
+ timeout = 3600 * HZ;
+ break;
+ case BP_EAGAIN:
+ timeout = balloon_stats.schedule_delay * HZ;
+ break;
+ case BP_WAIT:
+ timeout = HZ;
+ break;
+ }
+
+ credit = current_credit();
+
+ wait_event_freezable_timeout(balloon_thread_wq,
+ balloon_thread_cond(credit), timeout);
+ if (kthread_should_stop())
+ return 0;
- do {
mutex_lock(&balloon_mutex);
credit = current_credit();
if (credit > 0) {
if (balloon_is_inflated())
- state = increase_reservation(credit);
+ balloon_state = increase_reservation(credit);
else
- state = reserve_additional_memory();
+ balloon_state = reserve_additional_memory();
}
if (credit < 0) {
long n_pages;
n_pages = min(-credit, si_mem_available());
- state = decrease_reservation(n_pages, GFP_BALLOON);
- if (state == BP_DONE && n_pages != -credit &&
+ balloon_state = decrease_reservation(n_pages,
+ GFP_BALLOON);
+ if (balloon_state == BP_DONE && n_pages != -credit &&
n_pages < totalreserve_pages)
- state = BP_EAGAIN;
+ balloon_state = BP_EAGAIN;
}
- state = update_schedule(state);
+ update_schedule();
mutex_unlock(&balloon_mutex);
cond_resched();
-
- } while (credit && state == BP_DONE);
-
- /* Schedule more work if there is some still to be done. */
- if (state == BP_EAGAIN)
- schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ);
+ }
}
/* Resets the Xen limit, sets new target, and kicks off processing. */
@@ -558,22 +577,25 @@ void balloon_set_new_target(unsigned long target)
{
/* No need for lock. Not read-modify-write updates. */
balloon_stats.target_pages = target;
- schedule_delayed_work(&balloon_worker, 0);
+ wake_up(&balloon_thread_wq);
}
EXPORT_SYMBOL_GPL(balloon_set_new_target);
-static int add_ballooned_pages(int nr_pages)
+#ifndef CONFIG_XEN_UNPOPULATED_ALLOC
+static int add_ballooned_pages(unsigned int nr_pages)
{
enum bp_state st;
if (xen_hotplug_unpopulated) {
st = reserve_additional_memory();
if (st != BP_ECANCELED) {
+ int rc;
+
mutex_unlock(&balloon_mutex);
- wait_event(balloon_wq,
+ rc = wait_event_interruptible(balloon_wq,
!list_empty(&ballooned_pages));
mutex_lock(&balloon_mutex);
- return 0;
+ return rc ? -ENOMEM : 0;
}
}
@@ -588,14 +610,14 @@ static int add_ballooned_pages(int nr_pages)
}
/**
- * alloc_xenballooned_pages - get pages that have been ballooned out
+ * xen_alloc_unpopulated_pages - get pages that have been ballooned out
* @nr_pages: Number of pages to get
* @pages: pages returned
* @return 0 on success, error otherwise
*/
-int alloc_xenballooned_pages(int nr_pages, struct page **pages)
+int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages)
{
- int pgno = 0;
+ unsigned int pgno = 0;
struct page *page;
int ret;
@@ -630,19 +652,25 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages)
return 0;
out_undo:
mutex_unlock(&balloon_mutex);
- free_xenballooned_pages(pgno, pages);
+ xen_free_unpopulated_pages(pgno, pages);
+ /*
+ * NB: free_xenballooned_pages will only subtract pgno pages, but since
+ * target_unpopulated is incremented with nr_pages at the start we need
+ * to remove the remaining ones also, or accounting will be screwed.
+ */
+ balloon_stats.target_unpopulated -= nr_pages - pgno;
return ret;
}
-EXPORT_SYMBOL(alloc_xenballooned_pages);
+EXPORT_SYMBOL(xen_alloc_unpopulated_pages);
/**
- * free_xenballooned_pages - return pages retrieved with get_ballooned_pages
+ * xen_free_unpopulated_pages - return pages retrieved with get_ballooned_pages
* @nr_pages: Number of pages
* @pages: pages to return
*/
-void free_xenballooned_pages(int nr_pages, struct page **pages)
+void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages)
{
- int i;
+ unsigned int i;
mutex_lock(&balloon_mutex);
@@ -655,13 +683,13 @@ void free_xenballooned_pages(int nr_pages, struct page **pages)
/* The balloon may be too large now. Shrink it if needed. */
if (current_credit())
- schedule_delayed_work(&balloon_worker, 0);
+ wake_up(&balloon_thread_wq);
mutex_unlock(&balloon_mutex);
}
-EXPORT_SYMBOL(free_xenballooned_pages);
+EXPORT_SYMBOL(xen_free_unpopulated_pages);
-#ifdef CONFIG_XEN_PV
+#if defined(CONFIG_XEN_PV)
static void __init balloon_add_region(unsigned long start_pfn,
unsigned long pages)
{
@@ -684,9 +712,12 @@ static void __init balloon_add_region(unsigned long start_pfn,
balloon_stats.total_pages += extra_pfn_end - start_pfn;
}
#endif
+#endif
static int __init balloon_init(void)
{
+ struct task_struct *task;
+
if (!xen_domain())
return -ENODEV;
@@ -715,7 +746,7 @@ static int __init balloon_init(void)
register_sysctl_table(xen_root);
#endif
-#ifdef CONFIG_XEN_PV
+#if defined(CONFIG_XEN_PV) && !defined(CONFIG_XEN_UNPOPULATED_ALLOC)
{
int i;
@@ -730,9 +761,50 @@ static int __init balloon_init(void)
}
#endif
+ task = kthread_run(balloon_thread, NULL, "xen-balloon");
+ if (IS_ERR(task)) {
+ pr_err("xen-balloon thread could not be started, ballooning will not work!\n");
+ return PTR_ERR(task);
+ }
+
/* Init the xen-balloon driver. */
xen_balloon_init();
return 0;
}
subsys_initcall(balloon_init);
+
+static int __init balloon_wait_finish(void)
+{
+ long credit, last_credit = 0;
+ unsigned long last_changed = 0;
+
+ if (!xen_domain())
+ return -ENODEV;
+
+ /* PV guests don't need to wait. */
+ if (xen_pv_domain() || !current_credit())
+ return 0;
+
+ pr_notice("Waiting for initial ballooning down having finished.\n");
+
+ while ((credit = current_credit()) < 0) {
+ if (credit != last_credit) {
+ last_changed = jiffies;
+ last_credit = credit;
+ }
+ if (balloon_state == BP_ECANCELED) {
+ pr_warn_once("Initial ballooning failed, %ld pages need to be freed.\n",
+ -credit);
+ if (jiffies - last_changed >= HZ * balloon_boot_timeout)
+ panic("Initial ballooning failed!\n");
+ }
+
+ schedule_timeout_interruptible(HZ / 10);
+ }
+
+ pr_notice("Initial ballooning down finished.\n");
+
+ return 0;
+}
+late_initcall_sync(balloon_wait_finish);
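
Reading this together with the balloon_boot_timeout parameter declared
near the top of the file: the value is in seconds (default 180), so once
the state machine has ended in BP_ECANCELED and the credit has stopped
changing for HZ * balloon_boot_timeout jiffies, balloon_wait_finish()
panics. Thanks to the MODULE_PARAM_PREFIX redefinition it can be
overridden on the kernel command line, e.g. xen.balloon_boot_timeout=300
for a five-minute grace period.
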
diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
index f192b6f42da9..b96b11e2b571 100644
--- a/drivers/xen/cpu_hotplug.c
+++ b/drivers/xen/cpu_hotplug.c
@@ -93,10 +93,8 @@ static int setup_cpu_watcher(struct notifier_block *notifier,
(void)register_xenbus_watch(&cpu_watch);
for_each_possible_cpu(cpu) {
- if (vcpu_online(cpu) == 0) {
- (void)cpu_down(cpu);
- set_cpu_present(cpu, false);
- }
+ if (vcpu_online(cpu) == 0)
+ disable_hotplug_cpu(cpu);
}
return NOTIFY_DONE;
@@ -119,5 +117,5 @@ static int __init setup_vcpu_hotplug_event(void)
return 0;
}
-arch_initcall(setup_vcpu_hotplug_event);
+late_initcall(setup_vcpu_hotplug_event);
diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
index 8edef51c92e5..b8f2f971c2f0 100644
--- a/drivers/xen/events/events_2l.c
+++ b/drivers/xen/events/events_2l.c
@@ -47,43 +47,43 @@ static unsigned evtchn_2l_max_channels(void)
return EVTCHN_2L_NR_CHANNELS;
}
-static void evtchn_2l_bind_to_cpu(struct irq_info *info, unsigned cpu)
+static void evtchn_2l_remove(evtchn_port_t evtchn, unsigned int cpu)
{
- clear_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, info->cpu)));
- set_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, cpu)));
+ clear_bit(evtchn, BM(per_cpu(cpu_evtchn_mask, cpu)));
}
-static void evtchn_2l_clear_pending(unsigned port)
+static void evtchn_2l_bind_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+ unsigned int old_cpu)
{
- struct shared_info *s = HYPERVISOR_shared_info;
- sync_clear_bit(port, BM(&s->evtchn_pending[0]));
+ clear_bit(evtchn, BM(per_cpu(cpu_evtchn_mask, old_cpu)));
+ set_bit(evtchn, BM(per_cpu(cpu_evtchn_mask, cpu)));
}
-static void evtchn_2l_set_pending(unsigned port)
+static void evtchn_2l_clear_pending(evtchn_port_t port)
{
struct shared_info *s = HYPERVISOR_shared_info;
- sync_set_bit(port, BM(&s->evtchn_pending[0]));
+ sync_clear_bit(port, BM(&s->evtchn_pending[0]));
}
-static bool evtchn_2l_is_pending(unsigned port)
+static void evtchn_2l_set_pending(evtchn_port_t port)
{
struct shared_info *s = HYPERVISOR_shared_info;
- return sync_test_bit(port, BM(&s->evtchn_pending[0]));
+ sync_set_bit(port, BM(&s->evtchn_pending[0]));
}
-static bool evtchn_2l_test_and_set_mask(unsigned port)
+static bool evtchn_2l_is_pending(evtchn_port_t port)
{
struct shared_info *s = HYPERVISOR_shared_info;
- return sync_test_and_set_bit(port, BM(&s->evtchn_mask[0]));
+ return sync_test_bit(port, BM(&s->evtchn_pending[0]));
}
-static void evtchn_2l_mask(unsigned port)
+static void evtchn_2l_mask(evtchn_port_t port)
{
struct shared_info *s = HYPERVISOR_shared_info;
sync_set_bit(port, BM(&s->evtchn_mask[0]));
}
-static void evtchn_2l_unmask(unsigned port)
+static void evtchn_2l_unmask(evtchn_port_t port)
{
struct shared_info *s = HYPERVISOR_shared_info;
unsigned int cpu = get_cpu();
@@ -91,6 +91,8 @@ static void evtchn_2l_unmask(unsigned port)
BUG_ON(!irqs_disabled());
+ smp_wmb(); /* All writes before unmask must be visible. */
+
if (unlikely((cpu != cpu_from_evtchn(port))))
do_hypercall = 1;
else {
@@ -159,7 +161,7 @@ static inline xen_ulong_t active_evtchns(unsigned int cpu,
* a bitset of words which contain pending event bits. The second
* level is a bitset of pending events themselves.
*/
-static void evtchn_2l_handle_events(unsigned cpu)
+static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl)
{
int irq;
xen_ulong_t pending_words;
@@ -173,7 +175,7 @@ static void evtchn_2l_handle_events(unsigned cpu)
/* Timer interrupt has highest priority. */
irq = irq_from_virq(cpu, VIRQ_TIMER);
if (irq != -1) {
- unsigned int evtchn = evtchn_from_irq(irq);
+ evtchn_port_t evtchn = evtchn_from_irq(irq);
word_idx = evtchn / BITS_PER_LONG;
bit_idx = evtchn % BITS_PER_LONG;
if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx))
@@ -228,7 +230,7 @@ static void evtchn_2l_handle_events(unsigned cpu)
do {
xen_ulong_t bits;
- int port;
+ evtchn_port_t port;
bits = MASK_LSBS(pending_bits, bit_idx);
@@ -240,10 +242,7 @@ static void evtchn_2l_handle_events(unsigned cpu)
/* Process port. */
port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
- irq = get_evtchn_to_irq(port);
-
- if (irq != -1)
- generic_handle_irq(irq);
+ handle_irq_for_port(port, ctrl);
bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
@@ -355,18 +354,27 @@ static void evtchn_2l_resume(void)
EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD);
}
+static int evtchn_2l_percpu_deinit(unsigned int cpu)
+{
+ memset(per_cpu(cpu_evtchn_mask, cpu), 0, sizeof(xen_ulong_t) *
+ EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD);
+
+ return 0;
+}
+
static const struct evtchn_ops evtchn_ops_2l = {
.max_channels = evtchn_2l_max_channels,
.nr_channels = evtchn_2l_max_channels,
+ .remove = evtchn_2l_remove,
.bind_to_cpu = evtchn_2l_bind_to_cpu,
.clear_pending = evtchn_2l_clear_pending,
.set_pending = evtchn_2l_set_pending,
.is_pending = evtchn_2l_is_pending,
- .test_and_set_mask = evtchn_2l_test_and_set_mask,
.mask = evtchn_2l_mask,
.unmask = evtchn_2l_unmask,
.handle_events = evtchn_2l_handle_events,
.resume = evtchn_2l_resume,
+ .percpu_deinit = evtchn_2l_percpu_deinit,
};
void __init xen_evtchn_2l_init(void)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 499eff7d3f65..46d9295d9a6e 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -33,10 +33,15 @@
#include <linux/slab.h>
#include <linux/irqnr.h>
#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/cpuhotplug.h>
+#include <linux/atomic.h>
+#include <linux/ktime.h>
#ifdef CONFIG_X86
#include <asm/desc.h>
#include <asm/ptrace.h>
+#include <asm/idtentry.h>
#include <asm/irq.h>
#include <asm/io_apic.h>
#include <asm/i8259.h>
@@ -58,10 +63,79 @@
#include <xen/interface/physdev.h>
#include <xen/interface/sched.h>
#include <xen/interface/vcpu.h>
+#include <xen/xenbus.h>
#include <asm/hw_irq.h>
#include "events_internal.h"
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "xen."
+
+/* Interrupt types. */
+enum xen_irq_type {
+ IRQT_UNBOUND = 0,
+ IRQT_PIRQ,
+ IRQT_VIRQ,
+ IRQT_IPI,
+ IRQT_EVTCHN
+};
+
+/*
+ * Packed IRQ information:
+ * type - enum xen_irq_type
+ * event channel - irq->event channel mapping
+ * cpu - cpu this event channel is bound to
+ * index - type-specific information:
+ * PIRQ - vector, with MSB being "needs EIO", or physical IRQ of the HVM
+ * guest, or GSI (real passthrough IRQ) of the device.
+ * VIRQ - virq number
+ * IPI - IPI vector
+ * EVTCHN -
+ */
+struct irq_info {
+ struct list_head list;
+ struct list_head eoi_list;
+ short refcnt;
+ u8 spurious_cnt;
+ u8 is_accounted;
+ short type; /* type: IRQT_* */
+ u8 mask_reason; /* Why is event channel masked */
+#define EVT_MASK_REASON_EXPLICIT 0x01
+#define EVT_MASK_REASON_TEMPORARY 0x02
+#define EVT_MASK_REASON_EOI_PENDING 0x04
+ u8 is_active; /* Is event just being handled? */
+ unsigned irq;
+ evtchn_port_t evtchn; /* event channel */
+ unsigned short cpu; /* cpu bound */
+ unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */
+ unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
+ u64 eoi_time; /* Time in jiffies when to EOI. */
+ raw_spinlock_t lock;
+
+ union {
+ unsigned short virq;
+ enum ipi_vector ipi;
+ struct {
+ unsigned short pirq;
+ unsigned short gsi;
+ unsigned char vector;
+ unsigned char flags;
+ uint16_t domid;
+ } pirq;
+ struct xenbus_device *interdomain;
+ } u;
+};
+
+#define PIRQ_NEEDS_EOI (1 << 0)
+#define PIRQ_SHAREABLE (1 << 1)
+#define PIRQ_MSI_GROUP (1 << 2)
+
+static uint __read_mostly event_loop_timeout = 2;
+module_param(event_loop_timeout, uint, 0644);
+
+static uint __read_mostly event_eoi_delay = 10;
+module_param(event_eoi_delay, uint, 0644);
+
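+
Note that, because of the MODULE_PARAM_PREFIX redefinition above, these
two knobs appear as xen.event_loop_timeout and xen.event_eoi_delay on
the kernel command line, and with mode 0644 they remain writable at
runtime (under /sys/module/xen/parameters/ for a built-in kernel).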
const struct evtchn_ops *evtchn_ops;
/*
@@ -70,6 +144,25 @@ const struct evtchn_ops *evtchn_ops;
*/
static DEFINE_MUTEX(irq_mapping_update_lock);
+/*
+ * Lock protecting event handling loop against removing event channels.
+ * Adding of event channels is no issue as the associated IRQ becomes active
+ * only after everything is setup (before request_[threaded_]irq() the handler
+ * can't be entered for an event, as the event channel will be unmasked only
+ * then).
+ */
+static DEFINE_RWLOCK(evtchn_rwlock);
+
+/*
+ * Lock hierarchy:
+ *
+ * irq_mapping_update_lock
+ * evtchn_rwlock
+ * IRQ-desc lock
+ * percpu eoi_list_lock
+ * irq_info->lock
+ */
+
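
Purely to illustrate the ordering documented above (a hypothetical
helper, as if added to events_base.c; not part of the patch), an
operation needing several of these locks must take them outermost-first:

static void example_locked_update(struct irq_info *info)
{
        unsigned long flags;

        mutex_lock(&irq_mapping_update_lock);           /* outermost */
        read_lock_irqsave(&evtchn_rwlock, flags);
        /* the IRQ-desc lock and percpu eoi_list_lock would nest here */
        raw_spin_lock(&info->lock);                     /* innermost */

        /* ... consistent update of the irq_info ... */

        raw_spin_unlock(&info->lock);
        read_unlock_irqrestore(&evtchn_rwlock, flags);
        mutex_unlock(&irq_mapping_update_lock);
}
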
static LIST_HEAD(xen_irq_list_head);
/* IRQ <-> VIRQ mapping. */
@@ -78,7 +171,10 @@ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
/* IRQ <-> IPI mapping */
static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
-int **evtchn_to_irq;
+/* Event channel distribution data */
+static atomic_t channels_on_cpu[NR_CPUS];
+
+static int **evtchn_to_irq;
#ifdef CONFIG_X86
static unsigned long *pirq_eoi_map;
#endif
@@ -91,18 +187,23 @@ static bool (*pirq_needs_eoi)(unsigned irq);
/* Xen will never allocate port zero for any purpose. */
#define VALID_EVTCHN(chn) ((chn) != 0)
+static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY];
+
static struct irq_chip xen_dynamic_chip;
+static struct irq_chip xen_lateeoi_chip;
static struct irq_chip xen_percpu_chip;
static struct irq_chip xen_pirq_chip;
static void enable_dynirq(struct irq_data *data);
static void disable_dynirq(struct irq_data *data);
-static void clear_evtchn_to_irq_row(unsigned row)
+static DEFINE_PER_CPU(unsigned int, irq_epoch);
+
+static void clear_evtchn_to_irq_row(int *evtchn_row)
{
unsigned col;
for (col = 0; col < EVTCHN_PER_ROW; col++)
- evtchn_to_irq[row][col] = -1;
+ WRITE_ONCE(evtchn_row[col], -1);
}
static void clear_evtchn_to_irq_all(void)
@@ -112,14 +213,15 @@ static void clear_evtchn_to_irq_all(void)
for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
if (evtchn_to_irq[row] == NULL)
continue;
- clear_evtchn_to_irq_row(row);
+ clear_evtchn_to_irq_row(evtchn_to_irq[row]);
}
}
-static int set_evtchn_to_irq(unsigned evtchn, unsigned irq)
+static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
{
unsigned row;
unsigned col;
+ int *evtchn_row;
if (evtchn >= xen_evtchn_max_channels())
return -EINVAL;
@@ -132,37 +234,81 @@ static int set_evtchn_to_irq(unsigned evtchn, unsigned irq)
if (irq == -1)
return 0;
- evtchn_to_irq[row] = (int *)get_zeroed_page(GFP_KERNEL);
- if (evtchn_to_irq[row] == NULL)
+ evtchn_row = (int *) __get_free_pages(GFP_KERNEL, 0);
+ if (evtchn_row == NULL)
return -ENOMEM;
- clear_evtchn_to_irq_row(row);
+ clear_evtchn_to_irq_row(evtchn_row);
+
+ /*
+ * We've prepared an empty row for the mapping. If a different
+ * thread was faster inserting it, we can drop ours.
+ */
+ if (cmpxchg(&evtchn_to_irq[row], NULL, evtchn_row) != NULL)
+ free_page((unsigned long) evtchn_row);
}
- evtchn_to_irq[row][col] = irq;
+ WRITE_ONCE(evtchn_to_irq[row][col], irq);
return 0;
}
-int get_evtchn_to_irq(unsigned evtchn)
+int get_evtchn_to_irq(evtchn_port_t evtchn)
{
if (evtchn >= xen_evtchn_max_channels())
return -1;
if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
return -1;
- return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)];
+ return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
}
/* Get info for IRQ */
-struct irq_info *info_for_irq(unsigned irq)
+static struct irq_info *info_for_irq(unsigned irq)
+{
+ if (irq < nr_legacy_irqs())
+ return legacy_info_ptrs[irq];
+ else
+ return irq_get_chip_data(irq);
+}
+
+static void set_info_for_irq(unsigned int irq, struct irq_info *info)
+{
+ if (irq < nr_legacy_irqs())
+ legacy_info_ptrs[irq] = info;
+ else
+ irq_set_chip_data(irq, info);
+}
+
+/* Per CPU channel accounting */
+static void channels_on_cpu_dec(struct irq_info *info)
+{
+ if (!info->is_accounted)
+ return;
+
+ info->is_accounted = 0;
+
+ if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
+ return;
+
+ WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1, 0));
+}
+
+static void channels_on_cpu_inc(struct irq_info *info)
{
- return irq_get_handler_data(irq);
+ if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
+ return;
+
+ if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1,
+ INT_MAX)))
+ return;
+
+ info->is_accounted = 1;
}
/* Constructors for packed IRQ information. */
static int xen_irq_info_common_setup(struct irq_info *info,
unsigned irq,
enum xen_irq_type type,
- unsigned evtchn,
+ evtchn_port_t evtchn,
unsigned short cpu)
{
int ret;
@@ -173,6 +319,8 @@ static int xen_irq_info_common_setup(struct irq_info *info,
info->irq = irq;
info->evtchn = evtchn;
info->cpu = cpu;
+ info->mask_reason = EVT_MASK_REASON_EXPLICIT;
+ raw_spin_lock_init(&info->lock);
ret = set_evtchn_to_irq(evtchn, irq);
if (ret < 0)
@@ -180,20 +328,27 @@ static int xen_irq_info_common_setup(struct irq_info *info,
irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
- return xen_evtchn_port_setup(info);
+ return xen_evtchn_port_setup(evtchn);
}
static int xen_irq_info_evtchn_setup(unsigned irq,
- unsigned evtchn)
+ evtchn_port_t evtchn,
+ struct xenbus_device *dev)
{
struct irq_info *info = info_for_irq(irq);
+ int ret;
+
+ ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
+ info->u.interdomain = dev;
+ if (dev)
+ atomic_inc(&dev->event_channels);
- return xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
+ return ret;
}
static int xen_irq_info_ipi_setup(unsigned cpu,
unsigned irq,
- unsigned evtchn,
+ evtchn_port_t evtchn,
enum ipi_vector ipi)
{
struct irq_info *info = info_for_irq(irq);
@@ -207,7 +362,7 @@ static int xen_irq_info_ipi_setup(unsigned cpu,
static int xen_irq_info_virq_setup(unsigned cpu,
unsigned irq,
- unsigned evtchn,
+ evtchn_port_t evtchn,
unsigned virq)
{
struct irq_info *info = info_for_irq(irq);
@@ -220,7 +375,7 @@ static int xen_irq_info_virq_setup(unsigned cpu,
}
static int xen_irq_info_pirq_setup(unsigned irq,
- unsigned evtchn,
+ evtchn_port_t evtchn,
unsigned pirq,
unsigned gsi,
uint16_t domid,
@@ -239,21 +394,27 @@ static int xen_irq_info_pirq_setup(unsigned irq,
static void xen_irq_info_cleanup(struct irq_info *info)
{
set_evtchn_to_irq(info->evtchn, -1);
+ xen_evtchn_port_remove(info->evtchn, info->cpu);
info->evtchn = 0;
+ channels_on_cpu_dec(info);
}
/*
* Accessors for packed IRQ information.
*/
-unsigned int evtchn_from_irq(unsigned irq)
+evtchn_port_t evtchn_from_irq(unsigned irq)
{
- if (WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq))
+ const struct irq_info *info = NULL;
+
+ if (likely(irq < nr_irqs))
+ info = info_for_irq(irq);
+ if (!info)
return 0;
- return info_for_irq(irq)->evtchn;
+ return info->evtchn;
}
-unsigned irq_from_evtchn(unsigned int evtchn)
+unsigned int irq_from_evtchn(evtchn_port_t evtchn)
{
return get_evtchn_to_irq(evtchn);
}
@@ -299,12 +460,12 @@ static enum xen_irq_type type_from_irq(unsigned irq)
return info_for_irq(irq)->type;
}
-unsigned cpu_from_irq(unsigned irq)
+static unsigned cpu_from_irq(unsigned irq)
{
return info_for_irq(irq)->cpu;
}
-unsigned int cpu_from_evtchn(unsigned int evtchn)
+unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
{
int irq = get_evtchn_to_irq(evtchn);
unsigned ret = 0;
@@ -315,6 +476,34 @@ unsigned int cpu_from_evtchn(unsigned int evtchn)
return ret;
}
+static void do_mask(struct irq_info *info, u8 reason)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&info->lock, flags);
+
+ if (!info->mask_reason)
+ mask_evtchn(info->evtchn);
+
+ info->mask_reason |= reason;
+
+ raw_spin_unlock_irqrestore(&info->lock, flags);
+}
+
+static void do_unmask(struct irq_info *info, u8 reason)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&info->lock, flags);
+
+ info->mask_reason &= ~reason;
+
+ if (!info->mask_reason)
+ unmask_evtchn(info->evtchn);
+
+ raw_spin_unlock_irqrestore(&info->lock, flags);
+}
+
#ifdef CONFIG_X86
static bool pirq_check_eoi_map(unsigned irq)
{
@@ -330,18 +519,25 @@ static bool pirq_needs_eoi_flag(unsigned irq)
return info->u.pirq.flags & PIRQ_NEEDS_EOI;
}
-static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
+static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+ bool force_affinity)
{
- int irq = get_evtchn_to_irq(chn);
+ int irq = get_evtchn_to_irq(evtchn);
struct irq_info *info = info_for_irq(irq);
BUG_ON(irq == -1);
-#ifdef CONFIG_SMP
- cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
-#endif
- xen_evtchn_port_bind_to_cpu(info, cpu);
+ if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
+ cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
+ cpumask_copy(irq_get_effective_affinity_mask(irq),
+ cpumask_of(cpu));
+ }
+
+ xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
+
+ channels_on_cpu_dec(info);
info->cpu = cpu;
+ channels_on_cpu_inc(info);
}
/**
@@ -354,20 +550,178 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
*/
void notify_remote_via_irq(int irq)
{
- int evtchn = evtchn_from_irq(irq);
+ evtchn_port_t evtchn = evtchn_from_irq(irq);
if (VALID_EVTCHN(evtchn))
notify_remote_via_evtchn(evtchn);
}
EXPORT_SYMBOL_GPL(notify_remote_via_irq);
+struct lateeoi_work {
+ struct delayed_work delayed;
+ spinlock_t eoi_list_lock;
+ struct list_head eoi_list;
+};
+
+static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
+
+static void lateeoi_list_del(struct irq_info *info)
+{
+ struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
+ unsigned long flags;
+
+ spin_lock_irqsave(&eoi->eoi_list_lock, flags);
+ list_del_init(&info->eoi_list);
+ spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
+}
+
+static void lateeoi_list_add(struct irq_info *info)
+{
+ struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
+ struct irq_info *elem;
+ u64 now = get_jiffies_64();
+ unsigned long delay;
+ unsigned long flags;
+
+ if (now < info->eoi_time)
+ delay = info->eoi_time - now;
+ else
+ delay = 1;
+
+ spin_lock_irqsave(&eoi->eoi_list_lock, flags);
+
+ if (list_empty(&eoi->eoi_list)) {
+ list_add(&info->eoi_list, &eoi->eoi_list);
+ mod_delayed_work_on(info->eoi_cpu, system_wq,
+ &eoi->delayed, delay);
+ } else {
+ list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
+ if (elem->eoi_time <= info->eoi_time)
+ break;
+ }
+ list_add(&info->eoi_list, &elem->eoi_list);
+ }
+
+ spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
+}
+
+static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
+{
+ evtchn_port_t evtchn;
+ unsigned int cpu;
+ unsigned int delay = 0;
+
+ evtchn = info->evtchn;
+ if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
+ return;
+
+ if (spurious) {
+ struct xenbus_device *dev = info->u.interdomain;
+ unsigned int threshold = 1;
+
+ if (dev && dev->spurious_threshold)
+ threshold = dev->spurious_threshold;
+
+ if ((1 << info->spurious_cnt) < (HZ << 2)) {
+ if (info->spurious_cnt != 0xFF)
+ info->spurious_cnt++;
+ }
+ if (info->spurious_cnt > threshold) {
+ delay = 1 << (info->spurious_cnt - 1 - threshold);
+ if (delay > HZ)
+ delay = HZ;
+ if (!info->eoi_time)
+ info->eoi_cpu = smp_processor_id();
+ info->eoi_time = get_jiffies_64() + delay;
+ if (dev)
+ atomic_add(delay, &dev->jiffies_eoi_delayed);
+ }
+ if (dev)
+ atomic_inc(&dev->spurious_events);
+ } else {
+ info->spurious_cnt = 0;
+ }
+
+ cpu = info->eoi_cpu;
+ if (info->eoi_time &&
+ (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) {
+ lateeoi_list_add(info);
+ return;
+ }
+
+ info->eoi_time = 0;
+
+ /* is_active hasn't been reset yet, do it now. */
+ smp_store_release(&info->is_active, 0);
+ do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
+}
+
+static void xen_irq_lateeoi_worker(struct work_struct *work)
+{
+ struct lateeoi_work *eoi;
+ struct irq_info *info;
+ u64 now = get_jiffies_64();
+ unsigned long flags;
+
+ eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
+
+ read_lock_irqsave(&evtchn_rwlock, flags);
+
+ while (true) {
+ spin_lock(&eoi->eoi_list_lock);
+
+ info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
+ eoi_list);
+
+ if (info == NULL || now < info->eoi_time) {
+ spin_unlock(&eoi->eoi_list_lock);
+ break;
+ }
+
+ list_del_init(&info->eoi_list);
+
+ spin_unlock(&eoi->eoi_list_lock);
+
+ info->eoi_time = 0;
+
+ xen_irq_lateeoi_locked(info, false);
+ }
+
+ if (info)
+ mod_delayed_work_on(info->eoi_cpu, system_wq,
+ &eoi->delayed, info->eoi_time - now);
+
+ read_unlock_irqrestore(&evtchn_rwlock, flags);
+}
+
+static void xen_cpu_init_eoi(unsigned int cpu)
+{
+ struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
+
+ INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
+ spin_lock_init(&eoi->eoi_list_lock);
+ INIT_LIST_HEAD(&eoi->eoi_list);
+}
+
+void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
+{
+ struct irq_info *info;
+ unsigned long flags;
+
+ read_lock_irqsave(&evtchn_rwlock, flags);
+
+ info = info_for_irq(irq);
+
+ if (info)
+ xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
+
+ read_unlock_irqrestore(&evtchn_rwlock, flags);
+}
+EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
+
static void xen_irq_init(unsigned irq)
{
struct irq_info *info;
-#ifdef CONFIG_SMP
- /* By default all event channels notify CPU#0. */
- cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0));
-#endif
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (info == NULL)
@@ -376,8 +730,14 @@ static void xen_irq_init(unsigned irq)
info->type = IRQT_UNBOUND;
info->refcnt = -1;
- irq_set_handler_data(irq, info);
+ set_info_for_irq(irq, info);
+ /*
+ * Interrupt affinity setting can be immediate. No point
+ * in delaying it until an interrupt is handled.
+ */
+ irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+ INIT_LIST_HEAD(&info->eoi_list);
list_add_tail(&info->list, &xen_irq_list_head);
}
@@ -425,17 +785,25 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
static void xen_free_irq(unsigned irq)
{
- struct irq_info *info = irq_get_handler_data(irq);
+ struct irq_info *info = info_for_irq(irq);
+ unsigned long flags;
if (WARN_ON(!info))
return;
+ write_lock_irqsave(&evtchn_rwlock, flags);
+
+ if (!list_empty(&info->eoi_list))
+ lateeoi_list_del(info);
+
list_del(&info->list);
- irq_set_handler_data(irq, NULL);
+ set_info_for_irq(irq, NULL);
WARN_ON(info->refcnt > 0);
+ write_unlock_irqrestore(&evtchn_rwlock, flags);
+
kfree(info);
/* Legacy IRQ descriptors are managed by the arch. */
@@ -445,7 +813,7 @@ static void xen_free_irq(unsigned irq)
irq_free_desc(irq);
}
-static void xen_evtchn_close(unsigned int port)
+static void xen_evtchn_close(evtchn_port_t port)
{
struct evtchn_close close;
@@ -454,6 +822,13 @@ static void xen_evtchn_close(unsigned int port)
BUG();
}
+/* Not called for lateeoi events. */
+static void event_handler_exit(struct irq_info *info)
+{
+ smp_store_release(&info->is_active, 0);
+ clear_evtchn(info->evtchn);
+}
+
static void pirq_query_unmask(int irq)
{
struct physdev_irq_status_query irq_status;
@@ -472,25 +847,15 @@ static void pirq_query_unmask(int irq)
static void eoi_pirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(data->irq);
+ struct irq_info *info = info_for_irq(data->irq);
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
int rc = 0;
if (!VALID_EVTCHN(evtchn))
return;
- if (unlikely(irqd_is_setaffinity_pending(data)) &&
- likely(!irqd_irq_disabled(data))) {
- int masked = test_and_set_mask(evtchn);
-
- clear_evtchn(evtchn);
-
- irq_move_masked_irq(data);
-
- if (!masked)
- unmask_evtchn(evtchn);
- } else
- clear_evtchn(evtchn);
+ event_handler_exit(info);
if (pirq_needs_eoi(data->irq)) {
rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
@@ -508,7 +873,7 @@ static unsigned int __startup_pirq(unsigned int irq)
{
struct evtchn_bind_pirq bind_pirq;
struct irq_info *info = info_for_irq(irq);
- int evtchn = evtchn_from_irq(irq);
+ evtchn_port_t evtchn = evtchn_from_irq(irq);
int rc;
BUG_ON(info->type != IRQT_PIRQ);
@@ -534,14 +899,15 @@ static unsigned int __startup_pirq(unsigned int irq)
goto err;
info->evtchn = evtchn;
- bind_evtchn_to_cpu(evtchn, 0);
+ bind_evtchn_to_cpu(evtchn, 0, false);
- rc = xen_evtchn_port_setup(info);
+ rc = xen_evtchn_port_setup(evtchn);
if (rc)
goto err;
out:
- unmask_evtchn(evtchn);
+ do_unmask(info, EVT_MASK_REASON_EXPLICIT);
+
eoi_pirq(irq_get_irq_data(irq));
return 0;
@@ -561,14 +927,14 @@ static void shutdown_pirq(struct irq_data *data)
{
unsigned int irq = data->irq;
struct irq_info *info = info_for_irq(irq);
- unsigned evtchn = evtchn_from_irq(irq);
+ evtchn_port_t evtchn = evtchn_from_irq(irq);
BUG_ON(info->type != IRQT_PIRQ);
if (!VALID_EVTCHN(evtchn))
return;
- mask_evtchn(evtchn);
+ do_mask(info, EVT_MASK_REASON_EXPLICIT);
xen_evtchn_close(evtchn);
xen_irq_info_cleanup(info);
}
@@ -601,8 +967,8 @@ EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
static void __unbind_from_irq(unsigned int irq)
{
- int evtchn = evtchn_from_irq(irq);
- struct irq_info *info = irq_get_handler_data(irq);
+ evtchn_port_t evtchn = evtchn_from_irq(irq);
+ struct irq_info *info = info_for_irq(irq);
if (info->refcnt > 0) {
info->refcnt--;
@@ -612,6 +978,7 @@ static void __unbind_from_irq(unsigned int irq)
if (VALID_EVTCHN(evtchn)) {
unsigned int cpu = cpu_from_irq(irq);
+ struct xenbus_device *dev;
xen_evtchn_close(evtchn);
@@ -622,6 +989,11 @@ static void __unbind_from_irq(unsigned int irq)
case IRQT_IPI:
per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
break;
+ case IRQT_EVTCHN:
+ dev = info->u.interdomain;
+ if (dev)
+ atomic_dec(&dev->event_channels);
+ break;
default:
break;
}
@@ -645,7 +1017,7 @@ static void __unbind_from_irq(unsigned int irq)
int xen_bind_pirq_gsi_to_irq(unsigned gsi,
unsigned pirq, int shareable, char *name)
{
- int irq = -1;
+ int irq;
struct physdev_irq irq_op;
int ret;
@@ -827,7 +1199,8 @@ int xen_pirq_from_irq(unsigned irq)
}
EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
-int bind_evtchn_to_irq(unsigned int evtchn)
+static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
+ struct xenbus_device *dev)
{
int irq;
int ret;
@@ -844,17 +1217,23 @@ int bind_evtchn_to_irq(unsigned int evtchn)
if (irq < 0)
goto out;
- irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
+ irq_set_chip_and_handler_name(irq, chip,
handle_edge_irq, "event");
- ret = xen_irq_info_evtchn_setup(irq, evtchn);
+ ret = xen_irq_info_evtchn_setup(irq, evtchn, dev);
if (ret < 0) {
__unbind_from_irq(irq);
irq = ret;
goto out;
}
- /* New interdomain events are bound to VCPU 0. */
- bind_evtchn_to_cpu(evtchn, 0);
+ /*
+ * New interdomain events are initially bound to vCPU0. This
+ * is required to set up the event channel in the first
+ * place and also important for UP guests because the
+ * affinity setting is not invoked on them so nothing would
+ * bind the channel.
+ */
+ bind_evtchn_to_cpu(evtchn, 0, false);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
@@ -865,13 +1244,24 @@ out:
return irq;
}
+
+int bind_evtchn_to_irq(evtchn_port_t evtchn)
+{
+ return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip, NULL);
+}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
+int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
+{
+ return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip, NULL);
+}
+EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
+
static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
{
struct evtchn_bind_ipi bind_ipi;
- int evtchn, irq;
- int ret;
+ evtchn_port_t evtchn;
+ int ret, irq;
mutex_lock(&irq_mapping_update_lock);
@@ -897,7 +1287,11 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
irq = ret;
goto out;
}
- bind_evtchn_to_cpu(evtchn, cpu);
+ /*
+ * Force the affinity mask to the target CPU so proc shows
+ * the correct target.
+ */
+ bind_evtchn_to_cpu(evtchn, cpu, true);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_IPI);
@@ -908,26 +1302,36 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
return irq;
}
-int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
- unsigned int remote_port)
+static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev,
+ evtchn_port_t remote_port,
+ struct irq_chip *chip)
{
struct evtchn_bind_interdomain bind_interdomain;
int err;
- bind_interdomain.remote_dom = remote_domain;
+ bind_interdomain.remote_dom = dev->otherend_id;
bind_interdomain.remote_port = remote_port;
err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
&bind_interdomain);
- return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
+ return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
+ chip, dev);
+}
+
+int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
+ evtchn_port_t remote_port)
+{
+ return bind_interdomain_evtchn_to_irq_chip(dev, remote_port,
+ &xen_lateeoi_chip);
}
-EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq);
+EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
-static int find_virq(unsigned int virq, unsigned int cpu)
+static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
{
struct evtchn_status status;
- int port, rc = -ENOENT;
+ evtchn_port_t port;
+ int rc = -ENOENT;
memset(&status, 0, sizeof(status));
for (port = 0; port < xen_evtchn_max_channels(); port++) {
@@ -939,7 +1343,7 @@ static int find_virq(unsigned int virq, unsigned int cpu)
if (status.status != EVTCHNSTAT_virq)
continue;
if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
- rc = port;
+ *evtchn = port;
break;
}
}
@@ -962,7 +1366,8 @@ EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
{
struct evtchn_bind_virq bind_virq;
- int evtchn, irq, ret;
+ evtchn_port_t evtchn = 0;
+ int irq, ret;
mutex_lock(&irq_mapping_update_lock);
@@ -988,9 +1393,8 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
evtchn = bind_virq.port;
else {
if (ret == -EEXIST)
- ret = find_virq(virq, cpu);
+ ret = find_virq(virq, cpu, &evtchn);
BUG_ON(ret < 0);
- evtchn = ret;
}
ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
@@ -1000,7 +1404,11 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
goto out;
}
- bind_evtchn_to_cpu(evtchn, cpu);
+ /*
+ * Force the affinity mask for percpu interrupts so proc
+ * shows the correct target.
+ */
+ bind_evtchn_to_cpu(evtchn, cpu, percpu);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_VIRQ);
@@ -1019,14 +1427,15 @@ static void unbind_from_irq(unsigned int irq)
mutex_unlock(&irq_mapping_update_lock);
}
-int bind_evtchn_to_irqhandler(unsigned int evtchn,
- irq_handler_t handler,
- unsigned long irqflags,
- const char *devname, void *dev_id)
+static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname, void *dev_id,
+ struct irq_chip *chip)
{
int irq, retval;
- irq = bind_evtchn_to_irq(evtchn);
+ irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL);
if (irq < 0)
return irq;
retval = request_irq(irq, handler, irqflags, devname, dev_id);
@@ -1037,18 +1446,37 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
return irq;
}
+
+int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname, void *dev_id)
+{
+ return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
+ devname, dev_id,
+ &xen_dynamic_chip);
+}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
-int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
- unsigned int remote_port,
- irq_handler_t handler,
- unsigned long irqflags,
- const char *devname,
- void *dev_id)
+int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname, void *dev_id)
+{
+ return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
+ devname, dev_id,
+ &xen_lateeoi_chip);
+}
+EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi);
+
+static int bind_interdomain_evtchn_to_irqhandler_chip(
+ struct xenbus_device *dev, evtchn_port_t remote_port,
+ irq_handler_t handler, unsigned long irqflags,
+ const char *devname, void *dev_id, struct irq_chip *chip)
{
int irq, retval;
- irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
+ irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip);
if (irq < 0)
return irq;
@@ -1060,7 +1488,19 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
return irq;
}
-EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
+
+int bind_interdomain_evtchn_to_irqhandler_lateeoi(struct xenbus_device *dev,
+ evtchn_port_t remote_port,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id)
+{
+ return bind_interdomain_evtchn_to_irqhandler_chip(dev,
+ remote_port, handler, irqflags, devname,
+ dev_id, &xen_lateeoi_chip);
+}
+EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi);
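
A hedged consumer-side sketch (hypothetical backend, not from this
patch) of the new lateeoi contract: the event stays masked after
delivery until the driver acknowledges it with xen_irq_lateeoi(),
flagging no-op wakeups as spurious so abusive frontends get throttled:

#include <linux/interrupt.h>
#include <xen/events.h>
#include <xen/xenbus.h>

static bool example_process_ring(void *priv);   /* hypothetical */

static irqreturn_t example_backend_interrupt(int irq, void *priv)
{
        bool did_work = example_process_ring(priv);

        /* Late EOI: unmask only now; flag no-op wakeups as spurious. */
        xen_irq_lateeoi(irq, did_work ? 0 : XEN_EOI_FLAG_SPURIOUS);
        return IRQ_HANDLED;
}

static int example_backend_connect(struct xenbus_device *dev,
                                   evtchn_port_t remote_port, void *priv)
{
        int irq = bind_interdomain_evtchn_to_irqhandler_lateeoi(dev,
                        remote_port, example_backend_interrupt, 0,
                        "example-backend", priv);

        return irq < 0 ? irq : 0;
}
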
int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
irq_handler_t handler,
@@ -1106,7 +1546,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
void unbind_from_irqhandler(unsigned int irq, void *dev_id)
{
- struct irq_info *info = irq_get_handler_data(irq);
+ struct irq_info *info = info_for_irq(irq);
if (WARN_ON(!info))
return;
@@ -1132,7 +1572,7 @@ int xen_set_irq_priority(unsigned irq, unsigned priority)
}
EXPORT_SYMBOL_GPL(xen_set_irq_priority);
-int evtchn_make_refcounted(unsigned int evtchn)
+int evtchn_make_refcounted(evtchn_port_t evtchn)
{
int irq = get_evtchn_to_irq(evtchn);
struct irq_info *info;
@@ -1140,7 +1580,7 @@ int evtchn_make_refcounted(unsigned int evtchn)
if (irq == -1)
return -ENOENT;
- info = irq_get_handler_data(irq);
+ info = info_for_irq(irq);
if (!info)
return -ENOENT;
@@ -1153,7 +1593,7 @@ int evtchn_make_refcounted(unsigned int evtchn)
}
EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
-int evtchn_get(unsigned int evtchn)
+int evtchn_get(evtchn_port_t evtchn)
{
int irq;
struct irq_info *info;
@@ -1168,13 +1608,13 @@ int evtchn_get(unsigned int evtchn)
if (irq == -1)
goto done;
- info = irq_get_handler_data(irq);
+ info = info_for_irq(irq);
if (!info)
goto done;
err = -EINVAL;
- if (info->refcnt <= 0)
+ if (info->refcnt <= 0 || info->refcnt == SHRT_MAX)
goto done;
info->refcnt++;
@@ -1186,7 +1626,7 @@ int evtchn_get(unsigned int evtchn)
}
EXPORT_SYMBOL_GPL(evtchn_get);
-void evtchn_put(unsigned int evtchn)
+void evtchn_put(evtchn_port_t evtchn)
{
int irq = get_evtchn_to_irq(evtchn);
if (WARN_ON(irq == -1))
@@ -1213,21 +1653,88 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
notify_remote_via_irq(irq);
}
+struct evtchn_loop_ctrl {
+ ktime_t timeout;
+ unsigned count;
+ bool defer_eoi;
+};
+
+void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
+{
+ int irq;
+ struct irq_info *info;
+ struct xenbus_device *dev;
+
+ irq = get_evtchn_to_irq(port);
+ if (irq == -1)
+ return;
+
+ /*
+ * Check for timeout every 256 events.
+ * We are setting the timeout value only after the first 256
+ * events in order to not hurt the common case of few loop
+ * iterations. The 256 is basically an arbitrary value.
+ *
+ * If we hit the timeout we need to defer all further EOIs in
+ * order to ensure we leave the event handling loop sooner
+ * rather than later.
+ */
+ if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
+ ktime_t kt = ktime_get();
+
+ if (!ctrl->timeout) {
+ kt = ktime_add_ms(kt,
+ jiffies_to_msecs(event_loop_timeout));
+ ctrl->timeout = kt;
+ } else if (kt > ctrl->timeout) {
+ ctrl->defer_eoi = true;
+ }
+ }
+
+ info = info_for_irq(irq);
+ if (xchg_acquire(&info->is_active, 1))
+ return;
+
+ dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL;
+ if (dev)
+ atomic_inc(&dev->events);
+
+ if (ctrl->defer_eoi) {
+ info->eoi_cpu = smp_processor_id();
+ info->irq_epoch = __this_cpu_read(irq_epoch);
+ info->eoi_time = get_jiffies_64() + event_eoi_delay;
+ }
+
+ generic_handle_irq(irq);
+}
+
static void __xen_evtchn_do_upcall(void)
{
struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
int cpu = smp_processor_id();
+ struct evtchn_loop_ctrl ctrl = { 0 };
+
+ read_lock(&evtchn_rwlock);
do {
vcpu_info->evtchn_upcall_pending = 0;
- xen_evtchn_handle_events(cpu);
+ xen_evtchn_handle_events(cpu, &ctrl);
BUG_ON(!irqs_disabled());
virt_rmb(); /* Hypervisor can set upcall pending. */
} while (vcpu_info->evtchn_upcall_pending);
+
+ read_unlock(&evtchn_rwlock);
+
+ /*
+ * Increment irq_epoch only now to defer EOIs only for
+ * xen_irq_lateeoi() invocations occurring from inside the loop
+ * above.
+ */
+ __this_cpu_inc(irq_epoch);
}
void xen_evtchn_do_upcall(struct pt_regs *regs)
@@ -1235,9 +1742,6 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
irq_enter();
-#ifdef CONFIG_X86
- inc_irq_stat(irq_hv_callback_count);
-#endif
__xen_evtchn_do_upcall();
@@ -1252,7 +1756,7 @@ void xen_hvm_evtchn_do_upcall(void)
EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
/* Rebind a new event channel to an existing irq. */
-void rebind_evtchn_irq(int evtchn, int irq)
+void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
{
struct irq_info *info = info_for_irq(irq);
@@ -1271,23 +1775,21 @@ void rebind_evtchn_irq(int evtchn, int irq)
so there should be a proper type */
BUG_ON(info->type == IRQT_UNBOUND);
- (void)xen_irq_info_evtchn_setup(irq, evtchn);
+ (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL);
mutex_unlock(&irq_mapping_update_lock);
- bind_evtchn_to_cpu(evtchn, info->cpu);
- /* This will be deferred until interrupt is processed */
- irq_set_affinity(irq, cpumask_of(info->cpu));
+ bind_evtchn_to_cpu(evtchn, info->cpu, false);
/* Unmask the event channel. */
enable_irq(irq);
}
/* Rebind an evtchn so that it gets delivered to a specific cpu */
-static int xen_rebind_evtchn_to_cpu(int evtchn, unsigned int tcpu)
+static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu)
{
struct evtchn_bind_vcpu bind_vcpu;
- int masked;
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
if (!VALID_EVTCHN(evtchn))
return -1;
@@ -1303,7 +1805,7 @@ static int xen_rebind_evtchn_to_cpu(int evtchn, unsigned int tcpu)
* Mask the event while changing the VCPU binding to prevent
* it being delivered on an unexpected VCPU.
*/
- masked = test_and_set_mask(evtchn);
+ do_mask(info, EVT_MASK_REASON_TEMPORARY);
/*
* If this fails, it usually just indicates that we're dealing with a
@@ -1311,90 +1813,125 @@ static int xen_rebind_evtchn_to_cpu(int evtchn, unsigned int tcpu)
* it, but don't do the xenlinux-level rebind in that case.
*/
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
- bind_evtchn_to_cpu(evtchn, tcpu);
+ bind_evtchn_to_cpu(evtchn, tcpu, false);
- if (!masked)
- unmask_evtchn(evtchn);
+ do_unmask(info, EVT_MASK_REASON_TEMPORARY);
return 0;
}
+/*
+ * Find the CPU within @dest mask which has the least number of channels
+ * assigned. This is not precise as the per cpu counts can be modified
+ * concurrently.
+ */
+static unsigned int select_target_cpu(const struct cpumask *dest)
+{
+ unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;
+
+ for_each_cpu_and(cpu, dest, cpu_online_mask) {
+ unsigned int curch = atomic_read(&channels_on_cpu[cpu]);
+
+ if (curch < minch) {
+ minch = curch;
+ best_cpu = cpu;
+ }
+ }
+
+ /*
+ * Catch the unlikely case that dest contains no online CPUs. The
+ * fallback on cpu_online_mask can't recurse any further.
+ */
+ if (best_cpu == UINT_MAX)
+ return select_target_cpu(cpu_online_mask);
+
+ return best_cpu;
+}
+
static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
bool force)
{
- unsigned tcpu = cpumask_first_and(dest, cpu_online_mask);
- int ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu);
+ unsigned int tcpu = select_target_cpu(dest);
+ int ret;
+ ret = xen_rebind_evtchn_to_cpu(info_for_irq(data->irq), tcpu);
if (!ret)
irq_data_update_effective_affinity(data, cpumask_of(tcpu));
return ret;
}
-/* To be called with desc->lock held. */
-int xen_set_affinity_evtchn(struct irq_desc *desc, unsigned int tcpu)
-{
- struct irq_data *d = irq_desc_get_irq_data(desc);
-
- return set_affinity_irq(d, cpumask_of(tcpu), false);
-}
-EXPORT_SYMBOL_GPL(xen_set_affinity_evtchn);
-
static void enable_dynirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(data->irq);
+ struct irq_info *info = info_for_irq(data->irq);
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
if (VALID_EVTCHN(evtchn))
- unmask_evtchn(evtchn);
+ do_unmask(info, EVT_MASK_REASON_EXPLICIT);
}
static void disable_dynirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(data->irq);
+ struct irq_info *info = info_for_irq(data->irq);
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
if (VALID_EVTCHN(evtchn))
- mask_evtchn(evtchn);
+ do_mask(info, EVT_MASK_REASON_EXPLICIT);
}
static void ack_dynirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(data->irq);
-
- if (!VALID_EVTCHN(evtchn))
- return;
+ struct irq_info *info = info_for_irq(data->irq);
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
- if (unlikely(irqd_is_setaffinity_pending(data)) &&
- likely(!irqd_irq_disabled(data))) {
- int masked = test_and_set_mask(evtchn);
+ if (VALID_EVTCHN(evtchn))
+ event_handler_exit(info);
+}
- clear_evtchn(evtchn);
+static void mask_ack_dynirq(struct irq_data *data)
+{
+ disable_dynirq(data);
+ ack_dynirq(data);
+}
- irq_move_masked_irq(data);
+static void lateeoi_ack_dynirq(struct irq_data *data)
+{
+ struct irq_info *info = info_for_irq(data->irq);
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
- if (!masked)
- unmask_evtchn(evtchn);
- } else
+ if (VALID_EVTCHN(evtchn)) {
+ do_mask(info, EVT_MASK_REASON_EOI_PENDING);
+ /*
+ * Don't call event_handler_exit().
+ * Need to keep is_active non-zero in order to ignore re-raised
+ * events after cpu affinity changes while a lateeoi is pending.
+ */
clear_evtchn(evtchn);
+ }
}
-static void mask_ack_dynirq(struct irq_data *data)
+static void lateeoi_mask_ack_dynirq(struct irq_data *data)
{
- disable_dynirq(data);
- ack_dynirq(data);
+ struct irq_info *info = info_for_irq(data->irq);
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
+
+ if (VALID_EVTCHN(evtchn)) {
+ do_mask(info, EVT_MASK_REASON_EXPLICIT);
+ event_handler_exit(info);
+ }
}
static int retrigger_dynirq(struct irq_data *data)
{
- unsigned int evtchn = evtchn_from_irq(data->irq);
- int masked;
+ struct irq_info *info = info_for_irq(data->irq);
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
if (!VALID_EVTCHN(evtchn))
return 0;
- masked = test_and_set_mask(evtchn);
+ do_mask(info, EVT_MASK_REASON_TEMPORARY);
set_evtchn(evtchn);
- if (!masked)
- unmask_evtchn(evtchn);
+ do_unmask(info, EVT_MASK_REASON_TEMPORARY);
return 1;
}
@@ -1440,7 +1977,8 @@ static void restore_pirqs(void)
static void restore_cpu_virqs(unsigned int cpu)
{
struct evtchn_bind_virq bind_virq;
- int virq, irq, evtchn;
+ evtchn_port_t evtchn;
+ int virq, irq;
for (virq = 0; virq < NR_VIRQS; virq++) {
if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
@@ -1458,14 +1996,16 @@ static void restore_cpu_virqs(unsigned int cpu)
/* Record the new mapping. */
(void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
- bind_evtchn_to_cpu(evtchn, cpu);
+ /* The affinity mask is still valid */
+ bind_evtchn_to_cpu(evtchn, cpu, false);
}
}
static void restore_cpu_ipis(unsigned int cpu)
{
struct evtchn_bind_ipi bind_ipi;
- int ipi, irq, evtchn;
+ evtchn_port_t evtchn;
+ int ipi, irq;
for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
@@ -1482,22 +2022,24 @@ static void restore_cpu_ipis(unsigned int cpu)
/* Record the new mapping. */
(void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
- bind_evtchn_to_cpu(evtchn, cpu);
+ /* The affinity mask is still valid */
+ bind_evtchn_to_cpu(evtchn, cpu, false);
}
}
/* Clear an irq's pending state, in preparation for polling on it */
void xen_clear_irq_pending(int irq)
{
- int evtchn = evtchn_from_irq(irq);
+ struct irq_info *info = info_for_irq(irq);
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
if (VALID_EVTCHN(evtchn))
- clear_evtchn(evtchn);
+ event_handler_exit(info);
}
EXPORT_SYMBOL(xen_clear_irq_pending);
void xen_set_irq_pending(int irq)
{
- int evtchn = evtchn_from_irq(irq);
+ evtchn_port_t evtchn = evtchn_from_irq(irq);
if (VALID_EVTCHN(evtchn))
set_evtchn(evtchn);
@@ -1505,7 +2047,7 @@ void xen_set_irq_pending(int irq)
bool xen_test_irq_pending(int irq)
{
- int evtchn = evtchn_from_irq(irq);
+ evtchn_port_t evtchn = evtchn_from_irq(irq);
bool ret = false;
if (VALID_EVTCHN(evtchn))
@@ -1565,8 +2107,12 @@ void xen_irq_resume(void)
xen_evtchn_resume();
/* No IRQ <-> event-channel mappings. */
- list_for_each_entry(info, &xen_irq_list_head, list)
- info->evtchn = 0; /* zap event-channel binding */
+ list_for_each_entry(info, &xen_irq_list_head, list) {
+ /* Zap event-channel binding */
+ info->evtchn = 0;
+ /* Adjust accounting */
+ channels_on_cpu_dec(info);
+ }
clear_evtchn_to_irq_all();
@@ -1592,6 +2138,21 @@ static struct irq_chip xen_dynamic_chip __read_mostly = {
.irq_retrigger = retrigger_dynirq,
};
+static struct irq_chip xen_lateeoi_chip __read_mostly = {
+ /* The chip name needs to contain "xen-dyn" for irqbalance to work. */
+ .name = "xen-dyn-lateeoi",
+
+ .irq_disable = disable_dynirq,
+ .irq_mask = disable_dynirq,
+ .irq_unmask = enable_dynirq,
+
+ .irq_ack = lateeoi_ack_dynirq,
+ .irq_mask_ack = lateeoi_mask_ack_dynirq,
+
+ .irq_set_affinity = set_affinity_irq,
+ .irq_retrigger = retrigger_dynirq,
+};
+
static struct irq_chip xen_pirq_chip __read_mostly = {
.name = "xen-pirq",
@@ -1622,57 +2183,78 @@ static struct irq_chip xen_percpu_chip __read_mostly = {
.irq_ack = ack_dynirq,
};
-int xen_set_callback_via(uint64_t via)
-{
- struct xen_hvm_param a;
- a.domid = DOMID_SELF;
- a.index = HVM_PARAM_CALLBACK_IRQ;
- a.value = via;
- return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
-}
-EXPORT_SYMBOL_GPL(xen_set_callback_via);
-
#ifdef CONFIG_XEN_PVHVM
/* Vector callbacks are better than PCI interrupts to receive event
* channel notifications because we can receive vector callbacks on any
* vcpu and we don't need PCI support or APIC interactions. */
-void xen_callback_vector(void)
+void xen_setup_callback_vector(void)
{
- int rc;
uint64_t callback_via;
if (xen_have_vector_callback) {
callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR);
- rc = xen_set_callback_via(callback_via);
- if (rc) {
+ if (xen_set_callback_via(callback_via)) {
pr_err("Request for Xen HVM callback vector failed\n");
xen_have_vector_callback = 0;
- return;
}
- pr_info_once("Xen HVM callback vector for event delivery is enabled\n");
- alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR,
- xen_hvm_callback_vector);
}
}
+
+static __init void xen_alloc_callback_vector(void)
+{
+ if (!xen_have_vector_callback)
+ return;
+
+ pr_info("Xen HVM callback vector for event delivery is enabled\n");
+ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_xen_hvm_callback);
+}
#else
-void xen_callback_vector(void) {}
+void xen_setup_callback_vector(void) {}
+static inline void xen_alloc_callback_vector(void) {}
#endif
-#undef MODULE_PARAM_PREFIX
-#define MODULE_PARAM_PREFIX "xen."
+bool xen_fifo_events = true;
+module_param_named(fifo_events, xen_fifo_events, bool, 0);
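The rename keeps the user-visible parameter name, assuming the "xen." MODULE_PARAM_PREFIX stays defined earlier in the file, so the FIFO ABI can still be disabled at boot in favor of the 2-level ABI:

	xen.fifo_events=0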
+
+static int xen_evtchn_cpu_prepare(unsigned int cpu)
+{
+ int ret = 0;
+
+ xen_cpu_init_eoi(cpu);
+
+ if (evtchn_ops->percpu_init)
+ ret = evtchn_ops->percpu_init(cpu);
+
+ return ret;
+}
+
+static int xen_evtchn_cpu_dead(unsigned int cpu)
+{
+ int ret = 0;
-static bool fifo_events = true;
-module_param(fifo_events, bool, 0);
+ if (evtchn_ops->percpu_deinit)
+ ret = evtchn_ops->percpu_deinit(cpu);
+
+ return ret;
+}
void __init xen_init_IRQ(void)
{
int ret = -EINVAL;
- unsigned int evtchn;
+ evtchn_port_t evtchn;
- if (fifo_events)
+ if (xen_fifo_events)
ret = xen_evtchn_fifo_init();
- if (ret < 0)
+ if (ret < 0) {
xen_evtchn_2l_init();
+ xen_fifo_events = false;
+ }
+
+ xen_cpu_init_eoi(smp_processor_id());
+
+ cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
+ "xen/evtchn:prepare",
+ xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
sizeof(*evtchn_to_irq), GFP_KERNEL);
@@ -1689,8 +2271,10 @@ void __init xen_init_IRQ(void)
if (xen_initial_domain())
pci_xen_initial_domain();
}
- if (xen_feature(XENFEAT_hvm_callback_vector))
- xen_callback_vector();
+ if (xen_feature(XENFEAT_hvm_callback_vector)) {
+ xen_setup_callback_vector();
+ xen_alloc_callback_vector();
+ }
if (xen_hvm_domain()) {
native_init_IRQ();
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
index 76b318e88382..ad9fe51d3fb3 100644
--- a/drivers/xen/events/events_fifo.c
+++ b/drivers/xen/events/events_fifo.c
@@ -82,7 +82,7 @@ static unsigned event_array_pages __read_mostly;
#endif
-static inline event_word_t *event_word_from_port(unsigned port)
+static inline event_word_t *event_word_from_port(evtchn_port_t port)
{
unsigned i = port / EVENT_WORDS_PER_PAGE;
@@ -138,9 +138,8 @@ static void init_array_page(event_word_t *array_page)
array_page[i] = 1 << EVTCHN_FIFO_MASKED;
}
-static int evtchn_fifo_setup(struct irq_info *info)
+static int evtchn_fifo_setup(evtchn_port_t port)
{
- unsigned port = info->evtchn;
unsigned new_array_pages;
int ret;
@@ -186,70 +185,73 @@ static int evtchn_fifo_setup(struct irq_info *info)
return ret;
}
-static void evtchn_fifo_bind_to_cpu(struct irq_info *info, unsigned cpu)
+static void evtchn_fifo_bind_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+ unsigned int old_cpu)
{
/* no-op */
}
-static void evtchn_fifo_clear_pending(unsigned port)
+static void evtchn_fifo_clear_pending(evtchn_port_t port)
{
event_word_t *word = event_word_from_port(port);
sync_clear_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word));
}
-static void evtchn_fifo_set_pending(unsigned port)
+static void evtchn_fifo_set_pending(evtchn_port_t port)
{
event_word_t *word = event_word_from_port(port);
sync_set_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word));
}
-static bool evtchn_fifo_is_pending(unsigned port)
+static bool evtchn_fifo_is_pending(evtchn_port_t port)
{
event_word_t *word = event_word_from_port(port);
return sync_test_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word));
}
-static bool evtchn_fifo_test_and_set_mask(unsigned port)
-{
- event_word_t *word = event_word_from_port(port);
- return sync_test_and_set_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
-}
-
-static void evtchn_fifo_mask(unsigned port)
+static void evtchn_fifo_mask(evtchn_port_t port)
{
event_word_t *word = event_word_from_port(port);
sync_set_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
}
-static bool evtchn_fifo_is_masked(unsigned port)
+static bool evtchn_fifo_is_masked(evtchn_port_t port)
{
event_word_t *word = event_word_from_port(port);
return sync_test_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
}
/*
- * Clear MASKED, spinning if BUSY is set.
+ * Clear MASKED if not PENDING, spinning if BUSY is set.
+ * Return true if mask was cleared.
*/
-static void clear_masked(volatile event_word_t *word)
+static bool clear_masked_cond(volatile event_word_t *word)
{
event_word_t new, old, w;
w = *word;
do {
+ if (!(w & (1 << EVTCHN_FIFO_MASKED)))
+ return true;
+
+ if (w & (1 << EVTCHN_FIFO_PENDING))
+ return false;
+
old = w & ~(1 << EVTCHN_FIFO_BUSY);
new = old & ~(1 << EVTCHN_FIFO_MASKED);
w = sync_cmpxchg(word, old, new);
} while (w != old);
+
+ return true;
}
-static void evtchn_fifo_unmask(unsigned port)
+static void evtchn_fifo_unmask(evtchn_port_t port)
{
event_word_t *word = event_word_from_port(port);
BUG_ON(!irqs_disabled());
- clear_masked(word);
- if (evtchn_fifo_is_pending(port)) {
+ if (!clear_masked_cond(word)) {
struct evtchn_unmask unmask = { .port = port };
(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
}
@@ -270,23 +272,13 @@ static uint32_t clear_linked(volatile event_word_t *word)
return w & EVTCHN_FIFO_LINK_MASK;
}
-static void handle_irq_for_port(unsigned port)
-{
- int irq;
-
- irq = get_evtchn_to_irq(port);
- if (irq != -1)
- generic_handle_irq(irq);
-}
-
-static void consume_one_event(unsigned cpu,
+static void consume_one_event(unsigned cpu, struct evtchn_loop_ctrl *ctrl,
struct evtchn_fifo_control_block *control_block,
- unsigned priority, unsigned long *ready,
- bool drop)
+ unsigned priority, unsigned long *ready)
{
struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
uint32_t head;
- unsigned port;
+ evtchn_port_t port;
event_word_t *word;
head = q->head[priority];
@@ -315,16 +307,17 @@ static void consume_one_event(unsigned cpu,
clear_bit(priority, ready);
if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) {
- if (unlikely(drop))
+ if (unlikely(!ctrl))
pr_warn("Dropping pending event for port %u\n", port);
else
- handle_irq_for_port(port);
+ handle_irq_for_port(port, ctrl);
}
q->head[priority] = head;
}
-static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
+static void __evtchn_fifo_handle_events(unsigned cpu,
+ struct evtchn_loop_ctrl *ctrl)
{
struct evtchn_fifo_control_block *control_block;
unsigned long ready;
@@ -336,14 +329,15 @@ static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
while (ready) {
q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES);
- consume_one_event(cpu, control_block, q, &ready, drop);
+ consume_one_event(cpu, ctrl, control_block, q, &ready);
ready |= xchg(&control_block->ready, 0);
}
}
-static void evtchn_fifo_handle_events(unsigned cpu)
+static void evtchn_fifo_handle_events(unsigned cpu,
+ struct evtchn_loop_ctrl *ctrl)
{
- __evtchn_fifo_handle_events(cpu, false);
+ __evtchn_fifo_handle_events(cpu, ctrl);
}
static void evtchn_fifo_resume(void)
@@ -380,21 +374,6 @@ static void evtchn_fifo_resume(void)
event_array_pages = 0;
}
-static const struct evtchn_ops evtchn_ops_fifo = {
- .max_channels = evtchn_fifo_max_channels,
- .nr_channels = evtchn_fifo_nr_channels,
- .setup = evtchn_fifo_setup,
- .bind_to_cpu = evtchn_fifo_bind_to_cpu,
- .clear_pending = evtchn_fifo_clear_pending,
- .set_pending = evtchn_fifo_set_pending,
- .is_pending = evtchn_fifo_is_pending,
- .test_and_set_mask = evtchn_fifo_test_and_set_mask,
- .mask = evtchn_fifo_mask,
- .unmask = evtchn_fifo_unmask,
- .handle_events = evtchn_fifo_handle_events,
- .resume = evtchn_fifo_resume,
-};
-
static int evtchn_fifo_alloc_control_block(unsigned cpu)
{
void *control_block = NULL;
@@ -417,19 +396,35 @@ static int evtchn_fifo_alloc_control_block(unsigned cpu)
return ret;
}
-static int xen_evtchn_cpu_prepare(unsigned int cpu)
+static int evtchn_fifo_percpu_init(unsigned int cpu)
{
if (!per_cpu(cpu_control_block, cpu))
return evtchn_fifo_alloc_control_block(cpu);
return 0;
}
-static int xen_evtchn_cpu_dead(unsigned int cpu)
+static int evtchn_fifo_percpu_deinit(unsigned int cpu)
{
- __evtchn_fifo_handle_events(cpu, true);
+ __evtchn_fifo_handle_events(cpu, NULL);
return 0;
}
+static const struct evtchn_ops evtchn_ops_fifo = {
+ .max_channels = evtchn_fifo_max_channels,
+ .nr_channels = evtchn_fifo_nr_channels,
+ .setup = evtchn_fifo_setup,
+ .bind_to_cpu = evtchn_fifo_bind_to_cpu,
+ .clear_pending = evtchn_fifo_clear_pending,
+ .set_pending = evtchn_fifo_set_pending,
+ .is_pending = evtchn_fifo_is_pending,
+ .mask = evtchn_fifo_mask,
+ .unmask = evtchn_fifo_unmask,
+ .handle_events = evtchn_fifo_handle_events,
+ .resume = evtchn_fifo_resume,
+ .percpu_init = evtchn_fifo_percpu_init,
+ .percpu_deinit = evtchn_fifo_percpu_deinit,
+};
+
int __init xen_evtchn_fifo_init(void)
{
int cpu = smp_processor_id();
@@ -443,9 +438,5 @@ int __init xen_evtchn_fifo_init(void)
evtchn_ops = &evtchn_ops_fifo;
- cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
- "xen/evtchn:prepare",
- xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
-
return ret;
}
diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h
index 82938cff6c7a..4d3398eff9cd 100644
--- a/drivers/xen/events/events_internal.h
+++ b/drivers/xen/events/events_internal.h
@@ -7,78 +7,36 @@
#ifndef __EVENTS_INTERNAL_H__
#define __EVENTS_INTERNAL_H__
-/* Interrupt types. */
-enum xen_irq_type {
- IRQT_UNBOUND = 0,
- IRQT_PIRQ,
- IRQT_VIRQ,
- IRQT_IPI,
- IRQT_EVTCHN
-};
-
-/*
- * Packed IRQ information:
- * type - enum xen_irq_type
- * event channel - irq->event channel mapping
- * cpu - cpu this event channel is bound to
- * index - type-specific information:
- * PIRQ - vector, with MSB being "needs EIO", or physical IRQ of the HVM
- * guest, or GSI (real passthrough IRQ) of the device.
- * VIRQ - virq number
- * IPI - IPI vector
- * EVTCHN -
- */
-struct irq_info {
- struct list_head list;
- int refcnt;
- enum xen_irq_type type; /* type */
- unsigned irq;
- unsigned int evtchn; /* event channel */
- unsigned short cpu; /* cpu bound */
-
- union {
- unsigned short virq;
- enum ipi_vector ipi;
- struct {
- unsigned short pirq;
- unsigned short gsi;
- unsigned char vector;
- unsigned char flags;
- uint16_t domid;
- } pirq;
- } u;
-};
-
-#define PIRQ_NEEDS_EOI (1 << 0)
-#define PIRQ_SHAREABLE (1 << 1)
-#define PIRQ_MSI_GROUP (1 << 2)
+struct evtchn_loop_ctrl;
struct evtchn_ops {
unsigned (*max_channels)(void);
unsigned (*nr_channels)(void);
- int (*setup)(struct irq_info *info);
- void (*bind_to_cpu)(struct irq_info *info, unsigned cpu);
+ int (*setup)(evtchn_port_t port);
+ void (*remove)(evtchn_port_t port, unsigned int cpu);
+ void (*bind_to_cpu)(evtchn_port_t evtchn, unsigned int cpu,
+ unsigned int old_cpu);
- void (*clear_pending)(unsigned port);
- void (*set_pending)(unsigned port);
- bool (*is_pending)(unsigned port);
- bool (*test_and_set_mask)(unsigned port);
- void (*mask)(unsigned port);
- void (*unmask)(unsigned port);
+ void (*clear_pending)(evtchn_port_t port);
+ void (*set_pending)(evtchn_port_t port);
+ bool (*is_pending)(evtchn_port_t port);
+ void (*mask)(evtchn_port_t port);
+ void (*unmask)(evtchn_port_t port);
- void (*handle_events)(unsigned cpu);
+ void (*handle_events)(unsigned cpu, struct evtchn_loop_ctrl *ctrl);
void (*resume)(void);
+
+ int (*percpu_init)(unsigned int cpu);
+ int (*percpu_deinit)(unsigned int cpu);
};
extern const struct evtchn_ops *evtchn_ops;
-extern int **evtchn_to_irq;
-int get_evtchn_to_irq(unsigned int evtchn);
+int get_evtchn_to_irq(evtchn_port_t evtchn);
+void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl);
-struct irq_info *info_for_irq(unsigned irq);
-unsigned cpu_from_irq(unsigned irq);
-unsigned cpu_from_evtchn(unsigned int evtchn);
+unsigned int cpu_from_evtchn(evtchn_port_t evtchn);
static inline unsigned xen_evtchn_max_channels(void)
{
@@ -89,52 +47,56 @@ static inline unsigned xen_evtchn_max_channels(void)
* Do any ABI specific setup for a bound event channel before it can
* be unmasked and used.
*/
-static inline int xen_evtchn_port_setup(struct irq_info *info)
+static inline int xen_evtchn_port_setup(evtchn_port_t evtchn)
{
if (evtchn_ops->setup)
- return evtchn_ops->setup(info);
+ return evtchn_ops->setup(evtchn);
return 0;
}
-static inline void xen_evtchn_port_bind_to_cpu(struct irq_info *info,
- unsigned cpu)
+static inline void xen_evtchn_port_remove(evtchn_port_t evtchn,
+ unsigned int cpu)
{
- evtchn_ops->bind_to_cpu(info, cpu);
+ if (evtchn_ops->remove)
+ evtchn_ops->remove(evtchn, cpu);
}
-static inline void clear_evtchn(unsigned port)
+static inline void xen_evtchn_port_bind_to_cpu(evtchn_port_t evtchn,
+ unsigned int cpu,
+ unsigned int old_cpu)
{
- evtchn_ops->clear_pending(port);
+ evtchn_ops->bind_to_cpu(evtchn, cpu, old_cpu);
}
-static inline void set_evtchn(unsigned port)
+static inline void clear_evtchn(evtchn_port_t port)
{
- evtchn_ops->set_pending(port);
+ evtchn_ops->clear_pending(port);
}
-static inline bool test_evtchn(unsigned port)
+static inline void set_evtchn(evtchn_port_t port)
{
- return evtchn_ops->is_pending(port);
+ evtchn_ops->set_pending(port);
}
-static inline bool test_and_set_mask(unsigned port)
+static inline bool test_evtchn(evtchn_port_t port)
{
- return evtchn_ops->test_and_set_mask(port);
+ return evtchn_ops->is_pending(port);
}
-static inline void mask_evtchn(unsigned port)
+static inline void mask_evtchn(evtchn_port_t port)
{
return evtchn_ops->mask(port);
}
-static inline void unmask_evtchn(unsigned port)
+static inline void unmask_evtchn(evtchn_port_t port)
{
return evtchn_ops->unmask(port);
}
-static inline void xen_evtchn_handle_events(unsigned cpu)
+static inline void xen_evtchn_handle_events(unsigned cpu,
+ struct evtchn_loop_ctrl *ctrl)
{
- return evtchn_ops->handle_events(cpu);
+ return evtchn_ops->handle_events(cpu, ctrl);
}
static inline void xen_evtchn_resume(void)
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 052b55a14ebc..c99415a70051 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -83,7 +83,7 @@ struct per_user_data {
struct user_evtchn {
struct rb_node node;
struct per_user_data *user;
- unsigned port;
+ evtchn_port_t port;
bool enabled;
};
@@ -138,7 +138,8 @@ static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
kfree(evtchn);
}
-static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port)
+static struct user_evtchn *find_evtchn(struct per_user_data *u,
+ evtchn_port_t port)
{
struct rb_node *node = u->evtchns.rb_node;
@@ -161,20 +162,24 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
{
struct user_evtchn *evtchn = data;
struct per_user_data *u = evtchn->user;
+ unsigned int prod, cons;
WARN(!evtchn->enabled,
- "Interrupt for port %d, but apparently not enabled; per-user %p\n",
+ "Interrupt for port %u, but apparently not enabled; per-user %p\n",
evtchn->port, u);
- disable_irq_nosync(irq);
evtchn->enabled = false;
spin_lock(&u->ring_prod_lock);
- if ((u->ring_prod - u->ring_cons) < u->ring_size) {
- *evtchn_ring_entry(u, u->ring_prod) = evtchn->port;
- wmb(); /* Ensure ring contents visible */
- if (u->ring_cons == u->ring_prod++) {
+ prod = READ_ONCE(u->ring_prod);
+ cons = READ_ONCE(u->ring_cons);
+
+ if ((prod - cons) < u->ring_size) {
+ *evtchn_ring_entry(u, prod) = evtchn->port;
+ smp_wmb(); /* Ensure ring contents visible */
+ WRITE_ONCE(u->ring_prod, prod + 1);
+ if (cons == prod) {
wake_up_interruptible(&u->evtchn_wait);
kill_fasync(&u->evtchn_async_queue,
SIGIO, POLL_IN);
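The READ_ONCE()/WRITE_ONCE() accessors here, and in the read and poll paths below, spell out the lockless single-producer/single-consumer contract on the ring indices. Reduced to a sketch (not the driver's exact structure; the pairing of the barriers is the point):

static void demo_ring_produce(struct per_user_data *u, evtchn_port_t port)
{
	unsigned int prod = READ_ONCE(u->ring_prod);

	*evtchn_ring_entry(u, prod) = port;
	smp_wmb();			/* entry visible before index update */
	WRITE_ONCE(u->ring_prod, prod + 1);
}

static bool demo_ring_consume(struct per_user_data *u, evtchn_port_t *port)
{
	unsigned int cons = READ_ONCE(u->ring_cons);

	if (cons == READ_ONCE(u->ring_prod))
		return false;

	smp_rmb();			/* read index before entry */
	*port = *evtchn_ring_entry(u, cons);
	WRITE_ONCE(u->ring_cons, cons + 1);
	return true;
}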
@@ -210,8 +215,8 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
if (u->ring_overflow)
goto unlock_out;
- c = u->ring_cons;
- p = u->ring_prod;
+ c = READ_ONCE(u->ring_cons);
+ p = READ_ONCE(u->ring_prod);
if (c != p)
break;
@@ -221,7 +226,7 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
return -EAGAIN;
rc = wait_event_interruptible(u->evtchn_wait,
- u->ring_cons != u->ring_prod);
+ READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod));
if (rc)
return rc;
}
@@ -245,13 +250,13 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
}
rc = -EFAULT;
- rmb(); /* Ensure that we see the port before we copy it. */
+ smp_rmb(); /* Ensure that we see the port before we copy it. */
if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) ||
((bytes2 != 0) &&
copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
goto unlock_out;
- u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
+ WRITE_ONCE(u->ring_cons, c + (bytes1 + bytes2) / sizeof(evtchn_port_t));
rc = bytes1 + bytes2;
unlock_out:
@@ -286,13 +291,13 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
mutex_lock(&u->bind_mutex);
for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
- unsigned port = kbuf[i];
+ evtchn_port_t port = kbuf[i];
struct user_evtchn *evtchn;
evtchn = find_evtchn(u, port);
if (evtchn && !evtchn->enabled) {
evtchn->enabled = true;
- enable_irq(irq_from_evtchn(port));
+ xen_irq_lateeoi(irq_from_evtchn(port), 0);
}
}
@@ -361,7 +366,7 @@ static int evtchn_resize_ring(struct per_user_data *u)
return 0;
}
-static int evtchn_bind_to_user(struct per_user_data *u, int port)
+static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port)
{
struct user_evtchn *evtchn;
struct evtchn_close close;
@@ -392,8 +397,8 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
if (rc < 0)
goto err;
- rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0,
- u->name, evtchn);
+ rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, 0,
+ u->name, evtchn);
if (rc < 0)
goto err;
@@ -421,36 +426,6 @@ static void evtchn_unbind_from_user(struct per_user_data *u,
del_evtchn(u, evtchn);
}
-static DEFINE_PER_CPU(int, bind_last_selected_cpu);
-
-static void evtchn_bind_interdom_next_vcpu(int evtchn)
-{
- unsigned int selected_cpu, irq;
- struct irq_desc *desc;
- unsigned long flags;
-
- irq = irq_from_evtchn(evtchn);
- desc = irq_to_desc(irq);
-
- if (!desc)
- return;
-
- raw_spin_lock_irqsave(&desc->lock, flags);
- selected_cpu = this_cpu_read(bind_last_selected_cpu);
- selected_cpu = cpumask_next_and(selected_cpu,
- desc->irq_common_data.affinity, cpu_online_mask);
-
- if (unlikely(selected_cpu >= nr_cpu_ids))
- selected_cpu = cpumask_first_and(desc->irq_common_data.affinity,
- cpu_online_mask);
-
- this_cpu_write(bind_last_selected_cpu, selected_cpu);
-
- /* unmask expects irqs to be disabled */
- xen_set_affinity_evtchn(desc, selected_cpu);
- raw_spin_unlock_irqrestore(&desc->lock, flags);
-}
-
static long evtchn_ioctl(struct file *file,
unsigned int cmd, unsigned long arg)
{
@@ -508,10 +483,8 @@ static long evtchn_ioctl(struct file *file,
break;
rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
- if (rc == 0) {
+ if (rc == 0)
rc = bind_interdomain.local_port;
- evtchn_bind_interdom_next_vcpu(rc);
- }
break;
}
@@ -584,7 +557,9 @@ static long evtchn_ioctl(struct file *file,
/* Initialise the ring to empty. Clear errors. */
mutex_lock(&u->ring_cons_mutex);
spin_lock_irq(&u->ring_prod_lock);
- u->ring_cons = u->ring_prod = u->ring_overflow = 0;
+ WRITE_ONCE(u->ring_cons, 0);
+ WRITE_ONCE(u->ring_prod, 0);
+ u->ring_overflow = 0;
spin_unlock_irq(&u->ring_prod_lock);
mutex_unlock(&u->ring_cons_mutex);
rc = 0;
@@ -627,7 +602,7 @@ static __poll_t evtchn_poll(struct file *file, poll_table *wait)
struct per_user_data *u = file->private_data;
poll_wait(file, &u->evtchn_wait, wait);
- if (u->ring_cons != u->ring_prod)
+ if (READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod))
mask |= EPOLLIN | EPOLLRDNORM;
if (u->ring_overflow)
mask = EPOLLERR;
diff --git a/drivers/xen/features.c b/drivers/xen/features.c
index 25c053b09605..7b591443833c 100644
--- a/drivers/xen/features.c
+++ b/drivers/xen/features.c
@@ -9,13 +9,26 @@
#include <linux/types.h>
#include <linux/cache.h>
#include <linux/export.h>
+#include <linux/printk.h>
#include <asm/xen/hypercall.h>
+#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <xen/interface/version.h>
#include <xen/features.h>
+/*
+ * The Linux kernel expects at least Xen 4.0.
+ *
+ * For that reason, assume certain features to be available (depending on
+ * guest mode, of course).
+ */
+#define chk_required_feature(f) { \
+ if (!xen_feature(f)) \
+ panic("Xen: feature %s not available!\n", #f); \
+ }
+
u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
EXPORT_SYMBOL_GPL(xen_features);
@@ -31,4 +44,9 @@ void xen_setup_features(void)
for (j = 0; j < 32; j++)
xen_features[i * 32 + j] = !!(fi.submap & 1<<j);
}
+
+ if (xen_pv_domain()) {
+ chk_required_feature(XENFEAT_mmu_pt_update_preserve_ad);
+ chk_required_feature(XENFEAT_gnttab_map_avail_bits);
+ }
}
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 9a3960ecff6c..20d7d059dadb 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -15,6 +15,7 @@
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
#include <linux/types.h>
+#include <xen/interface/event_channel.h>
struct gntdev_dmabuf_priv;
@@ -38,7 +39,7 @@ struct gntdev_unmap_notify {
int flags;
/* Address relative to the start of the gntdev_grant_map. */
int addr;
- int event;
+ evtchn_port_t event;
};
struct gntdev_grant_map {
diff --git a/drivers/xen/gntdev-dmabuf.c b/drivers/xen/gntdev-dmabuf.c
index 75d3bb948bf3..12e380db7f55 100644
--- a/drivers/xen/gntdev-dmabuf.c
+++ b/drivers/xen/gntdev-dmabuf.c
@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>
+#include <linux/module.h>
#include <xen/xen.h>
#include <xen/grant_table.h>
@@ -21,6 +22,8 @@
#include "gntdev-common.h"
#include "gntdev-dmabuf.h"
+MODULE_IMPORT_NS(DMA_BUF);
+
#ifndef GRANT_INVALID_REF
/*
* Note on usage of grant reference 0 as invalid grant reference:
@@ -247,10 +250,9 @@ static void dmabuf_exp_ops_detach(struct dma_buf *dma_buf,
if (sgt) {
if (gntdev_dmabuf_attach->dir != DMA_NONE)
- dma_unmap_sg_attrs(attach->dev, sgt->sgl,
- sgt->nents,
- gntdev_dmabuf_attach->dir,
- DMA_ATTR_SKIP_CPU_SYNC);
+ dma_unmap_sgtable(attach->dev, sgt,
+ gntdev_dmabuf_attach->dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
sg_free_table(sgt);
}
@@ -288,8 +290,8 @@ dmabuf_exp_ops_map_dma_buf(struct dma_buf_attachment *attach,
sgt = dmabuf_pages_to_sgt(gntdev_dmabuf->pages,
gntdev_dmabuf->nr_pages);
if (!IS_ERR(sgt)) {
- if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
- DMA_ATTR_SKIP_CPU_SYNC)) {
+ if (dma_map_sgtable(attach->dev, sgt, dir,
+ DMA_ATTR_SKIP_CPU_SYNC)) {
sg_free_table(sgt);
kfree(sgt);
sgt = ERR_PTR(-ENOMEM);
@@ -613,6 +615,14 @@ dmabuf_imp_to_refs(struct gntdev_dmabuf_priv *priv, struct device *dev,
goto fail_detach;
}
+ /* Check that we have zero offset. */
+ if (sgt->sgl->offset) {
+ ret = ERR_PTR(-EINVAL);
+ pr_debug("DMA buffer has %d bytes offset, user-space expects 0\n",
+ sgt->sgl->offset);
+ goto fail_unmap;
+ }
+
/* Check number of pages that imported buffer has. */
if (attach->dmabuf->size != gntdev_dmabuf->nr_pages << PAGE_SHIFT) {
ret = ERR_PTR(-EINVAL);
@@ -625,7 +635,7 @@ dmabuf_imp_to_refs(struct gntdev_dmabuf_priv *priv, struct device *dev,
/* Now convert sgt to array of pages and check for page validity. */
i = 0;
- for_each_sg_page(sgt->sgl, &sg_iter, sgt->nents, 0) {
+ for_each_sgtable_page(sgt, &sg_iter, 0) {
struct page *page = sg_page_iter_page(&sg_iter);
/*
* Check if page is valid: this can happen if we are given
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 0258415ca0b2..fec1b6537166 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -133,20 +133,26 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
if (NULL == add)
return NULL;
- add->grants = kvcalloc(count, sizeof(add->grants[0]), GFP_KERNEL);
- add->map_ops = kvcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL);
- add->unmap_ops = kvcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL);
- add->kmap_ops = kvcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
- add->kunmap_ops = kvcalloc(count,
- sizeof(add->kunmap_ops[0]), GFP_KERNEL);
+ add->grants = kvmalloc_array(count, sizeof(add->grants[0]),
+ GFP_KERNEL);
+ add->map_ops = kvmalloc_array(count, sizeof(add->map_ops[0]),
+ GFP_KERNEL);
+ add->unmap_ops = kvmalloc_array(count, sizeof(add->unmap_ops[0]),
+ GFP_KERNEL);
add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
if (NULL == add->grants ||
NULL == add->map_ops ||
NULL == add->unmap_ops ||
- NULL == add->kmap_ops ||
- NULL == add->kunmap_ops ||
NULL == add->pages)
goto err;
+ if (use_ptemod) {
+ add->kmap_ops = kvmalloc_array(count, sizeof(add->kmap_ops[0]),
+ GFP_KERNEL);
+ add->kunmap_ops = kvmalloc_array(count, sizeof(add->kunmap_ops[0]),
+ GFP_KERNEL);
+ if (NULL == add->kmap_ops || NULL == add->kunmap_ops)
+ goto err;
+ }
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
add->dma_flags = dma_flags;
@@ -183,10 +189,14 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
goto err;
for (i = 0; i < count; i++) {
- add->map_ops[i].handle = -1;
- add->unmap_ops[i].handle = -1;
- add->kmap_ops[i].handle = -1;
- add->kunmap_ops[i].handle = -1;
+ add->grants[i].domid = DOMID_INVALID;
+ add->grants[i].ref = INVALID_GRANT_REF;
+ add->map_ops[i].handle = INVALID_GRANT_HANDLE;
+ add->unmap_ops[i].handle = INVALID_GRANT_HANDLE;
+ if (use_ptemod) {
+ add->kmap_ops[i].handle = INVALID_GRANT_HANDLE;
+ add->kunmap_ops[i].handle = INVALID_GRANT_HANDLE;
+ }
}
add->index = 0;
@@ -256,35 +266,20 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
{
struct gntdev_grant_map *map = data;
unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
- int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte;
+ int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte |
+ (1 << _GNTMAP_guest_avail0);
u64 pte_maddr;
BUG_ON(pgnr >= map->count);
pte_maddr = arbitrary_virt_to_machine(pte).maddr;
- /*
- * Set the PTE as special to force get_user_pages_fast() fall
- * back to the slow path. If this is not supported as part of
- * the grant map, it will be done afterwards.
- */
- if (xen_feature(XENFEAT_gnttab_map_avail_bits))
- flags |= (1 << _GNTMAP_guest_avail0);
-
gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
map->grants[pgnr].ref,
map->grants[pgnr].domid);
gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags,
- -1 /* handle */);
- return 0;
-}
-
-#ifdef CONFIG_X86
-static int set_grant_ptes_as_special(pte_t *pte, unsigned long addr, void *data)
-{
- set_pte_at(current->mm, addr, pte, pte_mkspecial(*pte));
+ INVALID_GRANT_HANDLE);
return 0;
}
-#endif
int gntdev_map_grant_pages(struct gntdev_grant_map *map)
{
@@ -292,7 +287,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
if (!use_ptemod) {
/* Note: it could already be mapped */
- if (map->map_ops[0].handle != -1)
+ if (map->map_ops[0].handle != INVALID_GRANT_HANDLE)
return 0;
for (i = 0; i < map->count; i++) {
unsigned long addr = (unsigned long)
@@ -301,7 +296,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
map->grants[i].ref,
map->grants[i].domid);
gnttab_set_unmap_op(&map->unmap_ops[i], addr,
- map->flags, -1 /* handle */);
+ map->flags, INVALID_GRANT_HANDLE);
}
} else {
/*
@@ -309,44 +304,47 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
* to the kernel linear addresses of the struct pages.
* These ptes are completely different from the user ptes dealt
* with find_grant_ptes.
+ * Note that GNTMAP_device_map isn't needed here: The
+ * dev_bus_addr output field gets consumed only from ->map_ops,
+ * and by not requesting it when mapping we also avoid needing
+ * to mirror dev_bus_addr into ->unmap_ops (and holding an extra
+ * reference to the page in the hypervisor).
*/
+ unsigned int flags = (map->flags & ~GNTMAP_device_map) |
+ GNTMAP_host_map;
+
for (i = 0; i < map->count; i++) {
unsigned long address = (unsigned long)
pfn_to_kaddr(page_to_pfn(map->pages[i]));
BUG_ON(PageHighMem(map->pages[i]));
- gnttab_set_map_op(&map->kmap_ops[i], address,
- map->flags | GNTMAP_host_map,
+ gnttab_set_map_op(&map->kmap_ops[i], address, flags,
map->grants[i].ref,
map->grants[i].domid);
gnttab_set_unmap_op(&map->kunmap_ops[i], address,
- map->flags | GNTMAP_host_map, -1);
+ flags, INVALID_GRANT_HANDLE);
}
}
pr_debug("map %d+%d\n", map->index, map->count);
- err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL,
- map->pages, map->count);
- if (err)
- return err;
+ err = gnttab_map_refs(map->map_ops, map->kmap_ops, map->pages,
+ map->count);
for (i = 0; i < map->count; i++) {
- if (map->map_ops[i].status) {
+ if (map->map_ops[i].status == GNTST_okay)
+ map->unmap_ops[i].handle = map->map_ops[i].handle;
+ else if (!err)
err = -EINVAL;
- continue;
- }
- map->unmap_ops[i].handle = map->map_ops[i].handle;
- if (use_ptemod)
- map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
-#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
- else if (map->dma_vaddr) {
- unsigned long bfn;
+ if (map->flags & GNTMAP_device_map)
+ map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
- bfn = pfn_to_bfn(page_to_pfn(map->pages[i]));
- map->unmap_ops[i].dev_bus_addr = __pfn_to_phys(bfn);
+ if (use_ptemod) {
+ if (map->kmap_ops[i].status == GNTST_okay)
+ map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
+ else if (!err)
+ err = -EINVAL;
}
-#endif
}
return err;
}
@@ -382,7 +380,15 @@ static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
pr_debug("unmap handle=%d st=%d\n",
map->unmap_ops[offset+i].handle,
map->unmap_ops[offset+i].status);
- map->unmap_ops[offset+i].handle = -1;
+ map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
+ if (use_ptemod) {
+ if (map->kunmap_ops[offset+i].status)
+ err = -EINVAL;
+ pr_debug("kunmap handle=%u st=%d\n",
+ map->kunmap_ops[offset+i].handle,
+ map->kunmap_ops[offset+i].status);
+ map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
+ }
}
return err;
}
@@ -398,13 +404,15 @@ static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
* already unmapped some of the grants. Only unmap valid ranges.
*/
while (pages && !err) {
- while (pages && map->unmap_ops[offset].handle == -1) {
+ while (pages &&
+ map->unmap_ops[offset].handle == INVALID_GRANT_HANDLE) {
offset++;
pages--;
}
range = 0;
while (range < pages) {
- if (map->unmap_ops[offset+range].handle == -1)
+ if (map->unmap_ops[offset + range].handle ==
+ INVALID_GRANT_HANDLE)
break;
range++;
}
@@ -625,7 +633,7 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
return -EFAULT;
pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr);
- down_read(&current->mm->mmap_sem);
+ mmap_read_lock(current->mm);
vma = find_vma(current->mm, op.vaddr);
if (!vma || vma->vm_ops != &gntdev_vmops)
goto out_unlock;
@@ -639,7 +647,7 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
rv = 0;
out_unlock:
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
if (rv == 0 && copy_to_user(u, &op, sizeof(op)) != 0)
return -EFAULT;
@@ -652,7 +660,7 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
struct gntdev_grant_map *map;
int rc;
int out_flags;
- unsigned int out_event;
+ evtchn_port_t out_event;
if (copy_from_user(&op, u, sizeof(op)))
return -EFAULT;
@@ -720,17 +728,18 @@ struct gntdev_copy_batch {
s16 __user *status[GNTDEV_COPY_BATCH];
unsigned int nr_ops;
unsigned int nr_pages;
+ bool writeable;
};
static int gntdev_get_page(struct gntdev_copy_batch *batch, void __user *virt,
- bool writeable, unsigned long *gfn)
+ unsigned long *gfn)
{
unsigned long addr = (unsigned long)virt;
struct page *page;
unsigned long xen_pfn;
int ret;
- ret = get_user_pages_fast(addr, 1, writeable ? FOLL_WRITE : 0, &page);
+ ret = pin_user_pages_fast(addr, 1, batch->writeable ? FOLL_WRITE : 0, &page);
if (ret < 0)
return ret;
@@ -744,11 +753,9 @@ static int gntdev_get_page(struct gntdev_copy_batch *batch, void __user *virt,
static void gntdev_put_pages(struct gntdev_copy_batch *batch)
{
- unsigned int i;
-
- for (i = 0; i < batch->nr_pages; i++)
- put_page(batch->pages[i]);
+ unpin_user_pages_dirty_lock(batch->pages, batch->nr_pages, batch->writeable);
batch->nr_pages = 0;
+ batch->writeable = false;
}
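The switch from get_user_pages_fast()/put_page() to the pin_user_pages API pairs each pin with unpin_user_pages_dirty_lock(), which also takes care of dirty accounting. The contract in miniature (a sketch; assumes a lowmem page so page_address() is valid):

static int demo_peek_user_byte(const void __user *virt, u8 *out)
{
	struct page *page;
	int ret = pin_user_pages_fast((unsigned long)virt, 1, 0, &page);

	if (ret < 0)
		return ret;

	*out = *((u8 *)page_address(page) + offset_in_page(virt));

	/* Read-only access, so no dirtying on release. */
	unpin_user_pages_dirty_lock(&page, 1, false);
	return 0;
}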
static int gntdev_copy(struct gntdev_copy_batch *batch)
@@ -837,8 +844,9 @@ static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch,
virt = seg->source.virt + copied;
off = (unsigned long)virt & ~XEN_PAGE_MASK;
len = min(len, (size_t)XEN_PAGE_SIZE - off);
+ batch->writeable = false;
- ret = gntdev_get_page(batch, virt, false, &gfn);
+ ret = gntdev_get_page(batch, virt, &gfn);
if (ret < 0)
return ret;
@@ -856,8 +864,9 @@ static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch,
virt = seg->dest.virt + copied;
off = (unsigned long)virt & ~XEN_PAGE_MASK;
len = min(len, (size_t)XEN_PAGE_SIZE - off);
+ batch->writeable = true;
- ret = gntdev_get_page(batch, virt, true, &gfn);
+ ret = gntdev_get_page(batch, virt, &gfn);
if (ret < 0)
return ret;
@@ -1001,8 +1010,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
err = mmu_interval_notifier_insert_locked(
&map->notifier, vma->vm_mm, vma->vm_start,
vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
- if (err)
+ if (err) {
+ map->vma = NULL;
goto out_unlock_put;
+ }
}
mutex_unlock(&priv->lock);
@@ -1014,7 +1025,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
* to the PTE from going stale.
*
* Since this vma's mappings can't be touched without the
- * mmap_sem, and we are holding it now, there is no need for
+ * mmap_lock, and we are holding it now, there is no need for
* the notifier_range locking pattern.
*/
mmu_interval_read_begin(&map->notifier);
@@ -1037,23 +1048,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
err = vm_map_pages_zero(vma, map->pages, map->count);
if (err)
goto out_put_map;
- } else {
-#ifdef CONFIG_X86
- /*
- * If the PTEs were not made special by the grant map
- * hypercall, do so here.
- *
- * This is racy since the mapping is already visible
- * to userspace but userspace should be well-behaved
- * enough to not touch it until the mmap() call
- * returns.
- */
- if (!xen_feature(XENFEAT_gnttab_map_avail_bits)) {
- apply_to_page_range(vma->vm_mm, vma->vm_start,
- vma->vm_end - vma->vm_start,
- set_grant_ptes_as_special, NULL);
- }
-#endif
}
return 0;
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 7b36b51cdb9f..3729bea0c989 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -64,7 +64,6 @@
#include <asm/xen/hypercall.h>
#include <asm/xen/interface.h>
-#include <asm/pgtable.h>
#include <asm/sync_bitops.h>
/* External tools reserve first few grant table entries. */
@@ -802,7 +801,7 @@ int gnttab_alloc_pages(int nr_pages, struct page **pages)
{
int ret;
- ret = alloc_xenballooned_pages(nr_pages, pages);
+ ret = xen_alloc_unpopulated_pages(nr_pages, pages);
if (ret < 0)
return ret;
@@ -814,6 +813,129 @@ int gnttab_alloc_pages(int nr_pages, struct page **pages)
}
EXPORT_SYMBOL_GPL(gnttab_alloc_pages);
+#ifdef CONFIG_XEN_UNPOPULATED_ALLOC
+static inline void cache_init(struct gnttab_page_cache *cache)
+{
+ cache->pages = NULL;
+}
+
+static inline bool cache_empty(struct gnttab_page_cache *cache)
+{
+ return !cache->pages;
+}
+
+static inline struct page *cache_deq(struct gnttab_page_cache *cache)
+{
+ struct page *page;
+
+ page = cache->pages;
+ cache->pages = page->zone_device_data;
+
+ return page;
+}
+
+static inline void cache_enq(struct gnttab_page_cache *cache, struct page *page)
+{
+ page->zone_device_data = cache->pages;
+ cache->pages = page;
+}
+#else
+static inline void cache_init(struct gnttab_page_cache *cache)
+{
+ INIT_LIST_HEAD(&cache->pages);
+}
+
+static inline bool cache_empty(struct gnttab_page_cache *cache)
+{
+ return list_empty(&cache->pages);
+}
+
+static inline struct page *cache_deq(struct gnttab_page_cache *cache)
+{
+ struct page *page;
+
+ page = list_first_entry(&cache->pages, struct page, lru);
+ list_del(&page->lru);
+
+ return page;
+}
+
+static inline void cache_enq(struct gnttab_page_cache *cache, struct page *page)
+{
+ list_add(&page->lru, &cache->pages);
+}
+#endif
+
+void gnttab_page_cache_init(struct gnttab_page_cache *cache)
+{
+ spin_lock_init(&cache->lock);
+ cache_init(cache);
+ cache->num_pages = 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_page_cache_init);
+
+int gnttab_page_cache_get(struct gnttab_page_cache *cache, struct page **page)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cache->lock, flags);
+
+ if (cache_empty(cache)) {
+ spin_unlock_irqrestore(&cache->lock, flags);
+ return gnttab_alloc_pages(1, page);
+ }
+
+ page[0] = cache_deq(cache);
+ cache->num_pages--;
+
+ spin_unlock_irqrestore(&cache->lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_page_cache_get);
+
+void gnttab_page_cache_put(struct gnttab_page_cache *cache, struct page **page,
+ unsigned int num)
+{
+ unsigned long flags;
+ unsigned int i;
+
+ spin_lock_irqsave(&cache->lock, flags);
+
+ for (i = 0; i < num; i++)
+ cache_enq(cache, page[i]);
+ cache->num_pages += num;
+
+ spin_unlock_irqrestore(&cache->lock, flags);
+}
+EXPORT_SYMBOL_GPL(gnttab_page_cache_put);
+
+void gnttab_page_cache_shrink(struct gnttab_page_cache *cache, unsigned int num)
+{
+ struct page *page[10];
+ unsigned int i = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cache->lock, flags);
+
+ while (cache->num_pages > num) {
+ page[i] = cache_deq(cache);
+ cache->num_pages--;
+ if (++i == ARRAY_SIZE(page)) {
+ spin_unlock_irqrestore(&cache->lock, flags);
+ gnttab_free_pages(i, page);
+ i = 0;
+ spin_lock_irqsave(&cache->lock, flags);
+ }
+ }
+
+ spin_unlock_irqrestore(&cache->lock, flags);
+
+ if (i != 0)
+ gnttab_free_pages(i, page);
+}
+EXPORT_SYMBOL_GPL(gnttab_page_cache_shrink);
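Taken together this gives backends a small free list in front of gnttab_alloc_pages()/gnttab_free_pages(); with CONFIG_XEN_UNPOPULATED_ALLOC the pages are chained through zone_device_data (presumably because such pages cannot use their lru field), otherwise through a regular list. A hedged usage sketch, names illustrative:

static struct gnttab_page_cache demo_cache;

static void demo_setup(void)
{
	gnttab_page_cache_init(&demo_cache);
}

static int demo_get_page(struct page **page)
{
	/* Falls back to gnttab_alloc_pages() when the cache is empty. */
	return gnttab_page_cache_get(&demo_cache, page);
}

static void demo_put_page(struct page *page)
{
	gnttab_page_cache_put(&demo_cache, &page, 1);

	/* Trim back to an arbitrary working-set size now and then. */
	gnttab_page_cache_shrink(&demo_cache, 16);
}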
+
void gnttab_pages_clear_private(int nr_pages, struct page **pages)
{
int i;
@@ -837,7 +959,7 @@ EXPORT_SYMBOL_GPL(gnttab_pages_clear_private);
void gnttab_free_pages(int nr_pages, struct page **pages)
{
gnttab_pages_clear_private(nr_pages, pages);
- free_xenballooned_pages(nr_pages, pages);
+ xen_free_unpopulated_pages(nr_pages, pages);
}
EXPORT_SYMBOL_GPL(gnttab_free_pages);
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index cd046684e0d1..374d36de7f5a 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -179,6 +179,7 @@ static int poweroff_nb(struct notifier_block *cb, unsigned long code, void *unus
case SYS_HALT:
case SYS_POWER_OFF:
shutting_down = SHUTDOWN_POWEROFF;
+ break;
default:
break;
}
diff --git a/drivers/xen/mem-reservation.c b/drivers/xen/mem-reservation.c
index 3782cf070338..24648836e0d4 100644
--- a/drivers/xen/mem-reservation.c
+++ b/drivers/xen/mem-reservation.c
@@ -35,6 +35,7 @@ void __xenmem_reservation_va_mapping_update(unsigned long count,
for (i = 0; i < count; i++) {
struct page *page = pages[i];
unsigned long pfn = page_to_pfn(page);
+ int ret;
BUG_ON(!page);
@@ -46,16 +47,10 @@ void __xenmem_reservation_va_mapping_update(unsigned long count,
set_phys_to_machine(pfn, frames[i]);
- /* Link back into the page tables if not highmem. */
- if (!PageHighMem(page)) {
- int ret;
-
- ret = HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT),
- mfn_pte(frames[i], PAGE_KERNEL),
- 0);
- BUG_ON(ret);
- }
+ ret = HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ mfn_pte(frames[i], PAGE_KERNEL), 0);
+ BUG_ON(ret);
}
}
EXPORT_SYMBOL_GPL(__xenmem_reservation_va_mapping_update);
@@ -68,6 +63,7 @@ void __xenmem_reservation_va_mapping_reset(unsigned long count,
for (i = 0; i < count; i++) {
struct page *page = pages[i];
unsigned long pfn = page_to_pfn(page);
+ int ret;
/*
* We don't support PV MMU when Linux and Xen are using
@@ -75,14 +71,11 @@ void __xenmem_reservation_va_mapping_reset(unsigned long count,
*/
BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
- if (!PageHighMem(page)) {
- int ret;
+ ret = HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ __pte_ma(0), 0);
+ BUG_ON(ret);
- ret = HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT),
- __pte_ma(0), 0);
- BUG_ON(ret);
- }
__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
}
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index 224df03ce42e..2c890f4f2cbc 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -8,6 +8,7 @@
#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/pci-acpi.h>
+#include <xen/pci.h>
#include <xen/xen.h>
#include <xen/interface/physdev.h>
#include <xen/interface/xen.h>
@@ -254,3 +255,78 @@ static int xen_mcfg_late(void)
return 0;
}
#endif
+
+#ifdef CONFIG_XEN_DOM0
+struct xen_device_domain_owner {
+ domid_t domain;
+ struct pci_dev *dev;
+ struct list_head list;
+};
+
+static DEFINE_SPINLOCK(dev_domain_list_spinlock);
+static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);
+
+static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
+{
+ struct xen_device_domain_owner *owner;
+
+ list_for_each_entry(owner, &dev_domain_list, list) {
+ if (owner->dev == dev)
+ return owner;
+ }
+ return NULL;
+}
+
+int xen_find_device_domain_owner(struct pci_dev *dev)
+{
+ struct xen_device_domain_owner *owner;
+ int domain = -ENODEV;
+
+ spin_lock(&dev_domain_list_spinlock);
+ owner = find_device(dev);
+ if (owner)
+ domain = owner->domain;
+ spin_unlock(&dev_domain_list_spinlock);
+ return domain;
+}
+EXPORT_SYMBOL_GPL(xen_find_device_domain_owner);
+
+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
+{
+ struct xen_device_domain_owner *owner;
+
+ owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
+ if (!owner)
+ return -ENODEV;
+
+ spin_lock(&dev_domain_list_spinlock);
+ if (find_device(dev)) {
+ spin_unlock(&dev_domain_list_spinlock);
+ kfree(owner);
+ return -EEXIST;
+ }
+ owner->domain = domain;
+ owner->dev = dev;
+ list_add_tail(&owner->list, &dev_domain_list);
+ spin_unlock(&dev_domain_list_spinlock);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);
+
+int xen_unregister_device_domain_owner(struct pci_dev *dev)
+{
+ struct xen_device_domain_owner *owner;
+
+ spin_lock(&dev_domain_list_spinlock);
+ owner = find_device(dev);
+ if (!owner) {
+ spin_unlock(&dev_domain_list_spinlock);
+ return -ENODEV;
+ }
+ list_del(&owner->list);
+ spin_unlock(&dev_domain_list_spinlock);
+ kfree(owner);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
+#endif
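These helpers track which domain owns a given passed-through PCI device; xen-pciback is the natural caller. A sketch of the expected lifecycle (the domid handling is illustrative):

static int demo_assign(struct pci_dev *dev, domid_t domid)
{
	int rc = xen_register_device_domain_owner(dev, domid);

	if (rc)		/* -EEXIST if the device already has an owner */
		return rc;

	WARN_ON(xen_find_device_domain_owner(dev) != domid);
	return 0;
}

static void demo_release(struct pci_dev *dev)
{
	xen_unregister_device_domain_owner(dev);
}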
diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c
index cdc6daa7a9f6..47aa3a1ccaf5 100644
--- a/drivers/xen/pcpu.c
+++ b/drivers/xen/pcpu.c
@@ -92,7 +92,7 @@ static int xen_pcpu_up(uint32_t cpu_id)
return HYPERVISOR_platform_op(&op);
}
-static ssize_t show_online(struct device *dev,
+static ssize_t online_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
@@ -101,7 +101,7 @@ static ssize_t show_online(struct device *dev,
return sprintf(buf, "%u\n", !!(cpu->flags & XEN_PCPU_FLAGS_ONLINE));
}
-static ssize_t __ref store_online(struct device *dev,
+static ssize_t __ref online_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
@@ -130,7 +130,7 @@ static ssize_t __ref store_online(struct device *dev,
ret = count;
return ret;
}
-static DEVICE_ATTR(online, S_IRUGO | S_IWUSR, show_online, store_online);
+static DEVICE_ATTR_RW(online);
static struct attribute *pcpu_dev_attrs[] = {
&dev_attr_online.attr,
@@ -345,41 +345,6 @@ static irqreturn_t xen_pcpu_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-/* Sync with Xen hypervisor after cpu hotadded */
-void xen_pcpu_hotplug_sync(void)
-{
- schedule_work(&xen_pcpu_work);
-}
-EXPORT_SYMBOL_GPL(xen_pcpu_hotplug_sync);
-
-/*
- * For hypervisor presented cpu, return logic cpu id;
- * For hypervisor non-presented cpu, return -ENODEV.
- */
-int xen_pcpu_id(uint32_t acpi_id)
-{
- int cpu_id = 0, max_id = 0;
- struct xen_platform_op op;
-
- op.cmd = XENPF_get_cpuinfo;
- while (cpu_id <= max_id) {
- op.u.pcpu_info.xen_cpuid = cpu_id;
- if (HYPERVISOR_platform_op(&op)) {
- cpu_id++;
- continue;
- }
-
- if (acpi_id == op.u.pcpu_info.acpi_id)
- return cpu_id;
- if (op.u.pcpu_info.max_present > max_id)
- max_id = op.u.pcpu_info.max_present;
- cpu_id++;
- }
-
- return -ENODEV;
-}
-EXPORT_SYMBOL_GPL(xen_pcpu_id);
-
static int __init xen_pcpu_init(void)
{
int irq, ret;
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index 59e85e408c23..18f0ed8b1f93 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -132,6 +132,13 @@ static int platform_pci_probe(struct pci_dev *pdev,
dev_warn(&pdev->dev, "request_irq failed err=%d\n", ret);
goto out;
}
+ /*
+ * It doesn't strictly *have* to run on CPU0 but it sure
+ * as hell better process the event channel ports delivered
+ * to CPU0.
+ */
+ irq_set_affinity(pdev->irq, cpumask_of(0));
+
callback_via = get_callback_via(pdev);
ret = xen_set_callback_via(callback_via);
if (ret) {
@@ -149,7 +156,6 @@ static int platform_pci_probe(struct pci_dev *pdev,
ret = gnttab_init();
if (ret)
goto grant_out;
- xenbus_probe(NULL);
return 0;
grant_out:
gnttab_free_auto_xlat_frames();
@@ -168,7 +174,7 @@ static const struct pci_device_id platform_pci_tbl[] = {
{0,}
};
-static struct dev_pm_ops platform_pm_ops = {
+static const struct dev_pm_ops platform_pm_ops = {
.resume_noirq = platform_pci_resume,
};
diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c
deleted file mode 100644
index 17240c5325a3..000000000000
--- a/drivers/xen/preempt.c
+++ /dev/null
@@ -1,42 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Preemptible hypercalls
- *
- * Copyright (C) 2014 Citrix Systems R&D ltd.
- */
-
-#include <linux/sched.h>
-#include <xen/xen-ops.h>
-
-#ifndef CONFIG_PREEMPTION
-
-/*
- * Some hypercalls issued by the toolstack can take many 10s of
- * seconds. Allow tasks running hypercalls via the privcmd driver to
- * be voluntarily preempted even if full kernel preemption is
- * disabled.
- *
- * Such preemptible hypercalls are bracketed by
- * xen_preemptible_hcall_begin() and xen_preemptible_hcall_end()
- * calls.
- */
-
-DEFINE_PER_CPU(bool, xen_in_preemptible_hcall);
-EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
-
-asmlinkage __visible void xen_maybe_preempt_hcall(void)
-{
- if (unlikely(__this_cpu_read(xen_in_preemptible_hcall)
- && need_resched())) {
- /*
- * Clear flag as we may be rescheduled on a different
- * cpu.
- */
- __this_cpu_write(xen_in_preemptible_hcall, false);
- local_irq_enable();
- cond_resched();
- local_irq_disable();
- __this_cpu_write(xen_in_preemptible_hcall, true);
- }
-}
-#endif /* CONFIG_PREEMPTION */
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index c6070e70dd73..3369734108af 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -25,9 +25,6 @@
#include <linux/miscdevice.h>
#include <linux/moduleparam.h>
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
-#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
@@ -260,7 +257,7 @@ static long privcmd_ioctl_mmap(struct file *file, void __user *udata)
LIST_HEAD(pagelist);
struct mmap_gfn_state state;
- /* We only support privcmd_ioctl_mmap_batch for auto translated. */
+ /* We only support privcmd_ioctl_mmap_batch for non-auto-translated. */
if (xen_feature(XENFEAT_auto_translated_physmap))
return -ENOSYS;
@@ -278,7 +275,7 @@ static long privcmd_ioctl_mmap(struct file *file, void __user *udata)
if (rc || list_empty(&pagelist))
goto out;
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
{
struct page *page = list_first_entry(&pagelist,
@@ -303,7 +300,7 @@ static long privcmd_ioctl_mmap(struct file *file, void __user *udata)
out_up:
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
out:
free_page_list(&pagelist);
@@ -423,15 +420,15 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
int rc;
struct page **pages;
- pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
+ pages = kvcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
if (pages == NULL)
return -ENOMEM;
- rc = alloc_xenballooned_pages(numpgs, pages);
+ rc = xen_alloc_unpopulated_pages(numpgs, pages);
if (rc != 0) {
pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
numpgs, rc);
- kfree(pages);
+ kvfree(pages);
return -ENOMEM;
}
BUG_ON(vma->vm_private_data != NULL);
@@ -499,7 +496,7 @@ static long privcmd_ioctl_mmap_batch(
}
}
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
vma = find_vma(mm, m.addr);
if (!vma ||
@@ -555,7 +552,7 @@ static long privcmd_ioctl_mmap_batch(
BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
&pagelist, mmap_batch_fn, &state));
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
if (state.global_error) {
/* Write back errors in second pass. */
@@ -576,19 +573,19 @@ out:
return ret;
out_unlock:
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
goto out;
}
static int lock_pages(
struct privcmd_dm_op_buf kbufs[], unsigned int num,
- struct page *pages[], unsigned int nr_pages)
+ struct page *pages[], unsigned int nr_pages, unsigned int *pinned)
{
unsigned int i;
for (i = 0; i < num; i++) {
unsigned int requested;
- int pinned;
+ int page_count;
requested = DIV_ROUND_UP(
offset_in_page(kbufs[i].uptr) + kbufs[i].size,
@@ -596,14 +593,15 @@ static int lock_pages(
if (requested > nr_pages)
return -ENOSPC;
- pinned = get_user_pages_fast(
+ page_count = pin_user_pages_fast(
(unsigned long) kbufs[i].uptr,
requested, FOLL_WRITE, pages);
- if (pinned < 0)
- return pinned;
+ if (page_count < 0)
+ return page_count;
- nr_pages -= pinned;
- pages += pinned;
+ *pinned += page_count;
+ nr_pages -= page_count;
+ pages += page_count;
}
return 0;
@@ -611,15 +609,7 @@ static int lock_pages(
static void unlock_pages(struct page *pages[], unsigned int nr_pages)
{
- unsigned int i;
-
- if (!pages)
- return;
-
- for (i = 0; i < nr_pages; i++) {
- if (pages[i])
- put_page(pages[i]);
- }
+ unpin_user_pages_dirty_lock(pages, nr_pages, true);
}
static long privcmd_ioctl_dm_op(struct file *file, void __user *udata)
@@ -632,6 +622,7 @@ static long privcmd_ioctl_dm_op(struct file *file, void __user *udata)
struct xen_dm_op_buf *xbufs = NULL;
unsigned int i;
long rc;
+ unsigned int pinned = 0;
if (copy_from_user(&kdata, udata, sizeof(kdata)))
return -EFAULT;
@@ -685,9 +676,11 @@ static long privcmd_ioctl_dm_op(struct file *file, void __user *udata)
goto out;
}
- rc = lock_pages(kbufs, kdata.num, pages, nr_pages);
- if (rc)
+ rc = lock_pages(kbufs, kdata.num, pages, nr_pages, &pinned);
+ if (rc < 0) {
+ nr_pages = pinned;
goto out;
+ }
for (i = 0; i < kdata.num; i++) {
set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr);
@@ -724,14 +717,15 @@ static long privcmd_ioctl_restrict(struct file *file, void __user *udata)
return 0;
}
-static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata)
+static long privcmd_ioctl_mmap_resource(struct file *file,
+ struct privcmd_mmap_resource __user *udata)
{
struct privcmd_data *data = file->private_data;
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct privcmd_mmap_resource kdata;
xen_pfn_t *pfns = NULL;
- struct xen_mem_acquire_resource xdata;
+ struct xen_mem_acquire_resource xdata = { };
int rc;
if (copy_from_user(&kdata, udata, sizeof(kdata)))
@@ -741,7 +735,23 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata)
if (data->domid != DOMID_INVALID && data->domid != kdata.dom)
return -EPERM;
- down_write(&mm->mmap_sem);
+ /* Both fields must be set, or both must be unset */
+ if (!!kdata.addr != !!kdata.num)
+ return -EINVAL;
+
+ xdata.domid = kdata.dom;
+ xdata.type = kdata.type;
+ xdata.id = kdata.id;
+
+ if (!kdata.addr && !kdata.num) {
+ /* Query the size of the resource. */
+ rc = HYPERVISOR_memory_op(XENMEM_acquire_resource, &xdata);
+ if (rc)
+ return rc;
+ return __put_user(xdata.nr_frames, &udata->num);
+ }
+
+ mmap_write_lock(mm);
vma = find_vma(mm, kdata.addr);
if (!vma || vma->vm_ops != &privcmd_vm_ops) {
@@ -775,10 +785,6 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata)
} else
vma->vm_private_data = PRIV_VMA_LOCKED;
- memset(&xdata, 0, sizeof(xdata));
- xdata.domid = kdata.dom;
- xdata.type = kdata.type;
- xdata.id = kdata.id;
xdata.frame = kdata.idx;
xdata.nr_frames = kdata.num;
set_xen_guest_handle(xdata.frame_list, pfns);
@@ -797,21 +803,21 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata)
unsigned int domid =
(xdata.flags & XENMEM_rsrc_acq_caller_owned) ?
DOMID_SELF : kdata.dom;
- int num;
+ int num, *errs = (int *)pfns;
+ BUILD_BUG_ON(sizeof(*errs) > sizeof(*pfns));
num = xen_remap_domain_mfn_array(vma,
kdata.addr & PAGE_MASK,
- pfns, kdata.num, (int *)pfns,
+ pfns, kdata.num, errs,
vma->vm_page_prot,
- domid,
- vma->vm_private_data);
+ domid);
if (num < 0)
rc = num;
else if (num != kdata.num) {
unsigned int i;
for (i = 0; i < num; i++) {
- rc = pfns[i];
+ rc = errs[i];
if (rc < 0)
break;
}
@@ -820,7 +826,7 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata)
}
out:
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
kfree(pfns);
return rc;
@@ -902,11 +908,11 @@ static void privcmd_close(struct vm_area_struct *vma)
rc = xen_unmap_domain_gfn_range(vma, numgfns, pages);
if (rc == 0)
- free_xenballooned_pages(numpgs, pages);
+ xen_free_unpopulated_pages(numpgs, pages);
else
pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n",
numpgs, rc);
- kfree(pages);
+ kvfree(pages);
}
static vm_fault_t privcmd_fault(struct vm_fault *vmf)
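The lock_pages()/unlock_pages() rework switches from get_user_pages_fast() to the pin API, whose rule is that every page pinned with pin_user_pages_fast() must be released with unpin_user_pages*(); tracking the running *pinned count keeps the error path from unpinning pages that were never pinned. A condensed sketch of the pairing (the helper names are hypothetical):

#include <linux/mm.h>

static int pin_user_buffer(void __user *uptr, size_t len,
			   struct page **pages, unsigned int *pinned)
{
	unsigned int nr = DIV_ROUND_UP(offset_in_page(uptr) + len, PAGE_SIZE);
	int rc;

	rc = pin_user_pages_fast((unsigned long)uptr, nr, FOLL_WRITE, pages);
	if (rc < 0)
		return rc;	/* nothing was pinned on failure */

	*pinned = rc;		/* may be fewer pages than requested */
	return 0;
}

static void release_user_buffer(struct page **pages, unsigned int pinned)
{
	/* mark the pages dirty and drop the pin references in one call */
	unpin_user_pages_dirty_lock(pages, pinned, true);
}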
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index c57c71b7d53d..d6f945fd4147 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -24,7 +24,7 @@
#define PVCALLS_VERSIONS "1"
#define MAX_RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
-struct pvcalls_back_global {
+static struct pvcalls_back_global {
struct list_head frontends;
struct semaphore frontends_lock;
} pvcalls_back_global;
@@ -66,6 +66,7 @@ struct sock_mapping {
atomic_t write;
atomic_t io;
atomic_t release;
+ atomic_t eoi;
void (*saved_data_ready)(struct sock *sk);
struct pvcalls_ioworker ioworker;
};
@@ -87,7 +88,7 @@ static int pvcalls_back_release_active(struct xenbus_device *dev,
struct pvcalls_fedata *fedata,
struct sock_mapping *map);
-static void pvcalls_conn_back_read(void *opaque)
+static bool pvcalls_conn_back_read(void *opaque)
{
struct sock_mapping *map = (struct sock_mapping *)opaque;
struct msghdr msg;
@@ -107,17 +108,17 @@ static void pvcalls_conn_back_read(void *opaque)
virt_mb();
if (error)
- return;
+ return false;
size = pvcalls_queued(prod, cons, array_size);
if (size >= array_size)
- return;
+ return false;
spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
if (skb_queue_empty(&map->sock->sk->sk_receive_queue)) {
atomic_set(&map->read, 0);
spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock,
flags);
- return;
+ return true;
}
spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);
wanted = array_size - size;
@@ -141,7 +142,7 @@ static void pvcalls_conn_back_read(void *opaque)
ret = inet_recvmsg(map->sock, &msg, wanted, MSG_DONTWAIT);
WARN_ON(ret > wanted);
if (ret == -EAGAIN) /* shouldn't happen */
- return;
+ return true;
if (!ret)
ret = -ENOTCONN;
spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
@@ -160,10 +161,10 @@ static void pvcalls_conn_back_read(void *opaque)
virt_wmb();
notify_remote_via_irq(map->irq);
- return;
+ return true;
}
-static void pvcalls_conn_back_write(struct sock_mapping *map)
+static bool pvcalls_conn_back_write(struct sock_mapping *map)
{
struct pvcalls_data_intf *intf = map->ring;
struct pvcalls_data *data = &map->data;
@@ -180,7 +181,7 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
array_size = XEN_FLEX_RING_SIZE(map->ring_order);
size = pvcalls_queued(prod, cons, array_size);
if (size == 0)
- return;
+ return false;
memset(&msg, 0, sizeof(msg));
msg.msg_flags |= MSG_DONTWAIT;
@@ -198,12 +199,11 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
atomic_set(&map->write, 0);
ret = inet_sendmsg(map->sock, &msg, size);
- if (ret == -EAGAIN || (ret >= 0 && ret < size)) {
+ if (ret == -EAGAIN) {
atomic_inc(&map->write);
atomic_inc(&map->io);
+ return true;
}
- if (ret == -EAGAIN)
- return;
/* write the data, then update the indexes */
virt_wmb();
@@ -216,9 +216,13 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
}
/* update the indexes, then notify the other end */
virt_wmb();
- if (prod != cons + ret)
+ if (prod != cons + ret) {
atomic_inc(&map->write);
+ atomic_inc(&map->io);
+ }
notify_remote_via_irq(map->irq);
+
+ return true;
}
static void pvcalls_back_ioworker(struct work_struct *work)
@@ -227,6 +231,7 @@ static void pvcalls_back_ioworker(struct work_struct *work)
struct pvcalls_ioworker, register_work);
struct sock_mapping *map = container_of(ioworker, struct sock_mapping,
ioworker);
+ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
while (atomic_read(&map->io) > 0) {
if (atomic_read(&map->release) > 0) {
@@ -234,10 +239,18 @@ static void pvcalls_back_ioworker(struct work_struct *work)
return;
}
- if (atomic_read(&map->read) > 0)
- pvcalls_conn_back_read(map);
- if (atomic_read(&map->write) > 0)
- pvcalls_conn_back_write(map);
+ if (atomic_read(&map->read) > 0 &&
+ pvcalls_conn_back_read(map))
+ eoi_flags = 0;
+ if (atomic_read(&map->write) > 0 &&
+ pvcalls_conn_back_write(map))
+ eoi_flags = 0;
+
+ if (atomic_read(&map->eoi) > 0 && !atomic_read(&map->write)) {
+ atomic_set(&map->eoi, 0);
+ xen_irq_lateeoi(map->irq, eoi_flags);
+ eoi_flags = XEN_EOI_FLAG_SPURIOUS;
+ }
atomic_dec(&map->io);
}
@@ -300,7 +313,7 @@ static struct sock_mapping *pvcalls_new_active_socket(
struct pvcalls_fedata *fedata,
uint64_t id,
grant_ref_t ref,
- uint32_t evtchn,
+ evtchn_port_t evtchn,
struct socket *sock)
{
int ret;
@@ -334,12 +347,9 @@ static struct sock_mapping *pvcalls_new_active_socket(
goto out;
map->bytes = page;
- ret = bind_interdomain_evtchn_to_irqhandler(fedata->dev->otherend_id,
- evtchn,
- pvcalls_back_conn_event,
- 0,
- "pvcalls-backend",
- map);
+ ret = bind_interdomain_evtchn_to_irqhandler_lateeoi(
+ fedata->dev, evtchn,
+ pvcalls_back_conn_event, 0, "pvcalls-backend", map);
if (ret < 0)
goto out;
map->irq = ret;
@@ -455,7 +465,6 @@ static int pvcalls_back_release_passive(struct xenbus_device *dev,
write_unlock_bh(&mappass->sock->sk->sk_callback_lock);
}
sock_release(mappass->sock);
- flush_workqueue(mappass->wq);
destroy_workqueue(mappass->wq);
kfree(mappass);
@@ -873,15 +882,18 @@ static irqreturn_t pvcalls_back_event(int irq, void *dev_id)
{
struct xenbus_device *dev = dev_id;
struct pvcalls_fedata *fedata = NULL;
+ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
- if (dev == NULL)
- return IRQ_HANDLED;
+ if (dev) {
+ fedata = dev_get_drvdata(&dev->dev);
+ if (fedata) {
+ pvcalls_back_work(fedata);
+ eoi_flags = 0;
+ }
+ }
- fedata = dev_get_drvdata(&dev->dev);
- if (fedata == NULL)
- return IRQ_HANDLED;
+ xen_irq_lateeoi(irq, eoi_flags);
- pvcalls_back_work(fedata);
return IRQ_HANDLED;
}
@@ -891,12 +903,15 @@ static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map)
struct pvcalls_ioworker *iow;
if (map == NULL || map->sock == NULL || map->sock->sk == NULL ||
- map->sock->sk->sk_user_data != map)
+ map->sock->sk->sk_user_data != map) {
+ xen_irq_lateeoi(irq, 0);
return IRQ_HANDLED;
+ }
iow = &map->ioworker;
atomic_inc(&map->write);
+ atomic_inc(&map->eoi);
atomic_inc(&map->io);
queue_work(iow->wq, &iow->register_work);
@@ -905,7 +920,8 @@ static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map)
static int backend_connect(struct xenbus_device *dev)
{
- int err, evtchn;
+ int err;
+ evtchn_port_t evtchn;
grant_ref_t ring_ref;
struct pvcalls_fedata *fedata = NULL;
@@ -931,7 +947,7 @@ static int backend_connect(struct xenbus_device *dev)
goto error;
}
- err = bind_interdomain_evtchn_to_irq(dev->otherend_id, evtchn);
+ err = bind_interdomain_evtchn_to_irq_lateeoi(dev, evtchn);
if (err < 0)
goto error;
fedata->irq = err;
@@ -1087,7 +1103,8 @@ static void set_backend_state(struct xenbus_device *dev,
case XenbusStateInitialised:
switch (state) {
case XenbusStateConnected:
- backend_connect(dev);
+ if (backend_connect(dev))
+ return;
xenbus_switch_state(dev, XenbusStateConnected);
break;
case XenbusStateClosing:
diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 57592a6b5c9e..3c9ae156b597 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -368,12 +368,12 @@ out:
return -ENOMEM;
}
-static int create_active(struct sock_mapping *map, int *evtchn)
+static int create_active(struct sock_mapping *map, evtchn_port_t *evtchn)
{
void *bytes;
- int ret = -ENOMEM, irq = -1, i;
+ int ret, irq = -1, i;
- *evtchn = -1;
+ *evtchn = 0;
init_waitqueue_head(&map->active.inflight_conn_req);
bytes = map->active.data.in;
@@ -404,7 +404,7 @@ static int create_active(struct sock_mapping *map, int *evtchn)
return 0;
out_error:
- if (*evtchn >= 0)
+ if (*evtchn > 0)
xenbus_free_evtchn(pvcalls_front_dev, *evtchn);
return ret;
}
@@ -415,7 +415,8 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
struct pvcalls_bedata *bedata;
struct sock_mapping *map = NULL;
struct xen_pvcalls_request *req;
- int notify, req_id, ret, evtchn;
+ int notify, req_id, ret;
+ evtchn_port_t evtchn;
if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
return -EOPNOTSUPP;
@@ -765,7 +766,8 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
struct sock_mapping *map;
struct sock_mapping *map2 = NULL;
struct xen_pvcalls_request *req;
- int notify, req_id, ret, evtchn, nonblock;
+ int notify, req_id, ret, nonblock;
+ evtchn_port_t evtchn;
map = pvcalls_enter_sock(sock);
if (IS_ERR(map))
@@ -1125,7 +1127,8 @@ static int pvcalls_front_remove(struct xenbus_device *dev)
static int pvcalls_front_probe(struct xenbus_device *dev,
const struct xenbus_device_id *id)
{
- int ret = -ENOMEM, evtchn, i;
+ int ret = -ENOMEM, i;
+ evtchn_port_t evtchn;
unsigned int max_page_order, function_calls, len;
char *versions;
grant_ref_t gref_head = 0;
@@ -1260,7 +1263,7 @@ static void pvcalls_front_changed(struct xenbus_device *dev,
if (dev->state == XenbusStateClosed)
break;
/* Missed the backend's CLOSING state */
- /* fall through */
+ fallthrough;
case XenbusStateClosing:
xenbus_frontend_closed(dev);
break;
@@ -1272,6 +1275,7 @@ static struct xenbus_driver pvcalls_front_driver = {
.probe = pvcalls_front_probe,
.remove = pvcalls_front_remove,
.otherend_changed = pvcalls_front_changed,
+ .not_essential = true,
};
static int __init pvcalls_frontend_init(void)
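Using evtchn_port_t (an unsigned 32-bit ABI type) instead of int is why the sentinel changes from -1 to 0: as the diff's error path shows, port 0 is never handed out, so it can mean "no port". Allocation via the xenbus helpers then looks like this sketch:

evtchn_port_t evtchn = 0;
int err;

err = xenbus_alloc_evtchn(dev, &evtchn);	/* dev: struct xenbus_device * */
if (err)
	return err;

/* ... bind the port and advertise it to the backend ... */

if (evtchn > 0)			/* only free a port we actually got */
	xenbus_free_evtchn(dev, evtchn);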
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index b6d27762c6f8..47aebd98f52f 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -28,7 +28,7 @@
#include <linux/memblock.h>
#include <linux/dma-direct.h>
-#include <linux/dma-noncoherent.h>
+#include <linux/dma-map-ops.h>
#include <linux/export.h>
#include <xen/swiotlb-xen.h>
#include <xen/page.h>
@@ -40,49 +40,39 @@
#include <trace/events/swiotlb.h>
#define MAX_DMA_BITS 32
-/*
- * Used to do a quick range check in swiotlb_tbl_unmap_single and
- * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
- * API.
- */
-static char *xen_io_tlb_start, *xen_io_tlb_end;
-static unsigned long xen_io_tlb_nslabs;
/*
* Quick lookup value of the bus address of the IOTLB.
*/
-static u64 start_dma_addr;
-
-/*
- * Both of these functions should avoid XEN_PFN_PHYS because phys_addr_t
- * can be 32bit when dma_addr_t is 64bit leading to a loss in
- * information if the shift is done before casting to 64bit.
- */
-static inline dma_addr_t xen_phys_to_bus(phys_addr_t paddr)
+static inline phys_addr_t xen_phys_to_bus(struct device *dev, phys_addr_t paddr)
{
unsigned long bfn = pfn_to_bfn(XEN_PFN_DOWN(paddr));
- dma_addr_t dma = (dma_addr_t)bfn << XEN_PAGE_SHIFT;
+ phys_addr_t baddr = (phys_addr_t)bfn << XEN_PAGE_SHIFT;
- dma |= paddr & ~XEN_PAGE_MASK;
+ baddr |= paddr & ~XEN_PAGE_MASK;
+ return baddr;
+}
- return dma;
+static inline dma_addr_t xen_phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+ return phys_to_dma(dev, xen_phys_to_bus(dev, paddr));
}
-static inline phys_addr_t xen_bus_to_phys(dma_addr_t baddr)
+static inline phys_addr_t xen_bus_to_phys(struct device *dev,
+ phys_addr_t baddr)
{
unsigned long xen_pfn = bfn_to_pfn(XEN_PFN_DOWN(baddr));
- dma_addr_t dma = (dma_addr_t)xen_pfn << XEN_PAGE_SHIFT;
- phys_addr_t paddr = dma;
-
- paddr |= baddr & ~XEN_PAGE_MASK;
+ phys_addr_t paddr = (xen_pfn << XEN_PAGE_SHIFT) |
+ (baddr & ~XEN_PAGE_MASK);
return paddr;
}
-static inline dma_addr_t xen_virt_to_bus(void *address)
+static inline phys_addr_t xen_dma_to_phys(struct device *dev,
+ dma_addr_t dma_addr)
{
- return xen_phys_to_bus(virt_to_phys(address));
+ return xen_bus_to_phys(dev, dma_to_phys(dev, dma_addr));
}
static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
@@ -99,60 +89,46 @@ static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
return 0;
}
-static int is_xen_swiotlb_buffer(dma_addr_t dma_addr)
+static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
{
- unsigned long bfn = XEN_PFN_DOWN(dma_addr);
+ unsigned long bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr));
unsigned long xen_pfn = bfn_to_local_pfn(bfn);
- phys_addr_t paddr = XEN_PFN_PHYS(xen_pfn);
+ phys_addr_t paddr = (phys_addr_t)xen_pfn << XEN_PAGE_SHIFT;
/* If the address is outside our domain, it CAN
* have the same virtual address as another address
* in our domain. Therefore _only_ check addresses within our domain.
*/
- if (pfn_valid(PFN_DOWN(paddr))) {
- return paddr >= virt_to_phys(xen_io_tlb_start) &&
- paddr < virt_to_phys(xen_io_tlb_end);
- }
+ if (pfn_valid(PFN_DOWN(paddr)))
+ return is_swiotlb_buffer(dev, paddr);
return 0;
}
-static int
-xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
+static int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
{
- int i, rc;
- int dma_bits;
+ int rc;
+ unsigned int order = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT);
+ unsigned int i, dma_bits = order + PAGE_SHIFT;
dma_addr_t dma_handle;
phys_addr_t p = virt_to_phys(buf);
- dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
+ BUILD_BUG_ON(IO_TLB_SEGSIZE & (IO_TLB_SEGSIZE - 1));
+ BUG_ON(nslabs % IO_TLB_SEGSIZE);
i = 0;
do {
- int slabs = min(nslabs - i, (unsigned long)IO_TLB_SEGSIZE);
-
do {
rc = xen_create_contiguous_region(
- p + (i << IO_TLB_SHIFT),
- get_order(slabs << IO_TLB_SHIFT),
+ p + (i << IO_TLB_SHIFT), order,
dma_bits, &dma_handle);
} while (rc && dma_bits++ < MAX_DMA_BITS);
if (rc)
return rc;
- i += slabs;
+ i += IO_TLB_SEGSIZE;
} while (i < nslabs);
return 0;
}
-static unsigned long xen_set_nslabs(unsigned long nr_tbl)
-{
- if (!nr_tbl) {
- xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
- xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE);
- } else
- xen_io_tlb_nslabs = nr_tbl;
-
- return xen_io_tlb_nslabs << IO_TLB_SHIFT;
-}
enum xen_swiotlb_err {
XEN_SWIOTLB_UNKNOWN = 0,
@@ -175,103 +151,113 @@ static const char *xen_swiotlb_error(enum xen_swiotlb_err err)
}
return "";
}
-int __ref xen_swiotlb_init(int verbose, bool early)
+
+int xen_swiotlb_init(void)
{
- unsigned long bytes, order;
- int rc = -ENOMEM;
enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN;
- unsigned int repeat = 3;
-
- xen_io_tlb_nslabs = swiotlb_nr_tbl();
-retry:
- bytes = xen_set_nslabs(xen_io_tlb_nslabs);
- order = get_order(xen_io_tlb_nslabs << IO_TLB_SHIFT);
+ unsigned long bytes = swiotlb_size_or_default();
+ unsigned long nslabs = bytes >> IO_TLB_SHIFT;
+ unsigned int order, repeat = 3;
+ int rc = -ENOMEM;
+ char *start;
- /*
- * IO TLB memory already allocated. Just use it.
- */
- if (io_tlb_start != 0) {
- xen_io_tlb_start = phys_to_virt(io_tlb_start);
- goto end;
+ if (io_tlb_default_mem.nslabs) {
+ pr_warn("swiotlb buffer already initialized\n");
+ return -EEXIST;
}
+retry:
+ m_ret = XEN_SWIOTLB_ENOMEM;
+ order = get_order(bytes);
+
/*
* Get IO TLB memory from any location.
*/
- if (early) {
- xen_io_tlb_start = memblock_alloc(PAGE_ALIGN(bytes),
- PAGE_SIZE);
- if (!xen_io_tlb_start)
- panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
- __func__, PAGE_ALIGN(bytes), PAGE_SIZE);
- } else {
#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
- while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
- xen_io_tlb_start = (void *)xen_get_swiotlb_free_pages(order);
- if (xen_io_tlb_start)
- break;
- order--;
- }
- if (order != get_order(bytes)) {
- pr_warn("Warning: only able to allocate %ld MB for software IO TLB\n",
- (PAGE_SIZE << order) >> 20);
- xen_io_tlb_nslabs = SLABS_PER_PAGE << order;
- bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
- }
+ while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
+ start = (void *)xen_get_swiotlb_free_pages(order);
+ if (start)
+ break;
+ order--;
}
- if (!xen_io_tlb_start) {
- m_ret = XEN_SWIOTLB_ENOMEM;
- goto error;
+ if (!start)
+ goto exit;
+ if (order != get_order(bytes)) {
+ pr_warn("Warning: only able to allocate %ld MB for software IO TLB\n",
+ (PAGE_SIZE << order) >> 20);
+ nslabs = SLABS_PER_PAGE << order;
+ bytes = nslabs << IO_TLB_SHIFT;
}
+
/*
* And replace that memory with pages under 4GB.
*/
- rc = xen_swiotlb_fixup(xen_io_tlb_start,
- bytes,
- xen_io_tlb_nslabs);
+ rc = xen_swiotlb_fixup(start, nslabs);
if (rc) {
- if (early)
- memblock_free(__pa(xen_io_tlb_start),
- PAGE_ALIGN(bytes));
- else {
- free_pages((unsigned long)xen_io_tlb_start, order);
- xen_io_tlb_start = NULL;
- }
+ free_pages((unsigned long)start, order);
m_ret = XEN_SWIOTLB_EFIXUP;
goto error;
}
- start_dma_addr = xen_virt_to_bus(xen_io_tlb_start);
- if (early) {
- if (swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs,
- verbose))
- panic("Cannot allocate SWIOTLB buffer");
- rc = 0;
- } else
- rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs);
-
-end:
- xen_io_tlb_end = xen_io_tlb_start + bytes;
- if (!rc)
- swiotlb_set_max_segment(PAGE_SIZE);
-
- return rc;
+ rc = swiotlb_late_init_with_tbl(start, nslabs);
+ if (rc)
+ return rc;
+ swiotlb_set_max_segment(PAGE_SIZE);
+ return 0;
error:
- if (repeat--) {
- xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */
- (xen_io_tlb_nslabs >> 1));
- pr_info("Lowering to %luMB\n",
- (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20);
+ if (nslabs > 1024 && repeat--) {
+ /* Min is 2MB */
+ nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
+ bytes = nslabs << IO_TLB_SHIFT;
+ pr_info("Lowering to %luMB\n", bytes >> 20);
goto retry;
}
+exit:
pr_err("%s (rc:%d)\n", xen_swiotlb_error(m_ret), rc);
- if (early)
- panic("%s (rc:%d)", xen_swiotlb_error(m_ret), rc);
- else
- free_pages((unsigned long)xen_io_tlb_start, order);
return rc;
}
+#ifdef CONFIG_X86
+void __init xen_swiotlb_init_early(void)
+{
+ unsigned long bytes = swiotlb_size_or_default();
+ unsigned long nslabs = bytes >> IO_TLB_SHIFT;
+ unsigned int repeat = 3;
+ char *start;
+ int rc;
+
+retry:
+ /*
+ * Get IO TLB memory from any location.
+ */
+ start = memblock_alloc(PAGE_ALIGN(bytes),
+ IO_TLB_SEGSIZE << IO_TLB_SHIFT);
+ if (!start)
+ panic("%s: Failed to allocate %lu bytes\n",
+ __func__, PAGE_ALIGN(bytes));
+
+ /*
+ * And replace that memory with pages under 4GB.
+ */
+ rc = xen_swiotlb_fixup(start, nslabs);
+ if (rc) {
+ memblock_free(start, PAGE_ALIGN(bytes));
+ if (nslabs > 1024 && repeat--) {
+ /* Min is 2MB */
+ nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
+ bytes = nslabs << IO_TLB_SHIFT;
+ pr_info("Lowering to %luMB\n", bytes >> 20);
+ goto retry;
+ }
+ panic("%s (rc:%d)", xen_swiotlb_error(XEN_SWIOTLB_EFIXUP), rc);
+ }
+
+ if (swiotlb_init_with_tbl(start, nslabs, true))
+ panic("Cannot allocate SWIOTLB buffer");
+ swiotlb_set_max_segment(PAGE_SIZE);
+}
+#endif /* CONFIG_X86 */
+
static void *
xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
@@ -307,12 +293,12 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
if (hwdev && hwdev->coherent_dma_mask)
dma_mask = hwdev->coherent_dma_mask;
- /* At this point dma_handle is the physical address, next we are
+ /* At this point dma_handle is the dma address, next we are
* going to set it to the machine address.
* Do not use virt_to_phys(ret) because on ARM it doesn't correspond
* to *dma_handle. */
- phys = *dma_handle;
- dev_addr = xen_phys_to_bus(phys);
+ phys = dma_to_phys(hwdev, *dma_handle);
+ dev_addr = xen_phys_to_dma(hwdev, phys);
if (((dev_addr + size - 1 <= dma_mask)) &&
!range_straddles_page_boundary(phys, size))
*dma_handle = dev_addr;
@@ -322,6 +308,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
xen_free_coherent_pages(hwdev, size, ret, (dma_addr_t)phys, attrs);
return NULL;
}
+ *dma_handle = phys_to_dma(hwdev, *dma_handle);
SetPageXenRemapped(virt_to_page(ret));
}
memset(ret, 0, size);
@@ -335,23 +322,30 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
int order = get_order(size);
phys_addr_t phys;
u64 dma_mask = DMA_BIT_MASK(32);
+ struct page *page;
if (hwdev && hwdev->coherent_dma_mask)
dma_mask = hwdev->coherent_dma_mask;
/* do not use virt_to_phys because on ARM it doesn't return you the
* physical address */
- phys = xen_bus_to_phys(dev_addr);
+ phys = xen_dma_to_phys(hwdev, dev_addr);
/* Convert the size to actually allocated. */
size = 1UL << (order + XEN_PAGE_SHIFT);
+ if (is_vmalloc_addr(vaddr))
+ page = vmalloc_to_page(vaddr);
+ else
+ page = virt_to_page(vaddr);
+
if (!WARN_ON((dev_addr + size - 1 > dma_mask) ||
range_straddles_page_boundary(phys, size)) &&
- TestClearPageXenRemapped(virt_to_page(vaddr)))
+ TestClearPageXenRemapped(page))
xen_destroy_contiguous_region(phys, order);
- xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs);
+ xen_free_coherent_pages(hwdev, size, vaddr, phys_to_dma(hwdev, phys),
+ attrs);
}
/*
@@ -367,7 +361,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
unsigned long attrs)
{
phys_addr_t map, phys = page_to_phys(page) + offset;
- dma_addr_t dev_addr = xen_phys_to_bus(phys);
+ dma_addr_t dev_addr = xen_phys_to_dma(dev, phys);
BUG_ON(dir == DMA_NONE);
/*
@@ -378,7 +372,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
if (dma_capable(dev, dev_addr, size, true) &&
!range_straddles_page_boundary(phys, size) &&
!xen_arch_need_swiotlb(dev, phys, dev_addr) &&
- swiotlb_force != SWIOTLB_FORCE)
+ !is_swiotlb_force_bounce(dev))
goto done;
/*
@@ -386,26 +380,29 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
*/
trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
- map = swiotlb_tbl_map_single(dev, start_dma_addr, phys,
- size, size, dir, attrs);
+ map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs);
if (map == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
phys = map;
- dev_addr = xen_phys_to_bus(map);
+ dev_addr = xen_phys_to_dma(dev, map);
/*
* Ensure that the address returned is DMA'ble
*/
if (unlikely(!dma_capable(dev, dev_addr, size, true))) {
- swiotlb_tbl_unmap_single(dev, map, size, size, dir,
+ swiotlb_tbl_unmap_single(dev, map, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC);
return DMA_MAPPING_ERROR;
}
done:
- if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- xen_dma_sync_for_device(dev_addr, phys, size, dir);
+ if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
+ if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr))))
+ arch_sync_dma_for_device(phys, size, dir);
+ else
+ xen_dma_sync_for_device(dev, dev_addr, size, dir);
+ }
return dev_addr;
}
@@ -420,42 +417,54 @@ done:
static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
- phys_addr_t paddr = xen_bus_to_phys(dev_addr);
+ phys_addr_t paddr = xen_dma_to_phys(hwdev, dev_addr);
BUG_ON(dir == DMA_NONE);
- if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- xen_dma_sync_for_cpu(dev_addr, paddr, size, dir);
+ if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
+ if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr))))
+ arch_sync_dma_for_cpu(paddr, size, dir);
+ else
+ xen_dma_sync_for_cpu(hwdev, dev_addr, size, dir);
+ }
/* NOTE: We use dev_addr here, not paddr! */
- if (is_xen_swiotlb_buffer(dev_addr))
- swiotlb_tbl_unmap_single(hwdev, paddr, size, size, dir, attrs);
+ if (is_xen_swiotlb_buffer(hwdev, dev_addr))
+ swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
}
static void
xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction dir)
{
- phys_addr_t paddr = xen_bus_to_phys(dma_addr);
+ phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);
- if (!dev_is_dma_coherent(dev))
- xen_dma_sync_for_cpu(dma_addr, paddr, size, dir);
+ if (!dev_is_dma_coherent(dev)) {
+ if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
+ arch_sync_dma_for_cpu(paddr, size, dir);
+ else
+ xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
+ }
- if (is_xen_swiotlb_buffer(dma_addr))
- swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
+ if (is_xen_swiotlb_buffer(dev, dma_addr))
+ swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
}
static void
xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction dir)
{
- phys_addr_t paddr = xen_bus_to_phys(dma_addr);
+ phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);
- if (is_xen_swiotlb_buffer(dma_addr))
- swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);
+ if (is_xen_swiotlb_buffer(dev, dma_addr))
+ swiotlb_sync_single_for_device(dev, paddr, size, dir);
- if (!dev_is_dma_coherent(dev))
- xen_dma_sync_for_device(dma_addr, paddr, size, dir);
+ if (!dev_is_dma_coherent(dev)) {
+ if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
+ arch_sync_dma_for_device(paddr, size, dir);
+ else
+ xen_dma_sync_for_device(dev, dma_addr, size, dir);
+ }
}
/*
@@ -498,7 +507,7 @@ xen_swiotlb_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
out_unmap:
xen_swiotlb_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
sg_dma_len(sgl) = 0;
- return 0;
+ return -EIO;
}
static void
@@ -536,7 +545,7 @@ xen_swiotlb_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
static int
xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
- return xen_virt_to_bus(xen_io_tlb_end - 1) <= mask;
+ return xen_phys_to_dma(hwdev, io_tlb_default_mem.end - 1) <= mask;
}
const struct dma_map_ops xen_swiotlb_dma_ops = {
@@ -553,4 +562,6 @@ const struct dma_map_ops xen_swiotlb_dma_ops = {
.dma_supported = xen_swiotlb_dma_supported,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
+ .alloc_pages = dma_common_alloc_pages,
+ .free_pages = dma_common_free_pages,
};
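The split helpers make the two translation layers explicit: xen_phys_to_bus() swaps the guest frame number for a backend (machine) frame via pfn_to_bfn() while preserving the in-page offset, and phys_to_dma() then applies any per-device offset. A worked example with made-up frame numbers:

/*
 * Illustrative numbers only (XEN_PAGE_SHIFT == 12):
 *   paddr = 0x12345678  ->  xen_pfn = 0x12345, offset = 0x678
 * If the hypervisor maps pfn 0x12345 to bfn 0xabcde, then
 *   baddr = (0xabcde << 12) | 0x678 = 0xabcde678
 * and the device-visible address is phys_to_dma(dev, 0xabcde678).
 */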
diff --git a/drivers/xen/time.c b/drivers/xen/time.c
index 0968859c29d0..152dd33bb223 100644
--- a/drivers/xen/time.c
+++ b/drivers/xen/time.c
@@ -7,6 +7,7 @@
#include <linux/math64.h>
#include <linux/gfp.h>
#include <linux/slab.h>
+#include <linux/static_call.h>
#include <asm/paravirt.h>
#include <asm/xen/hypervisor.h>
@@ -64,7 +65,7 @@ static void xen_get_runstate_snapshot_cpu_delta(
do {
state_time = get64(&state->state_entry_time);
rmb(); /* Hypervisor might update data. */
- *res = READ_ONCE(*state);
+ *res = __READ_ONCE(*state);
rmb(); /* Hypervisor might update data. */
} while (get64(&state->state_entry_time) != state_time ||
(state_time & XEN_RUNSTATE_UPDATE));
@@ -175,7 +176,7 @@ void __init xen_time_setup_guest(void)
xen_runstate_remote = !HYPERVISOR_vm_assist(VMASST_CMD_enable,
VMASST_TYPE_runstate_update_flag);
- pv_ops.time.steal_clock = xen_steal_clock;
+ static_call_update(pv_steal_clock, xen_steal_clock);
static_key_slow_inc(&paravirt_steal_enabled);
if (xen_runstate_remote)
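static_call_update() re-patches every call site of the static call in place, so reading the steal clock no longer goes through the pv_ops.time function-pointer indirection. A generic sketch of the mechanism (my_steal_clock and both implementations are hypothetical; only the static_call API itself is real):

#include <linux/static_call.h>

static u64 default_steal_clock(int cpu)
{
	return 0;			/* no steal time accounted */
}

DEFINE_STATIC_CALL(my_steal_clock, default_steal_clock);

static u64 xen_steal_clock_impl(int cpu)
{
	return 0;			/* would read the runstate area */
}

static void enable_xen_impl(void)
{
	/* patches all static_call(my_steal_clock)() sites */
	static_call_update(my_steal_clock, xen_steal_clock_impl);
}

static u64 read_steal(int cpu)
{
	return static_call(my_steal_clock)(cpu);
}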
diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c
new file mode 100644
index 000000000000..87e6b7db892f
--- /dev/null
+++ b/drivers/xen/unpopulated-alloc.c
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/memremap.h>
+#include <linux/slab.h>
+
+#include <asm/page.h>
+
+#include <xen/page.h>
+#include <xen/xen.h>
+
+static DEFINE_MUTEX(list_lock);
+static struct page *page_list;
+static unsigned int list_count;
+
+static int fill_list(unsigned int nr_pages)
+{
+ struct dev_pagemap *pgmap;
+ struct resource *res;
+ void *vaddr;
+ unsigned int i, alloc_pages = round_up(nr_pages, PAGES_PER_SECTION);
+ int ret = -ENOMEM;
+
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (!res)
+ return -ENOMEM;
+
+ res->name = "Xen scratch";
+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+
+ ret = allocate_resource(&iomem_resource, res,
+ alloc_pages * PAGE_SIZE, 0, -1,
+ PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
+ if (ret < 0) {
+ pr_err("Cannot allocate new IOMEM resource\n");
+ goto err_resource;
+ }
+
+ pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL);
+ if (!pgmap) {
+ ret = -ENOMEM;
+ goto err_pgmap;
+ }
+
+ pgmap->type = MEMORY_DEVICE_GENERIC;
+ pgmap->range = (struct range) {
+ .start = res->start,
+ .end = res->end,
+ };
+ pgmap->nr_range = 1;
+ pgmap->owner = res;
+
+#ifdef CONFIG_XEN_HAVE_PVMMU
+ /*
+ * memremap will build page tables for the new memory, so
+ * the p2m must contain invalid entries to ensure the correct
+ * non-present PTEs are written.
+ *
+ * If a failure occurs, the original (identity) p2m entries
+ * are not restored since this region is now known not to
+ * conflict with any devices.
+ */
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ xen_pfn_t pfn = PFN_DOWN(res->start);
+
+ for (i = 0; i < alloc_pages; i++) {
+ if (!set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY)) {
+ pr_warn("set_phys_to_machine() failed, no memory added\n");
+ ret = -ENOMEM;
+ goto err_memremap;
+ }
+ }
+ }
+#endif
+
+ vaddr = memremap_pages(pgmap, NUMA_NO_NODE);
+ if (IS_ERR(vaddr)) {
+ pr_err("Cannot remap memory range\n");
+ ret = PTR_ERR(vaddr);
+ goto err_memremap;
+ }
+
+ for (i = 0; i < alloc_pages; i++) {
+ struct page *pg = virt_to_page(vaddr + PAGE_SIZE * i);
+
+ BUG_ON(!virt_addr_valid(vaddr + PAGE_SIZE * i));
+ pg->zone_device_data = page_list;
+ page_list = pg;
+ list_count++;
+ }
+
+ return 0;
+
+err_memremap:
+ kfree(pgmap);
+err_pgmap:
+ release_resource(res);
+err_resource:
+ kfree(res);
+ return ret;
+}
+
+/**
+ * xen_alloc_unpopulated_pages - alloc unpopulated pages
+ * @nr_pages: Number of pages
+ * @pages: pages returned
+ * @return 0 on success, error otherwise
+ */
+int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages)
+{
+ unsigned int i;
+ int ret = 0;
+
+ mutex_lock(&list_lock);
+ if (list_count < nr_pages) {
+ ret = fill_list(nr_pages - list_count);
+ if (ret)
+ goto out;
+ }
+
+ for (i = 0; i < nr_pages; i++) {
+ struct page *pg = page_list;
+
+ BUG_ON(!pg);
+ page_list = pg->zone_device_data;
+ list_count--;
+ pages[i] = pg;
+
+#ifdef CONFIG_XEN_HAVE_PVMMU
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ ret = xen_alloc_p2m_entry(page_to_pfn(pg));
+ if (ret < 0) {
+ unsigned int j;
+
+ for (j = 0; j <= i; j++) {
+ pages[j]->zone_device_data = page_list;
+ page_list = pages[j];
+ list_count++;
+ }
+ goto out;
+ }
+ }
+#endif
+ }
+
+out:
+ mutex_unlock(&list_lock);
+ return ret;
+}
+EXPORT_SYMBOL(xen_alloc_unpopulated_pages);
+
+/**
+ * xen_free_unpopulated_pages - return unpopulated pages
+ * @nr_pages: Number of pages
+ * @pages: pages to return
+ */
+void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages)
+{
+ unsigned int i;
+
+ mutex_lock(&list_lock);
+ for (i = 0; i < nr_pages; i++) {
+ pages[i]->zone_device_data = page_list;
+ page_list = pages[i];
+ list_count++;
+ }
+ mutex_unlock(&list_lock);
+}
+EXPORT_SYMBOL(xen_free_unpopulated_pages);
+
+#ifdef CONFIG_XEN_PV
+static int __init init(void)
+{
+ unsigned int i;
+
+ if (!xen_domain())
+ return -ENODEV;
+
+ if (!xen_pv_domain())
+ return 0;
+
+ /*
+ * Initialize with pages from the extra memory regions (see
+ * arch/x86/xen/setup.c).
+ */
+ for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+ unsigned int j;
+
+ for (j = 0; j < xen_extra_mem[i].n_pfns; j++) {
+ struct page *pg =
+ pfn_to_page(xen_extra_mem[i].start_pfn + j);
+
+ pg->zone_device_data = page_list;
+ page_list = pg;
+ list_count++;
+ }
+ }
+
+ return 0;
+}
+subsys_initcall(init);
+#endif
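Consumers treat the new allocator as a drop-in replacement for the ballooned-page helpers, as the privcmd conversion above shows; the typical lifecycle is a sketch like:

struct page **pages;
int rc;

pages = kvcalloc(nr, sizeof(*pages), GFP_KERNEL);
if (!pages)
	return -ENOMEM;

rc = xen_alloc_unpopulated_pages(nr, pages);	/* frames backed by no RAM */
if (rc) {
	kvfree(pages);
	return rc;
}

/* ... map foreign memory or grants into the unpopulated frames ... */

xen_free_unpopulated_pages(nr, pages);		/* return them to the pool */
kvfree(pages);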
diff --git a/drivers/xen/xen-acpi-cpuhotplug.c b/drivers/xen/xen-acpi-cpuhotplug.c
deleted file mode 100644
index 00ab1ece02e5..000000000000
--- a/drivers/xen/xen-acpi-cpuhotplug.c
+++ /dev/null
@@ -1,446 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2012 Intel Corporation
- * Author: Liu Jinsong <jinsong.liu@intel.com>
- * Author: Jiang Yunhong <yunhong.jiang@intel.com>
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/cpu.h>
-#include <linux/acpi.h>
-#include <linux/uaccess.h>
-#include <acpi/processor.h>
-#include <xen/acpi.h>
-#include <xen/interface/platform.h>
-#include <asm/xen/hypercall.h>
-
-#define PREFIX "ACPI:xen_cpu_hotplug:"
-
-#define INSTALL_NOTIFY_HANDLER 0
-#define UNINSTALL_NOTIFY_HANDLER 1
-
-static acpi_status xen_acpi_cpu_hotadd(struct acpi_processor *pr);
-
-/* --------------------------------------------------------------------------
- Driver Interface
--------------------------------------------------------------------------- */
-
-static int xen_acpi_processor_enable(struct acpi_device *device)
-{
- acpi_status status = 0;
- unsigned long long value;
- union acpi_object object = { 0 };
- struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
- struct acpi_processor *pr = acpi_driver_data(device);
-
- if (!strcmp(acpi_device_hid(device), ACPI_PROCESSOR_OBJECT_HID)) {
- /* Declared with "Processor" statement; match ProcessorID */
- status = acpi_evaluate_object(pr->handle, NULL, NULL, &buffer);
- if (ACPI_FAILURE(status)) {
- pr_err(PREFIX "Evaluating processor object\n");
- return -ENODEV;
- }
-
- pr->acpi_id = object.processor.proc_id;
- } else {
- /* Declared with "Device" statement; match _UID */
- status = acpi_evaluate_integer(pr->handle, METHOD_NAME__UID,
- NULL, &value);
- if (ACPI_FAILURE(status)) {
- pr_err(PREFIX "Evaluating processor _UID\n");
- return -ENODEV;
- }
-
- pr->acpi_id = value;
- }
-
- pr->id = xen_pcpu_id(pr->acpi_id);
-
- if (invalid_logical_cpuid(pr->id))
- /* This cpu is not present in the hypervisor, try to hot-add it */
- if (ACPI_FAILURE(xen_acpi_cpu_hotadd(pr))) {
- pr_err(PREFIX "Hotadd CPU (acpi_id = %d) failed.\n",
- pr->acpi_id);
- return -ENODEV;
- }
-
- return 0;
-}
-
-static int xen_acpi_processor_add(struct acpi_device *device)
-{
- int ret;
- struct acpi_processor *pr;
-
- if (!device)
- return -EINVAL;
-
- pr = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL);
- if (!pr)
- return -ENOMEM;
-
- pr->handle = device->handle;
- strcpy(acpi_device_name(device), ACPI_PROCESSOR_DEVICE_NAME);
- strcpy(acpi_device_class(device), ACPI_PROCESSOR_CLASS);
- device->driver_data = pr;
-
- ret = xen_acpi_processor_enable(device);
- if (ret)
- pr_err(PREFIX "Error when enabling Xen processor\n");
-
- return ret;
-}
-
-static int xen_acpi_processor_remove(struct acpi_device *device)
-{
- struct acpi_processor *pr;
-
- if (!device)
- return -EINVAL;
-
- pr = acpi_driver_data(device);
- if (!pr)
- return -EINVAL;
-
- kfree(pr);
- return 0;
-}
-
-/*--------------------------------------------------------------
- Acpi processor hotplug support
---------------------------------------------------------------*/
-
-static int is_processor_present(acpi_handle handle)
-{
- acpi_status status;
- unsigned long long sta = 0;
-
-
- status = acpi_evaluate_integer(handle, "_STA", NULL, &sta);
-
- if (ACPI_SUCCESS(status) && (sta & ACPI_STA_DEVICE_PRESENT))
- return 1;
-
- /*
- * _STA is mandatory for a processor that supports hot plug
- */
- if (status == AE_NOT_FOUND)
- pr_info(PREFIX "Processor does not support hot plug\n");
- else
- pr_info(PREFIX "Processor Device is not present");
- return 0;
-}
-
-static int xen_apic_id(acpi_handle handle)
-{
- struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
- union acpi_object *obj;
- struct acpi_madt_local_apic *lapic;
- int apic_id;
-
- if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
- return -EINVAL;
-
- if (!buffer.length || !buffer.pointer)
- return -EINVAL;
-
- obj = buffer.pointer;
- if (obj->type != ACPI_TYPE_BUFFER ||
- obj->buffer.length < sizeof(*lapic)) {
- kfree(buffer.pointer);
- return -EINVAL;
- }
-
- lapic = (struct acpi_madt_local_apic *)obj->buffer.pointer;
-
- if (lapic->header.type != ACPI_MADT_TYPE_LOCAL_APIC ||
- !(lapic->lapic_flags & ACPI_MADT_ENABLED)) {
- kfree(buffer.pointer);
- return -EINVAL;
- }
-
- apic_id = (uint32_t)lapic->id;
- kfree(buffer.pointer);
- buffer.length = ACPI_ALLOCATE_BUFFER;
- buffer.pointer = NULL;
-
- return apic_id;
-}
-
-static int xen_hotadd_cpu(struct acpi_processor *pr)
-{
- int cpu_id, apic_id, pxm;
- struct xen_platform_op op;
-
- apic_id = xen_apic_id(pr->handle);
- if (apic_id < 0) {
- pr_err(PREFIX "Failed to get apic_id for acpi_id %d\n",
- pr->acpi_id);
- return -ENODEV;
- }
-
- pxm = xen_acpi_get_pxm(pr->handle);
- if (pxm < 0) {
- pr_err(PREFIX "Failed to get _PXM for acpi_id %d\n",
- pr->acpi_id);
- return pxm;
- }
-
- op.cmd = XENPF_cpu_hotadd;
- op.u.cpu_add.apic_id = apic_id;
- op.u.cpu_add.acpi_id = pr->acpi_id;
- op.u.cpu_add.pxm = pxm;
-
- cpu_id = HYPERVISOR_platform_op(&op);
- if (cpu_id < 0)
- pr_err(PREFIX "Failed to hotadd CPU for acpi_id %d\n",
- pr->acpi_id);
-
- return cpu_id;
-}
-
-static acpi_status xen_acpi_cpu_hotadd(struct acpi_processor *pr)
-{
- if (!is_processor_present(pr->handle))
- return AE_ERROR;
-
- pr->id = xen_hotadd_cpu(pr);
- if (invalid_logical_cpuid(pr->id))
- return AE_ERROR;
-
- /*
- * Sync with Xen hypervisor, providing new /sys/.../xen_cpuX
- * interface after the cpu is hot-added.
- */
- xen_pcpu_hotplug_sync();
-
- return AE_OK;
-}
-
-static int acpi_processor_device_remove(struct acpi_device *device)
-{
- pr_debug(PREFIX "Xen does not support CPU hotremove\n");
-
- return -ENOSYS;
-}
-
-static void acpi_processor_hotplug_notify(acpi_handle handle,
- u32 event, void *data)
-{
- struct acpi_processor *pr;
- struct acpi_device *device = NULL;
- u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; /* default */
- int result;
-
- acpi_scan_lock_acquire();
-
- switch (event) {
- case ACPI_NOTIFY_BUS_CHECK:
- case ACPI_NOTIFY_DEVICE_CHECK:
- ACPI_DEBUG_PRINT((ACPI_DB_INFO,
- "Processor driver received %s event\n",
- (event == ACPI_NOTIFY_BUS_CHECK) ?
- "ACPI_NOTIFY_BUS_CHECK" : "ACPI_NOTIFY_DEVICE_CHECK"));
-
- if (!is_processor_present(handle))
- break;
-
- acpi_bus_get_device(handle, &device);
- if (acpi_device_enumerated(device))
- break;
-
- result = acpi_bus_scan(handle);
- if (result) {
- pr_err(PREFIX "Unable to add the device\n");
- break;
- }
- device = NULL;
- acpi_bus_get_device(handle, &device);
- if (!acpi_device_enumerated(device)) {
- pr_err(PREFIX "Missing device object\n");
- break;
- }
- ost_code = ACPI_OST_SC_SUCCESS;
- break;
-
- case ACPI_NOTIFY_EJECT_REQUEST:
- ACPI_DEBUG_PRINT((ACPI_DB_INFO,
- "received ACPI_NOTIFY_EJECT_REQUEST\n"));
-
- if (acpi_bus_get_device(handle, &device)) {
- pr_err(PREFIX "Device don't exist, dropping EJECT\n");
- break;
- }
- pr = acpi_driver_data(device);
- if (!pr) {
- pr_err(PREFIX "Driver data is NULL, dropping EJECT\n");
- break;
- }
-
- /*
- * TBD: implement acpi_processor_device_remove if Xen supports
- * CPU hotremove in the future.
- */
- acpi_processor_device_remove(device);
- break;
-
- default:
- ACPI_DEBUG_PRINT((ACPI_DB_INFO,
- "Unsupported event [0x%x]\n", event));
-
- /* non-hotplug event; possibly handled by other handler */
- goto out;
- }
-
- (void) acpi_evaluate_ost(handle, event, ost_code, NULL);
-
-out:
- acpi_scan_lock_release();
-}
-
-static acpi_status is_processor_device(acpi_handle handle)
-{
- struct acpi_device_info *info;
- char *hid;
- acpi_status status;
-
- status = acpi_get_object_info(handle, &info);
- if (ACPI_FAILURE(status))
- return status;
-
- if (info->type == ACPI_TYPE_PROCESSOR) {
- kfree(info);
- return AE_OK; /* found a processor object */
- }
-
- if (!(info->valid & ACPI_VALID_HID)) {
- kfree(info);
- return AE_ERROR;
- }
-
- hid = info->hardware_id.string;
- if ((hid == NULL) || strcmp(hid, ACPI_PROCESSOR_DEVICE_HID)) {
- kfree(info);
- return AE_ERROR;
- }
-
- kfree(info);
- return AE_OK; /* found a processor device object */
-}
-
-static acpi_status
-processor_walk_namespace_cb(acpi_handle handle,
- u32 lvl, void *context, void **rv)
-{
- acpi_status status;
- int *action = context;
-
- status = is_processor_device(handle);
- if (ACPI_FAILURE(status))
- return AE_OK; /* not a processor; continue to walk */
-
- switch (*action) {
- case INSTALL_NOTIFY_HANDLER:
- acpi_install_notify_handler(handle,
- ACPI_SYSTEM_NOTIFY,
- acpi_processor_hotplug_notify,
- NULL);
- break;
- case UNINSTALL_NOTIFY_HANDLER:
- acpi_remove_notify_handler(handle,
- ACPI_SYSTEM_NOTIFY,
- acpi_processor_hotplug_notify);
- break;
- default:
- break;
- }
-
- /* found a processor; skip walking underneath */
- return AE_CTRL_DEPTH;
-}
-
-static
-void acpi_processor_install_hotplug_notify(void)
-{
- int action = INSTALL_NOTIFY_HANDLER;
- acpi_walk_namespace(ACPI_TYPE_ANY,
- ACPI_ROOT_OBJECT,
- ACPI_UINT32_MAX,
- processor_walk_namespace_cb, NULL, &action, NULL);
-}
-
-static
-void acpi_processor_uninstall_hotplug_notify(void)
-{
- int action = UNINSTALL_NOTIFY_HANDLER;
- acpi_walk_namespace(ACPI_TYPE_ANY,
- ACPI_ROOT_OBJECT,
- ACPI_UINT32_MAX,
- processor_walk_namespace_cb, NULL, &action, NULL);
-}
-
-static const struct acpi_device_id processor_device_ids[] = {
- {ACPI_PROCESSOR_OBJECT_HID, 0},
- {ACPI_PROCESSOR_DEVICE_HID, 0},
- {"", 0},
-};
-MODULE_DEVICE_TABLE(acpi, processor_device_ids);
-
-static struct acpi_driver xen_acpi_processor_driver = {
- .name = "processor",
- .class = ACPI_PROCESSOR_CLASS,
- .ids = processor_device_ids,
- .ops = {
- .add = xen_acpi_processor_add,
- .remove = xen_acpi_processor_remove,
- },
-};
-
-static int __init xen_acpi_processor_init(void)
-{
- int result = 0;
-
- if (!xen_initial_domain())
- return -ENODEV;
-
- /* unregister the stub which only used to reserve driver space */
- xen_stub_processor_exit();
-
- result = acpi_bus_register_driver(&xen_acpi_processor_driver);
- if (result < 0) {
- xen_stub_processor_init();
- return result;
- }
-
- acpi_processor_install_hotplug_notify();
- return 0;
-}
-
-static void __exit xen_acpi_processor_exit(void)
-{
- if (!xen_initial_domain())
- return;
-
- acpi_processor_uninstall_hotplug_notify();
-
- acpi_bus_unregister_driver(&xen_acpi_processor_driver);
-
- /*
- * stub reserve space again to prevent any chance of native
- * driver loading.
- */
- xen_stub_processor_init();
- return;
-}
-
-module_init(xen_acpi_processor_init);
-module_exit(xen_acpi_processor_exit);
-ACPI_MODULE_NAME("xen-acpi-cpuhotplug");
-MODULE_AUTHOR("Liu Jinsong <jinsong.liu@intel.com>");
-MODULE_DESCRIPTION("Xen Hotplug CPU Driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/xen/xen-acpi-memhotplug.c b/drivers/xen/xen-acpi-memhotplug.c
deleted file mode 100644
index 7457213b8915..000000000000
--- a/drivers/xen/xen-acpi-memhotplug.c
+++ /dev/null
@@ -1,475 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2012 Intel Corporation
- * Author: Liu Jinsong <jinsong.liu@intel.com>
- * Author: Jiang Yunhong <yunhong.jiang@intel.com>
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/acpi.h>
-#include <xen/acpi.h>
-#include <xen/interface/platform.h>
-#include <asm/xen/hypercall.h>
-
-#define PREFIX "ACPI:xen_memory_hotplug:"
-
-struct acpi_memory_info {
- struct list_head list;
- u64 start_addr; /* Memory Range start physical addr */
- u64 length; /* Memory Range length */
- unsigned short caching; /* memory cache attribute */
- unsigned short write_protect; /* memory read/write attribute */
- /* copied from buffer getting from _CRS */
- unsigned int enabled:1;
-};
-
-struct acpi_memory_device {
- struct acpi_device *device;
- struct list_head res_list;
-};
-
-static bool acpi_hotmem_initialized __read_mostly;
-
-static int xen_hotadd_memory(int pxm, struct acpi_memory_info *info)
-{
- int rc;
- struct xen_platform_op op;
-
- op.cmd = XENPF_mem_hotadd;
- op.u.mem_add.spfn = info->start_addr >> PAGE_SHIFT;
- op.u.mem_add.epfn = (info->start_addr + info->length) >> PAGE_SHIFT;
- op.u.mem_add.pxm = pxm;
-
- rc = HYPERVISOR_dom0_op(&op);
- if (rc)
- pr_err(PREFIX "Xen Hotplug Memory Add failed on "
- "0x%lx -> 0x%lx, _PXM: %d, error: %d\n",
- (unsigned long)info->start_addr,
- (unsigned long)(info->start_addr + info->length),
- pxm, rc);
-
- return rc;
-}
-
-static int xen_acpi_memory_enable_device(struct acpi_memory_device *mem_device)
-{
- int pxm, result;
- int num_enabled = 0;
- struct acpi_memory_info *info;
-
- if (!mem_device)
- return -EINVAL;
-
- pxm = xen_acpi_get_pxm(mem_device->device->handle);
- if (pxm < 0)
- return pxm;
-
- list_for_each_entry(info, &mem_device->res_list, list) {
- if (info->enabled) { /* just sanity check...*/
- num_enabled++;
- continue;
- }
-
- if (!info->length)
- continue;
-
- result = xen_hotadd_memory(pxm, info);
- if (result)
- continue;
- info->enabled = 1;
- num_enabled++;
- }
-
- if (!num_enabled)
- return -ENODEV;
-
- return 0;
-}
-
-static acpi_status
-acpi_memory_get_resource(struct acpi_resource *resource, void *context)
-{
- struct acpi_memory_device *mem_device = context;
- struct acpi_resource_address64 address64;
- struct acpi_memory_info *info, *new;
- acpi_status status;
-
- status = acpi_resource_to_address64(resource, &address64);
- if (ACPI_FAILURE(status) ||
- (address64.resource_type != ACPI_MEMORY_RANGE))
- return AE_OK;
-
- list_for_each_entry(info, &mem_device->res_list, list) {
- if ((info->caching == address64.info.mem.caching) &&
- (info->write_protect == address64.info.mem.write_protect) &&
- (info->start_addr + info->length == address64.address.minimum)) {
- info->length += address64.address.address_length;
- return AE_OK;
- }
- }
-
- new = kzalloc(sizeof(struct acpi_memory_info), GFP_KERNEL);
- if (!new)
- return AE_ERROR;
-
- INIT_LIST_HEAD(&new->list);
- new->caching = address64.info.mem.caching;
- new->write_protect = address64.info.mem.write_protect;
- new->start_addr = address64.address.minimum;
- new->length = address64.address.address_length;
- list_add_tail(&new->list, &mem_device->res_list);
-
- return AE_OK;
-}
-
-static int
-acpi_memory_get_device_resources(struct acpi_memory_device *mem_device)
-{
- acpi_status status;
- struct acpi_memory_info *info, *n;
-
- if (!list_empty(&mem_device->res_list))
- return 0;
-
- status = acpi_walk_resources(mem_device->device->handle,
- METHOD_NAME__CRS, acpi_memory_get_resource, mem_device);
-
- if (ACPI_FAILURE(status)) {
- list_for_each_entry_safe(info, n, &mem_device->res_list, list)
- kfree(info);
- INIT_LIST_HEAD(&mem_device->res_list);
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int acpi_memory_get_device(acpi_handle handle,
- struct acpi_memory_device **mem_device)
-{
- struct acpi_device *device = NULL;
- int result = 0;
-
- acpi_scan_lock_acquire();
-
- acpi_bus_get_device(handle, &device);
- if (acpi_device_enumerated(device))
- goto end;
-
- /*
- * Now add the notified device. This creates the acpi_device
- * and invokes .add function
- */
- result = acpi_bus_scan(handle);
- if (result) {
- pr_warn(PREFIX "ACPI namespace scan failed\n");
- result = -EINVAL;
- goto out;
- }
- device = NULL;
- acpi_bus_get_device(handle, &device);
- if (!acpi_device_enumerated(device)) {
- pr_warn(PREFIX "Missing device object\n");
- result = -EINVAL;
- goto out;
- }
-
-end:
- *mem_device = acpi_driver_data(device);
- if (!(*mem_device)) {
- pr_err(PREFIX "driver data not found\n");
- result = -ENODEV;
- goto out;
- }
-
-out:
- acpi_scan_lock_release();
- return result;
-}
-
-static int acpi_memory_check_device(struct acpi_memory_device *mem_device)
-{
- unsigned long long current_status;
-
- /* Get device present/absent information from the _STA */
- if (ACPI_FAILURE(acpi_evaluate_integer(mem_device->device->handle,
- "_STA", NULL, &current_status)))
- return -ENODEV;
- /*
- * Check for device status. Device should be
- * present/enabled/functioning.
- */
- if (!((current_status & ACPI_STA_DEVICE_PRESENT)
- && (current_status & ACPI_STA_DEVICE_ENABLED)
- && (current_status & ACPI_STA_DEVICE_FUNCTIONING)))
- return -ENODEV;
-
- return 0;
-}
-
-static int acpi_memory_disable_device(struct acpi_memory_device *mem_device)
-{
- pr_debug(PREFIX "Xen does not support memory hotremove\n");
-
- return -ENOSYS;
-}
-
-static void acpi_memory_device_notify(acpi_handle handle, u32 event, void *data)
-{
- struct acpi_memory_device *mem_device;
- struct acpi_device *device;
- u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; /* default */
-
- switch (event) {
- case ACPI_NOTIFY_BUS_CHECK:
- ACPI_DEBUG_PRINT((ACPI_DB_INFO,
- "\nReceived BUS CHECK notification for device\n"));
- /* Fall Through */
- case ACPI_NOTIFY_DEVICE_CHECK:
- if (event == ACPI_NOTIFY_DEVICE_CHECK)
- ACPI_DEBUG_PRINT((ACPI_DB_INFO,
- "\nReceived DEVICE CHECK notification for device\n"));
-
- if (acpi_memory_get_device(handle, &mem_device)) {
- pr_err(PREFIX "Cannot find driver data\n");
- break;
- }
-
- ost_code = ACPI_OST_SC_SUCCESS;
- break;
-
- case ACPI_NOTIFY_EJECT_REQUEST:
- ACPI_DEBUG_PRINT((ACPI_DB_INFO,
- "\nReceived EJECT REQUEST notification for device\n"));
-
- acpi_scan_lock_acquire();
- if (acpi_bus_get_device(handle, &device)) {
- acpi_scan_lock_release();
- pr_err(PREFIX "Device doesn't exist\n");
- break;
- }
- mem_device = acpi_driver_data(device);
- if (!mem_device) {
- acpi_scan_lock_release();
- pr_err(PREFIX "Driver Data is NULL\n");
- break;
- }
-
- /*
- * TBD: implement acpi_memory_disable_device and invoke
- * acpi_bus_remove if Xen supports hotremove in the future
- */
- acpi_memory_disable_device(mem_device);
- acpi_scan_lock_release();
- break;
-
- default:
- ACPI_DEBUG_PRINT((ACPI_DB_INFO,
- "Unsupported event [0x%x]\n", event));
- /* non-hotplug event; possibly handled by other handler */
- return;
- }
-
- (void) acpi_evaluate_ost(handle, event, ost_code, NULL);
- return;
-}
-
-static int xen_acpi_memory_device_add(struct acpi_device *device)
-{
- int result;
- struct acpi_memory_device *mem_device = NULL;
-
-
- if (!device)
- return -EINVAL;
-
- mem_device = kzalloc(sizeof(struct acpi_memory_device), GFP_KERNEL);
- if (!mem_device)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&mem_device->res_list);
- mem_device->device = device;
- sprintf(acpi_device_name(device), "%s", ACPI_MEMORY_DEVICE_NAME);
- sprintf(acpi_device_class(device), "%s", ACPI_MEMORY_DEVICE_CLASS);
- device->driver_data = mem_device;
-
- /* Get the range from the _CRS */
- result = acpi_memory_get_device_resources(mem_device);
- if (result) {
- kfree(mem_device);
- return result;
- }
-
- /*
- * For memory devices present at boot, early boot code has already
- * recognized the memory area via EFI/E820. If the DSDT shows these
- * memory devices at boot, no hotplug is necessary for them.
- * For memory devices hot-added at runtime, a hypercall to the Xen
- * hypervisor is needed to add the memory.
- */
- if (!acpi_hotmem_initialized)
- return 0;
-
- if (!acpi_memory_check_device(mem_device))
- result = xen_acpi_memory_enable_device(mem_device);
-
- return result;
-}
-
-static int xen_acpi_memory_device_remove(struct acpi_device *device)
-{
- struct acpi_memory_device *mem_device = NULL;
-
- if (!device || !acpi_driver_data(device))
- return -EINVAL;
-
- mem_device = acpi_driver_data(device);
- kfree(mem_device);
-
- return 0;
-}
-
-/*
- * Helper function to check for memory device
- */
-static acpi_status is_memory_device(acpi_handle handle)
-{
- char *hardware_id;
- acpi_status status;
- struct acpi_device_info *info;
-
- status = acpi_get_object_info(handle, &info);
- if (ACPI_FAILURE(status))
- return status;
-
- if (!(info->valid & ACPI_VALID_HID)) {
- kfree(info);
- return AE_ERROR;
- }
-
- hardware_id = info->hardware_id.string;
- if ((hardware_id == NULL) ||
- (strcmp(hardware_id, ACPI_MEMORY_DEVICE_HID)))
- status = AE_ERROR;
-
- kfree(info);
- return status;
-}
-
-static acpi_status
-acpi_memory_register_notify_handler(acpi_handle handle,
- u32 level, void *ctxt, void **retv)
-{
- acpi_status status;
-
- status = is_memory_device(handle);
- if (ACPI_FAILURE(status))
- return AE_OK; /* continue */
-
- status = acpi_install_notify_handler(handle, ACPI_SYSTEM_NOTIFY,
- acpi_memory_device_notify, NULL);
- /* continue */
- return AE_OK;
-}
-
-static acpi_status
-acpi_memory_deregister_notify_handler(acpi_handle handle,
- u32 level, void *ctxt, void **retv)
-{
- acpi_status status;
-
- status = is_memory_device(handle);
- if (ACPI_FAILURE(status))
- return AE_OK; /* continue */
-
- status = acpi_remove_notify_handler(handle,
- ACPI_SYSTEM_NOTIFY,
- acpi_memory_device_notify);
-
- return AE_OK; /* continue */
-}
-
-static const struct acpi_device_id memory_device_ids[] = {
- {ACPI_MEMORY_DEVICE_HID, 0},
- {"", 0},
-};
-MODULE_DEVICE_TABLE(acpi, memory_device_ids);
-
-static struct acpi_driver xen_acpi_memory_device_driver = {
- .name = "acpi_memhotplug",
- .class = ACPI_MEMORY_DEVICE_CLASS,
- .ids = memory_device_ids,
- .ops = {
- .add = xen_acpi_memory_device_add,
- .remove = xen_acpi_memory_device_remove,
- },
-};
-
-static int __init xen_acpi_memory_device_init(void)
-{
- int result;
- acpi_status status;
-
- if (!xen_initial_domain())
- return -ENODEV;
-
- /* unregister the stub which only used to reserve driver space */
- xen_stub_memory_device_exit();
-
- result = acpi_bus_register_driver(&xen_acpi_memory_device_driver);
- if (result < 0) {
- xen_stub_memory_device_init();
- return -ENODEV;
- }
-
- status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
- ACPI_UINT32_MAX,
- acpi_memory_register_notify_handler,
- NULL, NULL, NULL);
-
- if (ACPI_FAILURE(status)) {
- pr_warn(PREFIX "walk_namespace failed\n");
- acpi_bus_unregister_driver(&xen_acpi_memory_device_driver);
- xen_stub_memory_device_init();
- return -ENODEV;
- }
-
- acpi_hotmem_initialized = true;
- return 0;
-}
-
-static void __exit xen_acpi_memory_device_exit(void)
-{
- acpi_status status;
-
- if (!xen_initial_domain())
- return;
-
- status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
- ACPI_UINT32_MAX,
- acpi_memory_deregister_notify_handler,
- NULL, NULL, NULL);
- if (ACPI_FAILURE(status))
- pr_warn(PREFIX "walk_namespace failed\n");
-
- acpi_bus_unregister_driver(&xen_acpi_memory_device_driver);
-
- /*
- * stub reserve space again to prevent any chance of native
- * driver loading.
- */
- xen_stub_memory_device_init();
- return;
-}
-
-module_init(xen_acpi_memory_device_init);
-module_exit(xen_acpi_memory_device_exit);
-ACPI_MODULE_NAME("xen-acpi-memhotplug");
-MODULE_AUTHOR("Liu Jinsong <jinsong.liu@intel.com>");
-MODULE_DESCRIPTION("Xen Hotplug Mem Driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c
index ce8ffb595a46..9cb61db67efd 100644
--- a/drivers/xen/xen-acpi-processor.c
+++ b/drivers/xen/xen-acpi-processor.c
@@ -3,7 +3,8 @@
* Copyright 2012 by Oracle Inc
* Author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
*
- * This code borrows ideas from https://lkml.org/lkml/2011/11/30/249
+ * This code borrows ideas from
+ * https://lore.kernel.org/lkml/1322673664-14642-6-git-send-email-konrad.wilk@oracle.com
* so many thanks go to Kevin Tian <kevin.tian@intel.com>
* and Yu Ke <ke.yu@intel.com>.
*/
@@ -449,7 +450,7 @@ static struct acpi_processor_performance __percpu *acpi_perf_data;
static void free_acpi_perf_data(void)
{
- unsigned int i;
+ int i;
/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
for_each_possible_cpu(i)
@@ -461,7 +462,7 @@ static void free_acpi_perf_data(void)
static int xen_upload_processor_pm_data(void)
{
struct acpi_processor *pr_backup = NULL;
- unsigned int i;
+ int i;
int rc = 0;
pr_info("Uploading Xen processor PM info\n");
@@ -517,7 +518,7 @@ static struct syscore_ops xap_syscore_ops = {
static int __init xen_acpi_processor_init(void)
{
- unsigned int i;
+ int i;
int rc;
if (!xen_initial_domain())
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index a8d24433c8e9..8cd583db20b1 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -134,13 +134,13 @@ void xen_balloon_init(void)
EXPORT_SYMBOL_GPL(xen_balloon_init);
#define BALLOON_SHOW(name, format, args...) \
- static ssize_t show_##name(struct device *dev, \
+ static ssize_t name##_show(struct device *dev, \
struct device_attribute *attr, \
char *buf) \
{ \
return sprintf(buf, format, ##args); \
} \
- static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+ static DEVICE_ATTR_RO(name)
BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
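
The rename above follows the sysfs attribute convention: DEVICE_ATTR_RO() derives the callback name from the attribute name, so the macro must generate name##_show rather than show_##name. A rough expansion of "static DEVICE_ATTR_RO(current_kb)", per the __ATTR_RO() definition in <linux/sysfs.h>:

    /* What the macro boils down to; .show must be named current_kb_show. */
    static struct device_attribute dev_attr_current_kb = {
            .attr = { .name = "current_kb", .mode = 0444 },
            .show = current_kb_show,
    };
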
@@ -152,16 +152,15 @@ static DEVICE_ULONG_ATTR(retry_count, 0444, balloon_stats.retry_count);
static DEVICE_ULONG_ATTR(max_retry_count, 0644, balloon_stats.max_retry_count);
static DEVICE_BOOL_ATTR(scrub_pages, 0644, xen_scrub_pages);
-static ssize_t show_target_kb(struct device *dev, struct device_attribute *attr,
+static ssize_t target_kb_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages));
}
-static ssize_t store_target_kb(struct device *dev,
+static ssize_t target_kb_store(struct device *dev,
struct device_attribute *attr,
- const char *buf,
- size_t count)
+ const char *buf, size_t count)
{
char *endchar;
unsigned long long target_bytes;
@@ -176,22 +175,19 @@ static ssize_t store_target_kb(struct device *dev,
return count;
}
-static DEVICE_ATTR(target_kb, S_IRUGO | S_IWUSR,
- show_target_kb, store_target_kb);
+static DEVICE_ATTR_RW(target_kb);
-
-static ssize_t show_target(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t target_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%llu\n",
(unsigned long long)balloon_stats.target_pages
<< PAGE_SHIFT);
}
-static ssize_t store_target(struct device *dev,
+static ssize_t target_store(struct device *dev,
struct device_attribute *attr,
- const char *buf,
- size_t count)
+ const char *buf, size_t count)
{
char *endchar;
unsigned long long target_bytes;
@@ -206,9 +202,7 @@ static ssize_t store_target(struct device *dev,
return count;
}
-static DEVICE_ATTR(target, S_IRUGO | S_IWUSR,
- show_target, store_target);
-
+static DEVICE_ATTR_RW(target);
static struct attribute *balloon_attrs[] = {
&dev_attr_target_kb.attr,
diff --git a/drivers/xen/xen-front-pgdir-shbuf.c b/drivers/xen/xen-front-pgdir-shbuf.c
index 48a658dc7ccf..81b6e13fa5ec 100644
--- a/drivers/xen/xen-front-pgdir-shbuf.c
+++ b/drivers/xen/xen-front-pgdir-shbuf.c
@@ -305,11 +305,18 @@ static int backend_map(struct xen_front_pgdir_shbuf *buf)
/* Save handles even if error, so we can unmap. */
for (cur_page = 0; cur_page < buf->num_pages; cur_page++) {
- buf->backend_map_handles[cur_page] = map_ops[cur_page].handle;
- if (unlikely(map_ops[cur_page].status != GNTST_okay))
+ if (likely(map_ops[cur_page].status == GNTST_okay)) {
+ buf->backend_map_handles[cur_page] =
+ map_ops[cur_page].handle;
+ } else {
+ buf->backend_map_handles[cur_page] =
+ INVALID_GRANT_HANDLE;
+ if (!ret)
+ ret = -ENXIO;
dev_err(&buf->xb_dev->dev,
"Failed to map page %d: %d\n",
cur_page, map_ops[cur_page].status);
+ }
}
if (ret) {
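
The reworked loop makes the handle array self-describing: slots whose grant map failed are recorded as INVALID_GRANT_HANDLE and the first failure latches ret to -ENXIO, so the error path can unwind without touching pages that never mapped. A minimal sketch of the matching unwind (hypothetical helper, not from the patch):

    /* Unmap only the slots that really mapped; skip the sentinel. */
    static void unmap_mapped_only(struct gnttab_unmap_grant_ref *unmap,
                                  const grant_handle_t *handles,
                                  const phys_addr_t *addrs, int num_pages)
    {
            int i, j = 0;

            for (i = 0; i < num_pages; i++) {
                    if (handles[i] == INVALID_GRANT_HANDLE)
                            continue;       /* this page never mapped */
                    gnttab_set_unmap_op(&unmap[j++], addrs[i],
                                        GNTMAP_host_map, handles[i]);
            }
    }
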
diff --git a/drivers/xen/xen-pciback/Makefile b/drivers/xen/xen-pciback/Makefile
index e8d981d43235..d63df09de81c 100644
--- a/drivers/xen/xen-pciback/Makefile
+++ b/drivers/xen/xen-pciback/Makefile
@@ -1,5 +1,12 @@
# SPDX-License-Identifier: GPL-2.0
+
+# N.B. The below cannot be expressed with a single line using
+# CONFIG_XEN_PCI_STUB as it always remains in "y" state,
+# thus preventing the driver from being built as a module.
+# Please note that CONFIG_XEN_PCIDEV_BACKEND and
+# CONFIG_XEN_PCIDEV_STUB are mutually exclusive.
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o
+obj-$(CONFIG_XEN_PCIDEV_STUB) += xen-pciback.o
xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o
xen-pciback-y += conf_space.o conf_space_header.o \
diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
index b20e43e148ce..059de92aea7d 100644
--- a/drivers/xen/xen-pciback/conf_space.c
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -10,6 +10,8 @@
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
+#define dev_fmt(fmt) DRV_NAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/moduleparam.h>
#include <linux/pci.h>
@@ -154,9 +156,7 @@ int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
* (as if device didn't respond) */
u32 value = 0, tmp_val;
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x\n",
- pci_name(dev), size, offset);
+ dev_dbg(&dev->dev, "read %d bytes at 0x%x\n", size, offset);
if (!valid_request(offset, size)) {
err = XEN_PCI_ERR_invalid_offset;
@@ -195,9 +195,7 @@ int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
}
out:
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x = %x\n",
- pci_name(dev), size, offset, value);
+ dev_dbg(&dev->dev, "read %d bytes at 0x%x = %x\n", size, offset, value);
*ret_val = value;
return xen_pcibios_err_to_errno(err);
@@ -212,10 +210,8 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
u32 tmp_val;
int field_start, field_end;
- if (unlikely(verbose_request))
- printk(KERN_DEBUG
- DRV_NAME ": %s: write request %d bytes at 0x%x = %x\n",
- pci_name(dev), size, offset, value);
+ dev_dbg(&dev->dev, "write request %d bytes at 0x%x = %x\n",
+ size, offset, value);
if (!valid_request(offset, size))
return XEN_PCI_ERR_invalid_offset;
@@ -320,7 +316,7 @@ int xen_pcibk_get_interrupt_type(struct pci_dev *dev)
if (val & PCI_MSIX_FLAGS_ENABLE)
ret |= INTERRUPT_TYPE_MSIX;
}
- return ret;
+ return ret ?: INTERRUPT_TYPE_NONE;
}
void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev)
diff --git a/drivers/xen/xen-pciback/conf_space.h b/drivers/xen/xen-pciback/conf_space.h
index 28c45180a12e..5fe431c79f25 100644
--- a/drivers/xen/xen-pciback/conf_space.h
+++ b/drivers/xen/xen-pciback/conf_space.h
@@ -65,10 +65,10 @@ struct config_field_entry {
void *data;
};
-#define INTERRUPT_TYPE_NONE (1<<0)
-#define INTERRUPT_TYPE_INTX (1<<1)
-#define INTERRUPT_TYPE_MSI (1<<2)
-#define INTERRUPT_TYPE_MSIX (1<<3)
+#define INTERRUPT_TYPE_NONE (0)
+#define INTERRUPT_TYPE_INTX (1<<0)
+#define INTERRUPT_TYPE_MSI (1<<1)
+#define INTERRUPT_TYPE_MSIX (1<<2)
extern bool xen_pcibk_permissive;
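
The renumbering makes the empty source mask and INTERRUPT_TYPE_NONE the same value, so a caller comparing against NONE now matches exactly when neither INTx nor MSI nor MSI-X is enabled; the GNU "ret ?: INTERRUPT_TYPE_NONE" in xen_pcibk_get_interrupt_type() above then reads as documentation rather than a special case. Usage sketch:

    /* With NONE == 0, "no interrupt source" is directly comparable. */
    int type = xen_pcibk_get_interrupt_type(dev);

    if (type == INTERRUPT_TYPE_NONE)
            dev_dbg(&dev->dev, "no interrupt source enabled\n"); /* hypothetical caller */
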
diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c
index 22f13abbe913..5e53b4817f16 100644
--- a/drivers/xen/xen-pciback/conf_space_capability.c
+++ b/drivers/xen/xen-pciback/conf_space_capability.c
@@ -160,7 +160,7 @@ static void *pm_ctrl_init(struct pci_dev *dev, int offset)
}
out:
- return ERR_PTR(err);
+ return err ? ERR_PTR(err) : NULL;
}
static const struct config_field caplist_pm[] = {
diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c
index fb4fccb4aecc..981435103af1 100644
--- a/drivers/xen/xen-pciback/conf_space_header.c
+++ b/drivers/xen/xen-pciback/conf_space_header.c
@@ -6,6 +6,7 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define dev_fmt pr_fmt
#include <linux/kernel.h>
#include <linux/pci.h>
@@ -67,53 +68,39 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
dev_data = pci_get_drvdata(dev);
if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: enable\n",
- pci_name(dev));
+ dev_dbg(&dev->dev, "enable\n");
err = pci_enable_device(dev);
if (err)
return err;
if (dev_data)
dev_data->enable_intx = 1;
} else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: disable\n",
- pci_name(dev));
+ dev_dbg(&dev->dev, "disable\n");
pci_disable_device(dev);
if (dev_data)
dev_data->enable_intx = 0;
}
if (!dev->is_busmaster && is_master_cmd(value)) {
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: set bus master\n",
- pci_name(dev));
+ dev_dbg(&dev->dev, "set bus master\n");
pci_set_master(dev);
} else if (dev->is_busmaster && !is_master_cmd(value)) {
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: clear bus master\n",
- pci_name(dev));
+ dev_dbg(&dev->dev, "clear bus master\n");
pci_clear_master(dev);
}
if (!(cmd->val & PCI_COMMAND_INVALIDATE) &&
(value & PCI_COMMAND_INVALIDATE)) {
- if (unlikely(verbose_request))
- printk(KERN_DEBUG
- DRV_NAME ": %s: enable memory-write-invalidate\n",
- pci_name(dev));
+ dev_dbg(&dev->dev, "enable memory-write-invalidate\n");
err = pci_set_mwi(dev);
if (err) {
- pr_warn("%s: cannot enable memory-write-invalidate (%d)\n",
- pci_name(dev), err);
+ dev_warn(&dev->dev, "cannot enable memory-write-invalidate (%d)\n",
+ err);
value &= ~PCI_COMMAND_INVALIDATE;
}
} else if ((cmd->val & PCI_COMMAND_INVALIDATE) &&
!(value & PCI_COMMAND_INVALIDATE)) {
- if (unlikely(verbose_request))
- printk(KERN_DEBUG
- DRV_NAME ": %s: disable memory-write-invalidate\n",
- pci_name(dev));
+ dev_dbg(&dev->dev, "disable memory-write-invalidate\n");
pci_clear_mwi(dev);
}
@@ -157,8 +144,7 @@ static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
struct pci_bar_info *bar = data;
if (unlikely(!bar)) {
- pr_warn(DRV_NAME ": driver data not found for %s\n",
- pci_name(dev));
+ dev_warn(&dev->dev, "driver data not found\n");
return XEN_PCI_ERR_op_failed;
}
@@ -194,8 +180,7 @@ static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
u32 mask;
if (unlikely(!bar)) {
- pr_warn(DRV_NAME ": driver data not found for %s\n",
- pci_name(dev));
+ dev_warn(&dev->dev, "driver data not found\n");
return XEN_PCI_ERR_op_failed;
}
@@ -228,8 +213,7 @@ static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data)
struct pci_bar_info *bar = data;
if (unlikely(!bar)) {
- pr_warn(DRV_NAME ": driver data not found for %s\n",
- pci_name(dev));
+ dev_warn(&dev->dev, "driver data not found\n");
return XEN_PCI_ERR_op_failed;
}
@@ -252,8 +236,12 @@ static void *bar_init(struct pci_dev *dev, int offset)
else {
pos = (offset - PCI_BASE_ADDRESS_0) / 4;
if (pos && (res[pos - 1].flags & IORESOURCE_MEM_64)) {
- bar->val = res[pos - 1].start >> 32;
- bar->len_val = -resource_size(&res[pos - 1]) >> 32;
+ /*
+ * Use ">> 16 >> 16" instead of direct ">> 32" shift
+ * to avoid warnings on 32-bit architectures.
+ */
+ bar->val = res[pos - 1].start >> 16 >> 16;
+ bar->len_val = -resource_size(&res[pos - 1]) >> 16 >> 16;
return bar;
}
}
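
The two-step shift is needed because resource_size_t is only 32 bits wide on 32-bit builds without CONFIG_PHYS_ADDR_T_64BIT; shifting a 32-bit value by 32 is undefined and trips -Wshift-count-overflow, while two shifts by 16 are well defined at either width. A hedged demonstration, not from the patch:

    /* Yields 0 when resource_size_t is 32-bit, bits 63:32 otherwise. */
    static u32 upper_half(resource_size_t start)
    {
            return start >> 16 >> 16;
    }
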
@@ -433,8 +421,8 @@ int xen_pcibk_config_header_add_fields(struct pci_dev *dev)
default:
err = -EINVAL;
- pr_err("%s: Unsupported header type %d!\n",
- pci_name(dev), dev->hdr_type);
+ dev_err(&dev->dev, "Unsupported header type %d!\n",
+ dev->hdr_type);
break;
}
diff --git a/drivers/xen/xen-pciback/conf_space_quirks.c b/drivers/xen/xen-pciback/conf_space_quirks.c
index ed593d1042a6..7dc281086302 100644
--- a/drivers/xen/xen-pciback/conf_space_quirks.c
+++ b/drivers/xen/xen-pciback/conf_space_quirks.c
@@ -6,6 +6,8 @@
* Author: Chris Bookholt <hap10@epoch.ncsc.mil>
*/
+#define dev_fmt(fmt) DRV_NAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/pci.h>
#include "pciback.h"
@@ -35,8 +37,8 @@ static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev)
if (match_one_device(&tmp_quirk->devid, dev) != NULL)
goto out;
tmp_quirk = NULL;
- printk(KERN_DEBUG DRV_NAME
- ": quirk didn't match any device known\n");
+ dev_printk(KERN_DEBUG, &dev->dev,
+ "quirk didn't match any device known\n");
out:
return tmp_quirk;
}
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index 7af93d65ed51..bba527620507 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -6,6 +6,7 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define dev_fmt pr_fmt
#include <linux/module.h>
#include <linux/init.h>
@@ -18,7 +19,8 @@
#include <linux/sched.h>
#include <linux/atomic.h>
#include <xen/events.h>
-#include <asm/xen/pci.h>
+#include <xen/pci.h>
+#include <xen/xen.h>
#include <asm/xen/hypervisor.h>
#include <xen/interface/physdev.h>
#include "pciback.h"
@@ -626,11 +628,11 @@ static void pcistub_remove(struct pci_dev *dev)
if (found_psdev->pdev) {
int domid = xen_find_device_domain_owner(dev);
- pr_warn("****** removing device %s while still in-use by domain %d! ******\n",
+ dev_warn(&dev->dev, "****** removing device %s while still in-use by domain %d! ******\n",
pci_name(found_psdev->dev), domid);
- pr_warn("****** driver domain may still access this device's i/o resources!\n");
- pr_warn("****** shutdown driver domain before binding device\n");
- pr_warn("****** to other drivers or domains\n");
+ dev_warn(&dev->dev, "****** driver domain may still access this device's i/o resources!\n");
+ dev_warn(&dev->dev, "****** shutdown driver domain before binding device\n");
+ dev_warn(&dev->dev, "****** to other drivers or domains\n");
/* N.B. This ends up calling pcistub_put_pci_dev which ends up
* doing the FLR. */
@@ -711,14 +713,12 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
ret = xen_pcibk_get_pcifront_dev(psdev->dev, psdev->pdev,
&aer_op->domain, &aer_op->bus, &aer_op->devfn);
if (!ret) {
- dev_err(&psdev->dev->dev,
- DRV_NAME ": failed to get pcifront device\n");
+ dev_err(&psdev->dev->dev, "failed to get pcifront device\n");
return PCI_ERS_RESULT_NONE;
}
wmb();
- dev_dbg(&psdev->dev->dev,
- DRV_NAME ": aer_op %x dom %x bus %x devfn %x\n",
+ dev_dbg(&psdev->dev->dev, "aer_op %x dom %x bus %x devfn %x\n",
aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
/*local flag to mark there's aer request, xen_pcibk callback will use
* this flag to judge whether we need to check pci-front give aer
@@ -735,10 +735,17 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
wmb();
notify_remote_via_irq(pdev->evtchn_irq);
+ /* Enable IRQ to signal "request done". */
+ xen_pcibk_lateeoi(pdev, 0);
+
ret = wait_event_timeout(xen_pcibk_aer_wait_queue,
!(test_bit(_XEN_PCIB_active, (unsigned long *)
&sh_info->flags)), 300*HZ);
+ /* Enable IRQ for pcifront request if not already active. */
+ if (!test_bit(_PDEVF_op_active, &pdev->flags))
+ xen_pcibk_lateeoi(pdev, 0);
+
if (!ret) {
if (test_bit(_XEN_PCIB_active,
(unsigned long *)&sh_info->flags)) {
@@ -752,13 +759,6 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
}
clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);
- if (test_bit(_XEN_PCIF_active,
- (unsigned long *)&sh_info->flags)) {
- dev_dbg(&psdev->dev->dev,
- "schedule pci_conf service in " DRV_NAME "\n");
- xen_pcibk_test_and_schedule_op(psdev->pdev);
- }
-
res = (pci_ers_result_t)aer_op->err;
return res;
}
@@ -786,13 +786,12 @@ static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev)
PCI_FUNC(dev->devfn));
if (!psdev || !psdev->pdev) {
- dev_err(&dev->dev,
- DRV_NAME " device is not found/assigned\n");
+ dev_err(&dev->dev, "device is not found/assigned\n");
goto end;
}
if (!psdev->pdev->sh_info) {
- dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
+ dev_err(&dev->dev, "device is not connected or owned"
" by HVM, kill it\n");
kill_domain_by_device(psdev);
goto end;
@@ -804,7 +803,7 @@ static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev)
"guest with no AER driver should have been killed\n");
goto end;
}
- result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result);
+ result = common_process(psdev, pci_channel_io_normal, XEN_PCI_OP_aer_slotreset, result);
if (result == PCI_ERS_RESULT_NONE ||
result == PCI_ERS_RESULT_DISCONNECT) {
@@ -844,13 +843,12 @@ static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev)
PCI_FUNC(dev->devfn));
if (!psdev || !psdev->pdev) {
- dev_err(&dev->dev,
- DRV_NAME " device is not found/assigned\n");
+ dev_err(&dev->dev, "device is not found/assigned\n");
goto end;
}
if (!psdev->pdev->sh_info) {
- dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
+ dev_err(&dev->dev, "device is not connected or owned"
" by HVM, kill it\n");
kill_domain_by_device(psdev);
goto end;
@@ -862,7 +860,7 @@ static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev)
"guest with no AER driver should have been killed\n");
goto end;
}
- result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result);
+ result = common_process(psdev, pci_channel_io_normal, XEN_PCI_OP_aer_mmio, result);
if (result == PCI_ERS_RESULT_NONE ||
result == PCI_ERS_RESULT_DISCONNECT) {
@@ -902,13 +900,12 @@ static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev,
PCI_FUNC(dev->devfn));
if (!psdev || !psdev->pdev) {
- dev_err(&dev->dev,
- DRV_NAME " device is not found/assigned\n");
+ dev_err(&dev->dev, "device is not found/assigned\n");
goto end;
}
if (!psdev->pdev->sh_info) {
- dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
+ dev_err(&dev->dev, "device is not connected or owned"
" by HVM, kill it\n");
kill_domain_by_device(psdev);
goto end;
@@ -956,13 +953,12 @@ static void xen_pcibk_error_resume(struct pci_dev *dev)
PCI_FUNC(dev->devfn));
if (!psdev || !psdev->pdev) {
- dev_err(&dev->dev,
- DRV_NAME " device is not found/assigned\n");
+ dev_err(&dev->dev, "device is not found/assigned\n");
goto end;
}
if (!psdev->pdev->sh_info) {
- dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
+ dev_err(&dev->dev, "device is not connected or owned"
" by HVM, kill it\n");
kill_domain_by_device(psdev);
goto end;
@@ -975,7 +971,7 @@ static void xen_pcibk_error_resume(struct pci_dev *dev)
kill_domain_by_device(psdev);
goto end;
}
- common_process(psdev, 1, XEN_PCI_OP_aer_resume,
+ common_process(psdev, pci_channel_io_normal, XEN_PCI_OP_aer_resume,
PCI_ERS_RESULT_RECOVERED);
end:
if (psdev)
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
index 7c95516a860f..9a64196e831d 100644
--- a/drivers/xen/xen-pciback/pciback.h
+++ b/drivers/xen/xen-pciback/pciback.h
@@ -14,6 +14,7 @@
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/atomic.h>
+#include <xen/events.h>
#include <xen/interface/io/pciif.h>
#define DRV_NAME "xen-pciback"
@@ -27,6 +28,8 @@ struct pci_dev_entry {
#define PDEVF_op_active (1<<(_PDEVF_op_active))
#define _PCIB_op_pending (1)
#define PCIB_op_pending (1<<(_PCIB_op_pending))
+#define _EOI_pending (2)
+#define EOI_pending (1<<(_EOI_pending))
struct xen_pcibk_device {
void *pci_dev_data;
@@ -68,6 +71,11 @@ struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev);
void pcistub_put_pci_dev(struct pci_dev *dev);
+static inline bool xen_pcibk_pv_support(void)
+{
+ return IS_ENABLED(CONFIG_XEN_PCIDEV_BACKEND);
+}
+
/* Ensure a device is turned off or reset */
void xen_pcibk_reset_device(struct pci_dev *pdev);
@@ -183,12 +191,15 @@ static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
void xen_pcibk_do_op(struct work_struct *data);
+static inline void xen_pcibk_lateeoi(struct xen_pcibk_device *pdev,
+ unsigned int eoi_flag)
+{
+ if (test_and_clear_bit(_EOI_pending, &pdev->flags))
+ xen_irq_lateeoi(pdev->evtchn_irq, eoi_flag);
+}
+
int xen_pcibk_xenbus_register(void);
void xen_pcibk_xenbus_unregister(void);
-
-extern int verbose_request;
-
-void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev);
#endif
/* Handles shared IRQs that can go to the device domain and the control domain. */
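
The *_lateeoi event-channel bindings used by this series keep an event masked until the backend explicitly acknowledges it with xen_irq_lateeoi(). The _EOI_pending bit makes that acknowledgement single-shot: the interrupt handler sets it, and xen_pcibk_lateeoi() clears it atomically before issuing the EOI, so racing completion paths cannot EOI twice. Condensed pairing:

    /* Interrupt side: note that an EOI is now owed. */
    WARN(test_and_set_bit(_EOI_pending, &pdev->flags),
         "IRQ while EOI pending\n");

    /* Completion side: pay the EOI at most once. */
    if (test_and_clear_bit(_EOI_pending, &pdev->flags))
            xen_irq_lateeoi(pdev->evtchn_irq, eoi_flag);
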
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
index 787966f44589..3fbc21466a93 100644
--- a/drivers/xen/xen-pciback/pciback_ops.c
+++ b/drivers/xen/xen-pciback/pciback_ops.c
@@ -6,6 +6,7 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define dev_fmt pr_fmt
#include <linux/moduleparam.h>
#include <linux/wait.h>
@@ -14,9 +15,6 @@
#include <linux/sched.h>
#include "pciback.h"
-int verbose_request;
-module_param(verbose_request, int, 0644);
-
static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id);
/* Ensure a device has the fake IRQ handler "turned on/off" and is
@@ -147,9 +145,6 @@ int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
struct xen_pcibk_dev_data *dev_data;
int status;
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev));
-
if (dev->msi_enabled)
status = -EALREADY;
else if (dev->msix_enabled)
@@ -158,9 +153,8 @@ int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
status = pci_enable_msi(dev);
if (status) {
- pr_warn_ratelimited("%s: error enabling MSI for guest %u: err %d\n",
- pci_name(dev), pdev->xdev->otherend_id,
- status);
+ dev_warn_ratelimited(&dev->dev, "error enabling MSI for guest %u: err %d\n",
+ pdev->xdev->otherend_id, status);
op->value = 0;
return XEN_PCI_ERR_op_failed;
}
@@ -169,9 +163,8 @@ int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
* the local domain's IRQ number. */
op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
- op->value);
+
+ dev_dbg(&dev->dev, "MSI: %d\n", op->value);
dev_data = pci_get_drvdata(dev);
if (dev_data)
@@ -184,10 +177,6 @@ static
int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n",
- pci_name(dev));
-
if (dev->msi_enabled) {
struct xen_pcibk_dev_data *dev_data;
@@ -198,9 +187,9 @@ int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev,
dev_data->ack_intr = 1;
}
op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
- op->value);
+
+ dev_dbg(&dev->dev, "MSI: %d\n", op->value);
+
return 0;
}
@@ -213,9 +202,7 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
struct msix_entry *entries;
u16 cmd;
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n",
- pci_name(dev));
+ dev_dbg(&dev->dev, "enable MSI-X\n");
if (op->value > SH_INFO_MAX_VEC)
return -EINVAL;
@@ -248,17 +235,13 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
if (entries[i].vector) {
op->msix_entries[i].vector =
xen_pirq_from_irq(entries[i].vector);
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: " \
- "MSI-X[%d]: %d\n",
- pci_name(dev), i,
- op->msix_entries[i].vector);
+ dev_dbg(&dev->dev, "MSI-X[%d]: %d\n", i,
+ op->msix_entries[i].vector);
}
}
} else
- pr_warn_ratelimited("%s: error enabling MSI-X for guest %u: err %d!\n",
- pci_name(dev), pdev->xdev->otherend_id,
- result);
+ dev_warn_ratelimited(&dev->dev, "error enabling MSI-X for guest %u: err %d!\n",
+ pdev->xdev->otherend_id, result);
kfree(entries);
op->value = result;
@@ -273,10 +256,6 @@ static
int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n",
- pci_name(dev));
-
if (dev->msix_enabled) {
struct xen_pcibk_dev_data *dev_data;
@@ -291,32 +270,47 @@ int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
* an undefined IRQ value of zero.
*/
op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
- if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n",
- pci_name(dev), op->value);
+
+ dev_dbg(&dev->dev, "MSI-X: %d\n", op->value);
+
return 0;
}
#endif
+
+static inline bool xen_pcibk_test_op_pending(struct xen_pcibk_device *pdev)
+{
+ return test_bit(_XEN_PCIF_active,
+ (unsigned long *)&pdev->sh_info->flags) &&
+ !test_and_set_bit(_PDEVF_op_active, &pdev->flags);
+}
+
/*
* Now the same evtchn is used for both pcifront conf_read_write request
* as well as pcie aer front end ack. We use a new work_queue to schedule
* xen_pcibk conf_read_write service to avoid conflicts with the aer_core
* do_recovery job, which also uses the system default work_queue
*/
-void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
+static void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
{
+ bool eoi = true;
+
/* Check that frontend is requesting an operation and that we are not
* already processing a request */
- if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
- && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
+ if (xen_pcibk_test_op_pending(pdev)) {
schedule_work(&pdev->op_work);
+ eoi = false;
}
/*_XEN_PCIB_active should have been cleared by pcifront. And also make
sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/
if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
&& test_bit(_PCIB_op_pending, &pdev->flags)) {
wake_up(&xen_pcibk_aer_wait_queue);
+ eoi = false;
}
+
+ /* EOI if there was nothing to do. */
+ if (eoi)
+ xen_pcibk_lateeoi(pdev, XEN_EOI_FLAG_SPURIOUS);
}
/* Performing the configuration space reads/writes must not be done in atomic
@@ -324,10 +318,8 @@ void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
* use of semaphores). This function is intended to be called from a work
* queue in process context taking a struct xen_pcibk_device as a parameter */
-void xen_pcibk_do_op(struct work_struct *data)
+static void xen_pcibk_do_one_op(struct xen_pcibk_device *pdev)
{
- struct xen_pcibk_device *pdev =
- container_of(data, struct xen_pcibk_device, op_work);
struct pci_dev *dev;
struct xen_pcibk_dev_data *dev_data = NULL;
struct xen_pci_op *op = &pdev->op;
@@ -400,16 +392,31 @@ void xen_pcibk_do_op(struct work_struct *data)
smp_mb__before_atomic(); /* /after/ clearing PCIF_active */
clear_bit(_PDEVF_op_active, &pdev->flags);
smp_mb__after_atomic(); /* /before/ final check for work */
+}
- /* Check to see if the driver domain tried to start another request in
- * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
- */
- xen_pcibk_test_and_schedule_op(pdev);
+void xen_pcibk_do_op(struct work_struct *data)
+{
+ struct xen_pcibk_device *pdev =
+ container_of(data, struct xen_pcibk_device, op_work);
+
+ do {
+ xen_pcibk_do_one_op(pdev);
+ } while (xen_pcibk_test_op_pending(pdev));
+
+ xen_pcibk_lateeoi(pdev, 0);
}
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id)
{
struct xen_pcibk_device *pdev = dev_id;
+ bool eoi;
+
+ /* IRQs might come in before pdev->evtchn_irq is written. */
+ if (unlikely(pdev->evtchn_irq != irq))
+ pdev->evtchn_irq = irq;
+
+ eoi = test_and_set_bit(_EOI_pending, &pdev->flags);
+ WARN(eoi, "IRQ while EOI pending\n");
xen_pcibk_test_and_schedule_op(pdev);
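
xen_pcibk_do_op() now drains the ring before acknowledging the event: xen_pcibk_test_op_pending() atomically re-arms _PDEVF_op_active when the frontend queued another request, closing the window where a request arriving between the final barrier and the EOI would have gone unserviced. Reduced control flow of the hunk above:

    do {
            xen_pcibk_do_one_op(pdev);      /* clears _PDEVF_op_active when done */
    } while (xen_pcibk_test_op_pending(pdev)); /* more work? re-arm and loop */
    xen_pcibk_lateeoi(pdev, 0);             /* idle: unmask the event channel */
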
@@ -424,7 +431,7 @@ static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id)
dev_data->handled++;
if ((dev_data->handled % 1000) == 0) {
if (xen_test_irq_shared(irq)) {
- pr_info("%s IRQ line is not shared "
+ dev_info(&dev->dev, "%s IRQ line is not shared "
"with other domains. Turning ISR off\n",
dev_data->irq_name);
dev_data->ack_intr = 0;
diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c
index f6ba18191c0f..cc7450f2b2a9 100644
--- a/drivers/xen/xen-pciback/vpci.c
+++ b/drivers/xen/xen-pciback/vpci.c
@@ -7,6 +7,7 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define dev_fmt pr_fmt
#include <linux/list.h>
#include <linux/slab.h>
@@ -69,7 +70,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev, int devid,
publish_pci_dev_cb publish_cb)
{
- int err = 0, slot, func = -1;
+ int err = 0, slot, func = PCI_FUNC(dev->devfn);
struct pci_dev_entry *t, *dev_entry;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
@@ -94,23 +95,25 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
/*
* Keep multi-function devices together on the virtual PCI bus, except
- * virtual functions.
+ * that we want to keep virtual functions at func 0 on their own. They
+ * aren't multi-function devices and hence their presence at func 0
+ * may cause guests to not scan the other functions.
*/
- if (!dev->is_virtfn) {
+ if (!dev->is_virtfn || func) {
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
if (list_empty(&vpci_dev->dev_list[slot]))
continue;
t = list_entry(list_first(&vpci_dev->dev_list[slot]),
struct pci_dev_entry, list);
+ if (t->dev->is_virtfn && !PCI_FUNC(t->dev->devfn))
+ continue;
if (match_slot(dev, t->dev)) {
- pr_info("vpci: %s: assign to virtual slot %d func %d\n",
- pci_name(dev), slot,
- PCI_FUNC(dev->devfn));
+ dev_info(&dev->dev, "vpci: assign to virtual slot %d func %d\n",
+ slot, func);
list_add_tail(&dev_entry->list,
&vpci_dev->dev_list[slot]);
- func = PCI_FUNC(dev->devfn);
goto unlock;
}
}
@@ -119,11 +122,10 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
/* Assign to a new slot on the virtual PCI bus */
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
if (list_empty(&vpci_dev->dev_list[slot])) {
- pr_info("vpci: %s: assign to virtual slot %d\n",
- pci_name(dev), slot);
+ dev_info(&dev->dev, "vpci: assign to virtual slot %d\n",
+ slot);
list_add_tail(&dev_entry->list,
&vpci_dev->dev_list[slot]);
- func = dev->is_virtfn ? 0 : PCI_FUNC(dev->devfn);
goto unlock;
}
}
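
The placement rule after this change: ordinary functions of a multi-function device still share a virtual slot, while a virtual function sitting at function 0 gets a slot of its own, both when it is being added and when it already anchors a slot. A hypothetical predicate condensing the two tests above (names not from the patch):

    /* May "dev" join a slot whose first entry is "first"?  The caller
     * still applies the usual match_slot() check. */
    static bool may_share_slot(const struct pci_dev *dev,
                               const struct pci_dev *first)
    {
            if (dev->is_virtfn && !PCI_FUNC(dev->devfn))
                    return false;   /* lone VF at func 0: own slot */
            if (first->is_virtfn && !PCI_FUNC(first->devfn))
                    return false;   /* slot anchored by such a VF */
            return true;
    }
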
@@ -233,7 +235,6 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
unsigned int *devfn)
{
struct pci_dev_entry *entry;
- struct pci_dev *dev = NULL;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
int found = 0, slot;
@@ -242,11 +243,7 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
list_for_each_entry(entry,
&vpci_dev->dev_list[slot],
list) {
- dev = entry->dev;
- if (dev && dev->bus->number == pcidev->bus->number
- && pci_domain_nr(dev->bus) ==
- pci_domain_nr(pcidev->bus)
- && dev->devfn == pcidev->devfn) {
+ if (entry->dev == pcidev) {
found = 1;
*domain = 0;
*bus = 0;
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 833b2d2c4318..bde63ef677b8 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -14,7 +14,7 @@
#include <linux/workqueue.h>
#include <xen/xenbus.h>
#include <xen/events.h>
-#include <asm/xen/pci.h>
+#include <xen/pci.h>
#include "pciback.h"
#define INVALID_EVTCHN_IRQ (-1)
@@ -105,13 +105,13 @@ static void free_pdev(struct xen_pcibk_device *pdev)
}
static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
- int remote_evtchn)
+ evtchn_port_t remote_evtchn)
{
int err = 0;
void *vaddr;
dev_dbg(&pdev->xdev->dev,
- "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
+ "Attaching to frontend resources - gnt_ref=%d evtchn=%u\n",
gnt_ref, remote_evtchn);
err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr);
@@ -123,8 +123,8 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
pdev->sh_info = vaddr;
- err = bind_interdomain_evtchn_to_irqhandler(
- pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
+ err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
+ pdev->xdev, remote_evtchn, xen_pcibk_handle_event,
0, DRV_NAME, pdev);
if (err < 0) {
xenbus_dev_fatal(pdev->xdev, err,
@@ -142,7 +142,8 @@ out:
static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
{
int err = 0;
- int gnt_ref, remote_evtchn;
+ int gnt_ref;
+ evtchn_port_t remote_evtchn;
char *magic = NULL;
@@ -358,7 +359,8 @@ out:
return err;
}
-static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
+static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev,
+ enum xenbus_state state)
{
int err = 0;
int num_devs;
@@ -372,9 +374,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
mutex_lock(&pdev->dev_lock);
- /* Make sure we only reconfigure once */
- if (xenbus_read_driver_state(pdev->xdev->nodename) !=
- XenbusStateReconfiguring)
+ if (xenbus_read_driver_state(pdev->xdev->nodename) != state)
goto out;
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
@@ -499,6 +499,10 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
}
}
+ if (state != XenbusStateReconfiguring)
+ /* Make sure we only reconfigure once. */
+ goto out;
+
err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
if (err) {
xenbus_dev_fatal(pdev->xdev, err,
@@ -524,7 +528,7 @@ static void xen_pcibk_frontend_changed(struct xenbus_device *xdev,
break;
case XenbusStateReconfiguring:
- xen_pcibk_reconfigure(pdev);
+ xen_pcibk_reconfigure(pdev, XenbusStateReconfiguring);
break;
case XenbusStateConnected:
@@ -544,7 +548,7 @@ static void xen_pcibk_frontend_changed(struct xenbus_device *xdev,
xenbus_switch_state(xdev, XenbusStateClosed);
if (xenbus_dev_is_online(xdev))
break;
- /* fall through - if not online */
+ fallthrough; /* if not online */
case XenbusStateUnknown:
dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
device_unregister(&xdev->dev);
@@ -663,6 +667,15 @@ static void xen_pcibk_be_watch(struct xenbus_watch *watch,
xen_pcibk_setup_backend(pdev);
break;
+ case XenbusStateInitialised:
+ /*
+ * We typically move to Initialised when the first device was
+ * added. Hence subsequent devices getting added may need
+ * reconfiguring.
+ */
+ xen_pcibk_reconfigure(pdev, XenbusStateInitialised);
+ break;
+
default:
break;
}
@@ -688,7 +701,7 @@ static int xen_pcibk_xenbus_probe(struct xenbus_device *dev,
/* watch the backend node for backend configuration information */
err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
- xen_pcibk_be_watch);
+ NULL, xen_pcibk_be_watch);
if (err)
goto out;
@@ -730,6 +743,9 @@ const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend;
int __init xen_pcibk_xenbus_register(void)
{
+ if (!xen_pcibk_pv_support())
+ return 0;
+
xen_pcibk_backend = &xen_pcibk_vpci_backend;
if (passthrough)
xen_pcibk_backend = &xen_pcibk_passthrough_backend;
@@ -739,5 +755,6 @@ int __init xen_pcibk_xenbus_register(void)
void __exit xen_pcibk_xenbus_unregister(void)
{
- xenbus_unregister_driver(&xen_pcibk_driver);
+ if (xen_pcibk_pv_support())
+ xenbus_unregister_driver(&xen_pcibk_driver);
}
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index ba0942e481bc..0c5e565aa8cf 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -33,8 +33,6 @@
#define pr_fmt(fmt) "xen-pvscsi: " fmt
-#include <stdarg.h>
-
#include <linux/module.h>
#include <linux/utsname.h>
#include <linux/interrupt.h>
@@ -91,7 +89,6 @@ struct vscsibk_info {
unsigned int irq;
struct vscsiif_back_ring ring;
- int ring_error;
spinlock_t ring_lock;
atomic_t nr_unreplied_reqs;
@@ -100,6 +97,8 @@ struct vscsibk_info {
struct list_head v2p_entry_lists;
wait_queue_head_t waiting_to_free;
+
+ struct gnttab_page_cache free_pages;
};
/* theoretical maximum of grants for one request */
@@ -189,10 +188,6 @@ module_param_named(max_buffer_pages, scsiback_max_buffer_pages, int, 0644);
MODULE_PARM_DESC(max_buffer_pages,
"Maximum number of free pages to keep in backend buffer");
-static DEFINE_SPINLOCK(free_pages_lock);
-static int free_pages_num;
-static LIST_HEAD(scsiback_free_pages);
-
/* Global spinlock to protect scsiback TPG list */
static DEFINE_MUTEX(scsiback_mutex);
static LIST_HEAD(scsiback_list);
@@ -208,41 +203,6 @@ static void scsiback_put(struct vscsibk_info *info)
wake_up(&info->waiting_to_free);
}
-static void put_free_pages(struct page **page, int num)
-{
- unsigned long flags;
- int i = free_pages_num + num, n = num;
-
- if (num == 0)
- return;
- if (i > scsiback_max_buffer_pages) {
- n = min(num, i - scsiback_max_buffer_pages);
- gnttab_free_pages(n, page + num - n);
- n = num - n;
- }
- spin_lock_irqsave(&free_pages_lock, flags);
- for (i = 0; i < n; i++)
- list_add(&page[i]->lru, &scsiback_free_pages);
- free_pages_num += n;
- spin_unlock_irqrestore(&free_pages_lock, flags);
-}
-
-static int get_free_page(struct page **page)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&free_pages_lock, flags);
- if (list_empty(&scsiback_free_pages)) {
- spin_unlock_irqrestore(&free_pages_lock, flags);
- return gnttab_alloc_pages(1, page);
- }
- page[0] = list_first_entry(&scsiback_free_pages, struct page, lru);
- list_del(&page[0]->lru);
- free_pages_num--;
- spin_unlock_irqrestore(&free_pages_lock, flags);
- return 0;
-}
-
static unsigned long vaddr_page(struct page *page)
{
unsigned long pfn = page_to_pfn(page);
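
The removed open-coded free list is replaced throughout this file by the common grant-table page cache, which keeps a bounded pool of pages suitable for grant mappings behind one small API. A usage sketch following the calls visible in the hunks below (gnttab_page_cache_init() is done once at probe time):

    static int cache_example(struct gnttab_page_cache *cache,
                             unsigned int limit)
    {
            struct page *page;

            if (gnttab_page_cache_get(cache, &page)) /* empty cache falls   */
                    return -ENOMEM;                  /* back to allocation  */
            /* ... map a grant onto the page and use it ... */
            gnttab_page_cache_put(cache, &page, 1);  /* recycle to the pool */
            gnttab_page_cache_shrink(cache, limit);  /* enforce the limit   */
            return 0;
    }
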
@@ -260,10 +220,10 @@ static void scsiback_print_status(char *sense_buffer, int errors,
{
struct scsiback_tpg *tpg = pending_req->v2p->tpg;
- pr_err("[%s:%d] cmnd[0]=%02x -> st=%02x msg=%02x host=%02x drv=%02x\n",
+ pr_err("[%s:%d] cmnd[0]=%02x -> st=%02x msg=%02x host=%02x\n",
tpg->tport->tport_name, pending_req->v2p->lun,
- pending_req->cmnd[0], status_byte(errors), msg_byte(errors),
- host_byte(errors), driver_byte(errors));
+ pending_req->cmnd[0], errors & 0xff, COMMAND_COMPLETE,
+ host_byte(errors));
}
static void scsiback_fast_flush_area(struct vscsibk_pend *req)
@@ -303,7 +263,8 @@ static void scsiback_fast_flush_area(struct vscsibk_pend *req)
BUG_ON(err);
}
- put_free_pages(req->pages, req->n_grants);
+ gnttab_page_cache_put(&req->info->free_pages, req->pages,
+ req->n_grants);
req->n_grants = 0;
}
@@ -397,21 +358,18 @@ static void scsiback_cmd_exec(struct vscsibk_pend *pending_req)
{
struct se_cmd *se_cmd = &pending_req->se_cmd;
struct se_session *sess = pending_req->v2p->tpg->tpg_nexus->tvn_se_sess;
- int rc;
scsiback_get(pending_req->info);
se_cmd->tag = pending_req->rqid;
- rc = target_submit_cmd_map_sgls(se_cmd, sess, pending_req->cmnd,
- pending_req->sense_buffer, pending_req->v2p->lun,
- pending_req->data_len, 0,
- pending_req->sc_data_direction, TARGET_SCF_ACK_KREF,
- pending_req->sgl, pending_req->n_sg,
- NULL, 0, NULL, 0);
- if (rc < 0) {
- transport_send_check_condition_and_sense(se_cmd,
- TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE, 0);
- transport_generic_free_cmd(se_cmd, 0);
- }
+ target_init_cmd(se_cmd, sess, pending_req->sense_buffer,
+ pending_req->v2p->lun, pending_req->data_len, 0,
+ pending_req->sc_data_direction, TARGET_SCF_ACK_KREF);
+
+ if (target_submit_prep(se_cmd, pending_req->cmnd, pending_req->sgl,
+ pending_req->n_sg, NULL, 0, NULL, 0, GFP_KERNEL))
+ return;
+
+ target_submit(se_cmd);
}
static int scsiback_gnttab_data_map_batch(struct gnttab_map_grant_ref *map,
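
target_submit_cmd_map_sgls() is split here into the newer three-step LIO submission flow; per the target core conversion this hunk belongs to, a failing target_submit_prep() queues the error response itself, which is why the caller returns without further cleanup. The sequence, condensed from the hunk above:

    target_init_cmd(se_cmd, sess, sense_buffer, lun, data_len,
                    0 /* task attr */, dir, TARGET_SCF_ACK_KREF);
    if (target_submit_prep(se_cmd, cdb, sgl, n_sg,
                           NULL, 0, NULL, 0, GFP_KERNEL))
            return;                 /* prep already failed the command */
    target_submit(se_cmd);
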
@@ -423,12 +381,12 @@ static int scsiback_gnttab_data_map_batch(struct gnttab_map_grant_ref *map,
return 0;
err = gnttab_map_refs(map, NULL, pg, cnt);
- BUG_ON(err);
for (i = 0; i < cnt; i++) {
if (unlikely(map[i].status != GNTST_okay)) {
pr_err("invalid buffer -- could not remap it\n");
map[i].handle = SCSIBACK_INVALID_HANDLE;
- err = -ENOMEM;
+ if (!err)
+ err = -ENOMEM;
} else {
get_page(pg[i]);
}
@@ -446,8 +404,8 @@ static int scsiback_gnttab_data_map_list(struct vscsibk_pend *pending_req,
struct vscsibk_info *info = pending_req->info;
for (i = 0; i < cnt; i++) {
- if (get_free_page(pg + mapcount)) {
- put_free_pages(pg, mapcount);
+ if (gnttab_page_cache_get(&info->free_pages, pg + mapcount)) {
+ gnttab_page_cache_put(&info->free_pages, pg, mapcount);
pr_err("no grant page\n");
return -ENOMEM;
}
@@ -722,7 +680,8 @@ static struct vscsibk_pend *prepare_pending_reqs(struct vscsibk_info *info,
return pending_req;
}
-static int scsiback_do_cmd_fn(struct vscsibk_info *info)
+static int scsiback_do_cmd_fn(struct vscsibk_info *info,
+ unsigned int *eoi_flags)
{
struct vscsiif_back_ring *ring = &info->ring;
struct vscsiif_request ring_req;
@@ -739,11 +698,12 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
rc = ring->rsp_prod_pvt;
pr_warn("Dom%d provided bogus ring requests (%#x - %#x = %u). Halting ring processing\n",
info->domid, rp, rc, rp - rc);
- info->ring_error = 1;
- return 0;
+ return -EINVAL;
}
while ((rc != rp)) {
+ *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS;
+
if (RING_REQUEST_CONS_OVERFLOW(ring, rc))
break;
@@ -757,10 +717,10 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
result = DID_NO_CONNECT;
break;
default:
- result = DRIVER_ERROR;
+ result = DID_ERROR;
break;
}
- scsiback_send_response(info, NULL, result << 24, 0,
+ scsiback_send_response(info, NULL, result << 16, 0,
ring_req.rqid);
return 1;
}
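
With the kernel's driver byte (bits 31:24 of the SCSI result word) removed, the backend now reports errors through the host byte at bits 23:16, and DRIVER_ERROR gives way to DID_ERROR. Worked example of the new composition:

    /* Result word without the driver byte:
     *   bits 23:16 host byte (DID_*), bits 7:0 SCSI status byte. */
    u32 result = (DID_ERROR << 16) | SAM_STAT_CHECK_CONDITION;
    /* == 0x00070002: DID_ERROR is 0x07, CHECK CONDITION is 0x02 */
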
@@ -770,7 +730,7 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
if (scsiback_gnttab_data_map(&ring_req, pending_req)) {
scsiback_fast_flush_area(pending_req);
scsiback_do_resp_with_sense(NULL,
- DRIVER_ERROR << 24, 0, pending_req);
+ DID_ERROR << 16, 0, pending_req);
transport_generic_free_cmd(&pending_req->se_cmd, 0);
} else {
scsiback_cmd_exec(pending_req);
@@ -785,7 +745,7 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
break;
default:
pr_err_ratelimited("invalid request\n");
- scsiback_do_resp_with_sense(NULL, DRIVER_ERROR << 24, 0,
+ scsiback_do_resp_with_sense(NULL, DID_ERROR << 16, 0,
pending_req);
transport_generic_free_cmd(&pending_req->se_cmd, 0);
break;
@@ -795,6 +755,8 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
cond_resched();
}
+ gnttab_page_cache_shrink(&info->free_pages, scsiback_max_buffer_pages);
+
RING_FINAL_CHECK_FOR_REQUESTS(&info->ring, more_to_do);
return more_to_do;
}
@@ -802,13 +764,16 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
static irqreturn_t scsiback_irq_fn(int irq, void *dev_id)
{
struct vscsibk_info *info = dev_id;
+ int rc;
+ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
- if (info->ring_error)
- return IRQ_HANDLED;
-
- while (scsiback_do_cmd_fn(info))
+ while ((rc = scsiback_do_cmd_fn(info, &eoi_flags)) > 0)
cond_resched();
+ /* In case of a ring error we keep the event channel masked. */
+ if (!rc)
+ xen_irq_lateeoi(irq, eoi_flags);
+
return IRQ_HANDLED;
}
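
This is the canonical lateeoi handler shape: eoi_flags starts as XEN_EOI_FLAG_SPURIOUS and scsiback_do_cmd_fn() strips that flag as soon as a real request is consumed, so the core can penalise event storms that carry no work; on a ring error the EOI is never sent and the channel stays masked. Pattern sketch (process_ring() is a hypothetical stand-in):

    static irqreturn_t example_irq_fn(int irq, void *dev_id)
    {
            unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
            int rc;

            while ((rc = process_ring(dev_id, &eoi_flags)) > 0)
                    cond_resched();
            if (!rc)                /* rc < 0: leave the channel masked */
                    xen_irq_lateeoi(irq, eoi_flags);
            return IRQ_HANDLED;
    }
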
@@ -829,7 +794,7 @@ static int scsiback_init_sring(struct vscsibk_info *info, grant_ref_t ring_ref,
sring = (struct vscsiif_sring *)area;
BACK_RING_INIT(&info->ring, sring, PAGE_SIZE);
- err = bind_interdomain_evtchn_to_irq(info->domid, evtchn);
+ err = bind_interdomain_evtchn_to_irq_lateeoi(info->dev, evtchn);
if (err < 0)
goto unmap_page;
@@ -854,7 +819,8 @@ unmap_page:
static int scsiback_map(struct vscsibk_info *info)
{
struct xenbus_device *dev = info->dev;
- unsigned int ring_ref, evtchn;
+ unsigned int ring_ref;
+ evtchn_port_t evtchn;
int err;
err = xenbus_gather(XBT_NIL, dev->otherend,
@@ -1184,7 +1150,7 @@ static void scsiback_frontend_changed(struct xenbus_device *dev,
xenbus_switch_state(dev, XenbusStateClosed);
if (xenbus_dev_is_online(dev))
break;
- /* fall through - if not online */
+ fallthrough; /* if not online */
case XenbusStateUnknown:
device_unregister(&dev->dev);
break;
@@ -1228,6 +1194,8 @@ static int scsiback_remove(struct xenbus_device *dev)
scsiback_release_translation_entry(info);
+ gnttab_page_cache_shrink(&info->free_pages, 0);
+
dev_set_drvdata(&dev->dev, NULL);
return 0;
@@ -1252,13 +1220,13 @@ static int scsiback_probe(struct xenbus_device *dev,
info->domid = dev->otherend_id;
spin_lock_init(&info->ring_lock);
- info->ring_error = 0;
atomic_set(&info->nr_unreplied_reqs, 0);
init_waitqueue_head(&info->waiting_to_free);
info->dev = dev;
info->irq = 0;
INIT_LIST_HEAD(&info->v2p_entry_lists);
spin_lock_init(&info->v2p_lock);
+ gnttab_page_cache_init(&info->free_pages);
err = xenbus_printf(XBT_NIL, dev->nodename, "feature-sg-grant", "%u",
SG_ALL);
@@ -1431,8 +1399,7 @@ static int scsiback_queue_status(struct se_cmd *se_cmd)
if (se_cmd->sense_buffer &&
((se_cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ||
(se_cmd->se_cmd_flags & SCF_EMULATED_TASK_SENSE)))
- pending_req->result = (DRIVER_SENSE << 24) |
- SAM_STAT_CHECK_CONDITION;
+ pending_req->result = SAM_STAT_CHECK_CONDITION;
else
pending_req->result = se_cmd->scsi_status;
@@ -1875,13 +1842,6 @@ out:
static void __exit scsiback_exit(void)
{
- struct page *page;
-
- while (free_pages_num) {
- if (get_free_page(&page))
- BUG();
- gnttab_free_pages(1, &page);
- }
target_unregister_template(&scsiback_ops);
xenbus_unregister_driver(&scsiback_driver);
}
diff --git a/drivers/xen/xen-stub.c b/drivers/xen/xen-stub.c
deleted file mode 100644
index 3be4e74660b5..000000000000
--- a/drivers/xen/xen-stub.c
+++ /dev/null
@@ -1,90 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * xen-stub.c - stub drivers to reserve space for Xen
- *
- * Copyright (C) 2012 Intel Corporation
- * Author: Liu Jinsong <jinsong.liu@intel.com>
- * Author: Jiang Yunhong <yunhong.jiang@intel.com>
- *
- * Copyright (C) 2012 Oracle Inc
- * Author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/export.h>
-#include <linux/types.h>
-#include <linux/acpi.h>
-#include <xen/acpi.h>
-
-#ifdef CONFIG_ACPI
-
-/*--------------------------------------------
- stub driver for Xen memory hotplug
---------------------------------------------*/
-
-static const struct acpi_device_id memory_device_ids[] = {
- {ACPI_MEMORY_DEVICE_HID, 0},
- {"", 0},
-};
-
-static struct acpi_driver xen_stub_memory_device_driver = {
- /* same name as native memory driver to block native loaded */
- .name = "acpi_memhotplug",
- .class = ACPI_MEMORY_DEVICE_CLASS,
- .ids = memory_device_ids,
-};
-
-int xen_stub_memory_device_init(void)
-{
- if (!xen_initial_domain())
- return -ENODEV;
-
- /* just reserve space for Xen, block native driver loaded */
- return acpi_bus_register_driver(&xen_stub_memory_device_driver);
-}
-EXPORT_SYMBOL_GPL(xen_stub_memory_device_init);
-subsys_initcall(xen_stub_memory_device_init);
-
-void xen_stub_memory_device_exit(void)
-{
- acpi_bus_unregister_driver(&xen_stub_memory_device_driver);
-}
-EXPORT_SYMBOL_GPL(xen_stub_memory_device_exit);
-
-
-/*--------------------------------------------
- stub driver for Xen cpu hotplug
---------------------------------------------*/
-
-static const struct acpi_device_id processor_device_ids[] = {
- {ACPI_PROCESSOR_OBJECT_HID, 0},
- {ACPI_PROCESSOR_DEVICE_HID, 0},
- {"", 0},
-};
-
-static struct acpi_driver xen_stub_processor_driver = {
- /* same name as native processor driver to block native loaded */
- .name = "processor",
- .class = ACPI_PROCESSOR_CLASS,
- .ids = processor_device_ids,
-};
-
-int xen_stub_processor_init(void)
-{
- if (!xen_initial_domain())
- return -ENODEV;
-
- /* just reserve space for Xen, block native driver loaded */
- return acpi_bus_register_driver(&xen_stub_processor_driver);
-}
-EXPORT_SYMBOL_GPL(xen_stub_processor_init);
-subsys_initcall(xen_stub_processor_init);
-
-void xen_stub_processor_exit(void)
-{
- acpi_bus_unregister_driver(&xen_stub_processor_driver);
-}
-EXPORT_SYMBOL_GPL(xen_stub_processor_exit);
-
-#endif
diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h
index 5f5b8a7d5b80..2754bdfadcb8 100644
--- a/drivers/xen/xenbus/xenbus.h
+++ b/drivers/xen/xenbus/xenbus.h
@@ -44,6 +44,8 @@ struct xen_bus_type {
int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename);
int (*probe)(struct xen_bus_type *bus, const char *type,
const char *dir);
+ bool (*otherend_will_handle)(struct xenbus_watch *watch,
+ const char *path, const char *token);
void (*otherend_changed)(struct xenbus_watch *watch, const char *path,
const char *token);
struct bus_type bus;
@@ -104,7 +106,7 @@ void xs_request_exit(struct xb_req_data *req);
int xenbus_match(struct device *_dev, struct device_driver *_drv);
int xenbus_dev_probe(struct device *_dev);
-int xenbus_dev_remove(struct device *_dev);
+void xenbus_dev_remove(struct device *_dev);
int xenbus_register_driver_common(struct xenbus_driver *drv,
struct xen_bus_type *bus,
struct module *owner,
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index e17ca8156171..e8bed1cb76ba 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -69,11 +69,24 @@ struct xenbus_map_node {
unsigned int nr_handles;
};
+struct map_ring_valloc {
+ struct xenbus_map_node *node;
+
+ /* Why do we need two arrays? See comment of __xenbus_map_ring */
+ unsigned long addrs[XENBUS_MAX_RING_GRANTS];
+ phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
+
+ struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS];
+ struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
+
+ unsigned int idx;
+};
+
static DEFINE_SPINLOCK(xenbus_valloc_lock);
static LIST_HEAD(xenbus_valloc_pages);
struct xenbus_ring_ops {
- int (*map)(struct xenbus_device *dev,
+ int (*map)(struct xenbus_device *dev, struct map_ring_valloc *info,
grant_ref_t *gnt_refs, unsigned int nr_grefs,
void **vaddr);
int (*unmap)(struct xenbus_device *dev, void *vaddr);
@@ -114,18 +127,22 @@ EXPORT_SYMBOL_GPL(xenbus_strstate);
*/
int xenbus_watch_path(struct xenbus_device *dev, const char *path,
struct xenbus_watch *watch,
+ bool (*will_handle)(struct xenbus_watch *,
+ const char *, const char *),
void (*callback)(struct xenbus_watch *,
const char *, const char *))
{
int err;
watch->node = path;
+ watch->will_handle = will_handle;
watch->callback = callback;
err = register_xenbus_watch(watch);
if (err) {
watch->node = NULL;
+ watch->will_handle = NULL;
watch->callback = NULL;
xenbus_dev_fatal(dev, err, "adding watch on %s", path);
}
@@ -152,6 +169,8 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path);
*/
int xenbus_watch_pathfmt(struct xenbus_device *dev,
struct xenbus_watch *watch,
+ bool (*will_handle)(struct xenbus_watch *,
+ const char *, const char *),
void (*callback)(struct xenbus_watch *,
const char *, const char *),
const char *pathfmt, ...)
@@ -168,7 +187,7 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev,
xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
return -ENOMEM;
}
- err = xenbus_watch_path(dev, path, watch, callback);
+ err = xenbus_watch_path(dev, path, watch, will_handle, callback);
if (err)
kfree(path);
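
The new will_handle hook lets a driver veto individual watch events before they are queued, bounding how much event memory a misbehaving peer can force the recipient to buffer (this interface arrived with the XSA-349 hardening). Hypothetical usage, with a filter that only accepts the watched node itself:

    static bool my_will_handle(struct xenbus_watch *watch,
                               const char *path, const char *token)
    {
            return strcmp(path, watch->node) == 0;  /* drop sub-node noise */
    }

    err = xenbus_watch_path(dev, dev->nodename, &my_watch,
                            my_will_handle, my_changed_callback);
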
@@ -363,8 +382,14 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
int i, j;
for (i = 0; i < nr_pages; i++) {
- err = gnttab_grant_foreign_access(dev->otherend_id,
- virt_to_gfn(vaddr), 0);
+ unsigned long gfn;
+
+ if (is_vmalloc_addr(vaddr))
+ gfn = pfn_to_gfn(vmalloc_to_pfn(vaddr));
+ else
+ gfn = virt_to_gfn(vaddr);
+
+ err = gnttab_grant_foreign_access(dev->otherend_id, gfn, 0);
if (err < 0) {
xenbus_dev_fatal(dev, err,
"granting access to ring page");
@@ -391,7 +416,7 @@ EXPORT_SYMBOL_GPL(xenbus_grant_ring);
* error, the device will switch to XenbusStateClosing, and the error will be
* saved in the store.
*/
-int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port)
+int xenbus_alloc_evtchn(struct xenbus_device *dev, evtchn_port_t *port)
{
struct evtchn_alloc_unbound alloc_unbound;
int err;
@@ -414,7 +439,7 @@ EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);
/**
* Free an existing event channel. Returns 0 on success or -errno on error.
*/
-int xenbus_free_evtchn(struct xenbus_device *dev, int port)
+int xenbus_free_evtchn(struct xenbus_device *dev, evtchn_port_t port)
{
struct evtchn_close close;
int err;
@@ -423,7 +448,7 @@ int xenbus_free_evtchn(struct xenbus_device *dev, int port)
err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
if (err)
- xenbus_dev_error(dev, err, "freeing event channel %d", port);
+ xenbus_dev_error(dev, err, "freeing event channel %u", port);
return err;
}
@@ -440,15 +465,34 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
* Map @nr_grefs pages of memory into this domain from another
* domain's grant table. xenbus_map_ring_valloc allocates @nr_grefs
* pages of virtual address space, maps the pages to that address, and
- * sets *vaddr to that address. Returns 0 on success, and GNTST_*
- * (see xen/include/interface/grant_table.h) or -ENOMEM / -EINVAL on
+ * sets *vaddr to that address. Returns 0 on success, and -errno on
* error. If an error is returned, device will switch to
* XenbusStateClosing and the error message will be saved in XenStore.
*/
int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs,
unsigned int nr_grefs, void **vaddr)
{
- return ring_ops->map(dev, gnt_refs, nr_grefs, vaddr);
+ int err;
+ struct map_ring_valloc *info;
+
+ *vaddr = NULL;
+
+ if (nr_grefs > XENBUS_MAX_RING_GRANTS)
+ return -EINVAL;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ info->node = kzalloc(sizeof(*info->node), GFP_KERNEL);
+ if (!info->node)
+ err = -ENOMEM;
+ else
+ err = ring_ops->map(dev, info, gnt_refs, nr_grefs, vaddr);
+
+ kfree(info->node);
+ kfree(info);
+ return err;
}
EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
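
For reference, a backend-side usage sketch of the reworked interface (error paths elided): map the frontend's ring grant into a fresh virtual area, then release it with the matching vfree variant:

	void *ring;
	int err;

	err = xenbus_map_ring_valloc(dev, &gref, 1, &ring);
	if (err)
		return err;
	/* ... use the shared ring ... */
	xenbus_unmap_ring_vfree(dev, ring);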
@@ -459,79 +503,105 @@ static int __xenbus_map_ring(struct xenbus_device *dev,
grant_ref_t *gnt_refs,
unsigned int nr_grefs,
grant_handle_t *handles,
- phys_addr_t *addrs,
+ struct map_ring_valloc *info,
unsigned int flags,
bool *leaked)
{
- struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS];
- struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
int i, j;
- int err = GNTST_okay;
if (nr_grefs > XENBUS_MAX_RING_GRANTS)
return -EINVAL;
for (i = 0; i < nr_grefs; i++) {
- memset(&map[i], 0, sizeof(map[i]));
- gnttab_set_map_op(&map[i], addrs[i], flags, gnt_refs[i],
- dev->otherend_id);
+ gnttab_set_map_op(&info->map[i], info->phys_addrs[i], flags,
+ gnt_refs[i], dev->otherend_id);
handles[i] = INVALID_GRANT_HANDLE;
}
- gnttab_batch_map(map, i);
+ gnttab_batch_map(info->map, i);
for (i = 0; i < nr_grefs; i++) {
- if (map[i].status != GNTST_okay) {
- err = map[i].status;
- xenbus_dev_fatal(dev, map[i].status,
+ if (info->map[i].status != GNTST_okay) {
+ xenbus_dev_fatal(dev, info->map[i].status,
"mapping in shared page %d from domain %d",
gnt_refs[i], dev->otherend_id);
goto fail;
} else
- handles[i] = map[i].handle;
+ handles[i] = info->map[i].handle;
}
- return GNTST_okay;
+ return 0;
fail:
for (i = j = 0; i < nr_grefs; i++) {
if (handles[i] != INVALID_GRANT_HANDLE) {
- memset(&unmap[j], 0, sizeof(unmap[j]));
- gnttab_set_unmap_op(&unmap[j], (phys_addr_t)addrs[i],
+ gnttab_set_unmap_op(&info->unmap[j],
+ info->phys_addrs[i],
GNTMAP_host_map, handles[i]);
j++;
}
}
- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, j))
- BUG();
+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, info->unmap, j));
*leaked = false;
for (i = 0; i < j; i++) {
- if (unmap[i].status != GNTST_okay) {
+ if (info->unmap[i].status != GNTST_okay) {
*leaked = true;
break;
}
}
- return err;
+ return -ENOENT;
}
-struct map_ring_valloc_hvm
+/**
+ * xenbus_unmap_ring
+ * @dev: xenbus device
+ * @handles: grant handle array
+ * @nr_handles: number of handles in the array
+ * @vaddrs: addresses to unmap
+ *
+ * Unmap memory in this domain that was imported from another domain.
+ * Returns 0 on success, or GNTST_* on error
+ * (see xen/include/interface/grant_table.h).
+ */
+static int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t *handles,
+ unsigned int nr_handles, unsigned long *vaddrs)
{
- unsigned int idx;
+ struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
+ int i;
+ int err;
- /* Why do we need two arrays? See comment of __xenbus_map_ring */
- phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
- unsigned long addrs[XENBUS_MAX_RING_GRANTS];
-};
+ if (nr_handles > XENBUS_MAX_RING_GRANTS)
+ return -EINVAL;
+
+ for (i = 0; i < nr_handles; i++)
+ gnttab_set_unmap_op(&unmap[i], vaddrs[i],
+ GNTMAP_host_map, handles[i]);
+
+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i));
+
+ err = GNTST_okay;
+ for (i = 0; i < nr_handles; i++) {
+ if (unmap[i].status != GNTST_okay) {
+ xenbus_dev_error(dev, unmap[i].status,
+ "unmapping page at handle %d error %d",
+ handles[i], unmap[i].status);
+ err = unmap[i].status;
+ break;
+ }
+ }
+
+ return err;
+}
static void xenbus_map_ring_setup_grant_hvm(unsigned long gfn,
unsigned int goffset,
unsigned int len,
void *data)
{
- struct map_ring_valloc_hvm *info = data;
+ struct map_ring_valloc *info = data;
unsigned long vaddr = (unsigned long)gfn_to_virt(gfn);
info->phys_addrs[info->idx] = vaddr;
@@ -540,39 +610,28 @@ static void xenbus_map_ring_setup_grant_hvm(unsigned long gfn,
info->idx++;
}
-static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
- grant_ref_t *gnt_ref,
- unsigned int nr_grefs,
- void **vaddr)
+static int xenbus_map_ring_hvm(struct xenbus_device *dev,
+ struct map_ring_valloc *info,
+ grant_ref_t *gnt_ref,
+ unsigned int nr_grefs,
+ void **vaddr)
{
- struct xenbus_map_node *node;
+ struct xenbus_map_node *node = info->node;
int err;
void *addr;
bool leaked = false;
- struct map_ring_valloc_hvm info = {
- .idx = 0,
- };
unsigned int nr_pages = XENBUS_PAGES(nr_grefs);
- if (nr_grefs > XENBUS_MAX_RING_GRANTS)
- return -EINVAL;
-
- *vaddr = NULL;
-
- node = kzalloc(sizeof(*node), GFP_KERNEL);
- if (!node)
- return -ENOMEM;
-
- err = alloc_xenballooned_pages(nr_pages, node->hvm.pages);
+ err = xen_alloc_unpopulated_pages(nr_pages, node->hvm.pages);
if (err)
goto out_err;
gnttab_foreach_grant(node->hvm.pages, nr_grefs,
xenbus_map_ring_setup_grant_hvm,
- &info);
+ info);
err = __xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handles,
- info.phys_addrs, GNTMAP_host_map, &leaked);
+ info, GNTMAP_host_map, &leaked);
node->nr_handles = nr_grefs;
if (err)
@@ -592,61 +651,23 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
spin_unlock(&xenbus_valloc_lock);
*vaddr = addr;
+ info->node = NULL;
+
return 0;
out_xenbus_unmap_ring:
if (!leaked)
- xenbus_unmap_ring(dev, node->handles, nr_grefs, info.addrs);
+ xenbus_unmap_ring(dev, node->handles, nr_grefs, info->addrs);
else
pr_alert("leaking %p size %u page(s)",
addr, nr_pages);
out_free_ballooned_pages:
if (!leaked)
- free_xenballooned_pages(nr_pages, node->hvm.pages);
+ xen_free_unpopulated_pages(nr_pages, node->hvm.pages);
out_err:
- kfree(node);
return err;
}
-
-/**
- * xenbus_map_ring
- * @dev: xenbus device
- * @gnt_refs: grant reference array
- * @nr_grefs: number of grant reference
- * @handles: pointer to grant handle to be filled
- * @vaddrs: addresses to be mapped to
- * @leaked: fail to clean up a failed map, caller should not free vaddr
- *
- * Map pages of memory into this domain from another domain's grant table.
- * xenbus_map_ring does not allocate the virtual address space (you must do
- * this yourself!). It only maps in the pages to the specified address.
- * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
- * or -ENOMEM / -EINVAL on error. If an error is returned, device will switch to
- * XenbusStateClosing and the first error message will be saved in XenStore.
- * Further more if we fail to map the ring, caller should check @leaked.
- * If @leaked is not zero it means xenbus_map_ring fails to clean up, caller
- * should not free the address space of @vaddr.
- */
-int xenbus_map_ring(struct xenbus_device *dev, grant_ref_t *gnt_refs,
- unsigned int nr_grefs, grant_handle_t *handles,
- unsigned long *vaddrs, bool *leaked)
-{
- phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
- int i;
-
- if (nr_grefs > XENBUS_MAX_RING_GRANTS)
- return -EINVAL;
-
- for (i = 0; i < nr_grefs; i++)
- phys_addrs[i] = (unsigned long)vaddrs[i];
-
- return __xenbus_map_ring(dev, gnt_refs, nr_grefs, handles,
- phys_addrs, GNTMAP_host_map, leaked);
-}
-EXPORT_SYMBOL_GPL(xenbus_map_ring);
-
-
/**
* xenbus_unmap_ring_vfree
* @dev: xenbus device
@@ -666,40 +687,33 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
#ifdef CONFIG_XEN_PV
-static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
- grant_ref_t *gnt_refs,
- unsigned int nr_grefs,
- void **vaddr)
+static int map_ring_apply(pte_t *pte, unsigned long addr, void *data)
{
- struct xenbus_map_node *node;
- struct vm_struct *area;
- pte_t *ptes[XENBUS_MAX_RING_GRANTS];
- phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
- int err = GNTST_okay;
- int i;
- bool leaked;
+ struct map_ring_valloc *info = data;
- *vaddr = NULL;
-
- if (nr_grefs > XENBUS_MAX_RING_GRANTS)
- return -EINVAL;
+ info->phys_addrs[info->idx++] = arbitrary_virt_to_machine(pte).maddr;
+ return 0;
+}
- node = kzalloc(sizeof(*node), GFP_KERNEL);
- if (!node)
- return -ENOMEM;
+static int xenbus_map_ring_pv(struct xenbus_device *dev,
+ struct map_ring_valloc *info,
+ grant_ref_t *gnt_refs,
+ unsigned int nr_grefs,
+ void **vaddr)
+{
+ struct xenbus_map_node *node = info->node;
+ struct vm_struct *area;
+ bool leaked = false;
+ int err = -ENOMEM;
- area = alloc_vm_area(XEN_PAGE_SIZE * nr_grefs, ptes);
- if (!area) {
- kfree(node);
+ area = get_vm_area(XEN_PAGE_SIZE * nr_grefs, VM_IOREMAP);
+ if (!area)
return -ENOMEM;
- }
-
- for (i = 0; i < nr_grefs; i++)
- phys_addrs[i] = arbitrary_virt_to_machine(ptes[i]).maddr;
-
+ if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
+ XEN_PAGE_SIZE * nr_grefs, map_ring_apply, info))
+ goto failed;
err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles,
- phys_addrs,
- GNTMAP_host_map | GNTMAP_contains_pte,
+ info, GNTMAP_host_map | GNTMAP_contains_pte,
&leaked);
if (err)
goto failed;
@@ -712,6 +726,8 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
spin_unlock(&xenbus_valloc_lock);
*vaddr = area->addr;
+ info->node = NULL;
+
return 0;
failed:
@@ -720,11 +736,10 @@ failed:
else
pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs);
- kfree(node);
return err;
}
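
The replacement pattern above, in isolation (a sketch): get_vm_area() reserves virtual address space without backing pages, and apply_to_page_range() allocates the page tables, invoking the callback once per PTE; a nonzero return from the callback aborts the walk. This open-codes what alloc_vm_area() used to provide:

	static int collect_pte(pte_t *pte, unsigned long addr, void *data)
	{
		struct map_ring_valloc *info = data;

		/* Record the PTE's machine address for a
		 * GNTMAP_contains_pte mapping. */
		info->phys_addrs[info->idx++] =
			arbitrary_virt_to_machine(pte).maddr;
		return 0;
	}

	area = get_vm_area(XEN_PAGE_SIZE * nr_grefs, VM_IOREMAP);
	err = apply_to_page_range(&init_mm, (unsigned long)area->addr,
				  XEN_PAGE_SIZE * nr_grefs,
				  collect_pte, info);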
-static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
+static int xenbus_unmap_ring_pv(struct xenbus_device *dev, void *vaddr)
{
struct xenbus_map_node *node;
struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
@@ -761,8 +776,7 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
unmap[i].handle = node->handles[i];
}
- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
- BUG();
+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i));
err = GNTST_okay;
leaked = false;
@@ -788,12 +802,12 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
}
static const struct xenbus_ring_ops ring_ops_pv = {
- .map = xenbus_map_ring_valloc_pv,
- .unmap = xenbus_unmap_ring_vfree_pv,
+ .map = xenbus_map_ring_pv,
+ .unmap = xenbus_unmap_ring_pv,
};
#endif
-struct unmap_ring_vfree_hvm
+struct unmap_ring_hvm
{
unsigned int idx;
unsigned long addrs[XENBUS_MAX_RING_GRANTS];
@@ -804,19 +818,19 @@ static void xenbus_unmap_ring_setup_grant_hvm(unsigned long gfn,
unsigned int len,
void *data)
{
- struct unmap_ring_vfree_hvm *info = data;
+ struct unmap_ring_hvm *info = data;
info->addrs[info->idx] = (unsigned long)gfn_to_virt(gfn);
info->idx++;
}
-static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
+static int xenbus_unmap_ring_hvm(struct xenbus_device *dev, void *vaddr)
{
int rv;
struct xenbus_map_node *node;
void *addr;
- struct unmap_ring_vfree_hvm info = {
+ struct unmap_ring_hvm info = {
.idx = 0,
};
unsigned int nr_pages;
@@ -849,7 +863,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
info.addrs);
if (!rv) {
vunmap(vaddr);
- free_xenballooned_pages(nr_pages, node->hvm.pages);
+ xen_free_unpopulated_pages(nr_pages, node->hvm.pages);
}
else
WARN(1, "Leaking %p, size %u page(s)\n", vaddr, nr_pages);
@@ -859,51 +873,6 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
}
/**
- * xenbus_unmap_ring
- * @dev: xenbus device
- * @handles: grant handle array
- * @nr_handles: number of handles in the array
- * @vaddrs: addresses to unmap
- *
- * Unmap memory in this domain that was imported from another domain.
- * Returns 0 on success and returns GNTST_* on error
- * (see xen/include/interface/grant_table.h).
- */
-int xenbus_unmap_ring(struct xenbus_device *dev,
- grant_handle_t *handles, unsigned int nr_handles,
- unsigned long *vaddrs)
-{
- struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
- int i;
- int err;
-
- if (nr_handles > XENBUS_MAX_RING_GRANTS)
- return -EINVAL;
-
- for (i = 0; i < nr_handles; i++)
- gnttab_set_unmap_op(&unmap[i], vaddrs[i],
- GNTMAP_host_map, handles[i]);
-
- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
- BUG();
-
- err = GNTST_okay;
- for (i = 0; i < nr_handles; i++) {
- if (unmap[i].status != GNTST_okay) {
- xenbus_dev_error(dev, unmap[i].status,
- "unmapping page at handle %d error %d",
- handles[i], unmap[i].status);
- err = unmap[i].status;
- break;
- }
- }
-
- return err;
-}
-EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
-
-
-/**
* xenbus_read_driver_state
* @path: path for driver
*
@@ -922,8 +891,8 @@ enum xenbus_state xenbus_read_driver_state(const char *path)
EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
static const struct xenbus_ring_ops ring_ops_hvm = {
- .map = xenbus_map_ring_valloc_hvm,
- .unmap = xenbus_unmap_ring_vfree_hvm,
+ .map = xenbus_map_ring_hvm,
+ .unmap = xenbus_unmap_ring_hvm,
};
void __init xenbus_ring_ops_init(void)
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index eb5151fc8efa..e5fda0256feb 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -57,16 +57,8 @@ DEFINE_MUTEX(xs_response_mutex);
static int xenbus_irq;
static struct task_struct *xenbus_task;
-static DECLARE_WORK(probe_work, xenbus_probe);
-
-
static irqreturn_t wake_waiting(int irq, void *unused)
{
- if (unlikely(xenstored_ready == 0)) {
- xenstored_ready = 1;
- schedule_work(&probe_work);
- }
-
wake_up(&xb_waitq);
return IRQ_HANDLED;
}
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 8c4d05b687b7..fe360c33ce71 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -31,6 +31,7 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define dev_fmt pr_fmt
#define DPRINTK(fmt, args...) \
pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
@@ -51,7 +52,6 @@
#include <linux/module.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/xen/hypervisor.h>
#include <xen/xen.h>
@@ -136,6 +136,7 @@ static int watch_otherend(struct xenbus_device *dev)
container_of(dev->dev.bus, struct xen_bus_type, bus);
return xenbus_watch_pathfmt(dev, &dev->otherend_watch,
+ bus->otherend_will_handle,
bus->otherend_changed,
"%s/%s", dev->otherend, "state");
}
@@ -205,6 +206,64 @@ void xenbus_otherend_changed(struct xenbus_watch *watch,
}
EXPORT_SYMBOL_GPL(xenbus_otherend_changed);
+#define XENBUS_SHOW_STAT(name) \
+static ssize_t name##_show(struct device *_dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+{ \
+ struct xenbus_device *dev = to_xenbus_device(_dev); \
+ \
+ return sprintf(buf, "%d\n", atomic_read(&dev->name)); \
+} \
+static DEVICE_ATTR_RO(name)
+
+XENBUS_SHOW_STAT(event_channels);
+XENBUS_SHOW_STAT(events);
+XENBUS_SHOW_STAT(spurious_events);
+XENBUS_SHOW_STAT(jiffies_eoi_delayed);
+
+static ssize_t spurious_threshold_show(struct device *_dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct xenbus_device *dev = to_xenbus_device(_dev);
+
+ return sprintf(buf, "%d\n", dev->spurious_threshold);
+}
+
+static ssize_t spurious_threshold_store(struct device *_dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct xenbus_device *dev = to_xenbus_device(_dev);
+ unsigned int val;
+ ssize_t ret;
+
+ ret = kstrtouint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ dev->spurious_threshold = val;
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(spurious_threshold);
+
+static struct attribute *xenbus_attrs[] = {
+ &dev_attr_event_channels.attr,
+ &dev_attr_events.attr,
+ &dev_attr_spurious_events.attr,
+ &dev_attr_jiffies_eoi_delayed.attr,
+ &dev_attr_spurious_threshold.attr,
+ NULL
+};
+
+static const struct attribute_group xenbus_group = {
+ .name = "xenbus",
+ .attrs = xenbus_attrs,
+};
+
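
For reference, XENBUS_SHOW_STAT(events) expands to roughly the following read-only attribute (a sketch of the generated code):

	static ssize_t events_show(struct device *_dev,
				   struct device_attribute *attr, char *buf)
	{
		struct xenbus_device *dev = to_xenbus_device(_dev);

		return sprintf(buf, "%d\n", atomic_read(&dev->events));
	}
	static DEVICE_ATTR_RO(events);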
int xenbus_dev_probe(struct device *_dev)
{
struct xenbus_device *dev = to_xenbus_device(_dev);
@@ -252,6 +311,11 @@ int xenbus_dev_probe(struct device *_dev)
return err;
}
+ dev->spurious_threshold = 1;
+ if (sysfs_create_group(&dev->dev.kobj, &xenbus_group))
+ dev_warn(&dev->dev, "sysfs_create_group on %s failed.\n",
+ dev->nodename);
+
return 0;
fail_put:
module_put(drv->driver.owner);
@@ -261,13 +325,15 @@ fail:
}
EXPORT_SYMBOL_GPL(xenbus_dev_probe);
-int xenbus_dev_remove(struct device *_dev)
+void xenbus_dev_remove(struct device *_dev)
{
struct xenbus_device *dev = to_xenbus_device(_dev);
struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
DPRINTK("%s", dev->nodename);
+ sysfs_remove_group(&dev->dev.kobj, &xenbus_group);
+
free_otherend_watch(dev);
if (drv->remove) {
@@ -289,8 +355,6 @@ int xenbus_dev_remove(struct device *_dev)
if (!drv->allow_rebind ||
xenbus_read_driver_state(dev->nodename) == XenbusStateClosing)
xenbus_switch_state(dev, XenbusStateClosed);
-
- return 0;
}
EXPORT_SYMBOL_GPL(xenbus_dev_remove);
@@ -608,7 +672,7 @@ int xenbus_dev_suspend(struct device *dev)
if (drv->suspend)
err = drv->suspend(xdev);
if (err)
- pr_warn("suspend %s failed: %i\n", dev_name(dev), err);
+ dev_warn(dev, "suspend failed: %i\n", err);
return 0;
}
EXPORT_SYMBOL_GPL(xenbus_dev_suspend);
@@ -627,8 +691,7 @@ int xenbus_dev_resume(struct device *dev)
drv = to_xenbus_driver(dev->driver);
err = talk_to_otherend(xdev);
if (err) {
- pr_warn("resume (talk_to_otherend) %s failed: %i\n",
- dev_name(dev), err);
+ dev_warn(dev, "resume (talk_to_otherend) failed: %i\n", err);
return err;
}
@@ -637,15 +700,14 @@ int xenbus_dev_resume(struct device *dev)
if (drv->resume) {
err = drv->resume(xdev);
if (err) {
- pr_warn("resume %s failed: %i\n", dev_name(dev), err);
+ dev_warn(dev, "resume failed: %i\n", err);
return err;
}
}
err = watch_otherend(xdev);
if (err) {
- pr_warn("resume (watch_otherend) %s failed: %d.\n",
- dev_name(dev), err);
+ dev_warn(dev, "resume (watch_otherend) failed: %d\n", err);
return err;
}
@@ -684,29 +746,107 @@ void unregister_xenstore_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
-void xenbus_probe(struct work_struct *unused)
+static void xenbus_probe(void)
{
xenstored_ready = 1;
+ /*
+ * In the HVM case, xenbus_init() deferred its call to
+ * xs_init() in case callbacks were not operational yet.
+ * So do it now.
+ */
+ if (xen_store_domain_type == XS_HVM)
+ xs_init();
+
/* Notify others that xenstore is up */
blocking_notifier_call_chain(&xenstore_chain, 0, NULL);
}
-EXPORT_SYMBOL_GPL(xenbus_probe);
-static int __init xenbus_probe_initcall(void)
+/*
+ * Returns true when XenStore init must be deferred until the PCI
+ * platform device has been initialised, since event channel
+ * interrupts cannot work before then.
+ */
+static bool xs_hvm_defer_init_for_callback(void)
{
- if (!xen_domain())
- return -ENODEV;
+#ifdef CONFIG_XEN_PVHVM
+ return xen_store_domain_type == XS_HVM &&
+ !xen_have_vector_callback;
+#else
+ return false;
+#endif
+}
- if (xen_initial_domain() || xen_hvm_domain())
- return 0;
+static int xenbus_probe_thread(void *unused)
+{
+ DEFINE_WAIT(w);
+
+ /*
+ * We actually just want to wait for *any* trigger of xb_waitq,
+ * and run xenbus_probe() the moment it occurs.
+ */
+ prepare_to_wait(&xb_waitq, &w, TASK_INTERRUPTIBLE);
+ schedule();
+ finish_wait(&xb_waitq, &w);
- xenbus_probe(NULL);
+ DPRINTK("probing");
+ xenbus_probe();
return 0;
}
+static int __init xenbus_probe_initcall(void)
+{
+ /*
+ * Probe XenBus here in the XS_PV case, and also XS_HVM unless we
+ * need to wait for the platform PCI device to come up.
+ */
+ if (xen_store_domain_type == XS_PV ||
+ (xen_store_domain_type == XS_HVM &&
+ !xs_hvm_defer_init_for_callback()))
+ xenbus_probe();
+
+ /*
+ * For XS_LOCAL, spawn a thread which will wait for xenstored
+ * or a xenstore-stubdom to be started, then probe. The thread
+ * is woken by the first activity on xb_waitq, i.e. as soon as
+ * communication starts.
+ */
+ if (xen_store_domain_type == XS_LOCAL) {
+ struct task_struct *probe_task;
+
+ probe_task = kthread_run(xenbus_probe_thread, NULL,
+ "xenbus_probe");
+ if (IS_ERR(probe_task))
+ return PTR_ERR(probe_task);
+ }
+ return 0;
+}
device_initcall(xenbus_probe_initcall);
+int xen_set_callback_via(uint64_t via)
+{
+ struct xen_hvm_param a;
+ int ret;
+
+ a.domid = DOMID_SELF;
+ a.index = HVM_PARAM_CALLBACK_IRQ;
+ a.value = via;
+
+ ret = HYPERVISOR_hvm_op(HVMOP_set_param, &a);
+ if (ret)
+ return ret;
+
+ /*
+ * If xenbus_probe_initcall() deferred the xenbus_probe()
+ * due to the callback not functioning yet, we can do it now.
+ */
+ if (!xenstored_ready && xs_hvm_defer_init_for_callback())
+ xenbus_probe();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xen_set_callback_via);
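
Caller-side sketch, modelled on drivers/xen/platform-pci.c (callback_via and pdev are that driver's locals): registering the interrupt "via" now also releases a deferred xenbus_probe():

	ret = xen_set_callback_via(callback_via);
	if (ret) {
		dev_warn(&pdev->dev,
			 "Unable to set the evtchn callback: %d\n", ret);
		goto out;
	}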
+
/* Set up event channel for xenstored which is run as a local process
* (this is normally used only in dom0)
*/
@@ -769,7 +909,7 @@ static struct notifier_block xenbus_resume_nb = {
static int __init xenbus_init(void)
{
- int err = 0;
+ int err;
uint64_t v = 0;
xen_store_domain_type = XS_UNKNOWN;
@@ -809,6 +949,29 @@ static int __init xenbus_init(void)
err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
if (err)
goto out_error;
+ /*
+ * Uninitialized hvm_params are zero and return no error.
+ * Although it is theoretically possible to have
+ * HVM_PARAM_STORE_PFN set to zero on purpose, in reality it is
+ * not zero when valid. If zero, it means that Xenstore hasn't
+ * been properly initialized. Instead of attempting to map a
+ * wrong guest physical address, return an error.
+ *
+ * Also recognize all bits set as an invalid value.
+ */
+ if (!v || !~v) {
+ err = -ENOENT;
+ goto out_error;
+ }
+ /* Avoid truncation on 32-bit. */
+#if BITS_PER_LONG == 32
+ if (v > ULONG_MAX) {
+ pr_err("%s: cannot handle HVM_PARAM_STORE_PFN=%llx > ULONG_MAX\n",
+ __func__, v);
+ err = -EINVAL;
+ goto out_error;
+ }
+#endif
xen_store_gfn = (unsigned long)v;
xen_store_interface =
xen_remap(xen_store_gfn << XEN_PAGE_SHIFT,
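
As a standalone predicate, the "!v || !~v" test above accepts everything except the two sentinel values: 0 (parameter never set) and all bits set (a sketch; hvm_store_pfn_valid is hypothetical):

	static bool hvm_store_pfn_valid(uint64_t v)
	{
		/* !~v is true only for v == ~0ULL */
		return v != 0 && v != ~0ULL;
	}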
@@ -819,11 +982,17 @@ static int __init xenbus_init(void)
break;
}
- /* Initialize the interface to xenstore. */
- err = xs_init();
- if (err) {
- pr_warn("Error initializing xenstore comms: %i\n", err);
- goto out_error;
+ /*
+ * HVM domains may not have a functional callback yet. In that
+ * case let xs_init() be called from xenbus_probe(), which will
+ * get invoked at an appropriate time.
+ */
+ if (xen_store_domain_type != XS_HVM) {
+ err = xs_init();
+ if (err) {
+ pr_warn("Error initializing xenstore comms: %i\n", err);
+ goto out_error;
+ }
}
if ((xen_store_domain_type != XS_LOCAL) &&
@@ -837,8 +1006,10 @@ static int __init xenbus_init(void)
*/
proc_create_mount_point("xen");
#endif
+ return 0;
out_error:
+ xen_store_domain_type = XS_UNKNOWN;
return err;
}
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index 9b2fbe69bccc..5abded97e1a7 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -48,7 +48,6 @@
#include <linux/semaphore.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/xen/hypervisor.h>
#include <asm/hypervisor.h>
#include <xen/xenbus.h>
@@ -181,6 +180,12 @@ static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type,
return err;
}
+static bool frontend_will_handle(struct xenbus_watch *watch,
+ const char *path, const char *token)
+{
+ return watch->nr_pending == 0;
+}
+
static void frontend_changed(struct xenbus_watch *watch,
const char *path, const char *token)
{
@@ -192,6 +197,7 @@ static struct xen_bus_type xenbus_backend = {
.levels = 3, /* backend/type/<frontend>/<id> */
.get_bus_id = backend_bus_id,
.probe = xenbus_probe_backend,
+ .otherend_will_handle = frontend_will_handle,
.otherend_changed = frontend_changed,
.bus = {
.name = "xen-backend",
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index 8a1650bbe18f..07b010a68fcf 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -19,7 +19,6 @@
#include <linux/module.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/xen/hypervisor.h>
#include <xen/xenbus.h>
#include <xen/events.h>
@@ -212,19 +211,11 @@ static int is_device_connecting(struct device *dev, void *data, bool ignore_nonessential)
if (drv && (dev->driver != drv))
return 0;
- if (ignore_nonessential) {
- /* With older QEMU, for PVonHVM guests the guest config files
- * could contain: vfb = [ 'vnc=1, vnclisten=0.0.0.0']
- * which is nonsensical as there is no PV FB (there can be
- * a PVKB) running as HVM guest. */
+ xendrv = to_xenbus_driver(dev->driver);
- if ((strncmp(xendev->nodename, "device/vkbd", 11) == 0))
- return 0;
+ if (ignore_nonessential && xendrv->not_essential)
+ return 0;
- if ((strncmp(xendev->nodename, "device/vfb", 10) == 0))
- return 0;
- }
- xendrv = to_xenbus_driver(dev->driver);
return (xendev->state < XenbusStateConnected ||
(xendev->state == XenbusStateConnected &&
xendrv->is_ready && !xendrv->is_ready(xendev)));
@@ -402,12 +393,12 @@ static void xenbus_reset_frontend(char *fe, char *be, int be_state)
case XenbusStateConnected:
xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing);
xenbus_reset_wait_for_backend(be, XenbusStateClosing);
- /* fall through */
+ fallthrough;
case XenbusStateClosing:
xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed);
xenbus_reset_wait_for_backend(be, XenbusStateClosed);
- /* fall through */
+ fallthrough;
case XenbusStateClosed:
xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising);
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 3a06eb699f33..12e02eb01f59 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -705,9 +705,13 @@ int xs_watch_msg(struct xs_watch_event *event)
spin_lock(&watches_lock);
event->handle = find_watch(event->token);
- if (event->handle != NULL) {
+ if (event->handle != NULL &&
+ (!event->handle->will_handle ||
+ event->handle->will_handle(event->handle,
+ event->path, event->token))) {
spin_lock(&watch_events_lock);
list_add_tail(&event->list, &watch_events);
+ event->handle->nr_pending++;
wake_up(&watch_events_waitq);
spin_unlock(&watch_events_lock);
} else
@@ -765,6 +769,8 @@ int register_xenbus_watch(struct xenbus_watch *watch)
sprintf(token, "%lX", (long)watch);
+ watch->nr_pending = 0;
+
down_read(&xs_watch_rwsem);
spin_lock(&watches_lock);
@@ -814,11 +820,14 @@ void unregister_xenbus_watch(struct xenbus_watch *watch)
/* Cancel pending watch events. */
spin_lock(&watch_events_lock);
- list_for_each_entry_safe(event, tmp, &watch_events, list) {
- if (event->handle != watch)
- continue;
- list_del(&event->list);
- kfree(event);
+ if (watch->nr_pending) {
+ list_for_each_entry_safe(event, tmp, &watch_events, list) {
+ if (event->handle != watch)
+ continue;
+ list_del(&event->list);
+ kfree(event);
+ }
+ watch->nr_pending = 0;
}
spin_unlock(&watch_events_lock);
@@ -865,7 +874,6 @@ void xs_suspend_cancel(void)
static int xenwatch_thread(void *unused)
{
- struct list_head *ent;
struct xs_watch_event *event;
xenwatch_pid = current->pid;
@@ -880,13 +888,15 @@ static int xenwatch_thread(void *unused)
mutex_lock(&xenwatch_mutex);
spin_lock(&watch_events_lock);
- ent = watch_events.next;
- if (ent != &watch_events)
- list_del(ent);
+ event = list_first_entry_or_null(&watch_events,
+ struct xs_watch_event, list);
+ if (event) {
+ list_del(&event->list);
+ event->handle->nr_pending--;
+ }
spin_unlock(&watch_events_lock);
- if (ent != &watch_events) {
- event = list_entry(ent, struct xs_watch_event, list);
+ if (event) {
event->handle->callback(event->handle, event->path,
event->token);
kfree(event);
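
For reference, list_first_entry_or_null() is equivalent to the open-coded head test it replaces (a sketch):

	/* before: */
	struct list_head *ent = watch_events.next;
	event = (ent != &watch_events)
		? list_entry(ent, struct xs_watch_event, list) : NULL;

	/* after: */
	event = list_first_entry_or_null(&watch_events,
					 struct xs_watch_event, list);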
diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c
index 7b1077f0abcb..34742c6e189e 100644
--- a/drivers/xen/xlate_mmu.c
+++ b/drivers/xen/xlate_mmu.c
@@ -232,7 +232,7 @@ int __init xen_xlate_map_ballooned_pages(xen_pfn_t **gfns, void **virt,
kfree(pages);
return -ENOMEM;
}
- rc = alloc_xenballooned_pages(nr_pages, pages);
+ rc = xen_alloc_unpopulated_pages(nr_pages, pages);
if (rc) {
pr_warn("%s Couldn't balloon alloc %ld pages rc:%d\n", __func__,
nr_pages, rc);
@@ -249,7 +249,7 @@ int __init xen_xlate_map_ballooned_pages(xen_pfn_t **gfns, void **virt,
if (!vaddr) {
pr_warn("%s Couldn't map %ld pages rc:%d\n", __func__,
nr_pages, rc);
- free_xenballooned_pages(nr_pages, pages);
+ xen_free_unpopulated_pages(nr_pages, pages);
kfree(pages);
kfree(pfns);
return -ENOMEM;