aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/xen
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/xen')
-rw-r--r-- drivers/xen/Kconfig 3
-rw-r--r-- drivers/xen/Makefile 4
-rw-r--r-- drivers/xen/balloon.c 33
-rw-r--r-- drivers/xen/dbgp.c 2
-rw-r--r-- drivers/xen/events/Makefile 5
-rw-r--r-- drivers/xen/events/events_2l.c 372
-rw-r--r-- drivers/xen/events/events_base.c (renamed from drivers/xen/events.c) 799
-rw-r--r-- drivers/xen/events/events_fifo.c 428
-rw-r--r-- drivers/xen/events/events_internal.h 150
-rw-r--r-- drivers/xen/evtchn.c 2
-rw-r--r-- drivers/xen/gntdev.c 2
-rw-r--r-- drivers/xen/grant-table.c 90
-rw-r--r-- drivers/xen/pci.c 2
-rw-r--r-- drivers/xen/platform-pci.c 11
-rw-r--r-- drivers/xen/swiotlb-xen.c 22
-rw-r--r-- drivers/xen/xen-acpi-cpuhotplug.c 11
-rw-r--r-- drivers/xen/xen-acpi-memhotplug.c 8
-rw-r--r-- drivers/xen/xen-acpi-pad.c 5
-rw-r--r-- drivers/xen/xen-acpi-processor.c 4
-rw-r--r-- drivers/xen/xen-selfballoon.c 22
-rw-r--r-- drivers/xen/xenbus/xenbus_client.c 3
-rw-r--r-- drivers/xen/xenbus/xenbus_probe_frontend.c 2
-rw-r--r-- drivers/xen/xencomm.c 219
23 files changed, 1403 insertions, 796 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index c794ea182140..38fb36e1c592 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -3,7 +3,6 @@ menu "Xen driver support"
config XEN_BALLOON
bool "Xen memory balloon driver"
- depends on !ARM
default y
help
The balloon driver allows the Xen domain to request more memory from
@@ -222,7 +221,7 @@ config XEN_ACPI_PROCESSOR
To do that the driver parses the Power Management data and uploads
said information to the Xen hypervisor. Then the Xen hypervisor can
- select the proper Cx and Pxx states. It also registers itslef as the
+ select the proper Cx and Pxx states. It also registers itself as the
SMM so that other drivers (such as ACPI cpufreq scaling driver) will
not load.
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 14fe79d8634a..45e00afa7f2d 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -2,7 +2,8 @@ ifeq ($(filter y, $(CONFIG_ARM) $(CONFIG_ARM64)),)
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
endif
obj-$(CONFIG_X86) += fallback.o
-obj-y += grant-table.o features.o events.o balloon.o manage.o
+obj-y += grant-table.o features.o balloon.o manage.o
+obj-y += events/
obj-y += xenbus/
nostackp := $(call cc-option, -fno-stack-protector)
@@ -15,7 +16,6 @@ xen-pad-$(CONFIG_X86) += xen-acpi-pad.o
dom0-$(CONFIG_X86) += pcpu.o
obj-$(CONFIG_XEN_DOM0) += $(dom0-y)
obj-$(CONFIG_BLOCK) += biomerge.o
-obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o
obj-$(CONFIG_XEN_SELFBALLOONING) += xen-selfballoon.o
obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 4c02e2b94103..61a6ac8fa8fc 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -157,13 +157,6 @@ static struct page *balloon_retrieve(bool prefer_highmem)
return page;
}
-static struct page *balloon_first_page(void)
-{
- if (list_empty(&ballooned_pages))
- return NULL;
- return list_entry(ballooned_pages.next, struct page, lru);
-}
-
static struct page *balloon_next_page(struct page *page)
{
struct list_head *next = page->lru.next;
@@ -328,7 +321,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
if (nr_pages > ARRAY_SIZE(frame_list))
nr_pages = ARRAY_SIZE(frame_list);
- page = balloon_first_page();
+ page = list_first_entry_or_null(&ballooned_pages, struct page, lru);
for (i = 0; i < nr_pages; i++) {
if (!page) {
nr_pages = i;
@@ -406,11 +399,25 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
state = BP_EAGAIN;
break;
}
+ scrub_page(page);
- pfn = page_to_pfn(page);
- frame_list[i] = pfn_to_mfn(pfn);
+ frame_list[i] = page_to_pfn(page);
+ }
- scrub_page(page);
+ /*
+ * Ensure that ballooned highmem pages don't have kmaps.
+ *
+ * Do this before changing the p2m as kmap_flush_unused()
+ * reads PTEs to obtain pages (and hence needs the original
+ * p2m entry).
+ */
+ kmap_flush_unused();
+
+ /* Update direct mapping, invalidate P2M, and add to balloon. */
+ for (i = 0; i < nr_pages; i++) {
+ pfn = frame_list[i];
+ frame_list[i] = pfn_to_mfn(pfn);
+ page = pfn_to_page(pfn);
#ifdef CONFIG_XEN_HAVE_PVMMU
/*
@@ -436,11 +443,9 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
}
#endif
- balloon_append(pfn_to_page(pfn));
+ balloon_append(page);
}
- /* Ensure that ballooned highmem pages don't have kmaps. */
- kmap_flush_unused();
flush_tlb_all();
set_xen_guest_handle(reservation.extent_start, frame_list);
diff --git a/drivers/xen/dbgp.c b/drivers/xen/dbgp.c
index f3ccc80a455f..8145a59fd9f6 100644
--- a/drivers/xen/dbgp.c
+++ b/drivers/xen/dbgp.c
@@ -19,7 +19,7 @@ static int xen_dbgp_op(struct usb_hcd *hcd, int op)
dbgp.op = op;
#ifdef CONFIG_PCI
- if (ctrlr->bus == &pci_bus_type) {
+ if (dev_is_pci(ctrlr)) {
const struct pci_dev *pdev = to_pci_dev(ctrlr);
dbgp.u.pci.seg = pci_domain_nr(pdev->bus);
diff --git a/drivers/xen/events/Makefile b/drivers/xen/events/Makefile
new file mode 100644
index 000000000000..62be55cd981d
--- /dev/null
+++ b/drivers/xen/events/Makefile
@@ -0,0 +1,5 @@
+obj-y += events.o
+
+events-y += events_base.o
+events-y += events_2l.o
+events-y += events_fifo.o
diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
new file mode 100644
index 000000000000..d7ff91757307
--- /dev/null
+++ b/drivers/xen/events/events_2l.c
@@ -0,0 +1,372 @@
+/*
+ * Xen event channels (2-level ABI)
+ *
+ * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
+ */
+
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
+#include <linux/linkage.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+
+#include <asm/sync_bitops.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+
+#include <xen/xen.h>
+#include <xen/xen-ops.h>
+#include <xen/events.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/event_channel.h>
+
+#include "events_internal.h"
+
+/*
+ * Note sizeof(xen_ulong_t) can be more than sizeof(unsigned long). Be
+ * careful to only use bitops which allow for this (e.g.
+ * test_bit/find_first_bit and friends but not __ffs) and to pass
+ * BITS_PER_EVTCHN_WORD as the bitmask length.
+ */
+#define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t)*8)
+/*
+ * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t
+ * array. Primarily to avoid long lines (hence the terse name).
+ *
+ * The expansion is fully parenthesized so the cast cannot bind to only
+ * part of a larger expression at the use site (e.g. BM(x)[0]).
+ */
+#define BM(x) ((unsigned long *)(x))
+/* Find the first set bit in an evtchn mask */
+#define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD)
+
+/*
+ * Per-CPU bitmap with one bit per 2-level event channel.
+ * evtchn_2l_bind_to_cpu() sets a port's bit in the mask of the CPU the
+ * port is bound to; active_evtchns() uses it to filter pending events.
+ */
+static DEFINE_PER_CPU(xen_ulong_t [EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD],
+		      cpu_evtchn_mask);
+
+/* Port limit of the 2-level ABI: always EVTCHN_2L_NR_CHANNELS. */
+static unsigned evtchn_2l_max_channels(void)
+{
+	return EVTCHN_2L_NR_CHANNELS;
+}
+
+/*
+ * Move info->evtchn from the per-CPU mask of the CPU currently recorded
+ * in info->cpu to the mask of @cpu.  info->cpu itself is not updated
+ * here.
+ */
+static void evtchn_2l_bind_to_cpu(struct irq_info *info, unsigned cpu)
+{
+	xen_ulong_t *old_mask = per_cpu(cpu_evtchn_mask, info->cpu);
+	xen_ulong_t *new_mask = per_cpu(cpu_evtchn_mask, cpu);
+
+	clear_bit(info->evtchn, BM(old_mask));
+	set_bit(info->evtchn, BM(new_mask));
+}
+
+/* Clear @port's bit in the shared-info evtchn_pending bitmap. */
+static void evtchn_2l_clear_pending(unsigned port)
+{
+	struct shared_info *shared = HYPERVISOR_shared_info;
+
+	sync_clear_bit(port, BM(shared->evtchn_pending));
+}
+
+/* Set @port's bit in the shared-info evtchn_pending bitmap. */
+static void evtchn_2l_set_pending(unsigned port)
+{
+	struct shared_info *shared = HYPERVISOR_shared_info;
+
+	sync_set_bit(port, BM(shared->evtchn_pending));
+}
+
+/* Report whether @port's bit is set in the shared-info evtchn_pending bitmap. */
+static bool evtchn_2l_is_pending(unsigned port)
+{
+	struct shared_info *shared = HYPERVISOR_shared_info;
+
+	return sync_test_bit(port, BM(shared->evtchn_pending));
+}
+
+/*
+ * Set @port's bit in the shared-info evtchn_mask bitmap and return the
+ * value the bit had before.
+ */
+static bool evtchn_2l_test_and_set_mask(unsigned port)
+{
+	struct shared_info *shared = HYPERVISOR_shared_info;
+
+	return sync_test_and_set_bit(port, BM(shared->evtchn_mask));
+}
+
+/* Set @port's bit in the shared-info evtchn_mask bitmap. */
+static void evtchn_2l_mask(unsigned port)
+{
+	struct shared_info *shared = HYPERVISOR_shared_info;
+
+	sync_set_bit(port, BM(shared->evtchn_mask));
+}
+
+/*
+ * Unmask @port.
+ *
+ * Interrupts must be disabled by the caller (checked with BUG_ON).  If
+ * @port is bound to the current CPU the mask bit is cleared directly;
+ * otherwise, or in an HVM domain with the event already pending, the
+ * EVTCHNOP_unmask hypercall is used instead.
+ */
+static void evtchn_2l_unmask(unsigned port)
+{
+	struct shared_info *shared = HYPERVISOR_shared_info;
+	unsigned int this_cpu = get_cpu();
+	int was_pending = 0;
+	int need_hypercall = 0;
+
+	BUG_ON(!irqs_disabled());
+
+	if (likely(this_cpu == cpu_from_evtchn(port))) {
+		/*
+		 * Need to clear the mask before checking pending to
+		 * avoid a race with an event becoming pending.
+		 *
+		 * EVTCHNOP_unmask will only trigger an upcall if the
+		 * mask bit was set, so if a hypercall is needed
+		 * remask the event.
+		 */
+		sync_clear_bit(port, BM(shared->evtchn_mask));
+		was_pending = sync_test_bit(port, BM(shared->evtchn_pending));
+
+		if (unlikely(was_pending && xen_hvm_domain())) {
+			sync_set_bit(port, BM(shared->evtchn_mask));
+			need_hypercall = 1;
+		}
+	} else {
+		need_hypercall = 1;
+	}
+
+	/*
+	 * Slow path (hypercall) if this is a non-local port or if this is
+	 * an hvm domain and an event is pending (hvm domains don't have
+	 * their own implementation of irq_enable).
+	 */
+	if (need_hypercall) {
+		struct evtchn_unmask unmask = { .port = port };
+
+		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
+	} else {
+		struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+
+		/*
+		 * The following is basically the equivalent of
+		 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
+		 * the interrupt edge' if the channel is masked.
+		 */
+		if (was_pending &&
+		    !sync_test_and_set_bit(port / BITS_PER_EVTCHN_WORD,
+					   BM(&vcpu_info->evtchn_pending_sel)))
+			vcpu_info->evtchn_upcall_pending = 1;
+	}
+
+	put_cpu();
+}
+
+/*
+ * Per-CPU scan position (selector word, bit within that word) at which
+ * the next call to evtchn_2l_handle_events() starts looking.
+ */
+static DEFINE_PER_CPU(unsigned int, current_word_idx);
+static DEFINE_PER_CPU(unsigned int, current_bit_idx);
+
+/*
+ * Mask out the i least significant bits of w
+ *
+ * Both arguments are parenthesized so compound expressions can be
+ * passed without changing the result.
+ */
+#define MASK_LSBS(w, i) ((w) & ((~((xen_ulong_t)0UL)) << (i)))
+
+/*
+ * Return word @idx of the events that are pending, selected by @cpu's
+ * cpu_evtchn_mask, and not set in the shared evtchn_mask.
+ */
+static inline xen_ulong_t active_evtchns(unsigned int cpu,
+					 struct shared_info *sh,
+					 unsigned int idx)
+{
+	xen_ulong_t pending = sh->evtchn_pending[idx];
+	xen_ulong_t local = per_cpu(cpu_evtchn_mask, cpu)[idx];
+	xen_ulong_t masked = sh->evtchn_mask[idx];
+
+	return pending & local & ~masked;
+}
+
+/*
+ * Search the CPU's pending events bitmasks. For each one found, map
+ * the event number to an irq, and feed it into do_IRQ() for handling.
+ *
+ * Xen uses a two-level bitmap to speed searching. The first level is
+ * a bitset of words which contain pending event bits. The second
+ * level is a bitset of pending events themselves.
+ *
+ * Bitmap words are xen_ulong_t, which can be wider than unsigned long,
+ * so word/bit indices are derived from BITS_PER_EVTCHN_WORD and single
+ * bit masks are built at xen_ulong_t width.
+ *
+ * The scan resumes from the per-CPU current_word_idx/current_bit_idx
+ * position and updates it after every processed port.
+ */
+static void evtchn_2l_handle_events(unsigned cpu)
+{
+	int irq;
+	xen_ulong_t pending_words;
+	xen_ulong_t pending_bits;
+	int start_word_idx, start_bit_idx;
+	int word_idx, bit_idx;
+	int i;
+	struct irq_desc *desc;
+	struct shared_info *s = HYPERVISOR_shared_info;
+	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+
+	/* Timer interrupt has highest priority. */
+	irq = irq_from_virq(cpu, VIRQ_TIMER);
+	if (irq != -1) {
+		unsigned int evtchn = evtchn_from_irq(irq);
+
+		/*
+		 * Index by event-channel word (xen_ulong_t), the unit
+		 * active_evtchns() works in, not by unsigned long.
+		 */
+		word_idx = evtchn / BITS_PER_EVTCHN_WORD;
+		bit_idx = evtchn % BITS_PER_EVTCHN_WORD;
+		if (active_evtchns(cpu, s, word_idx) &
+		    ((xen_ulong_t)1 << bit_idx)) {
+			desc = irq_to_desc(irq);
+			if (desc)
+				generic_handle_irq_desc(irq, desc);
+		}
+	}
+
+	/*
+	 * Master flag must be cleared /before/ clearing
+	 * selector flag. xchg_xen_ulong must contain an
+	 * appropriate barrier.
+	 */
+	pending_words = xchg_xen_ulong(&vcpu_info->evtchn_pending_sel, 0);
+
+	start_word_idx = __this_cpu_read(current_word_idx);
+	start_bit_idx = __this_cpu_read(current_bit_idx);
+
+	word_idx = start_word_idx;
+
+	for (i = 0; pending_words != 0; i++) {
+		xen_ulong_t words;
+
+		words = MASK_LSBS(pending_words, word_idx);
+
+		/*
+		 * If we masked out all events, wrap to beginning.
+		 */
+		if (words == 0) {
+			word_idx = 0;
+			bit_idx = 0;
+			continue;
+		}
+		word_idx = EVTCHN_FIRST_BIT(words);
+
+		pending_bits = active_evtchns(cpu, s, word_idx);
+		bit_idx = 0; /* usually scan entire word from start */
+		/*
+		 * We scan the starting word in two parts.
+		 *
+		 * 1st time: start in the middle, scanning the
+		 * upper bits.
+		 *
+		 * 2nd time: scan the whole word (not just the
+		 * parts skipped in the first pass) -- if an
+		 * event in the previously scanned bits is
+		 * pending again it would just be scanned on
+		 * the next loop anyway.
+		 */
+		if (word_idx == start_word_idx) {
+			if (i == 0)
+				bit_idx = start_bit_idx;
+		}
+
+		do {
+			xen_ulong_t bits;
+			int port;
+
+			bits = MASK_LSBS(pending_bits, bit_idx);
+
+			/* If we masked out all events, move on. */
+			if (bits == 0)
+				break;
+
+			bit_idx = EVTCHN_FIRST_BIT(bits);
+
+			/* Process port. */
+			port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
+			irq = get_evtchn_to_irq(port);
+
+			if (irq != -1) {
+				desc = irq_to_desc(irq);
+				if (desc)
+					generic_handle_irq_desc(irq, desc);
+			}
+
+			bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
+
+			/* Next caller starts at last processed + 1 */
+			__this_cpu_write(current_word_idx,
+					 bit_idx ? word_idx :
+					 (word_idx+1) % BITS_PER_EVTCHN_WORD);
+			__this_cpu_write(current_bit_idx, bit_idx);
+		} while (bit_idx != 0);
+
+		/*
+		 * Scan start_word_idx twice; all others once.
+		 *
+		 * The mask is built at xen_ulong_t width: a narrower
+		 * 1UL-based mask would be zero-extended and wipe the
+		 * upper selector bits when xen_ulong_t is wider than
+		 * unsigned long.
+		 */
+		if ((word_idx != start_word_idx) || (i != 0))
+			pending_words &= ~((xen_ulong_t)1 << word_idx);
+
+		word_idx = (word_idx + 1) % BITS_PER_EVTCHN_WORD;
+	}
+}
+
+/*
+ * Debug interrupt handler: dump the 2-level event channel state.
+ *
+ * While holding a function-local spinlock with interrupts saved, prints
+ * the upcall mask/pending/selector state of every online CPU, the
+ * shared pending and mask bitmaps, the current CPU's binding mask, and
+ * one line per pending port.  Always returns IRQ_HANDLED.
+ */
+irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
+{
+	static DEFINE_SPINLOCK(debug_lock);
+	struct shared_info *shared = HYPERVISOR_shared_info;
+	int this_cpu = smp_processor_id();
+	xen_ulong_t *cpu_mask = per_cpu(cpu_evtchn_mask, this_cpu);
+	struct vcpu_info *vcpu;
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&debug_lock, flags);
+
+	printk("\nvcpu %d\n ", this_cpu);
+
+	for_each_online_cpu(i) {
+		int masked;
+
+		vcpu = per_cpu(xen_vcpu, i);
+		/* For the interrupted CPU use the saved register state. */
+		if (get_irq_regs() && i == this_cpu)
+			masked = xen_irqs_disabled(get_irq_regs());
+		else
+			masked = vcpu->evtchn_upcall_mask;
+
+		printk("%d: masked=%d pending=%d event_sel %0*"PRI_xen_ulong"\n ", i,
+		       masked, vcpu->evtchn_upcall_pending,
+		       (int)(sizeof(vcpu->evtchn_pending_sel)*2),
+		       vcpu->evtchn_pending_sel);
+	}
+	/* The pending list below reports against this CPU's selector. */
+	vcpu = per_cpu(xen_vcpu, this_cpu);
+
+	printk("\npending:\n ");
+	for (i = ARRAY_SIZE(shared->evtchn_pending)-1; i >= 0; i--)
+		printk("%0*"PRI_xen_ulong"%s",
+		       (int)(sizeof(shared->evtchn_pending[0])*2),
+		       shared->evtchn_pending[i],
+		       i % 8 == 0 ? "\n " : " ");
+
+	printk("\nglobal mask:\n ");
+	for (i = ARRAY_SIZE(shared->evtchn_mask)-1; i >= 0; i--)
+		printk("%0*"PRI_xen_ulong"%s",
+		       (int)(sizeof(shared->evtchn_mask[0])*2),
+		       shared->evtchn_mask[i],
+		       i % 8 == 0 ? "\n " : " ");
+
+	printk("\nglobally unmasked:\n ");
+	for (i = ARRAY_SIZE(shared->evtchn_mask)-1; i >= 0; i--)
+		printk("%0*"PRI_xen_ulong"%s",
+		       (int)(sizeof(shared->evtchn_mask[0])*2),
+		       shared->evtchn_pending[i] & ~shared->evtchn_mask[i],
+		       i % 8 == 0 ? "\n " : " ");
+
+	printk("\nlocal cpu%d mask:\n ", this_cpu);
+	for (i = (EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD)-1; i >= 0; i--)
+		printk("%0*"PRI_xen_ulong"%s", (int)(sizeof(cpu_mask[0])*2),
+		       cpu_mask[i],
+		       i % 8 == 0 ? "\n " : " ");
+
+	printk("\nlocally unmasked:\n ");
+	for (i = ARRAY_SIZE(shared->evtchn_mask)-1; i >= 0; i--) {
+		xen_ulong_t active = shared->evtchn_pending[i]
+			& ~shared->evtchn_mask[i]
+			& cpu_mask[i];
+
+		printk("%0*"PRI_xen_ulong"%s",
+		       (int)(sizeof(shared->evtchn_mask[0])*2),
+		       active, i % 8 == 0 ? "\n " : " ");
+	}
+
+	printk("\npending list:\n");
+	for (i = 0; i < EVTCHN_2L_NR_CHANNELS; i++) {
+		int word_idx;
+
+		if (!sync_test_bit(i, BM(shared->evtchn_pending)))
+			continue;
+
+		word_idx = i / BITS_PER_EVTCHN_WORD;
+		printk(" %d: event %d -> irq %d%s%s%s\n",
+		       cpu_from_evtchn(i), i,
+		       get_evtchn_to_irq(i),
+		       sync_test_bit(word_idx, BM(&vcpu->evtchn_pending_sel))
+		       ? "" : " l2-clear",
+		       !sync_test_bit(i, BM(shared->evtchn_mask))
+		       ? "" : " globally-masked",
+		       sync_test_bit(i, BM(cpu_mask))
+		       ? "" : " locally-masked");
+	}
+
+	spin_unlock_irqrestore(&debug_lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+/* 2-level ABI implementation of the evtchn_ops hooks. */
+static const struct evtchn_ops evtchn_ops_2l = {
+	/* Both limits are served by the same function for this ABI. */
+	.max_channels      = evtchn_2l_max_channels,
+	.nr_channels       = evtchn_2l_max_channels,
+
+	/* Binding and dispatch. */
+	.bind_to_cpu       = evtchn_2l_bind_to_cpu,
+	.handle_events     = evtchn_2l_handle_events,
+
+	/* Pending bit accessors. */
+	.clear_pending     = evtchn_2l_clear_pending,
+	.set_pending       = evtchn_2l_set_pending,
+	.is_pending        = evtchn_2l_is_pending,
+
+	/* Mask bit accessors. */
+	.test_and_set_mask = evtchn_2l_test_and_set_mask,
+	.mask              = evtchn_2l_mask,
+	.unmask            = evtchn_2l_unmask,
+};
+
+/* Select the 2-level ABI: install evtchn_ops_2l and log the choice. */
+void __init xen_evtchn_2l_init(void)
+{
+	evtchn_ops = &evtchn_ops_2l;
+	pr_info("Using 2-level ABI\n");
+}
diff --git a/drivers/xen/events.c b/drivers/xen/events/events_base.c
index 4035e833ea26..f4a9e3311297 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events/events_base.c
@@ -59,6 +59,10 @@
#include <xen/interface/vcpu.h>
#include <asm/hw_irq.h>
+#include "events_internal.h"
+
+const struct evtchn_ops *evtchn_ops;
+
/*
* This lock protects updates to the following mapping and reference-count
* arrays. The lock does not need to be acquired to read the mapping tables.
@@ -73,71 +77,15 @@ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
/* IRQ <-> IPI mapping */
static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
-/* Interrupt types. */
-enum xen_irq_type {
- IRQT_UNBOUND = 0,
- IRQT_PIRQ,
- IRQT_VIRQ,
- IRQT_IPI,
- IRQT_EVTCHN
-};
-
-/*
- * Packed IRQ information:
- * type - enum xen_irq_type
- * event channel - irq->event channel mapping
- * cpu - cpu this event channel is bound to
- * index - type-specific information:
- * PIRQ - physical IRQ, GSI, flags, and owner domain
- * VIRQ - virq number
- * IPI - IPI vector
- * EVTCHN -
- */
-struct irq_info {
- struct list_head list;
- int refcnt;
- enum xen_irq_type type; /* type */
- unsigned irq;
- unsigned short evtchn; /* event channel */
- unsigned short cpu; /* cpu bound */
-
- union {
- unsigned short virq;
- enum ipi_vector ipi;
- struct {
- unsigned short pirq;
- unsigned short gsi;
- unsigned char flags;
- uint16_t domid;
- } pirq;
- } u;
-};
-#define PIRQ_NEEDS_EOI (1 << 0)
-#define PIRQ_SHAREABLE (1 << 1)
-
-static int *evtchn_to_irq;
+int **evtchn_to_irq;
#ifdef CONFIG_X86
static unsigned long *pirq_eoi_map;
#endif
static bool (*pirq_needs_eoi)(unsigned irq);
-/*
- * Note sizeof(xen_ulong_t) can be more than sizeof(unsigned long). Be
- * careful to only use bitops which allow for this (e.g
- * test_bit/find_first_bit and friends but not __ffs) and to pass
- * BITS_PER_EVTCHN_WORD as the bitmask length.
- */
-#define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t)*8)
-/*
- * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t
- * array. Primarily to avoid long lines (hence the terse name).
- */
-#define BM(x) (unsigned long *)(x)
-/* Find the first set bit in a evtchn mask */
-#define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD)
-
-static DEFINE_PER_CPU(xen_ulong_t [NR_EVENT_CHANNELS/BITS_PER_EVTCHN_WORD],
- cpu_evtchn_mask);
+/*
+ * evtchn_to_irq is a two-level table: each row holds as many entries as
+ * fit in one page.  The macro argument is parenthesized so that
+ * compound expressions index correctly.
+ */
+#define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq))
+#define EVTCHN_ROW(e) ((e) / EVTCHN_PER_ROW)
+#define EVTCHN_COL(e) ((e) % EVTCHN_PER_ROW)
/* Xen will never allocate port zero for any purpose. */
#define VALID_EVTCHN(chn) ((chn) != 0)
@@ -148,19 +96,75 @@ static struct irq_chip xen_pirq_chip;
static void enable_dynirq(struct irq_data *data);
static void disable_dynirq(struct irq_data *data);
+/*
+ * Set every entry of row @row to -1 (no irq).  The row is dereferenced
+ * unconditionally, so it must already be allocated.
+ */
+static void clear_evtchn_to_irq_row(unsigned row)
+{
+	int *entry = evtchn_to_irq[row];
+	int *end = entry + EVTCHN_PER_ROW;
+
+	while (entry < end)
+		*entry++ = -1;
+}
+
+/* Reset every allocated row of evtchn_to_irq; unallocated rows are skipped. */
+static void clear_evtchn_to_irq_all(void)
+{
+	unsigned row = 0;
+
+	while (row < EVTCHN_ROW(xen_evtchn_max_channels())) {
+		if (evtchn_to_irq[row] != NULL)
+			clear_evtchn_to_irq_row(row);
+		row++;
+	}
+}
+
+/*
+ * Record that @evtchn maps to @irq, allocating the containing row on
+ * first use.  Passing -1 as @irq clears the mapping and never
+ * allocates.
+ *
+ * Returns 0 on success, -EINVAL if @evtchn is not below
+ * xen_evtchn_max_channels(), or -ENOMEM if a new row cannot be
+ * allocated.
+ */
+static int set_evtchn_to_irq(unsigned evtchn, unsigned irq)
+{
+	unsigned row;
+	unsigned col;
+	int *cells;
+
+	if (evtchn >= xen_evtchn_max_channels())
+		return -EINVAL;
+
+	row = EVTCHN_ROW(evtchn);
+	col = EVTCHN_COL(evtchn);
+
+	cells = evtchn_to_irq[row];
+	if (cells == NULL) {
+		/* Unallocated irq entries return -1 anyway */
+		if (irq == -1)
+			return 0;
+
+		cells = (int *)get_zeroed_page(GFP_KERNEL);
+		evtchn_to_irq[row] = cells;
+		if (cells == NULL)
+			return -ENOMEM;
+
+		/* A fresh row must read as "unmapped", not as irq 0. */
+		clear_evtchn_to_irq_row(row);
+	}
+
+	cells[col] = irq;
+	return 0;
+}
+
+/*
+ * Look up the irq recorded for @evtchn.  Returns -1 when @evtchn is out
+ * of range or its row has not been allocated; otherwise returns the
+ * stored entry (which is -1 for an unmapped port).
+ */
+int get_evtchn_to_irq(unsigned evtchn)
+{
+	int *cells;
+
+	if (evtchn >= xen_evtchn_max_channels())
+		return -1;
+
+	cells = evtchn_to_irq[EVTCHN_ROW(evtchn)];
+	return cells ? cells[EVTCHN_COL(evtchn)] : -1;
+}
+
/* Get info for IRQ */
-static struct irq_info *info_for_irq(unsigned irq)
+struct irq_info *info_for_irq(unsigned irq)
{
return irq_get_handler_data(irq);
}
/* Constructors for packed IRQ information. */
-static void xen_irq_info_common_init(struct irq_info *info,
+static int xen_irq_info_common_setup(struct irq_info *info,
unsigned irq,
enum xen_irq_type type,
- unsigned short evtchn,
+ unsigned evtchn,
unsigned short cpu)
{
+ int ret;
BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
@@ -169,68 +173,78 @@ static void xen_irq_info_common_init(struct irq_info *info,
info->evtchn = evtchn;
info->cpu = cpu;
- evtchn_to_irq[evtchn] = irq;
+ ret = set_evtchn_to_irq(evtchn, irq);
+ if (ret < 0)
+ return ret;
irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
+
+ return xen_evtchn_port_setup(info);
}
-static void xen_irq_info_evtchn_init(unsigned irq,
- unsigned short evtchn)
+static int xen_irq_info_evtchn_setup(unsigned irq,
+ unsigned evtchn)
{
struct irq_info *info = info_for_irq(irq);
- xen_irq_info_common_init(info, irq, IRQT_EVTCHN, evtchn, 0);
+ return xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
}
-static void xen_irq_info_ipi_init(unsigned cpu,
+static int xen_irq_info_ipi_setup(unsigned cpu,
unsigned irq,
- unsigned short evtchn,
+ unsigned evtchn,
enum ipi_vector ipi)
{
struct irq_info *info = info_for_irq(irq);
- xen_irq_info_common_init(info, irq, IRQT_IPI, evtchn, 0);
-
info->u.ipi = ipi;
per_cpu(ipi_to_irq, cpu)[ipi] = irq;
+
+ return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
}
-static void xen_irq_info_virq_init(unsigned cpu,
+static int xen_irq_info_virq_setup(unsigned cpu,
unsigned irq,
- unsigned short evtchn,
- unsigned short virq)
+ unsigned evtchn,
+ unsigned virq)
{
struct irq_info *info = info_for_irq(irq);
- xen_irq_info_common_init(info, irq, IRQT_VIRQ, evtchn, 0);
-
info->u.virq = virq;
per_cpu(virq_to_irq, cpu)[virq] = irq;
+
+ return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
}
-static void xen_irq_info_pirq_init(unsigned irq,
- unsigned short evtchn,
- unsigned short pirq,
- unsigned short gsi,
+static int xen_irq_info_pirq_setup(unsigned irq,
+ unsigned evtchn,
+ unsigned pirq,
+ unsigned gsi,
uint16_t domid,
unsigned char flags)
{
struct irq_info *info = info_for_irq(irq);
- xen_irq_info_common_init(info, irq, IRQT_PIRQ, evtchn, 0);
-
info->u.pirq.pirq = pirq;
info->u.pirq.gsi = gsi;
info->u.pirq.domid = domid;
info->u.pirq.flags = flags;
+
+ return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
+}
+
+/*
+ * Remove @info's port-to-irq mapping and mark @info as having no event
+ * channel (port 0).  The result of set_evtchn_to_irq() is ignored;
+ * clearing a mapping never allocates.
+ */
+static void xen_irq_info_cleanup(struct irq_info *info)
+{
+	(void)set_evtchn_to_irq(info->evtchn, -1);
+	info->evtchn = 0;
+}
/*
* Accessors for packed IRQ information.
*/
-static unsigned int evtchn_from_irq(unsigned irq)
+unsigned int evtchn_from_irq(unsigned irq)
{
if (unlikely(WARN(irq < 0 || irq >= nr_irqs, "Invalid irq %d!\n", irq)))
return 0;
@@ -240,10 +254,15 @@ static unsigned int evtchn_from_irq(unsigned irq)
unsigned irq_from_evtchn(unsigned int evtchn)
{
- return evtchn_to_irq[evtchn];
+ return get_evtchn_to_irq(evtchn);
}
EXPORT_SYMBOL_GPL(irq_from_evtchn);
+/*
+ * Return the irq recorded for @virq on @cpu in the per-CPU virq_to_irq
+ * table.  @virq is used as an index without a range check.
+ */
+int irq_from_virq(unsigned int cpu, unsigned int virq)
+{
+	int *table = per_cpu(virq_to_irq, cpu);
+
+	return table[virq];
+}
+
static enum ipi_vector ipi_from_irq(unsigned irq)
{
struct irq_info *info = info_for_irq(irq);
@@ -279,14 +298,14 @@ static enum xen_irq_type type_from_irq(unsigned irq)
return info_for_irq(irq)->type;
}
-static unsigned cpu_from_irq(unsigned irq)
+unsigned cpu_from_irq(unsigned irq)
{
return info_for_irq(irq)->cpu;
}
-static unsigned int cpu_from_evtchn(unsigned int evtchn)
+unsigned int cpu_from_evtchn(unsigned int evtchn)
{
- int irq = evtchn_to_irq[evtchn];
+ int irq = get_evtchn_to_irq(evtchn);
unsigned ret = 0;
if (irq != -1)
@@ -310,67 +329,29 @@ static bool pirq_needs_eoi_flag(unsigned irq)
return info->u.pirq.flags & PIRQ_NEEDS_EOI;
}
-static inline xen_ulong_t active_evtchns(unsigned int cpu,
- struct shared_info *sh,
- unsigned int idx)
-{
- return sh->evtchn_pending[idx] &
- per_cpu(cpu_evtchn_mask, cpu)[idx] &
- ~sh->evtchn_mask[idx];
-}
-
static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
{
- int irq = evtchn_to_irq[chn];
+ int irq = get_evtchn_to_irq(chn);
+ struct irq_info *info = info_for_irq(irq);
BUG_ON(irq == -1);
#ifdef CONFIG_SMP
cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
#endif
- clear_bit(chn, BM(per_cpu(cpu_evtchn_mask, cpu_from_irq(irq))));
- set_bit(chn, BM(per_cpu(cpu_evtchn_mask, cpu)));
+ xen_evtchn_port_bind_to_cpu(info, cpu);
- info_for_irq(irq)->cpu = cpu;
-}
-
-static void init_evtchn_cpu_bindings(void)
-{
- int i;
-#ifdef CONFIG_SMP
- struct irq_info *info;
-
- /* By default all event channels notify CPU#0. */
- list_for_each_entry(info, &xen_irq_list_head, list) {
- struct irq_desc *desc = irq_to_desc(info->irq);
- cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
- }
-#endif
-
- for_each_possible_cpu(i)
- memset(per_cpu(cpu_evtchn_mask, i),
- (i == 0) ? ~0 : 0, NR_EVENT_CHANNELS/8);
-}
-
-static inline void clear_evtchn(int port)
-{
- struct shared_info *s = HYPERVISOR_shared_info;
- sync_clear_bit(port, BM(&s->evtchn_pending[0]));
+ info->cpu = cpu;
}
-static inline void set_evtchn(int port)
+static void xen_evtchn_mask_all(void)
{
- struct shared_info *s = HYPERVISOR_shared_info;
- sync_set_bit(port, BM(&s->evtchn_pending[0]));
-}
+ unsigned int evtchn;
-static inline int test_evtchn(int port)
-{
- struct shared_info *s = HYPERVISOR_shared_info;
- return sync_test_bit(port, BM(&s->evtchn_pending[0]));
+ for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++)
+ mask_evtchn(evtchn);
}
-
/**
* notify_remote_via_irq - send event to remote end of event channel via irq
* @irq: irq of event channel to send event to
@@ -388,63 +369,6 @@ void notify_remote_via_irq(int irq)
}
EXPORT_SYMBOL_GPL(notify_remote_via_irq);
-static void mask_evtchn(int port)
-{
- struct shared_info *s = HYPERVISOR_shared_info;
- sync_set_bit(port, BM(&s->evtchn_mask[0]));
-}
-
-static void unmask_evtchn(int port)
-{
- struct shared_info *s = HYPERVISOR_shared_info;
- unsigned int cpu = get_cpu();
- int do_hypercall = 0, evtchn_pending = 0;
-
- BUG_ON(!irqs_disabled());
-
- if (unlikely((cpu != cpu_from_evtchn(port))))
- do_hypercall = 1;
- else {
- /*
- * Need to clear the mask before checking pending to
- * avoid a race with an event becoming pending.
- *
- * EVTCHNOP_unmask will only trigger an upcall if the
- * mask bit was set, so if a hypercall is needed
- * remask the event.
- */
- sync_clear_bit(port, BM(&s->evtchn_mask[0]));
- evtchn_pending = sync_test_bit(port, BM(&s->evtchn_pending[0]));
-
- if (unlikely(evtchn_pending && xen_hvm_domain())) {
- sync_set_bit(port, BM(&s->evtchn_mask[0]));
- do_hypercall = 1;
- }
- }
-
- /* Slow path (hypercall) if this is a non-local port or if this is
- * an hvm domain and an event is pending (hvm domains don't have
- * their own implementation of irq_enable). */
- if (do_hypercall) {
- struct evtchn_unmask unmask = { .port = port };
- (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
- } else {
- struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
-
- /*
- * The following is basically the equivalent of
- * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
- * the interrupt edge' if the channel is masked.
- */
- if (evtchn_pending &&
- !sync_test_and_set_bit(port / BITS_PER_EVTCHN_WORD,
- BM(&vcpu_info->evtchn_pending_sel)))
- vcpu_info->evtchn_upcall_pending = 1;
- }
-
- put_cpu();
-}
-
static void xen_irq_init(unsigned irq)
{
struct irq_info *info;
@@ -538,6 +462,18 @@ static void xen_free_irq(unsigned irq)
irq_free_desc(irq);
}
+/*
+ * Close @port in the hypervisor; a failing EVTCHNOP_close is fatal
+ * (BUG).
+ *
+ * Closed ports are implicitly re-bound to VCPU0, so the local binding
+ * is updated to match -- but only while @port still has an irq mapping.
+ * Error paths call this after set_evtchn_to_irq() has failed, when no
+ * mapping exists, and bind_evtchn_to_cpu() would then trip its
+ * BUG_ON(irq == -1).
+ */
+static void xen_evtchn_close(unsigned int port)
+{
+	struct evtchn_close close;
+
+	close.port = port;
+	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+		BUG();
+
+	/* Closed ports are implicitly re-bound to VCPU0. */
+	if (get_evtchn_to_irq(port) != -1)
+		bind_evtchn_to_cpu(port, 0);
+}
+
static void pirq_query_unmask(int irq)
{
struct physdev_irq_status_query irq_status;
@@ -610,7 +546,13 @@ static unsigned int __startup_pirq(unsigned int irq)
pirq_query_unmask(irq);
- evtchn_to_irq[evtchn] = irq;
+ rc = set_evtchn_to_irq(evtchn, irq);
+ if (rc != 0) {
+ pr_err("irq%d: Failed to set port to irq mapping (%d)\n",
+ irq, rc);
+ xen_evtchn_close(evtchn);
+ return 0;
+ }
bind_evtchn_to_cpu(evtchn, 0);
info->evtchn = evtchn;
@@ -628,10 +570,9 @@ static unsigned int startup_pirq(struct irq_data *data)
static void shutdown_pirq(struct irq_data *data)
{
- struct evtchn_close close;
unsigned int irq = data->irq;
struct irq_info *info = info_for_irq(irq);
- int evtchn = evtchn_from_irq(irq);
+ unsigned evtchn = evtchn_from_irq(irq);
BUG_ON(info->type != IRQT_PIRQ);
@@ -639,14 +580,8 @@ static void shutdown_pirq(struct irq_data *data)
return;
mask_evtchn(evtchn);
-
- close.port = evtchn;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
- BUG();
-
- bind_evtchn_to_cpu(evtchn, 0);
- evtchn_to_irq[evtchn] = -1;
- info->evtchn = 0;
+ xen_evtchn_close(evtchn);
+ xen_irq_info_cleanup(info);
}
static void enable_pirq(struct irq_data *data)
@@ -675,6 +610,41 @@ int xen_irq_from_gsi(unsigned gsi)
}
EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
+/*
+ * Drop one reference to @irq.  When no references remain (or the irq
+ * was never reference counted) close its event channel if it has one,
+ * clear the matching per-CPU VIRQ/IPI lookup entry, remove the
+ * port-to-irq mapping and free the irq.
+ */
+static void __unbind_from_irq(unsigned int irq)
+{
+	int evtchn = evtchn_from_irq(irq);
+	struct irq_info *info = irq_get_handler_data(irq);
+
+	/* Still referenced by another user: nothing more to do. */
+	if (info->refcnt > 0 && --info->refcnt != 0)
+		return;
+
+	if (VALID_EVTCHN(evtchn)) {
+		/* Sample the CPU first: closing re-binds the port to CPU 0. */
+		unsigned int cpu = cpu_from_irq(irq);
+		enum xen_irq_type type;
+
+		xen_evtchn_close(evtchn);
+
+		type = type_from_irq(irq);
+		if (type == IRQT_VIRQ)
+			per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
+		else if (type == IRQT_IPI)
+			per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
+
+		xen_irq_info_cleanup(info);
+	}
+
+	BUG_ON(info->type == IRQT_UNBOUND);
+
+	xen_free_irq(irq);
+}
+
/*
* Do not make any assumptions regarding the relationship between the
* IRQ number returned here and the Xen pirq argument.
@@ -690,6 +660,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
{
int irq = -1;
struct physdev_irq irq_op;
+ int ret;
mutex_lock(&irq_mapping_update_lock);
@@ -717,8 +688,13 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
goto out;
}
- xen_irq_info_pirq_init(irq, 0, pirq, gsi, DOMID_SELF,
+ ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
shareable ? PIRQ_SHAREABLE : 0);
+ if (ret < 0) {
+ __unbind_from_irq(irq);
+ irq = ret;
+ goto out;
+ }
pirq_query_unmask(irq);
/* We try to use the handler with the appropriate semantic for the
@@ -778,7 +754,9 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
name);
- xen_irq_info_pirq_init(irq, 0, pirq, 0, domid, 0);
+ ret = xen_irq_info_pirq_setup(irq, 0, pirq, 0, domid, 0);
+ if (ret < 0)
+ goto error_irq;
ret = irq_set_msi_desc(irq, msidesc);
if (ret < 0)
goto error_irq;
@@ -786,8 +764,8 @@ out:
mutex_unlock(&irq_mapping_update_lock);
return irq;
error_irq:
+ __unbind_from_irq(irq);
mutex_unlock(&irq_mapping_update_lock);
- xen_free_irq(irq);
return ret;
}
#endif
@@ -857,13 +835,18 @@ int xen_pirq_from_irq(unsigned irq)
return pirq_from_irq(irq);
}
EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
+
int bind_evtchn_to_irq(unsigned int evtchn)
{
int irq;
+ int ret;
+
+ if (evtchn >= xen_evtchn_max_channels())
+ return -ENOMEM;
mutex_lock(&irq_mapping_update_lock);
- irq = evtchn_to_irq[evtchn];
+ irq = get_evtchn_to_irq(evtchn);
if (irq == -1) {
irq = xen_allocate_irq_dynamic();
@@ -873,7 +856,14 @@ int bind_evtchn_to_irq(unsigned int evtchn)
irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
handle_edge_irq, "event");
- xen_irq_info_evtchn_init(irq, evtchn);
+ ret = xen_irq_info_evtchn_setup(irq, evtchn);
+ if (ret < 0) {
+ __unbind_from_irq(irq);
+ irq = ret;
+ goto out;
+ }
+ /* New interdomain events are bound to VCPU 0. */
+ bind_evtchn_to_cpu(evtchn, 0);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
@@ -890,6 +880,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
{
struct evtchn_bind_ipi bind_ipi;
int evtchn, irq;
+ int ret;
mutex_lock(&irq_mapping_update_lock);
@@ -909,8 +900,12 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
BUG();
evtchn = bind_ipi.port;
- xen_irq_info_ipi_init(cpu, irq, evtchn, ipi);
-
+ ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
+ if (ret < 0) {
+ __unbind_from_irq(irq);
+ irq = ret;
+ goto out;
+ }
bind_evtchn_to_cpu(evtchn, cpu);
} else {
struct irq_info *info = info_for_irq(irq);
@@ -943,7 +938,7 @@ static int find_virq(unsigned int virq, unsigned int cpu)
int port, rc = -ENOENT;
memset(&status, 0, sizeof(status));
- for (port = 0; port <= NR_EVENT_CHANNELS; port++) {
+ for (port = 0; port < xen_evtchn_max_channels(); port++) {
status.dom = DOMID_SELF;
status.port = port;
rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
@@ -959,6 +954,19 @@ static int find_virq(unsigned int virq, unsigned int cpu)
return rc;
}
+/**
+ * xen_evtchn_nr_channels() - number of usable event channel ports
+ *
+ * This may be less than the maximum supported by the current
+ * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum
+ * supported.
+ *
+ * Return: the value reported by the nr_channels hook of the active
+ * evtchn_ops.
+ */
+unsigned xen_evtchn_nr_channels(void)
+{
+ return evtchn_ops->nr_channels();
+}
+EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
+
int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
{
struct evtchn_bind_virq bind_virq;
@@ -989,7 +997,12 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
evtchn = ret;
}
- xen_irq_info_virq_init(cpu, irq, evtchn, virq);
+ ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
+ if (ret < 0) {
+ __unbind_from_irq(irq);
+ irq = ret;
+ goto out;
+ }
bind_evtchn_to_cpu(evtchn, cpu);
} else {
@@ -1005,50 +1018,8 @@ out:
static void unbind_from_irq(unsigned int irq)
{
- struct evtchn_close close;
- int evtchn = evtchn_from_irq(irq);
- struct irq_info *info = irq_get_handler_data(irq);
-
- if (WARN_ON(!info))
- return;
-
mutex_lock(&irq_mapping_update_lock);
-
- if (info->refcnt > 0) {
- info->refcnt--;
- if (info->refcnt != 0)
- goto done;
- }
-
- if (VALID_EVTCHN(evtchn)) {
- close.port = evtchn;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
- BUG();
-
- switch (type_from_irq(irq)) {
- case IRQT_VIRQ:
- per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
- [virq_from_irq(irq)] = -1;
- break;
- case IRQT_IPI:
- per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
- [ipi_from_irq(irq)] = -1;
- break;
- default:
- break;
- }
-
- /* Closed ports are implicitly re-bound to VCPU0. */
- bind_evtchn_to_cpu(evtchn, 0);
-
- evtchn_to_irq[evtchn] = -1;
- }
-
- BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND);
-
- xen_free_irq(irq);
-
- done:
+ __unbind_from_irq(irq);
mutex_unlock(&irq_mapping_update_lock);
}
@@ -1148,9 +1119,26 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id)
}
EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
+/**
+ * xen_set_irq_priority() - set an event channel priority.
+ * @irq: irq bound to an event channel.
+ * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN.
+ *
+ * Return: the result of the EVTCHNOP_set_priority hypercall.
+ */
+int xen_set_irq_priority(unsigned irq, unsigned priority)
+{
+	/* Hypercall argument: the port behind @irq and the requested priority. */
+	struct evtchn_set_priority op = {
+		.port = evtchn_from_irq(irq),
+		.priority = priority,
+	};
+
+	return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority, &op);
+}
+EXPORT_SYMBOL_GPL(xen_set_irq_priority);
+
int evtchn_make_refcounted(unsigned int evtchn)
{
- int irq = evtchn_to_irq[evtchn];
+ int irq = get_evtchn_to_irq(evtchn);
struct irq_info *info;
if (irq == -1)
@@ -1175,12 +1163,12 @@ int evtchn_get(unsigned int evtchn)
struct irq_info *info;
int err = -ENOENT;
- if (evtchn >= NR_EVENT_CHANNELS)
+ if (evtchn >= xen_evtchn_max_channels())
return -EINVAL;
mutex_lock(&irq_mapping_update_lock);
- irq = evtchn_to_irq[evtchn];
+ irq = get_evtchn_to_irq(evtchn);
if (irq == -1)
goto done;
@@ -1204,7 +1192,7 @@ EXPORT_SYMBOL_GPL(evtchn_get);
void evtchn_put(unsigned int evtchn)
{
- int irq = evtchn_to_irq[evtchn];
+ int irq = get_evtchn_to_irq(evtchn);
if (WARN_ON(irq == -1))
return;
unbind_from_irq(irq);
@@ -1228,222 +1216,21 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
notify_remote_via_irq(irq);
}
-irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
-{
- struct shared_info *sh = HYPERVISOR_shared_info;
- int cpu = smp_processor_id();
- xen_ulong_t *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
- int i;
- unsigned long flags;
- static DEFINE_SPINLOCK(debug_lock);
- struct vcpu_info *v;
-
- spin_lock_irqsave(&debug_lock, flags);
-
- printk("\nvcpu %d\n ", cpu);
-
- for_each_online_cpu(i) {
- int pending;
- v = per_cpu(xen_vcpu, i);
- pending = (get_irq_regs() && i == cpu)
- ? xen_irqs_disabled(get_irq_regs())
- : v->evtchn_upcall_mask;
- printk("%d: masked=%d pending=%d event_sel %0*"PRI_xen_ulong"\n ", i,
- pending, v->evtchn_upcall_pending,
- (int)(sizeof(v->evtchn_pending_sel)*2),
- v->evtchn_pending_sel);
- }
- v = per_cpu(xen_vcpu, cpu);
-
- printk("\npending:\n ");
- for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
- printk("%0*"PRI_xen_ulong"%s",
- (int)sizeof(sh->evtchn_pending[0])*2,
- sh->evtchn_pending[i],
- i % 8 == 0 ? "\n " : " ");
- printk("\nglobal mask:\n ");
- for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
- printk("%0*"PRI_xen_ulong"%s",
- (int)(sizeof(sh->evtchn_mask[0])*2),
- sh->evtchn_mask[i],
- i % 8 == 0 ? "\n " : " ");
-
- printk("\nglobally unmasked:\n ");
- for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
- printk("%0*"PRI_xen_ulong"%s",
- (int)(sizeof(sh->evtchn_mask[0])*2),
- sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
- i % 8 == 0 ? "\n " : " ");
-
- printk("\nlocal cpu%d mask:\n ", cpu);
- for (i = (NR_EVENT_CHANNELS/BITS_PER_EVTCHN_WORD)-1; i >= 0; i--)
- printk("%0*"PRI_xen_ulong"%s", (int)(sizeof(cpu_evtchn[0])*2),
- cpu_evtchn[i],
- i % 8 == 0 ? "\n " : " ");
-
- printk("\nlocally unmasked:\n ");
- for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) {
- xen_ulong_t pending = sh->evtchn_pending[i]
- & ~sh->evtchn_mask[i]
- & cpu_evtchn[i];
- printk("%0*"PRI_xen_ulong"%s",
- (int)(sizeof(sh->evtchn_mask[0])*2),
- pending, i % 8 == 0 ? "\n " : " ");
- }
-
- printk("\npending list:\n");
- for (i = 0; i < NR_EVENT_CHANNELS; i++) {
- if (sync_test_bit(i, BM(sh->evtchn_pending))) {
- int word_idx = i / BITS_PER_EVTCHN_WORD;
- printk(" %d: event %d -> irq %d%s%s%s\n",
- cpu_from_evtchn(i), i,
- evtchn_to_irq[i],
- sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
- ? "" : " l2-clear",
- !sync_test_bit(i, BM(sh->evtchn_mask))
- ? "" : " globally-masked",
- sync_test_bit(i, BM(cpu_evtchn))
- ? "" : " locally-masked");
- }
- }
-
- spin_unlock_irqrestore(&debug_lock, flags);
-
- return IRQ_HANDLED;
-}
-
static DEFINE_PER_CPU(unsigned, xed_nesting_count);
-static DEFINE_PER_CPU(unsigned int, current_word_idx);
-static DEFINE_PER_CPU(unsigned int, current_bit_idx);
-/*
- * Mask out the i least significant bits of w
- */
-#define MASK_LSBS(w, i) (w & ((~((xen_ulong_t)0UL)) << i))
-
-/*
- * Search the CPUs pending events bitmasks. For each one found, map
- * the event number to an irq, and feed it into do_IRQ() for
- * handling.
- *
- * Xen uses a two-level bitmap to speed searching. The first level is
- * a bitset of words which contain pending event bits. The second
- * level is a bitset of pending events themselves.
- */
static void __xen_evtchn_do_upcall(void)
{
- int start_word_idx, start_bit_idx;
- int word_idx, bit_idx;
- int i, irq;
- int cpu = get_cpu();
- struct shared_info *s = HYPERVISOR_shared_info;
struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+ int cpu = get_cpu();
unsigned count;
do {
- xen_ulong_t pending_words;
- xen_ulong_t pending_bits;
- struct irq_desc *desc;
-
vcpu_info->evtchn_upcall_pending = 0;
if (__this_cpu_inc_return(xed_nesting_count) - 1)
goto out;
- /*
- * Master flag must be cleared /before/ clearing
- * selector flag. xchg_xen_ulong must contain an
- * appropriate barrier.
- */
- if ((irq = per_cpu(virq_to_irq, cpu)[VIRQ_TIMER]) != -1) {
- int evtchn = evtchn_from_irq(irq);
- word_idx = evtchn / BITS_PER_LONG;
- pending_bits = evtchn % BITS_PER_LONG;
- if (active_evtchns(cpu, s, word_idx) & (1ULL << pending_bits)) {
- desc = irq_to_desc(irq);
- if (desc)
- generic_handle_irq_desc(irq, desc);
- }
- }
-
- pending_words = xchg_xen_ulong(&vcpu_info->evtchn_pending_sel, 0);
-
- start_word_idx = __this_cpu_read(current_word_idx);
- start_bit_idx = __this_cpu_read(current_bit_idx);
-
- word_idx = start_word_idx;
-
- for (i = 0; pending_words != 0; i++) {
- xen_ulong_t words;
-
- words = MASK_LSBS(pending_words, word_idx);
-
- /*
- * If we masked out all events, wrap to beginning.
- */
- if (words == 0) {
- word_idx = 0;
- bit_idx = 0;
- continue;
- }
- word_idx = EVTCHN_FIRST_BIT(words);
-
- pending_bits = active_evtchns(cpu, s, word_idx);
- bit_idx = 0; /* usually scan entire word from start */
- /*
- * We scan the starting word in two parts.
- *
- * 1st time: start in the middle, scanning the
- * upper bits.
- *
- * 2nd time: scan the whole word (not just the
- * parts skipped in the first pass) -- if an
- * event in the previously scanned bits is
- * pending again it would just be scanned on
- * the next loop anyway.
- */
- if (word_idx == start_word_idx) {
- if (i == 0)
- bit_idx = start_bit_idx;
- }
-
- do {
- xen_ulong_t bits;
- int port;
-
- bits = MASK_LSBS(pending_bits, bit_idx);
-
- /* If we masked out all events, move on. */
- if (bits == 0)
- break;
-
- bit_idx = EVTCHN_FIRST_BIT(bits);
-
- /* Process port. */
- port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
- irq = evtchn_to_irq[port];
-
- if (irq != -1) {
- desc = irq_to_desc(irq);
- if (desc)
- generic_handle_irq_desc(irq, desc);
- }
-
- bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
-
- /* Next caller starts at last processed + 1 */
- __this_cpu_write(current_word_idx,
- bit_idx ? word_idx :
- (word_idx+1) % BITS_PER_EVTCHN_WORD);
- __this_cpu_write(current_bit_idx, bit_idx);
- } while (bit_idx != 0);
-
- /* Scan start_l1i twice; all others once. */
- if ((word_idx != start_word_idx) || (i != 0))
- pending_words &= ~(1UL << word_idx);
-
- word_idx = (word_idx + 1) % BITS_PER_EVTCHN_WORD;
- }
+ xen_evtchn_handle_events(cpu);
BUG_ON(!irqs_disabled());
@@ -1492,12 +1279,12 @@ void rebind_evtchn_irq(int evtchn, int irq)
mutex_lock(&irq_mapping_update_lock);
/* After resume the irq<->evtchn mappings are all cleared out */
- BUG_ON(evtchn_to_irq[evtchn] != -1);
+ BUG_ON(get_evtchn_to_irq(evtchn) != -1);
/* Expect irq to have been bound before,
so there should be a proper type */
BUG_ON(info->type == IRQT_UNBOUND);
- xen_irq_info_evtchn_init(irq, evtchn);
+ (void)xen_irq_info_evtchn_setup(irq, evtchn);
mutex_unlock(&irq_mapping_update_lock);
@@ -1511,7 +1298,6 @@ void rebind_evtchn_irq(int evtchn, int irq)
/* Rebind an evtchn so that it gets delivered to a specific cpu */
static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
{
- struct shared_info *s = HYPERVISOR_shared_info;
struct evtchn_bind_vcpu bind_vcpu;
int evtchn = evtchn_from_irq(irq);
int masked;
@@ -1534,7 +1320,7 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
* Mask the event while changing the VCPU binding to prevent
* it being delivered on an unexpected VCPU.
*/
- masked = sync_test_and_set_bit(evtchn, BM(s->evtchn_mask));
+ masked = test_and_set_mask(evtchn);
/*
* If this fails, it usually just indicates that we're dealing with a
@@ -1558,22 +1344,26 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
return rebind_irq_to_cpu(data->irq, tcpu);
}
-int resend_irq_on_evtchn(unsigned int irq)
+static int retrigger_evtchn(int evtchn)
{
- int masked, evtchn = evtchn_from_irq(irq);
- struct shared_info *s = HYPERVISOR_shared_info;
+ int masked;
if (!VALID_EVTCHN(evtchn))
- return 1;
+ return 0;
- masked = sync_test_and_set_bit(evtchn, BM(s->evtchn_mask));
- sync_set_bit(evtchn, BM(s->evtchn_pending));
+ masked = test_and_set_mask(evtchn);
+ set_evtchn(evtchn);
if (!masked)
unmask_evtchn(evtchn);
return 1;
}
+/*
+ * Re-raise the event channel bound to @irq.
+ *
+ * Looks up the port with evtchn_from_irq() and returns whatever
+ * retrigger_evtchn() returns for it.
+ */
+int resend_irq_on_evtchn(unsigned int irq)
+{
+ return retrigger_evtchn(evtchn_from_irq(irq));
+}
+
static void enable_dynirq(struct irq_data *data)
{
int evtchn = evtchn_from_irq(data->irq);
@@ -1608,21 +1398,7 @@ static void mask_ack_dynirq(struct irq_data *data)
static int retrigger_dynirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(data->irq);
- struct shared_info *sh = HYPERVISOR_shared_info;
- int ret = 0;
-
- if (VALID_EVTCHN(evtchn)) {
- int masked;
-
- masked = sync_test_and_set_bit(evtchn, BM(sh->evtchn_mask));
- sync_set_bit(evtchn, BM(sh->evtchn_pending));
- if (!masked)
- unmask_evtchn(evtchn);
- ret = 1;
- }
-
- return ret;
+ return retrigger_evtchn(evtchn_from_irq(data->irq));
}
static void restore_pirqs(void)
@@ -1683,7 +1459,7 @@ static void restore_cpu_virqs(unsigned int cpu)
evtchn = bind_virq.port;
/* Record the new mapping. */
- xen_irq_info_virq_init(cpu, irq, evtchn, virq);
+ (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
bind_evtchn_to_cpu(evtchn, cpu);
}
}
@@ -1707,7 +1483,7 @@ static void restore_cpu_ipis(unsigned int cpu)
evtchn = bind_ipi.port;
/* Record the new mapping. */
- xen_irq_info_ipi_init(cpu, irq, evtchn, ipi);
+ (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
bind_evtchn_to_cpu(evtchn, cpu);
}
}
@@ -1784,21 +1560,18 @@ EXPORT_SYMBOL_GPL(xen_test_irq_shared);
void xen_irq_resume(void)
{
- unsigned int cpu, evtchn;
+ unsigned int cpu;
struct irq_info *info;
- init_evtchn_cpu_bindings();
-
/* New event-channel space is not 'live' yet. */
- for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
- mask_evtchn(evtchn);
+ xen_evtchn_mask_all();
+ xen_evtchn_resume();
/* No IRQ <-> event-channel mappings. */
list_for_each_entry(info, &xen_irq_list_head, list)
info->evtchn = 0; /* zap event-channel binding */
- for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
- evtchn_to_irq[evtchn] = -1;
+ clear_evtchn_to_irq_all();
for_each_possible_cpu(cpu) {
restore_cpu_virqs(cpu);
@@ -1889,27 +1662,40 @@ void xen_callback_vector(void)
void xen_callback_vector(void) {}
#endif
+/* Register the parameter below under the "xen." prefix. */
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "xen."
+
+/*
+ * When true (the default), xen_init_IRQ() tries the FIFO-based event
+ * channel ABI first and only falls back to the 2-level ABI if that
+ * initialisation fails. When false, the 2-level ABI is used directly.
+ */
+static bool fifo_events = true;
+module_param(fifo_events, bool, 0);
+
void __init xen_init_IRQ(void)
{
- int i;
+ int ret = -EINVAL;
- evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
- GFP_KERNEL);
- BUG_ON(!evtchn_to_irq);
- for (i = 0; i < NR_EVENT_CHANNELS; i++)
- evtchn_to_irq[i] = -1;
+ if (fifo_events)
+ ret = xen_evtchn_fifo_init();
+ if (ret < 0)
+ xen_evtchn_2l_init();
- init_evtchn_cpu_bindings();
+ evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
+ sizeof(*evtchn_to_irq), GFP_KERNEL);
+ BUG_ON(!evtchn_to_irq);
/* No event channels are 'live' right now. */
- for (i = 0; i < NR_EVENT_CHANNELS; i++)
- mask_evtchn(i);
+ xen_evtchn_mask_all();
pirq_needs_eoi = pirq_needs_eoi_flag;
#ifdef CONFIG_X86
- if (xen_hvm_domain()) {
+ if (xen_pv_domain()) {
+ irq_ctx_init(smp_processor_id());
+ if (xen_initial_domain())
+ pci_xen_initial_domain();
+ }
+ if (xen_feature(XENFEAT_hvm_callback_vector))
xen_callback_vector();
+
+ if (xen_hvm_domain()) {
native_init_IRQ();
/* pci_xen_hvm_init must be called after native_init_IRQ so that
* __acpi_register_gsi can point at the right function */
@@ -1918,13 +1704,10 @@ void __init xen_init_IRQ(void)
int rc;
struct physdev_pirq_eoi_gmfn eoi_gmfn;
- irq_ctx_init(smp_processor_id());
- if (xen_initial_domain())
- pci_xen_initial_domain();
-
pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map);
rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
+ /* TODO: No PVH support for PIRQ EOI */
if (rc != 0) {
free_page((unsigned long) pirq_eoi_map);
pirq_eoi_map = NULL;
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
new file mode 100644
index 000000000000..1de2a191b395
--- /dev/null
+++ b/drivers/xen/events/events_fifo.c
@@ -0,0 +1,428 @@
+/*
+ * Xen event channels (FIFO-based ABI)
+ *
+ * Copyright (C) 2013 Citrix Systems R&D ltd.
+ *
+ * This source code is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * Or, when distributed separately from the Linux kernel or
+ * incorporated into other software packages, subject to the following
+ * license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
+#include <linux/linkage.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+#include <linux/cpu.h>
+
+#include <asm/sync_bitops.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/page.h>
+
+#include <xen/xen.h>
+#include <xen/xen-ops.h>
+#include <xen/events.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/event_channel.h>
+
+#include "events_internal.h"
+
+/* Number of event words that fit in one page of the event array. */
+#define EVENT_WORDS_PER_PAGE (PAGE_SIZE / sizeof(event_word_t))
+/* Number of pages needed to cover EVTCHN_FIFO_NR_CHANNELS event words. */
+#define MAX_EVENT_ARRAY_PAGES (EVTCHN_FIFO_NR_CHANNELS / EVENT_WORDS_PER_PAGE)
+
+/*
+ * Per-queue head values cached locally; a head of 0 means the head
+ * must be re-read from the control block (see consume_one_event()).
+ */
+struct evtchn_fifo_queue {
+ uint32_t head[EVTCHN_FIFO_MAX_QUEUES];
+};
+
+/* Per-CPU control block page registered with EVTCHNOP_init_control. */
+static DEFINE_PER_CPU(struct evtchn_fifo_control_block *, cpu_control_block);
+/* Per-CPU cached queue heads. */
+static DEFINE_PER_CPU(struct evtchn_fifo_queue, cpu_queue);
+/* Pages of event words, indexed by port / EVENT_WORDS_PER_PAGE. */
+static event_word_t *event_array[MAX_EVENT_ARRAY_PAGES] __read_mostly;
+/* Number of event_array pages successfully added via EVTCHNOP_expand_array. */
+static unsigned event_array_pages __read_mostly;
+
+/* Cast an event word pointer to the pointer type the bitops take. */
+#define BM(w) ((unsigned long *)(w))
+
+/*
+ * Locate the event word for @port: the quotient by EVENT_WORDS_PER_PAGE
+ * selects the array page, the remainder selects the word in that page.
+ */
+static inline event_word_t *event_word_from_port(unsigned port)
+{
+	event_word_t *page = event_array[port / EVENT_WORDS_PER_PAGE];
+
+	return page + port % EVENT_WORDS_PER_PAGE;
+}
+
+/* max_channels hook: the fixed port limit EVTCHN_FIFO_NR_CHANNELS. */
+static unsigned evtchn_fifo_max_channels(void)
+{
+ return EVTCHN_FIFO_NR_CHANNELS;
+}
+
+/*
+ * nr_channels hook: number of ports covered by the event array pages
+ * added so far (event_array_pages * EVENT_WORDS_PER_PAGE).
+ */
+static unsigned evtchn_fifo_nr_channels(void)
+{
+ return event_array_pages * EVENT_WORDS_PER_PAGE;
+}
+
+/*
+ * Free every allocated event array page at index event_array_pages and
+ * above, stopping at the first empty slot.
+ */
+static void free_unused_array_pages(void)
+{
+	unsigned idx = event_array_pages;
+
+	while (idx < MAX_EVENT_ARRAY_PAGES && event_array[idx]) {
+		free_page((unsigned long)event_array[idx]);
+		event_array[idx] = NULL;
+		idx++;
+	}
+}
+
+/* Set every event word in @array_page to "MASKED only". */
+static void init_array_page(event_word_t *array_page)
+{
+	event_word_t *word = array_page;
+	event_word_t *end = array_page + EVENT_WORDS_PER_PAGE;
+
+	for (; word < end; word++)
+		*word = 1 << EVTCHN_FIFO_MASKED;
+}
+
+/*
+ * setup hook: make sure the event array is large enough to hold the
+ * event word for info->evtchn, adding pages one at a time.
+ *
+ * Returns 0 on success, -EINVAL if the port lies beyond
+ * MAX_EVENT_ARRAY_PAGES, -ENOMEM if a page cannot be allocated, or the
+ * negative result of EVTCHNOP_expand_array. Panics if not even the
+ * first page can be added.
+ */
+static int evtchn_fifo_setup(struct irq_info *info)
+{
+ unsigned port = info->evtchn;
+ unsigned new_array_pages;
+ int ret;
+
+ /* Number of pages needed so that @port has an event word. */
+ new_array_pages = port / EVENT_WORDS_PER_PAGE + 1;
+
+ if (new_array_pages > MAX_EVENT_ARRAY_PAGES)
+ return -EINVAL;
+
+ while (event_array_pages < new_array_pages) {
+ void *array_page;
+ struct evtchn_expand_array expand_array;
+
+ /* Might already have a page if we've resumed. */
+ array_page = event_array[event_array_pages];
+ if (!array_page) {
+ array_page = (void *)__get_free_page(GFP_KERNEL);
+ if (array_page == NULL) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ event_array[event_array_pages] = array_page;
+ }
+
+ /* Mask all events in this page before adding it. */
+ init_array_page(array_page);
+
+ expand_array.array_gfn = virt_to_mfn(array_page);
+
+ ret = HYPERVISOR_event_channel_op(EVTCHNOP_expand_array, &expand_array);
+ if (ret < 0)
+ goto error;
+
+ /* Only count the page once the hypercall has accepted it. */
+ event_array_pages++;
+ }
+ return 0;
+
+ error:
+ if (event_array_pages == 0)
+ panic("xen: unable to expand event array with initial page (%d)\n", ret);
+ else
+ pr_err("unable to expand event array (%d)\n", ret);
+ /* Release any page stored in event_array[] but not counted. */
+ free_unused_array_pages();
+ return ret;
+}
+
+/*
+ * bind_to_cpu hook. The hook is invoked unconditionally through
+ * evtchn_ops, so it must exist, but this ABI has nothing to do here.
+ */
+static void evtchn_fifo_bind_to_cpu(struct irq_info *info, unsigned cpu)
+{
+ /* no-op */
+}
+
+/* clear_pending hook: clear the PENDING bit in @port's event word. */
+static void evtchn_fifo_clear_pending(unsigned port)
+{
+	sync_clear_bit(EVTCHN_FIFO_PENDING, BM(event_word_from_port(port)));
+}
+
+/* set_pending hook: set the PENDING bit in @port's event word. */
+static void evtchn_fifo_set_pending(unsigned port)
+{
+	sync_set_bit(EVTCHN_FIFO_PENDING, BM(event_word_from_port(port)));
+}
+
+/* is_pending hook: report whether PENDING is set in @port's event word. */
+static bool evtchn_fifo_is_pending(unsigned port)
+{
+	return sync_test_bit(EVTCHN_FIFO_PENDING, BM(event_word_from_port(port)));
+}
+
+/*
+ * test_and_set_mask hook: set MASKED in @port's event word and return
+ * whether it was already set.
+ */
+static bool evtchn_fifo_test_and_set_mask(unsigned port)
+{
+	return sync_test_and_set_bit(EVTCHN_FIFO_MASKED,
+				     BM(event_word_from_port(port)));
+}
+
+/* mask hook: set the MASKED bit in @port's event word. */
+static void evtchn_fifo_mask(unsigned port)
+{
+	sync_set_bit(EVTCHN_FIFO_MASKED, BM(event_word_from_port(port)));
+}
+
+/*
+ * Clear MASKED, spinning if BUSY is set.
+ *
+ * The expected value handed to cmpxchg always has BUSY cleared, so the
+ * exchange can only succeed while BUSY is clear in memory; otherwise
+ * the loop retries with the freshly observed word.
+ */
+static void clear_masked(volatile event_word_t *word)
+{
+ event_word_t new, old, w;
+
+ w = *word;
+
+ do {
+ /* Expected value: what we last saw, but with BUSY clear. */
+ old = w & ~(1 << EVTCHN_FIFO_BUSY);
+ new = old & ~(1 << EVTCHN_FIFO_MASKED);
+ w = sync_cmpxchg(word, old, new);
+ } while (w != old);
+}
+
+/*
+ * unmask hook: clear MASKED for @port and, if the event is pending,
+ * issue EVTCHNOP_unmask for it.
+ *
+ * Must be called with interrupts disabled (BUG otherwise).
+ */
+static void evtchn_fifo_unmask(unsigned port)
+{
+ event_word_t *word = event_word_from_port(port);
+
+ BUG_ON(!irqs_disabled());
+
+ clear_masked(word);
+ if (sync_test_bit(EVTCHN_FIFO_PENDING, BM(word))) {
+ struct evtchn_unmask unmask = { .port = port };
+ /* The hypercall result is deliberately ignored. */
+ (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
+ }
+}
+
+/*
+ * Atomically clear the LINKED bit and the link field of @word.
+ *
+ * Returns the link field the word held immediately before it was
+ * cleared.
+ */
+static uint32_t clear_linked(volatile event_word_t *word)
+{
+ event_word_t new, old, w;
+
+ w = *word;
+
+ do {
+ old = w;
+ new = (w & ~((1 << EVTCHN_FIFO_LINKED)
+ | EVTCHN_FIFO_LINK_MASK));
+ } while ((w = sync_cmpxchg(word, old, new)) != old);
+
+ /* On exit w == old, i.e. the value that was replaced. */
+ return w & EVTCHN_FIFO_LINK_MASK;
+}
+
+/*
+ * Run the irq handler bound to @port, if any. Ports without an irq
+ * mapping or without an irq descriptor are silently ignored.
+ */
+static void handle_irq_for_port(unsigned port)
+{
+	struct irq_desc *desc;
+	int irq = get_evtchn_to_irq(port);
+
+	if (irq == -1)
+		return;
+
+	desc = irq_to_desc(irq);
+	if (!desc)
+		return;
+
+	generic_handle_irq_desc(irq, desc);
+}
+
+/*
+ * Consume the event at the head of @cpu's queue for @priority.
+ *
+ * @ready is the caller's local copy of the ready word; the bit for
+ * @priority is cleared in it once the queue is found to be empty. It is
+ * an unsigned long because clear_bit() operates on unsigned long
+ * storage.
+ */
+static void consume_one_event(unsigned cpu,
+			      struct evtchn_fifo_control_block *control_block,
+			      unsigned priority, unsigned long *ready)
+{
+	struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
+	uint32_t head;
+	unsigned port;
+	event_word_t *word;
+
+	head = q->head[priority];
+
+	/*
+	 * Reached the tail last time? Read the new HEAD from the
+	 * control block.
+	 */
+	if (head == 0) {
+		rmb(); /* Ensure word is up-to-date before reading head. */
+		head = control_block->head[priority];
+	}
+
+	port = head;
+	word = event_word_from_port(port);
+	head = clear_linked(word);
+
+	/*
+	 * If the link is non-zero, there are more events in the
+	 * queue, otherwise the queue is empty.
+	 *
+	 * If the queue is empty, clear this priority from our local
+	 * copy of the ready word.
+	 */
+	if (head == 0)
+		clear_bit(priority, ready);
+
+	/* Only deliver events that are pending and not masked. */
+	if (sync_test_bit(EVTCHN_FIFO_PENDING, BM(word))
+	    && !sync_test_bit(EVTCHN_FIFO_MASKED, BM(word)))
+		handle_irq_for_port(port);
+
+	q->head[priority] = head;
+}
+
+/*
+ * handle_events hook: drain every ready queue of @cpu.
+ *
+ * The ready word is fetched and zeroed with xchg(), then one event is
+ * consumed at a time from the lowest set bit, merging in any bits that
+ * became ready in the meantime.
+ *
+ * The local copy is an unsigned long, not a uint32_t: find_first_bit()
+ * and clear_bit() access whole unsigned longs, so a 32-bit local cast
+ * to unsigned long * would be accessed out of bounds on 64-bit builds.
+ */
+static void evtchn_fifo_handle_events(unsigned cpu)
+{
+	struct evtchn_fifo_control_block *control_block;
+	unsigned long ready;
+	unsigned q;
+
+	control_block = per_cpu(cpu_control_block, cpu);
+
+	ready = xchg(&control_block->ready, 0);
+
+	while (ready) {
+		q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES);
+		consume_one_event(cpu, control_block, q, &ready);
+		ready |= xchg(&control_block->ready, 0);
+	}
+}
+
+/*
+ * resume hook: re-register each online CPU's control block with
+ * EVTCHNOP_init_control, free the control blocks of offline CPUs, and
+ * reset the event array page count so the array is rebuilt on demand.
+ *
+ * A failing EVTCHNOP_init_control is fatal (BUG).
+ */
+static void evtchn_fifo_resume(void)
+{
+ unsigned cpu;
+
+ for_each_possible_cpu(cpu) {
+ void *control_block = per_cpu(cpu_control_block, cpu);
+ struct evtchn_init_control init_control;
+ int ret;
+
+ /* This CPU never had a control block set up. */
+ if (!control_block)
+ continue;
+
+ /*
+ * If this CPU is offline, take the opportunity to
+ * free the control block while it is not being
+ * used.
+ */
+ if (!cpu_online(cpu)) {
+ free_page((unsigned long)control_block);
+ per_cpu(cpu_control_block, cpu) = NULL;
+ continue;
+ }
+
+ init_control.control_gfn = virt_to_mfn(control_block);
+ init_control.offset = 0;
+ init_control.vcpu = cpu;
+
+ ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control,
+ &init_control);
+ if (ret < 0)
+ BUG();
+ }
+
+ /*
+ * The event array starts out as empty again and is extended
+ * as normal when events are bound. The existing pages will
+ * be reused.
+ */
+ event_array_pages = 0;
+}
+
+/*
+ * Operations table for the FIFO-based ABI; installed as evtchn_ops by
+ * xen_evtchn_fifo_init().
+ */
+static const struct evtchn_ops evtchn_ops_fifo = {
+ .max_channels = evtchn_fifo_max_channels,
+ .nr_channels = evtchn_fifo_nr_channels,
+ .setup = evtchn_fifo_setup,
+ .bind_to_cpu = evtchn_fifo_bind_to_cpu,
+ .clear_pending = evtchn_fifo_clear_pending,
+ .set_pending = evtchn_fifo_set_pending,
+ .is_pending = evtchn_fifo_is_pending,
+ .test_and_set_mask = evtchn_fifo_test_and_set_mask,
+ .mask = evtchn_fifo_mask,
+ .unmask = evtchn_fifo_unmask,
+ .handle_events = evtchn_fifo_handle_events,
+ .resume = evtchn_fifo_resume,
+};
+
+/*
+ * Allocate a zeroed control block page for @cpu and register it with
+ * the hypervisor via EVTCHNOP_init_control.
+ *
+ * On success the page is recorded in the per-CPU cpu_control_block and
+ * 0 is returned. Returns -ENOMEM if the page cannot be allocated, or
+ * the negative hypercall result, in which case the page is freed.
+ */
+static int evtchn_fifo_init_control_block(unsigned cpu)
+{
+	struct page *control_block;
+	struct evtchn_init_control init_control;
+	int ret;
+
+	control_block = alloc_page(GFP_KERNEL|__GFP_ZERO);
+	if (control_block == NULL)
+		return -ENOMEM;	/* nothing to free; __free_page(NULL) is not safe */
+
+	init_control.control_gfn = virt_to_mfn(page_address(control_block));
+	init_control.offset = 0;
+	init_control.vcpu = cpu;
+
+	ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control, &init_control);
+	if (ret < 0) {
+		__free_page(control_block);
+		return ret;
+	}
+
+	per_cpu(cpu_control_block, cpu) = page_address(control_block);
+
+	return 0;
+}
+
+/*
+ * CPU notifier callback: on CPU_UP_PREPARE, set up a control block for
+ * the CPU unless it already has one. Every other action is ignored.
+ *
+ * Returns NOTIFY_BAD if the control block could not be set up,
+ * NOTIFY_OK otherwise.
+ */
+static int evtchn_fifo_cpu_notification(struct notifier_block *self,
+					unsigned long action,
+					void *hcpu)
+{
+	int cpu = (long)hcpu;
+
+	if (action != CPU_UP_PREPARE)
+		return NOTIFY_OK;
+
+	/* Already set up (e.g. the CPU was brought up before). */
+	if (per_cpu(cpu_control_block, cpu))
+		return NOTIFY_OK;
+
+	if (evtchn_fifo_init_control_block(cpu) < 0)
+		return NOTIFY_BAD;
+
+	return NOTIFY_OK;
+}
+
+/* Notifier registered by xen_evtchn_fifo_init() via register_cpu_notifier(). */
+static struct notifier_block evtchn_fifo_cpu_notifier = {
+ .notifier_call = evtchn_fifo_cpu_notification,
+};
+
+/*
+ * Try to switch to the FIFO-based ABI.
+ *
+ * Sets up the control block for the current CPU; on success installs
+ * evtchn_ops_fifo as the active operations table and registers the CPU
+ * notifier. Returns 0 on success or the negative error from the
+ * control block setup, in which case nothing is installed.
+ */
+int __init xen_evtchn_fifo_init(void)
+{
+	int cpu = get_cpu();
+	int ret = evtchn_fifo_init_control_block(cpu);
+
+	if (ret >= 0) {
+		pr_info("Using FIFO-based ABI\n");
+
+		evtchn_ops = &evtchn_ops_fifo;
+
+		register_cpu_notifier(&evtchn_fifo_cpu_notifier);
+	}
+
+	put_cpu();
+	return ret;
+}
diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h
new file mode 100644
index 000000000000..677f41a0fff9
--- /dev/null
+++ b/drivers/xen/events/events_internal.h
@@ -0,0 +1,150 @@
+/*
+ * Xen Event Channels (internal header)
+ *
+ * Copyright (C) 2013 Citrix Systems R&D Ltd.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2 or later. See the file COPYING for more details.
+ */
+#ifndef __EVENTS_INTERNAL_H__
+#define __EVENTS_INTERNAL_H__
+
+/*
+ * Interrupt types. IRQT_UNBOUND is 0; the remaining values select
+ * which member of the union in struct irq_info is meaningful.
+ */
+enum xen_irq_type {
+ IRQT_UNBOUND = 0,
+ IRQT_PIRQ,
+ IRQT_VIRQ,
+ IRQT_IPI,
+ IRQT_EVTCHN
+};
+
+/*
+ * Packed IRQ information:
+ * type - enum xen_irq_type
+ * event channel - irq->event channel mapping
+ * cpu - cpu this event channel is bound to
+ * index - type-specific information:
+ * PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM
+ * guest, or GSI (real passthrough IRQ) of the device.
+ * VIRQ - virq number
+ * IPI - IPI vector
+ * EVTCHN -
+ */
+struct irq_info {
+ struct list_head list;
+ int refcnt;
+ enum xen_irq_type type; /* type */
+ unsigned irq;
+ unsigned int evtchn; /* event channel */
+ unsigned short cpu; /* cpu bound */
+
+ /* Type-specific data, selected by @type. */
+ union {
+ unsigned short virq;
+ enum ipi_vector ipi;
+ struct {
+ unsigned short pirq;
+ unsigned short gsi;
+ unsigned char vector;
+ unsigned char flags; /* PIRQ_* bits below */
+ uint16_t domid;
+ } pirq;
+ } u;
+};
+
+/* Bits for irq_info.u.pirq.flags. */
+#define PIRQ_NEEDS_EOI (1 << 0)
+#define PIRQ_SHAREABLE (1 << 1)
+
+/*
+ * ABI-specific event channel operations.
+ *
+ * The inline wrappers in this header call every hook unconditionally
+ * except @setup and @resume, which are checked for NULL and may
+ * therefore be left unset by an implementation.
+ */
+struct evtchn_ops {
+ /* Port limits: ABI maximum and currently usable count. */
+ unsigned (*max_channels)(void);
+ unsigned (*nr_channels)(void);
+
+ /* Optional per-port setup; returns 0 or a negative error. */
+ int (*setup)(struct irq_info *info);
+ void (*bind_to_cpu)(struct irq_info *info, unsigned cpu);
+
+ /* Pending/mask state manipulation for a single port. */
+ void (*clear_pending)(unsigned port);
+ void (*set_pending)(unsigned port);
+ bool (*is_pending)(unsigned port);
+ bool (*test_and_set_mask)(unsigned port);
+ void (*mask)(unsigned port);
+ void (*unmask)(unsigned port);
+
+ /* Upcall processing for @cpu, and optional resume handling. */
+ void (*handle_events)(unsigned cpu);
+ void (*resume)(void);
+};
+
+/* Operations table of the ABI in use; set by the ABI's init function. */
+extern const struct evtchn_ops *evtchn_ops;
+
+/* Event channel to irq lookup table; read it via get_evtchn_to_irq(). */
+extern int **evtchn_to_irq;
+int get_evtchn_to_irq(unsigned int evtchn);
+
+/* Lookup helpers defined outside this header. */
+struct irq_info *info_for_irq(unsigned irq);
+unsigned cpu_from_irq(unsigned irq);
+unsigned cpu_from_evtchn(unsigned int evtchn);
+
+/* Maximum number of ports as reported by the active ABI's max_channels hook. */
+static inline unsigned xen_evtchn_max_channels(void)
+{
+ return evtchn_ops->max_channels();
+}
+
+/*
+ * Perform whatever ABI-specific preparation a bound event channel needs
+ * before it can be unmasked and used. An ABI without a setup hook
+ * needs none, in which case this succeeds with 0.
+ */
+static inline int xen_evtchn_port_setup(struct irq_info *info)
+{
+	return evtchn_ops->setup ? evtchn_ops->setup(info) : 0;
+}
+
+/* Forward a port-to-CPU binding change to the active ABI's bind_to_cpu hook. */
+static inline void xen_evtchn_port_bind_to_cpu(struct irq_info *info,
+ unsigned cpu)
+{
+ evtchn_ops->bind_to_cpu(info, cpu);
+}
+
+/* Clear the pending state of @port via the active ABI. */
+static inline void clear_evtchn(unsigned port)
+{
+ evtchn_ops->clear_pending(port);
+}
+
+/* Mark @port pending via the active ABI. */
+static inline void set_evtchn(unsigned port)
+{
+ evtchn_ops->set_pending(port);
+}
+
+/* Return whether @port is pending according to the active ABI. */
+static inline bool test_evtchn(unsigned port)
+{
+ return evtchn_ops->is_pending(port);
+}
+
+/* Mask @port via the active ABI and return the hook's result. */
+static inline bool test_and_set_mask(unsigned port)
+{
+ return evtchn_ops->test_and_set_mask(port);
+}
+
+/* Mask @port via the active ABI's mask hook. */
+static inline void mask_evtchn(unsigned port)
+{
+	evtchn_ops->mask(port);
+}
+
+/* Unmask @port via the active ABI's unmask hook. */
+static inline void unmask_evtchn(unsigned port)
+{
+	evtchn_ops->unmask(port);
+}
+
+/* Process pending events for @cpu via the active ABI's handle_events hook. */
+static inline void xen_evtchn_handle_events(unsigned cpu)
+{
+	evtchn_ops->handle_events(cpu);
+}
+
+/* Run the active ABI's resume hook; an ABI without one needs no resume work. */
+static inline void xen_evtchn_resume(void)
+{
+	if (!evtchn_ops->resume)
+		return;
+
+	evtchn_ops->resume();
+}
+
+/*
+ * ABI initialisers. The FIFO variant returns a negative value on
+ * failure; the 2-level variant returns nothing.
+ */
+void xen_evtchn_2l_init(void);
+int xen_evtchn_fifo_init(void);
+
+#endif /* #ifndef __EVENTS_INTERNAL_H__ */
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 5de2063e16d3..00f40f051d95 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -417,7 +417,7 @@ static long evtchn_ioctl(struct file *file,
break;
rc = -EINVAL;
- if (unbind.port >= NR_EVENT_CHANNELS)
+ if (unbind.port >= xen_evtchn_nr_channels())
break;
rc = -ENOTCONN;
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index e41c79c986ea..073b4a19a8b0 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -846,7 +846,7 @@ static int __init gntdev_init(void)
if (!xen_domain())
return -ENODEV;
- use_ptemod = xen_pv_domain();
+ use_ptemod = !xen_feature(XENFEAT_auto_translated_physmap);
err = misc_register(&gntdev_miscdev);
if (err != 0) {
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index aa846a48f400..b84e3ab839aa 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -62,12 +62,10 @@
static grant_ref_t **gnttab_list;
static unsigned int nr_grant_frames;
-static unsigned int boot_max_nr_grant_frames;
static int gnttab_free_count;
static grant_ref_t gnttab_free_head;
static DEFINE_SPINLOCK(gnttab_list_lock);
-unsigned long xen_hvm_resume_frames;
-EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);
+struct grant_frames xen_auto_xlat_grant_frames;
static union {
struct grant_entry_v1 *v1;
@@ -827,6 +825,11 @@ static unsigned int __max_nr_grant_frames(void)
unsigned int gnttab_max_grant_frames(void)
{
unsigned int xen_max = __max_nr_grant_frames();
+ static unsigned int boot_max_nr_grant_frames;
+
+ /* First time, initialize it properly. */
+ if (!boot_max_nr_grant_frames)
+ boot_max_nr_grant_frames = __max_nr_grant_frames();
if (xen_max > boot_max_nr_grant_frames)
return boot_max_nr_grant_frames;
@@ -834,6 +837,51 @@ unsigned int gnttab_max_grant_frames(void)
}
EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
+int gnttab_setup_auto_xlat_frames(phys_addr_t addr)
+{
+ xen_pfn_t *pfn;
+ unsigned int max_nr_gframes = __max_nr_grant_frames();
+ unsigned int i;
+ void *vaddr;
+ /* A non-zero count means frames are already set up: refuse with -EINVAL. */
+ if (xen_auto_xlat_grant_frames.count)
+ return -EINVAL;
+ /* Remap max_nr_gframes pages starting at addr; NULL is reported as -ENOMEM. */
+ vaddr = xen_remap(addr, PAGE_SIZE * max_nr_gframes);
+ if (vaddr == NULL) {
+ pr_warn("Failed to ioremap gnttab share frames (addr=%pa)!\n",
+ &addr);
+ return -ENOMEM;
+ }
+ pfn = kcalloc(max_nr_gframes, sizeof(pfn[0]), GFP_KERNEL);
+ if (!pfn) {
+ xen_unmap(vaddr); /* undo the remap before failing */
+ return -ENOMEM;
+ }
+ for (i = 0; i < max_nr_gframes; i++)
+ pfn[i] = PFN_DOWN(addr) + i; /* consecutive frame numbers, starting at addr's frame */
+ /* Record the mapping, the PFN array and the count in xen_auto_xlat_grant_frames. */
+ xen_auto_xlat_grant_frames.vaddr = vaddr;
+ xen_auto_xlat_grant_frames.pfn = pfn;
+ xen_auto_xlat_grant_frames.count = max_nr_gframes;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_setup_auto_xlat_frames);
+
+void gnttab_free_auto_xlat_frames(void)
+{
+ if (!xen_auto_xlat_grant_frames.count)
+ return; /* count is zero: nothing is registered, so there is nothing to release */
+ kfree(xen_auto_xlat_grant_frames.pfn);
+ xen_unmap(xen_auto_xlat_grant_frames.vaddr);
+ /* Clear every field so that count reads zero again and no stale pointer remains. */
+ xen_auto_xlat_grant_frames.pfn = NULL;
+ xen_auto_xlat_grant_frames.count = 0;
+ xen_auto_xlat_grant_frames.vaddr = NULL;
+}
+EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames);
+
/* Handling of paged out grant targets (GNTST_eagain) */
#define MAX_DELAY 256
static inline void
@@ -1060,10 +1108,11 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
unsigned int nr_gframes = end_idx + 1;
int rc;
- if (xen_hvm_domain()) {
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
struct xen_add_to_physmap xatp;
unsigned int i = end_idx;
rc = 0;
+ BUG_ON(xen_auto_xlat_grant_frames.count < nr_gframes);
/*
* Loop backwards, so that the first hypercall has the largest
* index, ensuring that the table will grow only once.
@@ -1072,7 +1121,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
xatp.domid = DOMID_SELF;
xatp.idx = i;
xatp.space = XENMAPSPACE_grant_table;
- xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i;
+ xatp.gpfn = xen_auto_xlat_grant_frames.pfn[i];
rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
if (rc != 0) {
pr_warn("grant table add_to_physmap failed, err=%d\n",
@@ -1135,10 +1184,8 @@ static void gnttab_request_version(void)
int rc;
struct gnttab_set_version gsv;
- if (xen_hvm_domain())
- gsv.version = 1;
- else
- gsv.version = 2;
+ gsv.version = 1;
+
rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
if (rc == 0 && gsv.version == 2) {
grant_table_version = 2;
@@ -1169,22 +1216,15 @@ static int gnttab_setup(void)
if (max_nr_gframes < nr_grant_frames)
return -ENOSYS;
- if (xen_pv_domain())
- return gnttab_map(0, nr_grant_frames - 1);
-
- if (gnttab_shared.addr == NULL) {
- gnttab_shared.addr = xen_remap(xen_hvm_resume_frames,
- PAGE_SIZE * max_nr_gframes);
+ if (xen_feature(XENFEAT_auto_translated_physmap) && gnttab_shared.addr == NULL) {
+ gnttab_shared.addr = xen_auto_xlat_grant_frames.vaddr;
if (gnttab_shared.addr == NULL) {
- pr_warn("Failed to ioremap gnttab share frames (addr=0x%08lx)!\n",
- xen_hvm_resume_frames);
+ pr_warn("gnttab share frames (addr=0x%08lx) is not mapped!\n",
+ (unsigned long)xen_auto_xlat_grant_frames.vaddr);
return -ENOMEM;
}
}
-
- gnttab_map(0, nr_grant_frames - 1);
-
- return 0;
+ return gnttab_map(0, nr_grant_frames - 1);
}
int gnttab_resume(void)
@@ -1227,13 +1267,12 @@ int gnttab_init(void)
gnttab_request_version();
nr_grant_frames = 1;
- boot_max_nr_grant_frames = __max_nr_grant_frames();
/* Determine the maximum number of frames required for the
* grant reference free list on the current hypervisor.
*/
BUG_ON(grefs_per_grant_frame == 0);
- max_nr_glist_frames = (boot_max_nr_grant_frames *
+ max_nr_glist_frames = (gnttab_max_grant_frames() *
grefs_per_grant_frame / RPP);
gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
@@ -1286,5 +1325,6 @@ static int __gnttab_init(void)
return gnttab_init();
}
-
-core_initcall(__gnttab_init);
+/* Starts after core_initcall so that xen_pvh_gnttab_setup can be called
+ * beforehand to initialize xen_auto_xlat_grant_frames. */
+core_initcall_sync(__gnttab_init);
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index 188825122aae..dd9c249ea311 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -26,7 +26,9 @@
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include "../pci/pci.h"
+#ifdef CONFIG_PCI_MMCONFIG
#include <asm/pci_x86.h>
+#endif
static bool __read_mostly pci_seg_supported = true;
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index 2f3528e93cb9..a1361c312c06 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -108,6 +108,7 @@ static int platform_pci_init(struct pci_dev *pdev,
long ioaddr;
long mmio_addr, mmio_len;
unsigned int max_nr_gframes;
+ unsigned long grant_frames;
if (!xen_domain())
return -ENODEV;
@@ -154,13 +155,17 @@ static int platform_pci_init(struct pci_dev *pdev,
}
max_nr_gframes = gnttab_max_grant_frames();
- xen_hvm_resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
- ret = gnttab_init();
+ grant_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
+ ret = gnttab_setup_auto_xlat_frames(grant_frames);
if (ret)
goto out;
+ ret = gnttab_init();
+ if (ret)
+ goto grant_out;
xenbus_probe(NULL);
return 0;
-
+grant_out:
+ gnttab_free_auto_xlat_frames();
out:
pci_release_region(pdev, 0);
mem_out:
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 1eac0731c349..ebd8f218a788 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -75,14 +75,32 @@ static unsigned long xen_io_tlb_nslabs;
static u64 start_dma_addr;
+/*
+ * Both of these functions should avoid PFN_PHYS because phys_addr_t
+ * can be 32bit when dma_addr_t is 64bit leading to a loss in
+ * information if the shift is done before casting to 64bit.
+ */
static inline dma_addr_t xen_phys_to_bus(phys_addr_t paddr)
{
- return phys_to_machine(XPADDR(paddr)).maddr;
+ unsigned long mfn = pfn_to_mfn(PFN_DOWN(paddr));
+ dma_addr_t dma = (dma_addr_t)mfn << PAGE_SHIFT;
+
+ dma |= paddr & ~PAGE_MASK;
+
+ return dma;
}
static inline phys_addr_t xen_bus_to_phys(dma_addr_t baddr)
{
- return machine_to_phys(XMADDR(baddr)).paddr;
+ unsigned long pfn = mfn_to_pfn(PFN_DOWN(baddr));
+ dma_addr_t dma = (dma_addr_t)pfn << PAGE_SHIFT;
+ phys_addr_t paddr = dma;
+
+ BUG_ON(paddr != dma); /* truncation has occurred, should never happen */
+
+ paddr |= baddr & ~PAGE_MASK;
+
+ return paddr;
}
static inline dma_addr_t xen_virt_to_bus(void *address)
diff --git a/drivers/xen/xen-acpi-cpuhotplug.c b/drivers/xen/xen-acpi-cpuhotplug.c
index 8dae6c13063a..80875fb770ed 100644
--- a/drivers/xen/xen-acpi-cpuhotplug.c
+++ b/drivers/xen/xen-acpi-cpuhotplug.c
@@ -24,10 +24,7 @@
#include <linux/cpu.h>
#include <linux/acpi.h>
#include <linux/uaccess.h>
-#include <acpi/acpi_bus.h>
-#include <acpi/acpi_drivers.h>
#include <acpi/processor.h>
-
#include <xen/acpi.h>
#include <xen/interface/platform.h>
#include <asm/xen/hypercall.h>
@@ -269,7 +266,8 @@ static void acpi_processor_hotplug_notify(acpi_handle handle,
if (!is_processor_present(handle))
break;
- if (!acpi_bus_get_device(handle, &device))
+ acpi_bus_get_device(handle, &device);
+ if (acpi_device_enumerated(device))
break;
result = acpi_bus_scan(handle);
@@ -277,8 +275,9 @@ static void acpi_processor_hotplug_notify(acpi_handle handle,
pr_err(PREFIX "Unable to add the device\n");
break;
}
- result = acpi_bus_get_device(handle, &device);
- if (result) {
+ device = NULL;
+ acpi_bus_get_device(handle, &device);
+ if (!acpi_device_enumerated(device)) {
pr_err(PREFIX "Missing device object\n");
break;
}
diff --git a/drivers/xen/xen-acpi-memhotplug.c b/drivers/xen/xen-acpi-memhotplug.c
index 9083f1e474f8..f8d18626969a 100644
--- a/drivers/xen/xen-acpi-memhotplug.c
+++ b/drivers/xen/xen-acpi-memhotplug.c
@@ -22,7 +22,6 @@
#include <linux/init.h>
#include <linux/types.h>
#include <linux/acpi.h>
-#include <acpi/acpi_drivers.h>
#include <xen/acpi.h>
#include <xen/interface/platform.h>
#include <asm/xen/hypercall.h>
@@ -169,7 +168,7 @@ static int acpi_memory_get_device(acpi_handle handle,
acpi_scan_lock_acquire();
acpi_bus_get_device(handle, &device);
- if (device)
+ if (acpi_device_enumerated(device))
goto end;
/*
@@ -182,8 +181,9 @@ static int acpi_memory_get_device(acpi_handle handle,
result = -EINVAL;
goto out;
}
- result = acpi_bus_get_device(handle, &device);
- if (result) {
+ device = NULL;
+ acpi_bus_get_device(handle, &device);
+ if (!acpi_device_enumerated(device)) {
pr_warn(PREFIX "Missing device object\n");
result = -EINVAL;
goto out;
diff --git a/drivers/xen/xen-acpi-pad.c b/drivers/xen/xen-acpi-pad.c
index 59708fdd068b..40c4bc06b5fa 100644
--- a/drivers/xen/xen-acpi-pad.c
+++ b/drivers/xen/xen-acpi-pad.c
@@ -18,11 +18,10 @@
#include <linux/kernel.h>
#include <linux/types.h>
-#include <acpi/acpi_bus.h>
-#include <acpi/acpi_drivers.h>
-#include <asm/xen/hypercall.h>
+#include <linux/acpi.h>
#include <xen/interface/version.h>
#include <xen/xen-ops.h>
+#include <asm/xen/hypercall.h>
#define ACPI_PROCESSOR_AGGREGATOR_CLASS "acpi_pad"
#define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator"
diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c
index 13bc6c31c060..7231859119f1 100644
--- a/drivers/xen/xen-acpi-processor.c
+++ b/drivers/xen/xen-acpi-processor.c
@@ -28,10 +28,8 @@
#include <linux/module.h>
#include <linux/types.h>
#include <linux/syscore_ops.h>
-#include <acpi/acpi_bus.h>
-#include <acpi/acpi_drivers.h>
+#include <linux/acpi.h>
#include <acpi/processor.h>
-
#include <xen/xen.h>
#include <xen/interface/platform.h>
#include <asm/xen/hypercall.h>
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 21e18c18c7a1..745ad79c1d8e 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -175,6 +175,7 @@ static void frontswap_selfshrink(void)
#endif /* CONFIG_FRONTSWAP */
#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
+#define PAGES2MB(pages) ((pages) >> (20 - PAGE_SHIFT))
/*
* Use current balloon size, the goal (vm_committed_as), and hysteresis
@@ -525,6 +526,7 @@ EXPORT_SYMBOL(register_xen_selfballooning);
int xen_selfballoon_init(bool use_selfballooning, bool use_frontswap_selfshrink)
{
bool enable = false;
+ unsigned long reserve_pages;
if (!xen_domain())
return -ENODEV;
@@ -549,6 +551,26 @@ int xen_selfballoon_init(bool use_selfballooning, bool use_frontswap_selfshrink)
if (!enable)
return -ENODEV;
+ /*
+ * Give selfballoon_reserved_mb a default value (10% of total ram pages)
+ * to make selfballoon not so aggressive.
+ *
+ * There are mainly two reasons:
+ * 1) The original goal_page didn't consider some pages used by kernel
+ * space, like slab pages and memory used by device drivers.
+ *
+ * 2) The balloon driver may not give back memory to guest OS fast
+ * enough when the workload suddenly acquires a lot of physical memory.
+ *
+ * In both cases, the guest OS will suffer from memory pressure and
+ * OOM killer may be triggered.
+ * By reserving an extra 10% of total ram pages, we can keep the system
+ * much more reliable and more responsive in some cases.
+ */
+ if (!selfballoon_reserved_mb) {
+ reserve_pages = totalram_pages / 10;
+ selfballoon_reserved_mb = PAGES2MB(reserve_pages);
+ }
schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ);
return 0;
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index ec097d6f964d..01d59e66565d 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -45,6 +45,7 @@
#include <xen/grant_table.h>
#include <xen/xenbus.h>
#include <xen/xen.h>
+#include <xen/features.h>
#include "xenbus_probe.h"
@@ -743,7 +744,7 @@ static const struct xenbus_ring_ops ring_ops_hvm = {
void __init xenbus_ring_ops_init(void)
{
- if (xen_pv_domain())
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
ring_ops = &ring_ops_pv;
else
ring_ops = &ring_ops_hvm;
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index 129bf84c19ec..cb385c10d2b1 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -496,7 +496,7 @@ subsys_initcall(xenbus_probe_frontend_init);
#ifndef MODULE
static int __init boot_wait_for_devices(void)
{
- if (xen_hvm_domain() && !xen_platform_pci_unplug)
+ if (!xen_has_pv_devices())
return -ENODEV;
ready_to_wait_for_devices = 1;
diff --git a/drivers/xen/xencomm.c b/drivers/xen/xencomm.c
deleted file mode 100644
index 4793fc594549..000000000000
--- a/drivers/xen/xencomm.c
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Copyright (C) IBM Corp. 2006
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <asm/page.h>
-#include <xen/xencomm.h>
-#include <xen/interface/xen.h>
-#include <asm/xen/xencomm.h> /* for xencomm_is_phys_contiguous() */
-
-static int xencomm_init(struct xencomm_desc *desc,
- void *buffer, unsigned long bytes)
-{
- unsigned long recorded = 0;
- int i = 0;
-
- while ((recorded < bytes) && (i < desc->nr_addrs)) {
- unsigned long vaddr = (unsigned long)buffer + recorded;
- unsigned long paddr;
- int offset;
- int chunksz;
-
- offset = vaddr % PAGE_SIZE; /* handle partial pages */
- chunksz = min(PAGE_SIZE - offset, bytes - recorded);
-
- paddr = xencomm_vtop(vaddr);
- if (paddr == ~0UL) {
- printk(KERN_DEBUG "%s: couldn't translate vaddr %lx\n",
- __func__, vaddr);
- return -EINVAL;
- }
-
- desc->address[i++] = paddr;
- recorded += chunksz;
- }
-
- if (recorded < bytes) {
- printk(KERN_DEBUG
- "%s: could only translate %ld of %ld bytes\n",
- __func__, recorded, bytes);
- return -ENOSPC;
- }
-
- /* mark remaining addresses invalid (just for safety) */
- while (i < desc->nr_addrs)
- desc->address[i++] = XENCOMM_INVALID;
-
- desc->magic = XENCOMM_MAGIC;
-
- return 0;
-}
-
-static struct xencomm_desc *xencomm_alloc(gfp_t gfp_mask,
- void *buffer, unsigned long bytes)
-{
- struct xencomm_desc *desc;
- unsigned long buffer_ulong = (unsigned long)buffer;
- unsigned long start = buffer_ulong & PAGE_MASK;
- unsigned long end = (buffer_ulong + bytes) | ~PAGE_MASK;
- unsigned long nr_addrs = (end - start + 1) >> PAGE_SHIFT;
- unsigned long size = sizeof(*desc) +
- sizeof(desc->address[0]) * nr_addrs;
-
- /*
- * slab allocator returns at least sizeof(void*) aligned pointer.
- * When sizeof(*desc) > sizeof(void*), struct xencomm_desc might
- * cross page boundary.
- */
- if (sizeof(*desc) > sizeof(void *)) {
- unsigned long order = get_order(size);
- desc = (struct xencomm_desc *)__get_free_pages(gfp_mask,
- order);
- if (desc == NULL)
- return NULL;
-
- desc->nr_addrs =
- ((PAGE_SIZE << order) - sizeof(struct xencomm_desc)) /
- sizeof(*desc->address);
- } else {
- desc = kmalloc(size, gfp_mask);
- if (desc == NULL)
- return NULL;
-
- desc->nr_addrs = nr_addrs;
- }
- return desc;
-}
-
-void xencomm_free(struct xencomm_handle *desc)
-{
- if (desc && !((ulong)desc & XENCOMM_INLINE_FLAG)) {
- struct xencomm_desc *desc__ = (struct xencomm_desc *)desc;
- if (sizeof(*desc__) > sizeof(void *)) {
- unsigned long size = sizeof(*desc__) +
- sizeof(desc__->address[0]) * desc__->nr_addrs;
- unsigned long order = get_order(size);
- free_pages((unsigned long)__va(desc), order);
- } else
- kfree(__va(desc));
- }
-}
-
-static int xencomm_create(void *buffer, unsigned long bytes,
- struct xencomm_desc **ret, gfp_t gfp_mask)
-{
- struct xencomm_desc *desc;
- int rc;
-
- pr_debug("%s: %p[%ld]\n", __func__, buffer, bytes);
-
- if (bytes == 0) {
- /* don't create a descriptor; Xen recognizes NULL. */
- BUG_ON(buffer != NULL);
- *ret = NULL;
- return 0;
- }
-
- BUG_ON(buffer == NULL); /* 'bytes' is non-zero */
-
- desc = xencomm_alloc(gfp_mask, buffer, bytes);
- if (!desc) {
- printk(KERN_DEBUG "%s failure\n", "xencomm_alloc");
- return -ENOMEM;
- }
-
- rc = xencomm_init(desc, buffer, bytes);
- if (rc) {
- printk(KERN_DEBUG "%s failure: %d\n", "xencomm_init", rc);
- xencomm_free((struct xencomm_handle *)__pa(desc));
- return rc;
- }
-
- *ret = desc;
- return 0;
-}
-
-static struct xencomm_handle *xencomm_create_inline(void *ptr)
-{
- unsigned long paddr;
-
- BUG_ON(!xencomm_is_phys_contiguous((unsigned long)ptr));
-
- paddr = (unsigned long)xencomm_pa(ptr);
- BUG_ON(paddr & XENCOMM_INLINE_FLAG);
- return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG);
-}
-
-/* "mini" routine, for stack-based communications: */
-static int xencomm_create_mini(void *buffer,
- unsigned long bytes, struct xencomm_mini *xc_desc,
- struct xencomm_desc **ret)
-{
- int rc = 0;
- struct xencomm_desc *desc;
- BUG_ON(((unsigned long)xc_desc) % sizeof(*xc_desc) != 0);
-
- desc = (void *)xc_desc;
-
- desc->nr_addrs = XENCOMM_MINI_ADDRS;
-
- rc = xencomm_init(desc, buffer, bytes);
- if (!rc)
- *ret = desc;
-
- return rc;
-}
-
-struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes)
-{
- int rc;
- struct xencomm_desc *desc;
-
- if (xencomm_is_phys_contiguous((unsigned long)ptr))
- return xencomm_create_inline(ptr);
-
- rc = xencomm_create(ptr, bytes, &desc, GFP_KERNEL);
-
- if (rc || desc == NULL)
- return NULL;
-
- return xencomm_pa(desc);
-}
-
-struct xencomm_handle *__xencomm_map_no_alloc(void *ptr, unsigned long bytes,
- struct xencomm_mini *xc_desc)
-{
- int rc;
- struct xencomm_desc *desc = NULL;
-
- if (xencomm_is_phys_contiguous((unsigned long)ptr))
- return xencomm_create_inline(ptr);
-
- rc = xencomm_create_mini(ptr, bytes, xc_desc,
- &desc);
-
- if (rc)
- return NULL;
-
- return xencomm_pa(desc);
-}