Diffstat (limited to 'include/linux/kvm_host.h')
 include/linux/kvm_host.h | 330 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 289 insertions(+), 41 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8583ed3ff344..041ca7f15ea4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -28,6 +28,7 @@
#include <linux/rcuwait.h>
#include <linux/refcount.h>
#include <linux/nospec.h>
+#include <linux/notifier.h>
#include <asm/signal.h>
#include <linux/kvm.h>
@@ -149,6 +150,7 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_UNBLOCK 2
#define KVM_REQ_UNHALT 3
+#define KVM_REQ_VM_BUGGED (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQUEST_ARCH_BASE 8
#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
@@ -157,6 +159,15 @@ static inline bool is_error_page(struct page *page)
})
#define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0)
+bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
+ struct kvm_vcpu *except,
+ unsigned long *vcpu_bitmap, cpumask_var_t tmp);
+bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
+bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
+ struct kvm_vcpu *except);
+bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req,
+ unsigned long *vcpu_bitmap);
+
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
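The request-broadcast declarations moved above (kvm_make_all_cpus_request() and friends) post a KVM_REQ_* to vCPUs and kick any that are in guest mode; with KVM_REQUEST_WAIT the sender also waits for the kicks to be acked. As a hedged illustration, modelled on kvm_flush_remote_tlbs() and not part of this patch:

/* Hypothetical caller: ask every vCPU to flush its TLB and wait. */
static void example_flush_all(struct kvm *kvm)
{
	/*
	 * KVM_REQ_TLB_FLUSH carries KVM_REQUEST_WAIT, so this returns only
	 * once every running vCPU has observed the kick.
	 */
	if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
		++kvm->stat.generic.remote_tlb_flush;
}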
@@ -304,7 +315,6 @@ struct kvm_vcpu {
struct pid __rcu *pid;
int sigset_active;
sigset_t sigset;
- struct kvm_vcpu_stat stat;
unsigned int halt_poll_ns;
bool valid_wakeup;
@@ -341,7 +351,16 @@ struct kvm_vcpu {
bool preempted;
bool ready;
struct kvm_vcpu_arch arch;
+ struct kvm_vcpu_stat stat;
+ char stats_id[KVM_STATS_NAME_SIZE];
struct kvm_dirty_ring dirty_ring;
+
+ /*
+ * The index of the most recently used memslot by this vCPU. It's ok
+ * if this becomes stale due to memslot changes since we always check
+ * it is a valid slot.
+ */
+ int last_used_slot;
};
/* must be called with irqs disabled */
@@ -510,7 +529,7 @@ struct kvm_memslots {
u64 generation;
/* The mapping table from slot id to the index in memslots[]. */
short id_to_index[KVM_MEM_SLOTS_NUM];
- atomic_t lru_slot;
+ atomic_t last_used_slot;
int used_slots;
struct kvm_memory_slot memslots[];
};
@@ -523,10 +542,24 @@ struct kvm {
#endif /* KVM_HAVE_MMU_RWLOCK */
struct mutex slots_lock;
+
+ /*
+ * Protects the arch-specific fields of struct kvm_memory_slots in
+ * use by the VM. To be used under the slots_lock (above) or in a
+ * kvm->srcu critical section where acquiring the slots_lock would
+ * lead to deadlock with the synchronize_srcu in
+ * install_new_memslots.
+ */
+ struct mutex slots_arch_lock;
struct mm_struct *mm; /* userspace tied to this vm */
struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+ /* Used to wait for completion of MMU notifiers. */
+ spinlock_t mn_invalidate_lock;
+ unsigned long mn_active_invalidate_count;
+ struct rcuwait mn_memslots_update_rcuwait;
+
/*
* created_vcpus is protected by kvm->lock, and is incremented
* at the beginning of KVM_CREATE_VCPU. online_vcpus is only
@@ -585,6 +618,12 @@ struct kvm {
pid_t userspace_pid;
unsigned int max_halt_poll_ns;
u32 dirty_ring_size;
+ bool vm_bugged;
+
+#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
+ struct notifier_block pm_notifier;
+#endif
+ char stats_id[KVM_STATS_NAME_SIZE];
};
#define kvm_err(fmt, ...) \
@@ -613,6 +652,30 @@ struct kvm {
#define vcpu_err(vcpu, fmt, ...) \
kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
+static inline void kvm_vm_bugged(struct kvm *kvm)
+{
+ kvm->vm_bugged = true;
+ kvm_make_all_cpus_request(kvm, KVM_REQ_VM_BUGGED);
+}
+
+#define KVM_BUG(cond, kvm, fmt...) \
+({ \
+ int __ret = (cond); \
+ \
+ if (WARN_ONCE(__ret && !(kvm)->vm_bugged, fmt)) \
+ kvm_vm_bugged(kvm); \
+ unlikely(__ret); \
+})
+
+#define KVM_BUG_ON(cond, kvm) \
+({ \
+ int __ret = (cond); \
+ \
+ if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged)) \
+ kvm_vm_bugged(kvm); \
+ unlikely(__ret); \
+})
+
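KVM_BUG() and KVM_BUG_ON() are VM-scoped analogues of WARN_ONCE() and WARN_ON_ONCE(): the first hit marks the VM buggy and broadcasts KVM_REQ_VM_BUGGED so every vCPU bails out to userspace instead of limping along on corrupted state. A hedged sketch of the intended call pattern (the helper is hypothetical):

/* Hypothetical arch helper: refuse to proceed on an impossible state. */
static int example_vcpu_op(struct kvm_vcpu *vcpu, int hw_state)
{
	/* Like WARN_ON_ONCE(), evaluates to the (unlikely) condition. */
	if (KVM_BUG_ON(hw_state < 0, vcpu->kvm))
		return -EIO;

	return 0;
}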
static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm)
{
return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET);
@@ -704,6 +767,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
void kvm_exit(void);
void kvm_get_kvm(struct kvm *kvm);
+bool kvm_get_kvm_safe(struct kvm *kvm);
void kvm_put_kvm(struct kvm *kvm);
bool file_is_kvm(struct file *file);
void kvm_put_kvm_no_destroy(struct kvm *kvm);
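kvm_get_kvm_safe() is for callers that hold only a weak reference (e.g. one found on a global VM list) and may race with the final kvm_put_kvm(); unlike kvm_get_kvm(), it can fail rather than resurrect a dying refcount. A hedged usage sketch:

/* Hypothetical walker that located 'kvm' without holding a reference. */
static void example_inspect_vm(struct kvm *kvm)
{
	if (!kvm_get_kvm_safe(kvm))
		return;	/* VM is already being destroyed */

	/* ... kvm may be dereferenced safely here ... */

	kvm_put_kvm(kvm);
}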
@@ -808,7 +872,6 @@ void kvm_release_pfn_clean(kvm_pfn_t pfn);
void kvm_release_pfn_dirty(kvm_pfn_t pfn);
void kvm_set_pfn_dirty(kvm_pfn_t pfn);
void kvm_set_pfn_accessed(kvm_pfn_t pfn);
-void kvm_get_pfn(kvm_pfn_t pfn);
void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
@@ -927,14 +990,10 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc);
void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
#endif
-bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
- struct kvm_vcpu *except,
- unsigned long *vcpu_bitmap, cpumask_var_t tmp);
-bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
-bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
- struct kvm_vcpu *except);
-bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req,
- unsigned long *vcpu_bitmap);
+void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
+ unsigned long end);
+void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start,
+ unsigned long end);
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
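These pair with the new mn_invalidate_lock/mn_active_invalidate_count fields above: each in-flight range invalidation bumps the count on entry and drops it on exit, so memslot installation can wait on mn_memslots_update_rcuwait until the count drains. A hedged sketch of the mandatory pairing (the caller is hypothetical):

static void example_invalidate_range(struct kvm *kvm, unsigned long start,
				     unsigned long end)
{
	kvm_inc_notifier_count(kvm, start, end);

	/* ... unmap/zap the affected range and flush TLBs ... */

	kvm_dec_notifier_count(kvm, start, end);
}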
@@ -998,6 +1057,10 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
+#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
+int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state);
+#endif
+
#ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry);
#endif
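With CONFIG_HAVE_KVM_PM_NOTIFIER, common code registers kvm->pm_notifier and funnels PM events into this per-VM hook. A hedged sketch of a possible arch implementation (the clock helper is illustrative, not a real API):

#include <linux/suspend.h>

int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state)
{
	switch (state) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
		/* e.g. quiesce per-VM clocks before the host sleeps */
		return example_suspend_vm_clocks(kvm);
	default:
		return NOTIFY_DONE;
	}
}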
@@ -1014,6 +1077,7 @@ bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_post_init_vm(struct kvm *kvm);
void kvm_arch_pre_destroy_vm(struct kvm *kvm);
+int kvm_arch_create_vm_debugfs(struct kvm *kvm);
#ifndef __KVM_HAVE_ARCH_VM_ALLOC
/*
@@ -1137,29 +1201,49 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
/*
- * search_memslots() and __gfn_to_memslot() are here because they are
- * used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c.
- * gfn_to_memslot() itself isn't here as an inline because that would
- * bloat other code too much.
+ * Returns a pointer to the memslot at slot_index if it contains gfn.
+ * Otherwise returns NULL.
+ */
+static inline struct kvm_memory_slot *
+try_get_memslot(struct kvm_memslots *slots, int slot_index, gfn_t gfn)
+{
+ struct kvm_memory_slot *slot;
+
+ if (slot_index < 0 || slot_index >= slots->used_slots)
+ return NULL;
+
+ /*
+ * slot_index can come from vcpu->last_used_slot which is not kept
+ * in sync with userspace-controllable memslot deletion. So use nospec
+ * to prevent the CPU from speculating past the end of memslots[].
+ */
+ slot_index = array_index_nospec(slot_index, slots->used_slots);
+ slot = &slots->memslots[slot_index];
+
+ if (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages)
+ return slot;
+ else
+ return NULL;
+}
+
+/*
+ * Returns a pointer to the memslot that contains gfn and records the slot's
+ * index in *index. Otherwise returns NULL.
*
* IMPORTANT: Slots are sorted from highest GFN to lowest GFN!
*/
static inline struct kvm_memory_slot *
-search_memslots(struct kvm_memslots *slots, gfn_t gfn)
+search_memslots(struct kvm_memslots *slots, gfn_t gfn, int *index)
{
int start = 0, end = slots->used_slots;
- int slot = atomic_read(&slots->lru_slot);
struct kvm_memory_slot *memslots = slots->memslots;
+ struct kvm_memory_slot *slot;
if (unlikely(!slots->used_slots))
return NULL;
- if (gfn >= memslots[slot].base_gfn &&
- gfn < memslots[slot].base_gfn + memslots[slot].npages)
- return &memslots[slot];
-
while (start < end) {
- slot = start + (end - start) / 2;
+ int slot = start + (end - start) / 2;
if (gfn >= memslots[slot].base_gfn)
end = slot;
@@ -1167,19 +1251,37 @@ search_memslots(struct kvm_memslots *slots, gfn_t gfn)
start = slot + 1;
}
- if (start < slots->used_slots && gfn >= memslots[start].base_gfn &&
- gfn < memslots[start].base_gfn + memslots[start].npages) {
- atomic_set(&slots->lru_slot, start);
- return &memslots[start];
+ slot = try_get_memslot(slots, start, gfn);
+ if (slot) {
+ *index = start;
+ return slot;
}
return NULL;
}
+/*
+ * __gfn_to_memslot() and its descendants are here because they are called
+ * from non-modular code in arch/powerpc/kvm/book3s_64_vio{,_hv}.c.
+ * gfn_to_memslot() itself isn't here as an inline because that would bloat
+ * other code too much.
+ */
static inline struct kvm_memory_slot *
__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
{
- return search_memslots(slots, gfn);
+ struct kvm_memory_slot *slot;
+ int slot_index = atomic_read(&slots->last_used_slot);
+
+ slot = try_get_memslot(slots, slot_index, gfn);
+ if (slot)
+ return slot;
+
+ slot = search_memslots(slots, gfn, &slot_index);
+ if (slot) {
+ atomic_set(&slots->last_used_slot, slot_index);
+ return slot;
+ }
+
+ return NULL;
}
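The lookup is now two-tier: __gfn_to_memslot() first probes the cached last_used_slot (revalidated by try_get_memslot(), which also applies array_index_nospec() because the cached index can be stale), and only then falls back to the binary search over the descending-GFN array, refreshing the cache on a hit. A self-contained toy model of the same shape, for illustration only:

#include <stddef.h>

struct slot { unsigned long base, npages; };

/* slots[] sorted from highest base to lowest, as in struct kvm_memslots. */
static struct slot *toy_lookup(struct slot *slots, int used, int *cache,
			       unsigned long gfn)
{
	int lo = 0, hi = used, i = *cache;

	/* Fast path: revalidate the cached index. */
	if (i >= 0 && i < used &&
	    gfn >= slots[i].base && gfn < slots[i].base + slots[i].npages)
		return &slots[i];

	/* Slow path: find the first entry with base <= gfn. */
	while (lo < hi) {
		int mid = lo + (hi - lo) / 2;

		if (gfn >= slots[mid].base)
			hi = mid;
		else
			lo = mid + 1;
	}

	if (lo < used && gfn >= slots[lo].base &&
	    gfn < slots[lo].base + slots[lo].npages) {
		*cache = lo;	/* remember the hit for next time */
		return &slots[lo];
	}
	return NULL;
}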
static inline unsigned long
@@ -1244,27 +1346,173 @@ enum kvm_stat_kind {
struct kvm_stat_data {
struct kvm *kvm;
- struct kvm_stats_debugfs_item *dbgfs_item;
-};
-
-struct kvm_stats_debugfs_item {
- const char *name;
- int offset;
+ const struct _kvm_stats_desc *desc;
enum kvm_stat_kind kind;
- int mode;
};
-#define KVM_DBGFS_GET_MODE(dbgfs_item) \
- ((dbgfs_item)->mode ? (dbgfs_item)->mode : 0644)
+struct _kvm_stats_desc {
+ struct kvm_stats_desc desc;
+ char name[KVM_STATS_NAME_SIZE];
+};
-#define VM_STAT(n, x, ...) \
- { n, offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ }
-#define VCPU_STAT(n, x, ...) \
- { n, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ }
+#define STATS_DESC_COMMON(type, unit, base, exp, sz, bsz) \
+ .flags = type | unit | base | \
+ BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) | \
+ BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) | \
+ BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK), \
+ .exponent = exp, \
+ .size = sz, \
+ .bucket_size = bsz
+
+#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \
+ { \
+ { \
+ STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \
+ .offset = offsetof(struct kvm_vm_stat, generic.stat) \
+ }, \
+ .name = #stat, \
+ }
+#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \
+ { \
+ { \
+ STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \
+ .offset = offsetof(struct kvm_vcpu_stat, generic.stat) \
+ }, \
+ .name = #stat, \
+ }
+#define VM_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \
+ { \
+ { \
+ STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \
+ .offset = offsetof(struct kvm_vm_stat, stat) \
+ }, \
+ .name = #stat, \
+ }
+#define VCPU_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \
+ { \
+ { \
+ STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \
+ .offset = offsetof(struct kvm_vcpu_stat, stat) \
+ }, \
+ .name = #stat, \
+ }
+/* SCOPE: VM, VM_GENERIC, VCPU, VCPU_GENERIC */
+#define STATS_DESC(SCOPE, stat, type, unit, base, exp, sz, bsz) \
+ SCOPE##_STATS_DESC(stat, type, unit, base, exp, sz, bsz)
+
+#define STATS_DESC_CUMULATIVE(SCOPE, name, unit, base, exponent) \
+ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_CUMULATIVE, \
+ unit, base, exponent, 1, 0)
+#define STATS_DESC_INSTANT(SCOPE, name, unit, base, exponent) \
+ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_INSTANT, \
+ unit, base, exponent, 1, 0)
+#define STATS_DESC_PEAK(SCOPE, name, unit, base, exponent) \
+ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_PEAK, \
+ unit, base, exponent, 1, 0)
+#define STATS_DESC_LINEAR_HIST(SCOPE, name, unit, base, exponent, sz, bsz) \
+ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_LINEAR_HIST, \
+ unit, base, exponent, sz, bsz)
+#define STATS_DESC_LOG_HIST(SCOPE, name, unit, base, exponent, sz) \
+ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_LOG_HIST, \
+ unit, base, exponent, sz, 0)
+
+/* Cumulative counter, read/write */
+#define STATS_DESC_COUNTER(SCOPE, name) \
+ STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_NONE, \
+ KVM_STATS_BASE_POW10, 0)
+/* Instantaneous counter, read only */
+#define STATS_DESC_ICOUNTER(SCOPE, name) \
+ STATS_DESC_INSTANT(SCOPE, name, KVM_STATS_UNIT_NONE, \
+ KVM_STATS_BASE_POW10, 0)
+/* Peak counter, read/write */
+#define STATS_DESC_PCOUNTER(SCOPE, name) \
+ STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_NONE, \
+ KVM_STATS_BASE_POW10, 0)
+
+/* Cumulative time in nanosecond */
+#define STATS_DESC_TIME_NSEC(SCOPE, name) \
+ STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_SECONDS, \
+ KVM_STATS_BASE_POW10, -9)
+/* Linear histogram for time in nanosecond */
+#define STATS_DESC_LINHIST_TIME_NSEC(SCOPE, name, sz, bsz) \
+ STATS_DESC_LINEAR_HIST(SCOPE, name, KVM_STATS_UNIT_SECONDS, \
+ KVM_STATS_BASE_POW10, -9, sz, bsz)
+/* Logarithmic histogram for time in nanosecond */
+#define STATS_DESC_LOGHIST_TIME_NSEC(SCOPE, name, sz) \
+ STATS_DESC_LOG_HIST(SCOPE, name, KVM_STATS_UNIT_SECONDS, \
+ KVM_STATS_BASE_POW10, -9, sz)
+
+#define KVM_GENERIC_VM_STATS() \
+ STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush), \
+ STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush_requests)
+
+#define KVM_GENERIC_VCPU_STATS() \
+ STATS_DESC_COUNTER(VCPU_GENERIC, halt_successful_poll), \
+ STATS_DESC_COUNTER(VCPU_GENERIC, halt_attempted_poll), \
+ STATS_DESC_COUNTER(VCPU_GENERIC, halt_poll_invalid), \
+ STATS_DESC_COUNTER(VCPU_GENERIC, halt_wakeup), \
+ STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_poll_success_ns), \
+ STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_ns), \
+ STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_wait_ns), \
+ STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_poll_success_hist, \
+ HALT_POLL_HIST_COUNT), \
+ STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_hist, \
+ HALT_POLL_HIST_COUNT), \
+ STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_wait_hist, \
+ HALT_POLL_HIST_COUNT)
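An architecture then builds its descriptor tables by splicing the generic block into its own array; the BUILD_BUG_ON_ZERO() terms in STATS_DESC_COMMON() reject out-of-mask flags at compile time. A hedged sketch, assuming a kvm_vcpu_stat with an illustrative 'example_exits' field:

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, example_exits),
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};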
-extern struct kvm_stats_debugfs_item debugfs_entries[];
extern struct dentry *kvm_debugfs_dir;
+ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
+ const struct _kvm_stats_desc *desc,
+ void *stats, size_t size_stats,
+ char __user *user_buffer, size_t size, loff_t *offset);
+
+/**
+ * kvm_stats_linear_hist_update() - Update bucket value for linear histogram
+ * statistics data.
+ *
+ * @data: start address of the stats data
+ * @size: the number of buckets in the stats data
+ * @value: the new value used to update the linear histogram's bucket
+ * @bucket_size: the size (width) of a bucket
+ */
+static inline void kvm_stats_linear_hist_update(u64 *data, size_t size,
+ u64 value, size_t bucket_size)
+{
+ size_t index = div64_u64(value, bucket_size);
+
+ index = min(index, size - 1);
+ ++data[index];
+}
+
+/**
+ * kvm_stats_log_hist_update() - Update bucket value for logarithmic histogram
+ * statistics data.
+ *
+ * @data: start address of the stats data
+ * @size: the number of buckets in the stats data
+ * @value: the new value used to update the logarithmic histogram's bucket
+ */
+static inline void kvm_stats_log_hist_update(u64 *data, size_t size, u64 value)
+{
+ size_t index = fls64(value);
+
+ index = min(index, size - 1);
+ ++data[index];
+}
+
+#define KVM_STATS_LINEAR_HIST_UPDATE(array, value, bsize) \
+ kvm_stats_linear_hist_update(array, ARRAY_SIZE(array), value, bsize)
+#define KVM_STATS_LOG_HIST_UPDATE(array, value) \
+ kvm_stats_log_hist_update(array, ARRAY_SIZE(array), value)
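The wrappers derive the bucket count from the array itself, so callers just feed raw samples; for the log variant the bucket index is fls64(value), i.e. floor(log2) + 1 for nonzero samples, clamped to the last bucket. A hedged usage sketch against the generic halt-polling histograms declared above:

static void example_record_poll_success(struct kvm_vcpu *vcpu, u64 poll_ns)
{
	/* Bucket 0 counts poll_ns == 0; larger samples land in bucket fls64(). */
	KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.generic.halt_poll_success_hist,
				  poll_ns);
}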
+
+
+extern const struct kvm_stats_header kvm_vm_stats_header;
+extern const struct _kvm_stats_desc kvm_vm_stats_desc[];
+extern const struct kvm_stats_header kvm_vcpu_stats_header;
+extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[];
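kvm_stats_read() serializes the binary stats layout (header, id string, descriptor block, then the data itself) for the file descriptors returned by KVM_GET_STATS_FD, so a per-VM read handler collapses to a single call. A hedged sketch of such a handler:

/* Hypothetical file_operations read handler for a VM stats fd. */
static ssize_t example_vm_stats_read(struct file *file,
				     char __user *user_buffer,
				     size_t size, loff_t *offset)
{
	struct kvm *kvm = file->private_data;

	return kvm_stats_read(kvm->stats_id, &kvm_vm_stats_header,
			      &kvm_vm_stats_desc[0], &kvm->stat,
			      sizeof(kvm->stat), user_buffer, size, offset);
}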
+
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
{