aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/kvm_host.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/kvm_host.h')
-rw-r--r--include/linux/kvm_host.h242
1 files changed, 193 insertions, 49 deletions
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ae7735b490b4..0f18df7fe874 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -150,6 +150,7 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_UNBLOCK 2
#define KVM_REQ_UNHALT 3
+#define KVM_REQ_VM_BUGGED (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQUEST_ARCH_BASE 8
#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
@@ -158,6 +159,15 @@ static inline bool is_error_page(struct page *page)
})
#define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0)
+bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
+ struct kvm_vcpu *except,
+ unsigned long *vcpu_bitmap, cpumask_var_t tmp);
+bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
+bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
+ struct kvm_vcpu *except);
+bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req,
+ unsigned long *vcpu_bitmap);
+
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
@@ -344,6 +354,13 @@ struct kvm_vcpu {
struct kvm_vcpu_stat stat;
char stats_id[KVM_STATS_NAME_SIZE];
struct kvm_dirty_ring dirty_ring;
+
+ /*
+ * The index of the most recently used memslot by this vCPU. It's ok
+ * if this becomes stale due to memslot changes since we always check
+ * it is a valid slot.
+ */
+ int last_used_slot;
};
/* must be called with irqs disabled */
@@ -512,7 +529,7 @@ struct kvm_memslots {
u64 generation;
/* The mapping table from slot id to the index in memslots[]. */
short id_to_index[KVM_MEM_SLOTS_NUM];
- atomic_t lru_slot;
+ atomic_t last_used_slot;
int used_slots;
struct kvm_memory_slot memslots[];
};
@@ -538,6 +555,11 @@ struct kvm {
struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+ /* Used to wait for completion of MMU notifiers. */
+ spinlock_t mn_invalidate_lock;
+ unsigned long mn_active_invalidate_count;
+ struct rcuwait mn_memslots_update_rcuwait;
+
/*
* created_vcpus is protected by kvm->lock, and is incremented
* at the beginning of KVM_CREATE_VCPU. online_vcpus is only
@@ -586,7 +608,6 @@ struct kvm {
unsigned long mmu_notifier_range_start;
unsigned long mmu_notifier_range_end;
#endif
- long tlbs_dirty;
struct list_head devices;
u64 manual_dirty_log_protect;
struct dentry *debugfs_dentry;
@@ -596,6 +617,7 @@ struct kvm {
pid_t userspace_pid;
unsigned int max_halt_poll_ns;
u32 dirty_ring_size;
+ bool vm_bugged;
#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
struct notifier_block pm_notifier;
@@ -629,6 +651,30 @@ struct kvm {
#define vcpu_err(vcpu, fmt, ...) \
kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
+static inline void kvm_vm_bugged(struct kvm *kvm)
+{
+ kvm->vm_bugged = true;
+ kvm_make_all_cpus_request(kvm, KVM_REQ_VM_BUGGED);
+}
+
+#define KVM_BUG(cond, kvm, fmt...) \
+({ \
+ int __ret = (cond); \
+ \
+ if (WARN_ONCE(__ret && !(kvm)->vm_bugged, fmt)) \
+ kvm_vm_bugged(kvm); \
+ unlikely(__ret); \
+})
+
+#define KVM_BUG_ON(cond, kvm) \
+({ \
+ int __ret = (cond); \
+ \
+ if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged)) \
+ kvm_vm_bugged(kvm); \
+ unlikely(__ret); \
+})
+
static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm)
{
return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET);
@@ -674,11 +720,6 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
return NULL;
}
-static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
-{
- return vcpu->vcpu_idx;
-}
-
#define kvm_for_each_memslot(memslot, slots) \
for (memslot = &slots->memslots[0]; \
memslot < slots->memslots + slots->used_slots; memslot++) \
@@ -720,6 +761,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
void kvm_exit(void);
void kvm_get_kvm(struct kvm *kvm);
+bool kvm_get_kvm_safe(struct kvm *kvm);
void kvm_put_kvm(struct kvm *kvm);
bool file_is_kvm(struct file *file);
void kvm_put_kvm_no_destroy(struct kvm *kvm);
@@ -824,7 +866,6 @@ void kvm_release_pfn_clean(kvm_pfn_t pfn);
void kvm_release_pfn_dirty(kvm_pfn_t pfn);
void kvm_set_pfn_dirty(kvm_pfn_t pfn);
void kvm_set_pfn_accessed(kvm_pfn_t pfn);
-void kvm_get_pfn(kvm_pfn_t pfn);
void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
@@ -943,14 +984,10 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc);
void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
#endif
-bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
- struct kvm_vcpu *except,
- unsigned long *vcpu_bitmap, cpumask_var_t tmp);
-bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
-bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
- struct kvm_vcpu *except);
-bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req,
- unsigned long *vcpu_bitmap);
+void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
+ unsigned long end);
+void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start,
+ unsigned long end);
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
@@ -1034,6 +1071,7 @@ bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_post_init_vm(struct kvm *kvm);
void kvm_arch_pre_destroy_vm(struct kvm *kvm);
+int kvm_arch_create_vm_debugfs(struct kvm *kvm);
#ifndef __KVM_HAVE_ARCH_VM_ALLOC
/*
@@ -1157,29 +1195,49 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
/*
- * search_memslots() and __gfn_to_memslot() are here because they are
- * used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c.
- * gfn_to_memslot() itself isn't here as an inline because that would
- * bloat other code too much.
+ * Returns a pointer to the memslot at slot_index if it contains gfn.
+ * Otherwise returns NULL.
+ */
+static inline struct kvm_memory_slot *
+try_get_memslot(struct kvm_memslots *slots, int slot_index, gfn_t gfn)
+{
+ struct kvm_memory_slot *slot;
+
+ if (slot_index < 0 || slot_index >= slots->used_slots)
+ return NULL;
+
+ /*
+ * slot_index can come from vcpu->last_used_slot which is not kept
+ * in sync with userspace-controllable memslot deletion. So use nospec
+ * to prevent the CPU from speculating past the end of memslots[].
+ */
+ slot_index = array_index_nospec(slot_index, slots->used_slots);
+ slot = &slots->memslots[slot_index];
+
+ if (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages)
+ return slot;
+ else
+ return NULL;
+}
+
+/*
+ * Returns a pointer to the memslot that contains gfn and records the index of
+ * the slot in index. Otherwise returns NULL.
*
* IMPORTANT: Slots are sorted from highest GFN to lowest GFN!
*/
static inline struct kvm_memory_slot *
-search_memslots(struct kvm_memslots *slots, gfn_t gfn)
+search_memslots(struct kvm_memslots *slots, gfn_t gfn, int *index)
{
int start = 0, end = slots->used_slots;
- int slot = atomic_read(&slots->lru_slot);
struct kvm_memory_slot *memslots = slots->memslots;
+ struct kvm_memory_slot *slot;
if (unlikely(!slots->used_slots))
return NULL;
- if (gfn >= memslots[slot].base_gfn &&
- gfn < memslots[slot].base_gfn + memslots[slot].npages)
- return &memslots[slot];
-
while (start < end) {
- slot = start + (end - start) / 2;
+ int slot = start + (end - start) / 2;
if (gfn >= memslots[slot].base_gfn)
end = slot;
@@ -1187,19 +1245,37 @@ search_memslots(struct kvm_memslots *slots, gfn_t gfn)
start = slot + 1;
}
- if (start < slots->used_slots && gfn >= memslots[start].base_gfn &&
- gfn < memslots[start].base_gfn + memslots[start].npages) {
- atomic_set(&slots->lru_slot, start);
- return &memslots[start];
+ slot = try_get_memslot(slots, start, gfn);
+ if (slot) {
+ *index = start;
+ return slot;
}
return NULL;
}
+/*
+ * __gfn_to_memslot() and its descendants are here because it is called from
+ * non-modular code in arch/powerpc/kvm/book3s_64_vio{,_hv}.c. gfn_to_memslot()
+ * itself isn't here as an inline because that would bloat other code too much.
+ */
static inline struct kvm_memory_slot *
__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
{
- return search_memslots(slots, gfn);
+ struct kvm_memory_slot *slot;
+ int slot_index = atomic_read(&slots->last_used_slot);
+
+ slot = try_get_memslot(slots, slot_index, gfn);
+ if (slot)
+ return slot;
+
+ slot = search_memslots(slots, gfn, &slot_index);
+ if (slot) {
+ atomic_set(&slots->last_used_slot, slot_index);
+ return slot;
+ }
+
+ return NULL;
}
static inline unsigned long
@@ -1273,56 +1349,66 @@ struct _kvm_stats_desc {
char name[KVM_STATS_NAME_SIZE];
};
-#define STATS_DESC_COMMON(type, unit, base, exp) \
+#define STATS_DESC_COMMON(type, unit, base, exp, sz, bsz) \
.flags = type | unit | base | \
BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) | \
BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) | \
BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK), \
.exponent = exp, \
- .size = 1
+ .size = sz, \
+ .bucket_size = bsz
-#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp) \
+#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \
{ \
{ \
- STATS_DESC_COMMON(type, unit, base, exp), \
+ STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \
.offset = offsetof(struct kvm_vm_stat, generic.stat) \
}, \
.name = #stat, \
}
-#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp) \
+#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \
{ \
{ \
- STATS_DESC_COMMON(type, unit, base, exp), \
+ STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \
.offset = offsetof(struct kvm_vcpu_stat, generic.stat) \
}, \
.name = #stat, \
}
-#define VM_STATS_DESC(stat, type, unit, base, exp) \
+#define VM_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \
{ \
{ \
- STATS_DESC_COMMON(type, unit, base, exp), \
+ STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \
.offset = offsetof(struct kvm_vm_stat, stat) \
}, \
.name = #stat, \
}
-#define VCPU_STATS_DESC(stat, type, unit, base, exp) \
+#define VCPU_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \
{ \
{ \
- STATS_DESC_COMMON(type, unit, base, exp), \
+ STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \
.offset = offsetof(struct kvm_vcpu_stat, stat) \
}, \
.name = #stat, \
}
/* SCOPE: VM, VM_GENERIC, VCPU, VCPU_GENERIC */
-#define STATS_DESC(SCOPE, stat, type, unit, base, exp) \
- SCOPE##_STATS_DESC(stat, type, unit, base, exp)
+#define STATS_DESC(SCOPE, stat, type, unit, base, exp, sz, bsz) \
+ SCOPE##_STATS_DESC(stat, type, unit, base, exp, sz, bsz)
#define STATS_DESC_CUMULATIVE(SCOPE, name, unit, base, exponent) \
- STATS_DESC(SCOPE, name, KVM_STATS_TYPE_CUMULATIVE, unit, base, exponent)
+ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_CUMULATIVE, \
+ unit, base, exponent, 1, 0)
#define STATS_DESC_INSTANT(SCOPE, name, unit, base, exponent) \
- STATS_DESC(SCOPE, name, KVM_STATS_TYPE_INSTANT, unit, base, exponent)
+ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_INSTANT, \
+ unit, base, exponent, 1, 0)
#define STATS_DESC_PEAK(SCOPE, name, unit, base, exponent) \
- STATS_DESC(SCOPE, name, KVM_STATS_TYPE_PEAK, unit, base, exponent)
+ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_PEAK, \
+ unit, base, exponent, 1, 0)
+#define STATS_DESC_LINEAR_HIST(SCOPE, name, unit, base, exponent, sz, bsz) \
+ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_LINEAR_HIST, \
+ unit, base, exponent, sz, bsz)
+#define STATS_DESC_LOG_HIST(SCOPE, name, unit, base, exponent, sz) \
+ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_LOG_HIST, \
+ unit, base, exponent, sz, 0)
/* Cumulative counter, read/write */
#define STATS_DESC_COUNTER(SCOPE, name) \
@@ -1341,9 +1427,18 @@ struct _kvm_stats_desc {
#define STATS_DESC_TIME_NSEC(SCOPE, name) \
STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_SECONDS, \
KVM_STATS_BASE_POW10, -9)
+/* Linear histogram for time in nanosecond */
+#define STATS_DESC_LINHIST_TIME_NSEC(SCOPE, name, sz, bsz) \
+ STATS_DESC_LINEAR_HIST(SCOPE, name, KVM_STATS_UNIT_SECONDS, \
+ KVM_STATS_BASE_POW10, -9, sz, bsz)
+/* Logarithmic histogram for time in nanosecond */
+#define STATS_DESC_LOGHIST_TIME_NSEC(SCOPE, name, sz) \
+ STATS_DESC_LOG_HIST(SCOPE, name, KVM_STATS_UNIT_SECONDS, \
+ KVM_STATS_BASE_POW10, -9, sz)
#define KVM_GENERIC_VM_STATS() \
- STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush)
+ STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush), \
+ STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush_requests)
#define KVM_GENERIC_VCPU_STATS() \
STATS_DESC_COUNTER(VCPU_GENERIC, halt_successful_poll), \
@@ -1351,13 +1446,62 @@ struct _kvm_stats_desc {
STATS_DESC_COUNTER(VCPU_GENERIC, halt_poll_invalid), \
STATS_DESC_COUNTER(VCPU_GENERIC, halt_wakeup), \
STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_poll_success_ns), \
- STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_ns)
+ STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_ns), \
+ STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_wait_ns), \
+ STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_poll_success_hist, \
+ HALT_POLL_HIST_COUNT), \
+ STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_hist, \
+ HALT_POLL_HIST_COUNT), \
+ STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_wait_hist, \
+ HALT_POLL_HIST_COUNT)
extern struct dentry *kvm_debugfs_dir;
+
ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
const struct _kvm_stats_desc *desc,
void *stats, size_t size_stats,
char __user *user_buffer, size_t size, loff_t *offset);
+
+/**
+ * kvm_stats_linear_hist_update() - Update bucket value for linear histogram
+ * statistics data.
+ *
+ * @data: start address of the stats data
+ * @size: the number of bucket of the stats data
+ * @value: the new value used to update the linear histogram's bucket
+ * @bucket_size: the size (width) of a bucket
+ */
+static inline void kvm_stats_linear_hist_update(u64 *data, size_t size,
+ u64 value, size_t bucket_size)
+{
+ size_t index = div64_u64(value, bucket_size);
+
+ index = min(index, size - 1);
+ ++data[index];
+}
+
+/**
+ * kvm_stats_log_hist_update() - Update bucket value for logarithmic histogram
+ * statistics data.
+ *
+ * @data: start address of the stats data
+ * @size: the number of bucket of the stats data
+ * @value: the new value used to update the logarithmic histogram's bucket
+ */
+static inline void kvm_stats_log_hist_update(u64 *data, size_t size, u64 value)
+{
+ size_t index = fls64(value);
+
+ index = min(index, size - 1);
+ ++data[index];
+}
+
+#define KVM_STATS_LINEAR_HIST_UPDATE(array, value, bsize) \
+ kvm_stats_linear_hist_update(array, ARRAY_SIZE(array), value, bsize)
+#define KVM_STATS_LOG_HIST_UPDATE(array, value) \
+ kvm_stats_log_hist_update(array, ARRAY_SIZE(array), value)
+
+
extern const struct kvm_stats_header kvm_vm_stats_header;
extern const struct _kvm_stats_desc kvm_vm_stats_desc[];
extern const struct kvm_stats_header kvm_vcpu_stats_header;