aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r--arch/x86/kvm/svm.c89
-rw-r--r--arch/x86/kvm/vmx.c22
-rwxr-xr-xtools/kvm/kvm_stat/kvm_stat123
-rw-r--r--tools/kvm/kvm_stat/kvm_stat.txt6
-rw-r--r--virt/kvm/kvm_main.c12
5 files changed, 223 insertions, 29 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6e3095d1bad4..03df7c1da581 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -189,6 +189,7 @@ struct vcpu_svm {
struct nested_state nested;
bool nmi_singlestep;
+ u64 nmi_singlestep_guest_rflags;
unsigned int3_injected;
unsigned long int3_rip;
@@ -963,6 +964,18 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
}
+static void disable_nmi_singlestep(struct vcpu_svm *svm)
+{
+ svm->nmi_singlestep = false;
+ if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
+ /*
+ * TF and RF were forced on in the VMCB when NMI singlestep was
+ * enabled (see enable_nmi_window()). Clear each of them again
+ * unless the rflags snapshot taken at that point shows that the
+ * guest had set the flag itself.
+ */
+ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
+ svm->vmcb->save.rflags &= ~X86_EFLAGS_TF;
+ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
+ svm->vmcb->save.rflags &= ~X86_EFLAGS_RF;
+ }
+}
+
/* Note:
* This hash table is used to map VM_ID to a struct kvm_arch,
* when handling AMD IOMMU GALOG notification to schedule in
@@ -1712,11 +1725,24 @@ static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
{
- return to_svm(vcpu)->vmcb->save.rflags;
+ struct vcpu_svm *svm = to_svm(vcpu);
+ unsigned long rflags = svm->vmcb->save.rflags;
+
+ if (svm->nmi_singlestep) {
+ /*
+ * While NMI singlestep is active, hide TF/RF in the returned
+ * value unless the saved guest rflags had them set, so that
+ * callers do not see the bits added for singlestepping.
+ */
+ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
+ rflags &= ~X86_EFLAGS_TF;
+ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
+ rflags &= ~X86_EFLAGS_RF;
+ }
+ return rflags;
}
static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
+ if (to_svm(vcpu)->nmi_singlestep)
+ rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
+
/*
* Any change of EFLAGS.VM is accompanied by a reload of SS
* (caused by either a task switch or an inter-privilege IRET),
@@ -2111,10 +2137,7 @@ static int db_interception(struct vcpu_svm *svm)
}
if (svm->nmi_singlestep) {
- svm->nmi_singlestep = false;
- if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
- svm->vmcb->save.rflags &=
- ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+ disable_nmi_singlestep(svm);
}
if (svm->vcpu.guest_debug &
@@ -2533,6 +2556,31 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}
+/*
+ * DB exceptions for our internal use must not cause vmexit.
+ *
+ * Called from nested_svm_intercept() for a #DB exit that the nested
+ * hypervisor has asked to intercept. Returns NESTED_EXIT_HOST only when
+ * NMI singlestep is active, DR6 reports a singlestep trap (DR6_BS) and
+ * the guest had not set TF itself; in every other case, including a
+ * failed DR6 read, returns NESTED_EXIT_DONE so the nested hypervisor
+ * gets the vmexit.
+ */
+static int nested_svm_intercept_db(struct vcpu_svm *svm)
+{
+ unsigned long dr6;
+
+ /* if we're not singlestepping, it's not ours */
+ if (!svm->nmi_singlestep)
+ return NESTED_EXIT_DONE;
+
+ /* if it's not a singlestep exception, it's not ours */
+ if (kvm_get_dr(&svm->vcpu, 6, &dr6))
+ return NESTED_EXIT_DONE;
+ if (!(dr6 & DR6_BS))
+ return NESTED_EXIT_DONE;
+
+ /*
+ * if the guest is singlestepping, it should get the vmexit; NMI
+ * singlestep is turned off first
+ */
+ if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
+ disable_nmi_singlestep(svm);
+ return NESTED_EXIT_DONE;
+ }
+
+ /* it's ours, the nested hypervisor must not see this one */
+ return NESTED_EXIT_HOST;
+}
+
static int nested_svm_exit_special(struct vcpu_svm *svm)
{
u32 exit_code = svm->vmcb->control.exit_code;
@@ -2588,8 +2636,12 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
}
case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
- if (svm->nested.intercept_exceptions & excp_bits)
- vmexit = NESTED_EXIT_DONE;
+ if (svm->nested.intercept_exceptions & excp_bits) {
+ if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
+ vmexit = nested_svm_intercept_db(svm);
+ else
+ vmexit = NESTED_EXIT_DONE;
+ }
/* async page fault always cause vmexit */
else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
svm->apf_reason != 0)
@@ -4626,10 +4678,17 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
== HF_NMI_MASK)
return; /* IRET will cause a vm exit */
+ if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0)
+ return; /* STGI will cause a vm exit */
+
+ if (svm->nested.exit_required)
+ return; /* we're not going to run the guest yet */
+
/*
* Something prevents NMI from been injected. Single step over possible
* problem (IRET or exception injection or interrupt shadow)
*/
+ svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu);
svm->nmi_singlestep = true;
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
}
@@ -4770,6 +4829,22 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(svm->nested.exit_required))
return;
+ /*
+ * Disable singlestep if we're injecting an interrupt/exception.
+ * We don't want our modified rflags to be pushed on the stack where
+ * we might not be able to easily reset them if we disabled NMI
+ * singlestep later.
+ */
+ if (svm->nmi_singlestep && svm->vmcb->control.event_inj) {
+ /*
+ * Event injection happens before external interrupts cause a
+ * vmexit and interrupts are disabled here, so smp_send_reschedule
+ * is enough to force an immediate vmexit.
+ */
+ disable_nmi_singlestep(svm);
+ smp_send_reschedule(vcpu->cpu);
+ }
+
pre_svm_run(svm);
sync_lapic_to_cr8(vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c6dec552b28f..e8b61ad84a8e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7653,7 +7653,10 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
unsigned long type, types;
gva_t gva;
struct x86_exception e;
- int vpid;
+ struct {
+ u64 vpid;
+ u64 gla;
+ } operand;
if (!(vmx->nested.nested_vmx_secondary_ctls_high &
SECONDARY_EXEC_ENABLE_VPID) ||
@@ -7683,17 +7686,28 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
vmx_instruction_info, false, &gva))
return 1;
- if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vpid,
- sizeof(u32), &e)) {
+ if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
+ sizeof(operand), &e)) {
kvm_inject_page_fault(vcpu, &e);
return 1;
}
+ if (operand.vpid >> 16) {
+ nested_vmx_failValid(vcpu,
+ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
switch (type) {
case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
+ if (is_noncanonical_address(operand.gla)) {
+ nested_vmx_failValid(vcpu,
+ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+ /* fall through */
case VMX_VPID_EXTENT_SINGLE_CONTEXT:
case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
- if (!vpid) {
+ if (!operand.vpid) {
nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
return kvm_skip_emulated_instruction(vcpu);
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 2cf5176bbeee..dd8f00cfb8b4 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -662,7 +662,7 @@ class TracepointProvider(Provider):
self.setup_traces()
self.fields = self._fields
- def read(self):
+ def read(self, by_guest=0):
"""Returns 'event name: current value' for all enabled events."""
ret = defaultdict(int)
for group in self.group_leaders:
@@ -681,12 +681,14 @@ class TracepointProvider(Provider):
class DebugfsProvider(Provider):
"""Provides data from the files that KVM creates in the kvm debugfs
folder."""
- def __init__(self, pid, fields_filter):
+ def __init__(self, pid, fields_filter, include_past):
self.update_fields(fields_filter)
self._baseline = {}
self.do_read = True
self.paths = []
self.pid = pid
+ if include_past:
+ self.restore()
def get_available_fields(self):
""""Returns a list of available fields.
@@ -729,8 +731,15 @@ class DebugfsProvider(Provider):
self.do_read = True
self.reset()
- def read(self, reset=0):
- """Returns a dict with format:'file name / field -> current value'."""
+ def read(self, reset=0, by_guest=0):
+ """Returns a dict with format:'file name / field -> current value'.
+
+ Parameter 'reset':
+ 0 plain read
+ 1 reset field counts to 0
+ 2 restore the original field counts
+
+ """
results = {}
# If no debugfs filtering support is available, then don't read.
@@ -747,12 +756,22 @@ class DebugfsProvider(Provider):
for field in self._fields:
value = self.read_field(field, path)
key = path + field
- if reset:
+ if reset == 1:
self._baseline[key] = value
+ if reset == 2:
+ self._baseline[key] = 0
if self._baseline.get(key, -1) == -1:
self._baseline[key] = value
- results[field] = (results.get(field, 0) + value -
- self._baseline.get(key, 0))
+ increment = (results.get(field, 0) + value -
+ self._baseline.get(key, 0))
+ if by_guest:
+ pid = key.split('-')[0]
+ if pid in results:
+ results[pid] += increment
+ else:
+ results[pid] = increment
+ else:
+ results[field] = increment
return results
@@ -771,6 +790,11 @@ class DebugfsProvider(Provider):
self._baseline = {}
self.read(1)
+ def restore(self):
+ """Restore the original field counts (baseline of 0 for every field)"""
+ self._baseline = {}
+ self.read(2)
+
class Stats(object):
"""Manages the data providers and the data they provide.
@@ -791,7 +815,8 @@ class Stats(object):
providers = []
if options.debugfs:
- providers.append(DebugfsProvider(options.pid, options.fields))
+ providers.append(DebugfsProvider(options.pid, options.fields,
+ options.dbgfs_include_past))
if options.tracepoints or not providers:
providers.append(TracepointProvider(options.pid, options.fields))
@@ -832,18 +857,44 @@ class Stats(object):
for provider in self.providers:
provider.pid = self._pid_filter
- def get(self):
+ def get(self, by_guest=0):
"""Returns a dict with field -> (value, delta to last value) of all
provider data."""
for provider in self.providers:
- new = provider.read()
- for key in provider.fields:
+ new = provider.read(by_guest=by_guest)
+ for key in new if by_guest else provider.fields:
oldval = self.values.get(key, (0, 0))[0]
newval = new.get(key, 0)
newdelta = newval - oldval
self.values[key] = (newval, newdelta)
return self.values
+ def toggle_display_guests(self, to_pid):
+ """Toggle between collection of stats by individual event and by
+ guest pid
+
+ Events reported by DebugfsProvider change when switching to/from
+ reading by guest values. Hence we have to remove the excess event
+ names from self.values.
+
+ Parameter 'to_pid': true when switching to stats by guest pid,
+ false when switching back to stats by individual event.
+
+ Returns 1 without changing anything if a TracepointProvider is
+ among the providers, 0 once the switch has been made.
+
+ """
+ if any(isinstance(ins, TracepointProvider) for ins in self.providers):
+ return 1
+ if to_pid:
+ for provider in self.providers:
+ if isinstance(provider, DebugfsProvider):
+ for key in provider.fields:
+ if key in self.values.keys():
+ del self.values[key]
+ else:
+ oldvals = self.values.copy()
+ for key in oldvals:
+ if key.isdigit():
+ del self.values[key]
+ # Update oldval (see get())
+ self.get(to_pid)
+ return 0
+
DELAY_DEFAULT = 3.0
MAX_GUEST_NAME_LEN = 48
MAX_REGEX_LEN = 44
@@ -859,6 +910,7 @@ class Tui(object):
self._delay_initial = 0.25
self._delay_regular = DELAY_DEFAULT
self._sorting = SORT_DEFAULT
+ self._display_guests = 0
def __enter__(self):
"""Initialises curses for later use. Based on curses.wrapper
@@ -1007,8 +1059,12 @@ class Tui(object):
if len(regex) > MAX_REGEX_LEN:
regex = regex[:MAX_REGEX_LEN] + '...'
self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex))
+ if self._display_guests:
+ col_name = 'Guest Name'
+ else:
+ col_name = 'Event'
self.screen.addstr(2, 1, '%-40s %10s%7s %8s' %
- ('Event', 'Total', '%Total', 'CurAvg/s'),
+ (col_name, 'Total', '%Total', 'CurAvg/s'),
curses.A_STANDOUT)
self.screen.addstr(4, 1, 'Collecting data...')
self.screen.refresh()
@@ -1017,7 +1073,7 @@ class Tui(object):
row = 3
self.screen.move(row, 0)
self.screen.clrtobot()
- stats = self.stats.get()
+ stats = self.stats.get(self._display_guests)
def sortCurAvg(x):
# sort by current events if available
@@ -1045,6 +1101,8 @@ class Tui(object):
break
if values[0] is not None:
cur = int(round(values[1] / sleeptime)) if values[1] else ''
+ if self._display_guests:
+ key = self.get_gname_from_pid(key)
self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
(key, values[0], values[0] * 100 / total,
cur))
@@ -1053,9 +1111,26 @@ class Tui(object):
self.screen.addstr(4, 1, 'No matching events reported yet')
self.screen.refresh()
+    def show_msg(self, text):
+        """Display the lines of 'text' horizontally centered, followed by a
+        hint, and return once a key has been pressed.
+
+        Parameter 'text': iterable of strings, one per screen row.
+
+        """
+        hint = 'Press any key to continue'
+        curses.cbreak()
+        self.screen.erase()
+        # getmaxyx() returns (height, width); only the width is needed.
+        (_, term_width) = self.screen.getmaxyx()
+        row = 2
+        for line in text:
+            # addstr() needs integer coordinates, hence floor division.
+            start = (term_width - len(line)) // 2
+            self.screen.addstr(row, start, line)
+            row += 1
+        self.screen.addstr(row + 1, (term_width - len(hint)) // 2, hint,
+                           curses.A_STANDOUT)
+        self.screen.getkey()
+
def show_help_interactive(self):
"""Display help with list of interactive commands"""
- msg = (' c clear filter',
+ msg = (' b toggle events by guests (debugfs only, honors'
+ ' filters)',
+ ' c clear filter',
' f filter by regular expression',
' g filter by guest name',
' h display interactive commands reference',
@@ -1195,7 +1270,7 @@ class Tui(object):
'This might limit the shown data to the trace '
'statistics.')
self.screen.addstr(5, 0, msg)
- self.print_all_gnames()
+ self.print_all_gnames(7)
curses.echo()
self.screen.addstr(3, 0, "Guest [ENTER or guest]: ")
gname = self.screen.getstr()
@@ -1236,6 +1311,14 @@ class Tui(object):
sleeptime = self._delay_regular
try:
char = self.screen.getkey()
+ if char == 'b':
+ self._display_guests = not self._display_guests
+ if self.stats.toggle_display_guests(self._display_guests):
+ self.show_msg(['Command not available with tracepoints'
+ ' enabled', 'Restart with debugfs only '
+ '(see option \'-d\') and try again!'])
+ self._display_guests = not self._display_guests
+ self.refresh_header()
if char == 'c':
self.stats.fields_filter = DEFAULT_REGEX
self.refresh_header(0)
@@ -1270,6 +1353,8 @@ class Tui(object):
sleeptime = self._delay_initial
if char == 'x':
self.update_drilldown()
+ # prevents display of current values on next refresh
+ self.stats.get()
except KeyboardInterrupt:
break
except curses.error:
@@ -1337,6 +1422,7 @@ Requirements:
the large number of files that are possibly opened.
Interactive Commands:
+ b toggle events by guests (debugfs only, honors filters)
c clear filter
f filter by regular expression
g filter by guest name
@@ -1381,6 +1467,13 @@ Press any other key to refresh statistics immediately.
dest='once',
help='run in batch mode for one second',
)
+ optparser.add_option('-i', '--debugfs-include-past',
+ action='store_true',
+ default=False,
+ dest='dbgfs_include_past',
+ help='include all available data on past events for '
+ 'debugfs',
+ )
optparser.add_option('-l', '--log',
action='store_true',
default=False,
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
index e24ac464d341..e5cf836be8a1 100644
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -29,6 +29,8 @@ meaning of events.
INTERACTIVE COMMANDS
--------------------
[horizontal]
+*b*:: toggle events by guests (debugfs only, honors filters)
+
*c*:: clear filter
*f*:: filter by regular expression
@@ -70,6 +72,10 @@ OPTIONS
--debugfs::
retrieve statistics from debugfs
+-i::
+--debugfs-include-past::
+ include all available data on past events for debugfs
+
-p<pid>::
--pid=<pid>::
limit statistics to one virtual machine (pid)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f0fe9d02f6bb..19f0ecb9b93e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -73,17 +73,17 @@ MODULE_LICENSE("GPL");
/* Architectures should define their poll value according to the halt latency */
unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT;
-module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
+module_param(halt_poll_ns, uint, 0644);
EXPORT_SYMBOL_GPL(halt_poll_ns);
/* Default doubles per-vcpu halt_poll_ns. */
unsigned int halt_poll_ns_grow = 2;
-module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR);
+module_param(halt_poll_ns_grow, uint, 0644);
EXPORT_SYMBOL_GPL(halt_poll_ns_grow);
/* Default resets per-vcpu halt_poll_ns . */
unsigned int halt_poll_ns_shrink;
-module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR);
+module_param(halt_poll_ns_shrink, uint, 0644);
EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
/*
@@ -3191,6 +3191,12 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
return PTR_ERR(file);
}
+ /*
+ * Don't call kvm_put_kvm anymore at this point; file->f_op is
+ * already set, with ->release() being kvm_vm_release(). In error
+ * cases it will be called by the final fput(file) and will take
+ * care of doing kvm_put_kvm(kvm).
+ */
if (kvm_create_vm_debugfs(kvm, r) < 0) {
put_unused_fd(r);
fput(file);