diff options
-rw-r--r-- | arch/x86/kvm/svm.c | 89 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 22 | ||||
-rwxr-xr-x | tools/kvm/kvm_stat/kvm_stat | 123 | ||||
-rw-r--r-- | tools/kvm/kvm_stat/kvm_stat.txt | 6 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 12 |
5 files changed, 223 insertions, 29 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6e3095d1bad4..03df7c1da581 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -189,6 +189,7 @@ struct vcpu_svm { struct nested_state nested; bool nmi_singlestep; + u64 nmi_singlestep_guest_rflags; unsigned int3_injected; unsigned long int3_rip; @@ -963,6 +964,18 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0); } +static void disable_nmi_singlestep(struct vcpu_svm *svm) +{ + svm->nmi_singlestep = false; + if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) { + /* Clear our flags if they were not set by the guest */ + if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF)) + svm->vmcb->save.rflags &= ~X86_EFLAGS_TF; + if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF)) + svm->vmcb->save.rflags &= ~X86_EFLAGS_RF; + } +} + /* Note: * This hash table is used to map VM_ID to a struct kvm_arch, * when handling AMD IOMMU GALOG notification to schedule in @@ -1712,11 +1725,24 @@ static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu) static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) { - return to_svm(vcpu)->vmcb->save.rflags; + struct vcpu_svm *svm = to_svm(vcpu); + unsigned long rflags = svm->vmcb->save.rflags; + + if (svm->nmi_singlestep) { + /* Hide our flags if they were not set by the guest */ + if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF)) + rflags &= ~X86_EFLAGS_TF; + if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF)) + rflags &= ~X86_EFLAGS_RF; + } + return rflags; } static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { + if (to_svm(vcpu)->nmi_singlestep) + rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); + /* * Any change of EFLAGS.VM is accompanied by a reload of SS * (caused by either a task switch or an inter-privilege IRET), @@ -2111,10 +2137,7 @@ static int db_interception(struct vcpu_svm *svm) } if (svm->nmi_singlestep) { - svm->nmi_singlestep = false; - if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) - svm->vmcb->save.rflags &= - ~(X86_EFLAGS_TF | X86_EFLAGS_RF); + disable_nmi_singlestep(svm); } if (svm->vcpu.guest_debug & @@ -2533,6 +2556,31 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; } +/* DB exceptions for our internal use must not cause vmexit */ +static int nested_svm_intercept_db(struct vcpu_svm *svm) +{ + unsigned long dr6; + + /* if we're not singlestepping, it's not ours */ + if (!svm->nmi_singlestep) + return NESTED_EXIT_DONE; + + /* if it's not a singlestep exception, it's not ours */ + if (kvm_get_dr(&svm->vcpu, 6, &dr6)) + return NESTED_EXIT_DONE; + if (!(dr6 & DR6_BS)) + return NESTED_EXIT_DONE; + + /* if the guest is singlestepping, it should get the vmexit */ + if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) { + disable_nmi_singlestep(svm); + return NESTED_EXIT_DONE; + } + + /* it's ours, the nested hypervisor must not see this one */ + return NESTED_EXIT_HOST; +} + static int nested_svm_exit_special(struct vcpu_svm *svm) { u32 exit_code = svm->vmcb->control.exit_code; @@ -2588,8 +2636,12 @@ static int nested_svm_intercept(struct vcpu_svm *svm) } case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); - if (svm->nested.intercept_exceptions & excp_bits) - vmexit = NESTED_EXIT_DONE; + if (svm->nested.intercept_exceptions & excp_bits) { + if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR) + vmexit = nested_svm_intercept_db(svm); + else + vmexit = NESTED_EXIT_DONE; + } /* async page fault always cause vmexit */ else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) && svm->apf_reason != 0) @@ -4626,10 +4678,17 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) == HF_NMI_MASK) return; /* IRET will cause a vm exit */ + if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0) + return; /* STGI will cause a vm exit */ + + if (svm->nested.exit_required) + return; /* we're not going to run the guest yet */ + /* * Something prevents NMI from been injected. Single step over possible * problem (IRET or exception injection or interrupt shadow) */ + svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu); svm->nmi_singlestep = true; svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); } @@ -4770,6 +4829,22 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(svm->nested.exit_required)) return; + /* + * Disable singlestep if we're injecting an interrupt/exception. + * We don't want our modified rflags to be pushed on the stack where + * we might not be able to easily reset them if we disabled NMI + * singlestep later. + */ + if (svm->nmi_singlestep && svm->vmcb->control.event_inj) { + /* + * Event injection happens before external interrupts cause a + * vmexit and interrupts are disabled here, so smp_send_reschedule + * is enough to force an immediate vmexit. + */ + disable_nmi_singlestep(svm); + smp_send_reschedule(vcpu->cpu); + } + pre_svm_run(svm); sync_lapic_to_cr8(vcpu); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c6dec552b28f..e8b61ad84a8e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7653,7 +7653,10 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) unsigned long type, types; gva_t gva; struct x86_exception e; - int vpid; + struct { + u64 vpid; + u64 gla; + } operand; if (!(vmx->nested.nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_VPID) || @@ -7683,17 +7686,28 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), vmx_instruction_info, false, &gva)) return 1; - if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vpid, - sizeof(u32), &e)) { + if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand, + sizeof(operand), &e)) { kvm_inject_page_fault(vcpu, &e); return 1; } + if (operand.vpid >> 16) { + nested_vmx_failValid(vcpu, + VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); + return kvm_skip_emulated_instruction(vcpu); + } switch (type) { case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: + if (is_noncanonical_address(operand.gla)) { + nested_vmx_failValid(vcpu, + VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); + return kvm_skip_emulated_instruction(vcpu); + } + /* fall through */ case VMX_VPID_EXTENT_SINGLE_CONTEXT: case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL: - if (!vpid) { + if (!operand.vpid) { nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); return kvm_skip_emulated_instruction(vcpu); diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 2cf5176bbeee..dd8f00cfb8b4 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -662,7 +662,7 @@ class TracepointProvider(Provider): self.setup_traces() self.fields = self._fields - def read(self): + def read(self, by_guest=0): """Returns 'event name: current value' for all enabled events.""" ret = defaultdict(int) for group in self.group_leaders: @@ -681,12 +681,14 @@ class TracepointProvider(Provider): class DebugfsProvider(Provider): """Provides data from the files that KVM creates in the kvm debugfs folder.""" - def __init__(self, pid, fields_filter): + def __init__(self, pid, fields_filter, include_past): self.update_fields(fields_filter) self._baseline = {} self.do_read = True self.paths = [] self.pid = pid + if include_past: + self.restore() def get_available_fields(self): """"Returns a list of available fields. @@ -729,8 +731,15 @@ class DebugfsProvider(Provider): self.do_read = True self.reset() - def read(self, reset=0): - """Returns a dict with format:'file name / field -> current value'.""" + def read(self, reset=0, by_guest=0): + """Returns a dict with format:'file name / field -> current value'. + + Parameter 'reset': + 0 plain read + 1 reset field counts to 0 + 2 restore the original field counts + + """ results = {} # If no debugfs filtering support is available, then don't read. @@ -747,12 +756,22 @@ class DebugfsProvider(Provider): for field in self._fields: value = self.read_field(field, path) key = path + field - if reset: + if reset == 1: self._baseline[key] = value + if reset == 2: + self._baseline[key] = 0 if self._baseline.get(key, -1) == -1: self._baseline[key] = value - results[field] = (results.get(field, 0) + value - - self._baseline.get(key, 0)) + increment = (results.get(field, 0) + value - + self._baseline.get(key, 0)) + if by_guest: + pid = key.split('-')[0] + if pid in results: + results[pid] += increment + else: + results[pid] = increment + else: + results[field] = increment return results @@ -771,6 +790,11 @@ class DebugfsProvider(Provider): self._baseline = {} self.read(1) + def restore(self): + """Reset field counters""" + self._baseline = {} + self.read(2) + class Stats(object): """Manages the data providers and the data they provide. @@ -791,7 +815,8 @@ class Stats(object): providers = [] if options.debugfs: - providers.append(DebugfsProvider(options.pid, options.fields)) + providers.append(DebugfsProvider(options.pid, options.fields, + options.dbgfs_include_past)) if options.tracepoints or not providers: providers.append(TracepointProvider(options.pid, options.fields)) @@ -832,18 +857,44 @@ class Stats(object): for provider in self.providers: provider.pid = self._pid_filter - def get(self): + def get(self, by_guest=0): """Returns a dict with field -> (value, delta to last value) of all provider data.""" for provider in self.providers: - new = provider.read() - for key in provider.fields: + new = provider.read(by_guest=by_guest) + for key in new if by_guest else provider.fields: oldval = self.values.get(key, (0, 0))[0] newval = new.get(key, 0) newdelta = newval - oldval self.values[key] = (newval, newdelta) return self.values + def toggle_display_guests(self, to_pid): + """Toggle between collection of stats by individual event and by + guest pid + + Events reported by DebugfsProvider change when switching to/from + reading by guest values. Hence we have to remove the excess event + names from self.values. + + """ + if any(isinstance(ins, TracepointProvider) for ins in self.providers): + return 1 + if to_pid: + for provider in self.providers: + if isinstance(provider, DebugfsProvider): + for key in provider.fields: + if key in self.values.keys(): + del self.values[key] + else: + oldvals = self.values.copy() + for key in oldvals: + if key.isdigit(): + del self.values[key] + # Update oldval (see get()) + self.get(to_pid) + return 0 + DELAY_DEFAULT = 3.0 MAX_GUEST_NAME_LEN = 48 MAX_REGEX_LEN = 44 @@ -859,6 +910,7 @@ class Tui(object): self._delay_initial = 0.25 self._delay_regular = DELAY_DEFAULT self._sorting = SORT_DEFAULT + self._display_guests = 0 def __enter__(self): """Initialises curses for later use. Based on curses.wrapper @@ -1007,8 +1059,12 @@ class Tui(object): if len(regex) > MAX_REGEX_LEN: regex = regex[:MAX_REGEX_LEN] + '...' self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex)) + if self._display_guests: + col_name = 'Guest Name' + else: + col_name = 'Event' self.screen.addstr(2, 1, '%-40s %10s%7s %8s' % - ('Event', 'Total', '%Total', 'CurAvg/s'), + (col_name, 'Total', '%Total', 'CurAvg/s'), curses.A_STANDOUT) self.screen.addstr(4, 1, 'Collecting data...') self.screen.refresh() @@ -1017,7 +1073,7 @@ class Tui(object): row = 3 self.screen.move(row, 0) self.screen.clrtobot() - stats = self.stats.get() + stats = self.stats.get(self._display_guests) def sortCurAvg(x): # sort by current events if available @@ -1045,6 +1101,8 @@ class Tui(object): break if values[0] is not None: cur = int(round(values[1] / sleeptime)) if values[1] else '' + if self._display_guests: + key = self.get_gname_from_pid(key) self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key, values[0], values[0] * 100 / total, cur)) @@ -1053,9 +1111,26 @@ class Tui(object): self.screen.addstr(4, 1, 'No matching events reported yet') self.screen.refresh() + def show_msg(self, text): + """Display message centered text and exit on key press""" + hint = 'Press any key to continue' + curses.cbreak() + self.screen.erase() + (x, term_width) = self.screen.getmaxyx() + row = 2 + for line in text: + start = (term_width - len(line)) / 2 + self.screen.addstr(row, start, line) + row += 1 + self.screen.addstr(row + 1, (term_width - len(hint)) / 2, hint, + curses.A_STANDOUT) + self.screen.getkey() + def show_help_interactive(self): """Display help with list of interactive commands""" - msg = (' c clear filter', + msg = (' b toggle events by guests (debugfs only, honors' + ' filters)', + ' c clear filter', ' f filter by regular expression', ' g filter by guest name', ' h display interactive commands reference', @@ -1195,7 +1270,7 @@ class Tui(object): 'This might limit the shown data to the trace ' 'statistics.') self.screen.addstr(5, 0, msg) - self.print_all_gnames() + self.print_all_gnames(7) curses.echo() self.screen.addstr(3, 0, "Guest [ENTER or guest]: ") gname = self.screen.getstr() @@ -1236,6 +1311,14 @@ class Tui(object): sleeptime = self._delay_regular try: char = self.screen.getkey() + if char == 'b': + self._display_guests = not self._display_guests + if self.stats.toggle_display_guests(self._display_guests): + self.show_msg(['Command not available with tracepoints' + ' enabled', 'Restart with debugfs only ' + '(see option \'-d\') and try again!']) + self._display_guests = not self._display_guests + self.refresh_header() if char == 'c': self.stats.fields_filter = DEFAULT_REGEX self.refresh_header(0) @@ -1270,6 +1353,8 @@ class Tui(object): sleeptime = self._delay_initial if char == 'x': self.update_drilldown() + # prevents display of current values on next refresh + self.stats.get() except KeyboardInterrupt: break except curses.error: @@ -1337,6 +1422,7 @@ Requirements: the large number of files that are possibly opened. Interactive Commands: + b toggle events by guests (debugfs only, honors filters) c clear filter f filter by regular expression g filter by guest name @@ -1381,6 +1467,13 @@ Press any other key to refresh statistics immediately. dest='once', help='run in batch mode for one second', ) + optparser.add_option('-i', '--debugfs-include-past', + action='store_true', + default=False, + dest='dbgfs_include_past', + help='include all available data on past events for ' + 'debugfs', + ) optparser.add_option('-l', '--log', action='store_true', default=False, diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt index e24ac464d341..e5cf836be8a1 100644 --- a/tools/kvm/kvm_stat/kvm_stat.txt +++ b/tools/kvm/kvm_stat/kvm_stat.txt @@ -29,6 +29,8 @@ meaning of events. INTERACTIVE COMMANDS -------------------- [horizontal] +*b*:: toggle events by guests (debugfs only, honors filters) + *c*:: clear filter *f*:: filter by regular expression @@ -70,6 +72,10 @@ OPTIONS --debugfs:: retrieve statistics from debugfs +-i:: +--debugfs-include-past:: + include all available data on past events for debugfs + -p<pid>:: --pid=<pid>:: limit statistics to one virtual machine (pid) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f0fe9d02f6bb..19f0ecb9b93e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -73,17 +73,17 @@ MODULE_LICENSE("GPL"); /* Architectures should define their poll value according to the halt latency */ unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT; -module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); +module_param(halt_poll_ns, uint, 0644); EXPORT_SYMBOL_GPL(halt_poll_ns); /* Default doubles per-vcpu halt_poll_ns. */ unsigned int halt_poll_ns_grow = 2; -module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR); +module_param(halt_poll_ns_grow, uint, 0644); EXPORT_SYMBOL_GPL(halt_poll_ns_grow); /* Default resets per-vcpu halt_poll_ns . */ unsigned int halt_poll_ns_shrink; -module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR); +module_param(halt_poll_ns_shrink, uint, 0644); EXPORT_SYMBOL_GPL(halt_poll_ns_shrink); /* @@ -3191,6 +3191,12 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return PTR_ERR(file); } + /* + * Don't call kvm_put_kvm anymore at this point; file->f_op is + * already set, with ->release() being kvm_vm_release(). In error + * cases it will be called by the final fput(file) and will take + * care of doing kvm_put_kvm(kvm). + */ if (kvm_create_vm_debugfs(kvm, r) < 0) { put_unused_fd(r); fput(file); |