// SPDX-License-Identifier: GPL-2.0 /* * Test for x86 KVM_SET_PMU_EVENT_FILTER. * * Copyright (C) 2022, Google LLC. * * This work is licensed under the terms of the GNU GPL, version 2. * * Verifies the expected behavior of allow lists and deny lists for * virtual PMU events. */ #define _GNU_SOURCE /* for program_invocation_short_name */ #include "test_util.h" #include "kvm_util.h" #include "processor.h" /* * In lieu of copying perf_event.h into tools... */ #define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) union cpuid10_eax { struct { unsigned int version_id:8; unsigned int num_counters:8; unsigned int bit_width:8; unsigned int mask_length:8; } split; unsigned int full; }; union cpuid10_ebx { struct { unsigned int no_unhalted_core_cycles:1; unsigned int no_instructions_retired:1; unsigned int no_unhalted_reference_cycles:1; unsigned int no_llc_reference:1; unsigned int no_llc_misses:1; unsigned int no_branch_instruction_retired:1; unsigned int no_branch_misses_retired:1; } split; unsigned int full; }; /* End of stuff taken from perf_event.h. */ /* Oddly, this isn't in perf_event.h. */ #define ARCH_PERFMON_BRANCHES_RETIRED 5 #define VCPU_ID 0 #define NUM_BRANCHES 42 /* * This is how the event selector and unit mask are stored in an AMD * core performance event-select register. Intel's format is similar, * but the event selector is only 8 bits. */ #define EVENT(select, umask) ((select & 0xf00UL) << 24 | (select & 0xff) | \ (umask & 0xff) << 8) /* * "Branch instructions retired", from the Intel SDM, volume 3, * "Pre-defined Architectural Performance Events." */ #define INTEL_BR_RETIRED EVENT(0xc4, 0) /* * "Retired branch instructions", from Processor Programming Reference * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors, * Preliminary Processor Programming Reference (PPR) for AMD Family * 17h Model 31h, Revision B0 Processors, and Preliminary Processor * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision * B1 Processors Volume 1 of 2. */ #define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0) /* * This event list comprises Intel's eight architectural events plus * AMD's "retired branch instructions" for Zen[123] (and possibly * other AMD CPUs). */ static const uint64_t event_list[] = { EVENT(0x3c, 0), EVENT(0xc0, 0), EVENT(0x3c, 1), EVENT(0x2e, 0x4f), EVENT(0x2e, 0x41), EVENT(0xc4, 0), EVENT(0xc5, 0), EVENT(0xa4, 1), AMD_ZEN_BR_RETIRED, }; /* * If we encounter a #GP during the guest PMU sanity check, then the guest * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0). */ static void guest_gp_handler(struct ex_regs *regs) { GUEST_SYNC(0); } /* * Check that we can write a new value to the given MSR and read it back. * The caller should provide a non-empty set of bits that are safe to flip. * * Return on success. GUEST_SYNC(0) on error. */ static void check_msr(uint32_t msr, uint64_t bits_to_flip) { uint64_t v = rdmsr(msr) ^ bits_to_flip; wrmsr(msr, v); if (rdmsr(msr) != v) GUEST_SYNC(0); v ^= bits_to_flip; wrmsr(msr, v); if (rdmsr(msr) != v) GUEST_SYNC(0); } static void intel_guest_code(void) { check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1); check_msr(MSR_P6_EVNTSEL0, 0xffff); check_msr(MSR_IA32_PMC0, 0xffff); GUEST_SYNC(1); for (;;) { uint64_t br0, br1; wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED); wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 1); br0 = rdmsr(MSR_IA32_PMC0); __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); br1 = rdmsr(MSR_IA32_PMC0); GUEST_SYNC(br1 - br0); } } /* * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23], * this code uses the always-available, legacy K7 PMU MSRs, which alias to * the first four of the six extended core PMU MSRs. */ static void amd_guest_code(void) { check_msr(MSR_K7_EVNTSEL0, 0xffff); check_msr(MSR_K7_PERFCTR0, 0xffff); GUEST_SYNC(1); for (;;) { uint64_t br0, br1; wrmsr(MSR_K7_EVNTSEL0, 0); wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED); br0 = rdmsr(MSR_K7_PERFCTR0); __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); br1 = rdmsr(MSR_K7_PERFCTR0); GUEST_SYNC(br1 - br0); } } /* * Run the VM to the next GUEST_SYNC(value), and return the value passed * to the sync. Any other exit from the guest is fatal. */ static uint64_t run_vm_to_sync(struct kvm_vm *vm) { struct kvm_run *run = vcpu_state(vm, VCPU_ID); struct ucall uc; vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Exit_reason other than KVM_EXIT_IO: %u (%s)\n", run->exit_reason, exit_reason_str(run->exit_reason)); get_ucall(vm, VCPU_ID, &uc); TEST_ASSERT(uc.cmd == UCALL_SYNC, "Received ucall other than UCALL_SYNC: %lu", uc.cmd); return uc.args[1]; } /* * In a nested environment or if the vPMU is disabled, the guest PMU * might not work as architected (accessing the PMU MSRs may raise * #GP, or writes could simply be discarded). In those situations, * there is no point in running these tests. The guest code will perform * a sanity check and then GUEST_SYNC(success). In the case of failure, * the behavior of the guest on resumption is undefined. */ static bool sanity_check_pmu(struct kvm_vm *vm) { bool success; vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); success = run_vm_to_sync(vm); vm_install_exception_handler(vm, GP_VECTOR, NULL); return success; } static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents) { struct kvm_pmu_event_filter *f; int size = sizeof(*f) + nevents * sizeof(f->events[0]); f = malloc(size); TEST_ASSERT(f, "Out of memory"); memset(f, 0, size); f->nevents = nevents; return f; } static struct kvm_pmu_event_filter * create_pmu_event_filter(const uint64_t event_list[], int nevents, uint32_t action) { struct kvm_pmu_event_filter *f; int i; f = alloc_pmu_event_filter(nevents); f->action = action; for (i = 0; i < nevents; i++) f->events[i] = event_list[i]; return f; } static struct kvm_pmu_event_filter *event_filter(uint32_t action) { return create_pmu_event_filter(event_list, ARRAY_SIZE(event_list), action); } /* * Remove the first occurrence of 'event' (if any) from the filter's * event list. */ static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f, uint64_t event) { bool found = false; int i; for (i = 0; i < f->nevents; i++) { if (found) f->events[i - 1] = f->events[i]; else found = f->events[i] == event; } if (found) f->nevents--; return f; } static void test_without_filter(struct kvm_vm *vm) { uint64_t count = run_vm_to_sync(vm); if (count != NUM_BRANCHES) pr_info("%s: Branch instructions retired = %lu (expected %u)\n", __func__, count, NUM_BRANCHES); TEST_ASSERT(count, "Allowed PMU event is not counting"); } static uint64_t test_with_filter(struct kvm_vm *vm, struct kvm_pmu_event_filter *f) { vm_ioctl(vm, KVM_SET_PMU_EVENT_FILTER, (void *)f); return run_vm_to_sync(vm); } static void test_amd_deny_list(struct kvm_vm *vm) { uint64_t event = EVENT(0x1C2, 0); struct kvm_pmu_event_filter *f; uint64_t count; f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY); count = test_with_filter(vm, f); free(f); if (count != NUM_BRANCHES) pr_info("%s: Branch instructions retired = %lu (expected %u)\n", __func__, count, NUM_BRANCHES); TEST_ASSERT(count, "Allowed PMU event is not counting"); } static void test_member_deny_list(struct kvm_vm *vm) { struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); uint64_t count = test_with_filter(vm, f); free(f); if (count) pr_info("%s: Branch instructions retired = %lu (expected 0)\n", __func__, count); TEST_ASSERT(!count, "Disallowed PMU Event is counting"); } static void test_member_allow_list(struct kvm_vm *vm) { struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW); uint64_t count = test_with_filter(vm, f); free(f); if (count != NUM_BRANCHES) pr_info("%s: Branch instructions retired = %lu (expected %u)\n", __func__, count, NUM_BRANCHES); TEST_ASSERT(count, "Allowed PMU event is not counting"); } static void test_not_member_deny_list(struct kvm_vm *vm) { struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); uint64_t count; remove_event(f, INTEL_BR_RETIRED); remove_event(f, AMD_ZEN_BR_RETIRED); count = test_with_filter(vm, f); free(f); if (count != NUM_BRANCHES) pr_info("%s: Branch instructions retired = %lu (expected %u)\n", __func__, count, NUM_BRANCHES); TEST_ASSERT(count, "Allowed PMU event is not counting"); } static void test_not_member_allow_list(struct kvm_vm *vm) { struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW); uint64_t count; remove_event(f, INTEL_BR_RETIRED); remove_event(f, AMD_ZEN_BR_RETIRED); count = test_with_filter(vm, f); free(f); if (count) pr_info("%s: Branch instructions retired = %lu (expected 0)\n", __func__, count); TEST_ASSERT(!count, "Disallowed PMU Event is counting"); } /* * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU. * * Note that KVM_CAP_PMU_CAPABILITY must be invoked prior to creating VCPUs. */ static void test_pmu_config_disable(void (*guest_code)(void)) { int r; struct kvm_vm *vm; struct kvm_enable_cap cap = { 0 }; r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY); if (!(r & KVM_PMU_CAP_DISABLE)) return; vm = vm_create_without_vcpus(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES); cap.cap = KVM_CAP_PMU_CAPABILITY; cap.args[0] = KVM_PMU_CAP_DISABLE; TEST_ASSERT(!vm_enable_cap(vm, &cap), "Failed to set KVM_PMU_CAP_DISABLE."); vm_vcpu_add_default(vm, VCPU_ID, guest_code); vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); TEST_ASSERT(!sanity_check_pmu(vm), "Guest should not be able to use disabled PMU."); kvm_vm_free(vm); } /* * Check for a non-zero PMU version, at least one general-purpose * counter per logical processor, an EBX bit vector of length greater * than 5, and EBX[5] clear. */ static bool check_intel_pmu_leaf(struct kvm_cpuid_entry2 *entry) { union cpuid10_eax eax = { .full = entry->eax }; union cpuid10_ebx ebx = { .full = entry->ebx }; return eax.split.version_id && eax.split.num_counters > 0 && eax.split.mask_length > ARCH_PERFMON_BRANCHES_RETIRED && !ebx.split.no_branch_instruction_retired; } /* * Note that CPUID leaf 0xa is Intel-specific. This leaf should be * clear on AMD hardware. */ static bool use_intel_pmu(void) { struct kvm_cpuid_entry2 *entry; entry = kvm_get_supported_cpuid_index(0xa, 0); return is_intel_cpu() && entry && check_intel_pmu_leaf(entry); } static bool is_zen1(uint32_t eax) { return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f; } static bool is_zen2(uint32_t eax) { return x86_family(eax) == 0x17 && x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f; } static bool is_zen3(uint32_t eax) { return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f; } /* * Determining AMD support for a PMU event requires consulting the AMD * PPR for the CPU or reference material derived therefrom. The AMD * test code herein has been verified to work on Zen1, Zen2, and Zen3. * * Feel free to add more AMD CPUs that are documented to support event * select 0xc2 umask 0 as "retired branch instructions." */ static bool use_amd_pmu(void) { struct kvm_cpuid_entry2 *entry; entry = kvm_get_supported_cpuid_index(1, 0); return is_amd_cpu() && entry && (is_zen1(entry->eax) || is_zen2(entry->eax) || is_zen3(entry->eax)); } int main(int argc, char *argv[]) { void (*guest_code)(void) = NULL; struct kvm_vm *vm; int r; /* Tell stdout not to buffer its content */ setbuf(stdout, NULL); r = kvm_check_cap(KVM_CAP_PMU_EVENT_FILTER); if (!r) { print_skip("KVM_CAP_PMU_EVENT_FILTER not supported"); exit(KSFT_SKIP); } if (use_intel_pmu()) guest_code = intel_guest_code; else if (use_amd_pmu()) guest_code = amd_guest_code; if (!guest_code) { print_skip("Don't know how to test this guest PMU"); exit(KSFT_SKIP); } vm = vm_create_default(VCPU_ID, 0, guest_code); vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); if (!sanity_check_pmu(vm)) { print_skip("Guest PMU is not functional"); exit(KSFT_SKIP); } if (use_amd_pmu()) test_amd_deny_list(vm); test_without_filter(vm); test_member_deny_list(vm); test_member_allow_list(vm); test_not_member_deny_list(vm); test_not_member_allow_list(vm); kvm_vm_free(vm); test_pmu_config_disable(guest_code); return 0; }