From 2a31b9db153530df4aa02dac8c32837bf5f47019 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 23 Oct 2018 02:36:47 +0200 Subject: kvm: introduce manual dirty log reprotect There are two problems with KVM_GET_DIRTY_LOG. First, and less important, it can take kvm->mmu_lock for an extended period of time. Second, its user can actually see many false positives in some cases. The latter is due to a benign race like this: 1. KVM_GET_DIRTY_LOG returns a set of dirty pages and write protects them. 2. The guest modifies the pages, causing them to be marked ditry. 3. Userspace actually copies the pages. 4. KVM_GET_DIRTY_LOG returns those pages as dirty again, even though they were not written to since (3). This is especially a problem for large guests, where the time between (1) and (3) can be substantial. This patch introduces a new capability which, when enabled, makes KVM_GET_DIRTY_LOG not write-protect the pages it returns. Instead, userspace has to explicitly clear the dirty log bits just before using the content of the page. The new KVM_CLEAR_DIRTY_LOG ioctl can also operate on a 64-page granularity rather than requiring to sync a full memslot; this way, the mmu_lock is taken for small amounts of time, and only a small amount of time will pass between write protection of pages and the sending of their content. Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 ++ tools/testing/selftests/kvm/clear_dirty_log_test.c | 2 ++ tools/testing/selftests/kvm/dirty_log_test.c | 19 +++++++++++++++++++ tools/testing/selftests/kvm/include/kvm_util.h | 2 ++ tools/testing/selftests/kvm/lib/kvm_util.c | 13 +++++++++++++ 5 files changed, 38 insertions(+) create mode 100644 tools/testing/selftests/kvm/clear_dirty_log_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 52bfe5e76907..caaa0d5eba92 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -16,8 +16,10 @@ TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += dirty_log_test +TEST_GEN_PROGS_x86_64 += clear_dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_test +TEST_GEN_PROGS_aarch64 += clear_dirty_log_test TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) LIBKVM += $(LIBKVM_$(UNAME_M)) diff --git a/tools/testing/selftests/kvm/clear_dirty_log_test.c b/tools/testing/selftests/kvm/clear_dirty_log_test.c new file mode 100644 index 000000000000..749336937d37 --- /dev/null +++ b/tools/testing/selftests/kvm/clear_dirty_log_test.c @@ -0,0 +1,2 @@ +#define USE_CLEAR_DIRTY_LOG +#include "dirty_log_test.c" diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index aeff95a91b15..4629c7ccfa28 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -275,6 +275,14 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code); +#ifdef USE_CLEAR_DIRTY_LOG + struct kvm_enable_cap cap = {}; + + cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT; + cap.args[0] = 1; + vm_enable_cap(vm, &cap); +#endif + /* Add an extra memory slot for testing dirty logging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, guest_test_mem, @@ -316,6 +324,10 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, /* Give the vcpu thread some time to dirty some pages */ usleep(interval * 1000); kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); +#ifdef USE_CLEAR_DIRTY_LOG + kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, + DIV_ROUND_UP(host_num_pages, 64) * 64); +#endif vm_dirty_log_verify(bmap); iteration++; sync_global_to_guest(vm, iteration); @@ -392,6 +404,13 @@ int main(int argc, char *argv[]) unsigned int mode; int opt, i; +#ifdef USE_CLEAR_DIRTY_LOG + if (!kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT)) { + fprintf(stderr, "KVM_CLEAR_DIRTY_LOG not available, skipping tests\n"); + exit(KSFT_SKIP); + } +#endif + while ((opt = getopt(argc, argv, "hi:I:o:tm:")) != -1) { switch (opt) { case 'i': diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index a4e59e3b4826..c51bfaba017a 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -58,6 +58,8 @@ void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_restart(struct kvm_vm *vmp, int perm); void kvm_vm_release(struct kvm_vm *vmp); void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log); +void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, + uint64_t first_page, uint32_t num_pages); int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, size_t len); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 1b41e71283d5..c9e94d6503af 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -231,6 +231,19 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) strerror(-ret)); } +void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, + uint64_t first_page, uint32_t num_pages) +{ + struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot, + .first_page = first_page, + .num_pages = num_pages }; + int ret; + + ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args); + TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s", + strerror(-ret)); +} + /* * Userspace Memory Region Find * -- cgit v1.2.3-59-g8ed1b