diff options
author | pd <pd@openbsd.org> | 2019-05-12 20:56:34 +0000 |
---|---|---|
committer | pd <pd@openbsd.org> | 2019-05-12 20:56:34 +0000 |
commit | c504343966815207f643aa71f5cf216fdd222a28 (patch) | |
tree | 1752a5ba88570a4064f78a7b4fde18395db2eb10 /usr.sbin/vmd/vm.c | |
parent | Remove a now obsolete comment about BITSTRING. (diff) | |
download | wireguard-openbsd-c504343966815207f643aa71f5cf216fdd222a28.tar.xz wireguard-openbsd-c504343966815207f643aa71f5cf216fdd222a28.zip |
vmm: add an x86 page table walker
Add a first cut of an x86 page table walker to vmd(8) and vmm(4). This function is
not used right now but is a building block for future features like HPET, OUTSB
and INSB emulation, nested virtualisation support, etc.
With help from Mike Larkin
ok mlarkin@
Diffstat (limited to 'usr.sbin/vmd/vm.c')
-rw-r--r-- | usr.sbin/vmd/vm.c | 138 |
1 files changed, 137 insertions, 1 deletions
diff --git a/usr.sbin/vmd/vm.c b/usr.sbin/vmd/vm.c index dd3915ae726..99e4fab109c 100644 --- a/usr.sbin/vmd/vm.c +++ b/usr.sbin/vmd/vm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm.c,v 1.47 2019/05/11 19:59:32 jasper Exp $ */ +/* $OpenBSD: vm.c,v 1.48 2019/05/12 20:56:34 pd Exp $ */ /* * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> @@ -93,6 +93,8 @@ void restore_mem(int, struct vm_create_params *); void pause_vm(struct vm_create_params *); void unpause_vm(struct vm_create_params *); +int translate_gva(struct vm_exit*, uint64_t, uint64_t *, int); + static struct vm_mem_range *find_gpa_range(struct vm_create_params *, paddr_t, size_t); @@ -1950,3 +1952,137 @@ get_input_data(struct vm_exit *vei, uint32_t *data) } } + +/* + * translate_gva + * + * Translates a guest virtual address to a guest physical address by walking + * the currently active page table (if needed). + * + * Note - this function can possibly alter the supplied VCPU state. + * Specifically, it may inject exceptions depending on the current VCPU + * configuration, and may alter %cr2 on #PF. Consequently, this function + * should only be used as part of instruction emulation. + * + * Parameters: + * exit: The VCPU this translation should be performed for (guest MMU settings + * are gathered from this VCPU) + * va: virtual address to translate + * pa: pointer to paddr_t variable that will receive the translated physical + * address. 'pa' is unchanged on error. + * mode: one of PROT_READ, PROT_WRITE, PROT_EXEC indicating the mode in which + * the address should be translated + * + * Return values: + * 0: the address was successfully translated - 'pa' contains the physical + * address currently mapped by 'va'. + * EFAULT: the PTE for 'VA' is unmapped. A #PF will be injected in this case + * and %cr2 set in the vcpu structure. 
+ * EINVAL: an error occurred reading paging table structures + */ +int +translate_gva(struct vm_exit* exit, uint64_t va, uint64_t* pa, int mode) +{ + int level, shift, pdidx; + uint64_t pte, pt_paddr, pte_paddr, mask, low_mask, high_mask; + uint64_t shift_width, pte_size; + struct vcpu_reg_state *vrs; + + vrs = &exit->vrs; + + if (!pa) + return (EINVAL); + + if (!(vrs->vrs_crs[VCPU_REGS_CR0] & CR0_PG)) { + log_debug("%s: unpaged, va=pa=0x%llx", __func__, va); + *pa = va; + return (0); + } + + pt_paddr = vrs->vrs_crs[VCPU_REGS_CR3]; + + log_debug("%s: guest %%cr0=0x%llx, %%cr3=0x%llx", __func__, + vrs->vrs_crs[VCPU_REGS_CR0], vrs->vrs_crs[VCPU_REGS_CR3]); + + if (vrs->vrs_crs[VCPU_REGS_CR0] & CR0_PE) { + if (vrs->vrs_crs[VCPU_REGS_CR4] & CR4_PAE) { + pte_size = sizeof(uint64_t); + shift_width = 9; + + if (vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA) { + /* 4 level paging */ + level = 4; + mask = L4_MASK; + shift = L4_SHIFT; + } else { + /* 32 bit with PAE paging */ + level = 3; + mask = L3_MASK; + shift = L3_SHIFT; + } + } else { + /* 32 bit paging */ + level = 2; + shift_width = 10; + mask = 0xFFC00000; + shift = 22; + pte_size = sizeof(uint32_t); + } + } else + return (EINVAL); + + /* XXX: Check for R bit in segment selector and set A bit */ + + for (;level > 0; level--) { + pdidx = (va & mask) >> shift; + pte_paddr = (pt_paddr) + (pdidx * pte_size); + + log_debug("%s: read pte level %d @ GPA 0x%llx", __func__, + level, pte_paddr); + if (read_mem(pte_paddr, &pte, pte_size)) { + log_warn("%s: failed to read pte", __func__); + return (EFAULT); + } + + log_debug("%s: PTE @ 0x%llx = 0x%llx", __func__, pte_paddr, + pte); + + /* XXX: Set CR2 */ + if (!(pte & PG_V)) + return (EFAULT); + + /* XXX: Check for SMAP */ + if ((mode == PROT_WRITE) && !(pte & PG_RW)) + return (EPERM); + + if ((exit->cpl > 0) && !(pte & PG_u)) + return (EPERM); + + pte = pte | PG_U; + if (mode == PROT_WRITE) + pte = pte | PG_M; + if (write_mem(pte_paddr, &pte, pte_size)) { + log_warn("%s: failed to 
write back flags to pte", + __func__); + return (EIO); + } + + /* XXX: EINVAL if in 32bit and PG_PS is 1 but CR4.PSE is 0 */ + if (pte & PG_PS) + break; + + if (level > 1) { + pt_paddr = pte & PG_FRAME; + shift -= shift_width; + mask = mask >> shift_width; + } + } + + low_mask = (1 << shift) - 1; + high_mask = (((uint64_t)1ULL << ((pte_size * 8) - 1)) - 1) ^ low_mask; + *pa = (pte & high_mask) | (va & low_mask); + + log_debug("%s: final GPA for GVA 0x%llx = 0x%llx\n", __func__, va, *pa); + + return (0); +} |