aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm/fault.c
diff options
context:
space:
mode:
author: Jason A. Donenfeld <Jason@zx2c4.com> 2022-12-12 11:14:19 -0700
committer: Jason A. Donenfeld <Jason@zx2c4.com> 2024-07-29 18:15:05 +0200
commit: 8f9e92b36972415b97abc598bbdf1908806b1882 (patch)
tree: efa8d96a10a6cbc587e1e4fc1befaf18402b8806 /arch/x86/mm/fault.c
parent: minmax: simplify min()/max()/clamp() implementation (diff)
downloadlinux-rng-jd/vdso-skip-insn.tar.xz
linux-rng-jd/vdso-skip-insn.zip
x86: mm: Skip faulting instruction for VM_DROPPABLE faults (branch: jd/vdso-skip-insn)
VM_DROPPABLE allocations can, by definition, be dropped and then filled with zeros at any time. For reads, this is working as intended: userspace reads memory, and at some point it comes back as zeros. Writes, however, are another story. If the kernel has dropped the memory and userspace writes to those addresses, the page fault handler traps, the memory is allocated, and control is returned to userspace to retry the write, which succeeds. But if the memory fails to be allocated when trapping, control is still returned to userspace to retry the write, and the whole thing happens again. And it doesn't make sense to kill the process for a droppable mapping. Hopefully, there is eventually enough memory and the write succeeds.

However, that retry loop really is unnecessary. Instead, the write could just be skipped, because skipping a write is the same as the write succeeding and then, immediately afterwards, the kernel dropping the page so that subsequent reads return zeros. So, rather than merely returning to userspace at the same write instruction to be retried, skip that write instruction.

Implement it on x86, where instructions are variable size, by copying the instruction bytes at userspace's %rip into a stack buffer of size MAX_INSN_SIZE, decoding them, and then adding the length of the decoded instruction to userspace's %rip. In the event that any of these steps fails, just fall back to not advancing %rip and trying again.

Cc: linux-mm@kvack.org
Cc: x86@kernel.org
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Diffstat (limited to 'arch/x86/mm/fault.c')
-rw-r--r--arch/x86/mm/fault.c19
1 files changed, 19 insertions, 0 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e6c469b323cc..8887c6d63ef2 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -37,6 +37,8 @@
#include <asm/irq_stack.h>
#include <asm/fred.h>
#include <asm/sev.h> /* snp_dump_hva_rmpentry() */
+#include <asm/insn.h> /* struct insn */
+#include <asm/insn-eval.h> /* insn_fetch_from_user(), ... */
#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
@@ -1415,6 +1417,23 @@ retry:
}
mmap_read_unlock(mm);
+
+ if (fault & VM_FAULT_SKIP_INSN) {
+ u8 buf[MAX_INSN_SIZE];
+ struct insn insn;
+ int nr_copied;
+
+ nr_copied = insn_fetch_from_user(regs, buf);
+ if (nr_copied <= 0)
+ return;
+
+ if (!insn_decode_from_regs(&insn, regs, buf, nr_copied))
+ return;
+
+ regs->ip += insn.length;
+ return;
+ }
+
done:
if (likely(!(fault & VM_FAULT_ERROR)))
return;