author     deraadt <deraadt@openbsd.org>  2019-11-29 06:34:44 +0000
committer  deraadt <deraadt@openbsd.org>  2019-11-29 06:34:44 +0000
commit     e50417250fb1d11dd90538c8f1a10723b2e4b3af (patch)
tree       18b789ba719e74203d4f59bf332c93e337c4c980 /sys/uvm
parent     drm_sched_entity_flush() doesn't care about specific threads, so just (diff)
Repurpose the "syscalls must be on a writeable page" mechanism to
enforce a new policy: system calls must be in pre-registered regions.
We have discussed stricter checks than this, but none satisfy the
cost/benefit based upon our understanding of attack methods; anyway,
let's see what the next iteration looks like.

This is intended to harden (translation: attackers must put extra
effort into attacking) against a mixture of W^X failures and JIT bugs
which allow syscall misinterpretation, especially in environments with
polymorphic or variable-sized instructions.

It fits in a bit with the libc/libcrypto/ld.so random relink on boot
and the no-restart-at-crash behaviour, particularly for remote
problems. It is less effective once an attacker is on-host, since the
libraries can then be read.

For static executables, the kernel registers the main program's
PIE-mapped exec section as valid, as well as the randomly-placed
sigtramp page. For dynamic executables, ELF ld.so's exec segment is
also labelled valid; ld.so then has enough information to register
libc's exec section as valid via the call-once msyscall(2).

For dynamic binaries, we continue to permit the main program's exec
segment because "go" (and potentially a few other applications) embed
system calls in the main program. Hopefully at least go gets fixed
soon. We declare the concept of embedded syscalls a bad idea for
numerous reasons: the ecosystem has many static-syscall-in-base-binary
programs which are dynamically linked against libraries which in turn
use libc, which contains yet another set of syscall stubs. We have been
concerned about adding even one additional syscall entry point... but
go's approach tends to double the entry-point attack surface.

This was started at a nano-hackathon in Bob Beck's basement 2 weeks
ago, during a long discussion with mortimer while trying to hide from
the SSL scream-conversations, and finished in more comfortable
circumstances next to a wood-stove at Elk Lakes cabin with UVM
scream-conversations.

ok guenther kettenis mortimer; lots of feedback from others;
conversations about go with jsing tb sthen
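[Editor's illustration; this sketch is not part of the commit.]
Conceptually, the ld.so side of the protocol described above looks like
the following: after mapping libc, the dynamic linker knows the bounds
of libc's executable PT_LOAD segment and registers that range with the
kernel through the call-once msyscall(2). The register_libc_text()
helper and the exec_base/exec_len parameters are hypothetical names
used only for illustration; the real registration code lives inside
ld.so.

/*
 * Hypothetical sketch, not taken from this commit: ld.so-style
 * registration of libc's executable segment.  exec_base/exec_len stand
 * in for the bounds of libc's executable PT_LOAD segment, which the
 * real ld.so already knows from the program headers it mapped.
 */
#include <sys/types.h>
#include <sys/syscall.h>	/* SYS_msyscall */
#include <unistd.h>		/* syscall(2) */

static int
register_libc_text(void *exec_base, size_t exec_len)
{
	/*
	 * msyscall(2) may only be used once per process: uvm_map_syscall()
	 * sets VM_MAP_SYSCALL_ONCE, and any later attempt fails with EPERM.
	 */
	return syscall(SYS_msyscall, exec_base, exec_len);
}

After this registration, uvm_map_inentry_pc() accepts system calls
whose program counter falls inside a non-writeable UVM_ET_SYSCALL
region (the registered range plus the regions the kernel marked at
execve time) and rejects them everywhere else.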
Diffstat (limited to 'sys/uvm')
-rw-r--r--  sys/uvm/uvm.h         3
-rw-r--r--  sys/uvm/uvm_extern.h  3
-rw-r--r--  sys/uvm/uvm_map.c     58
-rw-r--r--  sys/uvm/uvm_map.h     4
-rw-r--r--  sys/uvm/uvm_mmap.c    4
5 files changed, 64 insertions(+), 8 deletions(-)
diff --git a/sys/uvm/uvm.h b/sys/uvm/uvm.h
index a2147155523..731ce8dba42 100644
--- a/sys/uvm/uvm.h
+++ b/sys/uvm/uvm.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm.h,v 1.65 2019/07/18 23:47:33 cheloha Exp $ */
+/* $OpenBSD: uvm.h,v 1.66 2019/11/29 06:34:45 deraadt Exp $ */
/* $NetBSD: uvm.h,v 1.24 2000/11/27 08:40:02 chs Exp $ */
/*
@@ -91,6 +91,7 @@ struct uvm {
#define UVM_ET_STACK 0x0040 /* this is a stack */
#define UVM_ET_WC 0x0080 /* write combining */
#define UVM_ET_CONCEAL 0x0100 /* omit from dumps */
+#define UVM_ET_SYSCALL 0x0200 /* syscall text segment */
#define UVM_ET_FREEMAPPED 0x8000 /* map entry is on free list (DEBUG) */
#define UVM_ET_ISOBJ(E) (((E)->etype & UVM_ET_OBJ) != 0)
diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h
index 203955c6375..779f7654d9d 100644
--- a/sys/uvm/uvm_extern.h
+++ b/sys/uvm/uvm_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_extern.h,v 1.150 2019/11/28 23:42:52 guenther Exp $ */
+/* $OpenBSD: uvm_extern.h,v 1.151 2019/11/29 06:34:45 deraadt Exp $ */
/* $NetBSD: uvm_extern.h,v 1.57 2001/03/09 01:02:12 chs Exp $ */
/*
@@ -114,6 +114,7 @@ typedef int vm_prot_t;
#define UVM_FLAG_STACK 0x2000000 /* page may contain a stack */
#define UVM_FLAG_WC 0x4000000 /* write combining */
#define UVM_FLAG_CONCEAL 0x8000000 /* omit from dumps */
+#define UVM_FLAG_SYSCALL 0x10000000 /* system calls allowed */
/* macros to extract info */
#define UVM_PROTECTION(X) ((X) & PROT_MASK)
diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c
index d35620c66cd..3791b155920 100644
--- a/sys/uvm/uvm_map.c
+++ b/sys/uvm/uvm_map.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_map.c,v 1.252 2019/11/26 18:23:48 mlarkin Exp $ */
+/* $OpenBSD: uvm_map.c,v 1.253 2019/11/29 06:34:45 deraadt Exp $ */
/* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */
/*
@@ -1080,6 +1080,10 @@ uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz,
entry->advice = advice;
if (prot & PROT_WRITE)
map->wserial++;
+ if (flags & UVM_FLAG_SYSCALL) {
+ entry->etype |= UVM_ET_SYSCALL;
+ map->wserial++;
+ }
if (flags & UVM_FLAG_STACK) {
entry->etype |= UVM_ET_STACK;
if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP))
@@ -1345,6 +1349,10 @@ uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz,
entry->advice = advice;
if (prot & PROT_WRITE)
map->wserial++;
+ if (flags & UVM_FLAG_SYSCALL) {
+ entry->etype |= UVM_ET_SYSCALL;
+ map->wserial++;
+ }
if (flags & UVM_FLAG_STACK) {
entry->etype |= UVM_ET_STACK;
if (flags & UVM_FLAG_UNMAP)
@@ -1808,12 +1816,15 @@ uvm_map_inentry_sp(vm_map_entry_t entry)
/*
* If a syscall comes from a writeable entry, W^X is violated.
* (Would be nice if we can spot aliasing, which is also kind of bad)
+ * Ensure system call comes from libc or ld.so's text segment.
*/
int
uvm_map_inentry_pc(vm_map_entry_t entry)
{
if (entry->protection & PROT_WRITE)
return (0); /* not permitted */
+ if ((entry->etype & UVM_ET_SYSCALL) == 0)
+ return (0); /* not permitted */
return (1);
}
@@ -3089,12 +3100,14 @@ uvm_map_printit(struct vm_map *map, boolean_t full,
entry, entry->start, entry->end, entry->object.uvm_obj,
(long long)entry->offset, entry->aref.ar_amap,
entry->aref.ar_pageoff);
- (*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, prot(max)=%d/%d, inh=%d, "
+ (*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, "
+ "syscall=%c, prot(max)=%d/%d, inh=%d, "
"wc=%d, adv=%d\n",
(entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
(entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
(entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
(entry->etype & UVM_ET_STACK) ? 'T' : 'F',
+ (entry->etype & UVM_ET_SYSCALL) ? 'T' : 'F',
entry->protection, entry->max_protection,
entry->inheritance, entry->wired_count, entry->advice);
@@ -3511,7 +3524,7 @@ uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end)
* when a process execs another program image.
*/
vm_map_lock(map);
- vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
+ vm_map_modflags(map, 0, VM_MAP_WIREFUTURE|VM_MAP_SYSCALL_ONCE);
/*
* now unmap the old program
@@ -4288,6 +4301,45 @@ uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
return (0);
}
+/*
+ * uvm_map_syscall: permit system calls for range of addrs in map.
+ *
+ * => map must be unlocked
+ */
+int
+uvm_map_syscall(struct vm_map *map, vaddr_t start, vaddr_t end)
+{
+ struct vm_map_entry *entry;
+
+ if (start > end)
+ return EINVAL;
+ start = MAX(start, map->min_offset);
+ end = MIN(end, map->max_offset);
+ if (start >= end)
+ return 0;
+ if (map->flags & VM_MAP_SYSCALL_ONCE) /* only allowed once */
+ return (EPERM);
+
+ vm_map_lock(map);
+
+ entry = uvm_map_entrybyaddr(&map->addr, start);
+ if (entry->end > start)
+ UVM_MAP_CLIP_START(map, entry, start);
+ else
+ entry = RBT_NEXT(uvm_map_addr, entry);
+
+ while (entry != NULL && entry->start < end) {
+ UVM_MAP_CLIP_END(map, entry, end);
+ entry->etype |= UVM_ET_SYSCALL;
+ entry = RBT_NEXT(uvm_map_addr, entry);
+ }
+
+ map->wserial++;
+ map->flags |= VM_MAP_SYSCALL_ONCE;
+ vm_map_unlock(map);
+ return (0);
+}
+
/*
* uvm_map_advice: set advice code for range of addrs in map.
*
diff --git a/sys/uvm/uvm_map.h b/sys/uvm/uvm_map.h
index bb511e2ed6e..7ee39f50e81 100644
--- a/sys/uvm/uvm_map.h
+++ b/sys/uvm/uvm_map.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_map.h,v 1.64 2019/11/02 09:36:08 mpi Exp $ */
+/* $OpenBSD: uvm_map.h,v 1.65 2019/11/29 06:34:46 deraadt Exp $ */
/* $NetBSD: uvm_map.h,v 1.24 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -350,6 +350,7 @@ struct vm_map {
#define VM_MAP_WANTLOCK 0x10 /* rw: want to write-lock */
#define VM_MAP_GUARDPAGES 0x20 /* rw: add guard pgs to map */
#define VM_MAP_ISVMSPACE 0x40 /* ro: map is a vmspace */
+#define VM_MAP_SYSCALL_ONCE 0x80 /* rw: libc syscall registered */
/* XXX: number of kernel maps and entries to statically allocate */
@@ -395,6 +396,7 @@ int uvm_map_extract(struct vm_map*, vaddr_t, vsize_t, vaddr_t*,
int);
vaddr_t uvm_map_pie(vaddr_t);
vaddr_t uvm_map_hint(struct vmspace *, vm_prot_t, vaddr_t, vaddr_t);
+int uvm_map_syscall(vm_map_t, vaddr_t, vaddr_t);
int uvm_map_inherit(vm_map_t, vaddr_t, vaddr_t, vm_inherit_t);
int uvm_map_advice(vm_map_t, vaddr_t, vaddr_t, int);
void uvm_map_init(void);
diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c
index 30250a197b3..8e9bd66bc7b 100644
--- a/sys/uvm/uvm_mmap.c
+++ b/sys/uvm/uvm_mmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_mmap.c,v 1.159 2019/11/28 17:19:22 mlarkin Exp $ */
+/* $OpenBSD: uvm_mmap.c,v 1.160 2019/11/29 06:34:46 deraadt Exp $ */
/* $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -606,7 +606,7 @@ sys_msyscall(struct proc *p, void *v, register_t *retval)
if (addr > SIZE_MAX - size)
return (EINVAL); /* disallow wrap-around. */
- return (0);
+ return (uvm_map_syscall(&p->p_vmspace->vm_map, addr, addr+size));
}
/*