diff options
author | deraadt <deraadt@openbsd.org> | 2019-11-29 06:34:44 +0000 |
---|---|---|
committer | deraadt <deraadt@openbsd.org> | 2019-11-29 06:34:44 +0000 |
commit | e50417250fb1d11dd90538c8f1a10723b2e4b3af (patch) | |
tree | 18b789ba719e74203d4f59bf332c93e337c4c980 /sys/uvm | |
parent | drm_sched_entity_flush() doesn't care about specific threads, so just (diff) | |
download | wireguard-openbsd-e50417250fb1d11dd90538c8f1a10723b2e4b3af.tar.xz wireguard-openbsd-e50417250fb1d11dd90538c8f1a10723b2e4b3af.zip |
Repurpose the "syscalls must be on a writeable page" mechanism to
enforce a new policy: system calls must be in pre-registered regions.
We have discussed more strict checks than this, but none satisfy the
cost/benefit based upon our understanding of attack methods, anyways
let's see what the next iteration looks like.
This is intended to harden (translation: attackers must put extra
effort into attacking) against a mixture of W^X failures and JIT bugs
which allow syscall misinterpretation, especially in environments with
polymorphic-instruction/variable-sized instructions. It fits in a bit
with libc/libcrypto/ld.so random relink on boot and no-restart-at-crash
behaviour, particularly for remote problems. Less effective once on-host
since the libraries can then be read.
For static-executables the kernel registers the main program's
PIE-mapped exec section valid, as well as the randomly-placed sigtramp
page. For dynamic executables ELF ld.so's exec segment is also
labelled valid; ld.so then has enough information to register libc's
exec section as valid via call-once msyscall(2).
For dynamic binaries, we continue to permit the main program exec
segment because "go" (and potentially a few other applications) have
embedded system calls in the main program. Hopefully at least go gets
fixed soon.
We declare the concept of embedded syscalls a bad idea for numerous
reasons, as we notice the ecosystem has many
static-syscall-in-base-binary programs which are dynamically linked against
libraries which in turn use libc, which contains another set of
syscall stubs. We've been concerned about adding even one additional
syscall entry point... but go's approach tends to double the entry-point
attack surface.
This was started at a nano-hackathon in Bob Beck's basement 2 weeks
ago during a long discussion with mortimer trying to hide from the SSL
scream-conversations, and finished in more comfortable circumstances
next to a wood-stove at Elk Lakes cabin with UVM scream-conversations.
ok guenther kettenis mortimer, lots of feedback from others
conversations about go with jsing tb sthen
Diffstat (limited to 'sys/uvm')
-rw-r--r-- | sys/uvm/uvm.h | 3 | ||||
-rw-r--r-- | sys/uvm/uvm_extern.h | 3 | ||||
-rw-r--r-- | sys/uvm/uvm_map.c | 58 | ||||
-rw-r--r-- | sys/uvm/uvm_map.h | 4 | ||||
-rw-r--r-- | sys/uvm/uvm_mmap.c | 4 |
5 files changed, 64 insertions, 8 deletions
diff --git a/sys/uvm/uvm.h b/sys/uvm/uvm.h index a2147155523..731ce8dba42 100644 --- a/sys/uvm/uvm.h +++ b/sys/uvm/uvm.h @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm.h,v 1.65 2019/07/18 23:47:33 cheloha Exp $ */ +/* $OpenBSD: uvm.h,v 1.66 2019/11/29 06:34:45 deraadt Exp $ */ /* $NetBSD: uvm.h,v 1.24 2000/11/27 08:40:02 chs Exp $ */ /* @@ -91,6 +91,7 @@ struct uvm { #define UVM_ET_STACK 0x0040 /* this is a stack */ #define UVM_ET_WC 0x0080 /* write combining */ #define UVM_ET_CONCEAL 0x0100 /* omit from dumps */ +#define UVM_ET_SYSCALL 0x0200 /* syscall text segment */ #define UVM_ET_FREEMAPPED 0x8000 /* map entry is on free list (DEBUG) */ #define UVM_ET_ISOBJ(E) (((E)->etype & UVM_ET_OBJ) != 0) diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h index 203955c6375..779f7654d9d 100644 --- a/sys/uvm/uvm_extern.h +++ b/sys/uvm/uvm_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_extern.h,v 1.150 2019/11/28 23:42:52 guenther Exp $ */ +/* $OpenBSD: uvm_extern.h,v 1.151 2019/11/29 06:34:45 deraadt Exp $ */ /* $NetBSD: uvm_extern.h,v 1.57 2001/03/09 01:02:12 chs Exp $ */ /* @@ -114,6 +114,7 @@ typedef int vm_prot_t; #define UVM_FLAG_STACK 0x2000000 /* page may contain a stack */ #define UVM_FLAG_WC 0x4000000 /* write combining */ #define UVM_FLAG_CONCEAL 0x8000000 /* omit from dumps */ +#define UVM_FLAG_SYSCALL 0x10000000 /* system calls allowed */ /* macros to extract info */ #define UVM_PROTECTION(X) ((X) & PROT_MASK) diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c index d35620c66cd..3791b155920 100644 --- a/sys/uvm/uvm_map.c +++ b/sys/uvm/uvm_map.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_map.c,v 1.252 2019/11/26 18:23:48 mlarkin Exp $ */ +/* $OpenBSD: uvm_map.c,v 1.253 2019/11/29 06:34:45 deraadt Exp $ */ /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ /* @@ -1080,6 +1080,10 @@ uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz, entry->advice = advice; if (prot & PROT_WRITE) map->wserial++; + if (flags & UVM_FLAG_SYSCALL) { + entry->etype |= UVM_ET_SYSCALL; + 
map->wserial++; + } if (flags & UVM_FLAG_STACK) { entry->etype |= UVM_ET_STACK; if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP)) @@ -1345,6 +1349,10 @@ uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz, entry->advice = advice; if (prot & PROT_WRITE) map->wserial++; + if (flags & UVM_FLAG_SYSCALL) { + entry->etype |= UVM_ET_SYSCALL; + map->wserial++; + } if (flags & UVM_FLAG_STACK) { entry->etype |= UVM_ET_STACK; if (flags & UVM_FLAG_UNMAP) @@ -1808,12 +1816,15 @@ uvm_map_inentry_sp(vm_map_entry_t entry) /* * If a syscall comes from a writeable entry, W^X is violated. * (Would be nice if we can spot aliasing, which is also kind of bad) + * Ensure system call comes from libc or ld.so's text segment. */ int uvm_map_inentry_pc(vm_map_entry_t entry) { if (entry->protection & PROT_WRITE) return (0); /* not permitted */ + if ((entry->etype & UVM_ET_SYSCALL) == 0) + return (0); /* not permitted */ return (1); } @@ -3089,12 +3100,14 @@ uvm_map_printit(struct vm_map *map, boolean_t full, entry, entry->start, entry->end, entry->object.uvm_obj, (long long)entry->offset, entry->aref.ar_amap, entry->aref.ar_pageoff); - (*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, prot(max)=%d/%d, inh=%d, " + (*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, " + "syscall=%c, prot(max)=%d/%d, inh=%d, " "wc=%d, adv=%d\n", (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', (entry->etype & UVM_ET_STACK) ? 'T' : 'F', + (entry->etype & UVM_ET_SYSCALL) ? 'T' : 'F', entry->protection, entry->max_protection, entry->inheritance, entry->wired_count, entry->advice); @@ -3511,7 +3524,7 @@ uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) * when a process execs another program image. 
*/ vm_map_lock(map); - vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); + vm_map_modflags(map, 0, VM_MAP_WIREFUTURE|VM_MAP_SYSCALL_ONCE); /* * now unmap the old program @@ -4288,6 +4301,45 @@ uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, return (0); } +/* + * uvm_map_syscall: permit system calls for range of addrs in map. + * + * => map must be unlocked + */ +int +uvm_map_syscall(struct vm_map *map, vaddr_t start, vaddr_t end) +{ + struct vm_map_entry *entry; + + if (start > end) + return EINVAL; + start = MAX(start, map->min_offset); + end = MIN(end, map->max_offset); + if (start >= end) + return 0; + if (map->flags & VM_MAP_SYSCALL_ONCE) /* only allowed once */ + return (EPERM); + + vm_map_lock(map); + + entry = uvm_map_entrybyaddr(&map->addr, start); + if (entry->end > start) + UVM_MAP_CLIP_START(map, entry, start); + else + entry = RBT_NEXT(uvm_map_addr, entry); + + while (entry != NULL && entry->start < end) { + UVM_MAP_CLIP_END(map, entry, end); + entry->etype |= UVM_ET_SYSCALL; + entry = RBT_NEXT(uvm_map_addr, entry); + } + + map->wserial++; + map->flags |= VM_MAP_SYSCALL_ONCE; + vm_map_unlock(map); + return (0); +} + /* * uvm_map_advice: set advice code for range of addrs in map. 
* diff --git a/sys/uvm/uvm_map.h b/sys/uvm/uvm_map.h index bb511e2ed6e..7ee39f50e81 100644 --- a/sys/uvm/uvm_map.h +++ b/sys/uvm/uvm_map.h @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_map.h,v 1.64 2019/11/02 09:36:08 mpi Exp $ */ +/* $OpenBSD: uvm_map.h,v 1.65 2019/11/29 06:34:46 deraadt Exp $ */ /* $NetBSD: uvm_map.h,v 1.24 2001/02/18 21:19:08 chs Exp $ */ /* @@ -350,6 +350,7 @@ struct vm_map { #define VM_MAP_WANTLOCK 0x10 /* rw: want to write-lock */ #define VM_MAP_GUARDPAGES 0x20 /* rw: add guard pgs to map */ #define VM_MAP_ISVMSPACE 0x40 /* ro: map is a vmspace */ +#define VM_MAP_SYSCALL_ONCE 0x80 /* rw: libc syscall registered */ /* XXX: number of kernel maps and entries to statically allocate */ @@ -395,6 +396,7 @@ int uvm_map_extract(struct vm_map*, vaddr_t, vsize_t, vaddr_t*, int); vaddr_t uvm_map_pie(vaddr_t); vaddr_t uvm_map_hint(struct vmspace *, vm_prot_t, vaddr_t, vaddr_t); +int uvm_map_syscall(vm_map_t, vaddr_t, vaddr_t); int uvm_map_inherit(vm_map_t, vaddr_t, vaddr_t, vm_inherit_t); int uvm_map_advice(vm_map_t, vaddr_t, vaddr_t, int); void uvm_map_init(void); diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c index 30250a197b3..8e9bd66bc7b 100644 --- a/sys/uvm/uvm_mmap.c +++ b/sys/uvm/uvm_mmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_mmap.c,v 1.159 2019/11/28 17:19:22 mlarkin Exp $ */ +/* $OpenBSD: uvm_mmap.c,v 1.160 2019/11/29 06:34:46 deraadt Exp $ */ /* $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $ */ /* @@ -606,7 +606,7 @@ sys_msyscall(struct proc *p, void *v, register_t *retval) if (addr > SIZE_MAX - size) return (EINVAL); /* disallow wrap-around. */ - return (0); + return (uvm_map_syscall(&p->p_vmspace->vm_map, addr, addr+size)); } /* |