diff options
author | 2019-02-07 15:11:38 +0000 | |
---|---|---|
committer | 2019-02-07 15:11:38 +0000 | |
commit | eb34598c0a0d10289c8cec9201e26fb543c8a989 (patch) | |
tree | 323d6b184335a1077d5934d55cb9169fcd13b7bf | |
parent | Consistently use m_freem(9). This fixes possible leaks in a few (diff) | |
download | wireguard-openbsd-eb34598c0a0d10289c8cec9201e26fb543c8a989.tar.xz wireguard-openbsd-eb34598c0a0d10289c8cec9201e26fb543c8a989.zip |
Add lock stack trace saving for witness(4).
This lets witness(4) save a stack trace on each lock acquisition.
The saved traces can be viewed in ddb(4) when showing the currently
held locks, which may help when debugging incorrect locking.
Sample output:
ddb{0}> show all locks
Process 63836 (rm) thread 0xffff8000221e52c8 (435004)
exclusive rrwlock inode r = 0 (0xfffffd8119a092c0) locked @ /usr/src/sys/ufs/ufs/ufs_vnops.c:1547
#0 witness_lock+0x419
#1 _rw_enter+0x2bb
#2 _rrw_enter+0x42
#3 VOP_LOCK+0x3f
#4 vn_lock+0x36
#5 vfs_lookup+0xa1
#6 namei+0x2b3
#7 dounlinkat+0x85
#8 syscall+0x338
#9 Xsyscall+0x128
exclusive kernel_lock &kernel_lock r = 1 (0xffffffff81e6a5f0) locked @ /usr/src/sys/arch/amd64/amd64/intr.c:525
#0 witness_lock+0x419
#1 syscall+0x2b6
#2 Xsyscall+0x128
The saving adds overhead, so it is not enabled by default. It can be
taken into use by setting sysctl kern.witness.locktrace=1 at runtime
or by defining WITNESS_LOCKTRACE in the kernel configuration.
Feedback and OK anton@
-rw-r--r-- | lib/libc/sys/sysctl.2 | 13 | ||||
-rw-r--r-- | share/man/man4/options.4 | 12 | ||||
-rw-r--r-- | share/man/man9/malloc.9 | 7 | ||||
-rw-r--r-- | sys/kern/subr_witness.c | 160 | ||||
-rw-r--r-- | sys/sys/malloc.h | 6 | ||||
-rw-r--r-- | sys/sys/sysctl.h | 6 |
6 files changed, 189 insertions, 15 deletions
diff --git a/lib/libc/sys/sysctl.2 b/lib/libc/sys/sysctl.2 index 084009dee79..ef28cdb84b8 100644 --- a/lib/libc/sys/sysctl.2 +++ b/lib/libc/sys/sysctl.2 @@ -1,4 +1,4 @@ -.\" $OpenBSD: sysctl.2,v 1.21 2019/01/29 14:07:15 visa Exp $ +.\" $OpenBSD: sysctl.2,v 1.22 2019/02/07 15:11:38 visa Exp $ .\" .\" Copyright (c) 1993 .\" The Regents of the University of California. All rights reserved. @@ -27,7 +27,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd $Mdocdate: January 29 2019 $ +.Dd $Mdocdate: February 7 2019 $ .Dt SYSCTL 2 .Os .Sh NAME @@ -1105,13 +1105,20 @@ Set to 0 to disable the watchdog timer. .It Dv KERN_WITNESS Pq Va kern.witness Control settings of .Xr witness 4 . -.Bl -column "KERN_WITNESS_WATCH" "integer" "Changeable" -offset indent +.Bl -column "KERN_WITNESS_LOCKTRACE" "integer" "Changeable" -offset indent .It Sy "Third level name" Ta Sy "Type" Ta Sy "Changeable" +.It Dv KERN_WITNESS_LOCKTRACE Ta "integer" Ta "yes" .It Dv KERN_WITNESS_WATCH Ta "integer" Ta "yes" .El .Pp The variables are as follows: .Bl -tag -width "123456" +.It Dv KERN_WITNESS_LOCKTRACE Pq Va kern.witness.locktrace +When set, +.Xr witness 4 +saves a stack trace on each lock acquisition. +The stack traces of acquired locks can be viewed using +.Xr ddb 4 . .It Dv KERN_WITNESS_WATCH Pq Va kern.witness.watch Control how .Xr witness 4 diff --git a/share/man/man4/options.4 b/share/man/man4/options.4 index ce991e68141..6c9f58b9935 100644 --- a/share/man/man4/options.4 +++ b/share/man/man4/options.4 @@ -1,4 +1,4 @@ -.\" $OpenBSD: options.4,v 1.261 2019/02/06 10:59:49 visa Exp $ +.\" $OpenBSD: options.4,v 1.262 2019/02/07 15:11:38 visa Exp $ .\" $NetBSD: options.4,v 1.21 1997/06/25 03:13:00 thorpej Exp $ .\" .\" Copyright (c) 1998 Theo de Raadt @@ -34,7 +34,7 @@ .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" .\" -.Dd $Mdocdate: February 6 2019 $ +.Dd $Mdocdate: February 7 2019 $ .Dt OPTIONS 4 .Os .Sh NAME @@ -141,6 +141,14 @@ See Maximum number of lock types that are tracked by .Xr witness 4 . It defaults to 1536. +.It Cd option WITNESS_LOCKTRACE +Enable +.Xr witness 4 +lock stack trace saving at boot. +The feature is disabled by default and has to be enabled by setting the +.Va kern.witness.locktrace +.Xr sysctl 8 +variable. .It Cd option WITNESS_WATCH Enable .Xr witness 4 diff --git a/share/man/man9/malloc.9 b/share/man/man9/malloc.9 index 0b958b0649b..729b76eec17 100644 --- a/share/man/man9/malloc.9 +++ b/share/man/man9/malloc.9 @@ -1,4 +1,4 @@ -.\" $OpenBSD: malloc.9,v 1.66 2019/01/18 17:52:18 visa Exp $ +.\" $OpenBSD: malloc.9,v 1.67 2019/02/07 15:11:38 visa Exp $ .\" $NetBSD: malloc.9,v 1.2 1996/10/30 05:29:54 lukem Exp $ .\" .\" Copyright (c) 1996 The NetBSD Foundation, Inc. @@ -28,7 +28,7 @@ .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd $Mdocdate: January 18 2019 $ +.Dd $Mdocdate: February 7 2019 $ .Dt MALLOC 9 .Os .Sh NAME @@ -267,6 +267,9 @@ USB general. USB device driver. .It Dv M_USBHC USB host controller. +.It Dv M_WITNESS +.Xr witness 4 +memory. .It Dv M_MEMDESC Memory range. .It Dv M_CRYPTO_DATA diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c index 8b15f818e47..3b7f0cd852e 100644 --- a/sys/kern/subr_witness.c +++ b/sys/kern/subr_witness.c @@ -1,4 +1,4 @@ -/* $OpenBSD: subr_witness.c,v 1.28 2019/02/04 13:28:55 visa Exp $ */ +/* $OpenBSD: subr_witness.c,v 1.29 2019/02/07 15:11:38 visa Exp $ */ /*- * Copyright (c) 2008 Isilon Systems, Inc. @@ -189,6 +189,11 @@ struct lock_class { u_int lc_flags; }; +union lock_stack { + union lock_stack *ls_next; + struct db_stack_trace ls_stack; +}; + #define LC_SLEEPLOCK 0x00000001 /* Sleep lock. */ #define LC_SPINLOCK 0x00000002 /* Spin lock. */ #define LC_SLEEPABLE 0x00000004 /* Sleeping allowed with this lock. */ @@ -203,6 +208,7 @@ struct lock_class { */ struct lock_instance { struct lock_object *li_lock; + union lock_stack *li_stack; const char *li_file; int li_line; u_int li_flags; @@ -292,10 +298,13 @@ struct witness_pendhelp { struct witness_cpu { struct lock_list_entry *wc_spinlocks; struct lock_list_entry *wc_lle_cache; + union lock_stack *wc_stk_cache; unsigned int wc_lle_count; + unsigned int wc_stk_count; } __aligned(CACHELINESIZE); #define WITNESS_LLE_CACHE_MAX 8 +#define WITNESS_STK_CACHE_MAX (WITNESS_LLE_CACHE_MAX * LOCK_NCHILDREN) struct witness_cpu witness_cpu[MAXCPUS]; @@ -340,6 +349,7 @@ static void witness_ddb_display_list(int(*prnt)(const char *fmt, ...), static void witness_ddb_level_descendants(struct witness *parent, int l); static void witness_ddb_list(struct proc *td); #endif +static int witness_alloc_stacks(void); static void witness_debugger(int dump); static void witness_free(struct witness *m); static struct witness *witness_get(void); @@ -353,6 +363,8 @@ static int witness_list_locks(struct lock_list_entry **, int (*)(const char *, ...)); static void witness_lock_list_free(struct lock_list_entry *lle); static struct lock_list_entry *witness_lock_list_get(void); +static void witness_lock_stack_free(union lock_stack *stack); +static union lock_stack *witness_lock_stack_get(void); static int witness_lock_order_add(struct witness *parent, struct witness *child); static int witness_lock_order_check(struct witness *parent, @@ -377,9 +389,16 @@ static int witness_watch = 3; static int witness_watch = 2; #endif +#ifdef WITNESS_LOCKTRACE +static int witness_locktrace = 1; +#else +static int witness_locktrace = 0; +#endif + int witness_count = WITNESS_COUNT; static struct mutex w_mtx; +static struct rwlock w_ctlock = RWLOCK_INITIALIZER("w_ctlock"); /* w_list */ static struct witness_list w_free = SIMPLEQ_HEAD_INITIALIZER(w_free); @@ -408,6 +427,9 @@ static struct witness_lock_order_hash w_lohash; static int w_max_used_index = 0; static unsigned int w_generation = 0; +static union lock_stack *w_lock_stack_free; +static unsigned int w_lock_stack_num; + static struct lock_class lock_class_kernel_lock = { .lc_name = "kernel_lock", .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_SLEEPABLE @@ -474,6 +496,7 @@ void witness_initialize(void) { struct lock_object *lock; + union lock_stack *stacks; struct witness *w; int i, s; @@ -510,7 +533,15 @@ witness_initialize(void) (witness_count + 1)); } + if (witness_locktrace) { + w_lock_stack_num = LOCK_CHILDCOUNT * LOCK_NCHILDREN; + stacks = (void *)uvm_pageboot_alloc(sizeof(*stacks) * + w_lock_stack_num); + } + s = splhigh(); + for (i = 0; i < w_lock_stack_num; i++) + witness_lock_stack_free(&stacks[i]); for (i = 0; i < LOCK_CHILDCOUNT; i++) witness_lock_list_free(&w_locklistdata[i]); splx(s); @@ -1167,6 +1198,12 @@ witness_lock(struct lock_object *lock, int flags, const char *file, int line) instance->li_flags = LI_EXCLUSIVE; else instance->li_flags = 0; + instance->li_stack = NULL; + if (witness_locktrace) { + instance->li_stack = witness_lock_stack_get(); + if (instance->li_stack != NULL) + db_save_stack_trace(&instance->li_stack->ls_stack); + } out: splx(s); } @@ -1341,7 +1378,13 @@ found: panic("lock marked norelease"); } - /* Otherwise, remove this item from the list. */ + /* Release the stack buffer, if any. */ + if (instance->li_stack != NULL) { + witness_lock_stack_free(instance->li_stack); + instance->li_stack = NULL; + } + + /* Remove this item from the list. */ for (j = i; j < (*lock_list)->ll_count - 1; j++) (*lock_list)->ll_children[j] = (*lock_list)->ll_children[j + 1]; @@ -1805,6 +1848,7 @@ witness_lock_list_get(void) static void witness_lock_list_free(struct lock_list_entry *lle) { + union lock_stack *stack; struct witness_cpu *wcpu = &witness_cpu[cpu_number()]; splassert(IPL_HIGH); @@ -1817,9 +1861,67 @@ witness_lock_list_free(struct lock_list_entry *lle) } mtx_enter(&w_mtx); + /* Put the entry on the shared free list. */ lle->ll_next = w_lock_list_free; w_lock_list_free = lle; + /* Put any excess stacks on the shared free list. */ + while (wcpu->wc_stk_count > WITNESS_STK_CACHE_MAX) { + stack = wcpu->wc_stk_cache; + wcpu->wc_stk_cache = stack->ls_next; + wcpu->wc_stk_count--; + stack->ls_next = w_lock_stack_free; + w_lock_stack_free = stack; + } + mtx_leave(&w_mtx); +} + +static union lock_stack * +witness_lock_stack_get(void) +{ + union lock_stack *stack = NULL; + struct witness_cpu *wcpu = &witness_cpu[cpu_number()]; + + splassert(IPL_HIGH); + + if (wcpu->wc_stk_count > 0) { + stack = wcpu->wc_stk_cache; + wcpu->wc_stk_cache = stack->ls_next; + wcpu->wc_stk_count--; + return (stack); + } + + mtx_enter(&w_mtx); + /* Reserve stacks for one lock list entry. */ + while (w_lock_stack_free != NULL) { + stack = w_lock_stack_free; + w_lock_stack_free = stack->ls_next; + if (wcpu->wc_stk_count + 1 == LOCK_NCHILDREN) + break; + stack->ls_next = wcpu->wc_stk_cache; + wcpu->wc_stk_cache = stack; + wcpu->wc_stk_count++; + } mtx_leave(&w_mtx); + return (stack); +} + +/* + * Put the stack buffer on the CPU-local free list. + * A call to this function has to be followed by a call + * to witness_lock_list_free() which will move excess stack buffers + * to the shared free list. + * This split of work reduces contention of w_mtx. + */ +static void +witness_lock_stack_free(union lock_stack *stack) +{ + struct witness_cpu *wcpu = &witness_cpu[cpu_number()]; + + splassert(IPL_HIGH); + + stack->ls_next = wcpu->wc_stk_cache; + wcpu->wc_stk_cache = stack; + wcpu->wc_stk_count++; } static struct lock_instance * @@ -1851,6 +1953,8 @@ witness_list_lock(struct lock_instance *instance, prnt(" r = %d (%p) locked @ %s:%d\n", instance->li_flags & LI_RECURSEMASK, lock, fixup_filename(instance->li_file), instance->li_line); + if (instance->li_stack != NULL) + db_print_stack_trace(&instance->li_stack->ls_stack, prnt); } #ifdef DDB @@ -2550,24 +2654,74 @@ witness_debugger(int dump) } } +static int +witness_alloc_stacks(void) +{ + union lock_stack *stacks; + unsigned int i, nstacks = LOCK_CHILDCOUNT * LOCK_NCHILDREN; + + rw_assert_wrlock(&w_ctlock); + + if (w_lock_stack_num >= nstacks) + return (0); + + nstacks -= w_lock_stack_num; + stacks = mallocarray(nstacks, sizeof(*stacks), M_WITNESS, + M_WAITOK | M_CANFAIL | M_ZERO); + if (stacks == NULL) + return (ENOMEM); + + mtx_enter(&w_mtx); + for (i = 0; i < nstacks; i++) { + stacks[i].ls_next = w_lock_stack_free; + w_lock_stack_free = &stacks[i]; + } + mtx_leave(&w_mtx); + w_lock_stack_num += nstacks; + + return (0); +} + int witness_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int error; + int error, value; if (namelen != 1) return (ENOTDIR); + rw_enter_write(&w_ctlock); + switch (name[0]) { case KERN_WITNESS_WATCH: error = witness_sysctl_watch(oldp, oldlenp, newp, newlen); break; + case KERN_WITNESS_LOCKTRACE: + value = witness_locktrace; + error = sysctl_int(oldp, oldlenp, newp, newlen, &value); + if (error == 0 && newp != NULL) { + switch (value) { + case 1: + error = witness_alloc_stacks(); + /* FALLTHROUGH */ + case 0: + if (error == 0) + witness_locktrace = value; + break; + default: + error = EINVAL; + break; + } + } + break; default: error = EOPNOTSUPP; break; } + rw_exit_write(&w_ctlock); + return (error); } diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h index b2b71f9b4d6..a183d57f208 100644 --- a/sys/sys/malloc.h +++ b/sys/sys/malloc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: malloc.h,v 1.117 2018/11/12 15:09:17 visa Exp $ */ +/* $OpenBSD: malloc.h,v 1.118 2019/02/07 15:11:38 visa Exp $ */ /* $NetBSD: malloc.h,v 1.39 1998/07/12 19:52:01 augustss Exp $ */ /* @@ -140,7 +140,7 @@ #define M_USB 101 /* USB general */ #define M_USBDEV 102 /* USB device driver */ #define M_USBHC 103 /* USB host controller */ -/* 104 - free */ +#define M_WITNESS 104 /* witness data */ #define M_MEMDESC 105 /* Memory range */ /* 106-107 - free */ #define M_CRYPTO_DATA 108 /* Crypto framework data buffers (keys etc.) */ @@ -278,7 +278,7 @@ "USB", /* 101 M_USB */ \ "USB device", /* 102 M_USBDEV */ \ "USB HC", /* 103 M_USBHC */ \ - NULL, \ + "witness", /* 104 M_WITNESS */ \ "memdesc", /* 105 M_MEMDESC */ \ NULL, /* 106 */ \ NULL, \ diff --git a/sys/sys/sysctl.h b/sys/sys/sysctl.h index 04e6bbbe77b..6512ab706b1 100644 --- a/sys/sys/sysctl.h +++ b/sys/sys/sysctl.h @@ -1,4 +1,4 @@ -/* $OpenBSD: sysctl.h,v 1.183 2019/01/31 05:33:51 tedu Exp $ */ +/* $OpenBSD: sysctl.h,v 1.184 2019/02/07 15:11:38 visa Exp $ */ /* $NetBSD: sysctl.h,v 1.16 1996/04/09 20:55:36 cgd Exp $ */ /* @@ -320,11 +320,13 @@ struct ctlname { * KERN_WITNESS */ #define KERN_WITNESS_WATCH 1 /* int: operating mode */ -#define KERN_WITNESS_MAXID 2 +#define KERN_WITNESS_LOCKTRACE 2 /* int: stack trace saving mode */ +#define KERN_WITNESS_MAXID 3 #define CTL_KERN_WITNESS_NAMES { \ { 0, 0 }, \ { "watch", CTLTYPE_INT }, \ + { "locktrace", CTLTYPE_INT }, \ } /* |