diff options
author | mpi <mpi@openbsd.org> | 2020-01-21 16:16:22 +0000 |
---|---|---|
committer | mpi <mpi@openbsd.org> | 2020-01-21 16:16:22 +0000 |
commit | 91b2ecf67e2a667ccefd071f95f44a8068f1bbc4 (patch) | |
tree | 4844bb05cf2473deb3060725603f0582ebfa6974 | |
parent | regen (diff) | |
download | wireguard-openbsd-91b2ecf67e2a667ccefd071f95f44a8068f1bbc4.tar.xz wireguard-openbsd-91b2ecf67e2a667ccefd071f95f44a8068f1bbc4.zip |
Import dt(4), a driver and framework for Dynamic Profiling.
The design is fairly simple: events, in the form of descriptors on a
ring, are being produced in any kernel context and being consumed by
a userland process reading /dev/dt.
Code and hooks are all guarded under '#if NDT > 0' so this commit
shouldn't introduce any change as long as dt(4) is disabled in GENERIC.
ok kettenis@, visa@, jasper@, deraadt@
-rw-r--r-- | sys/arch/amd64/amd64/conf.c | 5 | ||||
-rw-r--r-- | sys/arch/arm/arm/conf.c | 5 | ||||
-rw-r--r-- | sys/arch/arm64/arm64/conf.c | 5 | ||||
-rw-r--r-- | sys/arch/i386/i386/conf.c | 5 | ||||
-rw-r--r-- | sys/arch/landisk/landisk/conf.c | 5 | ||||
-rw-r--r-- | sys/arch/loongson/loongson/conf.c | 5 | ||||
-rw-r--r-- | sys/arch/luna88k/luna88k/conf.c | 5 | ||||
-rw-r--r-- | sys/arch/macppc/macppc/conf.c | 5 | ||||
-rw-r--r-- | sys/arch/octeon/octeon/conf.c | 5 | ||||
-rw-r--r-- | sys/arch/sgi/sgi/conf.c | 5 | ||||
-rw-r--r-- | sys/arch/sparc64/sparc64/conf.c | 5 | ||||
-rw-r--r-- | sys/conf/GENERIC | 3 | ||||
-rw-r--r-- | sys/conf/files | 8 | ||||
-rw-r--r-- | sys/dev/dt/dt_dev.c | 714 | ||||
-rw-r--r-- | sys/dev/dt/dt_prov_profile.c | 147 | ||||
-rw-r--r-- | sys/dev/dt/dt_prov_static.c | 136 | ||||
-rw-r--r-- | sys/dev/dt/dt_prov_syscall.c | 206 | ||||
-rw-r--r-- | sys/dev/dt/dtvar.h | 317 | ||||
-rw-r--r-- | sys/kern/kern_clock.c | 13 | ||||
-rw-r--r-- | sys/kern/kern_sched.c | 5 | ||||
-rw-r--r-- | sys/kern/kern_synch.c | 7 | ||||
-rw-r--r-- | sys/kern/sched_bsd.c | 7 | ||||
-rw-r--r-- | sys/sys/conf.h | 11 | ||||
-rw-r--r-- | sys/sys/syscall_mi.h | 24 | ||||
-rw-r--r-- | sys/sys/tracepoint.h | 36 |
25 files changed, 1658 insertions, 31 deletions
diff --git a/sys/arch/amd64/amd64/conf.c b/sys/arch/amd64/amd64/conf.c index 6330f6e442d..d456a10c054 100644 --- a/sys/arch/amd64/amd64/conf.c +++ b/sys/arch/amd64/amd64/conf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: conf.c,v 1.65 2019/12/17 13:08:54 reyk Exp $ */ +/* $OpenBSD: conf.c,v 1.66 2020/01/21 16:16:22 mpi Exp $ */ /* * Copyright (c) 1994, 1995 Charles M. Hannum. All rights reserved. @@ -176,6 +176,7 @@ cdev_decl(viocon); cdev_decl(pci); #endif +#include "dt.h" #include "pf.h" #include "hotplug.h" #include "gpio.h" @@ -223,7 +224,7 @@ struct cdevsw cdevsw[] = cdev_spkr_init(NSPKR,spkr), /* 27: PC speaker */ cdev_notdef(), /* 28 was LKM */ cdev_notdef(), /* 29 */ - cdev_notdef(), /* 30 */ + cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ cdev_notdef(), /* 31 */ cdev_notdef(), /* 32 */ cdev_notdef(), /* 33 */ diff --git a/sys/arch/arm/arm/conf.c b/sys/arch/arm/arm/conf.c index 26f72c4e2fc..2491a459225 100644 --- a/sys/arch/arm/arm/conf.c +++ b/sys/arch/arm/arm/conf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: conf.c,v 1.52 2019/12/18 06:53:46 deraadt Exp $ */ +/* $OpenBSD: conf.c,v 1.53 2020/01/21 16:16:22 mpi Exp $ */ /* $NetBSD: conf.c,v 1.10 2002/04/19 01:04:38 wiz Exp $ */ /* @@ -68,6 +68,7 @@ * Standard pseudo-devices */ #include "bpfilter.h" +#include "dt.h" #include "pf.h" #include "pty.h" #include "tun.h" @@ -299,7 +300,7 @@ struct cdevsw cdevsw[] = { cdev_ch_init(NCH,ch), /* 27: SCSI autochanger */ cdev_uk_init(NUK,uk), /* 28: SCSI unknown */ cdev_notdef(), /* 29: */ - cdev_notdef(), /* 30: */ + cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ cdev_notdef(), /* 31: */ cdev_notdef(), /* 32: */ cdev_tun_init(NTUN,tun), /* 33: network tunnel */ diff --git a/sys/arch/arm64/arm64/conf.c b/sys/arch/arm64/arm64/conf.c index 17aba177828..d7c814e58c0 100644 --- a/sys/arch/arm64/arm64/conf.c +++ b/sys/arch/arm64/arm64/conf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: conf.c,v 1.10 2019/12/22 18:18:02 kettenis Exp $ */ +/* $OpenBSD: conf.c,v 1.11 2020/01/21 16:16:22 mpi Exp $ */ /* * Copyright (c) 
1994, 1995 Charles M. Hannum. All rights reserved. @@ -139,6 +139,7 @@ cdev_decl(drm); cdev_decl(pci); #endif +#include "dt.h" #include "pf.h" #include "hotplug.h" #include "vscsi.h" @@ -182,7 +183,7 @@ struct cdevsw cdevsw[] = cdev_notdef(), /* 27 */ cdev_notdef(), /* 28 was LKM */ cdev_notdef(), /* 29 */ - cdev_notdef(), /* 30 */ + cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ cdev_notdef(), /* 31 */ cdev_notdef(), /* 32 */ cdev_notdef(), /* 33 */ diff --git a/sys/arch/i386/i386/conf.c b/sys/arch/i386/i386/conf.c index adb6e4bef1e..c624c06c0f6 100644 --- a/sys/arch/i386/i386/conf.c +++ b/sys/arch/i386/i386/conf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: conf.c,v 1.164 2019/12/17 13:08:55 reyk Exp $ */ +/* $OpenBSD: conf.c,v 1.165 2020/01/21 16:16:22 mpi Exp $ */ /* $NetBSD: conf.c,v 1.75 1996/05/03 19:40:20 christos Exp $ */ /* @@ -168,6 +168,7 @@ cdev_decl(drm); cdev_decl(pci); #endif +#include "dt.h" #include "pf.h" #include "hotplug.h" #include "gpio.h" @@ -216,7 +217,7 @@ struct cdevsw cdevsw[] = cdev_spkr_init(NSPKR,spkr), /* 27: PC speaker */ cdev_notdef(), /* 28: was LKM */ cdev_notdef(), /* 29 */ - cdev_notdef(), /* 30 */ + cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ cdev_notdef(), /* 31 */ cdev_notdef(), /* 32 */ cdev_notdef(), /* 33 */ diff --git a/sys/arch/landisk/landisk/conf.c b/sys/arch/landisk/landisk/conf.c index 5cb64822446..e2d891c673a 100644 --- a/sys/arch/landisk/landisk/conf.c +++ b/sys/arch/landisk/landisk/conf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: conf.c,v 1.39 2019/12/17 13:08:55 reyk Exp $ */ +/* $OpenBSD: conf.c,v 1.40 2020/01/21 16:16:22 mpi Exp $ */ /* * Copyright (c) 1994-1998 Mark Brinicombe. 
@@ -67,6 +67,7 @@ * Standard pseudo-devices */ #include "bpfilter.h" +#include "dt.h" #include "pf.h" #include "bio.h" #include "pty.h" @@ -273,7 +274,7 @@ struct cdevsw cdevsw[] = { cdev_ch_init(NCH,ch), /* 27: SCSI autochanger */ cdev_uk_init(NUK,uk), /* 28: SCSI unknown */ cdev_notdef(), /* 29: */ - cdev_notdef(), /* 30: */ + cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ cdev_notdef(), /* 31: */ cdev_notdef(), /* 32: */ cdev_tun_init(NTUN,tun), /* 33: network tunnel */ diff --git a/sys/arch/loongson/loongson/conf.c b/sys/arch/loongson/loongson/conf.c index e57db5bd148..ff0fa19e6de 100644 --- a/sys/arch/loongson/loongson/conf.c +++ b/sys/arch/loongson/loongson/conf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: conf.c,v 1.26 2019/12/20 13:25:02 visa Exp $ */ +/* $OpenBSD: conf.c,v 1.27 2020/01/21 16:16:22 mpi Exp $ */ /* * Copyright (c) 1992, 1993 @@ -116,6 +116,7 @@ cdev_decl(wd); #include "pci.h" cdev_decl(pci); +#include "dt.h" #include "pf.h" #include "usb.h" @@ -166,7 +167,7 @@ struct cdevsw cdevsw[] = #else cdev_notdef(), /* 29 */ #endif - cdev_notdef(), /* 30: */ + cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ cdev_pf_init(NPF,pf), /* 31: packet filter */ cdev_uk_init(NUK,uk), /* 32: unknown SCSI */ cdev_random_init(1,random), /* 33: random data source */ diff --git a/sys/arch/luna88k/luna88k/conf.c b/sys/arch/luna88k/luna88k/conf.c index bf0a440bc38..571593d55a4 100644 --- a/sys/arch/luna88k/luna88k/conf.c +++ b/sys/arch/luna88k/luna88k/conf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: conf.c,v 1.31 2016/12/17 05:22:34 aoyama Exp $ */ +/* $OpenBSD: conf.c,v 1.32 2020/01/21 16:16:23 mpi Exp $ */ /*- * Copyright (c) 1991 The Regents of the University of California. 
@@ -67,6 +67,7 @@ #include "wsmouse.h" #include "wsmux.h" +#include "dt.h" #include "pf.h" #include "vscsi.h" #include "pppx.h" @@ -131,7 +132,7 @@ struct cdevsw cdevsw[] = cdev_tty_init(NCOM, com), /* 27: serial port (on PCMCIA) */ cdev_disk_init(NWD,wd), /* 28: IDE disk (on PCMCIA) */ cdev_notdef(), /* 29 */ - cdev_notdef(), /* 30 */ + cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ cdev_notdef(), /* 31 */ cdev_notdef(), /* 32 */ cdev_notdef(), /* 33 */ diff --git a/sys/arch/macppc/macppc/conf.c b/sys/arch/macppc/macppc/conf.c index 061a11e9a2e..8907b7ce398 100644 --- a/sys/arch/macppc/macppc/conf.c +++ b/sys/arch/macppc/macppc/conf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: conf.c,v 1.69 2019/12/17 13:08:56 reyk Exp $ */ +/* $OpenBSD: conf.c,v 1.70 2020/01/21 16:16:23 mpi Exp $ */ /* * Copyright (c) 1997 Per Fogelstrom @@ -116,6 +116,7 @@ cdev_decl(pci); #include "video.h" #include "midi.h" +#include "dt.h" #include "pf.h" #include "radio.h" @@ -159,7 +160,7 @@ struct cdevsw cdevsw[] = { cdev_notdef(), /* 27 */ cdev_notdef(), /* 28 */ cdev_notdef(), /* 29 */ - cdev_notdef(), /* 30 */ + cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ cdev_notdef(), /* 31 */ cdev_notdef(), /* 32 */ cdev_notdef(), /* 33 */ diff --git a/sys/arch/octeon/octeon/conf.c b/sys/arch/octeon/octeon/conf.c index 1e80c9d93f4..4447d97c1ab 100644 --- a/sys/arch/octeon/octeon/conf.c +++ b/sys/arch/octeon/octeon/conf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: conf.c,v 1.22 2019/12/17 13:08:56 reyk Exp $ */ +/* $OpenBSD: conf.c,v 1.23 2020/01/21 16:16:23 mpi Exp $ */ /* * Copyright (c) 1992, 1993 @@ -129,6 +129,7 @@ cdev_decl(amdcf); #include "pci.h" cdev_decl(pci); +#include "dt.h" #include "pf.h" #include "usb.h" @@ -185,7 +186,7 @@ struct cdevsw cdevsw[] = #else cdev_notdef(), /* 29 */ #endif - cdev_notdef(), /* 30: */ + cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ cdev_pf_init(NPF,pf), /* 31: packet filter */ cdev_uk_init(NUK,uk), /* 32: unknown SCSI */ cdev_random_init(1,random), /* 33: random data source */ diff 
--git a/sys/arch/sgi/sgi/conf.c b/sys/arch/sgi/sgi/conf.c index 062bdfc1974..7d9baf94a9b 100644 --- a/sys/arch/sgi/sgi/conf.c +++ b/sys/arch/sgi/sgi/conf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: conf.c,v 1.40 2019/12/17 13:08:56 reyk Exp $ */ +/* $OpenBSD: conf.c,v 1.41 2020/01/21 16:16:23 mpi Exp $ */ /* * Copyright (c) 1992, 1993 @@ -115,6 +115,7 @@ cdev_decl(wd); #include "pci.h" cdev_decl(pci); +#include "dt.h" #include "pf.h" #include "usb.h" @@ -165,7 +166,7 @@ struct cdevsw cdevsw[] = #else cdev_notdef(), /* 29 */ #endif - cdev_notdef(), /* 30: */ + cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ cdev_pf_init(NPF,pf), /* 31: packet filter */ cdev_uk_init(NUK,uk), /* 32: unknown SCSI */ cdev_random_init(1,random), /* 33: random data source */ diff --git a/sys/arch/sparc64/sparc64/conf.c b/sys/arch/sparc64/sparc64/conf.c index f7afcfeab56..9cba85deb65 100644 --- a/sys/arch/sparc64/sparc64/conf.c +++ b/sys/arch/sparc64/sparc64/conf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: conf.c,v 1.81 2019/12/17 13:08:56 reyk Exp $ */ +/* $OpenBSD: conf.c,v 1.82 2020/01/21 16:16:23 mpi Exp $ */ /* $NetBSD: conf.c,v 1.17 2001/03/26 12:33:26 lukem Exp $ */ /* @@ -108,6 +108,7 @@ cdev_decl(pci); #include "ulpt.h" #include "ucom.h" +#include "dt.h" #include "pf.h" #include "ksyms.h" @@ -181,7 +182,7 @@ struct cdevsw cdevsw[] = cdev_notdef(), /* 27 */ cdev_notdef(), /* 28: Systech VPC-2200 versatec/centronics */ cdev_notdef(), /* 29 */ - cdev_notdef(), /* 30: Xylogics tape */ + cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ cdev_notdef(), /* 31: /dev/cgtwo */ cdev_notdef(), /* 32: should be /dev/gpone */ cdev_notdef(), /* 33 */ diff --git a/sys/conf/GENERIC b/sys/conf/GENERIC index 92240cf0ab3..b7f2550db74 100644 --- a/sys/conf/GENERIC +++ b/sys/conf/GENERIC @@ -1,4 +1,4 @@ -# $OpenBSD: GENERIC,v 1.266 2019/10/12 17:06:02 naddy Exp $ +# $OpenBSD: GENERIC,v 1.267 2020/01/21 16:16:23 mpi Exp $ # # Machine-independent option; used by all architectures for their # GENERIC kernel @@ -82,6 +82,7 @@ 
pseudo-device msts 1 # MSTS line discipline pseudo-device endrun 1 # EndRun line discipline pseudo-device vnd 4 # vnode disk devices pseudo-device ksyms 1 # kernel symbols device +#pseudo-device dt # Dynamic Tracer # clonable devices pseudo-device bpfilter # packet filter diff --git a/sys/conf/files b/sys/conf/files index dcd62565ae3..f0c5aa4df3d 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,4 +1,4 @@ -# $OpenBSD: files,v 1.680 2020/01/11 00:56:38 jsg Exp $ +# $OpenBSD: files,v 1.681 2020/01/21 16:16:23 mpi Exp $ # $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $ # @(#)files.newconf 7.5 (Berkeley) 5/10/93 @@ -603,6 +603,12 @@ file net/if_pppoe.c pppoe needs-flag pseudo-device kcov file dev/kcov.c kcov needs-flag +pseudo-device dt +file dev/dt/dt_dev.c dt needs-flag +file dev/dt/dt_prov_profile.c dt +file dev/dt/dt_prov_syscall.c dt +file dev/dt/dt_prov_static.c dt + # XXX machine-independent SCSI files should live somewhere here, maybe # kernel sources diff --git a/sys/dev/dt/dt_dev.c b/sys/dev/dt/dt_dev.c new file mode 100644 index 00000000000..e8a5512684e --- /dev/null +++ b/sys/dev/dt/dt_dev.c @@ -0,0 +1,714 @@ +/* $OpenBSD: dt_dev.c,v 1.1 2020/01/21 16:16:23 mpi Exp $ */ + +/* + * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/param.h> +#include <sys/device.h> +#include <sys/malloc.h> +#include <sys/proc.h> + +#include <dev/dt/dtvar.h> + +/* + * How many frames are used by the profiling code? For example + * on amd64: + * + * From syscall provider: + * + * dt_prov_syscall_entry+0x141 + * syscall+0x205 <--- start here + * Xsyscall+0x128 + * + * From profile provider: + * + * dt_prov_profile_enter+0x6e + * hardclock+0x12c + * clockintr+0x59 + * intr_handler+0x6e + * Xresume_legacy0+0x1d3 + * cpu_idle_cycle+0x1b <---- start here. + * proc_trampoline+0x1c + */ +#if notyet +#define DT_HOOK_FRAME_ADDRESS __builtin_frame_address(4) +#else +#define DT_HOOK_FRAME_ADDRESS __builtin_frame_address(0) +#endif + +#define DT_EVTRING_SIZE 16 /* # of slots in per PCB event ring */ + +#define DPRINTF(x...) /* nothing */ + +/* + * Descriptor associated with each program opening /dev/dt. It is used + * to keep track of enabled PCBs. + * + * Locks used to protect struct members in this file: + * m per-softc mutex + * k kernel lock + */ +struct dt_softc { + SLIST_ENTRY(dt_softc) ds_next; /* [k] descriptor list */ + int ds_unit; /* [I] D_CLONE unique unit */ + pid_t ds_pid; /* [I] PID of tracing program */ + + struct mutex ds_mtx; + + struct dt_pcb_list ds_pcbs; /* [k] list of enabled PCBs */ + struct dt_evt *ds_bufqueue; /* [k] copy evts to userland */ + size_t ds_bufqlen; /* [k] length of the queue */ + int ds_recording; /* [k] currently recording? 
*/ + int ds_evtcnt; /* [m] # of readable evts */ + + /* Counters */ + uint64_t ds_readevt; /* [m] # of events read */ + uint64_t ds_dropevt; /* [m] # of events dropped */ +}; + +SLIST_HEAD(, dt_softc) dtdev_list; /* [k] list of open /dev/dt nodes */ + +/* + * Probes are created during dt_attach() and never modified/freed during + * the lifetime of the system. That's why we consider them as [I]mmutable. + */ +unsigned int dt_nprobes; /* [I] # of probes available */ +SIMPLEQ_HEAD(, dt_probe) dt_probe_list; /* [I] list of probes */ + +struct rwlock dt_lock = RWLOCK_INITIALIZER("dtlk"); +volatile uint32_t dt_tracing = 0; /* [d] # of processes tracing */ + +void dtattach(struct device *, struct device *, void *); +int dtopen(dev_t, int, int, struct proc *); +int dtclose(dev_t, int, int, struct proc *); +int dtread(dev_t, struct uio *, int); +int dtioctl(dev_t, u_long, caddr_t, int, struct proc *); + +struct dt_softc *dtlookup(int); + +int dt_ioctl_list_probes(struct dt_softc *, struct dtioc_probe *); +int dt_ioctl_get_stats(struct dt_softc *, struct dtioc_stat *); +int dt_ioctl_record_start(struct dt_softc *); +void dt_ioctl_record_stop(struct dt_softc *); +int dt_ioctl_probe_enable(struct dt_softc *, struct dtioc_req *); +void dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *); + +int dt_enter(void); +void dt_leave(uint32_t); + +int dt_pcb_ring_copy(struct dt_pcb *, struct dt_evt *, size_t, uint64_t *); + +void +dtattach(struct device *parent, struct device *self, void *aux) +{ + SLIST_INIT(&dtdev_list); + SIMPLEQ_INIT(&dt_probe_list); + + /* Init providers */ + dt_nprobes += dt_prov_profile_init(); + dt_nprobes += dt_prov_syscall_init(); + dt_nprobes += dt_prov_static_init(); + + printf("dt: %u probes\n", dt_nprobes); +} + +int +dtopen(dev_t dev, int flags, int mode, struct proc *p) +{ + struct dt_softc *sc; + int unit = minor(dev); + + KASSERT(dtlookup(unit) == NULL); + + sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); + if (sc == NULL) + 
return ENOMEM; + + /* + * Enough space to empty 2 full rings of events in a single read. + */ + sc->ds_bufqlen = 2 * DT_EVTRING_SIZE; + sc->ds_bufqueue = mallocarray(sc->ds_bufqlen, sizeof(*sc->ds_bufqueue), + M_DEVBUF, M_WAITOK|M_CANFAIL); + if (sc->ds_bufqueue == NULL) + goto bad; + + sc->ds_unit = unit; + sc->ds_pid = p->p_p->ps_pid; + TAILQ_INIT(&sc->ds_pcbs); + mtx_init(&sc->ds_mtx, IPL_HIGH); + sc->ds_evtcnt = 0; + sc->ds_readevt = 0; + sc->ds_dropevt = 0; + + SLIST_INSERT_HEAD(&dtdev_list, sc, ds_next); + + DPRINTF("dt%d: pid %d open\n", sc->ds_unit, sc->ds_pid); + + return 0; + +bad: + free(sc, M_DEVBUF, sizeof(*sc)); + return ENOMEM; +} + +int +dtclose(dev_t dev, int flags, int mode, struct proc *p) +{ + struct dt_softc *sc; + int unit = minor(dev); + + sc = dtlookup(unit); + KASSERT(sc != NULL); + + DPRINTF("dt%d: pid %d close\n", sc->ds_unit, sc->ds_pid); + + SLIST_REMOVE(&dtdev_list, sc, dt_softc, ds_next); + dt_ioctl_record_stop(sc); + dt_pcb_purge(&sc->ds_pcbs); + + free(sc->ds_bufqueue, M_DEVBUF, + sc->ds_bufqlen * sizeof(*sc->ds_bufqueue)); + free(sc, M_DEVBUF, sizeof(*sc)); + + return 0; +} + +int +dtread(dev_t dev, struct uio *uio, int flags) +{ + struct dt_softc *sc; + struct dt_evt *estq; + struct dt_pcb *dp; + int error, unit = minor(dev); + size_t qlen, count, read = 0; + uint64_t dropped = 0; + + sc = dtlookup(unit); + KASSERT(sc != NULL); + + count = howmany(uio->uio_resid, sizeof(struct dt_evt)); + if (count < 1) + return (EMSGSIZE); + + mtx_enter(&sc->ds_mtx); + while (!sc->ds_evtcnt) { + error = msleep(sc, &sc->ds_mtx, PWAIT|PCATCH, "dtread", 0); + if (error == EINTR || error == ERESTART) + break; + } + mtx_leave(&sc->ds_mtx); + + if (error) + return error; + + estq = sc->ds_bufqueue; + qlen = MIN(sc->ds_bufqlen, count); + + KERNEL_ASSERT_LOCKED(); + TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) { + count = dt_pcb_ring_copy(dp, estq, qlen, &dropped); + read += count; + estq += count; /* pointer aritmetic */ + qlen -= count; + if (qlen == 0) + 
break; + } + if (read > 0) + uiomove(sc->ds_bufqueue, read * sizeof(struct dt_evt), uio); + + mtx_enter(&sc->ds_mtx); + sc->ds_evtcnt -= read; + sc->ds_readevt += read; + sc->ds_dropevt += dropped; + mtx_leave(&sc->ds_mtx); + + return 0; +} + +int +dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p) +{ + struct dt_softc *sc; + int unit = minor(dev); + int on, error = 0; + + sc = dtlookup(unit); + KASSERT(sc != NULL); + + switch (cmd) { + case DTIOCGPLIST: + return dt_ioctl_list_probes(sc, (struct dtioc_probe *)addr); + case DTIOCGSTATS: + return dt_ioctl_get_stats(sc, (struct dtioc_stat *)addr); + case DTIOCRECORD: + case DTIOCPRBENABLE: + /* root only ioctl(2) */ + break; + default: + return ENOTTY; + } + + if ((error = suser(p)) != 0) + return error; + + switch (cmd) { + case DTIOCRECORD: + on = *(int *)addr; + if (on) + error = dt_ioctl_record_start(sc); + else + dt_ioctl_record_stop(sc); + break; + case DTIOCPRBENABLE: + error = dt_ioctl_probe_enable(sc, (struct dtioc_req *)addr); + break; + default: + KASSERT(0); + } + + return error; +} + +struct dt_softc * +dtlookup(int unit) +{ + struct dt_softc *sc; + + KERNEL_ASSERT_LOCKED(); + + SLIST_FOREACH(sc, &dtdev_list, ds_next) { + if (sc->ds_unit == unit) + break; + } + + return sc; +} + +int +dtioc_req_isvalid(struct dtioc_req *dtrq) +{ + switch (dtrq->dtrq_filter.dtf_operand) { + case DT_OP_NONE: + case DT_OP_EQ: + case DT_OP_NE: + break; + default: + return 0; + } + + switch (dtrq->dtrq_filter.dtf_variable) { + case DT_FV_NONE: + case DT_FV_PID: + case DT_FV_TID: + break; + default: + return 0; + } + + return 1; +} + +int +dt_ioctl_list_probes(struct dt_softc *sc, struct dtioc_probe *dtpr) +{ + struct dtioc_probe_info info, *dtpi; + struct dt_probe *dtp; + size_t size; + int error = 0; + + if (dtpr->dtpr_size == 0) { + dtpr->dtpr_size = dt_nprobes * sizeof(*dtpi); + return 0; + } + + size = dtpr->dtpr_size; + dtpi = dtpr->dtpr_probes; + memset(&info, 0, sizeof(info)); + SIMPLEQ_FOREACH(dtp, 
&dt_probe_list, dtp_next) { + if (size < sizeof(*dtpi)) { + error = ENOSPC; + break; + } + info.dtpi_pbn = dtp->dtp_pbn; + strlcpy(info.dtpi_prov, dtp->dtp_prov->dtpv_name, + sizeof(info.dtpi_prov)); + strlcpy(info.dtpi_func, dtp->dtp_func, sizeof(info.dtpi_func)); + strlcpy(info.dtpi_name, dtp->dtp_name, sizeof(info.dtpi_name)); + error = copyout(&info, dtpi, sizeof(*dtpi)); + if (error) + break; + size -= sizeof(*dtpi); + dtpi++; + }; + + return error; +} + +int +dt_ioctl_get_stats(struct dt_softc *sc, struct dtioc_stat *dtst) +{ + mtx_enter(&sc->ds_mtx); + dtst->dtst_readevt = sc->ds_readevt; + dtst->dtst_dropevt = sc->ds_dropevt; + mtx_leave(&sc->ds_mtx); + + return 0; +} + +int +dt_ioctl_record_start(struct dt_softc *sc) +{ + struct dt_pcb *dp; + int count; + + if (sc->ds_recording) + return EBUSY; + + KERNEL_ASSERT_LOCKED(); + if (TAILQ_EMPTY(&sc->ds_pcbs)) + return ENOENT; + + count = dt_enter(); + TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) { + struct dt_probe *dtp = dp->dp_dtp; + + rw_assert_wrlock(&dt_lock); + SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext); + dtp->dtp_recording++; + dtp->dtp_prov->dtpv_recording++; + } + dt_leave(count); + + sc->ds_recording = 1; + dt_tracing++; + + return 0; +} + +void +dt_ioctl_record_stop(struct dt_softc *sc) +{ + struct dt_pcb *dp; + int count; + + KASSERT(suser(curproc) == 0); + + if (!sc->ds_recording) + return; + + DPRINTF("dt%d: pid %d disable\n", sc->ds_unit, sc->ds_pid); + + dt_tracing--; + sc->ds_recording = 0; + + count = dt_enter(); + TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) { + struct dt_probe *dtp = dp->dp_dtp; + + rw_assert_wrlock(&dt_lock); + dtp->dtp_recording--; + dtp->dtp_prov->dtpv_recording--; + SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext); + } + dt_leave(count); +} + +int +dt_ioctl_probe_enable(struct dt_softc *sc, struct dtioc_req *dtrq) +{ + struct dt_pcb_list plist; + struct dt_probe *dtp; + struct dt_pcb *dp; + int error; + + KASSERT(suser(curproc) == 0); + + if 
(!dtioc_req_isvalid(dtrq)) + return EINVAL; + + SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) { + if (dtp->dtp_pbn == dtrq->dtrq_pbn) + break; + } + if (dtp == NULL) + return ENOENT; + + TAILQ_INIT(&plist); + error = dtp->dtp_prov->dtpv_alloc(dtp, sc, &plist, dtrq); + if (error) + return error; + + DPRINTF("dt%d: pid %d enable %u : %b\n", sc->ds_unit, sc->ds_pid, + dtrq->dtrq_pbn, (unsigned int)dtrq->dtrq_evtflags, DTEVT_FLAG_BITS); + + /* Append all PCBs to this instance */ + while ((dp = TAILQ_FIRST(&plist)) != NULL) { + TAILQ_REMOVE(&plist, dp, dp_snext); + TAILQ_INSERT_HEAD(&sc->ds_pcbs, dp, dp_snext); + } + + return 0; +} + +int +dt_enter(void) +{ + uint32_t count; + + rw_enter_write(&dt_lock); + count = dt_tracing; + dt_tracing = 0; + + smr_barrier(); + + return count; +} + +void +dt_leave(uint32_t count) +{ + dt_tracing = count; + rw_exit_write(&dt_lock); +} + +struct dt_probe * +dt_dev_alloc_probe(const char *func, const char *name, struct dt_provider *dtpv) +{ + struct dt_probe *dtp; + + dtp = malloc(sizeof(*dtp), M_DT, M_NOWAIT|M_ZERO); + if (dtp == NULL) + return NULL; + + SMR_SLIST_INIT(&dtp->dtp_pcbs); + dtp->dtp_prov = dtpv; + dtp->dtp_func = func; + dtp->dtp_name = name; + dtp->dtp_sysnum = -1; + + return dtp; +} + +void +dt_dev_register_probe(struct dt_probe *dtp) +{ + static uint64_t probe_nb; + + dtp->dtp_pbn = ++probe_nb; + SIMPLEQ_INSERT_TAIL(&dt_probe_list, dtp, dtp_next); +} + +struct dt_pcb * +dt_pcb_alloc(struct dt_probe *dtp, struct dt_softc *sc) +{ + struct dt_pcb *dp; + + dp = malloc(sizeof(*dp), M_DT, M_WAITOK|M_CANFAIL|M_ZERO); + if (dp == NULL) + goto bad; + + dp->dp_ring = mallocarray(DT_EVTRING_SIZE, sizeof(*dp->dp_ring), M_DT, + M_WAITOK|M_CANFAIL|M_ZERO); + if (dp->dp_ring == NULL) + goto bad; + + mtx_init(&dp->dp_mtx, IPL_HIGH); + dp->dp_sc = sc; + dp->dp_dtp = dtp; + return dp; +bad: + dt_pcb_free(dp); + return NULL; +} + +void +dt_pcb_free(struct dt_pcb *dp) +{ + if (dp == NULL) + return; + free(dp->dp_ring, M_DT, 
DT_EVTRING_SIZE * sizeof(*dp->dp_ring)); + free(dp, M_DT, sizeof(*dp)); +} + +void +dt_pcb_purge(struct dt_pcb_list *plist) +{ + struct dt_pcb *dp; + + while ((dp = TAILQ_FIRST(plist)) != NULL) { + TAILQ_REMOVE(plist, dp, dp_snext); + dt_pcb_free(dp); + } +} + +int +dt_pcb_filter(struct dt_pcb *dp) +{ + struct dt_filter *dtf = &dp->dp_filter; + struct proc *p = curproc; + unsigned int var; + int match = 1; + + /* Filter out tracing program. */ + if (dp->dp_sc->ds_pid == p->p_p->ps_pid) + return 1; + + switch (dtf->dtf_variable) { + case DT_FV_PID: + var = p->p_p->ps_pid; + break; + case DT_FV_TID: + var = p->p_tid; + break; + case DT_FV_NONE: + break; + default: + KASSERT(0); + } + + switch (dtf->dtf_operand) { + case DT_OP_EQ: + match = !!(var == dtf->dtf_value); + break; + case DT_OP_NE: + match = !!(var != dtf->dtf_value); + break; + case DT_OP_NONE: + break; + default: + KASSERT(0); + } + + return !match; +} + + +/* + * Get a reference to the next free event state from the ring. + */ +struct dt_evt * +dt_pcb_ring_get(struct dt_pcb *dp) +{ + struct proc *p = curproc; + struct dt_evt *dtev; + int distance; + + if (dt_pcb_filter(dp)) + return NULL; + + mtx_enter(&dp->dp_mtx); + distance = dp->dp_prod - dp->dp_cons; + if (distance == 1 || distance == (1 - DT_EVTRING_SIZE)) { + /* read(2) isn't finished */ + dp->dp_dropevt++; + mtx_leave(&dp->dp_mtx); + return NULL; + } + + /* + * Save states in next free event slot. 
+ */ + dtev = &dp->dp_ring[dp->dp_cons]; + memset(dtev, 0, sizeof(*dtev)); + + dtev->dtev_pbn = dp->dp_dtp->dtp_pbn; + dtev->dtev_cpu = cpu_number(); + dtev->dtev_pid = p->p_p->ps_pid; + dtev->dtev_tid = p->p_tid; + nanotime(&dtev->dtev_tsp); + + if (ISSET(dp->dp_evtflags, DTEVT_EXECNAME)) + memcpy(dtev->dtev_comm, p->p_p->ps_comm, DTMAXCOMLEN - 1); + + if (ISSET(dp->dp_evtflags, DTEVT_KSTACK|DTEVT_USTACK)) { +#if notyet + stacktrace_save_at(&dtev->dtev_kstack, DT_HOOK_FRAME_ADDRESS); +#else + stacktrace_save(&dtev->dtev_kstack); +#endif + } + + return dtev; +} + +void +dt_pcb_ring_consume(struct dt_pcb *dp, struct dt_evt *dtev) +{ + MUTEX_ASSERT_LOCKED(&dp->dp_mtx); + KASSERT(dtev == &dp->dp_ring[dp->dp_cons]); + + dp->dp_cons = (dp->dp_cons + 1) % DT_EVTRING_SIZE; + mtx_leave(&dp->dp_mtx); + + mtx_enter(&dp->dp_sc->ds_mtx); + dp->dp_sc->ds_evtcnt++; + mtx_leave(&dp->dp_sc->ds_mtx); + wakeup(dp->dp_sc); +} + +/* + * Copy at most `qlen' events from `dp', producing the same amount + * of free slots. + */ +int +dt_pcb_ring_copy(struct dt_pcb *dp, struct dt_evt *estq, size_t qlen, + uint64_t *dropped) +{ + size_t count, copied = 0; + unsigned int cons, prod; + + KASSERT(qlen > 0); + + mtx_enter(&dp->dp_mtx); + cons = dp->dp_cons; + prod = dp->dp_prod; + + if (cons < prod) + count = DT_EVTRING_SIZE - prod; + else + count = cons - prod; + + if (count == 0) + goto out; + + *dropped += dp->dp_dropevt; + dp->dp_dropevt = 0; + + count = MIN(count, qlen); + + memcpy(&estq[0], &dp->dp_ring[prod], count * sizeof(*estq)); + copied += count; + + /* Produce */ + prod = (prod + count) % DT_EVTRING_SIZE; + + /* If the queue is full or the ring didn't wrap, stop here. 
*/ + if (qlen == copied || prod != 0 || cons == 0) + goto out; + + count = MIN(cons, (qlen - copied)); + memcpy(&estq[copied], &dp->dp_ring[0], count * sizeof(*estq)); + copied += count; + prod += count; + +out: + dp->dp_prod = prod; + mtx_leave(&dp->dp_mtx); + return copied; +} diff --git a/sys/dev/dt/dt_prov_profile.c b/sys/dev/dt/dt_prov_profile.c new file mode 100644 index 00000000000..0a406ee2492 --- /dev/null +++ b/sys/dev/dt/dt_prov_profile.c @@ -0,0 +1,147 @@ +/* $OpenBSD: dt_prov_profile.c,v 1.1 2020/01/21 16:16:23 mpi Exp $ */ + +/* + * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/param.h> +#include <sys/atomic.h> + +#include <dev/dt/dtvar.h> + +struct dt_probe *dtpp_profile; /* per-CPU profile probe */ +struct dt_probe *dtpp_interval; /* global periodic probe */ + +/* Flags that make sense for this provider */ +#define DTEVT_PROV_PROFILE DTEVT_KSTACK + +int dt_prov_profile_alloc(struct dt_probe *, struct dt_softc *, + struct dt_pcb_list *, struct dtioc_req *); +void dt_prov_profile_enter(struct dt_provider *, ...); +void dt_prov_interval_enter(struct dt_provider *, ...); + +struct dt_provider dt_prov_profile = { + .dtpv_name = "profile", + .dtpv_alloc = dt_prov_profile_alloc, + .dtpv_enter = dt_prov_profile_enter, + .dtpv_leave = NULL, +}; + +struct dt_provider dt_prov_interval = { + .dtpv_name = "interval", + .dtpv_alloc = dt_prov_profile_alloc, + .dtpv_enter = dt_prov_interval_enter, + .dtpv_leave = NULL, +}; + +int +dt_prov_profile_init(void) +{ + dtpp_profile = dt_dev_alloc_probe("hz", "97", &dt_prov_profile); + dt_dev_register_probe(dtpp_profile); + if (dtpp_profile == NULL) + return 0; + dtpp_interval = dt_dev_alloc_probe("hz", "1", &dt_prov_interval); + dt_dev_register_probe(dtpp_interval); + if (dtpp_interval == NULL) + return 1; + return 2; +} + +int +dt_prov_profile_alloc(struct dt_probe *dtp, struct dt_softc *sc, + struct dt_pcb_list *plist, struct dtioc_req *dtrq) +{ + struct dt_pcb *dp; + struct cpu_info *ci; + CPU_INFO_ITERATOR cii; + extern int hz; + + KASSERT(dtioc_req_isvalid(dtrq)); + KASSERT(TAILQ_EMPTY(plist)); + KASSERT(dtp == dtpp_profile || dtp == dtpp_interval); + + if (dtrq->dtrq_rate <= 0 || dtrq->dtrq_rate >= hz) + return EOPNOTSUPP; + + CPU_INFO_FOREACH(cii, ci) { + if (!CPU_IS_PRIMARY(ci) && (dtp == dtpp_interval)) + continue; + + dp = dt_pcb_alloc(dtp, sc); + if (dp == NULL) { + dt_pcb_purge(plist); + return ENOMEM; + } + + dp->dp_maxtick = dtrq->dtrq_rate; + dp->dp_cpuid = ci->ci_cpuid; + + dp->dp_filter = dtrq->dtrq_filter; + 
dp->dp_evtflags = dtrq->dtrq_evtflags & DTEVT_PROV_PROFILE; + TAILQ_INSERT_HEAD(plist, dp, dp_snext); + } + + return 0; +} + +static inline void +dt_prov_profile_fire(struct dt_pcb *dp) +{ + struct dt_evt *dtev; + + if (++dp->dp_nticks < dp->dp_maxtick) + return; + + dtev = dt_pcb_ring_get(dp); + if (dtev == NULL) + return; + dt_pcb_ring_consume(dp, dtev); + dp->dp_nticks = 0; +} + +void +dt_prov_profile_enter(struct dt_provider *dtpv, ...) +{ + struct cpu_info *ci = curcpu(); + struct dt_pcb *dp; + + KASSERT(dtpv == &dt_prov_profile); + + smr_read_enter(); + SMR_SLIST_FOREACH(dp, &dtpp_profile->dtp_pcbs, dp_pnext) { + if (dp->dp_cpuid != ci->ci_cpuid) + continue; + + dt_prov_profile_fire(dp); + } + smr_read_leave(); +} + +void +dt_prov_interval_enter(struct dt_provider *dtpv, ...) +{ + struct dt_pcb *dp; + + KASSERT(dtpv == &dt_prov_interval); + + smr_read_enter(); + SMR_SLIST_FOREACH(dp, &dtpp_interval->dtp_pcbs, dp_pnext) { + dt_prov_profile_fire(dp); + } + smr_read_leave(); +} diff --git a/sys/dev/dt/dt_prov_static.c b/sys/dev/dt/dt_prov_static.c new file mode 100644 index 00000000000..65764619e02 --- /dev/null +++ b/sys/dev/dt/dt_prov_static.c @@ -0,0 +1,136 @@ +/* $OpenBSD: dt_prov_static.c,v 1.1 2020/01/21 16:16:23 mpi Exp $ */ + +/* + * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/param.h> +#include <sys/tracepoint.h> + +#include <dev/dt/dtvar.h> + +int dt_prov_static_alloc(struct dt_probe *, struct dt_softc *, + struct dt_pcb_list *, struct dtioc_req *); +void dt_prov_static_hook(struct dt_provider *, ...); + +struct dt_provider dt_prov_static = { + .dtpv_name = "tracepoint", + .dtpv_alloc = dt_prov_static_alloc, + .dtpv_enter = dt_prov_static_hook, +}; + +/* + * Scheduler provider + */ +DT_STATIC_PROBE2(sched, dequeue, "pid_t", "pid_t"); +DT_STATIC_PROBE2(sched, enqueue, "pid_t", "pid_t"); +DT_STATIC_PROBE2(sched, off__cpu, "pid_t", "pid_t"); +DT_STATIC_PROBE0(sched, on__cpu); +DT_STATIC_PROBE0(sched, remain__cpu); +DT_STATIC_PROBE0(sched, sleep); +DT_STATIC_PROBE2(sched, wakeup, "pid_t", "pid_t"); /* call site passes tid and pid */ + +/* + * Raw syscalls + */ +DT_STATIC_PROBE1(raw_syscalls, sys_enter, "register_t"); +DT_STATIC_PROBE1(raw_syscalls, sys_exit, "register_t"); + +/* + * List of all static probes + */ +struct dt_probe *dtps_static[] = { + /* Scheduler */ + &_DT_STATIC_P(sched, dequeue), + &_DT_STATIC_P(sched, enqueue), + &_DT_STATIC_P(sched, off__cpu), + &_DT_STATIC_P(sched, on__cpu), + &_DT_STATIC_P(sched, remain__cpu), + &_DT_STATIC_P(sched, sleep), + &_DT_STATIC_P(sched, wakeup), + /* Raw syscalls */ + &_DT_STATIC_P(raw_syscalls, sys_enter), + &_DT_STATIC_P(raw_syscalls, sys_exit), +}; + +int +dt_prov_static_init(void) +{ + int i; + + for (i = 0; i < nitems(dtps_static); i++) + dt_dev_register_probe(dtps_static[i]); + + return i; +} + +int +dt_prov_static_alloc(struct dt_probe *dtp, struct dt_softc *sc, + struct dt_pcb_list *plist, struct dtioc_req *dtrq) +{ + struct dt_pcb *dp;
+ + KASSERT(dtioc_req_isvalid(dtrq)); + KASSERT(TAILQ_EMPTY(plist)); + + dp = dt_pcb_alloc(dtp, sc); + if (dp == NULL) + return ENOMEM; + + dp->dp_filter = dtrq->dtrq_filter; + dp->dp_evtflags = dtrq->dtrq_evtflags; + TAILQ_INSERT_HEAD(plist, dp, dp_snext); + + return 0; +} + +void +dt_prov_static_hook(struct dt_provider *dtpv, ...) +{ + struct dt_probe *dtp; + struct dt_pcb *dp; + uintptr_t args[5] = { 0 }; /* all 5 slots are copied out below; never expose stack garbage */ + va_list ap; + int i; + + va_start(ap, dtpv); + dtp = va_arg(ap, struct dt_probe *); + for (i = 0; i < dtp->dtp_nargs; i++) { + args[i] = va_arg(ap, uintptr_t); + } + va_end(ap); + + KASSERT(dtpv == dtp->dtp_prov); + + smr_read_enter(); + SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) { + struct dt_evt *dtev; + + dtev = dt_pcb_ring_get(dp); + if (dtev == NULL) + continue; + + dtev->dtev_sysargs[0] = args[0]; + dtev->dtev_sysargs[1] = args[1]; + dtev->dtev_sysargs[2] = args[2]; + dtev->dtev_sysargs[3] = args[3]; + dtev->dtev_sysargs[4] = args[4]; + + dt_pcb_ring_consume(dp, dtev); + } + smr_read_leave(); +} diff --git a/sys/dev/dt/dt_prov_syscall.c b/sys/dev/dt/dt_prov_syscall.c new file mode 100644 index 00000000000..28ceb88b48c --- /dev/null +++ b/sys/dev/dt/dt_prov_syscall.c @@ -0,0 +1,206 @@ +/* $OpenBSD: dt_prov_syscall.c,v 1.1 2020/01/21 16:16:23 mpi Exp $ */ + +/* + * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/param.h> +#include <sys/malloc.h> +#include <sys/atomic.h> +#include <sys/syscall.h> + +#include <dev/dt/dtvar.h> + +extern struct sysent sysent[]; + +/* Arrays of probes per syscall. */ +struct dt_probe **dtps_entry; +struct dt_probe **dtps_return; +unsigned int dtps_nsysent = SYS_MAXSYSCALL; + +/* Flags that make sense for this provider */ +#define DTEVT_PROV_SYSCALL (DTEVT_COMMON|DTEVT_FUNCARGS|DTEVT_RETVAL) + +int dt_prov_syscall_alloc(struct dt_probe *, struct dt_softc *, + struct dt_pcb_list *, struct dtioc_req *); +void dt_prov_syscall_entry(struct dt_provider *, ...); +void dt_prov_syscall_return(struct dt_provider *, ...); + +struct dt_provider dt_prov_syscall = { + .dtpv_name = "syscall", + .dtpv_alloc = dt_prov_syscall_alloc, + .dtpv_enter = dt_prov_syscall_entry, + .dtpv_leave = dt_prov_syscall_return, +}; + +int +dt_prov_syscall_init(void) +{ + struct dt_probe *dtp; + int i, len, nprobes = 0; + char *sysnb; + + dtps_entry = mallocarray(dtps_nsysent, sizeof(dtp), M_DT, + M_NOWAIT|M_ZERO); + if (dtps_entry == NULL) + return 0; + dtps_return = mallocarray(dtps_nsysent, sizeof(dtp), M_DT, + M_NOWAIT|M_ZERO); + if (dtps_return == NULL) { + free(dtps_entry, M_DT, dtps_nsysent * sizeof(dtp)); + return 0; + } + + for (i = 0; i < dtps_nsysent; i++) { + if (sysent[i].sy_call == sys_nosys) + continue; + + len = snprintf(NULL, 0, "sys%%%u", i); + sysnb = malloc(len + 1, M_DT, M_NOWAIT); + if (sysnb == NULL) + break; + snprintf(sysnb, len + 1, "sys%%%u", i); + dtp = dt_dev_alloc_probe(sysnb, "entry", &dt_prov_syscall); + if (dtp == NULL) { + free(sysnb, M_DT, len + 1); /* size must match the malloc() above */ +
break; + } + dtp->dtp_sysnum = i; + dtps_entry[i] = dtp; + dt_dev_register_probe(dtp); + nprobes++; + dtp = dt_dev_alloc_probe(sysnb, "return", &dt_prov_syscall); + if (dtp == NULL) + break; + dtp->dtp_sysnum = i; + dtps_return[i] = dtp; + dt_dev_register_probe(dtp); + nprobes++; + } + + return nprobes; +} + +int +dt_prov_syscall_alloc(struct dt_probe *dtp, struct dt_softc *sc, + struct dt_pcb_list *plist, struct dtioc_req *dtrq) +{ + struct dt_pcb *dp; + + KASSERT(dtioc_req_isvalid(dtrq)); + KASSERT(TAILQ_EMPTY(plist)); + KASSERT(dtp->dtp_prov == &dt_prov_syscall); + KASSERT((dtp->dtp_sysnum >= 0) && (dtp->dtp_sysnum < dtps_nsysent)); + + dp = dt_pcb_alloc(dtp, sc); + if (dp == NULL) + return ENOMEM; + + dp->dp_filter = dtrq->dtrq_filter; + dp->dp_evtflags = dtrq->dtrq_evtflags & DTEVT_PROV_SYSCALL; + TAILQ_INSERT_HEAD(plist, dp, dp_snext); + + + return 0; +} + +void +dt_prov_syscall_entry(struct dt_provider *dtpv, ...) +{ + struct dt_probe *dtp; + struct dt_pcb *dp; + register_t sysnum; + size_t argsize; + register_t *args; + va_list ap; + + KASSERT(dtpv == &dt_prov_syscall); + va_start(ap, dtpv); + sysnum = va_arg(ap, register_t); + argsize = va_arg(ap, size_t); + args = va_arg(ap, register_t*); + va_end(ap); + + KASSERT((argsize / sizeof(register_t)) <= DTMAXSYSARGS); + + if (sysnum < 0 || sysnum >= dtps_nsysent) + return; + + dtp = dtps_entry[sysnum]; /* NULL for slots skipped by dt_prov_syscall_init() */ + if (dtp == NULL || !dtp->dtp_recording) + return; + + smr_read_enter(); + SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) { + struct dt_evt *dtev; + + dtev = dt_pcb_ring_get(dp); + if (dtev == NULL) + continue; + + if (ISSET(dp->dp_evtflags, DTEVT_FUNCARGS)) + memcpy(dtev->dtev_sysargs, args, argsize); + + dt_pcb_ring_consume(dp, dtev); + } + smr_read_leave(); +} + +void +dt_prov_syscall_return(struct dt_provider *dtpv, ...)
+{ + struct dt_probe *dtp; + struct dt_pcb *dp; + register_t sysnum; + int error; + register_t retval[2]; + va_list ap; + + KASSERT(dtpv == &dt_prov_syscall); + + va_start(ap, dtpv); + sysnum = va_arg(ap, register_t); + error = va_arg(ap, int); + retval[0] = va_arg(ap, register_t); + retval[1] = va_arg(ap, register_t); + va_end(ap); + + if (sysnum < 0 || sysnum >= dtps_nsysent) + return; + + dtp = dtps_return[sysnum]; /* NULL for slots skipped by dt_prov_syscall_init() */ + if (dtp == NULL || !dtp->dtp_recording) + return; + + smr_read_enter(); + SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) { + struct dt_evt *dtev; + + dtev = dt_pcb_ring_get(dp); + if (dtev == NULL) + continue; + + if (ISSET(dp->dp_evtflags, DTEVT_RETVAL)) { + dtev->dtev_sysretval[0] = retval[0]; + dtev->dtev_sysretval[1] = retval[1]; + dtev->dtev_syserror = error; + } + + dt_pcb_ring_consume(dp, dtev); + } + smr_read_leave(); +} diff --git a/sys/dev/dt/dtvar.h b/sys/dev/dt/dtvar.h new file mode 100644 index 00000000000..05bad44680e --- /dev/null +++ b/sys/dev/dt/dtvar.h @@ -0,0 +1,317 @@ +/* $OpenBSD: dtvar.h,v 1.1 2020/01/21 16:16:23 mpi Exp $ */ + +/* + * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */ + +#ifndef _DT_H_ +#define _DT_H_ + +#include <sys/ioccom.h> +#include <sys/stacktrace.h> +#include <sys/time.h> + +/* + * Length of provider/probe/function names, including terminating '\0'. + */ +#define DTNAMESIZE 16 + +/* + * Length of process name, keep in sync with MAXCOMLEN. + */ +#define DTMAXCOMLEN 16 + +/* + * Maximum number of arguments passed to a syscall. + */ +#define DTMAXSYSARGS 10 + +/* + * Event state: where to store information when a probe fires. + */ +struct dt_evt { + unsigned int dtev_pbn; /* Probe number */ + unsigned int dtev_cpu; /* CPU id */ + pid_t dtev_pid; /* ID of current process */ + pid_t dtev_tid; /* ID of current thread */ + struct timespec dtev_tsp; /* timestamp (nsecs) */ + + /* + * Recorded if the corresponding flag is set. + */ + struct stacktrace dtev_kstack; /* kernel stack frame */ + char dtev_comm[DTMAXCOMLEN+1]; /* current process name */ + union { + register_t E_entry[DTMAXSYSARGS]; + struct { + register_t __retval[2]; + int __error; + } E_return; + } _sys; +#define dtev_sysargs _sys.E_entry /* syscall args. */ +#define dtev_sysretval _sys.E_return.__retval /* syscall retval */ +#define dtev_syserror _sys.E_return.__error /* syscall error */ + +}; + +/* + * States to record when a probe fires. + */ +#define DTEVT_EXECNAME (1 << 0) /* current process name */ +#define DTEVT_USTACK (1 << 1) /* userland stack */ +#define DTEVT_KSTACK (1 << 2) /* kernel stack */ +#define DTEVT_FUNCARGS (1 << 3) /* function arguments */ +#define DTEVT_RETVAL (1 << 4) /* function retval & error */ + +#define DTEVT_FLAG_BITS \ + "\020" \ + "\001EXECNAME" \ + "\002USTACK" \ + "\003KSTACK" \ + "\004FUNCARGS" \ + "\005RETVAL" \ + +/* + * Each PCB can have a filter attached to itself. A filter does not + * prevent an enabled probe from firing, but when that happens, event + * states are only recorded if it is matched.
+ */ +struct dt_filter { + enum dt_operand { + DT_OP_NONE = 0, + DT_OP_EQ, + DT_OP_NE, + } dtf_operand; + enum dt_filtervar { + DT_FV_NONE = 0, + DT_FV_PID, + DT_FV_TID, + } dtf_variable /* what should be filtered */; + unsigned int dtf_value; /* PID or TID to filter */ +}; + + +struct dtioc_probe_info { + uint32_t dtpi_pbn; /* Probe number */ + char dtpi_prov[DTNAMESIZE]; + char dtpi_func[DTNAMESIZE]; + char dtpi_name[DTNAMESIZE]; +}; + +struct dtioc_probe { + size_t dtpr_size; /* size of the buffer */ + struct dtioc_probe_info *dtpr_probes; /* array of probe info */ +}; + +struct dtioc_req { + uint32_t dtrq_pbn; /* probe number */ + struct dt_filter dtrq_filter; /* probe filter */ + uint32_t dtrq_rate; /* number of ticks */ + uint64_t dtrq_evtflags; /* states to record */ +}; + +struct dtioc_stat { + uint64_t dtst_readevt; /* events read */ + uint64_t dtst_dropevt; /* events dropped */ +}; + +#define DTIOCGPLIST _IOWR('D', 1, struct dtioc_probe) +#define DTIOCGSTATS _IOR('D', 2, struct dtioc_stat) + +#define DTIOCRECORD _IOW('D', 3, int) +#define DTIOCPRBENABLE _IOW('D', 4, struct dtioc_req) + + +#ifdef _KERNEL + +#include <sys/mutex.h> +#include <sys/queue.h> +#include <sys/smr.h> + +/* Flags that make sense for all providers. */ +#define DTEVT_COMMON (DTEVT_EXECNAME|DTEVT_KSTACK|DTEVT_USTACK) + +#define M_DT M_DEVBUF /* XXX FIXME */ + +struct dt_softc; + +int dtioc_req_isvalid(struct dtioc_req *); + +/* + * Probe control block, possibly per-CPU. + * + * At least a PCB is allocated for each probe enabled via the DTIOCPRBENABLE + * ioctl(2). It will hold the events written when the probe fires until + * userland read(2)s them. 
+ * + * Locks used to protect struct members in this file: + * I immutable after creation + * k kernel lock + * k,s kernel lock for writing and SMR for reading + * m per-pcb mutex + * c owned (read & modified) by a single CPU + */ +struct dt_pcb { + SMR_SLIST_ENTRY(dt_pcb) dp_pnext; /* [k,s] next PCB per probe */ + TAILQ_ENTRY(dt_pcb) dp_snext; /* [k] next PCB per softc */ + + /* Event states ring */ + unsigned int dp_prod; /* [m] read index */ + unsigned int dp_cons; /* [m] write index */ + struct dt_evt *dp_ring; /* [m] ring of event states */ + struct mutex dp_mtx; + + struct dt_softc *dp_sc; /* [I] related softc */ + struct dt_probe *dp_dtp; /* [I] related probe */ + uint64_t dp_evtflags; /* [I] event states to record */ + struct dt_filter dp_filter; /* [I] filter to match */ + + /* Provider specific fields. */ + unsigned int dp_cpuid; /* [I] on which CPU */ + unsigned int dp_maxtick; /* [I] freq. of profiling */ + unsigned int dp_nticks; /* [c] current tick count */ + + /* Counters */ + uint64_t dp_dropevt; /* [m] # dropped event */ +}; + +TAILQ_HEAD(dt_pcb_list, dt_pcb); + +struct dt_pcb *dt_pcb_alloc(struct dt_probe *, struct dt_softc *); +void dt_pcb_free(struct dt_pcb *); +void dt_pcb_purge(struct dt_pcb_list *); +int dt_pcb_filter(struct dt_pcb *); + +struct dt_evt *dt_pcb_ring_get(struct dt_pcb *); +void dt_pcb_ring_consume(struct dt_pcb *, struct dt_evt *); + +/* + * Probes are entry points in the system where events can be recorded.
+ * + * Locks used to protect struct members in this file: + * I immutable after creation + * k kernel lock + * d dt_lock + * d,s dt_lock for writing and SMR for reading + */ +struct dt_probe { + SIMPLEQ_ENTRY(dt_probe) dtp_next; /* [k] global list of probes */ + SMR_SLIST_HEAD(, dt_pcb) dtp_pcbs; /* [d,s] list of enabled PCBs */ + struct dt_provider *dtp_prov; /* [I] its provider */ + const char *dtp_func; /* [I] probe function */ + const char *dtp_name; /* [I] probe name */ + uint32_t dtp_pbn; /* [I] unique ID */ + volatile uint32_t dtp_recording; /* [d] is it recording? */ + + /* Provider specific fields. */ + int dtp_sysnum; /* [I] related # of syscall */ + const char *dtp_argtype[5];/* [I] type of arguments */ + int dtp_nargs; /* [I] # of arguments */ +}; + + +/* + * Providers expose a set of probes and a method to record events. + */ +struct dt_provider { + const char *dtpv_name; /* [I] provider name */ + volatile uint32_t dtpv_recording;/* [d] # of recording PCBs */ + + int (*dtpv_alloc)(struct dt_probe *, struct dt_softc *, + struct dt_pcb_list *, struct dtioc_req *); + void (*dtpv_enter)(struct dt_provider *, ...); + void (*dtpv_leave)(struct dt_provider *, ...); +}; + +int dt_prov_profile_init(void); +int dt_prov_syscall_init(void); +int dt_prov_static_init(void); + +struct dt_probe *dt_dev_alloc_probe(const char *, const char *, + struct dt_provider *); +void dt_dev_register_probe(struct dt_probe *); + + +extern volatile uint32_t dt_tracing; /* currently tracing? */ + +#define DT_ENTER(provname, args...) do { \ + extern struct dt_provider dt_prov_ ## provname ; \ + struct dt_provider *dtpv = &dt_prov_ ## provname ; \ + \ + if (__predict_false(dt_tracing) && \ + __predict_false(dtpv->dtpv_recording)) { \ + dtpv->dtpv_enter(dtpv, args); \ + } \ +} while (0) + +#define DT_LEAVE(provname, args...)
do { \ + extern struct dt_provider dt_prov_ ## provname ; \ + struct dt_provider *dtpv = &dt_prov_ ## provname ; \ + \ + if (__predict_false(dt_tracing) && \ + __predict_false(dtpv->dtpv_recording)) { \ + dtpv->dtpv_leave(dtpv, args); \ + } \ +} while (0) + +#define _DT_STATIC_P(func, name) (dt_static_##func##_##name) + +/* + * Probe definition for the static provider. + */ +#define _DT_STATIC_PROBEN(func, name, arg0, arg1, arg2, arg3, arg4, n) \ + struct dt_probe _DT_STATIC_P(func, name) = { \ + .dtp_next = { NULL }, \ + .dtp_pcbs = { NULL }, \ + .dtp_prov = &dt_prov_static, \ + .dtp_func = #func, \ + .dtp_name = #name, \ + .dtp_pbn = 0, \ + .dtp_sysnum = 0, \ + .dtp_argtype = { arg0, arg1, arg2, arg3, arg4 }, \ + .dtp_nargs = n, \ + } \ + +#define DT_STATIC_PROBE0(func, name) \ + _DT_STATIC_PROBEN(func, name, NULL, NULL, NULL, NULL, NULL, 0) + +#define DT_STATIC_PROBE1(func, name, arg0) \ + _DT_STATIC_PROBEN(func, name, arg0, NULL, NULL, NULL, NULL, 1) + +#define DT_STATIC_PROBE2(func, name, arg0, arg1) \ + _DT_STATIC_PROBEN(func, name, arg0, arg1, NULL, NULL, NULL, 2) + +#define DT_STATIC_PROBE3(func, name, arg0, arg1, arg2) \ + _DT_STATIC_PROBEN(func, name, arg0, arg1, arg2, NULL, NULL, 3) + +#define DT_STATIC_PROBE4(func, name, arg0, arg1, arg2, arg3) \ + _DT_STATIC_PROBEN(func, name, arg0, arg1, arg2, arg3, NULL, 4) + +#define DT_STATIC_PROBE5(func, name, arg0, arg1, arg2, arg3, arg4) \ + _DT_STATIC_PROBEN(func, name, arg0, arg1, arg2, arg3, arg4, 5) + +#define DT_STATIC_ENTER(func, name, args...)
do { \ + extern struct dt_probe _DT_STATIC_P(func, name); \ + struct dt_probe *dtp = &_DT_STATIC_P(func, name); \ + struct dt_provider *dtpv = dtp->dtp_prov; \ + \ + if (__predict_false(dt_tracing) && \ + __predict_false(dtp->dtp_recording)) { \ + dtpv->dtpv_enter(dtpv, dtp, args); \ + } \ +} while (0) + +#endif /* _KERNEL */ +#endif /* !_DT_H_ */ diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c index 0a3679b0510..b6e0ca4f65a 100644 --- a/sys/kern/kern_clock.c +++ b/sys/kern/kern_clock.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_clock.c,v 1.100 2019/11/02 16:56:17 cheloha Exp $ */ +/* $OpenBSD: kern_clock.c,v 1.101 2020/01/21 16:16:23 mpi Exp $ */ /* $NetBSD: kern_clock.c,v 1.34 1996/06/09 04:51:03 briggs Exp $ */ /*- @@ -55,6 +55,11 @@ #include <sys/gmon.h> #endif +#include "dt.h" +#if NDT > 0 +#include <dev/dt/dtvar.h> +#endif + /* * Clock handling routines. * @@ -168,6 +173,12 @@ hardclock(struct clockframe *frame) if (--ci->ci_schedstate.spc_rrticks <= 0) roundrobin(ci); +#if NDT > 0 + DT_ENTER(profile, NULL); + if (CPU_IS_PRIMARY(ci)) + DT_ENTER(interval, NULL); +#endif + /* * If we are not the primary CPU, we're not allowed to do * any more work.
diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c index 46de8fa7800..e25993099ee 100644 --- a/sys/kern/kern_sched.c +++ b/sys/kern/kern_sched.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_sched.c,v 1.62 2019/11/04 18:06:03 visa Exp $ */ +/* $OpenBSD: kern_sched.c,v 1.63 2020/01/21 16:16:23 mpi Exp $ */ /* * Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org> * @@ -26,6 +26,7 @@ #include <sys/mutex.h> #include <sys/task.h> #include <sys/smr.h> +#include <sys/tracepoint.h> #include <uvm/uvm_extern.h> @@ -261,6 +262,7 @@ setrunqueue(struct cpu_info *ci, struct proc *p, uint8_t prio) spc = &p->p_cpu->ci_schedstate; spc->spc_nrun++; + TRACEPOINT(sched, enqueue, p->p_tid, p->p_p->ps_pid); TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq); spc->spc_whichqs |= (1 << queue); @@ -282,6 +284,7 @@ remrunqueue(struct proc *p) SCHED_ASSERT_LOCKED(); spc = &p->p_cpu->ci_schedstate; spc->spc_nrun--; + TRACEPOINT(sched, dequeue, p->p_tid, p->p_p->ps_pid); TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq); if (TAILQ_EMPTY(&spc->spc_qs[queue])) { diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index 02c733ad8f1..3ca58be881e 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_synch.c,v 1.159 2020/01/21 15:20:47 visa Exp $ */ +/* $OpenBSD: kern_synch.c,v 1.160 2020/01/21 16:16:23 mpi Exp $ */ /* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */ /* @@ -51,6 +51,8 @@ #include <sys/refcnt.h> #include <sys/atomic.h> #include <sys/witness.h> +#include <sys/tracepoint.h> + #include <ddb/db_output.h> #include <machine/spinlock.h> @@ -380,6 +382,8 @@ sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio, SCHED_LOCK(sls->sls_s); + TRACEPOINT(sched, sleep, NULL); + p->p_wchan = ident; p->p_wmesg = wmesg; p->p_slptime = 0; @@ -552,6 +556,7 @@ unsleep(struct proc *p) if (p->p_wchan != NULL) { TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_runq); p->p_wchan = NULL; + TRACEPOINT(sched, wakeup, p->p_tid, 
p->p_p->ps_pid); } } diff --git a/sys/kern/sched_bsd.c b/sys/kern/sched_bsd.c index 3b8c4a80536..9172bc7e24e 100644 --- a/sys/kern/sched_bsd.c +++ b/sys/kern/sched_bsd.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sched_bsd.c,v 1.60 2019/12/11 07:30:09 guenther Exp $ */ +/* $OpenBSD: sched_bsd.c,v 1.61 2020/01/21 16:16:23 mpi Exp $ */ /* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */ /*- @@ -48,6 +48,7 @@ #include <sys/sched.h> #include <sys/timeout.h> #include <sys/smr.h> +#include <sys/tracepoint.h> #ifdef KTRACE #include <sys/ktrace.h> @@ -392,8 +393,12 @@ mi_switch(void) if (p != nextproc) { uvmexp.swtch++; + TRACEPOINT(sched, off__cpu, nextproc->p_tid, + nextproc->p_p->ps_pid); cpu_switchto(p, nextproc); + TRACEPOINT(sched, on__cpu, NULL); } else { + TRACEPOINT(sched, remain__cpu, NULL); p->p_stat = SONPROC; } diff --git a/sys/sys/conf.h b/sys/sys/conf.h index b43c8374fa5..c4317cd1e44 100644 --- a/sys/sys/conf.h +++ b/sys/sys/conf.h @@ -1,4 +1,4 @@ -/* $OpenBSD: conf.h,v 1.146 2019/12/17 13:08:54 reyk Exp $ */ +/* $OpenBSD: conf.h,v 1.147 2020/01/21 16:16:23 mpi Exp $ */ /* $NetBSD: conf.h,v 1.33 1996/05/03 20:03:32 christos Exp $ */ /*- @@ -489,6 +489,13 @@ extern struct cdevsw cdevsw[]; (dev_type_stop((*))) enodev, 0, selfalse, \ (dev_init(c,n,mmap)), 0, D_CLONE } +/* open, close, read, ioctl */ +#define cdev_dt_init(c,n) { \ + dev_init(c,n,open), dev_init(c,n,close), dev_init(c,n,read), \ + (dev_type_write((*))) enodev, dev_init(c,n,ioctl), \ + (dev_type_stop((*))) enodev, 0, selfalse, \ + (dev_type_mmap((*))) enodev, 0, D_CLONE } + #endif /* @@ -582,6 +589,8 @@ cdev_decl(rd); bdev_decl(uk); cdev_decl(uk); +cdev_decl(dt); + cdev_decl(diskmap); cdev_decl(bpf); diff --git a/sys/sys/syscall_mi.h b/sys/sys/syscall_mi.h index f7e87413faa..76f34e0b030 100644 --- a/sys/sys/syscall_mi.h +++ b/sys/sys/syscall_mi.h @@ -1,4 +1,4 @@ -/* $OpenBSD: syscall_mi.h,v 1.24 2019/11/29 06:34:46 deraadt Exp $ */ +/* $OpenBSD: syscall_mi.h,v 1.25 2020/01/21 16:16:23 mpi Exp 
$ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -33,12 +33,18 @@ #include <sys/param.h> #include <sys/pledge.h> +#include <sys/tracepoint.h> #include <uvm/uvm_extern.h> #ifdef KTRACE #include <sys/ktrace.h> #endif +#include "dt.h" +#if NDT > 0 +#include <dev/dt/dtvar.h> +#endif + /* * The MD setup for a system call has been done; here's the MI part. @@ -59,6 +65,10 @@ mi_syscall(struct proc *p, register_t code, const struct sysent *callp, scdebug_call(p, code, argp); KERNEL_UNLOCK(); #endif + TRACEPOINT(raw_syscalls, sys_enter, code, NULL); +#if NDT > 0 + DT_ENTER(syscall, code, callp->sy_argsize, argp); +#endif #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) { KERNEL_LOCK(); @@ -108,6 +118,10 @@ mi_syscall_return(struct proc *p, register_t code, int error, scdebug_ret(p, code, error, retval); KERNEL_UNLOCK(); #endif +#if NDT > 0 + DT_LEAVE(syscall, code, error, retval[0], retval[1]); +#endif + TRACEPOINT(raw_syscalls, sys_exit, code, NULL); userret(p); @@ -126,17 +140,23 @@ mi_syscall_return(struct proc *p, register_t code, int error, static inline void mi_child_return(struct proc *p) { -#if defined(SYSCALL_DEBUG) || defined(KTRACE) +#if defined(SYSCALL_DEBUG) || defined(KTRACE) || NDT > 0 int code = (p->p_flag & P_THREAD) ? SYS___tfork : (p->p_p->ps_flags & PS_PPWAIT) ? 
SYS_vfork : SYS_fork; const register_t child_retval[2] = { 0, 1 }; #endif + TRACEPOINT(sched, on__cpu, NULL); + #ifdef SYSCALL_DEBUG KERNEL_LOCK(); scdebug_ret(p, code, 0, child_retval); KERNEL_UNLOCK(); #endif +#if NDT > 0 + DT_LEAVE(syscall, code, 0, child_retval[0], child_retval[1]); +#endif + TRACEPOINT(raw_syscalls, sys_exit, code, NULL); userret(p); diff --git a/sys/sys/tracepoint.h b/sys/sys/tracepoint.h new file mode 100644 index 00000000000..d9a674ab1fb --- /dev/null +++ b/sys/sys/tracepoint.h @@ -0,0 +1,36 @@ +/* $OpenBSD: tracepoint.h,v 1.1 2020/01/21 16:16:23 mpi Exp $ */ + +/* + * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _SYS_TRACEPOINT_H_ +#define _SYS_TRACEPOINT_H_ + +#ifdef _KERNEL + +#include "dt.h" +#if NDT > 0 +#include <dev/dt/dtvar.h> + +#define TRACEPOINT(func, name, args...) DT_STATIC_ENTER(func, name, args) + +#else /* NDT > 0 */ + +#define TRACEPOINT(func, name, args...) + +#endif /* NDT > 0 */ +#endif /* _KERNEL */ +#endif /* _SYS_TRACEPOINT_H_ */ |