summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authormpi <mpi@openbsd.org>2020-01-21 16:16:22 +0000
committermpi <mpi@openbsd.org>2020-01-21 16:16:22 +0000
commit91b2ecf67e2a667ccefd071f95f44a8068f1bbc4 (patch)
tree4844bb05cf2473deb3060725603f0582ebfa6974
parentregen (diff)
downloadwireguard-openbsd-91b2ecf67e2a667ccefd071f95f44a8068f1bbc4.tar.xz
wireguard-openbsd-91b2ecf67e2a667ccefd071f95f44a8068f1bbc4.zip
Import dt(4) a driver and framework for Dynamic Profiling.
The design is fairly simple: events, in the form of descriptors on a ring, are being produced in any kernel context and being consumed by a userland process reading /dev/dt. Code and hooks are all guarded under '#if NDT > 0' so this commit shouldn't introduce any change as long as dt(4) is disable in GENERIC. ok kettenis@, visa@, jasper@, deraadt@
-rw-r--r--sys/arch/amd64/amd64/conf.c5
-rw-r--r--sys/arch/arm/arm/conf.c5
-rw-r--r--sys/arch/arm64/arm64/conf.c5
-rw-r--r--sys/arch/i386/i386/conf.c5
-rw-r--r--sys/arch/landisk/landisk/conf.c5
-rw-r--r--sys/arch/loongson/loongson/conf.c5
-rw-r--r--sys/arch/luna88k/luna88k/conf.c5
-rw-r--r--sys/arch/macppc/macppc/conf.c5
-rw-r--r--sys/arch/octeon/octeon/conf.c5
-rw-r--r--sys/arch/sgi/sgi/conf.c5
-rw-r--r--sys/arch/sparc64/sparc64/conf.c5
-rw-r--r--sys/conf/GENERIC3
-rw-r--r--sys/conf/files8
-rw-r--r--sys/dev/dt/dt_dev.c714
-rw-r--r--sys/dev/dt/dt_prov_profile.c147
-rw-r--r--sys/dev/dt/dt_prov_static.c136
-rw-r--r--sys/dev/dt/dt_prov_syscall.c206
-rw-r--r--sys/dev/dt/dtvar.h317
-rw-r--r--sys/kern/kern_clock.c13
-rw-r--r--sys/kern/kern_sched.c5
-rw-r--r--sys/kern/kern_synch.c7
-rw-r--r--sys/kern/sched_bsd.c7
-rw-r--r--sys/sys/conf.h11
-rw-r--r--sys/sys/syscall_mi.h24
-rw-r--r--sys/sys/tracepoint.h36
25 files changed, 1658 insertions, 31 deletions
diff --git a/sys/arch/amd64/amd64/conf.c b/sys/arch/amd64/amd64/conf.c
index 6330f6e442d..d456a10c054 100644
--- a/sys/arch/amd64/amd64/conf.c
+++ b/sys/arch/amd64/amd64/conf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: conf.c,v 1.65 2019/12/17 13:08:54 reyk Exp $ */
+/* $OpenBSD: conf.c,v 1.66 2020/01/21 16:16:22 mpi Exp $ */
/*
* Copyright (c) 1994, 1995 Charles M. Hannum. All rights reserved.
@@ -176,6 +176,7 @@ cdev_decl(viocon);
cdev_decl(pci);
#endif
+#include "dt.h"
#include "pf.h"
#include "hotplug.h"
#include "gpio.h"
@@ -223,7 +224,7 @@ struct cdevsw cdevsw[] =
cdev_spkr_init(NSPKR,spkr), /* 27: PC speaker */
cdev_notdef(), /* 28 was LKM */
cdev_notdef(), /* 29 */
- cdev_notdef(), /* 30 */
+ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */
cdev_notdef(), /* 31 */
cdev_notdef(), /* 32 */
cdev_notdef(), /* 33 */
diff --git a/sys/arch/arm/arm/conf.c b/sys/arch/arm/arm/conf.c
index 26f72c4e2fc..2491a459225 100644
--- a/sys/arch/arm/arm/conf.c
+++ b/sys/arch/arm/arm/conf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: conf.c,v 1.52 2019/12/18 06:53:46 deraadt Exp $ */
+/* $OpenBSD: conf.c,v 1.53 2020/01/21 16:16:22 mpi Exp $ */
/* $NetBSD: conf.c,v 1.10 2002/04/19 01:04:38 wiz Exp $ */
/*
@@ -68,6 +68,7 @@
* Standard pseudo-devices
*/
#include "bpfilter.h"
+#include "dt.h"
#include "pf.h"
#include "pty.h"
#include "tun.h"
@@ -299,7 +300,7 @@ struct cdevsw cdevsw[] = {
cdev_ch_init(NCH,ch), /* 27: SCSI autochanger */
cdev_uk_init(NUK,uk), /* 28: SCSI unknown */
cdev_notdef(), /* 29: */
- cdev_notdef(), /* 30: */
+ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */
cdev_notdef(), /* 31: */
cdev_notdef(), /* 32: */
cdev_tun_init(NTUN,tun), /* 33: network tunnel */
diff --git a/sys/arch/arm64/arm64/conf.c b/sys/arch/arm64/arm64/conf.c
index 17aba177828..d7c814e58c0 100644
--- a/sys/arch/arm64/arm64/conf.c
+++ b/sys/arch/arm64/arm64/conf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: conf.c,v 1.10 2019/12/22 18:18:02 kettenis Exp $ */
+/* $OpenBSD: conf.c,v 1.11 2020/01/21 16:16:22 mpi Exp $ */
/*
* Copyright (c) 1994, 1995 Charles M. Hannum. All rights reserved.
@@ -139,6 +139,7 @@ cdev_decl(drm);
cdev_decl(pci);
#endif
+#include "dt.h"
#include "pf.h"
#include "hotplug.h"
#include "vscsi.h"
@@ -182,7 +183,7 @@ struct cdevsw cdevsw[] =
cdev_notdef(), /* 27 */
cdev_notdef(), /* 28 was LKM */
cdev_notdef(), /* 29 */
- cdev_notdef(), /* 30 */
+ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */
cdev_notdef(), /* 31 */
cdev_notdef(), /* 32 */
cdev_notdef(), /* 33 */
diff --git a/sys/arch/i386/i386/conf.c b/sys/arch/i386/i386/conf.c
index adb6e4bef1e..c624c06c0f6 100644
--- a/sys/arch/i386/i386/conf.c
+++ b/sys/arch/i386/i386/conf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: conf.c,v 1.164 2019/12/17 13:08:55 reyk Exp $ */
+/* $OpenBSD: conf.c,v 1.165 2020/01/21 16:16:22 mpi Exp $ */
/* $NetBSD: conf.c,v 1.75 1996/05/03 19:40:20 christos Exp $ */
/*
@@ -168,6 +168,7 @@ cdev_decl(drm);
cdev_decl(pci);
#endif
+#include "dt.h"
#include "pf.h"
#include "hotplug.h"
#include "gpio.h"
@@ -216,7 +217,7 @@ struct cdevsw cdevsw[] =
cdev_spkr_init(NSPKR,spkr), /* 27: PC speaker */
cdev_notdef(), /* 28: was LKM */
cdev_notdef(), /* 29 */
- cdev_notdef(), /* 30 */
+ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */
cdev_notdef(), /* 31 */
cdev_notdef(), /* 32 */
cdev_notdef(), /* 33 */
diff --git a/sys/arch/landisk/landisk/conf.c b/sys/arch/landisk/landisk/conf.c
index 5cb64822446..e2d891c673a 100644
--- a/sys/arch/landisk/landisk/conf.c
+++ b/sys/arch/landisk/landisk/conf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: conf.c,v 1.39 2019/12/17 13:08:55 reyk Exp $ */
+/* $OpenBSD: conf.c,v 1.40 2020/01/21 16:16:22 mpi Exp $ */
/*
* Copyright (c) 1994-1998 Mark Brinicombe.
@@ -67,6 +67,7 @@
* Standard pseudo-devices
*/
#include "bpfilter.h"
+#include "dt.h"
#include "pf.h"
#include "bio.h"
#include "pty.h"
@@ -273,7 +274,7 @@ struct cdevsw cdevsw[] = {
cdev_ch_init(NCH,ch), /* 27: SCSI autochanger */
cdev_uk_init(NUK,uk), /* 28: SCSI unknown */
cdev_notdef(), /* 29: */
- cdev_notdef(), /* 30: */
+ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */
cdev_notdef(), /* 31: */
cdev_notdef(), /* 32: */
cdev_tun_init(NTUN,tun), /* 33: network tunnel */
diff --git a/sys/arch/loongson/loongson/conf.c b/sys/arch/loongson/loongson/conf.c
index e57db5bd148..ff0fa19e6de 100644
--- a/sys/arch/loongson/loongson/conf.c
+++ b/sys/arch/loongson/loongson/conf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: conf.c,v 1.26 2019/12/20 13:25:02 visa Exp $ */
+/* $OpenBSD: conf.c,v 1.27 2020/01/21 16:16:22 mpi Exp $ */
/*
* Copyright (c) 1992, 1993
@@ -116,6 +116,7 @@ cdev_decl(wd);
#include "pci.h"
cdev_decl(pci);
+#include "dt.h"
#include "pf.h"
#include "usb.h"
@@ -166,7 +167,7 @@ struct cdevsw cdevsw[] =
#else
cdev_notdef(), /* 29 */
#endif
- cdev_notdef(), /* 30: */
+ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */
cdev_pf_init(NPF,pf), /* 31: packet filter */
cdev_uk_init(NUK,uk), /* 32: unknown SCSI */
cdev_random_init(1,random), /* 33: random data source */
diff --git a/sys/arch/luna88k/luna88k/conf.c b/sys/arch/luna88k/luna88k/conf.c
index bf0a440bc38..571593d55a4 100644
--- a/sys/arch/luna88k/luna88k/conf.c
+++ b/sys/arch/luna88k/luna88k/conf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: conf.c,v 1.31 2016/12/17 05:22:34 aoyama Exp $ */
+/* $OpenBSD: conf.c,v 1.32 2020/01/21 16:16:23 mpi Exp $ */
/*-
* Copyright (c) 1991 The Regents of the University of California.
@@ -67,6 +67,7 @@
#include "wsmouse.h"
#include "wsmux.h"
+#include "dt.h"
#include "pf.h"
#include "vscsi.h"
#include "pppx.h"
@@ -131,7 +132,7 @@ struct cdevsw cdevsw[] =
cdev_tty_init(NCOM, com), /* 27: serial port (on PCMCIA) */
cdev_disk_init(NWD,wd), /* 28: IDE disk (on PCMCIA) */
cdev_notdef(), /* 29 */
- cdev_notdef(), /* 30 */
+ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */
cdev_notdef(), /* 31 */
cdev_notdef(), /* 32 */
cdev_notdef(), /* 33 */
diff --git a/sys/arch/macppc/macppc/conf.c b/sys/arch/macppc/macppc/conf.c
index 061a11e9a2e..8907b7ce398 100644
--- a/sys/arch/macppc/macppc/conf.c
+++ b/sys/arch/macppc/macppc/conf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: conf.c,v 1.69 2019/12/17 13:08:56 reyk Exp $ */
+/* $OpenBSD: conf.c,v 1.70 2020/01/21 16:16:23 mpi Exp $ */
/*
* Copyright (c) 1997 Per Fogelstrom
@@ -116,6 +116,7 @@ cdev_decl(pci);
#include "video.h"
#include "midi.h"
+#include "dt.h"
#include "pf.h"
#include "radio.h"
@@ -159,7 +160,7 @@ struct cdevsw cdevsw[] = {
cdev_notdef(), /* 27 */
cdev_notdef(), /* 28 */
cdev_notdef(), /* 29 */
- cdev_notdef(), /* 30 */
+ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */
cdev_notdef(), /* 31 */
cdev_notdef(), /* 32 */
cdev_notdef(), /* 33 */
diff --git a/sys/arch/octeon/octeon/conf.c b/sys/arch/octeon/octeon/conf.c
index 1e80c9d93f4..4447d97c1ab 100644
--- a/sys/arch/octeon/octeon/conf.c
+++ b/sys/arch/octeon/octeon/conf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: conf.c,v 1.22 2019/12/17 13:08:56 reyk Exp $ */
+/* $OpenBSD: conf.c,v 1.23 2020/01/21 16:16:23 mpi Exp $ */
/*
* Copyright (c) 1992, 1993
@@ -129,6 +129,7 @@ cdev_decl(amdcf);
#include "pci.h"
cdev_decl(pci);
+#include "dt.h"
#include "pf.h"
#include "usb.h"
@@ -185,7 +186,7 @@ struct cdevsw cdevsw[] =
#else
cdev_notdef(), /* 29 */
#endif
- cdev_notdef(), /* 30: */
+ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */
cdev_pf_init(NPF,pf), /* 31: packet filter */
cdev_uk_init(NUK,uk), /* 32: unknown SCSI */
cdev_random_init(1,random), /* 33: random data source */
diff --git a/sys/arch/sgi/sgi/conf.c b/sys/arch/sgi/sgi/conf.c
index 062bdfc1974..7d9baf94a9b 100644
--- a/sys/arch/sgi/sgi/conf.c
+++ b/sys/arch/sgi/sgi/conf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: conf.c,v 1.40 2019/12/17 13:08:56 reyk Exp $ */
+/* $OpenBSD: conf.c,v 1.41 2020/01/21 16:16:23 mpi Exp $ */
/*
* Copyright (c) 1992, 1993
@@ -115,6 +115,7 @@ cdev_decl(wd);
#include "pci.h"
cdev_decl(pci);
+#include "dt.h"
#include "pf.h"
#include "usb.h"
@@ -165,7 +166,7 @@ struct cdevsw cdevsw[] =
#else
cdev_notdef(), /* 29 */
#endif
- cdev_notdef(), /* 30: */
+ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */
cdev_pf_init(NPF,pf), /* 31: packet filter */
cdev_uk_init(NUK,uk), /* 32: unknown SCSI */
cdev_random_init(1,random), /* 33: random data source */
diff --git a/sys/arch/sparc64/sparc64/conf.c b/sys/arch/sparc64/sparc64/conf.c
index f7afcfeab56..9cba85deb65 100644
--- a/sys/arch/sparc64/sparc64/conf.c
+++ b/sys/arch/sparc64/sparc64/conf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: conf.c,v 1.81 2019/12/17 13:08:56 reyk Exp $ */
+/* $OpenBSD: conf.c,v 1.82 2020/01/21 16:16:23 mpi Exp $ */
/* $NetBSD: conf.c,v 1.17 2001/03/26 12:33:26 lukem Exp $ */
/*
@@ -108,6 +108,7 @@ cdev_decl(pci);
#include "ulpt.h"
#include "ucom.h"
+#include "dt.h"
#include "pf.h"
#include "ksyms.h"
@@ -181,7 +182,7 @@ struct cdevsw cdevsw[] =
cdev_notdef(), /* 27 */
cdev_notdef(), /* 28: Systech VPC-2200 versatec/centronics */
cdev_notdef(), /* 29 */
- cdev_notdef(), /* 30: Xylogics tape */
+ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */
cdev_notdef(), /* 31: /dev/cgtwo */
cdev_notdef(), /* 32: should be /dev/gpone */
cdev_notdef(), /* 33 */
diff --git a/sys/conf/GENERIC b/sys/conf/GENERIC
index 92240cf0ab3..b7f2550db74 100644
--- a/sys/conf/GENERIC
+++ b/sys/conf/GENERIC
@@ -1,4 +1,4 @@
-# $OpenBSD: GENERIC,v 1.266 2019/10/12 17:06:02 naddy Exp $
+# $OpenBSD: GENERIC,v 1.267 2020/01/21 16:16:23 mpi Exp $
#
# Machine-independent option; used by all architectures for their
# GENERIC kernel
@@ -82,6 +82,7 @@ pseudo-device msts 1 # MSTS line discipline
pseudo-device endrun 1 # EndRun line discipline
pseudo-device vnd 4 # vnode disk devices
pseudo-device ksyms 1 # kernel symbols device
+#pseudo-device dt # Dynamic Tracer
# clonable devices
pseudo-device bpfilter # packet filter
diff --git a/sys/conf/files b/sys/conf/files
index dcd62565ae3..f0c5aa4df3d 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1,4 +1,4 @@
-# $OpenBSD: files,v 1.680 2020/01/11 00:56:38 jsg Exp $
+# $OpenBSD: files,v 1.681 2020/01/21 16:16:23 mpi Exp $
# $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $
# @(#)files.newconf 7.5 (Berkeley) 5/10/93
@@ -603,6 +603,12 @@ file net/if_pppoe.c pppoe needs-flag
pseudo-device kcov
file dev/kcov.c kcov needs-flag
+pseudo-device dt
+file dev/dt/dt_dev.c dt needs-flag
+file dev/dt/dt_prov_profile.c dt
+file dev/dt/dt_prov_syscall.c dt
+file dev/dt/dt_prov_static.c dt
+
# XXX machine-independent SCSI files should live somewhere here, maybe
# kernel sources
diff --git a/sys/dev/dt/dt_dev.c b/sys/dev/dt/dt_dev.c
new file mode 100644
index 00000000000..e8a5512684e
--- /dev/null
+++ b/sys/dev/dt/dt_dev.c
@@ -0,0 +1,714 @@
+/* $OpenBSD: dt_dev.c,v 1.1 2020/01/21 16:16:23 mpi Exp $ */
+
+/*
+ * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/param.h>
+#include <sys/device.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+
+#include <dev/dt/dtvar.h>
+
+/*
+ * How many frames are used by the profiling code? For example
+ * on amd64:
+ *
+ * From syscall provider:
+ *
+ * dt_prov_syscall_entry+0x141
+ * syscall+0x205 <--- start here
+ * Xsyscall+0x128
+ *
+ * From profile provider:
+ *
+ * dt_prov_profile_enter+0x6e
+ * hardclock+0x12c
+ * clockintr+0x59
+ * intr_handler+0x6e
+ * Xresume_legacy0+0x1d3
+ * cpu_idle_cycle+0x1b <---- start here.
+ * proc_trampoline+0x1c
+ */
+#if notyet
+#define DT_HOOK_FRAME_ADDRESS __builtin_frame_address(4)
+#else
+#define DT_HOOK_FRAME_ADDRESS __builtin_frame_address(0)
+#endif
+
+#define DT_EVTRING_SIZE 16 /* # of slots in per PCB event ring */
+
+#define DPRINTF(x...) /* nothing */
+
+/*
+ * Descriptor associated with each program opening /dev/dt. It is used
+ * to keep track of enabled PCBs.
+ *
+ * Locks used to protect struct members in this file:
+ * m per-softc mutex
+ * k kernel lock
+ */
+struct dt_softc {
+ SLIST_ENTRY(dt_softc) ds_next; /* [k] descriptor list */
+ int ds_unit; /* [I] D_CLONE unique unit */
+ pid_t ds_pid; /* [I] PID of tracing program */
+
+ struct mutex ds_mtx;
+
+ struct dt_pcb_list ds_pcbs; /* [k] list of enabled PCBs */
+ struct dt_evt *ds_bufqueue; /* [k] copy evts to userland */
+ size_t ds_bufqlen; /* [k] length of the queue */
+ int ds_recording; /* [k] currently recording? */
+ int ds_evtcnt; /* [m] # of readable evts */
+
+ /* Counters */
+ uint64_t ds_readevt; /* [m] # of events read */
+ uint64_t ds_dropevt; /* [m] # of events dropped */
+};
+
+SLIST_HEAD(, dt_softc) dtdev_list; /* [k] list of open /dev/dt nodes */
+
+/*
+ * Probes are created during dt_attach() and never modified/freed during
+ * the lifetime of the system. That's why we consider them as [I]mmutable.
+ */
+unsigned int dt_nprobes; /* [I] # of probes available */
+SIMPLEQ_HEAD(, dt_probe) dt_probe_list; /* [I] list of probes */
+
+struct rwlock dt_lock = RWLOCK_INITIALIZER("dtlk");
+volatile uint32_t dt_tracing = 0; /* [d] # of processes tracing */
+
+void dtattach(struct device *, struct device *, void *);
+int dtopen(dev_t, int, int, struct proc *);
+int dtclose(dev_t, int, int, struct proc *);
+int dtread(dev_t, struct uio *, int);
+int dtioctl(dev_t, u_long, caddr_t, int, struct proc *);
+
+struct dt_softc *dtlookup(int);
+
+int dt_ioctl_list_probes(struct dt_softc *, struct dtioc_probe *);
+int dt_ioctl_get_stats(struct dt_softc *, struct dtioc_stat *);
+int dt_ioctl_record_start(struct dt_softc *);
+void dt_ioctl_record_stop(struct dt_softc *);
+int dt_ioctl_probe_enable(struct dt_softc *, struct dtioc_req *);
+void dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *);
+
+int dt_enter(void);
+void dt_leave(uint32_t);
+
+int dt_pcb_ring_copy(struct dt_pcb *, struct dt_evt *, size_t, uint64_t *);
+
+void
+dtattach(struct device *parent, struct device *self, void *aux)
+{
+ SLIST_INIT(&dtdev_list);
+ SIMPLEQ_INIT(&dt_probe_list);
+
+ /* Init providers */
+ dt_nprobes += dt_prov_profile_init();
+ dt_nprobes += dt_prov_syscall_init();
+ dt_nprobes += dt_prov_static_init();
+
+ printf("dt: %u probes\n", dt_nprobes);
+}
+
+int
+dtopen(dev_t dev, int flags, int mode, struct proc *p)
+{
+ struct dt_softc *sc;
+ int unit = minor(dev);
+
+ KASSERT(dtlookup(unit) == NULL);
+
+ sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
+ if (sc == NULL)
+ return ENOMEM;
+
+ /*
+ * Enough space to empty 2 full rings of events in a single read.
+ */
+ sc->ds_bufqlen = 2 * DT_EVTRING_SIZE;
+ sc->ds_bufqueue = mallocarray(sc->ds_bufqlen, sizeof(*sc->ds_bufqueue),
+ M_DEVBUF, M_WAITOK|M_CANFAIL);
+ if (sc->ds_bufqueue == NULL)
+ goto bad;
+
+ sc->ds_unit = unit;
+ sc->ds_pid = p->p_p->ps_pid;
+ TAILQ_INIT(&sc->ds_pcbs);
+ mtx_init(&sc->ds_mtx, IPL_HIGH);
+ sc->ds_evtcnt = 0;
+ sc->ds_readevt = 0;
+ sc->ds_dropevt = 0;
+
+ SLIST_INSERT_HEAD(&dtdev_list, sc, ds_next);
+
+ DPRINTF("dt%d: pid %d open\n", sc->ds_unit, sc->ds_pid);
+
+ return 0;
+
+bad:
+ free(sc, M_DEVBUF, sizeof(*sc));
+ return ENOMEM;
+}
+
+int
+dtclose(dev_t dev, int flags, int mode, struct proc *p)
+{
+ struct dt_softc *sc;
+ int unit = minor(dev);
+
+ sc = dtlookup(unit);
+ KASSERT(sc != NULL);
+
+ DPRINTF("dt%d: pid %d close\n", sc->ds_unit, sc->ds_pid);
+
+ SLIST_REMOVE(&dtdev_list, sc, dt_softc, ds_next);
+ dt_ioctl_record_stop(sc);
+ dt_pcb_purge(&sc->ds_pcbs);
+
+ free(sc->ds_bufqueue, M_DEVBUF,
+ sc->ds_bufqlen * sizeof(*sc->ds_bufqueue));
+ free(sc, M_DEVBUF, sizeof(*sc));
+
+ return 0;
+}
+
+int
+dtread(dev_t dev, struct uio *uio, int flags)
+{
+ struct dt_softc *sc;
+ struct dt_evt *estq;
+ struct dt_pcb *dp;
+ int error, unit = minor(dev);
+ size_t qlen, count, read = 0;
+ uint64_t dropped = 0;
+
+ sc = dtlookup(unit);
+ KASSERT(sc != NULL);
+
+ count = howmany(uio->uio_resid, sizeof(struct dt_evt));
+ if (count < 1)
+ return (EMSGSIZE);
+
+ mtx_enter(&sc->ds_mtx);
+ while (!sc->ds_evtcnt) {
+ error = msleep(sc, &sc->ds_mtx, PWAIT|PCATCH, "dtread", 0);
+ if (error == EINTR || error == ERESTART)
+ break;
+ }
+ mtx_leave(&sc->ds_mtx);
+
+ if (error)
+ return error;
+
+ estq = sc->ds_bufqueue;
+ qlen = MIN(sc->ds_bufqlen, count);
+
+ KERNEL_ASSERT_LOCKED();
+ TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
+ count = dt_pcb_ring_copy(dp, estq, qlen, &dropped);
+ read += count;
+ estq += count; /* pointer aritmetic */
+ qlen -= count;
+ if (qlen == 0)
+ break;
+ }
+ if (read > 0)
+ uiomove(sc->ds_bufqueue, read * sizeof(struct dt_evt), uio);
+
+ mtx_enter(&sc->ds_mtx);
+ sc->ds_evtcnt -= read;
+ sc->ds_readevt += read;
+ sc->ds_dropevt += dropped;
+ mtx_leave(&sc->ds_mtx);
+
+ return 0;
+}
+
+int
+dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
+{
+ struct dt_softc *sc;
+ int unit = minor(dev);
+ int on, error = 0;
+
+ sc = dtlookup(unit);
+ KASSERT(sc != NULL);
+
+ switch (cmd) {
+ case DTIOCGPLIST:
+ return dt_ioctl_list_probes(sc, (struct dtioc_probe *)addr);
+ case DTIOCGSTATS:
+ return dt_ioctl_get_stats(sc, (struct dtioc_stat *)addr);
+ case DTIOCRECORD:
+ case DTIOCPRBENABLE:
+ /* root only ioctl(2) */
+ break;
+ default:
+ return ENOTTY;
+ }
+
+ if ((error = suser(p)) != 0)
+ return error;
+
+ switch (cmd) {
+ case DTIOCRECORD:
+ on = *(int *)addr;
+ if (on)
+ error = dt_ioctl_record_start(sc);
+ else
+ dt_ioctl_record_stop(sc);
+ break;
+ case DTIOCPRBENABLE:
+ error = dt_ioctl_probe_enable(sc, (struct dtioc_req *)addr);
+ break;
+ default:
+ KASSERT(0);
+ }
+
+ return error;
+}
+
+struct dt_softc *
+dtlookup(int unit)
+{
+ struct dt_softc *sc;
+
+ KERNEL_ASSERT_LOCKED();
+
+ SLIST_FOREACH(sc, &dtdev_list, ds_next) {
+ if (sc->ds_unit == unit)
+ break;
+ }
+
+ return sc;
+}
+
+int
+dtioc_req_isvalid(struct dtioc_req *dtrq)
+{
+ switch (dtrq->dtrq_filter.dtf_operand) {
+ case DT_OP_NONE:
+ case DT_OP_EQ:
+ case DT_OP_NE:
+ break;
+ default:
+ return 0;
+ }
+
+ switch (dtrq->dtrq_filter.dtf_variable) {
+ case DT_FV_NONE:
+ case DT_FV_PID:
+ case DT_FV_TID:
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+int
+dt_ioctl_list_probes(struct dt_softc *sc, struct dtioc_probe *dtpr)
+{
+ struct dtioc_probe_info info, *dtpi;
+ struct dt_probe *dtp;
+ size_t size;
+ int error = 0;
+
+ if (dtpr->dtpr_size == 0) {
+ dtpr->dtpr_size = dt_nprobes * sizeof(*dtpi);
+ return 0;
+ }
+
+ size = dtpr->dtpr_size;
+ dtpi = dtpr->dtpr_probes;
+ memset(&info, 0, sizeof(info));
+ SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
+ if (size < sizeof(*dtpi)) {
+ error = ENOSPC;
+ break;
+ }
+ info.dtpi_pbn = dtp->dtp_pbn;
+ strlcpy(info.dtpi_prov, dtp->dtp_prov->dtpv_name,
+ sizeof(info.dtpi_prov));
+ strlcpy(info.dtpi_func, dtp->dtp_func, sizeof(info.dtpi_func));
+ strlcpy(info.dtpi_name, dtp->dtp_name, sizeof(info.dtpi_name));
+ error = copyout(&info, dtpi, sizeof(*dtpi));
+ if (error)
+ break;
+ size -= sizeof(*dtpi);
+ dtpi++;
+ };
+
+ return error;
+}
+
+int
+dt_ioctl_get_stats(struct dt_softc *sc, struct dtioc_stat *dtst)
+{
+ mtx_enter(&sc->ds_mtx);
+ dtst->dtst_readevt = sc->ds_readevt;
+ dtst->dtst_dropevt = sc->ds_dropevt;
+ mtx_leave(&sc->ds_mtx);
+
+ return 0;
+}
+
+int
+dt_ioctl_record_start(struct dt_softc *sc)
+{
+ struct dt_pcb *dp;
+ int count;
+
+ if (sc->ds_recording)
+ return EBUSY;
+
+ KERNEL_ASSERT_LOCKED();
+ if (TAILQ_EMPTY(&sc->ds_pcbs))
+ return ENOENT;
+
+ count = dt_enter();
+ TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
+ struct dt_probe *dtp = dp->dp_dtp;
+
+ rw_assert_wrlock(&dt_lock);
+ SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext);
+ dtp->dtp_recording++;
+ dtp->dtp_prov->dtpv_recording++;
+ }
+ dt_leave(count);
+
+ sc->ds_recording = 1;
+ dt_tracing++;
+
+ return 0;
+}
+
+void
+dt_ioctl_record_stop(struct dt_softc *sc)
+{
+ struct dt_pcb *dp;
+ int count;
+
+ KASSERT(suser(curproc) == 0);
+
+ if (!sc->ds_recording)
+ return;
+
+ DPRINTF("dt%d: pid %d disable\n", sc->ds_unit, sc->ds_pid);
+
+ dt_tracing--;
+ sc->ds_recording = 0;
+
+ count = dt_enter();
+ TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
+ struct dt_probe *dtp = dp->dp_dtp;
+
+ rw_assert_wrlock(&dt_lock);
+ dtp->dtp_recording--;
+ dtp->dtp_prov->dtpv_recording--;
+ SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext);
+ }
+ dt_leave(count);
+}
+
+int
+dt_ioctl_probe_enable(struct dt_softc *sc, struct dtioc_req *dtrq)
+{
+ struct dt_pcb_list plist;
+ struct dt_probe *dtp;
+ struct dt_pcb *dp;
+ int error;
+
+ KASSERT(suser(curproc) == 0);
+
+ if (!dtioc_req_isvalid(dtrq))
+ return EINVAL;
+
+ SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
+ if (dtp->dtp_pbn == dtrq->dtrq_pbn)
+ break;
+ }
+ if (dtp == NULL)
+ return ENOENT;
+
+ TAILQ_INIT(&plist);
+ error = dtp->dtp_prov->dtpv_alloc(dtp, sc, &plist, dtrq);
+ if (error)
+ return error;
+
+ DPRINTF("dt%d: pid %d enable %u : %b\n", sc->ds_unit, sc->ds_pid,
+ dtrq->dtrq_pbn, (unsigned int)dtrq->dtrq_evtflags, DTEVT_FLAG_BITS);
+
+ /* Append all PCBs to this instance */
+ while ((dp = TAILQ_FIRST(&plist)) != NULL) {
+ TAILQ_REMOVE(&plist, dp, dp_snext);
+ TAILQ_INSERT_HEAD(&sc->ds_pcbs, dp, dp_snext);
+ }
+
+ return 0;
+}
+
+int
+dt_enter(void)
+{
+ uint32_t count;
+
+ rw_enter_write(&dt_lock);
+ count = dt_tracing;
+ dt_tracing = 0;
+
+ smr_barrier();
+
+ return count;
+}
+
+void
+dt_leave(uint32_t count)
+{
+ dt_tracing = count;
+ rw_exit_write(&dt_lock);
+}
+
+struct dt_probe *
+dt_dev_alloc_probe(const char *func, const char *name, struct dt_provider *dtpv)
+{
+ struct dt_probe *dtp;
+
+ dtp = malloc(sizeof(*dtp), M_DT, M_NOWAIT|M_ZERO);
+ if (dtp == NULL)
+ return NULL;
+
+ SMR_SLIST_INIT(&dtp->dtp_pcbs);
+ dtp->dtp_prov = dtpv;
+ dtp->dtp_func = func;
+ dtp->dtp_name = name;
+ dtp->dtp_sysnum = -1;
+
+ return dtp;
+}
+
+void
+dt_dev_register_probe(struct dt_probe *dtp)
+{
+ static uint64_t probe_nb;
+
+ dtp->dtp_pbn = ++probe_nb;
+ SIMPLEQ_INSERT_TAIL(&dt_probe_list, dtp, dtp_next);
+}
+
+struct dt_pcb *
+dt_pcb_alloc(struct dt_probe *dtp, struct dt_softc *sc)
+{
+ struct dt_pcb *dp;
+
+ dp = malloc(sizeof(*dp), M_DT, M_WAITOK|M_CANFAIL|M_ZERO);
+ if (dp == NULL)
+ goto bad;
+
+ dp->dp_ring = mallocarray(DT_EVTRING_SIZE, sizeof(*dp->dp_ring), M_DT,
+ M_WAITOK|M_CANFAIL|M_ZERO);
+ if (dp->dp_ring == NULL)
+ goto bad;
+
+ mtx_init(&dp->dp_mtx, IPL_HIGH);
+ dp->dp_sc = sc;
+ dp->dp_dtp = dtp;
+ return dp;
+bad:
+ dt_pcb_free(dp);
+ return NULL;
+}
+
+void
+dt_pcb_free(struct dt_pcb *dp)
+{
+ if (dp == NULL)
+ return;
+ free(dp->dp_ring, M_DT, DT_EVTRING_SIZE * sizeof(*dp->dp_ring));
+ free(dp, M_DT, sizeof(*dp));
+}
+
+void
+dt_pcb_purge(struct dt_pcb_list *plist)
+{
+ struct dt_pcb *dp;
+
+ while ((dp = TAILQ_FIRST(plist)) != NULL) {
+ TAILQ_REMOVE(plist, dp, dp_snext);
+ dt_pcb_free(dp);
+ }
+}
+
+int
+dt_pcb_filter(struct dt_pcb *dp)
+{
+ struct dt_filter *dtf = &dp->dp_filter;
+ struct proc *p = curproc;
+ unsigned int var;
+ int match = 1;
+
+ /* Filter out tracing program. */
+ if (dp->dp_sc->ds_pid == p->p_p->ps_pid)
+ return 1;
+
+ switch (dtf->dtf_variable) {
+ case DT_FV_PID:
+ var = p->p_p->ps_pid;
+ break;
+ case DT_FV_TID:
+ var = p->p_tid;
+ break;
+ case DT_FV_NONE:
+ break;
+ default:
+ KASSERT(0);
+ }
+
+ switch (dtf->dtf_operand) {
+ case DT_OP_EQ:
+ match = !!(var == dtf->dtf_value);
+ break;
+ case DT_OP_NE:
+ match = !!(var != dtf->dtf_value);
+ break;
+ case DT_OP_NONE:
+ break;
+ default:
+ KASSERT(0);
+ }
+
+ return !match;
+}
+
+
+/*
+ * Get a reference to the next free event state from the ring.
+ */
+struct dt_evt *
+dt_pcb_ring_get(struct dt_pcb *dp)
+{
+ struct proc *p = curproc;
+ struct dt_evt *dtev;
+ int distance;
+
+ if (dt_pcb_filter(dp))
+ return NULL;
+
+ mtx_enter(&dp->dp_mtx);
+ distance = dp->dp_prod - dp->dp_cons;
+ if (distance == 1 || distance == (1 - DT_EVTRING_SIZE)) {
+ /* read(2) isn't finished */
+ dp->dp_dropevt++;
+ mtx_leave(&dp->dp_mtx);
+ return NULL;
+ }
+
+ /*
+ * Save states in next free event slot.
+ */
+ dtev = &dp->dp_ring[dp->dp_cons];
+ memset(dtev, 0, sizeof(*dtev));
+
+ dtev->dtev_pbn = dp->dp_dtp->dtp_pbn;
+ dtev->dtev_cpu = cpu_number();
+ dtev->dtev_pid = p->p_p->ps_pid;
+ dtev->dtev_tid = p->p_tid;
+ nanotime(&dtev->dtev_tsp);
+
+ if (ISSET(dp->dp_evtflags, DTEVT_EXECNAME))
+ memcpy(dtev->dtev_comm, p->p_p->ps_comm, DTMAXCOMLEN - 1);
+
+ if (ISSET(dp->dp_evtflags, DTEVT_KSTACK|DTEVT_USTACK)) {
+#if notyet
+ stacktrace_save_at(&dtev->dtev_kstack, DT_HOOK_FRAME_ADDRESS);
+#else
+ stacktrace_save(&dtev->dtev_kstack);
+#endif
+ }
+
+ return dtev;
+}
+
+void
+dt_pcb_ring_consume(struct dt_pcb *dp, struct dt_evt *dtev)
+{
+ MUTEX_ASSERT_LOCKED(&dp->dp_mtx);
+ KASSERT(dtev == &dp->dp_ring[dp->dp_cons]);
+
+ dp->dp_cons = (dp->dp_cons + 1) % DT_EVTRING_SIZE;
+ mtx_leave(&dp->dp_mtx);
+
+ mtx_enter(&dp->dp_sc->ds_mtx);
+ dp->dp_sc->ds_evtcnt++;
+ mtx_leave(&dp->dp_sc->ds_mtx);
+ wakeup(dp->dp_sc);
+}
+
+/*
+ * Copy at most `qlen' events from `dp', producing the same amount
+ * of free slots.
+ */
+int
+dt_pcb_ring_copy(struct dt_pcb *dp, struct dt_evt *estq, size_t qlen,
+ uint64_t *dropped)
+{
+ size_t count, copied = 0;
+ unsigned int cons, prod;
+
+ KASSERT(qlen > 0);
+
+ mtx_enter(&dp->dp_mtx);
+ cons = dp->dp_cons;
+ prod = dp->dp_prod;
+
+ if (cons < prod)
+ count = DT_EVTRING_SIZE - prod;
+ else
+ count = cons - prod;
+
+ if (count == 0)
+ goto out;
+
+ *dropped += dp->dp_dropevt;
+ dp->dp_dropevt = 0;
+
+ count = MIN(count, qlen);
+
+ memcpy(&estq[0], &dp->dp_ring[prod], count * sizeof(*estq));
+ copied += count;
+
+ /* Produce */
+ prod = (prod + count) % DT_EVTRING_SIZE;
+
+ /* If the queue is full or the ring didn't wrap, stop here. */
+ if (qlen == copied || prod != 0 || cons == 0)
+ goto out;
+
+ count = MIN(cons, (qlen - copied));
+ memcpy(&estq[copied], &dp->dp_ring[0], count * sizeof(*estq));
+ copied += count;
+ prod += count;
+
+out:
+ dp->dp_prod = prod;
+ mtx_leave(&dp->dp_mtx);
+ return copied;
+}
diff --git a/sys/dev/dt/dt_prov_profile.c b/sys/dev/dt/dt_prov_profile.c
new file mode 100644
index 00000000000..0a406ee2492
--- /dev/null
+++ b/sys/dev/dt/dt_prov_profile.c
@@ -0,0 +1,147 @@
+/* $OpenBSD: dt_prov_profile.c,v 1.1 2020/01/21 16:16:23 mpi Exp $ */
+
+/*
+ * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/param.h>
+#include <sys/atomic.h>
+
+#include <dev/dt/dtvar.h>
+
+struct dt_probe *dtpp_profile; /* per-CPU profile probe */
+struct dt_probe *dtpp_interval; /* global periodic probe */
+
+/* Flags that make sense for this provider */
+#define DTEVT_PROV_PROFILE DTEVT_KSTACK
+
+int dt_prov_profile_alloc(struct dt_probe *, struct dt_softc *,
+ struct dt_pcb_list *, struct dtioc_req *);
+void dt_prov_profile_enter(struct dt_provider *, ...);
+void dt_prov_interval_enter(struct dt_provider *, ...);
+
+struct dt_provider dt_prov_profile = {
+ .dtpv_name = "profile",
+ .dtpv_alloc = dt_prov_profile_alloc,
+ .dtpv_enter = dt_prov_profile_enter,
+ .dtpv_leave = NULL,
+};
+
+struct dt_provider dt_prov_interval = {
+ .dtpv_name = "interval",
+ .dtpv_alloc = dt_prov_profile_alloc,
+ .dtpv_enter = dt_prov_interval_enter,
+ .dtpv_leave = NULL,
+};
+
+int
+dt_prov_profile_init(void)
+{
+ dtpp_profile = dt_dev_alloc_probe("hz", "97", &dt_prov_profile);
+ dt_dev_register_probe(dtpp_profile);
+ if (dtpp_profile == NULL)
+ return 0;
+ dtpp_interval = dt_dev_alloc_probe("hz", "1", &dt_prov_interval);
+ dt_dev_register_probe(dtpp_interval);
+ if (dtpp_interval == NULL)
+ return 1;
+ return 2;
+}
+
+int
+dt_prov_profile_alloc(struct dt_probe *dtp, struct dt_softc *sc,
+ struct dt_pcb_list *plist, struct dtioc_req *dtrq)
+{
+ struct dt_pcb *dp;
+ struct cpu_info *ci;
+ CPU_INFO_ITERATOR cii;
+ extern int hz;
+
+ KASSERT(dtioc_req_isvalid(dtrq));
+ KASSERT(TAILQ_EMPTY(plist));
+ KASSERT(dtp == dtpp_profile || dtp == dtpp_interval);
+
+ if (dtrq->dtrq_rate <= 0 || dtrq->dtrq_rate >= hz)
+ return EOPNOTSUPP;
+
+ CPU_INFO_FOREACH(cii, ci) {
+ if (!CPU_IS_PRIMARY(ci) && (dtp == dtpp_interval))
+ continue;
+
+ dp = dt_pcb_alloc(dtp, sc);
+ if (dp == NULL) {
+ dt_pcb_purge(plist);
+ return ENOMEM;
+ }
+
+ dp->dp_maxtick = dtrq->dtrq_rate;
+ dp->dp_cpuid = ci->ci_cpuid;
+
+ dp->dp_filter = dtrq->dtrq_filter;
+ dp->dp_evtflags = dtrq->dtrq_evtflags & DTEVT_PROV_PROFILE;
+ TAILQ_INSERT_HEAD(plist, dp, dp_snext);
+ }
+
+ return 0;
+}
+
+static inline void
+dt_prov_profile_fire(struct dt_pcb *dp)
+{
+ struct dt_evt *dtev;
+
+ if (++dp->dp_nticks < dp->dp_maxtick)
+ return;
+
+ dtev = dt_pcb_ring_get(dp);
+ if (dtev == NULL)
+ return;
+ dt_pcb_ring_consume(dp, dtev);
+ dp->dp_nticks = 0;
+}
+
+void
+dt_prov_profile_enter(struct dt_provider *dtpv, ...)
+{
+ struct cpu_info *ci = curcpu();
+ struct dt_pcb *dp;
+
+ KASSERT(dtpv == &dt_prov_profile);
+
+ smr_read_enter();
+ SMR_SLIST_FOREACH(dp, &dtpp_profile->dtp_pcbs, dp_pnext) {
+ if (dp->dp_cpuid != ci->ci_cpuid)
+ continue;
+
+ dt_prov_profile_fire(dp);
+ }
+ smr_read_leave();
+}
+
+void
+dt_prov_interval_enter(struct dt_provider *dtpv, ...)
+{
+ struct dt_pcb *dp;
+
+ KASSERT(dtpv == &dt_prov_interval);
+
+ smr_read_enter();
+ SMR_SLIST_FOREACH(dp, &dtpp_interval->dtp_pcbs, dp_pnext) {
+ dt_prov_profile_fire(dp);
+ }
+ smr_read_leave();
+}
diff --git a/sys/dev/dt/dt_prov_static.c b/sys/dev/dt/dt_prov_static.c
new file mode 100644
index 00000000000..65764619e02
--- /dev/null
+++ b/sys/dev/dt/dt_prov_static.c
@@ -0,0 +1,136 @@
+/* $OpenBSD: dt_prov_static.c,v 1.1 2020/01/21 16:16:23 mpi Exp $ */
+
+/*
+ * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/param.h>
+#include <sys/tracepoint.h>
+
+#include <dev/dt/dtvar.h>
+
+int dt_prov_static_alloc(struct dt_probe *, struct dt_softc *,
+ struct dt_pcb_list *, struct dtioc_req *);
+void dt_prov_static_hook(struct dt_provider *, ...);
+
+struct dt_provider dt_prov_static = {
+ .dtpv_name = "tracepoint",
+ .dtpv_alloc = dt_prov_static_alloc,
+ .dtpv_enter = dt_prov_static_hook,
+};
+
+/*
+ * Scheduler provider
+ */
+DT_STATIC_PROBE2(sched, dequeue, "pid_t", "pid_t");
+DT_STATIC_PROBE2(sched, enqueue, "pid_t", "pid_t");
+DT_STATIC_PROBE2(sched, off__cpu, "pid_t", "pid_t");
+DT_STATIC_PROBE0(sched, on__cpu);
+DT_STATIC_PROBE0(sched, remain__cpu);
+DT_STATIC_PROBE0(sched, sleep);
+DT_STATIC_PROBE0(sched, wakeup);
+
+/*
+ * Raw syscalls
+ */
+DT_STATIC_PROBE1(raw_syscalls, sys_enter, "register_t");
+DT_STATIC_PROBE1(raw_syscalls, sys_exit, "register_t");
+
+/*
+ * List of all static probes
+ */
+struct dt_probe *dtps_static[] = {
+ /* Scheduler */
+ &_DT_STATIC_P(sched, dequeue),
+ &_DT_STATIC_P(sched, enqueue),
+ &_DT_STATIC_P(sched, off__cpu),
+ &_DT_STATIC_P(sched, on__cpu),
+ &_DT_STATIC_P(sched, remain__cpu),
+ &_DT_STATIC_P(sched, sleep),
+ &_DT_STATIC_P(sched, wakeup),
+ /* Raw syscalls */
+ &_DT_STATIC_P(raw_syscalls, sys_enter),
+ &_DT_STATIC_P(raw_syscalls, sys_exit),
+};
+
+int
+dt_prov_static_init(void)
+{
+ int i;
+
+ for (i = 0; i < nitems(dtps_static); i++)
+ dt_dev_register_probe(dtps_static[i]);
+
+ return i;
+}
+
+int
+dt_prov_static_alloc(struct dt_probe *dtp, struct dt_softc *sc,
+ struct dt_pcb_list *plist, struct dtioc_req *dtrq)
+{
+ struct dt_pcb *dp;
+
+ KASSERT(dtioc_req_isvalid(dtrq));
+ KASSERT(TAILQ_EMPTY(plist));
+
+ dp = dt_pcb_alloc(dtp, sc);
+ if (dp == NULL)
+ return ENOMEM;
+
+ dp->dp_filter = dtrq->dtrq_filter;
+ dp->dp_evtflags = dtrq->dtrq_evtflags;
+ TAILQ_INSERT_HEAD(plist, dp, dp_snext);
+
+ return 0;
+}
+
+void
+dt_prov_static_hook(struct dt_provider *dtpv, ...)
+{
+ struct dt_probe *dtp;
+ struct dt_pcb *dp;
+ uintptr_t args[5];
+ va_list ap;
+ int i;
+
+ va_start(ap, dtpv);
+ dtp = va_arg(ap, struct dt_probe *);
+ for (i = 0; i < dtp->dtp_nargs; i++) {
+ args[i] = va_arg(ap, uintptr_t);
+ }
+ va_end(ap);
+
+ KASSERT(dtpv == dtp->dtp_prov);
+
+ smr_read_enter();
+ SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
+ struct dt_evt *dtev;
+
+ dtev = dt_pcb_ring_get(dp);
+ if (dtev == NULL)
+ continue;
+
+ dtev->dtev_sysargs[0] = args[0];
+ dtev->dtev_sysargs[1] = args[1];
+ dtev->dtev_sysargs[2] = args[2];
+ dtev->dtev_sysargs[3] = args[3];
+ dtev->dtev_sysargs[4] = args[4];
+
+ dt_pcb_ring_consume(dp, dtev);
+ }
+ smr_read_leave();
+}
diff --git a/sys/dev/dt/dt_prov_syscall.c b/sys/dev/dt/dt_prov_syscall.c
new file mode 100644
index 00000000000..28ceb88b48c
--- /dev/null
+++ b/sys/dev/dt/dt_prov_syscall.c
@@ -0,0 +1,206 @@
+/* $OpenBSD: dt_prov_syscall.c,v 1.1 2020/01/21 16:16:23 mpi Exp $ */
+
+/*
+ * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/atomic.h>
+#include <sys/syscall.h>
+
+#include <dev/dt/dtvar.h>
+
+extern struct sysent sysent[];
+
+/* Arrays of probes per syscall. */
+struct dt_probe **dtps_entry;
+struct dt_probe **dtps_return;
+unsigned int dtps_nsysent = SYS_MAXSYSCALL;
+
+/* Flags that make sense for this provider */
+#define DTEVT_PROV_SYSCALL (DTEVT_COMMON|DTEVT_FUNCARGS|DTEVT_RETVAL)
+
+int dt_prov_syscall_alloc(struct dt_probe *, struct dt_softc *,
+ struct dt_pcb_list *, struct dtioc_req *);
+void dt_prov_syscall_entry(struct dt_provider *, ...);
+void dt_prov_syscall_return(struct dt_provider *, ...);
+
+struct dt_provider dt_prov_syscall = {
+ .dtpv_name = "syscall",
+ .dtpv_alloc = dt_prov_syscall_alloc,
+ .dtpv_enter = dt_prov_syscall_entry,
+ .dtpv_leave = dt_prov_syscall_return,
+};
+
+int
+dt_prov_syscall_init(void)
+{
+ struct dt_probe *dtp;
+ int i, len, nprobes = 0;
+ char *sysnb;
+
+ dtps_entry = mallocarray(dtps_nsysent, sizeof(dtp), M_DT,
+ M_NOWAIT|M_ZERO);
+ if (dtps_entry == NULL)
+ return 0;
+ dtps_return = mallocarray(dtps_nsysent, sizeof(dtp), M_DT,
+ M_NOWAIT|M_ZERO);
+ if (dtps_return == NULL) {
+ free(dtps_entry, M_DT, dtps_nsysent * sizeof(dtp));
+ return 0;
+ }
+
+ for (i = 0; i < dtps_nsysent; i++) {
+ if (sysent[i].sy_call == sys_nosys)
+ continue;
+
+ len = snprintf(NULL, 0, "sys%%%u", i);
+ sysnb = malloc(len + 1, M_DT, M_NOWAIT);
+ if (sysnb == NULL)
+ break;
+ snprintf(sysnb, len + 1, "sys%%%u", i);
+ dtp = dt_dev_alloc_probe(sysnb, "entry", &dt_prov_syscall);
+ if (dtp == NULL) {
+ free(sysnb, M_DT, len);
+ break;
+ }
+ dtp->dtp_sysnum = i;
+ dtps_entry[i] = dtp;
+ dt_dev_register_probe(dtp);
+ nprobes++;
+ dtp = dt_dev_alloc_probe(sysnb, "return", &dt_prov_syscall);
+ if (dtp == NULL)
+ break;
+ dtp->dtp_sysnum = i;
+ dtps_return[i] = dtp;
+ dt_dev_register_probe(dtp);
+ nprobes++;
+ }
+
+ return nprobes;
+}
+
+int
+dt_prov_syscall_alloc(struct dt_probe *dtp, struct dt_softc *sc,
+ struct dt_pcb_list *plist, struct dtioc_req *dtrq)
+{
+ struct dt_pcb *dp;
+
+ KASSERT(dtioc_req_isvalid(dtrq));
+ KASSERT(TAILQ_EMPTY(plist));
+ KASSERT(dtp->dtp_prov == &dt_prov_syscall);
+ KASSERT((dtp->dtp_sysnum >= 0) && (dtp->dtp_sysnum < dtps_nsysent));
+
+ dp = dt_pcb_alloc(dtp, sc);
+ if (dp == NULL)
+ return ENOMEM;
+
+ dp->dp_filter = dtrq->dtrq_filter;
+ dp->dp_evtflags = dtrq->dtrq_evtflags & DTEVT_PROV_SYSCALL;
+ TAILQ_INSERT_HEAD(plist, dp, dp_snext);
+
+
+ return 0;
+}
+
+void
+dt_prov_syscall_entry(struct dt_provider *dtpv, ...)
+{
+ struct dt_probe *dtp;
+ struct dt_pcb *dp;
+ register_t sysnum;
+ size_t argsize;
+ register_t *args;
+ va_list ap;
+
+ KASSERT(dtpv == &dt_prov_syscall);
+ va_start(ap, dtpv);
+ sysnum = va_arg(ap, register_t);
+ argsize = va_arg(ap, size_t);
+ args = va_arg(ap, register_t*);
+ va_end(ap);
+
+ KASSERT((argsize / sizeof(register_t)) <= DTMAXSYSARGS);
+
+ if (sysnum < 0 || sysnum >= dtps_nsysent)
+ return;
+
+ dtp = dtps_entry[sysnum];
+ if (!dtp->dtp_recording)
+ return;
+
+ smr_read_enter();
+ SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
+ struct dt_evt *dtev;
+
+ dtev = dt_pcb_ring_get(dp);
+ if (dtev == NULL)
+ continue;
+
+ if (ISSET(dp->dp_evtflags, DTEVT_FUNCARGS))
+ memcpy(dtev->dtev_sysargs, args, argsize);
+
+ dt_pcb_ring_consume(dp, dtev);
+ }
+ smr_read_leave();
+}
+
+void
+dt_prov_syscall_return(struct dt_provider *dtpv, ...)
+{
+ struct dt_probe *dtp;
+ struct dt_pcb *dp;
+ register_t sysnum;
+ int error;
+ register_t retval[2];
+ va_list ap;
+
+ KASSERT(dtpv == &dt_prov_syscall);
+
+ va_start(ap, dtpv);
+ sysnum = va_arg(ap, register_t);
+ error = va_arg(ap, int);
+ retval[0] = va_arg(ap, register_t);
+ retval[1] = va_arg(ap, register_t);
+ va_end(ap);
+
+ if (sysnum < 0 || sysnum >= dtps_nsysent)
+ return;
+
+ dtp = dtps_return[sysnum];
+ if (!dtp->dtp_recording)
+ return;
+
+ smr_read_enter();
+ SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) {
+ struct dt_evt *dtev;
+
+ dtev = dt_pcb_ring_get(dp);
+ if (dtev == NULL)
+ continue;
+
+ if (ISSET(dp->dp_evtflags, DTEVT_RETVAL)) {
+ dtev->dtev_sysretval[0] = retval[0];
+ dtev->dtev_sysretval[1] = retval[1];
+ dtev->dtev_syserror = error;
+ }
+
+ dt_pcb_ring_consume(dp, dtev);
+ }
+ smr_read_leave();
+}
diff --git a/sys/dev/dt/dtvar.h b/sys/dev/dt/dtvar.h
new file mode 100644
index 00000000000..05bad44680e
--- /dev/null
+++ b/sys/dev/dt/dtvar.h
@@ -0,0 +1,317 @@
+/* $OpenBSD: dtvar.h,v 1.1 2020/01/21 16:16:23 mpi Exp $ */
+
+/*
+ * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _DT_H_
+#define _DT_H_
+
+#include <sys/ioccom.h>
+#include <sys/stacktrace.h>
+#include <sys/time.h>
+
+/*
+ * Length of provider/probe/function names, including terminating '\0'.
+ */
+#define DTNAMESIZE 16
+
+/*
+ * Length of process name, keep in sync with MAXCOMLEN.
+ */
+#define DTMAXCOMLEN 16
+
+/*
+ * Maximum number of arguments passed to a syscall.
+ */
+#define DTMAXSYSARGS 10
+
+/*
+ * Event state: where to store information when a probe fires.
+ */
+struct dt_evt {
+ unsigned int dtev_pbn; /* Probe number */
+ unsigned int dtev_cpu; /* CPU id */
+ pid_t dtev_pid; /* ID of current process */
+ pid_t dtev_tid; /* ID of current thread */
+ struct timespec dtev_tsp; /* timestamp (nsecs) */
+
+ /*
+ * Recorded if the corresponding flag is set.
+ */
+ struct stacktrace dtev_kstack; /* kernel stack frame */
+ char dtev_comm[DTMAXCOMLEN+1]; /* current pr. name */
+ union {
+ register_t E_entry[DTMAXSYSARGS];
+ struct {
+ register_t __retval[2];
+ int __error;
+ } E_return;
+ } _sys;
+#define dtev_sysargs _sys.E_entry /* syscall args. */
+#define dtev_sysretval _sys.E_return.__retval /* syscall retval */
+#define dtev_syserror _sys.E_return.__error /* syscall error */
+
+};
+
+/*
+ * States to record when a probe fires.
+ */
+#define DTEVT_EXECNAME (1 << 0) /* current process name */
+#define DTEVT_USTACK (1 << 1) /* userland stack */
+#define DTEVT_KSTACK (1 << 2) /* kernel stack */
+#define DTEVT_FUNCARGS (1 << 3) /* function arguments */
+#define DTEVT_RETVAL (1 << 4) /* function retval & error */
+
+#define DTEVT_FLAG_BITS \
+ "\020" \
+ "\001EXECNAME" \
+ "\002USTACK" \
+ "\003KSTACK" \
+ "\004FUNCARGS" \
+ "\005RETVAL" \
+
+/*
+ * Each PCB can have a filter attached to itself. A filter do not
+ * prevent an enabled probe to fire, but when that happens, event
+ * states are only recorded if it is matched.
+ */
+struct dt_filter {
+ enum dt_operand {
+ DT_OP_NONE = 0,
+ DT_OP_EQ,
+ DT_OP_NE,
+ } dtf_operand;
+ enum dt_filtervar {
+ DT_FV_NONE = 0,
+ DT_FV_PID,
+ DT_FV_TID,
+ } dtf_variable /* what should be filtered */;
+ unsigned int dtf_value; /* PID or TID to filter */
+};
+
+
+struct dtioc_probe_info {
+ uint32_t dtpi_pbn; /* Probe number */
+ char dtpi_prov[DTNAMESIZE];
+ char dtpi_func[DTNAMESIZE];
+ char dtpi_name[DTNAMESIZE];
+};
+
+struct dtioc_probe {
+ size_t dtpr_size; /* size of the buffer */
+ struct dtioc_probe_info *dtpr_probes; /* array of probe info */
+};
+
+struct dtioc_req {
+ uint32_t dtrq_pbn; /* probe number */
+ struct dt_filter dtrq_filter; /* probe filter */
+ uint32_t dtrq_rate; /* number of ticks */
+ uint64_t dtrq_evtflags; /* states to record */
+};
+
+struct dtioc_stat {
+ uint64_t dtst_readevt; /* events read */
+ uint64_t dtst_dropevt; /* events dropped */
+};
+
+#define DTIOCGPLIST _IOWR('D', 1, struct dtioc_probe)
+#define DTIOCGSTATS _IOR('D', 2, struct dtioc_stat)
+
+#define DTIOCRECORD _IOW('D', 3, int)
+#define DTIOCPRBENABLE _IOW('D', 4, struct dtioc_req)
+
+
+#ifdef _KERNEL
+
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/smr.h>
+
+/* Flags that make sense for all providers. */
+#define DTEVT_COMMON (DTEVT_EXECNAME|DTEVT_KSTACK|DTEVT_USTACK)
+
+#define M_DT M_DEVBUF /* XXX FIXME */
+
+struct dt_softc;
+
+int dtioc_req_isvalid(struct dtioc_req *);
+
+/*
+ * Probe control block, possibly per-CPU.
+ *
+ * At least a PCB is allocated for each probe enabled via the DTIOCPRBENABLE
+ * ioctl(2). It will hold the events written when the probe fires until
+ * userland read(2)s them.
+ *
+ * Locks used to protect struct members in this file:
+ * I immutable after creation
+ * k kernel lock
+ * k,s kernel lock for writting and SMR for reading
+ * m per-pcb mutex
+ * c owned (read & modified) by a single CPU
+ */
+struct dt_pcb {
+ SMR_SLIST_ENTRY(dt_pcb) dp_pnext; /* [k,s] next PCB per probe */
+ TAILQ_ENTRY(dt_pcb) dp_snext; /* [k] next PCB per softc */
+
+ /* Event states ring */
+ unsigned int dp_prod; /* [m] read index */
+ unsigned int dp_cons; /* [m] write index */
+ struct dt_evt *dp_ring; /* [m] ring of event sates */
+ struct mutex dp_mtx;
+
+ struct dt_softc *dp_sc; /* [I] related softc */
+ struct dt_probe *dp_dtp; /* [I] related probe */
+ uint64_t dp_evtflags; /* [I] event states to record */
+ struct dt_filter dp_filter; /* [I] filter to match */
+
+ /* Provider specific fields. */
+ unsigned int dp_cpuid; /* [I] on which CPU */
+ unsigned int dp_maxtick; /* [I] freq. of profiling */
+ unsigned int dp_nticks; /* [c] current tick count */
+
+ /* Counters */
+ uint64_t dp_dropevt; /* [m] # dropped event */
+};
+
+TAILQ_HEAD(dt_pcb_list, dt_pcb);
+
+struct dt_pcb *dt_pcb_alloc(struct dt_probe *, struct dt_softc *);
+void dt_pcb_free(struct dt_pcb *);
+void dt_pcb_purge(struct dt_pcb_list *);
+int dt_pcb_filter(struct dt_pcb *);
+
+struct dt_evt *dt_pcb_ring_get(struct dt_pcb *);
+void dt_pcb_ring_consume(struct dt_pcb *, struct dt_evt *);
+
+/*
+ * Probes are entry points in the system where events can be recorded.
+ *
+ * Locks used to protect struct members in this file:
+ * I immutable after creation
+ * k kernel lock
+ * d dt_lock
+ * d,s dt_lock for writting and SMR for reading
+ */
+struct dt_probe {
+ SIMPLEQ_ENTRY(dt_probe) dtp_next; /* [k] global list of probes */
+ SMR_SLIST_HEAD(, dt_pcb) dtp_pcbs; /* [d,s] list of enabled PCBs */
+ struct dt_provider *dtp_prov; /* [I] its to provider */
+ const char *dtp_func; /* [I] probe function */
+ const char *dtp_name; /* [I] probe name */
+ uint32_t dtp_pbn; /* [I] unique ID */
+ volatile uint32_t dtp_recording; /* [d] is it recording? */
+
+ /* Provider specific fields. */
+ int dtp_sysnum; /* [I] related # of syscall */
+ const char *dtp_argtype[5];/* [I] type of arguments */
+ int dtp_nargs; /* [I] # of arguments */
+};
+
+
+/*
+ * Providers expose a set of probes and a method to record events.
+ */
+struct dt_provider {
+ const char *dtpv_name; /* [I] provider name */
+ volatile uint32_t dtpv_recording;/* [d] # of recording PCBs */
+
+ int (*dtpv_alloc)(struct dt_probe *, struct dt_softc *,
+ struct dt_pcb_list *, struct dtioc_req *);
+ void (*dtpv_enter)(struct dt_provider *, ...);
+ void (*dtpv_leave)(struct dt_provider *, ...);
+};
+
+int dt_prov_profile_init(void);
+int dt_prov_syscall_init(void);
+int dt_prov_static_init(void);
+
+struct dt_probe *dt_dev_alloc_probe(const char *, const char *,
+ struct dt_provider *);
+void dt_dev_register_probe(struct dt_probe *);
+
+
+extern volatile uint32_t dt_tracing; /* currently tracing? */
+
+#define DT_ENTER(provname, args...) do { \
+ extern struct dt_provider dt_prov_ ## provname ; \
+ struct dt_provider *dtpv = &dt_prov_ ## provname ; \
+ \
+ if (__predict_false(dt_tracing) && \
+ __predict_false(dtpv->dtpv_recording)) { \
+ dtpv->dtpv_enter(dtpv, args); \
+ } \
+} while (0)
+
+#define DT_LEAVE(provname, args...) do { \
+ extern struct dt_provider dt_prov_ ## provname ; \
+ struct dt_provider *dtpv = &dt_prov_ ## provname ; \
+ \
+ if (__predict_false(dt_tracing) && \
+ __predict_false(dtpv->dtpv_recording)) { \
+ dtpv->dtpv_leave(dtpv, args); \
+ } \
+} while (0)
+
+#define _DT_STATIC_P(func, name) (dt_static_##func##_##name)
+
+/*
+ * Probe definition for the static provider.
+ */
+#define _DT_STATIC_PROBEN(func, name, arg0, arg1, arg2, arg3, arg4, n) \
+ struct dt_probe _DT_STATIC_P(func, name) = { \
+ .dtp_next = { NULL }, \
+ .dtp_pcbs = { NULL }, \
+ .dtp_prov = &dt_prov_static, \
+ .dtp_func = #func, \
+ .dtp_name = #name, \
+ .dtp_pbn = 0, \
+ .dtp_sysnum = 0, \
+ .dtp_argtype = { arg0, arg1, arg2, arg3, arg4 }, \
+ .dtp_nargs = n, \
+ } \
+
+#define DT_STATIC_PROBE0(func, name) \
+ _DT_STATIC_PROBEN(func, name, NULL, NULL, NULL, NULL, NULL, 0)
+
+#define DT_STATIC_PROBE1(func, name, arg0) \
+ _DT_STATIC_PROBEN(func, name, arg0, NULL, NULL, NULL, NULL, 1)
+
+#define DT_STATIC_PROBE2(func, name, arg0, arg1) \
+ _DT_STATIC_PROBEN(func, name, arg0, arg1, NULL, NULL, NULL, 2)
+
+#define DT_STATIC_PROBE3(func, name, arg0, arg1, arg2) \
+ _DT_STATIC_PROBEN(func, name, arg0, arg1, arg2, NULL, NULL, 3)
+
+#define DT_STATIC_PROBE4(func, name, arg0, arg1, arg2, arg3) \
+ _DT_STATIC_PROBEN(func, name, arg0, arg1, arg2, arg3, NULL, 4)
+
+#define DT_STATIC_PROBE5(func, name, arg0, arg1, arg2, arg3, arg4) \
+ _DT_STATIC_PROBEN(func, name, arg0, arg1, arg2, arg3, arg4, 5)
+
+#define DT_STATIC_ENTER(func, name, args...) do { \
+ extern struct dt_probe _DT_STATIC_P(func, name); \
+ struct dt_probe *dtp = &_DT_STATIC_P(func, name); \
+ struct dt_provider *dtpv = dtp->dtp_prov; \
+ \
+ if (__predict_false(dt_tracing) && \
+ __predict_false(dtp->dtp_recording)) { \
+ dtpv->dtpv_enter(dtpv, dtp, args); \
+ } \
+} while (0)
+
+#endif /* !_KERNEL */
+#endif /* !_DT_H_ */
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index 0a3679b0510..b6e0ca4f65a 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_clock.c,v 1.100 2019/11/02 16:56:17 cheloha Exp $ */
+/* $OpenBSD: kern_clock.c,v 1.101 2020/01/21 16:16:23 mpi Exp $ */
/* $NetBSD: kern_clock.c,v 1.34 1996/06/09 04:51:03 briggs Exp $ */
/*-
@@ -55,6 +55,11 @@
#include <sys/gmon.h>
#endif
+#include "dt.h"
+#if NDT > 0
+#include <dev/dt/dtvar.h>
+#endif
+
/*
* Clock handling routines.
*
@@ -168,6 +173,12 @@ hardclock(struct clockframe *frame)
if (--ci->ci_schedstate.spc_rrticks <= 0)
roundrobin(ci);
+#if NDT > 0
+ DT_ENTER(profile, NULL);
+ if (CPU_IS_PRIMARY(ci))
+ DT_ENTER(interval, NULL);
+#endif
+
/*
* If we are not the primary CPU, we're not allowed to do
* any more work.
diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c
index 46de8fa7800..e25993099ee 100644
--- a/sys/kern/kern_sched.c
+++ b/sys/kern/kern_sched.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_sched.c,v 1.62 2019/11/04 18:06:03 visa Exp $ */
+/* $OpenBSD: kern_sched.c,v 1.63 2020/01/21 16:16:23 mpi Exp $ */
/*
* Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org>
*
@@ -26,6 +26,7 @@
#include <sys/mutex.h>
#include <sys/task.h>
#include <sys/smr.h>
+#include <sys/tracepoint.h>
#include <uvm/uvm_extern.h>
@@ -261,6 +262,7 @@ setrunqueue(struct cpu_info *ci, struct proc *p, uint8_t prio)
spc = &p->p_cpu->ci_schedstate;
spc->spc_nrun++;
+ TRACEPOINT(sched, enqueue, p->p_tid, p->p_p->ps_pid);
TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
spc->spc_whichqs |= (1 << queue);
@@ -282,6 +284,7 @@ remrunqueue(struct proc *p)
SCHED_ASSERT_LOCKED();
spc = &p->p_cpu->ci_schedstate;
spc->spc_nrun--;
+ TRACEPOINT(sched, dequeue, p->p_tid, p->p_p->ps_pid);
TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 02c733ad8f1..3ca58be881e 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_synch.c,v 1.159 2020/01/21 15:20:47 visa Exp $ */
+/* $OpenBSD: kern_synch.c,v 1.160 2020/01/21 16:16:23 mpi Exp $ */
/* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */
/*
@@ -51,6 +51,8 @@
#include <sys/refcnt.h>
#include <sys/atomic.h>
#include <sys/witness.h>
+#include <sys/tracepoint.h>
+
#include <ddb/db_output.h>
#include <machine/spinlock.h>
@@ -380,6 +382,8 @@ sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio,
SCHED_LOCK(sls->sls_s);
+ TRACEPOINT(sched, sleep, NULL);
+
p->p_wchan = ident;
p->p_wmesg = wmesg;
p->p_slptime = 0;
@@ -552,6 +556,7 @@ unsleep(struct proc *p)
if (p->p_wchan != NULL) {
TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_runq);
p->p_wchan = NULL;
+ TRACEPOINT(sched, wakeup, p->p_tid, p->p_p->ps_pid);
}
}
diff --git a/sys/kern/sched_bsd.c b/sys/kern/sched_bsd.c
index 3b8c4a80536..9172bc7e24e 100644
--- a/sys/kern/sched_bsd.c
+++ b/sys/kern/sched_bsd.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: sched_bsd.c,v 1.60 2019/12/11 07:30:09 guenther Exp $ */
+/* $OpenBSD: sched_bsd.c,v 1.61 2020/01/21 16:16:23 mpi Exp $ */
/* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */
/*-
@@ -48,6 +48,7 @@
#include <sys/sched.h>
#include <sys/timeout.h>
#include <sys/smr.h>
+#include <sys/tracepoint.h>
#ifdef KTRACE
#include <sys/ktrace.h>
@@ -392,8 +393,12 @@ mi_switch(void)
if (p != nextproc) {
uvmexp.swtch++;
+ TRACEPOINT(sched, off__cpu, nextproc->p_tid,
+ nextproc->p_p->ps_pid);
cpu_switchto(p, nextproc);
+ TRACEPOINT(sched, on__cpu, NULL);
} else {
+ TRACEPOINT(sched, remain__cpu, NULL);
p->p_stat = SONPROC;
}
diff --git a/sys/sys/conf.h b/sys/sys/conf.h
index b43c8374fa5..c4317cd1e44 100644
--- a/sys/sys/conf.h
+++ b/sys/sys/conf.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: conf.h,v 1.146 2019/12/17 13:08:54 reyk Exp $ */
+/* $OpenBSD: conf.h,v 1.147 2020/01/21 16:16:23 mpi Exp $ */
/* $NetBSD: conf.h,v 1.33 1996/05/03 20:03:32 christos Exp $ */
/*-
@@ -489,6 +489,13 @@ extern struct cdevsw cdevsw[];
(dev_type_stop((*))) enodev, 0, selfalse, \
(dev_init(c,n,mmap)), 0, D_CLONE }
+/* open, close, read, ioctl */
+#define cdev_dt_init(c,n) { \
+ dev_init(c,n,open), dev_init(c,n,close), dev_init(c,n,read), \
+ (dev_type_write((*))) enodev, dev_init(c,n,ioctl), \
+ (dev_type_stop((*))) enodev, 0, selfalse, \
+ (dev_type_mmap((*))) enodev, 0, D_CLONE }
+
#endif
/*
@@ -582,6 +589,8 @@ cdev_decl(rd);
bdev_decl(uk);
cdev_decl(uk);
+cdev_decl(dt);
+
cdev_decl(diskmap);
cdev_decl(bpf);
diff --git a/sys/sys/syscall_mi.h b/sys/sys/syscall_mi.h
index f7e87413faa..76f34e0b030 100644
--- a/sys/sys/syscall_mi.h
+++ b/sys/sys/syscall_mi.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: syscall_mi.h,v 1.24 2019/11/29 06:34:46 deraadt Exp $ */
+/* $OpenBSD: syscall_mi.h,v 1.25 2020/01/21 16:16:23 mpi Exp $ */
/*
* Copyright (c) 1982, 1986, 1989, 1993
@@ -33,12 +33,18 @@
#include <sys/param.h>
#include <sys/pledge.h>
+#include <sys/tracepoint.h>
#include <uvm/uvm_extern.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
+#include "dt.h"
+#if NDT > 0
+#include <dev/dt/dtvar.h>
+#endif
+
/*
* The MD setup for a system call has been done; here's the MI part.
@@ -59,6 +65,10 @@ mi_syscall(struct proc *p, register_t code, const struct sysent *callp,
scdebug_call(p, code, argp);
KERNEL_UNLOCK();
#endif
+ TRACEPOINT(raw_syscalls, sys_enter, code, NULL);
+#if NDT > 0
+ DT_ENTER(syscall, code, callp->sy_argsize, argp);
+#endif
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSCALL)) {
KERNEL_LOCK();
@@ -108,6 +118,10 @@ mi_syscall_return(struct proc *p, register_t code, int error,
scdebug_ret(p, code, error, retval);
KERNEL_UNLOCK();
#endif
+#if NDT > 0
+ DT_LEAVE(syscall, code, error, retval[0], retval[1]);
+#endif
+ TRACEPOINT(raw_syscalls, sys_exit, code, NULL);
userret(p);
@@ -126,17 +140,23 @@ mi_syscall_return(struct proc *p, register_t code, int error,
static inline void
mi_child_return(struct proc *p)
{
-#if defined(SYSCALL_DEBUG) || defined(KTRACE)
+#if defined(SYSCALL_DEBUG) || defined(KTRACE) || NDT > 0
int code = (p->p_flag & P_THREAD) ? SYS___tfork :
(p->p_p->ps_flags & PS_PPWAIT) ? SYS_vfork : SYS_fork;
const register_t child_retval[2] = { 0, 1 };
#endif
+ TRACEPOINT(sched, on__cpu, NULL);
+
#ifdef SYSCALL_DEBUG
KERNEL_LOCK();
scdebug_ret(p, code, 0, child_retval);
KERNEL_UNLOCK();
#endif
+#if NDT > 0
+ DT_LEAVE(syscall, code, 0, child_retval[0], child_retval[1]);
+#endif
+ TRACEPOINT(raw_syscalls, sys_exit, code, NULL);
userret(p);
diff --git a/sys/sys/tracepoint.h b/sys/sys/tracepoint.h
new file mode 100644
index 00000000000..d9a674ab1fb
--- /dev/null
+++ b/sys/sys/tracepoint.h
@@ -0,0 +1,36 @@
+/* $OpenBSD: tracepoint.h,v 1.1 2020/01/21 16:16:23 mpi Exp $ */
+
+/*
+ * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _SYS_TRACEPOINT_H_
+#define _SYS_TRACEPOINT_H_
+
+#ifdef _KERNEL
+
+#include "dt.h"
+#if NDT > 0
+#include <dev/dt/dtvar.h>
+
+#define TRACEPOINT(func, name, args...) DT_STATIC_ENTER(func, name, args)
+
+#else /* NDT > 0 */
+
+#define TRACEPOINT(func, name, args...)
+
+#endif /* NDT > 0 */
+#endif /* _KERNEL */
+#endif /* _SYS_TRACEPOINT_H_ */