summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorderaadt <deraadt@openbsd.org>2007-09-12 18:18:27 +0000
committerderaadt <deraadt@openbsd.org>2007-09-12 18:18:27 +0000
commita456bd90c3cff001b2b72f9f9a87be2373ac77ea (patch)
tree150341cd73f27c70199a63ab5eae56fc7c8d7ca5
parentsync (diff)
downloadwireguard-openbsd-a456bd90c3cff001b2b72f9f9a87be2373ac77ea.tar.xz
wireguard-openbsd-a456bd90c3cff001b2b72f9f9a87be2373ac77ea.zip
port of i386 pctr code to amd64; Mike Belopuhov
-rw-r--r--share/man/man4/man4.amd64/Makefile4
-rw-r--r--share/man/man4/man4.amd64/pctr.4506
-rw-r--r--sys/arch/amd64/amd64/conf.c7
-rw-r--r--sys/arch/amd64/amd64/locore.S5
-rw-r--r--sys/arch/amd64/amd64/pctr.c169
-rw-r--r--sys/arch/amd64/conf/GENERIC3
-rw-r--r--sys/arch/amd64/conf/files.amd645
-rw-r--r--sys/arch/amd64/include/conf.h5
-rw-r--r--sys/arch/amd64/include/pctr.h78
9 files changed, 773 insertions, 9 deletions
diff --git a/share/man/man4/man4.amd64/Makefile b/share/man/man4/man4.amd64/Makefile
index 75221aefb25..1e8c07cb72c 100644
--- a/share/man/man4/man4.amd64/Makefile
+++ b/share/man/man4/man4.amd64/Makefile
@@ -1,6 +1,6 @@
-# $OpenBSD: Makefile,v 1.5 2007/08/02 16:40:27 deraadt Exp $
+# $OpenBSD: Makefile,v 1.6 2007/09/12 18:18:27 deraadt Exp $
-MAN= autoconf.4 bios.4 cpu.4 intro.4 ioapic.4 mem.4 nvram.4
+MAN= autoconf.4 bios.4 cpu.4 intro.4 ioapic.4 mem.4 nvram.4 pctr.4
MLINKS+= mem.4 kmem.4
MANSUBDIR=amd64
diff --git a/share/man/man4/man4.amd64/pctr.4 b/share/man/man4/man4.amd64/pctr.4
new file mode 100644
index 00000000000..6d9baa7d729
--- /dev/null
+++ b/share/man/man4/man4.amd64/pctr.4
@@ -0,0 +1,506 @@
+.\" $OpenBSD: pctr.4,v 1.1 2007/09/12 18:18:27 deraadt Exp $
+.\"
+.\" Pentium performance counter driver for OpenBSD.
+.\" Copyright 1996 David Mazieres <dm@lcs.mit.edu>.
+.\"
+.\" Modification and redistribution in source and binary forms is
+.\" permitted provided that due credit is given to the author and the
+.\" OpenBSD project by leaving this copyright notice intact.
+.\"
+.Dd $Mdocdate: September 12 2007 $
+.Dt PCTR 4 amd64
+.Os
+.Sh NAME
+.Nm pctr
+.Nd driver for CPU performance counters
+.Sh SYNOPSIS
+.Cd "pseudo-device pctr 1"
+.Sh DESCRIPTION
+The
+.Nm
+device provides access to the performance counters on Intel brand processors,
+and to the TSC on others.
+.Pp
+Intel processors have two 40-bit performance
+counters which can be programmed to count events such as cache misses,
+branch target buffer hits, TLB misses, dual-issues, interrupts,
+pipeline flushes, and more.
+.Pp
+There is one
+.Em ioctl
+call to read the status of all counters, and one
+.Em ioctl
+call to program the function of each counter.
+All require the following includes:
+.Bd -literal -offset indent
+#include <sys/types.h>
+#include <machine/cpu.h>
+#include <machine/pctr.h>
+.Ed
+.Pp
+The current state of all counters can be read with the
+.Dv PCIOCRD
+.Em ioctl ,
+which takes an argument of type
+.Dv "struct pctrst" :
+.Bd -literal -offset indent
+#define PCTR_NUM 4
+struct pctrst {
+	u_int64_t pctr_hwc[PCTR_NUM];
+	u_int64_t pctr_tsc;
+	u_int64_t pctr_idl;
+	u_int32_t pctr_fn[PCTR_NUM];
+};
+.Ed
+.Pp
+In this structure,
+.Dv pctr_fn
+contains the functions of the two counters, as previously set by the
+.Dv PCIOCS0
+and
+.Dv PCIOCS1
+ioctls (see below).
+.Dv pctr_hwc
+contains the actual value of the two hardware counters.
+.Dv pctr_tsc
+is a free-running, 64-bit cycle counter.
+Finally,
+.Dv pctr_idl
+is a 64-bit count of idle-loop iterations.
+.Pp
+The functions of the two counters can be programmed with ioctls
+.Dv PCIOCS0
+and
+.Dv PCIOCS1 ,
+which require a writeable file descriptor and take an argument of type
+.Dv "unsigned int" . \&
+The meaning of this integer is dependent on the particular CPU.
+.\" The
+.\" following procedure can be used to determine which counters are
+.\" available on a given CPU:
+.\" .Bd -literal -offset indent
+.\" ctrval id = __cpuid();
+.\" if (__hasp5ctr(id)) {
+.\" /* The machine has Pentium counters */
+.\" } else if (__hasp6ctr(id)) {
+.\" /* The machine has Pentium Pro counters */
+.\" } else if (__hastsc(id)) {
+.\" /* The machine just has a time stamp counter */
+.\" } else {
+.\" /* No counters at all */
+.\"}
+.\" .Ed
+.Ss Time stamp counter
+The time stamp counter is available on all machines with Pentium and
+Pentium Pro counters, as well as on some 486s and non-Intel CPUs.
+It is set to zero at boot time, and then increments with each cycle.
+Because the counter is 64 bits wide, it does not overflow.
+.Pp
+The time stamp counter can be read directly from user-mode using
+the
+.Fn rdtsc
+macro, which returns a 64-bit value of type
+.Dv pctrval .
+The following example illustrates a simple use of
+.Dv rdtsc
+to measure the execution time of a hypothetical subroutine called
+.Fn functionx :
+.Bd -literal -offset indent
+void
+time_functionx(void)
+{
+ pctrval tsc;
+
+ tsc = rdtsc();
+ functionx();
+ tsc = rdtsc() - tsc;
+ printf ("Functionx took %qd cycles.\en", tsc);
+}
+.Ed
+.Pp
+The value of the time stamp counter is also returned by the
+.Dv PCIOCRD
+.Em ioctl ,
+so that one can get an exact timestamp on readings of the hardware
+event counters.
+.Ss Pentium counters
+The Pentium counters are programmed with a 9 bit function.
+The top three bits contain the following flags:
+.Bl -tag -width P5CTR_C
+.It Dv P5CTR_K
+Enables counting of events that occur in kernel mode.
+.It Dv P5CTR_U
+Enables counting of events that occur in user mode.
+You must set at least one of
+.Dv P5CTR_U
+and
+.Dv P5CTR_K
+to count anything.
+.It Dv P5CTR_C
+When this flag is set, the counter attempts to count the number of
+cycles spent servicing a particular event, rather than simply the
+number of occurrences of that event.
+.El
+.Pp
+The bottom 6 bits set the particular event counted.
+Here is the event type of each permissible value for the bottom 6 bits of the
+counter function:
+.Pp
+.Bl -tag -width 0x00 -compact -offset indent
+.It 0x00
+Data read
+.It 0x01
+Data write
+.It 0x02
+Data TLB miss
+.It 0x03
+Data read miss
+.It 0x04
+Data write miss
+.It 0x05
+Write (hit) to M or E state lines
+.It 0x06
+Data cache lines written back
+.It 0x07
+Data cache snoops
+.It 0x08
+Data cache snoop hits
+.It 0x09
+Memory accesses in both pipes
+.It 0x0a
+Bank conflicts
+.It 0x0b
+Misaligned data memory references
+.It 0x0c
+Code read
+.It 0x0d
+Code TLB miss
+.It 0x0e
+Code cache miss
+.It 0x0f
+Any segment register load
+.It 0x12
+Branches
+.It 0x13
+BTB hits
+.It 0x14
+Taken branch or BTB hit
+.It 0x15
+Pipeline flushes
+.It 0x16
+Instructions executed
+.It 0x17
+Instructions executed in the V-pipe
+.It 0x18
+Bus utilization (clocks)
+.It 0x19
+Pipeline stalled by write backup
+.It 0x1a
+Pipeline stalled by data memory read
+.It 0x1b
+Pipeline stalled by write to E or M line
+.It 0x1c
+Locked bus cycle
+.It 0x1d
+I/O read or write cycle
+.It 0x1e
+Non-cacheable memory references
+.It 0x1f
+AGI (Address Generation Interlock)
+.It 0x22
+Floating-point operations
+.It 0x23
+Breakpoint 0 match
+.It 0x24
+Breakpoint 1 match
+.It 0x25
+Breakpoint 2 match
+.It 0x26
+Breakpoint 3 match
+.It 0x27
+Hardware interrupts
+.It 0x28
+Data read or data write
+.It 0x29
+Data read miss or data write miss
+.El
+.Ss Pentium Pro counters
+The Pentium Pro counter functions contain several parts.
+The most significant byte (an 8-bit integer shifted left by
+.Dv P6CTR_CM_SHIFT )
+contains a
+.Em "counter mask" . \&
+If non-zero, this sets a threshold for the number of times an event
+must occur in one cycle for the counter to be incremented.
+The
+.Em "counter mask"
+can therefore be used to count cycles in which an event
+occurs at least some number of times.
+The next byte contains several flags:
+.Bl -tag -width P6CTR_EN
+.It Dv P6CTR_U
+Enables counting of events that occur in user mode.
+.It Dv P6CTR_K
+Enables counting of events that occur in kernel mode.
+You must set at least one of
+.Dv P6CTR_K
+and
+.Dv P6CTR_U
+to count anything.
+.It Dv P6CTR_E
+Counts edges rather than cycles.
+For some functions this allows you
+to get an estimate of the number of events rather than the number of
+cycles occupied by those events.
+.It Dv P6CTR_EN
+Enable counters.
+This bit must be set in the function for counter 0
+in order for either of the counters to be enabled.
+This bit should probably be set in counter 1 as well.
+.It Dv P6CTR_I
+Inverts the sense of the
+.Em "counter mask" . \&
+When this bit is set, the counter only increments on cycles in which
+there are no
+.Em more
+events than specified in the
+.Em "counter mask" .
+.El
+.Pp
+The next byte, also known as the
+.Em "unit mask" ,
+contains flags specific to the event being counted.
+For events dealing with the L2 cache, the following flags are valid:
+.Bl -tag -width P6CTR_UM_M
+.It Dv P6CTR_UM_M
+Count events involving modified cache lines.
+.It Dv P6CTR_UM_E
+Count events involving exclusive cache lines.
+.It Dv P6CTR_UM_S
+Count events involving shared cache lines.
+.It Dv P6CTR_UM_I
+Count events involving invalid cache lines.
+.El
+To measure all L2 cache activity, all these bits should be set.
+They can be set with the macro
+.Dv P6CTR_UM_MESI
+which contains the bitwise or of all of the above.
+.Pp
+For event types dealing with bus transactions, there is another flag
+that can be set in the
+.Em "unit mask" :
+.Bl -tag -width P6CTR_UM_A
+.It Dv P6CTR_UM_A
+Count all appropriate bus events, not just those initiated by the
+processor.
+.El
+.Pp
+Finally, the least significant byte of the counter function is the
+event type to count.
+The following values are available:
+.Pp
+.Bl -tag -width 0x00 -compact
+.It 0x03 LD_BLOCKS
+Number of store buffer blocks.
+.It 0x04 SB_DRAINS
+Number of store buffer drain cycles.
+.It 0x05 MISALIGN_MEM_REF
+Number of misaligned data memory references.
+.It 0x06 SEGMENT_REG_LOADS
+Number of segment register loads.
+.It 0x10 FP_COMP_OPS_EXE (ctr0 only)
+Number of computational floating-point operations executed.
+.It 0x11 FP_ASSIST (ctr1 only)
+Number of floating-point exception cases handled by microcode.
+.It 0x12 MUL (ctr1 only)
+Number of multiplies.
+.It 0x13 DIV (ctr1 only)
+Number of divides.
+.It 0x14 CYCLES_DIV_BUSY (ctr0 only)
+Number of cycles during which the divider is busy.
+.It 0x21 L2_ADS
+Number of L2 address strobes.
+.It 0x22 L2_DBUS_BUSY
+Number of cycles during which the data bus was busy.
+.It 0x23 L2_DBUS_BUSY_RD
+Number of cycles during which the data bus was busy transferring data
+from L2 to the processor.
+.It 0x24 L2_LINES_IN
+Number of lines allocated in the L2.
+.It 0x25 L2_M_LINES_INM
+Number of modified lines allocated in the L2.
+.It 0x26 L2_LINES_OUT
+Number of lines removed from the L2 for any reason.
+.It 0x27 L2_M_LINES_OUTM
+Number of modified lines removed from the L2 for any reason.
+.It 0x28 L2_IFETCH/mesi
+Number of L2 instruction fetches.
+.It 0x29 L2_LD/mesi
+Number of L2 data loads.
+.It 0x2a L2_ST/mesi
+Number of L2 data stores.
+.It 0x2e L2_RQSTS/mesi
+Number of L2 requests.
+.It 0x43 DATA_MEM_REFS
+All memory references, both cacheable and non-cacheable.
+.It 0x45 DCU_LINES_IN
+Total lines allocated in the DCU.
+.It 0x46 DCU_M_LINES_IN
+Number of M state lines allocated in the DCU.
+.It 0x47 DCU_M_LINES_OUT
+Number of M state lines evicted from the DCU.
+This includes evictions via snoop HITM, intervention or replacement.
+.It 0x48 DCU_MISS_OUTSTANDING
+Weighted number of cycles while a DCU miss is outstanding.
+.It 0x60 BUS_REQ_OUTSTANDING
+Number of bus requests outstanding.
+.It 0x61 BUS_BNR_DRV
+Number of bus clock cycles during which the processor is driving the
+BNR pin.
+.It 0x62 BUS_DRDY_CLOCKS/a
+Number of clocks during which DRDY is asserted.
+.It 0x63 BUS_LOCK_CLOCKS/a
+Number of clocks during which LOCK is asserted.
+.It 0x64 BUS_DATA_RCV
+Number of bus clock cycles during which the processor is receiving
+data.
+.It 0x65 BUS_TRAN_BRD/a
+Number of burst read transactions.
+.It 0x66 BUS_TRAN_RFO/a
+Number of read for ownership transactions.
+.It 0x67 BUS_TRANS_WB/a
+Number of write back transactions.
+.It 0x68 BUS_TRAN_IFETCH/a
+Number of instruction fetch transactions.
+.It 0x69 BUS_TRAN_INVAL/a
+Number of invalidate transactions.
+.It 0x6a BUS_TRAN_PWR/a
+Number of partial write transactions.
+.It 0x6b BUS_TRANS_P/a
+Number of partial transactions.
+.It 0x6c BUS_TRANS_IO/a
+Number of I/O transactions.
+.It 0x6d BUS_TRAN_DEF/a
+Number of deferred transactions.
+.It 0x6e BUS_TRAN_BURST/a
+Number of burst transactions.
+.It 0x6f BUS_TRAN_MEM/a
+Number of memory transactions.
+.It 0x70 BUS_TRAN_ANY/a
+Number of all transactions.
+.It 0x79 CPU_CLK_UNHALTED
+Number of cycles during which the processor is not halted.
+.It 0x7a BUS_HIT_DRV
+Number of bus clock cycles during which the processor is driving the
+HIT pin.
+.It 0x7b BUS_HITM_DRV
+Number of bus clock cycles during which the processor is driving the
+HITM pin.
+.It 0x7e BUS_SNOOP_STALL
+Number of clock cycles during which the bus is snoop stalled.
+.It 0x80 IFU_IFETCH
+Number of instruction fetches, both cacheable and non-cacheable.
+.It 0x81 IFU_IFETCH_MISS
+Number of instruction fetch misses.
+.It 0x85 ITLB_MISS
+Number of ITLB misses.
+.It 0x86 IFU_MEM_STALL
+Number of cycles that the instruction fetch pipe stage is stalled,
+including cache misses, ITLB misses, ITLB faults, and victim cache
+evictions.
+.It 0x87 ILD_STALL
+Number of cycles that the instruction length decoder is stalled.
+.It 0xa2 RESOURCE_STALLS
+Number of cycles during which there are resource-related stalls.
+.It 0xc0 INST_RETIRED
+Number of instructions retired.
+.It 0xc1 FLOPS (ctr0 only)
+Number of computational floating-point operations retired.
+.It 0xc2 UOPS_RETIRED
+Number of UOPs retired.
+.It 0xc4 BR_INST_RETIRED
+Number of branch instructions retired.
+.It 0xc5 BR_MISS_PRED_RETIRED
+Number of mispredicted branches retired.
+.It 0xc6 CYCLES_INT_MASKED
+Number of processor cycles for which interrupts are disabled.
+.It 0xc7 CYCLES_INT_PENDING_AND_MASKED
+Number of processor cycles for which interrupts are disabled and
+interrupts are pending.
+.It 0xc8 HW_INT_RX
+Number of hardware interrupts received.
+.It 0xc9 BR_TAKEN_RETIRED
+Number of taken branches retired.
+.It 0xca BR_MISS_PRED_TAKEN_RET
+Number of taken mispredicted branches retired.
+.It 0xd0 INST_DECODER
+Number of instructions decoded.
+.It 0xd2 PARTIAL_RAT_STALLS
+Number of cycles or events for partial stalls.
+.It 0xe0 BR_INST_DECODED
+Number of branch instructions decoded.
+.It 0xe2 BTB_MISSES
+Number of branches that miss the BTB.
+.It 0xe4 BR_BOGUS
+Number of bogus branches.
+.It 0xe6 BACLEARS
+Number of times BACLEAR is asserted.
+.El
+.Pp
+Events marked /mesi require the
+.Dv P6CTR_UM_[MESI]
+bits in the
+.Em "unit mask" . \&
+Events marked /a can take the
+.Dv P6CTR_UM_A
+bit.
+.Pp
+Unlike the Pentium counters, the Pentium Pro counters can be read
+directly from user-mode without need to invoke the kernel.
+The macro
+.Fn rdpmc ctr
+takes 0 or 1 as an argument to specify a counter, and returns that
+counter's 40-bit value (which will be of type
+.Dv pctrval ) .
+This is generally preferable to making a system call as it introduces
+less distortion in measurements.
+However, you should be aware of the possibility of an interrupt between
+invocations of
+.Fn rdpmc
+and/or
+.Fn rdtsc .
+.Sh FILES
+.Bl -tag -width /dev/pctr -compact
+.It Pa /dev/pctr
+.El
+.Sh ERRORS
+.Bl -tag -width "[ENODEV]"
+.It Bq Er ENODEV
+An attempt was made to set the counter functions on a CPU that does
+not support counters.
+.It Bq Er EINVAL
+An invalid counter function was provided as an argument to the
+.Dv PCIOCS0
+or
+.Dv PCIOCS1
+.Em ioctl .
+.It Bq Er EPERM
+An attempt was made to set the counter functions, but the device was
+not open for writing.
+.El
+.Sh SEE ALSO
+.Xr pctr 1 ,
+.Xr ioctl 2
+.Sh HISTORY
+A
+.Nm
+device first appeared in
+.Ox 2.0 .
+.Sh AUTHORS
+The
+.Nm
+device was written by
+.An David Mazieres Aq dm@lcs.mit.edu .
+.Sh BUGS
+Not all counter functions are completely accurate.
+Some of the functions don't seem to make any sense at all.
diff --git a/sys/arch/amd64/amd64/conf.c b/sys/arch/amd64/amd64/conf.c
index c29ff624841..9f1323cb280 100644
--- a/sys/arch/amd64/amd64/conf.c
+++ b/sys/arch/amd64/amd64/conf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: conf.c,v 1.15 2007/08/02 16:40:27 deraadt Exp $ */
+/* $OpenBSD: conf.c,v 1.16 2007/09/12 18:18:27 deraadt Exp $ */
/*
* Copyright (c) 1994, 1995 Charles M. Hannum. All rights reserved.
@@ -100,7 +100,7 @@ int nblkdev = sizeof(bdevsw) / sizeof(bdevsw[0]);
#define cdev_ocis_init(c,n) { \
dev_init(c,n,open), dev_init(c,n,close), (dev_type_read((*))) enodev, \
(dev_type_write((*))) enodev, dev_init(c,n,ioctl), \
- (dev_type_stop((*))) enodev, 0, dev_init(c,n,select), \
+ (dev_type_stop((*))) enodev, 0, dev_init(c,n,poll), \
(dev_type_mmap((*))) enodev, 0 }
/* open, close, read */
@@ -151,6 +151,7 @@ cdev_decl(mcd);
cdev_decl(music);
#include "acpi.h"
#include "bthub.h"
+#include "pctr.h"
#include "iop.h"
#ifdef XFS
#include <xfs/nxfs.h>
@@ -241,7 +242,7 @@ struct cdevsw cdevsw[] =
#endif
cdev_notdef(), /* 44 */
cdev_random_init(1,random), /* 45: random data source */
- cdev_notdef(), /* 46 */
+ cdev_ocis_init(NPCTR,pctr), /* 46: performance counters */
cdev_disk_init(NRD,rd), /* 47: ram disk driver */
cdev_notdef(), /* 48 */
cdev_bktr_init(NBKTR,bktr), /* 49: Bt848 video capture device */
diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S
index ae0d1245563..e44923662e8 100644
--- a/sys/arch/amd64/amd64/locore.S
+++ b/sys/arch/amd64/amd64/locore.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: locore.S,v 1.22 2007/05/27 08:58:31 art Exp $ */
+/* $OpenBSD: locore.S,v 1.23 2007/09/12 18:18:27 deraadt Exp $ */
/* $NetBSD: locore.S,v 1.13 2004/03/25 18:33:17 drochner Exp $ */
/*
@@ -866,6 +866,9 @@ idle_zero:
cmpl $0,_C_LABEL(whichqs)(%rip)
jnz idle_exit
idle_loop:
+#if NPCTR > 0
+ incq _C_LABEL(pctr_idlcnt)
+#endif
/* Try to zero some pages. */
movl _C_LABEL(uvm)+UVM_PAGE_IDLE_ZERO(%rip),%ecx
testl %ecx,%ecx
diff --git a/sys/arch/amd64/amd64/pctr.c b/sys/arch/amd64/amd64/pctr.c
new file mode 100644
index 00000000000..3efe16604cb
--- /dev/null
+++ b/sys/arch/amd64/amd64/pctr.c
@@ -0,0 +1,169 @@
+/* $OpenBSD: pctr.c,v 1.1 2007/09/12 18:18:27 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2007 Mike Belopuhov
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Pentium performance counter driver for OpenBSD.
+ * Copyright 1996 David Mazieres <dm@lcs.mit.edu>.
+ *
+ * Modification and redistribution in source and binary forms is
+ * permitted provided that due credit is given to the author and the
+ * OpenBSD project by leaving this copyright notice intact.
+ */
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/fcntl.h>
+#include <sys/ioccom.h>
+#include <sys/systm.h>
+
+#include <machine/psl.h>
+#include <machine/pctr.h>
+#include <machine/cpu.h>
+#include <machine/cpufunc.h>
+#include <machine/specialreg.h>
+
+/*
+ * Check for Model Specific Registers and RDMSR/WRMSR support.
+ * pctrattach() and pctrioctl() test this before touching %cr4 or any MSR.
+ */
+#define usepctr (cpu_feature & CPUID_MSR)
+
+u_int64_t pctr_idlcnt; /* Idle-loop iteration count; gets incremented in locore.S */
+
+int pctr_isamd; /* Set by pctrattach(): cpu_vendor is "AuthenticAMD" */
+int pctr_isintel; /* Set by pctrattach(): cpu_vendor is "GenuineIntel"; only evaluated when !pctr_isamd */
+
+static void pctrrd(struct pctrst *); /* Snapshot selectors, TSC and counters into a pctrst */
+
+/*
+ * Fill in the hardware part of a PCIOCRD reply.
+ *
+ * Stores the event-select MSRs in st->pctr_fn[], the time stamp counter in
+ * st->pctr_tsc and the counter MSRs in st->pctr_hwc[].  AMD CPUs use the K7
+ * MSR bank and PCTR_AMD_NUM counters; everything else uses the Intel bank
+ * and PCTR_INTEL_NUM counters, leaving the remaining array slots untouched.
+ * The caller must already have checked usepctr.
+ */
+static void
+pctrrd(struct pctrst *st)
+{
+	u_long rflags;
+	int i, num, reg;
+
+	num = pctr_isamd ? PCTR_AMD_NUM : PCTR_INTEL_NUM;
+
+	/* Current function (event selector) of each counter. */
+	reg = pctr_isamd ? MSR_K7_EVNTSEL0 : MSR_EVNTSEL0;
+	for (i = 0; i < num; i++)
+		st->pctr_fn[i] = rdmsr(reg + i);
+
+	reg = pctr_isamd ? MSR_K7_PERFCTR0 : MSR_PERFCTR0;
+
+	/*
+	 * Sample the TSC and the counters back to back with interrupts
+	 * blocked so the readings belong together.  The previous interrupt
+	 * state is restored afterwards instead of being forced on, so this
+	 * stays correct even if a caller runs with interrupts disabled.
+	 */
+	rflags = read_rflags();
+	disable_intr();
+
+	st->pctr_tsc = rdtsc();
+	for (i = 0; i < num; i++)
+		st->pctr_hwc[i] = rdmsr(reg + i);
+
+	write_rflags(rflags);
+}
+
+void
+pctrattach(int num)
+{
+ /*
+ * Attach hook for the pctr pseudo-device.  Records the CPU vendor in
+ * pctr_isamd/pctr_isintel and, when MSRs are available, rewrites %cr4.
+ * Does nothing when num > 1 (num is presumably the unit count from the
+ * kernel configuration -- confirm against the pseudo-device caller).
+ */
+ if (num > 1)
+ return;
+ /* Any vendor other than AMD or Intel leaves both flags zero and bails out. */
+ pctr_isamd = (strcmp(cpu_vendor, "AuthenticAMD") == 0);
+ if (!pctr_isamd)
+ pctr_isintel = (strcmp(cpu_vendor, "GenuineIntel") == 0);
+ if (!pctr_isintel && !pctr_isamd)
+ return;
+
+ /*
+ * Enable RDTSC and RDPMC instructions from user-level.
+ * The asm loads %cr4 into %rax, ANDs it with ~CR4_TSD (operand %0),
+ * ORs in CR4_PCE (operand %1) and stores the result back to %cr4;
+ * %rax is declared clobbered because it is used as scratch.
+ */
+ if (usepctr) {
+ __asm __volatile("movq %%cr4,%%rax\n"
+ "\tandq %0,%%rax\n"
+ "\torq %1,%%rax\n"
+ "\tmovq %%rax,%%cr4"
+ :: "i" (~CR4_TSD), "i" (CR4_PCE) : "rax");
+ printf("pctr: user-level performance counters enabled\n");
+ }
+}
+
+/*
+ * Open entry point.  Only minor number 0 is accepted; any other minor
+ * yields ENXIO.  The remaining arguments are not examined.
+ */
+int
+pctropen(dev_t dev, int oflags, int devtype, struct proc *p)
+{
+	return (minor(dev) != 0 ? ENXIO : 0);
+}
+
+/*
+ * Close entry point.  pctropen() records nothing per open, so there is
+ * nothing to release here and the call always succeeds.
+ */
+int
+pctrclose(dev_t dev, int oflags, int devtype, struct proc *p)
+{
+	return (0);
+}
+
+/*
+ * Program the event selector of one counter.
+ *
+ * fflag is the open-file flag word, cmd is one of PCIOCS0..PCIOCS3 and fn
+ * is the new selector value.  Returns EINVAL when cmd names a counter that
+ * the CPU class does not have or when fn has any bit of 0x380000 set,
+ * EPERM when the descriptor is not open for writing, and 0 on success.
+ */
+int
+pctrsel(int fflag, u_int32_t cmd, u_int32_t fn)
+{
+	u_int32_t idx, nctr;
+	int selbase, ctrbase, selreg, ctrreg;
+
+	/* The PCIOCSn command numbers are consecutive, so this yields n. */
+	idx = cmd - PCIOCS0;
+
+	if (pctr_isamd) {
+		nctr = PCTR_AMD_NUM;
+		selbase = MSR_K7_EVNTSEL0;
+		ctrbase = MSR_K7_PERFCTR0;
+	} else {
+		nctr = PCTR_INTEL_NUM;
+		selbase = MSR_EVNTSEL0;
+		ctrbase = MSR_PERFCTR0;
+	}
+	if (idx >= nctr)
+		return (EINVAL);
+	selreg = selbase + idx;
+	ctrreg = ctrbase + idx;
+
+	if ((fflag & FWRITE) == 0)
+		return (EPERM);
+	if ((fn & 0x380000) != 0)
+		return (EINVAL);
+
+	/* Zero the counter on both sides of the selector write. */
+	wrmsr(ctrreg, 0);
+	wrmsr(selreg, fn);
+	wrmsr(ctrreg, 0);
+
+	return (0);
+}
+
+/*
+ * ioctl entry point.
+ *
+ * PCIOCRD fills the struct pctrst at data: the idle-loop count always, plus
+ * selectors, TSC and counters when the CPU has MSRs and is a recognised
+ * vendor (only the TSC when it has MSRs but the vendor is unknown).
+ * PCIOCS0..PCIOCS3 hand the u_int32_t at data to pctrsel() and return its
+ * result, or ENODEV when the counters cannot be programmed on this CPU.
+ * Any other command returns EINVAL.
+ */
+int
+pctrioctl(dev_t dev, u_int64_t cmd, caddr_t data, int fflag, struct proc *p)
+{
+	/*
+	 * pctrattach() only recognises AMD and Intel.  For anything else
+	 * (or if attach bailed out early) both flags are zero and we must
+	 * not poke at performance MSRs that may not exist on this CPU.
+	 */
+	int known = (pctr_isamd || pctr_isintel);
+
+	switch (cmd) {
+	case PCIOCRD:
+	{
+		struct pctrst *st = (struct pctrst *)data;
+
+		if (usepctr) {
+			if (known)
+				pctrrd(st);
+			else
+				st->pctr_tsc = rdtsc();
+		}
+		st->pctr_idl = pctr_idlcnt;
+		return (0);
+	}
+	case PCIOCS0:
+	case PCIOCS1:
+	case PCIOCS2:
+	case PCIOCS3:
+		if (usepctr && known)
+			return (pctrsel(fflag, cmd, *(u_int32_t *)data));
+		return (ENODEV);
+	default:
+		return (EINVAL);
+	}
+}
diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC
index e4beedab336..55a7d16bfcf 100644
--- a/sys/arch/amd64/conf/GENERIC
+++ b/sys/arch/amd64/conf/GENERIC
@@ -1,4 +1,4 @@
-# $OpenBSD: GENERIC,v 1.195 2007/09/09 01:37:28 jsg Exp $
+# $OpenBSD: GENERIC,v 1.196 2007/09/12 18:18:27 deraadt Exp $
#
# For further information on compiling OpenBSD kernels, see the config(8)
# man page.
@@ -531,6 +531,7 @@ owid* at onewire? # ID
owsbm* at onewire? # Smart Battery Monitor
owtemp* at onewire? # Temperature
+pseudo-device pctr 1
pseudo-device nvram 1
pseudo-device sequencer 1
#pseudo-device raid 4 # RAIDframe disk driver
diff --git a/sys/arch/amd64/conf/files.amd64 b/sys/arch/amd64/conf/files.amd64
index 4ce1947ae75..bb14e4e4ae7 100644
--- a/sys/arch/amd64/conf/files.amd64
+++ b/sys/arch/amd64/conf/files.amd64
@@ -1,4 +1,4 @@
-# $OpenBSD: files.amd64,v 1.33 2007/08/02 16:40:27 deraadt Exp $
+# $OpenBSD: files.amd64,v 1.34 2007/09/12 18:18:27 deraadt Exp $
maxpartitions 16
maxusers 2 16 128
@@ -164,6 +164,9 @@ device fd: disk, isa_dma
attach fd at fdc
file dev/isa/fd.c fd needs-flag
+pseudo-device pctr
+file arch/amd64/amd64/pctr.c pctr needs-flag
+
pseudo-device nvram
file arch/amd64/amd64/nvram.c nvram needs-flag
diff --git a/sys/arch/amd64/include/conf.h b/sys/arch/amd64/include/conf.h
index 960b06eae53..ced984a5b7f 100644
--- a/sys/arch/amd64/include/conf.h
+++ b/sys/arch/amd64/include/conf.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: conf.h,v 1.3 2005/12/31 21:22:32 miod Exp $ */
+/* $OpenBSD: conf.h,v 1.4 2007/09/12 18:18:27 deraadt Exp $ */
/* $NetBSD: conf.h,v 1.2 1996/05/05 19:28:34 christos Exp $ */
/*
@@ -50,3 +50,6 @@ cdev_decl(bios);
(dev_type_stop((*))) enodev, 0, (dev_type_poll((*))) enodev, \
(dev_type_mmap((*))) enodev, 0, D_KQFILTER, dev_init(c,n,kqfilter) }
cdev_decl(acpi);
+
+#define pctrpoll seltrue
+cdev_decl(pctr);
diff --git a/sys/arch/amd64/include/pctr.h b/sys/arch/amd64/include/pctr.h
new file mode 100644
index 00000000000..35c651ebffd
--- /dev/null
+++ b/sys/arch/amd64/include/pctr.h
@@ -0,0 +1,78 @@
+/* $OpenBSD: pctr.h,v 1.1 2007/09/12 18:18:27 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2007 Mike Belopuhov
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Pentium performance counter driver for OpenBSD.
+ * Copyright 1996 David Mazieres <dm@lcs.mit.edu>.
+ *
+ * Modification and redistribution in source and binary forms is
+ * permitted provided that due credit is given to the author and the
+ * OpenBSD project by leaving this copyright notice intact.
+ */
+
+#ifndef _AMD64_PCTR_H_
+#define _AMD64_PCTR_H_
+
+#include <sys/ioccom.h>
+
+#define PCTR_NUM 4 /* Array size of pctr_hwc[] and pctr_fn[] in struct pctrst */
+#define PCTR_AMD_NUM PCTR_NUM /* Counters handled when the CPU is AMD */
+#define PCTR_INTEL_NUM 2 /* Counters handled otherwise; Intel supports only 2 counters */
+
+struct pctrst { /* Argument of the PCIOCRD ioctl */
+ u_int64_t pctr_hwc[PCTR_NUM]; /* Values of the hardware counters, read from the PERFCTR MSRs */
+ u_int64_t pctr_tsc; /* Free-running 64-bit cycle counter */
+ u_int64_t pctr_idl; /* Iterations of the idle loop (copy of pctr_idlcnt) */
+ u_int32_t pctr_fn[PCTR_NUM]; /* Current settings of counters, read from the EVNTSEL MSRs */
+};
+
+/* Bit values in the pctr_fn fields and in the argument of the PCIOCS0..PCIOCS3 ioctls */
+#define PCTR_U 0x010000 /* Monitor user-level events */
+#define PCTR_K 0x020000 /* Monitor kernel-level events */
+#define PCTR_E 0x040000 /* Edge detect */
+#define PCTR_EN 0x400000 /* Enable counters (counter 0 only) */
+#define PCTR_I 0x800000 /* Invert counter mask */
+
+/*
+ * Unit Mask bits.
+ * NOTE(review): PCTR_UM_MESI is spelled with PCTR_UM_O rather than
+ * PCTR_UM_M, which makes it 0x0f.  Presumably that is deliberate (a
+ * four-bit mask for CPUs whose unit mask has no Owned bit) -- confirm
+ * against the vendor manuals before "correcting" it to include 0x10.
+ */
+#define PCTR_UM_M 0x10 /* Modified cache lines */
+#define PCTR_UM_O 0x08 /* Owned cache lines */
+#define PCTR_UM_E 0x04 /* Exclusive cache lines */
+#define PCTR_UM_S 0x02 /* Shared cache lines */
+#define PCTR_UM_I 0x01 /* Invalid cache lines */
+#define PCTR_UM_MESI (PCTR_UM_O|PCTR_UM_E|PCTR_UM_S|PCTR_UM_I)
+#define PCTR_UM_MOESI (PCTR_UM_M|PCTR_UM_O|PCTR_UM_E|PCTR_UM_S|PCTR_UM_I)
+
+/*
+ * ioctls to read the counters and to set which event each counter tracks.
+ * PCIOCS0..PCIOCS3 must keep consecutive command numbers: pctrsel()
+ * derives the counter index as (cmd - PCIOCS0).
+ */
+#define PCIOCRD _IOR('c', 1, struct pctrst) /* Read counter value */
+#define PCIOCS0 _IOW('c', 8, unsigned int) /* Set counter 0 function */
+#define PCIOCS1 _IOW('c', 9, unsigned int) /* Set counter 1 function */
+#define PCIOCS2 _IOW('c', 10, unsigned int) /* Set counter 2 function */
+#define PCIOCS3 _IOW('c', 11, unsigned int) /* Set counter 3 function */
+
+#define _PATH_PCTR "/dev/pctr" /* Device node of the driver */
+
+#ifdef _KERNEL
+/* Driver entry points, implemented in arch/amd64/amd64/pctr.c */
+void pctrattach(int); /* Detect CPU vendor, enable user-level RDTSC/RDPMC */
+int pctropen(dev_t, int, int, struct proc *); /* ENXIO unless minor is 0 */
+int pctrclose(dev_t, int, int, struct proc *); /* Always returns 0 */
+int pctrioctl(dev_t, u_int64_t, caddr_t, int, struct proc *); /* PCIOCRD, PCIOCS0..PCIOCS3 */
+int pctrsel(int fflag, u_int32_t, u_int32_t); /* (fflag, ioctl cmd, new counter function) */
+
+#endif /* _KERNEL */
+#endif /* ! _AMD64_PCTR_H_ */