summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpjanzen <pjanzen@openbsd.org>1999-08-15 00:07:43 +0000
committerpjanzen <pjanzen@openbsd.org>1999-08-15 00:07:43 +0000
commit52f4ec15411aef334ba8f2274866b3fe48469294 (patch)
tree95312224a86ca2d48e861e6b7308db3795ed756b
parentpciide & atapiscsi (diff)
downloadwireguard-openbsd-52f4ec15411aef334ba8f2274866b3fe48469294.tar.xz
wireguard-openbsd-52f4ec15411aef334ba8f2274866b3fe48469294.zip
Adopt NetBSD fix for scheduler problems (nice was broken). From the NetBSD
commit messages: Scheduler bug fixes and reorganization * fix the ancient nice(1) bug, where nice +20 processes incorrectly steal 10 - 20% of the CPU, (or even more depending on load average) * provide a new schedclock() mechanism at a new clock at schedhz, so high platform hz values don't cause nice +0 processes to look like they are niced * change the algorithm slightly, and reorganize the code a lot * fix percent-CPU calculation bugs, and eliminate some no-op code === nice bug === Correctly divide the scheduler queues between niced and compute-bound processes. The current nice weight of two (sort of, see `algorithm change' below) neatly divides the USRPRI queues in half; this should have been used to clip p_estcpu, instead of UCHAR_MAX. Besides being the wrong amount, clipping an unsigned char to UCHAR_MAX is a no-op, and it was done after decay_cpu() which can only _reduce_ the value. It has to be kept <= NICE_WEIGHT * PRIO_MAX - PPQ or processes can scheduler-penalize themselves onto the same queue as nice +20 processes. (Or even a higher one.) === New schedclock() mechanism === Some platforms should be cutting down stathz before hitting the scheduler, since the scheduler algorithm only works right in the vicinity of 64 Hz. Rather than prescale hz, then scale back and forth by 4 every time p_estcpu is touched (each occurrence an abstraction violation), use p_estcpu without scaling and require schedhz to be generated directly at the right frequency. Use a default stathz (well, actually, profhz) / 4, so nothing changes unless a platform defines schedhz and a new clock. [ To do: Define these for alpha, where hz==1024, and nice was totally broken.] === Algorithm change === The nice value used to be added to the exponentially-decayed scheduler history value p_estcpu, in _addition_ to being incorporated directly (with greater weight) into the priority calculation. 
At first glance, it appears to be a pointless increase of 1/8 the nice effect (pri = p_estcpu/4 + nice*2), but it's actually at least 3x that because it will ramp up linearly but be decayed only exponentially, thus converging to an additional .75 nice for a load average of one. I killed this: it makes the behavior hard to control, almost impossible to analyze, and the effect (~~nothing at all for the first second, then somewhat increased niceness after three seconds or more, depending on load average) pointless. === Other bugs === hz -> profhz in the p_pctcpu = f(p_cpticks) calculation. Collect scheduler functionality. Try to put each abstraction in just one place.
-rw-r--r--sys/kern/kern_clock.c32
-rw-r--r--sys/kern/kern_exit.c7
-rw-r--r--sys/kern/kern_fork.c5
-rw-r--r--sys/kern/kern_synch.c44
-rw-r--r--sys/sys/sched.h123
5 files changed, 175 insertions, 36 deletions
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index 18ba59ce7a3..51c088df066 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_clock.c,v 1.20 1998/08/27 05:00:17 deraadt Exp $ */
+/* $OpenBSD: kern_clock.c,v 1.21 1999/08/15 00:07:43 pjanzen Exp $ */
/* $NetBSD: kern_clock.c,v 1.34 1996/06/09 04:51:03 briggs Exp $ */
/*-
@@ -52,6 +52,7 @@
#include <vm/vm.h>
#include <sys/sysctl.h>
#include <sys/timex.h>
+#include <sys/sched.h>
#include <machine/cpu.h>
@@ -277,6 +278,7 @@ long clock_cpu = 0; /* CPU clock adjust */
}
int stathz;
+int schedhz;
int profhz;
int profprocs;
int ticks;
@@ -902,6 +904,7 @@ statclock(frame)
register struct gmonparam *g;
register int i;
#endif
+ static int schedclk;
register struct proc *p;
if (CLKF_USERMODE(frame)) {
@@ -960,28 +963,15 @@ statclock(frame)
}
pscnt = psdiv;
- /*
- * We adjust the priority of the current process. The priority of
- * a process gets worse as it accumulates CPU time. The cpu usage
- * estimator (p_estcpu) is increased here. The formula for computing
- * priorities (in kern_synch.c) will compute a different value each
- * time p_estcpu increases by 4. The cpu usage estimator ramps up
- * quite quickly when the process is running (linearly), and decays
- * away exponentially, at a rate which is proportionally slower when
- * the system is busy. The basic principal is that the system will
- * 90% forget that the process used a lot of CPU time in 5 * loadav
- * seconds. This causes the system to favor processes which haven't
- * run much recently, and to round-robin among other processes.
- */
if (p != NULL) {
p->p_cpticks++;
- if (++p->p_estcpu == 0)
- p->p_estcpu--;
- if ((p->p_estcpu & 3) == 0) {
- resetpriority(p);
- if (p->p_priority >= PUSER)
- p->p_priority = p->p_usrpri;
- }
+ /*
+ * If no schedclock is provided, call it here at ~~12-25 Hz;
+ * ~~16 Hz is best
+ */
+ if (schedhz == 0)
+ if ((++schedclk & 3) == 0)
+ schedclock(p);
}
}
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index b0d9b8263b2..e64e4174996 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_exit.c,v 1.19 1999/07/15 14:11:05 art Exp $ */
+/* $OpenBSD: kern_exit.c,v 1.20 1999/08/15 00:07:43 pjanzen Exp $ */
/* $NetBSD: kern_exit.c,v 1.39 1996/04/22 01:38:25 christos Exp $ */
/*
@@ -62,6 +62,7 @@
#include <sys/acct.h>
#include <sys/filedesc.h>
#include <sys/signalvar.h>
+#include <sys/sched.h>
#ifdef SYSVSHM
#include <sys/shm.h>
#endif
@@ -369,9 +370,7 @@ loop:
return (0);
}
- /* Charge us for our child's sins */
- curproc->p_estcpu = min(curproc->p_estcpu +
- p->p_estcpu, UCHAR_MAX);
+ scheduler_wait_hook(curproc, p);
p->p_xstat = 0;
ruadd(&q->p_stats->p_cru, p->p_ru);
FREE(p->p_ru, M_ZOMBIE);
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 1aade0e4ea0..ef16a476ee7 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_fork.c,v 1.22 1999/07/17 21:49:37 art Exp $ */
+/* $OpenBSD: kern_fork.c,v 1.23 1999/08/15 00:07:43 pjanzen Exp $ */
/* $NetBSD: kern_fork.c,v 1.29 1996/02/09 18:59:34 christos Exp $ */
/*
@@ -54,6 +54,7 @@
#include <sys/file.h>
#include <sys/acct.h>
#include <sys/ktrace.h>
+#include <sys/sched.h>
#include <dev/rndvar.h>
#include <sys/syscallargs.h>
@@ -302,7 +303,7 @@ again:
* XXX should move p_estcpu into the region of struct proc which gets
* copied.
*/
- p2->p_estcpu = p1->p_estcpu;
+ scheduler_fork_hook(p1, p2);
/*
* This begins the section where we must prevent the parent
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index e19399253cd..ef13cb74569 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_synch.c,v 1.15 1999/04/21 01:21:48 alex Exp $ */
+/* $OpenBSD: kern_synch.c,v 1.16 1999/08/15 00:07:44 pjanzen Exp $ */
/* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */
/*-
@@ -49,6 +49,7 @@
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <vm/vm.h>
+#include <sys/sched.h>
#if defined(UVM)
#include <uvm/uvm_extern.h>
@@ -199,21 +200,21 @@ schedcpu(arg)
/*
* p_pctcpu is only for ps.
*/
+ KASSERT(profhz);
#if (FSHIFT >= CCPU_SHIFT)
- p->p_pctcpu += (hz == 100)?
+ p->p_pctcpu += (profhz == 100)?
((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
100 * (((fixpt_t) p->p_cpticks)
- << (FSHIFT - CCPU_SHIFT)) / hz;
+ << (FSHIFT - CCPU_SHIFT)) / profhz;
#else
p->p_pctcpu += ((FSCALE - ccpu) *
- (p->p_cpticks * FSCALE / hz)) >> FSHIFT;
+ (p->p_cpticks * FSCALE / profhz)) >> FSHIFT;
#endif
p->p_cpticks = 0;
- newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu) + p->p_nice;
- p->p_estcpu = min(newcpu, UCHAR_MAX);
+ newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu);
+ p->p_estcpu = newcpu;
resetpriority(p);
if (p->p_priority >= PUSER) {
-#define PPQ (128 / NQS) /* priorities per queue */
if ((p != curproc) &&
p->p_stat == SRUN &&
(p->p_flag & P_INMEM) &&
@@ -253,7 +254,7 @@ updatepri(p)
p->p_slptime--; /* the first time was done in schedcpu */
while (newcpu && --p->p_slptime)
newcpu = (int) decay_cpu(loadfac, newcpu);
- p->p_estcpu = min(newcpu, UCHAR_MAX);
+ p->p_estcpu = newcpu;
}
resetpriority(p);
}
@@ -691,13 +692,38 @@ resetpriority(p)
{
register unsigned int newpriority;
- newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice;
+ newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO);
newpriority = min(newpriority, MAXPRI);
p->p_usrpri = newpriority;
if (newpriority < curpriority)
need_resched();
}
+/*
+ * We adjust the priority of the current process. The priority of a process
+ * gets worse as it accumulates CPU time. The cpu usage estimator (p_estcpu)
+ * is increased here. The formula for computing priorities (in kern_synch.c)
+ * will compute a different value each time p_estcpu increases. This can
+ * cause a switch, but unless the priority crosses a PPQ boundary the actual
+ * queue will not change. The cpu usage estimator ramps up quite quickly
+ * when the process is running (linearly), and decays away exponentially, at
+ * a rate which is proportionally slower when the system is busy. The basic
+ * principle is that the system will 90% forget that the process used a lot
+ * of CPU time in 5 * loadav seconds. This causes the system to favor
+ * processes which haven't run much recently, and to round-robin among other
+ * processes.
+ */
+
+void
+schedclock(p)
+ struct proc *p;
+{
+ p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
+ resetpriority(p);
+ if (p->p_priority >= PUSER)
+ p->p_priority = p->p_usrpri;
+}
+
#ifdef DDB
#include <machine/db_machdep.h>
diff --git a/sys/sys/sched.h b/sys/sys/sched.h
new file mode 100644
index 00000000000..c44d449ec2b
--- /dev/null
+++ b/sys/sys/sched.h
@@ -0,0 +1,123 @@
+/* $OpenBSD: sched.h,v 1.1 1999/08/15 00:07:50 pjanzen Exp $ */
+/* $NetBSD: sched.h,v 1.2 1999/02/28 18:14:58 ross Exp $ */
+
+/*-
+ * Copyright (c) 1999 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Ross Harvey.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
+ */
+
+#ifndef _SYS_SCHED_H_
+#define _SYS_SCHED_H_
+
+/*
+ * Posix defines a <sched.h> which may want to include <sys/sched.h>
+ */
+
+#ifdef _KERNEL
+
+#define PPQ (128 / NQS) /* priorities per queue */
+#define NICE_WEIGHT 2 /* priorities per nice level */
+#define ESTCPULIM(e) min((e), NICE_WEIGHT * PRIO_MAX - PPQ)
+
+/* int schedhz; * ideally: 16 */
+
+#ifdef _SYS_PROC_H_
+void schedclock __P((struct proc *p));
+static __inline void scheduler_fork_hook __P((
+ struct proc *parent, struct proc *child));
+static __inline void scheduler_wait_hook __P((
+ struct proc *parent, struct proc *child));
+
+/* Inherit the parent's scheduler history */
+
+static __inline void
+scheduler_fork_hook(parent, child)
+ struct proc *parent, *child;
+{
+ child->p_estcpu = parent->p_estcpu;
+}
+
+/* Chargeback parents for the sins of their children. */
+
+static __inline void
+scheduler_wait_hook(parent, child)
+ struct proc *parent, *child;
+{
+ /* XXX just return if parent == init?? */
+
+ parent->p_estcpu = ESTCPULIM(parent->p_estcpu + child->p_estcpu);
+}
+#endif /* _SYS_PROC_H_ */
+#endif /* _KERNEL */
+#endif /* _SYS_SCHED_H_ */