summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author pd <pd@openbsd.org> 2019-12-12 03:53:38 +0000
committer pd <pd@openbsd.org> 2019-12-12 03:53:38 +0000
commit de12a377567363846a900563ea7c04db29c1b09c (patch)
tree e91ea788daad739ce2486d3927313bf9f5b38fcb
parent Make ldpd lookup the adjacency not only by source IP address but also (diff)
download wireguard-openbsd-de12a377567363846a900563ea7c04db29c1b09c.tar.xz
wireguard-openbsd-de12a377567363846a900563ea7c04db29c1b09c.zip
vmd: start vms defined in vm.conf in a staggered fashion
This addresses the 'thundering herd' problem when a lot of vms are configured in vm.conf. A lot of vms booting in parallel can overload the host and also mess up tsc calibration in openbsd guests, as it uses the PIT, which doesn't fire reliably if the host is overloaded. We default to starting vms with a parallelism of ncpuonline and a delay of 30 seconds between batches. This is configurable in vm.conf. ok mlarkin@ (also addressed comments from cheloha@)
-rw-r--r-- usr.sbin/vmctl/vmctl.c 4
-rw-r--r-- usr.sbin/vmd/parse.y 16
-rw-r--r-- usr.sbin/vmd/vm.conf.5 14
-rw-r--r-- usr.sbin/vmd/vmd.c 82
-rw-r--r-- usr.sbin/vmd/vmd.h 9
5 files changed, 90 insertions, 35 deletions
diff --git a/usr.sbin/vmctl/vmctl.c b/usr.sbin/vmctl/vmctl.c
index 195ffc8ab47..52f57268947 100644
--- a/usr.sbin/vmctl/vmctl.c
+++ b/usr.sbin/vmctl/vmctl.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmctl.c,v 1.71 2019/09/07 09:11:14 tobhe Exp $ */
+/* $OpenBSD: vmctl.c,v 1.72 2019/12/12 03:53:38 pd Exp $ */
/*
* Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
@@ -716,6 +716,8 @@ vm_state(unsigned int mask)
{
if (mask & VM_STATE_PAUSED)
return "paused";
+ else if (mask & VM_STATE_WAITING)
+ return "waiting";
else if (mask & VM_STATE_RUNNING)
return "running";
else if (mask & VM_STATE_SHUTDOWN)
diff --git a/usr.sbin/vmd/parse.y b/usr.sbin/vmd/parse.y
index 81cfa04d466..8b323af25c4 100644
--- a/usr.sbin/vmd/parse.y
+++ b/usr.sbin/vmd/parse.y
@@ -1,4 +1,4 @@
-/* $OpenBSD: parse.y,v 1.52 2019/05/14 06:05:45 anton Exp $ */
+/* $OpenBSD: parse.y,v 1.53 2019/12/12 03:53:38 pd Exp $ */
/*
* Copyright (c) 2007-2016 Reyk Floeter <reyk@openbsd.org>
@@ -122,7 +122,8 @@ typedef struct {
%token INCLUDE ERROR
%token ADD ALLOW BOOT CDROM DEVICE DISABLE DISK DOWN ENABLE FORMAT GROUP
%token INET6 INSTANCE INTERFACE LLADDR LOCAL LOCKED MEMORY NET NIFS OWNER
-%token PATH PREFIX RDOMAIN SIZE SOCKET SWITCH UP VM VMID
+%token PATH PREFIX RDOMAIN SIZE SOCKET SWITCH UP VM VMID STAGGERED START
+%token PARALLEL DELAY
%token <v.number> NUMBER
%token <v.string> STRING
%type <v.lladdr> lladdr
@@ -217,6 +218,11 @@ main : LOCAL INET6 {
env->vmd_ps.ps_csock.cs_uid = $3.uid;
env->vmd_ps.ps_csock.cs_gid = $3.gid == -1 ? 0 : $3.gid;
}
+ | STAGGERED START PARALLEL NUMBER DELAY NUMBER {
+ env->vmd_cfg.cfg_flags |= VMD_CFG_STAGGERED_START;
+ env->vmd_cfg.delay.tv_sec = $6;
+ env->vmd_cfg.parallelism = $4;
+ }
;
switch : SWITCH string {
@@ -368,6 +374,8 @@ vm : VM string vm_instance {
} else {
if (vcp_disable)
vm->vm_state |= VM_STATE_DISABLED;
+ else
+ vm->vm_state |= VM_STATE_WAITING;
log_debug("%s:%d: vm \"%s\" "
"registered (%s)",
file->name, yylval.lineno,
@@ -766,6 +774,7 @@ lookup(char *s)
{ "allow", ALLOW },
{ "boot", BOOT },
{ "cdrom", CDROM },
+ { "delay", DELAY },
{ "device", DEVICE },
{ "disable", DISABLE },
{ "disk", DISK },
@@ -785,10 +794,13 @@ lookup(char *s)
{ "memory", MEMORY },
{ "net", NET },
{ "owner", OWNER },
+ { "parallel", PARALLEL },
{ "prefix", PREFIX },
{ "rdomain", RDOMAIN },
{ "size", SIZE },
{ "socket", SOCKET },
+ { "staggered", STAGGERED },
+ { "start", START },
{ "switch", SWITCH },
{ "up", UP },
{ "vm", VM }
diff --git a/usr.sbin/vmd/vm.conf.5 b/usr.sbin/vmd/vm.conf.5
index 2d2fe974dcf..8c864682329 100644
--- a/usr.sbin/vmd/vm.conf.5
+++ b/usr.sbin/vmd/vm.conf.5
@@ -1,4 +1,4 @@
-.\" $OpenBSD: vm.conf.5,v 1.44 2019/05/14 12:47:17 schwarze Exp $
+.\" $OpenBSD: vm.conf.5,v 1.45 2019/12/12 03:53:38 pd Exp $
.\"
.\" Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
.\" Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
@@ -15,7 +15,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: May 14 2019 $
+.Dd $Mdocdate: December 12 2019 $
.Dt VM.CONF 5
.Os
.Sh NAME
@@ -91,6 +91,16 @@ vm "vm1.example.com" {
.Sh GLOBAL CONFIGURATION
The following setting can be configured globally:
.Bl -tag -width Ds
+.It Ic staggered start parallel Ar parallelism Ic delay Ar seconds
+Start all configured vms in a staggered fashion with
+.Ar parallelism
+instances in parallel every
+.Ar delay
+seconds. Defaults to
+.Ar parallelism
+equal to the number of online cpus and a
+.Ar delay
+of 30 seconds.
.It Ic local prefix Ar address Ns Li / Ns Ar prefix
Set the network prefix that is used to allocate subnets for
local interfaces, see
diff --git a/usr.sbin/vmd/vmd.c b/usr.sbin/vmd/vmd.c
index 9d329745a97..634c81e79ac 100644
--- a/usr.sbin/vmd/vmd.c
+++ b/usr.sbin/vmd/vmd.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmd.c,v 1.116 2019/09/04 07:02:03 mlarkin Exp $ */
+/* $OpenBSD: vmd.c,v 1.117 2019/12/12 03:53:38 pd Exp $ */
/*
* Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
@@ -21,6 +21,7 @@
#include <sys/wait.h>
#include <sys/cdefs.h>
#include <sys/stat.h>
+#include <sys/sysctl.h>
#include <sys/tty.h>
#include <sys/ttycom.h>
#include <sys/ioctl.h>
@@ -63,6 +64,7 @@ int vm_instance(struct privsep *, struct vmd_vm **,
struct vmop_create_params *, uid_t);
int vm_checkinsflag(struct vmop_create_params *, unsigned int, uid_t);
int vm_claimid(const char *, int, uint32_t *);
+void start_vm_batch(int, short, void*);
struct vmd *env;
@@ -73,6 +75,8 @@ static struct privsep_proc procs[] = {
{ "vmm", PROC_VMM, vmd_dispatch_vmm, vmm, vmm_shutdown },
};
+struct event staggered_start_timer;
+
/* For the privileged process */
static struct privsep_proc *proc_priv = &procs[0];
static struct passwd proc_privpw;
@@ -854,11 +858,40 @@ main(int argc, char **argv)
return (0);
}
+void
+start_vm_batch(int fd, short type, void *args) /* callback installed on staggered_start_timer; starts up to vmd_cfg.parallelism waiting vms per call; fd, type and args are not read */
+{
+ int i = 0; /* number of waiting vms seen in this pass */
+ struct vmd_vm *vm;
+
+ log_debug("%s: starting batch of %d vms", __func__,
+ env->vmd_cfg.parallelism);
+ TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
+ if (!(vm->vm_state & VM_STATE_WAITING)) { /* skip every vm that does not have the WAITING flag set */
+ log_debug("%s: not starting vm %s (disabled)",
+ __func__,
+ vm->vm_params.vmc_params.vcp_name);
+ continue; /* NOTE(review): the "(disabled)" message is also logged for vms whose WAITING flag was cleared by an earlier pass */
+ }
+ i++;
+ if (i > env->vmd_cfg.parallelism) { /* one more waiting vm than fits in this batch */
+ evtimer_add(&staggered_start_timer,
+ &env->vmd_cfg.delay); /* re-arm the timer so the remaining vms are handled after vmd_cfg.delay */
+ break;
+ }
+ vm->vm_state &= ~VM_STATE_WAITING; /* clear WAITING so later passes skip this vm */
+ config_setvm(&env->vmd_ps, vm, -1, vm->vm_params.vmc_owner.uid); /* NOTE(review): return value is not checked here */
+ }
+ log_debug("%s: done starting vms", __func__);
+}
+
int
vmd_configure(void)
{
- struct vmd_vm *vm;
+ int ncpus;
struct vmd_switch *vsw;
+ int ncpu_mib[] = {CTL_HW, HW_NCPUONLINE};
+ size_t ncpus_sz = sizeof(ncpus);
if ((env->vmd_ptmfd = open(PATH_PTMDEV, O_RDWR|O_CLOEXEC)) == -1)
fatal("open %s", PATH_PTMDEV);
@@ -906,18 +939,21 @@ vmd_configure(void)
}
}
- TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
- if (vm->vm_state & VM_STATE_DISABLED) {
- log_debug("%s: not creating vm %s (disabled)",
- __func__,
- vm->vm_params.vmc_params.vcp_name);
- continue;
- }
- if (config_setvm(&env->vmd_ps, vm,
- -1, vm->vm_params.vmc_owner.uid) == -1)
- return (-1);
+ if (!(env->vmd_cfg.cfg_flags & VMD_CFG_STAGGERED_START)) {
+ env->vmd_cfg.delay.tv_sec = VMD_DEFAULT_STAGGERED_START_DELAY;
+ if (sysctl(ncpu_mib, NELEM(ncpu_mib), &ncpus, &ncpus_sz, NULL, 0) == -1)
+ ncpus = 1;
+ env->vmd_cfg.parallelism = ncpus;
+ log_debug("%s: setting staggered start configuration to "
+ "parallelism: %d and delay: %lld",
+ __func__, ncpus, (long long) env->vmd_cfg.delay.tv_sec);
}
+ log_debug("%s: starting vms in staggered fashion", __func__);
+ evtimer_set(&staggered_start_timer, start_vm_batch, NULL);
+ /* start first batch */
+ start_vm_batch(0, 0, NULL);
+
return (0);
}
@@ -983,24 +1019,12 @@ vmd_reload(unsigned int reset, const char *filename)
}
}
- TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
- if (!(vm->vm_state & VM_STATE_RUNNING)) {
- if (vm->vm_state & VM_STATE_DISABLED) {
- log_debug("%s: not creating vm %s"
- " (disabled)", __func__,
- vm->vm_params.vmc_params.vcp_name);
- continue;
- }
- if (config_setvm(&env->vmd_ps, vm,
- -1, vm->vm_params.vmc_owner.uid) == -1)
- return (-1);
- } else {
- log_debug("%s: not creating vm \"%s\": "
- "(running)", __func__,
- vm->vm_params.vmc_params.vcp_name);
- }
+ log_debug("%s: starting vms in staggered fashion", __func__);
+ evtimer_set(&staggered_start_timer, start_vm_batch, NULL);
+ /* start first batch */
+ start_vm_batch(0, 0, NULL);
+
}
- }
return (0);
}
diff --git a/usr.sbin/vmd/vmd.h b/usr.sbin/vmd/vmd.h
index f6368dc621d..d7efb329e53 100644
--- a/usr.sbin/vmd/vmd.h
+++ b/usr.sbin/vmd/vmd.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmd.h,v 1.97 2019/09/07 09:11:14 tobhe Exp $ */
+/* $OpenBSD: vmd.h,v 1.98 2019/12/12 03:53:38 pd Exp $ */
/*
* Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
@@ -39,6 +39,7 @@
#define SET(_v, _m) ((_v) |= (_m))
#define CLR(_v, _m) ((_v) &= ~(_m))
#define ISSET(_v, _m) ((_v) & (_m))
+#define NELEM(a) (sizeof(a) / sizeof((a)[0]))
#define VMD_USER "_vmd"
#define VMD_CONF "/etc/vm.conf"
@@ -56,6 +57,8 @@
#define VMD_SWITCH_TYPE "bridge"
#define VM_DEFAULT_MEMORY 512
+#define VMD_DEFAULT_STAGGERED_START_DELAY 30
+
/* Rate-limit fast reboots */
#define VM_START_RATE_SEC 6 /* min. seconds since last reboot */
#define VM_START_RATE_LIMIT 3 /* max. number of fast reboots */
@@ -280,6 +283,7 @@ struct vmd_vm {
#define VM_STATE_SHUTDOWN 0x04
#define VM_STATE_RECEIVED 0x08
#define VM_STATE_PAUSED 0x10
+#define VM_STATE_WAITING 0x20
/* For rate-limiting */
struct timeval vm_start_tv;
@@ -319,7 +323,10 @@ struct vmd_config {
unsigned int cfg_flags;
#define VMD_CFG_INET6 0x01
#define VMD_CFG_AUTOINET6 0x02
+#define VMD_CFG_STAGGERED_START 0x04
+ struct timeval delay;
+ int parallelism;
struct address cfg_localprefix;
struct address cfg_localprefix6;
};