In uvm Chuck decided backing store would not be allocated proactively

for blocks re-fetchable from the filesystem. However, at reboot time, filesystems are unmounted, and since processes lack backing store they are killed. Since the scheduler is still running, in some cases init is killed... which drops us to ddb [noted by bluhm]. Solution is to convert filesystems to read-only [proposed by kettenis]. The tale follows: sys_reboot() should pass proc * to MD boot() to vfs_shutdown() which completes current IO with vfs_busy VB_WRITE|VB_WAIT, then calls VFS_MOUNT() with MNT_UPDATE | MNT_RDONLY, soon teaching us that *fs_mount() calls a copyin() late... so store the sizes in vfsconflist[] and move the copyin() to sys_mount()... and notice nfs_mount copyin() is size-variant, so kill legacy struct nfs_args3. Next we learn ffs_mount()'s MNT_UPDATE code is sharp and rusty especially wrt softdep, so fix some bugs and add ~MNT_SOFTDEP to the downgrade. Some vnodes need a little more help, so tie them to &dead_vops. ffs_mount calling DIOCCACHESYNC is causing a bit of grief still but this issue is separate and will be dealt with in time. Couple hundred reboots by bluhm and myself, advice from guenther and others at the hut
author: deraadt <deraadt@openbsd.org> 2017-12-11 05:27:40 +0000
committer: deraadt <deraadt@openbsd.org> 2017-12-11 05:27:40 +0000
commit: 7efda1a11d8bf31499aa02da4d14eddf1b5293ae (patch)
tree: 77661cbfae0631d2f5a2086c7baae6f06a9af989 /sys/kern/vfs_subr.c
parent: Sergey Bronnikov's code coverage analysis shows that a few more code paths (diff)
download: wireguard-openbsd-7efda1a11d8bf31499aa02da4d14eddf1b5293ae.tar.xz
wireguard-openbsd-7efda1a11d8bf31499aa02da4d14eddf1b5293ae.zip
1 files changed, 52 insertions, 38 deletions
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 5dc5a7813be..705ceb4b8b3 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: vfs_subr.c,v 1.261 2017/12/04 09:38:20 mpi Exp $	*/
+/*	$OpenBSD: vfs_subr.c,v 1.262 2017/12/11 05:27:40 deraadt Exp $	*/
 /*	$NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $	*/
 
 /*
@@ -857,7 +857,8 @@ struct vflush_args {
 };
 
 int
-vflush_vnode(struct vnode *vp, void *arg) {
+vflush_vnode(struct vnode *vp, void *arg)
+{
 	struct vflush_args *va = arg;
 	struct proc *p = curproc;
 
@@ -903,6 +904,12 @@ vflush_vnode(struct vnode *vp, void *arg) {
 		return (0);
 	}
 
+	if (va->flags & WRITEDEMOTE) {
+		vp->v_op = &dead_vops;
+		vp->v_tag = VT_NON;
+		return (0);
+	}
+
 #ifdef DEBUG
 	if (busyprt)
 		vprint("vflush: busy vnode", vp);
@@ -1572,37 +1579,53 @@ vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
 	return (file_mode & mask) == mask ? 0 : EACCES;
 }
 
+int
+vfs_readonly(struct mount *mp, struct proc *p)
+{
+	int error;
+
+	error = vfs_busy(mp, VB_WRITE|VB_WAIT);
+	if (error) {
+		printf("%s: busy\n", mp->mnt_stat.f_mntonname);
+		return (error);
+	}
+	uvm_vnp_sync(mp);
+	error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p);
+	if (error) {
+		printf("%s: failed to sync\n", mp->mnt_stat.f_mntonname);
+		vfs_unbusy(mp);
+		return (error);
+	}
+
+	mp->mnt_flag |= MNT_UPDATE | MNT_RDONLY;
+	mp->mnt_flag &= ~MNT_SOFTDEP;
+	error = VFS_MOUNT(mp, mp->mnt_stat.f_mntonname, NULL, NULL, curproc);
+	if (error) {
+		printf("%s: failed to remount rdonly, error %d\n",
+		    mp->mnt_stat.f_mntonname, error);
+		vfs_unbusy(mp);
+		return (error);
+	}
+	if (mp->mnt_syncer != NULL)
+		vgone(mp->mnt_syncer);
+	mp->mnt_syncer = NULL;
+	vfs_unbusy(mp);
+	return (error);
+}
+
 /*
- * Unmount all file systems.
+ * Read-only all file systems.
  * We traverse the list in reverse order under the assumption that doing so
  * will avoid needing to worry about dependencies.
  */
 void
-vfs_unmountall(void)
+vfs_rofs(struct proc *p)
 {
 	struct mount *mp, *nmp;
-	int allerror, error, again = 1;
 
- retry:
-	allerror = 0;
 	TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, nmp) {
-		if (vfs_busy(mp, VB_WRITE|VB_NOWAIT))
-			continue;
 		/* XXX Here is a race, the next pointer is not locked. */
-		if ((error = dounmount(mp, MNT_FORCE, curproc)) != 0) {
-			printf("unmount of %s failed with error %d\n",
-			    mp->mnt_stat.f_mntonname, error);
-			allerror = 1;
-		}
-	}
-
-	if (allerror) {
-		printf("WARNING: some file systems would not unmount\n");
-		if (again) {
-			printf("retrying\n");
-			again = 0;
-			goto retry;
-		}
+		(void) vfs_readonly(mp, p);
 	}
 }
 
@@ -1610,26 +1633,21 @@ vfs_unmountall(void)
  * Sync and unmount file systems before shutting down.
  */
 void
-vfs_shutdown(void)
+vfs_shutdown(struct proc *p)
 {
 #ifdef ACCOUNTING
 	acct_shutdown();
 #endif
 
-	/* XXX Should suspend scheduling. */
-	(void) spl0();
-
 	printf("syncing disks... ");
 
 	if (panicstr == 0) {
-		/* Sync before unmount, in case we hang on something. */
-		sys_sync(&proc0, NULL, NULL);
-
-		/* Unmount file systems. */
-		vfs_unmountall();
+		/* Take all filesystems to read-only */
+		sys_sync(p, NULL, NULL);
+		vfs_rofs(p);
 	}
 
-	if (vfs_syncwait(1))
+	if (vfs_syncwait(p, 1))
 		printf("giving up\n");
 	else
 		printf("done\n");
@@ -1641,20 +1659,16 @@ vfs_shutdown(void)
 
 /*
  * perform sync() operation and wait for buffers to flush.
- * assumptions: called w/ scheduler disabled and physical io enabled
- * for now called at spl0() XXX
  */
 int
-vfs_syncwait(int verbose)
+vfs_syncwait(struct proc *p, int verbose)
 {
 	struct buf *bp;
 	int iter, nbusy, dcount, s;
-	struct proc *p;
 #ifdef MULTIPROCESSOR
 	int hold_count;
 #endif
 
-	p = curproc? curproc : &proc0;
 	sys_sync(p, NULL, NULL);
 
 	/* Wait for sync to finish. */
author	deraadt <deraadt@openbsd.org>	2017-12-11 05:27:40 +0000
committer	deraadt <deraadt@openbsd.org>	2017-12-11 05:27:40 +0000
commit	7efda1a11d8bf31499aa02da4d14eddf1b5293ae (patch)
tree	77661cbfae0631d2f5a2086c7baae6f06a9af989 /sys/kern/vfs_subr.c
parent	Sergey Bronnikov's code coverage analysis shows that a few more code paths (diff)
download	wireguard-openbsd-7efda1a11d8bf31499aa02da4d14eddf1b5293ae.tar.xz wireguard-openbsd-7efda1a11d8bf31499aa02da4d14eddf1b5293ae.zip