diff options
147 files changed, 6360 insertions, 4305 deletions
diff --git a/sbin/fsck_ffs/dir.c b/sbin/fsck_ffs/dir.c index ad983e10e46..ce7f126381d 100644 --- a/sbin/fsck_ffs/dir.c +++ b/sbin/fsck_ffs/dir.c @@ -1,4 +1,4 @@ -/* $OpenBSD: dir.c,v 1.5 1997/03/27 16:28:51 kstailey Exp $ */ +/* $OpenBSD: dir.c,v 1.6 1997/10/06 15:33:32 csapuntz Exp $ */ /* $NetBSD: dir.c,v 1.20 1996/09/27 22:45:11 christos Exp $ */ /* @@ -38,7 +38,7 @@ #if 0 static char sccsid[] = "@(#)dir.c 8.5 (Berkeley) 12/8/94"; #else -static char rcsid[] = "$OpenBSD: dir.c,v 1.5 1997/03/27 16:28:51 kstailey Exp $"; +static char rcsid[] = "$OpenBSD: dir.c,v 1.6 1997/10/06 15:33:32 csapuntz Exp $"; #endif #endif /* not lint */ @@ -333,12 +333,13 @@ adjust(idesc, lcnt) pinode(idesc->id_number); printf(" COUNT %d SHOULD BE %d", dp->di_nlink, dp->di_nlink - lcnt); - if (preen) { + if (preen || usedsoftdep) { if (lcnt < 0) { printf("\n"); pfatal("LINK COUNT INCREASING"); } - printf(" (ADJUSTED)\n"); + if (preen) + printf(" (ADJUSTED)\n"); } if (preen || reply("ADJUST") == 1) { dp->di_nlink -= lcnt; @@ -424,13 +425,15 @@ linkup(orphan, parentdir) lostdir = (dp->di_mode & IFMT) == IFDIR; pwarn("UNREF %s ", lostdir ? 
"DIR" : "FILE"); pinode(orphan); - if (preen && dp->di_size == 0) + if ((preen || usedsoftdep) && dp->di_size == 0) return (0); if (preen) printf(" (RECONNECTED)\n"); else if (reply("RECONNECT") == 0) return (0); + if (parentdir != 0) + lncntp[parentdir]++; if (lfdir == 0) { dp = ginode(ROOTINO); idesc.id_name = lfname; diff --git a/sbin/fsck_ffs/fsck.h b/sbin/fsck_ffs/fsck.h index 23892c6346f..3cc21c10b53 100644 --- a/sbin/fsck_ffs/fsck.h +++ b/sbin/fsck_ffs/fsck.h @@ -1,4 +1,4 @@ -/* $OpenBSD: fsck.h,v 1.4 1996/10/20 08:36:32 tholo Exp $ */ +/* $OpenBSD: fsck.h,v 1.5 1997/10/06 15:33:33 csapuntz Exp $ */ /* $NetBSD: fsck.h,v 1.13 1996/10/11 20:15:46 thorpej Exp $ */ /* @@ -176,7 +176,9 @@ int cvtlevel; /* convert to newer file system format */ int doinglevel1; /* converting to new cylinder group format */ int doinglevel2; /* converting to new inode format */ int newinofmt; /* filesystem has new inode format */ +char usedsoftdep; /* just fix soft dependency inconsistencies */ int preen; /* just fix normal inconsistencies */ +char resolved; /* cleared if unresolved changes => not clean */ char havesb; /* superblock has been read */ char skipclean; /* skip clean file systems if preening */ int fsmodified; /* 1 => write done to file system */ diff --git a/sbin/fsck_ffs/inode.c b/sbin/fsck_ffs/inode.c index 1be8bb0470b..1be39fa7e90 100644 --- a/sbin/fsck_ffs/inode.c +++ b/sbin/fsck_ffs/inode.c @@ -1,4 +1,4 @@ -/* $OpenBSD: inode.c,v 1.10 1997/06/25 18:12:13 kstailey Exp $ */ +/* $OpenBSD: inode.c,v 1.11 1997/10/06 15:33:33 csapuntz Exp $ */ /* $NetBSD: inode.c,v 1.23 1996/10/11 20:15:47 thorpej Exp $ */ /* @@ -38,7 +38,7 @@ #if 0 static char sccsid[] = "@(#)inode.c 8.5 (Berkeley) 2/8/95"; #else -static char rcsid[] = "$OpenBSD: inode.c,v 1.10 1997/06/25 18:12:13 kstailey Exp $"; +static char rcsid[] = "$OpenBSD: inode.c,v 1.11 1997/10/06 15:33:33 csapuntz Exp $"; #endif #endif /* not lint */ @@ -573,6 +573,8 @@ allocino(request, type) { register ino_t ino; register 
struct dinode *dp; + struct cg *cgp = &cgrp; + int cg; time_t t; if (request == 0) @@ -584,9 +586,17 @@ allocino(request, type) break; if (ino == maxino) return (0); + cg = ino_to_cg(&sblock, ino); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + setbit(cg_inosused(cgp), ino % sblock.fs_ipg); + cgp->cg_cs.cs_nifree--; + switch (type & IFMT) { case IFDIR: statemap[ino] = DSTATE; + cgp->cg_cs.cs_ndir++; break; case IFREG: case IFLNK: @@ -595,6 +605,7 @@ allocino(request, type) default: return (0); } + cgdirty(); dp = ginode(ino); dp->di_db[0] = allocblk((long)1); if (dp->di_db[0] == 0) { @@ -602,6 +613,7 @@ allocino(request, type) return (0); } dp->di_mode = type; + dp->di_flags = 0; (void)time(&t); dp->di_atime = t; dp->di_mtime = dp->di_ctime = dp->di_atime; diff --git a/sbin/fsck_ffs/main.c b/sbin/fsck_ffs/main.c index 3e8ca7b3357..159a2252814 100644 --- a/sbin/fsck_ffs/main.c +++ b/sbin/fsck_ffs/main.c @@ -1,4 +1,4 @@ -/* $OpenBSD: main.c,v 1.10 1997/07/14 20:59:11 deraadt Exp $ */ +/* $OpenBSD: main.c,v 1.11 1997/10/06 15:33:33 csapuntz Exp $ */ /* $NetBSD: main.c,v 1.22 1996/10/11 20:15:48 thorpej Exp $ */ /* @@ -44,7 +44,7 @@ static char copyright[] = #if 0 static char sccsid[] = "@(#)main.c 8.2 (Berkeley) 1/23/94"; #else -static char rcsid[] = "$OpenBSD: main.c,v 1.10 1997/07/14 20:59:11 deraadt Exp $"; +static char rcsid[] = "$OpenBSD: main.c,v 1.11 1997/10/06 15:33:33 csapuntz Exp $"; #endif #endif /* not lint */ @@ -206,6 +206,13 @@ checkfilesys(filesys, mntpt, auxdata, child) case -1: return (0); } + + /* + * Cleared if any questions answered no. Used to decide if + * the superblock should be marked clean. 
+ */ + resolved = 1; + /* * 1: scan inodes tallying blocks used */ @@ -221,7 +228,7 @@ checkfilesys(filesys, mntpt, auxdata, child) * 1b: locate first references to duplicates, if any */ if (duplist) { - if (preen) + if (preen || usedsoftdep) pfatal("INTERNAL ERROR: dups with -p"); printf("** Phase 1b - Rescan For More DUPS\n"); pass1b(); @@ -304,16 +311,19 @@ checkfilesys(filesys, mntpt, auxdata, child) bwrite(fswritefd, (char *)&sblock, fsbtodb(&sblock, cgsblock(&sblock, cylno)), SBSIZE); } - ckfini(1); - free(blockmap); - free(statemap); - free((char *)lncntp); if (!fsmodified) return (0); if (!preen) printf("\n***** FILE SYSTEM WAS MODIFIED *****\n"); - if (rerun) + if (rerun) { + resolved = 0; printf("\n***** PLEASE RERUN FSCK *****\n"); + } + ckfini(resolved); + free(blockmap); + free(statemap); + free((char *)lncntp); + if (hotroot()) { struct statfs stfs_buf; /* diff --git a/sbin/fsck_ffs/pass1.c b/sbin/fsck_ffs/pass1.c index e35ed2b656a..11d22ef979c 100644 --- a/sbin/fsck_ffs/pass1.c +++ b/sbin/fsck_ffs/pass1.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pass1.c,v 1.4 1996/10/20 08:36:36 tholo Exp $ */ +/* $OpenBSD: pass1.c,v 1.5 1997/10/06 15:33:34 csapuntz Exp $ */ /* $NetBSD: pass1.c,v 1.16 1996/09/27 22:45:15 christos Exp $ */ /* @@ -38,7 +38,7 @@ #if 0 static char sccsid[] = "@(#)pass1.c 8.1 (Berkeley) 6/5/93"; #else -static char rcsid[] = "$OpenBSD: pass1.c,v 1.4 1996/10/20 08:36:36 tholo Exp $"; +static char rcsid[] = "$OpenBSD: pass1.c,v 1.5 1997/10/06 15:33:34 csapuntz Exp $"; #endif #endif /* not lint */ @@ -215,8 +215,10 @@ checkinode(inumber, idesc) zlnp = (struct zlncnt *)malloc(sizeof *zlnp); if (zlnp == NULL) { pfatal("LINK COUNT TABLE OVERFLOW"); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); errexit("%s", ""); + } } else { zlnp->zlncnt = inumber; zlnp->next = zlnhead; @@ -285,8 +287,10 @@ pass1check(idesc) idesc->id_number); if (preen) printf(" (SKIPPING)\n"); - else if (reply("CONTINUE") == 0) + else if (reply("CONTINUE") == 
0) { + ckfini(0); errexit("%s", ""); + } return (STOP); } } @@ -303,15 +307,19 @@ pass1check(idesc) idesc->id_number); if (preen) printf(" (SKIPPING)\n"); - else if (reply("CONTINUE") == 0) + else if (reply("CONTINUE") == 0) { + ckfini(0); errexit("%s", ""); + } return (STOP); } new = (struct dups *)malloc(sizeof(struct dups)); if (new == NULL) { pfatal("DUP TABLE OVERFLOW."); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); errexit("%s", ""); + } return (STOP); } new->dup = blkno; diff --git a/sbin/fsck_ffs/pass2.c b/sbin/fsck_ffs/pass2.c index 3b0d095d0b1..142fe1bdbd0 100644 --- a/sbin/fsck_ffs/pass2.c +++ b/sbin/fsck_ffs/pass2.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pass2.c,v 1.4 1996/10/20 08:36:38 tholo Exp $ */ +/* $OpenBSD: pass2.c,v 1.5 1997/10/06 15:33:34 csapuntz Exp $ */ /* $NetBSD: pass2.c,v 1.17 1996/09/27 22:45:15 christos Exp $ */ /* @@ -38,7 +38,7 @@ #if 0 static char sccsid[] = "@(#)pass2.c 8.6 (Berkeley) 10/27/94"; #else -static char rcsid[] = "$OpenBSD: pass2.c,v 1.4 1996/10/20 08:36:38 tholo Exp $"; +static char rcsid[] = "$OpenBSD: pass2.c,v 1.5 1997/10/06 15:33:34 csapuntz Exp $"; #endif #endif /* not lint */ @@ -75,8 +75,10 @@ pass2() case USTATE: pfatal("ROOT INODE UNALLOCATED"); - if (reply("ALLOCATE") == 0) + if (reply("ALLOCATE") == 0) { + ckfini(0); errexit("%s", ""); + } if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errexit("CANNOT ALLOCATE ROOT INODE\n"); break; @@ -89,8 +91,10 @@ pass2() errexit("CANNOT ALLOCATE ROOT INODE\n"); break; } - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); errexit("%s", ""); + } break; case FSTATE: @@ -102,8 +106,10 @@ pass2() errexit("CANNOT ALLOCATE ROOT INODE\n"); break; } - if (reply("FIX") == 0) + if (reply("FIX") == 0) { + ckfini(0); errexit("%s", ""); + } dp = ginode(ROOTINO); dp->di_mode &= ~IFMT; dp->di_mode |= IFDIR; @@ -145,8 +151,14 @@ pass2() } } else if ((inp->i_isize & (DIRBLKSIZ - 1)) != 0) { getpathname(pathbuf, inp->i_number, inp->i_number); 
- pwarn("DIRECTORY %s: LENGTH %d NOT MULTIPLE OF %d", - pathbuf, inp->i_isize, DIRBLKSIZ); + if (usedsoftdep) + pfatal("%s %s: LENGTH %d NOT MULTIPLE of %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); + else + pwarn("%s %s: LENGTH %d NOT MULTIPLE OF %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); if (preen) printf(" (ADJUSTED)\n"); inp->i_isize = roundup(inp->i_isize, DIRBLKSIZ); @@ -396,7 +408,7 @@ again: break; if (statemap[dirp->d_ino] == FCLEAR) errmsg = "DUP/BAD"; - else if (!preen) + else if (!preen && !usedsoftdep) errmsg = "ZERO LENGTH DIRECTORY"; else { n = 1; @@ -421,8 +433,11 @@ again: pwarn("%s %s %s\n", pathbuf, "IS AN EXTRANEOUS HARD LINK TO DIRECTORY", namebuf); - if (preen) - printf(" (IGNORED)\n"); + if (preen) { + printf (" (REMOVED)\n"); + n = 1; + break; + } else if ((n = reply("REMOVE")) == 1) break; } diff --git a/sbin/fsck_ffs/pass5.c b/sbin/fsck_ffs/pass5.c index ecfad373247..0abd3082cb3 100644 --- a/sbin/fsck_ffs/pass5.c +++ b/sbin/fsck_ffs/pass5.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pass5.c,v 1.3 1996/10/20 08:36:40 tholo Exp $ */ +/* $OpenBSD: pass5.c,v 1.4 1997/10/06 15:33:35 csapuntz Exp $ */ /* $NetBSD: pass5.c,v 1.16 1996/09/27 22:45:18 christos Exp $ */ /* @@ -38,7 +38,7 @@ #if 0 static char sccsid[] = "@(#)pass5.c 8.6 (Berkeley) 11/30/94"; #else -static char rcsid[] = "$OpenBSD: pass5.c,v 1.3 1996/10/20 08:36:40 tholo Exp $"; +static char rcsid[] = "$OpenBSD: pass5.c,v 1.4 1997/10/06 15:33:35 csapuntz Exp $"; #endif #endif /* not lint */ @@ -46,7 +46,6 @@ static char rcsid[] = "$OpenBSD: pass5.c,v 1.3 1996/10/20 08:36:40 tholo Exp $"; #include <sys/time.h> #include <ufs/ufs/dinode.h> #include <ufs/ffs/fs.h> -#include <ufs/ffs/ffs_extern.h> #include <string.h> #include "fsutil.h" @@ -57,11 +56,12 @@ void pass5() { int c, blk, frags, basesize, sumsize, mapsize, savednrpos; + int inomapsize, blkmapsize; register struct fs *fs = &sblock; register struct cg *cg = &cgrp; daddr_t dbase, dmax; - register daddr_t d; - register 
long i, j; + daddr_t d; + long i, j, k; struct csum *cs; struct csum cstotal; struct inodesc idesc[3]; @@ -119,6 +119,8 @@ pass5() sumsize = &ocg->cg_iused[0] - (u_int8_t *)(&ocg->cg_btot[0]); mapsize = &ocg->cg_free[howmany(fs->fs_fpg, NBBY)] - (u_char *)&ocg->cg_iused[0]; + blkmapsize = howmany(fs->fs_fpg, NBBY); + inomapsize = &ocg->cg_free[0] - (u_char *)&ocg->cg_iused[0]; ocg->cg_magic = CG_MAGIC; savednrpos = fs->fs_nrpos; fs->fs_nrpos = 8; @@ -133,12 +135,12 @@ pass5() fs->fs_cpg * fs->fs_nrpos * sizeof(int16_t); newcg->cg_freeoff = newcg->cg_iusedoff + howmany(fs->fs_ipg, NBBY); - if (fs->fs_contigsumsize <= 0) { - newcg->cg_nextfreeoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); - } else { - newcg->cg_clustersumoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY) - + inomapsize = newcg->cg_freeoff - newcg->cg_iusedoff; + newcg->cg_nextfreeoff = newcg->cg_freeoff + + howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); + blkmapsize = newcg->cg_nextfreeoff - newcg->cg_freeoff; + if (fs->fs_contigsumsize > 0) { + newcg->cg_clustersumoff = newcg->cg_nextfreeoff - sizeof(int32_t); newcg->cg_clustersumoff = roundup(newcg->cg_clustersumoff, sizeof(int32_t)); @@ -155,6 +157,7 @@ pass5() break; default: + inomapsize = blkmapsize = sumsize = 0; errexit("UNKNOWN ROTATIONAL TABLE FORMAT %d\n", fs->fs_postblformat); } @@ -305,13 +308,6 @@ pass5() cgdirty(); continue; } - if (memcmp(cg_inosused(newcg), - cg_inosused(cg), mapsize) != 0 && - dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { - memcpy(cg_inosused(cg), cg_inosused(newcg), - (size_t)mapsize); - cgdirty(); - } if ((memcmp(newcg, cg, basesize) != 0 || memcmp(&cg_blktot(newcg)[0], &cg_blktot(cg)[0], sumsize) != 0) && @@ -321,6 +317,41 @@ pass5() &cg_blktot(newcg)[0], (size_t)sumsize); cgdirty(); } + if (usedsoftdep) { + for (i = 0; i < inomapsize; i++) { + j = cg_inosused(newcg)[i]; + if ((cg_inosused(cg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) 
{ + if ((j & (1 << k)) == 0) + continue; + if (cg_inosused(cg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED INODE %d MARKED FREE", + c * fs->fs_ipg + i * 8 + k); + } + } + for (i = 0; i < blkmapsize; i++) { + j = cg_blksfree(cg)[i]; + if ((cg_blksfree(newcg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) { + if ((j & (1 << k)) == 0) + continue; + if (cg_inosused(cg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED FRAG %d MARKED FREE", + c * fs->fs_fpg + i * 8 + k); + } + } + } + if (memcmp(cg_inosused(newcg), cg_inosused(cg), + mapsize) != 0 && + dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { + memmove(cg_inosused(cg), cg_inosused(newcg), + (size_t)mapsize); + cgdirty(); + } } if (fs->fs_postblformat == FS_42POSTBLFMT) fs->fs_nrpos = savednrpos; diff --git a/sbin/fsck_ffs/setup.c b/sbin/fsck_ffs/setup.c index e48c5f0b12d..436fe96eeb9 100644 --- a/sbin/fsck_ffs/setup.c +++ b/sbin/fsck_ffs/setup.c @@ -1,4 +1,4 @@ -/* $OpenBSD: setup.c,v 1.4 1996/10/20 08:36:41 tholo Exp $ */ +/* $OpenBSD: setup.c,v 1.5 1997/10/06 15:33:35 csapuntz Exp $ */ /* $NetBSD: setup.c,v 1.27 1996/09/27 22:45:19 christos Exp $ */ /* @@ -38,7 +38,7 @@ #if 0 static char sccsid[] = "@(#)setup.c 8.5 (Berkeley) 11/23/94"; #else -static char rcsid[] = "$OpenBSD: setup.c,v 1.4 1996/10/20 08:36:41 tholo Exp $"; +static char rcsid[] = "$OpenBSD: setup.c,v 1.5 1997/10/06 15:33:35 csapuntz Exp $"; #endif #endif /* not lint */ @@ -336,8 +336,10 @@ setup(dev) fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), size) != 0 && !asked) { pfatal("BAD SUMMARY INFORMATION"); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); errexit("%s", ""); + } asked++; } } @@ -382,6 +384,10 @@ setup(dev) goto badsblabel; } bufinit(); + if (sblock.fs_flags & FS_DOSOFTDEP) + usedsoftdep = 1; + else + usedsoftdep = 0; return (1); badsblabel: diff --git a/sbin/fsck_ffs/utilities.c b/sbin/fsck_ffs/utilities.c index 92d4b69da80..3b6dfe2d017 100644 --- a/sbin/fsck_ffs/utilities.c +++ 
b/sbin/fsck_ffs/utilities.c @@ -1,4 +1,4 @@ -/* $OpenBSD: utilities.c,v 1.4 1997/06/25 18:12:17 kstailey Exp $ */ +/* $OpenBSD: utilities.c,v 1.5 1997/10/06 15:33:36 csapuntz Exp $ */ /* $NetBSD: utilities.c,v 1.18 1996/09/27 22:45:20 christos Exp $ */ /* @@ -38,7 +38,7 @@ #if 0 static char sccsid[] = "@(#)utilities.c 8.1 (Berkeley) 6/5/93"; #else -static char rcsid[] = "$OpenBSD: utilities.c,v 1.4 1997/06/25 18:12:17 kstailey Exp $"; +static char rcsid[] = "$OpenBSD: utilities.c,v 1.5 1997/10/06 15:33:36 csapuntz Exp $"; #endif #endif /* not lint */ @@ -96,6 +96,7 @@ reply(question) printf("\n"); if (!persevere && (nflag || fswritefd < 0)) { printf("%s? no\n\n", question); + resolved = 0; return (0); } if (yflag || (persevere && nflag)) { @@ -106,13 +107,17 @@ reply(question) printf("%s? [yn] ", question); (void) fflush(stdout); c = getc(stdin); - while (c != '\n' && getc(stdin) != '\n') - if (feof(stdin)) + while (c != '\n' && getc(stdin) != '\n') { + if (feof(stdin)) { + resolved = 0; return (0); + } + } } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); printf("\n"); if (c == 'y' || c == 'Y') return (1); + resolved = 0; return (0); } @@ -374,7 +379,8 @@ int allocblk(frags) long frags; { - register int i, j, k; + int i, j, k, cg, baseblk; + struct cg *cgp = &cgrp; if (frags <= 0 || frags > sblock.fs_frag) return (0); @@ -389,9 +395,21 @@ allocblk(frags) j += k; continue; } - for (k = 0; k < frags; k++) + cg = dtog(&sblock, i + j); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + baseblk = dtogd(&sblock, i + j); + + for (k = 0; k < frags; k++) { setbmap(i + j + k); + clrbit(cg_blksfree(cgp), baseblk + k); + } n_blks += frags; + if (frags == sblock.fs_frag) + cgp->cg_cs.cs_nbfree--; + else + cgp->cg_cs.cs_nffree -= frags; return (i + j); } } diff --git a/sbin/tunefs/tunefs.c b/sbin/tunefs/tunefs.c index 69ba40d3adc..8d92a562813 100644 --- a/sbin/tunefs/tunefs.c +++ 
b/sbin/tunefs/tunefs.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tunefs.c,v 1.4 1997/09/04 00:51:56 mickey Exp $ */ +/* $OpenBSD: tunefs.c,v 1.5 1997/10/06 15:33:59 csapuntz Exp $ */ /* $NetBSD: tunefs.c,v 1.10 1995/03/18 15:01:31 cgd Exp $ */ /* @@ -44,7 +44,7 @@ static char copyright[] = #if 0 static char sccsid[] = "@(#)tunefs.c 8.2 (Berkeley) 4/19/94"; #else -static char rcsid[] = "$OpenBSD: tunefs.c,v 1.4 1997/09/04 00:51:56 mickey Exp $"; +static char rcsid[] = "$OpenBSD: tunefs.c,v 1.5 1997/10/06 15:33:59 csapuntz Exp $"; #endif #endif /* not lint */ @@ -90,7 +90,7 @@ main(argc, argv) int argc; char *argv[]; { - char *cp, *special, *name; + char *cp, *special, *name, *action; struct stat st; int i; int Aflag = 0; @@ -190,6 +190,25 @@ again: warnx(OPTWARN, "space", "<", MINFREE); continue; + case 'n': + name = "soft updates"; + if (argc < 1) + errx(10, "-s: missing %s", name); + argc--, argv++; + if (strcmp(*argv, "enable") == 0) { + sblock.fs_flags |= FS_DOSOFTDEP; + action = "set"; + } else if (strcmp(*argv, "disable") == 0) { + sblock.fs_flags &= ~FS_DOSOFTDEP; + action = "cleared"; + } else { + errx(10, "bad %s (options are %s)", + name, "`enable' or `disable'"); + } + warnx("%s %s", name, action); + continue; + + case 'o': name = "optimization preference"; if (argc < 1) @@ -245,6 +264,7 @@ usage() "\t-d rotational delay between contiguous blocks\n" "\t-e maximum blocks per file in a cylinder group\n" "\t-m minimum percentage of free space\n" + "\t-n soft updates ('enable' or 'disable')\n" "\t-o optimization preference (`space' or `time')\n" "\t-p no change - just prints current tuneable settings\n", __progname); @@ -270,6 +290,8 @@ getsb(fs, file) void printfs() { + warnx("soft updates: (-n) %s", + (sblock.fs_flags & FS_DOSOFTDEP) ? 
"yes" : "no"); warnx("maximum contiguous block count: (-a) %d", sblock.fs_maxcontig); warnx("rotational delay between contiguous blocks: (-d) %d ms", diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c index 68b17182919..2a71810913a 100644 --- a/sys/compat/linux/linux_misc.c +++ b/sys/compat/linux/linux_misc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: linux_misc.c,v 1.7 1997/04/14 11:16:23 graichen Exp $ */ +/* $OpenBSD: linux_misc.c,v 1.8 1997/10/06 15:05:18 csapuntz Exp $ */ /* $NetBSD: linux_misc.c,v 1.27 1996/05/20 01:59:21 fvdl Exp $ */ /* @@ -768,8 +768,8 @@ linux_sys_getdents(p, v, retval) off_t off; /* true file offset */ int buflen, error, eofflag, nbytes, oldcall; struct vattr va; - u_long *cookiebuf, *cookie; - int ncookies; + u_long *cookiebuf = NULL, *cookie; + int ncookies = 0; if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); @@ -795,9 +795,7 @@ linux_sys_getdents(p, v, retval) oldcall = 0; } buf = malloc(buflen, M_TEMP, M_WAITOK); - ncookies = buflen / 16; - cookiebuf = malloc(ncookies * sizeof(*cookiebuf), M_TEMP, M_WAITOK); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); off = fp->f_offset; again: aiov.iov_base = buf; @@ -813,11 +811,14 @@ again: * First we read into the malloc'ed buffer, then * we massage it into user space, one record at a time. 
*/ - error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, cookiebuf, - ncookies); + error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies, + &cookiebuf); if (error) goto out; + if (!error && !cookiebuf) + goto out; + inp = buf; outp = SCARG(uap, dent); resid = nbytes; @@ -881,8 +882,9 @@ again: eof: *retval = nbytes - resid; out: - VOP_UNLOCK(vp); - free(cookiebuf, M_TEMP); + VOP_UNLOCK(vp, 0, p); + if (cookiebuf) + free(cookiebuf, M_TEMP); free(buf, M_TEMP); return error; } diff --git a/sys/compat/sunos/sunos_misc.c b/sys/compat/sunos/sunos_misc.c index 1af211391e0..e062dbf9c80 100644 --- a/sys/compat/sunos/sunos_misc.c +++ b/sys/compat/sunos/sunos_misc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sunos_misc.c,v 1.11 1997/07/28 09:53:12 deraadt Exp $ */ +/* $OpenBSD: sunos_misc.c,v 1.12 1997/10/06 15:05:45 csapuntz Exp $ */ /* $NetBSD: sunos_misc.c,v 1.65 1996/04/22 01:44:31 christos Exp $ */ /* @@ -412,7 +412,7 @@ sunos_sys_getdents(p, v, retval) struct sunos_dirent idb; off_t off; /* true file offset */ int buflen, error, eofflag; - u_long *cookiebuf, *cookie; + u_long *cookiebuf = NULL, *cookie; int ncookies; if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) @@ -428,8 +428,6 @@ sunos_sys_getdents(p, v, retval) buflen = min(MAXBSIZE, SCARG(uap, nbytes)); buf = malloc(buflen, M_TEMP, M_WAITOK); - ncookies = buflen / 16; - cookiebuf = malloc(ncookies * sizeof(*cookiebuf), M_TEMP, M_WAITOK); VOP_LOCK(vp); off = fp->f_offset; again: @@ -446,10 +444,15 @@ again: * First we read into the malloc'ed buffer, then * we massage it into user space, one record at a time. 
*/ - error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, cookiebuf, - ncookies); + error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, + &ncookies, &cookiebuf); if (error) goto out; + + if (!error && !cookiebuf) { + error = EPERM; + goto out; + } inp = buf; outp = SCARG(uap, buf); @@ -501,7 +504,8 @@ eof: *retval = SCARG(uap, nbytes) - resid; out: VOP_UNLOCK(vp); - free(cookiebuf, M_TEMP); + if (cookiebuf) + free(cookiebuf, M_TEMP); free(buf, M_TEMP); return (error); } diff --git a/sys/compat/svr4/svr4_misc.c b/sys/compat/svr4/svr4_misc.c index ef8910910a2..07a72d870a1 100644 --- a/sys/compat/svr4/svr4_misc.c +++ b/sys/compat/svr4/svr4_misc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: svr4_misc.c,v 1.11 1997/09/11 10:48:12 deraadt Exp $ */ +/* $OpenBSD: svr4_misc.c,v 1.12 1997/10/06 15:06:25 csapuntz Exp $ */ /* $NetBSD: svr4_misc.c,v 1.42 1996/12/06 03:22:34 christos Exp $ */ /* @@ -227,8 +227,8 @@ svr4_sys_getdents(p, v, retval) struct svr4_dirent idb; off_t off; /* true file offset */ int buflen, error, eofflag; - u_long *cookiebuf, *cookie; - int ncookies; + u_long *cookiebuf = NULL, *cookie; + int ncookies = 0; if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); @@ -243,9 +243,7 @@ svr4_sys_getdents(p, v, retval) buflen = min(MAXBSIZE, SCARG(uap, nbytes)); buf = malloc(buflen, M_TEMP, M_WAITOK); - ncookies = buflen / 16; - cookiebuf = malloc(ncookies * sizeof(*cookiebuf), M_TEMP, M_WAITOK); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); off = fp->f_offset; again: aiov.iov_base = buf; @@ -261,11 +259,16 @@ again: * First we read into the malloc'ed buffer, then * we massage it into user space, one record at a time. 
*/ - error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, cookiebuf, - ncookies); + error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies, + &cookiebuf); if (error) goto out; + if (!error && !cookiebuf) { + error = EPERM; + goto out; + } + inp = buf; outp = SCARG(uap, buf); resid = SCARG(uap, nbytes); @@ -314,8 +317,9 @@ again: eof: *retval = SCARG(uap, nbytes) - resid; out: - VOP_UNLOCK(vp); - free(cookiebuf, M_TEMP); + VOP_UNLOCK(vp, 0, p); + if (cookiebuf) + free(cookiebuf, M_TEMP); free(buf, M_TEMP); return error; } @@ -371,12 +375,12 @@ svr4_sys_fchroot(p, v, retval) if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0) return error; vp = (struct vnode *) fp->f_data; - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type != VDIR) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); if (error) return error; VREF(vp); diff --git a/sys/conf/files b/sys/conf/files index 102a2124751..4a334afbf5d 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,4 +1,4 @@ -# $OpenBSD: files,v 1.63 1997/08/20 17:09:41 angelos Exp $ +# $OpenBSD: files,v 1.64 1997/10/06 15:07:17 csapuntz Exp $ # $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $ # @(#)files.newconf 7.5 (Berkeley) 5/10/93 @@ -149,6 +149,7 @@ file kern/kern_exit.c file kern/kern_fork.c file kern/kern_ktrace.c ktrace file kern/kern_lkm.c lkm +file kern/kern_lock.c file kern/kern_ntptime.c file kern/kern_malloc.c file kern/kern_physio.c @@ -371,7 +372,7 @@ file netns/ns_proto.c ns file netns/spp_debug.c ns file netns/spp_usrreq.c ns file netipx/ipx.c ipx -file netipx/ipx_error.c ipx +file netipx/ipx_error.c ipxf file netipx/ipx_input.c ipx file netipx/ipx_ip.c ipx file netipx/ipx_outputfl.c ipx @@ -393,12 +394,29 @@ file nfs/nfs_subs.c nfsserver | nfsclient file nfs/nfs_syscalls.c nfsserver | nfsclient file nfs/nfs_vfsops.c nfsclient file nfs/nfs_vnops.c nfsclient + +file ufs/bffs/bffs_vfsops.c bffs +file ufs/bffs/bffs_vnops.c 
bffs + +file ufs/cffs/cffs_alloc.c cffs +file ufs/cffs/cffs_balloc.c cffs +file ufs/cffs/cffs_dir.c cffs +file ufs/cffs/cffs_group.c cffs +file ufs/cffs/cffs_hash.c cffs +file ufs/cffs/cffs_inode.c cffs +file ufs/cffs/cffs_lookup.c cffs +file ufs/cffs/cffs_subr.c cffs +file ufs/cffs/cffs_vfsops.c cffs +file ufs/cffs/cffs_vnops.c cffs +file ufs/cffs/iloctbl.c cffs + file ufs/ffs/ffs_alloc.c ffs | mfs file ufs/ffs/ffs_balloc.c ffs | mfs file ufs/ffs/ffs_inode.c ffs | mfs file ufs/ffs/ffs_subr.c ffs | mfs file ufs/ffs/ffs_tables.c ffs | mfs file ufs/ffs/ffs_vfsops.c ffs | mfs +file ufs/ffs/ffs_softdep.c ffs file ufs/ffs/ffs_vnops.c ffs | mfs file ufs/mfs/mfs_vfsops.c mfs file ufs/mfs/mfs_vnops.c mfs @@ -419,7 +437,6 @@ file ufs/ext2fs/ext2fs_subr.c ext2fs file ufs/ext2fs/ext2fs_vfsops.c ext2fs file ufs/ext2fs/ext2fs_vnops.c ext2fs file vm/device_pager.c devpager -file vm/kern_lock.c file vm/swap_pager.c swappager file vm/vm_fault.c file vm/vm_glue.c diff --git a/sys/dev/ccd.c b/sys/dev/ccd.c index 1049486f517..044a88194df 100644 --- a/sys/dev/ccd.c +++ b/sys/dev/ccd.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ccd.c,v 1.17 1997/08/08 21:47:01 niklas Exp $ */ +/* $OpenBSD: ccd.c,v 1.18 1997/10/06 15:07:46 csapuntz Exp $ */ /* $NetBSD: ccd.c,v 1.33 1996/05/05 04:21:14 thorpej Exp $ */ /*- @@ -1326,7 +1326,7 @@ ccdlookup(path, p, vpp) vp = nd.ni_vp; if (vp->v_usecount > 1) { - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p); return (EBUSY); } @@ -1336,14 +1336,14 @@ ccdlookup(path, p, vpp) if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) printf("ccdlookup: getattr error = %d\n", error); #endif - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p); return (error); } /* XXX: eventually we should handle VREG, too. 
*/ if (va.va_type != VBLK) { - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p); return (ENOTBLK); } @@ -1353,7 +1353,7 @@ ccdlookup(path, p, vpp) vprint("ccdlookup: vnode info", vp); #endif - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); *vpp = vp; return (0); } diff --git a/sys/dev/vnd.c b/sys/dev/vnd.c index f87a7e8b182..715a508aa55 100644 --- a/sys/dev/vnd.c +++ b/sys/dev/vnd.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vnd.c,v 1.16 1997/08/08 21:47:01 niklas Exp $ */ +/* $OpenBSD: vnd.c,v 1.17 1997/10/06 15:07:48 csapuntz Exp $ */ /* $NetBSD: vnd.c,v 1.26 1996/03/30 23:06:11 christos Exp $ */ /* @@ -369,6 +369,7 @@ vndstrategy(bp) int sz, flags, error, s; struct iovec aiov; struct uio auio; + struct proc *p = curproc; #ifdef DEBUG if (vnddebug & VDB_FOLLOW) @@ -431,7 +432,7 @@ vndstrategy(bp) auio.uio_segflg = UIO_SYSSPACE; auio.uio_procp = NULL; - VOP_LOCK(vnd->sc_vp); + vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY, p); vnd->sc_flags |= VNF_BUSY; if (bp->b_flags & B_READ) { auio.uio_rw = UIO_READ; @@ -443,7 +444,7 @@ vndstrategy(bp) vnd->sc_cred); } vnd->sc_flags &= ~VNF_BUSY; - VOP_UNLOCK(vnd->sc_vp); + VOP_UNLOCK(vnd->sc_vp, 0, p); if (bp->b_error) bp->b_flags |= B_ERROR; bp->b_resid = auio.uio_resid; @@ -477,9 +478,9 @@ vndstrategy(bp) int off, s, nra; nra = 0; - VOP_LOCK(vnd->sc_vp); + vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p); error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); - VOP_UNLOCK(vnd->sc_vp); + VOP_UNLOCK(vnd->sc_vp, 0, p); if (error == 0 && (long)nbn == -1) error = EIO; #ifdef DEBUG @@ -732,12 +733,12 @@ vndioctl(dev, cmd, addr, flag, p) } error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p); if (error) { - VOP_UNLOCK(nd.ni_vp); + VOP_UNLOCK(nd.ni_vp, 0, p); (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p); vndunlock(vnd); return(error); } - VOP_UNLOCK(nd.ni_vp); + VOP_UNLOCK(nd.ni_vp, 0, p); vnd->sc_vp = nd.ni_vp; vnd->sc_size = btodb(vattr.va_size); /* note truncation */ if ((error = vndsetcred(vnd, 
p->p_ucred)) != 0) { @@ -874,6 +875,7 @@ vndsetcred(vnd, cred) struct iovec aiov; char *tmpbuf; int error; + struct proc *p = curproc; vnd->sc_cred = crdup(cred); tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); @@ -887,9 +889,9 @@ vndsetcred(vnd, cred) auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_resid = aiov.iov_len; - VOP_LOCK(vnd->sc_vp); + vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p); error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); - VOP_UNLOCK(vnd->sc_vp); + VOP_UNLOCK(vnd->sc_vp, 0, p); free(tmpbuf, M_TEMP); return (error); diff --git a/sys/isofs/cd9660/cd9660_lookup.c b/sys/isofs/cd9660/cd9660_lookup.c index 68ee77511a9..56991a4b880 100644 --- a/sys/isofs/cd9660/cd9660_lookup.c +++ b/sys/isofs/cd9660/cd9660_lookup.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cd9660_lookup.c,v 1.2 1996/02/29 10:12:17 niklas Exp $ */ +/* $OpenBSD: cd9660_lookup.c,v 1.3 1997/10/06 15:08:40 csapuntz Exp $ */ /* $NetBSD: cd9660_lookup.c,v 1.14 1996/02/09 21:31:56 christos Exp $ */ /*- @@ -130,7 +130,8 @@ cd9660_lookup(v) struct ucred *cred = cnp->cn_cred; int flags = cnp->cn_flags; int nameiop = cnp->cn_nameiop; - + struct proc *p = cnp->cn_proc; + bp = NULL; *vpp = NULL; vdp = ap->a_dvp; @@ -146,6 +147,10 @@ cd9660_lookup(v) return (ENOTDIR); if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0) return (error); + + if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) && + (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) + return (EROFS); /* * We now have a segment name to search for, and a directory to search. 
@@ -176,14 +181,14 @@ cd9660_lookup(v) VREF(vdp); error = 0; } else if (flags & ISDOTDOT) { - VOP_UNLOCK(pdp); - error = vget(vdp, 1); + VOP_UNLOCK(pdp, 0, p); + error = vget(vdp, LK_EXCLUSIVE, p); if (!error && lockparent && (flags & ISLASTCN)) - error = VOP_LOCK(pdp); + error = vn_lock(pdp, LK_EXCLUSIVE, p); } else { - error = vget(vdp, 1); + error = vget(vdp, LK_EXCLUSIVE, p); if (!lockparent || error || !(flags & ISLASTCN)) - VOP_UNLOCK(pdp); + VOP_UNLOCK(pdp, 0, p); } /* * Check that the capability number did not change @@ -194,9 +199,9 @@ cd9660_lookup(v) return (0); vput(vdp); if (lockparent && pdp != vdp && (flags & ISLASTCN)) - VOP_UNLOCK(pdp); + VOP_UNLOCK(pdp, 0, p); } - if ((error = VOP_LOCK(pdp)) != 0) + if ((error = vn_lock(pdp, LK_EXCLUSIVE, p)) != 0) return (error); vdp = pdp; dp = VTOI(pdp); @@ -419,16 +424,16 @@ found: * it's a relocated directory. */ if (flags & ISDOTDOT) { - VOP_UNLOCK(pdp); /* race to get the inode */ + VOP_UNLOCK(pdp, 0, p); /* race to get the inode */ error = cd9660_vget_internal(vdp->v_mount, dp->i_ino, &tdp, dp->i_ino != ino, ep); brelse(bp); if (error) { - VOP_LOCK(pdp); + vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p); return (error); } if (lockparent && (flags & ISLASTCN) && - (error = VOP_LOCK(pdp))) { + (error = vn_lock(pdp, LK_EXCLUSIVE, p))) { vput(tdp); return (error); } @@ -444,7 +449,7 @@ found: if (error) return (error); if (!lockparent || !(flags & ISLASTCN)) - VOP_UNLOCK(pdp); + VOP_UNLOCK(pdp, 0, p); *vpp = tdp; } diff --git a/sys/isofs/cd9660/cd9660_node.c b/sys/isofs/cd9660/cd9660_node.c index 62c5ba2a66e..888a70df209 100644 --- a/sys/isofs/cd9660/cd9660_node.c +++ b/sys/isofs/cd9660/cd9660_node.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cd9660_node.c,v 1.3 1997/08/01 05:58:55 millert Exp $ */ +/* $OpenBSD: cd9660_node.c,v 1.4 1997/10/06 15:08:41 csapuntz Exp $ */ /* $NetBSD: cd9660_node.c,v 1.15 1996/02/09 21:31:58 christos Exp $ */ /*- @@ -63,6 +63,7 @@ struct iso_node **isohashtbl; u_long isohash; #define INOHASH(device, 
inum) (((device) + ((inum)>>12)) & isohash) +struct simplelock cd9660_ihash_slock; #ifdef ISODEVMAP struct iso_node **idvhashtbl; @@ -73,18 +74,20 @@ u_long idvhash; int prtactive; /* 1 => print out reclaim of active vnodes */ static u_int cd9660_chars2ui __P((u_char *, int)); - /* * Initialize hash links for inodes and dnodes. */ -void -cd9660_init() +int +cd9660_init(vfsp) + struct vfsconf *vfsp; { isohashtbl = hashinit(desiredvnodes, M_ISOFSMNT, &isohash); + simple_lock_init(&cd9660_ihash_slock); #ifdef ISODEVMAP idvhashtbl = hashinit(desiredvnodes / 8, M_ISOFSMNT, &idvhash); #endif + return (0); } #ifdef ISODEVMAP @@ -105,6 +108,7 @@ iso_dmap(device, inum, create) return (NULL); if (inum == dp->i_number && device == dp->i_dev) return (dp); + } if (!create) return (NULL); @@ -130,7 +134,7 @@ iso_dunmap(device) struct iso_dnode **dpp, *dp, *dq; for (dpp = idvhashtbl; dpp <= idvhashtbl + idvhash; dpp++) { - for (dp = *dpp; dp != NULL; dp = dq) + for (dp = *dpp; dp != NULL; dp = dq) { dq = dp->d_next; if (device == dp->i_dev) { if (dq) @@ -148,30 +152,28 @@ iso_dunmap(device) * to it. If it is in core, but locked, wait for it. 
*/ struct vnode * -cd9660_ihashget(device, inum) - dev_t device; +cd9660_ihashget(dev, inum) + dev_t dev; ino_t inum; { - register struct iso_node *ip; + struct proc *p = curproc; /* XXX */ + struct iso_node *ip; struct vnode *vp; - for (;;) - for (ip = isohashtbl[INOHASH(device, inum)];; ip = ip->i_next) { - if (ip == NULL) - return (NULL); - if (inum == ip->i_number && device == ip->i_dev) { - if (ip->i_flag & IN_LOCKED) { - ip->i_flag |= IN_WANTED; - sleep(ip, PINOD); - break; - } - vp = ITOV(ip); - if (!vget(vp, 1)) - return (vp); - break; - } - } - /* NOTREACHED */ +loop: + simple_lock(&cd9660_ihash_slock); + for (ip = isohashtbl[INOHASH(dev, inum)]; ip; ip = ip->i_next) { + if (inum == ip->i_number && dev == ip->i_dev) { + vp = ITOV(ip); + simple_lock(&vp->v_interlock); + simple_unlock(&cd9660_ihash_slock); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) + goto loop; + return (vp); + } + } + simple_unlock(&cd9660_ihash_slock); + return (NULL); } /* @@ -181,21 +183,19 @@ void cd9660_ihashins(ip) struct iso_node *ip; { + struct proc *p = curproc; struct iso_node **ipp, *iq; + simple_lock(&cd9660_ihash_slock); ipp = &isohashtbl[INOHASH(ip->i_dev, ip->i_number)]; if ((iq = *ipp) != NULL) iq->i_prev = &ip->i_next; ip->i_next = iq; ip->i_prev = ipp; *ipp = ip; - if (ip->i_flag & IN_LOCKED) - panic("cd9660_ihashins: already locked"); - if (curproc) - ip->i_lockholder = curproc->p_pid; - else - ip->i_lockholder = -1; - ip->i_flag |= IN_LOCKED; + simple_unlock(&cd9660_ihash_slock); + + lockmgr(&ip->i_lock, LK_EXCLUSIVE, 0, p); } /* @@ -207,6 +207,7 @@ cd9660_ihashrem(ip) { register struct iso_node *iq; + simple_lock(&cd9660_ihash_slock); if ((iq = ip->i_next) != NULL) iq->i_prev = ip->i_prev; *ip->i_prev = iq; @@ -214,6 +215,7 @@ cd9660_ihashrem(ip) ip->i_next = NULL; ip->i_prev = NULL; #endif + simple_unlock(&cd9660_ihash_slock); } /* @@ -226,8 +228,10 @@ cd9660_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; + struct proc *a_p; } */ *ap = v; struct 
vnode *vp = ap->a_vp; + struct proc *p = ap->a_p; register struct iso_node *ip = VTOI(vp); int error = 0; @@ -235,12 +239,14 @@ cd9660_inactive(v) vprint("cd9660_inactive: pushing active", vp); ip->i_flag = 0; + VOP_UNLOCK(vp, 0, p); /* * If we are done with the inode, reclaim it * so that it can be reused immediately. */ - if (vp->v_usecount == 0 && ip->inode.iso_mode == 0) - vgone(vp); + if (ip->inode.iso_mode == 0) + vrecycle(vp, (struct simplelock *)0, p); + return error; } diff --git a/sys/isofs/cd9660/cd9660_node.h b/sys/isofs/cd9660/cd9660_node.h index 08490035ad0..c89a2f20f99 100644 --- a/sys/isofs/cd9660/cd9660_node.h +++ b/sys/isofs/cd9660/cd9660_node.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cd9660_node.h,v 1.2 1996/02/29 10:12:21 niklas Exp $ */ +/* $OpenBSD: cd9660_node.h,v 1.3 1997/10/06 15:08:42 csapuntz Exp $ */ /* $NetBSD: cd9660_node.h,v 1.11 1996/02/09 21:32:00 christos Exp $ */ /*- @@ -87,7 +87,7 @@ struct iso_node { doff_t i_diroff; /* offset in dir, where we found last entry */ doff_t i_offset; /* offset of free space in directory */ ino_t i_ino; /* inode number of found directory */ - pid_t i_lockholder, i_lockwaiter; + struct lock i_lock; /* node lock */ long iso_extent; /* extent of file */ long i_size; @@ -100,8 +100,6 @@ struct iso_node { #define i_back i_chain[1] /* flags */ -#define IN_LOCKED 0x0001 /* inode is locked */ -#define IN_WANTED 0x0002 /* some process waiting on lock */ #define IN_ACCESS 0x0020 /* inode access time to be updated */ #define VTOI(vp) ((struct iso_node *)(vp)->v_data) @@ -116,6 +114,7 @@ int cd9660_open __P((void *)); int cd9660_close __P((void *)); int cd9660_access __P((void *)); int cd9660_getattr __P((void *)); +int cd9660_setattr __P((void *)); int cd9660_read __P((void *)); int cd9660_ioctl __P((void *)); int cd9660_select __P((void *)); diff --git a/sys/isofs/cd9660/cd9660_vfsops.c b/sys/isofs/cd9660/cd9660_vfsops.c index 8fb532f19c3..23ee8d0e01a 100644 --- a/sys/isofs/cd9660/cd9660_vfsops.c +++ 
b/sys/isofs/cd9660/cd9660_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cd9660_vfsops.c,v 1.8 1997/06/04 23:55:30 deraadt Exp $ */ +/* $OpenBSD: cd9660_vfsops.c,v 1.9 1997/10/06 15:08:44 csapuntz Exp $ */ /* $NetBSD: cd9660_vfsops.c,v 1.20 1996/02/09 21:32:08 christos Exp $ */ /*- @@ -64,7 +64,6 @@ #include <isofs/cd9660/cd9660_node.h> struct vfsops cd9660_vfsops = { - MOUNT_CD9660, cd9660_mount, cd9660_start, cd9660_unmount, @@ -76,14 +75,12 @@ struct vfsops cd9660_vfsops = { cd9660_fhtovp, cd9660_vptofh, cd9660_init, + cd9660_sysctl }; /* * Called by vfs_mountroot when iso is going to be mounted as root. - * - * Name is updated by mount(8) after booting. */ -#define ROOTNAME "root_device" static int iso_mountfs __P((struct vnode *devvp, struct mount *mp, struct proc *p, struct iso_args *argp)); @@ -93,48 +90,37 @@ int iso_disklabelspoof __P((dev_t dev, void (*strat) __P((struct buf *)), int cd9660_mountroot() { - register struct mount *mp; + struct mount *mp; extern struct vnode *rootvp; struct proc *p = curproc; /* XXX */ - struct iso_mnt *imp; - size_t size; int error; struct iso_args args; /* * Get vnodes for swapdev and rootdev. 
*/ - if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp)) - panic("cd9660_mountroot: can't setup bdevvp's"); - - mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); - bzero((char *)mp, (u_long)sizeof(struct mount)); - mp->mnt_op = &cd9660_vfsops; - mp->mnt_flag = MNT_RDONLY; - LIST_INIT(&mp->mnt_vnodelist); + if ((error = bdevvp(swapdev, &swapdev_vp)) || + (error = bdevvp(rootdev, &rootvp))) { + printf("cd9660_mountroot: can't setup bdevvp's"); + return (error); + } + + + if ((error = vfs_rootmountalloc("cd9660", "root_device", &mp)) != 0) + return (error); args.flags = ISOFSMNT_ROOT; if ((error = iso_mountfs(rootvp, mp, p, &args)) != 0) { - free(mp, M_MOUNT); - return (error); - } - if ((error = vfs_lock(mp)) != 0) { - (void)cd9660_unmount(mp, 0, p); - free(mp, M_MOUNT); - return (error); - } - CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); - mp->mnt_vnodecovered = NULLVP; - imp = VFSTOISOFS(mp); - (void) copystr("/", mp->mnt_stat.f_mntonname, MNAMELEN - 1, - &size); - bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); - (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, - &size); - bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); - (void)cd9660_statfs(mp, &mp->mnt_stat, p); - vfs_unlock(mp); - inittodr(0); /* XXX - can we get the cd creation time here?? 
*/ - return (0); + mp->mnt_vfc->vfc_refcount--; + vfs_unbusy(mp, p); + free(mp, M_MOUNT); + return (error); + } + simple_lock(&mountlist_slock); + CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); + simple_unlock(&mountlist_slock); + (void)cd9660_statfs(mp, &mp->mnt_stat, p); + vfs_unbusy(mp, p); + return (0); } /* @@ -207,6 +193,7 @@ cd9660_mount(mp, path, data, ndp, p) (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void) cd9660_statfs(mp, &mp->mnt_stat, p); return 0; } @@ -318,14 +305,14 @@ iso_mountfs(devvp, mp, p, argp) mp->mnt_data = (qaddr_t)isomp; mp->mnt_stat.f_fsid.val[0] = (long)dev; - mp->mnt_stat.f_fsid.val[1] = makefstype(MOUNT_CD9660); + mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = 0; mp->mnt_flag |= MNT_LOCAL; isomp->im_mountp = mp; isomp->im_dev = dev; isomp->im_devvp = devvp; - devvp->v_specflags |= SI_MOUNTEDON; + devvp->v_specmountpoint = mp; /* Check the Rock Ridge Extention support */ if (!(argp->flags & ISOFSMNT_NORRIP)) { @@ -504,7 +491,7 @@ cd9660_unmount(mp, mntflags, p) iso_dunmap(isomp->im_dev); #endif - isomp->im_devvp->v_specflags &= ~SI_MOUNTEDON; + isomp->im_devvp->v_specmountpoint = NULL; error = VOP_CLOSE(isomp->im_devvp, FREAD, NOCRED, p); vrele(isomp->im_devvp); free((caddr_t)isomp, M_ISOFSMNT); @@ -579,7 +566,6 @@ cd9660_statfs(mp, sbp, p) bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } - strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name, MFSNAMELEN); /* Use the first spare for flags: */ sbp->f_spare[0] = isomp->im_flags; return 0; @@ -708,6 +694,7 @@ cd9660_vget_internal(mp, ino, vpp, relocated, isodir) MALLOC(ip, struct iso_node *, sizeof(struct iso_node), M_ISOFSNODE, M_WAITOK); bzero((caddr_t)ip, sizeof(struct iso_node)); + lockinit(&ip->i_lock, PINOD, "isoinode", 0, 0); vp->v_data = ip; ip->i_vnode = vp; ip->i_dev = dev; @@ 
-852,9 +839,8 @@ cd9660_vget_internal(mp, ino, vpp, relocated, isodir) if ((nvp = checkalias(vp, ip->inode.iso_rdev, mp)) != NULL) { /* * Discard unneeded vnode, but save its iso_node. + * Note that the lock is carried over in the iso_node */ - cd9660_ihashrem(ip); - VOP_UNLOCK(vp); nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; @@ -899,7 +885,7 @@ cd9660_vptofh(vp, fhp) { register struct iso_node *ip = VTOI(vp); register struct ifid *ifhp; - + ifhp = (struct ifid *)fhp; ifhp->ifid_len = sizeof(struct ifid); diff --git a/sys/isofs/cd9660/cd9660_vnops.c b/sys/isofs/cd9660/cd9660_vnops.c index 071c5831b2a..2723d80bf13 100644 --- a/sys/isofs/cd9660/cd9660_vnops.c +++ b/sys/isofs/cd9660/cd9660_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cd9660_vnops.c,v 1.5 1996/04/21 22:26:38 deraadt Exp $ */ +/* $OpenBSD: cd9660_vnops.c,v 1.6 1997/10/06 15:08:46 csapuntz Exp $ */ /* $NetBSD: cd9660_vnops.c,v 1.32 1996/03/16 20:25:40 ws Exp $ */ /*- @@ -120,8 +120,7 @@ cd9660_mknod(ndp, vap, cred, p) dp = iso_dmap(ip->i_dev,ip->i_number,1); if (ip->inode.iso_rdev == vap->va_rdev || vap->va_rdev == VNOVAL) { /* same as the unmapped one, delete the mapping */ - dp->d_next->d_prev = dp->d_prev; - *dp->d_prev = dp->d_next; + remque(dp); FREE(dp, M_CACHE); } else /* enter new mapping */ @@ -141,6 +140,48 @@ cd9660_mknod(ndp, vap, cred, p) #endif /* + * Setattr call. Only allowed for block and character special devices. 
+ */ +int +cd9660_setattr(v) + void *v; + +{ + struct vop_setattr_args /* { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + struct vattr *vap = ap->a_vap; + + if (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || + vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || + vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) + return (EROFS); + if (vap->va_size != VNOVAL) { + switch (vp->v_type) { + case VDIR: + return (EISDIR); + case VLNK: + case VREG: + return (EROFS); + case VCHR: + case VBLK: + case VSOCK: + case VFIFO: + return (0); + default: + return (EINVAL); + } + } + + return (EINVAL); +} + +/* * Open called. * * Nothing to do. @@ -245,15 +286,15 @@ cd9660_getattr(v) return (0); } +#if ISO_DEFAULT_BLOCK_SIZE >= NBPG #ifdef DEBUG extern int doclusterread; #else #define doclusterread 1 #endif - -/* XXX until cluster routines can handle block sizes less than one page */ -#define cd9660_doclusterread \ - (doclusterread && (ISO_DEFAULT_BLOCK_SIZE >= NBPG)) +#else +#define doclusterread 0 +#endif /* * Vnode op for reading. 
@@ -296,7 +337,7 @@ cd9660_read(v) n = diff; size = blksize(imp, ip, lbn); rablock = lbn + 1; - if (cd9660_doclusterread) { + if (doclusterread) { if (lblktosize(imp, rablock) <= ip->i_size) error = cluster_read(vp, (off_t)ip->i_size, lbn, size, NOCRED, &bp); @@ -319,7 +360,11 @@ cd9660_read(v) } error = uiomove(bp->b_data + on, (int)n, uio); - brelse(bp); + + if (n + on == imp->logical_block_size || + uio->uio_offset == (off_t)ip->i_size) + bp->b_flags |= B_AGE; + brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); } @@ -486,6 +531,8 @@ cd9660_readdir(v) int error = 0; int reclen; u_short namelen; + int ncookies = 0; + u_long *cookies = NULL; dp = VTOI(vdp); imp = dp->i_mnt; @@ -500,9 +547,19 @@ cd9660_readdir(v) idp->saveent.d_type = idp->assocent.d_type = idp->current.d_type = DT_UNKNOWN; idp->uio = uio; + if (ap->a_ncookies == NULL) { + idp->cookies = NULL; + } else { + /* + * Guess the number of cookies needed. + */ + ncookies = uio->uio_resid / 16; + MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, + M_WAITOK); + idp->cookies = cookies; + idp->ncookies = ncookies; + } idp->eofflag = 1; - idp->cookies = ap->a_cookies; - idp->ncookies = ap->a_ncookies; idp->curroff = uio->uio_offset; if ((entryoffsetinblock = idp->curroff & bmask) && @@ -614,8 +671,20 @@ cd9660_readdir(v) if (error < 0) error = 0; + if (ap->a_ncookies != NULL) { + if (error) + free(cookies, M_TEMP); + else { + /* + * Work out the number of cookies actually used. 
+ */ + *ap->a_ncookies = ncookies - idp->ncookies; + *ap->a_cookies = cookies; + } + } + if (bp) - brelse (bp); + brelse (bp); uio->uio_offset = idp->uio_off; *ap->a_eofflag = idp->eofflag; @@ -651,7 +720,7 @@ cd9660_readlink(v) u_short symlen; int error; char *symname; - + ip = VTOI(ap->a_vp); imp = ip->i_mnt; uio = ap->a_uio; @@ -780,48 +849,12 @@ cd9660_lock(v) void *v; { struct vop_lock_args /* { - struct vnode *a_vp; + struct vnode *a_vp; } */ *ap = v; - register struct vnode *vp = ap->a_vp; - register struct iso_node *ip; -#ifdef DIAGNOSTIC - struct proc *p = curproc; /* XXX */ -#endif + struct vnode *vp = ap->a_vp; -start: - while (vp->v_flag & VXLOCK) { - vp->v_flag |= VXWANT; - sleep((caddr_t)vp, PINOD); - } - if (vp->v_tag == VT_NON) - return (ENOENT); - ip = VTOI(vp); - if (ip->i_flag & IN_LOCKED) { - ip->i_flag |= IN_WANTED; -#ifdef DIAGNOSTIC - if (p) { - if (p->p_pid == ip->i_lockholder) - panic("locking against myself"); - ip->i_lockwaiter = p->p_pid; - } else - ip->i_lockwaiter = -1; -#endif - (void) sleep((caddr_t)ip, PINOD); - goto start; - } -#ifdef DIAGNOSTIC - ip->i_lockwaiter = 0; - if (ip->i_lockholder != 0) - panic("lockholder (%d) != 0", ip->i_lockholder); - if (p && p->p_pid == 0) - printf("locking by process 0\n"); - if (p) - ip->i_lockholder = p->p_pid; - else - ip->i_lockholder = -1; -#endif - ip->i_flag |= IN_LOCKED; - return (0); + return (lockmgr(&VTOI(vp)->i_lock, ap->a_flags, &vp->v_interlock, + ap->a_p)); } /* @@ -834,27 +867,10 @@ cd9660_unlock(v) struct vop_unlock_args /* { struct vnode *a_vp; } */ *ap = v; - register struct iso_node *ip = VTOI(ap->a_vp); - -#ifdef DIAGNOSTIC - struct proc *p = curproc; /* XXX */ + struct vnode *vp = ap->a_vp; - if ((ip->i_flag & IN_LOCKED) == 0) { - vprint("cd9660_unlock: unlocked inode", ap->a_vp); - panic("cd9660_unlock NOT LOCKED"); - } - if (p && p->p_pid != ip->i_lockholder && p->p_pid > -1 && - ip->i_lockholder > -1/* && lockcount++ < 100*/) - panic("unlocker (%d) != lock holder (%d)", - 
p->p_pid, ip->i_lockholder); - ip->i_lockholder = 0; -#endif - ip->i_flag &= ~IN_LOCKED; - if (ip->i_flag & IN_WANTED) { - ip->i_flag &= ~IN_WANTED; - wakeup((caddr_t)ip); - } - return (0); + return (lockmgr(&VTOI(vp)->i_lock, ap->a_flags | LK_RELEASE, + &vp->v_interlock, ap->a_p)); } /* @@ -920,9 +936,7 @@ cd9660_islocked(v) struct vnode *a_vp; } */ *ap = v; - if (VTOI(ap->a_vp)->i_flag & IN_LOCKED) - return (1); - return (0); + return (lockstatus(&VTOI(ap->a_vp)->i_lock)); } /* @@ -966,24 +980,11 @@ cd9660_pathconf(v) } /* - * Unsupported operation - */ -/*ARGSUSED*/ -int -cd9660_enotsupp(v) - void *v; -{ - - return (EOPNOTSUPP); -} - -/* * Global vfs data structures for isofs */ -#define cd9660_create cd9660_enotsupp -#define cd9660_mknod cd9660_enotsupp -#define cd9660_setattr cd9660_enotsupp -#define cd9660_write cd9660_enotsupp +#define cd9660_create eopnotsupp +#define cd9660_mknod eopnotsupp +#define cd9660_write eopnotsupp #ifdef NFSSERVER int lease_check __P((void *)); #define cd9660_lease_check lease_check @@ -991,16 +992,17 @@ int lease_check __P((void *)); #define cd9660_lease_check nullop #endif #define cd9660_fsync nullop -#define cd9660_remove cd9660_enotsupp -#define cd9660_rename cd9660_enotsupp -#define cd9660_mkdir cd9660_enotsupp -#define cd9660_rmdir cd9660_enotsupp -#define cd9660_advlock cd9660_enotsupp -#define cd9660_valloc cd9660_enotsupp -#define cd9660_vfree cd9660_enotsupp -#define cd9660_truncate cd9660_enotsupp -#define cd9660_update cd9660_enotsupp -#define cd9660_bwrite cd9660_enotsupp +#define cd9660_remove eopnotsupp +#define cd9660_rename eopnotsupp +#define cd9660_mkdir eopnotsupp +#define cd9660_rmdir eopnotsupp +#define cd9660_advlock eopnotsupp +#define cd9660_valloc eopnotsupp +#define cd9660_vfree eopnotsupp +#define cd9660_truncate eopnotsupp +#define cd9660_update eopnotsupp +#define cd9660_bwrite eopnotsupp +#define cd9660_revoke vop_revoke /* * Global vfs data structures for cd9660 @@ -1021,6 +1023,7 @@ struct 
vnodeopv_entry_desc cd9660_vnodeop_entries[] = { { &vop_lease_desc, cd9660_lease_check },/* lease */ { &vop_ioctl_desc, cd9660_ioctl }, /* ioctl */ { &vop_select_desc, cd9660_select }, /* select */ + { &vop_revoke_desc, cd9660_revoke }, /* revoke */ { &vop_mmap_desc, cd9660_mmap }, /* mmap */ { &vop_fsync_desc, cd9660_fsync }, /* fsync */ { &vop_seek_desc, cd9660_seek }, /* seek */ @@ -1073,6 +1076,7 @@ struct vnodeopv_entry_desc cd9660_specop_entries[] = { { &vop_lease_desc, spec_lease_check }, /* lease */ { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ { &vop_select_desc, spec_select }, /* select */ + { &vop_revoke_desc, spec_revoke }, /* revoke */ { &vop_mmap_desc, spec_mmap }, /* mmap */ { &vop_fsync_desc, spec_fsync }, /* fsync */ { &vop_seek_desc, spec_seek }, /* seek */ @@ -1123,6 +1127,7 @@ struct vnodeopv_entry_desc cd9660_fifoop_entries[] = { { &vop_lease_desc, fifo_lease_check }, /* lease */ { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */ { &vop_select_desc, fifo_select }, /* select */ + { &vop_revoke_desc, fifo_revoke }, /* revoke */ { &vop_mmap_desc, fifo_mmap }, /* mmap */ { &vop_fsync_desc, fifo_fsync }, /* fsync */ { &vop_seek_desc, fifo_seek }, /* seek */ diff --git a/sys/isofs/cd9660/iso.h b/sys/isofs/cd9660/iso.h index 7602312acab..ac9ade108b1 100644 --- a/sys/isofs/cd9660/iso.h +++ b/sys/isofs/cd9660/iso.h @@ -1,4 +1,4 @@ -/* $OpenBSD: iso.h,v 1.3 1996/04/21 22:26:41 deraadt Exp $ */ +/* $OpenBSD: iso.h,v 1.4 1997/10/06 15:08:47 csapuntz Exp $ */ /* $NetBSD: iso.h,v 1.11 1996/03/16 20:25:42 ws Exp $ */ /*- @@ -179,7 +179,9 @@ int cd9660_vget __P((struct mount *, ino_t, struct vnode **)); int cd9660_fhtovp __P((struct mount *, struct fid *, struct mbuf *, struct vnode **, int *, struct ucred **)); int cd9660_vptofh __P((struct vnode *, struct fid *)); -void cd9660_init __P((void)); +int cd9660_init __P((struct vfsconf *)); +#define cd9660_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \ + size_t, struct proc *)))eopnotsupp) int 
cd9660_mountroot __P((void)); diff --git a/sys/kern/exec_script.c b/sys/kern/exec_script.c index 551c52c7602..91f6d638397 100644 --- a/sys/kern/exec_script.c +++ b/sys/kern/exec_script.c @@ -1,4 +1,4 @@ -/* $OpenBSD: exec_script.c,v 1.4 1996/10/20 15:30:07 dm Exp $ */ +/* $OpenBSD: exec_script.c,v 1.5 1997/10/06 15:12:10 csapuntz Exp $ */ /* $NetBSD: exec_script.c,v 1.13 1996/02/04 02:15:06 christos Exp $ */ /* @@ -228,7 +228,7 @@ check_shell: scriptvp = epp->ep_vp; oldpnbuf = epp->ep_ndp->ni_cnd.cn_pnbuf; - VOP_UNLOCK(scriptvp); + VOP_UNLOCK(scriptvp, 0, p); if ((error = check_exec(p, epp)) == 0) { /* note that we've clobbered the header */ diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index a8f78c7511a..ae47944d74e 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -1,4 +1,4 @@ -/* $OpenBSD: init_main.c,v 1.24 1997/07/28 09:13:17 deraadt Exp $ */ +/* $OpenBSD: init_main.c,v 1.25 1997/10/06 15:12:12 csapuntz Exp $ */ /* $NetBSD: init_main.c,v 1.84.4.1 1996/06/02 09:08:06 mrg Exp $ */ /* @@ -335,17 +335,16 @@ main(framep) schedcpu(NULL); /* Mount the root file system. */ - if ((*mountroot)()) + if (vfs_mountroot()) panic("cannot mount root"); mountlist.cqh_first->mnt_flag |= MNT_ROOTFS; - mountlist.cqh_first->mnt_op->vfs_refcount++; /* Get the vnode for '/'. Set filedesc0.fd_fd.fd_cdir to reference it. 
*/ if (VFS_ROOT(mountlist.cqh_first, &rootvnode)) panic("cannot find root vnode"); filedesc0.fd_fd.fd_cdir = rootvnode; VREF(filedesc0.fd_fd.fd_cdir); - VOP_UNLOCK(rootvnode); + VOP_UNLOCK(rootvnode, 0, p); filedesc0.fd_fd.fd_rdir = NULL; swapinit(); @@ -584,6 +583,6 @@ start_update(p) */ p->p_flag |= P_INMEM | P_SYSTEM; /* XXX */ bcopy("update", curproc->p_comm, sizeof ("update")); - vn_update(); + sched_sync(p); /* NOTREACHED */ } diff --git a/sys/kern/kern_acct.c b/sys/kern/kern_acct.c index c371c085046..ccf5d0a79db 100644 --- a/sys/kern/kern_acct.c +++ b/sys/kern/kern_acct.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_acct.c,v 1.2 1996/03/03 17:19:40 niklas Exp $ */ +/* $OpenBSD: kern_acct.c,v 1.3 1997/10/06 15:12:14 csapuntz Exp $ */ /* $NetBSD: kern_acct.c,v 1.42 1996/02/04 02:15:12 christos Exp $ */ /*- @@ -120,7 +120,7 @@ sys_acct(p, v, retval) p); if ((error = vn_open(&nd, FWRITE, 0)) != 0) return (error); - VOP_UNLOCK(nd.ni_vp); + VOP_UNLOCK(nd.ni_vp, 0, p); if (nd.ni_vp->v_type != VREG) { vn_close(nd.ni_vp, FWRITE, p->p_ucred, p); return (EACCES); diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 4dfb48a0e9c..b70c6bd3cd2 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_exec.c,v 1.12 1997/08/01 22:54:50 deraadt Exp $ */ +/* $OpenBSD: kern_exec.c,v 1.13 1997/10/06 15:12:16 csapuntz Exp $ */ /* $NetBSD: kern_exec.c,v 1.75 1996/02/09 18:59:28 christos Exp $ */ /*- @@ -188,10 +188,10 @@ check_exec(p, epp) bad2: /* - * unlock and close the vnode, restore the old one, free the + * unlock and close the vnode, free the * pathname buf, and punt. 
*/ - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); vn_close(vp, FREAD, p->p_ucred, p); FREE(ndp->ni_cnd.cn_pnbuf, M_NAMEI); return error; diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 0f74d83a4f3..bab12bfe770 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_exit.c,v 1.9 1997/09/15 05:46:12 millert Exp $ */ +/* $OpenBSD: kern_exit.c,v 1.10 1997/10/06 15:12:17 csapuntz Exp $ */ /* $NetBSD: kern_exit.c,v 1.39 1996/04/22 01:38:25 christos Exp $ */ /* @@ -179,7 +179,7 @@ exit1(p, rv) * if we blocked. */ if (sp->s_ttyvp) - vgoneall(sp->s_ttyvp); + VOP_REVOKE(sp->s_ttyvp, REVOKEALL); } if (sp->s_ttyvp) vrele(sp->s_ttyvp); diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c index ab28fda5075..973ba8e5827 100644 --- a/sys/kern/kern_ktrace.c +++ b/sys/kern/kern_ktrace.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_ktrace.c,v 1.3 1996/03/03 17:19:46 niklas Exp $ */ +/* $OpenBSD: kern_ktrace.c,v 1.4 1997/10/06 15:12:18 csapuntz Exp $ */ /* $NetBSD: kern_ktrace.c,v 1.23 1996/02/09 18:59:36 christos Exp $ */ /* @@ -293,7 +293,9 @@ sys_ktrace(curp, v, retval) return (error); } vp = nd.ni_vp; - VOP_UNLOCK(vp); + + /* FIXME: Should be curp?? 
*/ + VOP_UNLOCK(vp, 0, p); if (vp->v_type != VREG) { (void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp); curp->p_traceflag &= ~KTRFAC_ACTIVE; @@ -468,9 +470,9 @@ ktrwrite(vp, kth) aiov[1].iov_len = kth->ktr_len; auio.uio_resid += kth->ktr_len; } - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, p->p_ucred); - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); if (!error) return; /* diff --git a/sys/kern/kern_lkm.c b/sys/kern/kern_lkm.c index 144df985707..524b3671468 100644 --- a/sys/kern/kern_lkm.c +++ b/sys/kern/kern_lkm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_lkm.c,v 1.18 1997/09/24 18:16:22 mickey Exp $ */ +/* $OpenBSD: kern_lkm.c,v 1.19 1997/10/06 15:12:19 csapuntz Exp $ */ /* $NetBSD: kern_lkm.c,v 1.31 1996/03/31 21:40:27 christos Exp $ */ /* @@ -689,57 +689,52 @@ _lkm_vfs(lkmtp, cmd) struct lkm_table *lkmtp; int cmd; { - struct lkm_vfs *args = lkmtp->private.lkm_vfs; - int i; int error = 0; - +#if 0 + struct lkm_vfs *args = lkmtp->private.lkm_vfs; + struct vfsconf *vfsp, **vfspp; +#endif switch(cmd) { case LKM_E_LOAD: /* don't load twice! 
*/ if (lkmexists(lkmtp)) return (EEXIST); + return (EEXIST); +#if 0 /* make sure there's no VFS in the table with this name */ - for (i = 0; i < nvfssw; i++) - if (vfssw[i] != (struct vfsops *)0 && - strncmp(vfssw[i]->vfs_name, + for (vfspp = &vfsconf, vfsp = vfsconf; + vfsp; + vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next) + if (strncmp(vfsp->vfc_name, args->lkm_vfsops->vfs_name, MFSNAMELEN) == 0) return (EEXIST); + /* pick the last available empty slot */ - for (i = nvfssw - 1; i >= 0; i--) - if (vfssw[i] == (struct vfsops *)0) - break; - if (i == -1) { /* or if none, punt */ - error = EINVAL; - break; - } + MALLOC (vfsp, struct vfsconf *, sizeof (struct vfsconf), + M_VFS, M_WAITOK); + + /* Add tot he end of the list */ + *vfspp = vfsp; /* * Set up file system */ - vfssw[i] = args->lkm_vfsops; - vfssw[i]->vfs_refcount = 0; + /* FIXME (CPS): Setup new vfsconf structure */ /* * Call init function for this VFS... */ - (*(vfssw[i]->vfs_init))(); + (*(vfsp->vfc_vfsops->vfs_init))(vfsp); /* done! */ - args->lkm_offset = i; /* slot in vfssw[] */ + /* Nope - can't return this */ break; +#endif case LKM_E_UNLOAD: - /* current slot... */ - i = args->lkm_offset; - - if (vfssw[i]->vfs_refcount != 0) - return (EBUSY); - - /* replace current slot contents with old contents */ - vfssw[i] = (struct vfsops *)0; break; case LKM_E_STAT: /* no special handling... */ diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c new file mode 100644 index 00000000000..c6793d24f58 --- /dev/null +++ b/sys/kern/kern_lock.c @@ -0,0 +1,537 @@ +/* + * Copyright (c) 1995 + * The Regents of the University of California. All rights reserved. + * + * This code contains ideas from software contributed to Berkeley by + * Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating + * System project at Carnegie-Mellon University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)kern_lock.c 8.18 (Berkeley) 5/21/95 + */ + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/lock.h> +#include <sys/systm.h> + +#include <machine/cpu.h> + +void record_stacktrace __P((int *, int)); +void playback_stacktrace __P((int *, int)); + +/* + * Locking primitives implementation. + * Locks provide shared/exclusive sychronization. 
+ */ + +#ifdef DEBUG +#define COUNT(p, x) if (p) (p)->p_locks += (x) +#else +#define COUNT(p, x) +#endif + +#if NCPUS > 1 + +/* + * For multiprocessor system, try spin lock first. + * + * This should be inline expanded below, but we cannot have #if + * inside a multiline define. + */ +int lock_wait_time = 100; +#define PAUSE(lkp, wanted) \ + if (lock_wait_time > 0) { \ + int i; \ + \ + simple_unlock(&lkp->lk_interlock); \ + for (i = lock_wait_time; i > 0; i--) \ + if (!(wanted)) \ + break; \ + simple_lock(&lkp->lk_interlock); \ + } \ + if (!(wanted)) \ + break; + +#else /* NCPUS == 1 */ + +/* + * It is an error to spin on a uniprocessor as nothing will ever cause + * the simple lock to clear while we are executing. + */ +#define PAUSE(lkp, wanted) + +#endif /* NCPUS == 1 */ + +/* + * Acquire a resource. + */ +#define ACQUIRE(lkp, error, extflags, wanted) \ + PAUSE(lkp, wanted); \ + for (error = 0; wanted; ) { \ + (lkp)->lk_waitcount++; \ + simple_unlock(&(lkp)->lk_interlock); \ + error = tsleep((void *)lkp, (lkp)->lk_prio, \ + (lkp)->lk_wmesg, (lkp)->lk_timo); \ + simple_lock(&(lkp)->lk_interlock); \ + (lkp)->lk_waitcount--; \ + if (error) \ + break; \ + if ((extflags) & LK_SLEEPFAIL) { \ + error = ENOLCK; \ + break; \ + } \ + } + +/* + * Initialize a lock; required before use. + */ +void +lockinit(lkp, prio, wmesg, timo, flags) + struct lock *lkp; + int prio; + char *wmesg; + int timo; + int flags; +{ + + bzero(lkp, sizeof(struct lock)); + simple_lock_init(&lkp->lk_interlock); + lkp->lk_flags = flags & LK_EXTFLG_MASK; + lkp->lk_prio = prio; + lkp->lk_timo = timo; + lkp->lk_wmesg = wmesg; + lkp->lk_lockholder = LK_NOPROC; +} + +/* + * Determine the status of a lock. 
+ */ +int +lockstatus(lkp) + struct lock *lkp; +{ + int lock_type = 0; + + simple_lock(&lkp->lk_interlock); + if (lkp->lk_exclusivecount != 0) + lock_type = LK_EXCLUSIVE; + else if (lkp->lk_sharecount != 0) + lock_type = LK_SHARED; + simple_unlock(&lkp->lk_interlock); + return (lock_type); +} + +/* + * Set, change, or release a lock. + * + * Shared requests increment the shared count. Exclusive requests set the + * LK_WANT_EXCL flag (preventing further shared locks), and wait for already + * accepted shared locks and shared-to-exclusive upgrades to go away. + */ +int +lockmgr(lkp, flags, interlkp, p) + __volatile struct lock *lkp; + u_int flags; + struct simplelock *interlkp; + struct proc *p; +{ + int error; + pid_t pid; + int extflags; + + error = 0; + if (p) + pid = p->p_pid; + else + pid = LK_KERNPROC; + simple_lock(&lkp->lk_interlock); + if (flags & LK_INTERLOCK) + simple_unlock(interlkp); + extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK; +#ifdef DIAGNOSTIC + /* + * Once a lock has drained, the LK_DRAINING flag is set and an + * exclusive lock is returned. The only valid operation thereafter + * is a single release of that exclusive lock. This final release + * clears the LK_DRAINING flag and sets the LK_DRAINED flag. Any + * further requests of any sort will result in a panic. The bits + * selected for these two flags are chosen so that they will be set + * in memory that is freed (freed memory is filled with 0xdeadbeef). + * The final release is permitted to give a new lease on life to + * the lock by specifying LK_REENABLE. 
+ */ + if (lkp->lk_flags & (LK_DRAINING|LK_DRAINED)) { + if (lkp->lk_flags & LK_DRAINED) + panic("lockmgr: using decommissioned lock"); + if ((flags & LK_TYPE_MASK) != LK_RELEASE || + lkp->lk_lockholder != pid) + panic("lockmgr: non-release on draining lock: %d\n", + flags & LK_TYPE_MASK); + lkp->lk_flags &= ~LK_DRAINING; + if ((flags & LK_REENABLE) == 0) + lkp->lk_flags |= LK_DRAINED; + } +#endif DIAGNOSTIC + + switch (flags & LK_TYPE_MASK) { + + case LK_SHARED: + if (lkp->lk_lockholder != pid) { + /* + * If just polling, check to see if we will block. + */ + if ((extflags & LK_NOWAIT) && (lkp->lk_flags & + (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE))) { + error = EBUSY; + break; + } + /* + * Wait for exclusive locks and upgrades to clear. + */ + ACQUIRE(lkp, error, extflags, lkp->lk_flags & + (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)); + if (error) + break; + lkp->lk_sharecount++; + COUNT(p, 1); + break; + } + /* + * We hold an exclusive lock, so downgrade it to shared. + * An alternative would be to fail with EDEADLK. + */ + lkp->lk_sharecount++; + COUNT(p, 1); + /* fall into downgrade */ + + case LK_DOWNGRADE: + if (lkp->lk_lockholder != pid || lkp->lk_exclusivecount == 0) + panic("lockmgr: not holding exclusive lock"); + lkp->lk_sharecount += lkp->lk_exclusivecount; + lkp->lk_exclusivecount = 0; + lkp->lk_flags &= ~LK_HAVE_EXCL; + lkp->lk_lockholder = LK_NOPROC; + if (lkp->lk_waitcount) + wakeup((void *)lkp); + break; + + case LK_EXCLUPGRADE: + /* + * If another process is ahead of us to get an upgrade, + * then we want to fail rather than have an intervening + * exclusive access. + */ + if (lkp->lk_flags & LK_WANT_UPGRADE) { + lkp->lk_sharecount--; + COUNT(p, -1); + error = EBUSY; + break; + } + /* fall into normal upgrade */ + + case LK_UPGRADE: + /* + * Upgrade a shared lock to an exclusive one. 
If another + * shared lock has already requested an upgrade to an + * exclusive lock, our shared lock is released and an + * exclusive lock is requested (which will be granted + * after the upgrade). If we return an error, the file + * will always be unlocked. + */ + if (lkp->lk_lockholder == pid || lkp->lk_sharecount <= 0) + panic("lockmgr: upgrade exclusive lock"); + lkp->lk_sharecount--; + COUNT(p, -1); + /* + * If we are just polling, check to see if we will block. + */ + if ((extflags & LK_NOWAIT) && + ((lkp->lk_flags & LK_WANT_UPGRADE) || + lkp->lk_sharecount > 1)) { + error = EBUSY; + break; + } + if ((lkp->lk_flags & LK_WANT_UPGRADE) == 0) { + /* + * We are first shared lock to request an upgrade, so + * request upgrade and wait for the shared count to + * drop to zero, then take exclusive lock. + */ + lkp->lk_flags |= LK_WANT_UPGRADE; + ACQUIRE(lkp, error, extflags, lkp->lk_sharecount); + lkp->lk_flags &= ~LK_WANT_UPGRADE; + if (error) + break; + lkp->lk_flags |= LK_HAVE_EXCL; + lkp->lk_lockholder = pid; + if (lkp->lk_exclusivecount != 0) + panic("lockmgr: non-zero exclusive count"); + lkp->lk_exclusivecount = 1; + COUNT(p, 1); + break; + } + /* + * Someone else has requested upgrade. Release our shared + * lock, awaken upgrade requestor if we are the last shared + * lock, then request an exclusive lock. + */ + if (lkp->lk_sharecount == 0 && lkp->lk_waitcount) + wakeup((void *)lkp); + /* fall into exclusive request */ + + case LK_EXCLUSIVE: + if (lkp->lk_lockholder == pid && pid != LK_KERNPROC) { + /* + * Recursive lock. + */ + if ((extflags & LK_CANRECURSE) == 0) + panic("lockmgr: locking against myself"); + lkp->lk_exclusivecount++; + COUNT(p, 1); + break; + } + /* + * If we are just polling, check to see if we will sleep. + */ + if ((extflags & LK_NOWAIT) && ((lkp->lk_flags & + (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) || + lkp->lk_sharecount != 0)) { + error = EBUSY; + break; + } + /* + * Try to acquire the want_exclusive flag. 
+ */ + ACQUIRE(lkp, error, extflags, lkp->lk_flags & + (LK_HAVE_EXCL | LK_WANT_EXCL)); + if (error) + break; + lkp->lk_flags |= LK_WANT_EXCL; + /* + * Wait for shared locks and upgrades to finish. + */ + ACQUIRE(lkp, error, extflags, lkp->lk_sharecount != 0 || + (lkp->lk_flags & LK_WANT_UPGRADE)); + lkp->lk_flags &= ~LK_WANT_EXCL; + if (error) + break; + lkp->lk_flags |= LK_HAVE_EXCL; + lkp->lk_lockholder = pid; + if (lkp->lk_exclusivecount != 0) + panic("lockmgr: non-zero exclusive count"); + lkp->lk_exclusivecount = 1; + COUNT(p, 1); + break; + + case LK_RELEASE: + if (lkp->lk_exclusivecount != 0) { + if (pid != lkp->lk_lockholder) + panic("lockmgr: pid %d, not %s %d unlocking", + pid, "exclusive lock holder", + lkp->lk_lockholder); + lkp->lk_exclusivecount--; + COUNT(p, -1); + if (lkp->lk_exclusivecount == 0) { + lkp->lk_flags &= ~LK_HAVE_EXCL; + lkp->lk_lockholder = LK_NOPROC; + } + } else if (lkp->lk_sharecount != 0) { + lkp->lk_sharecount--; + COUNT(p, -1); + } + if (lkp->lk_waitcount) + wakeup((void *)lkp); + break; + + case LK_DRAIN: + /* + * Check that we do not already hold the lock, as it can + * never drain if we do. Unfortunately, we have no way to + * check for holding a shared lock, but at least we can + * check for an exclusive one. + */ + if (lkp->lk_lockholder == pid) + panic("lockmgr: draining against myself"); + /* + * If we are just polling, check to see if we will sleep. 
+ */ + if ((extflags & LK_NOWAIT) && ((lkp->lk_flags & + (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) || + lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0)) { + error = EBUSY; + break; + } + PAUSE(lkp, ((lkp->lk_flags & + (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) || + lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0)); + for (error = 0; ((lkp->lk_flags & + (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) || + lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0); ) { + lkp->lk_flags |= LK_WAITDRAIN; + simple_unlock(&lkp->lk_interlock); + if ((error = tsleep((void *)&lkp->lk_flags, lkp->lk_prio, + lkp->lk_wmesg, lkp->lk_timo)) != 0) + return (error); + if ((extflags) & LK_SLEEPFAIL) + return (ENOLCK); + simple_lock(&lkp->lk_interlock); + } + lkp->lk_flags |= LK_DRAINING | LK_HAVE_EXCL; + lkp->lk_lockholder = pid; + lkp->lk_exclusivecount = 1; + COUNT(p, 1); + break; + + default: + simple_unlock(&lkp->lk_interlock); + panic("lockmgr: unknown locktype request %d", + flags & LK_TYPE_MASK); + /* NOTREACHED */ + } + if ((lkp->lk_flags & LK_WAITDRAIN) && ((lkp->lk_flags & + (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) == 0 && + lkp->lk_sharecount == 0 && lkp->lk_waitcount == 0)) { + lkp->lk_flags &= ~LK_WAITDRAIN; + wakeup((void *)&lkp->lk_flags); + } + simple_unlock(&lkp->lk_interlock); + return (error); +} + +/* + * Print out information about state of a lock. Used by VOP_PRINT + * routines to display ststus about contained locks. 
+ */ +void +lockmgr_printinfo(lkp) + struct lock *lkp; +{ + + if (lkp->lk_sharecount) + printf(" lock type %s: SHARED (count %d)", lkp->lk_wmesg, + lkp->lk_sharecount); + else if (lkp->lk_flags & LK_HAVE_EXCL) + printf(" lock type %s: EXCL (count %d) by pid %d", + lkp->lk_wmesg, lkp->lk_exclusivecount, lkp->lk_lockholder); + if (lkp->lk_waitcount > 0) + printf(" with %d pending", lkp->lk_waitcount); +} + +#if defined(DEBUG) && NCPUS == 1 +#include <sys/kernel.h> +#include <vm/vm.h> +#include <sys/sysctl.h> +int lockpausetime = 0; +struct ctldebug debug2 = { "lockpausetime", &lockpausetime }; +int simplelockrecurse; +/* + * Simple lock functions so that the debugger can see from whence + * they are being called. + */ +void +simple_lock_init(alp) + struct simplelock *alp; +{ + + alp->lock_data = 0; +} + +void +_simple_lock(alp, id, l) + __volatile struct simplelock *alp; + const char *id; + int l; +{ + + if (simplelockrecurse) + return; + if (alp->lock_data == 1) { + if (lockpausetime == -1) + panic("%s:%d: simple_lock: lock held", id, l); + printf("%s:%d: simple_lock: lock held\n", id, l); + if (lockpausetime == 1) { + BACKTRACE(curproc); + } else if (lockpausetime > 1) { + printf("%s:%d: simple_lock: lock held...", id, l); + tsleep(&lockpausetime, PCATCH | PPAUSE, "slock", + lockpausetime * hz); + printf(" continuing\n"); + } + } + alp->lock_data = 1; + if (curproc) + curproc->p_simple_locks++; +} + +int +_simple_lock_try(alp, id, l) + __volatile struct simplelock *alp; + const char *id; + int l; +{ + + if (alp->lock_data) + return (0); + if (simplelockrecurse) + return (1); + alp->lock_data = 1; + if (curproc) + curproc->p_simple_locks++; + return (1); +} + +void +_simple_unlock(alp, id, l) + __volatile struct simplelock *alp; + const char *id; + int l; +{ + + if (simplelockrecurse) + return; + if (alp->lock_data == 0) { + if (lockpausetime == -1) + panic("%s:%d: simple_unlock: lock not held", id, l); + printf("%s:%d: simple_unlock: lock not held\n", id, l); + if 
(lockpausetime == 1) { + BACKTRACE(curproc); + } else if (lockpausetime > 1) { + printf("%s:%d: simple_unlock: lock not held...", id, l); + tsleep(&lockpausetime, PCATCH | PPAUSE, "sunlock", + lockpausetime * hz); + printf(" continuing\n"); + } + } + alp->lock_data = 0; + if (curproc) + curproc->p_simple_locks--; +} +#endif /* DEBUG && NCPUS == 1 */ diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 98bc10fa302..2e29983bea4 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_sig.c,v 1.19 1997/09/15 05:46:13 millert Exp $ */ +/* $OpenBSD: kern_sig.c,v 1.20 1997/10/06 15:12:21 csapuntz Exp $ */ /* $NetBSD: kern_sig.c,v 1.54 1996/04/22 01:38:32 christos Exp $ */ /* @@ -1213,7 +1213,7 @@ coredump(p) UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, NULL, p); } out: - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); error1 = vn_close(vp, FWRITE, cred, p); crfree(cred); if (error == 0) diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index 91b556e6067..93d2459035d 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_synch.c,v 1.7 1997/07/28 09:13:17 deraadt Exp $ */ +/* $OpenBSD: kern_synch.c,v 1.8 1997/10/06 15:12:23 csapuntz Exp $ */ /* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */ /*- @@ -174,7 +174,6 @@ schedcpu(arg) register int s; register unsigned int newcpu; - wakeup((caddr_t)&lbolt); for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { /* * Increment time in/out of memory and sleep time @@ -223,6 +222,7 @@ schedcpu(arg) splx(s); } vmmeter(); + wakeup((caddr_t)&lbolt); timeout(schedcpu, (void *)0, hz); } diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 2ab8e6d63c0..923a4bd9077 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_sysctl.c,v 1.19 1997/09/03 13:51:08 kstailey Exp $ */ +/* $OpenBSD: kern_sysctl.c,v 1.20 1997/10/06 15:12:25 csapuntz Exp $ */ /* $NetBSD: kern_sysctl.c,v 
1.17 1996/05/20 17:49:05 mrg Exp $ */ /*- @@ -110,7 +110,7 @@ sys___sysctl(p, v, retval) switch (name[0]) { case CTL_KERN: fn = kern_sysctl; - if (name[2] != KERN_VNODE) /* XXX */ + if (name[2] == KERN_VNODE) /* XXX */ dolock = 0; break; case CTL_HW: @@ -125,6 +125,9 @@ sys___sysctl(p, v, retval) case CTL_FS: fn = fs_sysctl; break; + case CTL_VFS: + fn = vfs_sysctl; + break; case CTL_MACHDEP: fn = cpu_sysctl; break; @@ -264,7 +267,7 @@ kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) return (sysctl_rdstruct(oldp, oldlenp, newp, &boottime, sizeof(struct timeval))); case KERN_VNODE: - return (sysctl_vnode(oldp, oldlenp)); + return (sysctl_vnode(oldp, oldlenp, p)); case KERN_PROC: return (sysctl_doproc(name + 1, namelen - 1, oldp, oldlenp)); case KERN_FILE: diff --git a/sys/kern/spec_vnops.c b/sys/kern/spec_vnops.c index 321e910cd1b..c9d59179f98 100644 --- a/sys/kern/spec_vnops.c +++ b/sys/kern/spec_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: spec_vnops.c,v 1.9 1997/01/04 17:10:04 kstailey Exp $ */ +/* $OpenBSD: spec_vnops.c,v 1.10 1997/10/06 15:19:12 csapuntz Exp $ */ /* $NetBSD: spec_vnops.c,v 1.29 1996/04/22 01:42:38 christos Exp $ */ /* @@ -79,6 +79,7 @@ struct vnodeopv_entry_desc spec_vnodeop_entries[] = { { &vop_lease_desc, spec_lease_check }, /* lease */ { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ { &vop_select_desc, spec_select }, /* select */ + { &vop_revoke_desc, spec_revoke }, /* revoke */ { &vop_mmap_desc, spec_mmap }, /* mmap */ { &vop_fsync_desc, spec_fsync }, /* fsync */ { &vop_seek_desc, spec_seek }, /* seek */ @@ -143,8 +144,13 @@ spec_open(v) struct ucred *a_cred; struct proc *a_p; } */ *ap = v; - struct vnode *bvp, *vp = ap->a_vp; - dev_t bdev, dev = (dev_t)vp->v_rdev; + struct proc *p = ap->a_p; + struct vnode *vp = ap->a_vp; +#if 0 + struct vnode *bvp; + dev_t bdev; +#endif + dev_t dev = (dev_t)vp->v_rdev; register int maj = major(dev); int error; @@ -172,6 +178,7 @@ spec_open(v) * devices whose corresponding block devices are * currently 
mounted. */ +#if 0 if (securelevel >= 1) { if ((bdev = chrtoblk(dev)) != NODEV && vfinddev(bdev, VBLK, &bvp) && @@ -181,12 +188,13 @@ spec_open(v) if (iskmemdev(dev)) return (EPERM); } +#endif } if (cdevsw[maj].d_type == D_TTY) vp->v_flag |= VISTTY; - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: @@ -255,10 +263,10 @@ spec_read(v) switch (vp->v_type) { case VCHR: - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); error = (*cdevsw[major(vp->v_rdev)].d_read) (vp->v_rdev, uio, ap->a_ioflag); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: @@ -306,6 +314,19 @@ spec_read(v) /* NOTREACHED */ } +int +spec_inactive(v) + void *v; +{ + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap = v; + + VOP_UNLOCK(ap->a_vp, 0, ap->a_p); + return (0); +} + /* * Vnode op for write */ @@ -341,10 +362,10 @@ spec_write(v) switch (vp->v_type) { case VCHR: - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); error = (*cdevsw[major(vp->v_rdev)].d_write) (vp->v_rdev, uio, ap->a_ioflag); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: @@ -514,6 +535,74 @@ loop: /* * Just call the device strategy routine */ +int fs_read[16], fs_write[16]; + +int cur_found[10]; + +int fs_bwrite[64][10]; +int fs_bwrite_cnt[64]; +int num_found; + +int num_levels = 4; +#include <machine/cpu.h> +#include <machine/pcb.h> + +int find_stack(int); + +int find_stack(int levels) + +{ + struct pcb stack; + int *eip, *ebp; + + savectx(&stack); + ebp = (int *)stack.pcb_ebp; + eip = (int *) *(ebp + 1); + + while ((int)ebp > 0xf0000000 && levels--) { + eip = (int *) *(ebp + 1); + + ebp = (int *) *ebp; + } + + return ((int)eip); +} + +void track_write __P((void)); + +void track_write(void) + +{ + int idx, cnt; + + for (idx = 0; idx < 10; idx++) { + cur_found[idx] = find_stack(idx + num_levels); + } 
+ + for (cnt = 0; cnt < num_found; cnt++) { + for (idx = 0; idx < 10; idx++) { + if (fs_bwrite[cnt][idx] != cur_found[idx]) + goto next_iter; + } + + fs_bwrite_cnt[cnt]++; + break; + next_iter: + } + + if ((cnt == num_found) && + (num_found != 64)) { + for (idx = 0; idx < 10; idx++) { + fs_bwrite[num_found][idx] = cur_found[idx]; + } + + fs_bwrite_cnt[num_found] = 1; + num_found++; + } + + return; +} + int spec_strategy(v) void *v; @@ -521,8 +610,31 @@ spec_strategy(v) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap = v; + struct buf *bp; + + int maj = major(ap->a_bp->b_dev); + + if ((maj >= 0) && (maj < 16)) { + if (ap->a_bp->b_flags & B_READ) + fs_read[maj]++; + else { + fs_write[maj]++; + if (maj == 4) + track_write(); + + } + } + +#if 0 + assert (!(flags & (B_DELWRI | B_DONE))); +#endif - (*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp); + bp = ap->a_bp; + + if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_start) + (*bioops.io_start)(bp); + + (*bdevsw[maj].d_strategy)(ap->a_bp); return (0); } @@ -538,33 +650,16 @@ spec_bmap(v) daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; + int *a_runp; } */ *ap = v; if (ap->a_vpp != NULL) *ap->a_vpp = ap->a_vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn; - return (0); -} - -/* - * At the moment we do not do any locking. - */ -/* ARGSUSED */ -int -spec_lock(v) - void *v; -{ - - return (0); -} - -/* ARGSUSED */ -int -spec_unlock(v) - void *v; -{ - + if (ap->a_runp != NULL) + *ap->a_runp = 0; + return (0); } @@ -621,7 +716,9 @@ spec_close(v) * we must invalidate any in core blocks, so that * we can, for instance, change floppy disks. 
*/ + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0); + VOP_UNLOCK(vp, 0, ap->a_p); if (error) return (error); /* diff --git a/sys/kern/subr_xxx.c b/sys/kern/subr_xxx.c index 0d44bc841b4..a19a8a11a1e 100644 --- a/sys/kern/subr_xxx.c +++ b/sys/kern/subr_xxx.c @@ -1,4 +1,4 @@ -/* $OpenBSD: subr_xxx.c,v 1.3 1997/02/24 14:19:58 niklas Exp $ */ +/* $OpenBSD: subr_xxx.c,v 1.4 1997/10/06 15:12:26 csapuntz Exp $ */ /* $NetBSD: subr_xxx.c,v 1.10 1996/02/04 02:16:51 christos Exp $ */ /* @@ -91,8 +91,10 @@ enosys () * Return error for operation not supported * on a specific object or file type. */ +/*ARGSUSED*/ int -eopnotsupp() +eopnotsupp(v) + void *v; { return (EOPNOTSUPP); diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index ef811b417ba..ab02e3bb9d2 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sys_generic.c,v 1.8 1997/08/31 20:42:21 deraadt Exp $ */ +/* $OpenBSD: sys_generic.c,v 1.9 1997/10/06 15:12:28 csapuntz Exp $ */ /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ /* @@ -178,18 +178,12 @@ sys_readv(p, v, retval) goto done; auio.uio_resid = 0; for (i = 0; i < SCARG(uap, iovcnt); i++) { -#if 0 - /* Cannot happen iov_len is unsigned */ - if (iov->iov_len < 0) { + if (auio.uio_resid + iov->iov_len < auio.uio_resid) { error = EINVAL; goto done; } -#endif + auio.uio_resid += iov->iov_len; - if (auio.uio_resid < 0) { - error = EINVAL; - goto done; - } iov++; } #ifdef KTRACE @@ -337,18 +331,12 @@ sys_writev(p, v, retval) goto done; auio.uio_resid = 0; for (i = 0; i < SCARG(uap, iovcnt); i++) { -#if 0 - /* Cannot happen iov_len is unsigned */ - if (iov->iov_len < 0) { + if (auio.uio_resid + iov->iov_len < auio.uio_resid) { error = EINVAL; goto done; } -#endif + auio.uio_resid += iov->iov_len; - if (auio.uio_resid < 0) { - error = EINVAL; - goto done; - } iov++; } #ifdef KTRACE diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 
78f38f71f29..b4f48d76604 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sys_pipe.c,v 1.5 1997/02/24 14:19:58 niklas Exp $ */ +/* $OpenBSD: sys_pipe.c,v 1.6 1997/10/06 15:12:29 csapuntz Exp $ */ /* * Copyright (c) 1996 John S. Dyson @@ -77,7 +77,7 @@ #include <vm/vm.h> #include <vm/vm_prot.h> #include <vm/vm_param.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/vm_object.h> #include <vm/vm_kern.h> #include <vm/vm_extern.h> diff --git a/sys/kern/tty.c b/sys/kern/tty.c index 7548fd6befc..9a593842688 100644 --- a/sys/kern/tty.c +++ b/sys/kern/tty.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tty.c,v 1.27 1997/03/26 18:03:57 deraadt Exp $ */ +/* $OpenBSD: tty.c,v 1.28 1997/10/06 15:12:31 csapuntz Exp $ */ /* $NetBSD: tty.c,v 1.68.4.2 1996/06/06 16:04:52 thorpej Exp $ */ /*- @@ -812,9 +812,9 @@ ttioctl(tp, cmd, data, flag, p) error = namei(&nid); if (error) return (error); - VOP_LOCK(nid.ni_vp); + vn_lock(nid.ni_vp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(nid.ni_vp, VREAD, p->p_ucred, p); - VOP_UNLOCK(nid.ni_vp); + VOP_UNLOCK(nid.ni_vp, 0, p); vrele(nid.ni_vp); if (error) return (error); diff --git a/sys/kern/tty_tty.c b/sys/kern/tty_tty.c index 38c033636a4..0f70fcbda85 100644 --- a/sys/kern/tty_tty.c +++ b/sys/kern/tty_tty.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tty_tty.c,v 1.3 1996/04/21 22:27:32 deraadt Exp $ */ +/* $OpenBSD: tty_tty.c,v 1.4 1997/10/06 15:12:32 csapuntz Exp $ */ /* $NetBSD: tty_tty.c,v 1.13 1996/03/30 22:24:46 christos Exp $ */ /*- @@ -63,7 +63,7 @@ cttyopen(dev, flag, mode, p) if (ttyvp == NULL) return (ENXIO); - VOP_LOCK(ttyvp); + vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p); #ifdef PARANOID /* * Since group is tty and mode is 620 on most terminal lines @@ -78,7 +78,7 @@ cttyopen(dev, flag, mode, p) if (!error) #endif /* PARANOID */ error = VOP_OPEN(ttyvp, flag, NOCRED, p); - VOP_UNLOCK(ttyvp); + VOP_UNLOCK(ttyvp, 0, p); return (error); } @@ -89,14 +89,15 @@ cttyread(dev, uio, flag) struct uio *uio; int flag; { 
+ struct proc *p = uio->uio_procp; register struct vnode *ttyvp = cttyvp(uio->uio_procp); int error; if (ttyvp == NULL) return (EIO); - VOP_LOCK(ttyvp); + vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_READ(ttyvp, uio, flag, NOCRED); - VOP_UNLOCK(ttyvp); + VOP_UNLOCK(ttyvp, 0, p); return (error); } @@ -107,14 +108,15 @@ cttywrite(dev, uio, flag) struct uio *uio; int flag; { + struct proc *p = uio->uio_procp; register struct vnode *ttyvp = cttyvp(uio->uio_procp); int error; if (ttyvp == NULL) return (EIO); - VOP_LOCK(ttyvp); + vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_WRITE(ttyvp, uio, flag, NOCRED); - VOP_UNLOCK(ttyvp); + VOP_UNLOCK(ttyvp, 0, p); return (error); } diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index f1843da7ccc..58e0fcc5bad 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_usrreq.c,v 1.4 1997/06/05 08:13:12 deraadt Exp $ */ +/* $OpenBSD: uipc_usrreq.c,v 1.5 1997/10/06 15:12:33 csapuntz Exp $ */ /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ /* @@ -427,7 +427,7 @@ unp_bind(unp, nam, p) vp->v_socket = unp->unp_socket; unp->unp_vnode = vp; unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL); - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (0); } diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index d1d4592820c..c8e596db9a9 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_bio.c,v 1.15 1997/06/14 06:10:36 tholo Exp $ */ +/* $OpenBSD: vfs_bio.c,v 1.16 1997/10/06 15:12:35 csapuntz Exp $ */ /* $NetBSD: vfs_bio.c,v 1.44 1996/06/11 11:15:36 pk Exp $ */ /*- @@ -63,6 +63,8 @@ #include <vm/vm.h> +#include <miscfs/specfs/specdev.h> + /* Macros to clear/set/test flags. */ #define SET(t, f) (t) |= (f) #define CLR(t, f) (t) &= ~(f) @@ -94,6 +96,7 @@ u_long bufhash; TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES]; int needbuffer; +struct bio_ops bioops; /* * Insq/Remq for the buffer free lists. 
@@ -139,7 +142,6 @@ bufinit() register int i; int base, residual; - TAILQ_INIT(&bdirties); for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) TAILQ_INIT(dp); bufhashtbl = hashinit(nbuf, M_CACHE, &bufhash); @@ -153,6 +155,7 @@ bufinit() bp->b_wcred = NOCRED; bp->b_vnbufs.le_next = NOLIST; bp->b_data = buffers + i * MAXBSIZE; + LIST_INIT(&bp->b_dep); if (i < residual) bp->b_bufsize = (base + 1) * CLBYTES; else @@ -278,7 +281,7 @@ int bwrite(bp) struct buf *bp; { - int rv, sync, wasdelayed, s; + int rv, async, wasdelayed, s; /* * Remember buffer type, to switch on it later. If the write was @@ -287,34 +290,28 @@ bwrite(bp) * XXX note that this relies on delayed tape writes being converted * to async, not sync writes (which is safe, but ugly). */ - sync = !ISSET(bp->b_flags, B_ASYNC); - if (sync && bp->b_vp && bp->b_vp->v_mount && + async = ISSET(bp->b_flags, B_ASYNC); + if (!async && bp->b_vp && bp->b_vp->v_mount && ISSET(bp->b_vp->v_mount->mnt_flag, MNT_ASYNC)) { bdwrite(bp); return (0); } wasdelayed = ISSET(bp->b_flags, B_DELWRI); CLR(bp->b_flags, (B_READ | B_DONE | B_ERROR | B_DELWRI)); + + s = splbio(); + /* - * If this was a delayed write, remove it from the - * list of dirty blocks now + * If not synchronous, pay for the I/O operation and make + * sure the buf is on the correct vnode queue. We have + * to do this now, because if we don't, the vnode may not + * be properly notified that its I/O has completed. */ if (wasdelayed) - TAILQ_REMOVE(&bdirties, bp, b_synclist); - - s = splbio(); - if (!sync) { - /* - * If not synchronous, pay for the I/O operation and make - * sure the buf is on the correct vnode queue. We have - * to do this now, because if we don't, the vnode may not - * be properly notified that its I/O has completed. - */ - if (wasdelayed) - reassignbuf(bp, bp->b_vp); - else - curproc->p_stats->p_ru.ru_oublock++; - } + reassignbuf(bp, bp->b_vp); + else + curproc->p_stats->p_ru.ru_oublock++; + /* Initiate disk write. 
Make sure the appropriate party is charged. */ bp->b_vp->v_numoutput++; @@ -322,31 +319,18 @@ bwrite(bp) SET(bp->b_flags, B_WRITEINPROG); VOP_STRATEGY(bp); - if (sync) { - /* - * If I/O was synchronous, wait for it to complete. - */ - rv = biowait(bp); + if (async) + return (0); - /* - * Pay for the I/O operation, if it's not been paid for, and - * make sure it's on the correct vnode queue. (async operatings - * were payed for above.) - */ - s = splbio(); - if (wasdelayed) - reassignbuf(bp, bp->b_vp); - else - curproc->p_stats->p_ru.ru_oublock++; - splx(s); + /* + * If I/O was synchronous, wait for it to complete. + */ + rv = biowait(bp); - /* Release the buffer. */ - brelse(bp); + /* Release the buffer. */ + brelse(bp); - return (rv); - } else { - return (0); - } + return (rv); } int @@ -382,23 +366,10 @@ bdwrite(bp) * (3) Make sure it's on its vnode's correct block list, * (4) If a buffer is rewritten, move it to end of dirty list */ - bp->b_synctime = time.tv_sec + 30; if (!ISSET(bp->b_flags, B_DELWRI)) { - /* - * Add the buffer to the list of dirty blocks. - * If it is the first entry on the list, schedule - * a timeout to flush it to disk - */ - TAILQ_INSERT_TAIL(&bdirties, bp, b_synclist); - if (bdirties.tqh_first == bp) { - untimeout((void (*)__P((void *)))wakeup, - &bdirties); /* XXX */ - timeout((void (*)__P((void *)))wakeup, - &bdirties, 30 * hz); - } SET(bp->b_flags, B_DELWRI); - curproc->p_stats->p_ru.ru_oublock++; /* XXX */ reassignbuf(bp, bp->b_vp); + curproc->p_stats->p_ru.ru_oublock++; /* XXX */ } /* If this is a tape block, write the block now. */ @@ -426,142 +397,17 @@ bawrite(bp) VOP_BWRITE(bp); } -/* - * Write out dirty buffers if they have been on the dirty - * list for more than 30 seconds; scan for such buffers - * once a second. 
- */ void -vn_update() -{ - struct mount *mp, *nmp; - struct timespec ts; - struct vnode *vp; +bdirty(bp) struct buf *bp; - int async, s; +{ + struct proc *p = curproc; /* XXX */ - /* - * In case any buffers got scheduled for write before the - * process got started (should never happen) - */ - untimeout((void (*)__P((void *)))wakeup, - &bdirties); - for (;;) { - s = splbio(); - /* - * Schedule a wakeup when the next buffer is to - * be flushed to disk. If no buffers are enqueued, - * a wakeup will be scheduled at the time a new - * buffer is enqueued - */ - if ((bp = bdirties.tqh_first) != NULL) { - untimeout((void (*)__P((void *)))wakeup, - &bdirties); /* XXX */ - timeout((void (*)__P((void *)))wakeup, - &bdirties, (bp->b_synctime - time.tv_sec) * hz); - } - tsleep(&bdirties, PZERO - 1, "dirty", 0); - /* - * Walk the dirty block list, starting an asyncroneous - * write of any block that has timed out - */ - while ((bp = bdirties.tqh_first) != NULL && - bp->b_synctime <= time.tv_sec) { - /* - * If the block is currently busy (perhaps being - * written), move it to the end of the dirty list - * and go to the next block - */ - if (ISSET(bp->b_flags, B_BUSY)) { - TAILQ_REMOVE(&bdirties, bp, b_synclist); - TAILQ_INSERT_TAIL(&bdirties, bp, b_synclist); - bp->b_synctime = time.tv_sec + 30; - continue; - } - /* - * Remove the block from the per-vnode dirty - * list and mark it as busy - */ - bremfree(bp); - SET(bp->b_flags, B_BUSY); - splx(s); - /* - * Start an asyncroneous write of the buffer. 
- * Note that this will also remove the buffer - * from the dirty list - */ - bawrite(bp); - s = splbio(); - } - splx(s); - /* - * We also need to flush out modified vnodes - */ - for (mp = mountlist.cqh_last; - mp != (void *)&mountlist; - mp = nmp) { - /* - * Get the next pointer in case we hang of vfs_busy() - * while being unmounted - */ - nmp = mp->mnt_list.cqe_prev; - /* - * The lock check below is to avoid races with mount - * and unmount - */ - if ((mp->mnt_flag & (MNT_MLOCK | MNT_RDONLY | MNT_MPBUSY)) == 0 && - !vfs_busy(mp)) { - /* - * Turn off the file system async flag until - * we are done writing out vnodes - */ - async = mp->mnt_flag & MNT_ASYNC; - mp->mnt_flag &= ~MNT_ASYNC; - /* - * Walk the vnode list for the file system, - * writing each modified vnode out - */ -loop: - for (vp = mp->mnt_vnodelist.lh_first; - vp != NULL; - vp = vp->v_mntvnodes.le_next) { - /* - * If the vnode is no longer associated - * with the file system in question, skip - * it - */ - if (vp->v_mount != mp) - goto loop; - /* - * If the vnode is currently locked, - * ignore it - */ - if (VOP_ISLOCKED(vp)) - continue; - /* - * Lock the vnode, start a write and - * release the vnode - */ - if (vget(vp, 1)) - goto loop; - TIMEVAL_TO_TIMESPEC(&time, &ts); - VOP_UPDATE(vp, &ts, &ts, 0); - vput(vp); - } - /* - * Restore the file system async flag if it - * were previously set for this file system - */ - mp->mnt_flag |= async; - /* - * Get the next pointer again as the next - * file system might have been unmounted - * while we were flushing vnodes - */ - nmp = mp->mnt_list.cqe_prev; - vfs_unbusy(mp); - } - } + if (ISSET(bp->b_flags, B_DELWRI) == 0) { + SET(bp->b_flags, B_DELWRI); + reassignbuf(bp, bp->b_vp); + if (p) + p->p_stats->p_ru.ru_oublock++; } } @@ -576,18 +422,6 @@ brelse(bp) struct bqueues *bufq; int s; - /* Wake up any processes waiting for any buffer to become free. 
*/ - if (needbuffer) { - needbuffer = 0; - wakeup(&needbuffer); - } - - /* Wake up any proceeses waiting for _this_ buffer to become free. */ - if (ISSET(bp->b_flags, B_WANTED)) { - CLR(bp->b_flags, B_WANTED); - wakeup(bp); - } - /* Block disk interrupts. */ s = splbio(); @@ -622,11 +456,14 @@ brelse(bp) * If it's invalid or empty, dissociate it from its vnode * and put on the head of the appropriate queue. */ - if (bp->b_vp) - brelvp(bp); - if (ISSET(bp->b_flags, B_DELWRI)) - TAILQ_REMOVE(&bdirties, bp, b_synclist); + if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate) { + (*bioops.io_deallocate)(bp); + } CLR(bp->b_flags, B_DELWRI); + if (bp->b_vp) { + reassignbuf(bp, bp->b_vp); + brelvp(bp); + } if (bp->b_bufsize <= 0) /* no data */ bufq = &bufqueues[BQ_EMPTY]; @@ -657,6 +494,18 @@ already_queued: /* Allow disk interrupts. */ splx(s); + + /* Wake up any processes waiting for any buffer to become free. */ + if (needbuffer) { + needbuffer = 0; + wakeup(&needbuffer); + } + + /* Wake up any proceeses waiting for _this_ buffer to become free. */ + if (ISSET(bp->b_flags, B_WANTED)) { + CLR(bp->b_flags, B_WANTED); + wakeup(bp); + } } /* @@ -806,7 +655,7 @@ allocbuf(bp, size) /* find a buffer */ while ((nbp = getnewbuf(0, 0)) == NULL) ; - SET(nbp->b_flags, B_INVAL); + SET(nbp->b_flags, B_INVAL); binshash(nbp, &invalhash); /* and steal its pages, up to the amount we need */ @@ -875,16 +724,16 @@ getnewbuf(slpflag, slptimeo) start: s = splbio(); - if ((bp = bufqueues[BQ_AGE].tqh_first) != NULL || - (bp = bufqueues[BQ_LRU].tqh_first) != NULL) { - bremfree(bp); - } else { + if ((bp = bufqueues[BQ_AGE].tqh_first) == NULL && + (bp = bufqueues[BQ_LRU].tqh_first) == NULL) { /* wait for a free buffer of any kind */ needbuffer = 1; tsleep(&needbuffer, slpflag|(PRIBIO+1), "getnewbuf", slptimeo); splx(s); return (0); - } + } + + bremfree(bp); if (ISSET(bp->b_flags, B_VFLUSH)) { /* @@ -916,8 +765,12 @@ start: /* disassociate us from our vnode, if we had one... 
*/ if (bp->b_vp) brelvp(bp); + splx(s); + if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate) + (*bioops.io_deallocate)(bp); + /* clear out various other fields */ bp->b_flags = B_BUSY; bp->b_dev = NODEV; @@ -962,7 +815,9 @@ biowait(bp) if (ISSET(bp->b_flags, B_EINTR)) { CLR(bp->b_flags, B_EINTR); return (EINTR); - } else if (ISSET(bp->b_flags, B_ERROR)) + } + + if (ISSET(bp->b_flags, B_ERROR)) return (bp->b_error ? bp->b_error : EIO); else return (0); @@ -992,13 +847,18 @@ biodone(bp) panic("biodone already"); SET(bp->b_flags, B_DONE); /* note that it's done */ - if (!ISSET(bp->b_flags, B_READ)) /* wake up reader */ - vwakeup(bp); + if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_complete) + (*bioops.io_complete)(bp); if (ISSET(bp->b_flags, B_CALL)) { /* if necessary, call out */ CLR(bp->b_flags, B_CALL); /* but note callout done */ (*bp->b_iodone)(bp); - } else if (ISSET(bp->b_flags, B_ASYNC)) /* if async, release it */ + } + + if (!ISSET(bp->b_flags, B_READ)) /* wake up reader */ + vwakeup(bp); + + if (ISSET(bp->b_flags, B_ASYNC)) /* if async, release it */ brelse(bp); else { /* or just wakeup the buffer */ CLR(bp->b_flags, B_WANTED); diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c index d3fc332b376..87b024600bc 100644 --- a/sys/kern/vfs_cluster.c +++ b/sys/kern/vfs_cluster.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_cluster.c,v 1.8 1997/09/27 06:56:18 niklas Exp $ */ +/* $OpenBSD: vfs_cluster.c,v 1.9 1997/10/06 15:12:36 csapuntz Exp $ */ /* $NetBSD: vfs_cluster.c,v 1.12 1996/04/22 01:39:05 christos Exp $ */ /*- @@ -48,15 +48,6 @@ #include <vm/vm.h> -#ifdef DEBUG -#include <sys/sysctl.h> -int doreallocblks = 0; -struct ctldebug debug13 = { "doreallocblks", &doreallocblks }; -#else -/* XXX for cluster_write */ -#define doreallocblks 0 -#endif - /* * Local declarations */ @@ -518,8 +509,7 @@ cluster_write(bp, filesize) * Otherwise try reallocating to make it sequential. 
*/ cursize = vp->v_lastw - vp->v_cstart + 1; - if (!doreallocblks || - (lbn + 1) * bp->b_bcount != filesize || + if ((lbn + 1) * bp->b_bcount != filesize || lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) { cluster_wbuild(vp, NULL, bp->b_bcount, vp->v_cstart, cursize, lbn); @@ -708,13 +698,14 @@ redo: panic("Clustered write to wrong blocks"); } + if (LIST_FIRST(&tbp->b_dep) != NULL && bioops.io_start) + (*bioops.io_start)(tbp); + pagemove(tbp->b_data, cp, size); bp->b_bcount += size; bp->b_bufsize += size; tbp->b_bufsize -= size; - if (tbp->b_flags & B_DELWRI) - TAILQ_REMOVE(&bdirties, tbp, b_synclist); tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); /* * We might as well AGE the buffer here; it's either empty, or diff --git a/sys/kern/vfs_conf.c b/sys/kern/vfs_conf.c index 800aff67811..527450f20e5 100644 --- a/sys/kern/vfs_conf.c +++ b/sys/kern/vfs_conf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_conf.c,v 1.5 1997/02/24 14:20:01 niklas Exp $ */ +/* $OpenBSD: vfs_conf.c,v 1.6 1997/10/06 15:12:37 csapuntz Exp $ */ /* $NetBSD: vfs_conf.c,v 1.21.4.1 1995/11/01 00:06:26 jtc Exp $ */ /* @@ -40,6 +40,28 @@ #include <sys/mount.h> #include <sys/vnode.h> +#ifdef FFS +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ffs/ffs_extern.h> +#endif + +#ifdef CD9660 +#include <isofs/cd9660/iso.h> +#endif + +#ifdef MFS +#include <ufs/mfs/mfs_extern.h> +#endif + +#ifdef NFSCLIENT +#include <nfs/rpcv2.h> +#include <nfs/nfsproto.h> +#include <nfs/nfsnode.h> +#include <nfs/nfs.h> +#include <nfs/nfsmount.h> +#endif + /* * These define the root filesystem and device. */ @@ -50,6 +72,8 @@ struct vnode *rootvnode; * Set up the filesystem operations for vnodes. * The types are defined in mount.h. */ + + #ifdef FFS extern struct vfsops ffs_vfsops; #endif @@ -115,107 +139,102 @@ extern struct vfsops ext2fs_vfsops; #endif /* - * XXX ORDERING MATTERS, for COMPAT_09. when that goes away, - * empty slots can go away. + * Set up the filesystem operations for vnodes. 
*/ -struct vfsops *vfssw[] = { - NULL, /* 0 = MOUNT_NONE */ +static struct vfsconf vfsconflist[] = { + + /* Fast Filesystem */ #ifdef FFS - &ffs_vfsops, /* 1 = MOUNT_FFS */ -#else - NULL, + { &ffs_vfsops, "ffs", 1, 0, MNT_LOCAL, ffs_mountroot, NULL }, #endif -#ifdef NFSCLIENT - &nfs_vfsops, /* 2 = MOUNT_NFS */ -#else - NULL, + + /* Log-based Filesystem */ +#ifdef LFS + { &lfs_vfsops, "lfs", 5, 0, MNT_LOCAL, lfs_mountroot, NULL }, #endif + + /* Memory-based Filesystem */ #ifdef MFS - &mfs_vfsops, /* 3 = MOUNT_MFS */ -#else - NULL, -#endif -#ifdef MSDOSFS - &msdosfs_vfsops, /* 4 = MOUNT_MSDOS */ -#else - NULL, -#endif -#ifdef LFS - &lfs_vfsops, /* 5 = MOUNT_LFS */ -#else - NULL, + { &mfs_vfsops, "mfs", 3, 0, MNT_LOCAL, mfs_mountroot, NULL }, #endif - NULL, /* 6 = MOUNT_LOFS */ -#ifdef FDESC - &fdesc_vfsops, /* 7 = MOUNT_FDESC */ -#else - NULL, + + /* ISO9660 (aka CDROM) Filesystem */ +#ifdef CD9660 + { &cd9660_vfsops, "cd9660", 14, 0, MNT_LOCAL, cd9660_mountroot, NULL }, #endif -#ifdef PORTAL - &portal_vfsops, /* 8 = MOUNT_PORTAL */ -#else - NULL, + + /* MSDOS Filesystem */ +#ifdef MSDOSFS + { &msdosfs_vfsops, "msdos", 4, 0, MNT_LOCAL, NULL, NULL }, #endif -#ifdef NULLFS - &null_vfsops, /* 9 = MOUNT_NULL */ -#else - NULL, + + /* AmigaDOS Filesystem */ +#ifdef ADOSFS + { &adosfs_vfsops, "adosfs", 16, 0, MNT_LOCAL, NULL, NULL }, #endif -#ifdef UMAPFS - &umap_vfsops, /* 10 = MOUNT_UMAP */ -#else - NULL, + + /* Sun-compatible Network Filesystem */ +#ifdef NFSCLIENT + { &nfs_vfsops, "nfs", 2, 0, 0, nfs_mountroot, NULL }, #endif -#ifdef KERNFS - &kernfs_vfsops, /* 11 = MOUNT_KERNFS */ -#else - NULL, + + /* Andrew Filesystem */ +#ifdef AFS + { &afs_vfsops, "andrewfs", 13, 0, 0, afs_mountroot, NULL }, #endif + + /* /proc Filesystem */ #ifdef PROCFS - &procfs_vfsops, /* 12 = MOUNT_PROCFS */ -#else - NULL, + { &procfs_vfsops, "procfs", 12, 0, 0, NULL, NULL }, #endif -#ifdef AFS - &afs_vfsops, /* 13 = MOUNT_AFS */ -#else - NULL, -#endif -#ifdef CD9660 - &cd9660_vfsops, /* 14 = 
MOUNT_ISOFS */ -#else - NULL, + + /* Loopback (Minimal) Filesystem Layer */ +#ifdef NULLFS + { &null_vfsops, "loopback", 9, 0, 0, NULL, NULL }, #endif + + /* Union (translucent) Filesystem */ #ifdef UNION - &union_vfsops, /* 15 = MOUNT_UNION */ -#else - NULL, + { &union_vfsops, "union", 15, 0, 0, NULL, NULL }, #endif -#ifdef ADOSFS - &adosfs_vfsops, /* 16 = MOUNT_ADOSFS */ -#else - NULL, + + /* User/Group Identifer Remapping Filesystem */ +#ifdef UMAPFS + { &umap_vfsops, "umap", 10, 0, 0, NULL, NULL }, #endif -#ifdef EXT2FS - &ext2fs_vfsops, /* 17 = MOUNT_EXT2FS */ -#else - NULL, + + /* Portal Filesystem */ +#ifdef PORTAL + { &portal_vfsops, "portal", 8, 0, 0, NULL, NULL }, #endif -#ifdef LKM /* for LKM's. add new FS's before these */ - NULL, - NULL, - NULL, - NULL, + + /* File Descriptor Filesystem */ +#ifdef FDESC + { &fdesc_vfsops, "fdesc", 7, 0, 0, NULL, NULL }, #endif - 0 + + /* Kernel Information Filesystem */ +#ifdef KERNFS + { &kernfs_vfsops, "kernfs", 11, 0, 0, NULL, NULL }, +#endif + }; -int nvfssw = sizeof(vfssw) / sizeof(vfssw[0]); + + +/* + * Initially the size of the list, vfs_init will set maxvfsconf + * to the highest defined type number. + */ +int maxvfsconf = sizeof(vfsconflist) / sizeof (struct vfsconf); +struct vfsconf *vfsconf = vfsconflist; + /* * vfs_opv_descs enumerates the list of vnode classes, each with it's own * vnode operation vector. It is consulted at system boot to build operation * vectors. It is NULL terminated. 
*/ +extern struct vnodeopv_desc sync_vnodeop_opv_desc; extern struct vnodeopv_desc ffs_vnodeop_opv_desc; extern struct vnodeopv_desc ffs_specop_opv_desc; extern struct vnodeopv_desc ffs_fifoop_opv_desc; @@ -246,6 +265,7 @@ extern struct vnodeopv_desc ext2fs_specop_opv_desc; extern struct vnodeopv_desc ext2fs_fifoop_opv_desc; struct vnodeopv_desc *vfs_opv_descs[] = { + &sync_vnodeop_opv_desc, #ifdef FFS &ffs_vnodeop_opv_desc, &ffs_specop_opv_desc, diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c index 2071a8f633f..a10e5a3ff41 100644 --- a/sys/kern/vfs_init.c +++ b/sys/kern/vfs_init.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_init.c,v 1.4 1997/02/24 14:20:02 niklas Exp $ */ +/* $OpenBSD: vfs_init.c,v 1.5 1997/10/06 15:12:39 csapuntz Exp $ */ /* $NetBSD: vfs_init.c,v 1.6 1996/02/09 19:00:58 christos Exp $ */ /* @@ -243,7 +243,8 @@ struct vattr va_null; void vfsinit() { - struct vfsops **vfsp; + struct vfsconf *vfsp; + int i, maxtypenum; /* * Initialize the vnode table @@ -262,9 +263,15 @@ vfsinit() * Initialize each file system type. 
*/ vattr_null(&va_null); - for (vfsp = &vfssw[0]; vfsp < &vfssw[nvfssw]; vfsp++) { - if (*vfsp == NULL) - continue; - (*(*vfsp)->vfs_init)(); - } + maxtypenum = 0; + + for (vfsp = vfsconf, i = 1; i <= maxvfsconf; i++, vfsp++) { + if (i < maxvfsconf) + vfsp->vfc_next = vfsp + 1; + if (maxtypenum <= vfsp->vfc_typenum) + maxtypenum = vfsp->vfc_typenum + 1; + (*vfsp->vfc_vfsops->vfs_init)(vfsp); + } + /* next vfc_typenum to be used */ + maxvfsconf = maxtypenum; } diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index 6d3e4f8567c..aa86b97412e 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_lookup.c,v 1.8 1997/06/18 17:37:38 tholo Exp $ */ +/* $OpenBSD: vfs_lookup.c,v 1.9 1997/10/06 15:12:40 csapuntz Exp $ */ /* $NetBSD: vfs_lookup.c,v 1.17 1996/02/09 19:00:59 christos Exp $ */ /* @@ -88,6 +88,7 @@ namei(ndp) struct uio auio; int error, linklen; struct componentname *cnp = &ndp->ni_cnd; + struct proc *p = cnp->cn_proc; ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_proc->p_ucred; #ifdef DIAGNOSTIC @@ -164,7 +165,7 @@ namei(ndp) return (0); } if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) - VOP_UNLOCK(ndp->ni_dvp); + VOP_UNLOCK(ndp->ni_dvp, 0, p); if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; break; @@ -271,7 +272,7 @@ lookup(ndp) int error = 0; int slashes; struct componentname *cnp = &ndp->ni_cnd; - + struct proc *p = cnp->cn_proc; /* * Setup: break out flag bits into variables. 
*/ @@ -285,7 +286,7 @@ lookup(ndp) cnp->cn_flags &= ~ISSYMLINK; dp = ndp->ni_startdir; ndp->ni_startdir = NULLVP; - VOP_LOCK(dp); + vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); /* * If we have a leading string of slashes, remove them, and just make @@ -410,7 +411,7 @@ dirloop: dp = dp->v_mount->mnt_vnodecovered; vput(tdp); VREF(dp); - VOP_LOCK(dp); + vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); } } @@ -419,6 +420,7 @@ dirloop: */ unionlookup: ndp->ni_dvp = dp; + ndp->ni_vp = NULL; if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) { #ifdef DIAGNOSTIC if (ndp->ni_vp != NULL) @@ -434,7 +436,7 @@ unionlookup: dp = dp->v_mount->mnt_vnodecovered; vput(tdp); VREF(dp); - VOP_LOCK(dp); + vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); goto unionlookup; } @@ -491,12 +493,11 @@ unionlookup: */ while (dp->v_type == VDIR && (mp = dp->v_mountedhere) && (cnp->cn_flags & NOCROSSMOUNT) == 0) { - if (mp->mnt_flag & MNT_MLOCK) { - mp->mnt_flag |= MNT_MWAIT; - sleep((caddr_t)mp, PVFS); + if (vfs_busy(mp, 0, 0, p)) continue; - } - if ((error = VFS_ROOT(dp->v_mountedhere, &tdp)) != 0) + error = VFS_ROOT(mp, &tdp); + vfs_unbusy(mp, p); + if (error) goto bad2; vput(dp); ndp->ni_vp = dp = tdp; @@ -558,12 +559,12 @@ terminal: vrele(ndp->ni_dvp); } if ((cnp->cn_flags & LOCKLEAF) == 0) - VOP_UNLOCK(dp); + VOP_UNLOCK(dp, 0, p); return (0); bad2: if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN)) - VOP_UNLOCK(ndp->ni_dvp); + VOP_UNLOCK(ndp->ni_dvp, 0, p); vrele(ndp->ni_dvp); bad: vput(dp); @@ -579,6 +580,7 @@ relookup(dvp, vpp, cnp) struct vnode *dvp, **vpp; struct componentname *cnp; { + struct proc *p = cnp->cn_proc; register struct vnode *dp = 0; /* the directory we are searching */ int docache; /* == 0 do not cache last component */ int wantparent; /* 1 => wantparent or lockparent flag */ @@ -600,7 +602,7 @@ relookup(dvp, vpp, cnp) rdonly = cnp->cn_flags & RDONLY; cnp->cn_flags &= ~ISSYMLINK; dp = dvp; - VOP_LOCK(dp); + vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); /* dirloop: */ /* @@ -694,15 
+696,17 @@ relookup(dvp, vpp, cnp) if (!wantparent) vrele(dvp); if ((cnp->cn_flags & LOCKLEAF) == 0) - VOP_UNLOCK(dp); + VOP_UNLOCK(dp, 0, p); return (0); bad2: if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN)) - VOP_UNLOCK(dvp); + VOP_UNLOCK(dvp, 0, p); vrele(dvp); bad: vput(dp); *vpp = NULL; return (error); } + + diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 10ed04c26ca..f265b15051d 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_subr.c,v 1.10 1997/04/25 09:33:24 deraadt Exp $ */ +/* $OpenBSD: vfs_subr.c,v 1.11 1997/10/06 15:12:42 csapuntz Exp $ */ /* $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $ */ /* @@ -51,6 +51,7 @@ #include <sys/mount.h> #include <sys/time.h> #include <sys/fcntl.h> +#include <sys/kernel.h> #include <sys/vnode.h> #include <sys/stat.h> #include <sys/namei.h> @@ -88,8 +89,28 @@ int suid_clear = 1; /* 1 => clear SUID / SGID on owner change */ LIST_REMOVE(bp, b_vnbufs); \ (bp)->b_vnbufs.le_next = NOLIST; \ } -TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ + +struct freelst vnode_hold_list; /* list of vnodes referencing buffers */ +struct freelst vnode_free_list; /* vnode free list */ + struct mntlist mountlist; /* mounted filesystem list */ +struct simplelock mountlist_slock; +static struct simplelock mntid_slock; +struct simplelock mntvnode_slock; +struct simplelock vnode_free_list_slock; +static struct simplelock spechash_slock; + +/* + * The workitem queue. 
+ */ +#define SYNCER_MAXDELAY 32 +int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ +time_t syncdelay = 30; /* time to delay syncing vnodes */ + +static int syncer_delayno = 0; +static long syncer_mask; +LIST_HEAD(synclist, vnode); +static struct synclist *syncer_workitem_pending; int vfs_lock __P((struct mount *)); void vfs_unlock __P((struct mount *)); @@ -107,15 +128,16 @@ int bdevvp __P((dev_t, struct vnode **)); int cdevvp __P((dev_t, struct vnode **)); int getdevvp __P((dev_t, struct vnode **, enum vtype)); struct vnode *checkalias __P((struct vnode *, dev_t, struct mount *)); -int vget __P((struct vnode *, int)); void vref __P((struct vnode *)); void vput __P((struct vnode *)); void vrele __P((struct vnode *)); +int vunref __P((struct vnode *)); void vhold __P((struct vnode *)); void holdrele __P((struct vnode *)); int vflush __P((struct mount *, struct vnode *, int)); void vgoneall __P((struct vnode *)); void vgone __P((struct vnode *)); +void vgonel __P((struct vnode *, struct proc *)); int vcount __P((struct vnode *)); void vprint __P((char *, struct vnode *)); int vfs_mountedon __P((struct vnode *)); @@ -126,10 +148,10 @@ int vaccess __P((mode_t, uid_t, gid_t, mode_t, struct ucred *)); void vfs_unmountall __P((void)); void vfs_shutdown __P((void)); -static int vfs_hang_addrlist __P((struct mount *, struct netexport *, +int vfs_hang_addrlist __P((struct mount *, struct netexport *, struct export_args *)); -static int vfs_free_netcred __P((struct radix_node *, void *)); -static void vfs_free_addrlist __P((struct netexport *)); +int vfs_free_netcred __P((struct radix_node *, void *)); +void vfs_free_addrlist __P((struct netexport *)); #ifdef DEBUG void printlockedvnodes __P((void)); @@ -142,124 +164,191 @@ void vntblinit() { + simple_lock_init(&mntvnode_slock); + simple_lock_init(&mntid_slock); + simple_lock_init(&spechash_slock); + TAILQ_INIT(&vnode_hold_list); TAILQ_INIT(&vnode_free_list); + simple_lock_init(&vnode_free_list_slock); 
CIRCLEQ_INIT(&mountlist); + /* + * Initialize the filesystem syncer. + */ + syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, + &syncer_mask); + syncer_maxdelay = syncer_mask + 1; + } + /* - * Lock a filesystem. - * Used to prevent access to it while mounting and unmounting. + * Mark a mount point as busy. Used to synchornize access and to delay + * unmounting. Interlock is not released n failure. */ + int -vfs_lock(mp) - register struct mount *mp; +vfs_busy(mp, flags, interlkp, p) + struct mount *mp; + int flags; + struct simplelock *interlkp; + struct proc *p; { + int lkflags; - while (mp->mnt_flag & MNT_MLOCK) { + if (mp->mnt_flag & MNT_UNMOUNT) { + if (flags & LK_NOWAIT) + return (ENOENT); mp->mnt_flag |= MNT_MWAIT; - tsleep((caddr_t)mp, PVFS, "vfslock", 0); + if (interlkp) + simple_unlock(interlkp); + /* + * Since all busy locks are shared except the exclusive + * lock granted when unmounting, the only place that a + * wakeup needs to be done is at the release of the + * exclusive lock at the end of dounmount. + */ + sleep((caddr_t)mp, PVFS); + if (interlkp) + simple_lock(interlkp); + return (ENOENT); } - mp->mnt_flag |= MNT_MLOCK; - return (0); + lkflags = LK_SHARED; + if (interlkp) + lkflags |= LK_INTERLOCK; + if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) + panic("vfs_busy: unexpected lock failure"); + return (0); } + /* - * Unlock a locked filesystem. - * Panic if filesystem is not locked. + * Free a busy file system */ void -vfs_unlock(mp) - register struct mount *mp; +vfs_unbusy(mp, p) + struct mount *mp; + struct proc *p; { - - if ((mp->mnt_flag & MNT_MLOCK) == 0) - panic("vfs_unlock: not locked"); - mp->mnt_flag &= ~MNT_MLOCK; - if (mp->mnt_flag & MNT_MWAIT) { - mp->mnt_flag &= ~MNT_MWAIT; - wakeup((caddr_t)mp); - } + lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); } /* - * Mark a mount point as busy. - * Used to synchronize access and to delay unmounting. 
+ * Lookup a filesystem type, and if found allocate and initialize + * a mount structure for it. + * + * Devname is usually updated by mount(8) after booting. */ -int -vfs_busy(mp) - register struct mount *mp; -{ - while(mp->mnt_flag & MNT_MPBUSY) { - mp->mnt_flag |= MNT_MPWANT; - tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0); - } - if (mp->mnt_flag & MNT_UNMOUNT) - return (1); - mp->mnt_flag |= MNT_MPBUSY; - return (0); -} +int +vfs_rootmountalloc(fstypename, devname, mpp) + char *fstypename; + char *devname; + struct mount **mpp; + { + struct proc *p = curproc; /* XXX */ + struct vfsconf *vfsp; + struct mount *mp; + + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strcmp(vfsp->vfc_name, fstypename)) + break; + if (vfsp == NULL) + return (ENODEV); + mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); + bzero((char *)mp, (u_long)sizeof(struct mount)); + lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); + (void)vfs_busy(mp, LK_NOWAIT, 0, p); + LIST_INIT(&mp->mnt_vnodelist); + mp->mnt_vfc = vfsp; + mp->mnt_op = vfsp->vfc_vfsops; + mp->mnt_flag = MNT_RDONLY; + mp->mnt_vnodecovered = NULLVP; + vfsp->vfc_refcount++; + mp->mnt_stat.f_type = vfsp->vfc_typenum; + mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; + strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); + mp->mnt_stat.f_mntonname[0] = '/'; + (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); + *mpp = mp; + return (0); + } /* - * Free a busy filesystem. - * Panic if filesystem is not busy. - */ -void -vfs_unbusy(mp) - register struct mount *mp; + * Find an appropriate filesystem to use for the root. If a filesystem + * has not been preselected, walk through the list of known filesystems + * trying those that have mountroot routines, and try them until one + * works or we have tried them all. 
+ */ +int +vfs_mountroot() { - - if ((mp->mnt_flag & MNT_MPBUSY) == 0) - panic("vfs_unbusy: not busy"); - mp->mnt_flag &= ~MNT_MPBUSY; - if (mp->mnt_flag & MNT_MPWANT) { - mp->mnt_flag &= ~MNT_MPWANT; - wakeup((caddr_t)&mp->mnt_flag); - } + struct vfsconf *vfsp; + extern int (*mountroot)(void); + int error; + + if (mountroot != NULL) + return ((*mountroot)()); + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { + if (vfsp->vfc_mountroot == NULL) + continue; + if ((error = (*vfsp->vfc_mountroot)()) == 0) + return (0); + printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); + } + return (ENODEV); } - + /* * Lookup a mount point by filesystem identifier. */ struct mount * -getvfs(fsid) +vfs_getvfs(fsid) fsid_t *fsid; { register struct mount *mp; + simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; - mp = mp->mnt_list.cqe_next) + mp = mp->mnt_list.cqe_next) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && - mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) + mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { + simple_unlock(&mountlist_slock); return (mp); + } + } + simple_unlock(&mountlist_slock); return ((struct mount *)0); } + /* * Get a new unique fsid */ void -getnewfsid(mp, mtype) +vfs_getnewfsid(mp) struct mount *mp; - int mtype; { static u_short xxxfs_mntid; fsid_t tfsid; + int mtype; - mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + 11, 0); /* XXX */ + simple_lock(&mntid_slock); + mtype = mp->mnt_vfc->vfc_typenum; + mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); mp->mnt_stat.f_fsid.val[1] = mtype; if (xxxfs_mntid == 0) ++xxxfs_mntid; - tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid); + tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); tfsid.val[1] = mtype; if (mountlist.cqh_first != (void *)&mountlist) { - while (getvfs(&tfsid)) { + while (vfs_getvfs(&tfsid)) { tfsid.val[0]++; xxxfs_mntid++; } } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; + simple_unlock(&mntid_slock); } /* @@ -318,20 +407,56 @@ 
getnewvnode(tag, mp, vops, vpp) int (**vops) __P((void *)); struct vnode **vpp; { - register struct vnode *vp; + struct proc *p = curproc; /* XXX */ + struct freelst *listhd; + static int toggle; + struct vnode *vp; #ifdef DIAGNOSTIC int s; #endif - if ((vnode_free_list.tqh_first == NULL && - numvnodes < 2 * desiredvnodes) || - numvnodes < desiredvnodes) { + /* + * We must choose whether to allocate a new vnode or recycle an + * existing one. The criterion for allocating a new one is that + * the total number of vnodes is less than the number desired or + * there are no vnodes on either free list. Generally we only + * want to recycle vnodes that have no buffers associated with + * them, so we look first on the vnode_free_list. If it is empty, + * we next consider vnodes with referencing buffers on the + * vnode_hold_list. The toggle ensures that half the time we + * will use a buffer from the vnode_hold_list, and half the time + * we will allocate a new one unless the list has grown to twice + * the desired size. We are reticent to recycle vnodes from the + * vnode_hold_list because we will lose the identity of all its + * referencing buffers. + */ + toggle ^= 1; + if (numvnodes > 2 * desiredvnodes) + toggle = 0; + + + simple_lock(&vnode_free_list_slock); + if ((numvnodes < desiredvnodes) || + ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) && + ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) { + simple_unlock(&vnode_free_list_slock); vp = (struct vnode *)malloc((u_long)sizeof *vp, M_VNODE, M_WAITOK); bzero((char *)vp, sizeof *vp); numvnodes++; } else { - if ((vp = vnode_free_list.tqh_first) == NULL) { + for (vp = TAILQ_FIRST(listhd); vp != NULLVP; + vp = TAILQ_NEXT(vp, v_freelist)) { + if (simple_lock_try(&vp->v_interlock)) + break; + } + /* + * Unless this is a bad time of the month, at most + * the first NCPUS items on the free list are + * locked, so this is close enough to being empty. 
+ */ + if (vp == NULLVP) { + simple_unlock(&vnode_free_list_slock); tablefull("vnode"); *vpp = 0; return (ENFILE); @@ -340,12 +465,15 @@ getnewvnode(tag, mp, vops, vpp) vprint("free vnode", vp); panic("free vnode isn't"); } - TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + TAILQ_REMOVE(listhd, vp, v_freelist); /* see comment on why 0xdeadb is set at end of vgone (below) */ - vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb; + vp->v_flag |= VGONEHACK; + simple_unlock(&vnode_free_list_slock); vp->v_lease = NULL; if (vp->v_type != VBAD) - vgone(vp); + vgonel(vp, p); + else + simple_unlock(&vp->v_interlock); #ifdef DIAGNOSTIC if (vp->v_data) { vprint("cleaned vnode", vp); @@ -385,18 +513,19 @@ insmntque(vp, mp) register struct vnode *vp; register struct mount *mp; { - + simple_lock(&mntvnode_slock); /* * Delete from old mount point vnode list, if on one. */ + if (vp->v_mount != NULL) LIST_REMOVE(vp, v_mntvnodes); /* * Insert into list of vnodes for the new mount point, if available. */ - if ((vp->v_mount = mp) == NULL) - return; - LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); + if ((vp->v_mount = mp) != NULL) + LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); + simple_unlock(&mntvnode_slock); } /* @@ -435,14 +564,15 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) struct buf *nbp, *blist; int s, error; - if (flags & V_SAVE) { + if ((flags & V_SAVE) && vp->v_dirtyblkhd.lh_first != NULL) { if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0) return (error); if (vp->v_dirtyblkhd.lh_first != NULL) panic("vinvalbuf: dirty bufs"); } for (;;) { - if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA) + if ((blist = vp->v_cleanblkhd.lh_first) && + (flags & V_SAVEMETA)) while (blist && blist->b_lblkno < 0) blist = blist->b_vnbufs.le_next; if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && @@ -562,30 +692,140 @@ brelvp(bp) register struct buf *bp; { struct vnode *vp; + struct buf *wasdirty; - if (bp->b_vp == (struct vnode *) 0) + if ((vp = 
bp->b_vp) == (struct vnode *) 0) panic("brelvp: NULL"); /* * Delete from old vnode list, if on one. */ + wasdirty = vp->v_dirtyblkhd.lh_first; if (bp->b_vnbufs.le_next != NOLIST) bufremvn(bp); - vp = bp->b_vp; + if (wasdirty && LIST_FIRST(&vp->v_dirtyblkhd) == NULL) + LIST_REMOVE(vp, v_synclist); bp->b_vp = (struct vnode *) 0; HOLDRELE(vp); } /* - * Reassign a buffer from one vnode to another. - * Used to assign file specific control information - * (indirect blocks) to the vnode to which they belong. + * The workitem queue. + * + * It is useful to delay writes of file data and filesystem metadata + * for tens of seconds so that quickly created and deleted files need + * not waste disk bandwidth being created and removed. To realize this, + * we append vnodes to a "workitem" queue. When running with a soft + * updates implementation, most pending metadata dependencies should + * not wait for more than a few seconds. Thus, mounted on block devices + * are delayed only about a half the time that file data is delayed. + * Similarly, directory updates are more critical, so are only delayed + * about a third the time that file data is delayed. Thus, there are + * SYNCER_MAXDELAY queues that are processed round-robin at a rate of + * one each second (driven off the filesystem syner process). The + * syncer_delayno variable indicates the next queue that is to be processed. + * Items that need to be processed soon are placed in this queue: + * + * syncer_workitem_pending[syncer_delayno] + * + * A delay of fifteen seconds is done by placing the request fifteen + * entries later in the queue: + * + * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] + * + */ + +/* + * Add an item to the syncer work queue. 
+ */ +void +vn_syncer_add_to_worklist(vp, delay) + struct vnode *vp; + int delay; +{ + int s, slot; + + s = splbio(); + if (delay > syncer_maxdelay - 2) + delay = syncer_maxdelay - 2; + slot = (syncer_delayno + delay) & syncer_mask; + LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); + splx(s); +} + +/* + * System filesystem synchronizer daemon. + */ + +extern int lbolt; + +void +sched_sync(p) + struct proc *p; +{ + struct synclist *slp; + struct vnode *vp; + long starttime; + int s; + + for (;;) { + starttime = time.tv_sec; + + /* + * Push files whose dirty time has expired. + */ + s = splbio(); + slp = &syncer_workitem_pending[syncer_delayno]; + syncer_delayno += 1; + if (syncer_delayno == syncer_maxdelay) + syncer_delayno = 0; + splx(s); + while ((vp = LIST_FIRST(slp)) != NULL) { + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); + VOP_UNLOCK(vp, 0, p); + if (LIST_FIRST(slp) == vp) { + if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL) + panic("sched_sync: fsync failed"); + /* + * Move ourselves to the back of the sync list. + */ + LIST_REMOVE(vp, v_synclist); + vn_syncer_add_to_worklist(vp, syncdelay); + } + } + + /* + * Do soft update processing. + */ + if (bioops.io_sync) + (*bioops.io_sync)(NULL); + + /* + * If it has taken us less than a second to process the + * current work, then wait. Otherwise start right over + * again. We can still lose time if any single round + * takes more than two seconds, but it does not really + * matter as we are just trying to generally pace the + * filesystem activity. + */ + if (time.tv_sec == starttime) + tsleep(&lbolt, PPAUSE, "syncer", 0); + } +} + +/* + * Reassign a buffer from one vnode to another. Used to assign buffers + * to the appropriate clean or dirty list and to add newly dirty vnodes + * to the appropriate filesystem syncer list. 
*/ void reassignbuf(bp, newvp) register struct buf *bp; register struct vnode *newvp; { - register struct buflists *listheadp; + struct buflists *listheadp; + struct buf *wasdirty; + int delay; if (newvp == NULL) { printf("reassignbuf: NULL"); @@ -594,16 +834,36 @@ reassignbuf(bp, newvp) /* * Delete from old vnode list, if on one. */ + wasdirty = newvp->v_dirtyblkhd.lh_first; if (bp->b_vnbufs.le_next != NOLIST) bufremvn(bp); /* * If dirty, put on list of dirty buffers; * otherwise insert onto list of clean buffers. */ - if (bp->b_flags & B_DELWRI) - listheadp = &newvp->v_dirtyblkhd; - else + if ((bp->b_flags & B_DELWRI) == 0) { listheadp = &newvp->v_cleanblkhd; + if (wasdirty && LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) + LIST_REMOVE(newvp, v_synclist); + } else { + listheadp = &newvp->v_dirtyblkhd; + if (LIST_FIRST(listheadp) == NULL) { + switch (newvp->v_type) { + case VDIR: + delay = syncdelay / 3; + break; + case VBLK: + if (newvp->v_specmountpoint != NULL) { + delay = syncdelay / 2; + break; + } + /* fall through */ + default: + delay = syncdelay; + } + vn_syncer_add_to_worklist(newvp, delay); + } + } bufinsvn(bp, listheadp); } @@ -649,8 +909,10 @@ getdevvp(dev, vpp, type) struct vnode *nvp; int error; - if (dev == NODEV) + if (dev == NODEV) { + *vpp = NULLVP; return (0); + } error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp); if (error) { *vpp = NULLVP; @@ -680,6 +942,7 @@ checkalias(nvp, nvp_rdev, mp) dev_t nvp_rdev; struct mount *mp; { + struct proc *p = curproc; register struct vnode *vp; struct vnode **vpp; @@ -688,18 +951,23 @@ checkalias(nvp, nvp_rdev, mp) vpp = &speclisth[SPECHASH(nvp_rdev)]; loop: + simple_lock(&spechash_slock); for (vp = *vpp; vp; vp = vp->v_specnext) { + simple_lock(&vp->v_interlock); if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. 
*/ if (vp->v_usecount == 0) { - vgone(vp); + simple_unlock(&spechash_slock); + vgonel(vp, p); goto loop; } - if (vget(vp, 1)) + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { + simple_unlock(&spechash_slock); goto loop; + } break; } if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) { @@ -708,18 +976,21 @@ loop: nvp->v_rdev = nvp_rdev; nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; - nvp->v_specflags = 0; + nvp->v_specmountpoint = NULL; nvp->v_speclockf = NULL; + simple_unlock(&spechash_slock); *vpp = nvp; - if (vp != NULL) { + if (vp != NULLVP) { nvp->v_flag |= VALIASED; vp->v_flag |= VALIASED; vput(vp); } return (NULLVP); } - VOP_UNLOCK(vp); - vclean(vp, 0); + simple_unlock(&spechash_slock); + VOP_UNLOCK(vp, 0, p); + simple_lock(&vp->v_interlock); + vclean(vp, 0, p); vp->v_op = nvp->v_op; vp->v_tag = nvp->v_tag; nvp->v_type = VNON; @@ -736,91 +1007,260 @@ loop: * been changed to a new file system type). */ int -vget(vp, lockflag) - register struct vnode *vp; - int lockflag; +vget(vp, flags, p) + struct vnode *vp; + int flags; + struct proc *p; { - + int error; /* * If the vnode is in the process of being cleaned out for * another use, we wait for the cleaning to finish and then - * return failure. Cleaning is determined either by checking - * that the VXLOCK flag is set, or that the use count is - * zero with the back pointer set to show that it has been - * removed from the free list by getnewvnode. The VXLOCK - * flag may not have been set yet because vclean is blocked in - * the VOP_LOCK call waiting for the VOP_INACTIVE to complete. + * return failure. Cleaning is determined by checking that + * the VXLOCK flag is set. 
*/ - if ((vp->v_flag & VXLOCK) || - (vp->v_usecount == 0 && - vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) { - vp->v_flag |= VXWANT; + if ((flags & LK_INTERLOCK) == 0) + simple_lock(&vp->v_interlock); + if (vp->v_flag & VXLOCK) { + vp->v_flag |= VXWANT; + simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vget", 0); - return (1); + return (ENOENT); + } + if (vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); + if (vp->v_holdcnt > 0) + TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); + else + TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); } - if (vp->v_usecount == 0) - TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); - vp->v_usecount++; - if (lockflag) - VOP_LOCK(vp); + vp->v_usecount++; + if (flags & LK_TYPE_MASK) { + if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { + vunref(vp); + simple_unlock(&vp->v_interlock); + } + return (error); + } + simple_unlock(&vp->v_interlock); return (0); } /* - * Vnode reference, just increment the count + * Stubs to use when there is no locking to be done on the underlying object. + * A minimal shared lock is necessary to ensure that the underlying object + * is not revoked while an operation is in progress. So, an active shared + * count is maintained in an auxillary vnode lock structure. + */ +int +vop_nolock(v) + void *v; +{ + struct vop_lock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap = v; + +#ifdef notyet + /* + * This code cannot be used until all the non-locking filesystems + * (notably NFS) are converted to properly lock and release nodes. + * Also, certain vnode operations change the locking state within + * the operation (create, mknod, remove, link, rename, mkdir, rmdir, + * and symlink). Ideally these operations should not change the + * lock state, but should be changed to let the caller of the + * function unlock them. 
Otherwise all intermediate vnode layers + * (such as union, umapfs, etc) must catch these functions to do + * the necessary locking at their layer. Note that the inactive + * and lookup operations also change their lock state, but this + * cannot be avoided, so these two operations will always need + * to be handled in intermediate layers. + */ + struct vnode *vp = ap->a_vp; + int vnflags, flags = ap->a_flags; + + if (vp->v_vnlock == NULL) { + if ((flags & LK_TYPE_MASK) == LK_DRAIN) + return (0); + MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock), + M_VNODE, M_WAITOK); + lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); + } + switch (flags & LK_TYPE_MASK) { + case LK_DRAIN: + vnflags = LK_DRAIN; + break; + case LK_EXCLUSIVE: + case LK_SHARED: + vnflags = LK_SHARED; + break; + case LK_UPGRADE: + case LK_EXCLUPGRADE: + case LK_DOWNGRADE: + return (0); + case LK_RELEASE: + default: + panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK); + } + if (flags & LK_INTERLOCK) + vnflags |= LK_INTERLOCK; + return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p)); +#else /* for now */ + /* + * Since we are not using the lock manager, we must clear + * the interlock here. + */ + if (ap->a_flags & LK_INTERLOCK) + simple_unlock(&ap->a_vp->v_interlock); + return (0); +#endif +} + +/* + * Decrement the active use count. + */ + +int +vop_nounlock(v) + void *v; +{ + struct vop_unlock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap = v; + + struct vnode *vp = ap->a_vp; + + if (vp->v_vnlock == NULL) + return (0); + return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p)); +} + +/* + * Return whether or not the node is in use. + */ +int +vop_noislocked(v) + void *v; +{ + struct vop_islocked_args /* { + struct vnode *a_vp; + } */ *ap = v; + + struct vnode *vp = ap->a_vp; + + if (vp->v_vnlock == NULL) + return (0); + return (lockstatus(vp->v_vnlock)); +} + +/* + * Vnode reference. 
*/ void vref(vp) struct vnode *vp; { - + simple_lock(&vp->v_interlock); if (vp->v_usecount <= 0) panic("vref used where vget required"); vp->v_usecount++; + simple_unlock(&vp->v_interlock); } -/* - * vput(), just unlock and vrele() - */ -void -vput(vp) - register struct vnode *vp; + +int +vunref(vp) + struct vnode *vp; { +#ifdef DIAGNOSTIC + if (vp == NULL) + panic("vrele: null vp"); +#endif + simple_lock (&vp->v_interlock); + vp->v_usecount--; + if (vp->v_usecount > 0) { + simple_unlock(&vp->v_interlock); + return (vp->v_usecount); + } +#ifdef DIAGNOSTIC + if (vp->v_usecount < 0 || vp->v_writecount != 0) { + vprint("vrele: bad ref count", vp); + panic("vrele: ref cnt"); + } +#endif + /* + * insert at tail of LRU list + */ + simple_lock(&vnode_free_list_slock); + if (vp->v_holdcnt > 0) + TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); + else + TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); - VOP_UNLOCK(vp); - vrele(vp); + return (0); } /* - * Vnode release. - * If count drops to zero, call inactive routine and return to freelist. 
+ * vput(), just unlock and vrele() */ void -vrele(vp) +vput(vp) register struct vnode *vp; { + struct proc *p = curproc; /* XXX */ -#ifdef DIAGNOSTIC if (vp == NULL) - panic("vrele: null vp"); + panic("vput: null vp"); #endif + simple_lock(&vp->v_interlock); vp->v_usecount--; - if (vp->v_usecount > 0) + if (vp->v_usecount > 0) { + simple_unlock(&vp->v_interlock); + VOP_UNLOCK(vp, 0, p); return; + } #ifdef DIAGNOSTIC - if (vp->v_usecount != 0 || vp->v_writecount != 0) { - vprint("vrele: bad ref count", vp); - panic("vrele: ref cnt"); + if (vp->v_usecount < 0 || vp->v_writecount != 0) { + vprint("vput: bad ref count", vp); + panic("vput: ref cnt"); } #endif /* * insert at tail of LRU list */ - TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); - VOP_INACTIVE(vp); + simple_lock(&vnode_free_list_slock); + if (vp->v_holdcnt > 0) + TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); + else + TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); + simple_unlock(&vp->v_interlock); + VOP_INACTIVE(vp, p); } /* + * Vnode release - use for active VNODES. + * If count drops to zero, call inactive routine and return to freelist. + */ +void +vrele(vp) + register struct vnode *vp; +{ + struct proc *p = curproc; + + if (vunref(vp) == 0 && + vn_lock(vp, LK_EXCLUSIVE |LK_INTERLOCK, p) == 0) + VOP_INACTIVE(vp, p); +} + +#ifdef DIAGNOSTIC +/* * Page or buffer structure gets a reference. */ void @@ -828,7 +1268,27 @@ vhold(vp) register struct vnode *vp; { + /* + * If it is on the freelist and the hold count is currently + * zero, move it to the hold list. + * + * The VGONEHACK flag reflects a call from getnewvnode, + * which will remove the vnode from the free list, but + * will not increment the ref count until after it calls vgone + * If the ref count were incremented first, vgone would + * (incorrectly) try to close the previous instance of the + * underlying object. 
+ */ + simple_lock(&vp->v_interlock); + if (!(vp->v_flag & VGONEHACK) && + vp->v_holdcnt == 0 && vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); + TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); + } vp->v_holdcnt++; + simple_unlock(&vp->v_interlock); } /* @@ -839,10 +1299,26 @@ holdrele(vp) register struct vnode *vp; { + simple_lock(&vp->v_interlock); if (vp->v_holdcnt <= 0) panic("holdrele: holdcnt"); vp->v_holdcnt--; + /* + * If it is on the holdlist and the hold count drops to + * zero, move it to the free list. + * + * See above for VGONEHACK + */ + if (!(vp->v_flag & VGONEHACK) && + vp->v_holdcnt == 0 && vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); + TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); + TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); + } + simple_unlock(&vp->v_interlock); } +#endif /* DIAGNOSTIC */ /* * Remove any vnodes in the vnode table belonging to mount point mp. @@ -863,11 +1339,11 @@ vflush(mp, skipvp, flags) struct vnode *skipvp; int flags; { + struct proc *p = curproc; register struct vnode *vp, *nvp; int busy = 0; - if ((mp->mnt_flag & MNT_MPBUSY) == 0) - panic("vflush: not busy"); + simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { if (vp->v_mount != mp) @@ -878,24 +1354,32 @@ loop: */ if (vp == skipvp) continue; + + simple_lock(&vp->v_interlock); /* * Skip over a vnodes marked VSYSTEM. */ - if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) + if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { + simple_unlock(&vp->v_interlock); continue; + } /* * If WRITECLOSE is set, only flush out regular file * vnodes open for writing. 
*/ if ((flags & WRITECLOSE) && - (vp->v_writecount == 0 || vp->v_type != VREG)) + (vp->v_writecount == 0 || vp->v_type != VREG)) { + simple_unlock(&vp->v_interlock); continue; + } /* * With v_usecount == 0, all we need to do is clear * out the vnode data structures and we are done. */ if (vp->v_usecount == 0) { - vgone(vp); + simple_unlock(&mntvnode_slock); + vgonel(vp, p); + simple_lock(&mntvnode_slock); continue; } /* @@ -904,21 +1388,25 @@ loop: * anonymous device. For all other files, just kill them. */ if (flags & FORCECLOSE) { + simple_unlock(&mntvnode_slock); if (vp->v_type != VBLK && vp->v_type != VCHR) { - vgone(vp); + vgonel(vp, p); } else { - vclean(vp, 0); + vclean(vp, 0, p); vp->v_op = spec_vnodeop_p; insmntque(vp, (struct mount *)0); } + simple_lock(&mntvnode_slock); continue; } #ifdef DEBUG if (busyprt) vprint("vflush: busy vnode", vp); #endif + simple_unlock(&vp->v_interlock); busy++; } + simple_unlock(&mntvnode_slock); if (busy) return (EBUSY); return (0); @@ -926,11 +1414,13 @@ loop: /* * Disassociate the underlying file system from a vnode. + * The vnode interlock is held on entry. */ void -vclean(vp, flags) +vclean(vp, flags, p) register struct vnode *vp; int flags; + struct proc *p; { int active; @@ -941,15 +1431,8 @@ vclean(vp, flags) * race against ourselves to recycle it. */ if ((active = vp->v_usecount) != 0) - VREF(vp); - /* - * Even if the count is zero, the VOP_INACTIVE routine may still - * have the object locked while it cleans it out. The VOP_LOCK - * ensures that the VOP_INACTIVE routine is done with its work. - * For active vnodes, it ensures that no other activity can - * occur while the underlying object is being cleaned out. - */ - VOP_LOCK(vp); + vp->v_usecount++; + /* * Prevent the vnode from being recycled or * brought into use while we clean it out. @@ -957,32 +1440,57 @@ vclean(vp, flags) if (vp->v_flag & VXLOCK) panic("vclean: deadlock"); vp->v_flag |= VXLOCK; + + /* - * Clean out any buffers associated with the vnode. 
+ * Even if the count is zero, the VOP_INACTIVE routine may still + * have the object locked while it cleans it out. The VOP_LOCK + * ensures that the VOP_INACTIVE routine is done with its work. + * For active vnodes, it ensures that no other activity can + * occur while the underlying object is being cleaned out. */ - if (flags & DOCLOSE) - vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); + VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); + /* - * Any other processes trying to obtain this lock must first - * wait for VXLOCK to clear, then call the new lock operation. + * Clean out any buffers associated with the vnode. */ - VOP_UNLOCK(vp); + if (flags & DOCLOSE) + vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); /* * If purging an active vnode, it must be closed and - * deactivated before being reclaimed. + * deactivated before being reclaimed. Note that the + * VOP_INACTIVE will unlock the vnode */ if (active) { if (flags & DOCLOSE) - VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL); - VOP_INACTIVE(vp); + VOP_CLOSE(vp, FNONBLOCK, NOCRED, p); + VOP_INACTIVE(vp, p); + } else { + /* + * Any other processes trying to obtain this lock must first + * wait for VXLOCK to clear, then call the new lock operation. + */ + VOP_UNLOCK(vp, 0, p); } + /* * Reclaim the vnode. */ - if (VOP_RECLAIM(vp)) + if (VOP_RECLAIM(vp, p)) panic("vclean: cannot reclaim"); - if (active) - vrele(vp); + if (active) { + if (vunref(vp) == 0 && + vp->v_holdcnt > 0) + panic("vclean: not clean"); + simple_unlock(&vp->v_interlock); + } + cache_purge(vp); + if (vp->v_vnlock) { + if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) + vprint("vclean: lock not drained", vp); + FREE(vp->v_vnlock, M_VNODE); + vp->v_vnlock = NULL; + } /* * Done with purge, notify sleepers of the grim news. @@ -1000,12 +1508,25 @@ vclean(vp, flags) * Eliminate all activity associated with the requested vnode * and with all vnodes aliased to the requested vnode. 
*/ -void -vgoneall(vp) - register struct vnode *vp; +int +vop_revoke(v) + void *v; { - register struct vnode *vq; + struct vop_revoke_args /* { + struct vnode *a_vp; + int a_flags; + } */ *ap = v; + struct vnode *vp, *vq; + struct proc *p = curproc; + +#ifdef DIAGNOSTIC + if ((ap->a_flags & REVOKEALL) == 0) + panic("vop_revoke"); +#endif + vp = ap->a_vp; + simple_lock(&vp->v_interlock); + if (vp->v_flag & VALIASED) { /* * If a vgone (or vclean) is already in progress, @@ -1013,19 +1534,23 @@ vgoneall(vp) */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; - tsleep((caddr_t)vp, PINOD, "vgoneall", 0); - return; + simple_unlock(&vp->v_interlock); + tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); + return(0); } /* * Ensure that vp will not be vgone'd while we * are eliminating its aliases. */ vp->v_flag |= VXLOCK; + simple_unlock(&vp->v_interlock); while (vp->v_flag & VALIASED) { + simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type || vp == vq) continue; + simple_unlock(&spechash_slock); vgone(vq); break; } @@ -1035,9 +1560,34 @@ vgoneall(vp) * really eliminate the vnode after which time * vgone will awaken any sleepers. */ + simple_lock(&vp->v_interlock); vp->v_flag &= ~VXLOCK; } - vgone(vp); + vgonel(vp, p); + return (0); +} + + +/* + * Recycle an unused vnode to the front of the free list. + * Release the passed interlock if the vnode will be recycled. + */ +int +vrecycle(vp, inter_lkp, p) + struct vnode *vp; + struct simplelock *inter_lkp; + struct proc *p; +{ + + simple_lock(&vp->v_interlock); + if (vp->v_usecount == 0) { + if (inter_lkp) + simple_unlock(inter_lkp); + vgonel(vp, p); + return (1); + } + simple_unlock(&vp->v_interlock); + return (0); } /* @@ -1048,6 +1598,20 @@ void vgone(vp) register struct vnode *vp; { + struct proc *p = curproc; + + simple_lock (&vp->v_interlock); + vgonel(vp, p); +} + +/* + * vgone, with the vp interlock held. 
+ */ +void +vgonel(vp, p) + struct vnode *vp; + struct proc *p; +{ register struct vnode *vq; struct vnode *vx; @@ -1057,21 +1621,25 @@ vgone(vp) */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; + simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vgone", 0); return; } /* * Clean out the filesystem specific data. */ - vclean(vp, DOCLOSE); + vclean(vp, DOCLOSE, p); /* * Delete from old mount point vnode list, if on one. */ - insmntque(vp, (struct mount *)0); + if (vp->v_mount != NULL) + insmntque(vp, (struct mount *)0); /* - * If special device, remove it from special device alias list. + * If special device, remove it from special device alias list + * if it is on one. */ - if (vp->v_type == VBLK || vp->v_type == VCHR) { + if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { + simple_lock(&spechash_slock); if (*vp->v_hashchain == vp) { *vp->v_hashchain = vp->v_specnext; } else { @@ -1100,27 +1668,26 @@ vgone(vp) vx->v_flag &= ~VALIASED; vp->v_flag &= ~VALIASED; } + simple_unlock(&spechash_slock); FREE(vp->v_specinfo, M_VNODE); vp->v_specinfo = NULL; } /* * If it is on the freelist and not already at the head, - * move it to the head of the list. The test of the back - * pointer and the reference count of zero is because - * it will be removed from the free list by getnewvnode, - * but will not have its reference count incremented until - * after calling vgone. If the reference count were - * incremented first, vgone would (incorrectly) try to - * close the previous instance of the underlying object. - * So, the back pointer is explicitly set to `0xdeadb' in - * getnewvnode after removing it from the freelist to ensure - * that we do not try to move it here. + * move it to the head of the list. 
+ * + * See above about the VGONEHACK */ - if (vp->v_usecount == 0 && - vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb && - vnode_free_list.tqh_first != vp) { - TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); - TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); + if (vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); + if (vp->v_holdcnt > 0) + panic("vgonel: not clean"); + if (!(vp->v_flag & VGONEHACK) && + TAILQ_FIRST(&vnode_free_list) != vp) { + TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); + } + simple_unlock(&vnode_free_list_slock); } vp->v_type = VBAD; } @@ -1135,14 +1702,18 @@ vfinddev(dev, type, vpp) struct vnode **vpp; { register struct vnode *vp; + int rc =0; + simple_lock(&spechash_slock); for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { if (dev != vp->v_rdev || type != vp->v_type) continue; *vpp = vp; - return (1); + rc = 1; + break; } - return (0); + simple_unlock(&spechash_slock); + return (rc); } /* @@ -1150,14 +1721,15 @@ vfinddev(dev, type, vpp) */ int vcount(vp) - register struct vnode *vp; + struct vnode *vp; { - register struct vnode *vq, *vnext; + struct vnode *vq, *vnext; int count; loop: if ((vp->v_flag & VALIASED) == 0) return (vp->v_usecount); + simple_lock(&spechash_slock); for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { vnext = vq->v_specnext; if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) @@ -1166,11 +1738,13 @@ loop: * Alias, but not in use, so flush it out. 
*/ if (vq->v_usecount == 0 && vq != vp) { + simple_unlock(&spechash_slock); vgone(vq); goto loop; } count += vq->v_usecount; } + simple_unlock(&spechash_slock); return (count); } @@ -1225,21 +1799,77 @@ vprint(label, vp) void printlockedvnodes() { - register struct mount *mp; + struct proc *p = curproc; + register struct mount *mp, *nmp; register struct vnode *vp; printf("Locked vnodes\n"); + simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; - mp = mp->mnt_list.cqe_next) { + mp = nmp) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + nmp = mp->mnt_list.cqe_next; + continue; + } for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; - vp = vp->v_mntvnodes.le_next) + vp = vp->v_mntvnodes.le_next) { if (VOP_ISLOCKED(vp)) vprint((char *)0, vp); - } + simple_lock(&mountlist_slock); + nmp = mp->mnt_list.cqe_next; + vfs_unbusy(mp, p); + } + simple_unlock(&mountlist_slock); + } #endif +/* + * Top level filesystem related information gathering. + */ +int +vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) + int *name; + u_int namelen; + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; + struct proc *p; +{ + struct vfsconf *vfsp; + + /* all sysctl names at this level are at least name and field */ + if (namelen < 2) + return (ENOTDIR); /* overloaded */ + if (name[0] != VFS_GENERIC) { + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (vfsp->vfc_typenum == name[0]) + break; + if (vfsp == NULL) + return (EOPNOTSUPP); + return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, + oldp, oldlenp, newp, newlen, p)); + } + switch (name[1]) { + case VFS_MAXTYPENUM: + return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf)); + case VFS_CONF: + if (namelen < 3) + return (ENOTDIR); /* overloaded */ + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (vfsp->vfc_typenum == name[2]) + break; + if (vfsp == NULL) + return (EOPNOTSUPP); + return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp, + sizeof(struct vfsconf))); + 
} + return (EOPNOTSUPP); +} + + int kinfo_vdebug = 1; int kinfo_vgetfailed; #define KINFO_VNODESLOP 10 @@ -1249,12 +1879,13 @@ int kinfo_vgetfailed; */ /* ARGSUSED */ int -sysctl_vnode(where, sizep) +sysctl_vnode(where, sizep, p) char *where; size_t *sizep; + struct proc *p; { register struct mount *mp, *nmp; - struct vnode *vp; + struct vnode *vp, *nvp; register char *bp = where, *savebp; char *ewhere; int error; @@ -1267,27 +1898,32 @@ sysctl_vnode(where, sizep) } ewhere = where + *sizep; + simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { - nmp = mp->mnt_list.cqe_next; - if (vfs_busy(mp)) + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + nmp = mp->mnt_list.cqe_next; continue; + } savebp = bp; again: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; - vp = vp->v_mntvnodes.le_next) { + vp = nvp) { /* * Check that the vp is still associated with * this filesystem. RACE: could have been * recycled onto the same filesystem. */ if (vp->v_mount != mp) { + simple_unlock(&mntvnode_slock); if (kinfo_vdebug) printf("kinfo: vp changed\n"); bp = savebp; goto again; } + nvp = vp->v_mntvnodes.le_next; if (bp + VPTRSZ + VNODESZ > ewhere) { + simple_unlock(&mntvnode_slock); *sizep = bp - where; return (ENOMEM); } @@ -1295,10 +1931,17 @@ again: (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) return (error); bp += VPTRSZ + VNODESZ; + simple_lock(&mntvnode_slock); } - vfs_unbusy(mp); + + simple_unlock(&mntvnode_slock); + simple_lock(&mountlist_slock); + nmp = mp->mnt_list.cqe_next; + vfs_unbusy(mp, p); } + simple_unlock(&mountlist_slock); + *sizep = bp - where; return (0); } @@ -1311,26 +1954,31 @@ vfs_mountedon(vp) register struct vnode *vp; { register struct vnode *vq; + int error = 0; - if (vp->v_specflags & SI_MOUNTEDON) + if (vp->v_specmountpoint != NULL) return (EBUSY); if (vp->v_flag & VALIASED) { + simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || 
vq->v_type != vp->v_type) continue; - if (vq->v_specflags & SI_MOUNTEDON) - return (EBUSY); - } + if (vq->v_specmountpoint != NULL) { + error = EBUSY; + break; + } + } + simple_unlock(&spechash_slock); } - return (0); + return (error); } /* * Build hash lists of net addresses and hang them off the mount point. * Called by ufs_mount() to set up the lists of export addresses. */ -static int +int vfs_hang_addrlist(mp, nep, argp) struct mount *mp; struct netexport *nep; @@ -1404,7 +2052,7 @@ out: } /* ARGSUSED */ -static int +int vfs_free_netcred(rn, w) struct radix_node *rn; void *w; @@ -1419,7 +2067,7 @@ vfs_free_netcred(rn, w) /* * Free the net address hash lists that are hanging off the mount points. */ -static void +void vfs_free_addrlist(nep) struct netexport *nep; { @@ -1666,3 +2314,161 @@ fs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) } return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p); } + +/* + * Routine to create and manage a filesystem syncer vnode. + */ +#define sync_close nullop +int sync_fsync __P((void *)); +int sync_inactive __P((void *)); +#define sync_reclaim nullop +#define sync_lock vop_nolock +#define sync_unlock vop_nounlock +int sync_print __P((void *)); +#define sync_islocked vop_noislocked + +int (**sync_vnodeop_p) __P((void *)); +struct vnodeopv_entry_desc sync_vnodeop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_close_desc, sync_close }, /* close */ + { &vop_fsync_desc, sync_fsync }, /* fsync */ + { &vop_inactive_desc, sync_inactive }, /* inactive */ + { &vop_reclaim_desc, sync_reclaim }, /* reclaim */ + { &vop_lock_desc, sync_lock }, /* lock */ + { &vop_unlock_desc, sync_unlock }, /* unlock */ + { &vop_print_desc, sync_print }, /* print */ + { &vop_islocked_desc, sync_islocked }, /* islocked */ + { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL } +}; +struct vnodeopv_desc sync_vnodeop_opv_desc = + { &sync_vnodeop_p, sync_vnodeop_entries }; + +/* + * Create a new filesystem syncer 
vnode for the specified mount point. + */ +int +vfs_allocate_syncvnode(mp) + struct mount *mp; +{ + struct vnode *vp; + static long start, incr, next; + int error; + + /* Allocate a new vnode */ + if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { + mp->mnt_syncer = NULL; + return (error); + } + vp->v_writecount = 1; + vp->v_type = VNON; + /* + * Place the vnode onto the syncer worklist. We attempt to + * scatter them about on the list so that they will go off + * at evenly distributed times even if all the filesystems + * are mounted at once. + */ + next += incr; + if (next == 0 || next > syncer_maxdelay) { + start /= 2; + incr /= 2; + if (start == 0) { + start = syncer_maxdelay / 2; + incr = syncer_maxdelay; + } + next = start; + } + vn_syncer_add_to_worklist(vp, next); + mp->mnt_syncer = vp; + return (0); +} + +/* + * Do a lazy sync of the filesystem. + */ +int +sync_fsync(v) + void *v; +{ + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_waitfor; + struct proc *a_p; + } */ *ap = v; + + struct vnode *syncvp = ap->a_vp; + struct mount *mp = syncvp->v_mount; + int asyncflag; + + /* + * We only need to do something if this is a lazy evaluation. + */ + if (ap->a_waitfor != MNT_LAZY) + return (0); + + /* + * Move ourselves to the back of the sync list. + */ + LIST_REMOVE(syncvp, v_synclist); + vn_syncer_add_to_worklist(syncvp, syncdelay); + + /* + * Walk the list of vnodes pushing all that are dirty and + * not already on the sync list. + */ + simple_lock(&mountlist_slock); + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, ap->a_p) == 0) { + asyncflag = mp->mnt_flag & MNT_ASYNC; + mp->mnt_flag &= ~MNT_ASYNC; + VFS_SYNC(mp, MNT_LAZY, ap->a_cred, ap->a_p); + if (asyncflag) + mp->mnt_flag |= MNT_ASYNC; + vfs_unbusy(mp, ap->a_p); + } + return (0); +} + +/* + * The syncer vnode is no longer needed and is being decommissioned. 
+ */ +int +sync_inactive(v) + void *v; + +{ + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap = v; + + struct vnode *vp = ap->a_vp; + + if (vp->v_usecount == 0) + return (0); + vp->v_mount->mnt_syncer = NULL; + LIST_REMOVE(vp, v_synclist); + vp->v_writecount = 0; + vput(vp); + return (0); +} + +/* + * Print out a syncer vnode. + */ +int +sync_print(v) + void *v; + +{ + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + + printf("syncer vnode"); + if (vp->v_vnlock != NULL) + lockmgr_printinfo(vp->v_vnlock); + printf("\n"); + return (0); +} + diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 74d914ee7e8..f1e566ae6b8 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_syscalls.c,v 1.25 1997/03/02 09:38:35 millert Exp $ */ +/* $OpenBSD: vfs_syscalls.c,v 1.26 1997/10/06 15:12:43 csapuntz Exp $ */ /* $NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $ */ /* @@ -102,10 +102,11 @@ sys_mount(p, v, retval) register struct vnode *vp; register struct mount *mp; int error, flag = 0; - u_long fsindex = 0; + u_long fstypenum = 0; char fstypename[MFSNAMELEN]; struct vattr va; struct nameidata nd; + struct vfsconf *vfsp; if (usermount == 0 && (error = suser(p->p_ucred, &p->p_acflag))) return (error); @@ -156,7 +157,7 @@ sys_mount(p, v, retval) } SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; } - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); goto update; } /* @@ -195,12 +196,19 @@ sys_mount(p, v, retval) * string, we check to see if it matches one of the historic * filesystem types. 
*/ - fsindex = (u_long)SCARG(uap, type); - if (fsindex >= nvfssw || vfssw[fsindex] == NULL) { - vput(vp); - return (ENODEV); + fstypenum = (u_long)SCARG(uap, type); + + if (fstypenum < maxvfsconf) { + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (vfsp->vfc_typenum == fstypenum) + break; + if (vfsp == NULL) { + vput(vp); + return (ENODEV); + } + strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN); + } - strncpy(fstypename, vfssw[fsindex]->vfs_name, MFSNAMELEN); #else vput(vp); return (error); @@ -212,14 +220,16 @@ sys_mount(p, v, retval) strncpy( fstypename, "ffs", MFSNAMELEN); } #endif - for (fsindex = 0; fsindex < nvfssw; fsindex++) - if (vfssw[fsindex] != NULL && - !strncmp(vfssw[fsindex]->vfs_name, fstypename, MFSNAMELEN)) + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { + if (!strcmp(vfsp->vfc_name, fstypename)) break; - if (fsindex >= nvfssw) { + } + + if (vfsp == NULL) { vput(vp); return (ENODEV); } + if (vp->v_mountedhere != NULL) { vput(vp); return (EBUSY); @@ -231,14 +241,14 @@ sys_mount(p, v, retval) mp = (struct mount *)malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); - mp->mnt_op = vfssw[fsindex]; - if ((error = vfs_lock(mp)) != 0) { - free((caddr_t)mp, M_MOUNT); - vput(vp); - return (error); - } - /* Do this early in case we block later. 
*/ - vfssw[fsindex]->vfs_refcount++; + lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); + vfs_busy(mp, LK_NOWAIT, 0, p); + mp->mnt_op = vfsp->vfc_vfsops; + mp->mnt_vfc = vfsp; + vfsp->vfc_refcount++; + mp->mnt_stat.f_type = vfsp->vfc_typenum; + mp->mnt_flag |= (vfsp->vfc_flags & MNT_VISFLAGMASK); + strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); vp->v_mountedhere = mp; mp->mnt_vnodecovered = vp; mp->mnt_stat.f_owner = p->p_ucred->cr_uid; @@ -266,6 +276,17 @@ update: (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR); if (error) mp->mnt_flag = flag; + + if ((mp->mnt_flag & MNT_RDONLY) == 0) { + if (mp->mnt_syncer == NULL) + error = vfs_allocate_syncvnode(mp); + } else { + if (mp->mnt_syncer != NULL) + vgone(mp->mnt_syncer); + mp->mnt_syncer = NULL; + } + + vfs_unbusy(mp, p); return (error); } /* @@ -273,16 +294,20 @@ update: */ cache_purge(vp); if (!error) { + simple_lock(&mountlist_slock); CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); + simple_unlock(&mountlist_slock); checkdirs(vp); - VOP_UNLOCK(vp); - vfs_unlock(mp); + VOP_UNLOCK(vp, 0, p); + if ((mp->mnt_flag & MNT_RDONLY) == 0) + error = vfs_allocate_syncvnode(mp); + vfs_unbusy(mp, p); (void) VFS_STATFS(mp, &mp->mnt_stat, p); - error = VFS_START(mp, 0, p); + if ((error = VFS_START(mp, 0, p)) != 0) + vrele(vp); } else { mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0; - vfssw[fsindex]->vfs_refcount--; - vfs_unlock(mp); + vfs_unbusy(mp, p); free((caddr_t)mp, M_MOUNT); vput(vp); } @@ -397,36 +422,40 @@ dounmount(mp, flags, p) struct vnode *coveredvp; int error; - coveredvp = mp->mnt_vnodecovered; - if (vfs_busy(mp)) - return (EBUSY); + simple_lock(&mountlist_slock); mp->mnt_flag |= MNT_UNMOUNT; - if ((error = vfs_lock(mp)) != 0) + lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p); + mp->mnt_flag &=~ MNT_ASYNC; + vnode_pager_umount(mp); /* release cached vnodes */ + cache_purgevfs(mp); /* remove cache entries for this file sys */ + if (mp->mnt_syncer != NULL) + 
vgone(mp->mnt_syncer); + if (((mp->mnt_flag & MNT_RDONLY) || + (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || + (flags & MNT_FORCE)) + error = VFS_UNMOUNT(mp, flags, p); + simple_lock(&mountlist_slock); + if (error) { + if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) + (void) vfs_allocate_syncvnode(mp); + mp->mnt_flag &= ~MNT_UNMOUNT; + lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, + &mountlist_slock, p); return (error); - - mp->mnt_flag &=~ MNT_ASYNC; - vnode_pager_umount(mp); /* release cached vnodes */ - cache_purgevfs(mp); /* remove cache entries for this file sys */ - if ((error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0 || - (flags & MNT_FORCE)) - error = VFS_UNMOUNT(mp, flags, p); - mp->mnt_flag &= ~MNT_UNMOUNT; - vfs_unbusy(mp); - if (error) { - vfs_unlock(mp); - } else { - CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); - if (coveredvp != NULLVP) { - vrele(coveredvp); - coveredvp->v_mountedhere = (struct mount *)0; - } - mp->mnt_op->vfs_refcount--; - vfs_unlock(mp); - if (mp->mnt_vnodelist.lh_first != NULL) - panic("unmount: dangling vnode"); - free((caddr_t)mp, M_MOUNT); } - return (error); + CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); + if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { + coveredvp->v_mountedhere = (struct mount *)0; + vrele(coveredvp); + } + mp->mnt_vfc->vfc_refcount--; + if (mp->mnt_vnodelist.lh_first != NULL) + panic("unmount: dangling vnode"); + lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock, p); + if (mp->mnt_flag & MNT_MWAIT) + wakeup((caddr_t)mp); + free((caddr_t)mp, M_MOUNT); + return (0); } /* @@ -447,31 +476,25 @@ sys_sync(p, v, retval) register struct mount *mp, *nmp; int asyncflag; + simple_lock(&mountlist_slock); for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { - /* - * Get the next pointer in case we hang on vfs_busy - * while we are being unmounted. 
- */ - nmp = mp->mnt_list.cqe_prev; - /* - * The lock check below is to avoid races with mount - * and unmount. - */ - if ((mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_MPBUSY)) == 0 && - !vfs_busy(mp)) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + nmp = mp->mnt_list.cqe_next; + continue; + } + if ((mp->mnt_flag & MNT_RDONLY) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p); if (asyncflag) mp->mnt_flag |= MNT_ASYNC; - /* - * Get the next pointer again, as the next filesystem - * might have been unmounted while we were sync'ing. - */ - nmp = mp->mnt_list.cqe_prev; - vfs_unbusy(mp); } + simple_lock(&mountlist_slock); + nmp = mp->mnt_list.cqe_next; + vfs_unbusy(mp, p); } + simple_unlock(&mountlist_slock); + #ifdef DEBUG if (syncprt) vfs_bufstats(); @@ -596,7 +619,7 @@ sys_getfsstat(p, v, retval) syscallarg(long) bufsize; syscallarg(int) flags; } */ *uap = v; - register struct mount *mp; + register struct mount *mp, *nmp; register struct statfs *sp; caddr_t sfsp; long count, maxcount, error; @@ -604,20 +627,28 @@ sys_getfsstat(p, v, retval) maxcount = SCARG(uap, bufsize) / sizeof(struct statfs); sfsp = (caddr_t)SCARG(uap, buf); - for (count = 0, mp = mountlist.cqh_first; - mp != (void *)&mountlist; - mp = mp->mnt_list.cqe_next) { - if (sfsp && count < maxcount && - ((mp->mnt_flag & MNT_MLOCK) == 0)) { + count = 0; + simple_lock(&mountlist_slock); + for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + nmp = mp->mnt_list.cqe_next; + continue; + } + if (sfsp && count < maxcount) { sp = &mp->mnt_stat; /* - * If MNT_NOWAIT is specified, do not refresh the - * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. - */ - if (((SCARG(uap, flags) & MNT_NOWAIT) == 0 || + * If MNT_NOWAIT or MNT_LAZY is specified, do not + * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY + * overrides MNT_WAIT. 
+ */ + if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 || (SCARG(uap, flags) & MNT_WAIT)) && - (error = VFS_STATFS(mp, sp, p))) - continue; + (error = VFS_STATFS(mp, sp, p))) { + simple_lock(&mountlist_slock); + nmp = mp->mnt_list.cqe_next; + vfs_unbusy(mp, p); + continue; + } sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (suser(p->p_ucred, &p->p_acflag)) { bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); @@ -630,7 +661,11 @@ sys_getfsstat(p, v, retval) sfsp += sizeof(*sp); } count++; + simple_lock(&mountlist_slock); + nmp = mp->mnt_list.cqe_next; + vfs_unbusy(mp, p); } + simple_unlock(&mountlist_slock); if (sfsp && count > maxcount) *retval = maxcount; else @@ -661,7 +696,7 @@ sys_fchdir(p, v, retval) return (error); vp = (struct vnode *)fp->f_data; VREF(vp); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type != VDIR) error = ENOTDIR; else @@ -677,11 +712,21 @@ sys_fchdir(p, v, retval) vput(vp); vp = tdp; } - VOP_UNLOCK(vp); + while (!error && (mp = vp->v_mountedhere) != NULL) { + if (vfs_busy(mp, 0, 0, p)) + continue; + error = VFS_ROOT(mp, &tdp); + vfs_unbusy(mp, p); + if (error) + break; + vput(vp); + vp = tdp; + } if (error) { - vrele(vp); + vput(vp); return (error); } + VOP_UNLOCK(vp, 0, p); vrele(fdp->fd_cdir); fdp->fd_cdir = vp; return (0); @@ -768,9 +813,10 @@ change_dir(ndp, p) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); - VOP_UNLOCK(vp); if (error) - vrele(vp); + vput(vp); + else + VOP_UNLOCK(vp, 0, p); return (error); } @@ -837,7 +883,7 @@ sys_open(p, v, retval) type = F_FLOCK; if ((flags & FNONBLOCK) == 0) type |= F_WAIT; - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type); if (error) { (void) vn_close(vp, fp->f_flag, fp->f_cred, p); @@ -845,10 +891,10 @@ sys_open(p, v, retval) fdp->fd_ofiles[indx] = NULL; return (error); } - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); fp->f_flag |= FHASLOCK; } - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); 
*retval = indx; return (0); } @@ -1417,7 +1463,7 @@ sys_chflags(p, v, retval) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else { @@ -1452,7 +1498,7 @@ sys_fchflags(p, v, retval) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else { @@ -1460,7 +1506,7 @@ sys_fchflags(p, v, retval) vattr.va_flags = SCARG(uap, flags); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); } - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -1488,7 +1534,7 @@ sys_chmod(p, v, retval) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else { @@ -1523,7 +1569,7 @@ sys_fchmod(p, v, retval) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else { @@ -1531,7 +1577,7 @@ sys_fchmod(p, v, retval) vattr.va_mode = SCARG(uap, mode) & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); } - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -1561,7 +1607,7 @@ sys_chown(p, v, retval) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else { @@ -1613,7 +1659,7 @@ sys_lchown(p, v, retval) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else { @@ -1658,88 +1704,88 @@ sys_fchown(p, v, retval) struct vattr vattr; int error; 
struct file *fp; - u_short mode; - - if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) - return (error); - vp = (struct vnode *)fp->f_data; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); - if (vp->v_mount->mnt_flag & MNT_RDONLY) - error = EROFS; - else { - if (suser(p->p_ucred, &p->p_acflag) || - suid_clear) { - error = VOP_GETATTR(vp, &vattr, p->p_ucred, p); - if (error) - goto out; - mode = vattr.va_mode & ~(VSUID | VSGID); - if (mode == vattr.va_mode) - mode = VNOVAL; - } - else - mode = VNOVAL; - VATTR_NULL(&vattr); - vattr.va_uid = SCARG(uap, uid); - vattr.va_gid = SCARG(uap, gid); - vattr.va_mode = mode; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - } + u_short mode; + + if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) + return (error); + vp = (struct vnode *)fp->f_data; + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + if (vp->v_mount->mnt_flag & MNT_RDONLY) + error = EROFS; + else { + if (suser(p->p_ucred, &p->p_acflag) || + suid_clear) { + error = VOP_GETATTR(vp, &vattr, p->p_ucred, p); + if (error) + goto out; + mode = vattr.va_mode & ~(VSUID | VSGID); + if (mode == vattr.va_mode) + mode = VNOVAL; + } + else + mode = VNOVAL; + VATTR_NULL(&vattr); + vattr.va_uid = SCARG(uap, uid); + vattr.va_gid = SCARG(uap, gid); + vattr.va_mode = mode; + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + } out: - VOP_UNLOCK(vp); - return (error); + VOP_UNLOCK(vp, 0, p); + return (error); } - /* * Set the access and modification times given a path name. 
*/ /* ARGSUSED */ int sys_utimes(p, v, retval) - struct proc *p; - void *v; - register_t *retval; + struct proc *p; + void *v; + register_t *retval; { - register struct sys_utimes_args /* { - syscallarg(char *) path; - syscallarg(struct timeval *) tptr; - } */ *uap = v; - register struct vnode *vp; - struct timeval tv[2]; - struct vattr vattr; - int error; - struct nameidata nd; - - VATTR_NULL(&vattr); - if (SCARG(uap, tptr) == NULL) { - microtime(&tv[0]); - tv[1] = tv[0]; - vattr.va_vaflags |= VA_UTIMES_NULL; - } else { - error = copyin((caddr_t)SCARG(uap, tptr), (caddr_t)tv, - sizeof (tv)); - if (error) - return (error); - } - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); - if ((error = namei(&nd)) != 0) - return (error); - vp = nd.ni_vp; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); - if (vp->v_mount->mnt_flag & MNT_RDONLY) - error = EROFS; - else { - vattr.va_atime.tv_sec = tv[0].tv_sec; - vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000; - vattr.va_mtime.tv_sec = tv[1].tv_sec; - vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - } + register struct sys_utimes_args /* { + syscallarg(char *) path; + syscallarg(struct timeval *) tptr; + } */ *uap = v; + register struct vnode *vp; + struct timeval tv[2]; + struct vattr vattr; + int error; + struct nameidata nd; + + VATTR_NULL(&vattr); + if (SCARG(uap, tptr) == NULL) { + microtime(&tv[0]); + tv[1] = tv[0]; + vattr.va_vaflags |= VA_UTIMES_NULL; + } else { + error = copyin((caddr_t)SCARG(uap, tptr), (caddr_t)tv, + sizeof (tv)); + if (error) + return (error); + } + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + if (vp->v_mount->mnt_flag & MNT_RDONLY) + error = EROFS; + else { + vattr.va_atime.tv_sec = tv[0].tv_sec; + vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000; + 
vattr.va_mtime.tv_sec = tv[1].tv_sec; + vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000; + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + } vput(vp); - return (error); + return (error); } + /* * Set the access and modification times given a file descriptor. */ @@ -1775,7 +1821,7 @@ sys_futimes(p, v, retval) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else { @@ -1785,7 +1831,7 @@ sys_futimes(p, v, retval) vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); } - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -1814,7 +1860,7 @@ sys_truncate(p, v, retval) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0 && @@ -1853,7 +1899,7 @@ sys_ftruncate(p, v, retval) return (EINVAL); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0) { @@ -1861,7 +1907,7 @@ sys_ftruncate(p, v, retval) vattr.va_size = SCARG(uap, length); error = VOP_SETATTR(vp, &vattr, fp->f_cred, p); } - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -1885,9 +1931,9 @@ sys_fsync(p, v, retval) if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); vp = (struct vnode *)fp->f_data; - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p); - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -2108,11 +2154,11 @@ unionread: auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = SCARG(uap, count); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); loff = 
auio.uio_offset = fp->f_offset; - error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, (u_long *)0, 0); + error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 0, 0); fp->f_offset = auio.uio_offset; - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); if (error) return (error); if ((SCARG(uap, count) == auio.uio_resid) && @@ -2182,17 +2228,13 @@ sys_revoke(p, v, retval) if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; - if (vp->v_type != VCHR && vp->v_type != VBLK) { - error = EINVAL; - goto out; - } if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0) goto out; if (p->p_ucred->cr_uid != vattr.va_uid && (error = suser(p->p_ucred, &p->p_acflag))) goto out; if (vp->v_usecount > 1 || (vp->v_flag & VALIASED)) - vgoneall(vp); + VOP_REVOKE(vp, REVOKEALL); out: vrele(vp); return (error); diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index b99a001a165..3037cad20fe 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_vnops.c,v 1.5 1997/08/04 08:24:54 deraadt Exp $ */ +/* $OpenBSD: vfs_vnops.c,v 1.6 1997/10/06 15:12:45 csapuntz Exp $ */ /* $NetBSD: vfs_vnops.c,v 1.20 1996/02/04 02:18:41 christos Exp $ */ /* @@ -133,9 +133,9 @@ vn_open(ndp, fmode, cmode) } } if (fmode & O_TRUNC) { - VOP_UNLOCK(vp); /* XXX */ + VOP_UNLOCK(vp, 0, p); /* XXX */ VOP_LEASE(vp, p, cred, LEASE_WRITE); - VOP_LOCK(vp); /* XXX */ + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ VATTR_NULL(&va); va.va_size = 0; if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0) @@ -153,14 +153,14 @@ bad: /* * Check for write permissions on the specified vnode. - * The read-only status of the file system is checked. - * Also, prototype text segments cannot be written. + * Prototype text segments cannot be written. 
*/ int vn_writechk(vp) register struct vnode *vp; { +#if 0 /* * Disallow write attempts on read-only file systems; * unless the file is a socket or a block or character @@ -175,6 +175,7 @@ vn_writechk(vp) break; } } +#endif /* * If there's shared text associated with * the vnode, try to free it up once. If @@ -225,7 +226,7 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p) int error; if ((ioflg & IO_NODELOCKED) == 0) - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = base; @@ -246,7 +247,7 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p) if (auio.uio_resid && error == 0) error = EIO; if ((ioflg & IO_NODELOCKED) == 0) - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -261,16 +262,17 @@ vn_read(fp, uio, cred) { register struct vnode *vp = (struct vnode *)fp->f_data; int count, error = 0; + struct proc *p = uio->uio_procp; VOP_LEASE(vp, uio->uio_procp, cred, LEASE_READ); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); uio->uio_offset = fp->f_offset; count = uio->uio_resid; if (vp->v_type != VDIR) error = VOP_READ(vp, uio, (fp->f_flag & FNONBLOCK) ? 
IO_NDELAY : 0, cred); fp->f_offset += count - uio->uio_resid; - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -284,14 +286,18 @@ vn_write(fp, uio, cred) struct ucred *cred; { register struct vnode *vp = (struct vnode *)fp->f_data; + struct proc *p = uio->uio_procp; int count, error, ioflag = IO_UNIT; if (vp->v_type == VREG && (fp->f_flag & O_APPEND)) ioflag |= IO_APPEND; if (fp->f_flag & FNONBLOCK) ioflag |= IO_NDELAY; + if ((fp->f_flag & O_FSYNC) || + (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) + ioflag |= IO_SYNC; VOP_LEASE(vp, uio->uio_procp, cred, LEASE_WRITE); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); uio->uio_offset = fp->f_offset; count = uio->uio_resid; error = VOP_WRITE(vp, uio, ioflag, cred); @@ -299,7 +305,7 @@ vn_write(fp, uio, cred) fp->f_offset = uio->uio_offset; else fp->f_offset += count - uio->uio_resid; - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -427,6 +433,36 @@ vn_select(fp, which, p) } /* + * Check that the vnode is still valid, and if so + * acquire requested lock. + */ +int +vn_lock(vp, flags, p) + struct vnode *vp; + int flags; + struct proc *p; +{ + int error; + + do { + if ((flags & LK_INTERLOCK) == 0) + simple_lock(&vp->v_interlock); + if (vp->v_flag & VXLOCK) { + vp->v_flag |= VXWANT; + simple_unlock(&vp->v_interlock); + tsleep((caddr_t)vp, PINOD, "vn_lock", 0); + error = ENOENT; + } else { + error = VOP_LOCK(vp, flags | LK_INTERLOCK, p); + if (error == 0) + return (error); + } + flags &= ~LK_INTERLOCK; + } while (flags & LK_RETRY); + return (error); +} + +/* * File table vnode close routine. 
*/ int diff --git a/sys/kern/vnode_if.c b/sys/kern/vnode_if.c index 14f1f0c5a09..b373b57c591 100644 --- a/sys/kern/vnode_if.c +++ b/sys/kern/vnode_if.c @@ -218,6 +218,22 @@ struct vnodeop_desc vop_write_desc = { NULL, }; +int vop_lease_vp_offsets[] = { + VOPARG_OFFSETOF(struct vop_lease_args,a_vp), + VDESC_NO_OFFSET +}; +struct vnodeop_desc vop_lease_desc = { + 0, + "vop_lease", + 0, + vop_lease_vp_offsets, + VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vop_lease_args, a_cred), + VOPARG_OFFSETOF(struct vop_lease_args, a_p), + VDESC_NO_OFFSET, + NULL, +}; + int vop_ioctl_vp_offsets[] = { VOPARG_OFFSETOF(struct vop_ioctl_args,a_vp), VDESC_NO_OFFSET @@ -250,6 +266,22 @@ struct vnodeop_desc vop_select_desc = { NULL, }; +int vop_revoke_vp_offsets[] = { + VOPARG_OFFSETOF(struct vop_revoke_args,a_vp), + VDESC_NO_OFFSET +}; +struct vnodeop_desc vop_revoke_desc = { + 0, + "vop_revoke", + 0, + vop_revoke_vp_offsets, + VDESC_NO_OFFSET, + VDESC_NO_OFFSET, + VDESC_NO_OFFSET, + VDESC_NO_OFFSET, + NULL, +}; + int vop_mmap_vp_offsets[] = { VOPARG_OFFSETOF(struct vop_mmap_args,a_vp), VDESC_NO_OFFSET @@ -459,7 +491,7 @@ struct vnodeop_desc vop_inactive_desc = { vop_inactive_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vop_inactive_args, a_p), VDESC_NO_OFFSET, NULL, }; @@ -475,7 +507,7 @@ struct vnodeop_desc vop_reclaim_desc = { vop_reclaim_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vop_reclaim_args, a_p), VDESC_NO_OFFSET, NULL, }; @@ -491,7 +523,7 @@ struct vnodeop_desc vop_lock_desc = { vop_lock_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vop_lock_args, a_p), VDESC_NO_OFFSET, NULL, }; @@ -507,7 +539,7 @@ struct vnodeop_desc vop_unlock_desc = { vop_unlock_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vop_unlock_args, a_p), VDESC_NO_OFFSET, NULL, }; @@ -624,6 +656,22 @@ struct vnodeop_desc vop_valloc_desc = 
{ NULL, }; +int vop_balloc_vp_offsets[] = { + VOPARG_OFFSETOF(struct vop_balloc_args,a_vp), + VDESC_NO_OFFSET +}; +struct vnodeop_desc vop_balloc_desc = { + 0, + "vop_balloc", + 0, + vop_balloc_vp_offsets, + VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vop_balloc_args, a_cred), + VDESC_NO_OFFSET, + VDESC_NO_OFFSET, + NULL, +}; + int vop_reallocblks_vp_offsets[] = { VOPARG_OFFSETOF(struct vop_reallocblks_args,a_vp), VDESC_NO_OFFSET @@ -688,22 +736,6 @@ struct vnodeop_desc vop_update_desc = { NULL, }; -int vop_lease_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_lease_args,a_vp), - VDESC_NO_OFFSET -}; -struct vnodeop_desc vop_lease_desc = { - 0, - "vop_lease", - 0, - vop_lease_vp_offsets, - VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_lease_args, a_cred), - VOPARG_OFFSETOF(struct vop_lease_args, a_p), - VDESC_NO_OFFSET, - NULL, -}; - int vop_whiteout_vp_offsets[] = { VOPARG_OFFSETOF(struct vop_whiteout_args,a_dvp), VDESC_NO_OFFSET @@ -769,8 +801,10 @@ struct vnodeop_desc *vfs_op_descs[] = { &vop_setattr_desc, &vop_read_desc, &vop_write_desc, + &vop_lease_desc, &vop_ioctl_desc, &vop_select_desc, + &vop_revoke_desc, &vop_mmap_desc, &vop_fsync_desc, &vop_seek_desc, @@ -794,11 +828,11 @@ struct vnodeop_desc *vfs_op_descs[] = { &vop_advlock_desc, &vop_blkatoff_desc, &vop_valloc_desc, + &vop_balloc_desc, &vop_reallocblks_desc, &vop_vfree_desc, &vop_truncate_desc, &vop_update_desc, - &vop_lease_desc, &vop_whiteout_desc, NULL }; diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index 0a8c45ace58..76edff456c6 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -1,4 +1,4 @@ -# $OpenBSD: vnode_if.src,v 1.4 1996/05/22 11:47:12 deraadt Exp $ +# $OpenBSD: vnode_if.src,v 1.5 1997/10/06 15:12:48 csapuntz Exp $ # $NetBSD: vnode_if.src,v 1.10 1996/05/11 18:26:27 mycroft Exp $ # # Copyright (c) 1992, 1993 @@ -34,12 +34,43 @@ # # @(#)vnode_if.src 8.3 (Berkeley) 2/3/94 # + + +# +# Above each of the vop descriptors is a specification of the locking +# protocol used by 
each vop call. The first column is the name of +# the variable, the remaining three columns are in, out and error +# respectively. The "in" column defines the lock state on input, +# the "out" column defines the state on succesful return, and the +# "error" column defines the locking state on error exit. +# +# The locking value can take the following values: +# L: locked. +# U: unlocked/ +# -: not applicable. vnode does not yet (or no longer) exists. +# =: the same on input and output, may be either L or U. +# X: locked if not nil. +# + +# +#% lookup dvp L ? ? +#% lookup vpp - L - +# +# XXX - the lookup locking protocol defies simple description and depends +# on the flags and operation fields in the (cnp) structure. Note +# especially that *vpp may equal dvp and both may be locked. + vop_lookup { IN struct vnode *dvp; INOUT struct vnode **vpp; IN struct componentname *cnp; }; +# +#% create dvp L U U +#% create vpp - L - +# + vop_create { IN WILLRELE struct vnode *dvp; OUT struct vnode **vpp; @@ -47,6 +78,11 @@ vop_create { IN struct vattr *vap; }; +# +#% mknod dvp L U U +#% mknod vpp - X - +# + vop_mknod { IN WILLRELE struct vnode *dvp; OUT WILLRELE struct vnode **vpp; @@ -54,6 +90,10 @@ vop_mknod { IN struct vattr *vap; }; +# +#% open vp L L L +# + vop_open { IN struct vnode *vp; IN int mode; @@ -61,6 +101,10 @@ vop_open { IN struct proc *p; }; +# +#% close vp U U U +# + vop_close { IN struct vnode *vp; IN int fflag; @@ -68,6 +112,10 @@ vop_close { IN struct proc *p; }; +# +#% access vp L L L +# + vop_access { IN struct vnode *vp; IN int mode; @@ -75,6 +123,10 @@ vop_access { IN struct proc *p; }; +# +#% getattr vp = = = +# + vop_getattr { IN struct vnode *vp; IN struct vattr *vap; @@ -82,6 +134,11 @@ vop_getattr { IN struct proc *p; }; + +# +#% setattr vp L L L +# + vop_setattr { IN struct vnode *vp; IN struct vattr *vap; @@ -89,6 +146,10 @@ vop_setattr { IN struct proc *p; }; +# +#% read vp L L L +# + vop_read { IN struct vnode *vp; INOUT struct uio *uio; @@ 
-96,6 +157,10 @@ vop_read { IN struct ucred *cred; }; +# +#% write vp L L L +# + vop_write { IN struct vnode *vp; INOUT struct uio *uio; @@ -103,6 +168,20 @@ vop_write { IN struct ucred *cred; }; +# +#% lease vp = = = +# +vop_lease { + IN struct vnode *vp; + IN struct proc *p; + IN struct ucred *cred; + IN int flag; +}; + +# +#% ioctl vp U U U +# + vop_ioctl { IN struct vnode *vp; IN u_long command; @@ -112,7 +191,11 @@ vop_ioctl { IN struct proc *p; }; +# +#% select vp U U U +# # Needs work? (fflags) +# vop_select { IN struct vnode *vp; IN int which; @@ -121,6 +204,17 @@ vop_select { IN struct proc *p; }; +# +#% revoke vp U U U +# +vop_revoke { + IN struct vnode *vp; + IN int flags; +}; + +# +# XXX - not used +# vop_mmap { IN struct vnode *vp; IN int fflags; @@ -128,6 +222,9 @@ vop_mmap { IN struct proc *p; }; +# +#% fsync vp L L L +# vop_fsync { IN struct vnode *vp; IN struct ucred *cred; @@ -135,7 +232,10 @@ vop_fsync { IN struct proc *p; }; -# Needs word: Is newoff right? What's it mean? +# +# XXX - not used +# Needs work: Is newoff right? What's it mean? 
+# vop_seek { IN struct vnode *vp; IN off_t oldoff; @@ -143,18 +243,34 @@ vop_seek { IN struct ucred *cred; }; +# +#% remove dvp L U U +#% remove vp L U U +# + vop_remove { IN WILLRELE struct vnode *dvp; IN WILLRELE struct vnode *vp; IN struct componentname *cnp; }; +# +#% link vp U U U +#% link tdvp L U U +# vop_link { IN WILLRELE struct vnode *dvp; IN struct vnode *vp; IN struct componentname *cnp; }; +# +#% rename fdvp U U U +#% rename fvp U U U +#% rename tdvp L U U +#% rename tvp X U U +# + vop_rename { IN WILLRELE struct vnode *fdvp; IN WILLRELE struct vnode *fvp; @@ -164,6 +280,11 @@ vop_rename { IN struct componentname *tcnp; }; +# +#% mkdir dvp L U U +#% mkdir vpp - L - +# + vop_mkdir { IN WILLRELE struct vnode *dvp; OUT struct vnode **vpp; @@ -171,12 +292,26 @@ vop_mkdir { IN struct vattr *vap; }; +# +#% rmdir dvp L U U +#% rmdir vp L U U +# + vop_rmdir { IN WILLRELE struct vnode *dvp; IN WILLRELE struct vnode *vp; IN struct componentname *cnp; }; +# +#% symlink dvp L U U +#% symlink vpp - U - +# +# XXX - note that the return vnode has already been VRELE'ed +# by the filesystem layer. To use it you must use vget, +# possibly with a further namei. 
+# + vop_symlink { IN WILLRELE struct vnode *dvp; OUT WILLRELE struct vnode **vpp; @@ -185,42 +320,79 @@ vop_symlink { IN char *target; }; +# +#% readdir vp L L L +# + vop_readdir { IN struct vnode *vp; INOUT struct uio *uio; IN struct ucred *cred; - OUT int *eofflag; - OUT u_long *cookies; - IN int ncookies; + INOUT int *eofflag; + OUT int *ncookies; + INOUT u_long **cookies; }; +# +#% readlink vp L L L +# vop_readlink { IN struct vnode *vp; INOUT struct uio *uio; IN struct ucred *cred; }; +# +#% abortop dvp = = = +# vop_abortop { IN struct vnode *dvp; IN struct componentname *cnp; }; + +# +#% inactive vp L U U +# vop_inactive { IN struct vnode *vp; + IN struct proc *p; }; +# +#% reclaim vp U U U +# + vop_reclaim { IN struct vnode *vp; + IN struct proc *p; }; +# +#% lock vp U L U +# + vop_lock { IN struct vnode *vp; + IN int flags; + IN struct proc *p; }; +# +#% unlock vp L U L +# + vop_unlock { IN struct vnode *vp; + IN int flags; + IN struct proc *p; }; +# +#% bmap vp L L L +#% bmap vpp - U - +# + vop_bmap { IN struct vnode *vp; IN daddr_t bn; @@ -229,24 +401,39 @@ vop_bmap { OUT int *runp; }; +# +# Needs work: no vp? 
+# #vop_strategy { # IN struct buf *bp; #}; +# +#% print vp = = = +# vop_print { IN struct vnode *vp; }; +# +#% islocked vp = = = +# vop_islocked { IN struct vnode *vp; }; +# +#% pathconf vp L L L +# vop_pathconf { IN struct vnode *vp; IN int name; OUT register_t *retval; }; +# +#% advlock vp U U U +# vop_advlock { IN struct vnode *vp; IN caddr_t id; @@ -255,6 +442,9 @@ vop_advlock { IN int flags; }; +# +#% blkatoff vp L L L +# vop_blkatoff { IN struct vnode *vp; IN off_t offset; @@ -262,6 +452,9 @@ vop_blkatoff { OUT struct buf **bpp; }; +# +#% valloc pvp L L L +# vop_valloc { IN struct vnode *pvp; IN int mode; @@ -269,17 +462,40 @@ vop_valloc { OUT struct vnode **vpp; }; +# +#% balloc vp L L L +# +vop_balloc { + IN struct vnode *vp; + IN off_t startoffset; + IN int size; + IN struct ucred *cred; + IN int flags; + OUT struct buf **bpp; +}; + +# +#% reallocblks vp L L L +# vop_reallocblks { IN struct vnode *vp; IN struct cluster_save *buflist; }; +# +#% vfree pvp L L L +# + vop_vfree { IN struct vnode *pvp; IN ino_t ino; IN int mode; }; +# +#% truncate vp L L L +# + vop_truncate { IN struct vnode *vp; IN off_t length; @@ -288,6 +504,10 @@ vop_truncate { IN struct proc *p; }; +# +#% update vp L L L +# + vop_update { IN struct vnode *vp; IN struct timespec *access; @@ -295,12 +515,11 @@ vop_update { IN int waitfor; }; -vop_lease { - IN struct vnode *vp; - IN struct proc *p; - IN struct ucred *cred; - IN int flag; -}; +# +#% whiteout dvp L L L +#% whiteout cnp - - - +#% whiteout flag - - - +# vop_whiteout { IN struct vnode *dvp; diff --git a/sys/lib/libsa/cd9660.c b/sys/lib/libsa/cd9660.c index 031ec03edb8..352407aaf06 100644 --- a/sys/lib/libsa/cd9660.c +++ b/sys/lib/libsa/cd9660.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cd9660.c,v 1.3 1997/02/16 14:39:38 mickey Exp $ */ +/* $OpenBSD: cd9660.c,v 1.4 1997/10/06 15:14:59 csapuntz Exp $ */ /* $NetBSD: cd9660.c,v 1.1 1996/09/30 16:01:19 ws Exp $ */ /* @@ -51,6 +51,8 @@ struct netexport { int x; }; struct proc; struct statfs; 
struct ucred; +struct vfsconf; + #include <isofs/cd9660/iso.h> /* These once were in iso.h, but got deleted??? */ extern __inline int diff --git a/sys/miscfs/deadfs/dead_vnops.c b/sys/miscfs/deadfs/dead_vnops.c index 0d44c0bbff7..e1a6ffc0493 100644 --- a/sys/miscfs/deadfs/dead_vnops.c +++ b/sys/miscfs/deadfs/dead_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: dead_vnops.c,v 1.2 1996/02/27 07:49:43 niklas Exp $ */ +/* $OpenBSD: dead_vnops.c,v 1.3 1997/10/06 15:18:59 csapuntz Exp $ */ /* $NetBSD: dead_vnops.c,v 1.16 1996/02/13 13:12:48 mycroft Exp $ */ /* @@ -78,11 +78,11 @@ int dead_select __P((void *)); #define dead_inactive nullop #define dead_reclaim nullop int dead_lock __P((void *)); -#define dead_unlock nullop +#define dead_unlock vop_nounlock int dead_bmap __P((void *)); int dead_strategy __P((void *)); int dead_print __P((void *)); -#define dead_islocked nullop +#define dead_islocked vop_noislocked #define dead_pathconf dead_ebadf #define dead_advlock dead_ebadf #define dead_blkatoff dead_badop @@ -279,11 +279,23 @@ dead_lock(v) { struct vop_lock_args /* { struct vnode *a_vp; + int a_flags; + struct proc *a_p; } */ *ap = v; + struct vnode *vp = ap->a_vp; - if (!chkvnlock(ap->a_vp)) - return (0); - return (VCALL(ap->a_vp, VOFFSET(vop_lock), ap)); + /* + * Since we are not using the lock manager, we must clear + * the interlock here. 
+ */ + if (ap->a_flags & LK_INTERLOCK) { + simple_unlock(&vp->v_interlock); + ap->a_flags &= ~LK_INTERLOCK; + } + if (!chkvnlock(vp)) + return (0); + + return (VCALL(vp, VOFFSET(vop_lock), ap)); } /* diff --git a/sys/miscfs/fdesc/fdesc.h b/sys/miscfs/fdesc/fdesc.h index 076999df515..9a2aa369ad3 100644 --- a/sys/miscfs/fdesc/fdesc.h +++ b/sys/miscfs/fdesc/fdesc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: fdesc.h,v 1.2 1996/02/27 07:51:39 niklas Exp $ */ +/* $OpenBSD: fdesc.h,v 1.3 1997/10/06 15:19:00 csapuntz Exp $ */ /* $NetBSD: fdesc.h,v 1.9 1996/02/09 22:40:03 christos Exp $ */ /* @@ -76,7 +76,7 @@ struct fdescnode { #define VTOFDESC(vp) ((struct fdescnode *)(vp)->v_data) extern dev_t devctty; -extern void fdesc_init __P((void)); +extern int fdesc_init __P((struct vfsconf *)); extern int fdesc_root __P((struct mount *, struct vnode **)); extern int fdesc_allocvp __P((fdntype, int, struct mount *, struct vnode **)); extern int (**fdesc_vnodeop_p) __P((void *)); diff --git a/sys/miscfs/fdesc/fdesc_vfsops.c b/sys/miscfs/fdesc/fdesc_vfsops.c index e3df22819f3..33a6f5589cc 100644 --- a/sys/miscfs/fdesc/fdesc_vfsops.c +++ b/sys/miscfs/fdesc/fdesc_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: fdesc_vfsops.c,v 1.2 1996/02/27 07:51:40 niklas Exp $ */ +/* $OpenBSD: fdesc_vfsops.c,v 1.3 1997/10/06 15:19:01 csapuntz Exp $ */ /* $NetBSD: fdesc_vfsops.c,v 1.21 1996/02/09 22:40:07 christos Exp $ */ /* @@ -105,7 +105,7 @@ fdesc_mount(mp, path, data, ndp, p) fmp->f_root = rvp; mp->mnt_flag |= MNT_LOCAL; mp->mnt_data = (qaddr_t)fmp; - getnewfsid(mp, makefstype(MOUNT_FDESC)); + vfs_getnewfsid(mp); (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); @@ -131,15 +131,10 @@ fdesc_unmount(mp, mntflags, p) { int error; int flags = 0; - extern int doforce; struct vnode *rootvp = VFSTOFDESC(mp)->f_root; - if (mntflags & MNT_FORCE) { - /* fdesc can never be rootfs so don't check for it */ - if (!doforce) - return (EINVAL); + if 
(mntflags & MNT_FORCE) flags |= FORCECLOSE; - } /* * Clear out buffer cache. I don't think we @@ -174,30 +169,18 @@ fdesc_root(mp, vpp) struct vnode **vpp; { struct vnode *vp; - + struct proc *p = curproc; /* XXX */ /* * Return locked reference to root. */ vp = VFSTOFDESC(mp)->f_root; VREF(vp); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); *vpp = vp; return (0); } int -fdesc_quotactl(mp, cmd, uid, arg, p) - struct mount *mp; - int cmd; - uid_t uid; - caddr_t arg; - struct proc *p; -{ - - return (EOPNOTSUPP); -} - -int fdesc_statfs(mp, sbp, p) struct mount *mp; struct statfs *sbp; @@ -243,11 +226,12 @@ fdesc_statfs(mp, sbp, p) sbp->f_files = lim + 1; /* Allow for "." */ sbp->f_ffree = freefd; /* See comments above */ if (sbp != &mp->mnt_stat) { + sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid)); bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } - strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name, MFSNAMELEN); + strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN); return (0); } @@ -263,46 +247,17 @@ fdesc_sync(mp, waitfor, uc, p) return (0); } -/* - * Fdesc flat namespace lookup. - * Currently unsupported. 
- */ -int -fdesc_vget(mp, ino, vpp) - struct mount *mp; - ino_t ino; - struct vnode **vpp; -{ - - return (EOPNOTSUPP); -} - - -/*ARGSUSED*/ -int -fdesc_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) - struct mount *mp; - struct fid *fhp; - struct mbuf *nam; - struct vnode **vpp; - int *exflagsp; - struct ucred **credanonp; -{ - - return (EOPNOTSUPP); -} - -/*ARGSUSED*/ -int -fdesc_vptofh(vp, fhp) - struct vnode *vp; - struct fid *fhp; -{ - return (EOPNOTSUPP); -} - +#define fdesc_fhtovp ((int (*) __P((struct mount *, struct fid *, \ + struct mbuf *, struct vnode **, int *, struct ucred **)))eopnotsupp) +#define fdesc_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \ + struct proc *)))eopnotsupp) +#define fdesc_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \ + size_t, struct proc *)))eopnotsupp) +#define fdesc_vget ((int (*) __P((struct mount *, ino_t, struct vnode **))) \ + eopnotsupp) +#define fdesc_vptofh ((int (*) __P((struct vnode *, struct fid *)))eopnotsupp) + struct vfsops fdesc_vfsops = { - MOUNT_FDESC, fdesc_mount, fdesc_start, fdesc_unmount, @@ -314,4 +269,6 @@ struct vfsops fdesc_vfsops = { fdesc_fhtovp, fdesc_vptofh, fdesc_init, + fdesc_sysctl }; + diff --git a/sys/miscfs/fdesc/fdesc_vnops.c b/sys/miscfs/fdesc/fdesc_vnops.c index af5ddbd05bf..7d916ef544a 100644 --- a/sys/miscfs/fdesc/fdesc_vnops.c +++ b/sys/miscfs/fdesc/fdesc_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: fdesc_vnops.c,v 1.7 1997/08/01 05:58:55 millert Exp $ */ +/* $OpenBSD: fdesc_vnops.c,v 1.8 1997/10/06 15:19:01 csapuntz Exp $ */ /* $NetBSD: fdesc_vnops.c,v 1.32 1996/04/11 11:24:29 mrg Exp $ */ /* @@ -91,11 +91,10 @@ LIST_HEAD(fdhashhead, fdescnode) *fdhashtbl; u_long fdhash; int fdesc_badop __P((void *)); -int fdesc_enotsupp __P((void *)); int fdesc_lookup __P((void *)); -#define fdesc_create fdesc_enotsupp -#define fdesc_mknod fdesc_enotsupp +#define fdesc_create eopnotsupp +#define fdesc_mknod eopnotsupp int fdesc_open __P((void *)); #define fdesc_close nullop 
#define fdesc_access nullop @@ -105,34 +104,35 @@ int fdesc_read __P((void *)); int fdesc_write __P((void *)); int fdesc_ioctl __P((void *)); int fdesc_select __P((void *)); -#define fdesc_mmap fdesc_enotsupp +#define fdesc_mmap eopnotsupp #define fdesc_fsync nullop #define fdesc_seek nullop -#define fdesc_remove fdesc_enotsupp +#define fdesc_remove eopnotsupp +#define fdesc_revoke vop_revoke int fdesc_link __P((void *)); -#define fdesc_rename fdesc_enotsupp -#define fdesc_mkdir fdesc_enotsupp -#define fdesc_rmdir fdesc_enotsupp +#define fdesc_rename eopnotsupp +#define fdesc_mkdir eopnotsupp +#define fdesc_rmdir eopnotsupp int fdesc_symlink __P((void *)); int fdesc_readdir __P((void *)); int fdesc_readlink __P((void *)); int fdesc_abortop __P((void *)); int fdesc_inactive __P((void *)); int fdesc_reclaim __P((void *)); -#define fdesc_lock nullop -#define fdesc_unlock nullop +#define fdesc_lock vop_nolock +#define fdesc_unlock vop_nounlock #define fdesc_bmap fdesc_badop #define fdesc_strategy fdesc_badop int fdesc_print __P((void *)); int fdesc_pathconf __P((void *)); -#define fdesc_islocked nullop -#define fdesc_advlock fdesc_enotsupp -#define fdesc_blkatoff fdesc_enotsupp -#define fdesc_valloc fdesc_enotsupp +#define fdesc_islocked vop_noislocked +#define fdesc_advlock eopnotsupp +#define fdesc_blkatoff eopnotsupp +#define fdesc_valloc eopnotsupp int fdesc_vfree __P((void *)); -#define fdesc_truncate fdesc_enotsupp -#define fdesc_update fdesc_enotsupp -#define fdesc_bwrite fdesc_enotsupp +#define fdesc_truncate eopnotsupp +#define fdesc_update eopnotsupp +#define fdesc_bwrite eopnotsupp static int fdesc_attr __P((int, struct vattr *, struct ucred *, struct proc *)); @@ -150,6 +150,7 @@ struct vnodeopv_entry_desc fdesc_vnodeop_entries[] = { { &vop_read_desc, fdesc_read }, /* read */ { &vop_write_desc, fdesc_write }, /* write */ { &vop_ioctl_desc, fdesc_ioctl }, /* ioctl */ + { &vop_revoke_desc, fdesc_revoke }, /* revoke */ { &vop_select_desc, fdesc_select }, /* 
select */ { &vop_mmap_desc, fdesc_mmap }, /* mmap */ { &vop_fsync_desc, fdesc_fsync }, /* fsync */ @@ -188,8 +189,9 @@ struct vnodeopv_desc fdesc_vnodeop_opv_desc = /* * Initialise cache headers */ -void -fdesc_init() +int +fdesc_init(vfsp) + struct vfsconf *vfsp; { int cttymajor; @@ -199,6 +201,7 @@ fdesc_init() break; devctty = makedev(cttymajor, 0); fdhashtbl = hashinit(NFDCACHE, M_CACHE, &fdhash); + return (0); } int @@ -208,6 +211,7 @@ fdesc_allocvp(ftype, ix, mp, vpp) struct mount *mp; struct vnode **vpp; { + struct proc *p = curproc; /* XXX */ struct fdhashhead *fc; struct fdescnode *fd; int error = 0; @@ -216,7 +220,7 @@ fdesc_allocvp(ftype, ix, mp, vpp) loop: for (fd = fc->lh_first; fd != 0; fd = fd->fd_hash.le_next) { if (fd->fd_ix == ix && fd->fd_vnode->v_mount == mp) { - if (vget(fd->fd_vnode, 0)) + if (vget(fd->fd_vnode, 0, p)) goto loop; *vpp = fd->fd_vnode; return (error); @@ -272,25 +276,23 @@ fdesc_lookup(v) } */ *ap = v; struct vnode **vpp = ap->a_vpp; struct vnode *dvp = ap->a_dvp; - char *pname; - struct proc *p; - int nfiles; + struct componentname *cnp = ap->a_cnp; + char *pname = cnp->cn_nameptr; + struct proc *p = cnp->cn_proc; + int nfiles = p->p_fd->fd_nfiles; unsigned fd = 0; int error; struct vnode *fvp; char *ln; - pname = ap->a_cnp->cn_nameptr; - if (ap->a_cnp->cn_namelen == 1 && *pname == '.') { + VOP_UNLOCK(dvp, 0, p); + if (cnp->cn_namelen == 1 && *pname == '.') { *vpp = dvp; - VREF(dvp); - VOP_LOCK(dvp); + VREF(dvp); + vn_lock(dvp, LK_SHARED | LK_RETRY, p); return (0); } - p = ap->a_cnp->cn_proc; - nfiles = p->p_fd->fd_nfiles; - switch (VTOFDESC(dvp)->fd_type) { default: case Flink: @@ -300,17 +302,17 @@ fdesc_lookup(v) goto bad; case Froot: - if (ap->a_cnp->cn_namelen == 2 && bcmp(pname, "fd", 2) == 0) { + if (cnp->cn_namelen == 2 && bcmp(pname, "fd", 2) == 0) { error = fdesc_allocvp(Fdevfd, FD_DEVFD, dvp->v_mount, &fvp); if (error) goto bad; *vpp = fvp; fvp->v_type = VDIR; - VOP_LOCK(fvp); + vn_lock(fvp, LK_SHARED | LK_RETRY, p); 
return (0); } - if (ap->a_cnp->cn_namelen == 3 && bcmp(pname, "tty", 3) == 0) { + if (cnp->cn_namelen == 3 && bcmp(pname, "tty", 3) == 0) { struct vnode *ttyvp = cttyvp(p); if (ttyvp == NULL) { error = ENXIO; @@ -321,12 +323,12 @@ fdesc_lookup(v) goto bad; *vpp = fvp; fvp->v_type = VCHR; - VOP_LOCK(fvp); + vn_lock(fvp, LK_SHARED | LK_RETRY, p); return (0); } ln = 0; - switch (ap->a_cnp->cn_namelen) { + switch (cnp->cn_namelen) { case 5: if (bcmp(pname, "stdin", 5) == 0) { ln = "fd/0"; @@ -352,7 +354,7 @@ fdesc_lookup(v) VTOFDESC(fvp)->fd_link = ln; *vpp = fvp; fvp->v_type = VLNK; - VOP_LOCK(fvp); + vn_lock(fvp, LK_SHARED | LK_RETRY, p); return (0); } else { error = ENOENT; @@ -362,9 +364,11 @@ fdesc_lookup(v) /* FALL THROUGH */ case Fdevfd: - if (ap->a_cnp->cn_namelen == 2 && bcmp(pname, "..", 2) == 0) { - error = fdesc_root(dvp->v_mount, vpp); - return (error); + if (cnp->cn_namelen == 2 && bcmp(pname, "..", 2) == 0) { + if ((error = fdesc_root(dvp->v_mount, vpp))) + goto bad; + + return (0); } fd = 0; @@ -388,11 +392,13 @@ fdesc_lookup(v) if (error) goto bad; VTOFDESC(fvp)->fd_fd = fd; + vn_lock(fvp, LK_SHARED | LK_RETRY, p); *vpp = fvp; return (0); } bad:; + vn_lock(dvp, LK_SHARED | LK_RETRY, p); *vpp = NULL; return (error); } @@ -683,16 +689,14 @@ fdesc_readdir(v) struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; - u_long *a_cookies; - int a_ncookies; + int *a_ncookies; + u_long **a_cookies; } */ *ap = v; struct uio *uio = ap->a_uio; struct dirent d; struct filedesc *fdp; int i; int error; - u_long *cookies = ap->a_cookies; - int ncookies = ap->a_ncookies; switch (VTOFDESC(ap->a_vp)->fd_type) { case Fctty: @@ -745,8 +749,6 @@ fdesc_readdir(v) if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) break; - if (ncookies-- > 0) - *cookies++ = i + 1; } } else { for (; i - 2 < fdp->fd_nfiles && uio->uio_resid >= UIO_MX; @@ -772,8 +774,6 @@ fdesc_readdir(v) if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) break; - if (ncookies-- > 0) - *cookies++ = i + 1; 
} } @@ -916,6 +916,7 @@ fdesc_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; + struct proc *a_p; } */ *ap = v; struct vnode *vp = ap->a_vp; @@ -923,6 +924,7 @@ fdesc_inactive(v) * Clear out the v_type field to avoid * nasty things happening in vgone(). */ + VOP_UNLOCK(vp, 0, ap->a_p); vp->v_type = VNON; return (0); } @@ -1048,18 +1050,6 @@ fdesc_abortop(v) } /* - * /dev/fd vnode unsupported operation - */ -/*ARGSUSED*/ -int -fdesc_enotsupp(v) - void *v; -{ - - return (EOPNOTSUPP); -} - -/* * /dev/fd "should never get here" operation */ /*ARGSUSED*/ diff --git a/sys/miscfs/fifofs/fifo.h b/sys/miscfs/fifofs/fifo.h index cdfd83fdc3c..84723ebfb61 100644 --- a/sys/miscfs/fifofs/fifo.h +++ b/sys/miscfs/fifofs/fifo.h @@ -1,4 +1,4 @@ -/* $OpenBSD: fifo.h,v 1.2 1996/02/27 07:53:42 niklas Exp $ */ +/* $OpenBSD: fifo.h,v 1.3 1997/10/06 15:19:02 csapuntz Exp $ */ /* $NetBSD: fifo.h,v 1.10 1996/02/09 22:40:15 christos Exp $ */ /* @@ -60,6 +60,7 @@ int fifo_select __P((void *)); #define fifo_fsync nullop #define fifo_seek fifo_badop #define fifo_remove fifo_badop +#define fifo_revoke vop_revoke #define fifo_link fifo_badop #define fifo_rename fifo_badop #define fifo_mkdir fifo_badop @@ -68,14 +69,14 @@ int fifo_select __P((void *)); #define fifo_readdir fifo_badop #define fifo_readlink fifo_badop #define fifo_abortop fifo_badop -#define fifo_inactive nullop +int fifo_inactive __P((void *)); #define fifo_reclaim nullop -int fifo_lock __P((void *)); -int fifo_unlock __P((void *)); +#define fifo_lock vop_nolock +#define fifo_unlock vop_nounlock int fifo_bmap __P((void *)); #define fifo_strategy fifo_badop int fifo_print __P((void *)); -#define fifo_islocked nullop +#define fifo_islocked vop_noislocked int fifo_pathconf __P((void *)); int fifo_advlock __P((void *)); #define fifo_blkatoff fifo_badop diff --git a/sys/miscfs/fifofs/fifo_vnops.c b/sys/miscfs/fifofs/fifo_vnops.c index 8b45763c8c3..e3fd6689055 100644 --- a/sys/miscfs/fifofs/fifo_vnops.c +++ 
b/sys/miscfs/fifofs/fifo_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: fifo_vnops.c,v 1.4 1996/11/04 03:31:54 tholo Exp $ */ +/* $OpenBSD: fifo_vnops.c,v 1.5 1997/10/06 15:19:03 csapuntz Exp $ */ /* $NetBSD: fifo_vnops.c,v 1.18 1996/03/16 23:52:42 christos Exp $ */ /* @@ -38,13 +38,13 @@ #include <sys/param.h> #include <sys/proc.h> +#include <sys/systm.h> #include <sys/time.h> #include <sys/namei.h> #include <sys/vnode.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/stat.h> -#include <sys/systm.h> #include <sys/ioctl.h> #include <sys/file.h> #include <sys/errno.h> @@ -79,6 +79,7 @@ struct vnodeopv_entry_desc fifo_vnodeop_entries[] = { { &vop_lease_desc, fifo_lease_check }, /* lease */ { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */ { &vop_select_desc, fifo_select }, /* select */ + { &vop_revoke_desc, fifo_revoke }, /* revoke */ { &vop_mmap_desc, fifo_mmap }, /* mmap */ { &vop_fsync_desc, fifo_fsync }, /* fsync */ { &vop_seek_desc, fifo_seek }, /* seek */ @@ -147,6 +148,7 @@ fifo_open(v) } */ *ap = v; register struct vnode *vp = ap->a_vp; register struct fifoinfo *fip; + struct proc *p = ap->a_p; struct socket *rso, *wso; int error; static char openstr[] = "fifo"; @@ -196,10 +198,10 @@ fifo_open(v) if (ap->a_mode & O_NONBLOCK) { } else { while (fip->fi_writers == 0) { - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); error = tsleep((caddr_t)&fip->fi_readers, PCATCH | PSOCK, openstr, 0); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (error) goto bad; } @@ -213,10 +215,10 @@ fifo_open(v) } } else { while (fip->fi_readers == 0) { - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); error = tsleep((caddr_t)&fip->fi_writers, PCATCH | PSOCK, openstr, 0); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (error) goto bad; } @@ -244,6 +246,7 @@ fifo_read(v) } */ *ap = v; register struct uio *uio = ap->a_uio; register struct socket *rso = ap->a_vp->v_fifoinfo->fi_readsock; + struct proc *p = uio->uio_procp; int error, startresid; #ifdef DIAGNOSTIC @@ 
-255,10 +258,10 @@ fifo_read(v) if (ap->a_ioflag & IO_NDELAY) rso->so_state |= SS_NBIO; startresid = uio->uio_resid; - VOP_UNLOCK(ap->a_vp); + VOP_UNLOCK(ap->a_vp, 0, p); error = soreceive(rso, (struct mbuf **)0, uio, (struct mbuf **)0, (struct mbuf **)0, (int *)0); - VOP_LOCK(ap->a_vp); + vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p); /* * Clear EOF indication after first such return. */ @@ -287,6 +290,7 @@ fifo_write(v) struct ucred *a_cred; } */ *ap = v; struct socket *wso = ap->a_vp->v_fifoinfo->fi_writesock; + struct proc *p = ap->a_uio->uio_procp; int error; #ifdef DIAGNOSTIC @@ -295,9 +299,9 @@ fifo_write(v) #endif if (ap->a_ioflag & IO_NDELAY) wso->so_state |= SS_NBIO; - VOP_UNLOCK(ap->a_vp); + VOP_UNLOCK(ap->a_vp, 0, p); error = sosend(wso, (struct mbuf *)0, ap->a_uio, 0, (struct mbuf *)0, 0); - VOP_LOCK(ap->a_vp); + vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p); if (ap->a_ioflag & IO_NDELAY) wso->so_state &= ~SS_NBIO; return (error); @@ -369,6 +373,19 @@ fifo_select(v) return (0); } +int +fifo_inactive(v) + void *v; +{ + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap = v; + + VOP_UNLOCK(ap->a_vp, 0, ap->a_p); + return (0); +} + /* * This is a noop, simply returning what one has been given. */ @@ -381,6 +398,7 @@ fifo_bmap(v) daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; + int *a_runp; } */ *ap = v; if (ap->a_vpp != NULL) @@ -391,26 +409,6 @@ fifo_bmap(v) } /* - * At the moment we do not do any locking. 
- */ -/* ARGSUSED */ -int -fifo_lock(v) - void *v; -{ - return (0); -} - -/* ARGSUSED */ -int -fifo_unlock(v) - void *v; -{ - - return (0); -} - -/* * Device close routine */ /* ARGSUSED */ @@ -540,4 +538,5 @@ fifo_badop(v) panic("fifo_badop called"); /* NOTREACHED */ + return(0); } diff --git a/sys/miscfs/kernfs/kernfs.h b/sys/miscfs/kernfs/kernfs.h index 2360f925262..65b0d85b969 100644 --- a/sys/miscfs/kernfs/kernfs.h +++ b/sys/miscfs/kernfs/kernfs.h @@ -1,4 +1,4 @@ -/* $OpenBSD: kernfs.h,v 1.3 1997/04/26 12:09:58 kstailey Exp $ */ +/* $OpenBSD: kernfs.h,v 1.4 1997/10/06 15:19:04 csapuntz Exp $ */ /* $NetBSD: kernfs.h,v 1.10 1996/02/09 22:40:21 christos Exp $ */ /* @@ -77,6 +77,18 @@ struct kernfs_node { #define VFSTOKERNFS(mp) ((struct kernfs_mount *)((mp)->mnt_data)) #define VTOKERN(vp) ((struct kernfs_node *)(vp)->v_data) +#define kernfs_fhtovp ((int (*) __P((struct mount *, struct fid *, \ + struct mbuf *, struct vnode **, int *, struct ucred **)))eopnotsupp) +#define kernfs_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \ + struct proc *)))eopnotsupp) +#define kernfs_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \ + size_t, struct proc *)))eopnotsupp) +#define kernfs_vget ((int (*) __P((struct mount *, ino_t, struct vnode **))) \ + eopnotsupp) +#define kernfs_vptofh ((int (*) __P((struct vnode *, struct fid *)))eopnotsupp) +#define kernfs_sync ((int (*) __P((struct mount *, int, struct ucred *, \ + struct proc *)))eopnotsupp) + extern int (**kernfs_vnodeop_p) __P((void *)); extern struct vfsops kernfs_vfsops; extern dev_t rrootdev; diff --git a/sys/miscfs/kernfs/kernfs_vfsops.c b/sys/miscfs/kernfs/kernfs_vfsops.c index dbbb12817c3..dbd34c80f58 100644 --- a/sys/miscfs/kernfs/kernfs_vfsops.c +++ b/sys/miscfs/kernfs/kernfs_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kernfs_vfsops.c,v 1.5 1997/09/11 05:26:10 millert Exp $ */ +/* $OpenBSD: kernfs_vfsops.c,v 1.6 1997/10/06 15:19:04 csapuntz Exp $ */ /* $NetBSD: kernfs_vfsops.c,v 1.26 
1996/04/22 01:42:27 christos Exp $ */ /* @@ -59,7 +59,7 @@ dev_t rrootdev = NODEV; -void kernfs_init __P((void)); +int kernfs_init __P((struct vfsconf *)); void kernfs_get_rrootdev __P((void)); int kernfs_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); @@ -67,18 +67,13 @@ int kernfs_start __P((struct mount *, int, struct proc *)); int kernfs_unmount __P((struct mount *, int, struct proc *)); int kernfs_root __P((struct mount *, struct vnode **)); int kernfs_statfs __P((struct mount *, struct statfs *, struct proc *)); -int kernfs_quotactl __P((struct mount *, int, uid_t, caddr_t, - struct proc *)); -int kernfs_sync __P((struct mount *, int, struct ucred *, struct proc *)); -int kernfs_vget __P((struct mount *, ino_t, struct vnode **)); -int kernfs_fhtovp __P((struct mount *, struct fid *, struct mbuf *, - struct vnode **, int *, struct ucred **)); -int kernfs_vptofh __P((struct vnode *, struct fid *)); /*ARGSUSED*/ -void -kernfs_init() +int +kernfs_init(vfsp) + struct vfsconf *vfsp; { + return (0); } void @@ -144,7 +139,7 @@ kernfs_mount(mp, path, data, ndp, p) fmp->kf_root = rvp; mp->mnt_flag |= MNT_LOCAL; mp->mnt_data = (qaddr_t)fmp; - getnewfsid(mp, makefstype(MOUNT_KERNFS)); + vfs_getnewfsid(mp); (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); @@ -176,7 +171,6 @@ kernfs_unmount(mp, mntflags, p) { int error; int flags = 0; - extern int doforce; struct vnode *rootvp = VFSTOKERNFS(mp)->kf_root; #ifdef KERNFS_DIAGNOSTIC @@ -184,9 +178,6 @@ kernfs_unmount(mp, mntflags, p) #endif if (mntflags & MNT_FORCE) { - /* kernfs can never be rootfs so don't check for it */ - if (!doforce) - return (EINVAL); flags |= FORCECLOSE; } @@ -225,6 +216,7 @@ kernfs_root(mp, vpp) struct vnode **vpp; { struct vnode *vp; + struct proc *p = curproc; #ifdef KERNFS_DIAGNOSTIC printf("kernfs_root(mp = %p)\n", mp); @@ -235,24 +227,12 @@ kernfs_root(mp, vpp) */ vp = 
VFSTOKERNFS(mp)->kf_root; VREF(vp); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); *vpp = vp; return (0); } int -kernfs_quotactl(mp, cmd, uid, arg, p) - struct mount *mp; - int cmd; - uid_t uid; - caddr_t arg; - struct proc *p; -{ - - return (EOPNOTSUPP); -} - -int kernfs_statfs(mp, sbp, p) struct mount *mp; struct statfs *sbp; @@ -266,8 +246,6 @@ kernfs_statfs(mp, sbp, p) #ifdef COMPAT_09 sbp->f_type = 7; -#else - sbp->f_type = 0; #endif sbp->f_bsize = cnt.v_page_size; sbp->f_iosize = cnt.v_page_size; @@ -277,66 +255,16 @@ kernfs_statfs(mp, sbp, p) sbp->f_files = desiredvnodes; sbp->f_ffree = desiredvnodes - numvnodes; if (sbp != &mp->mnt_stat) { + sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid)); bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } - strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name, MFSNAMELEN); - return (0); -} - -/*ARGSUSED*/ -int -kernfs_sync(mp, waitfor, uc, p) - struct mount *mp; - int waitfor; - struct ucred *uc; - struct proc *p; -{ - + strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN); return (0); } -/* - * Kernfs flat namespace lookup. - * Currently unsupported. 
- */ -int -kernfs_vget(mp, ino, vpp) - struct mount *mp; - ino_t ino; - struct vnode **vpp; -{ - - return (EOPNOTSUPP); -} - -/*ARGSUSED*/ -int -kernfs_fhtovp(mp, fhp, mb, vpp, what, anon) - struct mount *mp; - struct fid *fhp; - struct mbuf *mb; - struct vnode **vpp; - int *what; - struct ucred **anon; -{ - - return (EOPNOTSUPP); -} - -/*ARGSUSED*/ -int -kernfs_vptofh(vp, fhp) - struct vnode *vp; - struct fid *fhp; -{ - - return (EOPNOTSUPP); -} - struct vfsops kernfs_vfsops = { - MOUNT_KERNFS, kernfs_mount, kernfs_start, kernfs_unmount, @@ -348,4 +276,5 @@ struct vfsops kernfs_vfsops = { kernfs_fhtovp, kernfs_vptofh, kernfs_init, + kernfs_sysctl }; diff --git a/sys/miscfs/kernfs/kernfs_vnops.c b/sys/miscfs/kernfs/kernfs_vnops.c index 785a531f855..7007690e04f 100644 --- a/sys/miscfs/kernfs/kernfs_vnops.c +++ b/sys/miscfs/kernfs/kernfs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kernfs_vnops.c,v 1.10 1997/09/11 05:26:11 millert Exp $ */ +/* $OpenBSD: kernfs_vnops.c,v 1.11 1997/10/06 15:19:05 csapuntz Exp $ */ /* $NetBSD: kernfs_vnops.c,v 1.43 1996/03/16 23:52:47 christos Exp $ */ /* @@ -118,11 +118,10 @@ struct kern_target kern_targets[] = { static int nkern_targets = sizeof(kern_targets) / sizeof(kern_targets[0]); int kernfs_badop __P((void *)); -int kernfs_enotsupp __P((void *)); int kernfs_lookup __P((void *)); -#define kernfs_create kernfs_enotsupp -#define kernfs_mknod kernfs_enotsupp +#define kernfs_create eopnotsupp +#define kernfs_mknod eopnotsupp int kernfs_open __P((void *)); #define kernfs_close nullop int kernfs_access __P((void *)); @@ -130,36 +129,37 @@ int kernfs_getattr __P((void *)); int kernfs_setattr __P((void *)); int kernfs_read __P((void *)); int kernfs_write __P((void *)); -#define kernfs_ioctl kernfs_enotsupp -#define kernfs_select kernfs_enotsupp -#define kernfs_mmap kernfs_enotsupp +#define kernfs_ioctl eopnotsupp +#define kernfs_select eopnotsupp +#define kernfs_mmap eopnotsupp #define kernfs_fsync nullop #define kernfs_seek nullop -#define 
kernfs_remove kernfs_enotsupp +#define kernfs_remove eopnotsupp int kernfs_link __P((void *)); -#define kernfs_rename kernfs_enotsupp -#define kernfs_mkdir kernfs_enotsupp -#define kernfs_rmdir kernfs_enotsupp +#define kernfs_rename eopnotsupp +#define kernfs_revoke vop_revoke +#define kernfs_mkdir eopnotsupp +#define kernfs_rmdir eopnotsupp int kernfs_symlink __P((void *)); int kernfs_readdir __P((void *)); -#define kernfs_readlink kernfs_enotsupp +#define kernfs_readlink eopnotsupp int kernfs_abortop __P((void *)); int kernfs_inactive __P((void *)); int kernfs_reclaim __P((void *)); -#define kernfs_lock nullop -#define kernfs_unlock nullop +#define kernfs_lock vop_nolock +#define kernfs_unlock vop_nounlock #define kernfs_bmap kernfs_badop #define kernfs_strategy kernfs_badop int kernfs_print __P((void *)); -#define kernfs_islocked nullop +#define kernfs_islocked vop_noislocked int kernfs_pathconf __P((void *)); -#define kernfs_advlock kernfs_enotsupp -#define kernfs_blkatoff kernfs_enotsupp -#define kernfs_valloc kernfs_enotsupp +#define kernfs_advlock eopnotsupp +#define kernfs_blkatoff eopnotsupp +#define kernfs_valloc eopnotsupp int kernfs_vfree __P((void *)); -#define kernfs_truncate kernfs_enotsupp -#define kernfs_update kernfs_enotsupp -#define kernfs_bwrite kernfs_enotsupp +#define kernfs_truncate eopnotsupp +#define kernfs_update eopnotsupp +#define kernfs_bwrite eopnotsupp int kernfs_xread __P((struct kern_target *, int, char **, int)); int kernfs_xwrite __P((struct kern_target *, char *, int)); @@ -179,6 +179,7 @@ struct vnodeopv_entry_desc kernfs_vnodeop_entries[] = { { &vop_write_desc, kernfs_write }, /* write */ { &vop_ioctl_desc, kernfs_ioctl }, /* ioctl */ { &vop_select_desc, kernfs_select }, /* select */ + { &vop_revoke_desc, kernfs_revoke }, /* revoke */ { &vop_mmap_desc, kernfs_mmap }, /* mmap */ { &vop_fsync_desc, kernfs_fsync }, /* fsync */ { &vop_seek_desc, kernfs_seek }, /* seek */ @@ -355,6 +356,7 @@ kernfs_lookup(v) struct vnode **vpp = 
ap->a_vpp; struct vnode *dvp = ap->a_dvp; char *pname = cnp->cn_nameptr; + struct proc *p = cnp->cn_proc; struct kern_target *kt; struct vnode *fvp; int error, i; @@ -373,7 +375,7 @@ kernfs_lookup(v) if (cnp->cn_namelen == 1 && *pname == '.') { *vpp = dvp; VREF(dvp); - /*VOP_LOCK(dvp);*/ + vn_lock(dvp, LK_SHARED | LK_RETRY, p); return (0); } @@ -381,7 +383,7 @@ kernfs_lookup(v) if (cnp->cn_namelen == 4 && bcmp(pname, "root", 4) == 0) { *vpp = rootdir; VREF(rootdir); - VOP_LOCK(rootdir); + vn_lock(rootdir, LK_SHARED | LK_RETRY, p); return (0); } #endif @@ -396,6 +398,7 @@ kernfs_lookup(v) printf("kernfs_lookup: i = %d, failed", i); #endif + vn_lock(dvp, LK_SHARED | LK_RETRY, p); return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS); found: @@ -405,7 +408,7 @@ found: if (*dp == NODEV || !vfinddev(*dp, kt->kt_vtype, &fvp)) return (ENOENT); *vpp = fvp; - if (vget(fvp, 1)) + if (vget(fvp, LK_EXCLUSIVE, p)) goto loop; return (0); } @@ -414,13 +417,16 @@ found: printf("kernfs_lookup: allocate new vnode\n"); #endif error = getnewvnode(VT_KERNFS, dvp->v_mount, kernfs_vnodeop_p, &fvp); - if (error) + if (error) { + vn_lock(dvp, LK_SHARED | LK_RETRY, p); return (error); + } MALLOC(fvp->v_data, void *, sizeof(struct kernfs_node), M_TEMP, M_WAITOK); VTOKERN(fvp)->kf_kt = kt; fvp->v_type = kt->kt_vtype; + vn_lock(fvp, LK_SHARED | LK_RETRY, p); *vpp = fvp; #ifdef KERNFS_DIAGNOSTIC @@ -621,13 +627,10 @@ kernfs_readdir(v) u_long *a_cookies; int a_ncookies; } */ *ap = v; + int error, i; struct uio *uio = ap->a_uio; struct dirent d; struct kern_target *kt; - int i; - int error; - u_long *cookies = ap->a_cookies; - int ncookies = ap->a_ncookies; if (ap->a_vp->v_type != VDIR) return (ENOTDIR); @@ -663,8 +666,6 @@ kernfs_readdir(v) if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) break; - if (ncookies-- > 0) - *cookies++ = i + 1; } uio->uio_offset = i; @@ -677,6 +678,7 @@ kernfs_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; + struct proc *a_p; } */ *ap = v; struct 
vnode *vp = ap->a_vp; @@ -687,6 +689,7 @@ kernfs_inactive(v) * Clear out the v_type field to avoid * nasty things happening in vgone(). */ + VOP_UNLOCK(vp, 0, ap->a_p); vp->v_type = VNON; return (0); } @@ -817,18 +820,6 @@ kernfs_abortop(v) } /* - * /dev/fd vnode unsupported operation - */ -/*ARGSUSED*/ -int -kernfs_enotsupp(v) - void *v; -{ - - return (EOPNOTSUPP); -} - -/* * /dev/fd "should never get here" operation */ /*ARGSUSED*/ diff --git a/sys/miscfs/nullfs/null.h b/sys/miscfs/nullfs/null.h index 75e8281a06b..bdba4065075 100644 --- a/sys/miscfs/nullfs/null.h +++ b/sys/miscfs/nullfs/null.h @@ -1,4 +1,4 @@ -/* $OpenBSD: null.h,v 1.4 1997/04/10 17:23:14 millert Exp $ */ +/* $OpenBSD: null.h,v 1.5 1997/10/06 15:19:06 csapuntz Exp $ */ /* $NetBSD: null.h,v 1.7 1996/05/17 20:53:11 gwr Exp $ */ /* @@ -92,6 +92,8 @@ extern struct vnode *null_checkvp __P((struct vnode *vp, char *fil, int lno)); extern int (**null_vnodeop_p) __P((void *)); extern struct vfsops null_vfsops; -void nullfs_init __P((void)); +int nullfs_init __P((struct vfsconf *)); +int null_bypass __P((void *)); + #endif /* _KERNEL */ diff --git a/sys/miscfs/nullfs/null_subr.c b/sys/miscfs/nullfs/null_subr.c index 8c77df2348d..930b1c632a9 100644 --- a/sys/miscfs/nullfs/null_subr.c +++ b/sys/miscfs/nullfs/null_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: null_subr.c,v 1.4 1997/09/11 05:26:12 millert Exp $ */ +/* $OpenBSD: null_subr.c,v 1.5 1997/10/06 15:19:06 csapuntz Exp $ */ /* $NetBSD: null_subr.c,v 1.6 1996/05/10 22:50:52 jtk Exp $ */ /* @@ -42,6 +42,7 @@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/proc.h> #include <sys/time.h> #include <sys/types.h> #include <sys/vnode.h> @@ -67,7 +68,6 @@ LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl; u_long null_node_hash; -void nullfs_init __P((void)); static struct vnode * null_node_find __P((struct mount *, struct vnode *)); static int @@ -75,14 +75,17 @@ static int /* * Initialise cache headers */ -void -nullfs_init() +/*ARGSUSED*/ +int 
+nullfs_init(vfsp) + struct vfsconf *vfsp; { #ifdef NULLFS_DIAGNOSTIC printf("nullfs_init\n"); /* printed during system boot */ #endif null_node_hashtbl = hashinit(NNULLNODECACHE, M_CACHE, &null_node_hash); + return (0); } /* @@ -96,6 +99,7 @@ null_node_find(mp, lowervp) struct null_node_hashhead *hd; struct null_node *a; struct vnode *vp; + struct proc *p = curproc; /* * Find hash base, and then search the (two-way) linked @@ -113,7 +117,7 @@ loop: * stuff, but we don't want to lock * the lower node. */ - if (vget(vp, 0)) { + if (vget(vp, 0, p)) { printf ("null_node_find: vget failed.\n"); goto loop; }; @@ -141,6 +145,7 @@ null_node_alloc(mp, lowervp, vpp) struct vnode *vp, *nvp; int error; extern int (**dead_vnodeop_p) __P((void *)); + struct proc *p = curproc; if ((error = getnewvnode(VT_NULL, mp, null_vnodeop_p, &vp)) != 0) return (error); @@ -206,14 +211,14 @@ loop: vgone(cvp); goto loop; } - if (vget(cvp, 0)) /* can't lock; will die! */ + if (vget(cvp, 0, p)) /* can't lock; will die! 
*/ goto loop; break; } vp->v_hashchain = cvpp; vp->v_specnext = *cvpp; - vp->v_specflags = 0; + vp->v_specmountpoint = NULL; *cvpp = vp; #ifdef DIAGNOSTIC if (cvp == NULLVP) @@ -249,6 +254,7 @@ null_node_create(mp, lowervp, newvpp, takelock) int takelock; { struct vnode *aliasvp; + struct proc *p = curproc; /* XXX */ if ((aliasvp = null_node_find(mp, lowervp)) != NULL) { /* @@ -297,7 +303,7 @@ null_node_create(mp, lowervp, newvpp, takelock) upper layer lock */ VTONULL(aliasvp)->null_flags |= NULL_LLOCK; if (takelock) - VOP_LOCK(aliasvp); + vn_lock(aliasvp, LK_EXCLUSIVE | LK_RETRY, p); *newvpp = aliasvp; return (0); diff --git a/sys/miscfs/nullfs/null_vfsops.c b/sys/miscfs/nullfs/null_vfsops.c index c27f3f28923..26dcb22db51 100644 --- a/sys/miscfs/nullfs/null_vfsops.c +++ b/sys/miscfs/nullfs/null_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: null_vfsops.c,v 1.4 1997/09/11 05:26:12 millert Exp $ */ +/* $OpenBSD: null_vfsops.c,v 1.5 1997/10/06 15:19:07 csapuntz Exp $ */ /* $NetBSD: null_vfsops.c,v 1.11 1996/05/10 22:50:56 jtk Exp $ */ /* @@ -48,6 +48,7 @@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/proc.h> #include <sys/time.h> #include <sys/types.h> #include <sys/vnode.h> @@ -138,7 +139,7 @@ nullfs_mount(mp, path, data, ndp, p) /* * Unlock the node (either the lower or the alias) */ - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); /* * Make sure the node alias worked */ @@ -158,7 +159,7 @@ nullfs_mount(mp, path, data, ndp, p) if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL) mp->mnt_flag |= MNT_LOCAL; mp->mnt_data = (qaddr_t) xmp; - getnewfsid(mp, makefstype(MOUNT_LOFS)); + vfs_getnewfsid(mp); (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); @@ -200,16 +201,12 @@ nullfs_unmount(mp, mntflags, p) struct vnode *nullm_rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp; int error; int flags = 0; - extern int doforce; #ifdef NULLFS_DIAGNOSTIC printf("nullfs_unmount(mp = %p)\n", mp); 
#endif if (mntflags & MNT_FORCE) { - /* lofs can never be rootfs so don't check for it */ - if (!doforce) - return (EINVAL); flags |= FORCECLOSE; } @@ -253,6 +250,7 @@ nullfs_root(mp, vpp) struct vnode **vpp; { struct vnode *vp; + struct proc *p = curproc; #ifdef NULLFS_DIAGNOSTIC printf("nullfs_root(mp = %p, vp = %p->%p)\n", mp, @@ -266,7 +264,7 @@ nullfs_root(mp, vpp) */ vp = MOUNTTONULLMOUNT(mp)->nullm_rootvp; VREF(vp); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); *vpp = vp; return 0; } @@ -320,7 +318,7 @@ nullfs_statfs(mp, sbp, p) bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } - strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name, MFSNAMELEN); + strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN); return (0); } @@ -370,8 +368,10 @@ nullfs_vptofh(vp, fhp) return (EOPNOTSUPP); } +#define nullfs_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \ + size_t, struct proc *)))eopnotsupp) + struct vfsops null_vfsops = { - MOUNT_NULL, nullfs_mount, nullfs_start, nullfs_unmount, @@ -383,4 +383,5 @@ struct vfsops null_vfsops = { nullfs_fhtovp, nullfs_vptofh, nullfs_init, + nullfs_sysctl }; diff --git a/sys/miscfs/nullfs/null_vnops.c b/sys/miscfs/nullfs/null_vnops.c index ca7c6ce0b86..b4068bee664 100644 --- a/sys/miscfs/nullfs/null_vnops.c +++ b/sys/miscfs/nullfs/null_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: null_vnops.c,v 1.5 1997/09/11 05:26:13 millert Exp $ */ +/* $OpenBSD: null_vnops.c,v 1.6 1997/10/06 15:19:07 csapuntz Exp $ */ /* $NetBSD: null_vnops.c,v 1.7 1996/05/10 22:51:01 jtk Exp $ */ /* @@ -98,10 +98,18 @@ * Although bypass handles most operations, * vop_getattr, _inactive, _reclaim, and _print are not bypassed. * Vop_getattr must change the fsid being returned. + * Vop_lock and vop_unlock must handle any locking for the + * current vnode as well as pass the lock request down. 
* Vop_inactive and vop_reclaim are not bypassed so that - * they can handle freeing null-layer specific data. - * Vop_print is not bypassed to avoid excessive debugging - * information. + * the can handle freeing null-layer specific data. Vop_print + * is not bypassed to avoid excessive debugging information. + * Also, certain vnod eoperations change the locking state within + * the operation (create, mknod, remove, link, rename, mkdir, rmdir, + * and symlink). Ideally, these operations should not change the + * lock state, but should be changed to let the caller of the + * function unlock them.Otherwise all intermediate vnode layers + * (such as union, umapfs, etc) must catch these functions + * to the necessary locking at their layer * * * INSTANTIATING VNODE STACKS @@ -182,7 +190,6 @@ int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */ -int null_bypass __P((void *)); int null_getattr __P((void *)); int null_inactive __P((void *)); int null_reclaim __P((void *)); @@ -510,6 +517,7 @@ null_lock(v) struct vop_lock_args *ap = v; struct vnode *vp = ap->a_vp; struct null_node *nn; + struct proc *p = ap->a_p; #ifdef NULLFS_DIAGNOSTIC vprint("null_lock_e", ap->a_vp); @@ -533,7 +541,7 @@ start: * is zero, we are probably being reclaimed so we need to * keep our hands off the lower node. 
*/ - VOP_LOCK(nn->null_lowervp); + vn_lock(nn->null_lowervp, LK_EXCLUSIVE | LK_RETRY, p); nn->null_flags |= NULL_LLOCK; } @@ -568,6 +576,7 @@ null_unlock(v) void *v; { struct vop_lock_args *ap = v; + struct proc *p = ap->a_p; struct null_node *nn = VTONULL(ap->a_vp); #ifdef NULLFS_DIAGNOSTIC @@ -587,7 +596,7 @@ null_unlock(v) nn->null_flags &= ~NULL_LOCKED; if ((nn->null_flags & NULL_LLOCK) != 0) - VOP_UNLOCK(nn->null_lowervp); + VOP_UNLOCK(nn->null_lowervp, 0, p); nn->null_flags &= ~NULL_LLOCK; @@ -623,20 +632,21 @@ null_lookup(v) register int error; register struct vnode *dvp; int flags = ap->a_cnp->cn_flags; - + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; #ifdef NULLFS_DIAGNOSTIC - printf("null_lookup: dvp=%p, name='%s'\n", - ap->a_dvp, ap->a_cnp->cn_nameptr); + printf("null_lookup: dvp=%lx, name='%s'\n", + ap->a_dvp, cnp->cn_nameptr); #endif /* * the starting dir (ap->a_dvp) comes in locked. */ /* set LOCKPARENT to hold on to it until done below */ - ap->a_cnp->cn_flags |= LOCKPARENT; + cnp->cn_flags |= LOCKPARENT; error = null_bypass(ap); if (!(flags & LOCKPARENT)) - ap->a_cnp->cn_flags &= ~LOCKPARENT; + cnp->cn_flags &= ~LOCKPARENT; if (error) /* @@ -697,20 +707,20 @@ null_lookup(v) * lock. No need for vget() since we hold a * refcount to the starting directory */ - VOP_UNLOCK(dvp); - VOP_LOCK(*ap->a_vpp); + VOP_UNLOCK(dvp, 0, p); + vn_lock(*ap->a_vpp, LK_EXCLUSIVE | LK_RETRY, p); /* * we should return our directory locked if * (flags & LOCKPARENT) and (flags & ISLASTCN) */ if ((flags & LOCKPARENT) && (flags & ISLASTCN)) - VOP_LOCK(dvp); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); } else { /* * Normal directory locking order: we hold the starting * directory locked; now lock our layer of the target. 
*/ - VOP_LOCK(*ap->a_vpp); + vn_lock(*ap->a_vpp, LK_RETRY | LK_EXCLUSIVE, p); /* * underlying starting dir comes back locked * if lockparent (we set it) and no error @@ -740,7 +750,7 @@ null_lookup(v) * end yet, !(flags & ISLASTCN) */ if (!(flags & LOCKPARENT) || !(flags & ISLASTCN)) - VOP_UNLOCK(dvp); + VOP_UNLOCK(dvp, 0, p); } } return error; diff --git a/sys/miscfs/portal/portal_vfsops.c b/sys/miscfs/portal/portal_vfsops.c index b904993553b..52cdac6b4f1 100644 --- a/sys/miscfs/portal/portal_vfsops.c +++ b/sys/miscfs/portal/portal_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: portal_vfsops.c,v 1.2 1996/02/27 07:59:42 niklas Exp $ */ +/* $OpenBSD: portal_vfsops.c,v 1.3 1997/10/06 15:19:08 csapuntz Exp $ */ /* $NetBSD: portal_vfsops.c,v 1.14 1996/02/09 22:40:41 christos Exp $ */ /* @@ -63,25 +63,15 @@ #include <sys/un.h> #include <miscfs/portal/portal.h> -void portal_init __P((void)); +#define portal_init ((int (*) __P((struct vfsconf *)))nullop) + int portal_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); int portal_start __P((struct mount *, int, struct proc *)); int portal_unmount __P((struct mount *, int, struct proc *)); int portal_root __P((struct mount *, struct vnode **)); -int portal_quotactl __P((struct mount *, int, uid_t, caddr_t, - struct proc *)); int portal_statfs __P((struct mount *, struct statfs *, struct proc *)); -int portal_sync __P((struct mount *, int, struct ucred *, struct proc *)); -int portal_vget __P((struct mount *, ino_t, struct vnode **)); -int portal_fhtovp __P((struct mount *, struct fid *, struct mbuf *, - struct vnode **, int *, struct ucred **)); -int portal_vptofh __P((struct vnode *, struct fid *)); -void -portal_init() -{ -} /* * Mount the per-process file descriptors (/dev/fd) @@ -136,7 +126,7 @@ portal_mount(mp, path, data, ndp, p) mp->mnt_flag |= MNT_LOCAL; mp->mnt_data = (qaddr_t)fmp; - getnewfsid(mp, makefstype(MOUNT_PORTAL)); + vfs_getnewfsid(mp); (void) copyinstr(path, mp->mnt_stat.f_mntonname, 
MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); @@ -162,14 +152,10 @@ portal_unmount(mp, mntflags, p) int mntflags; struct proc *p; { - extern int doforce; struct vnode *rootvp = VFSTOPORTAL(mp)->pm_root; int error, flags = 0; if (mntflags & MNT_FORCE) { - /* portal can never be rootfs so don't check for it */ - if (!doforce) - return (EINVAL); flags |= FORCECLOSE; } @@ -221,30 +207,19 @@ portal_root(mp, vpp) struct vnode **vpp; { struct vnode *vp; + struct proc *p = curproc; /* * Return locked reference to root. */ vp = VFSTOPORTAL(mp)->pm_root; VREF(vp); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); *vpp = vp; return (0); } int -portal_quotactl(mp, cmd, uid, arg, p) - struct mount *mp; - int cmd; - uid_t uid; - caddr_t arg; - struct proc *p; -{ - - return (EOPNOTSUPP); -} - -int portal_statfs(mp, sbp, p) struct mount *mp; struct statfs *sbp; @@ -264,60 +239,30 @@ portal_statfs(mp, sbp, p) sbp->f_files = 1; /* Allow for "." */ sbp->f_ffree = 0; /* See comments above */ if (sbp != &mp->mnt_stat) { + sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid)); bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } - strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name, MFSNAMELEN); + strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN); return (0); } -/*ARGSUSED*/ -int -portal_sync(mp, waitfor, uc, p) - struct mount *mp; - int waitfor; - struct ucred *uc; - struct proc *p; -{ - return (0); -} +#define portal_sync ((int (*) __P((struct mount *, int, struct ucred *, \ + struct proc *)))nullop) -int -portal_vget(mp, ino, vpp) - struct mount *mp; - ino_t ino; - struct vnode **vpp; -{ - - return (EOPNOTSUPP); -} - -int -portal_fhtovp(mp, fhp, mb, vpp, what, anon) - struct mount *mp; - struct fid *fhp; - struct mbuf *mb; - struct vnode **vpp; - int *what; - struct ucred **anon; -{ - - return (EOPNOTSUPP); -} - 
-int -portal_vptofh(vp, fhp) - struct vnode *vp; - struct fid *fhp; -{ - - return (EOPNOTSUPP); -} +#define portal_fhtovp ((int (*) __P((struct mount *, struct fid *, \ + struct mbuf *, struct vnode **, int *, struct ucred **)))eopnotsupp) +#define portal_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \ + struct proc *)))eopnotsupp) +#define portal_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \ + size_t, struct proc *)))eopnotsupp) +#define portal_vget ((int (*) __P((struct mount *, ino_t, struct vnode **))) \ + eopnotsupp) +#define portal_vptofh ((int (*) __P((struct vnode *, struct fid *)))eopnotsupp) struct vfsops portal_vfsops = { - MOUNT_PORTAL, portal_mount, portal_start, portal_unmount, @@ -329,4 +274,5 @@ struct vfsops portal_vfsops = { portal_fhtovp, portal_vptofh, portal_init, + portal_sysctl }; diff --git a/sys/miscfs/portal/portal_vnops.c b/sys/miscfs/portal/portal_vnops.c index e56bff0feda..b71c8adb103 100644 --- a/sys/miscfs/portal/portal_vnops.c +++ b/sys/miscfs/portal/portal_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: portal_vnops.c,v 1.2 1996/02/27 07:59:43 niklas Exp $ */ +/* $OpenBSD: portal_vnops.c,v 1.3 1997/10/06 15:19:09 csapuntz Exp $ */ /* $NetBSD: portal_vnops.c,v 1.17 1996/02/13 13:12:57 mycroft Exp $ */ /* @@ -70,49 +70,50 @@ static int portal_fileid = PORTAL_ROOTFILEID+1; static void portal_closefd __P((struct proc *, int)); static int portal_connect __P((struct socket *, struct socket *)); -int portal_badop __P((void *)); -int portal_enotsupp __P((void *)); + +int portal_badop __P((void *)); int portal_lookup __P((void *)); -#define portal_create portal_enotsupp -#define portal_mknod portal_enotsupp +#define portal_create eopnotsupp +#define portal_mknod eopnotsupp int portal_open __P((void *)); #define portal_close nullop #define portal_access nullop int portal_getattr __P((void *)); int portal_setattr __P((void *)); -#define portal_read portal_enotsupp -#define portal_write portal_enotsupp -#define 
portal_ioctl portal_enotsupp -#define portal_select portal_enotsupp -#define portal_mmap portal_enotsupp +#define portal_read eopnotsupp +#define portal_write eopnotsupp +#define portal_ioctl eopnotsupp +#define portal_select eopnotsupp +#define portal_mmap eopnotsupp #define portal_fsync nullop #define portal_seek nullop -#define portal_remove portal_enotsupp +#define portal_remove eopnotsupp int portal_link __P((void *)); -#define portal_rename portal_enotsupp -#define portal_mkdir portal_enotsupp -#define portal_rmdir portal_enotsupp +#define portal_rename eopnotsupp +#define portal_mkdir eopnotsupp +#define portal_rmdir eopnotsupp int portal_symlink __P((void *)); int portal_readdir __P((void *)); -#define portal_readlink portal_enotsupp +#define portal_revoke vop_revoke +#define portal_readlink eopnotsupp int portal_abortop __P((void *)); int portal_inactive __P((void *)); int portal_reclaim __P((void *)); -#define portal_lock nullop -#define portal_unlock nullop +#define portal_lock vop_nolock +#define portal_unlock vop_nounlock #define portal_bmap portal_badop #define portal_strategy portal_badop int portal_print __P((void *)); -#define portal_islocked nullop +#define portal_islocked vop_noislocked int portal_pathconf __P((void *)); -#define portal_advlock portal_enotsupp -#define portal_blkatoff portal_enotsupp -#define portal_valloc portal_enotsupp +#define portal_advlock eopnotsupp +#define portal_blkatoff eopnotsupp +#define portal_valloc eopnotsupp int portal_vfree __P((void *)); -#define portal_truncate portal_enotsupp -#define portal_update portal_enotsupp -#define portal_bwrite portal_enotsupp +#define portal_truncate eopnotsupp +#define portal_update eopnotsupp +#define portal_bwrite eopnotsupp int (**portal_vnodeop_p) __P((void *)); struct vnodeopv_entry_desc portal_vnodeop_entries[] = { @@ -129,6 +130,7 @@ struct vnodeopv_entry_desc portal_vnodeop_entries[] = { { &vop_write_desc, portal_write }, /* write */ { &vop_ioctl_desc, portal_ioctl }, /* 
ioctl */ { &vop_select_desc, portal_select }, /* select */ + { &vop_revoke_desc, portal_revoke }, /* revoke */ { &vop_mmap_desc, portal_mmap }, /* mmap */ { &vop_fsync_desc, portal_fsync }, /* fsync */ { &vop_seek_desc, portal_seek }, /* seek */ @@ -596,7 +598,7 @@ int portal_readdir(v) void *v; { - return (0); + return (0); } /*ARGSUSED*/ @@ -604,7 +606,12 @@ int portal_inactive(v) void *v; { + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap = v; + VOP_UNLOCK(ap->a_vp, 0, ap->a_p); return (0); } @@ -732,27 +739,10 @@ portal_abortop(v) return (0); } -/* - * Portal vnode unsupported operation - */ -/*ARGSUSED*/ -int -portal_enotsupp(v) - void *v; -{ - - return (EOPNOTSUPP); -} - -/* - * Portal "should never get here" operation - */ -/*ARGSUSED*/ int portal_badop(v) void *v; { - - panic("portal: bad op"); - /* NOTREACHED */ + panic ("portal: bad op"); + return (0); } diff --git a/sys/miscfs/procfs/procfs.h b/sys/miscfs/procfs/procfs.h index c33203b1666..3dd780253c2 100644 --- a/sys/miscfs/procfs/procfs.h +++ b/sys/miscfs/procfs/procfs.h @@ -1,4 +1,4 @@ -/* $OpenBSD: procfs.h,v 1.5 1997/08/29 04:24:36 millert Exp $ */ +/* $OpenBSD: procfs.h,v 1.6 1997/10/06 15:19:10 csapuntz Exp $ */ /* $NetBSD: procfs.h,v 1.17 1996/02/12 15:01:41 christos Exp $ */ /* @@ -128,7 +128,9 @@ int procfs_rw __P((void *)); extern int (**procfs_vnodeop_p) __P((void *)); extern struct vfsops procfs_vfsops; -void procfs_init __P((void)); +struct vfsconf; + +int procfs_init __P((struct vfsconf *)); int procfs_root __P((struct mount *, struct vnode **)); #endif /* _KERNEL */ diff --git a/sys/miscfs/procfs/procfs_subr.c b/sys/miscfs/procfs/procfs_subr.c index 2466afc1e8e..89ace835528 100644 --- a/sys/miscfs/procfs/procfs_subr.c +++ b/sys/miscfs/procfs/procfs_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: procfs_subr.c,v 1.8 1997/08/16 02:00:49 millert Exp $ */ +/* $OpenBSD: procfs_subr.c,v 1.9 1997/10/06 15:19:10 csapuntz Exp $ */ /* $NetBSD: procfs_subr.c,v 1.15 
1996/02/12 15:01:42 christos Exp $ */ /* @@ -54,10 +54,14 @@ static TAILQ_HEAD(, pfsnode) pfshead; static int pfsvplock; -void -procfs_init(void) +/*ARGSUSED*/ +int +procfs_init(vfsp) + struct vfsconf *vfsp; + { TAILQ_INIT(&pfshead); + return (0); } /* @@ -93,6 +97,7 @@ procfs_allocvp(mp, vpp, pid, pfs_type) long pid; pfstype pfs_type; { + struct proc *p = curproc; struct pfsnode *pfs; struct vnode *vp; int error; @@ -103,7 +108,7 @@ loop: if (pfs->pfs_pid == pid && pfs->pfs_type == pfs_type && vp->v_mount == mp) { - if (vget(vp, 0)) + if (vget(vp, 0, p)) goto loop; *vpp = vp; return (0); diff --git a/sys/miscfs/procfs/procfs_vfsops.c b/sys/miscfs/procfs/procfs_vfsops.c index bc8af044c41..a9e6c99aae7 100644 --- a/sys/miscfs/procfs/procfs_vfsops.c +++ b/sys/miscfs/procfs/procfs_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: procfs_vfsops.c,v 1.4 1996/06/21 12:49:56 mickey Exp $ */ +/* $OpenBSD: procfs_vfsops.c,v 1.5 1997/10/06 15:19:11 csapuntz Exp $ */ /* $NetBSD: procfs_vfsops.c,v 1.25 1996/02/09 22:40:53 christos Exp $ */ /* @@ -61,14 +61,7 @@ int procfs_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); int procfs_start __P((struct mount *, int, struct proc *)); int procfs_unmount __P((struct mount *, int, struct proc *)); -int procfs_quotactl __P((struct mount *, int, uid_t, caddr_t, - struct proc *)); int procfs_statfs __P((struct mount *, struct statfs *, struct proc *)); -int procfs_sync __P((struct mount *, int, struct ucred *, struct proc *)); -int procfs_vget __P((struct mount *, ino_t, struct vnode **)); -int procfs_fhtovp __P((struct mount *, struct fid *, struct mbuf *, - struct vnode **, int *, struct ucred **)); -int procfs_vptofh __P((struct vnode *, struct fid *)); /* * VFS Operations. 
* @@ -95,7 +88,7 @@ procfs_mount(mp, path, data, ndp, p) mp->mnt_flag |= MNT_LOCAL; mp->mnt_data = 0; - getnewfsid(mp, makefstype(MOUNT_PROCFS)); + vfs_getnewfsid(mp); (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); @@ -179,72 +172,25 @@ procfs_statfs(mp, sbp, p) bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } - strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name, MFSNAMELEN); + strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN); return (0); } -/*ARGSUSED*/ -int -procfs_quotactl(mp, cmds, uid, arg, p) - struct mount *mp; - int cmds; - uid_t uid; - caddr_t arg; - struct proc *p; -{ - - return (EOPNOTSUPP); -} - -/*ARGSUSED*/ -int -procfs_sync(mp, waitfor, uc, p) - struct mount *mp; - int waitfor; - struct ucred *uc; - struct proc *p; -{ - return (0); -} +#define procfs_sync ((int (*) __P((struct mount *, int, struct ucred *, \ + struct proc *)))nullop) -/*ARGSUSED*/ -int -procfs_vget(mp, ino, vpp) - struct mount *mp; - ino_t ino; - struct vnode **vpp; -{ - - return (EOPNOTSUPP); -} - -/*ARGSUSED*/ -int -procfs_fhtovp(mp, fhp, mb, vpp, what, anon) - struct mount *mp; - struct fid *fhp; - struct mbuf *mb; - struct vnode **vpp; - int *what; - struct ucred **anon; -{ - - return (EINVAL); -} - -/*ARGSUSED*/ -int -procfs_vptofh(vp, fhp) - struct vnode *vp; - struct fid *fhp; -{ - - return (EINVAL); -} +#define procfs_fhtovp ((int (*) __P((struct mount *, struct fid *, \ + struct mbuf *, struct vnode **, int *, struct ucred **)))eopnotsupp) +#define procfs_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \ + struct proc *)))eopnotsupp) +#define procfs_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \ + size_t, struct proc *)))eopnotsupp) +#define procfs_vget ((int (*) __P((struct mount *, ino_t, struct vnode **))) \ + eopnotsupp) +#define procfs_vptofh ((int (*) __P((struct vnode *, 
struct fid *)))eopnotsupp) struct vfsops procfs_vfsops = { - MOUNT_PROCFS, procfs_mount, procfs_start, procfs_unmount, @@ -256,4 +202,5 @@ struct vfsops procfs_vfsops = { procfs_fhtovp, procfs_vptofh, procfs_init, + procfs_sysctl }; diff --git a/sys/miscfs/procfs/procfs_vnops.c b/sys/miscfs/procfs/procfs_vnops.c index 281b301b8d8..4700c4e2f73 100644 --- a/sys/miscfs/procfs/procfs_vnops.c +++ b/sys/miscfs/procfs/procfs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: procfs_vnops.c,v 1.6 1997/08/29 04:24:38 millert Exp $ */ +/* $OpenBSD: procfs_vnops.c,v 1.7 1997/10/06 15:19:11 csapuntz Exp $ */ /* $NetBSD: procfs_vnops.c,v 1.40 1996/03/16 23:52:55 christos Exp $ */ /* @@ -728,6 +728,7 @@ procfs_lookup(v) struct vnode **vpp = ap->a_vpp; struct vnode *dvp = ap->a_dvp; char *pname = cnp->cn_nameptr; + struct proc *curp = curproc; struct proc_target *pt; struct vnode *fvp; pid_t pid; @@ -787,7 +788,7 @@ procfs_lookup(v) fvp = procfs_findtextvp(p); /* We already checked that it exists. */ VREF(fvp); - VOP_LOCK(fvp); + vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, curp); *vpp = fvp; return (0); } @@ -839,8 +840,6 @@ procfs_readdir(v) struct pfsnode *pfs; int i; int error; - u_long *cookies = ap->a_cookies; - int ncookies = ap->a_ncookies; pfs = VTOPFS(ap->a_vp); @@ -880,8 +879,6 @@ procfs_readdir(v) if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) break; - if (ncookies-- > 0) - *cookies++ = i + 1; } break; @@ -943,8 +940,6 @@ procfs_readdir(v) if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0) break; - if (ncookies-- > 0) - *cookies++ = i + 1; } done: diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c index 321e910cd1b..c9d59179f98 100644 --- a/sys/miscfs/specfs/spec_vnops.c +++ b/sys/miscfs/specfs/spec_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: spec_vnops.c,v 1.9 1997/01/04 17:10:04 kstailey Exp $ */ +/* $OpenBSD: spec_vnops.c,v 1.10 1997/10/06 15:19:12 csapuntz Exp $ */ /* $NetBSD: spec_vnops.c,v 1.29 1996/04/22 01:42:38 christos Exp $ */ /* @@ -79,6 +79,7 @@ 
struct vnodeopv_entry_desc spec_vnodeop_entries[] = { { &vop_lease_desc, spec_lease_check }, /* lease */ { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ { &vop_select_desc, spec_select }, /* select */ + { &vop_revoke_desc, spec_revoke }, /* revoke */ { &vop_mmap_desc, spec_mmap }, /* mmap */ { &vop_fsync_desc, spec_fsync }, /* fsync */ { &vop_seek_desc, spec_seek }, /* seek */ @@ -143,8 +144,13 @@ spec_open(v) struct ucred *a_cred; struct proc *a_p; } */ *ap = v; - struct vnode *bvp, *vp = ap->a_vp; - dev_t bdev, dev = (dev_t)vp->v_rdev; + struct proc *p = ap->a_p; + struct vnode *vp = ap->a_vp; +#if 0 + struct vnode *bvp; + dev_t bdev; +#endif + dev_t dev = (dev_t)vp->v_rdev; register int maj = major(dev); int error; @@ -172,6 +178,7 @@ spec_open(v) * devices whose corresponding block devices are * currently mounted. */ +#if 0 if (securelevel >= 1) { if ((bdev = chrtoblk(dev)) != NODEV && vfinddev(bdev, VBLK, &bvp) && @@ -181,12 +188,13 @@ spec_open(v) if (iskmemdev(dev)) return (EPERM); } +#endif } if (cdevsw[maj].d_type == D_TTY) vp->v_flag |= VISTTY; - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: @@ -255,10 +263,10 @@ spec_read(v) switch (vp->v_type) { case VCHR: - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); error = (*cdevsw[major(vp->v_rdev)].d_read) (vp->v_rdev, uio, ap->a_ioflag); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: @@ -306,6 +314,19 @@ spec_read(v) /* NOTREACHED */ } +int +spec_inactive(v) + void *v; +{ + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap = v; + + VOP_UNLOCK(ap->a_vp, 0, ap->a_p); + return (0); +} + /* * Vnode op for write */ @@ -341,10 +362,10 @@ spec_write(v) switch (vp->v_type) { case VCHR: - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); error = (*cdevsw[major(vp->v_rdev)].d_write) (vp->v_rdev, uio, ap->a_ioflag); - 
VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: @@ -514,6 +535,74 @@ loop: /* * Just call the device strategy routine */ +int fs_read[16], fs_write[16]; + +int cur_found[10]; + +int fs_bwrite[64][10]; +int fs_bwrite_cnt[64]; +int num_found; + +int num_levels = 4; +#include <machine/cpu.h> +#include <machine/pcb.h> + +int find_stack(int); + +int find_stack(int levels) + +{ + struct pcb stack; + int *eip, *ebp; + + savectx(&stack); + ebp = (int *)stack.pcb_ebp; + eip = (int *) *(ebp + 1); + + while ((int)ebp > 0xf0000000 && levels--) { + eip = (int *) *(ebp + 1); + + ebp = (int *) *ebp; + } + + return ((int)eip); +} + +void track_write __P((void)); + +void track_write(void) + +{ + int idx, cnt; + + for (idx = 0; idx < 10; idx++) { + cur_found[idx] = find_stack(idx + num_levels); + } + + for (cnt = 0; cnt < num_found; cnt++) { + for (idx = 0; idx < 10; idx++) { + if (fs_bwrite[cnt][idx] != cur_found[idx]) + goto next_iter; + } + + fs_bwrite_cnt[cnt]++; + break; + next_iter: + } + + if ((cnt == num_found) && + (num_found != 64)) { + for (idx = 0; idx < 10; idx++) { + fs_bwrite[num_found][idx] = cur_found[idx]; + } + + fs_bwrite_cnt[num_found] = 1; + num_found++; + } + + return; +} + int spec_strategy(v) void *v; @@ -521,8 +610,31 @@ spec_strategy(v) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap = v; + struct buf *bp; + + int maj = major(ap->a_bp->b_dev); + + if ((maj >= 0) && (maj < 16)) { + if (ap->a_bp->b_flags & B_READ) + fs_read[maj]++; + else { + fs_write[maj]++; + if (maj == 4) + track_write(); + + } + } + +#if 0 + assert (!(flags & (B_DELWRI | B_DONE))); +#endif - (*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp); + bp = ap->a_bp; + + if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_start) + (*bioops.io_start)(bp); + + (*bdevsw[maj].d_strategy)(ap->a_bp); return (0); } @@ -538,33 +650,16 @@ spec_bmap(v) daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; + int *a_runp; } */ *ap = v; if (ap->a_vpp != NULL) 
*ap->a_vpp = ap->a_vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn; - return (0); -} - -/* - * At the moment we do not do any locking. - */ -/* ARGSUSED */ -int -spec_lock(v) - void *v; -{ - - return (0); -} - -/* ARGSUSED */ -int -spec_unlock(v) - void *v; -{ - + if (ap->a_runp != NULL) + *ap->a_runp = 0; + return (0); } @@ -621,7 +716,9 @@ spec_close(v) * we must invalidate any in core blocks, so that * we can, for instance, change floppy disks. */ + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0); + VOP_UNLOCK(vp, 0, ap->a_p); if (error) return (error); /* diff --git a/sys/miscfs/specfs/specdev.h b/sys/miscfs/specfs/specdev.h index 9f58fa7acb4..94f98ac3c70 100644 --- a/sys/miscfs/specfs/specdev.h +++ b/sys/miscfs/specfs/specdev.h @@ -1,4 +1,4 @@ -/* $OpenBSD: specdev.h,v 1.3 1997/01/04 17:10:05 kstailey Exp $ */ +/* $OpenBSD: specdev.h,v 1.4 1997/10/06 15:19:13 csapuntz Exp $ */ /* $NetBSD: specdev.h,v 1.12 1996/02/13 13:13:01 mycroft Exp $ */ /* @@ -44,7 +44,7 @@ struct specinfo { struct vnode **si_hashchain; struct vnode *si_specnext; - long si_flags; + struct mount *si_mountpoint; dev_t si_rdev; struct lockf *si_lockf; }; @@ -54,15 +54,10 @@ struct specinfo { #define v_rdev v_specinfo->si_rdev #define v_hashchain v_specinfo->si_hashchain #define v_specnext v_specinfo->si_specnext -#define v_specflags v_specinfo->si_flags +#define v_specmountpoint v_specinfo->si_mountpoint #define v_speclockf v_specinfo->si_lockf /* - * Flags for specinfo - */ -#define SI_MOUNTEDON 0x0001 /* block special device is mounted on */ - -/* * Special device management */ #define SPECHSZ 64 @@ -113,14 +108,14 @@ int spec_fsync __P((void *)); #define spec_readdir spec_badop #define spec_readlink spec_badop #define spec_abortop spec_badop -#define spec_inactive nullop +int spec_inactive __P((void *)); #define spec_reclaim nullop -int spec_lock __P((void *)); -int spec_unlock __P((void *)); +#define spec_lock vop_nolock +#define 
spec_unlock vop_nounlock +#define spec_islocked vop_noislocked int spec_bmap __P((void *)); int spec_strategy __P((void *)); int spec_print __P((void *)); -#define spec_islocked nullop int spec_pathconf __P((void *)); int spec_advlock __P((void *)); #define spec_blkatoff spec_badop @@ -130,3 +125,4 @@ int spec_advlock __P((void *)); #define spec_truncate nullop #define spec_update nullop #define spec_bwrite vn_bwrite +#define spec_revoke vop_revoke diff --git a/sys/miscfs/umapfs/umap.h b/sys/miscfs/umapfs/umap.h index 07ffa6b6e70..772bce10c4b 100644 --- a/sys/miscfs/umapfs/umap.h +++ b/sys/miscfs/umapfs/umap.h @@ -1,4 +1,4 @@ -/* $OpenBSD: umap.h,v 1.5 1996/03/25 18:02:55 mickey Exp $ */ +/* $OpenBSD: umap.h,v 1.6 1997/10/06 15:19:14 csapuntz Exp $ */ /* $NetBSD: umap.h,v 1.6 1996/02/09 22:41:00 christos Exp $ */ /* @@ -94,6 +94,6 @@ extern struct vnode *umap_checkvp __P((struct vnode *vp, char *fil, int lno)); extern int (**umap_vnodeop_p) __P((void *)); extern struct vfsops umap_vfsops; -void umapfs_init __P((void)); +int umapfs_init __P((struct vfsconf *)); #endif /* _KERNEL */ diff --git a/sys/miscfs/umapfs/umap_subr.c b/sys/miscfs/umapfs/umap_subr.c index e8ab5c8010f..93bbdd51917 100644 --- a/sys/miscfs/umapfs/umap_subr.c +++ b/sys/miscfs/umapfs/umap_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: umap_subr.c,v 1.8 1997/09/11 05:26:14 millert Exp $ */ +/* $OpenBSD: umap_subr.c,v 1.9 1997/10/06 15:19:14 csapuntz Exp $ */ /* $NetBSD: umap_subr.c,v 1.8 1996/03/05 02:35:39 thorpej Exp $ */ /* @@ -42,6 +42,7 @@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/proc.h> #include <sys/time.h> #include <sys/types.h> #include <sys/vnode.h> @@ -75,14 +76,15 @@ static int umap_node_alloc __P((struct mount *, struct vnode *, /* * Initialise cache headers */ -void -umapfs_init() +int +umapfs_init(struct vfsconf *vfsp) { #ifdef UMAPFS_DIAGNOSTIC printf("umapfs_init\n"); /* printed during system boot */ #endif umap_node_hashtbl = hashinit(NUMAPNODECACHE, M_CACHE, 
&umap_node_hash); + return (0); } /* @@ -141,6 +143,7 @@ umap_node_find(mp, targetvp) struct mount *mp; struct vnode *targetvp; { + struct proc *p = curproc; struct umap_node_hashhead *hd; struct umap_node *a; struct vnode *vp; @@ -166,7 +169,7 @@ loop: * stuff, but we don't want to lock * the lower node. */ - if (vget(vp, 0)) { + if (vget(vp, 0, p)) { #ifdef UMAPFS_DIAGNOSTIC printf ("umap_node_find: vget failed.\n"); #endif @@ -198,6 +201,7 @@ umap_node_alloc(mp, lowervp, vpp) struct umap_node *xp; struct vnode *vp, *nvp; int error; + struct proc *p = curproc; extern int (**dead_vnodeop_p) __P((void *)); if ((error = getnewvnode(VT_UMAP, mp, umap_vnodeop_p, &vp)) != 0) @@ -259,14 +263,14 @@ loop: vgone(cvp); goto loop; } - if (vget(cvp, 0)) /* can't lock; will die! */ + if (vget(cvp, 0, p)) /* can't lock; will die! */ goto loop; break; } vp->v_hashchain = cvpp; vp->v_specnext = *cvpp; - vp->v_specflags = 0; + vp->v_specmountpoint = NULL; *cvpp = vp; #ifdef DIAGNOSTIC if (cvp == NULLVP) diff --git a/sys/miscfs/umapfs/umap_vfsops.c b/sys/miscfs/umapfs/umap_vfsops.c index 750dbe11d66..6008d21603c 100644 --- a/sys/miscfs/umapfs/umap_vfsops.c +++ b/sys/miscfs/umapfs/umap_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: umap_vfsops.c,v 1.8 1997/09/11 05:26:15 millert Exp $ */ +/* $OpenBSD: umap_vfsops.c,v 1.9 1997/10/06 15:19:15 csapuntz Exp $ */ /* $NetBSD: umap_vfsops.c,v 1.9 1996/02/09 22:41:05 christos Exp $ */ /* @@ -48,6 +48,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/time.h> +#include <sys/proc.h> #include <sys/types.h> #include <sys/vnode.h> #include <sys/mount.h> @@ -181,7 +182,7 @@ umapfs_mount(mp, path, data, ndp, p) /* * Unlock the node (either the lower or the alias) */ - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); /* * Make sure the node alias worked */ @@ -201,7 +202,7 @@ umapfs_mount(mp, path, data, ndp, p) if (UMAPVPTOLOWERVP(umapm_rootvp)->v_mount->mnt_flag & MNT_LOCAL) mp->mnt_flag |= MNT_LOCAL; mp->mnt_data = (qaddr_t) amp; - getnewfsid(mp, 
makefstype(MOUNT_UMAP)); + vfs_getnewfsid(mp); (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); @@ -242,16 +243,12 @@ umapfs_unmount(mp, mntflags, p) struct vnode *umapm_rootvp = MOUNTTOUMAPMOUNT(mp)->umapm_rootvp; int error; int flags = 0; - extern int doforce; #ifdef UMAPFS_DIAGNOSTIC printf("umapfs_unmount(mp = %p)\n", mp); #endif if (mntflags & MNT_FORCE) { - /* lofs can never be rootfs so don't check for it */ - if (!doforce) - return (EINVAL); flags |= FORCECLOSE; } @@ -294,6 +291,7 @@ umapfs_root(mp, vpp) struct mount *mp; struct vnode **vpp; { + struct proc *p = curproc; struct vnode *vp; #ifdef UMAPFS_DIAGNOSTIC @@ -308,7 +306,7 @@ umapfs_root(mp, vpp) */ vp = MOUNTTOUMAPMOUNT(mp)->umapm_rootvp; VREF(vp); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); *vpp = vp; return (0); } @@ -361,7 +359,7 @@ umapfs_statfs(mp, sbp, p) bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } - strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name, MFSNAMELEN); + strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN); return (0); } @@ -407,8 +405,11 @@ umapfs_vptofh(vp, fhp) return VFS_VPTOFH(UMAPVPTOLOWERVP(vp), fhp); } +#define umapfs_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \ + size_t, struct proc *)))eopnotsupp) + + struct vfsops umap_vfsops = { - MOUNT_UMAP, umapfs_mount, umapfs_start, umapfs_unmount, @@ -420,4 +421,5 @@ struct vfsops umap_vfsops = { umapfs_fhtovp, umapfs_vptofh, umapfs_init, + umapfs_sysctl }; diff --git a/sys/miscfs/umapfs/umap_vnops.c b/sys/miscfs/umapfs/umap_vnops.c index 40d1bf18a86..a3fc89bccd5 100644 --- a/sys/miscfs/umapfs/umap_vnops.c +++ b/sys/miscfs/umapfs/umap_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: umap_vnops.c,v 1.6 1996/05/29 07:12:10 deraadt Exp $ */ +/* $OpenBSD: umap_vnops.c,v 1.7 1997/10/06 15:19:15 csapuntz Exp $ */ /* $NetBSD: umap_vnops.c,v 1.5.4.1 
1996/05/25 22:13:35 jtc Exp $ */ /* @@ -52,6 +52,7 @@ #include <sys/namei.h> #include <sys/malloc.h> #include <sys/buf.h> +#include <miscfs/nullfs/null.h> #include <miscfs/umapfs/umap.h> @@ -65,6 +66,8 @@ int umap_print __P((void *)); int umap_rename __P((void *)); int umap_strategy __P((void *)); int umap_bwrite __P((void *)); +int umap_unlock __P((void *)); +int umap_lock __P((void *)); /* * Global vfs data structures @@ -83,7 +86,8 @@ struct vnodeopv_entry_desc umap_vnodeop_entries[] = { { &vop_reclaim_desc, umap_reclaim }, { &vop_print_desc, umap_print }, { &vop_rename_desc, umap_rename }, - + { &vop_lock_desc, umap_lock }, + { &vop_unlock_desc, umap_unlock }, { &vop_strategy_desc, umap_strategy }, { &vop_bwrite_desc, umap_bwrite }, @@ -378,10 +382,54 @@ umap_inactive(v) * cache and reusable. * */ - VOP_UNLOCK(ap->a_vp); + VOP_UNLOCK(ap->a_vp, 0, ap->a_p); return (0); } +/* + * We need to process our own vnode lock and then clear the + * interlock flag as it applies only to our vnode, not the + * vnodes below us on the stack. + */ +int +umap_lock(v) + void *v; +{ + struct vop_lock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap = v; + + vop_nolock(ap); + if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN) + return (0); + ap->a_flags &= ~LK_INTERLOCK; + return (null_bypass(ap)); +} + +/* + * We need to process our own vnode unlock and then clear the + * interlock flag as it applies only to our vnode, not the + * vnodes below us on the stack. 
+ */ +int +umap_unlock(v) + void *v; +{ + struct vop_unlock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap = v; + + vop_nounlock(ap); + ap->a_flags &= ~LK_INTERLOCK; + return (null_bypass(ap)); +} + + + int umap_reclaim(v) void *v; diff --git a/sys/miscfs/union/union.h b/sys/miscfs/union/union.h index 94052ea28be..ee541b44dbc 100644 --- a/sys/miscfs/union/union.h +++ b/sys/miscfs/union/union.h @@ -1,4 +1,4 @@ -/* $OpenBSD: union.h,v 1.3 1997/08/01 05:58:57 millert Exp $ */ +/* $OpenBSD: union.h,v 1.4 1997/10/06 15:19:16 csapuntz Exp $ */ /* $NetBSD: union.h,v 1.9 1996/02/09 22:41:08 christos Exp $ */ /* @@ -40,6 +40,8 @@ * @(#)union.h 8.9 (Berkeley) 12/10/94 */ +struct vfsconf; + struct union_args { char *target; /* Target of loopback */ int mntflags; /* Options on the mount */ @@ -129,7 +131,7 @@ extern void union_newsize __P((struct vnode *, off_t, off_t)); extern int (**union_vnodeop_p) __P((void *)); extern struct vfsops union_vfsops; -void union_init __P((void)); +int union_init __P((struct vfsconf *)); int union_freevp __P((struct vnode *)); #endif /* _KERNEL */ diff --git a/sys/miscfs/union/union_subr.c b/sys/miscfs/union/union_subr.c index 546c1958e2f..f9b98c0ad55 100644 --- a/sys/miscfs/union/union_subr.c +++ b/sys/miscfs/union/union_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: union_subr.c,v 1.4 1997/01/02 12:20:43 mickey Exp $ */ +/* $OpenBSD: union_subr.c,v 1.5 1997/10/06 15:19:17 csapuntz Exp $ */ /* $NetBSD: union_subr.c,v 1.18 1996/02/09 22:41:10 christos Exp $ */ /* @@ -78,7 +78,7 @@ static int union_relookup __P((struct union_mount *, struct vnode *, struct componentname *, char *, int)); int union_vn_close __P((struct vnode *, int, struct ucred *, struct proc *)); static void union_dircache_r __P((struct vnode *, struct vnode ***, int *)); -struct vnode *union_dircache __P((struct vnode *)); +struct vnode *union_dircache __P((struct vnode *, struct proc *)); /* * This variable is used to hold a pointer to a function @@ 
-105,7 +105,7 @@ int union_check(p, vpp, fp, auio, error) if ((*vpp)->v_op == union_vnodeop_p) { struct vnode *lvp; - lvp = union_dircache(*vpp); + lvp = union_dircache(*vpp, p); if (lvp != NULLVP) { struct vattr va; @@ -122,7 +122,7 @@ int union_check(p, vpp, fp, auio, error) if (lvp != NULLVP) { *error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); - VOP_UNLOCK(lvp); + VOP_UNLOCK(lvp, 0, p); if (*error) { vrele(lvp); @@ -140,8 +140,9 @@ int union_check(p, vpp, fp, auio, error) return (0); }; -void -union_init() +int +union_init(vfsp) + struct vfsconf *vfsp; { int i; @@ -149,6 +150,7 @@ union_init() LIST_INIT(&unhead[i]); bzero((caddr_t) unvplock, sizeof(unvplock)); union_check_p = union_check; + return (0); } static int @@ -408,7 +410,8 @@ loop: (un->un_uppervp == uppervp || un->un_uppervp == NULLVP) && (UNIONTOV(un)->v_mount == mp)) { - if (vget(UNIONTOV(un), 0)) { + if (vget(UNIONTOV(un), 0, + cnp ? cnp->cn_proc : NULL)) { union_list_unlock(hash); goto loop; } @@ -646,12 +649,12 @@ union_copyfile(fvp, tvp, cred, p) uio.uio_segflg = UIO_SYSSPACE; uio.uio_offset = 0; - VOP_UNLOCK(fvp); /* XXX */ + VOP_UNLOCK(fvp, 0, p); /* XXX */ VOP_LEASE(fvp, p, cred, LEASE_READ); - VOP_LOCK(fvp); /* XXX */ - VOP_UNLOCK(tvp); /* XXX */ + vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p); + VOP_UNLOCK(tvp, 0, p); /* XXX */ VOP_LEASE(tvp, p, cred, LEASE_WRITE); - VOP_LOCK(tvp); /* XXX */ + vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK); @@ -719,11 +722,11 @@ union_copyup(un, docopy, cred, p) * XX - should not ignore errors * from VOP_CLOSE */ - VOP_LOCK(lvp); + vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_OPEN(lvp, FREAD, cred, p); if (error == 0) { error = union_copyfile(lvp, uvp, cred, p); - VOP_UNLOCK(lvp); + VOP_UNLOCK(lvp, 0, p); (void) VOP_CLOSE(lvp, FREAD, cred, p); } #ifdef UNION_DIAGNOSTIC @@ -733,166 +736,166 @@ union_copyup(un, docopy, cred, p) } un->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(uvp); + VOP_UNLOCK(uvp, 0, p); union_vn_close(uvp, 
FWRITE, cred, p); - VOP_LOCK(uvp); - un->un_flags |= UN_ULOCK; - - /* - * Subsequent IOs will go to the top layer, so - * call close on the lower vnode and open on the - * upper vnode to ensure that the filesystem keeps - * its references counts right. This doesn't do - * the right thing with (cred) and (FREAD) though. - * Ignoring error returns is not right, either. - */ - if (error == 0) { - int i; - - for (i = 0; i < un->un_openl; i++) { - (void) VOP_CLOSE(lvp, FREAD, cred, p); - (void) VOP_OPEN(uvp, FREAD, cred, p); - } - un->un_openl = 0; - } - - return (error); - -} - -static int -union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) - struct union_mount *um; - struct vnode *dvp; - struct vnode **vpp; - struct componentname *cnp; - struct componentname *cn; - char *path; - int pathlen; -{ - int error; - - /* - * A new componentname structure must be faked up because - * there is no way to know where the upper level cnp came - * from or what it is being used for. This must duplicate - * some of the work done by NDINIT, some of the work done - * by namei, some of the work done by lookup and some of - * the work done by VOP_LOOKUP when given a CREATE flag. - * Conclusion: Horrible. - * - * The pathname buffer will be FREEed by VOP_MKDIR. - */ - cn->cn_namelen = pathlen; - cn->cn_pnbuf = malloc(cn->cn_namelen+1, M_NAMEI, M_WAITOK); - bcopy(path, cn->cn_pnbuf, cn->cn_namelen); - cn->cn_pnbuf[cn->cn_namelen] = '\0'; - - cn->cn_nameiop = CREATE; - cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN); - cn->cn_proc = cnp->cn_proc; - if (um->um_op == UNMNT_ABOVE) - cn->cn_cred = cnp->cn_cred; - else - cn->cn_cred = um->um_cred; - cn->cn_nameptr = cn->cn_pnbuf; - cn->cn_hash = cnp->cn_hash; - cn->cn_consume = cnp->cn_consume; - - VREF(dvp); - error = relookup(dvp, vpp, cn); - if (!error) - vrele(dvp); - else { - free(cn->cn_pnbuf, M_NAMEI); - cn->cn_pnbuf = 0; - } - - return (error); -} - -/* - * Create a shadow directory in the upper layer. 
- * The new vnode is returned locked. - * - * (um) points to the union mount structure for access to the - * the mounting process's credentials. - * (dvp) is the directory in which to create the shadow directory. - * it is unlocked on entry and exit. - * (cnp) is the componentname to be created. - * (vpp) is the returned newly created shadow directory, which - * is returned locked. - */ -int -union_mkshadow(um, dvp, cnp, vpp) - struct union_mount *um; - struct vnode *dvp; - struct componentname *cnp; - struct vnode **vpp; -{ - int error; - struct vattr va; - struct proc *p = cnp->cn_proc; - struct componentname cn; - - error = union_relookup(um, dvp, vpp, cnp, &cn, - cnp->cn_nameptr, cnp->cn_namelen); - if (error) - return (error); - - if (*vpp) { - VOP_ABORTOP(dvp, &cn); - VOP_UNLOCK(dvp); - vrele(*vpp); - *vpp = NULLVP; - return (EEXIST); - } - - /* - * policy: when creating the shadow directory in the - * upper layer, create it owned by the user who did - * the mount, group from parent directory, and mode - * 777 modified by umask (ie mostly identical to the - * mkdir syscall). (jsp, kb) - */ - - VATTR_NULL(&va); - va.va_type = VDIR; - va.va_mode = um->um_cmode; - - /* VOP_LEASE: dvp is locked */ - VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE); - - error = VOP_MKDIR(dvp, vpp, &cn, &va); - return (error); -} - -/* - * Create a whiteout entry in the upper layer. - * - * (um) points to the union mount structure for access to the - * the mounting process's credentials. - * (dvp) is the directory in which to create the whiteout. - * it is locked on entry and exit. - * (cnp) is the componentname to be created. 
- */ -int -union_mkwhiteout(um, dvp, cnp, path) - struct union_mount *um; - struct vnode *dvp; - struct componentname *cnp; - char *path; -{ - int error; - struct proc *p = cnp->cn_proc; - struct vnode *wvp; - struct componentname cn; - - VOP_UNLOCK(dvp); - error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path)); - if (error) { - VOP_LOCK(dvp); - return (error); + vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY, p); + un->un_flags |= UN_ULOCK; + + /* + * Subsequent IOs will go to the top layer, so + * call close on the lower vnode and open on the + * upper vnode to ensure that the filesystem keeps + * its references counts right. This doesn't do + * the right thing with (cred) and (FREAD) though. + * Ignoring error returns is not right, either. + */ + if (error == 0) { + int i; + + for (i = 0; i < un->un_openl; i++) { + (void) VOP_CLOSE(lvp, FREAD, cred, p); + (void) VOP_OPEN(uvp, FREAD, cred, p); + } + un->un_openl = 0; + } + + return (error); + + } + + static int + union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) + struct union_mount *um; + struct vnode *dvp; + struct vnode **vpp; + struct componentname *cnp; + struct componentname *cn; + char *path; + int pathlen; + { + int error; + + /* + * A new componentname structure must be faked up because + * there is no way to know where the upper level cnp came + * from or what it is being used for. This must duplicate + * some of the work done by NDINIT, some of the work done + * by namei, some of the work done by lookup and some of + * the work done by VOP_LOOKUP when given a CREATE flag. + * Conclusion: Horrible. + * + * The pathname buffer will be FREEed by VOP_MKDIR. 
+ */ + cn->cn_namelen = pathlen; + cn->cn_pnbuf = malloc(cn->cn_namelen+1, M_NAMEI, M_WAITOK); + bcopy(path, cn->cn_pnbuf, cn->cn_namelen); + cn->cn_pnbuf[cn->cn_namelen] = '\0'; + + cn->cn_nameiop = CREATE; + cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN); + cn->cn_proc = cnp->cn_proc; + if (um->um_op == UNMNT_ABOVE) + cn->cn_cred = cnp->cn_cred; + else + cn->cn_cred = um->um_cred; + cn->cn_nameptr = cn->cn_pnbuf; + cn->cn_hash = cnp->cn_hash; + cn->cn_consume = cnp->cn_consume; + + VREF(dvp); + error = relookup(dvp, vpp, cn); + if (!error) + vrele(dvp); + else { + free(cn->cn_pnbuf, M_NAMEI); + cn->cn_pnbuf = 0; + } + + return (error); + } + + /* + * Create a shadow directory in the upper layer. + * The new vnode is returned locked. + * + * (um) points to the union mount structure for access to the + * the mounting process's credentials. + * (dvp) is the directory in which to create the shadow directory. + * it is unlocked on entry and exit. + * (cnp) is the componentname to be created. + * (vpp) is the returned newly created shadow directory, which + * is returned locked. + */ + int + union_mkshadow(um, dvp, cnp, vpp) + struct union_mount *um; + struct vnode *dvp; + struct componentname *cnp; + struct vnode **vpp; + { + int error; + struct vattr va; + struct proc *p = cnp->cn_proc; + struct componentname cn; + + error = union_relookup(um, dvp, vpp, cnp, &cn, + cnp->cn_nameptr, cnp->cn_namelen); + if (error) + return (error); + + if (*vpp) { + VOP_ABORTOP(dvp, &cn); + VOP_UNLOCK(dvp, 0, p); + vrele(*vpp); + *vpp = NULLVP; + return (EEXIST); + } + + /* + * policy: when creating the shadow directory in the + * upper layer, create it owned by the user who did + * the mount, group from parent directory, and mode + * 777 modified by umask (ie mostly identical to the + * mkdir syscall). 
(jsp, kb) + */ + + VATTR_NULL(&va); + va.va_type = VDIR; + va.va_mode = um->um_cmode; + + /* VOP_LEASE: dvp is locked */ + VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE); + + error = VOP_MKDIR(dvp, vpp, &cn, &va); + return (error); + } + + /* + * Create a whiteout entry in the upper layer. + * + * (um) points to the union mount structure for access to the + * the mounting process's credentials. + * (dvp) is the directory in which to create the whiteout. + * it is locked on entry and exit. + * (cnp) is the componentname to be created. + */ + int + union_mkwhiteout(um, dvp, cnp, path) + struct union_mount *um; + struct vnode *dvp; + struct componentname *cnp; + char *path; + { + int error; + struct proc *p = cnp->cn_proc; + struct vnode *wvp; + struct componentname cn; + + VOP_UNLOCK(dvp, 0, p); + error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path)); + if (error) { + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + return (error); } if (wvp) { @@ -1018,6 +1021,7 @@ void union_removed_upper(un) struct union_node *un; { + struct proc *p = curproc; /* * We do not set the uppervp to NULLVP here, because lowervp @@ -1037,7 +1041,7 @@ union_removed_upper(un) if (un->un_flags & UN_ULOCK) { un->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(un->un_uppervp); + VOP_UNLOCK(un->un_uppervp, 0, p); } } @@ -1109,8 +1113,9 @@ union_dircache_r(vp, vppp, cntp) } struct vnode * -union_dircache(vp) +union_dircache(vp, p) struct vnode *vp; + struct proc *p; { int cnt; struct vnode *nvp = NULLVP; @@ -1118,8 +1123,7 @@ union_dircache(vp) struct vnode **dircache; int error; - VOP_LOCK(vp); - + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); dircache = VTOUNION(vp)->un_dircache; if (dircache == 0) { cnt = 0; @@ -1144,7 +1148,7 @@ union_dircache(vp) if (*vpp == NULLVP) goto out; - VOP_LOCK(*vpp); + vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p); VREF(*vpp); error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0); if (!error) { @@ -1153,7 +1157,7 @@ union_dircache(vp) } out: - 
VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (nvp); } diff --git a/sys/miscfs/union/union_vfsops.c b/sys/miscfs/union/union_vfsops.c index 3c3fb439545..bcdff8cd3bf 100644 --- a/sys/miscfs/union/union_vfsops.c +++ b/sys/miscfs/union/union_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: union_vfsops.c,v 1.4 1997/09/11 05:26:16 millert Exp $ */ +/* $OpenBSD: union_vfsops.c,v 1.5 1997/10/06 15:19:17 csapuntz Exp $ */ /* $NetBSD: union_vfsops.c,v 1.10 1995/06/18 14:47:47 cgd Exp $ */ /* @@ -64,13 +64,7 @@ int union_mount __P((struct mount *, char *, caddr_t, struct nameidata *, int union_start __P((struct mount *, int, struct proc *)); int union_unmount __P((struct mount *, int, struct proc *)); int union_root __P((struct mount *, struct vnode **)); -int union_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); int union_statfs __P((struct mount *, struct statfs *, struct proc *)); -int union_sync __P((struct mount *, int, struct ucred *, struct proc *)); -int union_vget __P((struct mount *, ino_t, struct vnode **)); -int union_fhtovp __P((struct mount *, struct fid *, struct mbuf *, - struct vnode **, int *, struct ucred **)); -int union_vptofh __P((struct vnode *, struct fid *)); /* * Mount union filesystem @@ -214,7 +208,7 @@ union_mount(mp, path, data, ndp, p) mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY); mp->mnt_data = (qaddr_t)um; - getnewfsid(mp, makefstype(MOUNT_UNION)); + vfs_getnewfsid(mp); (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); @@ -293,16 +287,12 @@ union_unmount(mp, mntflags, p) int error; int freeing; int flags = 0; - extern int doforce; #ifdef UNION_DIAGNOSTIC printf("union_unmount(mp = %p)\n", mp); #endif if (mntflags & MNT_FORCE) { - /* union can never be rootfs so don't check for it */ - if (!doforce) - return (EINVAL); flags |= FORCECLOSE; } @@ -373,6 +363,7 @@ union_root(mp, vpp) struct mount *mp; struct vnode **vpp; { + struct proc *p 
= curproc; struct union_mount *um = MOUNTTOUNIONMOUNT(mp); int error; int loselock; @@ -385,7 +376,7 @@ union_root(mp, vpp) VOP_ISLOCKED(um->um_uppervp)) { loselock = 1; } else { - VOP_LOCK(um->um_uppervp); + vn_lock(um->um_uppervp, LK_EXCLUSIVE | LK_RETRY, p); loselock = 0; } if (um->um_lowervp) @@ -399,9 +390,10 @@ union_root(mp, vpp) 1); if (error) { - if (!loselock) - VOP_UNLOCK(um->um_uppervp); - vrele(um->um_uppervp); + if (loselock) + vrele(um->um_uppervp); + else + vput(um->um_uppervp); if (um->um_lowervp) vrele(um->um_lowervp); } else { @@ -412,19 +404,6 @@ union_root(mp, vpp) return (error); } -/*ARGSUSED*/ -int -union_quotactl(mp, cmd, uid, arg, p) - struct mount *mp; - int cmd; - uid_t uid; - caddr_t arg; - struct proc *p; -{ - - return (EOPNOTSUPP); -} - int union_statfs(mp, sbp, p) struct mount *mp; @@ -491,66 +470,29 @@ union_statfs(mp, sbp, p) sbp->f_ffree += mstat.f_ffree; if (sbp != &mp->mnt_stat) { + sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid)); bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } - strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name, MFSNAMELEN); + strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN); return (0); } -/*ARGSUSED*/ -int -union_sync(mp, waitfor, cred, p) - struct mount *mp; - int waitfor; - struct ucred *cred; - struct proc *p; -{ - - /* - * XXX - Assumes no data cached at union layer. 
- */ - return (0); -} +#define union_sync ((int (*) __P((struct mount *, int, struct ucred *, \ + struct proc *)))nullop) -/*ARGSUSED*/ -int -union_vget(mp, ino, vpp) - struct mount *mp; - ino_t ino; - struct vnode **vpp; -{ - - return (EOPNOTSUPP); -} - -/*ARGSUSED*/ -int -union_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp) - struct mount *mp; - struct fid *fidp; - struct mbuf *nam; - struct vnode **vpp; - int *exflagsp; - struct ucred **credanonp; -{ - - return (EOPNOTSUPP); -} - -/*ARGSUSED*/ -int -union_vptofh(vp, fhp) - struct vnode *vp; - struct fid *fhp; -{ - - return (EOPNOTSUPP); -} +#define union_fhtovp ((int (*) __P((struct mount *, struct fid *, \ + struct mbuf *, struct vnode **, int *, struct ucred **)))eopnotsupp) +#define union_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \ + struct proc *)))eopnotsupp) +#define union_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \ + size_t, struct proc *)))eopnotsupp) +#define union_vget ((int (*) __P((struct mount *, ino_t, struct vnode **))) \ + eopnotsupp) +#define union_vptofh ((int (*) __P((struct vnode *, struct fid *)))eopnotsupp) struct vfsops union_vfsops = { - MOUNT_UNION, union_mount, union_start, union_unmount, @@ -562,4 +504,5 @@ struct vfsops union_vfsops = { union_fhtovp, union_vptofh, union_init, + union_sysctl }; diff --git a/sys/miscfs/union/union_vnops.c b/sys/miscfs/union/union_vnops.c index fa78be69988..3ebaad3721a 100644 --- a/sys/miscfs/union/union_vnops.c +++ b/sys/miscfs/union/union_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: union_vnops.c,v 1.6 1996/12/08 17:40:29 kstailey Exp $ */ +/* $openbsd: union_vnops.c,v 1.6 1996/12/08 17:40:29 kstailey Exp $ */ /* $NetBSD: union_vnops.c,v 1.30.4.1 1996/05/25 22:10:14 jtc Exp $ */ /* @@ -81,6 +81,7 @@ int union_link __P((void *)); int union_rename __P((void *)); int union_mkdir __P((void *)); int union_rmdir __P((void *)); +int union_revoke __P((void *)); int union_symlink __P((void *)); int union_readdir __P((void *)); 
int union_readlink __P((void *)); @@ -124,6 +125,7 @@ struct vnodeopv_entry_desc union_vnodeop_entries[] = { { &vop_symlink_desc, union_symlink }, /* symlink */ { &vop_readdir_desc, union_readdir }, /* readdir */ { &vop_readlink_desc, union_readlink }, /* readlink */ + { &vop_revoke_desc, union_revoke }, /* revoke */ { &vop_abortop_desc, union_abortop }, /* abortop */ { &vop_inactive_desc, union_inactive }, /* inactive */ { &vop_reclaim_desc, union_reclaim }, /* reclaim */ @@ -148,22 +150,22 @@ struct vnodeopv_entry_desc union_vnodeop_entries[] = { struct vnodeopv_desc union_vnodeop_opv_desc = { &union_vnodeop_p, union_vnodeop_entries }; -#define FIXUP(un) { \ +#define FIXUP(un, p) { \ if (((un)->un_flags & UN_ULOCK) == 0) { \ - union_fixup(un); \ + union_fixup(un, p); \ } \ } -static void union_fixup __P((struct union_node *)); +static void union_fixup __P((struct union_node *, struct proc *)); static int union_lookup1 __P((struct vnode *, struct vnode **, struct vnode **, struct componentname *)); static void -union_fixup(un) +union_fixup(un, p) struct union_node *un; + struct proc *p; { - - VOP_LOCK(un->un_uppervp); + vn_lock(un->un_uppervp, LK_EXCLUSIVE | LK_RETRY, p); un->un_flags |= UN_ULOCK; } @@ -174,6 +176,7 @@ union_lookup1(udvp, dvpp, vpp, cnp) struct vnode **vpp; struct componentname *cnp; { + struct proc *p = cnp->cn_proc; int error; struct vnode *tdvp; struct vnode *dvp; @@ -199,7 +202,7 @@ union_lookup1(udvp, dvpp, vpp, cnp) *dvpp = dvp = dvp->v_mount->mnt_vnodecovered; vput(tdvp); VREF(dvp); - VOP_LOCK(dvp); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); } } @@ -213,7 +216,7 @@ union_lookup1(udvp, dvpp, vpp, cnp) * here to allow it to be unlocked again (phew) in union_lookup. 
*/ if (dvp != tdvp && !(cnp->cn_flags & ISLASTCN)) - VOP_LOCK(dvp); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); dvp = tdvp; @@ -225,18 +228,18 @@ union_lookup1(udvp, dvpp, vpp, cnp) while (dvp != udvp && (dvp->v_type == VDIR) && (mp = dvp->v_mountedhere)) { - if (mp->mnt_flag & MNT_MLOCK) { - mp->mnt_flag |= MNT_MWAIT; - sleep((caddr_t) mp, PVFS); + if (vfs_busy(mp, 0, 0, p)) continue; - } + + error = VFS_ROOT(mp, &tdvp); + vfs_unbusy(mp, p); + + vput(dvp); - if ((error = VFS_ROOT(mp, &tdvp)) != 0) { - vput(dvp); + if (error) { return (error); } - vput(dvp); dvp = tdvp; } @@ -260,6 +263,7 @@ union_lookup(v) struct vnode *upperdvp, *lowerdvp; struct vnode *dvp = ap->a_dvp; struct union_node *dun = VTOUNION(dvp); + struct proc *p = curproc; struct componentname *cnp = ap->a_cnp; int lockparent = cnp->cn_flags & LOCKPARENT; struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount); @@ -276,9 +280,9 @@ union_lookup(v) if (dvp == NULLVP) return (ENOENT); VREF(dvp); - VOP_LOCK(dvp); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); if (!lockparent || !(cnp->cn_flags & ISLASTCN)) - VOP_UNLOCK(ap->a_dvp); + VOP_UNLOCK(ap->a_dvp, 0, p, 0, p); return (0); } #endif @@ -298,7 +302,7 @@ union_lookup(v) * on and just return that vnode. */ if (upperdvp != NULLVP) { - FIXUP(dun); + FIXUP(dun, p); /* * If we're doing `..' in the underlying filesystem, * we must drop our lock on the union node before @@ -310,7 +314,7 @@ union_lookup(v) if (cnp->cn_flags & ISDOTDOT) { /* retain lock on underlying VP: */ dun->un_flags |= UN_KLOCK; - VOP_UNLOCK(dvp); + VOP_UNLOCK(dvp, 0, p); } uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp); @@ -330,7 +334,7 @@ union_lookup(v) * dun->un_uppervp locked currently--so we get it * locked here (don't set the UN_ULOCK flag). 
*/ - VOP_LOCK(dvp); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); } /*if (uppervp == upperdvp) @@ -367,7 +371,7 @@ union_lookup(v) if (lowerdvp != NULLVP && !iswhiteout) { int nameiop; - VOP_LOCK(lowerdvp); + vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, p); /* * Only do a LOOKUP on the bottom node, since @@ -392,7 +396,7 @@ union_lookup(v) cnp->cn_nameiop = nameiop; if (lowervp != lowerdvp) - VOP_UNLOCK(lowerdvp); + VOP_UNLOCK(lowerdvp, 0, p); if (cnp->cn_consume != 0) { if (uppervp != NULLVP) { @@ -413,7 +417,7 @@ union_lookup(v) lowervp = LOWERVP(dun->un_pvp); if (lowervp != NULLVP) { VREF(lowervp); - VOP_LOCK(lowervp); + vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, p); lerror = 0; } } @@ -462,9 +466,9 @@ union_lookup(v) * locks/etc! */ dun->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(upperdvp); + VOP_UNLOCK(upperdvp, 0, p); uerror = union_mkshadow(um, upperdvp, cnp, &uppervp); - VOP_LOCK(upperdvp); + vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY, p); dun->un_flags |= UN_ULOCK; if (uerror) { @@ -478,7 +482,7 @@ union_lookup(v) } if (lowervp != NULLVP) - VOP_UNLOCK(lowervp); + VOP_UNLOCK(lowervp, 0, p); error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp, uppervp, lowervp, 1); @@ -491,7 +495,7 @@ union_lookup(v) } else { if (*ap->a_vpp != dvp) if (!lockparent || !(cnp->cn_flags & ISLASTCN)) - VOP_UNLOCK(dvp); + VOP_UNLOCK(dvp, 0, p); if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.' 
&& *ap->a_vpp != dvp) { @@ -516,19 +520,21 @@ union_create(v) } */ *ap = v; struct union_node *un = VTOUNION(ap->a_dvp); struct vnode *dvp; + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; if ((dvp = un->un_uppervp) != NULLVP) { int error; struct vnode *vp; struct mount *mp; - FIXUP(un); + FIXUP(un, p); VREF(dvp); un->un_flags |= UN_KLOCK; mp = ap->a_dvp->v_mount; vput(ap->a_dvp); - error = VOP_CREATE(dvp, &vp, ap->a_cnp, ap->a_vap); + error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap); if (error) return (error); @@ -537,7 +543,7 @@ union_create(v) mp, NULLVP, NULLVP, - ap->a_cnp, + cnp, vp, NULLVP, 1); @@ -560,11 +566,12 @@ union_whiteout(v) int a_flags; } */ *ap = v; struct union_node *un = VTOUNION(ap->a_dvp); + struct proc *p = curproc; if (un->un_uppervp == NULLVP) return (EOPNOTSUPP); - FIXUP(un); + FIXUP(un, p); return (VOP_WHITEOUT(un->un_uppervp, ap->a_cnp, ap->a_flags)); } @@ -580,13 +587,14 @@ union_mknod(v) } */ *ap = v; struct union_node *un = VTOUNION(ap->a_dvp); struct vnode *dvp; + struct proc *p = ap->a_cnp->cn_proc; if ((dvp = un->un_uppervp) != NULLVP) { int error; struct vnode *vp; struct mount *mp; - FIXUP(un); + FIXUP(un, p); VREF(dvp); un->un_flags |= UN_KLOCK; @@ -656,14 +664,14 @@ union_open(v) * Just open the lower vnode */ un->un_openl++; - VOP_LOCK(tvp); + vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_OPEN(tvp, mode, cred, p); - VOP_UNLOCK(tvp); + VOP_UNLOCK(tvp, 0, p); return (error); } - FIXUP(un); + FIXUP(un, p); error = VOP_OPEN(tvp, mode, cred, p); @@ -728,14 +736,15 @@ union_access(v) struct union_node *un = VTOUNION(ap->a_vp); int error = EACCES; struct vnode *vp; + struct proc *p = ap->a_p; if ((vp = un->un_uppervp) != NULLVP) { - FIXUP(un); + FIXUP(un, p); return (VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p)); } if ((vp = un->un_lowervp) != NULLVP) { - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p); if (error == 0) { struct 
union_mount *um = MOUNTTOUNIONMOUNT(ap->a_vp->v_mount); @@ -744,7 +753,7 @@ union_access(v) error = VOP_ACCESS(vp, ap->a_mode, um->um_cred, ap->a_p); } - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); if (error) return (error); } @@ -771,7 +780,7 @@ union_getattr(v) struct vnode *vp = un->un_uppervp; struct vattr *vap; struct vattr va; - + struct proc *p = ap->a_p; /* * Some programs walk the filesystem hierarchy by counting @@ -794,7 +803,7 @@ union_getattr(v) * the union_node's lock flag. */ if (un->un_flags & UN_LOCKED) - FIXUP(un); + FIXUP(un, p); error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p); if (error) @@ -836,6 +845,7 @@ union_setattr(v) struct proc *a_p; } */ *ap = v; struct union_node *un = VTOUNION(ap->a_vp); + struct proc *p = ap->a_p; int error; /* @@ -857,7 +867,7 @@ union_setattr(v) * otherwise return read-only filesystem error. */ if (un->un_uppervp != NULLVP) { - FIXUP(un); + FIXUP(un, p); error = VOP_SETATTR(un->un_uppervp, ap->a_vap, ap->a_cred, ap->a_p); if ((error == 0) && (ap->a_vap->va_size != VNOVAL)) @@ -882,14 +892,15 @@ union_read(v) int error; struct vnode *vp = OTHERVP(ap->a_vp); int dolock = (vp == LOWERVP(ap->a_vp)); + struct proc *p = curproc; if (dolock) - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); else - FIXUP(VTOUNION(ap->a_vp)); + FIXUP(VTOUNION(ap->a_vp), p); error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred); if (dolock) - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); /* * XXX @@ -926,12 +937,13 @@ union_write(v) int error; struct vnode *vp; struct union_node *un = VTOUNION(ap->a_vp); + struct proc *p = curproc; vp = UPPERVP(ap->a_vp); if (vp == NULLVP) panic("union: missing upper layer in write"); - FIXUP(un); + FIXUP(un, p); error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred); /* @@ -1027,18 +1039,19 @@ union_fsync(v) } */ *ap = v; int error = 0; struct vnode *targetvp = OTHERVP(ap->a_vp); + struct proc *p = ap->a_p; if (targetvp != NULLVP) { int dolock = (targetvp == LOWERVP(ap->a_vp)); if (dolock) - 
VOP_LOCK(targetvp); + vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY, p); else - FIXUP(VTOUNION(ap->a_vp)); + FIXUP(VTOUNION(ap->a_vp), p); error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_waitfor, ap->a_p); if (dolock) - VOP_UNLOCK(targetvp); + VOP_UNLOCK(targetvp, 0, p); } return (error); @@ -1076,6 +1089,7 @@ union_remove(v) int error; struct union_node *dun = VTOUNION(ap->a_dvp); struct union_node *un = VTOUNION(ap->a_vp); + struct proc *p = ap->a_cnp->cn_proc; if (dun->un_uppervp == NULLVP) panic("union remove: null upper vnode"); @@ -1085,11 +1099,11 @@ union_remove(v) struct vnode *vp = un->un_uppervp; struct componentname *cnp = ap->a_cnp; - FIXUP(dun); + FIXUP(dun, p); VREF(dvp); dun->un_flags |= UN_KLOCK; vput(ap->a_dvp); - FIXUP(un); + FIXUP(un, p); VREF(vp); un->un_flags |= UN_KLOCK; vput(ap->a_vp); @@ -1100,7 +1114,7 @@ union_remove(v) if (!error) union_removed_upper(un); } else { - FIXUP(dun); + FIXUP(dun, p); error = union_mkwhiteout( MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount), dun->un_uppervp, ap->a_cnp, un->un_path); @@ -1128,6 +1142,7 @@ union_link(v) struct union_node *dun; struct vnode *dvp; struct vnode *vp; + struct proc *p = ap->a_cnp->cn_proc; dun = VTOUNION(ap->a_dvp); @@ -1146,9 +1161,9 @@ union_link(v) /* * needs to be copied up before we can link it. */ - VOP_LOCK(ap->a_vp); + vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p); if (dun->un_uppervp == un->un_dirvp) { - VOP_UNLOCK(ap->a_dvp); + VOP_UNLOCK(ap->a_dvp, 0, p); } error = union_copyup(un, 1, ap->a_cnp->cn_cred, ap->a_cnp->cn_proc); @@ -1172,18 +1187,18 @@ union_link(v) (error = relookup(ap->a_dvp, &dvp, ap->a_cnp))) { vrele(ap->a_dvp); - VOP_UNLOCK(ap->a_vp); + VOP_UNLOCK(ap->a_vp, 0, p); return EROFS; } if (dvp != NULLVP) { /* The name we want to create has mysteriously appeared (a race?) 
*/ error = EEXIST; - VOP_UNLOCK(ap->a_vp); + VOP_UNLOCK(ap->a_vp, 0, p); goto croak; } } - VOP_UNLOCK(ap->a_vp); + VOP_UNLOCK(ap->a_vp, 0, p); } vp = un->un_uppervp; } @@ -1198,7 +1213,7 @@ croak: return (error); } - FIXUP(dun); + FIXUP(dun, p); VREF(dvp); dun->un_flags |= UN_KLOCK; vput(ap->a_dvp); @@ -1327,15 +1342,16 @@ union_mkdir(v) } */ *ap = v; struct union_node *un = VTOUNION(ap->a_dvp); struct vnode *dvp = un->un_uppervp; + struct proc *p = ap->a_cnp->cn_proc; if (dvp != NULLVP) { int error; struct vnode *vp; - FIXUP(un); + FIXUP(un, p); VREF(dvp); un->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp); + VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_MKDIR(dvp, &vp, ap->a_cnp, ap->a_vap); if (error) { vrele(ap->a_dvp); @@ -1373,6 +1389,7 @@ union_rmdir(v) int error; struct union_node *dun = VTOUNION(ap->a_dvp); struct union_node *un = VTOUNION(ap->a_vp); + struct proc *p = ap->a_cnp->cn_proc; if (dun->un_uppervp == NULLVP) panic("union rmdir: null upper vnode"); @@ -1382,11 +1399,11 @@ union_rmdir(v) struct vnode *vp = un->un_uppervp; struct componentname *cnp = ap->a_cnp; - FIXUP(dun); + FIXUP(dun, p); VREF(dvp); dun->un_flags |= UN_KLOCK; vput(ap->a_dvp); - FIXUP(un); + FIXUP(un, p); VREF(vp); un->un_flags |= UN_KLOCK; vput(ap->a_vp); @@ -1397,7 +1414,7 @@ union_rmdir(v) if (!error) union_removed_upper(un); } else { - FIXUP(dun); + FIXUP(dun, p); error = union_mkwhiteout( MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount), dun->un_uppervp, ap->a_cnp, un->un_path); @@ -1421,12 +1438,13 @@ union_symlink(v) } */ *ap = v; struct union_node *un = VTOUNION(ap->a_dvp); struct vnode *dvp = un->un_uppervp; + struct proc *p = ap->a_cnp->cn_proc; if (dvp != NULLVP) { int error; struct vnode *vp; - FIXUP(un); + FIXUP(un, p); VREF(dvp); un->un_flags |= UN_KLOCK; vput(ap->a_dvp); @@ -1457,16 +1475,17 @@ union_readdir(v) struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; - u_long *a_cookies; - int a_ncookies; + int *a_ncookies; + u_long **a_cookies; + } */ *ap = v; register struct 
union_node *un = VTOUNION(ap->a_vp); register struct vnode *vp; - + struct proc *p = curproc; if ((vp = un->un_uppervp) == NULLVP) return (0); - FIXUP(un); + FIXUP(un, p); ap->a_vp = vp; return (VCALL(vp, VOFFSET(vop_readdir), ap)); } @@ -1482,16 +1501,18 @@ union_readlink(v) } */ *ap = v; int error; struct vnode *vp = OTHERVP(ap->a_vp); + struct proc *p = curproc; + int dolock = (vp == LOWERVP(ap->a_vp)); if (dolock) - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); else - FIXUP(VTOUNION(ap->a_vp)); + FIXUP(VTOUNION(ap->a_vp), p); ap->a_vp = vp; error = VCALL(vp, VOFFSET(vop_readlink), ap); if (dolock) - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -1500,7 +1521,7 @@ union_readlink(v) * When operations want to vput() a union node yet retain a lock on * the upper VP (say, to do some further operations like link(), * mkdir(), ...), they set UN_KLOCK on the union node, then call - * vput() which calls VOP_UNLOCK() and comes here. union_unlock() + * vput() which calls VOP_UNLOCK(, 0, p) and comes here. union_unlock() * unlocks the union node (leaving the upper VP alone), clears the * KLOCK flag, and then returns to vput(). The caller then does whatever * is left to do with the upper VP, and insures that it gets unlocked. 
@@ -1519,19 +1540,21 @@ union_abortop(v) int error; struct vnode *vp = OTHERVP(ap->a_dvp); struct union_node *un = VTOUNION(ap->a_dvp); + struct proc *p = ap->a_cnp->cn_proc; + int islocked = un->un_flags & UN_LOCKED; int dolock = (vp == LOWERVP(ap->a_dvp)); if (islocked) { if (dolock) - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); else - FIXUP(VTOUNION(ap->a_dvp)); + FIXUP(VTOUNION(ap->a_dvp), p); } ap->a_dvp = vp; error = VCALL(vp, VOFFSET(vop_abortop), ap); if (islocked && dolock) - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -1542,6 +1565,7 @@ union_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; + struct proc *a_p; } */ *ap = v; struct union_node *un = VTOUNION(ap->a_vp); @@ -1593,34 +1617,40 @@ union_lock(v) struct vop_lock_args *ap = v; struct vnode *vp = ap->a_vp; struct union_node *un; + struct proc *p = ap->a_p; + int flags = ap->a_flags; + int error = 0; -start: - while (vp->v_flag & VXLOCK) { - vp->v_flag |= VXWANT; - sleep((caddr_t)vp, PINOD); - } + vop_nolock(ap); + /* + * Need to do real lockmgr-style locking here. + * in the mean time, draining won't work quite right, + * which could lead to a few race conditions. + * the following test was here, but is not quite right, we + * still need to take the lock: + if ((flags & LK_TYPE_MASK) == LK_DRAIN) + return (0); + */ + flags &= ~LK_INTERLOCK; - un = VTOUNION(vp); +start: + un = VTOUNION(vp); if (un->un_uppervp != NULLVP) { if (((un->un_flags & UN_ULOCK) == 0) && (vp->v_usecount != 0)) { - /* - * We MUST always use the order of: take upper - * vp lock, manipulate union node flags, drop - * upper vp lock. This code must not be an - * exception. 
- */ - VOP_LOCK(un->un_uppervp); - un->un_flags |= UN_ULOCK; + error = vn_lock(un->un_uppervp, flags, p); + if (error) + return (error); + un->un_flags |= UN_ULOCK; } #ifdef DIAGNOSTIC if (un->un_flags & UN_KLOCK) { - vprint("dangling upper lock", vp); - panic("union: dangling upper lock"); + vprint("union: dangling klock", vp); + panic("union: dangling upper lock (%lx)", vp); } -#endif - } + #endif + } if (un->un_flags & UN_LOCKED) { #ifdef DIAGNOSTIC @@ -1648,7 +1678,7 @@ start: * When operations want to vput() a union node yet retain a lock on * the upper VP (say, to do some further operations like link(), * mkdir(), ...), they set UN_KLOCK on the union node, then call - * vput() which calls VOP_UNLOCK() and comes here. union_unlock() + * vput() which calls VOP_UNLOCK(, 0, p) and comes here. union_unlock() * unlocks the union node (leaving the upper VP alone), clears the * KLOCK flag, and then returns to vput(). The caller then does whatever * is left to do with the upper VP, and insures that it gets unlocked. 
@@ -1662,6 +1692,7 @@ union_unlock(v) { struct vop_lock_args *ap = v; struct union_node *un = VTOUNION(ap->a_vp); + struct proc *p = ap->a_p; #ifdef DIAGNOSTIC if ((un->un_flags & UN_LOCKED) == 0) @@ -1674,7 +1705,7 @@ union_unlock(v) un->un_flags &= ~UN_LOCKED; if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK) - VOP_UNLOCK(un->un_uppervp); + VOP_UNLOCK(un->un_uppervp, 0, p); un->un_flags &= ~(UN_ULOCK|UN_KLOCK); @@ -1687,6 +1718,8 @@ union_unlock(v) un->un_pid = 0; #endif + vop_nounlock(v); + return (0); } @@ -1702,17 +1735,18 @@ union_bmap(v) int *a_runp; } */ *ap = v; int error; + struct proc *p = curproc; struct vnode *vp = OTHERVP(ap->a_vp); int dolock = (vp == LOWERVP(ap->a_vp)); if (dolock) - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); else - FIXUP(VTOUNION(ap->a_vp)); + FIXUP(VTOUNION(ap->a_vp), p); ap->a_vp = vp; error = VCALL(vp, VOFFSET(vop_bmap), ap); if (dolock) - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -1763,21 +1797,42 @@ union_pathconf(v) } */ *ap = v; int error; struct vnode *vp = OTHERVP(ap->a_vp); + struct proc *p = curproc; int dolock = (vp == LOWERVP(ap->a_vp)); if (dolock) - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); else - FIXUP(VTOUNION(ap->a_vp)); + FIXUP(VTOUNION(ap->a_vp), p); ap->a_vp = vp; error = VCALL(vp, VOFFSET(vop_pathconf), ap); if (dolock) - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } int +union_revoke(v) + void *v; +{ + struct vop_revoke_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + + if (UPPERVP(vp)) + VOP_REVOKE(UPPERVP(vp), ap->a_flags); + if (LOWERVP(vp)) + VOP_REVOKE(LOWERVP(vp), ap->a_flags); + vgone(vp); + + return (0); +} + +int union_advlock(v) void *v; { diff --git a/sys/msdosfs/msdosfs_denode.c b/sys/msdosfs/msdosfs_denode.c index 0fcb2580df9..258e5632c71 100644 --- a/sys/msdosfs/msdosfs_denode.c +++ b/sys/msdosfs/msdosfs_denode.c @@ -1,4 +1,4 @@ -/* $OpenBSD: msdosfs_denode.c,v 1.5 
1997/10/04 19:08:12 deraadt Exp $ */ +/* $OpenBSD: msdosfs_denode.c,v 1.6 1997/10/06 15:22:38 csapuntz Exp $ */ /* $NetBSD: msdosfs_denode.c,v 1.22 1996/10/13 04:16:31 christos Exp $ */ /*- @@ -76,10 +76,13 @@ static struct denode *msdosfs_hashget __P((dev_t, u_long, u_long)); static void msdosfs_hashins __P((struct denode *)); static void msdosfs_hashrem __P((struct denode *)); -void -msdosfs_init() +/*ARGSUSED*/ +int +msdosfs_init(vfsp) + struct vfsconf *vfsp; { dehashtbl = hashinit(desiredvnodes/2, M_MSDOSFSMNT, &dehash); + return (0); } static struct denode * @@ -89,7 +92,8 @@ msdosfs_hashget(dev, dirclust, diroff) u_long diroff; { struct denode *dep; - + struct proc *p = curproc; /* XXX */ + for (;;) for (dep = dehashtbl[DEHASH(dev, dirclust, diroff)];; dep = dep->de_next) { @@ -104,7 +108,7 @@ msdosfs_hashget(dev, dirclust, diroff) sleep(dep, PINOD); break; } - if (!vget(DETOV(dep), 1)) + if (!vget(DETOV(dep), LK_EXCLUSIVE, p)) return (dep); break; } @@ -166,6 +170,7 @@ deget(pmp, dirclust, diroffset, depp) struct denode *ldep; struct vnode *nvp; struct buf *bp; + struct proc *p = curproc; /* XXX */ #ifdef MSDOSFS_DEBUG printf("deget(pmp %08x, dirclust %d, diroffset %x, depp %08x)\n", @@ -218,7 +223,7 @@ deget(pmp, dirclust, diroffset, depp) * can't be accessed until we've read it in and have done what we * need to it. */ - VOP_LOCK(nvp); + vn_lock(nvp, LK_EXCLUSIVE | LK_RETRY, p); msdosfs_hashins(ldep); /* @@ -562,9 +567,11 @@ msdosfs_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; + struct proc *a_p; } */ *ap = v; struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); + struct proc *p = ap->a_p; int error; extern int prtactive; @@ -608,7 +615,7 @@ msdosfs_inactive(v) dep->de_Name[0] = SLOT_DELETED; } deupdat(dep, 0); - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); /* * If we are done with the denode, reclaim it * so that it can be reused immediately. 
diff --git a/sys/msdosfs/msdosfs_lookup.c b/sys/msdosfs/msdosfs_lookup.c index 58e4aa8edb7..42844deb5e3 100644 --- a/sys/msdosfs/msdosfs_lookup.c +++ b/sys/msdosfs/msdosfs_lookup.c @@ -1,4 +1,4 @@ -/* $OpenBSD: msdosfs_lookup.c,v 1.5 1997/03/02 18:01:57 millert Exp $ */ +/* $OpenBSD: msdosfs_lookup.c,v 1.6 1997/10/06 15:22:39 csapuntz Exp $ */ /* $NetBSD: msdosfs_lookup.c,v 1.30 1996/10/25 23:14:08 cgd Exp $ */ /*- @@ -89,6 +89,7 @@ msdosfs_lookup(v) struct vnode *vdp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; daddr_t bn; int error; int lockparent; @@ -161,14 +162,14 @@ msdosfs_lookup(v) VREF(vdp); error = 0; } else if (flags & ISDOTDOT) { - VOP_UNLOCK(pdp); - error = vget(vdp, 1); + VOP_UNLOCK(pdp, 0, p); + error = vget(vdp, LK_EXCLUSIVE, p); if (!error && lockparent && (flags & ISLASTCN)) - error = VOP_LOCK(pdp); + error = vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p); } else { - error = vget(vdp, 1); + error = vget(vdp, LK_EXCLUSIVE, p); if (!lockparent || error || !(flags & ISLASTCN)) - VOP_UNLOCK(pdp); + VOP_UNLOCK(pdp, 0, p); } /* * Check that the capability number did not change @@ -184,9 +185,9 @@ msdosfs_lookup(v) } vput(vdp); if (lockparent && pdp != vdp && (flags & ISLASTCN)) - VOP_UNLOCK(pdp); + VOP_UNLOCK(pdp, 0, p); } - if ((error = VOP_LOCK(pdp)) != 0) + if ((error = vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p)) != 0) return (error); vdp = pdp; dp = VTODE(vdp); @@ -414,7 +415,7 @@ notfound:; */ cnp->cn_flags |= SAVENAME; if (!lockparent) - VOP_UNLOCK(vdp); + VOP_UNLOCK(vdp, 0, p); return (EJUSTRETURN); } /* @@ -484,7 +485,7 @@ foundroot:; return (error); *vpp = DETOV(tdp); if (!lockparent) - VOP_UNLOCK(vdp); + VOP_UNLOCK(vdp, 0, p); return (0); } @@ -512,7 +513,7 @@ foundroot:; *vpp = DETOV(tdp); cnp->cn_flags |= SAVENAME; if (!lockparent) - VOP_UNLOCK(vdp); + VOP_UNLOCK(vdp, 0, p); return (0); } @@ -537,13 +538,13 @@ foundroot:; */ pdp = vdp; if (flags & ISDOTDOT) { - VOP_UNLOCK(pdp); 
/* race to get the inode */ + VOP_UNLOCK(pdp, 0, p); /* race to get the inode */ if ((error = deget(pmp, cluster, blkoff, &tdp)) != 0) { - VOP_LOCK(pdp); + vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p); return (error); } if (lockparent && (flags & ISLASTCN) && - (error = VOP_LOCK(pdp))) { + (error = vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p))) { vput(DETOV(tdp)); return (error); } @@ -555,7 +556,7 @@ foundroot:; if ((error = deget(pmp, cluster, blkoff, &tdp)) != 0) return (error); if (!lockparent || !(flags & ISLASTCN)) - VOP_UNLOCK(pdp); + VOP_UNLOCK(pdp, 0, p); *vpp = DETOV(tdp); } diff --git a/sys/msdosfs/msdosfs_vfsops.c b/sys/msdosfs/msdosfs_vfsops.c index b77c0e9a93e..cf2a101328d 100644 --- a/sys/msdosfs/msdosfs_vfsops.c +++ b/sys/msdosfs/msdosfs_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: msdosfs_vfsops.c,v 1.7 1997/06/20 14:04:30 kstailey Exp $ */ +/* $OpenBSD: msdosfs_vfsops.c,v 1.8 1997/10/06 15:22:39 csapuntz Exp $ */ /* $NetBSD: msdosfs_vfsops.c,v 1.44 1996/12/22 10:10:32 cgd Exp $ */ /*- @@ -75,10 +75,8 @@ int msdosfs_mount __P((struct mount *, char *, caddr_t, struct nameidata *, int msdosfs_start __P((struct mount *, int, struct proc *)); int msdosfs_unmount __P((struct mount *, int, struct proc *)); int msdosfs_root __P((struct mount *, struct vnode **)); -int msdosfs_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); int msdosfs_statfs __P((struct mount *, struct statfs *, struct proc *)); int msdosfs_sync __P((struct mount *, int, struct ucred *, struct proc *)); -int msdosfs_vget __P((struct mount *, ino_t, struct vnode **)); int msdosfs_fhtovp __P((struct mount *, struct fid *, struct mbuf *, struct vnode **, int *, struct ucred **)); int msdosfs_vptofh __P((struct vnode *, struct fid *)); @@ -121,10 +119,10 @@ msdosfs_mount(mp, path, data, ndp, p) flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; - if (vfs_busy(mp)) + if (vfs_busy(mp, 0, 0, p)) return (EBUSY); error = vflush(mp, NULLVP, flags); - vfs_unbusy(mp); + 
vfs_unbusy(mp, p); } if (!error && (mp->mnt_flag & MNT_RELOAD)) /* not yet implemented */ @@ -138,14 +136,14 @@ msdosfs_mount(mp, path, data, ndp, p) */ if (p->p_ucred->cr_uid != 0) { devvp = pmp->pm_devvp; - VOP_LOCK(devvp); + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(devvp, VREAD | VWRITE, p->p_ucred, p); if (error) { - VOP_UNLOCK(devvp); + VOP_UNLOCK(devvp, 0, p); return (error); } - VOP_UNLOCK(devvp); + VOP_UNLOCK(devvp, 0, p); } pmp->pm_flags &= ~MSDOSFSMNT_RONLY; } @@ -189,13 +187,13 @@ msdosfs_mount(mp, path, data, ndp, p) accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; - VOP_LOCK(devvp); + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p); if (error) { vput(devvp); return (error); } - VOP_UNLOCK(devvp); + VOP_UNLOCK(devvp, 0, p); } if ((mp->mnt_flag & MNT_UPDATE) == 0) error = msdosfs_mountfs(devvp, mp, p, &args); @@ -514,7 +512,7 @@ msdosfs_mountfs(devvp, mp, p, argp) * in the directory entry where we could put uid's and gid's. 
*/ #endif - devvp->v_specflags |= SI_MOUNTEDON; + devvp->v_specmountpoint = mp; return (0); @@ -561,7 +559,7 @@ msdosfs_unmount(mp, mntflags, p) if ((error = vflush(mp, NULLVP, flags)) != 0) return (error); pmp = VFSTOMSDOSFS(mp); - pmp->pm_devvp->v_specflags &= ~SI_MOUNTEDON; + pmp->pm_devvp->v_specmountpoint = NULL; #ifdef MSDOSFS_DEBUG printf("msdosfs_umount(): just before calling VOP_CLOSE()\n"); printf("flag %08x, usecount %d, writecount %d, holdcnt %d\n", @@ -605,22 +603,6 @@ msdosfs_root(mp, vpp) } int -msdosfs_quotactl(mp, cmds, uid, arg, p) - struct mount *mp; - int cmds; - uid_t uid; - caddr_t arg; - struct proc *p; -{ - -#ifdef QUOTA - return (EOPNOTSUPP); -#else - return (EOPNOTSUPP); -#endif -} - -int msdosfs_statfs(mp, sbp, p) struct mount *mp; struct statfs *sbp; @@ -645,7 +627,7 @@ msdosfs_statfs(mp, sbp, p) bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } - strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name, MFSNAMELEN); + strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN); return (0); } @@ -689,9 +671,9 @@ loop: dep = VTODE(vp); if (((dep->de_flag & (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) - && (vp->v_dirtyblkhd.lh_first == NULL)) + && (vp->v_dirtyblkhd.lh_first == NULL || waitfor == MNT_LAZY)) continue; - if (vget(vp, 1)) + if (vget(vp, LK_EXCLUSIVE, p)) goto loop; if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0) allerror = error; @@ -753,18 +735,16 @@ msdosfs_vptofh(vp, fhp) return (0); } -int -msdosfs_vget(mp, ino, vpp) - struct mount *mp; - ino_t ino; - struct vnode **vpp; -{ +#define msdosfs_vget ((int (*) __P((struct mount *, ino_t, struct vnode **))) \ + eopnotsupp) - return (EOPNOTSUPP); -} +#define msdosfs_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \ + struct proc *)))eopnotsupp) + +#define msdosfs_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \ + size_t, struct proc *)))eopnotsupp) struct vfsops 
msdosfs_vfsops = { - MOUNT_MSDOS, msdosfs_mount, msdosfs_start, msdosfs_unmount, @@ -775,5 +755,6 @@ struct vfsops msdosfs_vfsops = { msdosfs_vget, msdosfs_fhtovp, msdosfs_vptofh, - msdosfs_init + msdosfs_init, + msdosfs_sysctl }; diff --git a/sys/msdosfs/msdosfs_vnops.c b/sys/msdosfs/msdosfs_vnops.c index e792d1b2201..40ff8591d07 100644 --- a/sys/msdosfs/msdosfs_vnops.c +++ b/sys/msdosfs/msdosfs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: msdosfs_vnops.c,v 1.10 1997/10/04 19:08:13 deraadt Exp $ */ +/* $OpenBSD: msdosfs_vnops.c,v 1.11 1997/10/06 15:22:40 csapuntz Exp $ */ /* $NetBSD: msdosfs_vnops.c,v 1.48 1996/03/20 00:45:43 thorpej Exp $ */ /*- @@ -949,6 +949,7 @@ msdosfs_rename(v) register struct vnode *fdvp = ap->a_fdvp; register struct componentname *tcnp = ap->a_tcnp; register struct componentname *fcnp = ap->a_fcnp; + struct proc *p = curproc; /* XXX */ register struct denode *ip, *xp, *dp, *zp; u_char toname[11], oldname[11]; u_long from_diroffset, to_diroffset; @@ -989,7 +990,7 @@ abortit: } /* */ - if ((error = VOP_LOCK(fvp)) != 0) + if ((error = vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p)) != 0) goto abortit; dp = VTODE(fdvp); ip = VTODE(fvp); @@ -1009,7 +1010,7 @@ abortit: (fcnp->cn_flags & ISDOTDOT) || (tcnp->cn_flags & ISDOTDOT) || (ip->de_flag & DE_RENAME)) { - VOP_UNLOCK(fvp); + VOP_UNLOCK(fvp, 0, p); error = EINVAL; goto abortit; } @@ -1040,7 +1041,7 @@ abortit: * call to doscheckpath(). 
*/ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); - VOP_UNLOCK(fvp); + VOP_UNLOCK(fvp, 0, p); if (VTODE(fdvp)->de_StartCluster != VTODE(tdvp)->de_StartCluster) newparent = 1; vrele(fdvp); @@ -1106,7 +1107,7 @@ abortit: if ((fcnp->cn_flags & SAVESTART) == 0) panic("msdosfs_rename: lost from startdir"); if (!newparent) - VOP_UNLOCK(tdvp); + VOP_UNLOCK(tdvp, 0, p); (void) relookup(fdvp, &fvp, fcnp); if (fvp == NULL) { /* @@ -1116,7 +1117,7 @@ abortit: panic("rename: lost dir entry"); vrele(ap->a_fvp); if (newparent) - VOP_UNLOCK(tdvp); + VOP_UNLOCK(tdvp, 0, p); vrele(tdvp); return 0; } @@ -1136,9 +1137,9 @@ abortit: if (doingdirectory) panic("rename: lost dir entry"); vrele(ap->a_fvp); - VOP_UNLOCK(fvp); + VOP_UNLOCK(fvp, 0, p); if (newparent) - VOP_UNLOCK(fdvp); + VOP_UNLOCK(fdvp, 0, p); xp = NULL; } else { vrele(fvp); @@ -1160,8 +1161,8 @@ abortit: if (error) { bcopy(oldname, ip->de_Name, 11); if (newparent) - VOP_UNLOCK(fdvp); - VOP_UNLOCK(fvp); + VOP_UNLOCK(fdvp, 0, p); + VOP_UNLOCK(fvp, 0, p); goto bad; } ip->de_refcnt++; @@ -1169,8 +1170,8 @@ abortit: if ((error = removede(zp, ip)) != 0) { /* XXX should really panic here, fs is corrupt */ if (newparent) - VOP_UNLOCK(fdvp); - VOP_UNLOCK(fvp); + VOP_UNLOCK(fdvp, 0, p); + VOP_UNLOCK(fvp, 0, p); goto bad; } if (!doingdirectory) { @@ -1179,8 +1180,8 @@ abortit: if (error) { /* XXX should really panic here, fs is corrupt */ if (newparent) - VOP_UNLOCK(fdvp); - VOP_UNLOCK(fvp); + VOP_UNLOCK(fdvp, 0, p); + VOP_UNLOCK(fvp, 0, p); goto bad; } if (ip->de_dirclust != MSDOSFSROOT) @@ -1188,7 +1189,7 @@ abortit: } reinsert(ip); if (newparent) - VOP_UNLOCK(fdvp); + VOP_UNLOCK(fdvp, 0, p); } /* @@ -1207,19 +1208,19 @@ abortit: if (error) { /* XXX should really panic here, fs is corrupt */ brelse(bp); - VOP_UNLOCK(fvp); + VOP_UNLOCK(fvp, 0, p); goto bad; } dotdotp = (struct direntry *)bp->b_data + 1; putushort(dotdotp->deStartCluster, dp->de_StartCluster); if ((error = bwrite(bp)) != 0) { /* XXX should really 
panic here, fs is corrupt */ - VOP_UNLOCK(fvp); + VOP_UNLOCK(fvp, 0, p); goto bad; } } - VOP_UNLOCK(fvp); + VOP_UNLOCK(fvp, 0, p); bad: if (xp) vput(tvp); @@ -1463,8 +1464,8 @@ msdosfs_readdir(v) struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; - u_long *a_cookies; - int a_ncookies; + u_long **a_cookies; + int *a_ncookies; } */ *ap = v; int error = 0; int diff; @@ -1483,8 +1484,8 @@ msdosfs_readdir(v) struct direntry *dentp; struct dirent dirbuf; struct uio *uio = ap->a_uio; - u_long *cookies; - int ncookies; + u_long *cookies = NULL; + int ncookies = 0; off_t offset; int chksum = -1; @@ -1520,8 +1521,13 @@ msdosfs_readdir(v) lost = uio->uio_resid - count; uio->uio_resid = count; - cookies = ap->a_cookies; - ncookies = ap->a_ncookies; + if (ap->a_ncookies) { + ncookies = uio->uio_resid / sizeof(struct direntry) + 3; + MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, + M_WAITOK); + *ap->a_cookies = cookies; + *ap->a_ncookies = ncookies; + } /* * If they are reading from the root directory then, we simulate @@ -1681,6 +1687,10 @@ msdosfs_readdir(v) } out: + /* Subtract unused cookies */ + if (ap->a_ncookies) + *ap->a_ncookies -= ncookies; + uio->uio_offset = offset; uio->uio_resid += lost; if (dep->de_FileSize - (offset - bias) <= 0) diff --git a/sys/msdosfs/msdosfsmount.h b/sys/msdosfs/msdosfsmount.h index 6c3e1a76410..04937d4db84 100644 --- a/sys/msdosfs/msdosfsmount.h +++ b/sys/msdosfs/msdosfsmount.h @@ -1,4 +1,4 @@ -/* $OpenBSD: msdosfsmount.h,v 1.5 1997/03/02 18:02:02 millert Exp $ */ +/* $OpenBSD: msdosfsmount.h,v 1.6 1997/10/06 15:22:40 csapuntz Exp $ */ /* $NetBSD: msdosfsmount.h,v 1.15 1996/12/22 10:31:41 cgd Exp $ */ /*- @@ -199,4 +199,4 @@ int msdosfs_statfs __P((struct mount *, struct statfs *, struct proc *)); int msdosfs_sync __P((struct mount *, int, struct ucred *, struct proc *)); int msdosfs_fhtovp __P((struct mount *, struct fid *, struct mbuf *, struct vnode **, int *, struct ucred **)); int msdosfs_vptofh __P((struct vnode *, 
struct fid *)); -void msdosfs_init __P((void)); +int msdosfs_init __P((struct vfsconf *)); diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c index 05105b20894..3fc3bf9843f 100644 --- a/sys/nfs/nfs_bio.c +++ b/sys/nfs/nfs_bio.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_bio.c,v 1.10 1996/07/27 11:10:11 deraadt Exp $ */ +/* $OpenBSD: nfs_bio.c,v 1.11 1997/10/06 15:23:40 csapuntz Exp $ */ /* $NetBSD: nfs_bio.c,v 1.25.4.2 1996/07/08 20:47:04 jtc Exp $ */ /* @@ -740,16 +740,6 @@ nfs_asyncio(bp, cred) * is currently doing a write for this file and will pick up the * delayed writes before going back to sleep. */ - if (bp->b_flags & B_DELWRI) - TAILQ_REMOVE(&bdirties, bp, b_synclist); - TAILQ_INSERT_TAIL(&bdirties, bp, b_synclist); - bp->b_synctime = time.tv_sec + 30; - if (bdirties.tqh_first == bp) { - untimeout((void (*)__P((void *)))wakeup, - &bdirties); - timeout((void (*)__P((void *)))wakeup, - &bdirties, 30 * hz); - } bp->b_flags |= B_DELWRI; reassignbuf(bp, bp->b_vp); biodone(bp); @@ -910,16 +900,6 @@ nfs_doio(bp, cr, p) * B_DELWRI and B_NEEDCOMMIT flags. 
*/ if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) { - if (bp->b_flags & B_DELWRI) - TAILQ_REMOVE(&bdirties, bp, b_synclist); - TAILQ_INSERT_TAIL(&bdirties, bp, b_synclist); - bp->b_synctime = time.tv_sec + 30; - if (bdirties.tqh_first == bp) { - untimeout((void (*)__P((void *)))wakeup, - &bdirties); - timeout((void (*)__P((void *)))wakeup, - &bdirties, 30 * hz); - } bp->b_flags |= B_DELWRI; /* diff --git a/sys/nfs/nfs_node.c b/sys/nfs/nfs_node.c index 27c551880d4..d5b8e9b15d0 100644 --- a/sys/nfs/nfs_node.c +++ b/sys/nfs/nfs_node.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_node.c,v 1.7 1997/04/18 10:09:46 deraadt Exp $ */ +/* $OpenBSD: nfs_node.c,v 1.8 1997/10/06 15:23:41 csapuntz Exp $ */ /* $NetBSD: nfs_node.c,v 1.16 1996/02/18 11:53:42 fvdl Exp $ */ /* @@ -106,9 +106,7 @@ nfs_nget(mntp, fhp, fhsize, npp) int fhsize; struct nfsnode **npp; { -#ifdef Lite2_integrated struct proc *p = curproc; /* XXX */ -#endif register struct nfsnode *np; struct nfsnodehashhead *nhpp; register struct vnode *vp; @@ -123,11 +121,7 @@ loop: bcmp((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize)) continue; vp = NFSTOV(np); -#ifdef Lite2_integrated if (vget(vp, LK_EXCLUSIVE, p)) -#else - if (vget(vp, 1)) -#endif goto loop; *npp = np; return(0); @@ -162,9 +156,7 @@ nfs_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; -#ifdef Lite2_integrated struct proc *a_p; -#endif } */ *ap = v; register struct nfsnode *np; register struct sillyrename *sp; @@ -191,9 +183,8 @@ nfs_inactive(v) } np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT | NQNFSEVICTED | NQNFSNONCACHE | NQNFSWRITE); -#ifdef Lite2_integrated + VOP_UNLOCK(ap->a_vp, 0, ap->a_p); -#endif return (0); } @@ -248,64 +239,6 @@ nfs_reclaim(v) return (0); } -#ifndef Lite2_integrated -/* - * Lock an nfsnode - */ -int -nfs_lock(v) - void *v; -{ - struct vop_lock_args /* { - struct vnode *a_vp; - } */ *ap = v; - register struct vnode *vp = ap->a_vp; - - /* - * Ugh, another place where interruptible mounts will get hung. 
- * If you make this sleep interruptible, then you have to fix all - * the VOP_LOCK() calls to expect interruptibility. - */ - while (vp->v_flag & VXLOCK) { - vp->v_flag |= VXWANT; - (void) tsleep((caddr_t)vp, PINOD, "nfslck", 0); - } - if (vp->v_tag == VT_NON) - return (ENOENT); - return (0); -} - -/* - * Unlock an nfsnode - */ -int -nfs_unlock(v) - void *v; -{ -#if 0 - struct vop_unlock_args /* { - struct vnode *a_vp; - } */ *ap = v; -#endif - return (0); -} - -/* - * Check for a locked nfsnode - */ -int -nfs_islocked(v) - void *v; -{ -#if 0 - struct vop_islocked_args /* { - struct vnode *a_vp; - } */ *ap = v; -#endif - return (0); -} -#endif /* Lite2_integrated */ - /* * Nfs abort op, called after namei() when a CREATE/DELETE isn't actually * done. Currently nothing to do. diff --git a/sys/nfs/nfs_nqlease.c b/sys/nfs/nfs_nqlease.c index 778eda1d8f4..88c4b46ed76 100644 --- a/sys/nfs/nfs_nqlease.c +++ b/sys/nfs/nfs_nqlease.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_nqlease.c,v 1.9 1997/04/18 10:13:23 deraadt Exp $ */ +/* $OpenBSD: nfs_nqlease.c,v 1.10 1997/10/06 15:23:41 csapuntz Exp $ */ /* $NetBSD: nfs_nqlease.c,v 1.14 1996/02/18 14:06:50 fvdl Exp $ */ /* @@ -1048,11 +1048,7 @@ nqnfs_clientd(nmp, cred, ncd, flag, argp, p) vp = NFSTOV(np); vpid = vp->v_id; if (np->n_expiry < time.tv_sec) { -#ifdef Lite2_integrated if (vget(vp, LK_EXCLUSIVE, p) == 0) { -#else - if (vget(vp, 1) == 0) { -#endif nmp->nm_inprog = vp; if (vpid == vp->v_id) { CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer); @@ -1079,11 +1075,7 @@ nqnfs_clientd(nmp, cred, ncd, flag, argp, p) } else if ((np->n_expiry - NQ_RENEWAL) < time.tv_sec) { if ((np->n_flag & (NQNFSWRITE | NQNFSNONCACHE)) == NQNFSWRITE && vp->v_dirtyblkhd.lh_first && -#ifdef Lite2_integrated vget(vp, LK_EXCLUSIVE, p) == 0) { -#else - vget(vp, 1) == 0) { -#endif nmp->nm_inprog = vp; if (vpid == vp->v_id && nqnfs_getlease(vp, ND_WRITE, cred, p)==0) @@ -1188,10 +1180,8 @@ nqnfs_lease_updatetime(deltat) struct mount *mp; struct nfsmount *nmp; 
int s; -#ifdef Lite2_integrated struct proc *p = curproc; /* XXX */ struct mount *nxtmp; -#endif if (nqnfsstarttime != 0) nqnfsstarttime += deltat; @@ -1205,7 +1195,6 @@ nqnfs_lease_updatetime(deltat) * Search the mount list for all nqnfs mounts and do their timer * queues. */ -#ifdef Lite2_integrated simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nxtmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { @@ -1228,22 +1217,6 @@ nqnfs_lease_updatetime(deltat) vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); -#else /* Lite2_integrated */ - for (mp = mountlist.cqh_first; mp != (void *)&mountlist; - mp = mp->mnt_list.cqe_next) { - if (!strncmp(&mp->mnt_stat.f_fstypename[0], MOUNT_NFS, - MFSNAMELEN)) { - nmp = VFSTONFS(mp); - if (nmp->nm_flag & NFSMNT_NQNFS) { - for (np = nmp->nm_timerhead.cqh_first; - np != (void *)&nmp->nm_timerhead; - np = np->n_timer.cqe_next) { - np->n_expiry += deltat; - } - } - } - } -#endif } /* diff --git a/sys/nfs/nfs_serv.c b/sys/nfs/nfs_serv.c index 17ad65c11cc..4d459c4e6ab 100644 --- a/sys/nfs/nfs_serv.c +++ b/sys/nfs/nfs_serv.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_serv.c,v 1.11 1997/04/18 09:22:45 deraadt Exp $ */ +/* $OpenBSD: nfs_serv.c,v 1.12 1997/10/06 15:23:42 csapuntz Exp $ */ /* $NetBSD: nfs_serv.c,v 1.25 1996/03/02 15:55:52 jtk Exp $ */ /* @@ -2453,15 +2453,8 @@ nfsrv_readdir(nfsd, slp, procp, mrq) nfsm_srvpostop_attr(getret, &at); return (0); } -#ifdef Lite2_integrated VOP_UNLOCK(vp, 0, procp); -#else - VOP_UNLOCK(vp); -#endif MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); - ncookies = siz / (5 * NFSX_UNSIGNED); /*7 for V3, but it's an est. 
so*/ - MALLOC(cookies, u_long *, ncookies * sizeof (u_long *), M_TEMP, - M_WAITOK); again: iv.iov_base = rbuf; iv.iov_len = fullsiz; @@ -2473,13 +2466,14 @@ again: io.uio_rw = UIO_READ; io.uio_procp = (struct proc *)0; eofflag = 0; -#ifdef Lite2_integrated - VOP_LOCK(vp, 0, procp); -#else - VOP_LOCK(vp); -#endif - error = VOP_READDIR(vp, &io, cred, &eofflag, cookies, ncookies); + if (cookies) { + free((caddr_t)cookies, M_TEMP); + cookies = NULL; + } + + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp); + error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); off = (off_t)io.uio_offset; if (!cookies && !error) @@ -2490,11 +2484,7 @@ again: error = getret; } -#ifdef Lite2_integrated VOP_UNLOCK(vp, 0, procp); -#else - VOP_UNLOCK(vp); -#endif if (error) { vrele(vp); free((caddr_t)rbuf, M_TEMP); @@ -2721,16 +2711,9 @@ nfsrv_readdirplus(nfsd, slp, procp, mrq) nfsm_srvpostop_attr(getret, &at); return (0); } -#ifdef Lite2_integrated VOP_UNLOCK(vp, 0, procp); -#else - VOP_UNLOCK(vp); -#endif MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); - ncookies = siz / (7 * NFSX_UNSIGNED); - MALLOC(cookies, u_long *, ncookies * sizeof (u_long *), M_TEMP, - M_WAITOK); again: iv.iov_base = rbuf; iv.iov_len = fullsiz; @@ -2743,21 +2726,19 @@ again: io.uio_procp = (struct proc *)0; eofflag = 0; -#ifdef Lite2_integrated - VOP_LOCK(vp, 0, procp); -#else - VOP_LOCK(vp); -#endif - error = VOP_READDIR(vp, &io, cred, &eofflag, cookies, ncookies); + if (cookies) { + free((caddr_t)cookies, M_TEMP); + cookies = NULL; + } + + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp); + error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); off = (u_quad_t)io.uio_offset; getret = VOP_GETATTR(vp, &at, cred, procp); -#ifdef Lite2_integrated VOP_UNLOCK(vp, 0, procp); -#else - VOP_UNLOCK(vp); -#endif + if (!cookies && !error) error = NFSERR_PERM; if (!error) diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c index e39e5657ed8..1506704ef11 100644 --- a/sys/nfs/nfs_subs.c +++ b/sys/nfs/nfs_subs.c @@ 
-1,4 +1,4 @@ -/* $OpenBSD: nfs_subs.c,v 1.19 1997/04/28 00:40:14 deraadt Exp $ */ +/* $OpenBSD: nfs_subs.c,v 1.20 1997/10/06 15:23:43 csapuntz Exp $ */ /* $NetBSD: nfs_subs.c,v 1.27.4.3 1996/07/08 20:34:24 jtc Exp $ */ /* @@ -1144,8 +1144,9 @@ nfs_init() } #ifdef NFSCLIENT -void -nfs_vfs_init() +int +nfs_vfs_init(vfsp) + struct vfsconf *vfsp; { register int i; @@ -1154,6 +1155,8 @@ nfs_vfs_init() nfs_iodwant[i] = (struct proc *)0; TAILQ_INIT(&nfs_bufq); nfs_nhinit(); /* Init the nfsnode table */ + + return (0); } /* @@ -1247,10 +1250,9 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper) * Since the nfsnode does not have a lock, its * vnode lock has to be carried over. */ -#ifdef Lite2_integrated + nvp->v_vnlock = vp->v_vnlock; vp->v_vnlock = NULL; -#endif nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; @@ -1694,9 +1696,7 @@ nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp, kerbflag) int *rdonlyp; int kerbflag; { -#ifdef Lite2_integrated struct proc *p = curproc; /* XXX */ -#endif register struct mount *mp; register int i; struct ucred *credanon; @@ -1704,11 +1704,8 @@ nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp, kerbflag) struct sockaddr_in *saddr; *vpp = (struct vnode *)0; -#ifdef Lite2_integrated mp = vfs_getvfs(&fhp->fh_fsid); -#else - mp = getvfs(&fhp->fh_fsid); -#endif + if (!mp) return (ESTALE); error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon); @@ -1746,11 +1743,8 @@ nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp, kerbflag) else *rdonlyp = 0; if (!lockflag) -#ifdef Lite2_integrated VOP_UNLOCK(*vpp, 0, p); -#else - VOP_UNLOCK(*vpp); -#endif + return (0); } diff --git a/sys/nfs/nfs_syscalls.c b/sys/nfs/nfs_syscalls.c index 68f33ff1f40..9ccb2b804c2 100644 --- a/sys/nfs/nfs_syscalls.c +++ b/sys/nfs/nfs_syscalls.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_syscalls.c,v 1.8 1997/03/30 20:02:32 mickey Exp $ */ +/* $OpenBSD: nfs_syscalls.c,v 1.9 1997/10/06 15:23:43 csapuntz Exp $ */ /* $NetBSD: 
nfs_syscalls.c,v 1.19 1996/02/18 11:53:52 fvdl Exp $ */ /* @@ -944,8 +944,6 @@ nfssvc_iod(p) * up to, but not including nfs_strategy(). */ if (nbp) { - if (nbp->b_flags & B_DELWRI) - TAILQ_REMOVE(&bdirties, nbp, b_synclist); nbp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); reassignbuf(nbp, nbp->b_vp); nbp->b_vp->v_numoutput++; diff --git a/sys/nfs/nfs_var.h b/sys/nfs/nfs_var.h index 8b7ac70f417..311f2a1f87b 100644 --- a/sys/nfs/nfs_var.h +++ b/sys/nfs/nfs_var.h @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_var.h,v 1.4 1996/12/14 15:36:51 deraadt Exp $ */ +/* $OpenBSD: nfs_var.h,v 1.5 1997/10/06 15:23:44 csapuntz Exp $ */ /* $NetBSD: nfs_var.h,v 1.3 1996/02/18 11:53:54 fvdl Exp $ */ /* @@ -80,9 +80,6 @@ u_long nfs_hash __P((nfsfh_t *, int)); int nfs_nget __P((struct mount *, nfsfh_t *, int, struct nfsnode **)); int nfs_inactive __P((void *)); int nfs_reclaim __P((void *)); -int nfs_lock __P((void *)); -int nfs_unlock __P((void *)); -int nfs_islocked __P((void *)); int nfs_abortop __P((void *)); /* nfs_vnops.c */ @@ -275,7 +272,7 @@ int nfsm_uiotombuf __P((struct uio *, struct mbuf **, int, caddr_t *)); int nfsm_disct __P((struct mbuf **, caddr_t *, int, int, caddr_t *)); int nfs_adv __P((struct mbuf **, caddr_t *, int, int)); int nfsm_strtmbuf __P((struct mbuf **, char **, char *, long)); -void nfs_vfs_init __P((void)); +int nfs_vfs_init __P((struct vfsconf *)); void nfs_init __P((void)); int nfs_loadattrcache __P((struct vnode **, struct mbuf **, caddr_t *, struct vattr *)); diff --git a/sys/nfs/nfs_vfsops.c b/sys/nfs/nfs_vfsops.c index 596e5448624..ce71627558c 100644 --- a/sys/nfs/nfs_vfsops.c +++ b/sys/nfs/nfs_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_vfsops.c,v 1.19 1997/04/18 10:15:20 deraadt Exp $ */ +/* $OpenBSD: nfs_vfsops.c,v 1.20 1997/10/06 15:23:45 csapuntz Exp $ */ /* $NetBSD: nfs_vfsops.c,v 1.46.4.1 1996/05/25 22:40:35 fvdl Exp $ */ /* @@ -72,16 +72,13 @@ extern struct nfsstats nfsstats; extern int nfs_ticks; -#ifdef notyet static int nfs_sysctl(int *, u_int, 
void *, size_t *, void *, size_t, struct proc *); -#endif /* * nfs vfs operations. */ struct vfsops nfs_vfsops = { - MOUNT_NFS, nfs_mount, nfs_start, nfs_unmount, @@ -93,9 +90,7 @@ struct vfsops nfs_vfsops = { nfs_fhtovp, nfs_vptofh, nfs_vfs_init, -#ifdef notyet nfs_sysctl -#endif }; extern u_int32_t nfs_procids[NFS_NPROCS]; @@ -180,7 +175,7 @@ nfs_statfs(mp, sbp, p) bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } - strncpy(&sbp->f_fstypename[0], mp->mnt_op->vfs_name, MFSNAMELEN); + strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN); nfsm_reqdone; vrele(vp); crfree(cred); @@ -294,20 +289,11 @@ nfs_mountroot() /* * Link it into the mount list. */ -#ifdef Lite2_integrated simple_lock(&mountlist_slock); CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); simple_unlock(&mountlist_slock); rootvp = vp; vfs_unbusy(mp, procp); -#else - if (vfs_lock(mp)) - panic("nfs_mountroot: vfs_lock"); - CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); - mp->mnt_vnodecovered = NULLVP; - vfs_unlock(mp); - rootvp = vp; -#endif /* Get root attributes (for the time). */ error = VOP_GETATTR(vp, &attr, procp->p_ucred, procp); @@ -354,9 +340,7 @@ nfs_mountroot() */ nfs_boot_getfh(&nd.nd_boot, "swap", &nd.nd_swap); mp = nfs_mount_diskless(&nd.nd_swap, "/swap", 0, &vp); -#ifdef Lite2_integrated vfs_unbusy(mp, procp); -#endif printf("swap on %s\n", nd.nd_swap.ndm_host); /* @@ -397,16 +381,14 @@ nfs_mount_diskless(ndmntp, mntname, mntflag, vpp) struct mbuf *m; int error; -#ifdef Lite2_integrated vfs_rootmountalloc("nfs", mntname, &mp); -#else /* Create the mount point. 
*/ mp = (struct mount *)malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); if (mp == NULL) panic("nfs_mountroot: malloc mount for %s", mntname); bzero((char *)mp, (u_long)sizeof(struct mount)); -#endif + mp->mnt_op = &nfs_vfsops; mp->mnt_flag = mntflag; @@ -692,11 +674,8 @@ mountnfs(argp, mp, nam, pth, hst, vpp) mp->mnt_data = (qaddr_t)nmp; TAILQ_INIT(&nmp->nm_uidlruhead); } -#ifdef Lite2_integrated - vfs_getnewfsid(mp, makefstype(MOUNT_NFS)); -#else - getnewfsid(mp, makefstype(MOUNT_NFS)); -#endif + + vfs_getnewfsid(mp); nmp->nm_mountp = mp; if (argp->flags & NFSMNT_NQNFS) /* @@ -728,7 +707,7 @@ mountnfs(argp, mp, nam, pth, hst, vpp) #else mp->mnt_stat.f_type = 0; #endif - strncpy(&mp->mnt_stat.f_fstypename[0], mp->mnt_op->vfs_name, MFSNAMELEN); + strncpy(&mp->mnt_stat.f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN); bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); bcopy(pth, mp->mnt_stat.f_mntonname, MNAMELEN); nmp->nm_nam = nam; @@ -903,13 +882,10 @@ loop: */ if (vp->v_mount != mp) goto loop; - if (VOP_ISLOCKED(vp) || vp->v_dirtyblkhd.lh_first == NULL) + if (VOP_ISLOCKED(vp) || vp->v_dirtyblkhd.lh_first == NULL || + waitfor == MNT_LAZY) continue; -#ifdef Lite2_integrated if (vget(vp, LK_EXCLUSIVE, p)) -#else - if (vget(vp, 1)) -#endif goto loop; error = VOP_FSYNC(vp, cred, waitfor, p); if (error) @@ -934,7 +910,6 @@ nfs_vget(mp, ino, vpp) return (EOPNOTSUPP); } -#ifdef notyet /* * Do that sysctl thang... 
*/ @@ -977,7 +952,6 @@ nfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, return EOPNOTSUPP; } } -#endif /* diff --git a/sys/nfs/nfs_vnops.c b/sys/nfs/nfs_vnops.c index 7d009055598..6bc7ad11c12 100644 --- a/sys/nfs/nfs_vnops.c +++ b/sys/nfs/nfs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_vnops.c,v 1.15 1997/04/18 09:57:29 deraadt Exp $ */ +/* $OpenBSD: nfs_vnops.c,v 1.16 1997/10/06 15:23:45 csapuntz Exp $ */ /* $NetBSD: nfs_vnops.c,v 1.62.4.1 1996/07/08 20:26:52 jtc Exp $ */ /* @@ -103,9 +103,7 @@ struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = { { &vop_lease_desc, nfs_lease_check }, /* lease */ { &vop_ioctl_desc, nfs_ioctl }, /* ioctl */ { &vop_select_desc, nfs_select }, /* select */ -#ifdef Lite2_integrated { &vop_revoke_desc, nfs_revoke }, /* revoke */ -#endif { &vop_mmap_desc, nfs_mmap }, /* mmap */ { &vop_fsync_desc, nfs_fsync }, /* fsync */ { &vop_seek_desc, nfs_seek }, /* seek */ @@ -159,9 +157,7 @@ struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = { { &vop_lease_desc, spec_lease_check }, /* lease */ { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ { &vop_select_desc, spec_select }, /* select */ -#ifdef Lite2_integrated { &vop_revoke_desc, spec_revoke }, /* revoke */ -#endif { &vop_mmap_desc, spec_mmap }, /* mmap */ { &vop_fsync_desc, nfs_fsync }, /* fsync */ { &vop_seek_desc, spec_seek }, /* seek */ @@ -213,9 +209,7 @@ struct vnodeopv_entry_desc fifo_nfsv2nodeop_entries[] = { { &vop_lease_desc, fifo_lease_check }, /* lease */ { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */ { &vop_select_desc, fifo_select }, /* select */ -#ifdef Lite2_integrated { &vop_revoke_desc, fifo_revoke }, /* revoke */ -#endif { &vop_mmap_desc, fifo_mmap }, /* mmap */ { &vop_fsync_desc, nfs_fsync }, /* fsync */ { &vop_seek_desc, fifo_seek }, /* seek */ @@ -772,6 +766,7 @@ nfs_lookup(v) register struct componentname *cnp = ap->a_cnp; register struct vnode *dvp = ap->a_dvp; register struct vnode **vpp = ap->a_vpp; + struct proc *p = cnp->cn_proc; register 
int flags = cnp->cn_flags; register struct vnode *newvp; register u_int32_t *tl; @@ -810,11 +805,8 @@ nfs_lookup(v) VREF(newvp); error = 0; } else -#ifdef Lite2_integrated error = vget(newvp, LK_EXCLUSIVE, p); -#else - error = vget(newvp, 1); -#endif + if (!error) { if (vpid == newvp->v_id) { if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, cnp->cn_proc) @@ -1964,11 +1956,17 @@ nfs_readdir(v) if (!error && ap->a_cookies) { struct dirent *dp; - u_long *cookies = ap->a_cookies; - int ncookies = ap->a_ncookies; + u_long *cookies; + /* XXX - over-estimate - see UFS code for how to do it + right */ + int ncookies = (uio->uio_iov->iov_base - base) / 12; - /* - * Only the NFS server and emulations use cookies, and they + MALLOC(cookies, u_long *, sizeof(*cookies) * ncookies, + M_TEMP, M_WAITOK); + *ap->a_ncookies = ncookies; + *ap->a_cookies = cookies; + + /* Only the NFS server and emulations use cookies, and they * load the directory block into system space, so we can * just look at it directly. 
*/ @@ -1982,6 +1980,8 @@ nfs_readdir(v) *(cookies++) = off; base += dp->d_reclen; } + + *ap->a_ncookies -= ncookies; uio->uio_resid += (uio->uio_iov->iov_base - base); uio->uio_iov->iov_len += (uio->uio_iov->iov_base - base); uio->uio_iov->iov_base = base; @@ -2812,8 +2812,6 @@ again: else { vp->v_numoutput++; bp->b_flags |= B_ASYNC; - if (bp->b_flags & B_DELWRI) - TAILQ_REMOVE(&bdirties, bp, b_synclist); bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); bp->b_dirtyoff = bp->b_dirtyend = 0; reassignbuf(bp, vp); @@ -3092,8 +3090,6 @@ nfs_writebp(bp, force) bp, bp->b_vp, bp->b_validoff, bp->b_validend, bp->b_dirtyoff, bp->b_dirtyend); #endif - if (bp->b_flags & B_DELWRI) - TAILQ_REMOVE(&bdirties, bp, b_synclist); bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); if (oldflags & B_ASYNC) { diff --git a/sys/nfs/nfsnode.h b/sys/nfs/nfsnode.h index 8ca127a4e6a..b6255b0bbf9 100644 --- a/sys/nfs/nfsnode.h +++ b/sys/nfs/nfsnode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: nfsnode.h,v 1.3 1996/03/31 13:16:16 mickey Exp $ */ +/* $OpenBSD: nfsnode.h,v 1.4 1997/10/06 15:23:46 csapuntz Exp $ */ /* $NetBSD: nfsnode.h,v 1.16 1996/02/18 11:54:04 fvdl Exp $ */ /* @@ -191,15 +191,9 @@ int nfs_readlink __P((void *)); int nfs_abortop __P((void *)); int nfs_inactive __P((void *)); int nfs_reclaim __P((void *)); -#ifdef Lite2_integrated #define nfs_lock ((int (*) __P((void *)))vop_nolock) #define nfs_unlock ((int (*) __P((void *)))vop_nounlock) #define nfs_islocked ((int (*) __P((void *)))vop_noislocked) -#else -int nfs_lock __P((void *)); -int nfs_unlock __P((void *)); -int nfs_islocked __P((void *)); -#endif /* Lite2_integrated */ int nfs_bmap __P((void *)); int nfs_strategy __P((void *)); int nfs_print __P((void *)); diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 34587d51cc8..4b2582677a0 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -1,4 +1,4 @@ -/* $OpenBSD: buf.h,v 1.7 1997/07/28 09:13:14 deraadt Exp $ */ +/* $OpenBSD: buf.h,v 1.8 1997/10/06 15:25:32 csapuntz Exp $ */ /* $NetBSD: buf.h,v 
1.25 1997/04/09 21:12:17 mycroft Exp $ */ /* @@ -48,6 +48,27 @@ #define NOLIST ((struct buf *)0x87654321) /* + * To avoid including <ufs/ffs/softdep.h> + */ + +LIST_HEAD(workhead, worklist); + +/* + * These are currently used only by the soft dependency code, hence + * are stored once in a global variable. If other subsystems wanted + * to use these hooks, a pointer to a set of bio_ops could be added + * to each buffer. + */ +struct mount; +extern struct bio_ops { + void (*io_start) __P((struct buf *)); + void (*io_complete) __P((struct buf *)); + void (*io_deallocate) __P((struct buf *)); + int (*io_sync) __P((struct mount *)); +} bioops; + + +/* * The buffer header describes an I/O operation in the kernel. */ struct buf { @@ -79,6 +100,7 @@ struct buf { struct ucred *b_wcred; /* Write credentials reference. */ int b_validoff; /* Offset in buffer of valid region. */ int b_validend; /* Offset of end of valid region. */ + struct workhead b_dep; /* List of filesystem dependencies. */ }; /* @@ -177,6 +199,7 @@ int breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int, void brelse __P((struct buf *)); void bremfree __P((struct buf *)); void bufinit __P((void)); +void bdirty __P((struct buf *)); int bwrite __P((struct buf *)); void cluster_callback __P((struct buf *)); int cluster_read __P((struct vnode *, u_quad_t, daddr_t, long, diff --git a/sys/sys/lock.h b/sys/sys/lock.h new file mode 100644 index 00000000000..f4491b09520 --- /dev/null +++ b/sys/sys/lock.h @@ -0,0 +1,167 @@ +/* + * Copyright (c) 1995 + * The Regents of the University of California. All rights reserved. + * + * This code contains ideas from software contributed to Berkeley by + * Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating + * System project at Carnegie-Mellon University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)lock.h 8.12 (Berkeley) 5/19/95 + */ + +#ifndef _LOCK_H_ +#define _LOCK_H_ + +#include <sys/simplelock.h> + +/* + * The general lock structure. Provides for multiple shared locks, + * upgrading from shared to exclusive, and sleeping until the lock + * can be gained. The simple locks are defined in <machine/param.h>. 
+ */ +struct lock { + struct simplelock lk_interlock; /* lock on remaining fields */ + u_int lk_flags; /* see below */ + int lk_sharecount; /* # of accepted shared locks */ + int lk_waitcount; /* # of processes sleeping for lock */ + short lk_exclusivecount; /* # of recursive exclusive locks */ + short lk_prio; /* priority at which to sleep */ + char *lk_wmesg; /* resource sleeping (for tsleep) */ + int lk_timo; /* maximum sleep time (for tsleep) */ + pid_t lk_lockholder; /* pid of exclusive lock holder */ +}; +/* + * Lock request types: + * LK_SHARED - get one of many possible shared locks. If a process + * holding an exclusive lock requests a shared lock, the exclusive + * lock(s) will be downgraded to shared locks. + * LK_EXCLUSIVE - stop further shared locks, when they are cleared, + * grant a pending upgrade if it exists, then grant an exclusive + * lock. Only one exclusive lock may exist at a time, except that + * a process holding an exclusive lock may get additional exclusive + * locks if it explicitly sets the LK_CANRECURSE flag in the lock + * request, or if the LK_CANRECUSE flag was set when the lock was + * initialized. + * LK_UPGRADE - the process must hold a shared lock that it wants to + * have upgraded to an exclusive lock. Other processes may get + * exclusive access to the resource between the time that the upgrade + * is requested and the time that it is granted. + * LK_EXCLUPGRADE - the process must hold a shared lock that it wants to + * have upgraded to an exclusive lock. If the request succeeds, no + * other processes will have gotten exclusive access to the resource + * between the time that the upgrade is requested and the time that + * it is granted. However, if another process has already requested + * an upgrade, the request will fail (see error returns below). + * LK_DOWNGRADE - the process must hold an exclusive lock that it wants + * to have downgraded to a shared lock. 
If the process holds multiple + * (recursive) exclusive locks, they will all be downgraded to shared + * locks. + * LK_RELEASE - release one instance of a lock. + * LK_DRAIN - wait for all activity on the lock to end, then mark it + * decommissioned. This feature is used before freeing a lock that + * is part of a piece of memory that is about to be freed. + * + * These are flags that are passed to the lockmgr routine. + */ +#define LK_TYPE_MASK 0x0000000f /* type of lock sought */ +#define LK_SHARED 0x00000001 /* shared lock */ +#define LK_EXCLUSIVE 0x00000002 /* exclusive lock */ +#define LK_UPGRADE 0x00000003 /* shared-to-exclusive upgrade */ +#define LK_EXCLUPGRADE 0x00000004 /* first shared-to-exclusive upgrade */ +#define LK_DOWNGRADE 0x00000005 /* exclusive-to-shared downgrade */ +#define LK_RELEASE 0x00000006 /* release any type of lock */ +#define LK_DRAIN 0x00000007 /* wait for all lock activity to end */ +/* + * External lock flags. + * + * The first three flags may be set in lock_init to set their mode permanently, + * or passed in as arguments to the lock manager. The LK_REENABLE flag may be + * set only at the release of a lock obtained by drain. + */ +#define LK_EXTFLG_MASK 0x00000070 /* mask of external flags */ +#define LK_NOWAIT 0x00000010 /* do not sleep to await lock */ +#define LK_SLEEPFAIL 0x00000020 /* sleep, then return failure */ +#define LK_CANRECURSE 0x00000040 /* allow recursive exclusive lock */ +#define LK_REENABLE 0x00000080 /* lock is be reenabled after drain */ +/* + * Internal lock flags. + * + * These flags are used internally to the lock manager. 
+ */ +#define LK_WANT_UPGRADE 0x00000100 /* waiting for share-to-excl upgrade */ +#define LK_WANT_EXCL 0x00000200 /* exclusive lock sought */ +#define LK_HAVE_EXCL 0x00000400 /* exclusive lock obtained */ +#define LK_WAITDRAIN 0x00000800 /* process waiting for lock to drain */ +#define LK_DRAINING 0x00004000 /* lock is being drained */ +#define LK_DRAINED 0x00008000 /* lock has been decommissioned */ +/* + * Control flags + * + * Non-persistent external flags. + */ +#define LK_INTERLOCK 0x00010000 /* unlock passed simple lock after + getting lk_interlock */ +#define LK_RETRY 0x00020000 /* vn_lock: retry until locked */ + +/* + * Lock return status. + * + * Successfully obtained locks return 0. Locks will always succeed + * unless one of the following is true: + * LK_FORCEUPGRADE is requested and some other process has already + * requested a lock upgrade (returns EBUSY). + * LK_WAIT is set and a sleep would be required (returns EBUSY). + * LK_SLEEPFAIL is set and a sleep was done (returns ENOLCK). + * PCATCH is set in lock priority and a signal arrives (returns + * either EINTR or ERESTART if system calls is to be restarted). + * Non-null lock timeout and timeout expires (returns EWOULDBLOCK). + * A failed lock attempt always returns a non-zero error value. No lock + * is held after an error return (in particular, a failed LK_UPGRADE + * or LK_FORCEUPGRADE will have released its shared access lock). 
+ */ + +/* + * Indicator that no process holds exclusive lock + */ +#define LK_KERNPROC ((pid_t) -2) +#define LK_NOPROC ((pid_t) -1) + +struct proc; + +void lockinit __P((struct lock *, int prio, char *wmesg, int timo, + int flags)); +int lockmgr __P((__volatile struct lock *, u_int flags, + struct simplelock *, struct proc *p)); +void lockmgr_printinfo __P((struct lock *)); +int lockstatus __P((struct lock *)); + +#endif /* !_LOCK_H_ */ + diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h index 4b87be6fa20..3e380f50dfd 100644 --- a/sys/sys/malloc.h +++ b/sys/sys/malloc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: malloc.h,v 1.10 1997/03/01 21:24:46 kstailey Exp $ */ +/* $OpenBSD: malloc.h,v 1.11 1997/10/06 15:25:33 csapuntz Exp $ */ /* $NetBSD: malloc.h,v 1.23 1996/04/05 04:52:52 mhitch Exp $ */ /* @@ -128,8 +128,25 @@ #define M_PFIL 73 /* packer filter */ #define M_TDB 75 /* Transforms database */ #define M_XDATA 76 /* IPsec data */ -#define M_TEMP 84 /* misc temporary data buffers */ -#define M_LAST 85 /* Must be last type + 1 */ +#define M_VFS 77 /* VFS file systems */ + +#define M_PAGEDEP 78 /* File page dependencies */ +#define M_INODEDEP 79 /* Inode dependencies */ +#define M_NEWBLK 80 /* New block allocation */ +#define M_BMSAFEMAP 81 /* Block or frag allocated from cyl group map */ +#define M_ALLOCDIRECT 82 /* Block or frag dependency for an inode */ +#define M_INDIRDEP 83 /* Indirect block dependencies */ +#define M_ALLOCINDIR 84 /* Block dependency for an indirect block */ +#define M_FREEFRAG 85 /* Previously used frag for an inode */ +#define M_FREEBLKS 86 /* Blocks freed from an inode */ +#define M_FREEFILE 87 /* Inode deallocated */ +#define M_DIRADD 88 /* New directory entry */ +#define M_MKDIR 89 /* New directory */ +#define M_DIRREM 90 /* Directory entry deleted */ + +#define M_TEMP 127 /* misc temporary data buffers */ +#define M_LAST 128 /* Must be last type + 1 */ + #define INITKMEMNAMES { \ "free", /* 0 M_FREE */ \ @@ -209,9 +226,29 @@ NULL, \ "tdb", /* 75 
M_TDB */ \ "xform_data", /* 76 M_XDATA */ \ - NULL, NULL, \ + "vfs", /* 77 M_VFS */ \ + "pagedep", /* 78 M_PAGEDEP */ \ + "inodedep", /* 79 M_INODEDEP */ \ + "newblk", /* 80 M_NEWBLK */ \ + "bmsafemap", /* 81 M_BMSAFEMAP */ \ + "allocdirect", /* 82 M_ALLOCDIRECT */ \ + "indirdep", /* 83 M_INDIRDEP */ \ + "allocindir", /* 84 M_ALLOCINDIR */ \ + "freefrag", /* 85 M_FREEFRAG */ \ + "freeblks", /* 86 M_FREEBLKS */ \ + "freefile", /* 87 M_FREEFILE */ \ + "diradd", /* 88 M_DIRADD */ \ + "mkdir", /* 89 M_MKDIR */ \ + "dirrem", /* 90 M_DIRREM */ \ + NULL, NULL, NULL, NULL, NULL, \ + NULL, NULL, NULL, NULL, NULL, \ + NULL, NULL, NULL, NULL, NULL, \ + NULL, NULL, NULL, NULL, NULL, \ + NULL, NULL, NULL, NULL, NULL, \ + NULL, NULL, NULL, NULL, NULL, \ NULL, NULL, NULL, NULL, NULL, \ - "temp", /* 84 M_TEMP */ \ + NULL, \ + "temp", /* 127 M_TEMP */ \ } struct kmemstats { diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 2ad19911a6a..776740078d0 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -1,4 +1,4 @@ -/* $OpenBSD: mount.h,v 1.18 1997/04/16 09:49:00 downsj Exp $ */ +/* $OpenBSD: mount.h,v 1.19 1997/10/06 15:25:33 csapuntz Exp $ */ /* $NetBSD: mount.h,v 1.48 1996/02/18 11:55:47 fvdl Exp $ */ /* @@ -43,6 +43,7 @@ #include <sys/ucred.h> #endif #include <sys/queue.h> +#include <sys/lock.h> typedef struct { int32_t val[2]; } fsid_t; /* file system id type */ @@ -55,7 +56,7 @@ typedef struct { int32_t val[2]; } fsid_t; /* file system id type */ struct fid { u_short fid_len; /* length of data in bytes */ u_short fid_reserved; /* force longword alignment */ - char fid_data[MAXFIDSZ]; /* data (variable length) */ + char fid_data[MAXFIDSZ]; /* data (variable length) */ }; /* @@ -77,7 +78,9 @@ struct statfs { long f_ffree; /* free file nodes in fs */ fsid_t f_fsid; /* file system id */ uid_t f_owner; /* user that mounted the file system */ - long f_spare[4]; /* spare for later */ + long f_syncwrites; /* count of sync writes since mount */ + long f_asyncwrites; /* count of async 
writes since mount */ + long f_spare[2]; /* spare for later */ char f_fstypename[MFSNAMELEN]; /* fs type name */ char f_mntonname[MNAMELEN]; /* directory on which mounted */ char f_mntfromname[MNAMELEN]; /* mounted file system */ @@ -116,8 +119,11 @@ LIST_HEAD(vnodelst, vnode); struct mount { CIRCLEQ_ENTRY(mount) mnt_list; /* mount list */ struct vfsops *mnt_op; /* operations on fs */ + struct vfsconf *mnt_vfc; /* configuration info */ struct vnode *mnt_vnodecovered; /* vnode we mounted on */ + struct vnode *mnt_syncer; /* syncer vnode */ struct vnodelst mnt_vnodelist; /* list of vnodes this mount */ + struct lock mnt_lock; /* mount structure lock */ int mnt_flag; /* flags */ int mnt_maxsymlinklen; /* max size of short symlink */ struct statfs mnt_stat; /* cache of filesystem stats */ @@ -161,7 +167,7 @@ struct mount { /* * Mask of flags that are visible to statfs() */ -#define MNT_VISFLAGMASK 0x0000ffff +#define MNT_VISFLAGMASK 0x0400ffff /* * filesystem control flags. @@ -180,6 +186,37 @@ struct mount { #define MNT_MPWANT 0x00800000 /* waiting for mount point */ #define MNT_UNMOUNT 0x01000000 /* unmount in progress */ #define MNT_WANTRDWR 0x02000000 /* want upgrade to read/write */ +#define MNT_SOFTDEP 0x04000000 /* soft dependencies being done */ +/* + * Sysctl CTL_VFS definitions. + * + * Second level identifier specifies which filesystem. Second level + * identifier VFS_GENERIC returns information about all filesystems. + */ +#define VFS_GENERIC 0 /* generic filesystem information */ +/* + * Third level identifiers for VFS_GENERIC are given below; third + * level identifiers for specific filesystems are given in their + * mount specific header files. + */ +#define VFS_MAXTYPENUM 1 /* int: highest defined filesystem type */ +#define VFS_CONF 2 /* struct: vfsconf for filesystem given + as next argument */ + +/* + * Filesystem configuration information. One of these exists for each + * type of filesystem supported by the kernel. 
These are searched at + * mount time to identify the requested filesystem. + */ +struct vfsconf { + struct vfsops *vfc_vfsops; /* filesystem operations vector */ + char vfc_name[MFSNAMELEN]; /* filesystem type name */ + int vfc_typenum; /* historic filesystem type number */ + int vfc_refcount; /* number mounted of this type */ + int vfc_flags; /* permanent flags */ + int (*vfc_mountroot)(void); /* if != NULL, routine to mount root */ + struct vfsconf *vfc_next; /* next in list */ +}; /* * Operations supported on mounted file system. @@ -190,8 +227,10 @@ struct nameidata; struct mbuf; #endif +extern int maxvfsconf; /* highest defined filesystem type */ +extern struct vfsconf *vfsconf; /* head of list of filesystem types */ + struct vfsops { - char *vfs_name; int (*vfs_mount) __P((struct mount *mp, char *path, caddr_t data, struct nameidata *ndp, struct proc *p)); int (*vfs_start) __P((struct mount *mp, int flags, @@ -211,8 +250,9 @@ struct vfsops { struct mbuf *nam, struct vnode **vpp, int *exflagsp, struct ucred **credanonp)); int (*vfs_vptofh) __P((struct vnode *vp, struct fid *fhp)); - void (*vfs_init) __P((void)); - int vfs_refcount; + int (*vfs_init) __P((struct vfsconf *)); + int (*vfs_sysctl) __P((int *, u_int, void *, size_t *, void *, + size_t, struct proc *)); }; #define VFS_MOUNT(MP, PATH, DATA, NDP, P) \ @@ -234,8 +274,9 @@ struct vfsops { * * waitfor flags to vfs_sync() and getfsstat() */ -#define MNT_WAIT 1 -#define MNT_NOWAIT 2 +#define MNT_WAIT 1 /* synchronously wait for I/O to complete */ +#define MNT_NOWAIT 2 /* start all I/O, but do not wait for it */ +#define MNT_LAZY 3 /* push data not written by filesystem syncer */ /* * Generic file handle @@ -446,21 +487,25 @@ struct adosfs_args { /* * exported vnode operations */ +int vfs_busy __P((struct mount *, int, struct simplelock *, struct proc *)); +void vfs_getnewfsid __P((struct mount *)); +struct mount *vfs_getvfs __P((fsid_t *)); +int vfs_mountedon __P((struct vnode *)); +int vfs_mountroot 
__P((void)); +int vfs_rootmountalloc __P((char *, char *, struct mount **)); +void vfs_unbusy __P((struct mount *, struct proc *)); +void vfs_unmountall __P((void)); +extern CIRCLEQ_HEAD(mntlist, mount) mountlist; +extern struct simplelock mountlist_slock; + struct mount *getvfs __P((fsid_t *)); /* return vfs given fsid */ int vfs_export /* process mount export info */ __P((struct mount *, struct netexport *, struct export_args *)); struct netcred *vfs_export_lookup /* lookup host in fs export list */ __P((struct mount *, struct netexport *, struct mbuf *)); -int vfs_lock __P((struct mount *)); /* lock a vfs */ -int vfs_mountedon __P((struct vnode *));/* is a vfs mounted on vp */ +int vfs_allocate_syncvnode __P((struct mount *)); + void vfs_shutdown __P((void)); /* unmount and sync file systems */ -void vfs_unlock __P((struct mount *)); /* unlock a vfs */ -void vfs_unmountall __P((void)); /* unmount file systems */ -int vfs_busy __P((struct mount *)); -void vfs_unbusy __P((struct mount *)); -extern CIRCLEQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */ -extern struct vfsops *vfssw[]; /* filesystem type table */ -extern int nvfssw; long makefstype __P((char *)); int dounmount __P((struct mount *, int, struct proc *)); void vfsinit __P((void)); @@ -479,6 +524,8 @@ int getmntinfo __P((struct statfs **, int)); int mount __P((const char *, const char *, int, void *)); int statfs __P((const char *, struct statfs *)); int unmount __P((const char *, int)); + + __END_DECLS #endif /* _KERNEL */ diff --git a/sys/sys/param.h b/sys/sys/param.h index a7d227ee2da..d9b459abc71 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -1,4 +1,4 @@ -/* $OpenBSD: param.h,v 1.15 1997/10/01 21:53:36 deraadt Exp $ */ +/* $OpenBSD: param.h,v 1.16 1997/10/06 15:25:34 csapuntz Exp $ */ /* $NetBSD: param.h,v 1.23 1996/03/17 01:02:29 thorpej Exp $ */ /*- @@ -54,6 +54,7 @@ #ifndef _LOCORE #include <sys/types.h> +#include <sys/simplelock.h> #endif /* diff --git a/sys/sys/queue.h 
b/sys/sys/queue.h index 962009c90d4..e617d3c4052 100644 --- a/sys/sys/queue.h +++ b/sys/sys/queue.h @@ -1,4 +1,4 @@ -/* $OpenBSD: queue.h,v 1.4 1996/05/22 12:07:15 deraadt Exp $ */ +/* $OpenBSD: queue.h,v 1.5 1997/10/06 15:25:34 csapuntz Exp $ */ /* $NetBSD: queue.h,v 1.11 1996/05/16 05:17:14 mycroft Exp $ */ /* @@ -62,7 +62,7 @@ * linked so that an arbitrary element can be removed without a need to * traverse the list. New elements can be added to the list before or * after an existing element, at the head of the list, or at the end of - * the list. A tail queue may only be traversed in the forward direction. + * the list. A tail queue may be traversed in either direction. * * A circle queue is headed by a pair of pointers, one to the head of the * list and the other to the tail of the list. The elements are doubly @@ -81,7 +81,7 @@ #define LIST_HEAD(name, type) \ struct name { \ struct type *lh_first; /* first element */ \ -} +} #define LIST_ENTRY(type) \ struct { \ @@ -89,41 +89,45 @@ struct { \ struct type **le_prev; /* address of previous next element */ \ } +#define LIST_FIRST(head) ((head)->lh_first) +#define LIST_NEXT(elm, field) ((elm)->field.le_next) +#define LIST_END(head) NULL + /* * List functions. 
*/ -#define LIST_INIT(head) { \ +#define LIST_INIT(head) do { \ (head)->lh_first = NULL; \ -} +} while (0) -#define LIST_INSERT_AFTER(listelm, elm, field) { \ +#define LIST_INSERT_AFTER(listelm, elm, field) do { \ if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \ (listelm)->field.le_next->field.le_prev = \ &(elm)->field.le_next; \ (listelm)->field.le_next = (elm); \ (elm)->field.le_prev = &(listelm)->field.le_next; \ -} +} while (0) -#define LIST_INSERT_BEFORE(listelm, elm, field) { \ +#define LIST_INSERT_BEFORE(listelm, elm, field) do { \ (elm)->field.le_prev = (listelm)->field.le_prev; \ (elm)->field.le_next = (listelm); \ *(listelm)->field.le_prev = (elm); \ (listelm)->field.le_prev = &(elm)->field.le_next; \ -} +} while (0) -#define LIST_INSERT_HEAD(head, elm, field) { \ +#define LIST_INSERT_HEAD(head, elm, field) do { \ if (((elm)->field.le_next = (head)->lh_first) != NULL) \ (head)->lh_first->field.le_prev = &(elm)->field.le_next;\ (head)->lh_first = (elm); \ (elm)->field.le_prev = &(head)->lh_first; \ -} +} while (0) -#define LIST_REMOVE(elm, field) { \ +#define LIST_REMOVE(elm, field) do { \ if ((elm)->field.le_next != NULL) \ (elm)->field.le_next->field.le_prev = \ (elm)->field.le_prev; \ *(elm)->field.le_prev = (elm)->field.le_next; \ -} +} while (0) /* * Simple queue definitions. @@ -142,33 +146,33 @@ struct { \ /* * Simple queue functions. 
*/ -#define SIMPLEQ_INIT(head) { \ +#define SIMPLEQ_INIT(head) do { \ (head)->sqh_first = NULL; \ (head)->sqh_last = &(head)->sqh_first; \ -} +} while (0) -#define SIMPLEQ_INSERT_HEAD(head, elm, field) { \ +#define SIMPLEQ_INSERT_HEAD(head, elm, field) do { \ if (((elm)->field.sqe_next = (head)->sqh_first) == NULL) \ (head)->sqh_last = &(elm)->field.sqe_next; \ (head)->sqh_first = (elm); \ -} +} while (0) -#define SIMPLEQ_INSERT_TAIL(head, elm, field) { \ +#define SIMPLEQ_INSERT_TAIL(head, elm, field) do { \ (elm)->field.sqe_next = NULL; \ *(head)->sqh_last = (elm); \ (head)->sqh_last = &(elm)->field.sqe_next; \ -} +} while (0) -#define SIMPLEQ_INSERT_AFTER(head, listelm, elm, field) { \ +#define SIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \ if (((elm)->field.sqe_next = (listelm)->field.sqe_next) == NULL)\ (head)->sqh_last = &(elm)->field.sqe_next; \ (listelm)->field.sqe_next = (elm); \ -} +} while (0) -#define SIMPLEQ_REMOVE_HEAD(head, elm, field) { \ +#define SIMPLEQ_REMOVE_HEAD(head, elm, field) do { \ if (((head)->sqh_first = (elm)->field.sqe_next) == NULL) \ (head)->sqh_last = &(head)->sqh_first; \ -} +} while (0) /* * Tail queue definitions. @@ -185,15 +189,24 @@ struct { \ struct type **tqe_prev; /* address of previous next element */ \ } + +#define TAILQ_FIRST(head) ((head)->tqh_first) +#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) +#define TAILQ_END(head) NULL +#define TAILQ_LAST(head, headname) \ + (*(((struct headname *)((head)->tqh_last))->tqh_last)) +#define TAILQ_PREV(elm, headname, field) \ + (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) + /* * Tail queue functions. 
*/ -#define TAILQ_INIT(head) { \ +#define TAILQ_INIT(head) do { \ (head)->tqh_first = NULL; \ (head)->tqh_last = &(head)->tqh_first; \ -} +} while (0) -#define TAILQ_INSERT_HEAD(head, elm, field) { \ +#define TAILQ_INSERT_HEAD(head, elm, field) do { \ if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \ (head)->tqh_first->field.tqe_prev = \ &(elm)->field.tqe_next; \ @@ -201,16 +214,16 @@ struct { \ (head)->tqh_last = &(elm)->field.tqe_next; \ (head)->tqh_first = (elm); \ (elm)->field.tqe_prev = &(head)->tqh_first; \ -} +} while (0) -#define TAILQ_INSERT_TAIL(head, elm, field) { \ +#define TAILQ_INSERT_TAIL(head, elm, field) do { \ (elm)->field.tqe_next = NULL; \ (elm)->field.tqe_prev = (head)->tqh_last; \ *(head)->tqh_last = (elm); \ (head)->tqh_last = &(elm)->field.tqe_next; \ -} +} while (0) -#define TAILQ_INSERT_AFTER(head, listelm, elm, field) { \ +#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\ (elm)->field.tqe_next->field.tqe_prev = \ &(elm)->field.tqe_next; \ @@ -218,23 +231,23 @@ struct { \ (head)->tqh_last = &(elm)->field.tqe_next; \ (listelm)->field.tqe_next = (elm); \ (elm)->field.tqe_prev = &(listelm)->field.tqe_next; \ -} +} while (0) -#define TAILQ_INSERT_BEFORE(listelm, elm, field) { \ +#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ (elm)->field.tqe_next = (listelm); \ *(listelm)->field.tqe_prev = (elm); \ (listelm)->field.tqe_prev = &(elm)->field.tqe_next; \ -} +} while (0) -#define TAILQ_REMOVE(head, elm, field) { \ +#define TAILQ_REMOVE(head, elm, field) do { \ if (((elm)->field.tqe_next) != NULL) \ (elm)->field.tqe_next->field.tqe_prev = \ (elm)->field.tqe_prev; \ else \ (head)->tqh_last = (elm)->field.tqe_prev; \ *(elm)->field.tqe_prev = (elm)->field.tqe_next; \ -} +} while (0) /* * Circular queue definitions. 
@@ -251,15 +264,21 @@ struct { \ struct type *cqe_prev; /* previous element */ \ } +#define CIRCLEQ_FIRST(head) ((head)->cqh_first) +#define CIRCLEQ_LAST(head) ((head)->cqh_last) +#define CIRCLEQ_END(head) ((void *)(head)) +#define CIRCLEQ_NEXT(elm, field) ((elm)->field.cqe_next) +#define CIRCLEQ_PREV(elm, field) ((elm)->field.cqe_prev) + /* * Circular queue functions. */ -#define CIRCLEQ_INIT(head) { \ +#define CIRCLEQ_INIT(head) do { \ (head)->cqh_first = (void *)(head); \ (head)->cqh_last = (void *)(head); \ -} +} while (0) -#define CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) { \ +#define CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) do { \ (elm)->field.cqe_next = (listelm)->field.cqe_next; \ (elm)->field.cqe_prev = (listelm); \ if ((listelm)->field.cqe_next == (void *)(head)) \ @@ -267,9 +286,9 @@ struct { \ else \ (listelm)->field.cqe_next->field.cqe_prev = (elm); \ (listelm)->field.cqe_next = (elm); \ -} +} while (0) -#define CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) { \ +#define CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) do { \ (elm)->field.cqe_next = (listelm); \ (elm)->field.cqe_prev = (listelm)->field.cqe_prev; \ if ((listelm)->field.cqe_prev == (void *)(head)) \ @@ -277,9 +296,9 @@ struct { \ else \ (listelm)->field.cqe_prev->field.cqe_next = (elm); \ (listelm)->field.cqe_prev = (elm); \ -} +} while (0) -#define CIRCLEQ_INSERT_HEAD(head, elm, field) { \ +#define CIRCLEQ_INSERT_HEAD(head, elm, field) do { \ (elm)->field.cqe_next = (head)->cqh_first; \ (elm)->field.cqe_prev = (void *)(head); \ if ((head)->cqh_last == (void *)(head)) \ @@ -287,9 +306,9 @@ struct { \ else \ (head)->cqh_first->field.cqe_prev = (elm); \ (head)->cqh_first = (elm); \ -} +} while (0) -#define CIRCLEQ_INSERT_TAIL(head, elm, field) { \ +#define CIRCLEQ_INSERT_TAIL(head, elm, field) do { \ (elm)->field.cqe_next = (void *)(head); \ (elm)->field.cqe_prev = (head)->cqh_last; \ if ((head)->cqh_first == (void *)(head)) \ @@ -297,9 +316,9 @@ struct { \ else \ 
(head)->cqh_last->field.cqe_next = (elm); \ (head)->cqh_last = (elm); \ -} +} while (0) -#define CIRCLEQ_REMOVE(head, elm, field) { \ +#define CIRCLEQ_REMOVE(head, elm, field) do { \ if ((elm)->field.cqe_next == (void *)(head)) \ (head)->cqh_last = (elm)->field.cqe_prev; \ else \ @@ -310,5 +329,5 @@ struct { \ else \ (elm)->field.cqe_prev->field.cqe_next = \ (elm)->field.cqe_next; \ -} +} while (0) #endif /* !_SYS_QUEUE_H_ */ diff --git a/sys/sys/simplelock.h b/sys/sys/simplelock.h new file mode 100644 index 00000000000..c979f157b08 --- /dev/null +++ b/sys/sys/simplelock.h @@ -0,0 +1,86 @@ +#ifndef _SIMPLELOCK_H_ +#define _SIMPLELOCK_H_ +/* + * A simple spin lock. + * + * This structure only sets one bit of data, but is sized based on the + * minimum word size that can be operated on by the hardware test-and-set + * instruction. It is only needed for multiprocessors, as uniprocessors + * will always run to completion or a sleep. It is an error to hold one + * of these locks while a process is sleeping. 
+ */ +struct simplelock { + int lock_data; +}; + +#ifndef NCPUS +#define NCPUS 1 +#endif + +#if NCPUS == 1 + +#if !defined(DEBUG) +#define simple_lock(alp) +#define simple_lock_try(alp) (1) /* always succeeds */ +#define simple_unlock(alp) + +static __inline void simple_lock_init __P((struct simplelock *)); + +static __inline void +simple_lock_init(lkp) + struct simplelock *lkp; +{ + + lkp->lock_data = 0; +} + +#else + +void _simple_unlock __P((__volatile struct simplelock *alp, const char *, int)); +#define simple_unlock(alp) _simple_unlock(alp, __FILE__, __LINE__) +int _simple_lock_try __P((__volatile struct simplelock *alp, const char *, int)); +#define simple_lock_try(alp) _simple_lock_try(alp, __FILE__, __LINE__) +void _simple_lock __P((__volatile struct simplelock *alp, const char *, int)); +#define simple_lock(alp) _simple_lock(alp, __FILE__, __LINE__) +void simple_lock_init __P((struct simplelock *alp)); + +#endif /* !defined(DEBUG) */ + +#else /* NCPUS > 1 */ + +/* + * The simple-lock routines are the primitives out of which the lock + * package is built. The machine-dependent code must implement an + * atomic test_and_set operation that indivisibly sets the simple lock + * to non-zero and returns its old value. It also assumes that the + * setting of the lock to zero below is indivisible. Simple locks may + * only be used for exclusive locks. 
+ */ + +static __inline void +simple_lock(lkp) + __volatile struct simplelock *lkp; +{ + + while (test_and_set(&lkp->lock_data)) + continue; +} + +static __inline int +simple_lock_try(lkp) + __volatile struct simplelock *lkp; +{ + + return (!test_and_set(&lkp->lock_data)); +} + +static __inline void +simple_unlock(lkp) + __volatile struct simplelock *lkp; +{ + + lkp->lock_data = 0; +} +#endif /* NCPUS > 1 */ + +#endif /* !_SIMPLELOCK_H_ */ diff --git a/sys/sys/specdev.h b/sys/sys/specdev.h index 9f58fa7acb4..94f98ac3c70 100644 --- a/sys/sys/specdev.h +++ b/sys/sys/specdev.h @@ -1,4 +1,4 @@ -/* $OpenBSD: specdev.h,v 1.3 1997/01/04 17:10:05 kstailey Exp $ */ +/* $OpenBSD: specdev.h,v 1.4 1997/10/06 15:19:13 csapuntz Exp $ */ /* $NetBSD: specdev.h,v 1.12 1996/02/13 13:13:01 mycroft Exp $ */ /* @@ -44,7 +44,7 @@ struct specinfo { struct vnode **si_hashchain; struct vnode *si_specnext; - long si_flags; + struct mount *si_mountpoint; dev_t si_rdev; struct lockf *si_lockf; }; @@ -54,15 +54,10 @@ struct specinfo { #define v_rdev v_specinfo->si_rdev #define v_hashchain v_specinfo->si_hashchain #define v_specnext v_specinfo->si_specnext -#define v_specflags v_specinfo->si_flags +#define v_specmountpoint v_specinfo->si_mountpoint #define v_speclockf v_specinfo->si_lockf /* - * Flags for specinfo - */ -#define SI_MOUNTEDON 0x0001 /* block special device is mounted on */ - -/* * Special device management */ #define SPECHSZ 64 @@ -113,14 +108,14 @@ int spec_fsync __P((void *)); #define spec_readdir spec_badop #define spec_readlink spec_badop #define spec_abortop spec_badop -#define spec_inactive nullop +int spec_inactive __P((void *)); #define spec_reclaim nullop -int spec_lock __P((void *)); -int spec_unlock __P((void *)); +#define spec_lock vop_nolock +#define spec_unlock vop_nounlock +#define spec_islocked vop_noislocked int spec_bmap __P((void *)); int spec_strategy __P((void *)); int spec_print __P((void *)); -#define spec_islocked nullop int spec_pathconf __P((void *)); int 
spec_advlock __P((void *)); #define spec_blkatoff spec_badop @@ -130,3 +125,4 @@ int spec_advlock __P((void *)); #define spec_truncate nullop #define spec_update nullop #define spec_bwrite vn_bwrite +#define spec_revoke vop_revoke diff --git a/sys/sys/sysctl.h b/sys/sys/sysctl.h index 9eb21269f2c..3cc255b0b1c 100644 --- a/sys/sys/sysctl.h +++ b/sys/sys/sysctl.h @@ -1,4 +1,4 @@ -/* $OpenBSD: sysctl.h,v 1.18 1997/09/08 17:28:18 kstailey Exp $ */ +/* $OpenBSD: sysctl.h,v 1.19 1997/10/06 15:25:35 csapuntz Exp $ */ /* $NetBSD: sysctl.h,v 1.16 1996/04/09 20:55:36 cgd Exp $ */ /* @@ -49,9 +49,10 @@ #include <sys/time.h> #include <sys/ucred.h> #include <sys/proc.h> -#include <vm/vm.h> #endif +#include <vm/vm.h> + /* * Definitions for sysctl call. The sysctl call uses a hierarchical name * for objects that can be examined or modified. The name is expressed as @@ -93,7 +94,8 @@ struct ctlname { #define CTL_MACHDEP 7 /* machine dependent */ #define CTL_USER 8 /* user-level */ #define CTL_DDB 9 /* DDB user interface, see ddb_var.h */ -#define CTL_MAXID 10 /* number of valid top-level ids */ +#define CTL_VFS 10 /* VFS sysctl's */ +#define CTL_MAXID 11 /* number of valid top-level ids */ #define CTL_NAMES { \ { 0, 0 }, \ @@ -106,6 +108,7 @@ struct ctlname { { "machdep", CTLTYPE_NODE }, \ { "user", CTLTYPE_NODE }, \ { "ddb", CTLTYPE_NODE }, \ + { "vfs", CTLTYPE_NODE }, \ } /* @@ -383,7 +386,7 @@ int sysctl_rtable __P((int *, u_int, void *, size_t *, void *, size_t)); int sysctl_clockrate __P((char *, size_t *)); int sysctl_rdstring __P((void *, size_t *, void *, char *)); int sysctl_rdstruct __P((void *, size_t *, void *, void *, int)); -int sysctl_vnode __P((char *, size_t *)); +int sysctl_vnode __P((char *, size_t *, struct proc *)); int sysctl_ntptime __P((char *, size_t *)); #ifdef GPROF int sysctl_doprof __P((int *, u_int, void *, size_t *, void *, size_t)); @@ -409,6 +412,8 @@ int net_sysctl __P((int *, u_int, void *, size_t *, void *, size_t, struct proc *)); int 
cpu_sysctl __P((int *, u_int, void *, size_t *, void *, size_t, struct proc *)); +int vfs_sysctl __P((int *, u_int, void *, size_t *, void *, size_t, + struct proc *)); #else /* !_KERNEL */ #include <sys/cdefs.h> diff --git a/sys/sys/systm.h b/sys/sys/systm.h index 757aa464ec2..1dbd1ed3c57 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -1,4 +1,4 @@ -/* $OpenBSD: systm.h,v 1.20 1997/03/06 07:05:54 tholo Exp $ */ +/* $OpenBSD: systm.h,v 1.21 1997/10/06 15:25:35 csapuntz Exp $ */ /* $NetBSD: systm.h,v 1.50 1996/06/09 04:55:09 briggs Exp $ */ /*- @@ -128,7 +128,7 @@ int enodev __P((void)); int enosys __P((void)); int enoioctl __P((void)); int enxio __P((void)); -int eopnotsupp __P((void)); +int eopnotsupp __P((void *)); int lkmenodev __P((void)); @@ -240,7 +240,7 @@ void kmstartup __P((void)); int nfs_mountroot __P((void)); int dk_mountroot __P((void)); -int (*mountroot) __P((void)); +int (*mountroot)__P((void)); #include <lib/libkern/libkern.h> diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index ed2fbcebca7..ebb93d38447 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vnode.h,v 1.8 1996/07/14 08:54:05 downsj Exp $ */ +/* $OpenBSD: vnode.h,v 1.9 1997/10/06 15:25:36 csapuntz Exp $ */ /* $NetBSD: vnode.h,v 1.38 1996/02/29 20:59:05 cgd Exp $ */ /* @@ -37,6 +37,7 @@ */ #include <sys/queue.h> +#include <sys/lock.h> /* * The vnode is the focus of all file activity in UNIX. There is a @@ -60,7 +61,7 @@ enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD }; enum vtagtype { VT_NON, VT_UFS, VT_NFS, VT_MFS, VT_MSDOSFS, VT_LFS, VT_LOFS, VT_FDESC, VT_PORTAL, VT_NULL, VT_UMAP, VT_KERNFS, VT_PROCFS, VT_AFS, VT_ISOFS, - VT_UNION, VT_ADOSFS, VT_EXT2FS, VT_NCPFS + VT_UNION, VT_ADOSFS, VT_EXT2FS, VT_NCPFS, VT_VFS }; /* @@ -69,6 +70,14 @@ enum vtagtype { */ LIST_HEAD(buflists, buf); +/* + * Reading or writing any of these items requires holding the appropriate lock. + * v_freelist is locked by the global vnode_free_list simple lock. 
+ * v_mntvnodes is locked by the global mntvnodes simple lock. + * v_flag, v_usecount, v_holdcount and v_writecount are + * locked by the v_interlock simple lock. + */ + struct vnode { u_long v_flag; /* vnode flags (see below) */ short v_usecount; /* reference count of users */ @@ -83,6 +92,7 @@ struct vnode { struct buflists v_cleanblkhd; /* clean blocklist head */ struct buflists v_dirtyblkhd; /* dirty blocklist head */ long v_numoutput; /* num of writes in progress */ + LIST_ENTRY(vnode) v_synclist; /* vnode with dirty buffers */ enum vtype v_type; /* vnode type */ union { struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */ @@ -98,7 +108,9 @@ struct vnode { int v_clen; /* length of current cluster */ int v_ralen; /* Read-ahead length */ daddr_t v_maxra; /* last readahead block */ - long v_spare[7]; /* round to 128 bytes */ + struct simplelock v_interlock; /* lock on usecount and flag */ + struct lock *v_vnlock; /* used for non-locking fs's */ + long v_spare[3]; /* round to 128 bytes */ enum vtagtype v_tag; /* type of underlying data */ void *v_data; /* private data for fs */ }; @@ -120,6 +132,7 @@ struct vnode { #define VBWAIT 0x0400 /* waiting for output to complete */ #define VALIASED 0x0800 /* vnode has an alias */ #define VDIROP 0x1000 /* LFS: vnode is involved in a directory op */ +#define VGONEHACK 0x2000 /* vgone: don't put me on the head of the free list */ /* * Vnode attributes. A field value of VNOVAL represents a field whose value @@ -151,7 +164,7 @@ struct vattr { * Flags for va_cflags. */ #define VA_UTIMES_NULL 0x01 /* utimes argument was NULL */ - +#define VA_EXCLUSIVE 0x02 /* exclusive create request */ /* * Flags for ioflag. 
*/ @@ -197,6 +210,14 @@ extern int vttoif_tab[]; #define V_SAVE 0x0001 /* vinvalbuf: sync file first */ #define V_SAVEMETA 0x0002 /* vinvalbuf: leave indirect blocks */ +#define REVOKEALL 0x0001 /* vop_revoke: revoke all aliases */ + + +TAILQ_HEAD(freelst, vnode); +extern struct freelst vnode_hold_list; /* free vnodes referencing buffers */ +extern struct freelst vnode_free_list; /* vnode free list */ +extern struct simplelock vnode_free_list_slock; + #ifdef DIAGNOSTIC #define HOLDRELE(vp) holdrele(vp) #define VATTR_NULL(vap) vattr_null(vap) @@ -208,11 +229,47 @@ void vattr_null __P((struct vattr *)); void vhold __P((struct vnode *)); void vref __P((struct vnode *)); #else -#define HOLDRELE(vp) (vp)->v_holdcnt-- /* decrease buf or page ref */ +#define HOLDRELE(vp) holdrele(vp) /* decrease buf or page ref */ #define VATTR_NULL(vap) (*(vap) = va_null) /* initialize a vattr */ -#define VHOLD(vp) (vp)->v_holdcnt++ /* increase buf or page ref */ -#define VREF(vp) (vp)->v_usecount++ /* increase reference */ -#endif + +static __inline holdrele(vp) + struct vnode *vp; +{ + simple_lock(&vp->v_interlock); + vp->v_holdcnt--; + if (!(vp->v_flag & VGONEHACK) && + vp->v_holdcnt == 0 && vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); + TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); + TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); + } + simple_unlock(&vp->v_interlock); +} +#define VHOLD(vp) vhold(vp) /* increase buf or page ref */ +static __inline vhold(vp) + struct vnode *vp; +{ + simple_lock(&vp->v_interlock); + if (!(vp->v_flag & VGONEHACK) && + vp->v_holdcnt == 0 && vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); + TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); + } + vp->v_holdcnt++; + simple_unlock(&vp->v_interlock); +} +#define VREF(vp) vref(vp) /* increase reference */ +static __inline vref(vp) + struct 
vnode *vp; +{ + simple_lock(&vp->v_interlock); + vp->v_usecount++; + simple_unlock(&vp->v_interlock); +} +#endif /* DIAGNOSTIC */ #define NULLVP ((struct vnode *)NULL) @@ -220,6 +277,7 @@ void vref __P((struct vnode *)); * Global vnode data. */ extern struct vnode *rootvnode; /* root (i.e. "/") vnode */ +extern time_t syncdelay; /* time to delay syncing vnodes */ extern int desiredvnodes; /* number of vnodes desired */ extern struct vattr va_null; /* predefined null vattr structure */ @@ -289,6 +347,11 @@ extern struct vnodeop_desc *vnodeop_descs[]; /* + * Interlock for scanning list of vnodes attached to a mountpoint + */ +struct simplelock mntvnode_slock; + +/* * This macro is very helpful in defining those offsets in the vdesc struct. * * This is stolen from X11R4. I ingored all the fancy stuff for @@ -371,14 +434,15 @@ int getvnode __P((struct filedesc *fdp, int fd, struct file **fpp)); void getnewfsid __P((struct mount *, int)); void vattr_null __P((struct vattr *vap)); int vcount __P((struct vnode *vp)); -void vclean __P((struct vnode *, int)); +void vclean __P((struct vnode *, int, struct proc *)); int vfinddev __P((dev_t, enum vtype, struct vnode **)); void vflushbuf __P((struct vnode *vp, int sync)); int vflush __P((struct mount *mp, struct vnode *vp, int flags)); void vntblinit __P((void)); void vwakeup __P((struct buf *)); -int vget __P((struct vnode *vp, int lockflag)); +int vget __P((struct vnode *vp, int lockflag, struct proc *p)); void vgone __P((struct vnode *vp)); +void vgonel __P((struct vnode *, struct proc *)); void vgoneall __P((struct vnode *vp)); int vinvalbuf __P((struct vnode *vp, int save, struct ucred *cred, struct proc *p, int slpflag, int slptimeo)); @@ -391,14 +455,25 @@ int vn_closefile __P((struct file *fp, struct proc *p)); int vn_ioctl __P((struct file *fp, u_long com, caddr_t data, struct proc *p)); int vn_open __P((struct nameidata *ndp, int fmode, int cmode)); +int vrecycle __P((struct vnode *vp, struct simplelock *inter_lkp, + 
struct proc *p)); int vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base, int len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *cred, int *aresid, struct proc *p)); +int vn_lock __P((struct vnode *vp, int flags, struct proc *p)); +int vop_noislocked __P((void *)); +int vop_nolock __P((void *)); +int vop_nounlock __P((void *)); +int vop_revoke __P((void *)); + int vn_read __P((struct file *fp, struct uio *uio, struct ucred *cred)); int vn_select __P((struct file *fp, int which, struct proc *p)); int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p)); int vn_write __P((struct file *fp, struct uio *uio, struct ucred *cred)); int vn_writechk __P((struct vnode *vp)); +void vn_syncer_add_to_worklist __P((struct vnode *vp, int delay)); +void sched_sync __P((struct proc *)); + struct vnode * checkalias __P((struct vnode *vp, dev_t nvp_rdev, struct mount *mp)); void vput __P((struct vnode *vp)); diff --git a/sys/sys/vnode_if.h b/sys/sys/vnode_if.h index abf129f1126..43b56b5dc76 100644 --- a/sys/sys/vnode_if.h +++ b/sys/sys/vnode_if.h @@ -291,6 +291,31 @@ static __inline int VOP_WRITE(vp, uio, ioflag, cred) return (VCALL(vp, VOFFSET(vop_write), &a)); } +struct vop_lease_args { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + struct proc *a_p; + struct ucred *a_cred; + int a_flag; +}; +extern struct vnodeop_desc vop_lease_desc; +static __inline int VOP_LEASE __P((struct vnode *, struct proc *, + struct ucred *, int)); +static __inline int VOP_LEASE(vp, p, cred, flag) + struct vnode *vp; + struct proc *p; + struct ucred *cred; + int flag; +{ + struct vop_lease_args a; + a.a_desc = VDESC(vop_lease); + a.a_vp = vp; + a.a_p = p; + a.a_cred = cred; + a.a_flag = flag; + return (VCALL(vp, VOFFSET(vop_lease), &a)); +} + struct vop_ioctl_args { struct vnodeop_desc *a_desc; struct vnode *a_vp; @@ -350,6 +375,24 @@ static __inline int VOP_SELECT(vp, which, fflags, cred, p) return (VCALL(vp, VOFFSET(vop_select), &a)); } +struct vop_revoke_args 
{ + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + int a_flags; +}; +extern struct vnodeop_desc vop_revoke_desc; +static __inline int VOP_REVOKE __P((struct vnode *, int)); +static __inline int VOP_REVOKE(vp, flags) + struct vnode *vp; + int flags; +{ + struct vop_revoke_args a; + a.a_desc = VDESC(vop_revoke); + a.a_vp = vp; + a.a_flags = flags; + return (VCALL(vp, VOFFSET(vop_revoke), &a)); +} + struct vop_mmap_args { struct vnodeop_desc *a_desc; struct vnode *a_vp; @@ -582,19 +625,19 @@ struct vop_readdir_args { struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; - u_long *a_cookies; - int a_ncookies; + int *a_ncookies; + u_long **a_cookies; }; extern struct vnodeop_desc vop_readdir_desc; static __inline int VOP_READDIR __P((struct vnode *, struct uio *, - struct ucred *, int *, u_long *, int)); -static __inline int VOP_READDIR(vp, uio, cred, eofflag, cookies, ncookies) + struct ucred *, int *, int *, u_long **)); +static __inline int VOP_READDIR(vp, uio, cred, eofflag, ncookies, cookies) struct vnode *vp; struct uio *uio; struct ucred *cred; int *eofflag; - u_long *cookies; - int ncookies; + int *ncookies; + u_long **cookies; { struct vop_readdir_args a; a.a_desc = VDESC(vop_readdir); @@ -602,8 +645,8 @@ static __inline int VOP_READDIR(vp, uio, cred, eofflag, cookies, ncookies) a.a_uio = uio; a.a_cred = cred; a.a_eofflag = eofflag; - a.a_cookies = cookies; a.a_ncookies = ncookies; + a.a_cookies = cookies; return (VCALL(vp, VOFFSET(vop_readdir), &a)); } @@ -650,60 +693,78 @@ static __inline int VOP_ABORTOP(dvp, cnp) struct vop_inactive_args { struct vnodeop_desc *a_desc; struct vnode *a_vp; + struct proc *a_p; }; extern struct vnodeop_desc vop_inactive_desc; -static __inline int VOP_INACTIVE __P((struct vnode *)); -static __inline int VOP_INACTIVE(vp) +static __inline int VOP_INACTIVE __P((struct vnode *, struct proc *)); +static __inline int VOP_INACTIVE(vp, p) struct vnode *vp; + struct proc *p; { struct vop_inactive_args a; a.a_desc = 
VDESC(vop_inactive); a.a_vp = vp; + a.a_p = p; return (VCALL(vp, VOFFSET(vop_inactive), &a)); } struct vop_reclaim_args { struct vnodeop_desc *a_desc; struct vnode *a_vp; + struct proc *a_p; }; extern struct vnodeop_desc vop_reclaim_desc; -static __inline int VOP_RECLAIM __P((struct vnode *)); -static __inline int VOP_RECLAIM(vp) +static __inline int VOP_RECLAIM __P((struct vnode *, struct proc *)); +static __inline int VOP_RECLAIM(vp, p) struct vnode *vp; + struct proc *p; { struct vop_reclaim_args a; a.a_desc = VDESC(vop_reclaim); a.a_vp = vp; + a.a_p = p; return (VCALL(vp, VOFFSET(vop_reclaim), &a)); } struct vop_lock_args { struct vnodeop_desc *a_desc; struct vnode *a_vp; + int a_flags; + struct proc *a_p; }; extern struct vnodeop_desc vop_lock_desc; -static __inline int VOP_LOCK __P((struct vnode *)); -static __inline int VOP_LOCK(vp) +static __inline int VOP_LOCK __P((struct vnode *, int, struct proc *)); +static __inline int VOP_LOCK(vp, flags, p) struct vnode *vp; + int flags; + struct proc *p; { struct vop_lock_args a; a.a_desc = VDESC(vop_lock); a.a_vp = vp; + a.a_flags = flags; + a.a_p = p; return (VCALL(vp, VOFFSET(vop_lock), &a)); } struct vop_unlock_args { struct vnodeop_desc *a_desc; struct vnode *a_vp; + int a_flags; + struct proc *a_p; }; extern struct vnodeop_desc vop_unlock_desc; -static __inline int VOP_UNLOCK __P((struct vnode *)); -static __inline int VOP_UNLOCK(vp) +static __inline int VOP_UNLOCK __P((struct vnode *, int, struct proc *)); +static __inline int VOP_UNLOCK(vp, flags, p) struct vnode *vp; + int flags; + struct proc *p; { struct vop_unlock_args a; a.a_desc = VDESC(vop_unlock); a.a_vp = vp; + a.a_flags = flags; + a.a_p = p; return (VCALL(vp, VOFFSET(vop_unlock), &a)); } @@ -864,6 +925,37 @@ static __inline int VOP_VALLOC(pvp, mode, cred, vpp) return (VCALL(pvp, VOFFSET(vop_valloc), &a)); } +struct vop_balloc_args { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + off_t a_startoffset; + int a_size; + struct ucred *a_cred; + 
int a_flags; + struct buf **a_bpp; +}; +extern struct vnodeop_desc vop_balloc_desc; +static __inline int VOP_BALLOC __P((struct vnode *, off_t, int, + struct ucred *, int, struct buf **)); +static __inline int VOP_BALLOC(vp, startoffset, size, cred, flags, bpp) + struct vnode *vp; + off_t startoffset; + int size; + struct ucred *cred; + int flags; + struct buf **bpp; +{ + struct vop_balloc_args a; + a.a_desc = VDESC(vop_balloc); + a.a_vp = vp; + a.a_startoffset = startoffset; + a.a_size = size; + a.a_cred = cred; + a.a_flags = flags; + a.a_bpp = bpp; + return (VCALL(vp, VOFFSET(vop_balloc), &a)); +} + struct vop_reallocblks_args { struct vnodeop_desc *a_desc; struct vnode *a_vp; @@ -957,31 +1049,6 @@ static __inline int VOP_UPDATE(vp, access, modify, waitfor) return (VCALL(vp, VOFFSET(vop_update), &a)); } -struct vop_lease_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - struct proc *a_p; - struct ucred *a_cred; - int a_flag; -}; -extern struct vnodeop_desc vop_lease_desc; -static __inline int VOP_LEASE __P((struct vnode *, struct proc *, - struct ucred *, int)); -static __inline int VOP_LEASE(vp, p, cred, flag) - struct vnode *vp; - struct proc *p; - struct ucred *cred; - int flag; -{ - struct vop_lease_args a; - a.a_desc = VDESC(vop_lease); - a.a_vp = vp; - a.a_p = p; - a.a_cred = cred; - a.a_flag = flag; - return (VCALL(vp, VOFFSET(vop_lease), &a)); -} - struct vop_whiteout_args { struct vnodeop_desc *a_desc; struct vnode *a_dvp; diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index e25073d6715..e4bf9e3d285 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_alloc.c,v 1.7 1997/07/22 10:31:50 deraadt Exp $ */ +/* $OpenBSD: ffs_alloc.c,v 1.8 1997/10/06 15:26:28 csapuntz Exp $ */ /* $NetBSD: ffs_alloc.c,v 1.11 1996/05/11 18:27:09 mycroft Exp $ */ /* @@ -59,7 +59,7 @@ extern u_long nextgennumber; static daddr_t ffs_alloccg __P((struct inode *, int, daddr_t, int)); -static daddr_t 
ffs_alloccgblk __P((struct fs *, struct cg *, daddr_t)); +static daddr_t ffs_alloccgblk __P((struct inode *, struct buf *, daddr_t)); static daddr_t ffs_clusteralloc __P((struct inode *, int, daddr_t, int)); static ino_t ffs_dirpref __P((struct fs *)); static daddr_t ffs_fragextend __P((struct inode *, int, long, int, int)); @@ -70,6 +70,11 @@ static u_long ffs_hashalloc __P((struct inode *, int, long, int, static daddr_t ffs_nodealloccg __P((struct inode *, int, daddr_t, int)); static daddr_t ffs_mapsearch __P((struct fs *, struct cg *, daddr_t, int)); +#ifdef DIAGNOSTIC +static int ffs_checkblk __P((struct inode *, daddr_t, long)); +#endif +int ffs_freefile __P((struct vop_vfree_args *)); + /* * Allocate a block in the file system. * @@ -272,7 +277,8 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) if (bno > 0) { bp->b_blkno = fsbtodb(fs, bno); (void) vnode_pager_uncache(ITOV(ip)); - ffs_blkfree(ip, bprev, (long)osize); + if (!DOINGSOFTDEP(ITOV(ip))) + ffs_blkfree(ip, bprev, (long)osize); if (nsize < request) ffs_blkfree(ip, bno + numfrags(fs, nsize), (long)(request - nsize)); @@ -314,15 +320,10 @@ nospace: * Note that the error return is not reflected back to the user. Rather * the previous block allocation will be used. 
*/ -#ifdef DEBUG -#include <sys/sysctl.h> + int doasyncfree = 1; -struct ctldebug debug14 = { "doasyncfree", &doasyncfree }; +int doreallocblks = 1; int prtrealloc = 0; -struct ctldebug debug15 = { "prtrealloc", &prtrealloc }; -#else -#define doasyncfree 1 -#endif int ffs_reallocblks(v) @@ -343,6 +344,9 @@ ffs_reallocblks(v) int i, len, start_lvl, end_lvl, pref, ssize; struct timespec ts; + if (doreallocblks == 0) + return (ENOSPC); + vp = ap->a_vp; ip = VTOI(vp); fs = ip->i_fs; @@ -352,10 +356,22 @@ ffs_reallocblks(v) len = buflist->bs_nchildren; start_lbn = buflist->bs_children[0]->b_lblkno; end_lbn = start_lbn + len - 1; + #ifdef DIAGNOSTIC + for (i = 0; i < len; i++) + if (!ffs_checkblk(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) + panic("ffs_reallocblks: unallocated block 1"); + for (i = 1; i < len; i++) if (buflist->bs_children[i]->b_lblkno != start_lbn + i) - panic("ffs_reallocblks: non-cluster"); + panic("ffs_reallocblks: non-logical cluster"); + + blkno = buflist->bs_children[0]->b_blkno; + ssize = fsbtodb(fs, fs->fs_frag); + for (i = 1; i < len - 1; i++) + if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize)) + panic("ffs_reallocblks: non-physical cluster %d", i); #endif /* * If the latest allocation is in a new cylinder group, assume that @@ -422,9 +438,14 @@ ffs_reallocblks(v) #endif blkno = newblk; for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { - if (i == ssize) + if (i == ssize) { bap = ebap; + soff = -i; + } #ifdef DIAGNOSTIC + if (!ffs_checkblk(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) + panic("ffs_reallocblks: unallocated block 2"); if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap) panic("ffs_reallocblks: alloc mismatch"); #endif @@ -432,6 +453,17 @@ ffs_reallocblks(v) if (prtrealloc) printf(" %d,", *bap); #endif + if (DOINGSOFTDEP(vp)) { + if (sbap == &ip->i_ffs_db[0] && i < ssize) + softdep_setup_allocdirect(ip, start_lbn + i, + blkno, *bap, fs->fs_bsize, 
fs->fs_bsize, + buflist->bs_children[i]); + else + softdep_setup_allocindir_page(ip, start_lbn + i, + i < ssize ? sbp : ebp, soff + i, blkno, + *bap, buflist->bs_children[i]); + } + *bap++ = blkno; } /* @@ -473,10 +505,15 @@ ffs_reallocblks(v) printf("\n\tnew:"); #endif for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { - ffs_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), - fs->fs_bsize); + if (!DOINGSOFTDEP(vp)) + ffs_blkfree(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), + fs->fs_bsize); buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); #ifdef DEBUG + if (!ffs_checkblk(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) + panic("ffs_reallocblks: unallocated block 3"); if (prtrealloc) printf(" %d,", blkno); #endif @@ -815,6 +852,9 @@ ffs_fragextend(ip, cg, bprev, osize, nsize) fs->fs_cs(fs, cg).cs_nffree--; } fs->fs_fmod = 1; + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_blkmapdep(bp, fs, bprev); + bdwrite(bp); return (bprev); } @@ -835,8 +875,8 @@ ffs_alloccg(ip, cg, bpref, size) register struct fs *fs; register struct cg *cgp; struct buf *bp; - register int i; - int error, bno, frags, allocsiz; + daddr_t bno, blkno; + int error, i, frags, allocsiz; fs = ip->i_fs; if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) @@ -855,7 +895,7 @@ ffs_alloccg(ip, cg, bpref, size) } cgp->cg_time = time.tv_sec; if (size == fs->fs_bsize) { - bno = ffs_alloccgblk(fs, cgp, bpref); + bno = ffs_alloccgblk(ip, bp, bpref); bdwrite(bp); return (bno); } @@ -877,7 +917,7 @@ ffs_alloccg(ip, cg, bpref, size) brelse(bp); return (NULL); } - bno = ffs_alloccgblk(fs, cgp, bpref); + bno = ffs_alloccgblk(ip, bp, bpref); bpref = dtogd(fs, bno); for (i = frags; i < fs->fs_frag; i++) setbit(cg_blksfree(cgp), bpref + i); @@ -904,8 +944,12 @@ ffs_alloccg(ip, cg, bpref, size) cgp->cg_frsum[allocsiz]--; if (frags != allocsiz) cgp->cg_frsum[allocsiz - frags]++; - bdwrite(bp); - return (cg * fs->fs_fpg + bno); + + blkno = cg * fs->fs_fpg + 
bno; + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_blkmapdep(bp, fs, blkno); + bdwrite(bp); + return ((u_long)blkno); } /* @@ -920,16 +964,20 @@ ffs_alloccg(ip, cg, bpref, size) * blocks may be fragmented by the routine that allocates them. */ static daddr_t -ffs_alloccgblk(fs, cgp, bpref) - register struct fs *fs; - register struct cg *cgp; +ffs_alloccgblk(ip, bp, bpref) + struct inode *ip; + struct buf *bp; daddr_t bpref; { + struct fs *fs; + struct cg *cgp; daddr_t bno, blkno; int cylno, pos, delta; short *cylbp; register int i; + fs = ip->i_fs; + cgp = (struct cg *)bp->b_data; if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) { bpref = cgp->cg_rotor; goto norot; @@ -1020,7 +1068,10 @@ gotit: cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--; cg_blktot(cgp)[cylno]--; fs->fs_fmod = 1; - return (cgp->cg_cgx * fs->fs_fpg + bno); + blkno = cgp->cg_cgx * fs->fs_fpg + bno; + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_blkmapdep(bp, fs, blkno); + return (blkno); } /* @@ -1040,7 +1091,7 @@ ffs_clusteralloc(ip, cg, bpref, len) register struct fs *fs; register struct cg *cgp; struct buf *bp; - int i, run, bno, bit, map; + int i, got, run, bno, bit, map; u_char *mapp; int32_t *lp; @@ -1094,7 +1145,7 @@ ffs_clusteralloc(ip, cg, bpref, len) mapp = &cg_clustersfree(cgp)[bpref / NBBY]; map = *mapp++; bit = 1 << (bpref % NBBY); - for (run = 0, i = bpref; i < cgp->cg_nclusterblks; i++) { + for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) { if ((map & bit) == 0) { run = 0; } else { @@ -1102,22 +1153,32 @@ ffs_clusteralloc(ip, cg, bpref, len) if (run == len) break; } - if ((i & (NBBY - 1)) != (NBBY - 1)) { + if ((got & (NBBY - 1)) != (NBBY - 1)) { bit <<= 1; } else { map = *mapp++; bit = 1; } } - if (i >= cgp->cg_nclusterblks) + if (got >= cgp->cg_nclusterblks) goto fail; /* * Allocate the cluster that we have found. 
*/ - bno = cg * fs->fs_fpg + blkstofrags(fs, i - run + 1); +#ifdef DIAGNOSTIC + for (i = 1; i <= len; i++) + if (!ffs_isblock(fs, cg_blksfree(cgp), got - run + i)) + panic("ffs_clusteralloc: map mismatch"); +#endif + bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1); +#ifdef DIAGNOSTIC + if (dtog(fs, bno) != cg) + panic("ffs_clusteralloc: allocated out of group"); +#endif + len = blkstofrags(fs, len); for (i = 0; i < len; i += fs->fs_frag) - if (ffs_alloccgblk(fs, cgp, bno + i) != bno + i) + if (ffs_alloccgblk(ip, bp, bno + i) != bno + i) panic("ffs_clusteralloc: lost block"); bdwrite(bp); return (bno); @@ -1195,6 +1256,9 @@ ffs_nodealloccg(ip, cg, ipref, mode) panic("ffs_nodealloccg: block not in map"); /* NOTREACHED */ gotit: + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref); + setbit(cg_inosused(cgp), ipref); cgp->cg_cs.cs_nifree--; fs->fs_cstotal.cs_nifree--; @@ -1229,7 +1293,8 @@ ffs_blkfree(ip, bno, size) int i, error, cg, blk, frags, bbase; fs = ip->i_fs; - if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { + if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || + fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { printf("dev = 0x%x, bsize = %d, size = %ld, fs = %s\n", ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); panic("blkfree: bad size"); @@ -1255,7 +1320,7 @@ ffs_blkfree(ip, bno, size) bno = dtogd(fs, bno); if (size == fs->fs_bsize) { blkno = fragstoblks(fs, bno); - if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) { + if (!ffs_isfreeblock(fs, cg_blksfree(cgp), blkno)) { printf("dev = 0x%x, block = %d, fs = %s\n", ip->i_dev, bno, fs->fs_fsmnt); panic("blkfree: freeing free block"); @@ -1318,8 +1383,6 @@ ffs_blkfree(ip, bno, size) /* * Free an inode. - * - * The specified inode is placed back in the free map. 
*/ int ffs_vfree(v) @@ -1330,6 +1393,28 @@ ffs_vfree(v) ino_t a_ino; int a_mode; } */ *ap = v; + + + if (DOINGSOFTDEP(ap->a_pvp)) { + softdep_freefile(ap); + return (0); + } + + return (ffs_freefile(ap)); +} + +/* + * Do the actual free operation. + * The specified inode is placed back in the free map. + */ +int +ffs_freefile(ap) + struct vop_vfree_args /* { + struct vnode *a_pvp; + ino_t a_ino; + int a_mode; + } */ *ap; +{ register struct fs *fs; register struct cg *cgp; register struct inode *pip; @@ -1347,7 +1432,7 @@ ffs_vfree(v) (int)fs->fs_cgsize, NOCRED, &bp); if (error) { brelse(bp); - return (0); + return (error); } cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp)) { @@ -1378,6 +1463,60 @@ ffs_vfree(v) return (0); } +#ifdef DIAGNOSTIC +/* + * Verify allocation of a block or fragment. Returns true if block or + * fragment is allocated, false if it is free. + */ +int +ffs_checkblk(ip, bno, size) + struct inode *ip; + daddr_t bno; + long size; +{ + struct fs *fs; + struct cg *cgp; + struct buf *bp; + int i, error, frags, free; + + fs = ip->i_fs; + if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { + printf("bsize = %d, size = %d, fs = %s\n", + fs->fs_bsize, size, fs->fs_fsmnt); + panic("checkblk: bad size"); + } + if ((u_int)bno >= fs->fs_size) + panic("checkblk: bad block %d", bno); + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) { + /* XXX -probably should pannic here */ + brelse(bp); + return (-1); + } + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp)) { + /* XXX -probably should pannic here */ + brelse(bp); + return (-1); + } + bno = dtogd(fs, bno); + if (size == fs->fs_bsize) { + free = ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno)); + } else { + frags = numfrags(fs, size); + for (free = 0, i = 0; i < frags; i++) + if (isset(cg_blksfree(cgp), bno + i)) + free++; + if (free != 0 && free != frags) + panic("checkblk: partially free fragment"); + } + brelse(bp); 
+ return (!free); +} +#endif /* DIAGNOSTIC */ + + /* * Find a block of the specified size in the specified cylinder group. * @@ -1550,3 +1689,4 @@ ffs_fserr(fs, uid, cp) log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp); } + diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c index 5a7dc3afcc2..285ca5f2ca7 100644 --- a/sys/ufs/ffs/ffs_balloc.c +++ b/sys/ufs/ffs/ffs_balloc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_balloc.c,v 1.3 1997/05/30 08:34:19 downsj Exp $ */ +/* $OpenBSD: ffs_balloc.c,v 1.4 1997/10/06 15:26:29 csapuntz Exp $ */ /* $NetBSD: ffs_balloc.c,v 1.3 1996/02/09 22:22:21 christos Exp $ */ /* @@ -41,6 +41,7 @@ #include <sys/buf.h> #include <sys/proc.h> #include <sys/file.h> +#include <sys/mount.h> #include <sys/vnode.h> #include <vm/vm.h> @@ -58,27 +59,44 @@ * the inode and the logical block number in a file. */ int -ffs_balloc(ip, bn, size, cred, bpp, flags) - register struct inode *ip; - register daddr_t bn; +ffs_balloc(v) + void *v; +{ + struct vop_balloc_args /* { + struct vnode *a_vp; + off_t a_startpoint; + int a_size; + struct ucred *a_cred; + int a_flags; + struct buf *a_bpp; + } */ *ap = v; + + struct inode *ip; + daddr_t lbn; int size; struct ucred *cred; - struct buf **bpp; int flags; -{ - register struct fs *fs; - register daddr_t nb; + struct fs *fs; + daddr_t nb; struct buf *bp, *nbp; - struct vnode *vp = ITOV(ip); + struct vnode *vp; struct indir indirs[NIADDR + 2]; - daddr_t newb, lbn, *bap, pref; - int osize, nsize, num, i, error; + daddr_t newb, *bap, pref; + int deallocated, osize, nsize, num, i, error; + daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR+1]; - *bpp = NULL; - if (bn < 0) - return (EFBIG); + vp = ap->a_vp; + ip = VTOI(vp); fs = ip->i_fs; - lbn = bn; + lbn = lblkno(fs, ap->a_startoffset); + size = blkoff(fs, ap->a_startoffset) + ap->a_size; + if (size > fs->fs_bsize) + panic("ffs_balloc; blk too big"); + *ap->a_bpp = NULL; + if (lbn < 0) + return (EFBIG); + cred = ap->a_cred; + flags = ap->a_flags; /* * 
If the next write will extend the file into a new block, @@ -86,7 +104,7 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) * this fragment has to be extended to be a full block. */ nb = lblkno(fs, ip->i_ffs_size); - if (nb < NDADDR && nb < bn) { + if (nb < NDADDR && nb < lbn) { osize = blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { error = ffs_realloccg(ip, nb, @@ -94,6 +112,11 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) osize, (int)fs->fs_bsize, cred, &bp); if (error) return (error); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, nb, + dbtofsb(fs, bp->b_blkno), ip->i_ffs_db[nb], + fs->fs_bsize, osize, bp); + ip->i_ffs_size = (nb + 1) * fs->fs_bsize; vnode_pager_setsize(vp, (u_long)ip->i_ffs_size); ip->i_ffs_db[nb] = dbtofsb(fs, bp->b_blkno); @@ -107,15 +130,15 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) /* * The first NDADDR blocks are direct blocks */ - if (bn < NDADDR) { - nb = ip->i_ffs_db[bn]; - if (nb != 0 && ip->i_ffs_size >= (bn + 1) * fs->fs_bsize) { - error = bread(vp, bn, fs->fs_bsize, NOCRED, &bp); + if (lbn < NDADDR) { + nb = ip->i_ffs_db[lbn]; + if (nb != 0 && ip->i_ffs_size >= (lbn + 1) * fs->fs_bsize) { + error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } - *bpp = bp; + *ap->a_bpp = bp; return (0); } if (nb != 0) { @@ -125,43 +148,52 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) osize = fragroundup(fs, blkoff(fs, ip->i_ffs_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { - error = bread(vp, bn, osize, NOCRED, &bp); + error = bread(vp, lbn, osize, NOCRED, &bp); if (error) { brelse(bp); return (error); } } else { - error = ffs_realloccg(ip, bn, - ffs_blkpref(ip, bn, (int)bn, &ip->i_ffs_db[0]), + error = ffs_realloccg(ip, lbn, + ffs_blkpref(ip, lbn, (int)lbn, + &ip->i_ffs_db[0]), osize, nsize, cred, &bp); if (error) return (error); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, lbn, + dbtofsb(fs, bp->b_blkno), nb, + nsize, osize, bp); } } else { - if (ip->i_ffs_size 
< (bn + 1) * fs->fs_bsize) + if (ip->i_ffs_size < (lbn + 1) * fs->fs_bsize) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; - error = ffs_alloc(ip, bn, - ffs_blkpref(ip, bn, (int)bn, &ip->i_ffs_db[0]), + error = ffs_alloc(ip, lbn, + ffs_blkpref(ip, lbn, (int)lbn, &ip->i_ffs_db[0]), nsize, cred, &newb); if (error) return (error); - bp = getblk(vp, bn, nsize, 0, 0); + bp = getblk(vp, lbn, nsize, 0, 0); bp->b_blkno = fsbtodb(fs, newb); if (flags & B_CLRBUF) clrbuf(bp); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, lbn, newb, 0, + nsize, 0, bp); + } - ip->i_ffs_db[bn] = dbtofsb(fs, bp->b_blkno); + ip->i_ffs_db[lbn] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; - *bpp = bp; + *ap->a_bpp = bp; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; - if ((error = ufs_getlbns(vp, bn, indirs, &num)) != 0) + if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) return(error); #ifdef DIAGNOSTIC if (num < 1) @@ -172,6 +204,9 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) */ --num; nb = ip->i_ffs_ib[indirs[0].in_off]; + + allocib = NULL; + allocblk = allociblk; if (nb == 0) { pref = ffs_blkpref(ip, lbn, 0, (daddr_t *)0); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, @@ -179,18 +214,26 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) if (error) return (error); nb = newb; + + *allocblk++ = nb; bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0); - bp->b_blkno = fsbtodb(fs, newb); + bp->b_blkno = fsbtodb(fs, nb); clrbuf(bp); - /* - * Write synchronously so that indirect blocks - * never point at garbage. - */ - if ((error = bwrite(bp)) != 0) { - ffs_blkfree(ip, nb, fs->fs_bsize); - return (error); - } - ip->i_ffs_ib[indirs[0].in_off] = newb; + + if (DOINGSOFTDEP(vp)) { + softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, + newb, 0, fs->fs_bsize, 0, bp); + bdwrite(bp); + } else { + /* + * Write synchronously so that indirect blocks + * never point at garbage. 
+ */ + if ((error = bwrite(bp)) != 0) + goto fail; + } + allocib = &ip->i_ffs_ib[indirs[0].in_off]; + *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* @@ -201,7 +244,7 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); - return (error); + goto fail; } bap = (daddr_t *)bp->b_data; nb = bap[indirs[i].in_off]; @@ -218,20 +261,27 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) &newb); if (error) { brelse(bp); - return (error); + goto fail; } nb = newb; + *allocblk++ = nb; nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); clrbuf(nbp); - /* - * Write synchronously so that indirect blocks - * never point at garbage. - */ - if ((error = bwrite(nbp)) != 0) { - ffs_blkfree(ip, nb, fs->fs_bsize); - brelse(bp); - return (error); + + if (DOINGSOFTDEP(vp)) { + softdep_setup_allocindir_meta(nbp, ip, bp, + indirs[i - 1].in_off, nb); + bdwrite(nbp); + } else { + /* + * Write synchronously so that indirect blocks + * never point at garbage. 
+ */ + if ((error = bwrite(nbp)) != 0) { + brelse(bp); + goto fail; + } } bap[indirs[i - 1].in_off] = nb; /* @@ -253,13 +303,17 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) &newb); if (error) { brelse(bp); - return (error); + goto fail; } nb = newb; + *allocblk++ = nb; nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); if (flags & B_CLRBUF) clrbuf(nbp); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocindir_page(ip, lbn, bp, + indirs[i].in_off, nb, 0, nbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use @@ -270,7 +324,7 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) } else { bdwrite(bp); } - *bpp = nbp; + *ap->a_bpp = nbp; return (0); } brelse(bp); @@ -278,12 +332,36 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); if (error) { brelse(nbp); - return (error); + goto fail; } } else { nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); } - *bpp = nbp; + *ap->a_bpp = nbp; return (0); + +fail: + /* + * If we have failed part way through block allocation, we + * have to deallocate any indirect blocks that we have allocated. + */ + for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { + ffs_blkfree(ip, *blkp, fs->fs_bsize); + deallocated += fs->fs_bsize; + } + if (allocib != NULL) + *allocib = 0; + if (deallocated) { +#ifdef QUOTA + /* + * Restore user's disk quota because allocation failed. 
+ */ + (void) chkdq(ip, (long)-btodb(deallocated), cred, FORCE); +#endif + ip->i_ffs_blocks -= btodb(deallocated); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + } + return (error); + } diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h index 94ca01ad634..3fe5a46bfa8 100644 --- a/sys/ufs/ffs/ffs_extern.h +++ b/sys/ufs/ffs/ffs_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_extern.h,v 1.2 1996/02/27 07:27:36 niklas Exp $ */ +/* $OpenBSD: ffs_extern.h,v 1.3 1997/10/06 15:26:29 csapuntz Exp $ */ /* $NetBSD: ffs_extern.h,v 1.4 1996/02/09 22:22:22 christos Exp $ */ /*- @@ -36,6 +36,21 @@ * @(#)ffs_extern.h 8.3 (Berkeley) 4/16/94 */ +#define FFS_CLUSTERREAD 1 /* cluster reading enabled */ +#define FFS_CLUSTERWRITE 2 /* cluster writing enabled */ +#define FFS_REALLOCBLKS 3 /* block reallocation enabled */ +#define FFS_ASYNCFREE 4 /* asynchronous block freeing enabled */ +#define FFS_MAXID 5 /* number of valid ffs ids */ + +#define FFS_NAMES { \ + { 0, 0 }, \ + { "doclusterread", CTLTYPE_INT }, \ + { "doclusterwrite", CTLTYPE_INT }, \ + { "doreallocblks", CTLTYPE_INT }, \ + { "doasyncfree", CTLTYPE_INT }, \ +} + + struct buf; struct fid; struct fs; @@ -47,6 +62,7 @@ struct statfs; struct timeval; struct ucred; struct ufsmount; +struct vfsconf; struct uio; struct vnode; struct mbuf; @@ -67,11 +83,10 @@ int ffs_vfree __P((void *)); void ffs_clusteracct __P((struct fs *, struct cg *, daddr_t, int)); /* ffs_balloc.c */ -int ffs_balloc __P((struct inode *, daddr_t, int, struct ucred *, - struct buf **, int)); +int ffs_balloc __P((void *)); /* ffs_inode.c */ -void ffs_init __P((void)); +int ffs_init __P((struct vfsconf *)); int ffs_update __P((void *)); int ffs_truncate __P((void *)); @@ -81,6 +96,8 @@ void ffs_fragacct __P((struct fs *, int, int32_t[], int)); #ifdef DIAGNOSTIC void ffs_checkoverlap __P((struct buf *, struct inode *)); #endif +int ffs_freefile __P((struct vop_vfree_args *)); +int ffs_isfreeblock __P((struct fs *, unsigned char *, daddr_t)); int ffs_isblock 
__P((struct fs *, unsigned char *, daddr_t)); void ffs_clrblock __P((struct fs *, u_char *, daddr_t)); void ffs_setblock __P((struct fs *, unsigned char *, daddr_t)); @@ -100,6 +117,8 @@ int ffs_vget __P((struct mount *, ino_t, struct vnode **)); int ffs_fhtovp __P((struct mount *, struct fid *, struct mbuf *, struct vnode **, int *, struct ucred **)); int ffs_vptofh __P((struct vnode *, struct fid *)); +int ffs_sysctl __P((int *, u_int, void *, size_t *, void *, size_t, + struct proc *)); int ffs_sbupdate __P((struct ufsmount *, int)); int ffs_cgupdate __P((struct ufsmount *, int)); @@ -108,6 +127,38 @@ int ffs_read __P((void *)); int ffs_write __P((void *)); int ffs_fsync __P((void *)); int ffs_reclaim __P((void *)); + + +/* + * Soft dependency function prototypes. + */ + +struct vop_vfree_args; +struct vop_fsync_args; + +void softdep_initialize __P((void)); +int softdep_process_worklist __P((struct mount *)); +int softdep_mount __P((struct vnode *, struct mount *, struct fs *, + struct ucred *)); +int softdep_flushfiles __P((struct mount *, int, struct proc *)); +void softdep_update_inodeblock __P((struct inode *, struct buf *, int)); +void softdep_load_inodeblock __P((struct inode *)); +int softdep_fsync __P((struct vnode *)); +void softdep_freefile __P((struct vop_vfree_args *)); +void softdep_setup_freeblocks __P((struct inode *, off_t)); +void softdep_deallocate_dependencies __P((struct buf *)); +void softdep_setup_inomapdep __P((struct buf *, struct inode *, ino_t)); +void softdep_setup_blkmapdep __P((struct buf *, struct fs *, daddr_t)); +void softdep_setup_allocdirect __P((struct inode *, ufs_lbn_t, daddr_t, + daddr_t, long, long, struct buf *)); +void softdep_setup_allocindir_meta __P((struct buf *, struct inode *, + struct buf *, int, daddr_t)); +void softdep_setup_allocindir_page __P((struct inode *, ufs_lbn_t, + struct buf *, int, daddr_t, daddr_t, struct buf *)); +void softdep_disk_io_initiation __P((struct buf *)); +void softdep_disk_write_complete 
__P((struct buf *)); +int softdep_sync_metadata __P((struct vop_fsync_args *)); + __END_DECLS extern int (**ffs_vnodeop_p) __P((void *)); diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index 488841b5e7f..ba1eb996cb9 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_inode.c,v 1.6 1997/05/30 08:34:21 downsj Exp $ */ +/* $OpenBSD: ffs_inode.c,v 1.7 1997/10/06 15:26:30 csapuntz Exp $ */ /* $NetBSD: ffs_inode.c,v 1.10 1996/05/11 18:27:19 mycroft Exp $ */ /* @@ -61,10 +61,12 @@ static int ffs_indirtrunc __P((struct inode *, daddr_t, daddr_t, daddr_t, int, long *)); -void -ffs_init() +int +ffs_init(vfsp) + struct vfsconf *vfsp; { - ufs_init(); + softdep_initialize(); + return (ufs_init(vfsp)); } /* @@ -101,7 +103,8 @@ ffs_update(v) ip->i_flag &= ~IN_ACCESS; } if ((ip->i_flag & - (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && + ap->a_waitfor != MNT_WAIT) return (0); if (ip->i_flag & IN_ACCESS) { ip->i_ffs_atime = ap->a_access->tv_sec; @@ -133,11 +136,17 @@ ffs_update(v) brelse(bp); return (error); } + + if (DOINGSOFTDEP(ap->a_vp)) + softdep_update_inodeblock(ip, bp, ap->a_waitfor); + else if (ip->i_effnlink != ip->i_ffs_nlink) + panic("ffs_update: bad link cnt"); + *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = ip->i_din.ffs_din; - if (ap->a_waitfor) + if (ap->a_waitfor && (ap->a_vp->v_mount->mnt_flag & MNT_ASYNC) == 0) { return (bwrite(bp)); - else { + } else { bdwrite(bp); return (0); } @@ -179,6 +188,8 @@ ffs_truncate(v) if (length < 0) return (EINVAL); oip = VTOI(ovp); + if (oip->i_ffs_size == length) + return (0); TIMEVAL_TO_TIMESPEC(&time, &ts); if (ovp->v_type == VLNK && (oip->i_ffs_size < ovp->v_mount->mnt_maxsymlinklen || @@ -202,8 +213,34 @@ ffs_truncate(v) return (error); #endif vnode_pager_setsize(ovp, (u_long)length); + ovp->v_lasta = ovp->v_clen = ovp->v_cstart = ovp->v_lastw = 0; + if (DOINGSOFTDEP(ovp)) 
{ + if (length > 0) { + /* + * If a file is only partially truncated, then + * we have to clean up the data structures + * describing the allocation past the truncation + * point. Finding and deallocating those structures + * is a lot of work. Since partial truncation occurs + * rarely, we solve the problem by syncing the file + * so that it will have no data structures left. + */ + if ((error = VOP_FSYNC(ovp, ap->a_cred, MNT_WAIT, + ap->a_p)) != 0) + return (error); + } else { +#ifdef QUOTA + (void) chkdq(oip, -oip->i_ffs_blocks, NOCRED, 0); +#endif + softdep_setup_freeblocks(oip, length); + (void) vinvalbuf(ovp, 0, ap->a_cred, ap->a_p, 0, 0); + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (VOP_UPDATE(ovp, &ts, &ts, 0)); + } + } + fs = oip->i_fs; - osize = oip->i_ffs_size; + osize = oip->i_ffs_size; /* * Lengthen the size of the file. We must ensure that the * last byte of the file is allocated. Since the smallest @@ -217,11 +254,12 @@ ffs_truncate(v) aflags = B_CLRBUF; if (ap->a_flags & IO_SYNC) aflags |= B_SYNC; - error = ffs_balloc(oip, lbn, offset + 1, ap->a_cred, &bp, - aflags); + error = VOP_BALLOC(ovp, length -1, 1, + ap->a_cred, aflags, &bp); if (error) return (error); oip->i_ffs_size = length; + vnode_pager_setsize(ovp, (u_long)length); (void) vnode_pager_uncache(ovp); if (aflags & B_SYNC) bwrite(bp); @@ -230,6 +268,8 @@ ffs_truncate(v) oip->i_flag |= IN_CHANGE | IN_UPDATE; return (VOP_UPDATE(ovp, &ts, &ts, 1)); } + vnode_pager_setsize(ovp, (u_long)length); + /* * Shorten the size of the file. 
If the file is not being * truncated to a block boundry, the contents of the @@ -245,7 +285,8 @@ ffs_truncate(v) aflags = B_CLRBUF; if (ap->a_flags & IO_SYNC) aflags |= B_SYNC; - error = ffs_balloc(oip, lbn, offset, ap->a_cred, &bp, aflags); + error = VOP_BALLOC(ovp, length - 1, 1, + ap->a_cred, aflags, &bp); if (error) return (error); oip->i_ffs_size = length; diff --git a/sys/ufs/ffs/ffs_subr.c b/sys/ufs/ffs/ffs_subr.c index e5d0c350387..7e5e417cf53 100644 --- a/sys/ufs/ffs/ffs_subr.c +++ b/sys/ufs/ffs/ffs_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_subr.c,v 1.3 1996/04/21 22:32:33 deraadt Exp $ */ +/* $OpenBSD: ffs_subr.c,v 1.4 1997/10/06 15:26:31 csapuntz Exp $ */ /* $NetBSD: ffs_subr.c,v 1.6 1996/03/17 02:16:23 christos Exp $ */ /* @@ -42,10 +42,10 @@ #ifdef _KERNEL #include <sys/systm.h> #include <sys/vnode.h> -#include <ufs/ffs/ffs_extern.h> #include <sys/buf.h> #include <ufs/ufs/quota.h> #include <ufs/ufs/inode.h> +#include <ufs/ffs/ffs_extern.h> /* * Return buffer with the contents of block "offset" from the beginning of @@ -240,3 +240,30 @@ ffs_setblock(fs, cp, h) panic("ffs_setblock"); } } + + +/* + * check if a block is free + */ +int +ffs_isfreeblock(fs, cp, h) + struct fs *fs; + unsigned char *cp; + daddr_t h; +{ + + switch ((int)fs->fs_frag) { + case 8: + return (cp[h] == 0); + case 4: + return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0); + case 2: + return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0); + case 1: + return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0); + default: + panic("ffs_isfreeblock"); + } +} + + diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index b70f7b0db8d..7b5f8b2463a 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_vfsops.c,v 1.9 1997/06/20 14:04:32 kstailey Exp $ */ +/* $OpenBSD: ffs_vfsops.c,v 1.10 1997/10/06 15:26:31 csapuntz Exp $ */ /* $NetBSD: ffs_vfsops.c,v 1.19 1996/02/09 22:22:26 christos Exp $ */ /* @@ -51,6 +51,7 @@ #include <sys/ioctl.h> 
#include <sys/errno.h> #include <sys/malloc.h> +#include <sys/sysctl.h> #include <dev/rndvar.h> @@ -68,7 +69,6 @@ int ffs_sbupdate __P((struct ufsmount *, int)); struct vfsops ffs_vfsops = { - MOUNT_FFS, ffs_mount, ufs_start, ffs_unmount, @@ -80,61 +80,53 @@ struct vfsops ffs_vfsops = { ffs_fhtovp, ffs_vptofh, ffs_init, + ffs_sysctl }; extern u_long nextgennumber; /* * Called by main() when ufs is going to be mounted as root. - * - * Name is updated by mount(8) after booting. */ -#define ROOTNAME "root_device" int ffs_mountroot() { extern struct vnode *rootvp; - register struct fs *fs; - register struct mount *mp; + struct fs *fs; + struct mount *mp; struct proc *p = curproc; /* XXX */ struct ufsmount *ump; - size_t size; int error; /* * Get vnodes for swapdev and rootdev. */ - if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp)) - panic("ffs_mountroot: can't setup bdevvp's"); - - mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); - bzero((char *)mp, (u_long)sizeof(struct mount)); - mp->mnt_op = &ffs_vfsops; - mp->mnt_flag = MNT_RDONLY; - if ((error = ffs_mountfs(rootvp, mp, p)) != 0) { - free(mp, M_MOUNT); + if ((error = bdevvp(swapdev, &swapdev_vp)) || + (error = bdevvp(rootdev, &rootvp))) { + printf("ffs_mountroot: can't setup bdevvp's"); return (error); } - if ((error = vfs_lock(mp)) != 0) { - (void)ffs_unmount(mp, 0, p); - free(mp, M_MOUNT); + + if ((error = vfs_rootmountalloc("ffs", "root_device", &mp)) != 0) return (error); - } + if ((error = ffs_mountfs(rootvp, mp, p)) != 0) { + mp->mnt_vfc->vfc_refcount--; + vfs_unbusy(mp, p); + free(mp, M_MOUNT); + return (error); + } + simple_lock(&mountlist_slock); CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); - mp->mnt_vnodecovered = NULLVP; - ump = VFSTOUFS(mp); - fs = ump->um_fs; - bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); - fs->fs_fsmnt[0] = '/'; - bcopy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MNAMELEN); - (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, - &size); - 
bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); - (void)ffs_statfs(mp, &mp->mnt_stat, p); - vfs_unlock(mp); - inittodr(fs->fs_time); - return (0); + simple_unlock(&mountlist_slock); + ump = VFSTOUFS(mp); + fs = ump->um_fs; + (void) copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0); + (void)ffs_statfs(mp, &mp->mnt_stat, p); + + vfs_unbusy(mp, p); + inittodr(fs->fs_time); + return (0); } /* @@ -172,8 +164,6 @@ ffs_mount(mp, path, data, ndp, p) flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; - if (vfs_busy(mp)) - return (EBUSY); error = ffs_flushfiles(mp, flags, p); if (error == 0 && ffs_cgupdate(ump, MNT_WAIT) == 0 && @@ -181,7 +171,6 @@ ffs_mount(mp, path, data, ndp, p) fs->fs_clean = FS_ISCLEAN; (void) ffs_sbupdate(ump, MNT_WAIT); } - vfs_unbusy(mp); if (error) return (error); fs->fs_ronly = 1; @@ -198,18 +187,19 @@ ffs_mount(mp, path, data, ndp, p) */ if (p->p_ucred->cr_uid != 0) { devvp = ump->um_devvp; - VOP_LOCK(devvp); + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(devvp, VREAD | VWRITE, p->p_ucred, p); if (error) { - VOP_UNLOCK(devvp); + VOP_UNLOCK(devvp, 0, p); return (error); } - VOP_UNLOCK(devvp); + VOP_UNLOCK(devvp, 0, p); } fs->fs_ronly = 0; fs->fs_clean <<= 1; fs->fs_fmod = 1; + (void) ffs_sbupdate(ump, MNT_WAIT); } if (args.fspec == 0) { /* @@ -243,13 +233,13 @@ ffs_mount(mp, path, data, ndp, p) accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; - VOP_LOCK(devvp); + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p); if (error) { vput(devvp); return (error); } - VOP_UNLOCK(devvp); + VOP_UNLOCK(devvp, 0, p); } if ((mp->mnt_flag & MNT_UPDATE) == 0) error = ffs_mountfs(devvp, mp, p); @@ -317,8 +307,12 @@ ffs_reload(mountp, cred, p) * Step 1: invalidate all cached meta-data. 
*/ devvp = VFSTOUFS(mountp)->um_devvp; - if (vinvalbuf(devvp, 0, cred, p, 0, 0)) + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = vinvalbuf(devvp, 0, cred, p, 0, 0); + VOP_UNLOCK(devvp, 0, p); + if (error) panic("ffs_reload: dirty1"); + /* * Step 2: re-read superblock from disk. */ @@ -375,19 +369,26 @@ ffs_reload(mountp, cred, p) } loop: + simple_lock(&mntvnode_slock); for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { + if (vp->v_mount != mountp) { + simple_unlock(&mntvnode_slock); + goto loop; + } + nvp = vp->v_mntvnodes.le_next; /* * Step 4: invalidate all inactive vnodes. */ - if (vp->v_usecount == 0) { - vgone(vp); - continue; - } + if (vrecycle(vp, &mntvnode_slock, p)) + goto loop; + /* * Step 5: invalidate all cached file data. */ - if (vget(vp, 1)) + simple_lock(&vp->v_interlock); + simple_unlock(&mntvnode_slock); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) goto loop; if (vinvalbuf(vp, 0, cred, p, 0, 0)) panic("ffs_reload: dirty2"); @@ -403,11 +404,12 @@ loop: } ip->i_din.ffs_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)); + ip->i_effnlink = ip->i_ffs_nlink; brelse(bp); vput(vp); - if (vp->v_mount != mountp) - goto loop; + simple_lock(&mntvnode_slock); } + simple_unlock(&mntvnode_slock); return (0); } @@ -426,8 +428,7 @@ ffs_mountfs(devvp, mp, p) dev_t dev; struct partinfo dpart; caddr_t base, space; - int blks; - int error, i, size, ronly; + int error, i, blks, size, ronly; int32_t *lp; struct ucred *cred; extern struct vnode *rootvp; @@ -445,7 +446,10 @@ ffs_mountfs(devvp, mp, p) return (error); if (vcount(devvp) > 1 && devvp != rootvp) return (EBUSY); - if ((error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0)) != 0) + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0); + VOP_UNLOCK(devvp, 0, p); + if (error) return (error); ronly = (mp->mnt_flag & MNT_RDONLY) != 0; @@ -484,10 +488,6 @@ ffs_mountfs(devvp, mp, p) bp = NULL; fs = ump->um_fs; fs->fs_ronly = ronly; - if 
(ronly == 0) { - fs->fs_clean <<= 1; - fs->fs_fmod = 1; - } size = fs->fs_cssize; blks = howmany(size, fs->fs_fsize); if (fs->fs_contigsumsize > 0) @@ -520,9 +520,8 @@ ffs_mountfs(devvp, mp, p) if (fs->fs_id[0] != 0 && fs->fs_id[1] != 0) mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1]; else - mp->mnt_stat.f_fsid.val[1] = makefstype(MOUNT_FFS); + mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; - mp->mnt_flag |= MNT_LOCAL; ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; @@ -531,14 +530,24 @@ ffs_mountfs(devvp, mp, p) ump->um_seqinc = fs->fs_frag; for (i = 0; i < MAXQUOTAS; i++) ump->um_quotas[i] = NULLVP; - devvp->v_specflags |= SI_MOUNTEDON; + devvp->v_specmountpoint = mp; ffs_oldfscompat(fs); ump->um_savedmaxfilesize = fs->fs_maxfilesize; /* XXX */ maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1; /* XXX */ if (fs->fs_maxfilesize > maxfilesize) /* XXX */ fs->fs_maxfilesize = maxfilesize; /* XXX */ + if (ronly == 0) { + if ((fs->fs_flags & FS_DOSOFTDEP) && + (error = softdep_mount(devvp, mp, fs, cred)) != 0) { + free(base, M_UFSMNT); + goto out; + } + fs->fs_clean = 0; + (void) ffs_sbupdate(ump, MNT_WAIT); + } return (0); out: + devvp->v_specmountpoint = NULL; if (bp) brelse(bp); (void)VOP_CLOSE(devvp, ronly ? 
FREAD : FREAD|FWRITE, cred, p); @@ -595,8 +604,14 @@ ffs_unmount(mp, mntflags, p) flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; - if ((error = ffs_flushfiles(mp, flags, p)) != 0) - return (error); + if (mp->mnt_flag & MNT_SOFTDEP) { + if ((error = softdep_flushfiles(mp, flags, p)) != 0) + return (error); + } else { + if ((error = ffs_flushfiles(mp, flags, p)) != 0) + return (error); + } + ump = VFSTOUFS(mp); fs = ump->um_fs; if (fs->fs_ronly == 0 && @@ -605,7 +620,7 @@ ffs_unmount(mp, mntflags, p) fs->fs_clean = FS_ISCLEAN; (void) ffs_sbupdate(ump, MNT_WAIT); } - ump->um_devvp->v_specflags &= ~SI_MOUNTEDON; + ump->um_devvp->v_specmountpoint = NULL; error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE, NOCRED, p); vrele(ump->um_devvp); @@ -613,7 +628,6 @@ ffs_unmount(mp, mntflags, p) free(fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; - mp->mnt_flag &= ~MNT_LOCAL; return (error); } @@ -626,12 +640,9 @@ ffs_flushfiles(mp, flags, p) int flags; struct proc *p; { - extern int doforce; register struct ufsmount *ump; int error; - if (!doforce) - flags &= ~FORCECLOSE; ump = VFSTOUFS(mp); #ifdef QUOTA if (mp->mnt_flag & MNT_QUOTA) { @@ -649,7 +660,17 @@ ffs_flushfiles(mp, flags, p) */ } #endif - error = vflush(mp, NULLVP, flags); + /* + * Flush all the files. + */ + if ((error = vflush(mp, NULL, flags)) != 0) + return (error); + /* + * Flush filesystem metadata. 
+ */ + vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p); + VOP_UNLOCK(ump->um_devvp, 0, p); return (error); } @@ -684,10 +705,11 @@ ffs_statfs(mp, sbp, p) sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO; sbp->f_ffree = fs->fs_cstotal.cs_nifree; if (sbp != &mp->mnt_stat) { + sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } - strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name, MFSNAMELEN); + strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN); return (0); } @@ -705,7 +727,7 @@ ffs_sync(mp, waitfor, cred, p) struct ucred *cred; struct proc *p; { - register struct vnode *vp; + register struct vnode *vp, *nvp; register struct inode *ip; register struct ufsmount *ump = VFSTOUFS(mp); register struct fs *fs; @@ -717,49 +739,71 @@ ffs_sync(mp, waitfor, cred, p) * Consistency check that the superblock * is still in the buffer cache. */ - if (fs->fs_fmod != 0) { - if (fs->fs_ronly != 0) { /* XXX */ - printf("fs = %s\n", fs->fs_fsmnt); - panic("update: rofs mod"); - } - fs->fs_fmod = 0; - fs->fs_time = time.tv_sec; - allerror = ffs_cgupdate(ump, waitfor); + if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { + printf("fs = %s\n", fs->fs_fsmnt); + panic("update: rofs mod"); } /* * Write back each (modified) inode. */ + simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; - vp = vp->v_mntvnodes.le_next) { + vp = nvp) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. 
*/ if (vp->v_mount != mp) goto loop; - if (VOP_ISLOCKED(vp)) - continue; + + simple_lock(&vp->v_interlock); + nvp = vp->v_mntvnodes.le_next; ip = VTOI(vp); - if ((ip->i_flag & - (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && - vp->v_dirtyblkhd.lh_first == NULL) + if (vp->v_type == VNON || ((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && + vp->v_dirtyblkhd.lh_first == NULL) || + waitfor == MNT_LAZY) { + simple_unlock(&vp->v_interlock); continue; - if (vget(vp, 1)) - goto loop; + } + simple_unlock(&mntvnode_slock); + error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); + if (error) { + simple_lock(&mntvnode_slock); + if (error == ENOENT) + goto loop; + continue; + } if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0) allerror = error; - vput(vp); + VOP_UNLOCK(vp, 0, p); + vrele(vp); + simple_lock(&mntvnode_slock); } + simple_unlock(&mntvnode_slock); /* * Force stale file system control information to be flushed. */ - if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) - allerror = error; + if (waitfor != MNT_LAZY) { + if (ump->um_mountp->mnt_flag & MNT_SOFTDEP) + waitfor = MNT_NOWAIT; + vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); + if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) + allerror = error; + VOP_UNLOCK(ump->um_devvp, 0, p); + } #ifdef QUOTA qsync(mp); #endif + /* + * Write back modified superblock. + */ + + if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0) + allerror = error; + return (allerror); } @@ -796,6 +840,7 @@ ffs_vget(mp, ino, vpp) type = ump->um_devvp->v_tag == VT_MFS ? 
M_MFSNODE : M_FFSNODE; /* XXX */ MALLOC(ip, struct inode *, sizeof(struct inode), type, M_WAITOK); bzero((caddr_t)ip, sizeof(struct inode)); + lockinit(&ip->i_lock, PINOD, "inode", 0, 0); vp->v_data = ip; ip->i_vnode = vp; ip->i_fs = fs = ump->um_fs; @@ -833,6 +878,10 @@ ffs_vget(mp, ino, vpp) return (error); } ip->i_din.ffs_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino)); + if (DOINGSOFTDEP(vp)) + softdep_load_inodeblock(ip); + else + ip->i_effnlink = ip->i_ffs_nlink; brelse(bp); /* @@ -965,7 +1014,7 @@ ffs_cgupdate(mp, waitfor) struct ufsmount *mp; int waitfor; { - register struct fs *fs = mp->um_fs; + register struct fs *fs = mp->um_fs, *dfs; register struct buf *bp; int blks; caddr_t space; @@ -987,7 +1036,74 @@ ffs_cgupdate(mp, waitfor) else bawrite(bp); } - if (!allerror && error) + + /* + * Now write back the superblock itself. If any errors occurred + * up to this point, then fail so that the superblock avoids + * being written out as clean. + */ + if (allerror) + return (allerror); + bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0); + fs->fs_fmod = 0; + fs->fs_time = time.tv_sec; + bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); + /* Restore compatibility to old file systems. XXX */ + dfs = (struct fs *)bp->b_data; /* XXX */ + if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ + dfs->fs_nrpos = -1; /* XXX */ + if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ + int32_t *lp, tmp; /* XXX */ + /* XXX */ + lp = (int32_t *)&dfs->fs_qbmask; /* XXX */ + tmp = lp[4]; /* XXX */ + for (i = 4; i > 0; i--) /* XXX */ + lp[i] = lp[i-1]; /* XXX */ + lp[0] = tmp; /* XXX */ + } /* XXX */ + dfs->fs_maxfilesize = mp->um_savedmaxfilesize; /* XXX */ + if (waitfor != MNT_WAIT) + bawrite(bp); + else if ((error = bwrite(bp)) != 0) allerror = error; + return (allerror); } + +/* + * fast filesystem related variables. 
+ */ +int +ffs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) + int *name; + u_int namelen; + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; + struct proc *p; +{ + extern int doclusterread, doclusterwrite, doreallocblks, doasyncfree; + + /* all sysctl names at this level are terminal */ + if (namelen != 1) + return (ENOTDIR); /* overloaded */ + + switch (name[0]) { + case FFS_CLUSTERREAD: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &doclusterread)); + case FFS_CLUSTERWRITE: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &doclusterwrite)); + case FFS_REALLOCBLKS: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &doreallocblks)); + case FFS_ASYNCFREE: + return (sysctl_int(oldp, oldlenp, newp, newlen, &doasyncfree)); + default: + return (EOPNOTSUPP); + } + /* NOTREACHED */ +} + diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index e9462ff50be..088ba291a3a 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_vnops.c,v 1.3 1996/05/22 11:47:18 deraadt Exp $ */ +/* $OpenBSD: ffs_vnops.c,v 1.4 1997/10/06 15:26:32 csapuntz Exp $ */ /* $NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $ */ /* @@ -82,6 +82,7 @@ struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { { &vop_lease_desc, ufs_lease_check }, /* lease */ { &vop_ioctl_desc, ufs_ioctl }, /* ioctl */ { &vop_select_desc, ufs_select }, /* select */ + { &vop_revoke_desc, ufs_revoke }, /* revoke */ { &vop_mmap_desc, ufs_mmap }, /* mmap */ { &vop_fsync_desc, ffs_fsync }, /* fsync */ { &vop_seek_desc, ufs_seek }, /* seek */ @@ -106,6 +107,7 @@ struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { { &vop_advlock_desc, ufs_advlock }, /* advlock */ { &vop_blkatoff_desc, ffs_blkatoff }, /* blkatoff */ { &vop_valloc_desc, ffs_valloc }, /* valloc */ + { &vop_balloc_desc, ffs_balloc }, /* balloc */ { &vop_reallocblks_desc, ffs_reallocblks }, /* reallocblks */ { &vop_vfree_desc, ffs_vfree }, /* vfree */ { &vop_truncate_desc, 
ffs_truncate }, /* truncate */ @@ -132,6 +134,7 @@ struct vnodeopv_entry_desc ffs_specop_entries[] = { { &vop_lease_desc, spec_lease_check }, /* lease */ { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ { &vop_select_desc, spec_select }, /* select */ + { &vop_revoke_desc, spec_revoke }, /* revoke */ { &vop_mmap_desc, spec_mmap }, /* mmap */ { &vop_fsync_desc, ffs_fsync }, /* fsync */ { &vop_seek_desc, spec_seek }, /* seek */ @@ -183,6 +186,7 @@ struct vnodeopv_entry_desc ffs_fifoop_entries[] = { { &vop_lease_desc, fifo_lease_check }, /* lease */ { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */ { &vop_select_desc, fifo_select }, /* select */ + { &vop_revoke_desc, fifo_revoke }, /* revoke */ { &vop_mmap_desc, fifo_mmap }, /* mmap */ { &vop_fsync_desc, ffs_fsync }, /* fsync */ { &vop_seek_desc, fifo_seek }, /* seek */ @@ -218,20 +222,11 @@ struct vnodeopv_desc ffs_fifoop_opv_desc = { &ffs_fifoop_p, ffs_fifoop_entries }; #endif /* FIFO */ -#ifdef DEBUG /* * Enabling cluster read/write operations. */ -#include <sys/sysctl.h> int doclusterread = 1; -struct ctldebug debug11 = { "doclusterread", &doclusterread }; int doclusterwrite = 1; -struct ctldebug debug12 = { "doclusterwrite", &doclusterwrite }; -#else -/* XXX for ufs_readwrite */ -#define doclusterread 1 -#define doclusterwrite 1 -#endif #include <ufs/ufs/ufs_readwrite.c> @@ -249,12 +244,84 @@ ffs_fsync(v) int a_waitfor; struct proc *a_p; } */ *ap = v; - register struct vnode *vp = ap->a_vp; + struct vnode *vp = ap->a_vp; + struct buf *bp, *nbp; struct timespec ts; + int s, error, passes, skipmeta; - vflushbuf(vp, ap->a_waitfor == MNT_WAIT); + /* + * Flush all dirty buffers associated with a vnode + */ + passes = NIADDR; + skipmeta = 0; + if (ap->a_waitfor == MNT_WAIT) + skipmeta = 1; +loop: + s = splbio(); +loop2: + for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { + nbp = bp->b_vnbufs.le_next; + if ((bp->b_flags & B_BUSY)) + continue; + if ((bp->b_flags & B_DELWRI) == 0) + panic("ffs_fsync: not dirty"); + if (skipmeta 
&& bp->b_lblkno < 0) + continue; + bremfree(bp); + bp->b_flags |= B_BUSY; + splx(s); + /* + * Wait for I/O associated with indirect blocks to complete, + * since there is no way to quickly wait for them below. + */ + if (bp->b_vp == vp || ap->a_waitfor != MNT_WAIT) + (void) bawrite(bp); + else if ((error = bwrite(bp)) != 0) + return (error); + goto loop; + } + if (skipmeta) { + skipmeta = 0; + goto loop2; + } + if (ap->a_waitfor == MNT_WAIT) { + while (vp->v_numoutput) { + vp->v_flag |= VBWAIT; + sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); + } + /* + * Ensure that any filesystem metadata associated + * with the vnode has been written. + */ + splx(s); + if ((error = softdep_sync_metadata(ap)) != 0) + return (error); + s = splbio(); + if (vp->v_dirtyblkhd.lh_first) { + /* + * Block devices associated with filesystems may + * have new I/O requests posted for them even if + * the vnode is locked, so no amount of trying will + * get them clean. Thus we give block devices a + * good effort, then just give up. For all other file + * types, go around and try again until it is clean. + */ + if (passes > 0) { + passes -= 1; + goto loop2; + } +#ifdef DIAGNOSTIC + if (vp->v_type != VBLK) + vprint("ffs_fsync: dirty", vp); +#endif + } + } + splx(s); TIMEVAL_TO_TIMESPEC(&time, &ts); - return (VOP_UPDATE(ap->a_vp, &ts, &ts, ap->a_waitfor == MNT_WAIT)); + if ((error = VOP_UPDATE(vp, &ts, &ts, ap->a_waitfor == MNT_WAIT)) != 0) return (error); + if (DOINGSOFTDEP(vp) && ap->a_waitfor == MNT_WAIT) + error = softdep_fsync(vp); + return (error); } /* @@ -266,11 +333,12 @@ ffs_reclaim(v) { struct vop_reclaim_args /* { struct vnode *a_vp; + struct proc *a_p; } */ *ap = v; register struct vnode *vp = ap->a_vp; int error; - if ((error = ufs_reclaim(vp)) != 0) + if ((error = ufs_reclaim(vp, ap->a_p)) != 0) return (error); FREE(vp->v_data, VFSTOUFS(vp->v_mount)->um_devvp->v_tag == VT_MFS ? 
M_MFSNODE : M_FFSNODE); diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h index e5a17da3a22..2979a3c4fe9 100644 --- a/sys/ufs/ffs/fs.h +++ b/sys/ufs/ffs/fs.h @@ -1,4 +1,4 @@ -/* $OpenBSD: fs.h,v 1.4 1997/05/30 08:34:28 downsj Exp $ */ +/* $OpenBSD: fs.h,v 1.5 1997/10/06 15:26:32 csapuntz Exp $ */ /* $NetBSD: fs.h,v 1.6 1995/04/12 21:21:02 mycroft Exp $ */ /* @@ -221,7 +221,7 @@ struct fs { int8_t fs_fmod; /* super block modified flag */ int8_t fs_clean; /* file system is clean flag */ int8_t fs_ronly; /* mounted read-only flag */ - int8_t fs_flags; /* currently unused flag */ + int8_t fs_flags; /* see FS_ below */ u_char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ /* these fields retain the current block allocation info */ int32_t fs_cgrotor; /* last cg searched */ @@ -267,6 +267,12 @@ struct fs { #define FS_OPTTIME 0 /* minimize allocation time */ #define FS_OPTSPACE 1 /* minimize disk fragmentation */ +/* + * Filesystem flags. + */ +#define FS_UNCLEAN 0x01 /* filesystem not clean at mount */ +#define FS_DOSOFTDEP 0x02 /* filesystem using soft dependencies */ + /* * Rotational layout table format types */ @@ -490,6 +496,12 @@ struct ocg { ? (fs)->fs_bsize \ : (fragroundup(fs, blkoff(fs, (dip)->di_size)))) +#define sblksize(fs, size, lbn) \ + (((lbn) >= NDADDR || (size) >= ((lbn) + 1) << (fs)->fs_bshift) \ + ? (fs)->fs_bsize \ + : (fragroundup(fs, blkoff(fs, (size))))) + + /* * Number of disk sectors per block/fragment; assumes DEV_BSIZE byte * sector size. 
diff --git a/sys/ufs/mfs/mfs_extern.h b/sys/ufs/mfs/mfs_extern.h index bd14c23226d..3616acedf76 100644 --- a/sys/ufs/mfs/mfs_extern.h +++ b/sys/ufs/mfs/mfs_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: mfs_extern.h,v 1.2 1996/02/27 07:15:46 niklas Exp $ */ +/* $OpenBSD: mfs_extern.h,v 1.3 1997/10/06 15:27:12 csapuntz Exp $ */ /* $NetBSD: mfs_extern.h,v 1.4 1996/02/09 22:31:27 christos Exp $ */ /*- @@ -43,6 +43,7 @@ struct proc; struct statfs; struct ucred; struct vnode; +struct vfsconf; __BEGIN_DECLS /* mfs_vfsops.c */ @@ -53,7 +54,7 @@ int mfs_mount __P((struct mount *, char *, caddr_t, int mfs_start __P((struct mount *, int, struct proc *)); int mfs_statfs __P((struct mount *, struct statfs *, struct proc *)); -void mfs_init __P((void)); +int mfs_init __P((struct vfsconf *)); /* mfs_vnops.c */ int mfs_open __P((void *)); @@ -65,6 +66,7 @@ int mfs_close __P((void *)); int mfs_inactive __P((void *)); int mfs_reclaim __P((void *)); int mfs_print __P((void *)); +#define mfs_revoke vop_revoke int mfs_badop __P((void *)); __END_DECLS diff --git a/sys/ufs/mfs/mfs_vfsops.c b/sys/ufs/mfs/mfs_vfsops.c index 577325fe95b..dbd32e6ea2f 100644 --- a/sys/ufs/mfs/mfs_vfsops.c +++ b/sys/ufs/mfs/mfs_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mfs_vfsops.c,v 1.2 1996/02/27 07:15:47 niklas Exp $ */ +/* $OpenBSD: mfs_vfsops.c,v 1.3 1997/10/06 15:27:12 csapuntz Exp $ */ /* $NetBSD: mfs_vfsops.c,v 1.10 1996/02/09 22:31:28 christos Exp $ */ /* @@ -69,7 +69,6 @@ extern int (**mfs_vnodeop_p) __P((void *)); * mfs vfs operations. */ struct vfsops mfs_vfsops = { - MOUNT_MFS, mfs_mount, mfs_start, ffs_unmount, @@ -81,37 +80,31 @@ struct vfsops mfs_vfsops = { ffs_fhtovp, ffs_vptofh, mfs_init, + ffs_sysctl }; /* * Called by main() when mfs is going to be mounted as root. - * - * Name is updated by mount(8) after booting. 
*/ -#define ROOTNAME "mfs_root" int mfs_mountroot() { extern struct vnode *rootvp; register struct fs *fs; - register struct mount *mp; + struct mount *mp; struct proc *p = curproc; /* XXX */ struct ufsmount *ump; struct mfsnode *mfsp; - size_t size; int error; - /* - * Get vnodes for swapdev and rootdev. - */ - if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp)) - panic("mfs_mountroot: can't setup bdevvp's"); - - mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); - bzero((char *)mp, (u_long)sizeof(struct mount)); - mp->mnt_op = &mfs_vfsops; - mp->mnt_flag = MNT_RDONLY; + if ((error = bdevvp(swapdev, &swapdev_vp)) || + (error = bdevvp(rootdev, &rootvp))) { + printf("mfs_mountroot: can't setup bdevvp's"); + return (error); + } + if ((error = vfs_rootmountalloc("mfs", "mfs_root", &mp)) != 0) + return (error); mfsp = malloc(sizeof *mfsp, M_MFSNODE, M_WAITOK); rootvp->v_data = mfsp; rootvp->v_op = mfs_vnodeop_p; @@ -122,28 +115,20 @@ mfs_mountroot() mfsp->mfs_pid = p->p_pid; mfsp->mfs_buflist = (struct buf *)0; if ((error = ffs_mountfs(rootvp, mp, p)) != 0) { + mp->mnt_vfc->vfc_refcount--; + vfs_unbusy(mp, p); free(mp, M_MOUNT); free(mfsp, M_MFSNODE); return (error); } - if ((error = vfs_lock(mp)) != 0) { - (void)ffs_unmount(mp, 0, p); - free(mp, M_MOUNT); - free(mfsp, M_MFSNODE); - return (error); - } + simple_lock(&mountlist_slock); CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); - mp->mnt_vnodecovered = NULLVP; + simple_unlock(&mountlist_slock); ump = VFSTOUFS(mp); fs = ump->um_fs; - bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); - fs->fs_fsmnt[0] = '/'; - bcopy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MNAMELEN); - (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, - &size); - bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void) copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0); (void)ffs_statfs(mp, &mp->mnt_stat, p); - vfs_unlock(mp); + vfs_unbusy(mp, p); inittodr((time_t)0); return (0); } @@ -207,10 
+192,7 @@ mfs_mount(mp, path, data, ndp, p) flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; - if (vfs_busy(mp)) - return (EBUSY); error = ffs_flushfiles(mp, flags, p); - vfs_unbusy(mp); if (error) return (error); } @@ -272,7 +254,6 @@ mfs_start(mp, flags, p) register struct mfsnode *mfsp = VTOMFS(vp); register struct buf *bp; register caddr_t base; - int error = 0; base = mfsp->mfs_baseoff; while (mfsp->mfs_buflist != (struct buf *)-1) { @@ -289,13 +270,11 @@ mfs_start(mp, flags, p) * otherwise we will loop here, as tsleep will always return * EINTR/ERESTART. */ - if ((error = tsleep((caddr_t)vp, mfs_pri, "mfsidl", 0)) != 0) { - DOIO(); - if (dounmount(mp, 0, p) != 0) - CLRSIG(p, CURSIG(p)); - } + if (tsleep((caddr_t)vp, mfs_pri, "mfsidl", 0) && + dounmount(mp, 0, p) != 0) + CLRSIG(p, CURSIG(p)); } - return (error); + return (0); } /* @@ -311,10 +290,10 @@ mfs_statfs(mp, sbp, p) error = ffs_statfs(mp, sbp, p); #ifdef COMPAT_09 - sbp->f_type = 3; + sbp->f_type = mp->mnt_vfc->vfc_typenum; #else sbp->f_type = 0; #endif - strncpy(&sbp->f_fstypename[0], mp->mnt_op->vfs_name, MFSNAMELEN); + strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN); return (error); } diff --git a/sys/ufs/mfs/mfs_vnops.c b/sys/ufs/mfs/mfs_vnops.c index 63b20a029bf..84a5ed3d368 100644 --- a/sys/ufs/mfs/mfs_vnops.c +++ b/sys/ufs/mfs/mfs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mfs_vnops.c,v 1.4 1996/04/21 22:32:49 deraadt Exp $ */ +/* $OpenBSD: mfs_vnops.c,v 1.5 1997/10/06 15:27:13 csapuntz Exp $ */ /* $NetBSD: mfs_vnops.c,v 1.8 1996/03/17 02:16:32 christos Exp $ */ /* @@ -72,6 +72,7 @@ struct vnodeopv_entry_desc mfs_vnodeop_entries[] = { { &vop_write_desc, mfs_write }, /* write */ { &vop_ioctl_desc, mfs_ioctl }, /* ioctl */ { &vop_select_desc, mfs_select }, /* select */ + { &vop_revoke_desc, mfs_revoke }, /* revoke */ { &vop_mmap_desc, mfs_mmap }, /* mmap */ { &vop_fsync_desc, spec_fsync }, /* fsync */ { &vop_seek_desc, mfs_seek }, /* seek */ @@ -231,6 +232,9 @@ 
mfs_bmap(v) *ap->a_vpp = ap->a_vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn; + if (ap->a_runp != NULL) + *ap->a_runp = 0; + return (0); } @@ -294,12 +298,14 @@ mfs_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; + struct proc *a_p; } */ *ap = v; register struct mfsnode *mfsp = VTOMFS(ap->a_vp); if (mfsp->mfs_buflist && mfsp->mfs_buflist != (struct buf *)(-1)) panic("mfs_inactive: not inactive (mfs_buflist %p)", mfsp->mfs_buflist); + VOP_UNLOCK(ap->a_vp, 0, ap->a_p); return (0); } @@ -352,8 +358,9 @@ mfs_badop(v) /* * Memory based filesystem initialization. */ -void -mfs_init() +int +mfs_init(vfsp) + struct vfsconf *vfsp; { - + return (0); } diff --git a/sys/ufs/mfs/mfsnode.h b/sys/ufs/mfs/mfsnode.h index d37f7ba4e68..29c290c4e09 100644 --- a/sys/ufs/mfs/mfsnode.h +++ b/sys/ufs/mfs/mfsnode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: mfsnode.h,v 1.3 1996/06/11 03:25:15 tholo Exp $ */ +/* $OpenBSD: mfsnode.h,v 1.4 1997/10/06 15:27:13 csapuntz Exp $ */ /* $NetBSD: mfsnode.h,v 1.3 1996/02/09 22:31:31 christos Exp $ */ /* @@ -76,9 +76,9 @@ struct mfsnode { #define mfs_readdir mfs_badop #define mfs_readlink mfs_badop #define mfs_abortop mfs_badop -#define mfs_lock nullop -#define mfs_unlock nullop -#define mfs_islocked nullop +#define mfs_lock vop_nolock +#define mfs_unlock vop_nounlock +#define mfs_islocked vop_noislocked #define mfs_pathconf mfs_badop #define mfs_advlock mfs_badop #define mfs_blkatoff mfs_badop diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h index 9dcc48697f1..0a9a7a24151 100644 --- a/sys/ufs/ufs/inode.h +++ b/sys/ufs/ufs/inode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: inode.h,v 1.6 1997/05/30 15:18:49 downsj Exp $ */ +/* $OpenBSD: inode.h,v 1.7 1997/10/06 15:27:36 csapuntz Exp $ */ /* $NetBSD: inode.h,v 1.8 1995/06/15 23:22:50 cgd Exp $ */ /* @@ -45,6 +45,8 @@ #include <ufs/ufs/dir.h> #include <ufs/ext2fs/ext2fs_dinode.h> +typedef long ufs_lbn_t; + /* * Per-filesystem inode extensions. 
*/ @@ -63,13 +65,13 @@ struct ext2fs_inode_ext { * active, and is put back when the file is no longer being used. */ struct inode { - struct inode *i_next; /* Hash chain forward. */ - struct inode **i_prev; /* Hash chain back. */ + LIST_ENTRY(inode) i_hash; /* Hash chain */ struct vnode *i_vnode;/* Vnode associated with this inode. */ struct vnode *i_devvp;/* Vnode for block I/O. */ u_int32_t i_flag; /* flags, see below */ dev_t i_dev; /* Device associated with the inode. */ ino_t i_number; /* The identity of the inode. */ + int i_effnlink; /* i_nlink when I/O completes */ union { /* Associated filesystem. */ struct fs *fs; /* FFS */ @@ -83,8 +85,8 @@ struct inode { struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */ u_quad_t i_modrev; /* Revision level for NFS lease. */ struct lockf *i_lockf;/* Head of byte-level lock list. */ - pid_t i_lockholder; /* DEBUG: holder of inode lock. */ - pid_t i_lockwaiter; /* DEBUG: latest blocked for inode lock. */ + struct lock i_lock; /* Inode lock */ + /* * Side effects; used during directory lookup. */ @@ -180,14 +182,11 @@ struct inode { /* These flags are kept in i_flag. */ #define IN_ACCESS 0x0001 /* Access time update request. */ #define IN_CHANGE 0x0002 /* Inode change time update request. */ -#define IN_EXLOCK 0x0004 /* File has exclusive lock. */ -#define IN_LOCKED 0x0008 /* Inode lock. */ -#define IN_LWAIT 0x0010 /* Process waiting on file lock. */ -#define IN_MODIFIED 0x0020 /* Inode has been modified. */ -#define IN_RENAME 0x0040 /* Inode is being renamed. */ -#define IN_SHLOCK 0x0080 /* File has shared lock. */ -#define IN_UPDATE 0x0100 /* Modification time update request. */ -#define IN_WANTED 0x0200 /* Inode is wanted by a process. */ +#define IN_UPDATE 0x0004 /* Modification time update request */ +#define IN_MODIFIED 0x0008 /* Inode has been modified. */ +#define IN_RENAME 0x0010 /* Inode is being renamed. */ +#define IN_SHLOCK 0x0020 /* FIle has shared lock. 
*/ +#define IN_EXLOCK 0x0040 /* File has exclusive lock. */ #ifdef _KERNEL /* @@ -242,6 +241,9 @@ struct indir { } \ } +/* Determine if soft dependencies are being done */ +#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & MNT_SOFTDEP) + /* This overlays the fid structure (see mount.h). */ struct ufid { u_int16_t ufid_len; /* Length of structure. */ diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h index 4dbeed61a92..166d8f43684 100644 --- a/sys/ufs/ufs/ufs_extern.h +++ b/sys/ufs/ufs/ufs_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_extern.h,v 1.2 1996/02/27 07:21:25 niklas Exp $ */ +/* $OpenBSD: ufs_extern.h,v 1.3 1997/10/06 15:27:36 csapuntz Exp $ */ /* $NetBSD: ufs_extern.h,v 1.5 1996/02/09 22:36:03 christos Exp $ */ /*- @@ -54,6 +54,7 @@ struct ufs_args; struct ufsmount; struct uio; struct vattr; +struct vfsconf; struct vnode; __BEGIN_DECLS @@ -86,6 +87,7 @@ int ufs_readdir __P((void *)); int ufs_readlink __P((void *)); int ufs_remove __P((void *)); int ufs_rename __P((void *)); +#define ufs_revoke vop_revoke int ufs_rmdir __P((void *)); int ufs_seek __P((void *)); int ufs_select __P((void *)); @@ -117,19 +119,19 @@ void ufs_ihashins __P((struct inode *)); void ufs_ihashrem __P((struct inode *)); /* ufs_inode.c */ -void ufs_init __P((void)); -int ufs_reclaim __P((struct vnode *)); +int ufs_init __P((struct vfsconf *)); +int ufs_reclaim __P((struct vnode *, struct proc *)); /* ufs_lookup.c */ void ufs_dirbad __P((struct inode *, doff_t, char *)); int ufs_dirbadentry __P((struct vnode *, struct direct *, int)); -int ufs_direnter __P((struct inode *, struct vnode *, - struct componentname *)); -int ufs_direnter2 __P((struct vnode *, struct direct *, struct ucred *, - struct proc *)); -int ufs_dirremove __P((struct vnode *, struct componentname *)); +void ufs_makedirentry __P((struct inode *, struct componentname *, + struct direct *)); +int ufs_direnter __P((struct vnode *, struct direct *, + struct componentname *, struct buf *)); +int ufs_dirremove 
__P((struct vnode *, struct inode *, int, int)); int ufs_dirrewrite __P((struct inode *, struct inode *, - struct componentname *)); + ino_t, int, int)); int ufs_dirempty __P((struct inode *, ino_t, struct ucred *)); int ufs_checkpath __P((struct inode *, struct inode *, struct ucred *)); @@ -165,4 +167,19 @@ int ufs_vinit __P((struct mount *, int (**) __P((void *)), int (**) __P((void *)), struct vnode **)); int ufs_makeinode __P((int, struct vnode *, struct vnode **, struct componentname *)); + + +/* + * Soft dependency function prototypes. + */ +void softdep_setup_directory_add __P((struct buf *, struct inode *, off_t, + long, struct buf *)); +void softdep_change_directoryentry_offset __P((struct inode *, caddr_t, + caddr_t, caddr_t, int)); +void softdep_setup_remove __P((struct buf *,struct inode *, struct inode *, + int)); +void softdep_setup_directory_change __P((struct buf *, struct inode *, + struct inode *, long, int)); +void softdep_increase_linkcnt __P((struct inode *)); + __END_DECLS diff --git a/sys/ufs/ufs/ufs_ihash.c b/sys/ufs/ufs/ufs_ihash.c index a9b7227942d..84ff51b8b39 100644 --- a/sys/ufs/ufs/ufs_ihash.c +++ b/sys/ufs/ufs/ufs_ihash.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_ihash.c,v 1.2 1996/02/27 07:21:26 niklas Exp $ */ +/* $OpenBSD: ufs_ihash.c,v 1.3 1997/10/06 15:27:37 csapuntz Exp $ */ /* $NetBSD: ufs_ihash.c,v 1.3 1996/02/09 22:36:04 christos Exp $ */ /* @@ -49,9 +49,10 @@ /* * Structures associated with inode cacheing. */ -struct inode **ihashtbl; +LIST_HEAD(ihashhead, inode) *ihashtbl; u_long ihash; /* size of hash table - 1 */ -#define INOHASH(device, inum) (((device) + (inum)) & ihash) +#define INOHASH(device, inum) (&ihashtbl[((device) + (inum)) & ihash]) +struct simplelock ufs_ihash_slock; /* * Initialize inode hash table. @@ -61,6 +62,7 @@ ufs_ihashinit() { ihashtbl = hashinit(desiredvnodes, M_UFSMNT, &ihash); + simple_lock_init(&ufs_ihash_slock); } /* @@ -68,19 +70,21 @@ ufs_ihashinit() * to it. 
If it is in core, return it, even if it is locked. */ struct vnode * -ufs_ihashlookup(device, inum) - dev_t device; +ufs_ihashlookup(dev, inum) + dev_t dev; ino_t inum; { - register struct inode *ip; + struct inode *ip; - for (ip = ihashtbl[INOHASH(device, inum)];; ip = ip->i_next) { - if (ip == NULL) - return (NULL); - if (inum == ip->i_number && device == ip->i_dev) - return (ITOV(ip)); - } - /* NOTREACHED */ + simple_lock(&ufs_ihash_slock); + for (ip = INOHASH(dev, inum)->lh_first; ip; ip = ip->i_hash.le_next) + if (inum == ip->i_number && dev == ip->i_dev) + break; + simple_unlock(&ufs_ihash_slock); + + if (ip) + return (ITOV(ip)); + return (NULLVP); } /* @@ -88,30 +92,28 @@ ufs_ihashlookup(device, inum) * to it. If it is in core, but locked, wait for it. */ struct vnode * -ufs_ihashget(device, inum) - dev_t device; +ufs_ihashget(dev, inum) + dev_t dev; ino_t inum; { - register struct inode *ip; + struct proc *p = curproc; + struct inode *ip; struct vnode *vp; +loop: + simple_lock(&ufs_ihash_slock); + for (ip = INOHASH(dev, inum)->lh_first; ip; ip = ip->i_hash.le_next) { + if (inum == ip->i_number && dev == ip->i_dev) { + vp = ITOV(ip); + simple_lock(&vp->v_interlock); + simple_unlock(&ufs_ihash_slock); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) + goto loop; + return (vp); + } - for (;;) - for (ip = ihashtbl[INOHASH(device, inum)];; ip = ip->i_next) { - if (ip == NULL) - return (NULL); - if (inum == ip->i_number && device == ip->i_dev) { - if (ip->i_flag & IN_LOCKED) { - ip->i_flag |= IN_WANTED; - sleep(ip, PINOD); - break; - } - vp = ITOV(ip); - if (!vget(vp, 1)) - return (vp); - break; - } - } - /* NOTREACHED */ + } + simple_unlock(&ufs_ihash_slock); + return (NULL); } /* @@ -121,21 +123,16 @@ void ufs_ihashins(ip) struct inode *ip; { - struct inode **ipp, *iq; + struct proc *p = curproc; /* XXX */ + struct ihashhead *ipp; - ipp = &ihashtbl[INOHASH(ip->i_dev, ip->i_number)]; - if ((iq = *ipp) != NULL) - iq->i_prev = &ip->i_next; - ip->i_next = iq; - 
ip->i_prev = ipp; - *ipp = ip; - if (ip->i_flag & IN_LOCKED) - panic("ufs_ihashins: already locked"); - if (curproc) - ip->i_lockholder = curproc->p_pid; - else - ip->i_lockholder = -1; - ip->i_flag |= IN_LOCKED; + /* lock the inode, then put it on the appropriate hash list */ + lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct simplelock *)0, p); + + simple_lock(&ufs_ihash_slock); + ipp = INOHASH(ip->i_dev, ip->i_number); + LIST_INSERT_HEAD(ipp, ip, i_hash); + simple_unlock(&ufs_ihash_slock); } /* @@ -143,15 +140,14 @@ ufs_ihashins(ip) */ void ufs_ihashrem(ip) - register struct inode *ip; + struct inode *ip; { - register struct inode *iq; + simple_lock(&ufs_ihash_slock); + LIST_REMOVE(ip, i_hash); + #ifdef DIAGNOSTIC + ip->i_hash.le_next = NULL; + ip->i_hash.le_prev = NULL; + #endif + simple_unlock(&ufs_ihash_slock); - if ((iq = ip->i_next) != NULL) - iq->i_prev = ip->i_prev; - *ip->i_prev = iq; -#ifdef DIAGNOSTIC - ip->i_next = NULL; - ip->i_prev = NULL; -#endif } diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c index eed08b7f2cf..31437cd4bfd 100644 --- a/sys/ufs/ufs/ufs_inode.c +++ b/sys/ufs/ufs/ufs_inode.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_inode.c,v 1.4 1997/05/30 08:35:04 downsj Exp $ */ +/* $OpenBSD: ufs_inode.c,v 1.5 1997/10/06 15:27:37 csapuntz Exp $ */ /* $NetBSD: ufs_inode.c,v 1.7 1996/05/11 18:27:52 mycroft Exp $ */ /* @@ -57,6 +57,7 @@ u_long nextgennumber; /* Next generation number to assign. */ +#if 0 void ufs_init() { @@ -71,7 +72,7 @@ ufs_init() #endif return; } - +#endif /* * Last reference to an inode. If necessary, write or delete it. 
*/ @@ -81,39 +82,29 @@ ufs_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; + struct proc *a_p; } */ *ap = v; - register struct vnode *vp = ap->a_vp; - register struct inode *ip = VTOI(vp); + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + struct proc *p = ap->a_p; struct timespec ts; - int mode, error; + int mode, error = 0; extern int prtactive; if (prtactive && vp->v_usecount != 0) vprint("ffs_inactive: pushing active", vp); - /* Get rid of inodes related to stale file handles. */ - if (ip->i_ffs_mode == 0) { - if ((vp->v_flag & VXLOCK) == 0) - vgone(vp); - return (0); - } - - error = 0; -#ifdef DIAGNOSTIC - if (VOP_ISLOCKED(vp)) - panic("ffs_inactive: locked inode"); - if (curproc) - ip->i_lockholder = curproc->p_pid; - else - ip->i_lockholder = -1; -#endif - ip->i_flag |= IN_LOCKED; + /* + * Ignore inodes related to stale file handles. + */ + if (ip->i_ffs_mode == 0) + goto out; if (ip->i_ffs_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { #ifdef QUOTA if (!getinoquota(ip)) (void)chkiq(ip, -1, NOCRED, 0); #endif - error = VOP_TRUNCATE(vp, (off_t)0, 0, NOCRED, NULL); + error = VOP_TRUNCATE(vp, (off_t)0, 0, NOCRED, p); ip->i_ffs_rdev = 0; mode = ip->i_ffs_mode; ip->i_ffs_mode = 0; @@ -124,13 +115,14 @@ ufs_inactive(v) TIMEVAL_TO_TIMESPEC(&time, &ts); VOP_UPDATE(vp, &ts, &ts, 0); } - VOP_UNLOCK(vp); +out: + VOP_UNLOCK(vp, 0, p); /* * If we are done with the inode, reclaim it * so that it can be reused immediately. */ - if (vp->v_usecount == 0 && ip->i_ffs_mode == 0) - vgone(vp); + if (ip->i_ffs_mode == 0) + vrecycle(vp, (struct simplelock *)0, p); return (error); } @@ -138,8 +130,9 @@ ufs_inactive(v) * Reclaim an inode so that it can be used for other purposes. 
*/ int -ufs_reclaim(vp) +ufs_reclaim(vp, p) register struct vnode *vp; + struct proc *p; { register struct inode *ip; extern int prtactive; diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c index 38d828b987e..47587cdd00a 100644 --- a/sys/ufs/ufs/ufs_lookup.c +++ b/sys/ufs/ufs/ufs_lookup.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_lookup.c,v 1.4 1997/05/30 08:35:08 downsj Exp $ */ +/* $OpenBSD: ufs_lookup.c,v 1.5 1997/10/06 15:27:38 csapuntz Exp $ */ /* $NetBSD: ufs_lookup.c,v 1.7 1996/02/09 22:36:06 christos Exp $ */ /* @@ -43,12 +43,16 @@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/kernel.h> #include <sys/namei.h> #include <sys/buf.h> #include <sys/file.h> +#include <sys/stat.h> #include <sys/mount.h> #include <sys/vnode.h> +#include <vm/vm.h> + #include <ufs/ufs/quota.h> #include <ufs/ufs/inode.h> #include <ufs/ufs/dir.h> @@ -131,6 +135,7 @@ ufs_lookup(v) struct ucred *cred = cnp->cn_cred; int flags = cnp->cn_flags; int nameiop = cnp->cn_nameiop; + struct proc *p = cnp->cn_proc; bp = NULL; slotoffset = -1; @@ -148,6 +153,10 @@ ufs_lookup(v) if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0) return (error); + if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) && + (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) + return (EROFS); + /* * We now have a segment name to search for, and a directory to search. 
* @@ -173,14 +182,14 @@ ufs_lookup(v) VREF(vdp); error = 0; } else if (flags & ISDOTDOT) { - VOP_UNLOCK(pdp); - error = vget(vdp, 1); + VOP_UNLOCK(pdp, 0, p); + error = vget(vdp, LK_EXCLUSIVE, p); if (!error && lockparent && (flags & ISLASTCN)) - error = VOP_LOCK(pdp); + error = vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p); } else { - error = vget(vdp, 1); + error = vget(vdp, LK_EXCLUSIVE, p); if (!lockparent || error || !(flags & ISLASTCN)) - VOP_UNLOCK(pdp); + VOP_UNLOCK(pdp, 0, p); } /* * Check that the capability number did not change @@ -191,13 +200,14 @@ ufs_lookup(v) return (0); vput(vdp); if (lockparent && pdp != vdp && (flags & ISLASTCN)) - VOP_UNLOCK(pdp); + VOP_UNLOCK(pdp, 0, p); } - if ((error = VOP_LOCK(pdp)) != 0) + *vpp = NULL; + + if ((error = vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p)) != 0) return (error); vdp = pdp; dp = VTOI(pdp); - *vpp = NULL; } /* @@ -396,7 +406,7 @@ notfound: (nameiop == DELETE && (ap->a_cnp->cn_flags & DOWHITEOUT) && (ap->a_cnp->cn_flags & ISWHITEOUT))) && - (flags & ISLASTCN) && dp->i_ffs_nlink != 0) { + (flags & ISLASTCN) && dp->i_effnlink != 0) { /* * Access for write is interpreted as allowing * creation of files in the directory. 
@@ -446,7 +456,7 @@ notfound: */ cnp->cn_flags |= SAVENAME; if (!lockparent) - VOP_UNLOCK(vdp); + VOP_UNLOCK(vdp, 0, p); return (EJUSTRETURN); } /* @@ -524,7 +534,7 @@ found: } *vpp = tdp; if (!lockparent) - VOP_UNLOCK(vdp); + VOP_UNLOCK(vdp, 0, p); return (0); } @@ -551,7 +561,7 @@ found: *vpp = tdp; cnp->cn_flags |= SAVENAME; if (!lockparent) - VOP_UNLOCK(vdp); + VOP_UNLOCK(vdp, 0, p); return (0); } @@ -576,14 +586,14 @@ found: */ pdp = vdp; if (flags & ISDOTDOT) { - VOP_UNLOCK(pdp); /* race to get the inode */ + VOP_UNLOCK(pdp, 0, p); /* race to get the inode */ error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); if (error) { - VOP_LOCK(pdp); + vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p); return (error); } if (lockparent && (flags & ISLASTCN) && - (error = VOP_LOCK(pdp))) { + (error = vn_lock(pdp, LK_EXCLUSIVE, p))) { vput(tdp); return (error); } @@ -596,7 +606,7 @@ found: if (error) return (error); if (!lockparent || !(flags & ISLASTCN)) - VOP_UNLOCK(pdp); + VOP_UNLOCK(pdp, 0, p); *vpp = tdp; } @@ -671,108 +681,130 @@ bad: } /* - * Write a directory entry after a call to namei, using the parameters - * that it left in nameidata. The argument ip is the inode which the new - * directory entry will refer to. Dvp is a pointer to the directory to - * be written, which was left locked by namei. Remaining parameters - * (dp->i_offset, dp->i_count) indicate how the space for the new - * entry is to be obtained. + * Construct a new directory entry after a call to namei, using the + * parameters that it left in the componentname argument cnp. The + * argument ip is the inode to which the new directory entry will refer. 
*/ -int -ufs_direnter(ip, dvp, cnp) - struct inode *ip; - struct vnode *dvp; - register struct componentname *cnp; +void +ufs_makedirentry(ip, cnp, newdirp) + struct inode *ip; + struct componentname *cnp; + struct direct *newdirp; { - register struct inode *dp; - struct direct newdir; - + #ifdef DIAGNOSTIC - if ((cnp->cn_flags & SAVENAME) == 0) - panic("direnter: missing name"); + if ((cnp->cn_flags & SAVENAME) == 0) + panic("ufs_makedirentry: missing name"); #endif - dp = VTOI(dvp); - newdir.d_ino = ip->i_number; - newdir.d_namlen = cnp->cn_namelen; - bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); - if (dvp->v_mount->mnt_maxsymlinklen > 0) - newdir.d_type = IFTODT(ip->i_ffs_mode); - else { - newdir.d_type = 0; + newdirp->d_ino = ip->i_number; + newdirp->d_namlen = cnp->cn_namelen; + bcopy(cnp->cn_nameptr, newdirp->d_name, (unsigned)cnp->cn_namelen + 1); + if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) + newdirp->d_type = IFTODT(ip->i_ffs_mode); + else { + newdirp->d_type = 0; # if (BYTE_ORDER == LITTLE_ENDIAN) - { u_char tmp = newdir.d_namlen; - newdir.d_namlen = newdir.d_type; - newdir.d_type = tmp; } + { u_char tmp = newdirp->d_namlen; + newdirp->d_namlen = newdirp->d_type; + newdirp->d_type = tmp; } # endif - } - return (ufs_direnter2(dvp, &newdir, cnp->cn_cred, cnp->cn_proc)); + } } - + /* - * Common entry point for directory entry removal used by ufs_direnter - * and ufs_whiteout + * Write a directory entry after a call to namei, using the parameters + * that it left in nameidata. The argument dirp is the new directory + * entry contents. Dvp is a pointer to the directory to be written, + * which was left locked by namei. Remaining parameters (dp->i_offset, + * dp->i_count) indicate how the space for the new entry is to be obtained. + * Non-null bp indicates that a directory is being created (for the + * soft dependency code). 
*/ int -ufs_direnter2(dvp, dirp, cr, p) - struct vnode *dvp; - struct direct *dirp; - struct ucred *cr; - struct proc *p; +ufs_direnter(dvp, dirp, cnp, newdirbp) + struct vnode *dvp; + struct direct *dirp; + struct componentname *cnp; + struct buf *newdirbp; { - int newentrysize; - struct inode *dp; - struct buf *bp; - struct iovec aiov; - struct uio auio; - u_int dsize; - struct direct *ep, *nep; - int error, loc, spacefree; - char *dirbuf; + struct ucred *cr; + struct proc *p; + int newentrysize; + struct inode *dp; + struct buf *bp; + u_int dsize; + struct direct *ep, *nep; + int error, ret, blkoff, loc, spacefree, flags; + char *dirbuf; + struct timespec ts; - dp = VTOI(dvp); - newentrysize = DIRSIZ(FSFMT(dvp), dirp); + error = 0; + cr = cnp->cn_cred; + p = cnp->cn_proc; + dp = VTOI(dvp); + newentrysize = DIRSIZ(FSFMT(dvp), dirp); if (dp->i_count == 0) { /* * If dp->i_count is 0, then namei could find no * space in the directory. Here, dp->i_offset will * be on a directory block boundary and we will write the - * new entry into a fresh block. - */ - if (dp->i_offset & (DIRBLKSIZ - 1)) - panic("ufs_direnter2: newblk"); - auio.uio_offset = dp->i_offset; - dirp->d_reclen = DIRBLKSIZ; - auio.uio_resid = newentrysize; - aiov.iov_len = newentrysize; - aiov.iov_base = (caddr_t)dirp; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_rw = UIO_WRITE; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_procp = (struct proc *)0; - error = VOP_WRITE(dvp, &auio, IO_SYNC, cr); - if (DIRBLKSIZ > - VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) - /* XXX should grow with balloc() */ - panic("ufs_direnter2: frag size"); - else if (!error) { - dp->i_ffs_size = roundup(dp->i_ffs_size, DIRBLKSIZ); - dp->i_flag |= IN_CHANGE; + * new entry into a fresh block. 
+ */ + if (dp->i_offset & (DIRBLKSIZ - 1)) + panic("ufs_direnter: newblk"); + flags = B_CLRBUF; + if (!DOINGSOFTDEP(dvp)) + flags |= B_SYNC; + if ((error = VOP_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ, + cr, flags, &bp)) != 0) { + if (DOINGSOFTDEP(dvp) && newdirbp != NULL) + bdwrite(newdirbp); + return (error); } - return (error); - } - - /* - * If dp->i_count is non-zero, then namei found space - * for the new entry in the range dp->i_offset to - * dp->i_offset + dp->i_count in the directory. - * To use this space, we may have to compact the entries located - * there, by copying them together towards the beginning of the - * block, leaving the free space in one usable chunk at the end. - */ - - /* + dp->i_ffs_size = dp->i_offset + DIRBLKSIZ; + dp->i_flag |= IN_CHANGE | IN_UPDATE; + vnode_pager_setsize(dvp, (u_long)dp->i_ffs_size); + dirp->d_reclen = DIRBLKSIZ; + blkoff = dp->i_offset & + (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); + bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize); + if (DOINGSOFTDEP(dvp)) { + /* + * Ensure that the entire newly allocated block is a + * valid directory so that future growth within the + * block does not have to ensure that the block is + * written before the inode. + */ + blkoff += DIRBLKSIZ; + while (blkoff < bp->b_bcount) { + ((struct direct *) + (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; + blkoff += DIRBLKSIZ; + } + softdep_setup_directory_add(bp, dp, dp->i_offset, + dirp->d_ino, newdirbp); + bdwrite(bp); + } else { + error = VOP_BWRITE(bp); + } + TIMEVAL_TO_TIMESPEC(&time, &ts); + ret = VOP_UPDATE(dvp, &ts, &ts, !DOINGSOFTDEP(dvp)); + if (error == 0) + return (ret); + return (error); + } + + /* + * If dp->i_count is non-zero, then namei found space for the new + * entry in the range dp->i_offset to dp->i_offset + dp->i_count + * in the directory. 
To use this space, we may have to compact + * the entries located there, by copying them together towards the + * beginning of the block, leaving the free space in one usable + * chunk at the end. + */ + + /* * Increase size of directory if entry eats into new space. * This should never push the size past a new multiple of * DIRBLKSIZE. @@ -784,15 +816,17 @@ ufs_direnter2(dvp, dirp, cr, p) /* * Get the block containing the space for the new directory entry. */ - error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp); - if (error) - return (error); + if ((error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp)) + != 0) { + if (DOINGSOFTDEP(dvp) && newdirbp != NULL) + bdwrite(newdirbp); + return (error); + } /* * Find space for the new entry. In the simple case, the entry at * offset base will have the space. If it does not, then namei * arranged that compacting the region dp->i_offset to - * dp->i_offset + dp->i_count would yield the - * space. + * dp->i_offset + dp->i_count would yield the space. 
*/ ep = (struct direct *)dirbuf; dsize = DIRSIZ(FSFMT(dvp), ep); @@ -810,7 +844,11 @@ ufs_direnter2(dvp, dirp, cr, p) dsize = DIRSIZ(FSFMT(dvp), nep); spacefree += nep->d_reclen - dsize; loc += nep->d_reclen; - bcopy((caddr_t)nep, (caddr_t)ep, dsize); + if (DOINGSOFTDEP(dvp)) + softdep_change_directoryentry_offset(dp, dirbuf, + (caddr_t)nep, (caddr_t)ep, dsize); + else + bcopy((caddr_t)nep, (caddr_t)ep, dsize); } /* * Update the pointer fields in the previous entry (if any), @@ -820,19 +858,26 @@ ufs_direnter2(dvp, dirp, cr, p) (ep->d_ino == WINO && bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { if (spacefree + dsize < newentrysize) - panic("ufs_direnter2: compact1"); + panic("ufs_direnter: compact1"); dirp->d_reclen = spacefree + dsize; } else { if (spacefree < newentrysize) - panic("ufs_direnter2: compact2"); + panic("ufs_direnter: compact2"); dirp->d_reclen = spacefree; ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); } bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize); - error = VOP_BWRITE(bp); + + if (DOINGSOFTDEP(dvp)) { + softdep_setup_directory_add(bp, dp, + dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp); + bdwrite(bp); + } else { + error = VOP_BWRITE(bp); + } dp->i_flag |= IN_CHANGE | IN_UPDATE; - if (!error && dp->i_endoff && dp->i_endoff < dp->i_ffs_size) + if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_ffs_size) error = VOP_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cr, p); return (error); } @@ -850,18 +895,20 @@ ufs_direnter2(dvp, dirp, cr, p) * to the size of the previous entry. */ int -ufs_dirremove(dvp, cnp) +ufs_dirremove(dvp, ip, flags, isrmdir) struct vnode *dvp; - struct componentname *cnp; + struct inode *ip; + int flags; + int isrmdir; { - register struct inode *dp; + struct inode *dp; struct direct *ep; struct buf *bp; int error; dp = VTOI(dvp); - if (cnp->cn_flags & DOWHITEOUT) { + if (flags & DOWHITEOUT) { /* * Whiteout entry: set d_ino to WINO. 
*/ @@ -871,33 +918,39 @@ ufs_dirremove(dvp, cnp) return (error); ep->d_ino = WINO; ep->d_type = DT_WHT; - error = VOP_BWRITE(bp); - dp->i_flag |= IN_CHANGE | IN_UPDATE; - return (error); + goto out; } + if ((error = VOP_BLKATOFF(dvp, + (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) + return (error); + if (dp->i_count == 0) { /* * First entry in block: set d_ino to zero. */ - error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, - &bp); - if (error) - return (error); ep->d_ino = 0; + } else { + /* + * Collapse new free space into previous entry. + */ + ep->d_reclen += dp->i_reclen; + } +out: + if (ip) { + ip->i_effnlink--; + ip->i_flag |= IN_CHANGE; + } + if (DOINGSOFTDEP(dvp)) { + if (ip) + softdep_setup_remove(bp, dp, ip, isrmdir); + bdwrite(bp); + } else { + if (ip) + ip->i_ffs_nlink--; /* XXX */ + error = VOP_BWRITE(bp); - dp->i_flag |= IN_CHANGE | IN_UPDATE; - return (error); } - /* - * Collapse new free space into previous entry. - */ - error = VOP_BLKATOFF(dvp, (off_t)(dp->i_offset - dp->i_count), - (char **)&ep, &bp); - if (error) - return (error); - ep->d_reclen += dp->i_reclen; - error = VOP_BWRITE(bp); dp->i_flag |= IN_CHANGE | IN_UPDATE; return (error); } @@ -908,9 +961,11 @@ ufs_dirremove(dvp, cnp) * set up by a call to namei. 
*/ int -ufs_dirrewrite(dp, ip, cnp) - struct inode *dp, *ip; - struct componentname *cnp; +ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir) + struct inode *dp, *oip; + ino_t newinum; + int newtype; + int isrmdir; { struct buf *bp; struct direct *ep; @@ -920,10 +975,18 @@ ufs_dirrewrite(dp, ip, cnp) error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp); if (error) return (error); - ep->d_ino = ip->i_number; + ep->d_ino = newinum; if (vdp->v_mount->mnt_maxsymlinklen > 0) - ep->d_type = IFTODT(ip->i_ffs_mode); - error = VOP_BWRITE(bp); + ep->d_type = newtype; + oip->i_effnlink--; + oip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(vdp)) { + softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); + bdwrite(bp); + } else { + oip->i_ffs_nlink--; /* XXX */ + error = VOP_BWRITE(bp); + } dp->i_flag |= IN_CHANGE | IN_UPDATE; return (error); } @@ -983,7 +1046,7 @@ ufs_dirempty(ip, parentino, cred) * 1 implies ".", 2 implies ".." if second * char is also "." */ - if (namlen == 1) + if (namlen == 1 && dp->d_ino == ip->i_number) continue; if (dp->d_name[1] == '.' 
&& dp->d_ino == parentino) continue; diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c index f6ea0606058..bc295d57e26 100644 --- a/sys/ufs/ufs/ufs_quota.c +++ b/sys/ufs/ufs/ufs_quota.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_quota.c,v 1.3 1997/05/30 08:35:10 downsj Exp $ */ +/* $OpenBSD: ufs_quota.c,v 1.4 1997/10/06 15:27:38 csapuntz Exp $ */ /* $NetBSD: ufs_quota.c,v 1.8 1996/02/09 22:36:09 christos Exp $ */ /* @@ -376,15 +376,11 @@ quotaon(p, mp, type, fname) if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) return (error); vp = nd.ni_vp; - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); if (vp->v_type != VREG) { (void) vn_close(vp, FREAD|FWRITE, p->p_ucred, p); return (EACCES); } - if (vfs_busy(mp)) { - (void) vn_close(vp, FREAD|FWRITE, p->p_ucred, p); - return (EBUSY); - } if (*vpp != vp) quotaoff(p, mp, type); ump->um_qflags[type] |= QTF_OPENING; @@ -414,9 +410,9 @@ quotaon(p, mp, type, fname) again: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { nextvp = vp->v_mntvnodes.le_next; - if (vp->v_writecount == 0) + if (vp->v_type == VNON || vp->v_writecount == 0) continue; - if (vget(vp, 1)) + if (vget(vp, LK_EXCLUSIVE, p)) goto again; if ((error = getinoquota(VTOI(vp))) != 0) { vput(vp); @@ -429,7 +425,6 @@ again: ump->um_qflags[type] &= ~QTF_OPENING; if (error) quotaoff(p, mp, type); - vfs_unbusy(mp); return (error); } @@ -449,8 +444,6 @@ quotaoff(p, mp, type) register struct inode *ip; int error; - if ((mp->mnt_flag & MNT_MPBUSY) == 0) - panic("quotaoff: not busy"); if ((qvp = ump->um_quotas[type]) == NULLVP) return (0); ump->um_qflags[type] |= QTF_CLOSING; @@ -461,7 +454,9 @@ quotaoff(p, mp, type) again: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { nextvp = vp->v_mntvnodes.le_next; - if (vget(vp, 1)) + if (vp->v_type == VNON) + continue; + if (vget(vp, LK_EXCLUSIVE, p)) goto again; ip = VTOI(vp); dq = ip->i_dquot[type]; @@ -621,16 +616,16 @@ qsync(mp) struct mount *mp; { struct ufsmount *ump = VFSTOUFS(mp); + struct proc *p 
= curproc; register struct vnode *vp, *nextvp; register struct dquot *dq; register int i; + int error = 0; /* * Check if the mount point has any quotas. * If not, simply return. */ - if ((mp->mnt_flag & MNT_MPBUSY) == 0) - panic("qsync: not busy"); for (i = 0; i < MAXQUOTAS; i++) if (ump->um_quotas[i] != NULLVP) break; @@ -640,22 +635,34 @@ qsync(mp) * Search vnodes associated with this mount point, * synchronizing any modified dquot structures. */ + simple_lock(&mntvnode_slock); again: - for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { - nextvp = vp->v_mntvnodes.le_next; - if (VOP_ISLOCKED(vp)) - continue; - if (vget(vp, 1)) + for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { + if (vp->v_mount != mp) goto again; + nextvp = vp->v_mntvnodes.le_next; + if (vp->v_type == VNON) + continue; + simple_lock(&vp->v_interlock); + simple_unlock(&mntvnode_slock); + error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); + if (error) { + simple_lock(&mntvnode_slock); + if (error == ENOENT) + goto again; + continue; + } for (i = 0; i < MAXQUOTAS; i++) { dq = VTOI(vp)->i_dquot[i]; if (dq != NODQUOT && (dq->dq_flags & DQ_MOD)) dqsync(vp, dq); } vput(vp); - if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp) - goto again; - } + simple_lock(&mntvnode_slock); + if (vp->v_mntvnodes.le_next != nextvp) + goto again; + } + simple_unlock(&mntvnode_slock); return (0); } @@ -697,6 +704,7 @@ dqget(vp, id, ump, type, dqp) register int type; struct dquot **dqp; { + struct proc *p = curproc; register struct dquot *dq; struct dqhash *dqh; register struct vnode *dqvp; @@ -752,7 +760,7 @@ dqget(vp, id, ump, type, dqp) * Initialize the contents of the dquot structure. 
*/ if (vp != dqvp) - VOP_LOCK(dqvp); + vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p); LIST_INSERT_HEAD(dqh, dq, dq_hash); DQREF(dq); dq->dq_flags = DQ_LOCK; @@ -772,7 +780,7 @@ dqget(vp, id, ump, type, dqp) if (auio.uio_resid == sizeof(struct dqblk) && error == 0) bzero((caddr_t)&dq->dq_dqb, sizeof(struct dqblk)); if (vp != dqvp) - VOP_UNLOCK(dqvp); + VOP_UNLOCK(dqvp, 0, p); if (dq->dq_flags & DQ_WANT) wakeup((caddr_t)dq); dq->dq_flags = 0; @@ -844,6 +852,7 @@ dqsync(vp, dq) struct vnode *vp; register struct dquot *dq; { + struct proc *p = curproc; struct vnode *dqvp; struct iovec aiov; struct uio auio; @@ -856,13 +865,13 @@ dqsync(vp, dq) if ((dqvp = dq->dq_ump->um_quotas[dq->dq_type]) == NULLVP) panic("dqsync: file"); if (vp != dqvp) - VOP_LOCK(dqvp); + vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p); while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; sleep((caddr_t)dq, PINOD+2); if ((dq->dq_flags & DQ_MOD) == 0) { if (vp != dqvp) - VOP_UNLOCK(dqvp); + VOP_UNLOCK(dqvp, 0, p); return (0); } } @@ -883,7 +892,7 @@ dqsync(vp, dq) wakeup((caddr_t)dq); dq->dq_flags &= ~(DQ_MOD|DQ_LOCK|DQ_WANT); if (vp != dqvp) - VOP_UNLOCK(dqvp); + VOP_UNLOCK(dqvp, 0, p); return (error); } diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c index 604c16fcb90..25148b78f61 100644 --- a/sys/ufs/ufs/ufs_readwrite.c +++ b/sys/ufs/ufs/ufs_readwrite.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_readwrite.c,v 1.9 1997/05/30 08:35:13 downsj Exp $ */ +/* $OpenBSD: ufs_readwrite.c,v 1.10 1997/10/06 15:27:39 csapuntz Exp $ */ /* $NetBSD: ufs_readwrite.c,v 1.9 1996/05/11 18:27:57 mycroft Exp $ */ /*- @@ -242,19 +242,13 @@ WRITE(v) xfersize = fs->fs_bsize - blkoffset; if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; -#ifdef LFS_READWRITE - (void)lfs_check(vp, lbn); - error = lfs_balloc(vp, blkoffset, xfersize, lbn, &bp); -#else if (fs->fs_bsize > xfersize) flags |= B_CLRBUF; else flags &= ~B_CLRBUF; - error = ffs_balloc(ip, - lbn, blkoffset + xfersize, ap->a_cred, &bp, flags); -#endif 
- if (error) + if ((error = VOP_BALLOC(vp, uio->uio_offset, xfersize, + ap->a_cred, flags, &bp)) != 0) break; if (uio->uio_offset + xfersize > ip->i_ffs_size) { ip->i_ffs_size = uio->uio_offset + xfersize; diff --git a/sys/ufs/ufs/ufs_vfsops.c b/sys/ufs/ufs/ufs_vfsops.c index 0e308fd39dd..11dfa3086c4 100644 --- a/sys/ufs/ufs/ufs_vfsops.c +++ b/sys/ufs/ufs/ufs_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_vfsops.c,v 1.3 1997/05/30 08:35:15 downsj Exp $ */ +/* $OpenBSD: ufs_vfsops.c,v 1.4 1997/10/06 15:27:39 csapuntz Exp $ */ /* $NetBSD: ufs_vfsops.c,v 1.4 1996/02/09 22:36:12 christos Exp $ */ /* @@ -125,39 +125,64 @@ ufs_quotactl(mp, cmds, uid, arg, p) if ((u_int)type >= MAXQUOTAS) return (EINVAL); + if (vfs_busy(mp, LK_NOWAIT, 0, p)) + return (0); + + switch (cmd) { case Q_QUOTAON: - return (quotaon(p, mp, type, arg)); + error = quotaon(p, mp, type, arg); + break; case Q_QUOTAOFF: - if (vfs_busy(mp)) - return (0); error = quotaoff(p, mp, type); - vfs_unbusy(mp); - return (error); + break; case Q_SETQUOTA: - return (setquota(mp, uid, type, arg)); + error = setquota(mp, uid, type, arg) ; + break; case Q_SETUSE: - return (setuse(mp, uid, type, arg)); + error = setuse(mp, uid, type, arg); + break; case Q_GETQUOTA: - return (getquota(mp, uid, type, arg)); + error = getquota(mp, uid, type, arg); + break; case Q_SYNC: - if (vfs_busy(mp)) - return (0); error = qsync(mp); - vfs_unbusy(mp); - return (error); + break; default: - return (EINVAL); + error = EINVAL; + break; } - /* NOTREACHED */ + + vfs_unbusy(mp, p); + return (error); +#endif +} + + +/* + * Initial UFS filesystems, done only once. 
+ */ +int +ufs_init(vfsp) + struct vfsconf *vfsp; +{ + static int done; + + if (done) + return (0); + done = 1; + ufs_ihashinit(); +#ifdef QUOTA + dqinit(); #endif + return (0); } /* diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index fe58d6e899e..12245ddece3 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_vnops.c,v 1.10 1997/07/03 17:49:49 deraadt Exp $ */ +/* $OpenBSD: ufs_vnops.c,v 1.11 1997/10/06 15:27:40 csapuntz Exp $ */ /* $NetBSD: ufs_vnops.c,v 1.18 1996/05/11 18:28:04 mycroft Exp $ */ /* @@ -90,6 +90,19 @@ union _qcvt { (q) = tmp.qcvt; \ } + +/* + * A virgin directory (no blushing please). + */ +static struct dirtemplate mastertemplate = { + 0, 12, DT_DIR, 1, ".", + 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." +}; +static struct odirtemplate omastertemplate = { + 0, 12, 1, ".", + 0, DIRBLKSIZ - 12, 2, ".." +}; + /* * Create a regular file */ @@ -117,19 +130,19 @@ ufs_mknod(v) void *v; { struct vop_mknod_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vattr *a_vap; - } */ *ap = v; - register struct vattr *vap = ap->a_vap; - register struct vnode **vpp = ap->a_vpp; - register struct inode *ip; + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap = v; + struct vattr *vap = ap->a_vap; + struct vnode **vpp = ap->a_vpp; + struct inode *ip; int error; if ((error = - ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), - ap->a_dvp, vpp, ap->a_cnp)) != 0) + ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), + ap->a_dvp, vpp, ap->a_cnp)) != 0) return (error); ip = VTOI(*vpp); ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; @@ -163,11 +176,11 @@ ufs_open(v) void *v; { struct vop_open_args /* { - struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap = v; + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap = v; /* * Files marked 
append-only must be opened for appending. @@ -194,11 +207,13 @@ ufs_close(v) struct ucred *a_cred; struct proc *a_p; } */ *ap = v; - register struct vnode *vp = ap->a_vp; - register struct inode *ip = VTOI(vp); + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); - if (vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED)) + simple_lock(&vp->v_interlock); + if (vp->v_usecount > 1) ITIMES(ip, &time, &time); + simple_unlock(&vp->v_interlock); return (0); } @@ -212,25 +227,27 @@ ufs_access(v) struct ucred *a_cred; struct proc *a_p; } */ *ap = v; - register struct vnode *vp = ap->a_vp; - register struct inode *ip = VTOI(vp); + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); mode_t mode = ap->a_mode; -#ifdef DIAGNOSTIC - if (!VOP_ISLOCKED(vp)) { - vprint("ufs_access: not locked", vp); - panic("ufs_access: not locked"); - } -#endif -#ifdef QUOTA - if (mode & VWRITE) + /* + * Disallow write attempts on read-only file systems; + * unless the file is a socket, fifo, or a block or + * character device resident on the file system. + */ + if (mode & VWRITE) { switch (vp->v_type) { int error; case VDIR: case VLNK: case VREG: + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); +#ifdef QUOTA if ((error = getinoquota(ip)) != 0) return (error); +#endif break; case VBAD: case VBLK: @@ -239,8 +256,9 @@ ufs_access(v) case VFIFO: case VNON: break; + } -#endif + } /* If immutable bit set, nobody gets to write it. 
*/ if ((mode & VWRITE) && (ip->i_ffs_flags & IMMUTABLE)) @@ -261,9 +279,9 @@ ufs_getattr(v) struct ucred *a_cred; struct proc *a_p; } */ *ap = v; - register struct vnode *vp = ap->a_vp; - register struct inode *ip = VTOI(vp); - register struct vattr *vap = ap->a_vap; + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + struct vattr *vap = ap->a_vap; ITIMES(ip, &time, &time); /* @@ -272,7 +290,7 @@ ufs_getattr(v) vap->va_fsid = ip->i_dev; vap->va_fileid = ip->i_number; vap->va_mode = ip->i_ffs_mode & ~IFMT; - vap->va_nlink = ip->i_ffs_nlink; + vap->va_nlink = ip->i_effnlink; vap->va_uid = ip->i_ffs_uid; vap->va_gid = ip->i_ffs_gid; vap->va_rdev = (dev_t)ip->i_ffs_rdev; @@ -311,11 +329,11 @@ ufs_setattr(v) struct ucred *a_cred; struct proc *a_p; } */ *ap = v; - register struct vattr *vap = ap->a_vap; - register struct vnode *vp = ap->a_vp; - register struct inode *ip = VTOI(vp); - register struct ucred *cred = ap->a_cred; - register struct proc *p = ap->a_p; + struct vattr *vap = ap->a_vap; + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + struct ucred *cred = ap->a_cred; + struct proc *p = ap->a_p; int error; /* @@ -328,6 +346,8 @@ ufs_setattr(v) return (EINVAL); } if (vap->va_flags != VNOVAL) { + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); if (cred->cr_uid != ip->i_ffs_uid && (error = suser(cred, &p->p_acflag))) return (error); @@ -337,7 +357,8 @@ ufs_setattr(v) return (EPERM); ip->i_ffs_flags = vap->va_flags; } else { - if (ip->i_ffs_flags & (SF_IMMUTABLE | SF_APPEND)) + if (ip->i_ffs_flags & (SF_IMMUTABLE | SF_APPEND) || + (vap->va_flags & UF_SETTABLE) != vap->va_flags) return (EPERM); ip->i_ffs_flags &= SF_SETTABLE; ip->i_ffs_flags |= (vap->va_flags & UF_SETTABLE); @@ -352,19 +373,36 @@ ufs_setattr(v) * Go through the fields and update if not VNOVAL. 
*/ if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, p); if (error) return (error); } if (vap->va_size != VNOVAL) { - if (vp->v_type == VDIR) - return (EISDIR); - error = VOP_TRUNCATE(vp, vap->va_size, 0, cred, p); - if (error) - return (error); + /* + * Disallow write attempts on read-only file systems; + * unless the file is a socket, fifo, or a block or + * character device resident on the file system. + */ + switch (vp->v_type) { + case VDIR: + return (EISDIR); + case VLNK: + case VREG: + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + break; + default: + break; + } + if ((error = VOP_TRUNCATE(vp, vap->va_size, 0, cred, p)) != 0) + return (error); } ip = VTOI(vp); if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); if (cred->cr_uid != ip->i_ffs_uid && (error = suser(cred, &p->p_acflag)) && ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || @@ -374,13 +412,16 @@ ufs_setattr(v) ip->i_flag |= IN_ACCESS; if (vap->va_mtime.tv_sec != VNOVAL) ip->i_flag |= IN_CHANGE | IN_UPDATE; - error = VOP_UPDATE(vp, &vap->va_atime, &vap->va_mtime, 1); + error = VOP_UPDATE(vp, &vap->va_atime, &vap->va_mtime, 0); if (error) return (error); } error = 0; - if (vap->va_mode != (mode_t)VNOVAL) + if (vap->va_mode != (mode_t)VNOVAL) { + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); error = ufs_chmod(vp, (int)vap->va_mode, cred, p); + } return (error); } @@ -390,12 +431,12 @@ ufs_setattr(v) */ static int ufs_chmod(vp, mode, cred, p) - register struct vnode *vp; - register int mode; - register struct ucred *cred; + struct vnode *vp; + int mode; + struct ucred *cred; struct proc *p; { - register struct inode *ip = VTOI(vp); + struct inode *ip = VTOI(vp); int error; if (cred->cr_uid != ip->i_ffs_uid && @@ -421,18 +462,18 @@ ufs_chmod(vp, mode, cred, p) */ static int 
ufs_chown(vp, uid, gid, cred, p) - register struct vnode *vp; + struct vnode *vp; uid_t uid; gid_t gid; struct ucred *cred; struct proc *p; { - register struct inode *ip = VTOI(vp); + struct inode *ip = VTOI(vp); uid_t ouid; gid_t ogid; int error = 0; #ifdef QUOTA - register int i; + int i; long change; #endif @@ -614,9 +655,9 @@ ufs_remove(v) struct vnode *a_vp; struct componentname *a_cnp; } */ *ap = v; - register struct inode *ip; - register struct vnode *vp = ap->a_vp; - register struct vnode *dvp = ap->a_dvp; + struct inode *ip; + struct vnode *vp = ap->a_vp; + struct vnode *dvp = ap->a_dvp; int error; ip = VTOI(vp); @@ -625,10 +666,8 @@ ufs_remove(v) error = EPERM; goto out; } - if ((error = ufs_dirremove(dvp, ap->a_cnp)) == 0) { - ip->i_ffs_nlink--; - ip->i_flag |= IN_CHANGE; - } + if ((error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0)) != 0) + goto out; out: if (dvp == vp) vrele(vp); @@ -650,10 +689,12 @@ ufs_link(v) struct vnode *a_vp; struct componentname *a_cnp; } */ *ap = v; - register struct vnode *dvp = ap->a_dvp; - register struct vnode *vp = ap->a_vp; - register struct componentname *cnp = ap->a_cnp; - register struct inode *ip; + struct vnode *dvp = ap->a_dvp; + struct vnode *vp = ap->a_vp; + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + struct inode *ip; + struct direct newdir; struct timespec ts; int error; @@ -671,7 +712,7 @@ ufs_link(v) error = EXDEV; goto out2; } - if (dvp != vp && (error = VOP_LOCK(vp))) { + if (dvp != vp && (error = vn_lock(vp, LK_EXCLUSIVE, p))) { VOP_ABORTOP(dvp, cnp); goto out2; } @@ -686,20 +727,25 @@ ufs_link(v) error = EPERM; goto out1; } + ip->i_effnlink++; ip->i_ffs_nlink++; ip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(vp)) + softdep_increase_linkcnt(ip); TIMEVAL_TO_TIMESPEC(&time, &ts); - error = VOP_UPDATE(vp, &ts, &ts, 1); - if (!error) - error = ufs_direnter(ip, dvp, cnp); + if ((error = VOP_UPDATE(vp, &ts, &ts, !DOINGSOFTDEP(vp))) == 0) { + ufs_makedirentry(ip, cnp, &newdir); + 
error = ufs_direnter(dvp, &newdir, cnp, NULL); + } if (error) { + ip->i_effnlink--; ip->i_ffs_nlink--; ip->i_flag |= IN_CHANGE; } FREE(cnp->cn_pnbuf, M_NAMEI); out1: if (dvp != vp) - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); out2: vput(dvp); return (error); @@ -742,7 +788,7 @@ ufs_whiteout(v) newdir.d_namlen = cnp->cn_namelen; bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); newdir.d_type = DT_WHT; - error = ufs_direnter2(dvp, &newdir, cnp->cn_cred, cnp->cn_proc); + error = ufs_direnter(dvp, &newdir, cnp, NULL); break; case DELETE: @@ -753,8 +799,11 @@ ufs_whiteout(v) #endif cnp->cn_flags &= ~DOWHITEOUT; - error = ufs_dirremove(dvp, cnp); + error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0); break; + default: + panic("ufs_whiteout: unknown op"); + /* NOTREACHED */ } if (cnp->cn_flags & HASBUF) { FREE(cnp->cn_pnbuf, M_NAMEI); @@ -801,17 +850,17 @@ ufs_rename(v) struct componentname *a_tcnp; } */ *ap = v; struct vnode *tvp = ap->a_tvp; - register struct vnode *tdvp = ap->a_tdvp; + struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; - register struct vnode *fdvp = ap->a_fdvp; - register struct componentname *tcnp = ap->a_tcnp; - register struct componentname *fcnp = ap->a_fcnp; - register struct inode *ip, *xp, *dp; - struct dirtemplate dirbuf; + struct vnode *fdvp = ap->a_fdvp; + struct componentname *tcnp = ap->a_tcnp; + struct componentname *fcnp = ap->a_fcnp; + struct proc *p = fcnp->cn_proc; + struct inode *ip, *xp, *dp; + struct direct newdir; struct timespec ts; int doingdirectory = 0, oldparent = 0, newparent = 0; int error = 0; - u_char namlen; #ifdef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || @@ -868,13 +917,13 @@ abortit: (void) relookup(fdvp, &fvp, fcnp); return (VOP_REMOVE(fdvp, fvp, fcnp)); } - if ((error = VOP_LOCK(fvp)) != 0) + if ((error = vn_lock(fvp, LK_EXCLUSIVE, p)) != 0) goto abortit; dp = VTOI(fdvp); ip = VTOI(fvp); if ((ip->i_ffs_flags & (IMMUTABLE | APPEND)) || (dp->i_ffs_flags & APPEND)) { - 
VOP_UNLOCK(fvp); + VOP_UNLOCK(fvp, 0, p); error = EPERM; goto abortit; } @@ -883,7 +932,7 @@ abortit: if (!error && tvp) error = VOP_ACCESS(tvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); if (error) { - VOP_UNLOCK(fvp); + VOP_UNLOCK(fvp, 0, p); error = EACCES; goto abortit; } @@ -895,7 +944,7 @@ abortit: (fcnp->cn_flags & ISDOTDOT) || (tcnp->cn_flags & ISDOTDOT) || (ip->i_flag & IN_RENAME)) { - VOP_UNLOCK(fvp); + VOP_UNLOCK(fvp, 0, p); error = EINVAL; goto abortit; } @@ -920,11 +969,14 @@ abortit: * completing our work, the link count * may be wrong, but correctable. */ + ip->i_effnlink++; ip->i_ffs_nlink++; ip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(fvp)) + softdep_increase_linkcnt(ip); TIMEVAL_TO_TIMESPEC(&time, &ts); - if ((error = VOP_UPDATE(fvp, &ts, &ts, 1)) != 0) { - VOP_UNLOCK(fvp); + if ((error = VOP_UPDATE(fvp, &ts, &ts, !DOINGSOFTDEP(fvp))) != 0) { + VOP_UNLOCK(fvp, 0, p); goto bad; } @@ -939,7 +991,7 @@ abortit: * call to checkpath(). */ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); - VOP_UNLOCK(fvp); + VOP_UNLOCK(fvp, 0, p); if (oldparent != dp->i_number) newparent = dp->i_number; if (doingdirectory && newparent) { @@ -978,13 +1030,19 @@ abortit: error = EMLINK; goto bad; } + dp->i_effnlink++; dp->i_ffs_nlink++; dp->i_flag |= IN_CHANGE; - if ((error = VOP_UPDATE(tdvp, &ts, &ts, 1)) != 0) + if (DOINGSOFTDEP(tdvp)) + softdep_increase_linkcnt(dp); + if ((error = VOP_UPDATE(tdvp, &ts, &ts, + !DOINGSOFTDEP(tdvp))) != 0) goto bad; } - if ((error = ufs_direnter(ip, tdvp, tcnp)) != 0) { + ufs_makedirentry(ip, tcnp, &newdir); + if ((error = ufs_direnter(tdvp, &newdir, tcnp, NULL)) != 0) { if (doingdirectory && newparent) { + dp->i_effnlink--; dp->i_ffs_nlink--; dp->i_flag |= IN_CHANGE; (void)VOP_UPDATE(tdvp, &ts, &ts, 1); @@ -1018,8 +1076,8 @@ abortit: * (both directories, or both not directories). 
*/ if ((xp->i_ffs_mode & IFMT) == IFDIR) { - if (!ufs_dirempty(xp, dp->i_number, tcnp->cn_cred) || - xp->i_ffs_nlink > 2) { + if (xp->i_effnlink > 2 || + !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) { error = ENOTEMPTY; goto bad; } @@ -1032,37 +1090,35 @@ abortit: error = EISDIR; goto bad; } - if ((error = ufs_dirrewrite(dp, ip, tcnp)) != 0) - goto bad; - /* - * If the target directory is in the same - * directory as the source directory, - * decrement the link count on the parent - * of the target directory. - */ - if (doingdirectory && !newparent) { - dp->i_ffs_nlink--; - dp->i_flag |= IN_CHANGE; - } - vput(tdvp); - /* - * Adjust the link count of the target to - * reflect the dirrewrite above. If this is - * a directory it is empty and there are - * no links to it, so we can squash the inode and - * any space associated with it. We disallowed - * renaming over top of a directory with links to - * it above, as the remaining link would point to - * a directory without "." or ".." entries. - */ - xp->i_ffs_nlink--; + + if ((error = ufs_dirrewrite(dp, xp, ip->i_number, + IFTODT(ip->i_ffs_mode), doingdirectory)) != 0) + goto bad; if (doingdirectory) { - if (--xp->i_ffs_nlink != 0) - panic("rename: linked directory"); - error = VOP_TRUNCATE(tvp, (off_t)0, IO_SYNC, - tcnp->cn_cred, tcnp->cn_proc); + dp->i_effnlink--; + dp->i_flag |= IN_CHANGE; + xp->i_effnlink--; + xp->i_flag |= IN_CHANGE; } - xp->i_flag |= IN_CHANGE; + if (doingdirectory && !DOINGSOFTDEP(tvp)) { + /* + * Truncate inode. The only stuff left in the directory + * is "." and "..". The "." reference is inconsequential + * since we are quashing it. We have removed the "." + * reference and the reference in the parent directory, + * but there may be other hard links. The soft + * dependency code will arrange to do these operations + * after the parent directory entry has been deleted on + * disk, so when running with that code we avoid doing + * them now. 
+ */ + dp->i_ffs_nlink--; + xp->i_ffs_nlink--; + if ((error = VOP_TRUNCATE(tvp, (off_t)0, IO_SYNC, + tcnp->cn_cred, tcnp->cn_proc)) != 0) + goto bad; + } + vput(tdvp); vput(tvp); xp = NULL; } @@ -1092,10 +1148,9 @@ abortit: * changed while the new name has been entered. If the source is * a file then the entry may have been unlinked or renamed. In * either case there is no further work to be done. If the source - * is a directory then it cannot have been rmdir'ed; its link - * count of three would cause a rmdir to fail with ENOTEMPTY. - * The IRENAME flag ensures that it cannot be moved by another - * rename. + * is a directory then it cannot have been rmdir'ed; the IN_RENAME + * flag ensures that it cannot be moved by another rename or removed + * by a rmdir. */ if (xp != ip) { if (doingdirectory) @@ -1108,44 +1163,11 @@ abortit: * and ".." set to point to the new parent. */ if (doingdirectory && newparent) { - dp->i_ffs_nlink--; - dp->i_flag |= IN_CHANGE; - error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf, - sizeof (struct dirtemplate), (off_t)0, - UIO_SYSSPACE, IO_NODELOCKED, - tcnp->cn_cred, (int *)0, (struct proc *)0); - if (error == 0) { -# if (BYTE_ORDER == LITTLE_ENDIAN) - if (fvp->v_mount->mnt_maxsymlinklen <= 0) - namlen = dirbuf.dotdot_type; - else - namlen = dirbuf.dotdot_namlen; -# else - namlen = dirbuf.dotdot_namlen; -# endif - if (namlen != 2 || - dirbuf.dotdot_name[0] != '.' 
|| - dirbuf.dotdot_name[1] != '.') { - ufs_dirbad(xp, (doff_t)12, - "rename: mangled dir"); - } else { - dirbuf.dotdot_ino = newparent; - (void) vn_rdwr(UIO_WRITE, fvp, - (caddr_t)&dirbuf, - sizeof (struct dirtemplate), - (off_t)0, UIO_SYSSPACE, - IO_NODELOCKED|IO_SYNC, - tcnp->cn_cred, (int *)0, - (struct proc *)0); - cache_purge(fdvp); - } - } - } - error = ufs_dirremove(fdvp, fcnp); - if (!error) { - xp->i_ffs_nlink--; - xp->i_flag |= IN_CHANGE; + xp->i_offset = mastertemplate.dot_reclen; + ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0); + cache_purge(fdvp); } + error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0); xp->i_flag &= ~IN_RENAME; } if (dp) @@ -1162,7 +1184,8 @@ bad: out: if (doingdirectory) ip->i_flag &= ~IN_RENAME; - if (VOP_LOCK(fvp) == 0) { + if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) { + ip->i_effnlink--; ip->i_ffs_nlink--; ip->i_flag |= IN_CHANGE; vput(fvp); @@ -1172,18 +1195,6 @@ out: } /* - * A virgin directory (no blushing please). - */ -static struct dirtemplate mastertemplate = { - 0, 12, DT_DIR, 1, ".", - 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." -}; -static struct odirtemplate omastertemplate = { - 0, 12, 1, ".", - 0, DIRBLKSIZ - 12, 2, ".." -}; - -/* * Mkdir system call */ int @@ -1196,11 +1207,13 @@ ufs_mkdir(v) struct componentname *a_cnp; struct vattr *a_vap; } */ *ap = v; - register struct vnode *dvp = ap->a_dvp; - register struct vattr *vap = ap->a_vap; - register struct componentname *cnp = ap->a_cnp; - register struct inode *ip, *dp; + struct vnode *dvp = ap->a_dvp; + struct vattr *vap = ap->a_vap; + struct componentname *cnp = ap->a_cnp; + struct inode *ip, *dp; struct vnode *tvp; + struct buf *bp; + struct direct newdir; struct dirtemplate dirtemplate, *dtp; struct timespec ts; int error, dmode; @@ -1239,24 +1252,31 @@ ufs_mkdir(v) ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_ffs_mode = dmode; tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). 
*/ + ip->i_effnlink = 2; ip->i_ffs_nlink = 2; + if (DOINGSOFTDEP(tvp)) + softdep_increase_linkcnt(ip); + if (cnp->cn_flags & ISWHITEOUT) ip->i_ffs_flags |= UF_OPAQUE; - TIMEVAL_TO_TIMESPEC(&time, &ts); - error = VOP_UPDATE(tvp, &ts, &ts, 1); /* - * Bump link count in parent directory - * to reflect work done below. Should - * be done before reference is created - * so reparation is possible if we crash. + * Bump link count in parent directory to reflect work done below. + * Should be done before reference is create so cleanup is + * possible if we crash. */ + dp->i_effnlink++; dp->i_ffs_nlink++; dp->i_flag |= IN_CHANGE; - if ((error = VOP_UPDATE(dvp, &ts, &ts, 1)) != 0) + if (DOINGSOFTDEP(dvp)) + softdep_increase_linkcnt(dp); + TIMEVAL_TO_TIMESPEC(&time, &ts); + if ((error = VOP_UPDATE(dvp, &ts, &ts, !DOINGSOFTDEP(dvp))) != 0) goto bad; - /* Initialize directory with "." and ".." from static template. */ + /* + * Initialize directory with "." and ".." from static template. + */ if (dvp->v_mount->mnt_maxsymlinklen > 0) dtp = &mastertemplate; else @@ -1264,40 +1284,56 @@ ufs_mkdir(v) dirtemplate = *dtp; dirtemplate.dot_ino = ip->i_number; dirtemplate.dotdot_ino = dp->i_number; - error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate, - sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE, - IO_NODELOCKED|IO_SYNC, cnp->cn_cred, (int *)0, (struct proc *)0); - if (error) { - dp->i_ffs_nlink--; - dp->i_flag |= IN_CHANGE; + + if ((error = VOP_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred, + B_CLRBUF, &bp)) != 0) + goto bad; + ip->i_ffs_size = DIRBLKSIZ; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + vnode_pager_setsize(tvp, (u_long)ip->i_ffs_size); + bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); + if ((error = VOP_UPDATE(tvp, &ts, &ts, !DOINGSOFTDEP(tvp))) != 0) { + (void)VOP_BWRITE(bp); goto bad; - } - if (DIRBLKSIZ > VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) - panic("ufs_mkdir: blksize"); /* XXX should grow with balloc() */ - else { - ip->i_ffs_size 
= DIRBLKSIZ; - ip->i_flag |= IN_CHANGE; } - /* Directory set up, now install it's entry in the parent directory. */ - if ((error = ufs_direnter(ip, dvp, cnp)) != 0) { - dp->i_ffs_nlink--; - dp->i_flag |= IN_CHANGE; - } -bad: /* - * No need to do an explicit VOP_TRUNCATE here, vrele will do this - * for us because we set the link count to 0. + * Directory set up, now install it's entry in the parent directory. + * + * If we are not doing soft dependencies, then we must write out the + * buffer containing the new directory body before entering the new + * name in the parent. If we are doing soft dependencies, then the + * buffer containing the new directory body will be passed to and + * released in the soft dependency code after the code has attached + * an appropriate ordering dependency to the buffer which ensures that + * the buffer is written before the new name is written in the parent. */ - if (error) { - ip->i_ffs_nlink = 0; - ip->i_flag |= IN_CHANGE; + if (!DOINGSOFTDEP(dvp) && ((error = VOP_BWRITE(bp)) != 0)) + goto bad; + ufs_makedirentry(ip, cnp, &newdir); + error = ufs_direnter(dvp, &newdir, cnp, bp); + +bad: + if (error == 0) { + *ap->a_vpp = tvp; + } else { + dp->i_effnlink--; + dp->i_ffs_nlink--; + dp->i_flag |= IN_CHANGE; + /* + * No need to do an explicit VOP_TRUNCATE here, vrele will + * do this for us because we set the link count to 0. 
+ */ + ip->i_effnlink = 0; + ip->i_ffs_nlink = 0; + ip->i_flag |= IN_CHANGE; + vput(tvp); - } else - *ap->a_vpp = tvp; + } out: FREE(cnp->cn_pnbuf, M_NAMEI); vput(dvp); + return (error); } @@ -1313,10 +1349,10 @@ ufs_rmdir(v) struct vnode *a_vp; struct componentname *a_cnp; } */ *ap = v; - register struct vnode *vp = ap->a_vp; - register struct vnode *dvp = ap->a_dvp; - register struct componentname *cnp = ap->a_cnp; - register struct inode *ip, *dp; + struct vnode *vp = ap->a_vp; + struct vnode *dvp = ap->a_dvp; + struct componentname *cnp = ap->a_cnp; + struct inode *ip, *dp; int error; ip = VTOI(vp); @@ -1330,14 +1366,17 @@ ufs_rmdir(v) return (EINVAL); } /* - * Verify the directory is empty (and valid). - * (Rmdir ".." won't be valid since - * ".." will contain a reference to - * the current directory and thus be - * non-empty.) + * Do not remove a directory that is in the process of being renamed. + * Verify the directory is empty (and valid). Rmdir ".." will not be + * valid since ".." will contain a reference to the current directory + * and thus be non-empty. */ error = 0; - if (ip->i_ffs_nlink != 2 || + if (ip->i_flag & IN_RENAME) { + error = EINVAL; + goto out; + } + if (ip->i_effnlink != 2 || !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; @@ -1352,31 +1391,33 @@ ufs_rmdir(v) * inode. If we crash in between, the directory * will be reattached to lost+found, */ - if ((error = ufs_dirremove(dvp, cnp)) != 0) + if ((error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1)) != 0) goto out; - dp->i_ffs_nlink--; - dp->i_flag |= IN_CHANGE; cache_purge(dvp); - vput(dvp); - dvp = NULL; - /* - * Truncate inode. The only stuff left - * in the directory is "." and "..". The - * "." reference is inconsequential since - * we're quashing it. The ".." reference - * has already been adjusted above. We've - * removed the "." 
reference and the reference - * in the parent directory, but there may be - * other hard links so decrement by 2 and - * worry about them later. + /* + * Truncate inode. The only stuff left in the directory is "." and + * "..". The "." reference is inconsequential since we are quashing + * it. We have removed the "." reference and the reference in the + * parent directory, but there may be other hard links. So, + * ufs_dirremove will set the UF_IMMUTABLE flag to ensure that no + * new entries are made. The soft dependency code will arrange to + * do these operations after the parent directory entry has been + * deleted on disk, so when running with that code we avoid doing + * them now. */ - ip->i_ffs_nlink -= 2; - error = VOP_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred, - cnp->cn_proc); - cache_purge(ITOV(ip)); + dp->i_effnlink--; + dp->i_flag |= IN_CHANGE; + ip->i_effnlink--; + ip->i_flag |= IN_CHANGE; + if (!DOINGSOFTDEP(vp)) { + dp->i_ffs_nlink--; + ip->i_ffs_nlink--; + error = VOP_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred, + cnp->cn_proc); + } + cache_purge(vp); out: - if (dvp) - vput(dvp); + vput(dvp); vput(vp); return (error); } @@ -1395,8 +1436,8 @@ ufs_symlink(v) struct vattr *a_vap; char *a_target; } */ *ap = v; - register struct vnode *vp, **vpp = ap->a_vpp; - register struct inode *ip; + struct vnode *vp, **vpp = ap->a_vpp; + struct inode *ip; int len, error; error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, @@ -1436,10 +1477,10 @@ ufs_readdir(v) struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; - u_long *a_cookies; - int ncookies; + u_long **a_cookies; + int *ncookies; } */ *ap = v; - register struct uio *uio = ap->a_uio; + struct uio *uio = ap->a_uio; int error; size_t count, lost; off_t off = uio->uio_offset; @@ -1495,9 +1536,10 @@ ufs_readdir(v) error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); # endif if (!error && ap->a_ncookies) { - register struct dirent *dp; - register u_long *cookies = ap->a_cookies; - register int ncookies 
= ap->a_ncookies; + struct dirent *dp, *dpstart; + off_t offstart; + u_long *cookies; + int ncookies; /* * Only the NFS server and emulations use cookies, and they @@ -1506,17 +1548,28 @@ ufs_readdir(v) */ if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) panic("ufs_readdir: lost in space"); - dp = (struct dirent *) - (uio->uio_iov->iov_base - (uio->uio_offset - off)); - while (ncookies-- && off < uio->uio_offset) { - if (dp->d_reclen == 0) - break; + + dpstart = (struct dirent *) + (uio->uio_iov->iov_base - (uio->uio_offset - off)); + offstart = off; + for (dp = dpstart, ncookies = 0; off < uio->uio_offset; ) { + if (dp->d_reclen == 0) + break; + off += dp->d_reclen; + ncookies++; + dp = (struct dirent *)((caddr_t)dp + dp->d_reclen); + } + lost += uio->uio_offset - off; + uio->uio_offset = off; + MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, + M_WAITOK); + *ap->a_ncookies = ncookies; + *ap->a_cookies = cookies; + for (off = offstart, dp = dpstart; off < uio->uio_offset; ) { + *(cookies++) = off; off += dp->d_reclen; - *(cookies++) = off; - dp = (struct dirent *)((caddr_t)dp + dp->d_reclen); + dp = (struct dirent *)((caddr_t)dp + dp->d_reclen); } - lost += uio->uio_offset - off; - uio->uio_offset = off; } uio->uio_resid += lost; *ap->a_eofflag = VTOI(ap->a_vp)->i_ffs_size <= uio->uio_offset; @@ -1535,8 +1588,8 @@ ufs_readlink(v) struct uio *a_uio; struct ucred *a_cred; } */ *ap = v; - register struct vnode *vp = ap->a_vp; - register struct inode *ip = VTOI(vp); + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); int isize; isize = ip->i_ffs_size; @@ -1575,82 +1628,31 @@ ufs_lock(v) { struct vop_lock_args /* { struct vnode *a_vp; + int a_flags; + sturct proc *a_p; } */ *ap = v; - register struct vnode *vp = ap->a_vp; - register struct inode *ip; -#ifdef DIAGNOSTIC - struct proc *p = curproc; /* XXX */ -#endif + struct vnode *vp = ap->a_vp; -start: - while (vp->v_flag & VXLOCK) { - vp->v_flag |= VXWANT; - sleep((caddr_t)vp, PINOD); 
- } - if (vp->v_tag == VT_NON) - return (ENOENT); - ip = VTOI(vp); - if (ip->i_flag & IN_LOCKED) { - ip->i_flag |= IN_WANTED; -#ifdef DIAGNOSTIC - if (p) { - if (p->p_pid == ip->i_lockholder) - panic("locking against myself"); - ip->i_lockwaiter = p->p_pid; - } else - ip->i_lockwaiter = -1; -#endif - (void) sleep((caddr_t)ip, PINOD); - goto start; - } -#ifdef DIAGNOSTIC - ip->i_lockwaiter = 0; - if (ip->i_lockholder != 0) - panic("lockholder (%d) != 0", ip->i_lockholder); - if (p && p->p_pid == 0) - printf("locking by process 0\n"); - if (p) - ip->i_lockholder = p->p_pid; - else - ip->i_lockholder = -1; -#endif - ip->i_flag |= IN_LOCKED; - return (0); + return (lockmgr(&VTOI(vp)->i_lock, ap->a_flags, &vp->v_interlock, + ap->a_p)); } /* * Unlock an inode. If WANT bit is on, wakeup. */ -int lockcount = 90; int ufs_unlock(v) void *v; { struct vop_unlock_args /* { struct vnode *a_vp; + int a_flags; + struct proc *a_p; } */ *ap = v; - register struct inode *ip = VTOI(ap->a_vp); -#ifdef DIAGNOSTIC - struct proc *p = curproc; /* XXX */ -#endif + struct vnode *vp = ap->a_vp; -#ifdef DIAGNOSTIC - if ((ip->i_flag & IN_LOCKED) == 0) { - vprint("ufs_unlock: unlocked inode", ap->a_vp); - panic("ufs_unlock NOT LOCKED"); - } - if (p && p->p_pid != ip->i_lockholder && p->p_pid > -1 && - ip->i_lockholder > -1 && lockcount++ < 100) - panic("unlocker (%d) != lock holder (%d)", - p->p_pid, ip->i_lockholder); - ip->i_lockholder = 0; -#endif - ip->i_flag &= ~IN_LOCKED; - if (ip->i_flag & IN_WANTED) { - ip->i_flag &= ~IN_WANTED; - wakeup((caddr_t)ip); - } - return (0); + return (lockmgr(&VTOI(vp)->i_lock, ap->a_flags | LK_RELEASE, + &vp->v_interlock, ap->a_p)); } /* @@ -1664,9 +1666,7 @@ ufs_islocked(v) struct vnode *a_vp; } */ *ap = v; - if (VTOI(ap->a_vp)->i_flag & IN_LOCKED) - return (1); - return (0); + return (lockstatus(&VTOI(ap->a_vp)->i_lock)); } /* @@ -1680,9 +1680,9 @@ ufs_strategy(v) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap = v; - register struct buf *bp = 
ap->a_bp; - register struct vnode *vp = bp->b_vp; - register struct inode *ip; + struct buf *bp = ap->a_bp; + struct vnode *vp = bp->b_vp; + struct inode *ip; int error; ip = VTOI(vp); @@ -1720,8 +1720,8 @@ ufs_print(v) struct vop_print_args /* { struct vnode *a_vp; } */ *ap = v; - register struct vnode *vp = ap->a_vp; - register struct inode *ip = VTOI(vp); + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); printf("tag VT_UFS, ino %d, on dev %d, %d", ip->i_number, major(ip->i_dev), minor(ip->i_dev)); @@ -1729,12 +1729,7 @@ ufs_print(v) if (vp->v_type == VFIFO) fifo_printinfo(vp); #endif /* FIFO */ - printf("%s\n", (ip->i_flag & IN_LOCKED) ? " (LOCKED)" : ""); - if (ip->i_lockholder == 0) - return (0); - printf("\towner pid %d", ip->i_lockholder); - if (ip->i_lockwaiter) - printf(" waiting pid %d", ip->i_lockwaiter); + lockmgr_printinfo(&ip->i_lock); printf("\n"); return (0); } @@ -1796,10 +1791,12 @@ ufsspec_close(v) struct ucred *a_cred; struct proc *a_p; } */ *ap = v; - register struct inode *ip = VTOI(ap->a_vp); + struct inode *ip = VTOI(ap->a_vp); - if (ap->a_vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED)) + simple_lock(&vp->v_interlock); + if (ap->a_vp->v_usecount > 1) ITIMES(ip, &time, &time); + simple_unlock(&vp->v_interlock); return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap)); } @@ -1864,10 +1861,13 @@ ufsfifo_close(v) struct proc *a_p; } */ *ap = v; extern int (**fifo_vnodeop_p) __P((void *)); - register struct inode *ip = VTOI(ap->a_vp); + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); - if (ap->a_vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED)) + simple_lock(&vp->v_interlock); + if (ap->a_vp->v_usecount > 1) ITIMES(ip, &time, &time); + simple_unlock(&vp->v_interlock); return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap)); } #endif /* FIFO */ @@ -1924,7 +1924,7 @@ ufs_advlock(v) struct flock *a_fl; int a_flags; } */ *ap = v; - register struct inode *ip = VTOI(ap->a_vp); + struct inode *ip = VTOI(ap->a_vp); return 
(lf_advlock(&ip->i_lockf, ip->i_ffs_size, ap->a_id, ap->a_op, ap->a_fl, ap->a_flags)); @@ -1953,9 +1953,9 @@ ufs_vinit(mntp, specops, fifoops, vpp) if ((nvp = checkalias(vp, ip->i_ffs_rdev, mntp)) != NULL) { /* * Discard unneeded vnode, but save its inode. + * Note that the lock is carried over in the inode + * to the replacement vnode. */ - ufs_ihashrem(ip); - VOP_UNLOCK(vp); nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; @@ -1966,7 +1966,6 @@ ufs_vinit(mntp, specops, fifoops, vpp) */ vp = nvp; ip->i_vnode = vp; - ufs_ihashins(ip); } break; case VFIFO: @@ -2005,7 +2004,8 @@ ufs_makeinode(mode, dvp, vpp, cnp) struct vnode **vpp; struct componentname *cnp; { - register struct inode *ip, *pdir; + struct inode *ip, *pdir; + struct direct newdir; struct timespec ts; struct vnode *tvp; int error; @@ -2040,7 +2040,10 @@ ufs_makeinode(mode, dvp, vpp, cnp) ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_ffs_mode = mode; tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ + ip->i_effnlink = 1; ip->i_ffs_nlink = 1; + if (DOINGSOFTDEP(tvp)) + softdep_increase_linkcnt(ip); if ((ip->i_ffs_mode & ISGID) && !groupmember(ip->i_ffs_gid, cnp->cn_cred) && suser(cnp->cn_cred, NULL)) @@ -2053,10 +2056,13 @@ ufs_makeinode(mode, dvp, vpp, cnp) * Make sure inode goes to disk before directory entry. 
*/ TIMEVAL_TO_TIMESPEC(&time, &ts); - if ((error = VOP_UPDATE(tvp, &ts, &ts, 1)) != 0) + if ((error = VOP_UPDATE(tvp, &ts, &ts, !DOINGSOFTDEP(tvp))) != 0) goto bad; - if ((error = ufs_direnter(ip, dvp, cnp)) != 0) + + ufs_makedirentry(ip, cnp, &newdir); + if ((error = ufs_direnter(dvp, &newdir, cnp, NULL)) != 0) goto bad; + if ((cnp->cn_flags & SAVESTART) == 0) FREE(cnp->cn_pnbuf, M_NAMEI); vput(dvp); @@ -2070,8 +2076,12 @@ bad: */ free(cnp->cn_pnbuf, M_NAMEI); vput(dvp); + ip->i_effnlink = 0; ip->i_ffs_nlink = 0; ip->i_flag |= IN_CHANGE; vput(tvp); + return (error); } + + diff --git a/sys/vm/kern_lock.c b/sys/vm/kern_lock.c deleted file mode 100644 index 6e856bc9c7b..00000000000 --- a/sys/vm/kern_lock.c +++ /dev/null @@ -1,538 +0,0 @@ -/* $OpenBSD: kern_lock.c,v 1.6 1997/07/25 06:03:03 mickey Exp $ */ -/* $NetBSD: kern_lock.c,v 1.10 1994/10/30 19:11:09 cgd Exp $ */ - -/* - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * The Mach Operating System project at Carnegie-Mellon University. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)kern_lock.c 8.1 (Berkeley) 6/11/93 - * - * - * Copyright (c) 1987, 1990 Carnegie-Mellon University. - * All rights reserved. - * - * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. - */ - -/* - * Locking primitives implementation - */ - -#include <sys/param.h> -#include <sys/systm.h> - -#include <vm/vm.h> -#include <vm/vm_extern.h> - -/* XXX */ -#include <sys/proc.h> -typedef void *thread_t; -#define current_thread() ((thread_t)&curproc->p_thread) -/* XXX */ - -#if NCPUS > 1 - -/* - * Module: lock - * Function: - * Provide reader/writer sychronization. - * Implementation: - * Simple interlock on a bit. Readers first interlock - * increment the reader count, then let go. Writers hold - * the interlock (thus preventing further readers), and - * wait for already-accepted readers to go away. - */ - -/* - * The simple-lock routines are the primitives out of which - * the lock package is built. The implementation is left - * to the machine-dependent code. - */ - -#ifdef notdef -/* - * A sample implementation of simple locks. - * assumes: - * boolean_t test_and_set(boolean_t *) - * indivisibly sets the boolean to TRUE - * and returns its old value - * and that setting a boolean to FALSE is indivisible. - */ -/* - * simple_lock_init initializes a simple lock. A simple lock - * may only be used for exclusive locks. 
- */ - -void simple_lock_init(l) - simple_lock_t l; -{ - *(boolean_t *)l = FALSE; -} - -void simple_lock(l) - simple_lock_t l; -{ - while (test_and_set((boolean_t *)l)) - continue; -} - -void simple_unlock(l) - simple_lock_t l; -{ - *(boolean_t *)l = FALSE; -} - -boolean_t simple_lock_try(l) - simple_lock_t l; -{ - return (!test_and_set((boolean_t *)l)); -} -#endif /* notdef */ -#endif /* NCPUS > 1 */ - -#if NCPUS > 1 -int lock_wait_time = 100; -#else /* NCPUS > 1 */ - - /* - * It is silly to spin on a uni-processor as if we - * thought something magical would happen to the - * want_write bit while we are executing. - */ -int lock_wait_time = 0; -#endif /* NCPUS > 1 */ - - -/* - * Routine: lock_init - * Function: - * Initialize a lock; required before use. - * Note that clients declare the "struct lock" - * variables and then initialize them, rather - * than getting a new one from this module. - */ -void lock_init(l, can_sleep) - lock_t l; - boolean_t can_sleep; -{ - bzero(l, sizeof(lock_data_t)); - simple_lock_init(&l->interlock); - l->want_write = FALSE; - l->want_upgrade = FALSE; - l->read_count = 0; - l->can_sleep = can_sleep; - l->thread = (char *)-1; /* XXX */ - l->recursion_depth = 0; -} - -void lock_sleepable(l, can_sleep) - lock_t l; - boolean_t can_sleep; -{ - simple_lock(&l->interlock); - l->can_sleep = can_sleep; - simple_unlock(&l->interlock); -} - - -/* - * Sleep locks. These use the same data structure and algorithm - * as the spin locks, but the process sleeps while it is waiting - * for the lock. These work on uniprocessor systems. - */ - -void lock_write(l) - register lock_t l; -{ - register int i; - - simple_lock(&l->interlock); - - if (((thread_t)l->thread) == current_thread()) { - /* - * Recursive lock. - */ - l->recursion_depth++; - simple_unlock(&l->interlock); - return; - } - - /* - * Try to acquire the want_write bit. 
- */ - while (l->want_write) { - if ((i = lock_wait_time) > 0) { - simple_unlock(&l->interlock); - while (--i > 0 && l->want_write) - continue; - simple_lock(&l->interlock); - } - - if (l->can_sleep && l->want_write) { - l->waiting = TRUE; - thread_sleep(l, &l->interlock, FALSE); - simple_lock(&l->interlock); - } - } - l->want_write = TRUE; - - /* Wait for readers (and upgrades) to finish */ - - while ((l->read_count != 0) || l->want_upgrade) { - if ((i = lock_wait_time) > 0) { - simple_unlock(&l->interlock); - while (--i > 0 && (l->read_count != 0 || - l->want_upgrade)) - continue; - simple_lock(&l->interlock); - } - - if (l->can_sleep && (l->read_count != 0 || l->want_upgrade)) { - l->waiting = TRUE; - thread_sleep(l, &l->interlock, FALSE); - simple_lock(&l->interlock); - } - } - simple_unlock(&l->interlock); -} - -void lock_done(l) - register lock_t l; -{ - simple_lock(&l->interlock); - - if (l->read_count != 0) - l->read_count--; - else - if (l->recursion_depth != 0) - l->recursion_depth--; - else - if (l->want_upgrade) - l->want_upgrade = FALSE; - else - l->want_write = FALSE; - - if (l->waiting) { - l->waiting = FALSE; - thread_wakeup(l); - } - simple_unlock(&l->interlock); -} - -void lock_read(l) - register lock_t l; -{ - register int i; - - simple_lock(&l->interlock); - - if (((thread_t)l->thread) == current_thread()) { - /* - * Recursive lock. - */ - l->read_count++; - simple_unlock(&l->interlock); - return; - } - - while (l->want_write || l->want_upgrade) { - if ((i = lock_wait_time) > 0) { - simple_unlock(&l->interlock); - while (--i > 0 && (l->want_write || l->want_upgrade)) - continue; - simple_lock(&l->interlock); - } - - if (l->can_sleep && (l->want_write || l->want_upgrade)) { - l->waiting = TRUE; - thread_sleep(l, &l->interlock, FALSE); - simple_lock(&l->interlock); - } - } - - l->read_count++; - simple_unlock(&l->interlock); -} - -/* - * Routine: lock_read_to_write - * Function: - * Improves a read-only lock to one with - * write permission. 
If another reader has - * already requested an upgrade to a write lock, - * no lock is held upon return. - * - * Returns TRUE if the upgrade *failed*. - */ -boolean_t lock_read_to_write(l) - register lock_t l; -{ - register int i; - - simple_lock(&l->interlock); - - l->read_count--; - - if (((thread_t)l->thread) == current_thread()) { - /* - * Recursive lock. - */ - l->recursion_depth++; - simple_unlock(&l->interlock); - return(FALSE); - } - - if (l->want_upgrade) { - /* - * Someone else has requested upgrade. - * Since we've released a read lock, wake - * him up. - */ - if (l->waiting) { - l->waiting = FALSE; - thread_wakeup(l); - } - - simple_unlock(&l->interlock); - return (TRUE); - } - - l->want_upgrade = TRUE; - - while (l->read_count != 0) { - if ((i = lock_wait_time) > 0) { - simple_unlock(&l->interlock); - while (--i > 0 && l->read_count != 0) - continue; - simple_lock(&l->interlock); - } - - if (l->can_sleep && l->read_count != 0) { - l->waiting = TRUE; - thread_sleep(l, &l->interlock, FALSE); - simple_lock(&l->interlock); - } - } - - simple_unlock(&l->interlock); - return (FALSE); -} - -void lock_write_to_read(l) - register lock_t l; -{ - simple_lock(&l->interlock); - - l->read_count++; - if (l->recursion_depth != 0) - l->recursion_depth--; - else - if (l->want_upgrade) - l->want_upgrade = FALSE; - else - l->want_write = FALSE; - - if (l->waiting) { - l->waiting = FALSE; - thread_wakeup(l); - } - - simple_unlock(&l->interlock); -} - - -/* - * Routine: lock_try_write - * Function: - * Tries to get a write lock. - * - * Returns FALSE if the lock is not held on return. - */ - -boolean_t lock_try_write(l) - register lock_t l; -{ - - simple_lock(&l->interlock); - - if (((thread_t)l->thread) == current_thread()) { - /* - * Recursive lock - */ - l->recursion_depth++; - simple_unlock(&l->interlock); - return(TRUE); - } - - if (l->want_write || l->want_upgrade || l->read_count) { - /* - * Can't get lock. 
- */ - simple_unlock(&l->interlock); - return(FALSE); - } - - /* - * Have lock. - */ - - l->want_write = TRUE; - simple_unlock(&l->interlock); - return(TRUE); -} - -/* - * Routine: lock_try_read - * Function: - * Tries to get a read lock. - * - * Returns FALSE if the lock is not held on return. - */ - -boolean_t lock_try_read(l) - register lock_t l; -{ - simple_lock(&l->interlock); - - if (((thread_t)l->thread) == current_thread()) { - /* - * Recursive lock - */ - l->read_count++; - simple_unlock(&l->interlock); - return(TRUE); - } - - if (l->want_write || l->want_upgrade) { - simple_unlock(&l->interlock); - return(FALSE); - } - - l->read_count++; - simple_unlock(&l->interlock); - return(TRUE); -} - -/* - * Routine: lock_try_read_to_write - * Function: - * Improves a read-only lock to one with - * write permission. If another reader has - * already requested an upgrade to a write lock, - * the read lock is still held upon return. - * - * Returns FALSE if the upgrade *failed*. - */ -boolean_t lock_try_read_to_write(l) - register lock_t l; -{ - - simple_lock(&l->interlock); - - if (((thread_t)l->thread) == current_thread()) { - /* - * Recursive lock - */ - l->read_count--; - l->recursion_depth++; - simple_unlock(&l->interlock); - return(TRUE); - } - - if (l->want_upgrade) { - simple_unlock(&l->interlock); - return(FALSE); - } - l->want_upgrade = TRUE; - l->read_count--; - - while (l->read_count != 0) { - l->waiting = TRUE; - thread_sleep(l, &l->interlock, FALSE); - simple_lock(&l->interlock); - } - - simple_unlock(&l->interlock); - return(TRUE); -} - -/* - * Allow a process that has a lock for write to acquire it - * recursively (for read, write, or update). - */ -void lock_set_recursive(l) - lock_t l; -{ - simple_lock(&l->interlock); - if (!l->want_write) { - panic("lock_set_recursive: don't have write lock"); - } - l->thread = (char *) current_thread(); - simple_unlock(&l->interlock); -} - -/* - * Prevent a lock from being re-acquired. 
- */ -void lock_clear_recursive(l) - lock_t l; -{ - simple_lock(&l->interlock); - if (((thread_t) l->thread) != current_thread()) { - panic("lock_clear_recursive: wrong thread"); - } - if (l->recursion_depth == 0) - l->thread = (char *)-1; /* XXX */ - simple_unlock(&l->interlock); -} diff --git a/sys/vm/lock.h b/sys/vm/lock.h deleted file mode 100644 index 4d64105395e..00000000000 --- a/sys/vm/lock.h +++ /dev/null @@ -1,175 +0,0 @@ -/* $OpenBSD: lock.h,v 1.4 1996/08/02 00:05:56 niklas Exp $ */ -/* $NetBSD: lock.h,v 1.8 1994/10/30 19:11:11 cgd Exp $ */ - -/* - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * The Mach Operating System project at Carnegie-Mellon University. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)lock.h 8.1 (Berkeley) 6/11/93 - * - * - * Copyright (c) 1987, 1990 Carnegie-Mellon University. - * All rights reserved. - * - * Authors: Avadis Tevanian, Jr., Michael Wayne Young - * - * Permission to use, copy, modify and distribute this software and - * its documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie the - * rights to redistribute these changes. 
- */ - -/* - * Locking primitives definitions - */ - -#ifndef _LOCK_H_ -#define _LOCK_H_ - -#define NCPUS 1 /* XXX */ - -/* - * A simple spin lock. - */ - -struct slock { - int lock_data; /* in general 1 bit is sufficient */ -}; - -typedef struct slock simple_lock_data_t; -typedef struct slock *simple_lock_t; - -/* - * The general lock structure. Provides for multiple readers, - * upgrading from read to write, and sleeping until the lock - * can be gained. - */ - -struct lock { -#ifdef vax - /* - * Efficient VAX implementation -- see field description below. - */ - unsigned int read_count:16, - want_upgrade:1, - want_write:1, - waiting:1, - can_sleep:1, - :0; - - simple_lock_data_t interlock; -#else /* vax */ -#ifdef ns32000 - /* - * Efficient ns32000 implementation -- - * see field description below. - */ - simple_lock_data_t interlock; - unsigned int read_count:16, - want_upgrade:1, - want_write:1, - waiting:1, - can_sleep:1, - :0; - -#else /* ns32000 */ - /* Only the "interlock" field is used for hardware exclusion; - * other fields are modified with normal instructions after - * acquiring the interlock bit. 
- */ - simple_lock_data_t - interlock; /* Interlock for remaining fields */ - boolean_t want_write; /* Writer is waiting, or locked for write */ - boolean_t want_upgrade; /* Read-to-write upgrade waiting */ - boolean_t waiting; /* Someone is sleeping on lock */ - boolean_t can_sleep; /* Can attempts to lock go to sleep */ - int read_count; /* Number of accepted readers */ -#endif /* ns32000 */ -#endif /* vax */ - void *thread; /* Thread that has lock, if recursive locking allowed */ - /* (should be thread_t, but but we then have mutually - recursive definitions) */ - int recursion_depth;/* Depth of recursion */ -}; - -typedef struct lock lock_data_t; -typedef struct lock *lock_t; - -#if NCPUS > 1 -__BEGIN_DECLS -void simple_lock __P((simple_lock_t)); -void simple_lock_init __P((simple_lock_t)); -boolean_t simple_lock_try __P((simple_lock_t)); -void simple_unlock __P((simple_lock_t)); -__END_DECLS -#else /* No multiprocessor locking is necessary. */ -#define simple_lock(l) -#define simple_lock_init(l) -#define simple_lock_try(l) (1) /* Always succeeds. */ -#define simple_unlock(l) -#endif - -/* Sleep locks must work even if no multiprocessing. 
*/ - -#define lock_read_done(l) lock_done(l) -#define lock_write_done(l) lock_done(l) - -void lock_clear_recursive __P((lock_t)); -void lock_done __P((lock_t)); -void lock_init __P((lock_t, boolean_t)); -void lock_read __P((lock_t)); -boolean_t lock_read_to_write __P((lock_t)); -void lock_set_recursive __P((lock_t)); -void lock_sleepable __P((lock_t, boolean_t)); -boolean_t lock_try_read __P((lock_t)); -boolean_t lock_try_read_to_write __P((lock_t)); -boolean_t lock_try_write __P((lock_t)); -void lock_write __P((lock_t)); -void lock_write_to_read __P((lock_t)); -#endif /* !_LOCK_H_ */ diff --git a/sys/vm/vm.h b/sys/vm/vm.h index 688c7987e8c..5a1775f9ace 100644 --- a/sys/vm/vm.h +++ b/sys/vm/vm.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vm.h,v 1.2 1996/08/02 00:05:58 niklas Exp $ */ +/* $OpenBSD: vm.h,v 1.3 1997/10/06 15:28:51 csapuntz Exp $ */ /* $NetBSD: vm.h,v 1.13 1994/06/29 06:47:52 cgd Exp $ */ /* @@ -33,7 +33,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * @(#)vm.h 8.2 (Berkeley) 12/13/93 + * @(#)vm.h 8.5 (Berkeley) 5/11/95 */ #ifndef VM_H @@ -59,10 +59,18 @@ typedef struct vm_page *vm_page_t; struct pager_struct; typedef struct pager_struct *vm_pager_t; +/* + * MACH VM locking type mappings to kernel types + */ +typedef struct simplelock simple_lock_data_t; +typedef struct simplelock *simple_lock_t; +typedef struct lock lock_data_t; +typedef struct lock *lock_t; + #include <sys/vmmeter.h> #include <sys/queue.h> #include <vm/vm_param.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/vm_prot.h> #include <vm/vm_inherit.h> #include <vm/vm_map.h> diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h index 8f09389aae1..449a3c4d7ee 100644 --- a/sys/vm/vm_extern.h +++ b/sys/vm/vm_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_extern.h,v 1.11 1997/07/25 06:03:05 mickey Exp $ */ +/* $OpenBSD: vm_extern.h,v 1.12 1997/10/06 15:28:51 csapuntz Exp $ */ /* $NetBSD: vm_extern.h,v 1.20 1996/04/23 12:25:23 christos Exp $ */ /*- @@ -33,7 +33,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)vm_extern.h 8.2 (Berkeley) 1/12/94 + * @(#)vm_extern.h 8.5 (Berkeley) 5/3/95 */ struct buf; diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index f2529758cfc..036d768c9ec 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_fault.c,v 1.10 1997/07/30 23:32:24 niklas Exp $ */ +/* $OpenBSD: vm_fault.c,v 1.11 1997/10/06 15:28:52 csapuntz Exp $ */ /* $NetBSD: vm_fault.c,v 1.20 1997/02/18 13:39:33 mrg Exp $ */ /* @@ -36,7 +36,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)vm_fault.c 8.4 (Berkeley) 1/12/94 + * @(#)vm_fault.c 8.5 (Berkeley) 1/9/95 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. 
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index a3abdf369da..9d7ae19daeb 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_glue.c,v 1.24 1997/07/25 06:03:07 mickey Exp $ */ +/* $OpenBSD: vm_glue.c,v 1.25 1997/10/06 15:28:52 csapuntz Exp $ */ /* $NetBSD: vm_glue.c,v 1.55.4.1 1996/06/13 17:25:45 cgd Exp $ */ /* @@ -36,7 +36,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)vm_glue.c 8.6 (Berkeley) 1/5/94 + * @(#)vm_glue.c 8.9 (Berkeley) 3/4/95 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. @@ -381,13 +381,16 @@ scheduler() loop: #ifdef DEBUG - while (!enableswap) + while (!enableswap) { + panic ("swap disabled??"); tsleep((caddr_t)&proc0, PVM, "noswap", 0); + } #endif pp = NULL; ppri = INT_MIN; for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { if (p->p_stat == SRUN && (p->p_flag & P_INMEM) == 0) { + pri = p->p_swtime + p->p_slptime - p->p_nice * 8; if (pri > ppri) { pp = p; @@ -411,6 +414,7 @@ loop: * We would like to bring someone in. * This part is really bogus cuz we could deadlock on memory * despite our feeble check. + * XXX should require at least vm_swrss / 2 */ if (cnt.v_free_count > atop(USPACE)) { #ifdef DEBUG diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index a0af430839c..134fb7fa8de 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_kern.c,v 1.5 1997/09/22 15:17:18 chuck Exp $ */ +/* $OpenBSD: vm_kern.c,v 1.6 1997/10/06 15:28:53 csapuntz Exp $ */ /* $NetBSD: vm_kern.c,v 1.17.6.1 1996/06/13 17:21:28 cgd Exp $ */ /* @@ -36,7 +36,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)vm_kern.c 8.3 (Berkeley) 1/12/94 + * @(#)vm_kern.c 8.4 (Berkeley) 1/9/95 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. 
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index b96f1570623..2cf2387896f 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_map.c,v 1.5 1997/07/25 06:03:07 mickey Exp $ */ +/* $OpenBSD: vm_map.c,v 1.6 1997/10/06 15:28:53 csapuntz Exp $ */ /* $NetBSD: vm_map.c,v 1.23 1996/02/10 00:08:08 christos Exp $ */ /* @@ -36,7 +36,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 + * @(#)vm_map.c 8.9 (Berkeley) 5/17/95 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. @@ -75,7 +75,6 @@ #include <vm/vm.h> #include <vm/vm_page.h> -#include <vm/vm_object.h> /* * Virtual memory maps provide for the mapping, protection, @@ -267,7 +266,7 @@ vm_map_init(map, min, max, pageable) map->first_free = &map->header; map->hint = &map->header; map->timestamp = 0; - lock_init(&map->lock, TRUE); + lockinit(&map->lock, PVM, "thrd_sleep", 0, 0); simple_lock_init(&map->ref_lock); simple_lock_init(&map->hint_lock); } @@ -401,12 +400,14 @@ vm_map_deallocate(map) * to it. */ - vm_map_lock(map); + vm_map_lock_drain_interlock(map); (void) vm_map_delete(map, map->min_offset, map->max_offset); pmap_destroy(map->pmap); + vm_map_unlock(map); + FREE(map, M_VMMAP); } @@ -1195,7 +1196,7 @@ vm_map_pageable(map, start, end, new_pageable) * If a region becomes completely unwired, * unwire its physical pages and mappings. */ - lock_set_recursive(&map->lock); + vm_map_set_recursive(&map->lock); entry = start_entry; while ((entry != &map->header) && (entry->start < end)) { @@ -1207,7 +1208,7 @@ vm_map_pageable(map, start, end, new_pageable) entry = entry->next; } - lock_clear_recursive(&map->lock); + vm_map_clear_recursive(&map->lock); } else { @@ -1316,8 +1317,8 @@ vm_map_pageable(map, start, end, new_pageable) vm_map_unlock(map); /* trust me ... 
*/ } else { - lock_set_recursive(&map->lock); - lock_write_to_read(&map->lock); + vm_map_set_recursive(&map->lock); + lockmgr(&map->lock, LK_DOWNGRADE, (void *)0, curproc); } rv = 0; @@ -1348,7 +1349,7 @@ vm_map_pageable(map, start, end, new_pageable) vm_map_lock(map); } else { - lock_clear_recursive(&map->lock); + vm_map_clear_recursive(&map->lock); } if (rv) { vm_map_unlock(map); @@ -2002,7 +2003,7 @@ vm_map_copy(dst_map, src_map, else { new_src_map = src_map; new_src_start = src_entry->start; - lock_set_recursive(&src_map->lock); + vm_map_set_recursive(&src_map->lock); } if (dst_entry->is_a_map) { @@ -2040,7 +2041,7 @@ vm_map_copy(dst_map, src_map, else { new_dst_map = dst_map; new_dst_start = dst_entry->start; - lock_set_recursive(&dst_map->lock); + vm_map_set_recursive(&dst_map->lock); } /* @@ -2052,9 +2053,9 @@ vm_map_copy(dst_map, src_map, FALSE, FALSE); if (dst_map == new_dst_map) - lock_clear_recursive(&dst_map->lock); + vm_map_clear_recursive(&dst_map->lock); if (src_map == new_src_map) - lock_clear_recursive(&src_map->lock); + vm_map_clear_recursive(&src_map->lock); } /* @@ -2423,7 +2424,8 @@ vm_map_lookup(var_map, vaddr, fault_type, out_entry, * share map to the new object. 
*/ - if (lock_read_to_write(&share_map->lock)) { + if (lockmgr(&share_map->lock, LK_EXCLUPGRADE, + (void *)0, curproc)) { if (share_map != map) vm_map_unlock_read(map); goto RetryLookup; @@ -2436,7 +2438,8 @@ vm_map_lookup(var_map, vaddr, fault_type, out_entry, entry->needs_copy = FALSE; - lock_write_to_read(&share_map->lock); + lockmgr(&share_map->lock, LK_DOWNGRADE, + (void *)0, curproc); } else { /* @@ -2453,7 +2456,8 @@ vm_map_lookup(var_map, vaddr, fault_type, out_entry, */ if (entry->object.vm_object == NULL) { - if (lock_read_to_write(&share_map->lock)) { + if (lockmgr(&share_map->lock, LK_EXCLUPGRADE, + (void *)0, curproc)) { if (share_map != map) vm_map_unlock_read(map); goto RetryLookup; @@ -2462,7 +2466,7 @@ vm_map_lookup(var_map, vaddr, fault_type, out_entry, entry->object.vm_object = vm_object_allocate( (vm_size_t)(entry->end - entry->start)); entry->offset = 0; - lock_write_to_read(&share_map->lock); + lockmgr(&share_map->lock, LK_DOWNGRADE, (void *)0, curproc); } /* diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index d67ca25e845..8bcc10691a0 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_map.h,v 1.3 1996/08/02 00:06:01 niklas Exp $ */ +/* $OpenBSD: vm_map.h,v 1.4 1997/10/06 15:28:54 csapuntz Exp $ */ /* $NetBSD: vm_map.h,v 1.11 1995/03/26 20:39:10 jtc Exp $ */ /* @@ -36,7 +36,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)vm_map.h 8.3 (Berkeley) 3/15/94 + * @(#)vm_map.h 8.9 (Berkeley) 5/17/95 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. @@ -162,14 +162,42 @@ typedef struct { * Perform locking on the data portion of a map. 
*/ +#include <sys/proc.h> /* XXX for curproc and p_pid */ + +#define vm_map_lock_drain_interlock(map) { \ + lockmgr(&(map)->lock, LK_DRAIN|LK_INTERLOCK, \ + &(map)->ref_lock, curproc); \ + (map)->timestamp++; \ +} +#ifdef DIAGNOSTIC #define vm_map_lock(map) { \ - lock_write(&(map)->lock); \ + if (lockmgr(&(map)->lock, LK_EXCLUSIVE, (void *)0, curproc) != 0) { \ + panic("vm_map_lock: failed to get lock"); \ + } \ (map)->timestamp++; \ } -#define vm_map_unlock(map) lock_write_done(&(map)->lock) -#define vm_map_lock_read(map) lock_read(&(map)->lock) -#define vm_map_unlock_read(map) lock_read_done(&(map)->lock) - +#else +#define vm_map_lock(map) { \ + lockmgr(&(map)->lock, LK_EXCLUSIVE, (void *)0, curproc); \ + (map)->timestamp++; \ +} +#endif /* DIAGNOSTIC */ +#define vm_map_unlock(map) \ + lockmgr(&(map)->lock, LK_RELEASE, (void *)0, curproc) +#define vm_map_lock_read(map) \ + lockmgr(&(map)->lock, LK_SHARED, (void *)0, curproc) +#define vm_map_unlock_read(map) \ + lockmgr(&(map)->lock, LK_RELEASE, (void *)0, curproc) +#define vm_map_set_recursive(map) { \ + simple_lock(&(map)->lk_interlock); \ + (map)->lk_flags |= LK_CANRECURSE; \ + simple_unlock(&(map)->lk_interlock); \ +} +#define vm_map_clear_recursive(map) { \ + simple_lock(&(map)->lk_interlock); \ + (map)->lk_flags &= ~LK_CANRECURSE; \ + simple_unlock(&(map)->lk_interlock); \ +} /* * Functions implemented as macros */ diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c index 3d96b889c5a..f873a44fb81 100644 --- a/sys/vm/vm_meter.c +++ b/sys/vm/vm_meter.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_meter.c,v 1.4 1997/04/17 01:25:20 niklas Exp $ */ +/* $OpenBSD: vm_meter.c,v 1.5 1997/10/06 15:28:54 csapuntz Exp $ */ /* $NetBSD: vm_meter.c,v 1.18 1996/02/05 01:53:59 christos Exp $ */ /* @@ -33,7 +33,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * @(#)vm_meter.c 8.4 (Berkeley) 1/4/94 + * @(#)vm_meter.c 8.7 (Berkeley) 5/10/95 */ #include <sys/param.h> @@ -223,6 +223,7 @@ vmtotal(totalp) } if (object->ref_count > 1) { /* shared object */ + simple_unlock(&vm_object_list_lock); totalp->t_vmshr += num_pages(object->size); totalp->t_rmshr += object->resident_page_count; if (object->flags & OBJ_ACTIVE) { diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index b4c7ae54454..2191798d551 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_object.c,v 1.17 1997/07/25 06:03:09 mickey Exp $ */ +/* $OpenBSD: vm_object.c,v 1.18 1997/10/06 15:28:55 csapuntz Exp $ */ /* $NetBSD: vm_object.c,v 1.46 1997/03/30 20:56:12 mycroft Exp $ */ /*- @@ -66,7 +66,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)vm_object.c 8.5 (Berkeley) 3/22/94 + * @(#)vm_object.c 8.7 (Berkeley) 5/11/95 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. @@ -416,10 +416,8 @@ vm_object_terminate(object) * * XXX need to do something in the event of a cleaning error. */ - if ((object->flags & OBJ_INTERNAL) == 0) { + if ((object->flags & OBJ_INTERNAL) == 0) (void) vm_object_page_clean(object, 0, 0, TRUE, TRUE); - vm_object_unlock(object); - } /* * Now free the pages. diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index 370bab3920b..1ab4e4e717b 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_object.h,v 1.5 1997/04/17 01:25:21 niklas Exp $ */ +/* $OpenBSD: vm_object.h,v 1.6 1997/10/06 15:28:56 csapuntz Exp $ */ /* $NetBSD: vm_object.h,v 1.16 1995/03/29 22:10:28 briggs Exp $ */ /* @@ -36,7 +36,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)vm_object.h 8.3 (Berkeley) 1/12/94 + * @(#)vm_object.h 8.4 (Berkeley) 1/9/95 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. 
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 23585120dd2..2e67ad62b54 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_pageout.c,v 1.5 1997/04/17 01:25:22 niklas Exp $ */ +/* $OpenBSD: vm_pageout.c,v 1.6 1997/10/06 15:28:56 csapuntz Exp $ */ /* $NetBSD: vm_pageout.c,v 1.23 1996/02/05 01:54:07 christos Exp $ */ /* @@ -36,7 +36,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)vm_pageout.c 8.5 (Berkeley) 2/14/94 + * @(#)vm_pageout.c 8.7 (Berkeley) 6/19/95 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h index f07c6fbcf85..8dc02c28b13 100644 --- a/sys/vm/vm_pageout.h +++ b/sys/vm/vm_pageout.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_pageout.h,v 1.2 1996/08/02 00:06:04 niklas Exp $ */ +/* $OpenBSD: vm_pageout.h,v 1.3 1997/10/06 15:28:57 csapuntz Exp $ */ /* $NetBSD: vm_pageout.h,v 1.11 1995/03/26 20:39:14 jtc Exp $ */ /* @@ -36,7 +36,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)vm_pageout.h 8.2 (Berkeley) 1/12/94 + * @(#)vm_pageout.h 8.3 (Berkeley) 1/9/95 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c index 8adc53ed4b4..6e22f638a73 100644 --- a/sys/vm/vm_pager.c +++ b/sys/vm/vm_pager.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_pager.c,v 1.5 1996/09/20 06:44:49 deraadt Exp $ */ +/* $OpenBSD: vm_pager.c,v 1.6 1997/10/06 15:28:57 csapuntz Exp $ */ /* $NetBSD: vm_pager.c,v 1.21 1996/03/16 23:15:25 christos Exp $ */ /* @@ -36,7 +36,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)vm_pager.c 8.6 (Berkeley) 1/12/94 + * @(#)vm_pager.c 8.7 (Berkeley) 7/7/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. 
diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h index 81e7a361ca2..53faf42aaab 100644 --- a/sys/vm/vm_pager.h +++ b/sys/vm/vm_pager.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_pager.h,v 1.2 1996/08/02 00:06:05 niklas Exp $ */ +/* $OpenBSD: vm_pager.h,v 1.3 1997/10/06 15:28:57 csapuntz Exp $ */ /* $NetBSD: vm_pager.h,v 1.10 1995/03/26 20:39:15 jtc Exp $ */ /* @@ -38,7 +38,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)vm_pager.h 8.4 (Berkeley) 1/12/94 + * @(#)vm_pager.h 8.5 (Berkeley) 7/7/94 */ /* diff --git a/sys/vm/vm_param.h b/sys/vm/vm_param.h index 1348b9021d5..3a18feac870 100644 --- a/sys/vm/vm_param.h +++ b/sys/vm/vm_param.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_param.h,v 1.8 1997/04/10 13:48:50 deraadt Exp $ */ +/* $OpenBSD: vm_param.h,v 1.9 1997/10/06 15:28:58 csapuntz Exp $ */ /* $NetBSD: vm_param.h,v 1.12 1995/03/26 20:39:16 jtc Exp $ */ /* @@ -36,7 +36,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)vm_param.h 8.1 (Berkeley) 6/11/93 + * @(#)vm_param.h 8.2 (Berkeley) 1/9/95 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. 
diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c index 135815a9afe..c5e46bf65f4 100644 --- a/sys/vm/vm_unix.c +++ b/sys/vm/vm_unix.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_unix.c,v 1.5 1997/09/17 17:26:15 weingart Exp $ */ +/* $OpenBSD: vm_unix.c,v 1.6 1997/10/06 15:28:58 csapuntz Exp $ */ /* $NetBSD: vm_unix.c,v 1.19 1996/02/10 00:08:14 christos Exp $ */ /* @@ -40,7 +40,7 @@ * * from: Utah $Hdr: vm_unix.c 1.1 89/11/07$ * - * @(#)vm_unix.c 8.1 (Berkeley) 6/11/93 + * @(#)vm_unix.c 8.2 (Berkeley) 1/9/95 */ /* diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index 58eb3712a30..16788808a0a 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vnode_pager.c,v 1.3 1996/04/21 22:33:18 deraadt Exp $ */ +/* $OpenBSD: vnode_pager.c,v 1.4 1997/10/06 15:28:59 csapuntz Exp $ */ /* $NetBSD: vnode_pager.c,v 1.19 1996/03/16 23:15:27 christos Exp $ */ /* @@ -38,7 +38,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)vnode_pager.c 8.8 (Berkeley) 2/13/94 + * @(#)vnode_pager.c 8.10 (Berkeley) 5/14/95 */ /* @@ -280,7 +280,8 @@ vnode_pager_haspage(pager, offset) vm_pager_t pager; vm_offset_t offset; { - register vn_pager_t vnp = (vn_pager_t)pager->pg_data; + struct proc *p = curproc; /* XXX */ + vn_pager_t vnp = (vn_pager_t)pager->pg_data; daddr_t bn; int err; @@ -294,9 +295,9 @@ vnode_pager_haspage(pager, offset) * Lock the vnode first to make sure we have the most recent * version of the size. 
*/ - VOP_LOCK(vnp->vnp_vp); + vn_lock(vnp->vnp_vp, LK_EXCLUSIVE | LK_RETRY, p); if (offset >= vnp->vnp_size) { - VOP_UNLOCK(vnp->vnp_vp); + VOP_UNLOCK(vnp->vnp_vp, 0, p); #ifdef DEBUG if (vpagerdebug & (VDB_FAIL|VDB_SIZE)) printf("vnode_pager_haspage: pg %p, off %lx, size %lx\n", @@ -315,7 +316,7 @@ vnode_pager_haspage(pager, offset) err = VOP_BMAP(vnp->vnp_vp, offset / vnp->vnp_vp->v_mount->mnt_stat.f_iosize, (struct vnode **)0, &bn, NULL); - VOP_UNLOCK(vnp->vnp_vp); + VOP_UNLOCK(vnp->vnp_vp, 0, p); if (err) { #ifdef DEBUG if (vpagerdebug & VDB_FAIL) @@ -425,7 +426,8 @@ void vnode_pager_umount(mp) register struct mount *mp; { - register vm_pager_t pager, npager; + struct proc *p = curproc; /* XXX */ + vm_pager_t pager, npager; struct vnode *vp; for (pager = vnode_pager_list.tqh_first; pager != NULL; pager = npager){ @@ -436,9 +438,9 @@ vnode_pager_umount(mp) npager = pager->pg_list.tqe_next; vp = ((vn_pager_t)pager->pg_data)->vnp_vp; if (mp == (struct mount *)0 || vp->v_mount == mp) { - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); (void) vnode_pager_uncache(vp); - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); } } } @@ -455,15 +457,15 @@ boolean_t vnode_pager_uncache(vp) register struct vnode *vp; { - register vm_object_t object; + struct proc *p = curproc; /* XXX */ + vm_object_t object; boolean_t uncached; vm_pager_t pager; /* * Not a mapped vnode */ - pager = (vm_pager_t)vp->v_vmdata; - if (pager == NULL) + if (vp->v_type != VREG || (pager = (vm_pager_t)vp->v_vmdata) == NULL) return (TRUE); #ifdef DEBUG if (!VOP_ISLOCKED(vp)) { @@ -492,9 +494,9 @@ vnode_pager_uncache(vp) object = vm_object_lookup(pager); if (object) { uncached = (object->ref_count <= 1); - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); pager_cache(object, FALSE); - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); } else uncached = TRUE; return(uncached); @@ -540,9 +542,9 @@ vnode_pager_io(vnp, mlist, npages, sync, rw) * read beyond EOF (returns error) * short read */ - 
VOP_LOCK(vnp->vnp_vp); + vn_lock(vnp->vnp_vp, LK_EXCLUSIVE | LK_RETRY, p); if (foff >= vnp->vnp_size) { - VOP_UNLOCK(vnp->vnp_vp); + VOP_UNLOCK(vnp->vnp_vp, 0, p); vm_pager_unmap_pages(kva, npages); #ifdef DEBUG if (vpagerdebug & VDB_SIZE) @@ -573,7 +575,7 @@ vnode_pager_io(vnp, mlist, npages, sync, rw) error = VOP_READ(vnp->vnp_vp, &auio, 0, p->p_ucred); else error = VOP_WRITE(vnp->vnp_vp, &auio, 0, p->p_ucred); - VOP_UNLOCK(vnp->vnp_vp); + VOP_UNLOCK(vnp->vnp_vp, 0, p); #ifdef DEBUG if (vpagerdebug & VDB_IO) { if (error || auio.uio_resid) |