summaryrefslogtreecommitdiffstats
path: root/sys/dev/vnd.c
diff options
context:
space:
mode:
authorthib <thib@openbsd.org>2011-04-18 16:50:22 +0000
committerthib <thib@openbsd.org>2011-04-18 16:50:22 +0000
commitad08565a1e415583c1570acf60fdcc201ba30242 (patch)
treef3ec74bd13c7fa38420821c7cb8e88105e2268d2 /sys/dev/vnd.c
parentPrint size_t with %zu and ssize_t with %zd in error/debug output. (diff)
downloadwireguard-openbsd-ad08565a1e415583c1570acf60fdcc201ba30242.tar.xz
wireguard-openbsd-ad08565a1e415583c1570acf60fdcc201ba30242.zip
remove the old style by-pass-the-buffer-cache vnd code.
this will treat vndX the same as svndX. ok and gcc2 fixes from deraadt@
Diffstat (limited to 'sys/dev/vnd.c')
-rw-r--r--sys/dev/vnd.c279
1 files changed, 51 insertions, 228 deletions
diff --git a/sys/dev/vnd.c b/sys/dev/vnd.c
index 912f4455bb5..bdd6df82414 100644
--- a/sys/dev/vnd.c
+++ b/sys/dev/vnd.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vnd.c,v 1.108 2011/04/02 15:24:03 thib Exp $ */
+/* $OpenBSD: vnd.c,v 1.109 2011/04/18 16:50:22 thib Exp $ */
/* $NetBSD: vnd.c,v 1.26 1996/03/30 23:06:11 christos Exp $ */
/*
@@ -33,25 +33,11 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * from: Utah $Hdr: vn.c 1.13 94/04/02$
- *
- * @(#)vn.c 8.6 (Berkeley) 4/1/94
*/
/*
- * Vnode disk driver.
- *
- * Block/character interface to a vnode. Allows one to treat a file
- * as a disk (e.g. build a filesystem in it, mount it, etc.).
- *
- * NOTE 1: This uses either the VOP_BMAP/VOP_STRATEGY interface to the
- * vnode or simple VOP_READ/VOP_WRITE. The former is suitable for swapping
- * as it doesn't distort the local buffer cache. The latter is good for
- * building disk images as it keeps the cache consistent after the block
- * device is closed.
+ * There is a security issue involved with this driver.
*
- * NOTE 2: There is a security issue involved with this driver.
* Once mounted all access to the contents of the "mapped" file via
* the special file is controlled by the permissions on the special
* file, the protection of the mapped file is ignored (effectively,
@@ -102,12 +88,8 @@ int vnddebug = 0x00;
* DISKUNIT(), but with the minor masked off.
*/
#define vndunit(x) DISKUNIT(makedev(major(x), minor(x) & 0x7ff))
-#define vndsimple(x) (minor(x) & 0x800)
-
-/* same as MAKEDISKDEV, preserving the vndsimple() property */
#define VNDLABELDEV(dev) \
- makedev(major(dev), DISKMINOR(vndunit(dev), RAW_PART) | \
- (vndsimple(dev) ? 0x800 : 0))
+ makedev(major(dev), DISKMINOR(vndunit(dev), RAW_PART))
struct vndbuf {
struct buf vb_buf;
@@ -145,7 +127,6 @@ struct vnd_softc {
#define VNF_LABELLING 0x0100
#define VNF_WLABEL 0x0200
#define VNF_HAVELABEL 0x0400
-#define VNF_SIMPLE 0x1000
#define VNF_READONLY 0x2000
#define VNDRW(v) ((v)->sc_flags & VNF_READONLY ? FREAD : FREAD|FWRITE)
@@ -157,7 +138,6 @@ int numvnd = 0;
void vndattach(int);
void vndclear(struct vnd_softc *);
-void vndstart(struct vnd_softc *, struct buf *);
int vndsetcred(struct vnd_softc *, struct ucred *);
void vndiodone(struct buf *);
void vndshutdown(void);
@@ -232,12 +212,6 @@ vndopen(dev_t dev, int flags, int mode, struct proc *p)
if ((error = vndlock(sc)) != 0)
return (error);
- if (!vndsimple(dev) && sc->sc_vp != NULL &&
- (sc->sc_vp->v_type != VREG || sc->sc_keyctx != NULL)) {
- error = EINVAL;
- goto bad;
- }
-
if ((flags & FWRITE) && (sc->sc_flags & VNF_READONLY)) {
error = EROFS;
goto bad;
@@ -252,20 +226,12 @@ vndopen(dev_t dev, int flags, int mode, struct proc *p)
part = DISKPART(dev);
pmask = 1 << part;
- /*
- * If any partition is open, all succeeding openings must be of the
- * same type or read-only.
- */
- if (sc->sc_dk.dk_openmask) {
- if (((sc->sc_flags & VNF_SIMPLE) != 0) !=
- (vndsimple(dev) != 0) && (flags & FWRITE)) {
- error = EBUSY;
- goto bad;
- }
- } else if (vndsimple(dev))
- sc->sc_flags |= VNF_SIMPLE;
- else
- sc->sc_flags &= ~VNF_SIMPLE;
+ /* Allow access to the raw device even if we are open. */
+ if (sc->sc_dk.dk_openmask && !(part == RAW_PART) &&
+ !(mode == S_IFCHR)) {
+ error = EBUSY;
+ goto bad;
+ }
/* Check that the partition exists. */
if (part != RAW_PART &&
@@ -360,33 +326,17 @@ vndclose(dev_t dev, int flags, int mode, struct proc *p)
return (0);
}
-/*
- * Two methods are used, the traditional buffercache bypassing and the
- * newer, cache-coherent on unmount, one.
- *
- * Former method:
- * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
- * Note that this driver can only be used for swapping over NFS on the hp
- * since nfs_strategy on the vax cannot handle u-areas and page tables.
- *
- * Latter method:
- * Repack the buffer into an uio structure and use VOP_READ/VOP_WRITE to
- * access the underlying file.
- */
void
vndstrategy(struct buf *bp)
{
int unit = vndunit(bp->b_dev);
struct vnd_softc *vnd = &vnd_softc[unit];
- struct vndbuf *nbp;
- int bsize;
off_t bn;
- caddr_t addr;
- size_t resid;
- int sz, flags, error, s;
+ int sz, s, part;
struct iovec aiov;
struct uio auio;
struct proc *p = curproc;
+ daddr64_t off;
DNPRINTF(VDB_FOLLOW, "vndstrategy(%p): unit %d\n", bp, unit);
@@ -441,167 +391,50 @@ vndstrategy(struct buf *bp)
else
sz = howmany(bp->b_bcount, DEV_BSIZE);
- /* No bypassing of buffer cache? */
- if (vndsimple(bp->b_dev)) {
- int part = DISKPART(bp->b_dev);
- daddr64_t off = DL_SECTOBLK(vnd->sc_dk.dk_label,
- DL_GETPOFFSET(&vnd->sc_dk.dk_label->d_partitions[part]));
- aiov.iov_base = bp->b_data;
- auio.uio_resid = aiov.iov_len = bp->b_bcount;
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_offset = dbtob((off_t)(bp->b_blkno + off));
- auio.uio_segflg = UIO_SYSSPACE;
- auio.uio_procp = p;
-
- vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY, p);
- if (bp->b_flags & B_READ) {
- auio.uio_rw = UIO_READ;
- bp->b_error = VOP_READ(vnd->sc_vp, &auio, 0,
- vnd->sc_cred);
- if (vnd->sc_keyctx)
- vndencrypt(vnd, bp->b_data,
- bp->b_bcount, bp->b_blkno, 0);
- } else {
- if (vnd->sc_keyctx)
- vndencrypt(vnd, bp->b_data,
- bp->b_bcount, bp->b_blkno, 1);
- auio.uio_rw = UIO_WRITE;
- /*
- * Upper layer has already checked I/O for
- * limits, so there is no need to do it again.
- */
- bp->b_error = VOP_WRITE(vnd->sc_vp, &auio,
- IO_NOLIMIT, vnd->sc_cred);
- /* Data in buffer cache needs to be in clear */
- if (vnd->sc_keyctx)
- vndencrypt(vnd, bp->b_data,
- bp->b_bcount, bp->b_blkno, 0);
- }
- VOP_UNLOCK(vnd->sc_vp, 0, p);
- if (bp->b_error)
- bp->b_flags |= B_ERROR;
- bp->b_resid = auio.uio_resid;
- s = splbio();
- biodone(bp);
- splx(s);
-
- return;
- }
-
- if (vnd->sc_vp->v_type != VREG || vnd->sc_keyctx != NULL) {
- bp->b_error = EINVAL;
- bp->b_flags |= B_ERROR;
- s = splbio();
- biodone(bp);
- splx(s);
- return;
- }
-
- /* The old-style buffercache bypassing method. */
- bn += DL_SECTOBLK(vnd->sc_dk.dk_label,
- DL_GETPOFFSET(&vnd->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)]));
- bn = dbtob(bn);
- bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
- addr = bp->b_data;
- flags = bp->b_flags | B_CALL;
- for (resid = bp->b_resid; resid; resid -= sz) {
- struct vnode *vp;
- daddr64_t nbn;
- int off, nra;
-
- nra = 0;
- vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p);
- error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
- VOP_UNLOCK(vnd->sc_vp, 0, p);
- if (error == 0 && (long)nbn == -1)
- error = EIO;
-#ifdef VNDDEBUG
- if (!dovndcluster)
- nra = 0;
-#endif
-
- if ((off = bn % bsize) != 0)
- sz = bsize - off;
- else
- sz = (1 + nra) * bsize;
- if (resid < sz)
- sz = resid;
-
- DNPRINTF(VDB_IO, "vndstrategy: vp %p/%p bn %x/%lld sz %x\n",
- vnd->sc_vp, vp, bn, nbn, sz);
-
- s = splbio();
- nbp = getvndbuf();
- splx(s);
- nbp->vb_buf.b_flags = flags;
- nbp->vb_buf.b_bcount = sz;
- nbp->vb_buf.b_bufsize = bp->b_bufsize;
- nbp->vb_buf.b_error = 0;
- if (vp->v_type == VBLK || vp->v_type == VCHR)
- nbp->vb_buf.b_dev = vp->v_rdev;
- else
- nbp->vb_buf.b_dev = NODEV;
- nbp->vb_buf.b_data = addr;
- nbp->vb_buf.b_blkno = nbn + btodb(off);
- nbp->vb_buf.b_proc = bp->b_proc;
- nbp->vb_buf.b_iodone = vndiodone;
- nbp->vb_buf.b_vp = vp;
- nbp->vb_buf.b_dirtyoff = bp->b_dirtyoff;
- nbp->vb_buf.b_dirtyend = bp->b_dirtyend;
- nbp->vb_buf.b_validoff = bp->b_validoff;
- nbp->vb_buf.b_validend = bp->b_validend;
- LIST_INIT(&nbp->vb_buf.b_dep);
- nbp->vb_buf.b_bq = NULL;
-
- /* save a reference to the old buffer */
- nbp->vb_obp = bp;
-
+ part = DISKPART(bp->b_dev);
+ off = DL_SECTOBLK(vnd->sc_dk.dk_label,
+ DL_GETPOFFSET(&vnd->sc_dk.dk_label->d_partitions[part]));
+ aiov.iov_base = bp->b_data;
+ auio.uio_resid = aiov.iov_len = bp->b_bcount;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = dbtob((off_t)(bp->b_blkno + off));
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_procp = p;
+
+ vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY, p);
+ if (bp->b_flags & B_READ) {
+ auio.uio_rw = UIO_READ;
+ bp->b_error = VOP_READ(vnd->sc_vp, &auio, 0,
+ vnd->sc_cred);
+ if (vnd->sc_keyctx)
+ vndencrypt(vnd, bp->b_data,
+ bp->b_bcount, bp->b_blkno, 0);
+ } else {
+ if (vnd->sc_keyctx)
+ vndencrypt(vnd, bp->b_data,
+ bp->b_bcount, bp->b_blkno, 1);
+ auio.uio_rw = UIO_WRITE;
/*
- * If there was an error or a hole in the file...punt.
- * Note that we deal with this after the nbp allocation.
- * This ensures that we properly clean up any operations
- * that we have already fired off.
- *
- * XXX we could deal with holes here but it would be
- * a hassle (in the write case).
- * We must still however charge for the write even if there
- * was an error.
+ * Upper layer has already checked I/O for
+ * limits, so there is no need to do it again.
*/
- if (error) {
- nbp->vb_buf.b_error = error;
- nbp->vb_buf.b_flags |= B_ERROR;
- bp->b_resid -= (resid - sz);
- s = splbio();
- /* charge for the write */
- if ((nbp->vb_buf.b_flags & B_READ) == 0)
- nbp->vb_buf.b_vp->v_numoutput++;
- biodone(&nbp->vb_buf);
- splx(s);
- return;
- }
-
- vndstart(vnd, &nbp->vb_buf);
- bn += sz;
- addr += sz;
+ bp->b_error = VOP_WRITE(vnd->sc_vp, &auio,
+ IO_NOLIMIT, vnd->sc_cred);
+ /* Data in buffer cache needs to be in clear */
+ if (vnd->sc_keyctx)
+ vndencrypt(vnd, bp->b_data,
+ bp->b_bcount, bp->b_blkno, 0);
}
+ VOP_UNLOCK(vnd->sc_vp, 0, p);
+ if (bp->b_error)
+ bp->b_flags |= B_ERROR;
+ bp->b_resid = auio.uio_resid;
+ s = splbio();
+ biodone(bp);
+ splx(s);
}
-void
-vndstart(struct vnd_softc *vnd, struct buf *bp)
-{
- DNPRINTF(VDB_IO,
- "vndstart(%d): bp %p vp %p blkno %lld addr %p cnt %lx\n",
- vnd-vnd_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data,
- bp->b_bcount);
-
- /* Instrumentation. */
- disk_busy(&vnd->sc_dk);
-
- if ((bp->b_flags & B_READ) == 0)
- bp->b_vp->v_numoutput++;
- VOP_STRATEGY(bp);
-}
void
vndiodone(struct buf *bp)
@@ -726,8 +559,6 @@ vndioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
case VNDIOCSET:
if (vnd->sc_flags & VNF_INITED)
return (EBUSY);
- if (!(vnd->sc_flags & VNF_SIMPLE) && vio->vnd_keylen)
- return (EINVAL);
if ((error = vndlock(vnd)) != 0)
return (error);
@@ -750,8 +581,7 @@ vndioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
/* Set disk name depending on how we were created. */
bzero(vnd->sc_dk_name, sizeof(vnd->sc_dk_name));
if (snprintf(vnd->sc_dk_name, sizeof(vnd->sc_dk_name),
- "%svnd%d", ((vnd->sc_flags & VNF_SIMPLE) ? "s" : ""),
- unit) >= sizeof(vnd->sc_dk_name)) {
+ "vnd%d", unit) >= sizeof(vnd->sc_dk_name)) {
printf("VNDIOCSET: disk name too long\n");
vndunlock(vnd);
return(ENXIO);
@@ -779,13 +609,6 @@ vndioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
return (error);
}
- if (nd.ni_vp->v_type != VREG && !vndsimple(dev)) {
- VOP_UNLOCK(nd.ni_vp, 0, p);
- vn_close(nd.ni_vp, VNDRW(vnd), p->p_ucred, p);
- vndunlock(vnd);
- return (EINVAL);
- }
-
if (nd.ni_vp->v_type == VBLK)
vnd->sc_size = vndbdevsize(nd.ni_vp, p);
else {