summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authortedu <tedu@openbsd.org>2013-04-23 20:42:38 +0000
committertedu <tedu@openbsd.org>2013-04-23 20:42:38 +0000
commit9266689daacf17f4c34ff3e9e7ade3eb4ab14762 (patch)
tree64bf864f09bb84dd46c75492c5d0b94c35f588a5
parentsimple large ino_t handling (diff)
downloadwireguard-openbsd-9266689daacf17f4c34ff3e9e7ade3eb4ab14762.tar.xz
wireguard-openbsd-9266689daacf17f4c34ff3e9e7ade3eb4ab14762.zip
change ffs layout policy. try to pack metadata close to the start of
each cylinder group, which should speed up disk access a little and fsck substantially. adapted from freebsd. for more info: www.mckusick.com/publications/faster_fsck.pdf ok deraadt
-rw-r--r--sys/ufs/ffs/ffs_alloc.c217
-rw-r--r--sys/ufs/ffs/ffs_balloc.c10
-rw-r--r--sys/ufs/ffs/fs.h4
3 files changed, 186 insertions, 45 deletions
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 0888675962c..d22bcd08144 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_alloc.c,v 1.92 2011/09/18 23:20:28 bluhm Exp $ */
+/* $OpenBSD: ffs_alloc.c,v 1.93 2013/04/23 20:42:38 tedu Exp $ */
/* $NetBSD: ffs_alloc.c,v 1.11 1996/05/11 18:27:09 mycroft Exp $ */
/*
@@ -996,6 +996,22 @@ ffs_dirpref(struct inode *pip)
* Limit number of dirs in one cg and reserve space for
* regular files, but only if we have no deficit in
* inodes or space.
+ *
+ * We are trying to find a suitable cylinder group nearby
+ * our preferred cylinder group to place a new directory.
+ * We scan from our preferred cylinder group forward looking
+ * for a cylinder group that meets our criterion. If we get
+ * to the final cylinder group and do not find anything,
+ * we start scanning backwards from our preferred cylinder
+ * group. The ideal would be to alternate looking forward
+ * and backward, but that is just too complex to code for
+ * the gain it would get. The most likely place where the
+ * backward scan would take effect is when we start near
+ * the end of the filesystem and do not find anything from
+ * where we are to the end. In that case, scanning backward
+ * will likely find us a suitable cylinder group much closer
+ * to our desired location than if we were to start scanning
+ * forward from the beginning of the filesystem.
*/
for (cg = prefcg; cg < fs->fs_ncg; cg++)
if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
@@ -1004,7 +1020,7 @@ ffs_dirpref(struct inode *pip)
if (fs->fs_contigdirs[cg] < maxcontigdirs)
goto end;
}
- for (cg = 0; cg < prefcg; cg++)
+ for (cg = prefcg - 1; cg >= 0; cg--)
if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
fs->fs_cs(fs, cg).cs_nifree >= minifree &&
fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
@@ -1017,7 +1033,7 @@ ffs_dirpref(struct inode *pip)
for (cg = prefcg; cg < fs->fs_ncg; cg++)
if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
goto end;
- for (cg = 0; cg < prefcg; cg++)
+ for (cg = prefcg - 1; cg >= 0; cg--)
if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
goto end;
end:
@@ -1031,9 +1047,15 @@ end:
*
* If no blocks have been allocated in the first section, the policy is to
* request a block in the same cylinder group as the inode that describes
- * the file. If no blocks have been allocated in any other section, the
- * policy is to place the section in a cylinder group with a greater than
- * average number of free blocks. An appropriate cylinder group is found
+ * the file. The first indirect is allocated immediately following the last
+ * direct block and the data blocks for the first indirect immediately
+ * follow it.
+ *
+ * If no blocks have been allocated in any other section, the indirect
+ * block(s) are allocated in the same cylinder group as its inode in an
+ * area reserved immediately following the inode blocks. The policy for
+ * the data blocks is to place them in a cylinder group with a greater than
+ * average number of free blocks. An appropriate cylinder group is found
* by using a rotor that sweeps the cylinder groups. When a new group of
* blocks is needed, the sweep begins in the cylinder group following the
* cylinder group from which the previous allocation was made. The sweep
@@ -1047,21 +1069,76 @@ int32_t
ffs1_blkpref(struct inode *ip, daddr64_t lbn, int indx, int32_t *bap)
{
struct fs *fs;
- int cg, avgbfree, startcg;
+ int cg, inocg, avgbfree, startcg;
+ uint32_t pref;
+ KASSERT(indx <= 0 || bap != NULL);
fs = ip->i_fs;
+ /*
+ * Allocation of indirect blocks is indicated by passing negative
+ * values in indx: -1 for single indirect, -2 for double indirect,
+ * -3 for triple indirect. As noted below, we attempt to allocate
+ * the first indirect inline with the file data. For all later
+ * indirect blocks, the data is often allocated in other cylinder
+ * groups. However to speed random file access and to speed up
+ * fsck, the filesystem reserves the first fs_metaspace blocks
+ * (typically half of fs_minfree) of the data area of each cylinder
+ * group to hold these later indirect blocks.
+ */
+ inocg = ino_to_cg(fs, ip->i_number);
+ if (indx < 0) {
+ /*
+ * Our preference for indirect blocks is the zone at the
+ * beginning of the inode's cylinder group data area that
+ * we try to reserve for indirect blocks.
+ */
+ pref = cgmeta(fs, inocg);
+ /*
+ * If we are allocating the first indirect block, try to
+ * place it immediately following the last direct block.
+ */
+ if (indx == -1 && lbn < NDADDR + NINDIR(fs) &&
+ ip->i_din1->di_db[NDADDR - 1] != 0)
+ pref = ip->i_din1->di_db[NDADDR - 1] + fs->fs_frag;
+ return (pref);
+ }
+ /*
+ * If we are allocating the first data block in the first indirect
+ * block and the indirect has been allocated in the data block area,
+ * try to place it immediately following the indirect block.
+ */
+ if (lbn == NDADDR) {
+ pref = ip->i_din1->di_ib[0];
+ if (pref != 0 && pref >= cgdata(fs, inocg) &&
+ pref < cgbase(fs, inocg + 1))
+ return (pref + fs->fs_frag);
+ }
+ /*
+ * If we are at the beginning of a file, or we have already allocated
+ * the maximum number of blocks per cylinder group, or we do not
+ * have a block allocated immediately preceding us, then we need
+ * to decide where to start allocating new blocks.
+ */
if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
- if (lbn < NDADDR + NINDIR(fs)) {
- cg = ino_to_cg(fs, ip->i_number);
- return (cgbase(fs, cg) + fs->fs_frag);
- }
+ /*
+ * If we are allocating a directory data block, we want
+ * to place it in the metadata area.
+ */
+ if ((DIP(ip, mode) & IFMT) == IFDIR)
+ return (cgmeta(fs, inocg));
+ /*
+ * Until we fill all the direct and all the first indirect's
+ * blocks, we try to allocate in the data area of the inode's
+ * cylinder group.
+ */
+ if (lbn < NDADDR + NINDIR(fs))
+ return (cgdata(fs, inocg));
/*
* Find a cylinder with greater than average number of
* unused data blocks.
*/
if (indx == 0 || bap[indx - 1] == 0)
- startcg =
- ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
+ startcg = inocg + lbn / fs->fs_maxbpg;
else
startcg = dtog(fs, bap[indx - 1]) + 1;
startcg %= fs->fs_ncg;
@@ -1069,16 +1146,18 @@ ffs1_blkpref(struct inode *ip, daddr64_t lbn, int indx, int32_t *bap)
for (cg = startcg; cg < fs->fs_ncg; cg++)
if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
fs->fs_cgrotor = cg;
- return (cgbase(fs, cg) + fs->fs_frag);
+ return (cgdata(fs, cg));
}
for (cg = 0; cg <= startcg; cg++)
if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
fs->fs_cgrotor = cg;
- return (cgbase(fs, cg) + fs->fs_frag);
+ return (cgdata(fs, cg));
}
return (0);
}
-
+ /*
+ * Otherwise, we just always try to lay things out contiguously.
+ */
return (bap[indx - 1] + fs->fs_frag);
}
@@ -1090,23 +1169,77 @@ int64_t
ffs2_blkpref(struct inode *ip, daddr64_t lbn, int indx, int64_t *bap)
{
struct fs *fs;
- int cg, avgbfree, startcg;
+ int cg, inocg, avgbfree, startcg;
+ uint64_t pref;
+ KASSERT(indx <= 0 || bap != NULL);
fs = ip->i_fs;
+ /*
+ * Allocation of indirect blocks is indicated by passing negative
+ * values in indx: -1 for single indirect, -2 for double indirect,
+ * -3 for triple indirect. As noted below, we attempt to allocate
+ * the first indirect inline with the file data. For all later
+ * indirect blocks, the data is often allocated in other cylinder
+ * groups. However to speed random file access and to speed up
+ * fsck, the filesystem reserves the first fs_metaspace blocks
+ * (typically half of fs_minfree) of the data area of each cylinder
+ * group to hold these later indirect blocks.
+ */
+ inocg = ino_to_cg(fs, ip->i_number);
+ if (indx < 0) {
+ /*
+ * Our preference for indirect blocks is the zone at the
+ * beginning of the inode's cylinder group data area that
+ * we try to reserve for indirect blocks.
+ */
+ pref = cgmeta(fs, inocg);
+ /*
+ * If we are allocating the first indirect block, try to
+ * place it immediately following the last direct block.
+ */
+ if (indx == -1 && lbn < NDADDR + NINDIR(fs) &&
+ ip->i_din2->di_db[NDADDR - 1] != 0)
+ pref = ip->i_din2->di_db[NDADDR - 1] + fs->fs_frag;
+ return (pref);
+ }
+ /*
+ * If we are allocating the first data block in the first indirect
+ * block and the indirect has been allocated in the data block area,
+ * try to place it immediately following the indirect block.
+ */
+ if (lbn == NDADDR) {
+ pref = ip->i_din2->di_ib[0];
+ if (pref != 0 && pref >= cgdata(fs, inocg) &&
+ pref < cgbase(fs, inocg + 1))
+ return (pref + fs->fs_frag);
+ }
+ /*
+ * If we are at the beginning of a file, or we have already allocated
+ * the maximum number of blocks per cylinder group, or we do not
+ * have a block allocated immediately preceding us, then we need
+ * to decide where to start allocating new blocks.
+ */
if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
- if (lbn < NDADDR + NINDIR(fs)) {
- cg = ino_to_cg(fs, ip->i_number);
- return (cgbase(fs, cg) + fs->fs_frag);
- }
-
+ /*
+ * If we are allocating a directory data block, we want
+ * to place it in the metadata area.
+ */
+ if ((DIP(ip, mode) & IFMT) == IFDIR)
+ return (cgmeta(fs, inocg));
+ /*
+ * Until we fill all the direct and all the first indirect's
+ * blocks, we try to allocate in the data area of the inode's
+ * cylinder group.
+ */
+ if (lbn < NDADDR + NINDIR(fs))
+ return (cgdata(fs, inocg));
/*
* Find a cylinder with greater than average number of
* unused data blocks.
*/
if (indx == 0 || bap[indx - 1] == 0)
- startcg = ino_to_cg(fs, ip->i_number) +
- lbn / fs->fs_maxbpg;
+ startcg = inocg + lbn / fs->fs_maxbpg;
else
startcg = dtog(fs, bap[indx - 1] + 1);
@@ -1125,7 +1258,7 @@ ffs2_blkpref(struct inode *ip, daddr64_t lbn, int indx, int64_t *bap)
}
/*
- * We always just try to lay things out contiguously.
+ * Otherwise, we just always try to lay things out contiguously.
*/
return (bap[indx - 1] + fs->fs_frag);
}
@@ -1367,24 +1500,27 @@ ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr64_t bpref)
struct cg *cgp;
daddr64_t bno, blkno;
u_int8_t *blksfree;
- int cylno;
+ int cylno, cgbpref;
fs = ip->i_fs;
cgp = (struct cg *) bp->b_data;
blksfree = cg_blksfree(cgp);
- if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx)
+ if (bpref == 0) {
bpref = cgp->cg_rotor;
- else {
- bpref = blknum(fs, bpref);
- bno = dtogd(fs, bpref);
- /*
- * If the requested block is available, use it.
- */
- if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
- goto gotit;
+ } else if ((cgbpref = dtog(fs, bpref)) != cgp->cg_cgx) {
+ /* map bpref to correct zone in this cg */
+ if (bpref < cgdata(fs, cgbpref))
+ bpref = cgmeta(fs, cgp->cg_cgx);
+ else
+ bpref = cgdata(fs, cgp->cg_cgx);
}
-
+ /*
+ * If the requested block is available, use it.
+ */
+ bno = dtogd(fs, blknum(fs, bpref));
+ if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
+ goto gotit;
/*
* Take the next available block in this cylinder group.
*/
@@ -1392,7 +1528,9 @@ ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr64_t bpref)
if (bno < 0)
return (0);
- cgp->cg_rotor = bno;
+ /* Update cg_rotor only if allocated from the data zone */
+ if (bno >= dtogd(fs, cgdata(fs, cgp->cg_cgx)))
+ cgp->cg_rotor = bno;
gotit:
blkno = fragstoblks(fs, bno);
@@ -1478,9 +1616,10 @@ ffs_clusteralloc(struct inode *ip, int cg, daddr64_t bpref, int len)
* be recalled to try an allocation in the next cylinder group.
*/
if (dtog(fs, bpref) != cg)
- bpref = 0;
+ bpref = cgdata(fs, cg);
else
- bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
+ bpref = blknum(fs, bpref);
+ bpref = fragstoblks(fs, dtogd(fs, bpref));
mapp = &cg_clustersfree(cgp)[bpref / NBBY];
map = *mapp++;
bit = 1 << (bpref % NBBY);
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
index e22675fde65..dba063771c1 100644
--- a/sys/ufs/ffs/ffs_balloc.c
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_balloc.c,v 1.37 2011/07/04 04:30:41 tedu Exp $ */
+/* $OpenBSD: ffs_balloc.c,v 1.38 2013/04/23 20:42:38 tedu Exp $ */
/* $NetBSD: ffs_balloc.c,v 1.3 1996/02/09 22:22:21 christos Exp $ */
/*
@@ -238,7 +238,7 @@ ffs1_balloc(struct inode *ip, off_t startoffset, int size, struct ucred *cred,
allocib = NULL;
allocblk = allociblk;
if (nb == 0) {
- pref = ffs1_blkpref(ip, lbn, 0, (int32_t *)0);
+ pref = ffs1_blkpref(ip, lbn, -indirs[0].in_off - 1, NULL);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
cred, &newb);
if (error)
@@ -286,7 +286,7 @@ ffs1_balloc(struct inode *ip, off_t startoffset, int size, struct ucred *cred,
continue;
}
if (pref == 0)
- pref = ffs1_blkpref(ip, lbn, 0, (int32_t *)0);
+ pref = ffs1_blkpref(ip, lbn, i - num - 1, NULL);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
&newb);
if (error) {
@@ -617,7 +617,7 @@ ffs2_balloc(struct inode *ip, off_t off, int size, struct ucred *cred,
allocblk = allociblk;
if (nb == 0) {
- pref = ffs2_blkpref(ip, lbn, 0, NULL);
+ pref = ffs2_blkpref(ip, lbn, -indirs[0].in_off - 1, NULL);
error = ffs_alloc(ip, lbn, pref, (int) fs->fs_bsize, cred,
&newb);
if (error)
@@ -673,7 +673,7 @@ ffs2_balloc(struct inode *ip, off_t off, int size, struct ucred *cred,
}
if (pref == 0)
- pref = ffs2_blkpref(ip, lbn, 0, NULL);
+ pref = ffs2_blkpref(ip, lbn, i - num - 1, NULL);
error = ffs_alloc(ip, lbn, pref, (int) fs->fs_bsize, cred,
&newb);
diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h
index c68ca2f799e..3698a53b5b3 100644
--- a/sys/ufs/ffs/fs.h
+++ b/sys/ufs/ffs/fs.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: fs.h,v 1.35 2008/11/06 18:01:45 deraadt Exp $ */
+/* $OpenBSD: fs.h,v 1.36 2013/04/23 20:42:38 tedu Exp $ */
/* $NetBSD: fs.h,v 1.6 1995/04/12 21:21:02 mycroft Exp $ */
/*
@@ -464,6 +464,8 @@ struct ocg {
* They calc file system addresses of cylinder group data structures.
*/
#define cgbase(fs, c) ((daddr64_t)(fs)->fs_fpg * (c))
+#define cgdata(fs, c) (cgdmin(fs, c) + (fs)->fs_minfree) /* data zone */
+#define cgmeta(fs, c) (cgdmin(fs, c)) /* meta data */
#define cgdmin(fs, c) (cgstart(fs, c) + (fs)->fs_dblkno) /* 1st data */
#define cgimin(fs, c) (cgstart(fs, c) + (fs)->fs_iblkno) /* inode blk */
#define cgsblock(fs, c) (cgstart(fs, c) + (fs)->fs_sblkno) /* super blk */