From 1fa40b01ae4d1b00e366d4949edcc230f5cd6d99 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 14 May 2007 18:23:50 +1000
Subject: [XFS] Only use refcounted pages for I/O

Many block drivers (aoe, iscsi) really want refcountable pages in bios,
which is what almost everyone send down. XFS unfortunately has a few
places where it sends down buffers that may come from kmalloc, which
breaks them.

Fix the places that use kmalloc()d buffers.

SGI-PV: 964546
SGI-Modid: xfs-linux-melb:xfs-kern:28562a

Signed-Off-By: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/linux-2.6/xfs_buf.c | 49 ++++++++++++++++++++--------------------------
 fs/xfs/linux-2.6/xfs_buf.h |  2 +-
 fs/xfs/xfs_log.c           | 19 +++++++++---------
 3 files changed, 32 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index fe4f66a5af14..208daf58b826 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -314,7 +314,7 @@ xfs_buf_free(
 
 	ASSERT(list_empty(&bp->b_hash_list));
 
-	if (bp->b_flags & _XBF_PAGE_CACHE) {
+	if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
 		uint		i;
 
 		if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
@@ -323,18 +323,11 @@ xfs_buf_free(
 		for (i = 0; i < bp->b_page_count; i++) {
 			struct page	*page = bp->b_pages[i];
 
-			ASSERT(!PagePrivate(page));
+			if (bp->b_flags & _XBF_PAGE_CACHE)
+				ASSERT(!PagePrivate(page));
 			page_cache_release(page);
 		}
 		_xfs_buf_free_pages(bp);
-	} else if (bp->b_flags & _XBF_KMEM_ALLOC) {
-		 /*
-		  * XXX(hch): bp->b_count_desired might be incorrect (see
-		  * xfs_buf_associate_memory for details), but fortunately
-		  * the Linux version of kmem_free ignores the len argument..
-		  */
-		kmem_free(bp->b_addr, bp->b_count_desired);
-		_xfs_buf_free_pages(bp);
 	}
 
 	xfs_buf_deallocate(bp);
@@ -764,41 +757,41 @@ xfs_buf_get_noaddr(
 	size_t			len,
 	xfs_buftarg_t		*target)
 {
-	size_t			malloc_len = len;
+	unsigned long		page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
+	int			error, i;
 	xfs_buf_t		*bp;
-	void			*data;
-	int			error;
 
 	bp = xfs_buf_allocate(0);
 	if (unlikely(bp == NULL))
 		goto fail;
 	_xfs_buf_initialize(bp, target, 0, len, 0);
 
- try_again:
-	data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL | KM_LARGE);
-	if (unlikely(data == NULL))
+	error = _xfs_buf_get_pages(bp, page_count, 0);
+	if (error)
 		goto fail_free_buf;
 
-	/* check whether alignment matches.. */
-	if ((__psunsigned_t)data !=
-	    ((__psunsigned_t)data & ~target->bt_smask)) {
-		/* .. else double the size and try again */
-		kmem_free(data, malloc_len);
-		malloc_len <<= 1;
-		goto try_again;
+	for (i = 0; i < page_count; i++) {
+		bp->b_pages[i] = alloc_page(GFP_KERNEL);
+		if (!bp->b_pages[i])
+			goto fail_free_mem;
 	}
+	bp->b_flags |= _XBF_PAGES;
 
-	error = xfs_buf_associate_memory(bp, data, len);
-	if (error)
+	error = _xfs_buf_map_pages(bp, XBF_MAPPED);
+	if (unlikely(error)) {
+		printk(KERN_WARNING "%s: failed to map pages\n",
+				__FUNCTION__);
 		goto fail_free_mem;
-	bp->b_flags |= _XBF_KMEM_ALLOC;
+	}
 
 	xfs_buf_unlock(bp);
 
-	XB_TRACE(bp, "no_daddr", data);
+	XB_TRACE(bp, "no_daddr", len);
 	return bp;
+
  fail_free_mem:
-	kmem_free(data, malloc_len);
+	while (--i >= 0)
+		__free_page(bp->b_pages[i]);
  fail_free_buf:
 	xfs_buf_free(bp);
  fail:
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index b6241f6201a5..b5908a34b15d 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -63,7 +63,7 @@ typedef enum {
 
 	/* flags used only internally */
 	_XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache		   */
-	_XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc()		   */
+	_XBF_PAGES = (1 << 18),	    /* backed by refcounted pages	   */
 	_XBF_RUN_QUEUES = (1 << 19),/* run block device task queue	   */
 	_XBF_DELWRI_Q = (1 << 21),   /* buffer on delwri queue		   */
 } xfs_buf_flags_t;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index c48bf61f17bd..635f99e6302f 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1199,11 +1199,18 @@ xlog_alloc_log(xfs_mount_t	*mp,
 		*iclogp = (xlog_in_core_t *)
 			  kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP);
 		iclog = *iclogp;
-		iclog->hic_data = (xlog_in_core_2_t *)
-			  kmem_zalloc(iclogsize, KM_SLEEP | KM_LARGE);
-
 		iclog->ic_prev = prev_iclog;
 		prev_iclog = iclog;
+
+		bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
+		if (!XFS_BUF_CPSEMA(bp))
+			ASSERT(0);
+		XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
+		XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
+		XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
+		iclog->ic_bp = bp;
+		iclog->hic_data = bp->b_addr;
+
 		log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
 
 		head = &iclog->ic_header;
@@ -1216,11 +1223,6 @@ xlog_alloc_log(xfs_mount_t	*mp,
 		INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT);
 		memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
 
-		bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
-		XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
-		XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
-		XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
-		iclog->ic_bp = bp;
 
 		iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
 		iclog->ic_state = XLOG_STATE_ACTIVE;
@@ -1528,7 +1530,6 @@ xlog_dealloc_log(xlog_t *log)
 		}
 #endif
 		next_iclog = iclog->ic_next;
-		kmem_free(iclog->hic_data, log->l_iclog_size);
 		kmem_free(iclog, sizeof(xlog_in_core_t));
 		iclog = next_iclog;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 4cc929ee305c69573cb842aade059dbe2a93940c Mon Sep 17 00:00:00 2001
From: Nathan Scott <nscott@aconex.com>
Date: Mon, 14 May 2007 18:24:02 +1000
Subject: [XFS] Don't grow filesystems past the size they can index.

When growing a filesystem we don't check to see if the new size overflows
the page cache index range, so we can do silly things like grow a
filesystem page 16TB on a 32bit. Check new filesystem sizes against the
limits the kernel can support.

SGI-PV: 957886
SGI-Modid: xfs-linux-melb:xfs-kern:28563a

Signed-Off-By: Nathan Scott <nscott@aconex.com>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_fsops.c   |  2 ++
 fs/xfs/xfs_mount.c   | 35 +++++++++++++++++++++++------------
 fs/xfs/xfs_mount.h   |  1 +
 fs/xfs/xfs_rtalloc.c |  4 +++-
 4 files changed, 29 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index b599e6be9ec1..25e5eae8a976 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -140,6 +140,8 @@ xfs_growfs_data_private(
 	pct = in->imaxpct;
 	if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100)
 		return XFS_ERROR(EINVAL);
+	if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
+		return error;
 	dpct = pct - mp->m_sb.sb_imax_pct;
 	error = xfs_read_buf(mp, mp->m_ddev_targp,
 			XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index a96bde6df96d..5de1f392e632 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -202,6 +202,27 @@ xfs_mount_free(
 	kmem_free(mp, sizeof(xfs_mount_t));
 }
 
+/*
+ * Check size of device based on the (data/realtime) block count.
+ * Note: this check is used by the growfs code as well as mount.
+ */
+int
+xfs_sb_validate_fsb_count(
+	xfs_sb_t	*sbp,
+	__uint64_t	nblocks)
+{
+	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
+	ASSERT(sbp->sb_blocklog >= BBSHIFT);
+
+#if XFS_BIG_BLKNOS     /* Limited by ULONG_MAX of page cache index */
+	if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
+		return E2BIG;
+#else                  /* Limited by UINT_MAX of sectors */
+	if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX)
+		return E2BIG;
+#endif
+	return 0;
+}
 
 /*
  * Check the validity of the SB found.
@@ -284,18 +305,8 @@ xfs_mount_validate_sb(
 		return XFS_ERROR(EFSCORRUPTED);
 	}
 
-	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
-	ASSERT(sbp->sb_blocklog >= BBSHIFT);
-
-#if XFS_BIG_BLKNOS     /* Limited by ULONG_MAX of page cache index */
-	if (unlikely(
-	    (sbp->sb_dblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX ||
-	    (sbp->sb_rblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX)) {
-#else                  /* Limited by UINT_MAX of sectors */
-	if (unlikely(
-	    (sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX ||
-	    (sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) {
-#endif
+	if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
+	    xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
 		xfs_fs_mount_cmn_err(flags,
 			"file system too large to be mounted on this system.");
 		return XFS_ERROR(E2BIG);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 82304b94646d..871a5bfd8617 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -624,6 +624,7 @@ extern int	xfs_sync_inodes(xfs_mount_t *, int, int *);
 extern xfs_agnumber_t	xfs_initialize_perag(struct bhv_vfs *, xfs_mount_t *,
 						xfs_agnumber_t);
 extern void	xfs_xlatesb(void *, struct xfs_sb *, int, __int64_t);
+extern int	xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
 
 extern struct xfs_dmops xfs_dmcore_stub;
 extern struct xfs_qmops xfs_qmcore_stub;
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index b3a5f07bd073..47082c01872d 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1882,11 +1882,13 @@ xfs_growfs_rt(
 	    (nrblocks = in->newblocks) <= sbp->sb_rblocks ||
 	    (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize)))
 		return XFS_ERROR(EINVAL);
+	if ((error = xfs_sb_validate_fsb_count(sbp, nrblocks)))
+		return error;
 	/*
 	 * Read in the last block of the device, make sure it exists.
 	 */
 	error = xfs_read_buf(mp, mp->m_rtdev_targp,
-			XFS_FSB_TO_BB(mp, in->newblocks - 1),
+			XFS_FSB_TO_BB(mp, nrblocks - 1),
 			XFS_FSB_TO_BB(mp, 1), 0, &bp);
 	if (error)
 		return error;
-- 
cgit v1.2.3-59-g8ed1b


From 40095b64f5da601a8ab61fbe4b40feb46830052e Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Mon, 14 May 2007 18:24:09 +1000
Subject: [XFS] Sleeping with the ilock waiting for I/O completion is Bad.

Recent fixes to the filesystem freezing code introduced a vn_iowait call
in the middle of the sync code. Unfortunately, at the point where this
call was added we are holding the ilock. The ilock is needed by I/O
completion for unwritten extent conversion and now updating the file size.
Hence I/o cannot complete if we hold the ilock while waiting for I/O
completion.

Fix up the bug and clean the code up around it.

SGI-PV: 963674
SGI-Modid: xfs-linux-melb:xfs-kern:28566a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_vfsops.c | 73 ++++++++++++++++++++---------------------------------
 1 file changed, 28 insertions(+), 45 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 65c561201cb8..92c1425d06ce 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -1128,58 +1128,41 @@ xfs_sync_inodes(
 		 * in the inode list.
 		 */
 
-		if ((flags & SYNC_CLOSE)  && (vp != NULL)) {
-			/*
-			 * This is the shutdown case.  We just need to
-			 * flush and invalidate all the pages associated
-			 * with the inode.  Drop the inode lock since
-			 * we can't hold it across calls to the buffer
-			 * cache.
-			 *
-			 * We don't set the VREMAPPING bit in the vnode
-			 * here, because we don't hold the vnode lock
-			 * exclusively.  It doesn't really matter, though,
-			 * because we only come here when we're shutting
-			 * down anyway.
-			 */
-			xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-			if (XFS_FORCED_SHUTDOWN(mp)) {
-				bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF);
-			} else {
-				error = bhv_vop_flushinval_pages(vp, 0, -1, FI_REMAPF);
+		/*
+		 * If we have to flush data or wait for I/O completion
+		 * we need to drop the ilock that we currently hold.
+		 * If we need to drop the lock, insert a marker if we
+		 * have not already done so.
+		 */
+		if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) ||
+		    ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) {
+			if (mount_locked) {
+				IPOINTER_INSERT(ip, mp);
 			}
+			xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
-			xfs_ilock(ip, XFS_ILOCK_SHARED);
-
-		} else if ((flags & SYNC_DELWRI) && (vp != NULL)) {
-			if (VN_DIRTY(vp)) {
-				/* We need to have dropped the lock here,
-				 * so insert a marker if we have not already
-				 * done so.
-				 */
-				if (mount_locked) {
-					IPOINTER_INSERT(ip, mp);
-				}
-
-				/*
-				 * Drop the inode lock since we can't hold it
-				 * across calls to the buffer cache.
-				 */
-				xfs_iunlock(ip, XFS_ILOCK_SHARED);
+			if (flags & SYNC_CLOSE) {
+				/* Shutdown case. Flush and invalidate. */
+				if (XFS_FORCED_SHUTDOWN(mp))
+					bhv_vop_toss_pages(vp, 0, -1, FI_REMAPF);
+				else
+					error = bhv_vop_flushinval_pages(vp, 0,
+								-1, FI_REMAPF);
+			} else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) {
 				error = bhv_vop_flush_pages(vp, (xfs_off_t)0,
 							-1, fflag, FI_NONE);
-				xfs_ilock(ip, XFS_ILOCK_SHARED);
 			}
 
+			/*
+			 * When freezing, we need to wait ensure all I/O (including direct
+			 * I/O) is complete to ensure no further data modification can take
+			 * place after this point
+			 */
+			if (flags & SYNC_IOWAIT)
+				vn_iowait(vp);
+
+			xfs_ilock(ip, XFS_ILOCK_SHARED);
 		}
-		/*
-		 * When freezing, we need to wait ensure all I/O (including direct
-		 * I/O) is complete to ensure no further data modification can take
-		 * place after this point
-		 */
-		if (flags & SYNC_IOWAIT)
-			vn_iowait(vp);
 
 		if (flags & SYNC_BDFLUSH) {
 			if ((flags & SYNC_ATTR) &&
-- 
cgit v1.2.3-59-g8ed1b


From 3db296f341b5902c4f9317022ae5d4da2d59d598 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Mon, 14 May 2007 18:24:16 +1000
Subject: [XFS] Fix use-after-free during log unmount.

Don't reference the log buffer after running the callbacks as the callback
can trigger the log buffers to be freed during unmount.

SGI-PV: 964545
SGI-Modid: xfs-linux-melb:xfs-kern:28567a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_log.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 635f99e6302f..5bb902056e61 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -967,14 +967,16 @@ xlog_iodone(xfs_buf_t *bp)
 	} else if (iclog->ic_state & XLOG_STATE_IOERROR) {
 		aborted = XFS_LI_ABORTED;
 	}
+
+	/* log I/O is always issued ASYNC */
+	ASSERT(XFS_BUF_ISASYNC(bp));
 	xlog_state_done_syncing(iclog, aborted);
-	if (!(XFS_BUF_ISASYNC(bp))) {
-		/*
-		 * Corresponding psema() will be done in bwrite().  If we don't
-		 * vsema() here, panic.
-		 */
-		XFS_BUF_V_IODONESEMA(bp);
-	}
+	/*
+	 * do not reference the buffer (bp) here as we could race
+	 * with it being freed after writing the unmount record to the
+	 * log.
+	 */
+
 }	/* xlog_iodone */
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From ca165b88927e41ad18908d7b37f08ef81eae0bf8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 24 May 2007 15:21:11 +1000
Subject: [XFS] Fix double free in xfs_buf_get_noaddr error handling path

SGI-PV: 964983
SGI-Modid: xfs-linux-melb:xfs-kern:28639a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/linux-2.6/xfs_buf.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 208daf58b826..192aa0650919 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -792,8 +792,9 @@ xfs_buf_get_noaddr(
  fail_free_mem:
 	while (--i >= 0)
 		__free_page(bp->b_pages[i]);
+	_xfs_buf_free_pages(bp);
  fail_free_buf:
-	xfs_buf_free(bp);
+	xfs_buf_deallocate(bp);
  fail:
 	return NULL;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 511105b3d7c2440ee84fc3f90d200569aac88162 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Thu, 24 May 2007 15:21:57 +1000
Subject: [XFS] Fix vmalloc leak on mount/unmount.

When setting the length of the iclogbuf to write out we should just be
changing the desired byte count rather completely reassociating the buffer
memory with the buffer. Reassociating the buffer memory changes the
apparent length of the buffer and hence when we free the buffer, we don't
free all the vmap()d space we originally allocated.

SGI-PV: 964983
SGI-Modid: xfs-linux-melb:xfs-kern:28640a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_log.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 5bb902056e61..fb50fd400e53 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1436,7 +1436,7 @@ xlog_sync(xlog_t		*log,
 	} else {
 		iclog->ic_bwritecnt = 1;
 	}
-	XFS_BUF_SET_PTR(bp, (xfs_caddr_t) &(iclog->ic_header), count);
+	XFS_BUF_SET_COUNT(bp, count);
 	XFS_BUF_SET_FSPRIVATE(bp, iclog);	/* save for later */
 	XFS_BUF_ZEROFLAGS(bp);
 	XFS_BUF_BUSY(bp);
-- 
cgit v1.2.3-59-g8ed1b


From 92dfe8d266eaf35a50607a0e0dcf525e1d367c80 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Thu, 24 May 2007 15:22:19 +1000
Subject: [XFS] Make hole punching at EOF atomic.

If hole punching at EOF is done as two steps (i.e. truncate then extend)
the file is in a transient state between the two steps where an
application can see the incorrect file size. Punching a hole to EOF needs
to be treated in teh same way as all other hole punching cases so that the
file size is never seen to change.

SGI-PV: 962012
SGI-Modid: xfs-linux-melb:xfs-kern:28641a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Vlad Apostolov <vapo@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_rw.h       | 14 +++++++++++---
 fs/xfs/xfs_vnodeops.c | 28 ++++++++++++++++++----------
 2 files changed, 29 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index 188b296ff50c..ba74000931ec 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -71,6 +71,12 @@ xfs_fsb_to_db_io(struct xfs_iocore *io, xfs_fsblock_t fsb)
 		 XFS_FSB_TO_DADDR((io)->io_mount, (fsb)));
 }
 
+/*
+ * Flags for xfs_free_eofblocks
+ */
+#define XFS_FREE_EOF_LOCK	(1<<0)
+#define XFS_FREE_EOF_NOLOCK	(1<<1)
+
 /*
  * Prototypes for functions in xfs_rw.c.
  */
@@ -91,10 +97,12 @@ extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
 extern int xfs_rwlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock);
 extern void xfs_rwunlock(bhv_desc_t *bdp, bhv_vrwlock_t write_lock);
 extern int xfs_setattr(bhv_desc_t *, bhv_vattr_t *vap, int flags,
-		       cred_t *credp);
+			cred_t *credp);
 extern int xfs_change_file_space(bhv_desc_t *bdp, int cmd, xfs_flock64_t *bf,
-				 xfs_off_t offset, cred_t *credp, int flags);
+			xfs_off_t offset, cred_t *credp, int flags);
 extern int xfs_set_dmattrs(bhv_desc_t *bdp, u_int evmask, u_int16_t state,
-			   cred_t *credp);
+			cred_t *credp);
+extern int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
+			int flags);
 
 #endif /* __XFS_RW_H__ */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 70bc82f65311..6420ca8df5ec 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1201,13 +1201,15 @@ xfs_fsync(
 }
 
 /*
- * This is called by xfs_inactive to free any blocks beyond eof,
- * when the link count isn't zero.
+ * This is called by xfs_inactive to free any blocks beyond eof
+ * when the link count isn't zero and by xfs_dm_punch_hole() when
+ * punching a hole to EOF.
  */
-STATIC int
-xfs_inactive_free_eofblocks(
+int
+xfs_free_eofblocks(
 	xfs_mount_t	*mp,
-	xfs_inode_t	*ip)
+	xfs_inode_t	*ip,
+	int		flags)
 {
 	xfs_trans_t	*tp;
 	int		error;
@@ -1216,6 +1218,7 @@ xfs_inactive_free_eofblocks(
 	xfs_filblks_t	map_len;
 	int		nimaps;
 	xfs_bmbt_irec_t	imap;
+	int		use_iolock = (flags & XFS_FREE_EOF_LOCK);
 
 	/*
 	 * Figure out if there are any blocks beyond the end
@@ -1256,11 +1259,13 @@ xfs_inactive_free_eofblocks(
 		 * cache and we can't
 		 * do that within a transaction.
 		 */
-		xfs_ilock(ip, XFS_IOLOCK_EXCL);
+		if (use_iolock)
+			xfs_ilock(ip, XFS_IOLOCK_EXCL);
 		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
 				    ip->i_size);
 		if (error) {
-			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+			if (use_iolock)
+				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 			return error;
 		}
 
@@ -1297,7 +1302,8 @@ xfs_inactive_free_eofblocks(
 			error = xfs_trans_commit(tp,
 						XFS_TRANS_RELEASE_LOG_RES);
 		}
-		xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+		xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)
+					    : XFS_ILOCK_EXCL));
 	}
 	return error;
 }
@@ -1573,7 +1579,8 @@ xfs_release(
 		     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
 		    (!(ip->i_d.di_flags &
 				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
-			if ((error = xfs_inactive_free_eofblocks(mp, ip)))
+			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
+			if (error)
 				return error;
 			/* Update linux inode block count after free above */
 			vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
@@ -1654,7 +1661,8 @@ xfs_inactive(
 		     (!(ip->i_d.di_flags &
 				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
 		      (ip->i_delayed_blks != 0)))) {
-			if ((error = xfs_inactive_free_eofblocks(mp, ip)))
+			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
+			if (error)
 				return VN_INACTIVE_CACHE;
 			/* Update linux inode block count after free above */
 			vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
-- 
cgit v1.2.3-59-g8ed1b


From 3260f78ad6d5b788e78ea709d377f58e569bee41 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 24 May 2007 15:25:42 +1000
Subject: [XFS] Use generic shrinker interfaces in XFS.

SGI-PV: 964986
SGI-Modid: xfs-linux-melb:xfs-kern:28642a

Signed-Off-By: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/linux-2.6/kmem.h    | 19 -------------------
 fs/xfs/linux-2.6/xfs_buf.c |  6 +++---
 fs/xfs/quota/xfs_qm.c      |  6 +++---
 3 files changed, 6 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 9ebabdf7829c..4b6470cf87f0 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -100,25 +100,6 @@ kmem_zone_destroy(kmem_zone_t *zone)
 extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
 extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
 
-/*
- * Low memory cache shrinkers
- */
-
-typedef struct shrinker *kmem_shaker_t;
-typedef int (*kmem_shake_func_t)(int, gfp_t);
-
-static inline kmem_shaker_t
-kmem_shake_register(kmem_shake_func_t sfunc)
-{
-	return set_shrinker(DEFAULT_SEEKS, sfunc);
-}
-
-static inline void
-kmem_shake_deregister(kmem_shaker_t shrinker)
-{
-	remove_shrinker(shrinker);
-}
-
 static inline int
 kmem_shake_allow(gfp_t gfp_mask)
 {
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 192aa0650919..a3eff54a201c 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -35,7 +35,7 @@
 #include <linux/freezer.h>
 
 static kmem_zone_t *xfs_buf_zone;
-static kmem_shaker_t xfs_buf_shake;
+static struct shrinker *xfs_buf_shake;
 STATIC int xfsbufd(void *);
 STATIC int xfsbufd_wakeup(int, gfp_t);
 STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
@@ -1831,7 +1831,7 @@ xfs_buf_init(void)
 	if (!xfsdatad_workqueue)
 		goto out_destroy_xfslogd_workqueue;
 
-	xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup);
+	xfs_buf_shake = set_shrinker(DEFAULT_SEEKS, xfsbufd_wakeup);
 	if (!xfs_buf_shake)
 		goto out_destroy_xfsdatad_workqueue;
 
@@ -1853,7 +1853,7 @@ xfs_buf_init(void)
 void
 xfs_buf_terminate(void)
 {
-	kmem_shake_deregister(xfs_buf_shake);
+	remove_shrinker(xfs_buf_shake);
 	destroy_workqueue(xfsdatad_workqueue);
 	destroy_workqueue(xfslogd_workqueue);
 	kmem_zone_destroy(xfs_buf_zone);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 3e4a8ad8a34c..eeb3d9cc0e8e 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -62,7 +62,7 @@ uint		ndquot;
 
 kmem_zone_t	*qm_dqzone;
 kmem_zone_t	*qm_dqtrxzone;
-static kmem_shaker_t	xfs_qm_shaker;
+static struct shrinker *xfs_qm_shaker;
 
 static cred_t	xfs_zerocr;
 static xfs_inode_t	xfs_zeroino;
@@ -150,7 +150,7 @@ xfs_Gqm_init(void)
 	} else
 		xqm->qm_dqzone = qm_dqzone;
 
-	xfs_qm_shaker = kmem_shake_register(xfs_qm_shake);
+	xfs_qm_shaker = set_shrinker(DEFAULT_SEEKS, xfs_qm_shake);
 
 	/*
 	 * The t_dqinfo portion of transactions.
@@ -182,7 +182,7 @@ xfs_qm_destroy(
 
 	ASSERT(xqm != NULL);
 	ASSERT(xqm->qm_nrefs == 0);
-	kmem_shake_deregister(xfs_qm_shaker);
+	remove_shrinker(xfs_qm_shaker);
 	hsize = xqm->qm_dqhashmask + 1;
 	for (i = 0; i < hsize; i++) {
 		xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
-- 
cgit v1.2.3-59-g8ed1b


From 92821e2ba4ae26887223326fb0b95cdab963b768 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Thu, 24 May 2007 15:26:31 +1000
Subject: [XFS] Lazy Superblock Counters

When we have a couple of hundred transactions on the fly at once, they all
typically modify the on disk superblock in some way.
create/unclink/mkdir/rmdir modify inode counts, allocation/freeing modify
free block counts.

When these counts are modified in a transaction, they must eventually lock
the superblock buffer and apply the mods. The buffer then remains locked
until the transaction is committed into the incore log buffer. The result
of this is that with enough transactions on the fly the incore superblock
buffer becomes a bottleneck.

The result of contention on the incore superblock buffer is that
transaction rates fall - the more pressure that is put on the superblock
buffer, the slower things go.

The key to removing the contention is to not require the superblock fields
in question to be locked. We do that by not marking the superblock dirty
in the transaction. IOWs, we modify the incore superblock but do not
modify the cached superblock buffer. In short, we do not log superblock
modifications to critical fields in the superblock on every transaction.
In fact we only do it just before we write the superblock to disk every
sync period or just before unmount.

This creates an interesting problem - if we don't log or write out the
fields in every transaction, then how do the values get recovered after a
crash? the answer is simple - we keep enough duplicate, logged information
in other structures that we can reconstruct the correct count after log
recovery has been performed.

It is the AGF and AGI structures that contain the duplicate information;
after recovery, we walk every AGI and AGF and sum their individual
counters to get the correct value, and we do a transaction into the log to
correct them. An optimisation of this is that if we have a clean unmount
record, we know the value in the superblock is correct, so we can avoid
the summation walk under normal conditions and so mount/recovery times do
not change under normal operation.

One wrinkle that was discovered during development was that the blocks
used in the freespace btrees are never accounted for in the AGF counters.
This was once a valid optimisation to make; when the filesystem is full,
the free space btrees are empty and consume no space. Hence when it
matters, the "accounting" is correct. But that means the when we do the
AGF summations, we would not have a correct count and xfs_check would
complain. Hence a new counter was added to track the number of blocks used
by the free space btrees. This is an *on-disk format change*.

As a result of this, lazy superblock counters are a mkfs option and at the
moment on linux there is no way to convert an old filesystem. This is
possible - xfs_db can be used to twiddle the right bits and then
xfs_repair will do the format conversion for you. Similarly, you can
convert backwards as well. At some point we'll add functionality to
xfs_admin to do the bit twiddling easily....

SGI-PV: 964999
SGI-Modid: xfs-linux-melb:xfs-kern:28652a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/linux-2.6/xfs_super.c |   3 +-
 fs/xfs/linux-2.6/xfs_vfs.h   |   1 +
 fs/xfs/xfs_ag.h              |   8 ++-
 fs/xfs/xfs_alloc.c           |  48 +++++++++++---
 fs/xfs/xfs_alloc.h           |   6 +-
 fs/xfs/xfs_alloc_btree.c     |  20 +++---
 fs/xfs/xfs_fs.h              |   1 +
 fs/xfs/xfs_fsops.c           |   2 +
 fs/xfs/xfs_ialloc.c          |  28 +++++++-
 fs/xfs/xfs_ialloc.h          |  10 +++
 fs/xfs/xfs_log.c             |   4 +-
 fs/xfs/xfs_log_recover.c     |   8 +++
 fs/xfs/xfs_mount.c           | 154 +++++++++++++++++++++++++++++++++++++++++--
 fs/xfs/xfs_mount.h           |  10 ++-
 fs/xfs/xfs_sb.h              |  16 ++++-
 fs/xfs/xfs_trans.c           |  58 ++++++++++++----
 fs/xfs/xfs_trans.h           |   3 +-
 fs/xfs/xfs_vfsops.c          |  11 ++++
 18 files changed, 337 insertions(+), 54 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index bf9a9d5909be..05f188ed1206 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -547,7 +547,8 @@ vfs_sync_worker(
 
 	if (!(vfsp->vfs_flag & VFS_RDONLY))
 		error = bhv_vfs_sync(vfsp, SYNC_FSDATA | SYNC_BDFLUSH | \
-					SYNC_ATTR | SYNC_REFCACHE, NULL);
+					SYNC_ATTR | SYNC_REFCACHE | SYNC_SUPER,
+					NULL);
 	vfsp->vfs_sync_seq++;
 	wake_up(&vfsp->vfs_wait_single_sync_task);
 }
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index e2c2ce98ab5b..cb7b0d62fb96 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -92,6 +92,7 @@ typedef enum {
 #define SYNC_REFCACHE		0x0040  /* prune some of the nfs ref cache */
 #define SYNC_REMOUNT		0x0080  /* remount readonly, no dummy LRs */
 #define SYNC_IOWAIT		0x0100  /* wait for all I/O to complete */
+#define SYNC_SUPER		0x0200  /* flush superblock to disk */
 
 #define SHUTDOWN_META_IO_ERROR	0x0001	/* write attempt to metadata failed */
 #define SHUTDOWN_LOG_IO_ERROR	0x0002	/* write attempt to the log failed */
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 9ece7f87ec5b..b1dd0029c60e 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -68,6 +68,7 @@ typedef struct xfs_agf {
 	__be32		agf_flcount;	/* count of blocks in freelist */
 	__be32		agf_freeblks;	/* total free blocks */
 	__be32		agf_longest;	/* longest free space */
+	__be32		agf_btreeblks;	/* # of blocks held in AGF btrees */
 } xfs_agf_t;
 
 #define	XFS_AGF_MAGICNUM	0x00000001
@@ -81,7 +82,8 @@ typedef struct xfs_agf {
 #define	XFS_AGF_FLCOUNT		0x00000100
 #define	XFS_AGF_FREEBLKS	0x00000200
 #define	XFS_AGF_LONGEST		0x00000400
-#define	XFS_AGF_NUM_BITS	11
+#define	XFS_AGF_BTREEBLKS	0x00000800
+#define	XFS_AGF_NUM_BITS	12
 #define	XFS_AGF_ALL_BITS	((1 << XFS_AGF_NUM_BITS) - 1)
 
 /* disk block (xfs_daddr_t) in the AG */
@@ -186,11 +188,13 @@ typedef struct xfs_perag
 	__uint32_t	pagf_flcount;	/* count of blocks in freelist */
 	xfs_extlen_t	pagf_freeblks;	/* total free blocks */
 	xfs_extlen_t	pagf_longest;	/* longest free space */
+	__uint32_t	pagf_btreeblks;	/* # of blocks held in AGF btrees */
 	xfs_agino_t	pagi_freecount;	/* number of free inodes */
+	xfs_agino_t	pagi_count;	/* number of allocated inodes */
+	int		pagb_count;	/* pagb slots in use */
 #ifdef __KERNEL__
 	lock_t		pagb_lock;	/* lock for pagb_list */
 #endif
-	int		pagb_count;	/* pagb slots in use */
 	xfs_perag_busy_t *pagb_list;	/* unstable blocks */
 } xfs_perag_t;
 
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 8e9a40aa0cd3..98f95d4c4bcc 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1447,7 +1447,8 @@ xfs_alloc_ag_vextent_small(
 	else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
 		 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
 		  > args->minleft)) {
-		if ((error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno)))
+		error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
+		if (error)
 			goto error0;
 		if (fbno != NULLAGBLOCK) {
 			if (args->userdata) {
@@ -1923,7 +1924,8 @@ xfs_alloc_fix_freelist(
 	while (be32_to_cpu(agf->agf_flcount) > need) {
 		xfs_buf_t	*bp;
 
-		if ((error = xfs_alloc_get_freelist(tp, agbp, &bno)))
+		error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
+		if (error)
 			return error;
 		if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
 			return error;
@@ -1973,8 +1975,9 @@ xfs_alloc_fix_freelist(
 		 * Put each allocated block on the list.
 		 */
 		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
-			if ((error = xfs_alloc_put_freelist(tp, agbp, agflbp,
-					bno)))
+			error = xfs_alloc_put_freelist(tp, agbp,
+							agflbp, bno, 0);
+			if (error)
 				return error;
 		}
 	}
@@ -1991,13 +1994,15 @@ int				/* error */
 xfs_alloc_get_freelist(
 	xfs_trans_t	*tp,	/* transaction pointer */
 	xfs_buf_t	*agbp,	/* buffer containing the agf structure */
-	xfs_agblock_t	*bnop)	/* block address retrieved from freelist */
+	xfs_agblock_t	*bnop,	/* block address retrieved from freelist */
+	int		btreeblk) /* destination is a AGF btree */
 {
 	xfs_agf_t	*agf;	/* a.g. freespace structure */
 	xfs_agfl_t	*agfl;	/* a.g. freelist structure */
 	xfs_buf_t	*agflbp;/* buffer for a.g. freelist structure */
 	xfs_agblock_t	bno;	/* block number returned */
 	int		error;
+	int		logflags;
 #ifdef XFS_ALLOC_TRACE
 	static char	fname[] = "xfs_alloc_get_freelist";
 #endif
@@ -2032,8 +2037,16 @@ xfs_alloc_get_freelist(
 	be32_add(&agf->agf_flcount, -1);
 	xfs_trans_agflist_delta(tp, -1);
 	pag->pagf_flcount--;
-	TRACE_MODAGF(NULL, agf, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
-	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
+
+	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
+	if (btreeblk) {
+		be32_add(&agf->agf_btreeblks, 1);
+		pag->pagf_btreeblks++;
+		logflags |= XFS_AGF_BTREEBLKS;
+	}
+
+	TRACE_MODAGF(NULL, agf, logflags);
+	xfs_alloc_log_agf(tp, agbp, logflags);
 	*bnop = bno;
 
 	/*
@@ -2071,6 +2084,7 @@ xfs_alloc_log_agf(
 		offsetof(xfs_agf_t, agf_flcount),
 		offsetof(xfs_agf_t, agf_freeblks),
 		offsetof(xfs_agf_t, agf_longest),
+		offsetof(xfs_agf_t, agf_btreeblks),
 		sizeof(xfs_agf_t)
 	};
 
@@ -2106,12 +2120,14 @@ xfs_alloc_put_freelist(
 	xfs_trans_t		*tp,	/* transaction pointer */
 	xfs_buf_t		*agbp,	/* buffer for a.g. freelist header */
 	xfs_buf_t		*agflbp,/* buffer for a.g. free block array */
-	xfs_agblock_t		bno)	/* block being freed */
+	xfs_agblock_t		bno,	/* block being freed */
+	int			btreeblk) /* block came from a AGF btree */
 {
 	xfs_agf_t		*agf;	/* a.g. freespace structure */
 	xfs_agfl_t		*agfl;	/* a.g. free block array */
 	__be32			*blockp;/* pointer to array entry */
 	int			error;
+	int			logflags;
 #ifdef XFS_ALLOC_TRACE
 	static char		fname[] = "xfs_alloc_put_freelist";
 #endif
@@ -2132,11 +2148,22 @@ xfs_alloc_put_freelist(
 	be32_add(&agf->agf_flcount, 1);
 	xfs_trans_agflist_delta(tp, 1);
 	pag->pagf_flcount++;
+
+	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
+	if (btreeblk) {
+		be32_add(&agf->agf_btreeblks, -1);
+		pag->pagf_btreeblks--;
+		logflags |= XFS_AGF_BTREEBLKS;
+	}
+
+	TRACE_MODAGF(NULL, agf, logflags);
+	xfs_alloc_log_agf(tp, agbp, logflags);
+
 	ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
 	blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
 	*blockp = cpu_to_be32(bno);
-	TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
-	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
+	TRACE_MODAGF(NULL, agf, logflags);
+	xfs_alloc_log_agf(tp, agbp, logflags);
 	xfs_trans_log_buf(tp, agflbp,
 		(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
 		(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
@@ -2196,6 +2223,7 @@ xfs_alloc_read_agf(
 	pag = &mp->m_perag[agno];
 	if (!pag->pagf_init) {
 		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
+		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
 		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
 		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
 		pag->pagf_levels[XFS_BTNUM_BNOi] =
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 5a4256120ccc..5aec15d0651e 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -136,7 +136,8 @@ int				/* error */
 xfs_alloc_get_freelist(
 	struct xfs_trans *tp,	/* transaction pointer */
 	struct xfs_buf	*agbp,	/* buffer containing the agf structure */
-	xfs_agblock_t	*bnop);	/* block address retrieved from freelist */
+	xfs_agblock_t	*bnop,	/* block address retrieved from freelist */
+	int		btreeblk); /* destination is a AGF btree */
 
 /*
  * Log the given fields from the agf structure.
@@ -165,7 +166,8 @@ xfs_alloc_put_freelist(
 	struct xfs_trans *tp,	/* transaction pointer */
 	struct xfs_buf	*agbp,	/* buffer for a.g. freelist header */
 	struct xfs_buf	*agflbp,/* buffer for a.g. free block array */
-	xfs_agblock_t	bno);	/* block being freed */
+	xfs_agblock_t	bno,	/* block being freed */
+	int		btreeblk); /* owner was a AGF btree */
 
 /*
  * Read in the allocation group header (free/alloc section).
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 74cadf95d4e8..1603ce595853 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -226,8 +226,9 @@ xfs_alloc_delrec(
 			/*
 			 * Put this buffer/block on the ag's freelist.
 			 */
-			if ((error = xfs_alloc_put_freelist(cur->bc_tp,
-					cur->bc_private.a.agbp, NULL, bno)))
+			error = xfs_alloc_put_freelist(cur->bc_tp,
+					cur->bc_private.a.agbp, NULL, bno, 1);
+			if (error)
 				return error;
 			/*
 			 * Since blocks move to the free list without the
@@ -549,8 +550,9 @@ xfs_alloc_delrec(
 	/*
 	 * Free the deleting block by putting it on the freelist.
 	 */
-	if ((error = xfs_alloc_put_freelist(cur->bc_tp, cur->bc_private.a.agbp,
-			NULL, rbno)))
+	error = xfs_alloc_put_freelist(cur->bc_tp,
+					 cur->bc_private.a.agbp, NULL, rbno, 1);
+	if (error)
 		return error;
 	/*
 	 * Since blocks move to the free list without the coordination
@@ -1320,8 +1322,9 @@ xfs_alloc_newroot(
 	/*
 	 * Get a buffer from the freelist blocks, for the new root.
 	 */
-	if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
-			&nbno)))
+	error = xfs_alloc_get_freelist(cur->bc_tp,
+					cur->bc_private.a.agbp, &nbno, 1);
+	if (error)
 		return error;
 	/*
 	 * None available, we fail.
@@ -1604,8 +1607,9 @@ xfs_alloc_split(
 	 * Allocate the new block from the freelist.
 	 * If we can't do it, we're toast.  Give up.
 	 */
-	if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
-			&rbno)))
+	error = xfs_alloc_get_freelist(cur->bc_tp,
+					 cur->bc_private.a.agbp, &rbno, 1);
+	if (error)
 		return error;
 	if (rbno == NULLAGBLOCK) {
 		*stat = 0;
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 1335449841cd..1b60cfc28be5 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -238,6 +238,7 @@ typedef struct xfs_fsop_resblks {
 #define XFS_FSOP_GEOM_FLAGS_LOGV2	0x0100	/* log format version 2	*/
 #define XFS_FSOP_GEOM_FLAGS_SECTOR	0x0200	/* sector sizes >1BB	*/
 #define XFS_FSOP_GEOM_FLAGS_ATTR2	0x0400	/* inline attributes rework */
+#define XFS_FSOP_GEOM_FLAGS_LAZYSB	0x4000	/* lazy superblock counters */
 
 
 /*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 25e5eae8a976..27d01afe8465 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -94,6 +94,8 @@ xfs_fs_geometry(
 				XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |
 			(XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
+			(xfs_sb_version_haslazysbcount(&mp->m_sb) ?
+				XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
 			(XFS_SB_VERSION_HASATTR2(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_ATTR2 : 0);
 		geo->logsectsize = XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ?
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index b5feb3e77116..f943368c9b93 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -123,6 +123,7 @@ xfs_ialloc_ag_alloc(
 	int		blks_per_cluster;  /* fs blocks per inode cluster */
 	xfs_btree_cur_t	*cur;		/* inode btree cursor */
 	xfs_daddr_t	d;		/* disk addr of buffer */
+	xfs_agnumber_t	agno;
 	int		error;
 	xfs_buf_t	*fbuf;		/* new free inodes' buffer */
 	xfs_dinode_t	*free;		/* new free inode structure */
@@ -302,15 +303,15 @@ xfs_ialloc_ag_alloc(
 	}
 	be32_add(&agi->agi_count, newlen);
 	be32_add(&agi->agi_freecount, newlen);
+	agno = be32_to_cpu(agi->agi_seqno);
 	down_read(&args.mp->m_peraglock);
-	args.mp->m_perag[be32_to_cpu(agi->agi_seqno)].pagi_freecount += newlen;
+	args.mp->m_perag[agno].pagi_freecount += newlen;
 	up_read(&args.mp->m_peraglock);
 	agi->agi_newino = cpu_to_be32(newino);
 	/*
 	 * Insert records describing the new inode chunk into the btree.
 	 */
-	cur = xfs_btree_init_cursor(args.mp, tp, agbp,
-			be32_to_cpu(agi->agi_seqno),
+	cur = xfs_btree_init_cursor(args.mp, tp, agbp, agno,
 			XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
 	for (thisino = newino;
 	     thisino < newino + newlen;
@@ -1387,6 +1388,7 @@ xfs_ialloc_read_agi(
 	pag = &mp->m_perag[agno];
 	if (!pag->pagi_init) {
 		pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
+		pag->pagi_count = be32_to_cpu(agi->agi_count);
 		pag->pagi_init = 1;
 	} else {
 		/*
@@ -1410,3 +1412,23 @@ xfs_ialloc_read_agi(
 	*bpp = bp;
 	return 0;
 }
+
+/*
+ * Read in the agi to initialise the per-ag data in the mount structure
+ */
+int
+xfs_ialloc_pagi_init(
+	xfs_mount_t	*mp,		/* file system mount structure */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_agnumber_t	agno)		/* allocation group number */
+{
+	xfs_buf_t	*bp = NULL;
+	int		error;
+
+	error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
+	if (error)
+		return error;
+	if (bp)
+		xfs_trans_brelse(tp, bp);
+	return 0;
+}
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index 7f5debe1acb6..97f4040931ca 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -149,6 +149,16 @@ xfs_ialloc_read_agi(
 	xfs_agnumber_t	agno,		/* allocation group number */
 	struct xfs_buf	**bpp);		/* allocation group hdr buf */
 
+/*
+ * Read in the allocation group header to initialise the per-ag data
+ * in the mount structure
+ */
+int
+xfs_ialloc_pagi_init(
+	struct xfs_mount *mp,		/* file system mount structure */
+	struct xfs_trans *tp,		/* transaction pointer */
+        xfs_agnumber_t  agno);		/* allocation group number */
+
 #endif	/* __KERNEL__ */
 
 #endif	/* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index fb50fd400e53..9d4c4fbeb3ee 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -817,10 +817,8 @@ xfs_log_need_covered(xfs_mount_t *mp)
 	SPLDECL(s);
 	int		needed = 0, gen;
 	xlog_t		*log = mp->m_log;
-	bhv_vfs_t	*vfsp = XFS_MTOVFS(mp);
 
-	if (vfs_test_for_freeze(vfsp) || XFS_FORCED_SHUTDOWN(mp) ||
-	    (vfsp->vfs_flag & VFS_RDONLY))
+	if (!xfs_fs_writable(mp))
 		return 0;
 
 	s = LOG_LOCK(log);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 080fabf61c92..fddbb091a86f 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -927,6 +927,14 @@ xlog_find_tail(
 			ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle,
 					after_umount_blk);
 			*tail_blk = after_umount_blk;
+
+			/*
+			 * Note that the unmount was clean. If the unmount
+			 * was not clean, we need to know this to rebuild the
+			 * superblock counters from the perag headers if we
+			 * have a filesystem using non-persistent counters.
+			 */
+			log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
 		}
 	}
 
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 5de1f392e632..f6fe47d8c4dc 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -643,6 +643,64 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
 					sbp->sb_inopblock);
 	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
 }
+
+/*
+ * xfs_initialize_perag_data
+ *
+ * Read in each per-ag structure so we can count up the number of
+ * allocated inodes, free inodes and used filesystem blocks as this
+ * information is no longer persistent in the superblock. Once we have
+ * this information, write it into the in-core superblock structure.
+ */
+STATIC int
+xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
+{
+	xfs_agnumber_t	index;
+	xfs_perag_t	*pag;
+	xfs_sb_t	*sbp = &mp->m_sb;
+	uint64_t	ifree = 0;
+	uint64_t	ialloc = 0;
+	uint64_t	bfree = 0;
+	uint64_t	bfreelst = 0;
+	uint64_t	btree = 0;
+	int		error;
+	int		s;
+
+	for (index = 0; index < agcount; index++) {
+		/*
+		 * read the agf, then the agi. This gets us
+		 * all the inforamtion we need and populates the
+		 * per-ag structures for us.
+		 */
+		error = xfs_alloc_pagf_init(mp, NULL, index, 0);
+		if (error)
+			return error;
+
+		error = xfs_ialloc_pagi_init(mp, NULL, index);
+		if (error)
+			return error;
+		pag = &mp->m_perag[index];
+		ifree += pag->pagi_freecount;
+		ialloc += pag->pagi_count;
+		bfree += pag->pagf_freeblks;
+		bfreelst += pag->pagf_flcount;
+		btree += pag->pagf_btreeblks;
+	}
+	/*
+	 * Overwrite incore superblock counters with just-read data
+	 */
+	s = XFS_SB_LOCK(mp);
+	sbp->sb_ifree = ifree;
+	sbp->sb_icount = ialloc;
+	sbp->sb_fdblocks = bfree + bfreelst + btree;
+	XFS_SB_UNLOCK(mp, s);
+
+	/* Fixup the per-cpu counters as well. */
+	xfs_icsb_reinit_counters(mp);
+
+	return 0;
+}
+
 /*
  * xfs_mountfs
  *
@@ -986,6 +1044,34 @@ xfs_mountfs(
 		goto error2;
 	}
 
+	/*
+	 * Now the log is mounted, we know if it was an unclean shutdown or
+	 * not. If it was, with the first phase of recovery has completed, we
+	 * have consistent AG blocks on disk. We have not recovered EFIs yet,
+	 * but they are recovered transactionally in the second recovery phase
+	 * later.
+	 *
+	 * Hence we can safely re-initialise incore superblock counters from
+	 * the per-ag data. These may not be correct if the filesystem was not
+	 * cleanly unmounted, so we need to wait for recovery to finish before
+	 * doing this.
+	 *
+	 * If the filesystem was cleanly unmounted, then we can trust the
+	 * values in the superblock to be correct and we don't need to do
+	 * anything here.
+	 *
+	 * If we are currently making the filesystem, the initialisation will
+	 * fail as the perag data is in an undefined state.
+	 */
+
+	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
+	    !XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
+	     !mp->m_sb.sb_inprogress) {
+		error = xfs_initialize_perag_data(mp, sbp->sb_agcount);
+		if (error) {
+			goto error2;
+		}
+	}
 	/*
 	 * Get and sanity-check the root inode.
 	 * Save the pointer to it in the mount structure.
@@ -1049,6 +1135,7 @@ xfs_mountfs(
 		goto error4;
 	}
 
+
 	/*
 	 * Complete the quota initialisation, post-log-replay component.
 	 */
@@ -1111,10 +1198,9 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 		xfs_binval(mp->m_rtdev_targp);
 	}
 
+	xfs_log_sbcount(mp, 1);
 	xfs_unmountfs_writesb(mp);
-
 	xfs_unmountfs_wait(mp); 		/* wait for async bufs */
-
 	xfs_log_unmount(mp);			/* Done! No more fs ops. */
 
 	xfs_freesb(mp);
@@ -1160,6 +1246,62 @@ xfs_unmountfs_wait(xfs_mount_t *mp)
 	xfs_wait_buftarg(mp->m_ddev_targp);
 }
 
+int
+xfs_fs_writable(xfs_mount_t *mp)
+{
+	bhv_vfs_t	*vfsp = XFS_MTOVFS(mp);
+
+	return !(vfs_test_for_freeze(vfsp) || XFS_FORCED_SHUTDOWN(mp) ||
+		(vfsp->vfs_flag & VFS_RDONLY));
+}
+
+/*
+ * xfs_log_sbcount
+ *
+ * Called either periodically to keep the on disk superblock values
+ * roughly up to date or from unmount to make sure the values are
+ * correct on a clean unmount.
+ *
+ * Note this code can be called during the process of freezing, so
+ * we may need to use the transaction allocator which does not not
+ * block when the transaction subsystem is in its frozen state.
+ */
+int
+xfs_log_sbcount(
+	xfs_mount_t	*mp,
+	uint		sync)
+{
+	xfs_trans_t	*tp;
+	int		error;
+
+	if (!xfs_fs_writable(mp))
+		return 0;
+
+	xfs_icsb_sync_counters(mp);
+
+	/*
+	 * we don't need to do this if we are updating the superblock
+	 * counters on every modification.
+	 */
+	if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
+		return 0;
+
+	tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT);
+	error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
+					XFS_DEFAULT_LOG_COUNT);
+	if (error) {
+		xfs_trans_cancel(tp, 0);
+		return error;
+	}
+
+	xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS);
+	if (sync)
+		xfs_trans_set_sync(tp);
+	xfs_trans_commit(tp, 0);
+
+	return 0;
+}
+
 int
 xfs_unmountfs_writesb(xfs_mount_t *mp)
 {
@@ -1171,16 +1313,15 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
 	 * skip superblock write if fs is read-only, or
 	 * if we are doing a forced umount.
 	 */
-	sbp = xfs_getsb(mp, 0);
 	if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY ||
 		XFS_FORCED_SHUTDOWN(mp))) {
 
-		xfs_icsb_sync_counters(mp);
+		sbp = xfs_getsb(mp, 0);
+ 		sb = XFS_BUF_TO_SBP(sbp);
 
 		/*
 		 * mark shared-readonly if desired
 		 */
-		sb = XFS_BUF_TO_SBP(sbp);
 		if (mp->m_mk_sharedro) {
 			if (!(sb->sb_flags & XFS_SBF_READONLY))
 				sb->sb_flags |= XFS_SBF_READONLY;
@@ -1189,6 +1330,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
 			xfs_fs_cmn_err(CE_NOTE, mp,
 				"Unmounting, marking shared read-only");
 		}
+
 		XFS_BUF_UNDONE(sbp);
 		XFS_BUF_UNREAD(sbp);
 		XFS_BUF_UNDELAYWRITE(sbp);
@@ -1203,8 +1345,8 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
 					  mp, sbp, XFS_BUF_ADDR(sbp));
 		if (error && mp->m_mk_sharedro)
 			xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting.  Filesystem may not be marked shared readonly");
+		xfs_buf_relse(sbp);
 	}
-	xfs_buf_relse(sbp);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 871a5bfd8617..0bca2d422719 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -429,12 +429,12 @@ typedef struct xfs_mount {
 /*
  * Flags for m_flags.
  */
-#define	XFS_MOUNT_WSYNC		(1ULL << 0)	/* for nfs - all metadata ops
+#define XFS_MOUNT_WSYNC		(1ULL << 0)	/* for nfs - all metadata ops
 						   must be synchronous except
 						   for space allocations */
-#define	XFS_MOUNT_INO64		(1ULL << 1)
+#define XFS_MOUNT_INO64		(1ULL << 1)
 			     /* (1ULL << 2)	-- currently unused */
-			     /* (1ULL << 3)	-- currently unused */
+#define XFS_MOUNT_WAS_CLEAN	(1ULL << 3)
 #define XFS_MOUNT_FS_SHUTDOWN	(1ULL << 4)	/* atomic stop of all filesystem
 						   operations, typically for
 						   disk errors in metadata */
@@ -511,6 +511,8 @@ xfs_preferred_iosize(xfs_mount_t *mp)
 
 #define XFS_MAXIOFFSET(mp)	((mp)->m_maxioffset)
 
+#define XFS_LAST_UNMOUNT_WAS_CLEAN(mp)	\
+				((mp)->m_flags & XFS_MOUNT_WAS_CLEAN)
 #define XFS_FORCED_SHUTDOWN(mp)	((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
 #define xfs_force_shutdown(m,f)	\
 	bhv_vfs_force_shutdown((XFS_MTOVFS(m)), f, __FILE__, __LINE__)
@@ -602,6 +604,7 @@ typedef struct xfs_mod_sb {
 
 extern xfs_mount_t *xfs_mount_init(void);
 extern void	xfs_mod_sb(xfs_trans_t *, __int64_t);
+extern int	xfs_log_sbcount(xfs_mount_t *, uint);
 extern void	xfs_mount_free(xfs_mount_t *mp, int remove_bhv);
 extern int	xfs_mountfs(struct bhv_vfs *, xfs_mount_t *mp, int);
 extern void	xfs_mountfs_check_barriers(xfs_mount_t *mp);
@@ -618,6 +621,7 @@ extern int	xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
 extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
 extern int	xfs_readsb(xfs_mount_t *, int);
 extern void	xfs_freesb(xfs_mount_t *);
+extern int	xfs_fs_writable(xfs_mount_t *);
 extern void	xfs_do_force_shutdown(bhv_desc_t *, int, char *, int);
 extern int	xfs_syncsub(xfs_mount_t *, int, int *);
 extern int	xfs_sync_inodes(xfs_mount_t *, int, int *);
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 467854b45c8f..ef42537a607a 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -74,12 +74,13 @@ struct xfs_mount;
  */
 #define XFS_SB_VERSION2_REALFBITS	0x00ffffff	/* Mask: features */
 #define XFS_SB_VERSION2_RESERVED1BIT	0x00000001
-#define XFS_SB_VERSION2_RESERVED2BIT	0x00000002
+#define XFS_SB_VERSION2_LAZYSBCOUNTBIT	0x00000002	/* Superblk counters */
 #define XFS_SB_VERSION2_RESERVED4BIT	0x00000004
 #define XFS_SB_VERSION2_ATTR2BIT	0x00000008	/* Inline attr rework */
 
 #define	XFS_SB_VERSION2_OKREALFBITS	\
-	(XFS_SB_VERSION2_ATTR2BIT)
+	(XFS_SB_VERSION2_LAZYSBCOUNTBIT	| \
+	 XFS_SB_VERSION2_ATTR2BIT)
 #define	XFS_SB_VERSION2_OKSASHFBITS	\
 	(0)
 #define XFS_SB_VERSION2_OKREALBITS	\
@@ -181,6 +182,9 @@ typedef enum {
 #define XFS_SB_SHARED_VN	XFS_SB_MVAL(SHARED_VN)
 #define XFS_SB_UNIT		XFS_SB_MVAL(UNIT)
 #define XFS_SB_WIDTH		XFS_SB_MVAL(WIDTH)
+#define XFS_SB_ICOUNT		XFS_SB_MVAL(ICOUNT)
+#define XFS_SB_IFREE		XFS_SB_MVAL(IFREE)
+#define XFS_SB_FDBLOCKS		XFS_SB_MVAL(FDBLOCKS)
 #define XFS_SB_FEATURES2	XFS_SB_MVAL(FEATURES2)
 #define	XFS_SB_NUM_BITS		((int)XFS_SBS_FIELDCOUNT)
 #define	XFS_SB_ALL_BITS		((1LL << XFS_SB_NUM_BITS) - 1)
@@ -188,7 +192,7 @@ typedef enum {
 	(XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
 	 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
 	 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
-	 XFS_SB_FEATURES2)
+	 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2)
 
 
 /*
@@ -414,6 +418,12 @@ static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
  *	 ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT)
  */
 
+static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp)
+{
+	return (XFS_SB_VERSION_HASMOREBITS(sbp) &&	\
+		((sbp)->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT));
+}
+
 #define XFS_SB_VERSION_HASATTR2(sbp)	xfs_sb_version_hasattr2(sbp)
 static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp)
 {
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index cc2d60951e21..7133fd9ab868 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -427,6 +427,14 @@ undo_blocks:
  *
  * Mark the transaction structure to indicate that the superblock
  * needs to be updated before committing.
+ *
+ * Because we may not be keeping track of allocated/free inodes and
+ * used filesystem blocks in the superblock, we do not mark the
+ * superblock dirty in this transaction if we modify these fields.
+ * We still need to update the transaction deltas so that they get
+ * applied to the incore superblock, but we don't want them to
+ * cause the superblock to get locked and logged if these are the
+ * only fields in the superblock that the transaction modifies.
  */
 void
 xfs_trans_mod_sb(
@@ -434,13 +442,19 @@ xfs_trans_mod_sb(
 	uint		field,
 	int64_t		delta)
 {
+	uint32_t	flags = (XFS_TRANS_DIRTY|XFS_TRANS_SB_DIRTY);
+	xfs_mount_t	*mp = tp->t_mountp;
 
 	switch (field) {
 	case XFS_TRANS_SB_ICOUNT:
 		tp->t_icount_delta += delta;
+		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+			flags &= ~XFS_TRANS_SB_DIRTY;
 		break;
 	case XFS_TRANS_SB_IFREE:
 		tp->t_ifree_delta += delta;
+		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+			flags &= ~XFS_TRANS_SB_DIRTY;
 		break;
 	case XFS_TRANS_SB_FDBLOCKS:
 		/*
@@ -453,6 +467,8 @@ xfs_trans_mod_sb(
 			ASSERT(tp->t_blk_res_used <= tp->t_blk_res);
 		}
 		tp->t_fdblocks_delta += delta;
+		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+			flags &= ~XFS_TRANS_SB_DIRTY;
 		break;
 	case XFS_TRANS_SB_RES_FDBLOCKS:
 		/*
@@ -462,6 +478,8 @@ xfs_trans_mod_sb(
 		 */
 		ASSERT(delta < 0);
 		tp->t_res_fdblocks_delta += delta;
+		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+			flags &= ~XFS_TRANS_SB_DIRTY;
 		break;
 	case XFS_TRANS_SB_FREXTENTS:
 		/*
@@ -544,18 +562,23 @@ xfs_trans_apply_sb_deltas(
 	       (tp->t_ag_freeblks_delta + tp->t_ag_flist_delta +
 		tp->t_ag_btree_delta));
 
-	if (tp->t_icount_delta != 0) {
-		INT_MOD(sbp->sb_icount, ARCH_CONVERT, tp->t_icount_delta);
-	}
-	if (tp->t_ifree_delta != 0) {
-		INT_MOD(sbp->sb_ifree, ARCH_CONVERT, tp->t_ifree_delta);
-	}
+	/*
+	 * Only update the superblock counters if we are logging them
+	 */
+	if (!xfs_sb_version_haslazysbcount(&(tp->t_mountp->m_sb))) {
+		if (tp->t_icount_delta != 0) {
+			INT_MOD(sbp->sb_icount, ARCH_CONVERT, tp->t_icount_delta);
+		}
+		if (tp->t_ifree_delta != 0) {
+			INT_MOD(sbp->sb_ifree, ARCH_CONVERT, tp->t_ifree_delta);
+		}
 
-	if (tp->t_fdblocks_delta != 0) {
-		INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_fdblocks_delta);
-	}
-	if (tp->t_res_fdblocks_delta != 0) {
-		INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_res_fdblocks_delta);
+		if (tp->t_fdblocks_delta != 0) {
+			INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_fdblocks_delta);
+		}
+		if (tp->t_res_fdblocks_delta != 0) {
+			INT_MOD(sbp->sb_fdblocks, ARCH_CONVERT, tp->t_res_fdblocks_delta);
+		}
 	}
 
 	if (tp->t_frextents_delta != 0) {
@@ -627,6 +650,7 @@ xfs_trans_unreserve_and_mod_sb(
 {
 	xfs_mod_sb_t	msb[14];	/* If you add cases, add entries */
 	xfs_mod_sb_t	*msbp;
+	xfs_mount_t	*mp = tp->t_mountp;
 	/* REFERENCED */
 	int		error;
 	int		rsvd;
@@ -659,8 +683,15 @@ xfs_trans_unreserve_and_mod_sb(
 	 * The t_res_fdblocks_delta and t_res_frextents_delta fields are
 	 * explicitly NOT applied to the in-core superblock.
 	 * The idea is that that has already been done.
+	 *
+	 * If we are not logging superblock counters, then the inode
+	 * allocated/free and used block counts are not updated in the
+	 * on disk superblock. In this case, XFS_TRANS_SB_DIRTY will
+	 * not be set when the transaction is updated but we still need
+	 * to update the incore superblock with the changes.
 	 */
-	if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
+	if (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
+	     (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
 		if (tp->t_icount_delta != 0) {
 			msbp->msb_field = XFS_SBS_ICOUNT;
 			msbp->msb_delta = tp->t_icount_delta;
@@ -676,6 +707,9 @@ xfs_trans_unreserve_and_mod_sb(
 			msbp->msb_delta = tp->t_fdblocks_delta;
 			msbp++;
 		}
+	}
+
+	if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
 		if (tp->t_frextents_delta != 0) {
 			msbp->msb_field = XFS_SBS_FREXTENTS;
 			msbp->msb_delta = tp->t_frextents_delta;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 7dfcc450366f..0e26e729023e 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -94,7 +94,8 @@ typedef struct xfs_trans_header {
 #define	XFS_TRANS_GROWFSRT_ZERO		38
 #define	XFS_TRANS_GROWFSRT_FREE		39
 #define	XFS_TRANS_SWAPEXT		40
-#define	XFS_TRANS_TYPE_MAX		40
+#define	XFS_TRANS_SB_COUNT		41
+#define	XFS_TRANS_TYPE_MAX		41
 /* new transaction types need to be reflected in xfs_logprint(8) */
 
 
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 92c1425d06ce..3a647339f40e 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -672,6 +672,7 @@ xfs_mntupdate(
 	} else if (!(vfsp->vfs_flag & VFS_RDONLY)) {	/* rw -> ro */
 		bhv_vfs_sync(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL);
 		xfs_quiesce_fs(mp);
+		xfs_log_sbcount(mp, 1);
 		xfs_log_unmount_write(mp);
 		xfs_unmountfs_writesb(mp);
 		vfsp->vfs_flag |= VFS_RDONLY;
@@ -1496,6 +1497,15 @@ xfs_syncsub(
 			xfs_refcache_purge_some(mp);
 	}
 
+	/*
+	 * If asked, update the disk superblock with incore counter values if we
+	 * are using non-persistent counters so that they don't get too far out
+	 * of sync if we crash or get a forced shutdown. We don't want to force
+	 * this to disk, just get a transaction into the iclogs....
+	 */
+	if (flags & SYNC_SUPER)
+		xfs_log_sbcount(mp, 0);
+
 	/*
 	 * Now check to see if the log needs a "dummy" transaction.
 	 */
@@ -1962,6 +1972,7 @@ xfs_freeze(
 	ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
 
 	/* Push the superblock and write an unmount record */
+	xfs_log_sbcount(mp, 1);
 	xfs_log_unmount_write(mp);
 	xfs_unmountfs_writesb(mp);
 	xfs_fs_log_dummy(mp);
-- 
cgit v1.2.3-59-g8ed1b


From 210c6f1caa451623e14a7cd71000d2c2e0d9cc43 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Thu, 24 May 2007 15:26:51 +1000
Subject: [XFS] Fix the transaction flags to make lazy superblock counters
 work.

SGI-PV: 964999
SGI-Modid: xfs-linux-melb:xfs-kern:28653a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_trans.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 7133fd9ab868..2caa0783049b 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -533,7 +533,7 @@ xfs_trans_mod_sb(
 		return;
 	}
 
-	tp->t_flags |= (XFS_TRANS_SB_DIRTY | XFS_TRANS_DIRTY);
+	tp->t_flags |= flags;
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 4e5ae8386b55677bde05bbd38b8fc82c67ad4564 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Tue, 5 Jun 2007 16:24:15 +1000
Subject: [XFS] xfs_bmapi fails to update the previous extent pointer

When processing multiple extent maps, xfs_bmapi needs to keep track of the
extent behind the one it is currently working on to be able to trim extent
ranges correctly. Failing to update the previous pointer can result in
corrupted extent lists in memory and this will result in panics or assert
failures.

Update the previous pointer correctly when we move to the next extent to
process.

SGI-PV: 965631
SGI-Modid: xfs-linux-melb:xfs-kern:28773a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Vlad Apostolov <vapo@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_bmap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index b1ea26e40aaf..9a654faf0c08 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5219,10 +5219,10 @@ xfs_bmapi(
 		 * Else go on to the next record.
 		 */
 		ep = xfs_iext_get_ext(ifp, ++lastx);
-		if (lastx >= nextents) {
+		prev = got;
+		if (lastx >= nextents)
 			eof = 1;
-			prev = got;
-		} else
+		else
 			xfs_bmbt_get_all(ep, &got);
 	}
 	ifp->if_lastex = lastx;
-- 
cgit v1.2.3-59-g8ed1b


From f4a9f28a909debe97cd3f6ca30e82e5811125bff Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Tue, 5 Jun 2007 16:24:27 +1000
Subject: [XFS] Flush the block device before closing it on unmount.

SGI-PV: 965630
SGI-Modid: xfs-linux-melb:xfs-kern:28774a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/linux-2.6/xfs_buf.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index a3eff54a201c..2df63622354e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1447,6 +1447,7 @@ xfs_free_buftarg(
 	int			external)
 {
 	xfs_flush_buftarg(btp, 1);
+	xfs_blkdev_issue_flush(btp);
 	if (external)
 		xfs_blkdev_put(btp->bt_bdev);
 	xfs_free_bufhash(btp);
-- 
cgit v1.2.3-59-g8ed1b


From e927af90aaa7d75543edbbd9c2810e6963d0443f Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Tue, 5 Jun 2007 16:24:36 +1000
Subject: [XFS] Block on unwritten extent conversion during synchronous direct
 I/O.

Currently we do not wait on extent conversion to occur, and hence we can
return to userspace from a synchronous direct I/O write without having
completed all the actions in the write. Hence a read after the write may
see zeroes (unwritten extent) rather than the data that was written.

Block the I/O completion by triggering a synchronous workqueue flush to
ensure that the conversion has occurred before we return to userspace.

SGI-PV: 964092
SGI-Modid: xfs-linux-melb:xfs-kern:28775a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 7361861e3aac..80a321462315 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -108,14 +108,19 @@ xfs_page_trace(
 
 /*
  * Schedule IO completion handling on a xfsdatad if this was
- * the final hold on this ioend.
+ * the final hold on this ioend. If we are asked to wait,
+ * flush the workqueue.
  */
 STATIC void
 xfs_finish_ioend(
-	xfs_ioend_t		*ioend)
+	xfs_ioend_t	*ioend,
+	int		wait)
 {
-	if (atomic_dec_and_test(&ioend->io_remaining))
+	if (atomic_dec_and_test(&ioend->io_remaining)) {
 		queue_work(xfsdatad_workqueue, &ioend->io_work);
+		if (wait)
+			flush_workqueue(xfsdatad_workqueue);
+	}
 }
 
 /*
@@ -334,7 +339,7 @@ xfs_end_bio(
 	bio->bi_end_io = NULL;
 	bio_put(bio);
 
-	xfs_finish_ioend(ioend);
+	xfs_finish_ioend(ioend, 0);
 	return 0;
 }
 
@@ -470,7 +475,7 @@ xfs_submit_ioend(
 		}
 		if (bio)
 			xfs_submit_ioend_bio(ioend, bio);
-		xfs_finish_ioend(ioend);
+		xfs_finish_ioend(ioend, 0);
 	} while ((ioend = next) != NULL);
 }
 
@@ -1416,6 +1421,13 @@ xfs_end_io_direct(
 	 * This is not necessary for synchronous direct I/O, but we do
 	 * it anyway to keep the code uniform and simpler.
 	 *
+	 * Well, if only it were that simple. Because synchronous direct I/O
+	 * requires extent conversion to occur *before* we return to userspace,
+	 * we have to wait for extent conversion to complete. Look at the
+	 * iocb that has been passed to us to determine if this is AIO or
+	 * not. If it is synchronous, tell xfs_finish_ioend() to kick the
+	 * workqueue and wait for it to complete.
+	 *
 	 * The core direct I/O code might be changed to always call the
 	 * completion handler in the future, in which case all this can
 	 * go away.
@@ -1423,9 +1435,9 @@ xfs_end_io_direct(
 	ioend->io_offset = offset;
 	ioend->io_size = size;
 	if (ioend->io_type == IOMAP_READ) {
-		xfs_finish_ioend(ioend);
+		xfs_finish_ioend(ioend, 0);
 	} else if (private && size > 0) {
-		xfs_finish_ioend(ioend);
+		xfs_finish_ioend(ioend, is_sync_kiocb(iocb));
 	} else {
 		/*
 		 * A direct I/O write ioend starts it's life in unwritten
@@ -1434,7 +1446,7 @@ xfs_end_io_direct(
 		 * handler.
 		 */
 		INIT_WORK(&ioend->io_work, xfs_end_bio_written);
-		xfs_finish_ioend(ioend);
+		xfs_finish_ioend(ioend, 0);
 	}
 
 	/*
-- 
cgit v1.2.3-59-g8ed1b


From b2826136a1fc3ea451bcbb73a75ca50b3231aa8f Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Tue, 5 Jun 2007 16:24:44 +1000
Subject: [XFS] Handle null returned from xfs_vtoi() in xfs_setfilesize().

SGI-PV: 965636
SGI-Modid: xfs-linux-melb:xfs-kern:28777a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Olaf Weber <olaf@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 80a321462315..c097e4e69768 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -161,6 +161,8 @@ xfs_setfilesize(
 	xfs_fsize_t		bsize;
 
 	ip = xfs_vtoi(ioend->io_vnode);
+	if (!ip)
+		return;
 
 	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
 	ASSERT(ioend->io_type != IOMAP_READ);
-- 
cgit v1.2.3-59-g8ed1b


From 45c34141126a89da07197d5b89c04c6847f1171a Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Mon, 18 Jun 2007 16:49:44 +1000
Subject: [XFS] Apply transaction delta counts atomically to incore counters

With the per-cpu superblock counters, batch updates are no longer atomic
across the entire batch of changes. This is not an issue if each
individual change in the batch is applied atomically. Unfortunately, free
block count changes are not applied atomically, and they are applied in a
manner guaranteed to cause problems.

Essentially, the free block count reservation that the transaction took
initially is returned to the in core counters before a second delta takes
away what is used. because these two operations are not atomic, we can
race with another thread that can use the returned transaction reservation
before the transaction takes the space away again and we can then get
ENOSPC being reported in a spot where we don't have an ENOSPC condition,
nor should we ever see one there.

Fix it up by rolling the two deltas into the one so it can be applied
safely (i.e. atomically) to the incore counters.

SGI-PV: 964465
SGI-Modid: xfs-linux-melb:xfs-kern:28796a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_trans.c | 77 ++++++++++++++++++++++++++++--------------------------
 1 file changed, 40 insertions(+), 37 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 2caa0783049b..356d6627f581 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -638,11 +638,23 @@ xfs_trans_apply_sb_deltas(
 }
 
 /*
- * xfs_trans_unreserve_and_mod_sb() is called to release unused
- * reservations and apply superblock counter changes to the in-core
- * superblock.
+ * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
+ * and apply superblock counter changes to the in-core superblock.  The
+ * t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT
+ * applied to the in-core superblock.  The idea is that that has already been
+ * done.
  *
  * This is done efficiently with a single call to xfs_mod_incore_sb_batch().
+ * However, we have to ensure that we only modify each superblock field only
+ * once because the application of the delta values may not be atomic. That can
+ * lead to ENOSPC races occurring if we have two separate modifcations of the
+ * free space counter to put back the entire reservation and then take away
+ * what we used.
+ *
+ * If we are not logging superblock counters, then the inode allocated/free and
+ * used block counts are not updated in the on disk superblock. In this case,
+ * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
+ * still need to update the incore superblock with the changes.
  */
 STATIC void
 xfs_trans_unreserve_and_mod_sb(
@@ -654,42 +666,43 @@ xfs_trans_unreserve_and_mod_sb(
 	/* REFERENCED */
 	int		error;
 	int		rsvd;
+	int64_t		blkdelta = 0;
+	int64_t		rtxdelta = 0;
 
 	msbp = msb;
 	rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
 
-	/*
-	 * Release any reserved blocks.  Any that were allocated
-	 * will be taken back again by fdblocks_delta below.
-	 */
-	if (tp->t_blk_res > 0) {
+	/* calculate free blocks delta */
+	if (tp->t_blk_res > 0)
+		blkdelta = tp->t_blk_res;
+
+	if ((tp->t_fdblocks_delta != 0) &&
+	    (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
+	     (tp->t_flags & XFS_TRANS_SB_DIRTY)))
+	        blkdelta += tp->t_fdblocks_delta;
+
+	if (blkdelta != 0) {
 		msbp->msb_field = XFS_SBS_FDBLOCKS;
-		msbp->msb_delta = tp->t_blk_res;
+		msbp->msb_delta = blkdelta;
 		msbp++;
 	}
 
-	/*
-	 * Release any reserved real time extents .  Any that were
-	 * allocated will be taken back again by frextents_delta below.
-	 */
-	if (tp->t_rtx_res > 0) {
+	/* calculate free realtime extents delta */
+	if (tp->t_rtx_res > 0)
+		rtxdelta = tp->t_rtx_res;
+
+	if ((tp->t_frextents_delta != 0) &&
+	    (tp->t_flags & XFS_TRANS_SB_DIRTY))
+		rtxdelta += tp->t_frextents_delta;
+
+	if (rtxdelta != 0) {
 		msbp->msb_field = XFS_SBS_FREXTENTS;
-		msbp->msb_delta = tp->t_rtx_res;
+		msbp->msb_delta = rtxdelta;
 		msbp++;
 	}
 
-	/*
-	 * Apply any superblock modifications to the in-core version.
-	 * The t_res_fdblocks_delta and t_res_frextents_delta fields are
-	 * explicitly NOT applied to the in-core superblock.
-	 * The idea is that that has already been done.
-	 *
-	 * If we are not logging superblock counters, then the inode
-	 * allocated/free and used block counts are not updated in the
-	 * on disk superblock. In this case, XFS_TRANS_SB_DIRTY will
-	 * not be set when the transaction is updated but we still need
-	 * to update the incore superblock with the changes.
-	 */
+	/* apply remaining deltas */
+
 	if (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
 	     (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
 		if (tp->t_icount_delta != 0) {
@@ -702,19 +715,9 @@ xfs_trans_unreserve_and_mod_sb(
 			msbp->msb_delta = tp->t_ifree_delta;
 			msbp++;
 		}
-		if (tp->t_fdblocks_delta != 0) {
-			msbp->msb_field = XFS_SBS_FDBLOCKS;
-			msbp->msb_delta = tp->t_fdblocks_delta;
-			msbp++;
-		}
 	}
 
 	if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
-		if (tp->t_frextents_delta != 0) {
-			msbp->msb_field = XFS_SBS_FREXTENTS;
-			msbp->msb_delta = tp->t_frextents_delta;
-			msbp++;
-		}
 		if (tp->t_dblocks_delta != 0) {
 			msbp->msb_field = XFS_SBS_DBLOCKS;
 			msbp->msb_delta = tp->t_dblocks_delta;
-- 
cgit v1.2.3-59-g8ed1b


From effd120edb7609069cca9f3d1cb4bfae464b2f85 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Mon, 18 Jun 2007 16:49:58 +1000
Subject: [XFS] Map unwritten extents correctly for I/o completion processing

If we have multiple unwritten extents within a single page, we fail to
tell the I/o completion construction handlers we need a new handle for the
second and subsequent blocks in the page. While we still issue the I/O
correctly, we do not have the correct ranges recorded in the ioend
structures and hence when we go to convert the unwritten extents we screw
it up.

Make sure we start a new ioend every time the mapping changes so that we
convert the correct ranges on I/O completion.

SGI-PV: 964647
SGI-Modid: xfs-linux-melb:xfs-kern:28797a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c097e4e69768..fd4105d662e0 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1010,6 +1010,8 @@ xfs_page_state_convert(
 		if (buffer_unwritten(bh) || buffer_delay(bh) ||
 		    ((buffer_uptodate(bh) || PageUptodate(page)) &&
 		     !buffer_mapped(bh) && (unmapped || startio))) {
+			int new_ioend = 0;
+
 			/*
 			 * Make sure we don't use a read-only iomap
 			 */
@@ -1028,6 +1030,15 @@ xfs_page_state_convert(
 			}
 
 			if (!iomap_valid) {
+				/*
+				 * if we didn't have a valid mapping then we
+				 * need to ensure that we put the new mapping
+				 * in a new ioend structure. This needs to be
+				 * done to ensure that the ioends correctly
+				 * reflect the block mappings at io completion
+				 * for unwritten extent conversion.
+				 */
+				new_ioend = 1;
 				if (type == IOMAP_NEW) {
 					size = xfs_probe_cluster(inode,
 							page, bh, head, 0);
@@ -1047,7 +1058,7 @@ xfs_page_state_convert(
 				if (startio) {
 					xfs_add_to_ioend(inode, bh, offset,
 							type, &ioend,
-							!iomap_valid);
+							new_ioend);
 				} else {
 					set_buffer_dirty(bh);
 					unlock_buffer(bh);
-- 
cgit v1.2.3-59-g8ed1b


From 0164af51cedf46e1d58fd53854373f544150c597 Mon Sep 17 00:00:00 2001
From: Tim Shimmin <tes@sgi.com>
Date: Mon, 18 Jun 2007 16:50:08 +1000
Subject: [XFS] Log the agf_length change in xfs_growfs_data_private().

SGI-PV: 963528
SGI-Modid: xfs-linux-melb:xfs-kern:28856a

Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
---
 fs/xfs/xfs_fsops.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 27d01afe8465..ddd45e5b9383 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -332,6 +332,7 @@ xfs_growfs_data_private(
 		be32_add(&agf->agf_length, new);
 		ASSERT(be32_to_cpu(agf->agf_length) ==
 		       be32_to_cpu(agi->agi_length));
+		xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);
 		/*
 		 * Free the new space.
 		 */
-- 
cgit v1.2.3-59-g8ed1b


From 641c56fbfeae85d5ec87fee90a752f7b7224f236 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Mon, 18 Jun 2007 16:50:17 +1000
Subject: [XFS] Prevent deadlock when flushing inodes on unmount

When we are unmounting the filesystem, we flush all the inodes to disk.
Unfortunately, if we have an inode cluster that has just been freed and
marked stale sitting in an incore log buffer (i.e. hasn't been flushed to
disk), it will be holding all the flush locks on the inodes in that
cluster.

xfs_iflush_all() which is called during unmount walks all the inodes
trying to reclaim them, and it doing so calls xfs_finish_reclaim() on each
inode. If the inode is dirty, if grabs the flush lock and flushes it.
Unfortunately, find dirty inodes that already have their flush lock held
and so we sleep.

At this point in the unmount process, we are running single-threaded.
There is nothing more that can push on the log to force the transaction
holding the inode flush locks to disk and hence we deadlock.

The fix is to issue a log force before flushing the inodes on unmount so
that all the flush locks will be released before we start flushing the
inodes.

SGI-PV: 964538
SGI-Modid: xfs-linux-melb:xfs-kern:28862a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_mount.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index f6fe47d8c4dc..39cf6f3267c3 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1182,6 +1182,17 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 	int64_t		fsid;
 #endif
 
+	/*
+	 * We can potentially deadlock here if we have an inode cluster
+	 * that has been freed has it's buffer still pinned in memory because
+	 * the transaction is still sitting in a iclog. The stale inodes
+	 * on that buffer will have their flush locks held until the
+	 * transaction hits the disk and the callbacks run. the inode
+	 * flush takes the flush lock unconditionally and with nothing to
+	 * push out the iclog we will never get that unlocked. hence we
+	 * need to force the log first.
+	 */
+	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
 	xfs_iflush_all(mp);
 
 	XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
-- 
cgit v1.2.3-59-g8ed1b


From 84e1e99f112dead8f9ba036c02d24a9f5ce7f544 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Mon, 18 Jun 2007 16:50:27 +1000
Subject: [XFS] Prevent ENOSPC from aborting transactions that need to succeed

During delayed allocation extent conversion or unwritten extent
conversion, we need to reserve some blocks for transactions reservations.
We need to reserve these blocks in case a btree split occurs and we need
to allocate some blocks.

Unfortunately, we've only ever reserved the number of data blocks we are
allocating, so in both the unwritten and delalloc case we can get ENOSPC
to the transaction reservation. This is bad because in both cases we
cannot report the failure to the writing application.

The fix is two-fold:

1 - leverage the reserved block infrastructure XFS already
has to reserve a small pool of blocks by default to allow
specially marked transactions to dip into when we are at
ENOSPC.
Default setting is min(5%, 1024 blocks).

2 - convert critical transaction reservations to be allowed
to dip into this pool. Spots changed are delalloc
conversion, unwritten extent conversion and growing a
filesystem at ENOSPC.
This also allows growing the filesytsem to succeed at ENOSPC.

SGI-PV: 964468
SGI-Modid: xfs-linux-melb:xfs-kern:28865a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_fsops.c | 10 +++++++---
 fs/xfs/xfs_iomap.c | 22 ++++++++--------------
 fs/xfs/xfs_mount.c | 37 +++++++++++++++++++++++++++++++++++--
 3 files changed, 50 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index ddd45e5b9383..2251a49f3e17 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -177,6 +177,7 @@ xfs_growfs_data_private(
 		up_write(&mp->m_peraglock);
 	}
 	tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
+	tp->t_flags |= XFS_TRANS_RESERVE;
 	if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp),
 			XFS_GROWDATA_LOG_RES(mp), 0, 0, 0))) {
 		xfs_trans_cancel(tp, 0);
@@ -499,8 +500,9 @@ xfs_reserve_blocks(
 	unsigned long		s;
 
 	/* If inval is null, report current values and return */
-
 	if (inval == (__uint64_t *)NULL) {
+		if (!outval)
+			return EINVAL;
 		outval->resblks = mp->m_resblks;
 		outval->resblks_avail = mp->m_resblks_avail;
 		return 0;
@@ -563,8 +565,10 @@ retry:
 		}
 	}
 out:
-	outval->resblks = mp->m_resblks;
-	outval->resblks_avail = mp->m_resblks_avail;
+	if (outval) {
+		outval->resblks = mp->m_resblks;
+		outval->resblks_avail = mp->m_resblks_avail;
+	}
 	XFS_SB_UNLOCK(mp, s);
 
 	if (fdblks_delta) {
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 3f2b9f2a7b94..ab5062199f55 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -489,13 +489,13 @@ xfs_iomap_write_direct(
 	if (unlikely(rt)) {
 		resrtextents = qblocks = resaligned;
 		resrtextents /= mp->m_sb.sb_rextsize;
-  		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
-  		quota_flag = XFS_QMOPT_RES_RTBLKS;
-  	} else {
-  		resrtextents = 0;
+		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+		quota_flag = XFS_QMOPT_RES_RTBLKS;
+	} else {
+		resrtextents = 0;
 		resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
-  		quota_flag = XFS_QMOPT_RES_REGBLKS;
-  	}
+		quota_flag = XFS_QMOPT_RES_REGBLKS;
+	}
 
 	/*
 	 * Allocate and setup the transaction
@@ -788,18 +788,12 @@ xfs_iomap_write_allocate(
 		nimaps = 0;
 		while (nimaps == 0) {
 			tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+			tp->t_flags |= XFS_TRANS_RESERVE;
 			nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
 			error = xfs_trans_reserve(tp, nres,
 					XFS_WRITE_LOG_RES(mp),
 					0, XFS_TRANS_PERM_LOG_RES,
 					XFS_WRITE_LOG_COUNT);
-			if (error == ENOSPC) {
-				error = xfs_trans_reserve(tp, 0,
-						XFS_WRITE_LOG_RES(mp),
-						0,
-						XFS_TRANS_PERM_LOG_RES,
-						XFS_WRITE_LOG_COUNT);
-			}
 			if (error) {
 				xfs_trans_cancel(tp, 0);
 				return XFS_ERROR(error);
@@ -917,8 +911,8 @@ xfs_iomap_write_unwritten(
 		 * from unwritten to real. Do allocations in a loop until
 		 * we have covered the range passed in.
 		 */
-
 		tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+		tp->t_flags |= XFS_TRANS_RESERVE;
 		error = xfs_trans_reserve(tp, resblks,
 				XFS_WRITE_LOG_RES(mp), 0,
 				XFS_TRANS_PERM_LOG_RES,
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 39cf6f3267c3..31453ca0f3dd 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -725,7 +725,7 @@ xfs_mountfs(
 	bhv_vnode_t	*rvp = NULL;
 	int		readio_log, writeio_log;
 	xfs_daddr_t	d;
-	__uint64_t	ret64;
+	__uint64_t	resblks;
 	__int64_t	update_flags;
 	uint		quotamount, quotaflags;
 	int		agno;
@@ -842,6 +842,7 @@ xfs_mountfs(
 	 */
 	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
 	    (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
+		__uint64_t	ret64;
 		if (xfs_uuid_mount(mp)) {
 			error = XFS_ERROR(EINVAL);
 			goto error1;
@@ -1135,13 +1136,27 @@ xfs_mountfs(
 		goto error4;
 	}
 
-
 	/*
 	 * Complete the quota initialisation, post-log-replay component.
 	 */
 	if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
 		goto error4;
 
+	/*
+	 * Now we are mounted, reserve a small amount of unused space for
+	 * privileged transactions. This is needed so that transaction
+	 * space required for critical operations can dip into this pool
+	 * when at ENOSPC. This is needed for operations like create with
+	 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
+	 * are not allowed to use this reserved space.
+	 *
+	 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
+	 * This may drive us straight to ENOSPC on mount, but that implies
+	 * we were already there on the last unmount.
+	 */
+	resblks = min_t(__uint64_t, mp->m_sb.sb_dblocks / 20, 1024);
+	xfs_reserve_blocks(mp, &resblks, NULL);
+
 	return 0;
 
  error4:
@@ -1181,6 +1196,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 #if defined(DEBUG) || defined(INDUCE_IO_ERROR)
 	int64_t		fsid;
 #endif
+	__uint64_t	resblks;
 
 	/*
 	 * We can potentially deadlock here if we have an inode cluster
@@ -1209,6 +1225,23 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 		xfs_binval(mp->m_rtdev_targp);
 	}
 
+	/*
+	 * Unreserve any blocks we have so that when we unmount we don't account
+	 * the reserved free space as used. This is really only necessary for
+	 * lazy superblock counting because it trusts the incore superblock
+	 * counters to be aboslutely correct on clean unmount.
+	 *
+	 * We don't bother correcting this elsewhere for lazy superblock
+	 * counting because on mount of an unclean filesystem we reconstruct the
+	 * correct counter value and this is irrelevant.
+	 *
+	 * For non-lazy counter filesystems, this doesn't matter at all because
+	 * we only every apply deltas to the superblock and hence the incore
+	 * value does not matter....
+	 */
+	resblks = 0;
+	xfs_reserve_blocks(mp, &resblks, NULL);
+
 	xfs_log_sbcount(mp, 1);
 	xfs_unmountfs_writesb(mp);
 	xfs_unmountfs_wait(mp); 		/* wait for async bufs */
-- 
cgit v1.2.3-59-g8ed1b


From 957d0ebed04239b734552c7da3fae9094b6f090c Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Mon, 18 Jun 2007 16:50:37 +1000
Subject: [XFS] Cleanup inode extent size hint extraction

SGI-PV: 966004
SGI-Modid: xfs-linux-melb:xfs-kern:28866a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_bmap.c     | 23 +++++++----------------
 fs/xfs/xfs_iomap.c    | 19 ++++---------------
 fs/xfs/xfs_rw.h       | 22 ++++++++++++++++++++++
 fs/xfs/xfs_vnodeops.c | 17 +++++------------
 4 files changed, 38 insertions(+), 43 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 9a654faf0c08..183add2f21b7 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2609,8 +2609,7 @@ xfs_bmap_rtalloc(
 	xfs_rtblock_t	rtb;
 
 	mp = ap->ip->i_mount;
-	align = ap->ip->i_d.di_extsize ?
-		ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize;
+	align = xfs_get_extsz_hint(ap->ip);
 	prod = align / mp->m_sb.sb_rextsize;
 	error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
 					align, 1, ap->eof, 0,
@@ -2715,9 +2714,7 @@ xfs_bmap_btalloc(
 	int		error;
 
 	mp = ap->ip->i_mount;
-	align = (ap->userdata && ap->ip->i_d.di_extsize &&
-		(ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ?
-		ap->ip->i_d.di_extsize : 0;
+	align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
 	if (unlikely(align)) {
 		error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
 						align, 0, ap->eof, 0, ap->conv,
@@ -2817,9 +2814,9 @@ xfs_bmap_btalloc(
 		args.total = ap->total;
 		args.minlen = ap->minlen;
 	}
-	if (unlikely(ap->userdata && ap->ip->i_d.di_extsize &&
-		    (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) {
-		args.prod = ap->ip->i_d.di_extsize;
+	/* apply extent size hints if obtained earlier */
+	if (unlikely(align)) {
+		args.prod = align;
 		if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
 			args.mod = (xfs_extlen_t)(args.prod - args.mod);
 	} else if (mp->m_sb.sb_blocksize >= NBPP) {
@@ -4868,12 +4865,7 @@ xfs_bmapi(
 				xfs_extlen_t	extsz;
 
 				/* Figure out the extent size, adjust alen */
-				if (rt) {
-					if (!(extsz = ip->i_d.di_extsize))
-						extsz = mp->m_sb.sb_rextsize;
-				} else {
-					extsz = ip->i_d.di_extsize;
-				}
+				extsz = xfs_get_extsz_hint(ip);
 				if (extsz) {
 					error = xfs_bmap_extsize_align(mp,
 							&got, &prev, extsz,
@@ -5813,8 +5805,7 @@ xfs_getbmap(
 		   ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
 		return XFS_ERROR(EINVAL);
 	if (whichfork == XFS_DATA_FORK) {
-		if ((ip->i_d.di_extsize && (ip->i_d.di_flags &
-				(XFS_DIFLAG_REALTIME|XFS_DIFLAG_EXTSIZE))) ||
+		if (xfs_get_extsz_hint(ip) ||
 		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
 			prealloced = 1;
 			fixlen = XFS_MAXIOFFSET(mp);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index ab5062199f55..bf57b75acb90 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -451,19 +451,14 @@ xfs_iomap_write_direct(
 		return XFS_ERROR(error);
 
 	rt = XFS_IS_REALTIME_INODE(ip);
-	if (unlikely(rt)) {
-		if (!(extsz = ip->i_d.di_extsize))
-			extsz = mp->m_sb.sb_rextsize;
-	} else {
-		extsz = ip->i_d.di_extsize;
-	}
+	extsz = xfs_get_extsz_hint(ip);
 
 	isize = ip->i_size;
 	if (io->io_new_size > isize)
 		isize = io->io_new_size;
 
-  	offset_fsb = XFS_B_TO_FSBT(mp, offset);
-  	last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
 	if ((offset + count) > isize) {
 		error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
 							&last_fsb);
@@ -666,13 +661,7 @@ xfs_iomap_write_delay(
 	if (error)
 		return XFS_ERROR(error);
 
-	if (XFS_IS_REALTIME_INODE(ip)) {
-		if (!(extsz = ip->i_d.di_extsize))
-			extsz = mp->m_sb.sb_rextsize;
-	} else {
-		extsz = ip->i_d.di_extsize;
-	}
-
+	extsz = xfs_get_extsz_hint(ip);
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 
 retry:
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index ba74000931ec..fcf28dbded7c 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -77,6 +77,28 @@ xfs_fsb_to_db_io(struct xfs_iocore *io, xfs_fsblock_t fsb)
 #define XFS_FREE_EOF_LOCK	(1<<0)
 #define XFS_FREE_EOF_NOLOCK	(1<<1)
 
+
+/*
+ * helper function to extract extent size hint from inode
+ */
+STATIC_INLINE xfs_extlen_t
+xfs_get_extsz_hint(
+	xfs_inode_t	*ip)
+{
+	xfs_extlen_t	extsz;
+
+	if (unlikely(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
+		extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
+				? ip->i_d.di_extsize
+				: ip->i_mount->m_sb.sb_rextsize;
+		ASSERT(extsz);
+	} else {
+		extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
+				? ip->i_d.di_extsize : 0;
+	}
+	return extsz;
+}
+
 /*
  * Prototypes for functions in xfs_rw.c.
  */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 6420ca8df5ec..8c830a48165a 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -183,9 +183,8 @@ xfs_getattr(
 			 * realtime extent size or the realtime volume's
 			 * extent size.
 			 */
-			vap->va_blocksize = ip->i_d.di_extsize ?
-				(ip->i_d.di_extsize << mp->m_sb.sb_blocklog) :
-				(mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog);
+			vap->va_blocksize =
+				xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
 		}
 		break;
 	}
@@ -4055,22 +4054,16 @@ xfs_alloc_file_space(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
-	rt = XFS_IS_REALTIME_INODE(ip);
-	if (unlikely(rt)) {
-		if (!(extsz = ip->i_d.di_extsize))
-			extsz = mp->m_sb.sb_rextsize;
-	} else {
-		extsz = ip->i_d.di_extsize;
-	}
-
 	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
 		return error;
 
 	if (len <= 0)
 		return XFS_ERROR(EINVAL);
 
+	rt = XFS_IS_REALTIME_INODE(ip);
+	extsz = xfs_get_extsz_hint(ip);
+
 	count = len;
-	error = 0;
 	imapp = &imaps[0];
 	nimaps = 1;
 	bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
-- 
cgit v1.2.3-59-g8ed1b


From 516b2e7c2661615ba5d5ad9fb584f068363502d3 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Mon, 18 Jun 2007 16:50:48 +1000
Subject: [XFS] Fix remount,readonly path to flush everything correctly.

The remount readonly path can fail to writeback properly because we still
have active transactions after calling xfs_quiesce_fs(). Further
investigation shows that this path is broken in the same ways that the xfs
freeze path was broken so fix it the same way.

SGI-PV: 964464
SGI-Modid: xfs-linux-melb:xfs-kern:28869a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/linux-2.6/xfs_super.c |  2 +-
 fs/xfs/linux-2.6/xfs_vfs.h   | 14 ++++++++++++
 fs/xfs/xfs_vfsops.c          | 53 ++++++++++++++++++++++++++------------------
 3 files changed, 46 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 05f188ed1206..06894cf00b12 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -664,7 +664,7 @@ xfs_fs_sync_super(
 		 * occur here so don't bother flushing the buftarg (i.e
 		 * SYNC_QUIESCE) because it'll just get dirty again.
 		 */
-		flags = SYNC_FSDATA | SYNC_DELWRI | SYNC_WAIT | SYNC_IOWAIT;
+		flags = SYNC_DATA_QUIESCE;
 	} else
 		flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0);
 
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index cb7b0d62fb96..dca3481aaafa 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -94,6 +94,20 @@ typedef enum {
 #define SYNC_IOWAIT		0x0100  /* wait for all I/O to complete */
 #define SYNC_SUPER		0x0200  /* flush superblock to disk */
 
+/*
+ * When remounting a filesystem read-only or freezing the filesystem,
+ * we have two phases to execute. This first phase is syncing the data
+ * before we quiesce the fielsystem, and the second is flushing all the
+ * inodes out after we've waited for all the transactions created by
+ * the first phase to complete. The second phase uses SYNC_INODE_QUIESCE
+ * to ensure that the inodes are written to their location on disk
+ * rather than just existing in transactions in the log. This means
+ * after a quiesce there is no log replay required to write the inodes
+ * to disk (this is the main difference between a sync and a quiesce).
+ */
+#define SYNC_DATA_QUIESCE	(SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT)
+#define SYNC_INODE_QUIESCE	(SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT)
+
 #define SHUTDOWN_META_IO_ERROR	0x0001	/* write attempt to metadata failed */
 #define SHUTDOWN_LOG_IO_ERROR	0x0002	/* write attempt to the log failed */
 #define SHUTDOWN_FORCE_UMOUNT	0x0004	/* shutdown from a forced unmount */
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 3a647339f40e..c343fde10ef9 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -640,7 +640,7 @@ xfs_quiesce_fs(
 	 * we can write the unmount record.
 	 */
 	do {
-		xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, NULL);
+		xfs_syncsub(mp, SYNC_INODE_QUIESCE, NULL);
 		pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
 		if (!pincount) {
 			delay(50);
@@ -651,6 +651,30 @@ xfs_quiesce_fs(
 	return 0;
 }
 
+/*
+ * Second stage of a quiesce. The data is already synced, now we have to take
+ * care of the metadata. New transactions are already blocked, so we need to
+ * wait for any remaining transactions to drain out before proceding.
+ */
+STATIC void
+xfs_attr_quiesce(
+	xfs_mount_t	*mp)
+{
+	/* wait for all modifications to complete */
+	while (atomic_read(&mp->m_active_trans) > 0)
+		delay(100);
+
+	/* flush inodes and push all remaining buffers out to disk */
+	xfs_quiesce_fs(mp);
+
+	ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
+
+	/* Push the superblock and write an unmount record */
+	xfs_log_sbcount(mp, 1);
+	xfs_log_unmount_write(mp);
+	xfs_unmountfs_writesb(mp);
+}
+
 STATIC int
 xfs_mntupdate(
 	bhv_desc_t			*bdp,
@@ -670,11 +694,8 @@ xfs_mntupdate(
 			mp->m_flags &= ~XFS_MOUNT_BARRIER;
 		}
 	} else if (!(vfsp->vfs_flag & VFS_RDONLY)) {	/* rw -> ro */
-		bhv_vfs_sync(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL);
-		xfs_quiesce_fs(mp);
-		xfs_log_sbcount(mp, 1);
-		xfs_log_unmount_write(mp);
-		xfs_unmountfs_writesb(mp);
+		bhv_vfs_sync(vfsp, SYNC_DATA_QUIESCE, NULL);
+		xfs_attr_quiesce(mp);
 		vfsp->vfs_flag |= VFS_RDONLY;
 	}
 	return 0;
@@ -1952,9 +1973,9 @@ xfs_showargs(
 }
 
 /*
- * Second stage of a freeze. The data is already frozen, now we have to take
- * care of the metadata. New transactions are already blocked, so we need to
- * wait for any remaining transactions to drain out before proceding.
+ * Second stage of a freeze. The data is already frozen so we only
+ * need to take care of themetadata. Once that's done write a dummy
+ * record to dirty the log in case of a crash while frozen.
  */
 STATIC void
 xfs_freeze(
@@ -1962,19 +1983,7 @@ xfs_freeze(
 {
 	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
 
-	/* wait for all modifications to complete */
-	while (atomic_read(&mp->m_active_trans) > 0)
-		delay(100);
-
-	/* flush inodes and push all remaining buffers out to disk */
-	xfs_quiesce_fs(mp);
-
-	ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
-
-	/* Push the superblock and write an unmount record */
-	xfs_log_sbcount(mp, 1);
-	xfs_log_unmount_write(mp);
-	xfs_unmountfs_writesb(mp);
+	xfs_attr_quiesce(mp);
 	xfs_fs_log_dummy(mp);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 39726be2a2e6e61f352852da2c3a807773e33346 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 18 Jun 2007 17:57:45 +1000
Subject: [XFS] Use do_div() on 64 bit types.

SGI-PV: 966145
SGI-Modid: xfs-linux-melb:xfs-kern:28889a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_mount.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 31453ca0f3dd..a66b39805176 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1154,7 +1154,9 @@ xfs_mountfs(
 	 * This may drive us straight to ENOSPC on mount, but that implies
 	 * we were already there on the last unmount.
 	 */
-	resblks = min_t(__uint64_t, mp->m_sb.sb_dblocks / 20, 1024);
+	resblks = mp->m_sb.sb_dblocks;
+	do_div(resblks, 20);
+	resblks = min_t(__uint64_t, resblks, 1024);
 	xfs_reserve_blocks(mp, &resblks, NULL);
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 87ae3c2411cfd280e8289e232b718fae9f63950b Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jesper.juhl@gmail.com>
Date: Thu, 28 Jun 2007 16:43:14 +1000
Subject: [XFS] Cancel transactions on xfs_itruncate_start error.

SGI-PV: 966502
SGI-Modid: xfs-linux-melb:xfs-kern:28943a

Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_vnodeops.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 8c830a48165a..5dbca95598e0 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1263,6 +1263,7 @@ xfs_free_eofblocks(
 		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
 				    ip->i_size);
 		if (error) {
+			xfs_trans_cancel(tp, 0);
 			if (use_iolock)
 				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 			return error;
@@ -1687,6 +1688,7 @@ xfs_inactive(
 
 		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0);
 		if (error) {
+			xfs_trans_cancel(tp, 0);
 			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 			return VN_INACTIVE_CACHE;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 24ad33ff714bd117cab30e71e2ad41e4e1185108 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Thu, 28 Jun 2007 16:43:30 +1000
Subject: [XFS] Kill off xfs_count_bits

xfs_count_bits is only called once, and is then compared to 0. IOW, what
it really wants to know is, is the bitmap empty. This can be done more
simply, certainly.

SGI-PV: 966503
SGI-Modid: xfs-linux-melb:xfs-kern:28944a

Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_bit.c      | 91 +++++----------------------------------------------
 fs/xfs/xfs_bit.h      |  4 +--
 fs/xfs/xfs_buf_item.c |  4 +--
 3 files changed, 13 insertions(+), 86 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c
index 1afe07f67e3b..fab0b6d5a41b 100644
--- a/fs/xfs/xfs_bit.c
+++ b/fs/xfs/xfs_bit.c
@@ -65,44 +65,6 @@ static const char xfs_highbit[256] = {
 };
 #endif
 
-/*
- * Count of bits set in byte, 0..8.
- */
-static const char xfs_countbit[256] = {
-	0, 1, 1, 2, 1, 2, 2, 3,			/* 00 .. 07 */
-	1, 2, 2, 3, 2, 3, 3, 4,			/* 08 .. 0f */
-	1, 2, 2, 3, 2, 3, 3, 4,			/* 10 .. 17 */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* 18 .. 1f */
-	1, 2, 2, 3, 2, 3, 3, 4,			/* 20 .. 27 */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* 28 .. 2f */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* 30 .. 37 */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* 38 .. 3f */
-	1, 2, 2, 3, 2, 3, 3, 4,			/* 40 .. 47 */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* 48 .. 4f */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* 50 .. 57 */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* 58 .. 5f */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* 60 .. 67 */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* 68 .. 6f */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* 70 .. 77 */
-	4, 5, 5, 6, 5, 6, 6, 7,			/* 78 .. 7f */
-	1, 2, 2, 3, 2, 3, 3, 4,			/* 80 .. 87 */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* 88 .. 8f */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* 90 .. 97 */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* 98 .. 9f */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* a0 .. a7 */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* a8 .. af */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* b0 .. b7 */
-	4, 5, 5, 6, 5, 6, 6, 7,			/* b8 .. bf */
-	2, 3, 3, 4, 3, 4, 4, 5,			/* c0 .. c7 */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* c8 .. cf */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* d0 .. d7 */
-	4, 5, 5, 6, 5, 6, 6, 7,			/* d8 .. df */
-	3, 4, 4, 5, 4, 5, 5, 6,			/* e0 .. e7 */
-	4, 5, 5, 6, 5, 6, 6, 7,			/* e8 .. ef */
-	4, 5, 5, 6, 5, 6, 6, 7,			/* f0 .. f7 */
-	5, 6, 6, 7, 6, 7, 7, 8,			/* f8 .. ff */
-};
-
 /*
  * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set.
  */
@@ -167,56 +129,21 @@ xfs_highbit64(
 
 
 /*
- * Count the number of bits set in the bitmap starting with bit
- * start_bit.  Size is the size of the bitmap in words.
- *
- * Do the counting by mapping a byte value to the number of set
- * bits for that value using the xfs_countbit array, i.e.
- * xfs_countbit[0] == 0, xfs_countbit[1] == 1, xfs_countbit[2] == 1,
- * xfs_countbit[3] == 2, etc.
+ * Return whether bitmap is empty.
+ * Size is number of words in the bitmap, which is padded to word boundary
+ * Returns 1 for empty, 0 for non-empty.
  */
 int
-xfs_count_bits(uint *map, uint size, uint start_bit)
+xfs_bitmap_empty(uint *map, uint size)
 {
-	register int	bits;
-	register unsigned char	*bytep;
-	register unsigned char	*end_map;
-	int		byte_bit;
-
-	bits = 0;
-	end_map = (char*)(map + size);
-	bytep = (char*)(map + (start_bit & ~0x7));
-	byte_bit = start_bit & 0x7;
-
-	/*
-	 * If the caller fell off the end of the map, return 0.
-	 */
-	if (bytep >= end_map) {
-		return (0);
-	}
-
-	/*
-	 * If start_bit is not byte aligned, then process the
-	 * first byte separately.
-	 */
-	if (byte_bit != 0) {
-		/*
-		 * Shift off the bits we don't want to look at,
-		 * before indexing into xfs_countbit.
-		 */
-		bits += xfs_countbit[(*bytep >> byte_bit)];
-		bytep++;
-	}
+	uint i;
+	uint ret = 0;
 
-	/*
-	 * Count the bits in each byte until the end of the bitmap.
-	 */
-	while (bytep < end_map) {
-		bits += xfs_countbit[*bytep];
-		bytep++;
+	for (i = 0; i < size; i++) {
+		ret |= map[i];
 	}
 
-	return (bits);
+	return (ret == 0);
 }
 
 /*
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h
index 0bbe56817542..082641a9782c 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/xfs_bit.h
@@ -55,8 +55,8 @@ extern int xfs_lowbit64(__uint64_t v);
 /* Get high bit set out of 64-bit argument, -1 if none set */
 extern int xfs_highbit64(__uint64_t);
 
-/* Count set bits in map starting with start_bit */
-extern int xfs_count_bits(uint *map, uint size, uint start_bit);
+/* Return whether bitmap is empty (1 == empty) */
+extern int xfs_bitmap_empty(uint *map, uint size);
 
 /* Count continuous one bits in map starting with start_bit */
 extern int xfs_contig_bits(uint *map, uint size, uint start_bit);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 6c1bddc04e31..b0667cb27d66 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -580,8 +580,8 @@ xfs_buf_item_unlock(
 	 * If the buf item isn't tracking any data, free it.
 	 * Otherwise, if XFS_BLI_HOLD is set clear it.
 	 */
-	if (xfs_count_bits(bip->bli_format.blf_data_map,
-			      bip->bli_format.blf_map_size, 0) == 0) {
+	if (xfs_bitmap_empty(bip->bli_format.blf_data_map,
+			     bip->bli_format.blf_map_size)) {
 		xfs_buf_item_relse(bp);
 	} else if (hold) {
 		bip->bli_flags &= ~XFS_BLI_HOLD;
-- 
cgit v1.2.3-59-g8ed1b


From 54aa8e26e90da882b145fcd33ed752431d6b318b Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Thu, 28 Jun 2007 16:43:39 +1000
Subject: [XFS] Simplify XFS min/max macros.

SGI-PV: 964547
SGI-Modid: xfs-linux-melb:xfs-kern:28945a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Nathan Scott <nscott@aconex.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_btree.h | 32 ++++++++------------------------
 1 file changed, 8 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 4e27d55a1e73..6e40a0a198ff 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -444,30 +444,14 @@ xfs_btree_setbuf(
 /*
  * Min and max functions for extlen, agblock, fileoff, and filblks types.
  */
-#define	XFS_EXTLEN_MIN(a,b)	\
-	((xfs_extlen_t)(a) < (xfs_extlen_t)(b) ? \
-		(xfs_extlen_t)(a) : (xfs_extlen_t)(b))
-#define	XFS_EXTLEN_MAX(a,b)	\
-	((xfs_extlen_t)(a) > (xfs_extlen_t)(b) ? \
-		(xfs_extlen_t)(a) : (xfs_extlen_t)(b))
-#define	XFS_AGBLOCK_MIN(a,b)	\
-	((xfs_agblock_t)(a) < (xfs_agblock_t)(b) ? \
-		(xfs_agblock_t)(a) : (xfs_agblock_t)(b))
-#define	XFS_AGBLOCK_MAX(a,b)	\
-	((xfs_agblock_t)(a) > (xfs_agblock_t)(b) ? \
-		(xfs_agblock_t)(a) : (xfs_agblock_t)(b))
-#define	XFS_FILEOFF_MIN(a,b)	\
-	((xfs_fileoff_t)(a) < (xfs_fileoff_t)(b) ? \
-		(xfs_fileoff_t)(a) : (xfs_fileoff_t)(b))
-#define	XFS_FILEOFF_MAX(a,b)	\
-	((xfs_fileoff_t)(a) > (xfs_fileoff_t)(b) ? \
-		(xfs_fileoff_t)(a) : (xfs_fileoff_t)(b))
-#define	XFS_FILBLKS_MIN(a,b)	\
-	((xfs_filblks_t)(a) < (xfs_filblks_t)(b) ? \
-		(xfs_filblks_t)(a) : (xfs_filblks_t)(b))
-#define	XFS_FILBLKS_MAX(a,b)	\
-	((xfs_filblks_t)(a) > (xfs_filblks_t)(b) ? \
-		(xfs_filblks_t)(a) : (xfs_filblks_t)(b))
+#define	XFS_EXTLEN_MIN(a,b)	min_t(xfs_extlen_t, (a), (b))
+#define	XFS_EXTLEN_MAX(a,b)	max_t(xfs_extlen_t, (a), (b))
+#define	XFS_AGBLOCK_MIN(a,b)	min_t(xfs_agblock_t, (a), (b))
+#define	XFS_AGBLOCK_MAX(a,b)	max_t(xfs_agblock_t, (a), (b))
+#define	XFS_FILEOFF_MIN(a,b)	min_t(xfs_fileoff_t, (a), (b))
+#define	XFS_FILEOFF_MAX(a,b)	max_t(xfs_fileoff_t, (a), (b))
+#define	XFS_FILBLKS_MIN(a,b)	min_t(xfs_filblks_t, (a), (b))
+#define	XFS_FILBLKS_MAX(a,b)	max_t(xfs_filblks_t, (a), (b))
 
 #define	XFS_FSB_SANITY_CHECK(mp,fsb)	\
 	(XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
-- 
cgit v1.2.3-59-g8ed1b


From bbaaf53808c778bda24f8245a440c5ceacc1a37d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Thu, 28 Jun 2007 16:43:50 +1000
Subject: [XFS] Reduce shouting by removing unnecessary macros from dir2 code.

SGI-PV: 966505
SGI-Modid: xfs-linux-melb:xfs-kern:28947a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_dir2.c       |  12 +--
 fs/xfs/xfs_dir2_block.c |  98 +++++++++++------------
 fs/xfs/xfs_dir2_block.h |   2 -
 fs/xfs/xfs_dir2_data.c  |  54 ++++++-------
 fs/xfs/xfs_dir2_data.h  |  12 +--
 fs/xfs/xfs_dir2_leaf.c  | 106 ++++++++++++-------------
 fs/xfs/xfs_dir2_leaf.h  |  29 ++-----
 fs/xfs/xfs_dir2_node.c  |  66 ++++++++--------
 fs/xfs/xfs_dir2_node.h  |   4 +-
 fs/xfs/xfs_dir2_sf.c    | 204 ++++++++++++++++++++++++------------------------
 fs/xfs/xfs_dir2_sf.h    |  20 +----
 11 files changed, 283 insertions(+), 324 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 8e8e5279334a..29e091914df4 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -55,9 +55,9 @@ xfs_dir_mount(
 	       XFS_MAX_BLOCKSIZE);
 	mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog);
 	mp->m_dirblkfsbs = 1 << mp->m_sb.sb_dirblklog;
-	mp->m_dirdatablk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_DATA_FIRSTDB(mp));
-	mp->m_dirleafblk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_LEAF_FIRSTDB(mp));
-	mp->m_dirfreeblk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_FREE_FIRSTDB(mp));
+	mp->m_dirdatablk = xfs_dir2_db_to_da(mp, XFS_DIR2_DATA_FIRSTDB(mp));
+	mp->m_dirleafblk = xfs_dir2_db_to_da(mp, XFS_DIR2_LEAF_FIRSTDB(mp));
+	mp->m_dirfreeblk = xfs_dir2_db_to_da(mp, XFS_DIR2_FREE_FIRSTDB(mp));
 	mp->m_attr_node_ents =
 		(mp->m_sb.sb_blocksize - (uint)sizeof(xfs_da_node_hdr_t)) /
 		(uint)sizeof(xfs_da_node_entry_t);
@@ -554,7 +554,7 @@ xfs_dir2_grow_inode(
 	 */
 	if (mapp != &map)
 		kmem_free(mapp, sizeof(*mapp) * count);
-	*dbp = XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)bno);
+	*dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
 	/*
 	 * Update file's size if this is the data space and it grew.
 	 */
@@ -706,7 +706,7 @@ xfs_dir2_shrink_inode(
 	dp = args->dp;
 	mp = dp->i_mount;
 	tp = args->trans;
-	da = XFS_DIR2_DB_TO_DA(mp, db);
+	da = xfs_dir2_db_to_da(mp, db);
 	/*
 	 * Unmap the fsblock(s).
 	 */
@@ -742,7 +742,7 @@ xfs_dir2_shrink_inode(
 	/*
 	 * If the block isn't the last one in the directory, we're done.
 	 */
-	if (dp->i_d.di_size > XFS_DIR2_DB_OFF_TO_BYTE(mp, db + 1, 0))
+	if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(mp, db + 1, 0))
 		return 0;
 	bno = da;
 	if ((error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK))) {
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 3accc1dcd6c9..e4df1aaae2a2 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -115,13 +115,13 @@ xfs_dir2_block_addname(
 		xfs_da_brelse(tp, bp);
 		return XFS_ERROR(EFSCORRUPTED);
 	}
-	len = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+	len = xfs_dir2_data_entsize(args->namelen);
 	/*
 	 * Set up pointers to parts of the block.
 	 */
 	bf = block->hdr.bestfree;
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+	btp = xfs_dir2_block_tail_p(mp, block);
+	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * No stale entries?  Need space for entry and new leaf.
 	 */
@@ -396,7 +396,7 @@ xfs_dir2_block_addname(
 	 * Fill in the leaf entry.
 	 */
 	blp[mid].hashval = cpu_to_be32(args->hashval);
-	blp[mid].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp,
+	blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
 				(char *)dep - (char *)block));
 	xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
 	/*
@@ -411,7 +411,7 @@ xfs_dir2_block_addname(
 	dep->inumber = cpu_to_be64(args->inumber);
 	dep->namelen = args->namelen;
 	memcpy(dep->name, args->name, args->namelen);
-	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+	tagp = xfs_dir2_data_entry_tag_p(dep);
 	*tagp = cpu_to_be16((char *)dep - (char *)block);
 	/*
 	 * Clean up the bestfree array and log the header, tail, and entry.
@@ -455,7 +455,7 @@ xfs_dir2_block_getdents(
 	/*
 	 * If the block number in the offset is out of range, we're done.
 	 */
-	if (XFS_DIR2_DATAPTR_TO_DB(mp, uio->uio_offset) > mp->m_dirdatablk) {
+	if (xfs_dir2_dataptr_to_db(mp, uio->uio_offset) > mp->m_dirdatablk) {
 		*eofp = 1;
 		return 0;
 	}
@@ -471,15 +471,15 @@ xfs_dir2_block_getdents(
 	 * Extract the byte offset we start at from the seek pointer.
 	 * We'll skip entries before this.
 	 */
-	wantoff = XFS_DIR2_DATAPTR_TO_OFF(mp, uio->uio_offset);
+	wantoff = xfs_dir2_dataptr_to_off(mp, uio->uio_offset);
 	block = bp->data;
 	xfs_dir2_data_check(dp, bp);
 	/*
 	 * Set up values for the loop.
 	 */
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	btp = xfs_dir2_block_tail_p(mp, block);
 	ptr = (char *)block->u;
-	endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp);
+	endptr = (char *)xfs_dir2_block_leaf_p(btp);
 	p.dbp = dbp;
 	p.put = put;
 	p.uio = uio;
@@ -502,7 +502,7 @@ xfs_dir2_block_getdents(
 		/*
 		 * Bump pointer for the next iteration.
 		 */
-		ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+		ptr += xfs_dir2_data_entsize(dep->namelen);
 		/*
 		 * The entry is before the desired starting point, skip it.
 		 */
@@ -513,7 +513,7 @@ xfs_dir2_block_getdents(
 		 */
 		p.namelen = dep->namelen;
 
-		p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+		p.cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 						    ptr - (char *)block);
 		p.ino = be64_to_cpu(dep->inumber);
 #if XFS_BIG_INUMS
@@ -531,7 +531,7 @@ xfs_dir2_block_getdents(
 		 */
 		if (!p.done) {
 			uio->uio_offset =
-				XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+				xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 					(char *)dep - (char *)block);
 			xfs_da_brelse(tp, bp);
 			return error;
@@ -545,7 +545,7 @@ xfs_dir2_block_getdents(
 	*eofp = 1;
 
 	uio->uio_offset =
-		XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk + 1, 0);
+		xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0);
 
 	xfs_da_brelse(tp, bp);
 
@@ -569,8 +569,8 @@ xfs_dir2_block_log_leaf(
 
 	mp = tp->t_mountp;
 	block = bp->data;
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+	btp = xfs_dir2_block_tail_p(mp, block);
+	blp = xfs_dir2_block_leaf_p(btp);
 	xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block),
 		(uint)((char *)&blp[last + 1] - (char *)block - 1));
 }
@@ -589,7 +589,7 @@ xfs_dir2_block_log_tail(
 
 	mp = tp->t_mountp;
 	block = bp->data;
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	btp = xfs_dir2_block_tail_p(mp, block);
 	xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)block),
 		(uint)((char *)(btp + 1) - (char *)block - 1));
 }
@@ -623,13 +623,13 @@ xfs_dir2_block_lookup(
 	mp = dp->i_mount;
 	block = bp->data;
 	xfs_dir2_data_check(dp, bp);
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+	btp = xfs_dir2_block_tail_p(mp, block);
+	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * Get the offset from the leaf entry, to point to the data.
 	 */
 	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address)));
+	      ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
 	/*
 	 * Fill in inode number, release the block.
 	 */
@@ -675,8 +675,8 @@ xfs_dir2_block_lookup_int(
 	ASSERT(bp != NULL);
 	block = bp->data;
 	xfs_dir2_data_check(dp, bp);
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+	btp = xfs_dir2_block_tail_p(mp, block);
+	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * Loop doing a binary search for our hash value.
 	 * Find our entry, ENOENT if it's not there.
@@ -713,7 +713,7 @@ xfs_dir2_block_lookup_int(
 		 * Get pointer to the entry from the leaf.
 		 */
 		dep = (xfs_dir2_data_entry_t *)
-			((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, addr));
+			((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
 		/*
 		 * Compare, if it's right give back buffer & entry number.
 		 */
@@ -768,20 +768,20 @@ xfs_dir2_block_removename(
 	tp = args->trans;
 	mp = dp->i_mount;
 	block = bp->data;
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+	btp = xfs_dir2_block_tail_p(mp, block);
+	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * Point to the data entry using the leaf entry.
 	 */
 	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address)));
+	      ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
 	/*
 	 * Mark the data entry's space free.
 	 */
 	needlog = needscan = 0;
 	xfs_dir2_data_make_free(tp, bp,
 		(xfs_dir2_data_aoff_t)((char *)dep - (char *)block),
-		XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan);
+		xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
 	/*
 	 * Fix up the block tail.
 	 */
@@ -843,13 +843,13 @@ xfs_dir2_block_replace(
 	dp = args->dp;
 	mp = dp->i_mount;
 	block = bp->data;
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+	btp = xfs_dir2_block_tail_p(mp, block);
+	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * Point to the data entry we need to change.
 	 */
 	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(blp[ent].address)));
+	      ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
 	ASSERT(be64_to_cpu(dep->inumber) != args->inumber);
 	/*
 	 * Change the inode number to the new value.
@@ -912,7 +912,7 @@ xfs_dir2_leaf_to_block(
 	mp = dp->i_mount;
 	leaf = lbp->data;
 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
-	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	/*
 	 * If there are data blocks other than the first one, take this
 	 * opportunity to remove trailing empty data blocks that may have
@@ -920,7 +920,7 @@ xfs_dir2_leaf_to_block(
 	 * These will show up in the leaf bests table.
 	 */
 	while (dp->i_d.di_size > mp->m_dirblksize) {
-		bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
+		bestsp = xfs_dir2_leaf_bests_p(ltp);
 		if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
 		    mp->m_dirblksize - (uint)sizeof(block->hdr)) {
 			if ((error =
@@ -974,14 +974,14 @@ xfs_dir2_leaf_to_block(
 	/*
 	 * Initialize the block tail.
 	 */
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	btp = xfs_dir2_block_tail_p(mp, block);
 	btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
 	btp->stale = 0;
 	xfs_dir2_block_log_tail(tp, dbp);
 	/*
 	 * Initialize the block leaf area.  We compact out stale entries.
 	 */
-	lep = XFS_DIR2_BLOCK_LEAF_P(btp);
+	lep = xfs_dir2_block_leaf_p(btp);
 	for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
 		if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR)
 			continue;
@@ -1067,7 +1067,7 @@ xfs_dir2_sf_to_block(
 	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
 	ASSERT(dp->i_df.if_u1.if_data != NULL);
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-	ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
 	/*
 	 * Copy the directory into the stack buffer.
 	 * Then pitch the incore inode data so we can make extents.
@@ -1119,10 +1119,10 @@ xfs_dir2_sf_to_block(
 	/*
 	 * Fill in the tail.
 	 */
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	btp = xfs_dir2_block_tail_p(mp, block);
 	btp->count = cpu_to_be32(sfp->hdr.count + 2);	/* ., .. */
 	btp->stale = 0;
-	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+	blp = xfs_dir2_block_leaf_p(btp);
 	endoffset = (uint)((char *)blp - (char *)block);
 	/*
 	 * Remove the freespace, we'll manage it.
@@ -1138,25 +1138,25 @@ xfs_dir2_sf_to_block(
 	dep->inumber = cpu_to_be64(dp->i_ino);
 	dep->namelen = 1;
 	dep->name[0] = '.';
-	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+	tagp = xfs_dir2_data_entry_tag_p(dep);
 	*tagp = cpu_to_be16((char *)dep - (char *)block);
 	xfs_dir2_data_log_entry(tp, bp, dep);
 	blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
-	blp[0].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp,
+	blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
 				(char *)dep - (char *)block));
 	/*
 	 * Create entry for ..
 	 */
 	dep = (xfs_dir2_data_entry_t *)
 		((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET);
-	dep->inumber = cpu_to_be64(XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent));
+	dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
 	dep->namelen = 2;
 	dep->name[0] = dep->name[1] = '.';
-	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+	tagp = xfs_dir2_data_entry_tag_p(dep);
 	*tagp = cpu_to_be16((char *)dep - (char *)block);
 	xfs_dir2_data_log_entry(tp, bp, dep);
 	blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
-	blp[1].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp,
+	blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
 				(char *)dep - (char *)block));
 	offset = XFS_DIR2_DATA_FIRST_OFFSET;
 	/*
@@ -1165,7 +1165,7 @@ xfs_dir2_sf_to_block(
 	if ((i = 0) == sfp->hdr.count)
 		sfep = NULL;
 	else
-		sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+		sfep = xfs_dir2_sf_firstentry(sfp);
 	/*
 	 * Need to preserve the existing offset values in the sf directory.
 	 * Insert holes (unused entries) where necessary.
@@ -1177,7 +1177,7 @@ xfs_dir2_sf_to_block(
 		if (sfep == NULL)
 			newoffset = endoffset;
 		else
-			newoffset = XFS_DIR2_SF_GET_OFFSET(sfep);
+			newoffset = xfs_dir2_sf_get_offset(sfep);
 		/*
 		 * There should be a hole here, make one.
 		 */
@@ -1186,7 +1186,7 @@ xfs_dir2_sf_to_block(
 			      ((char *)block + offset);
 			dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 			dup->length = cpu_to_be16(newoffset - offset);
-			*XFS_DIR2_DATA_UNUSED_TAG_P(dup) = cpu_to_be16(
+			*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
 				((char *)dup - (char *)block));
 			xfs_dir2_data_log_unused(tp, bp, dup);
 			(void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block,
@@ -1198,22 +1198,22 @@ xfs_dir2_sf_to_block(
 		 * Copy a real entry.
 		 */
 		dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset);
-		dep->inumber = cpu_to_be64(XFS_DIR2_SF_GET_INUMBER(sfp,
-				XFS_DIR2_SF_INUMBERP(sfep)));
+		dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp,
+				xfs_dir2_sf_inumberp(sfep)));
 		dep->namelen = sfep->namelen;
 		memcpy(dep->name, sfep->name, dep->namelen);
-		tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+		tagp = xfs_dir2_data_entry_tag_p(dep);
 		*tagp = cpu_to_be16((char *)dep - (char *)block);
 		xfs_dir2_data_log_entry(tp, bp, dep);
 		blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname(
 					(char *)sfep->name, sfep->namelen));
-		blp[2 + i].address = cpu_to_be32(XFS_DIR2_BYTE_TO_DATAPTR(mp,
+		blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
 						 (char *)dep - (char *)block));
 		offset = (int)((char *)(tagp + 1) - (char *)block);
 		if (++i == sfp->hdr.count)
 			sfep = NULL;
 		else
-			sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+			sfep = xfs_dir2_sf_nextentry(sfp, sfep);
 	}
 	/* Done with the temporary buffer */
 	kmem_free(buf, buf_len);
diff --git a/fs/xfs/xfs_dir2_block.h b/fs/xfs/xfs_dir2_block.h
index 6722effd0b20..e7c2606161e9 100644
--- a/fs/xfs/xfs_dir2_block.h
+++ b/fs/xfs/xfs_dir2_block.h
@@ -60,7 +60,6 @@ typedef struct xfs_dir2_block {
 /*
  * Pointer to the leaf header embedded in a data block (1-block format)
  */
-#define	XFS_DIR2_BLOCK_TAIL_P(mp,block)	xfs_dir2_block_tail_p(mp,block)
 static inline xfs_dir2_block_tail_t *
 xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block)
 {
@@ -71,7 +70,6 @@ xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block)
 /*
  * Pointer to the leaf entries embedded in a data block (1-block format)
  */
-#define	XFS_DIR2_BLOCK_LEAF_P(btp)	xfs_dir2_block_leaf_p(btp)
 static inline struct xfs_dir2_leaf_entry *
 xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp)
 {
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index c211c37ef67c..7ebe295bd6d3 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -72,8 +72,8 @@ xfs_dir2_data_check(
 	bf = d->hdr.bestfree;
 	p = (char *)d->u;
 	if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
-		btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d);
-		lep = XFS_DIR2_BLOCK_LEAF_P(btp);
+		btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
+		lep = xfs_dir2_block_leaf_p(btp);
 		endp = (char *)lep;
 	} else
 		endp = (char *)d + mp->m_dirblksize;
@@ -107,7 +107,7 @@ xfs_dir2_data_check(
 		 */
 		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
 			ASSERT(lastfree == 0);
-			ASSERT(be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup)) ==
+			ASSERT(be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
 			       (char *)dup - (char *)d);
 			dfp = xfs_dir2_data_freefind(d, dup);
 			if (dfp) {
@@ -131,12 +131,12 @@ xfs_dir2_data_check(
 		dep = (xfs_dir2_data_entry_t *)p;
 		ASSERT(dep->namelen != 0);
 		ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0);
-		ASSERT(be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep)) ==
+		ASSERT(be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) ==
 		       (char *)dep - (char *)d);
 		count++;
 		lastfree = 0;
 		if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
-			addr = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+			addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 				(xfs_dir2_data_aoff_t)
 				((char *)dep - (char *)d));
 			hash = xfs_da_hashname((char *)dep->name, dep->namelen);
@@ -147,7 +147,7 @@ xfs_dir2_data_check(
 			}
 			ASSERT(i < be32_to_cpu(btp->count));
 		}
-		p += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+		p += xfs_dir2_data_entsize(dep->namelen);
 	}
 	/*
 	 * Need to have seen all the entries and all the bestfree slots.
@@ -346,8 +346,8 @@ xfs_dir2_data_freescan(
 	 */
 	p = (char *)d->u;
 	if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
-		btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d);
-		endp = (char *)XFS_DIR2_BLOCK_LEAF_P(btp);
+		btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
+		endp = (char *)xfs_dir2_block_leaf_p(btp);
 	} else
 		endp = (char *)d + mp->m_dirblksize;
 	/*
@@ -360,7 +360,7 @@ xfs_dir2_data_freescan(
 		 */
 		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
 			ASSERT((char *)dup - (char *)d ==
-			       be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup)));
+			       be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
 			xfs_dir2_data_freeinsert(d, dup, loghead);
 			p += be16_to_cpu(dup->length);
 		}
@@ -370,8 +370,8 @@ xfs_dir2_data_freescan(
 		else {
 			dep = (xfs_dir2_data_entry_t *)p;
 			ASSERT((char *)dep - (char *)d ==
-			       be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep)));
-			p += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+			       be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)));
+			p += xfs_dir2_data_entsize(dep->namelen);
 		}
 	}
 }
@@ -402,7 +402,7 @@ xfs_dir2_data_init(
 	/*
 	 * Get the buffer set up for the block.
 	 */
-	error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, blkno), -1, &bp,
+	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, blkno), -1, &bp,
 		XFS_DATA_FORK);
 	if (error) {
 		return error;
@@ -427,7 +427,7 @@ xfs_dir2_data_init(
 	t=mp->m_dirblksize - (uint)sizeof(d->hdr);
 	d->hdr.bestfree[0].length = cpu_to_be16(t);
 	dup->length = cpu_to_be16(t);
-	*XFS_DIR2_DATA_UNUSED_TAG_P(dup) = cpu_to_be16((char *)dup - (char *)d);
+	*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)d);
 	/*
 	 * Log it and return it.
 	 */
@@ -452,7 +452,7 @@ xfs_dir2_data_log_entry(
 	ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
 	       be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
 	xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d),
-		(uint)((char *)(XFS_DIR2_DATA_ENTRY_TAG_P(dep) + 1) -
+		(uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) -
 		       (char *)d - 1));
 }
 
@@ -497,8 +497,8 @@ xfs_dir2_data_log_unused(
 	 * Log the end (tag) of the unused entry.
 	 */
 	xfs_da_log_buf(tp, bp,
-		(uint)((char *)XFS_DIR2_DATA_UNUSED_TAG_P(dup) - (char *)d),
-		(uint)((char *)XFS_DIR2_DATA_UNUSED_TAG_P(dup) - (char *)d +
+		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d),
+		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d +
 		       sizeof(xfs_dir2_data_off_t) - 1));
 }
 
@@ -535,8 +535,8 @@ xfs_dir2_data_make_free(
 		xfs_dir2_block_tail_t	*btp;	/* block tail */
 
 		ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
-		btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d);
-		endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp);
+		btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
+		endptr = (char *)xfs_dir2_block_leaf_p(btp);
 	}
 	/*
 	 * If this isn't the start of the block, then back up to
@@ -587,7 +587,7 @@ xfs_dir2_data_make_free(
 		 * Fix up the new big freespace.
 		 */
 		be16_add(&prevdup->length, len + be16_to_cpu(postdup->length));
-		*XFS_DIR2_DATA_UNUSED_TAG_P(prevdup) =
+		*xfs_dir2_data_unused_tag_p(prevdup) =
 			cpu_to_be16((char *)prevdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, prevdup);
 		if (!needscan) {
@@ -621,7 +621,7 @@ xfs_dir2_data_make_free(
 	else if (prevdup) {
 		dfp = xfs_dir2_data_freefind(d, prevdup);
 		be16_add(&prevdup->length, len);
-		*XFS_DIR2_DATA_UNUSED_TAG_P(prevdup) =
+		*xfs_dir2_data_unused_tag_p(prevdup) =
 			cpu_to_be16((char *)prevdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, prevdup);
 		/*
@@ -649,7 +649,7 @@ xfs_dir2_data_make_free(
 		newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
 		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 		newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
-		*XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+		*xfs_dir2_data_unused_tag_p(newdup) =
 			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
 		/*
@@ -676,7 +676,7 @@ xfs_dir2_data_make_free(
 		newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
 		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 		newdup->length = cpu_to_be16(len);
-		*XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+		*xfs_dir2_data_unused_tag_p(newdup) =
 			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
 		(void)xfs_dir2_data_freeinsert(d, newdup, needlogp);
@@ -712,7 +712,7 @@ xfs_dir2_data_use_free(
 	ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
 	ASSERT(offset >= (char *)dup - (char *)d);
 	ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d);
-	ASSERT((char *)dup - (char *)d == be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup)));
+	ASSERT((char *)dup - (char *)d == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
 	/*
 	 * Look up the entry in the bestfree table.
 	 */
@@ -745,7 +745,7 @@ xfs_dir2_data_use_free(
 		newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
 		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 		newdup->length = cpu_to_be16(oldlen - len);
-		*XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+		*xfs_dir2_data_unused_tag_p(newdup) =
 			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
 		/*
@@ -772,7 +772,7 @@ xfs_dir2_data_use_free(
 	else if (matchback) {
 		newdup = dup;
 		newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
-		*XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+		*xfs_dir2_data_unused_tag_p(newdup) =
 			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
 		/*
@@ -799,13 +799,13 @@ xfs_dir2_data_use_free(
 	else {
 		newdup = dup;
 		newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
-		*XFS_DIR2_DATA_UNUSED_TAG_P(newdup) =
+		*xfs_dir2_data_unused_tag_p(newdup) =
 			cpu_to_be16((char *)newdup - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup);
 		newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
 		newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
 		newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
-		*XFS_DIR2_DATA_UNUSED_TAG_P(newdup2) =
+		*xfs_dir2_data_unused_tag_p(newdup2) =
 			cpu_to_be16((char *)newdup2 - (char *)d);
 		xfs_dir2_data_log_unused(tp, bp, newdup2);
 		/*
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
index c94c9099cfb1..b816e0252739 100644
--- a/fs/xfs/xfs_dir2_data.h
+++ b/fs/xfs/xfs_dir2_data.h
@@ -44,7 +44,7 @@ struct xfs_trans;
 #define	XFS_DIR2_DATA_SPACE	0
 #define	XFS_DIR2_DATA_OFFSET	(XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
 #define	XFS_DIR2_DATA_FIRSTDB(mp)	\
-	XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATA_OFFSET)
+	xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
 
 /*
  * Offsets of . and .. in data space (always block 0)
@@ -52,9 +52,9 @@ struct xfs_trans;
 #define	XFS_DIR2_DATA_DOT_OFFSET	\
 	((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t))
 #define	XFS_DIR2_DATA_DOTDOT_OFFSET	\
-	(XFS_DIR2_DATA_DOT_OFFSET + XFS_DIR2_DATA_ENTSIZE(1))
+	(XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1))
 #define	XFS_DIR2_DATA_FIRST_OFFSET		\
-	(XFS_DIR2_DATA_DOTDOT_OFFSET + XFS_DIR2_DATA_ENTSIZE(2))
+	(XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2))
 
 /*
  * Structures.
@@ -123,7 +123,6 @@ typedef struct xfs_dir2_data {
 /*
  * Size of a data entry.
  */
-#define XFS_DIR2_DATA_ENTSIZE(n)	xfs_dir2_data_entsize(n)
 static inline int xfs_dir2_data_entsize(int n)
 {
 	return (int)roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \
@@ -133,19 +132,16 @@ static inline int xfs_dir2_data_entsize(int n)
 /*
  * Pointer to an entry's tag word.
  */
-#define	XFS_DIR2_DATA_ENTRY_TAG_P(dep)	xfs_dir2_data_entry_tag_p(dep)
 static inline __be16 *
 xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep)
 {
 	return (__be16 *)((char *)dep +
-		XFS_DIR2_DATA_ENTSIZE(dep->namelen) - sizeof(__be16));
+		xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
 }
 
 /*
  * Pointer to a freespace's tag word.
  */
-#define	XFS_DIR2_DATA_UNUSED_TAG_P(dup) \
-	xfs_dir2_data_unused_tag_p(dup)
 static inline __be16 *
 xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup)
 {
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index db14ea71459f..1b73c9ad646a 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -92,7 +92,7 @@ xfs_dir2_block_to_leaf(
 	if ((error = xfs_da_grow_inode(args, &blkno))) {
 		return error;
 	}
-	ldb = XFS_DIR2_DA_TO_DB(mp, blkno);
+	ldb = xfs_dir2_da_to_db(mp, blkno);
 	ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp));
 	/*
 	 * Initialize the leaf block, get a buffer for it.
@@ -104,8 +104,8 @@ xfs_dir2_block_to_leaf(
 	leaf = lbp->data;
 	block = dbp->data;
 	xfs_dir2_data_check(dp, dbp);
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+	btp = xfs_dir2_block_tail_p(mp, block);
+	blp = xfs_dir2_block_leaf_p(btp);
 	/*
 	 * Set the counts in the leaf header.
 	 */
@@ -137,9 +137,9 @@ xfs_dir2_block_to_leaf(
 	/*
 	 * Set up leaf tail and bests table.
 	 */
-	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	ltp->bestcount = cpu_to_be32(1);
-	bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
+	bestsp = xfs_dir2_leaf_bests_p(ltp);
 	bestsp[0] =  block->hdr.bestfree[0].length;
 	/*
 	 * Log the data header and leaf bests table.
@@ -209,9 +209,9 @@ xfs_dir2_leaf_addname(
 	 */
 	index = xfs_dir2_leaf_search_hash(args, lbp);
 	leaf = lbp->data;
-	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
-	bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
-	length = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+	bestsp = xfs_dir2_leaf_bests_p(ltp);
+	length = xfs_dir2_data_entsize(args->namelen);
 	/*
 	 * See if there are any entries with the same hash value
 	 * and space in their block for the new entry.
@@ -223,7 +223,7 @@ xfs_dir2_leaf_addname(
 	     index++, lep++) {
 		if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
 			continue;
-		i = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
+		i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
 		ASSERT(i < be32_to_cpu(ltp->bestcount));
 		ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF);
 		if (be16_to_cpu(bestsp[i]) >= length) {
@@ -378,7 +378,7 @@ xfs_dir2_leaf_addname(
 	 */
 	else {
 		if ((error =
-		    xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, use_block),
+		    xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, use_block),
 			    -1, &dbp, XFS_DATA_FORK))) {
 			xfs_da_brelse(tp, lbp);
 			return error;
@@ -407,7 +407,7 @@ xfs_dir2_leaf_addname(
 	dep->inumber = cpu_to_be64(args->inumber);
 	dep->namelen = args->namelen;
 	memcpy(dep->name, args->name, dep->namelen);
-	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+	tagp = xfs_dir2_data_entry_tag_p(dep);
 	*tagp = cpu_to_be16((char *)dep - (char *)data);
 	/*
 	 * Need to scan fix up the bestfree table.
@@ -529,7 +529,7 @@ xfs_dir2_leaf_addname(
 	 * Fill in the new leaf entry.
 	 */
 	lep->hashval = cpu_to_be32(args->hashval);
-	lep->address = cpu_to_be32(XFS_DIR2_DB_OFF_TO_DATAPTR(mp, use_block,
+	lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, use_block,
 				be16_to_cpu(*tagp)));
 	/*
 	 * Log the leaf fields and give up the buffers.
@@ -567,13 +567,13 @@ xfs_dir2_leaf_check(
 	 * Should factor in the size of the bests table as well.
 	 * We can deduce a value for that from di_size.
 	 */
-	ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp));
-	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
+	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	/*
 	 * Leaves and bests don't overlap.
 	 */
 	ASSERT((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <=
-	       (char *)XFS_DIR2_LEAF_BESTS_P(ltp));
+	       (char *)xfs_dir2_leaf_bests_p(ltp));
 	/*
 	 * Check hash value order, count stale entries.
 	 */
@@ -815,12 +815,12 @@ xfs_dir2_leaf_getdents(
 	 * Inside the loop we keep the main offset value as a byte offset
 	 * in the directory file.
 	 */
-	curoff = XFS_DIR2_DATAPTR_TO_BYTE(mp, uio->uio_offset);
+	curoff = xfs_dir2_dataptr_to_byte(mp, uio->uio_offset);
 	/*
 	 * Force this conversion through db so we truncate the offset
 	 * down to get the start of the data block.
 	 */
-	map_off = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_BYTE_TO_DB(mp, curoff));
+	map_off = xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, curoff));
 	/*
 	 * Loop over directory entries until we reach the end offset.
 	 * Get more blocks and readahead as necessary.
@@ -870,7 +870,7 @@ xfs_dir2_leaf_getdents(
 			 */
 			if (1 + ra_want > map_blocks &&
 			    map_off <
-			    XFS_DIR2_BYTE_TO_DA(mp, XFS_DIR2_LEAF_OFFSET)) {
+			    xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
 				/*
 				 * Get more bmaps, fill in after the ones
 				 * we already have in the table.
@@ -878,7 +878,7 @@ xfs_dir2_leaf_getdents(
 				nmap = map_size - map_valid;
 				error = xfs_bmapi(tp, dp,
 					map_off,
-					XFS_DIR2_BYTE_TO_DA(mp,
+					xfs_dir2_byte_to_da(mp,
 						XFS_DIR2_LEAF_OFFSET) - map_off,
 					XFS_BMAPI_METADATA, NULL, 0,
 					&map[map_valid], &nmap, NULL, NULL);
@@ -903,7 +903,7 @@ xfs_dir2_leaf_getdents(
 					map[map_valid + nmap - 1].br_blockcount;
 				else
 					map_off =
-						XFS_DIR2_BYTE_TO_DA(mp,
+						xfs_dir2_byte_to_da(mp,
 							XFS_DIR2_LEAF_OFFSET);
 				/*
 				 * Look for holes in the mapping, and
@@ -931,14 +931,14 @@ xfs_dir2_leaf_getdents(
 			 * No valid mappings, so no more data blocks.
 			 */
 			if (!map_valid) {
-				curoff = XFS_DIR2_DA_TO_BYTE(mp, map_off);
+				curoff = xfs_dir2_da_to_byte(mp, map_off);
 				break;
 			}
 			/*
 			 * Read the directory block starting at the first
 			 * mapping.
 			 */
-			curdb = XFS_DIR2_DA_TO_DB(mp, map->br_startoff);
+			curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
 			error = xfs_da_read_buf(tp, dp, map->br_startoff,
 				map->br_blockcount >= mp->m_dirblkfsbs ?
 				    XFS_FSB_TO_DADDR(mp, map->br_startblock) :
@@ -1014,7 +1014,7 @@ xfs_dir2_leaf_getdents(
 			/*
 			 * Having done a read, we need to set a new offset.
 			 */
-			newoff = XFS_DIR2_DB_OFF_TO_BYTE(mp, curdb, 0);
+			newoff = xfs_dir2_db_off_to_byte(mp, curdb, 0);
 			/*
 			 * Start of the current block.
 			 */
@@ -1024,7 +1024,7 @@ xfs_dir2_leaf_getdents(
 			 * Make sure we're in the right block.
 			 */
 			else if (curoff > newoff)
-				ASSERT(XFS_DIR2_BYTE_TO_DB(mp, curoff) ==
+				ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
 				       curdb);
 			data = bp->data;
 			xfs_dir2_data_check(dp, bp);
@@ -1032,7 +1032,7 @@ xfs_dir2_leaf_getdents(
 			 * Find our position in the block.
 			 */
 			ptr = (char *)&data->u;
-			byteoff = XFS_DIR2_BYTE_TO_OFF(mp, curoff);
+			byteoff = xfs_dir2_byte_to_off(mp, curoff);
 			/*
 			 * Skip past the header.
 			 */
@@ -1054,15 +1054,15 @@ xfs_dir2_leaf_getdents(
 					}
 					dep = (xfs_dir2_data_entry_t *)ptr;
 					length =
-					   XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+					   xfs_dir2_data_entsize(dep->namelen);
 					ptr += length;
 				}
 				/*
 				 * Now set our real offset.
 				 */
 				curoff =
-					XFS_DIR2_DB_OFF_TO_BYTE(mp,
-					    XFS_DIR2_BYTE_TO_DB(mp, curoff),
+					xfs_dir2_db_off_to_byte(mp,
+					    xfs_dir2_byte_to_db(mp, curoff),
 					    (char *)ptr - (char *)data);
 				if (ptr >= (char *)data + mp->m_dirblksize) {
 					continue;
@@ -1091,9 +1091,9 @@ xfs_dir2_leaf_getdents(
 
 		p->namelen = dep->namelen;
 
-		length = XFS_DIR2_DATA_ENTSIZE(p->namelen);
+		length = xfs_dir2_data_entsize(p->namelen);
 
-		p->cook = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff + length);
+		p->cook = xfs_dir2_byte_to_dataptr(mp, curoff + length);
 
 		p->ino = be64_to_cpu(dep->inumber);
 #if XFS_BIG_INUMS
@@ -1121,10 +1121,10 @@ xfs_dir2_leaf_getdents(
 	 * All done.  Set output offset value to current offset.
 	 */
 	*eofp = eof;
-	if (curoff > XFS_DIR2_DATAPTR_TO_BYTE(mp, XFS_DIR2_MAX_DATAPTR))
+	if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR))
 		uio->uio_offset = XFS_DIR2_MAX_DATAPTR;
 	else
-		uio->uio_offset = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff);
+		uio->uio_offset = xfs_dir2_byte_to_dataptr(mp, curoff);
 	kmem_free(map, map_size * sizeof(*map));
 	kmem_free(p, sizeof(*p));
 	if (bp)
@@ -1159,7 +1159,7 @@ xfs_dir2_leaf_init(
 	/*
 	 * Get the buffer for the block.
 	 */
-	error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, bno), -1, &bp,
+	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp,
 		XFS_DATA_FORK);
 	if (error) {
 		return error;
@@ -1181,7 +1181,7 @@ xfs_dir2_leaf_init(
 	 * the block.
 	 */
 	if (magic == XFS_DIR2_LEAF1_MAGIC) {
-		ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+		ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 		ltp->bestcount = 0;
 		xfs_dir2_leaf_log_tail(tp, bp);
 	}
@@ -1206,9 +1206,9 @@ xfs_dir2_leaf_log_bests(
 
 	leaf = bp->data;
 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
-	ltp = XFS_DIR2_LEAF_TAIL_P(tp->t_mountp, leaf);
-	firstb = XFS_DIR2_LEAF_BESTS_P(ltp) + first;
-	lastb = XFS_DIR2_LEAF_BESTS_P(ltp) + last;
+	ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
+	firstb = xfs_dir2_leaf_bests_p(ltp) + first;
+	lastb = xfs_dir2_leaf_bests_p(ltp) + last;
 	xfs_da_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
 		(uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
 }
@@ -1268,7 +1268,7 @@ xfs_dir2_leaf_log_tail(
 	mp = tp->t_mountp;
 	leaf = bp->data;
 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
-	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
 		(uint)(mp->m_dirblksize - 1));
 }
@@ -1312,7 +1312,7 @@ xfs_dir2_leaf_lookup(
 	 */
 	dep = (xfs_dir2_data_entry_t *)
 	      ((char *)dbp->data +
-	       XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address)));
+	       xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
 	/*
 	 * Return the found inode number.
 	 */
@@ -1381,7 +1381,7 @@ xfs_dir2_leaf_lookup_int(
 		/*
 		 * Get the new data block number.
 		 */
-		newdb = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
+		newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
 		/*
 		 * If it's not the same as the old data block number,
 		 * need to pitch the old one and read the new one.
@@ -1391,7 +1391,7 @@ xfs_dir2_leaf_lookup_int(
 				xfs_da_brelse(tp, dbp);
 			if ((error =
 			    xfs_da_read_buf(tp, dp,
-				    XFS_DIR2_DB_TO_DA(mp, newdb), -1, &dbp,
+				    xfs_dir2_db_to_da(mp, newdb), -1, &dbp,
 				    XFS_DATA_FORK))) {
 				xfs_da_brelse(tp, lbp);
 				return error;
@@ -1404,7 +1404,7 @@ xfs_dir2_leaf_lookup_int(
 		 */
 		dep = (xfs_dir2_data_entry_t *)
 		      ((char *)dbp->data +
-		       XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address)));
+		       xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
 		/*
 		 * If it matches then return it.
 		 */
@@ -1469,20 +1469,20 @@ xfs_dir2_leaf_removename(
 	 * Point to the leaf entry, use that to point to the data entry.
 	 */
 	lep = &leaf->ents[index];
-	db = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
+	db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
 	dep = (xfs_dir2_data_entry_t *)
-	      ((char *)data + XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address)));
+	      ((char *)data + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
 	needscan = needlog = 0;
 	oldbest = be16_to_cpu(data->hdr.bestfree[0].length);
-	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
-	bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
+	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
+	bestsp = xfs_dir2_leaf_bests_p(ltp);
 	ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
 	/*
 	 * Mark the former data entry unused.
 	 */
 	xfs_dir2_data_make_free(tp, dbp,
 		(xfs_dir2_data_aoff_t)((char *)dep - (char *)data),
-		XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan);
+		xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
 	/*
 	 * We just mark the leaf entry stale by putting a null in it.
 	 */
@@ -1602,7 +1602,7 @@ xfs_dir2_leaf_replace(
 	 */
 	dep = (xfs_dir2_data_entry_t *)
 	      ((char *)dbp->data +
-	       XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, be32_to_cpu(lep->address)));
+	       xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
 	ASSERT(args->inumber != be64_to_cpu(dep->inumber));
 	/*
 	 * Put the new inode number in, log it.
@@ -1698,7 +1698,7 @@ xfs_dir2_leaf_trim_data(
 	/*
 	 * Read the offending data block.  We need its buffer.
 	 */
-	if ((error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, db), -1, &dbp,
+	if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp,
 			XFS_DATA_FORK))) {
 		return error;
 	}
@@ -1712,7 +1712,7 @@ xfs_dir2_leaf_trim_data(
 	 */
 
 	leaf = lbp->data;
-	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) ==
 	       mp->m_dirblksize - (uint)sizeof(data->hdr));
 	ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
@@ -1727,7 +1727,7 @@ xfs_dir2_leaf_trim_data(
 	/*
 	 * Eliminate the last bests entry from the table.
 	 */
-	bestsp = XFS_DIR2_LEAF_BESTS_P(ltp);
+	bestsp = xfs_dir2_leaf_bests_p(ltp);
 	be32_add(&ltp->bestcount, -1);
 	memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
 	xfs_dir2_leaf_log_tail(tp, lbp);
@@ -1838,12 +1838,12 @@ xfs_dir2_node_to_leaf(
 	/*
 	 * Set up the leaf tail from the freespace block.
 	 */
-	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	ltp->bestcount = free->hdr.nvalid;
 	/*
 	 * Set up the leaf bests table.
 	 */
-	memcpy(XFS_DIR2_LEAF_BESTS_P(ltp), free->bests,
+	memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests,
 		be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0]));
 	xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
 	xfs_dir2_leaf_log_tail(tp, lbp);
diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h
index f57ca1162412..70c97f3f815e 100644
--- a/fs/xfs/xfs_dir2_leaf.h
+++ b/fs/xfs/xfs_dir2_leaf.h
@@ -32,7 +32,7 @@ struct xfs_trans;
 #define	XFS_DIR2_LEAF_SPACE	1
 #define	XFS_DIR2_LEAF_OFFSET	(XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
 #define	XFS_DIR2_LEAF_FIRSTDB(mp)	\
-	XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_LEAF_OFFSET)
+	xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET)
 
 /*
  * Offset in data space of a data entry.
@@ -82,7 +82,6 @@ typedef struct xfs_dir2_leaf {
  * DB blocks here are logical directory block numbers, not filesystem blocks.
  */
 
-#define	XFS_DIR2_MAX_LEAF_ENTS(mp)	xfs_dir2_max_leaf_ents(mp)
 static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
 {
 	return (int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) /
@@ -92,7 +91,6 @@ static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
 /*
  * Get address of the bestcount field in the single-leaf block.
  */
-#define	XFS_DIR2_LEAF_TAIL_P(mp,lp)	xfs_dir2_leaf_tail_p(mp, lp)
 static inline xfs_dir2_leaf_tail_t *
 xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp)
 {
@@ -104,7 +102,6 @@ xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp)
 /*
  * Get address of the bests array in the single-leaf block.
  */
-#define	XFS_DIR2_LEAF_BESTS_P(ltp)	xfs_dir2_leaf_bests_p(ltp)
 static inline __be16 *
 xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
 {
@@ -114,7 +111,6 @@ xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
 /*
  * Convert dataptr to byte in file space
  */
-#define	XFS_DIR2_DATAPTR_TO_BYTE(mp,dp)	xfs_dir2_dataptr_to_byte(mp, dp)
 static inline xfs_dir2_off_t
 xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
 {
@@ -124,7 +120,6 @@ xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
 /*
  * Convert byte in file space to dataptr.  It had better be aligned.
  */
-#define	XFS_DIR2_BYTE_TO_DATAPTR(mp,by)	xfs_dir2_byte_to_dataptr(mp,by)
 static inline xfs_dir2_dataptr_t
 xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
 {
@@ -134,7 +129,6 @@ xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
 /*
  * Convert byte in space to (DB) block
  */
-#define	XFS_DIR2_BYTE_TO_DB(mp,by)	xfs_dir2_byte_to_db(mp, by)
 static inline xfs_dir2_db_t
 xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
 {
@@ -145,17 +139,15 @@ xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
 /*
  * Convert dataptr to a block number
  */
-#define	XFS_DIR2_DATAPTR_TO_DB(mp,dp)	xfs_dir2_dataptr_to_db(mp, dp)
 static inline xfs_dir2_db_t
 xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
 {
-	return XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp));
+	return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp));
 }
 
 /*
  * Convert byte in space to offset in a block
  */
-#define	XFS_DIR2_BYTE_TO_OFF(mp,by)	xfs_dir2_byte_to_off(mp, by)
 static inline xfs_dir2_data_aoff_t
 xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
 {
@@ -166,18 +158,15 @@ xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
 /*
  * Convert dataptr to a byte offset in a block
  */
-#define	XFS_DIR2_DATAPTR_TO_OFF(mp,dp)	xfs_dir2_dataptr_to_off(mp, dp)
 static inline xfs_dir2_data_aoff_t
 xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
 {
-	return XFS_DIR2_BYTE_TO_OFF(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp));
+	return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp));
 }
 
 /*
  * Convert block and offset to byte in space
  */
-#define	XFS_DIR2_DB_OFF_TO_BYTE(mp,db,o)	\
-	xfs_dir2_db_off_to_byte(mp, db, o)
 static inline xfs_dir2_off_t
 xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
 			xfs_dir2_data_aoff_t o)
@@ -189,7 +178,6 @@ xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
 /*
  * Convert block (DB) to block (dablk)
  */
-#define	XFS_DIR2_DB_TO_DA(mp,db)	xfs_dir2_db_to_da(mp, db)
 static inline xfs_dablk_t
 xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
 {
@@ -199,29 +187,25 @@ xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
 /*
  * Convert byte in space to (DA) block
  */
-#define	XFS_DIR2_BYTE_TO_DA(mp,by)	xfs_dir2_byte_to_da(mp, by)
 static inline xfs_dablk_t
 xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by)
 {
-	return XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_BYTE_TO_DB(mp, by));
+	return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by));
 }
 
 /*
  * Convert block and offset to dataptr
  */
-#define	XFS_DIR2_DB_OFF_TO_DATAPTR(mp,db,o)	\
-	xfs_dir2_db_off_to_dataptr(mp, db, o)
 static inline xfs_dir2_dataptr_t
 xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
 			   xfs_dir2_data_aoff_t o)
 {
-	return XFS_DIR2_BYTE_TO_DATAPTR(mp, XFS_DIR2_DB_OFF_TO_BYTE(mp, db, o));
+	return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o));
 }
 
 /*
  * Convert block (dablk) to block (DB)
  */
-#define	XFS_DIR2_DA_TO_DB(mp,da)	xfs_dir2_da_to_db(mp, da)
 static inline xfs_dir2_db_t
 xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
 {
@@ -231,11 +215,10 @@ xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
 /*
  * Convert block (dablk) to byte offset in space
  */
-#define XFS_DIR2_DA_TO_BYTE(mp,da)	xfs_dir2_da_to_byte(mp, da)
 static inline xfs_dir2_off_t
 xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
 {
-	return XFS_DIR2_DB_OFF_TO_BYTE(mp, XFS_DIR2_DA_TO_DB(mp, da), 0);
+	return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0);
 }
 
 /*
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index d083c3819934..91c61d9632c8 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -136,14 +136,14 @@ xfs_dir2_leaf_to_node(
 	/*
 	 * Get the buffer for the new freespace block.
 	 */
-	if ((error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fdb), -1, &fbp,
+	if ((error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb), -1, &fbp,
 			XFS_DATA_FORK))) {
 		return error;
 	}
 	ASSERT(fbp != NULL);
 	free = fbp->data;
 	leaf = lbp->data;
-	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
 	/*
 	 * Initialize the freespace block header.
 	 */
@@ -155,7 +155,7 @@ xfs_dir2_leaf_to_node(
 	 * Copy freespace entries from the leaf block to the new block.
 	 * Count active entries.
 	 */
-	for (i = n = 0, from = XFS_DIR2_LEAF_BESTS_P(ltp), to = free->bests;
+	for (i = n = 0, from = xfs_dir2_leaf_bests_p(ltp), to = free->bests;
 	     i < be32_to_cpu(ltp->bestcount); i++, from++, to++) {
 		if ((off = be16_to_cpu(*from)) != NULLDATAOFF)
 			n++;
@@ -215,7 +215,7 @@ xfs_dir2_leafn_add(
 	 * a compact.
 	 */
 
-	if (be16_to_cpu(leaf->hdr.count) == XFS_DIR2_MAX_LEAF_ENTS(mp)) {
+	if (be16_to_cpu(leaf->hdr.count) == xfs_dir2_max_leaf_ents(mp)) {
 		if (!leaf->hdr.stale)
 			return XFS_ERROR(ENOSPC);
 		compact = be16_to_cpu(leaf->hdr.stale) > 1;
@@ -327,7 +327,7 @@ xfs_dir2_leafn_add(
 	 * Insert the new entry, log everything.
 	 */
 	lep->hashval = cpu_to_be32(args->hashval);
-	lep->address = cpu_to_be32(XFS_DIR2_DB_OFF_TO_DATAPTR(mp,
+	lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp,
 				args->blkno, args->index));
 	xfs_dir2_leaf_log_header(tp, bp);
 	xfs_dir2_leaf_log_ents(tp, bp, lfloglow, lfloghigh);
@@ -352,7 +352,7 @@ xfs_dir2_leafn_check(
 	leaf = bp->data;
 	mp = dp->i_mount;
 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
-	ASSERT(be16_to_cpu(leaf->hdr.count) <= XFS_DIR2_MAX_LEAF_ENTS(mp));
+	ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
 	for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
 		if (i + 1 < be16_to_cpu(leaf->hdr.count)) {
 			ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
@@ -440,7 +440,7 @@ xfs_dir2_leafn_lookup_int(
 	if (args->addname) {
 		curfdb = curbp ? state->extrablk.blkno : -1;
 		curdb = -1;
-		length = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+		length = xfs_dir2_data_entsize(args->namelen);
 		if ((free = (curbp ? curbp->data : NULL)))
 			ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
 	}
@@ -465,7 +465,7 @@ xfs_dir2_leafn_lookup_int(
 		/*
 		 * Pull the data block number from the entry.
 		 */
-		newdb = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
+		newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
 		/*
 		 * For addname, we're looking for a place to put the new entry.
 		 * We want to use a data block with an entry of equal
@@ -482,7 +482,7 @@ xfs_dir2_leafn_lookup_int(
 				 * Convert the data block to the free block
 				 * holding its freespace information.
 				 */
-				newfdb = XFS_DIR2_DB_TO_FDB(mp, newdb);
+				newfdb = xfs_dir2_db_to_fdb(mp, newdb);
 				/*
 				 * If it's not the one we have in hand,
 				 * read it in.
@@ -497,7 +497,7 @@ xfs_dir2_leafn_lookup_int(
 					 * Read the free block.
 					 */
 					if ((error = xfs_da_read_buf(tp, dp,
-							XFS_DIR2_DB_TO_DA(mp,
+							xfs_dir2_db_to_da(mp,
 								newfdb),
 							-1, &curbp,
 							XFS_DATA_FORK))) {
@@ -517,7 +517,7 @@ xfs_dir2_leafn_lookup_int(
 				/*
 				 * Get the index for our entry.
 				 */
-				fi = XFS_DIR2_DB_TO_FDINDEX(mp, curdb);
+				fi = xfs_dir2_db_to_fdindex(mp, curdb);
 				/*
 				 * If it has room, return it.
 				 */
@@ -561,7 +561,7 @@ xfs_dir2_leafn_lookup_int(
 				 */
 				if ((error =
 				    xfs_da_read_buf(tp, dp,
-					    XFS_DIR2_DB_TO_DA(mp, newdb), -1,
+					    xfs_dir2_db_to_da(mp, newdb), -1,
 					    &curbp, XFS_DATA_FORK))) {
 					return error;
 				}
@@ -573,7 +573,7 @@ xfs_dir2_leafn_lookup_int(
 			 */
 			dep = (xfs_dir2_data_entry_t *)
 			      ((char *)curbp->data +
-			       XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address)));
+			       xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
 			/*
 			 * Compare the entry, return it if it matches.
 			 */
@@ -876,9 +876,9 @@ xfs_dir2_leafn_remove(
 	/*
 	 * Extract the data block and offset from the entry.
 	 */
-	db = XFS_DIR2_DATAPTR_TO_DB(mp, be32_to_cpu(lep->address));
+	db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
 	ASSERT(dblk->blkno == db);
-	off = XFS_DIR2_DATAPTR_TO_OFF(mp, be32_to_cpu(lep->address));
+	off = xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address));
 	ASSERT(dblk->index == off);
 	/*
 	 * Kill the leaf entry by marking it stale.
@@ -898,7 +898,7 @@ xfs_dir2_leafn_remove(
 	longest = be16_to_cpu(data->hdr.bestfree[0].length);
 	needlog = needscan = 0;
 	xfs_dir2_data_make_free(tp, dbp, off,
-		XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan);
+		xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
 	/*
 	 * Rescan the data block freespaces for bestfree.
 	 * Log the data block header if needed.
@@ -924,8 +924,8 @@ xfs_dir2_leafn_remove(
 		 * Convert the data block number to a free block,
 		 * read in the free block.
 		 */
-		fdb = XFS_DIR2_DB_TO_FDB(mp, db);
-		if ((error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fdb),
+		fdb = xfs_dir2_db_to_fdb(mp, db);
+		if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb),
 				-1, &fbp, XFS_DATA_FORK))) {
 			return error;
 		}
@@ -937,7 +937,7 @@ xfs_dir2_leafn_remove(
 		/*
 		 * Calculate which entry we need to fix.
 		 */
-		findex = XFS_DIR2_DB_TO_FDINDEX(mp, db);
+		findex = xfs_dir2_db_to_fdindex(mp, db);
 		longest = be16_to_cpu(data->hdr.bestfree[0].length);
 		/*
 		 * If the data block is now empty we can get rid of it
@@ -1073,7 +1073,7 @@ xfs_dir2_leafn_split(
 	/*
 	 * Initialize the new leaf block.
 	 */
-	error = xfs_dir2_leaf_init(args, XFS_DIR2_DA_TO_DB(mp, blkno),
+	error = xfs_dir2_leaf_init(args, xfs_dir2_da_to_db(mp, blkno),
 		&newblk->bp, XFS_DIR2_LEAFN_MAGIC);
 	if (error) {
 		return error;
@@ -1385,7 +1385,7 @@ xfs_dir2_node_addname_int(
 	dp = args->dp;
 	mp = dp->i_mount;
 	tp = args->trans;
-	length = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+	length = xfs_dir2_data_entsize(args->namelen);
 	/*
 	 * If we came in with a freespace block that means that lookup
 	 * found an entry with our hash value.  This is the freespace
@@ -1438,7 +1438,7 @@ xfs_dir2_node_addname_int(
 
 		if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK)))
 			return error;
-		lastfbno = XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)fo);
+		lastfbno = xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo);
 		fbno = ifbno;
 	}
 	/*
@@ -1474,7 +1474,7 @@ xfs_dir2_node_addname_int(
 			 * to avoid it.
 			 */
 			if ((error = xfs_da_read_buf(tp, dp,
-					XFS_DIR2_DB_TO_DA(mp, fbno), -2, &fbp,
+					xfs_dir2_db_to_da(mp, fbno), -2, &fbp,
 					XFS_DATA_FORK))) {
 				return error;
 			}
@@ -1550,9 +1550,9 @@ xfs_dir2_node_addname_int(
 		 * Get the freespace block corresponding to the data block
 		 * that was just allocated.
 		 */
-		fbno = XFS_DIR2_DB_TO_FDB(mp, dbno);
+		fbno = xfs_dir2_db_to_fdb(mp, dbno);
 		if (unlikely(error = xfs_da_read_buf(tp, dp,
-				XFS_DIR2_DB_TO_DA(mp, fbno), -2, &fbp,
+				xfs_dir2_db_to_da(mp, fbno), -2, &fbp,
 				XFS_DATA_FORK))) {
 			xfs_da_buf_done(dbp);
 			return error;
@@ -1567,14 +1567,14 @@ xfs_dir2_node_addname_int(
 				return error;
 			}
 
-			if (unlikely(XFS_DIR2_DB_TO_FDB(mp, dbno) != fbno)) {
+			if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) {
 				cmn_err(CE_ALERT,
 					"xfs_dir2_node_addname_int: dir ino "
 					"%llu needed freesp block %lld for\n"
 					"  data block %lld, got %lld\n"
 					"  ifbno %llu lastfbno %d\n",
 					(unsigned long long)dp->i_ino,
-					(long long)XFS_DIR2_DB_TO_FDB(mp, dbno),
+					(long long)xfs_dir2_db_to_fdb(mp, dbno),
 					(long long)dbno, (long long)fbno,
 					(unsigned long long)ifbno, lastfbno);
 				if (fblk) {
@@ -1598,7 +1598,7 @@ xfs_dir2_node_addname_int(
 			 * Get a buffer for the new block.
 			 */
 			if ((error = xfs_da_get_buf(tp, dp,
-						   XFS_DIR2_DB_TO_DA(mp, fbno),
+						   xfs_dir2_db_to_da(mp, fbno),
 						   -1, &fbp, XFS_DATA_FORK))) {
 				return error;
 			}
@@ -1623,7 +1623,7 @@ xfs_dir2_node_addname_int(
 		/*
 		 * Set the freespace block index from the data block number.
 		 */
-		findex = XFS_DIR2_DB_TO_FDINDEX(mp, dbno);
+		findex = xfs_dir2_db_to_fdindex(mp, dbno);
 		/*
 		 * If it's after the end of the current entries in the
 		 * freespace block, extend that table.
@@ -1669,7 +1669,7 @@ xfs_dir2_node_addname_int(
 		 * Read the data block in.
 		 */
 		if (unlikely(
-		    error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, dbno),
+		    error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, dbno),
 				-1, &dbp, XFS_DATA_FORK))) {
 			if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
 				xfs_da_buf_done(fbp);
@@ -1698,7 +1698,7 @@ xfs_dir2_node_addname_int(
 	dep->inumber = cpu_to_be64(args->inumber);
 	dep->namelen = args->namelen;
 	memcpy(dep->name, args->name, dep->namelen);
-	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+	tagp = xfs_dir2_data_entry_tag_p(dep);
 	*tagp = cpu_to_be16((char *)dep - (char *)data);
 	xfs_dir2_data_log_entry(tp, dbp, dep);
 	/*
@@ -1904,7 +1904,7 @@ xfs_dir2_node_replace(
 		ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
 		dep = (xfs_dir2_data_entry_t *)
 		      ((char *)data +
-		       XFS_DIR2_DATAPTR_TO_OFF(state->mp, be32_to_cpu(lep->address)));
+		       xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address)));
 		ASSERT(inum != be64_to_cpu(dep->inumber));
 		/*
 		 * Fill in the new inode number and log the entry.
@@ -1980,7 +1980,7 @@ xfs_dir2_node_trim_free(
 	 * Blow the block away.
 	 */
 	if ((error =
-	    xfs_dir2_shrink_inode(args, XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)fo),
+	    xfs_dir2_shrink_inode(args, xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo),
 		    bp))) {
 		/*
 		 * Can't fail with ENOSPC since that only happens with no
diff --git a/fs/xfs/xfs_dir2_node.h b/fs/xfs/xfs_dir2_node.h
index c7c870ee7857..dde72db3d695 100644
--- a/fs/xfs/xfs_dir2_node.h
+++ b/fs/xfs/xfs_dir2_node.h
@@ -36,7 +36,7 @@ struct xfs_trans;
 #define	XFS_DIR2_FREE_SPACE	2
 #define	XFS_DIR2_FREE_OFFSET	(XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
 #define	XFS_DIR2_FREE_FIRSTDB(mp)	\
-	XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_FREE_OFFSET)
+	xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET)
 
 #define	XFS_DIR2_FREE_MAGIC	0x58443246	/* XD2F */
 
@@ -60,7 +60,6 @@ typedef struct xfs_dir2_free {
 /*
  * Convert data space db to the corresponding free db.
  */
-#define	XFS_DIR2_DB_TO_FDB(mp,db)	xfs_dir2_db_to_fdb(mp, db)
 static inline xfs_dir2_db_t
 xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
 {
@@ -70,7 +69,6 @@ xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
 /*
  * Convert data space db to the corresponding index in a free db.
  */
-#define	XFS_DIR2_DB_TO_FDINDEX(mp,db)	xfs_dir2_db_to_fdindex(mp, db)
 static inline int
 xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
 {
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 0cd77b17bf92..38fc4f22b76d 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -89,8 +89,8 @@ xfs_dir2_block_sfsize(
 	mp = dp->i_mount;
 
 	count = i8count = namelen = 0;
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
-	blp = XFS_DIR2_BLOCK_LEAF_P(btp);
+	btp = xfs_dir2_block_tail_p(mp, block);
+	blp = xfs_dir2_block_leaf_p(btp);
 
 	/*
 	 * Iterate over the block's data entries by using the leaf pointers.
@@ -102,7 +102,7 @@ xfs_dir2_block_sfsize(
 		 * Calculate the pointer to the entry at hand.
 		 */
 		dep = (xfs_dir2_data_entry_t *)
-		      ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, addr));
+		      ((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
 		/*
 		 * Detect . and .., so we can special-case them.
 		 * . is not included in sf directories.
@@ -124,7 +124,7 @@ xfs_dir2_block_sfsize(
 		/*
 		 * Calculate the new size, see if we should give up yet.
 		 */
-		size = XFS_DIR2_SF_HDR_SIZE(i8count) +		/* header */
+		size = xfs_dir2_sf_hdr_size(i8count) +		/* header */
 		       count +					/* namelen */
 		       count * (uint)sizeof(xfs_dir2_sf_off_t) + /* offset */
 		       namelen +				/* name */
@@ -139,7 +139,7 @@ xfs_dir2_block_sfsize(
 	 */
 	sfhp->count = count;
 	sfhp->i8count = i8count;
-	XFS_DIR2_SF_PUT_INUMBER((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent);
+	xfs_dir2_sf_put_inumber((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent);
 	return size;
 }
 
@@ -199,15 +199,15 @@ xfs_dir2_block_to_sf(
 	 * Copy the header into the newly allocate local space.
 	 */
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-	memcpy(sfp, sfhp, XFS_DIR2_SF_HDR_SIZE(sfhp->i8count));
+	memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count));
 	dp->i_d.di_size = size;
 	/*
 	 * Set up to loop over the block's entries.
 	 */
-	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	btp = xfs_dir2_block_tail_p(mp, block);
 	ptr = (char *)block->u;
-	endptr = (char *)XFS_DIR2_BLOCK_LEAF_P(btp);
-	sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	endptr = (char *)xfs_dir2_block_leaf_p(btp);
+	sfep = xfs_dir2_sf_firstentry(sfp);
 	/*
 	 * Loop over the active and unused entries.
 	 * Stop when we reach the leaf/tail portion of the block.
@@ -233,22 +233,22 @@ xfs_dir2_block_to_sf(
 		else if (dep->namelen == 2 &&
 			 dep->name[0] == '.' && dep->name[1] == '.')
 			ASSERT(be64_to_cpu(dep->inumber) ==
-			       XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent));
+			       xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
 		/*
 		 * Normal entry, copy it into shortform.
 		 */
 		else {
 			sfep->namelen = dep->namelen;
-			XFS_DIR2_SF_PUT_OFFSET(sfep,
+			xfs_dir2_sf_put_offset(sfep,
 				(xfs_dir2_data_aoff_t)
 				((char *)dep - (char *)block));
 			memcpy(sfep->name, dep->name, dep->namelen);
 			temp = be64_to_cpu(dep->inumber);
-			XFS_DIR2_SF_PUT_INUMBER(sfp, &temp,
-				XFS_DIR2_SF_INUMBERP(sfep));
-			sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+			xfs_dir2_sf_put_inumber(sfp, &temp,
+				xfs_dir2_sf_inumberp(sfep));
+			sfep = xfs_dir2_sf_nextentry(sfp, sfep);
 		}
-		ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+		ptr += xfs_dir2_data_entsize(dep->namelen);
 	}
 	ASSERT((char *)sfep - (char *)sfp == size);
 	xfs_dir2_sf_check(args);
@@ -294,11 +294,11 @@ xfs_dir2_sf_addname(
 	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
 	ASSERT(dp->i_df.if_u1.if_data != NULL);
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-	ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
 	/*
 	 * Compute entry (and change in) size.
 	 */
-	add_entsize = XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen);
+	add_entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen);
 	incr_isize = add_entsize;
 	objchange = 0;
 #if XFS_BIG_INUMS
@@ -392,7 +392,7 @@ xfs_dir2_sf_addname_easy(
 	/*
 	 * Grow the in-inode space.
 	 */
-	xfs_idata_realloc(dp, XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen),
+	xfs_idata_realloc(dp, xfs_dir2_sf_entsize_byname(sfp, args->namelen),
 		XFS_DATA_FORK);
 	/*
 	 * Need to set up again due to realloc of the inode data.
@@ -403,10 +403,10 @@ xfs_dir2_sf_addname_easy(
 	 * Fill in the new entry.
 	 */
 	sfep->namelen = args->namelen;
-	XFS_DIR2_SF_PUT_OFFSET(sfep, offset);
+	xfs_dir2_sf_put_offset(sfep, offset);
 	memcpy(sfep->name, args->name, sfep->namelen);
-	XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber,
-		XFS_DIR2_SF_INUMBERP(sfep));
+	xfs_dir2_sf_put_inumber(sfp, &args->inumber,
+		xfs_dir2_sf_inumberp(sfep));
 	/*
 	 * Update the header and inode.
 	 */
@@ -463,14 +463,14 @@ xfs_dir2_sf_addname_hard(
 	 * If it's going to end up at the end then oldsfep will point there.
 	 */
 	for (offset = XFS_DIR2_DATA_FIRST_OFFSET,
-	      oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp),
-	      add_datasize = XFS_DIR2_DATA_ENTSIZE(args->namelen),
+	      oldsfep = xfs_dir2_sf_firstentry(oldsfp),
+	      add_datasize = xfs_dir2_data_entsize(args->namelen),
 	      eof = (char *)oldsfep == &buf[old_isize];
 	     !eof;
-	     offset = new_offset + XFS_DIR2_DATA_ENTSIZE(oldsfep->namelen),
-	      oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep),
+	     offset = new_offset + xfs_dir2_data_entsize(oldsfep->namelen),
+	      oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep),
 	      eof = (char *)oldsfep == &buf[old_isize]) {
-		new_offset = XFS_DIR2_SF_GET_OFFSET(oldsfep);
+		new_offset = xfs_dir2_sf_get_offset(oldsfep);
 		if (offset + add_datasize <= new_offset)
 			break;
 	}
@@ -495,10 +495,10 @@ xfs_dir2_sf_addname_hard(
 	 * Fill in the new entry, and update the header counts.
 	 */
 	sfep->namelen = args->namelen;
-	XFS_DIR2_SF_PUT_OFFSET(sfep, offset);
+	xfs_dir2_sf_put_offset(sfep, offset);
 	memcpy(sfep->name, args->name, sfep->namelen);
-	XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber,
-		XFS_DIR2_SF_INUMBERP(sfep));
+	xfs_dir2_sf_put_inumber(sfp, &args->inumber,
+		xfs_dir2_sf_inumberp(sfep));
 	sfp->hdr.count++;
 #if XFS_BIG_INUMS
 	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
@@ -508,7 +508,7 @@ xfs_dir2_sf_addname_hard(
 	 * If there's more left to copy, do that.
 	 */
 	if (!eof) {
-		sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+		sfep = xfs_dir2_sf_nextentry(sfp, sfep);
 		memcpy(sfep, oldsfep, old_isize - nbytes);
 	}
 	kmem_free(buf, old_isize);
@@ -544,9 +544,9 @@ xfs_dir2_sf_addname_pick(
 	mp = dp->i_mount;
 
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-	size = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+	size = xfs_dir2_data_entsize(args->namelen);
 	offset = XFS_DIR2_DATA_FIRST_OFFSET;
-	sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	sfep = xfs_dir2_sf_firstentry(sfp);
 	holefit = 0;
 	/*
 	 * Loop over sf entries.
@@ -555,10 +555,10 @@ xfs_dir2_sf_addname_pick(
 	 */
 	for (i = 0; i < sfp->hdr.count; i++) {
 		if (!holefit)
-			holefit = offset + size <= XFS_DIR2_SF_GET_OFFSET(sfep);
-		offset = XFS_DIR2_SF_GET_OFFSET(sfep) +
-			 XFS_DIR2_DATA_ENTSIZE(sfep->namelen);
-		sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+			holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
+		offset = xfs_dir2_sf_get_offset(sfep) +
+			 xfs_dir2_data_entsize(sfep->namelen);
+		sfep = xfs_dir2_sf_nextentry(sfp, sfep);
 	}
 	/*
 	 * Calculate data bytes used excluding the new entry, if this
@@ -617,18 +617,18 @@ xfs_dir2_sf_check(
 
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 	offset = XFS_DIR2_DATA_FIRST_OFFSET;
-	ino = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent);
+	ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
 	i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
 
-	for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
 	     i < sfp->hdr.count;
-	     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
-		ASSERT(XFS_DIR2_SF_GET_OFFSET(sfep) >= offset);
-		ino = XFS_DIR2_SF_GET_INUMBER(sfp, XFS_DIR2_SF_INUMBERP(sfep));
+	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
+		ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset);
+		ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
 		i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
 		offset =
-			XFS_DIR2_SF_GET_OFFSET(sfep) +
-			XFS_DIR2_DATA_ENTSIZE(sfep->namelen);
+			xfs_dir2_sf_get_offset(sfep) +
+			xfs_dir2_data_entsize(sfep->namelen);
 	}
 	ASSERT(i8count == sfp->hdr.i8count);
 	ASSERT(XFS_BIG_INUMS || i8count == 0);
@@ -671,7 +671,7 @@ xfs_dir2_sf_create(
 	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
 	ASSERT(dp->i_df.if_bytes == 0);
 	i8count = pino > XFS_DIR2_MAX_SHORT_INUM;
-	size = XFS_DIR2_SF_HDR_SIZE(i8count);
+	size = xfs_dir2_sf_hdr_size(i8count);
 	/*
 	 * Make a buffer for the data.
 	 */
@@ -684,7 +684,7 @@ xfs_dir2_sf_create(
 	/*
 	 * Now can put in the inode number, since i8count is set.
 	 */
-	XFS_DIR2_SF_PUT_INUMBER(sfp, &pino, &sfp->hdr.parent);
+	xfs_dir2_sf_put_inumber(sfp, &pino, &sfp->hdr.parent);
 	sfp->hdr.count = 0;
 	dp->i_d.di_size = size;
 	xfs_dir2_sf_check(args);
@@ -727,12 +727,12 @@ xfs_dir2_sf_getdents(
 
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
 
-	ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
 
 	/*
 	 * If the block number in the offset is out of range, we're done.
 	 */
-	if (XFS_DIR2_DATAPTR_TO_DB(mp, dir_offset) > mp->m_dirdatablk) {
+	if (xfs_dir2_dataptr_to_db(mp, dir_offset) > mp->m_dirdatablk) {
 		*eofp = 1;
 		return 0;
 	}
@@ -747,9 +747,9 @@ xfs_dir2_sf_getdents(
 	 * Put . entry unless we're starting past it.
 	 */
 	if (dir_offset <=
-		    XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+		    xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 					       XFS_DIR2_DATA_DOT_OFFSET)) {
-		p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, 0,
+		p.cook = xfs_dir2_db_off_to_dataptr(mp, 0,
 						XFS_DIR2_DATA_DOTDOT_OFFSET);
 		p.ino = dp->i_ino;
 #if XFS_BIG_INUMS
@@ -762,7 +762,7 @@ xfs_dir2_sf_getdents(
 
 		if (!p.done) {
 			uio->uio_offset =
-				XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+				xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 						XFS_DIR2_DATA_DOT_OFFSET);
 			return error;
 		}
@@ -772,11 +772,11 @@ xfs_dir2_sf_getdents(
 	 * Put .. entry unless we're starting past it.
 	 */
 	if (dir_offset <=
-		    XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+		    xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 					       XFS_DIR2_DATA_DOTDOT_OFFSET)) {
-		p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+		p.cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 						XFS_DIR2_DATA_FIRST_OFFSET);
-		p.ino = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent);
+		p.ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
 #if XFS_BIG_INUMS
 		p.ino += mp->m_inoadd;
 #endif
@@ -787,7 +787,7 @@ xfs_dir2_sf_getdents(
 
 		if (!p.done) {
 			uio->uio_offset =
-				XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+				xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
 					XFS_DIR2_DATA_DOTDOT_OFFSET);
 			return error;
 		}
@@ -796,23 +796,23 @@ xfs_dir2_sf_getdents(
 	/*
 	 * Loop while there are more entries and put'ing works.
 	 */
-	for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
 		     i < sfp->hdr.count;
-			     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+			     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
 
-		off = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
-				XFS_DIR2_SF_GET_OFFSET(sfep));
+		off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
+				xfs_dir2_sf_get_offset(sfep));
 
 		if (dir_offset > off)
 			continue;
 
 		p.namelen = sfep->namelen;
 
-		p.cook = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
-			XFS_DIR2_SF_GET_OFFSET(sfep) +
-			XFS_DIR2_DATA_ENTSIZE(p.namelen));
+		p.cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
+			xfs_dir2_sf_get_offset(sfep) +
+			xfs_dir2_data_entsize(p.namelen));
 
-		p.ino = XFS_DIR2_SF_GET_INUMBER(sfp, XFS_DIR2_SF_INUMBERP(sfep));
+		p.ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
 #if XFS_BIG_INUMS
 		p.ino += mp->m_inoadd;
 #endif
@@ -832,7 +832,7 @@ xfs_dir2_sf_getdents(
 	*eofp = 1;
 
 	uio->uio_offset =
-		XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk + 1, 0);
+		xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0);
 
 	return 0;
 }
@@ -865,7 +865,7 @@ xfs_dir2_sf_lookup(
 	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
 	ASSERT(dp->i_df.if_u1.if_data != NULL);
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-	ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
 	/*
 	 * Special case for .
 	 */
@@ -878,21 +878,21 @@ xfs_dir2_sf_lookup(
 	 */
 	if (args->namelen == 2 &&
 	    args->name[0] == '.' && args->name[1] == '.') {
-		args->inumber = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent);
+		args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
 		return XFS_ERROR(EEXIST);
 	}
 	/*
 	 * Loop over all the entries trying to match ours.
 	 */
-	for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
 	     i < sfp->hdr.count;
-	     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
 		if (sfep->namelen == args->namelen &&
 		    sfep->name[0] == args->name[0] &&
 		    memcmp(args->name, sfep->name, args->namelen) == 0) {
 			args->inumber =
-				XFS_DIR2_SF_GET_INUMBER(sfp,
-					XFS_DIR2_SF_INUMBERP(sfep));
+				xfs_dir2_sf_get_inumber(sfp,
+					xfs_dir2_sf_inumberp(sfep));
 			return XFS_ERROR(EEXIST);
 		}
 	}
@@ -934,19 +934,19 @@ xfs_dir2_sf_removename(
 	ASSERT(dp->i_df.if_bytes == oldsize);
 	ASSERT(dp->i_df.if_u1.if_data != NULL);
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-	ASSERT(oldsize >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+	ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
 	/*
 	 * Loop over the old directory entries.
 	 * Find the one we're deleting.
 	 */
-	for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
 	     i < sfp->hdr.count;
-	     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
 		if (sfep->namelen == args->namelen &&
 		    sfep->name[0] == args->name[0] &&
 		    memcmp(sfep->name, args->name, args->namelen) == 0) {
-			ASSERT(XFS_DIR2_SF_GET_INUMBER(sfp,
-					XFS_DIR2_SF_INUMBERP(sfep)) ==
+			ASSERT(xfs_dir2_sf_get_inumber(sfp,
+					xfs_dir2_sf_inumberp(sfep)) ==
 				args->inumber);
 			break;
 		}
@@ -961,7 +961,7 @@ xfs_dir2_sf_removename(
 	 * Calculate sizes.
 	 */
 	byteoff = (int)((char *)sfep - (char *)sfp);
-	entsize = XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen);
+	entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen);
 	newsize = oldsize - entsize;
 	/*
 	 * Copy the part if any after the removed entry, sliding it down.
@@ -1027,7 +1027,7 @@ xfs_dir2_sf_replace(
 	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
 	ASSERT(dp->i_df.if_u1.if_data != NULL);
 	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
-	ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
 #if XFS_BIG_INUMS
 	/*
 	 * New inode number is large, and need to convert to 8-byte inodes.
@@ -1067,28 +1067,28 @@ xfs_dir2_sf_replace(
 	if (args->namelen == 2 &&
 	    args->name[0] == '.' && args->name[1] == '.') {
 #if XFS_BIG_INUMS || defined(DEBUG)
-		ino = XFS_DIR2_SF_GET_INUMBER(sfp, &sfp->hdr.parent);
+		ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
 		ASSERT(args->inumber != ino);
 #endif
-		XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber, &sfp->hdr.parent);
+		xfs_dir2_sf_put_inumber(sfp, &args->inumber, &sfp->hdr.parent);
 	}
 	/*
 	 * Normal entry, look for the name.
 	 */
 	else {
-		for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+		for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
 		     i < sfp->hdr.count;
-		     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+		     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
 			if (sfep->namelen == args->namelen &&
 			    sfep->name[0] == args->name[0] &&
 			    memcmp(args->name, sfep->name, args->namelen) == 0) {
 #if XFS_BIG_INUMS || defined(DEBUG)
-				ino = XFS_DIR2_SF_GET_INUMBER(sfp,
-					XFS_DIR2_SF_INUMBERP(sfep));
+				ino = xfs_dir2_sf_get_inumber(sfp,
+					xfs_dir2_sf_inumberp(sfep));
 				ASSERT(args->inumber != ino);
 #endif
-				XFS_DIR2_SF_PUT_INUMBER(sfp, &args->inumber,
-					XFS_DIR2_SF_INUMBERP(sfep));
+				xfs_dir2_sf_put_inumber(sfp, &args->inumber,
+					xfs_dir2_sf_inumberp(sfep));
 				break;
 			}
 		}
@@ -1189,22 +1189,22 @@ xfs_dir2_sf_toino4(
 	 */
 	sfp->hdr.count = oldsfp->hdr.count;
 	sfp->hdr.i8count = 0;
-	ino = XFS_DIR2_SF_GET_INUMBER(oldsfp, &oldsfp->hdr.parent);
-	XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, &sfp->hdr.parent);
+	ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent);
+	xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
 	/*
 	 * Copy the entries field by field.
 	 */
-	for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp),
-		    oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp);
+	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
+		    oldsfep = xfs_dir2_sf_firstentry(oldsfp);
 	     i < sfp->hdr.count;
-	     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep),
-		  oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) {
+	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
+		  oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
 		sfep->namelen = oldsfep->namelen;
 		sfep->offset = oldsfep->offset;
 		memcpy(sfep->name, oldsfep->name, sfep->namelen);
-		ino = XFS_DIR2_SF_GET_INUMBER(oldsfp,
-			XFS_DIR2_SF_INUMBERP(oldsfep));
-		XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep));
+		ino = xfs_dir2_sf_get_inumber(oldsfp,
+			xfs_dir2_sf_inumberp(oldsfep));
+		xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
 	}
 	/*
 	 * Clean up the inode.
@@ -1266,22 +1266,22 @@ xfs_dir2_sf_toino8(
 	 */
 	sfp->hdr.count = oldsfp->hdr.count;
 	sfp->hdr.i8count = 1;
-	ino = XFS_DIR2_SF_GET_INUMBER(oldsfp, &oldsfp->hdr.parent);
-	XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, &sfp->hdr.parent);
+	ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent);
+	xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
 	/*
 	 * Copy the entries field by field.
 	 */
-	for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp),
-		    oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp);
+	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
+		    oldsfep = xfs_dir2_sf_firstentry(oldsfp);
 	     i < sfp->hdr.count;
-	     i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep),
-		  oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) {
+	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
+		  oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
 		sfep->namelen = oldsfep->namelen;
 		sfep->offset = oldsfep->offset;
 		memcpy(sfep->name, oldsfep->name, sfep->namelen);
-		ino = XFS_DIR2_SF_GET_INUMBER(oldsfp,
-			XFS_DIR2_SF_INUMBERP(oldsfep));
-		XFS_DIR2_SF_PUT_INUMBER(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep));
+		ino = xfs_dir2_sf_get_inumber(oldsfp,
+			xfs_dir2_sf_inumberp(oldsfep));
+		xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
 	}
 	/*
 	 * Clean up the inode.
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
index 42f015b70018..11e503209afa 100644
--- a/fs/xfs/xfs_dir2_sf.h
+++ b/fs/xfs/xfs_dir2_sf.h
@@ -90,7 +90,6 @@ typedef struct xfs_dir2_sf {
 	xfs_dir2_sf_entry_t	list[1];	/* shortform entries */
 } xfs_dir2_sf_t;
 
-#define	XFS_DIR2_SF_HDR_SIZE(i8count)	xfs_dir2_sf_hdr_size(i8count)
 static inline int xfs_dir2_sf_hdr_size(int i8count)
 {
 	return ((uint)sizeof(xfs_dir2_sf_hdr_t) - \
@@ -98,14 +97,11 @@ static inline int xfs_dir2_sf_hdr_size(int i8count)
 		((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
 }
 
-#define	XFS_DIR2_SF_INUMBERP(sfep)	xfs_dir2_sf_inumberp(sfep)
 static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep)
 {
 	return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen];
 }
 
-#define	XFS_DIR2_SF_GET_INUMBER(sfp, from) \
-	xfs_dir2_sf_get_inumber(sfp, from)
 static inline xfs_intino_t
 xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from)
 {
@@ -114,8 +110,6 @@ xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from)
 		(xfs_intino_t)XFS_GET_DIR_INO8((from)->i8));
 }
 
-#define	XFS_DIR2_SF_PUT_INUMBER(sfp,from,to) \
-	xfs_dir2_sf_put_inumber(sfp,from,to)
 static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
 						xfs_dir2_inou_t *to)
 {
@@ -125,24 +119,18 @@ static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
 		XFS_PUT_DIR_INO8(*(from), (to)->i8);
 }
 
-#define	XFS_DIR2_SF_GET_OFFSET(sfep)	\
-	xfs_dir2_sf_get_offset(sfep)
 static inline xfs_dir2_data_aoff_t
 xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
 {
 	return INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i);
 }
 
-#define	XFS_DIR2_SF_PUT_OFFSET(sfep,off) \
-	xfs_dir2_sf_put_offset(sfep,off)
 static inline void
 xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
 {
 	INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i, off);
 }
 
-#define XFS_DIR2_SF_ENTSIZE_BYNAME(sfp,len)	\
-	xfs_dir2_sf_entsize_byname(sfp,len)
 static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len)
 {
 	return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \
@@ -150,8 +138,6 @@ static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len)
 		((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
 }
 
-#define XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep)	\
-	xfs_dir2_sf_entsize_byentry(sfp,sfep)
 static inline int
 xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
 {
@@ -160,19 +146,17 @@ xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
 		((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
 }
 
-#define XFS_DIR2_SF_FIRSTENTRY(sfp)	xfs_dir2_sf_firstentry(sfp)
 static inline xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp)
 {
 	return ((xfs_dir2_sf_entry_t *) \
-		((char *)(sfp) + XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)));
+		((char *)(sfp) + xfs_dir2_sf_hdr_size(sfp->hdr.i8count)));
 }
 
-#define XFS_DIR2_SF_NEXTENTRY(sfp,sfep)	xfs_dir2_sf_nextentry(sfp,sfep)
 static inline xfs_dir2_sf_entry_t *
 xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
 {
 	return ((xfs_dir2_sf_entry_t *) \
-		((char *)(sfep) + XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep)));
+		((char *)(sfep) + xfs_dir2_sf_entsize_byentry(sfp,sfep)));
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 16a087d8e1af9b974125870dceb9e4a35249ad1d Mon Sep 17 00:00:00 2001
From: Vignesh Babu <vignesh.babu@wipro.com>
Date: Thu, 28 Jun 2007 16:46:37 +1000
Subject: [XFS] Use is_power_of_2 instead of open coding checks

SGI-PV: 966576
SGI-Modid: xfs-linux-melb:xfs-kern:28950a

Signed-off-by: Vignesh Babu <vignesh.babu@wipro.com>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_inode.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3ca5d43b8345..8fdd30d9ba56 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -49,6 +49,7 @@
 #include "xfs_quota.h"
 #include "xfs_acl.h"
 
+#include <linux/log2.h>
 
 kmem_zone_t *xfs_ifork_zone;
 kmem_zone_t *xfs_inode_zone;
@@ -4184,7 +4185,7 @@ xfs_iext_realloc_direct(
 			ifp->if_bytes = new_size;
 			return;
 		}
-		if ((new_size & (new_size - 1)) != 0) {
+		if (!is_power_of_2(new_size)){
 			rnew_size = xfs_iroundup(new_size);
 		}
 		if (rnew_size != ifp->if_real_bytes) {
@@ -4207,7 +4208,7 @@ xfs_iext_realloc_direct(
 	 */
 	else {
 		new_size += ifp->if_bytes;
-		if ((new_size & (new_size - 1)) != 0) {
+		if (!is_power_of_2(new_size)) {
 			rnew_size = xfs_iroundup(new_size);
 		}
 		xfs_iext_inline_to_direct(ifp, rnew_size);
-- 
cgit v1.2.3-59-g8ed1b


From fbf3ce8d8ec508f6bd99b36de034d2ae3e1ae7ac Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Thu, 28 Jun 2007 16:46:47 +1000
Subject: [XFS] XFS should not be looking at filp reference counts

A check for file_count is always a bad idea. Linux has the ->release
method to deal with cleanups on last close and ->flush is only for the
very rare case where we want to perform an operation on every drop of a
reference to a file struct.

This patch gets rid of vop_close and surrounding code in favour of simply
doing the page flushing from ->release.

SGI-PV: 966562
SGI-Modid: xfs-linux-melb:xfs-kern:28952a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/linux-2.6/xfs_file.c  | 11 -----------
 fs/xfs/linux-2.6/xfs_vnode.h |  5 -----
 fs/xfs/xfs_vnodeops.c        | 47 +++++++++++++++-----------------------------
 3 files changed, 16 insertions(+), 47 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 8c43cd2e237a..cbcd40c8c2a0 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -183,15 +183,6 @@ xfs_file_open(
 	return -bhv_vop_open(vn_from_inode(inode), NULL);
 }
 
-STATIC int
-xfs_file_close(
-	struct file	*filp,
-	fl_owner_t	id)
-{
-	return -bhv_vop_close(vn_from_inode(filp->f_path.dentry->d_inode), 0,
-				file_count(filp) > 1 ? L_FALSE : L_TRUE, NULL);
-}
-
 STATIC int
 xfs_file_release(
 	struct inode	*inode,
@@ -436,7 +427,6 @@ const struct file_operations xfs_file_operations = {
 #endif
 	.mmap		= xfs_file_mmap,
 	.open		= xfs_file_open,
-	.flush		= xfs_file_close,
 	.release	= xfs_file_release,
 	.fsync		= xfs_file_fsync,
 #ifdef HAVE_FOP_OPEN_EXEC
@@ -458,7 +448,6 @@ const struct file_operations xfs_invis_file_operations = {
 #endif
 	.mmap		= xfs_file_mmap,
 	.open		= xfs_file_open,
-	.flush		= xfs_file_close,
 	.release	= xfs_file_release,
 	.fsync		= xfs_file_fsync,
 };
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 013048a92643..5742d65f0785 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -129,10 +129,7 @@ typedef enum bhv_vchange {
 	VCHANGE_FLAGS_IOEXCL_COUNT	= 4
 } bhv_vchange_t;
 
-typedef enum { L_FALSE, L_TRUE } lastclose_t;
-
 typedef int	(*vop_open_t)(bhv_desc_t *, struct cred *);
-typedef int	(*vop_close_t)(bhv_desc_t *, int, lastclose_t, struct cred *);
 typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
 				const struct iovec *, unsigned int,
 				loff_t *, int, struct cred *);
@@ -200,7 +197,6 @@ typedef int	(*vop_iflush_t)(bhv_desc_t *, int);
 typedef struct bhv_vnodeops {
 	bhv_position_t  vn_position;    /* position within behavior chain */
 	vop_open_t		vop_open;
-	vop_close_t		vop_close;
 	vop_read_t		vop_read;
 	vop_write_t		vop_write;
 	vop_splice_read_t	vop_splice_read;
@@ -245,7 +241,6 @@ typedef struct bhv_vnodeops {
 #define VNHEAD(vp)	((vp)->v_bh.bh_first)
 #define VOP(op, vp)	(*((bhv_vnodeops_t *)VNHEAD(vp)->bd_ops)->op)
 #define bhv_vop_open(vp, cr)		VOP(vop_open, vp)(VNHEAD(vp),cr)
-#define bhv_vop_close(vp, f,last,cr)	VOP(vop_close, vp)(VNHEAD(vp),f,last,cr)
 #define bhv_vop_read(vp,file,iov,segs,offset,ioflags,cr)		\
 		VOP(vop_read, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
 #define bhv_vop_write(vp,file,iov,segs,offset,ioflags,cr)		\
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 5dbca95598e0..2067d0b0a10e 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -77,36 +77,6 @@ xfs_open(
 	return 0;
 }
 
-STATIC int
-xfs_close(
-	bhv_desc_t	*bdp,
-	int		flags,
-	lastclose_t	lastclose,
-	cred_t		*credp)
-{
-	bhv_vnode_t	*vp = BHV_TO_VNODE(bdp);
-	xfs_inode_t	*ip = XFS_BHVTOI(bdp);
-
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-		return XFS_ERROR(EIO);
-
-	if (lastclose != L_TRUE || !VN_ISREG(vp))
-		return 0;
-
-	/*
-	 * If we previously truncated this file and removed old data in
-	 * the process, we want to initiate "early" writeout on the last
-	 * close.  This is an attempt to combat the notorious NULL files
-	 * problem which is particularly noticable from a truncate down,
-	 * buffered (re-)write (delalloc), followed by a crash.  What we
-	 * are effectively doing here is significantly reducing the time
-	 * window where we'd otherwise be exposed to that problem.
-	 */
-	if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0)
-		return bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE);
-	return 0;
-}
-
 /*
  * xfs_getattr
  */
@@ -1566,6 +1536,22 @@ xfs_release(
 	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
 		return 0;
 
+	if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+		/*
+		 * If we previously truncated this file and removed old data
+		 * in the process, we want to initiate "early" writeout on
+		 * the last close.  This is an attempt to combat the notorious
+		 * NULL files problem which is particularly noticable from a
+		 * truncate down, buffered (re-)write (delalloc), followed by
+		 * a crash.  What we are effectively doing here is
+		 * significantly reducing the time window where we'd otherwise
+		 * be exposed to that problem.
+		 */
+		if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0)
+			bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE);
+	}
+
+
 #ifdef HAVE_REFCACHE
 	/* If we are in the NFS reference cache then don't do this now */
 	if (ip->i_refcache)
@@ -4681,7 +4667,6 @@ xfs_change_file_space(
 bhv_vnodeops_t xfs_vnodeops = {
 	BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS),
 	.vop_open		= xfs_open,
-	.vop_close		= xfs_close,
 	.vop_read		= xfs_read,
 #ifdef HAVE_SPLICE
 	.vop_splice_read	= xfs_splice_read,
-- 
cgit v1.2.3-59-g8ed1b


From 0892ccd6fe13e08ad9e57007afbb78fe02d66005 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 28 Jun 2007 16:46:56 +1000
Subject: [XFS] Use uninitialized_var macro to stop warning about rtx

Appease gcc in regards to "warning: 'rtx' is used uninitialized in
this function".

SGI-PV: 907752
SGI-Modid: xfs-linux-melb:xfs-kern:29007a

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_bmap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 183add2f21b7..09d86388bb71 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2605,7 +2605,6 @@ xfs_bmap_rtalloc(
 	xfs_extlen_t	prod = 0;	/* product factor for allocators */
 	xfs_extlen_t	ralen = 0;	/* realtime allocation length */
 	xfs_extlen_t	align;		/* minimum allocation alignment */
-	xfs_rtblock_t	rtx;		/* realtime extent number */
 	xfs_rtblock_t	rtb;
 
 	mp = ap->ip->i_mount;
@@ -2643,6 +2642,8 @@ xfs_bmap_rtalloc(
 	 * pick an extent that will space things out in the rt area.
 	 */
 	if (ap->eof && ap->off == 0) {
+		xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */
+
 		error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
 		if (error)
 			return error;
-- 
cgit v1.2.3-59-g8ed1b


From 2a82b8be8a8dacb48cb7371449a7a9daa558b4a8 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Wed, 11 Jul 2007 11:09:12 +1000
Subject: [XFS] Concurrent Multi-File Data Streams

In media spaces, video is often stored in a frame-per-file format. When
dealing with uncompressed realtime HD video streams in this format, it is
crucial that files do not get fragmented and that multiple files a placed
contiguously on disk.

When multiple streams are being ingested and played out at the same time,
it is critical that the filesystem does not cross the streams and
interleave them together as this creates seek and readahead cache miss
latency and prevents both ingest and playout from meeting frame rate
targets.

This patch set creates a "stream of files" concept into the allocator to
place all the data from a single stream contiguously on disk so that RAID
array readahead can be used effectively. Each additional stream gets
placed in different allocation groups within the filesystem, thereby
ensuring that we don't cross any streams. When an AG fills up, we select a
new AG for the stream that is not in use.

The core of the functionality is the stream tracking - each inode that we
create in a directory needs to be associated with the directories' stream.
Hence every time we create a file, we look up the directories' stream
object and associate the new file with that object.

Once we have a stream object for a file, we use the AG that the stream
object point to for allocations. If we can't allocate in that AG (e.g. it
is full) we move the entire stream to another AG. Other inodes in the same
stream are moved to the new AG on their next allocation (i.e. lazy
update).

Stream objects are kept in a cache and hold a reference on the inode.
Hence the inode cannot be reclaimed while there is an outstanding stream
reference. This means that on unlink we need to remove the stream
association and we also need to flush all the associations on certain
events that want to reclaim all unreferenced inodes (e.g. filesystem
freeze).

SGI-PV: 964469
SGI-Modid: xfs-linux-melb:xfs-kern:29096a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: Donald Douwsma <donaldd@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Vlad Apostolov <vapo@sgi.com>
---
 fs/xfs/Makefile-linux-2.6      |   2 +
 fs/xfs/linux-2.6/xfs_globals.c |   1 +
 fs/xfs/linux-2.6/xfs_linux.h   |   1 +
 fs/xfs/linux-2.6/xfs_sysctl.c  |  11 +
 fs/xfs/linux-2.6/xfs_sysctl.h  |   2 +
 fs/xfs/xfs.h                   |   1 +
 fs/xfs/xfs_ag.h                |   1 +
 fs/xfs/xfs_bmap.c              |  69 +++-
 fs/xfs/xfs_clnt.h              |   2 +
 fs/xfs/xfs_dinode.h            |   4 +-
 fs/xfs/xfs_filestream.c        | 771 +++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_filestream.h        | 136 ++++++++
 fs/xfs/xfs_fs.h                |   1 +
 fs/xfs/xfs_fsops.c             |   2 +
 fs/xfs/xfs_inode.c             |  17 +-
 fs/xfs/xfs_inode.h             |   1 +
 fs/xfs/xfs_mount.h             |   4 +
 fs/xfs/xfs_mru_cache.c         | 608 ++++++++++++++++++++++++++++++++
 fs/xfs/xfs_mru_cache.h         |  57 +++
 fs/xfs/xfs_vfsops.c            |  26 ++
 fs/xfs/xfs_vnodeops.c          |  25 +-
 21 files changed, 1730 insertions(+), 12 deletions(-)
 create mode 100644 fs/xfs/xfs_filestream.c
 create mode 100644 fs/xfs/xfs_filestream.h
 create mode 100644 fs/xfs/xfs_mru_cache.c
 create mode 100644 fs/xfs/xfs_mru_cache.h

(limited to 'fs')

diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
index b49989bb89ad..e7a9a83f0087 100644
--- a/fs/xfs/Makefile-linux-2.6
+++ b/fs/xfs/Makefile-linux-2.6
@@ -64,6 +64,7 @@ xfs-y				+= xfs_alloc.o \
 				   xfs_dir2_sf.o \
 				   xfs_error.o \
 				   xfs_extfree_item.o \
+				   xfs_filestream.o \
 				   xfs_fsops.o \
 				   xfs_ialloc.o \
 				   xfs_ialloc_btree.o \
@@ -77,6 +78,7 @@ xfs-y				+= xfs_alloc.o \
 				   xfs_log.o \
 				   xfs_log_recover.o \
 				   xfs_mount.o \
+				   xfs_mru_cache.o \
 				   xfs_rename.o \
 				   xfs_trans.o \
 				   xfs_trans_ail.o \
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index ed3a5e1b4b67..bb72c3d4141f 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -46,6 +46,7 @@ xfs_param_t xfs_params = {
 	.inherit_nosym	= {	0,		0,		1	},
 	.rotorstep	= {	1,		1,		255	},
 	.inherit_nodfrg	= {	0,		1,		1	},
+	.fstrm_timer	= {	1,		50,		3600*100},
 };
 
 /*
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index af24a457d3a3..330c4ba9d404 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -123,6 +123,7 @@
 #define xfs_inherit_nosymlinks	xfs_params.inherit_nosym.val
 #define xfs_rotorstep		xfs_params.rotorstep.val
 #define xfs_inherit_nodefrag	xfs_params.inherit_nodfrg.val
+#define xfs_fstrm_centisecs	xfs_params.fstrm_timer.val
 
 #define current_cpu()		(raw_smp_processor_id())
 #define current_pid()		(current->pid)
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index cd6eaa44aa2b..bb997d75c05c 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -210,6 +210,17 @@ static ctl_table xfs_table[] = {
 		.extra1		= &xfs_params.inherit_nodfrg.min,
 		.extra2		= &xfs_params.inherit_nodfrg.max
 	},
+	{
+		.ctl_name	= XFS_FILESTREAM_TIMER,
+		.procname	= "filestream_centisecs",
+		.data		= &xfs_params.fstrm_timer.val,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &xfs_params.fstrm_timer.min,
+		.extra2		= &xfs_params.fstrm_timer.max,
+	},
 	/* please keep this the last entry */
 #ifdef CONFIG_PROC_FS
 	{
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index a631fb8cc5ac..98b97e399d6f 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -47,6 +47,7 @@ typedef struct xfs_param {
 	xfs_sysctl_val_t inherit_nosym;	/* Inherit the "nosymlinks" flag. */
 	xfs_sysctl_val_t rotorstep;	/* inode32 AG rotoring control knob */
 	xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
+	xfs_sysctl_val_t fstrm_timer;	/* Filestream dir-AG assoc'n timeout. */
 } xfs_param_t;
 
 /*
@@ -86,6 +87,7 @@ enum {
 	XFS_INHERIT_NOSYM = 19,
 	XFS_ROTORSTEP = 20,
 	XFS_INHERIT_NODFRG = 21,
+	XFS_FILESTREAM_TIMER = 22,
 };
 
 extern xfs_param_t	xfs_params;
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index bf0a12040b13..b5a7d92c6843 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -38,6 +38,7 @@
 #define XFS_RW_TRACE 1
 #define XFS_BUF_TRACE 1
 #define XFS_VNODE_TRACE 1
+#define XFS_FILESTREAMS_TRACE 1
 #endif
 
 #include <linux-2.6/xfs_linux.h>
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index b1dd0029c60e..51c09c114a20 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -196,6 +196,7 @@ typedef struct xfs_perag
 	lock_t		pagb_lock;	/* lock for pagb_list */
 #endif
 	xfs_perag_busy_t *pagb_list;	/* unstable blocks */
+	atomic_t        pagf_fstrms;    /* # of filestreams active in this AG */
 } xfs_perag_t;
 
 #define	XFS_AG_MAXLEVELS(mp)		((mp)->m_ag_maxlevels)
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 09d86388bb71..51ba689a4552 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -52,6 +52,7 @@
 #include "xfs_quota.h"
 #include "xfs_trans_space.h"
 #include "xfs_buf_item.h"
+#include "xfs_filestream.h"
 
 
 #ifdef DEBUG
@@ -2725,9 +2726,15 @@ xfs_bmap_btalloc(
 	}
 	nullfb = ap->firstblock == NULLFSBLOCK;
 	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
-	if (nullfb)
-		ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
-	else
+	if (nullfb) {
+		if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
+			ag = xfs_filestream_lookup_ag(ap->ip);
+			ag = (ag != NULLAGNUMBER) ? ag : 0;
+			ap->rval = XFS_AGB_TO_FSB(mp, ag, 0);
+		} else {
+			ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
+		}
+	} else
 		ap->rval = ap->firstblock;
 
 	xfs_bmap_adjacent(ap);
@@ -2751,13 +2758,22 @@ xfs_bmap_btalloc(
 	args.firstblock = ap->firstblock;
 	blen = 0;
 	if (nullfb) {
-		args.type = XFS_ALLOCTYPE_START_BNO;
+		if (ap->userdata && xfs_inode_is_filestream(ap->ip))
+			args.type = XFS_ALLOCTYPE_NEAR_BNO;
+		else
+			args.type = XFS_ALLOCTYPE_START_BNO;
 		args.total = ap->total;
+
 		/*
-		 * Find the longest available space.
-		 * We're going to try for the whole allocation at once.
+		 * Search for an allocation group with a single extent
+		 * large enough for the request.
+		 *
+		 * If one isn't found, then adjust the minimum allocation
+		 * size to the largest space found.
 		 */
 		startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno);
+		if (startag == NULLAGNUMBER)
+			startag = ag = 0;
 		notinit = 0;
 		down_read(&mp->m_peraglock);
 		while (blen < ap->alen) {
@@ -2783,6 +2799,35 @@ xfs_bmap_btalloc(
 					blen = longest;
 			} else
 				notinit = 1;
+
+			if (xfs_inode_is_filestream(ap->ip)) {
+				if (blen >= ap->alen)
+					break;
+
+				if (ap->userdata) {
+					/*
+					 * If startag is an invalid AG, we've
+					 * come here once before and
+					 * xfs_filestream_new_ag picked the
+					 * best currently available.
+					 *
+					 * Don't continue looping, since we
+					 * could loop forever.
+					 */
+					if (startag == NULLAGNUMBER)
+						break;
+
+					error = xfs_filestream_new_ag(ap, &ag);
+					if (error) {
+						up_read(&mp->m_peraglock);
+						return error;
+					}
+
+					/* loop again to set 'blen'*/
+					startag = NULLAGNUMBER;
+					continue;
+				}
+			}
 			if (++ag == mp->m_sb.sb_agcount)
 				ag = 0;
 			if (ag == startag)
@@ -2807,8 +2852,18 @@ xfs_bmap_btalloc(
 		 */
 		else
 			args.minlen = ap->alen;
+
+		/*
+		 * set the failure fallback case to look in the selected
+		 * AG as the stream may have moved.
+		 */
+		if (xfs_inode_is_filestream(ap->ip))
+			ap->rval = args.fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
 	} else if (ap->low) {
-		args.type = XFS_ALLOCTYPE_START_BNO;
+		if (xfs_inode_is_filestream(ap->ip))
+			args.type = XFS_ALLOCTYPE_FIRST_AG;
+		else
+			args.type = XFS_ALLOCTYPE_START_BNO;
 		args.total = args.minlen = ap->minlen;
 	} else {
 		args.type = XFS_ALLOCTYPE_NEAR_BNO;
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index 5b7eb81453be..f89196cb08d2 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -99,5 +99,7 @@ struct xfs_mount_args {
  */
 #define XFSMNT2_COMPAT_IOSIZE	0x00000001	/* don't report large preferred
 						 * I/O size in stat(2) */
+#define XFSMNT2_FILESTREAMS	0x00000002	/* enable the filestreams
+						 * allocator */
 
 #endif	/* __XFS_CLNT_H__ */
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index b33826961c45..fefd0116bac9 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -257,6 +257,7 @@ typedef enum xfs_dinode_fmt
 #define XFS_DIFLAG_EXTSIZE_BIT      11	/* inode extent size allocator hint */
 #define XFS_DIFLAG_EXTSZINHERIT_BIT 12	/* inherit inode extent size */
 #define XFS_DIFLAG_NODEFRAG_BIT     13	/* do not reorganize/defragment */
+#define XFS_DIFLAG_FILESTREAM_BIT   14  /* use filestream allocator */
 #define XFS_DIFLAG_REALTIME      (1 << XFS_DIFLAG_REALTIME_BIT)
 #define XFS_DIFLAG_PREALLOC      (1 << XFS_DIFLAG_PREALLOC_BIT)
 #define XFS_DIFLAG_NEWRTBM       (1 << XFS_DIFLAG_NEWRTBM_BIT)
@@ -271,12 +272,13 @@ typedef enum xfs_dinode_fmt
 #define XFS_DIFLAG_EXTSIZE       (1 << XFS_DIFLAG_EXTSIZE_BIT)
 #define XFS_DIFLAG_EXTSZINHERIT  (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
 #define XFS_DIFLAG_NODEFRAG      (1 << XFS_DIFLAG_NODEFRAG_BIT)
+#define XFS_DIFLAG_FILESTREAM    (1 << XFS_DIFLAG_FILESTREAM_BIT)
 
 #define XFS_DIFLAG_ANY \
 	(XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
 	 XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
 	 XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
 	 XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
-	 XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG)
+	 XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM)
 
 #endif	/* __XFS_DINODE_H__ */
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
new file mode 100644
index 000000000000..ce2278611bb7
--- /dev/null
+++ b/fs/xfs/xfs_filestream.c
@@ -0,0 +1,771 @@
+/*
+ * Copyright (c) 2006-2007 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inum.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_ag.h"
+#include "xfs_dmapi.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_bmap.h"
+#include "xfs_alloc.h"
+#include "xfs_utils.h"
+#include "xfs_mru_cache.h"
+#include "xfs_filestream.h"
+
+#ifdef XFS_FILESTREAMS_TRACE
+
+ktrace_t *xfs_filestreams_trace_buf;
+
+STATIC void
+xfs_filestreams_trace(
+	xfs_mount_t	*mp,	/* mount point */
+	int		type,	/* type of trace */
+	const char	*func,	/* source function */
+	int		line,	/* source line number */
+	__psunsigned_t	arg0,
+	__psunsigned_t	arg1,
+	__psunsigned_t	arg2,
+	__psunsigned_t	arg3,
+	__psunsigned_t	arg4,
+	__psunsigned_t	arg5)
+{
+	ktrace_enter(xfs_filestreams_trace_buf,
+		(void *)(__psint_t)(type | (line << 16)),
+		(void *)func,
+		(void *)(__psunsigned_t)current_pid(),
+		(void *)mp,
+		(void *)(__psunsigned_t)arg0,
+		(void *)(__psunsigned_t)arg1,
+		(void *)(__psunsigned_t)arg2,
+		(void *)(__psunsigned_t)arg3,
+		(void *)(__psunsigned_t)arg4,
+		(void *)(__psunsigned_t)arg5,
+		NULL, NULL, NULL, NULL, NULL, NULL);
+}
+
+#define TRACE0(mp,t)			TRACE6(mp,t,0,0,0,0,0,0)
+#define TRACE1(mp,t,a0)			TRACE6(mp,t,a0,0,0,0,0,0)
+#define TRACE2(mp,t,a0,a1)		TRACE6(mp,t,a0,a1,0,0,0,0)
+#define TRACE3(mp,t,a0,a1,a2)		TRACE6(mp,t,a0,a1,a2,0,0,0)
+#define TRACE4(mp,t,a0,a1,a2,a3)	TRACE6(mp,t,a0,a1,a2,a3,0,0)
+#define TRACE5(mp,t,a0,a1,a2,a3,a4)	TRACE6(mp,t,a0,a1,a2,a3,a4,0)
+#define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \
+	xfs_filestreams_trace(mp, t, __FUNCTION__, __LINE__, \
+				(__psunsigned_t)a0, (__psunsigned_t)a1, \
+				(__psunsigned_t)a2, (__psunsigned_t)a3, \
+				(__psunsigned_t)a4, (__psunsigned_t)a5)
+
+#define TRACE_AG_SCAN(mp, ag, ag2) \
+		TRACE2(mp, XFS_FSTRM_KTRACE_AGSCAN, ag, ag2);
+#define TRACE_AG_PICK1(mp, max_ag, maxfree) \
+		TRACE2(mp, XFS_FSTRM_KTRACE_AGPICK1, max_ag, maxfree);
+#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) \
+		TRACE6(mp, XFS_FSTRM_KTRACE_AGPICK2, ag, ag2, \
+			 cnt, free, scan, flag)
+#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) \
+		TRACE5(mp, XFS_FSTRM_KTRACE_UPDATE, ip, ag, cnt, ag2, cnt2)
+#define TRACE_FREE(mp, ip, pip, ag, cnt) \
+		TRACE4(mp, XFS_FSTRM_KTRACE_FREE, ip, pip, ag, cnt)
+#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) \
+		TRACE4(mp, XFS_FSTRM_KTRACE_ITEM_LOOKUP, ip, pip, ag, cnt)
+#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) \
+		TRACE4(mp, XFS_FSTRM_KTRACE_ASSOCIATE, ip, pip, ag, cnt)
+#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) \
+		TRACE6(mp, XFS_FSTRM_KTRACE_MOVEAG, ip, pip, oag, ocnt, nag, ncnt)
+#define TRACE_ORPHAN(mp, ip, ag) \
+		TRACE2(mp, XFS_FSTRM_KTRACE_ORPHAN, ip, ag);
+
+
+#else
+#define TRACE_AG_SCAN(mp, ag, ag2)
+#define TRACE_AG_PICK1(mp, max_ag, maxfree)
+#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag)
+#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2)
+#define TRACE_FREE(mp, ip, pip, ag, cnt)
+#define TRACE_LOOKUP(mp, ip, pip, ag, cnt)
+#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt)
+#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt)
+#define TRACE_ORPHAN(mp, ip, ag)
+#endif
+
+static kmem_zone_t *item_zone;
+
+/*
+ * Structure for associating a file or a directory with an allocation group.
+ * The parent directory pointer is only needed for files, but since there will
+ * generally be vastly more files than directories in the cache, using the same
+ * data structure simplifies the code with very little memory overhead.
+ */
+typedef struct fstrm_item
+{
+	xfs_agnumber_t	ag;	/* AG currently in use for the file/directory. */
+	xfs_inode_t	*ip;	/* inode self-pointer. */
+	xfs_inode_t	*pip;	/* Parent directory inode pointer. */
+} fstrm_item_t;
+
+
+/*
+ * Scan the AGs starting at startag looking for an AG that isn't in use and has
+ * at least minlen blocks free.
+ */
+static int
+_xfs_filestream_pick_ag(
+	xfs_mount_t	*mp,
+	xfs_agnumber_t	startag,
+	xfs_agnumber_t	*agp,
+	int		flags,
+	xfs_extlen_t	minlen)
+{
+	int		err, trylock, nscan;
+	xfs_extlen_t	delta, longest, need, free, minfree, maxfree = 0;
+	xfs_agnumber_t	ag, max_ag = NULLAGNUMBER;
+	struct xfs_perag *pag;
+
+	/* 2% of an AG's blocks must be free for it to be chosen. */
+	minfree = mp->m_sb.sb_agblocks / 50;
+
+	ag = startag;
+	*agp = NULLAGNUMBER;
+
+	/* For the first pass, don't sleep trying to init the per-AG. */
+	trylock = XFS_ALLOC_FLAG_TRYLOCK;
+
+	for (nscan = 0; 1; nscan++) {
+
+		TRACE_AG_SCAN(mp, ag, xfs_filestream_peek_ag(mp, ag));
+
+		pag = mp->m_perag + ag;
+
+		if (!pag->pagf_init) {
+			err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
+			if (err && !trylock)
+				return err;
+		}
+
+		/* Might fail sometimes during the 1st pass with trylock set. */
+		if (!pag->pagf_init)
+			goto next_ag;
+
+		/* Keep track of the AG with the most free blocks. */
+		if (pag->pagf_freeblks > maxfree) {
+			maxfree = pag->pagf_freeblks;
+			max_ag = ag;
+		}
+
+		/*
+		 * The AG reference count does two things: it enforces mutual
+		 * exclusion when examining the suitability of an AG in this
+		 * loop, and it guards against two filestreams being established
+		 * in the same AG as each other.
+		 */
+		if (xfs_filestream_get_ag(mp, ag) > 1) {
+			xfs_filestream_put_ag(mp, ag);
+			goto next_ag;
+		}
+
+		need = XFS_MIN_FREELIST_PAG(pag, mp);
+		delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
+		longest = (pag->pagf_longest > delta) ?
+		          (pag->pagf_longest - delta) :
+		          (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
+
+		if (((minlen && longest >= minlen) ||
+		     (!minlen && pag->pagf_freeblks >= minfree)) &&
+		    (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||
+		     (flags & XFS_PICK_LOWSPACE))) {
+
+			/* Break out, retaining the reference on the AG. */
+			free = pag->pagf_freeblks;
+			*agp = ag;
+			break;
+		}
+
+		/* Drop the reference on this AG, it's not usable. */
+		xfs_filestream_put_ag(mp, ag);
+next_ag:
+		/* Move to the next AG, wrapping to AG 0 if necessary. */
+		if (++ag >= mp->m_sb.sb_agcount)
+			ag = 0;
+
+		/* If a full pass of the AGs hasn't been done yet, continue. */
+		if (ag != startag)
+			continue;
+
+		/* Allow sleeping in xfs_alloc_pagf_init() on the 2nd pass. */
+		if (trylock != 0) {
+			trylock = 0;
+			continue;
+		}
+
+		/* Finally, if lowspace wasn't set, set it for the 3rd pass. */
+		if (!(flags & XFS_PICK_LOWSPACE)) {
+			flags |= XFS_PICK_LOWSPACE;
+			continue;
+		}
+
+		/*
+		 * Take the AG with the most free space, regardless of whether
+		 * it's already in use by another filestream.
+		 */
+		if (max_ag != NULLAGNUMBER) {
+			xfs_filestream_get_ag(mp, max_ag);
+			TRACE_AG_PICK1(mp, max_ag, maxfree);
+			free = maxfree;
+			*agp = max_ag;
+			break;
+		}
+
+		/* take AG 0 if none matched */
+		TRACE_AG_PICK1(mp, max_ag, maxfree);
+		*agp = 0;
+		return 0;
+	}
+
+	TRACE_AG_PICK2(mp, startag, *agp, xfs_filestream_peek_ag(mp, *agp),
+			free, nscan, flags);
+
+	return 0;
+}
+
+/*
+ * Set the allocation group number for a file or a directory, updating inode
+ * references and per-AG references as appropriate.  Must be called with the
+ * m_peraglock held in read mode.
+ */
+static int
+_xfs_filestream_update_ag(
+	xfs_inode_t	*ip,
+	xfs_inode_t	*pip,
+	xfs_agnumber_t	ag)
+{
+	int		err = 0;
+	xfs_mount_t	*mp;
+	xfs_mru_cache_t	*cache;
+	fstrm_item_t	*item;
+	xfs_agnumber_t	old_ag;
+	xfs_inode_t	*old_pip;
+
+	/*
+	 * Either ip is a regular file and pip is a directory, or ip is a
+	 * directory and pip is NULL.
+	 */
+	ASSERT(ip && (((ip->i_d.di_mode & S_IFREG) && pip &&
+	               (pip->i_d.di_mode & S_IFDIR)) ||
+	              ((ip->i_d.di_mode & S_IFDIR) && !pip)));
+
+	mp = ip->i_mount;
+	cache = mp->m_filestream;
+
+	item = xfs_mru_cache_lookup(cache, ip->i_ino);
+	if (item) {
+		ASSERT(item->ip == ip);
+		old_ag = item->ag;
+		item->ag = ag;
+		old_pip = item->pip;
+		item->pip = pip;
+		xfs_mru_cache_done(cache);
+
+		/*
+		 * If the AG has changed, drop the old ref and take a new one,
+		 * effectively transferring the reference from old to new AG.
+		 */
+		if (ag != old_ag) {
+			xfs_filestream_put_ag(mp, old_ag);
+			xfs_filestream_get_ag(mp, ag);
+		}
+
+		/*
+		 * If ip is a file and its pip has changed, drop the old ref and
+		 * take a new one.
+		 */
+		if (pip && pip != old_pip) {
+			IRELE(old_pip);
+			IHOLD(pip);
+		}
+
+		TRACE_UPDATE(mp, ip, old_ag, xfs_filestream_peek_ag(mp, old_ag),
+				ag, xfs_filestream_peek_ag(mp, ag));
+		return 0;
+	}
+
+	item = kmem_zone_zalloc(item_zone, KM_MAYFAIL);
+	if (!item)
+		return ENOMEM;
+
+	item->ag = ag;
+	item->ip = ip;
+	item->pip = pip;
+
+	err = xfs_mru_cache_insert(cache, ip->i_ino, item);
+	if (err) {
+		kmem_zone_free(item_zone, item);
+		return err;
+	}
+
+	/* Take a reference on the AG. */
+	xfs_filestream_get_ag(mp, ag);
+
+	/*
+	 * Take a reference on the inode itself regardless of whether it's a
+	 * regular file or a directory.
+	 */
+	IHOLD(ip);
+
+	/*
+	 * In the case of a regular file, take a reference on the parent inode
+	 * as well to ensure it remains in-core.
+	 */
+	if (pip)
+		IHOLD(pip);
+
+	TRACE_UPDATE(mp, ip, ag, xfs_filestream_peek_ag(mp, ag),
+			ag, xfs_filestream_peek_ag(mp, ag));
+
+	return 0;
+}
+
+/* xfs_fstrm_free_func(): callback for freeing cached stream items. */
+void
+xfs_fstrm_free_func(
+	xfs_ino_t	ino,
+	fstrm_item_t	*item)
+{
+	xfs_inode_t	*ip = item->ip;
+	int ref;
+
+	ASSERT(ip->i_ino == ino);
+
+	xfs_iflags_clear(ip, XFS_IFILESTREAM);
+
+	/* Drop the reference taken on the AG when the item was added. */
+	ref = xfs_filestream_put_ag(ip->i_mount, item->ag);
+
+	ASSERT(ref >= 0);
+	TRACE_FREE(ip->i_mount, ip, item->pip, item->ag,
+		xfs_filestream_peek_ag(ip->i_mount, item->ag));
+
+	/*
+	 * _xfs_filestream_update_ag() always takes a reference on the inode
+	 * itself, whether it's a file or a directory.  Release it here.
+	 * This can result in the inode being freed and so we must
+	 * not hold any inode locks when freeing filesstreams objects
+	 * otherwise we can deadlock here.
+	 */
+	IRELE(ip);
+
+	/*
+	 * In the case of a regular file, _xfs_filestream_update_ag() also
+	 * takes a ref on the parent inode to keep it in-core.  Release that
+	 * too.
+	 */
+	if (item->pip)
+		IRELE(item->pip);
+
+	/* Finally, free the memory allocated for the item. */
+	kmem_zone_free(item_zone, item);
+}
+
+/*
+ * xfs_filestream_init() is called at xfs initialisation time to set up the
+ * memory zone that will be used for filestream data structure allocation.
+ */
+int
+xfs_filestream_init(void)
+{
+	item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
+#ifdef XFS_FILESTREAMS_TRACE
+	xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP);
+#endif
+	return item_zone ? 0 : -ENOMEM;
+}
+
+/*
+ * xfs_filestream_uninit() is called at xfs termination time to destroy the
+ * memory zone that was used for filestream data structure allocation.
+ */
+void
+xfs_filestream_uninit(void)
+{
+#ifdef XFS_FILESTREAMS_TRACE
+	ktrace_free(xfs_filestreams_trace_buf);
+#endif
+	kmem_zone_destroy(item_zone);
+}
+
+/*
+ * xfs_filestream_mount() is called when a file system is mounted with the
+ * filestream option.  It is responsible for allocating the data structures
+ * needed to track the new file system's file streams.
+ */
+int
+xfs_filestream_mount(
+	xfs_mount_t	*mp)
+{
+	int		err;
+	unsigned int	lifetime, grp_count;
+
+	/*
+	 * The filestream timer tunable is currently fixed within the range of
+	 * one second to four minutes, with five seconds being the default.  The
+	 * group count is somewhat arbitrary, but it'd be nice to adhere to the
+	 * timer tunable to within about 10 percent.  This requires at least 10
+	 * groups.
+	 */
+	lifetime  = xfs_fstrm_centisecs * 10;
+	grp_count = 10;
+
+	err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count,
+	                     (xfs_mru_cache_free_func_t)xfs_fstrm_free_func);
+
+	return err;
+}
+
+/*
+ * xfs_filestream_unmount() is called when a file system that was mounted with
+ * the filestream option is unmounted.  It drains the data structures created
+ * to track the file system's file streams and frees all the memory that was
+ * allocated.
+ */
+void
+xfs_filestream_unmount(
+	xfs_mount_t	*mp)
+{
+	xfs_mru_cache_destroy(mp->m_filestream);
+}
+
+/*
+ * If the mount point's m_perag array is going to be reallocated, all
+ * outstanding cache entries must be flushed to avoid accessing reference count
+ * addresses that have been freed.  The call to xfs_filestream_flush() must be
+ * made inside the block that holds the m_peraglock in write mode to do the
+ * reallocation.
+ */
+void
+xfs_filestream_flush(
+	xfs_mount_t	*mp)
+{
+	/* point in time flush, so keep the reaper running */
+	xfs_mru_cache_flush(mp->m_filestream, 1);
+}
+
+/*
+ * Return the AG of the filestream the file or directory belongs to, or
+ * NULLAGNUMBER otherwise.
+ */
+xfs_agnumber_t
+xfs_filestream_lookup_ag(
+	xfs_inode_t	*ip)
+{
+	xfs_mru_cache_t	*cache;
+	fstrm_item_t	*item;
+	xfs_agnumber_t	ag;
+	int		ref;
+
+	if (!(ip->i_d.di_mode & (S_IFREG | S_IFDIR))) {
+		ASSERT(0);
+		return NULLAGNUMBER;
+	}
+
+	cache = ip->i_mount->m_filestream;
+	item = xfs_mru_cache_lookup(cache, ip->i_ino);
+	if (!item) {
+		TRACE_LOOKUP(ip->i_mount, ip, NULL, NULLAGNUMBER, 0);
+		return NULLAGNUMBER;
+	}
+
+	ASSERT(ip == item->ip);
+	ag = item->ag;
+	ref = xfs_filestream_peek_ag(ip->i_mount, ag);
+	xfs_mru_cache_done(cache);
+
+	TRACE_LOOKUP(ip->i_mount, ip, item->pip, ag, ref);
+	return ag;
+}
+
+/*
+ * xfs_filestream_associate() should only be called to associate a regular file
+ * with its parent directory.  Calling it with a child directory isn't
+ * appropriate because filestreams don't apply to entire directory hierarchies.
+ * Creating a file in a child directory of an existing filestream directory
+ * starts a new filestream with its own allocation group association.
+ *
+ * Returns < 0 on error, 0 if successful association occurred, > 0 if
+ * we failed to get an association because of locking issues.
+ */
+int
+xfs_filestream_associate(
+	xfs_inode_t	*pip,
+	xfs_inode_t	*ip)
+{
+	xfs_mount_t	*mp;
+	xfs_mru_cache_t	*cache;
+	fstrm_item_t	*item;
+	xfs_agnumber_t	ag, rotorstep, startag;
+	int		err = 0;
+
+	ASSERT(pip->i_d.di_mode & S_IFDIR);
+	ASSERT(ip->i_d.di_mode & S_IFREG);
+	if (!(pip->i_d.di_mode & S_IFDIR) || !(ip->i_d.di_mode & S_IFREG))
+		return -EINVAL;
+
+	mp = pip->i_mount;
+	cache = mp->m_filestream;
+	down_read(&mp->m_peraglock);
+
+	/*
+	 * We have a problem, Houston.
+	 *
+	 * Taking the iolock here violates inode locking order - we already
+	 * hold the ilock. Hence if we block getting this lock we may never
+	 * wake. Unfortunately, that means if we can't get the lock, we're
+	 * screwed in terms of getting a stream association - we can't spin
+	 * waiting for the lock because someone else is waiting on the lock we
+	 * hold and we cannot drop that as we are in a transaction here.
+	 *
+	 * Lucky for us, this inversion is rarely a problem because it's a
+	 * directory inode that we are trying to lock here and that means the
+	 * only place that matters is xfs_sync_inodes() and SYNC_DELWRI is
+	 * used. i.e. freeze, remount-ro, quotasync or unmount.
+	 *
+	 * So, if we can't get the iolock without sleeping then just give up
+	 */
+	if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) {
+		up_read(&mp->m_peraglock);
+		return 1;
+	}
+
+	/* If the parent directory is already in the cache, use its AG. */
+	item = xfs_mru_cache_lookup(cache, pip->i_ino);
+	if (item) {
+		ASSERT(item->ip == pip);
+		ag = item->ag;
+		xfs_mru_cache_done(cache);
+
+		TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag));
+		err = _xfs_filestream_update_ag(ip, pip, ag);
+
+		goto exit;
+	}
+
+	/*
+	 * Set the starting AG using the rotor for inode32, otherwise
+	 * use the directory inode's AG.
+	 */
+	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
+		rotorstep = xfs_rotorstep;
+		startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
+		mp->m_agfrotor = (mp->m_agfrotor + 1) %
+		                 (mp->m_sb.sb_agcount * rotorstep);
+	} else
+		startag = XFS_INO_TO_AGNO(mp, pip->i_ino);
+
+	/* Pick a new AG for the parent inode starting at startag. */
+	err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0);
+	if (err || ag == NULLAGNUMBER)
+		goto exit_did_pick;
+
+	/* Associate the parent inode with the AG. */
+	err = _xfs_filestream_update_ag(pip, NULL, ag);
+	if (err)
+		goto exit_did_pick;
+
+	/* Associate the file inode with the AG. */
+	err = _xfs_filestream_update_ag(ip, pip, ag);
+	if (err)
+		goto exit_did_pick;
+
+	TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag));
+
+exit_did_pick:
+	/*
+	 * If _xfs_filestream_pick_ag() returned a valid AG, remove the
+	 * reference it took on it, since the file and directory will have taken
+	 * their own now if they were successfully cached.
+	 */
+	if (ag != NULLAGNUMBER)
+		xfs_filestream_put_ag(mp, ag);
+
+exit:
+	xfs_iunlock(pip, XFS_IOLOCK_EXCL);
+	up_read(&mp->m_peraglock);
+	return -err;
+}
+
+/*
+ * Pick a new allocation group for the current file and its file stream.  This
+ * function is called by xfs_bmap_filestreams() with the mount point's per-ag
+ * lock held.
+ */
+int
+xfs_filestream_new_ag(
+	xfs_bmalloca_t	*ap,
+	xfs_agnumber_t	*agp)
+{
+	int		flags, err;
+	xfs_inode_t	*ip, *pip = NULL;
+	xfs_mount_t	*mp;
+	xfs_mru_cache_t	*cache;
+	xfs_extlen_t	minlen;
+	fstrm_item_t	*dir, *file;
+	xfs_agnumber_t	ag = NULLAGNUMBER;
+
+	ip = ap->ip;
+	mp = ip->i_mount;
+	cache = mp->m_filestream;
+	minlen = ap->alen;
+	*agp = NULLAGNUMBER;
+
+	/*
+	 * Look for the file in the cache, removing it if it's found.  Doing
+	 * this allows it to be held across the dir lookup that follows.
+	 */
+	file = xfs_mru_cache_remove(cache, ip->i_ino);
+	if (file) {
+		ASSERT(ip == file->ip);
+
+		/* Save the file's parent inode and old AG number for later. */
+		pip = file->pip;
+		ag = file->ag;
+
+		/* Look for the file's directory in the cache. */
+		dir = xfs_mru_cache_lookup(cache, pip->i_ino);
+		if (dir) {
+			ASSERT(pip == dir->ip);
+
+			/*
+			 * If the directory has already moved on to a new AG,
+			 * use that AG as the new AG for the file. Don't
+			 * forget to twiddle the AG refcounts to match the
+			 * movement.
+			 */
+			if (dir->ag != file->ag) {
+				xfs_filestream_put_ag(mp, file->ag);
+				xfs_filestream_get_ag(mp, dir->ag);
+				*agp = file->ag = dir->ag;
+			}
+
+			xfs_mru_cache_done(cache);
+		}
+
+		/*
+		 * Put the file back in the cache.  If this fails, the free
+		 * function needs to be called to tidy up in the same way as if
+		 * the item had simply expired from the cache.
+		 */
+		err = xfs_mru_cache_insert(cache, ip->i_ino, file);
+		if (err) {
+			xfs_fstrm_free_func(ip->i_ino, file);
+			return err;
+		}
+
+		/*
+		 * If the file's AG was moved to the directory's new AG, there's
+		 * nothing more to be done.
+		 */
+		if (*agp != NULLAGNUMBER) {
+			TRACE_MOVEAG(mp, ip, pip,
+					ag, xfs_filestream_peek_ag(mp, ag),
+					*agp, xfs_filestream_peek_ag(mp, *agp));
+			return 0;
+		}
+	}
+
+	/*
+	 * If the file's parent directory is known, take its iolock in exclusive
+	 * mode to prevent two sibling files from racing each other to migrate
+	 * themselves and their parent to different AGs.
+	 */
+	if (pip)
+		xfs_ilock(pip, XFS_IOLOCK_EXCL);
+
+	/*
+	 * A new AG needs to be found for the file.  If the file's parent
+	 * directory is also known, it will be moved to the new AG as well to
+	 * ensure that files created inside it in future use the new AG.
+	 */
+	ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount;
+	flags = (ap->userdata ? XFS_PICK_USERDATA : 0) |
+	        (ap->low ? XFS_PICK_LOWSPACE : 0);
+
+	err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen);
+	if (err || *agp == NULLAGNUMBER)
+		goto exit;
+
+	/*
+	 * If the file wasn't found in the file cache, then its parent directory
+	 * inode isn't known.  For this to have happened, the file must either
+	 * be pre-existing, or it was created long enough ago that its cache
+	 * entry has expired.  This isn't the sort of usage that the filestreams
+	 * allocator is trying to optimise, so there's no point trying to track
+	 * its new AG somehow in the filestream data structures.
+	 */
+	if (!pip) {
+		TRACE_ORPHAN(mp, ip, *agp);
+		goto exit;
+	}
+
+	/* Associate the parent inode with the AG. */
+	err = _xfs_filestream_update_ag(pip, NULL, *agp);
+	if (err)
+		goto exit;
+
+	/* Associate the file inode with the AG. */
+	err = _xfs_filestream_update_ag(ip, pip, *agp);
+	if (err)
+		goto exit;
+
+	TRACE_MOVEAG(mp, ip, pip, NULLAGNUMBER, 0,
+			*agp, xfs_filestream_peek_ag(mp, *agp));
+
+exit:
+	/*
+	 * If _xfs_filestream_pick_ag() returned a valid AG, remove the
+	 * reference it took on it, since the file and directory will have taken
+	 * their own now if they were successfully cached.
+	 */
+	if (*agp != NULLAGNUMBER)
+		xfs_filestream_put_ag(mp, *agp);
+	else
+		*agp = 0;
+
+	if (pip)
+		xfs_iunlock(pip, XFS_IOLOCK_EXCL);
+
+	return err;
+}
+
+/*
+ * Remove an association between an inode and a filestream object.
+ * Typically this is done on last close of an unlinked file.
+ */
+void
+xfs_filestream_deassociate(
+	xfs_inode_t	*ip)
+{
+	xfs_mru_cache_t	*cache = ip->i_mount->m_filestream;
+
+	xfs_mru_cache_delete(cache, ip->i_ino);
+}
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
new file mode 100644
index 000000000000..f655f7dc334c
--- /dev/null
+++ b/fs/xfs/xfs_filestream.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2006-2007 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_FILESTREAM_H__
+#define __XFS_FILESTREAM_H__
+
+#ifdef __KERNEL__
+
+struct xfs_mount;
+struct xfs_inode;
+struct xfs_perag;
+struct xfs_bmalloca;
+
+#ifdef XFS_FILESTREAMS_TRACE
+#define XFS_FSTRM_KTRACE_INFO		1
+#define XFS_FSTRM_KTRACE_AGSCAN		2
+#define XFS_FSTRM_KTRACE_AGPICK1	3
+#define XFS_FSTRM_KTRACE_AGPICK2	4
+#define XFS_FSTRM_KTRACE_UPDATE		5
+#define XFS_FSTRM_KTRACE_FREE		6
+#define	XFS_FSTRM_KTRACE_ITEM_LOOKUP	7
+#define	XFS_FSTRM_KTRACE_ASSOCIATE	8
+#define	XFS_FSTRM_KTRACE_MOVEAG		9
+#define	XFS_FSTRM_KTRACE_ORPHAN		10
+
+#define XFS_FSTRM_KTRACE_SIZE	16384
+extern ktrace_t *xfs_filestreams_trace_buf;
+
+#endif
+
+/*
+ * Allocation group filestream associations are tracked with per-ag atomic
+ * counters.  These counters allow _xfs_filestream_pick_ag() to tell whether a
+ * particular AG already has active filestreams associated with it. The mount
+ * point's m_peraglock is used to protect these counters from per-ag array
+ * re-allocation during a growfs operation.  When xfs_growfs_data_private() is
+ * about to reallocate the array, it calls xfs_filestream_flush() with the
+ * m_peraglock held in write mode.
+ *
+ * Since xfs_mru_cache_flush() guarantees that all the free functions for all
+ * the cache elements have finished executing before it returns, it's safe for
+ * the free functions to use the atomic counters without m_peraglock protection.
+ * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
+ * whether it was called with the m_peraglock held in read mode, write mode or
+ * not held at all.  The race condition this addresses is the following:
+ *
+ *  - The work queue scheduler fires and pulls a filestream directory cache
+ *    element off the LRU end of the cache for deletion, then gets pre-empted.
+ *  - A growfs operation grabs the m_peraglock in write mode, flushes all the
+ *    remaining items from the cache and reallocates the mount point's per-ag
+ *    array, resetting all the counters to zero.
+ *  - The work queue thread resumes and calls the free function for the element
+ *    it started cleaning up earlier.  In the process it decrements the
+ *    filestreams counter for an AG that now has no references.
+ *
+ * With a shrinkfs feature, the above scenario could panic the system.
+ *
+ * All other uses of the following macros should be protected by either the
+ * m_peraglock held in read mode, or the cache's internal locking exposed by the
+ * interval between a call to xfs_mru_cache_lookup() and a call to
+ * xfs_mru_cache_done().  In addition, the m_peraglock must be held in read mode
+ * when new elements are added to the cache.
+ *
+ * Combined, these locking rules ensure that no associations will ever exist in
+ * the cache that reference per-ag array elements that have since been
+ * reallocated.
+ */
+STATIC_INLINE int
+xfs_filestream_peek_ag(
+	xfs_mount_t	*mp,
+	xfs_agnumber_t	agno)
+{
+	return atomic_read(&mp->m_perag[agno].pagf_fstrms);
+}
+
+STATIC_INLINE int
+xfs_filestream_get_ag(
+	xfs_mount_t	*mp,
+	xfs_agnumber_t	agno)
+{
+	return atomic_inc_return(&mp->m_perag[agno].pagf_fstrms);
+}
+
+STATIC_INLINE int
+xfs_filestream_put_ag(
+	xfs_mount_t	*mp,
+	xfs_agnumber_t	agno)
+{
+	return atomic_dec_return(&mp->m_perag[agno].pagf_fstrms);
+}
+
+/* allocation selection flags */
+typedef enum xfs_fstrm_alloc {
+	XFS_PICK_USERDATA = 1,
+	XFS_PICK_LOWSPACE = 2,
+} xfs_fstrm_alloc_t;
+
+/* prototypes for filestream.c */
+int xfs_filestream_init(void);
+void xfs_filestream_uninit(void);
+int xfs_filestream_mount(struct xfs_mount *mp);
+void xfs_filestream_unmount(struct xfs_mount *mp);
+void xfs_filestream_flush(struct xfs_mount *mp);
+xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);
+int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip);
+void xfs_filestream_deassociate(struct xfs_inode *ip);
+int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp);
+
+
+/* filestreams for the inode? */
+STATIC_INLINE int
+xfs_inode_is_filestream(
+	struct xfs_inode	*ip)
+{
+	return (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS) ||
+		xfs_iflags_test(ip, XFS_IFILESTREAM) ||
+		(ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM);
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* __XFS_FILESTREAM_H__ */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 1b60cfc28be5..ec3c9c27e0de 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -66,6 +66,7 @@ struct fsxattr {
 #define XFS_XFLAG_EXTSIZE	0x00000800	/* extent size allocator hint */
 #define XFS_XFLAG_EXTSZINHERIT	0x00001000	/* inherit inode extent size */
 #define XFS_XFLAG_NODEFRAG	0x00002000  	/* do not defragment */
+#define XFS_XFLAG_FILESTREAM	0x00004000	/* use filestream allocator */
 #define XFS_XFLAG_HASATTR	0x80000000	/* no DIFLAG for this	*/
 
 /*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 2251a49f3e17..432e82347ed6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -44,6 +44,7 @@
 #include "xfs_trans_space.h"
 #include "xfs_rtalloc.h"
 #include "xfs_rw.h"
+#include "xfs_filestream.h"
 
 /*
  * File system operations
@@ -165,6 +166,7 @@ xfs_growfs_data_private(
 	new = nb - mp->m_sb.sb_dblocks;
 	oagcount = mp->m_sb.sb_agcount;
 	if (nagcount > oagcount) {
+		xfs_filestream_flush(mp);
 		down_write(&mp->m_peraglock);
 		mp->m_perag = kmem_realloc(mp->m_perag,
 			sizeof(xfs_perag_t) * nagcount,
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 8fdd30d9ba56..2ef100be6c4f 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -48,6 +48,7 @@
 #include "xfs_dir2_trace.h"
 #include "xfs_quota.h"
 #include "xfs_acl.h"
+#include "xfs_filestream.h"
 
 #include <linux/log2.h>
 
@@ -818,6 +819,8 @@ _xfs_dic2xflags(
 			flags |= XFS_XFLAG_EXTSZINHERIT;
 		if (di_flags & XFS_DIFLAG_NODEFRAG)
 			flags |= XFS_XFLAG_NODEFRAG;
+		if (di_flags & XFS_DIFLAG_FILESTREAM)
+			flags |= XFS_XFLAG_FILESTREAM;
 	}
 
 	return flags;
@@ -1151,7 +1154,7 @@ xfs_ialloc(
 	/*
 	 * Project ids won't be stored on disk if we are using a version 1 inode.
 	 */
-	if ( (prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1))
+	if ((prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1))
 		xfs_bump_ino_vers2(tp, ip);
 
 	if (XFS_INHERIT_GID(pip, vp->v_vfsp)) {
@@ -1196,8 +1199,16 @@ xfs_ialloc(
 		flags |= XFS_ILOG_DEV;
 		break;
 	case S_IFREG:
+		if (xfs_inode_is_filestream(pip)) {
+			error = xfs_filestream_associate(pip, ip);
+			if (error < 0)
+				return -error;
+			if (!error)
+				xfs_iflags_set(ip, XFS_IFILESTREAM);
+		}
+		/* fall through */
 	case S_IFDIR:
-		if (unlikely(pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
+		if (pip->i_d.di_flags & XFS_DIFLAG_ANY) {
 			uint	di_flags = 0;
 
 			if ((mode & S_IFMT) == S_IFDIR) {
@@ -1234,6 +1245,8 @@ xfs_ialloc(
 			if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
 			    xfs_inherit_nodefrag)
 				di_flags |= XFS_DIFLAG_NODEFRAG;
+			if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
+				di_flags |= XFS_DIFLAG_FILESTREAM;
 			ip->i_d.di_flags |= di_flags;
 		}
 		/* FALLTHROUGH */
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f75afecef8e7..d418eeed4ebd 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -379,6 +379,7 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
 #define XFS_ISTALE	0x0010	/* inode has been staled */
 #define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */
 #define XFS_INEW	0x0040
+#define XFS_IFILESTREAM	0x0080	/* inode is in a filestream directory */
 
 /*
  * Flags for inode locking.
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 0bca2d422719..76ad74758696 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -66,6 +66,7 @@ struct xfs_bmbt_irec;
 struct xfs_bmap_free;
 struct xfs_extdelta;
 struct xfs_swapext;
+struct xfs_mru_cache;
 
 extern struct bhv_vfsops xfs_vfsops;
 extern struct bhv_vnodeops xfs_vnodeops;
@@ -424,6 +425,7 @@ typedef struct xfs_mount {
 	struct notifier_block	m_icsb_notifier; /* hotplug cpu notifier */
 	struct mutex		m_icsb_mutex;	/* balancer sync lock */
 #endif
+	struct xfs_mru_cache	*m_filestream;  /* per-mount filestream data */
 } xfs_mount_t;
 
 /*
@@ -463,6 +465,8 @@ typedef struct xfs_mount {
 						 * I/O size in stat() */
 #define XFS_MOUNT_NO_PERCPU_SB	(1ULL << 23)	/* don't use per-cpu superblock
 						   counters */
+#define XFS_MOUNT_FILESTREAMS	(1ULL << 24)	/* enable the filestreams
+						   allocator */
 
 
 /*
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
new file mode 100644
index 000000000000..7deb9e3cbbd3
--- /dev/null
+++ b/fs/xfs/xfs_mru_cache.c
@@ -0,0 +1,608 @@
+/*
+ * Copyright (c) 2006-2007 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_mru_cache.h"
+
+/*
+ * The MRU Cache data structure consists of a data store, an array of lists and
+ * a lock to protect its internal state.  At initialisation time, the client
+ * supplies an element lifetime in milliseconds and a group count, as well as a
+ * function pointer to call when deleting elements.  A data structure for
+ * queueing up work in the form of timed callbacks is also included.
+ *
+ * The group count controls how many lists are created, and thereby how finely
+ * the elements are grouped in time.  When reaping occurs, all the elements in
+ * all the lists whose time has expired are deleted.
+ *
+ * To give an example of how this works in practice, consider a client that
+ * initialises an MRU Cache with a lifetime of ten seconds and a group count of
+ * five.  Five internal lists will be created, each representing a two second
+ * period in time.  When the first element is added, time zero for the data
+ * structure is initialised to the current time.
+ *
+ * All the elements added in the first two seconds are appended to the first
+ * list.  Elements added in the third second go into the second list, and so on.
+ * If an element is accessed at any point, it is removed from its list and
+ * inserted at the head of the current most-recently-used list.
+ *
+ * The reaper function will have nothing to do until at least twelve seconds
+ * have elapsed since the first element was added.  The reason for this is that
+ * if it were called at t=11s, there could be elements in the first list that
+ * have only been inactive for nine seconds, so it still does nothing.  If it is
+ * called anywhere between t=12 and t=14 seconds, it will delete all the
+ * elements that remain in the first list.  It's therefore possible for elements
+ * to remain in the data store even after they've been inactive for up to
+ * (t + t/g) seconds, where t is the inactive element lifetime and g is the
+ * number of groups.
+ *
+ * The above example assumes that the reaper function gets called at least once
+ * every (t/g) seconds.  If it is called less frequently, unused elements will
+ * accumulate in the reap list until the reaper function is eventually called.
+ * The current implementation uses work queue callbacks to carefully time the
+ * reaper function calls, so this should happen rarely, if at all.
+ *
+ * From a design perspective, the primary reason for the choice of a list array
+ * representing discrete time intervals is that it's only practical to reap
+ * expired elements in groups of some appreciable size.  This automatically
+ * introduces a granularity to element lifetimes, so there's no point storing an
+ * individual timeout with each element that specifies a more precise reap time.
+ * The bonus is a saving of sizeof(long) bytes of memory per element stored.
+ *
+ * The elements could have been stored in just one list, but an array of
+ * counters or pointers would need to be maintained to allow them to be divided
+ * up into discrete time groups.  More critically, the process of touching or
+ * removing an element would involve walking large portions of the entire list,
+ * which would have a detrimental effect on performance.  The additional memory
+ * requirement for the array of list heads is minimal.
+ *
+ * When an element is touched or deleted, it needs to be removed from its
+ * current list.  Doubly linked lists are used to make the list maintenance
+ * portion of these operations O(1).  Since reaper timing can be imprecise,
+ * inserts and lookups can occur when there are no free lists available.  When
+ * this happens, all the elements on the LRU list need to be migrated to the end
+ * of the reap list.  To keep the list maintenance portion of these operations
+ * O(1) also, list tails need to be accessible without walking the entire list.
+ * This is the reason why doubly linked list heads are used.
+ */
+
+/*
+ * An MRU Cache is a dynamic data structure that stores its elements in a way
+ * that allows efficient lookups, but also groups them into discrete time
+ * intervals based on insertion time.  This allows elements to be efficiently
+ * and automatically reaped after a fixed period of inactivity.
+ *
+ * When a client data pointer is stored in the MRU Cache it needs to be added to
+ * both the data store and to one of the lists.  It must also be possible to
+ * access each of these entries via the other, i.e. to:
+ *
+ *    a) Walk a list, removing the corresponding data store entry for each item.
+ *    b) Look up a data store entry, then access its list entry directly.
+ *
+ * To achieve both of these goals, each entry must contain both a list entry and
+ * a key, in addition to the user's data pointer.  Note that it's not a good
+ * idea to have the client embed one of these structures at the top of their own
+ * data structure, because inserting the same item more than once would most
+ * likely result in a loop in one of the lists.  That's a sure-fire recipe for
+ * an infinite loop in the code.
+ */
+typedef struct xfs_mru_cache_elem
+{
+	struct list_head list_node;
+	unsigned long	key;
+	void		*value;
+} xfs_mru_cache_elem_t;
+
+static kmem_zone_t		*xfs_mru_elem_zone;
+static struct workqueue_struct	*xfs_mru_reap_wq;
+
+/*
+ * When inserting, destroying or reaping, it's first necessary to update the
+ * lists relative to a particular time.  In the case of destroying, that time
+ * will be well in the future to ensure that all items are moved to the reap
+ * list.  In all other cases though, the time will be the current time.
+ *
+ * This function enters a loop, moving the contents of the LRU list to the reap
+ * list again and again until either a) the lists are all empty, or b) time zero
+ * has been advanced sufficiently to be within the immediate element lifetime.
+ *
+ * Case a) above is detected by counting how many groups are migrated and
+ * stopping when they've all been moved.  Case b) is detected by monitoring the
+ * time_zero field, which is updated as each group is migrated.
+ *
+ * The return value is the earliest time that more migration could be needed, or
+ * zero if there's no need to schedule more work because the lists are empty.
+ */
+STATIC unsigned long
+_xfs_mru_cache_migrate(
+	xfs_mru_cache_t	*mru,
+	unsigned long	now)
+{
+	unsigned int	grp;
+	unsigned int	migrated = 0;
+	struct list_head *lru_list;
+
+	/* Nothing to do if the data store is empty. */
+	if (!mru->time_zero)
+		return 0;
+
+	/* While time zero is older than the time spanned by all the lists. */
+	while (mru->time_zero <= now - mru->grp_count * mru->grp_time) {
+
+		/*
+		 * If the LRU list isn't empty, migrate its elements to the tail
+		 * of the reap list.
+		 */
+		lru_list = mru->lists + mru->lru_grp;
+		if (!list_empty(lru_list))
+			list_splice_init(lru_list, mru->reap_list.prev);
+
+		/*
+		 * Advance the LRU group number, freeing the old LRU list to
+		 * become the new MRU list; advance time zero accordingly.
+		 */
+		mru->lru_grp = (mru->lru_grp + 1) % mru->grp_count;
+		mru->time_zero += mru->grp_time;
+
+		/*
+		 * If reaping is so far behind that all the elements on all the
+		 * lists have been migrated to the reap list, it's now empty.
+		 */
+		if (++migrated == mru->grp_count) {
+			mru->lru_grp = 0;
+			mru->time_zero = 0;
+			return 0;
+		}
+	}
+
+	/* Find the first non-empty list from the LRU end. */
+	for (grp = 0; grp < mru->grp_count; grp++) {
+
+		/* Check the grp'th list from the LRU end. */
+		lru_list = mru->lists + ((mru->lru_grp + grp) % mru->grp_count);
+		if (!list_empty(lru_list))
+			return mru->time_zero +
+			       (mru->grp_count + grp) * mru->grp_time;
+	}
+
+	/* All the lists must be empty. */
+	mru->lru_grp = 0;
+	mru->time_zero = 0;
+	return 0;
+}
+
+/*
+ * When inserting or doing a lookup, an element needs to be inserted into the
+ * MRU list.  The lists must be migrated first to ensure that they're
+ * up-to-date, otherwise the new element could be given a shorter lifetime in
+ * the cache than it should.
+ */
+STATIC void
+_xfs_mru_cache_list_insert(
+	xfs_mru_cache_t		*mru,
+	xfs_mru_cache_elem_t	*elem)
+{
+	unsigned int	grp = 0;
+	unsigned long	now = jiffies;
+
+	/*
+	 * If the data store is empty, initialise time zero, leave grp set to
+	 * zero and start the work queue timer if necessary.  Otherwise, set grp
+	 * to the number of group times that have elapsed since time zero.
+	 */
+	if (!_xfs_mru_cache_migrate(mru, now)) {
+		mru->time_zero = now;
+		if (!mru->next_reap)
+			mru->next_reap = mru->grp_count * mru->grp_time;
+	} else {
+		grp = (now - mru->time_zero) / mru->grp_time;
+		grp = (mru->lru_grp + grp) % mru->grp_count;
+	}
+
+	/* Insert the element at the tail of the corresponding list. */
+	list_add_tail(&elem->list_node, mru->lists + grp);
+}
+
+/*
+ * When destroying or reaping, all the elements that were migrated to the reap
+ * list need to be deleted.  For each element this involves removing it from the
+ * data store, removing it from the reap list, calling the client's free
+ * function and deleting the element from the element zone.
+ */
+STATIC void
+_xfs_mru_cache_clear_reap_list(
+	xfs_mru_cache_t		*mru)
+{
+	xfs_mru_cache_elem_t	*elem, *next;
+	struct list_head	tmp;
+
+	INIT_LIST_HEAD(&tmp);
+	list_for_each_entry_safe(elem, next, &mru->reap_list, list_node) {
+
+		/* Remove the element from the data store. */
+		radix_tree_delete(&mru->store, elem->key);
+
+		/*
+		 * remove to temp list so it can be freed without
+		 * needing to hold the lock
+		 */
+		list_move(&elem->list_node, &tmp);
+	}
+	mutex_spinunlock(&mru->lock, 0);
+
+	list_for_each_entry_safe(elem, next, &tmp, list_node) {
+
+		/* Remove the element from the reap list. */
+		list_del_init(&elem->list_node);
+
+		/* Call the client's free function with the key and value pointer. */
+		mru->free_func(elem->key, elem->value);
+
+		/* Free the element structure. */
+		kmem_zone_free(xfs_mru_elem_zone, elem);
+	}
+
+	mutex_spinlock(&mru->lock);
+}
+
+/*
+ * We fire the reap timer every group expiry interval so
+ * we always have a reaper ready to run. This makes shutdown
+ * and flushing of the reaper easy to do. Hence we need to
+ * keep when the next reap must occur so we can determine
+ * at each interval whether there is anything we need to do.
+ */
+STATIC void
+_xfs_mru_cache_reap(
+	struct work_struct	*work)
+{
+	xfs_mru_cache_t		*mru = container_of(work, xfs_mru_cache_t, work.work);
+	unsigned long		now;
+
+	ASSERT(mru && mru->lists);
+	if (!mru || !mru->lists)
+		return;
+
+	mutex_spinlock(&mru->lock);
+	now = jiffies;
+	if (mru->reap_all ||
+	    (mru->next_reap && time_after(now, mru->next_reap))) {
+		if (mru->reap_all)
+			now += mru->grp_count * mru->grp_time * 2;
+		mru->next_reap = _xfs_mru_cache_migrate(mru, now);
+		_xfs_mru_cache_clear_reap_list(mru);
+	}
+
+	/*
+	 * the process that triggered the reap_all is responsible
+	 * for restating the periodic reap if it is required.
+	 */
+	if (!mru->reap_all)
+		queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
+	mru->reap_all = 0;
+	mutex_spinunlock(&mru->lock, 0);
+}
+
+int
+xfs_mru_cache_init(void)
+{
+	xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t),
+	                                 "xfs_mru_cache_elem");
+	if (!xfs_mru_elem_zone)
+		return ENOMEM;
+
+	xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache");
+	if (!xfs_mru_reap_wq) {
+		kmem_zone_destroy(xfs_mru_elem_zone);
+		return ENOMEM;
+	}
+
+	return 0;
+}
+
+void
+xfs_mru_cache_uninit(void)
+{
+	destroy_workqueue(xfs_mru_reap_wq);
+	kmem_zone_destroy(xfs_mru_elem_zone);
+}
+
+/*
+ * To initialise a struct xfs_mru_cache pointer, call xfs_mru_cache_create()
+ * with the address of the pointer, a lifetime value in milliseconds, a group
+ * count and a free function to use when deleting elements.  This function
+ * returns 0 if the initialisation was successful.
+ */
+int
+xfs_mru_cache_create(
+	xfs_mru_cache_t		**mrup,
+	unsigned int		lifetime_ms,
+	unsigned int		grp_count,
+	xfs_mru_cache_free_func_t free_func)
+{
+	xfs_mru_cache_t	*mru = NULL;
+	int		err = 0, grp;
+	unsigned int	grp_time;
+
+	if (mrup)
+		*mrup = NULL;
+
+	if (!mrup || !grp_count || !lifetime_ms || !free_func)
+		return EINVAL;
+
+	if (!(grp_time = msecs_to_jiffies(lifetime_ms) / grp_count))
+		return EINVAL;
+
+	if (!(mru = kmem_zalloc(sizeof(*mru), KM_SLEEP)))
+		return ENOMEM;
+
+	/* An extra list is needed to avoid reaping up to a grp_time early. */
+	mru->grp_count = grp_count + 1;
+	mru->lists = kmem_alloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
+
+	if (!mru->lists) {
+		err = ENOMEM;
+		goto exit;
+	}
+
+	for (grp = 0; grp < mru->grp_count; grp++)
+		INIT_LIST_HEAD(mru->lists + grp);
+
+	/*
+	 * We use GFP_KERNEL radix tree preload and do inserts under a
+	 * spinlock so GFP_ATOMIC is appropriate for the radix tree itself.
+	 */
+	INIT_RADIX_TREE(&mru->store, GFP_ATOMIC);
+	INIT_LIST_HEAD(&mru->reap_list);
+	spinlock_init(&mru->lock, "xfs_mru_cache");
+	INIT_DELAYED_WORK(&mru->work, _xfs_mru_cache_reap);
+
+	mru->grp_time  = grp_time;
+	mru->free_func = free_func;
+
+	/* start up the reaper event */
+	mru->next_reap = 0;
+	mru->reap_all = 0;
+	queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
+
+	*mrup = mru;
+
+exit:
+	if (err && mru && mru->lists)
+		kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
+	if (err && mru)
+		kmem_free(mru, sizeof(*mru));
+
+	return err;
+}
+
+/*
+ * Call xfs_mru_cache_flush() to flush out all cached entries, calling their
+ * free functions as they're deleted.  When this function returns, the caller is
+ * guaranteed that all the free functions for all the elements have finished
+ * executing.
+ *
+ * While we are flushing, we stop the periodic reaper event from triggering.
+ * Normally, we want to restart this periodic event, but if we are shutting
+ * down the cache we do not want it restarted. hence the restart parameter
+ * where 0 = do not restart reaper and 1 = restart reaper.
+ */
+void
+xfs_mru_cache_flush(
+	xfs_mru_cache_t		*mru,
+	int			restart)
+{
+	if (!mru || !mru->lists)
+		return;
+
+	cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
+
+	mutex_spinlock(&mru->lock);
+	mru->reap_all = 1;
+	mutex_spinunlock(&mru->lock, 0);
+
+	queue_work(xfs_mru_reap_wq, &mru->work.work);
+	flush_workqueue(xfs_mru_reap_wq);
+
+	mutex_spinlock(&mru->lock);
+	WARN_ON_ONCE(mru->reap_all != 0);
+	mru->reap_all = 0;
+	if (restart)
+		queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
+	mutex_spinunlock(&mru->lock, 0);
+}
+
+void
+xfs_mru_cache_destroy(
+	xfs_mru_cache_t		*mru)
+{
+	if (!mru || !mru->lists)
+		return;
+
+	/* we don't want the reaper to restart here */
+	xfs_mru_cache_flush(mru, 0);
+
+	kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
+	kmem_free(mru, sizeof(*mru));
+}
+
+/*
+ * To insert an element, call xfs_mru_cache_insert() with the data store, the
+ * element's key and the client data pointer.  This function returns 0 on
+ * success or ENOMEM if memory for the data element couldn't be allocated.
+ */
+int
+xfs_mru_cache_insert(
+	xfs_mru_cache_t	*mru,
+	unsigned long	key,
+	void		*value)
+{
+	xfs_mru_cache_elem_t *elem;
+
+	ASSERT(mru && mru->lists);
+	if (!mru || !mru->lists)
+		return EINVAL;
+
+	elem = kmem_zone_zalloc(xfs_mru_elem_zone, KM_SLEEP);
+	if (!elem)
+		return ENOMEM;
+
+	if (radix_tree_preload(GFP_KERNEL)) {
+		kmem_zone_free(xfs_mru_elem_zone, elem);
+		return ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&elem->list_node);
+	elem->key = key;
+	elem->value = value;
+
+	mutex_spinlock(&mru->lock);
+
+	radix_tree_insert(&mru->store, key, elem);
+	radix_tree_preload_end();
+	_xfs_mru_cache_list_insert(mru, elem);
+
+	mutex_spinunlock(&mru->lock, 0);
+
+	return 0;
+}
+
+/*
+ * To remove an element without calling the free function, call
+ * xfs_mru_cache_remove() with the data store and the element's key.  On success
+ * the client data pointer for the removed element is returned, otherwise this
+ * function will return a NULL pointer.
+ */
+void *
+xfs_mru_cache_remove(
+	xfs_mru_cache_t	*mru,
+	unsigned long	key)
+{
+	xfs_mru_cache_elem_t *elem;
+	void		*value = NULL;
+
+	ASSERT(mru && mru->lists);
+	if (!mru || !mru->lists)
+		return NULL;
+
+	mutex_spinlock(&mru->lock);
+	elem = radix_tree_delete(&mru->store, key);
+	if (elem) {
+		value = elem->value;
+		list_del(&elem->list_node);
+	}
+
+	mutex_spinunlock(&mru->lock, 0);
+
+	if (elem)
+		kmem_zone_free(xfs_mru_elem_zone, elem);
+
+	return value;
+}
+
+/*
+ * To remove and element and call the free function, call xfs_mru_cache_delete()
+ * with the data store and the element's key.
+ */
+void
+xfs_mru_cache_delete(
+	xfs_mru_cache_t	*mru,
+	unsigned long	key)
+{
+	void		*value = xfs_mru_cache_remove(mru, key);
+
+	if (value)
+		mru->free_func(key, value);
+}
+
+/*
+ * To look up an element using its key, call xfs_mru_cache_lookup() with the
+ * data store and the element's key.  If found, the element will be moved to the
+ * head of the MRU list to indicate that it's been touched.
+ *
+ * The internal data structures are protected by a spinlock that is STILL HELD
+ * when this function returns.  Call xfs_mru_cache_done() to release it.  Note
+ * that it is not safe to call any function that might sleep in the interim.
+ *
+ * The implementation could have used reference counting to avoid this
+ * restriction, but since most clients simply want to get, set or test a member
+ * of the returned data structure, the extra per-element memory isn't warranted.
+ *
+ * If the element isn't found, this function returns NULL and the spinlock is
+ * released.  xfs_mru_cache_done() should NOT be called when this occurs.
+ */
+void *
+xfs_mru_cache_lookup(
+	xfs_mru_cache_t	*mru,
+	unsigned long	key)
+{
+	xfs_mru_cache_elem_t *elem;
+
+	ASSERT(mru && mru->lists);
+	if (!mru || !mru->lists)
+		return NULL;
+
+	mutex_spinlock(&mru->lock);
+	elem = radix_tree_lookup(&mru->store, key);
+	if (elem) {
+		list_del(&elem->list_node);
+		_xfs_mru_cache_list_insert(mru, elem);
+	}
+	else
+		mutex_spinunlock(&mru->lock, 0);
+
+	return elem ? elem->value : NULL;
+}
+
+/*
+ * To look up an element using its key, but leave its location in the internal
+ * lists alone, call xfs_mru_cache_peek().  If the element isn't found, this
+ * function returns NULL.
+ *
+ * See the comments above the declaration of the xfs_mru_cache_lookup() function
+ * for important locking information pertaining to this call.
+ */
+void *
+xfs_mru_cache_peek(
+	xfs_mru_cache_t	*mru,
+	unsigned long	key)
+{
+	xfs_mru_cache_elem_t *elem;
+
+	ASSERT(mru && mru->lists);
+	if (!mru || !mru->lists)
+		return NULL;
+
+	mutex_spinlock(&mru->lock);
+	elem = radix_tree_lookup(&mru->store, key);
+	if (!elem)
+		mutex_spinunlock(&mru->lock, 0);
+
+	return elem ? elem->value : NULL;
+}
+
+/*
+ * To release the internal data structure spinlock after having performed an
+ * xfs_mru_cache_lookup() or an xfs_mru_cache_peek(), call xfs_mru_cache_done()
+ * with the data store pointer.
+ */
+void
+xfs_mru_cache_done(
+	xfs_mru_cache_t	*mru)
+{
+	mutex_spinunlock(&mru->lock, 0);
+}
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
new file mode 100644
index 000000000000..624fd10ee8e5
--- /dev/null
+++ b/fs/xfs/xfs_mru_cache.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2006-2007 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_MRU_CACHE_H__
+#define __XFS_MRU_CACHE_H__
+
+
+/* Function pointer type for callback to free a client's data pointer. */
+typedef void (*xfs_mru_cache_free_func_t)(unsigned long, void*);
+
+typedef struct xfs_mru_cache
+{
+	struct radix_tree_root	store;     /* Core storage data structure.  */
+	struct list_head	*lists;    /* Array of lists, one per grp.  */
+	struct list_head	reap_list; /* Elements overdue for reaping. */
+	spinlock_t		lock;      /* Lock to protect this struct.  */
+	unsigned int		grp_count; /* Number of discrete groups.    */
+	unsigned int		grp_time;  /* Time period spanned by grps.  */
+	unsigned int		lru_grp;   /* Group containing time zero.   */
+	unsigned long		time_zero; /* Time first element was added. */
+	unsigned long		next_reap; /* Time that the reaper should
+					      next do something. */
+	unsigned int		reap_all;  /* if set, reap all lists */
+	xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
+	struct delayed_work	work;      /* Workqueue data for reaping.   */
+} xfs_mru_cache_t;
+
+int xfs_mru_cache_init(void);
+void xfs_mru_cache_uninit(void);
+int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
+			     unsigned int grp_count,
+			     xfs_mru_cache_free_func_t free_func);
+void xfs_mru_cache_flush(xfs_mru_cache_t *mru, int restart);
+void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
+int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
+				void *value);
+void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key);
+void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key);
+void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key);
+void *xfs_mru_cache_peek(struct xfs_mru_cache *mru, unsigned long key);
+void xfs_mru_cache_done(struct xfs_mru_cache *mru);
+
+#endif /* __XFS_MRU_CACHE_H__ */
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index c343fde10ef9..11f5ea29a038 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -51,6 +51,8 @@
 #include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_clnt.h"
+#include "xfs_mru_cache.h"
+#include "xfs_filestream.h"
 #include "xfs_fsops.h"
 
 STATIC int	xfs_sync(bhv_desc_t *, int, cred_t *);
@@ -81,6 +83,8 @@ xfs_init(void)
 	xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
 	xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
 	xfs_acl_zone_init(xfs_acl_zone, "xfs_acl");
+	xfs_mru_cache_init();
+	xfs_filestream_init();
 
 	/*
 	 * The size of the zone allocated buf log item is the maximum
@@ -164,6 +168,8 @@ xfs_cleanup(void)
 	xfs_cleanup_procfs();
 	xfs_sysctl_unregister();
 	xfs_refcache_destroy();
+	xfs_filestream_uninit();
+	xfs_mru_cache_uninit();
 	xfs_acl_zone_destroy(xfs_acl_zone);
 
 #ifdef XFS_DIR2_TRACE
@@ -320,6 +326,9 @@ xfs_start_flags(
 	else
 		mp->m_flags &= ~XFS_MOUNT_BARRIER;
 
+	if (ap->flags2 & XFSMNT2_FILESTREAMS)
+		mp->m_flags |= XFS_MOUNT_FILESTREAMS;
+
 	return 0;
 }
 
@@ -518,6 +527,9 @@ xfs_mount(
 	if (mp->m_flags & XFS_MOUNT_BARRIER)
 		xfs_mountfs_check_barriers(mp);
 
+	if ((error = xfs_filestream_mount(mp)))
+		goto error2;
+
 	error = XFS_IOINIT(vfsp, args, flags);
 	if (error)
 		goto error2;
@@ -575,6 +587,13 @@ xfs_unmount(
 	 */
 	xfs_refcache_purge_mp(mp);
 
+	/*
+	 * Blow away any referenced inode in the filestreams cache.
+	 * This can and will cause log traffic as inodes go inactive
+	 * here.
+	 */
+	xfs_filestream_unmount(mp);
+
 	XFS_bflush(mp->m_ddev_targp);
 	error = xfs_unmount_flush(mp, 0);
 	if (error)
@@ -694,6 +713,7 @@ xfs_mntupdate(
 			mp->m_flags &= ~XFS_MOUNT_BARRIER;
 		}
 	} else if (!(vfsp->vfs_flag & VFS_RDONLY)) {	/* rw -> ro */
+		xfs_filestream_flush(mp);
 		bhv_vfs_sync(vfsp, SYNC_DATA_QUIESCE, NULL);
 		xfs_attr_quiesce(mp);
 		vfsp->vfs_flag |= VFS_RDONLY;
@@ -909,6 +929,9 @@ xfs_sync(
 {
 	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
 
+	if (flags & SYNC_IOWAIT)
+		xfs_filestream_flush(mp);
+
 	return xfs_syncsub(mp, flags, NULL);
 }
 
@@ -1659,6 +1682,7 @@ xfs_vget(
 					 * in stat(). */
 #define MNTOPT_ATTR2	"attr2"		/* do use attr2 attribute format */
 #define MNTOPT_NOATTR2	"noattr2"	/* do not use attr2 attribute format */
+#define MNTOPT_FILESTREAM  "filestreams" /* use filestreams allocator */
 
 STATIC unsigned long
 suffix_strtoul(char *s, char **endp, unsigned int base)
@@ -1845,6 +1869,8 @@ xfs_parseargs(
 			args->flags |= XFSMNT_ATTR2;
 		} else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
 			args->flags &= ~XFSMNT_ATTR2;
+		} else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
+			args->flags2 |= XFSMNT2_FILESTREAMS;
 		} else if (!strcmp(this_char, "osyncisdsync")) {
 			/* no-op, this is now the default */
 			cmn_err(CE_WARN,
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 2067d0b0a10e..60fd0be90a16 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -51,6 +51,7 @@
 #include "xfs_refcache.h"
 #include "xfs_trans_space.h"
 #include "xfs_log_priv.h"
+#include "xfs_filestream.h"
 
 STATIC int
 xfs_open(
@@ -783,6 +784,8 @@ xfs_setattr(
 				di_flags |= XFS_DIFLAG_PROJINHERIT;
 			if (vap->va_xflags & XFS_XFLAG_NODEFRAG)
 				di_flags |= XFS_DIFLAG_NODEFRAG;
+			if (vap->va_xflags & XFS_XFLAG_FILESTREAM)
+				di_flags |= XFS_DIFLAG_FILESTREAM;
 			if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
 				if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
 					di_flags |= XFS_DIFLAG_RTINHERIT;
@@ -1536,7 +1539,17 @@ xfs_release(
 	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
 		return 0;
 
-	if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+	if (!XFS_FORCED_SHUTDOWN(mp)) {
+		/*
+		 * If we are using filestreams, and we have an unlinked
+		 * file that we are processing the last close on, then nothing
+		 * will be able to reopen and write to this file. Purge this
+		 * inode from the filestreams cache so that it doesn't delay
+		 * teardown of the inode.
+		 */
+		if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
+			xfs_filestream_deassociate(ip);
+
 		/*
 		 * If we previously truncated this file and removed old data
 		 * in the process, we want to initiate "early" writeout on
@@ -1551,7 +1564,6 @@ xfs_release(
 			bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE);
 	}
 
-
 #ifdef HAVE_REFCACHE
 	/* If we are in the NFS reference cache then don't do this now */
 	if (ip->i_refcache)
@@ -2541,6 +2553,15 @@ xfs_remove(
 	 */
 	xfs_refcache_purge_ip(ip);
 
+	/*
+	 * If we are using filestreams, kill the stream association.
+	 * If the file is still open it may get a new one but that
+	 * will get killed on last close in xfs_close() so we don't
+	 * have to worry about that.
+	 */
+	if (link_zero && xfs_inode_is_filestream(ip))
+		xfs_filestream_deassociate(ip);
+
 	vn_trace_exit(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
 
 	/*
-- 
cgit v1.2.3-59-g8ed1b


From b11f94d537e6b69f13770143fd7ded3d09fdbaab Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Wed, 11 Jul 2007 11:09:33 +1000
Subject: [XFS] Quota inode has no parent.

Avoid using a special "zero inode" as the parent of the quota inode as
this can confuse the filestreams code into thinking the quota inode has a
parent. We do not want the quota inode to follow filestreams allocation
rules, so pass a NULL as the parent inode and detect this condition when
doing stream associations.

SGI-PV: 964469
SGI-Modid: xfs-linux-melb:xfs-kern:29098a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/quota/xfs_qm.c |  3 +--
 fs/xfs/xfs_inode.c    | 13 +++++++++----
 2 files changed, 10 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index eeb3d9cc0e8e..7def4c699343 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -65,7 +65,6 @@ kmem_zone_t	*qm_dqtrxzone;
 static struct shrinker *xfs_qm_shaker;
 
 static cred_t	xfs_zerocr;
-static xfs_inode_t	xfs_zeroino;
 
 STATIC void	xfs_qm_list_init(xfs_dqlist_t *, char *, int);
 STATIC void	xfs_qm_list_destroy(xfs_dqlist_t *);
@@ -1415,7 +1414,7 @@ xfs_qm_qino_alloc(
 		return error;
 	}
 
-	if ((error = xfs_dir_ialloc(&tp, &xfs_zeroino, S_IFREG, 1, 0,
+	if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0,
 				   &xfs_zerocr, 0, 1, ip, &committed))) {
 		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
 				 XFS_TRANS_ABORT);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 2ef100be6c4f..59829523ab07 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1078,6 +1078,11 @@ xfs_iread_extents(
  * also returns the [locked] bp pointing to the head of the freelist
  * as ialloc_context.  The caller should hold this buffer across
  * the commit and pass it back into this routine on the second call.
+ *
+ * If we are allocating quota inodes, we do not have a parent inode
+ * to attach to or associate with (i.e. pip == NULL) because they
+ * are not linked into the directory structure - they are attached
+ * directly to the superblock - and so have no parent.
  */
 int
 xfs_ialloc(
@@ -1103,7 +1108,7 @@ xfs_ialloc(
 	 * Call the space management code to pick
 	 * the on-disk inode to be allocated.
 	 */
-	error = xfs_dialloc(tp, pip->i_ino, mode, okalloc,
+	error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
 			    ialloc_context, call_again, &ino);
 	if (error != 0) {
 		return error;
@@ -1157,7 +1162,7 @@ xfs_ialloc(
 	if ((prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1))
 		xfs_bump_ino_vers2(tp, ip);
 
-	if (XFS_INHERIT_GID(pip, vp->v_vfsp)) {
+	if (pip && XFS_INHERIT_GID(pip, vp->v_vfsp)) {
 		ip->i_d.di_gid = pip->i_d.di_gid;
 		if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) {
 			ip->i_d.di_mode |= S_ISGID;
@@ -1199,7 +1204,7 @@ xfs_ialloc(
 		flags |= XFS_ILOG_DEV;
 		break;
 	case S_IFREG:
-		if (xfs_inode_is_filestream(pip)) {
+		if (pip && xfs_inode_is_filestream(pip)) {
 			error = xfs_filestream_associate(pip, ip);
 			if (error < 0)
 				return -error;
@@ -1208,7 +1213,7 @@ xfs_ialloc(
 		}
 		/* fall through */
 	case S_IFDIR:
-		if (pip->i_d.di_flags & XFS_DIFLAG_ANY) {
+		if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
 			uint	di_flags = 0;
 
 			if ((mode & S_IFMT) == S_IFDIR) {
-- 
cgit v1.2.3-59-g8ed1b


From 3a59c94c4b48878c6af047cdfc8c137d0fa7a0f0 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Wed, 11 Jul 2007 11:09:47 +1000
Subject: [XFS] Clean up function name handling in tracing code

Remove the hardcoded "fnames" for tracing, and just embed them in tracing
macros via __FUNCTION__. Kills a lot of #ifdefs too.

SGI-PV: 967353
SGI-Modid: xfs-linux-melb:xfs-kern:29099a

Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_alloc.c      |  53 +++-------
 fs/xfs/xfs_bmap.c       | 268 ++++++++++++++++++++----------------------------
 fs/xfs/xfs_bmap.h       |   6 +-
 fs/xfs/xfs_bmap_btree.c |  88 +++-------------
 fs/xfs/xfs_inode.c      |   8 +-
 5 files changed, 150 insertions(+), 273 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 98f95d4c4bcc..012a649a19c3 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -55,17 +55,17 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 ktrace_t *xfs_alloc_trace_buf;
 
 #define	TRACE_ALLOC(s,a)	\
-	xfs_alloc_trace_alloc(fname, s, a, __LINE__)
+	xfs_alloc_trace_alloc(__FUNCTION__, s, a, __LINE__)
 #define	TRACE_FREE(s,a,b,x,f)	\
-	xfs_alloc_trace_free(fname, s, mp, a, b, x, f, __LINE__)
+	xfs_alloc_trace_free(__FUNCTION__, s, mp, a, b, x, f, __LINE__)
 #define	TRACE_MODAGF(s,a,f)	\
-	xfs_alloc_trace_modagf(fname, s, mp, a, f, __LINE__)
-#define	TRACE_BUSY(fname,s,ag,agb,l,sl,tp)	\
-	xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__)
-#define	TRACE_UNBUSY(fname,s,ag,sl,tp)	\
-	xfs_alloc_trace_busy(fname, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__)
-#define	TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp)	\
-	xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__)
+	xfs_alloc_trace_modagf(__FUNCTION__, s, mp, a, f, __LINE__)
+#define	TRACE_BUSY(__FUNCTION__,s,ag,agb,l,sl,tp)	\
+	xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__)
+#define	TRACE_UNBUSY(__FUNCTION__,s,ag,sl,tp)	\
+	xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__)
+#define	TRACE_BUSYSEARCH(__FUNCTION__,s,ag,agb,l,sl,tp)	\
+	xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__)
 #else
 #define	TRACE_ALLOC(s,a)
 #define	TRACE_FREE(s,a,b,x,f)
@@ -420,7 +420,7 @@ xfs_alloc_read_agfl(
  */
 STATIC void
 xfs_alloc_trace_alloc(
-	char		*name,		/* function tag string */
+	const char	*name,		/* function tag string */
 	char		*str,		/* additional string */
 	xfs_alloc_arg_t	*args,		/* allocation argument structure */
 	int		line)		/* source line number */
@@ -453,7 +453,7 @@ xfs_alloc_trace_alloc(
  */
 STATIC void
 xfs_alloc_trace_free(
-	char		*name,		/* function tag string */
+	const char	*name,		/* function tag string */
 	char		*str,		/* additional string */
 	xfs_mount_t	*mp,		/* file system mount point */
 	xfs_agnumber_t	agno,		/* allocation group number */
@@ -479,7 +479,7 @@ xfs_alloc_trace_free(
  */
 STATIC void
 xfs_alloc_trace_modagf(
-	char		*name,		/* function tag string */
+	const char	*name,		/* function tag string */
 	char		*str,		/* additional string */
 	xfs_mount_t	*mp,		/* file system mount point */
 	xfs_agf_t	*agf,		/* new agf value */
@@ -507,7 +507,7 @@ xfs_alloc_trace_modagf(
 
 STATIC void
 xfs_alloc_trace_busy(
-	char		*name,		/* function tag string */
+	const char	*name,		/* function tag string */
 	char		*str,		/* additional string */
 	xfs_mount_t	*mp,		/* file system mount point */
 	xfs_agnumber_t	agno,		/* allocation group number */
@@ -549,9 +549,6 @@ xfs_alloc_ag_vextent(
 	xfs_alloc_arg_t	*args)	/* argument structure for allocation */
 {
 	int		error=0;
-#ifdef XFS_ALLOC_TRACE
-	static char	fname[] = "xfs_alloc_ag_vextent";
-#endif
 
 	ASSERT(args->minlen > 0);
 	ASSERT(args->maxlen > 0);
@@ -635,9 +632,6 @@ xfs_alloc_ag_vextent_exact(
 	xfs_agblock_t	fbno;	/* start block of found extent */
 	xfs_agblock_t	fend;	/* end block of found extent */
 	xfs_extlen_t	flen;	/* length of found extent */
-#ifdef XFS_ALLOC_TRACE
-	static char	fname[] = "xfs_alloc_ag_vextent_exact";
-#endif
 	int		i;	/* success/failure of operation */
 	xfs_agblock_t	maxend;	/* end of maximal extent */
 	xfs_agblock_t	minend;	/* end of minimal extent */
@@ -737,9 +731,6 @@ xfs_alloc_ag_vextent_near(
 	xfs_btree_cur_t	*bno_cur_gt;	/* cursor for bno btree, right side */
 	xfs_btree_cur_t	*bno_cur_lt;	/* cursor for bno btree, left side */
 	xfs_btree_cur_t	*cnt_cur;	/* cursor for count btree */
-#ifdef XFS_ALLOC_TRACE
-	static char	fname[] = "xfs_alloc_ag_vextent_near";
-#endif
 	xfs_agblock_t	gtbno;		/* start bno of right side entry */
 	xfs_agblock_t	gtbnoa;		/* aligned ... */
 	xfs_extlen_t	gtdiff;		/* difference to right side entry */
@@ -1270,9 +1261,6 @@ xfs_alloc_ag_vextent_size(
 	int		error;		/* error result */
 	xfs_agblock_t	fbno;		/* start of found freespace */
 	xfs_extlen_t	flen;		/* length of found freespace */
-#ifdef XFS_ALLOC_TRACE
-	static char	fname[] = "xfs_alloc_ag_vextent_size";
-#endif
 	int		i;		/* temp status variable */
 	xfs_agblock_t	rbno;		/* returned block number */
 	xfs_extlen_t	rlen;		/* length of returned extent */
@@ -1427,9 +1415,6 @@ xfs_alloc_ag_vextent_small(
 	int		error;
 	xfs_agblock_t	fbno;
 	xfs_extlen_t	flen;
-#ifdef XFS_ALLOC_TRACE
-	static char	fname[] = "xfs_alloc_ag_vextent_small";
-#endif
 	int		i;
 
 	if ((error = xfs_alloc_decrement(ccur, 0, &i)))
@@ -1516,9 +1501,6 @@ xfs_free_ag_extent(
 	xfs_btree_cur_t	*bno_cur;	/* cursor for by-block btree */
 	xfs_btree_cur_t	*cnt_cur;	/* cursor for by-size btree */
 	int		error;		/* error return value */
-#ifdef XFS_ALLOC_TRACE
-	static char	fname[] = "xfs_free_ag_extent";
-#endif
 	xfs_agblock_t	gtbno;		/* start of right neighbor block */
 	xfs_extlen_t	gtlen;		/* length of right neighbor block */
 	int		haveleft;	/* have a left neighbor block */
@@ -2003,9 +1985,6 @@ xfs_alloc_get_freelist(
 	xfs_agblock_t	bno;	/* block number returned */
 	int		error;
 	int		logflags;
-#ifdef XFS_ALLOC_TRACE
-	static char	fname[] = "xfs_alloc_get_freelist";
-#endif
 	xfs_mount_t	*mp;	/* mount structure */
 	xfs_perag_t	*pag;	/* per allocation group data */
 
@@ -2128,9 +2107,6 @@ xfs_alloc_put_freelist(
 	__be32			*blockp;/* pointer to array entry */
 	int			error;
 	int			logflags;
-#ifdef XFS_ALLOC_TRACE
-	static char		fname[] = "xfs_alloc_put_freelist";
-#endif
 	xfs_mount_t		*mp;	/* mount structure */
 	xfs_perag_t		*pag;	/* per allocation group data */
 
@@ -2263,9 +2239,6 @@ xfs_alloc_vextent(
 	xfs_agblock_t	agsize;	/* allocation group size */
 	int		error;
 	int		flags;	/* XFS_ALLOC_FLAG_... locking flags */
-#ifdef XFS_ALLOC_TRACE
-	static char	fname[] = "xfs_alloc_vextent";
-#endif
 	xfs_extlen_t	minleft;/* minimum left value, temp copy */
 	xfs_mount_t	*mp;	/* mount structure pointer */
 	xfs_agnumber_t	sagno;	/* starting allocation group number */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 51ba689a4552..94b5c5fe2681 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -278,7 +278,7 @@ xfs_bmap_isaeof(
 STATIC void
 xfs_bmap_trace_addentry(
 	int		opcode,		/* operation */
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry(ies) */
@@ -292,7 +292,7 @@ xfs_bmap_trace_addentry(
  */
 STATIC void
 xfs_bmap_trace_delete(
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry(entries) deleted */
@@ -305,7 +305,7 @@ xfs_bmap_trace_delete(
  */
 STATIC void
 xfs_bmap_trace_insert(
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry(entries) inserted */
@@ -319,7 +319,7 @@ xfs_bmap_trace_insert(
  */
 STATIC void
 xfs_bmap_trace_post_update(
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry updated */
@@ -330,17 +330,25 @@ xfs_bmap_trace_post_update(
  */
 STATIC void
 xfs_bmap_trace_pre_update(
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry to be updated */
 	int		whichfork);	/* data or attr fork */
 
+#define	XFS_BMAP_TRACE_DELETE(d,ip,i,c,w)	\
+	xfs_bmap_trace_delete(__FUNCTION__,d,ip,i,c,w)
+#define	XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w)	\
+	xfs_bmap_trace_insert(__FUNCTION__,d,ip,i,c,r1,r2,w)
+#define	XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w)	\
+	xfs_bmap_trace_post_update(__FUNCTION__,d,ip,i,w)
+#define	XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w)	\
+	xfs_bmap_trace_pre_update(__FUNCTION__,d,ip,i,w)
 #else
-#define	xfs_bmap_trace_delete(f,d,ip,i,c,w)
-#define	xfs_bmap_trace_insert(f,d,ip,i,c,r1,r2,w)
-#define	xfs_bmap_trace_post_update(f,d,ip,i,w)
-#define	xfs_bmap_trace_pre_update(f,d,ip,i,w)
+#define	XFS_BMAP_TRACE_DELETE(d,ip,i,c,w)
+#define	XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w)
+#define	XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w)
+#define	XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w)
 #endif	/* XFS_BMAP_TRACE */
 
 /*
@@ -532,9 +540,6 @@ xfs_bmap_add_extent(
 	xfs_filblks_t		da_new; /* new count del alloc blocks used */
 	xfs_filblks_t		da_old; /* old count del alloc blocks used */
 	int			error;	/* error return value */
-#ifdef XFS_BMAP_TRACE
-	static char		fname[] = "xfs_bmap_add_extent";
-#endif
 	xfs_ifork_t		*ifp;	/* inode fork ptr */
 	int			logflags; /* returned value */
 	xfs_extnum_t		nextents; /* number of extents in file now */
@@ -552,8 +557,8 @@ xfs_bmap_add_extent(
 	 * already extents in the list.
 	 */
 	if (nextents == 0) {
-		xfs_bmap_trace_insert(fname, "insert empty", ip, 0, 1, new,
-			NULL, whichfork);
+		XFS_BMAP_TRACE_INSERT("insert empty", ip, 0, 1, new, NULL,
+			whichfork);
 		xfs_iext_insert(ifp, 0, 1, new);
 		ASSERT(cur == NULL);
 		ifp->if_lastex = 0;
@@ -711,9 +716,6 @@ xfs_bmap_add_extent_delay_real(
 	int			diff;	/* temp value */
 	xfs_bmbt_rec_t		*ep;	/* extent entry for idx */
 	int			error;	/* error return value */
-#ifdef XFS_BMAP_TRACE
-	static char		fname[] = "xfs_bmap_add_extent_delay_real";
-#endif
 	int			i;	/* temp state */
 	xfs_ifork_t		*ifp;	/* inode fork pointer */
 	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
@@ -809,15 +811,14 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in all of a previously delayed allocation extent.
 		 * The left and right neighbors are both contiguous with new.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1,
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + PREV.br_blockcount +
 			RIGHT.br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1,
-			XFS_DATA_FORK);
-		xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2,
+		XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1,
 			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK);
 		xfs_iext_remove(ifp, idx, 2);
 		ip->i_df.if_lastex = idx - 1;
 		ip->i_d.di_nextents--;
@@ -856,15 +857,14 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in all of a previously delayed allocation extent.
 		 * The left neighbor is contiguous, the right is not.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1,
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + PREV.br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1,
+		XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1,
 			XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx - 1;
-		xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK);
 		xfs_iext_remove(ifp, idx, 1);
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
@@ -893,16 +893,13 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in all of a previously delayed allocation extent.
 		 * The right neighbor is contiguous, the left is not.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK);
 		xfs_bmbt_set_startblock(ep, new->br_startblock);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount + RIGHT.br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx;
-		xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK);
 		xfs_iext_remove(ifp, idx + 1, 1);
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
@@ -932,11 +929,9 @@ xfs_bmap_add_extent_delay_real(
 		 * Neither the left nor right neighbors are contiguous with
 		 * the new one.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK);
 		xfs_bmbt_set_startblock(ep, new->br_startblock);
-		xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx;
 		ip->i_d.di_nextents++;
 		if (cur == NULL)
@@ -964,17 +959,14 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in the first part of a previous delayed allocation.
 		 * The left neighbor is contiguous.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + new->br_blockcount);
 		xfs_bmbt_set_startoff(ep,
 			PREV.br_startoff + new->br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK);
 		temp = PREV.br_blockcount - new->br_blockcount;
-		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(ep, temp);
 		ip->i_df.if_lastex = idx - 1;
 		if (cur == NULL)
@@ -996,8 +988,7 @@ xfs_bmap_add_extent_delay_real(
 		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 			STARTBLOCKVAL(PREV.br_startblock));
 		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-		xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK);
 		*dnew = temp;
 		/* DELTA: The boundary between two in-core extents moved. */
 		temp = LEFT.br_startoff;
@@ -1010,11 +1001,11 @@ xfs_bmap_add_extent_delay_real(
 		 * Filling in the first part of a previous delayed allocation.
 		 * The left neighbor is not contiguous.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK);
 		xfs_bmbt_set_startoff(ep, new_endoff);
 		temp = PREV.br_blockcount - new->br_blockcount;
 		xfs_bmbt_set_blockcount(ep, temp);
-		xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL,
+		XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL,
 			XFS_DATA_FORK);
 		xfs_iext_insert(ifp, idx, 1, new);
 		ip->i_df.if_lastex = idx;
@@ -1047,8 +1038,7 @@ xfs_bmap_add_extent_delay_real(
 			(cur ? cur->bc_private.b.allocated : 0));
 		ep = xfs_iext_get_ext(ifp, idx + 1);
 		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-		xfs_bmap_trace_post_update(fname, "LF", ip, idx + 1,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx + 1, XFS_DATA_FORK);
 		*dnew = temp;
 		/* DELTA: One in-core extent is split in two. */
 		temp = PREV.br_startoff;
@@ -1061,17 +1051,14 @@ xfs_bmap_add_extent_delay_real(
 		 * The right neighbor is contiguous with the new allocation.
 		 */
 		temp = PREV.br_blockcount - new->br_blockcount;
-		xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx,
-			XFS_DATA_FORK);
-		xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(ep, temp);
 		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
 			new->br_startoff, new->br_startblock,
 			new->br_blockcount + RIGHT.br_blockcount,
 			RIGHT.br_state);
-		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx + 1;
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
@@ -1092,8 +1079,7 @@ xfs_bmap_add_extent_delay_real(
 		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 			STARTBLOCKVAL(PREV.br_startblock));
 		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK);
 		*dnew = temp;
 		/* DELTA: The boundary between two in-core extents moved. */
 		temp = PREV.br_startoff;
@@ -1107,10 +1093,10 @@ xfs_bmap_add_extent_delay_real(
 		 * The right neighbor is not contiguous.
 		 */
 		temp = PREV.br_blockcount - new->br_blockcount;
-		xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(ep, temp);
-		xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1,
-			new, NULL, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL,
+			XFS_DATA_FORK);
 		xfs_iext_insert(ifp, idx + 1, 1, new);
 		ip->i_df.if_lastex = idx + 1;
 		ip->i_d.di_nextents++;
@@ -1142,7 +1128,7 @@ xfs_bmap_add_extent_delay_real(
 			(cur ? cur->bc_private.b.allocated : 0));
 		ep = xfs_iext_get_ext(ifp, idx);
 		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-		xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK);
 		*dnew = temp;
 		/* DELTA: One in-core extent is split in two. */
 		temp = PREV.br_startoff;
@@ -1156,7 +1142,7 @@ xfs_bmap_add_extent_delay_real(
 		 * This case is avoided almost all the time.
 		 */
 		temp = new->br_startoff - PREV.br_startoff;
-		xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(ep, temp);
 		r[0] = *new;
 		r[1].br_state = PREV.br_state;
@@ -1164,7 +1150,7 @@ xfs_bmap_add_extent_delay_real(
 		r[1].br_startoff = new_endoff;
 		temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
 		r[1].br_blockcount = temp2;
-		xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1],
+		XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1],
 			XFS_DATA_FORK);
 		xfs_iext_insert(ifp, idx + 1, 2, &r[0]);
 		ip->i_df.if_lastex = idx + 1;
@@ -1223,13 +1209,11 @@ xfs_bmap_add_extent_delay_real(
 		}
 		ep = xfs_iext_get_ext(ifp, idx);
 		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-		xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK);
-		xfs_bmap_trace_pre_update(fname, "0", ip, idx + 2,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx + 2, XFS_DATA_FORK);
 		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2),
 			NULLSTARTBLOCK((int)temp2));
-		xfs_bmap_trace_post_update(fname, "0", ip, idx + 2,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx + 2, XFS_DATA_FORK);
 		*dnew = temp + temp2;
 		/* DELTA: One in-core extent is split in three. */
 		temp = PREV.br_startoff;
@@ -1288,9 +1272,6 @@ xfs_bmap_add_extent_unwritten_real(
 	xfs_btree_cur_t		*cur;	/* btree cursor */
 	xfs_bmbt_rec_t		*ep;	/* extent entry for idx */
 	int			error;	/* error return value */
-#ifdef XFS_BMAP_TRACE
-	static char		fname[] = "xfs_bmap_add_extent_unwritten_real";
-#endif
 	int			i;	/* temp state */
 	xfs_ifork_t		*ifp;	/* inode fork pointer */
 	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
@@ -1391,15 +1372,14 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting all of a previous oldext extent to newext.
 		 * The left and right neighbors are both contiguous with new.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1,
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + PREV.br_blockcount +
 			RIGHT.br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1,
-			XFS_DATA_FORK);
-		xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2,
+		XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1,
 			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK);
 		xfs_iext_remove(ifp, idx, 2);
 		ip->i_df.if_lastex = idx - 1;
 		ip->i_d.di_nextents -= 2;
@@ -1442,15 +1422,14 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting all of a previous oldext extent to newext.
 		 * The left neighbor is contiguous, the right is not.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1,
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + PREV.br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1,
+		XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1,
 			XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx - 1;
-		xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK);
 		xfs_iext_remove(ifp, idx, 1);
 		ip->i_d.di_nextents--;
 		if (cur == NULL)
@@ -1485,16 +1464,15 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting all of a previous oldext extent to newext.
 		 * The right neighbor is contiguous, the left is not.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx,
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount + RIGHT.br_blockcount);
 		xfs_bmbt_set_state(ep, newext);
-		xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx,
+		XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx,
 			XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx;
-		xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1,
-			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK);
 		xfs_iext_remove(ifp, idx + 1, 1);
 		ip->i_d.di_nextents--;
 		if (cur == NULL)
@@ -1530,10 +1508,10 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Neither the left nor right neighbors are contiguous with
 		 * the new one.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx,
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_state(ep, newext);
-		xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx,
+		XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx,
 			XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx;
 		if (cur == NULL)
@@ -1560,21 +1538,21 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting the first part of a previous oldext extent to newext.
 		 * The left neighbor is contiguous.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1,
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + new->br_blockcount);
 		xfs_bmbt_set_startoff(ep,
 			PREV.br_startoff + new->br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1,
+		XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1,
 			XFS_DATA_FORK);
-		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx,
+		XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_startblock(ep,
 			new->br_startblock + new->br_blockcount);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount - new->br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx,
+		XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx,
 			XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx - 1;
 		if (cur == NULL)
@@ -1611,15 +1589,15 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting the first part of a previous oldext extent to newext.
 		 * The left neighbor is not contiguous.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK);
 		ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
 		xfs_bmbt_set_startoff(ep, new_endoff);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount - new->br_blockcount);
 		xfs_bmbt_set_startblock(ep,
 			new->br_startblock + new->br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LF", ip, idx, XFS_DATA_FORK);
-		xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL,
+		XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL,
 			XFS_DATA_FORK);
 		xfs_iext_insert(ifp, idx, 1, new);
 		ip->i_df.if_lastex = idx;
@@ -1654,18 +1632,18 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting the last part of a previous oldext extent to newext.
 		 * The right neighbor is contiguous with the new allocation.
 		 */
-		xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx,
+		XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx,
 			XFS_DATA_FORK);
-		xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1,
+		XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount - new->br_blockcount);
-		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx,
+		XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
 			new->br_startoff, new->br_startblock,
 			new->br_blockcount + RIGHT.br_blockcount, newext);
-		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1,
+		XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1,
 			XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx + 1;
 		if (cur == NULL)
@@ -1701,12 +1679,12 @@ xfs_bmap_add_extent_unwritten_real(
 		 * Setting the last part of a previous oldext extent to newext.
 		 * The right neighbor is not contiguous.
 		 */
-		xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount - new->br_blockcount);
-		xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK);
-		xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1,
-			new, NULL, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL,
+			XFS_DATA_FORK);
 		xfs_iext_insert(ifp, idx + 1, 1, new);
 		ip->i_df.if_lastex = idx + 1;
 		ip->i_d.di_nextents++;
@@ -1745,17 +1723,17 @@ xfs_bmap_add_extent_unwritten_real(
 		 * newext.  Contiguity is impossible here.
 		 * One extent becomes three extents.
 		 */
-		xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(ep,
 			new->br_startoff - PREV.br_startoff);
-		xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK);
 		r[0] = *new;
 		r[1].br_startoff = new_endoff;
 		r[1].br_blockcount =
 			PREV.br_startoff + PREV.br_blockcount - new_endoff;
 		r[1].br_startblock = new->br_startblock + new->br_blockcount;
 		r[1].br_state = oldext;
-		xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1],
+		XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1],
 			XFS_DATA_FORK);
 		xfs_iext_insert(ifp, idx + 1, 2, &r[0]);
 		ip->i_df.if_lastex = idx + 1;
@@ -1846,9 +1824,6 @@ xfs_bmap_add_extent_hole_delay(
 	int			rsvd)		/* OK to allocate reserved blocks */
 {
 	xfs_bmbt_rec_t		*ep;	/* extent record for idx */
-#ifdef XFS_BMAP_TRACE
-	static char		fname[] = "xfs_bmap_add_extent_hole_delay";
-#endif
 	xfs_ifork_t		*ifp;	/* inode fork pointer */
 	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
 	xfs_filblks_t		newlen=0;	/* new indirect size */
@@ -1920,7 +1895,7 @@ xfs_bmap_add_extent_hole_delay(
 		 */
 		temp = left.br_blockcount + new->br_blockcount +
 			right.br_blockcount;
-		xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1,
+		XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
 		oldlen = STARTBLOCKVAL(left.br_startblock) +
@@ -1929,10 +1904,9 @@ xfs_bmap_add_extent_hole_delay(
 		newlen = xfs_bmap_worst_indlen(ip, temp);
 		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
 			NULLSTARTBLOCK((int)newlen));
-		xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1,
-			XFS_DATA_FORK);
-		xfs_bmap_trace_delete(fname, "LC|RC", ip, idx, 1,
+		XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1,
 			XFS_DATA_FORK);
+		XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, XFS_DATA_FORK);
 		xfs_iext_remove(ifp, idx, 1);
 		ip->i_df.if_lastex = idx - 1;
 		/* DELTA: Two in-core extents were replaced by one. */
@@ -1947,7 +1921,7 @@ xfs_bmap_add_extent_hole_delay(
 		 * Merge the new allocation with the left neighbor.
 		 */
 		temp = left.br_blockcount + new->br_blockcount;
-		xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1,
+		XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1,
 			XFS_DATA_FORK);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
 		oldlen = STARTBLOCKVAL(left.br_startblock) +
@@ -1955,7 +1929,7 @@ xfs_bmap_add_extent_hole_delay(
 		newlen = xfs_bmap_worst_indlen(ip, temp);
 		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
 			NULLSTARTBLOCK((int)newlen));
-		xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1,
+		XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1,
 			XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx - 1;
 		/* DELTA: One in-core extent grew into a hole. */
@@ -1969,14 +1943,14 @@ xfs_bmap_add_extent_hole_delay(
 		 * on the right.
 		 * Merge the new allocation with the right neighbor.
 		 */
-		xfs_bmap_trace_pre_update(fname, "RC", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, XFS_DATA_FORK);
 		temp = new->br_blockcount + right.br_blockcount;
 		oldlen = STARTBLOCKVAL(new->br_startblock) +
 			STARTBLOCKVAL(right.br_startblock);
 		newlen = xfs_bmap_worst_indlen(ip, temp);
 		xfs_bmbt_set_allf(ep, new->br_startoff,
 			NULLSTARTBLOCK((int)newlen), temp, right.br_state);
-		xfs_bmap_trace_post_update(fname, "RC", ip, idx, XFS_DATA_FORK);
+		XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, XFS_DATA_FORK);
 		ip->i_df.if_lastex = idx;
 		/* DELTA: One in-core extent grew into a hole. */
 		temp2 = temp;
@@ -1990,7 +1964,7 @@ xfs_bmap_add_extent_hole_delay(
 		 * Insert a new entry.
 		 */
 		oldlen = newlen = 0;
-		xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL,
+		XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL,
 			XFS_DATA_FORK);
 		xfs_iext_insert(ifp, idx, 1, new);
 		ip->i_df.if_lastex = idx;
@@ -2040,9 +2014,6 @@ xfs_bmap_add_extent_hole_real(
 {
 	xfs_bmbt_rec_t		*ep;	/* pointer to extent entry ins. point */
 	int			error;	/* error return value */
-#ifdef XFS_BMAP_TRACE
-	static char		fname[] = "xfs_bmap_add_extent_hole_real";
-#endif
 	int			i;	/* temp state */
 	xfs_ifork_t		*ifp;	/* inode fork pointer */
 	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
@@ -2119,15 +2090,14 @@ xfs_bmap_add_extent_hole_real(
 		 * left and on the right.
 		 * Merge all three into a single extent record.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1,
+		XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1,
 			whichfork);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			left.br_blockcount + new->br_blockcount +
 			right.br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1,
+		XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1,
 			whichfork);
-		xfs_bmap_trace_delete(fname, "LC|RC", ip,
-			idx, 1, whichfork);
+		XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, whichfork);
 		xfs_iext_remove(ifp, idx, 1);
 		ifp->if_lastex = idx - 1;
 		XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -2169,10 +2139,10 @@ xfs_bmap_add_extent_hole_real(
 		 * on the left.
 		 * Merge the new allocation with the left neighbor.
 		 */
-		xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1, whichfork);
+		XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1, whichfork);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			left.br_blockcount + new->br_blockcount);
-		xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, whichfork);
+		XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1, whichfork);
 		ifp->if_lastex = idx - 1;
 		if (cur == NULL) {
 			rval = XFS_ILOG_FEXT(whichfork);
@@ -2203,11 +2173,11 @@ xfs_bmap_add_extent_hole_real(
 		 * on the right.
 		 * Merge the new allocation with the right neighbor.
 		 */
-		xfs_bmap_trace_pre_update(fname, "RC", ip, idx, whichfork);
+		XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, whichfork);
 		xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock,
 			new->br_blockcount + right.br_blockcount,
 			right.br_state);
-		xfs_bmap_trace_post_update(fname, "RC", ip, idx, whichfork);
+		XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, whichfork);
 		ifp->if_lastex = idx;
 		if (cur == NULL) {
 			rval = XFS_ILOG_FEXT(whichfork);
@@ -2238,8 +2208,7 @@ xfs_bmap_add_extent_hole_real(
 		 * real allocation.
 		 * Insert a new entry.
 		 */
-		xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL,
-			whichfork);
+		XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL, whichfork);
 		xfs_iext_insert(ifp, idx, 1, new);
 		ifp->if_lastex = idx;
 		XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -3104,9 +3073,6 @@ xfs_bmap_del_extent(
 	xfs_bmbt_rec_t		*ep;	/* current extent entry pointer */
 	int			error;	/* error return value */
 	int			flags;	/* inode logging flags */
-#ifdef XFS_BMAP_TRACE
-	static char		fname[] = "xfs_bmap_del_extent";
-#endif
 	xfs_bmbt_irec_t		got;	/* current extent entry */
 	xfs_fileoff_t		got_endoff;	/* first offset past got */
 	int			i;	/* temp state */
@@ -3200,7 +3166,7 @@ xfs_bmap_del_extent(
 		/*
 		 * Matches the whole extent.  Delete the entry.
 		 */
-		xfs_bmap_trace_delete(fname, "3", ip, idx, 1, whichfork);
+		XFS_BMAP_TRACE_DELETE("3", ip, idx, 1, whichfork);
 		xfs_iext_remove(ifp, idx, 1);
 		ifp->if_lastex = idx;
 		if (delay)
@@ -3221,7 +3187,7 @@ xfs_bmap_del_extent(
 		/*
 		 * Deleting the first part of the extent.
 		 */
-		xfs_bmap_trace_pre_update(fname, "2", ip, idx, whichfork);
+		XFS_BMAP_TRACE_PRE_UPDATE("2", ip, idx, whichfork);
 		xfs_bmbt_set_startoff(ep, del_endoff);
 		temp = got.br_blockcount - del->br_blockcount;
 		xfs_bmbt_set_blockcount(ep, temp);
@@ -3230,13 +3196,13 @@ xfs_bmap_del_extent(
 			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 				da_old);
 			xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-			xfs_bmap_trace_post_update(fname, "2", ip, idx,
+			XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx,
 				whichfork);
 			da_new = temp;
 			break;
 		}
 		xfs_bmbt_set_startblock(ep, del_endblock);
-		xfs_bmap_trace_post_update(fname, "2", ip, idx, whichfork);
+		XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx, whichfork);
 		if (!cur) {
 			flags |= XFS_ILOG_FEXT(whichfork);
 			break;
@@ -3252,19 +3218,19 @@ xfs_bmap_del_extent(
 		 * Deleting the last part of the extent.
 		 */
 		temp = got.br_blockcount - del->br_blockcount;
-		xfs_bmap_trace_pre_update(fname, "1", ip, idx, whichfork);
+		XFS_BMAP_TRACE_PRE_UPDATE("1", ip, idx, whichfork);
 		xfs_bmbt_set_blockcount(ep, temp);
 		ifp->if_lastex = idx;
 		if (delay) {
 			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 				da_old);
 			xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
-			xfs_bmap_trace_post_update(fname, "1", ip, idx,
+			XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx,
 				whichfork);
 			da_new = temp;
 			break;
 		}
-		xfs_bmap_trace_post_update(fname, "1", ip, idx, whichfork);
+		XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx, whichfork);
 		if (!cur) {
 			flags |= XFS_ILOG_FEXT(whichfork);
 			break;
@@ -3281,7 +3247,7 @@ xfs_bmap_del_extent(
 		 * Deleting the middle of the extent.
 		 */
 		temp = del->br_startoff - got.br_startoff;
-		xfs_bmap_trace_pre_update(fname, "0", ip, idx, whichfork);
+		XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, whichfork);
 		xfs_bmbt_set_blockcount(ep, temp);
 		new.br_startoff = del_endoff;
 		temp2 = got_endoff - del_endoff;
@@ -3368,8 +3334,8 @@ xfs_bmap_del_extent(
 				}
 			}
 		}
-		xfs_bmap_trace_post_update(fname, "0", ip, idx, whichfork);
-		xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 1, &new, NULL,
+		XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, whichfork);
+		XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 1, &new, NULL,
 			whichfork);
 		xfs_iext_insert(ifp, idx + 1, 1, &new);
 		ifp->if_lastex = idx + 1;
@@ -3609,9 +3575,6 @@ xfs_bmap_local_to_extents(
 {
 	int		error;		/* error return value */
 	int		flags;		/* logging flags returned */
-#ifdef XFS_BMAP_TRACE
-	static char	fname[] = "xfs_bmap_local_to_extents";
-#endif
 	xfs_ifork_t	*ifp;		/* inode fork pointer */
 
 	/*
@@ -3666,7 +3629,7 @@ xfs_bmap_local_to_extents(
 		xfs_iext_add(ifp, 0, 1);
 		ep = xfs_iext_get_ext(ifp, 0);
 		xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
-		xfs_bmap_trace_post_update(fname, "new", ip, 0, whichfork);
+		XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork);
 		XFS_IFORK_NEXT_SET(ip, whichfork, 1);
 		ip->i_d.di_nblocks = 1;
 		XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip,
@@ -3789,7 +3752,7 @@ ktrace_t	*xfs_bmap_trace_buf;
 STATIC void
 xfs_bmap_trace_addentry(
 	int		opcode,		/* operation */
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry(ies) */
@@ -3848,7 +3811,7 @@ xfs_bmap_trace_addentry(
  */
 STATIC void
 xfs_bmap_trace_delete(
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry(entries) deleted */
@@ -3870,7 +3833,7 @@ xfs_bmap_trace_delete(
  */
 STATIC void
 xfs_bmap_trace_insert(
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry(entries) inserted */
@@ -3899,7 +3862,7 @@ xfs_bmap_trace_insert(
  */
 STATIC void
 xfs_bmap_trace_post_update(
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry updated */
@@ -3917,7 +3880,7 @@ xfs_bmap_trace_post_update(
  */
 STATIC void
 xfs_bmap_trace_pre_update(
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	char		*desc,		/* operation description */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	idx,		/* index of entry to be updated */
@@ -4534,9 +4497,6 @@ xfs_bmap_read_extents(
 	xfs_buf_t		*bp;	/* buffer for "block" */
 	int			error;	/* error return value */
 	xfs_exntfmt_t		exntf;	/* XFS_EXTFMT_NOSTATE, if checking */
-#ifdef XFS_BMAP_TRACE
-	static char		fname[] = "xfs_bmap_read_extents";
-#endif
 	xfs_extnum_t		i, j;	/* index into the extents list */
 	xfs_ifork_t		*ifp;	/* fork structure */
 	int			level;	/* btree level, for checking */
@@ -4653,7 +4613,7 @@ xfs_bmap_read_extents(
 	}
 	ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
 	ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
-	xfs_bmap_trace_exlist(fname, ip, i, whichfork);
+	XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
 	return 0;
 error0:
 	xfs_trans_brelse(tp, bp);
@@ -4666,7 +4626,7 @@ error0:
  */
 void
 xfs_bmap_trace_exlist(
-	char		*fname,		/* function name */
+	const char	*fname,		/* function name */
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_extnum_t	cnt,		/* count of entries in the list */
 	int		whichfork)	/* data or attr fork */
@@ -4681,7 +4641,7 @@ xfs_bmap_trace_exlist(
 	for (idx = 0; idx < cnt; idx++) {
 		ep = xfs_iext_get_ext(ifp, idx);
 		xfs_bmbt_get_all(ep, &s);
-		xfs_bmap_trace_insert(fname, "exlist", ip, idx, 1, &s, NULL,
+		XFS_BMAP_TRACE_INSERT("exlist", ip, idx, 1, &s, NULL,
 			whichfork);
 	}
 }
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 4f24c7e39b31..524b1c9d5246 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -144,12 +144,14 @@ extern ktrace_t	*xfs_bmap_trace_buf;
  */
 void
 xfs_bmap_trace_exlist(
-	char			*fname,		/* function name */
+	const char		*fname,		/* function name */
 	struct xfs_inode	*ip,		/* incore inode pointer */
 	xfs_extnum_t		cnt,		/* count of entries in list */
 	int			whichfork);	/* data or attr fork */
+#define	XFS_BMAP_TRACE_EXLIST(ip,c,w)	\
+	xfs_bmap_trace_exlist(__FUNCTION__,ip,c,w)
 #else
-#define	xfs_bmap_trace_exlist(f,ip,c,w)
+#define	XFS_BMAP_TRACE_EXLIST(ip,c,w)
 #endif
 
 /*
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 0bf192fea3eb..89b891f51cfb 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -76,7 +76,7 @@ static char	EXIT[] = "exit";
  */
 STATIC void
 xfs_bmbt_trace_enter(
-	char		*func,
+	const char	*func,
 	xfs_btree_cur_t	*cur,
 	char		*s,
 	int		type,
@@ -117,7 +117,7 @@ xfs_bmbt_trace_enter(
  */
 STATIC void
 xfs_bmbt_trace_argbi(
-	char		*func,
+	const char	*func,
 	xfs_btree_cur_t	*cur,
 	xfs_buf_t	*b,
 	int		i,
@@ -134,7 +134,7 @@ xfs_bmbt_trace_argbi(
  */
 STATIC void
 xfs_bmbt_trace_argbii(
-	char		*func,
+	const char	*func,
 	xfs_btree_cur_t	*cur,
 	xfs_buf_t	*b,
 	int		i0,
@@ -153,7 +153,7 @@ xfs_bmbt_trace_argbii(
  */
 STATIC void
 xfs_bmbt_trace_argfffi(
-	char			*func,
+	const char		*func,
 	xfs_btree_cur_t		*cur,
 	xfs_dfiloff_t		o,
 	xfs_dfsbno_t		b,
@@ -172,7 +172,7 @@ xfs_bmbt_trace_argfffi(
  */
 STATIC void
 xfs_bmbt_trace_argi(
-	char		*func,
+	const char	*func,
 	xfs_btree_cur_t	*cur,
 	int		i,
 	int		line)
@@ -188,7 +188,7 @@ xfs_bmbt_trace_argi(
  */
 STATIC void
 xfs_bmbt_trace_argifk(
-	char			*func,
+	const char		*func,
 	xfs_btree_cur_t		*cur,
 	int			i,
 	xfs_fsblock_t		f,
@@ -206,7 +206,7 @@ xfs_bmbt_trace_argifk(
  */
 STATIC void
 xfs_bmbt_trace_argifr(
-	char			*func,
+	const char		*func,
 	xfs_btree_cur_t		*cur,
 	int			i,
 	xfs_fsblock_t		f,
@@ -235,7 +235,7 @@ xfs_bmbt_trace_argifr(
  */
 STATIC void
 xfs_bmbt_trace_argik(
-	char			*func,
+	const char		*func,
 	xfs_btree_cur_t		*cur,
 	int			i,
 	xfs_bmbt_key_t		*k,
@@ -255,7 +255,7 @@ xfs_bmbt_trace_argik(
  */
 STATIC void
 xfs_bmbt_trace_cursor(
-	char		*func,
+	const char	*func,
 	xfs_btree_cur_t	*cur,
 	char		*s,
 	int		line)
@@ -274,21 +274,21 @@ xfs_bmbt_trace_cursor(
 }
 
 #define	XFS_BMBT_TRACE_ARGBI(c,b,i)	\
-	xfs_bmbt_trace_argbi(fname, c, b, i, __LINE__)
+	xfs_bmbt_trace_argbi(__FUNCTION__, c, b, i, __LINE__)
 #define	XFS_BMBT_TRACE_ARGBII(c,b,i,j)	\
-	xfs_bmbt_trace_argbii(fname, c, b, i, j, __LINE__)
+	xfs_bmbt_trace_argbii(__FUNCTION__, c, b, i, j, __LINE__)
 #define	XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j)	\
-	xfs_bmbt_trace_argfffi(fname, c, o, b, i, j, __LINE__)
+	xfs_bmbt_trace_argfffi(__FUNCTION__, c, o, b, i, j, __LINE__)
 #define	XFS_BMBT_TRACE_ARGI(c,i)	\
-	xfs_bmbt_trace_argi(fname, c, i, __LINE__)
+	xfs_bmbt_trace_argi(__FUNCTION__, c, i, __LINE__)
 #define	XFS_BMBT_TRACE_ARGIFK(c,i,f,s)	\
-	xfs_bmbt_trace_argifk(fname, c, i, f, s, __LINE__)
+	xfs_bmbt_trace_argifk(__FUNCTION__, c, i, f, s, __LINE__)
 #define	XFS_BMBT_TRACE_ARGIFR(c,i,f,r)	\
-	xfs_bmbt_trace_argifr(fname, c, i, f, r, __LINE__)
+	xfs_bmbt_trace_argifr(__FUNCTION__, c, i, f, r, __LINE__)
 #define	XFS_BMBT_TRACE_ARGIK(c,i,k)	\
-	xfs_bmbt_trace_argik(fname, c, i, k, __LINE__)
+	xfs_bmbt_trace_argik(__FUNCTION__, c, i, k, __LINE__)
 #define	XFS_BMBT_TRACE_CURSOR(c,s)	\
-	xfs_bmbt_trace_cursor(fname, c, s, __LINE__)
+	xfs_bmbt_trace_cursor(__FUNCTION__, c, s, __LINE__)
 #else
 #define	XFS_BMBT_TRACE_ARGBI(c,b,i)
 #define	XFS_BMBT_TRACE_ARGBII(c,b,i,j)
@@ -318,9 +318,6 @@ xfs_bmbt_delrec(
 	xfs_fsblock_t		bno;		/* fs-relative block number */
 	xfs_buf_t		*bp;		/* buffer for block */
 	int			error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_delrec";
-#endif
 	int			i;		/* loop counter */
 	int			j;		/* temp state */
 	xfs_bmbt_key_t		key;		/* bmap btree key */
@@ -694,9 +691,6 @@ xfs_bmbt_insrec(
 	xfs_bmbt_block_t	*block;		/* bmap btree block */
 	xfs_buf_t		*bp;		/* buffer for block */
 	int			error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_insrec";
-#endif
 	int			i;		/* loop index */
 	xfs_bmbt_key_t		key;		/* bmap btree key */
 	xfs_bmbt_key_t		*kp=NULL;	/* pointer to bmap btree key */
@@ -880,9 +874,6 @@ xfs_bmbt_killroot(
 	xfs_bmbt_ptr_t		*cpp;
 #ifdef DEBUG
 	int			error;
-#endif
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_killroot";
 #endif
 	int			i;
 	xfs_bmbt_key_t		*kp;
@@ -973,9 +964,6 @@ xfs_bmbt_log_keys(
 	int		kfirst,
 	int		klast)
 {
-#ifdef XFS_BMBT_TRACE
-	static char	fname[] = "xfs_bmbt_log_keys";
-#endif
 	xfs_trans_t	*tp;
 
 	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
@@ -1012,9 +1000,6 @@ xfs_bmbt_log_ptrs(
 	int		pfirst,
 	int		plast)
 {
-#ifdef XFS_BMBT_TRACE
-	static char	fname[] = "xfs_bmbt_log_ptrs";
-#endif
 	xfs_trans_t	*tp;
 
 	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
@@ -1055,9 +1040,6 @@ xfs_bmbt_lookup(
 	xfs_daddr_t		d;
 	xfs_sfiloff_t		diff;
 	int			error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char	fname[] = "xfs_bmbt_lookup";
-#endif
 	xfs_fsblock_t		fsbno=0;
 	int			high;
 	int			i;
@@ -1195,9 +1177,6 @@ xfs_bmbt_lshift(
 	int			*stat)		/* success/failure */
 {
 	int			error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_lshift";
-#endif
 #ifdef DEBUG
 	int			i;		/* loop counter */
 #endif
@@ -1331,9 +1310,6 @@ xfs_bmbt_rshift(
 	int			*stat)		/* success/failure */
 {
 	int			error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_rshift";
-#endif
 	int			i;		/* loop counter */
 	xfs_bmbt_key_t		key;		/* bmap btree key */
 	xfs_buf_t		*lbp;		/* left buffer pointer */
@@ -1492,9 +1468,6 @@ xfs_bmbt_split(
 {
 	xfs_alloc_arg_t		args;		/* block allocation args */
 	int			error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_split";
-#endif
 	int			i;		/* loop counter */
 	xfs_fsblock_t		lbno;		/* left sibling block number */
 	xfs_buf_t		*lbp;		/* left buffer pointer */
@@ -1640,9 +1613,6 @@ xfs_bmbt_updkey(
 	xfs_buf_t		*bp;
 #ifdef DEBUG
 	int			error;
-#endif
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_updkey";
 #endif
 	xfs_bmbt_key_t		*kp;
 	int			ptr;
@@ -1712,9 +1682,6 @@ xfs_bmbt_decrement(
 	xfs_bmbt_block_t	*block;
 	xfs_buf_t		*bp;
 	int			error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_decrement";
-#endif
 	xfs_fsblock_t		fsbno;
 	int			lev;
 	xfs_mount_t		*mp;
@@ -1785,9 +1752,6 @@ xfs_bmbt_delete(
 	int		*stat)		/* success/failure */
 {
 	int		error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char	fname[] = "xfs_bmbt_delete";
-#endif
 	int		i;
 	int		level;
 
@@ -2000,9 +1964,6 @@ xfs_bmbt_increment(
 	xfs_bmbt_block_t	*block;
 	xfs_buf_t		*bp;
 	int			error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_increment";
-#endif
 	xfs_fsblock_t		fsbno;
 	int			lev;
 	xfs_mount_t		*mp;
@@ -2080,9 +2041,6 @@ xfs_bmbt_insert(
 	int		*stat)		/* success/failure */
 {
 	int		error;		/* error return value */
-#ifdef XFS_BMBT_TRACE
-	static char	fname[] = "xfs_bmbt_insert";
-#endif
 	int		i;
 	int		level;
 	xfs_fsblock_t	nbno;
@@ -2142,9 +2100,6 @@ xfs_bmbt_log_block(
 	int			fields)
 {
 	int			first;
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_log_block";
-#endif
 	int			last;
 	xfs_trans_t		*tp;
 	static const short	offsets[] = {
@@ -2181,9 +2136,6 @@ xfs_bmbt_log_recs(
 {
 	xfs_bmbt_block_t	*block;
 	int			first;
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_log_recs";
-#endif
 	int			last;
 	xfs_bmbt_rec_t		*rp;
 	xfs_trans_t		*tp;
@@ -2245,9 +2197,6 @@ xfs_bmbt_newroot(
 	xfs_bmbt_key_t		*ckp;		/* child key pointer */
 	xfs_bmbt_ptr_t		*cpp;		/* child ptr pointer */
 	int			error;		/* error return code */
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_newroot";
-#endif
 #ifdef DEBUG
 	int			i;		/* loop counter */
 #endif
@@ -2630,9 +2579,6 @@ xfs_bmbt_update(
 	xfs_bmbt_block_t	*block;
 	xfs_buf_t		*bp;
 	int			error;
-#ifdef XFS_BMBT_TRACE
-	static char		fname[] = "xfs_bmbt_update";
-#endif
 	xfs_bmbt_key_t		key;
 	int			ptr;
 	xfs_bmbt_rec_t		*rp;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 59829523ab07..cdc4c28926d0 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -645,8 +645,7 @@ xfs_iformat_extents(
 			ep->l1 = INT_GET(get_unaligned((__uint64_t*)&dp->l1),
 								ARCH_CONVERT);
 		}
-		xfs_bmap_trace_exlist("xfs_iformat_extents", ip, nex,
-			whichfork);
+		XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
 		if (whichfork != XFS_DATA_FORK ||
 			XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
 				if (unlikely(xfs_check_nostate_extents(
@@ -2894,9 +2893,6 @@ xfs_iextents_copy(
 	int			copied;
 	xfs_bmbt_rec_t		*dest_ep;
 	xfs_bmbt_rec_t		*ep;
-#ifdef XFS_BMAP_TRACE
-	static char		fname[] = "xfs_iextents_copy";
-#endif
 	int			i;
 	xfs_ifork_t		*ifp;
 	int			nrecs;
@@ -2907,7 +2903,7 @@ xfs_iextents_copy(
 	ASSERT(ifp->if_bytes > 0);
 
 	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	xfs_bmap_trace_exlist(fname, ip, nrecs, whichfork);
+	XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
 	ASSERT(nrecs > 0);
 
 	/*
-- 
cgit v1.2.3-59-g8ed1b


From 547e00c3c681265b1fe5e34c7643f3ddac748ba0 Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.cz>
Date: Wed, 11 Jul 2007 11:09:57 +1000
Subject: [XFS] Compat ioctl handler for XFS_IOC_FSGEOMETRY_V1.

i386 struct xfs_fsop_geom_v1 has no padding after the last member, so the
size is different.

SGI-PV: 967354
SGI-Modid: xfs-linux-melb:xfs-kern:29100a

Signed-off-by: Michal Marek <mmarek@suse.cz>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/linux-2.6/xfs_ioctl32.c | 42 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index b83cebc165f1..5b91335e97dd 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -75,6 +75,42 @@ xfs_ioctl32_flock(
 	return (unsigned long)p;
 }
 
+typedef struct compat_xfs_fsop_geom_v1 {
+	__u32		blocksize;	/* filesystem (data) block size */
+	__u32		rtextsize;	/* realtime extent size		*/
+	__u32		agblocks;	/* fsblocks in an AG		*/
+	__u32		agcount;	/* number of allocation groups	*/
+	__u32		logblocks;	/* fsblocks in the log		*/
+	__u32		sectsize;	/* (data) sector size, bytes	*/
+	__u32		inodesize;	/* inode size in bytes		*/
+	__u32		imaxpct;	/* max allowed inode space(%)	*/
+	__u64		datablocks;	/* fsblocks in data subvolume	*/
+	__u64		rtblocks;	/* fsblocks in realtime subvol	*/
+	__u64		rtextents;	/* rt extents in realtime subvol*/
+	__u64		logstart;	/* starting fsblock of the log	*/
+	unsigned char	uuid[16];	/* unique id of the filesystem	*/
+	__u32		sunit;		/* stripe unit, fsblocks	*/
+	__u32		swidth;		/* stripe width, fsblocks	*/
+	__s32		version;	/* structure version		*/
+	__u32		flags;		/* superblock version flags	*/
+	__u32		logsectsize;	/* log sector size, bytes	*/
+	__u32		rtsectsize;	/* realtime sector size, bytes	*/
+	__u32		dirblocksize;	/* directory block size, bytes	*/
+} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
+
+#define XFS_IOC_FSGEOMETRY_V1_32  \
+	_IOR ('X', 100, struct compat_xfs_fsop_geom_v1)
+
+STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg)
+{
+	compat_xfs_fsop_geom_v1_t __user *p32 = (void __user *)arg;
+	xfs_fsop_geom_v1_t __user *p = compat_alloc_user_space(sizeof(*p));
+
+	if (copy_in_user(p, p32, sizeof(*p32)))
+		return -EFAULT;
+	return (unsigned long)p;
+}
+
 #else
 
 typedef struct xfs_fsop_bulkreq32 {
@@ -118,7 +154,6 @@ xfs_compat_ioctl(
 
 	switch (cmd) {
 	case XFS_IOC_DIOINFO:
-	case XFS_IOC_FSGEOMETRY_V1:
 	case XFS_IOC_FSGEOMETRY:
 	case XFS_IOC_GETVERSION:
 	case XFS_IOC_GETXFLAGS:
@@ -166,6 +201,10 @@ xfs_compat_ioctl(
 		arg = xfs_ioctl32_flock(arg);
 		cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
 		break;
+	case XFS_IOC_FSGEOMETRY_V1_32:
+		arg = xfs_ioctl32_geom_v1(arg);
+		cmd = _NATIVE_IOC(cmd, struct xfs_fsop_geom_v1);
+		break;
 
 #else /* These are handled fine if no alignment issues */
 	case XFS_IOC_ALLOCSP:
@@ -176,6 +215,7 @@ xfs_compat_ioctl(
 	case XFS_IOC_FREESP64:
 	case XFS_IOC_RESVSP64:
 	case XFS_IOC_UNRESVSP64:
+	case XFS_IOC_FSGEOMETRY_V1:
 		break;
 
 	/* xfs_bstat_t still has wrong u32 vs u64 alignment */
-- 
cgit v1.2.3-59-g8ed1b


From 1fa503df66f7bffc0ff62662626897eec79446c2 Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.cz>
Date: Wed, 11 Jul 2007 11:10:09 +1000
Subject: [XFS] Compat ioctl handler for handle operations

32bit struct xfs_fsop_handlereq has different size and offsets (due to
pointers). TODO: case XFS_IOC_{FSSETDM,ATTRLIST,ATTRMULTI}_BY_HANDLE still
not handled.

SGI-PV: 967354
SGI-Modid: xfs-linux-melb:xfs-kern:29101a

Signed-off-by: Michal Marek <mmarek@suse.cz>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/linux-2.6/xfs_ioctl32.c | 57 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 52 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 5b91335e97dd..87163fb9746b 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -141,6 +141,50 @@ xfs_ioctl32_bulkstat(
 }
 #endif
 
+typedef struct compat_xfs_fsop_handlereq {
+	__u32		fd;		/* fd for FD_TO_HANDLE		*/
+	compat_uptr_t	path;		/* user pathname		*/
+	__u32		oflags;		/* open flags			*/
+	compat_uptr_t	ihandle;	/* user supplied handle		*/
+	__u32		ihandlen;	/* user supplied length		*/
+	compat_uptr_t	ohandle;	/* user buffer for handle	*/
+	compat_uptr_t	ohandlen;	/* user buffer length		*/
+} compat_xfs_fsop_handlereq_t;
+
+#define XFS_IOC_PATH_TO_FSHANDLE_32 \
+	_IOWR('X', 104, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_PATH_TO_HANDLE_32 \
+	_IOWR('X', 105, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_FD_TO_HANDLE_32 \
+	_IOWR('X', 106, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_OPEN_BY_HANDLE_32 \
+	_IOWR('X', 107, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_READLINK_BY_HANDLE_32 \
+	_IOWR('X', 108, struct compat_xfs_fsop_handlereq)
+
+STATIC unsigned long xfs_ioctl32_fshandle(unsigned long arg)
+{
+	compat_xfs_fsop_handlereq_t __user *p32 = (void __user *)arg;
+	xfs_fsop_handlereq_t __user *p = compat_alloc_user_space(sizeof(*p));
+	u32 addr;
+
+	if (copy_in_user(&p->fd, &p32->fd, sizeof(__u32)) ||
+	    get_user(addr, &p32->path) ||
+	    put_user(compat_ptr(addr), &p->path) ||
+	    copy_in_user(&p->oflags, &p32->oflags, sizeof(__u32)) ||
+	    get_user(addr, &p32->ihandle) ||
+	    put_user(compat_ptr(addr), &p->ihandle) ||
+	    copy_in_user(&p->ihandlen, &p32->ihandlen, sizeof(__u32)) ||
+	    get_user(addr, &p32->ohandle) ||
+	    put_user(compat_ptr(addr), &p->ohandle) ||
+	    get_user(addr, &p32->ohandlen) ||
+	    put_user(compat_ptr(addr), &p->ohandlen))
+		return -EFAULT;
+
+	return (unsigned long)p;
+}
+
+
 STATIC long
 xfs_compat_ioctl(
 	int		mode,
@@ -166,12 +210,7 @@ xfs_compat_ioctl(
 	case XFS_IOC_GETBMAPA:
 	case XFS_IOC_GETBMAPX:
 /* not handled
-	case XFS_IOC_FD_TO_HANDLE:
-	case XFS_IOC_PATH_TO_HANDLE:
-	case XFS_IOC_PATH_TO_FSHANDLE:
-	case XFS_IOC_OPEN_BY_HANDLE:
 	case XFS_IOC_FSSETDM_BY_HANDLE:
-	case XFS_IOC_READLINK_BY_HANDLE:
 	case XFS_IOC_ATTRLIST_BY_HANDLE:
 	case XFS_IOC_ATTRMULTI_BY_HANDLE:
 */
@@ -228,6 +267,14 @@ xfs_compat_ioctl(
 		arg = xfs_ioctl32_bulkstat(arg);
 		break;
 #endif
+	case XFS_IOC_FD_TO_HANDLE_32:
+	case XFS_IOC_PATH_TO_HANDLE_32:
+	case XFS_IOC_PATH_TO_FSHANDLE_32:
+	case XFS_IOC_OPEN_BY_HANDLE_32:
+	case XFS_IOC_READLINK_BY_HANDLE_32:
+		arg = xfs_ioctl32_fshandle(arg);
+		cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
+		break;
 	default:
 		return -ENOIOCTLCMD;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From faa63e9584df41020440756b8b90b7b63f95e4f6 Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.cz>
Date: Wed, 11 Jul 2007 11:10:19 +1000
Subject: [XFS] Fix XFS_IOC_FSBULKSTAT{,_SINGLE} & XFS_IOC_FSINUMBERS in compat
 mode

* 32bit struct xfs_fsop_bulkreq has different size and layout of
members, no matter the alignment. Move the code out of the #else
branch (why was it there in the first place?). Define _32 variants of
the ioctl constants.
* 32bit struct xfs_bstat is different because of time_t and on
i386 because of different padding. Make xfs_bulkstat_one() accept a
custom "output formatter" in the private_data argument which takes care
of the xfs_bulkstat_one_compat() that takes care of the different
layout in the compat case.
* i386 struct xfs_inogrp has different padding.
Add a similar "output formatter" mecanism to xfs_inumbers().

SGI-PV: 967354
SGI-Modid: xfs-linux-melb:xfs-kern:29102a

Signed-off-by: Michal Marek <mmarek@suse.cz>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/linux-2.6/xfs_ioctl.c   |   2 +-
 fs/xfs/linux-2.6/xfs_ioctl32.c | 224 ++++++++++++++++++++++++++++++++++++-----
 fs/xfs/xfs_itable.c            |  42 ++++++--
 fs/xfs/xfs_itable.h            |  20 +++-
 4 files changed, 255 insertions(+), 33 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index ff5c41ff8d40..5917808abbd6 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -1019,7 +1019,7 @@ xfs_ioc_bulkstat(
 
 	if (cmd == XFS_IOC_FSINUMBERS)
 		error = xfs_inumbers(mp, &inlast, &count,
-						bulkreq.ubuffer);
+					bulkreq.ubuffer, xfs_inumbers_fmt);
 	else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
 		error = xfs_bulkstat_single(mp, &inlast,
 						bulkreq.ubuffer, &done);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 87163fb9746b..141cf15067c2 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -23,10 +23,25 @@
 #include <linux/fs.h>
 #include <asm/uaccess.h>
 #include "xfs.h"
-#include "xfs_types.h"
 #include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir2_sf.h"
 #include "xfs_vfs.h"
 #include "xfs_vnode.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_error.h"
 #include "xfs_dfrag.h"
 
 #define  _NATIVE_IOC(cmd, type) \
@@ -34,6 +49,7 @@
 
 #if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
 #define BROKEN_X86_ALIGNMENT
+#define _PACKED __attribute__((packed))
 /* on ia32 l_start is on a 32-bit boundary */
 typedef struct xfs_flock64_32 {
 	__s16		l_type;
@@ -111,35 +127,196 @@ STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg)
 	return (unsigned long)p;
 }
 
+typedef struct compat_xfs_inogrp {
+	__u64		xi_startino;	/* starting inode number	*/
+	__s32		xi_alloccount;	/* # bits set in allocmask	*/
+	__u64		xi_allocmask;	/* mask of allocated inodes	*/
+} __attribute__((packed)) compat_xfs_inogrp_t;
+
+STATIC int xfs_inumbers_fmt_compat(
+	void __user *ubuffer,
+	const xfs_inogrp_t *buffer,
+	long count,
+	long *written)
+{
+	compat_xfs_inogrp_t *p32 = ubuffer;
+	long i;
+
+	for (i = 0; i < count; i++) {
+		if (put_user(buffer[i].xi_startino,   &p32[i].xi_startino) ||
+		    put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
+		    put_user(buffer[i].xi_allocmask,  &p32[i].xi_allocmask))
+			return -EFAULT;
+	}
+	*written = count * sizeof(*p32);
+	return 0;
+}
+
 #else
 
-typedef struct xfs_fsop_bulkreq32 {
+#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
+#define _PACKED
+
+#endif
+
+/* XFS_IOC_FSBULKSTAT and friends */
+
+typedef struct compat_xfs_bstime {
+	__s32		tv_sec;		/* seconds		*/
+	__s32		tv_nsec;	/* and nanoseconds	*/
+} compat_xfs_bstime_t;
+
+STATIC int xfs_bstime_store_compat(
+	compat_xfs_bstime_t __user *p32,
+	const xfs_bstime_t *p)
+{
+	__s32 sec32;
+
+	sec32 = p->tv_sec;
+	if (put_user(sec32, &p32->tv_sec) ||
+	    put_user(p->tv_nsec, &p32->tv_nsec))
+		return -EFAULT;
+	return 0;
+}
+
+typedef struct compat_xfs_bstat {
+	__u64		bs_ino;		/* inode number			*/
+	__u16		bs_mode;	/* type and mode		*/
+	__u16		bs_nlink;	/* number of links		*/
+	__u32		bs_uid;		/* user id			*/
+	__u32		bs_gid;		/* group id			*/
+	__u32		bs_rdev;	/* device value			*/
+	__s32		bs_blksize;	/* block size			*/
+	__s64		bs_size;	/* file size			*/
+	compat_xfs_bstime_t bs_atime;	/* access time			*/
+	compat_xfs_bstime_t bs_mtime;	/* modify time			*/
+	compat_xfs_bstime_t bs_ctime;	/* inode change time		*/
+	int64_t		bs_blocks;	/* number of blocks		*/
+	__u32		bs_xflags;	/* extended flags		*/
+	__s32		bs_extsize;	/* extent size			*/
+	__s32		bs_extents;	/* number of extents		*/
+	__u32		bs_gen;		/* generation count		*/
+	__u16		bs_projid;	/* project id			*/
+	unsigned char	bs_pad[14];	/* pad space, unused		*/
+	__u32		bs_dmevmask;	/* DMIG event mask		*/
+	__u16		bs_dmstate;	/* DMIG state info		*/
+	__u16		bs_aextents;	/* attribute number of extents	*/
+} _PACKED compat_xfs_bstat_t;
+
+STATIC int xfs_bulkstat_one_fmt_compat(
+	void			__user *ubuffer,
+	const xfs_bstat_t	*buffer)
+{
+	compat_xfs_bstat_t __user *p32 = ubuffer;
+
+	if (put_user(buffer->bs_ino, &p32->bs_ino) ||
+	    put_user(buffer->bs_mode, &p32->bs_mode) ||
+	    put_user(buffer->bs_nlink, &p32->bs_nlink) ||
+	    put_user(buffer->bs_uid, &p32->bs_uid) ||
+	    put_user(buffer->bs_gid, &p32->bs_gid) ||
+	    put_user(buffer->bs_rdev, &p32->bs_rdev) ||
+	    put_user(buffer->bs_blksize, &p32->bs_blksize) ||
+	    put_user(buffer->bs_size, &p32->bs_size) ||
+	    xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) ||
+	    xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) ||
+	    xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) ||
+	    put_user(buffer->bs_blocks, &p32->bs_blocks) ||
+	    put_user(buffer->bs_xflags, &p32->bs_xflags) ||
+	    put_user(buffer->bs_extsize, &p32->bs_extsize) ||
+	    put_user(buffer->bs_extents, &p32->bs_extents) ||
+	    put_user(buffer->bs_gen, &p32->bs_gen) ||
+	    put_user(buffer->bs_projid, &p32->bs_projid) ||
+	    put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
+	    put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
+	    put_user(buffer->bs_aextents, &p32->bs_aextents))
+		return -EFAULT;
+	return sizeof(*p32);
+}
+
+
+
+typedef struct compat_xfs_fsop_bulkreq {
 	compat_uptr_t	lastip;		/* last inode # pointer		*/
 	__s32		icount;		/* count of entries in buffer	*/
 	compat_uptr_t	ubuffer;	/* user buffer for inode desc.	*/
-	__s32		ocount;		/* output count pointer		*/
-} xfs_fsop_bulkreq32_t;
+	compat_uptr_t	ocount;		/* output count pointer		*/
+} compat_xfs_fsop_bulkreq_t;
 
-STATIC unsigned long
-xfs_ioctl32_bulkstat(
-	unsigned long		arg)
+#define XFS_IOC_FSBULKSTAT_32 \
+	_IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
+#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
+	_IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
+#define XFS_IOC_FSINUMBERS_32 \
+	_IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
+
+/* copied from xfs_ioctl.c */
+STATIC int
+xfs_ioc_bulkstat_compat(
+	xfs_mount_t		*mp,
+	unsigned int		cmd,
+	void			__user *arg)
 {
-	xfs_fsop_bulkreq32_t	__user *p32 = (void __user *)arg;
-	xfs_fsop_bulkreq_t	__user *p = compat_alloc_user_space(sizeof(*p));
+	compat_xfs_fsop_bulkreq_t __user *p32 = (void __user *)arg;
 	u32			addr;
+	xfs_fsop_bulkreq_t	bulkreq;
+	int			count;	/* # of records returned */
+	xfs_ino_t		inlast;	/* last inode number */
+	int			done;
+	int			error;
+
+	/* done = 1 if there are more stats to get and if bulkstat */
+	/* should be called again (unused here, but used in dmapi) */
 
-	if (get_user(addr, &p32->lastip) ||
-	    put_user(compat_ptr(addr), &p->lastip) ||
-	    copy_in_user(&p->icount, &p32->icount, sizeof(s32)) ||
-	    get_user(addr, &p32->ubuffer) ||
-	    put_user(compat_ptr(addr), &p->ubuffer) ||
-	    get_user(addr, &p32->ocount) ||
-	    put_user(compat_ptr(addr), &p->ocount))
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -XFS_ERROR(EIO);
+
+	if (get_user(addr, &p32->lastip))
+		return -EFAULT;
+	bulkreq.lastip = compat_ptr(addr);
+	if (get_user(bulkreq.icount, &p32->icount) ||
+	    get_user(addr, &p32->ubuffer))
 		return -EFAULT;
+	bulkreq.ubuffer = compat_ptr(addr);
+	if (get_user(addr, &p32->ocount))
+		return -EFAULT;
+	bulkreq.ocount = compat_ptr(addr);
 
-	return (unsigned long)p;
+	if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+		return -XFS_ERROR(EFAULT);
+
+	if ((count = bulkreq.icount) <= 0)
+		return -XFS_ERROR(EINVAL);
+
+	if (cmd == XFS_IOC_FSINUMBERS)
+		error = xfs_inumbers(mp, &inlast, &count,
+				bulkreq.ubuffer, xfs_inumbers_fmt_compat);
+	else {
+		/* declare a var to get a warning in case the type changes */
+		bulkstat_one_fmt_pf formatter = xfs_bulkstat_one_fmt_compat;
+		error = xfs_bulkstat(mp, &inlast, &count,
+			xfs_bulkstat_one, formatter,
+			sizeof(compat_xfs_bstat_t), bulkreq.ubuffer,
+			BULKSTAT_FG_QUICK, &done);
+	}
+	if (error)
+		return -error;
+
+	if (bulkreq.ocount != NULL) {
+		if (copy_to_user(bulkreq.lastip, &inlast,
+						sizeof(xfs_ino_t)))
+			return -XFS_ERROR(EFAULT);
+
+		if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
+			return -XFS_ERROR(EFAULT);
+	}
+
+	return 0;
 }
-#endif
+
+
 
 typedef struct compat_xfs_fsop_handlereq {
 	__u32		fd;		/* fd for FD_TO_HANDLE		*/
@@ -261,12 +438,13 @@ xfs_compat_ioctl(
 	case XFS_IOC_SWAPEXT:
 		break;
 
-	case XFS_IOC_FSBULKSTAT_SINGLE:
-	case XFS_IOC_FSBULKSTAT:
-	case XFS_IOC_FSINUMBERS:
-		arg = xfs_ioctl32_bulkstat(arg);
-		break;
 #endif
+	case XFS_IOC_FSBULKSTAT_32:
+	case XFS_IOC_FSBULKSTAT_SINGLE_32:
+	case XFS_IOC_FSINUMBERS_32:
+		cmd = _NATIVE_IOC(cmd, struct xfs_fsop_bulkreq);
+		return xfs_ioc_bulkstat_compat(XFS_BHVTOI(VNHEAD(vp))->i_mount,
+				cmd, (void*)arg);
 	case XFS_IOC_FD_TO_HANDLE_32:
 	case XFS_IOC_PATH_TO_HANDLE_32:
 	case XFS_IOC_PATH_TO_FSHANDLE_32:
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index e725ddd3de5f..4c2454bcc714 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -202,6 +202,16 @@ xfs_bulkstat_one_dinode(
 	return 0;
 }
 
+STATIC int
+xfs_bulkstat_one_fmt(
+	void			__user *ubuffer,
+	const xfs_bstat_t	*buffer)
+{
+	if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
+		return -EFAULT;
+	return sizeof(*buffer);
+}
+
 /*
  * Return stat information for one inode.
  * Return 0 if ok, else errno.
@@ -221,6 +231,7 @@ xfs_bulkstat_one(
 	xfs_bstat_t	*buf;		/* return buffer */
 	int		error = 0;	/* error value */
 	xfs_dinode_t	*dip;		/* dinode inode pointer */
+	bulkstat_one_fmt_pf formatter = private_data ? : xfs_bulkstat_one_fmt;
 
 	dip = (xfs_dinode_t *)dibuff;
 	*stat = BULKSTAT_RV_NOTHING;
@@ -243,14 +254,15 @@ xfs_bulkstat_one(
 		xfs_bulkstat_one_dinode(mp, ino, dip, buf);
 	}
 
-	if (copy_to_user(buffer, buf, sizeof(*buf)))  {
+	error = formatter(buffer, buf);
+	if (error < 0)  {
 		error = EFAULT;
 		goto out_free;
 	}
 
 	*stat = BULKSTAT_RV_DIDONE;
 	if (ubused)
-		*ubused = sizeof(*buf);
+		*ubused = error;
 
  out_free:
 	kmem_free(buf, sizeof(*buf));
@@ -748,6 +760,19 @@ xfs_bulkstat_single(
 	return 0;
 }
 
+int
+xfs_inumbers_fmt(
+	void			__user *ubuffer, /* buffer to write to */
+	const xfs_inogrp_t	*buffer,	/* buffer to read from */
+	long			count,		/* # of elements to read */
+	long			*written)	/* # of bytes written */
+{
+	if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer)))
+		return -EFAULT;
+	*written = count * sizeof(*buffer);
+	return 0;
+}
+
 /*
  * Return inode number table for the filesystem.
  */
@@ -756,7 +781,8 @@ xfs_inumbers(
 	xfs_mount_t	*mp,		/* mount point for filesystem */
 	xfs_ino_t	*lastino,	/* last inode returned */
 	int		*count,		/* size of buffer/count returned */
-	xfs_inogrp_t	__user *ubuffer)/* buffer with inode descriptions */
+	void		__user *ubuffer,/* buffer with inode descriptions */
+	inumbers_fmt_pf	formatter)
 {
 	xfs_buf_t	*agbp;
 	xfs_agino_t	agino;
@@ -835,12 +861,12 @@ xfs_inumbers(
 		bufidx++;
 		left--;
 		if (bufidx == bcount) {
-			if (copy_to_user(ubuffer, buffer,
-					bufidx * sizeof(*buffer))) {
+			long written;
+			if (formatter(ubuffer, buffer, bufidx, &written)) {
 				error = XFS_ERROR(EFAULT);
 				break;
 			}
-			ubuffer += bufidx;
+			ubuffer += written;
 			*count += bufidx;
 			bufidx = 0;
 		}
@@ -862,8 +888,8 @@ xfs_inumbers(
 	}
 	if (!error) {
 		if (bufidx) {
-			if (copy_to_user(ubuffer, buffer,
-					bufidx * sizeof(*buffer)))
+			long written;
+			if (formatter(ubuffer, buffer, bufidx, &written))
 				error = XFS_ERROR(EFAULT);
 			else
 				*count += bufidx;
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index f25a28862a17..a1f18fce9b70 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -69,6 +69,10 @@ xfs_bulkstat_single(
 	char			__user *buffer,
 	int			*done);
 
+typedef int (*bulkstat_one_fmt_pf)(  /* used size in bytes or negative error */
+	void			__user *ubuffer, /* buffer to write to */
+	const xfs_bstat_t	*buffer);        /* buffer to read from */
+
 int
 xfs_bulkstat_one(
 	xfs_mount_t		*mp,
@@ -86,11 +90,25 @@ xfs_internal_inum(
 	xfs_mount_t		*mp,
 	xfs_ino_t		ino);
 
+typedef int (*inumbers_fmt_pf)(
+	void			__user *ubuffer, /* buffer to write to */
+	const xfs_inogrp_t	*buffer,	/* buffer to read from */
+	long			count,		/* # of elements to read */
+	long			*written);	/* # of bytes written */
+
+int
+xfs_inumbers_fmt(
+	void			__user *ubuffer, /* buffer to write to */
+	const xfs_inogrp_t	*buffer,	/* buffer to read from */
+	long			count,		/* # of elements to read */
+	long			*written);	/* # of bytes written */
+
 int					/* error status */
 xfs_inumbers(
 	xfs_mount_t		*mp,	/* mount point for filesystem */
 	xfs_ino_t		*last,	/* last inode returned */
 	int			*count,	/* size of buffer/count returned */
-	xfs_inogrp_t		__user *buffer);/* buffer with inode info */
+	void			__user *buffer, /* buffer with inode info */
+	inumbers_fmt_pf		formatter);
 
 #endif	/* __XFS_ITABLE_H__ */
-- 
cgit v1.2.3-59-g8ed1b


From 0f1145cc18e970ebe37da114fc34c297f135e062 Mon Sep 17 00:00:00 2001
From: David Chinner <dgc@sgi.com>
Date: Fri, 29 Jun 2007 17:26:09 +1000
Subject: [XFS] Fix lockdep annotations for xfs_lock_inodes

SGI-PV: 967035
SGI-Modid: xfs-linux-melb:xfs-kern:29026a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
---
 fs/xfs/xfs_inode.h    | 15 +++++++++------
 fs/xfs/xfs_vnodeops.c |  4 ++--
 2 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index d418eeed4ebd..012dfd4a958c 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -415,19 +415,22 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
  * gets a lockdep subclass of 1 and the second lock will have a lockdep
  * subclass of 0.
  *
- * XFS_I[O]LOCK_INUMORDER - for locking several inodes at the some time
+ * XFS_LOCK_INUMORDER - for locking several inodes at the some time
  * with xfs_lock_inodes().  This flag is used as the starting subclass
  * and each subsequent lock acquired will increment the subclass by one.
  * So the first lock acquired will have a lockdep subclass of 2, the
- * second lock will have a lockdep subclass of 3, and so on.
+ * second lock will have a lockdep subclass of 3, and so on. It is
+ * the responsibility of the class builder to shift this to the correct
+ * portion of the lock_mode lockdep mask.
  */
+#define XFS_LOCK_PARENT		1
+#define XFS_LOCK_INUMORDER	2
+
 #define XFS_IOLOCK_SHIFT	16
-#define	XFS_IOLOCK_PARENT	(1 << XFS_IOLOCK_SHIFT)
-#define	XFS_IOLOCK_INUMORDER	(2 << XFS_IOLOCK_SHIFT)
+#define	XFS_IOLOCK_PARENT	(XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
 
 #define XFS_ILOCK_SHIFT		24
-#define	XFS_ILOCK_PARENT	(1 << XFS_ILOCK_SHIFT)
-#define	XFS_ILOCK_INUMORDER	(2 << XFS_ILOCK_SHIFT)
+#define	XFS_ILOCK_PARENT	(XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
 
 #define XFS_IOLOCK_DEP_MASK	0x00ff0000
 #define XFS_ILOCK_DEP_MASK	0xff000000
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 60fd0be90a16..79b522779aa4 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -2224,9 +2224,9 @@ static inline int
 xfs_lock_inumorder(int lock_mode, int subclass)
 {
 	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
-		lock_mode |= (subclass + XFS_IOLOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
+		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
 	if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
-		lock_mode |= (subclass + XFS_ILOCK_INUMORDER) << XFS_ILOCK_SHIFT;
+		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
 
 	return lock_mode;
 }
-- 
cgit v1.2.3-59-g8ed1b