From 0a32c26e720a8b38971d0685976f4a7d63f9e2ef Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 5 Jun 2013 12:09:08 +1000 Subject: xfs: inode unlinked list needs to recalculate the inode CRC The inode unlinked list manipulations operate directly on the inode buffer, and so bypass the inode CRC calculation mechanisms. Hence an inode on the unlinked list has an invalid CRC. Fix this by recalculating the CRC whenever we modify an unlinked list pointer in an inode, ncluding during log recovery. This is trivial to do and results in unlinked list operations always leaving a consistent inode in the buffer. Signed-off-by: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_inode.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index efbe1accb6ca..7f7be5f98f52 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1638,6 +1638,10 @@ xfs_iunlink( dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; offset = ip->i_imap.im_boffset + offsetof(xfs_dinode_t, di_next_unlinked); + + /* need to recalc the inode CRC if appropriate */ + xfs_dinode_calc_crc(mp, dip); + xfs_trans_inode_buf(tp, ibp); xfs_trans_log_buf(tp, ibp, offset, (offset + sizeof(xfs_agino_t) - 1)); @@ -1723,6 +1727,10 @@ xfs_iunlink_remove( dip->di_next_unlinked = cpu_to_be32(NULLAGINO); offset = ip->i_imap.im_boffset + offsetof(xfs_dinode_t, di_next_unlinked); + + /* need to recalc the inode CRC if appropriate */ + xfs_dinode_calc_crc(mp, dip); + xfs_trans_inode_buf(tp, ibp); xfs_trans_log_buf(tp, ibp, offset, (offset + sizeof(xfs_agino_t) - 1)); @@ -1796,6 +1804,10 @@ xfs_iunlink_remove( dip->di_next_unlinked = cpu_to_be32(NULLAGINO); offset = ip->i_imap.im_boffset + offsetof(xfs_dinode_t, di_next_unlinked); + + /* need to recalc the inode CRC if appropriate */ + xfs_dinode_calc_crc(mp, dip); + xfs_trans_inode_buf(tp, ibp); xfs_trans_log_buf(tp, ibp, offset, (offset + sizeof(xfs_agino_t) - 1)); @@ -1809,6 +1821,10 @@ xfs_iunlink_remove( last_dip->di_next_unlinked = cpu_to_be32(next_agino); ASSERT(next_agino != 0); offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked); + + /* need to recalc the inode CRC if appropriate */ + xfs_dinode_calc_crc(mp, last_dip); + xfs_trans_inode_buf(tp, last_ibp); xfs_trans_log_buf(tp, last_ibp, offset, (offset + sizeof(xfs_agino_t) - 1)); -- cgit v1.2.3-59-g8ed1b From cca9f93a52d2ead50b5da59ca83d5f469ee4be5f Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 27 Jun 2013 16:04:49 +1000 Subject: xfs: don't do IO when creating an new inode When we are allocating a new inode, we read the inode cluster off disk to increment the generation number. We are already using a random generation number for newly allocated inodes, so if we are not using the ikeep mode, we can just generate a new generation number when we initialise the newly allocated inode. This avoids the need for reading the inode buffer during inode creation. This will speed up allocation of inodes in cold, partially allocated clusters as they will no longer need to be read from disk during allocation. It will also reduce the CPU overhead of inode allocation by not having the process the buffer read, even on cache hits. Signed-off-by: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_inode.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7f7be5f98f52..d1f76da6f8bf 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1028,6 +1028,11 @@ xfs_dinode_calc_crc( /* * Read the disk inode attributes into the in-core inode structure. + * + * If we are initialising a new inode and we are not utilising the + * XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new inode core + * with a random generation number. If we are keeping inodes around, we need to + * read the inode cluster to get the existing generation number off disk. */ int xfs_iread( @@ -1047,6 +1052,22 @@ xfs_iread( if (error) return error; + /* shortcut IO on inode allocation if possible */ + if ((iget_flags & XFS_IGET_CREATE) && + !(mp->m_flags & XFS_MOUNT_IKEEP)) { + /* initialise the on-disk inode core */ + memset(&ip->i_d, 0, sizeof(ip->i_d)); + ip->i_d.di_magic = XFS_DINODE_MAGIC; + ip->i_d.di_gen = prandom_u32(); + if (xfs_sb_version_hascrc(&mp->m_sb)) { + ip->i_d.di_version = 3; + ip->i_d.di_ino = ip->i_ino; + uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid); + } else + ip->i_d.di_version = 2; + return 0; + } + /* * Get pointers to the on-disk inode and the buffer containing it. */ @@ -1133,17 +1154,16 @@ xfs_iread( xfs_buf_set_ref(bp, XFS_INO_REF); /* - * Use xfs_trans_brelse() to release the buffer containing the - * on-disk inode, because it was acquired with xfs_trans_read_buf() - * in xfs_imap_to_bp() above. If tp is NULL, this is just a normal + * Use xfs_trans_brelse() to release the buffer containing the on-disk + * inode, because it was acquired with xfs_trans_read_buf() in + * xfs_imap_to_bp() above. If tp is NULL, this is just a normal * brelse(). If we're within a transaction, then xfs_trans_brelse() * will only release the buffer if it is not dirty within the * transaction. It will be OK to release the buffer in this case, - * because inodes on disk are never destroyed and we will be - * locking the new in-core inode before putting it in the hash - * table where other processes can find it. Thus we don't have - * to worry about the inode being changed just because we released - * the buffer. + * because inodes on disk are never destroyed and we will be locking the + * new in-core inode before putting it in the cache where other + * processes can find it. Thus we don't have to worry about the inode + * being changed just because we released the buffer. */ out_brelse: xfs_trans_brelse(tp, bp); -- cgit v1.2.3-59-g8ed1b From 1baaed8fa955ab0d23aab24477dae566ed6a105b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 27 Jun 2013 16:04:50 +1000 Subject: xfs: xfs_ifree doesn't need to modify the inode buffer Long ago, bulkstat used to read inodes directly from the backing buffer for speed. This had the unfortunate problem of being cache incoherent with unlinks, and so xfs_ifree() had to mark the inode as free directly in the backing buffer. bulkstat was changed some time ago to use inode cache coherent lookups, and so will never see unlinked inodes in it's lookups. Hence xfs_ifree() does not need to touch the inode backing buffer anymore. Signed-off-by: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_inode.c | 32 ++++---------------------------- 1 file changed, 4 insertions(+), 28 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d1f76da6f8bf..9ecfe1e559fc 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2048,8 +2048,6 @@ xfs_ifree( int error; int delete; xfs_ino_t first_ino; - xfs_dinode_t *dip; - xfs_buf_t *ibp; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(ip->i_d.di_nlink == 0); @@ -2062,14 +2060,13 @@ xfs_ifree( * Pull the on-disk inode from the AGI unlinked list. */ error = xfs_iunlink_remove(tp, ip); - if (error != 0) { + if (error) return error; - } error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino); - if (error != 0) { + if (error) return error; - } + ip->i_d.di_mode = 0; /* mark incore inode as free */ ip->i_d.di_flags = 0; ip->i_d.di_dmevmask = 0; @@ -2081,31 +2078,10 @@ xfs_ifree( * by reincarnations of this inode. */ ip->i_d.di_gen++; - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &dip, &ibp, - 0, 0); - if (error) - return error; - - /* - * Clear the on-disk di_mode. This is to prevent xfs_bulkstat - * from picking up this inode when it is reclaimed (its incore state - * initialzed but not flushed to disk yet). The in-core di_mode is - * already cleared and a corresponding transaction logged. - * The hack here just synchronizes the in-core to on-disk - * di_mode value in advance before the actual inode sync to disk. - * This is OK because the inode is already unlinked and would never - * change its di_mode again for this inode generation. - * This is a temporary hack that would require a proper fix - * in the future. - */ - dip->di_mode = 0; - - if (delete) { + if (delete) error = xfs_ifree_cluster(ip, tp, first_ino); - } return error; } -- cgit v1.2.3-59-g8ed1b