From 6ca497a83e592d64e050c4d04b6dedb8c915f39a Mon Sep 17 00:00:00 2001 From: wengang wang Date: Fri, 6 Mar 2009 21:29:10 +0800 Subject: ocfs2: fix rare stale inode errors when exporting via nfs For nfs exporting, ocfs2_get_dentry() returns the dentry for fh. ocfs2_get_dentry() may read from disk when the inode is not in memory, without any cross cluster lock. this leads to the file system loading a stale inode. This patch fixes above problem. Solution is that in case of inode is not in memory, we get the cluster lock(PR) of alloc inode where the inode in question is allocated from (this causes node on which deletion is done sync the alloc inode) before reading out the inode itsself. then we check the bitmap in the group (the inode in question allcated from) to see if the bit is clear. if it's clear then it's stale. if the bit is set, we then check generation as the existing code does. We have to read out the inode in question from disk first to know its alloc slot and allot bit. And if its not stale we read it out using ocfs2_iget(). The second read should then be from cache. And also we have to add a per superblock nfs_sync_lock to cover the lock for alloc inode and that for inode in question. this is because ocfs2_get_dentry() and ocfs2_delete_inode() lock on them in reverse order. nfs_sync_lock is locked in EX mode in ocfs2_get_dentry() and in PR mode in ocfs2_delete_inode(). so that mutliple ocfs2_delete_inode() can run concurrently in normal case. [mfasheh@suse.com: build warning fixes and comment cleanups] Signed-off-by: Wengang Wang Acked-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/export.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 77 insertions(+), 7 deletions(-) (limited to 'fs/ocfs2/export.c') diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 2f27b332d8b3..de3da8eb558c 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -31,6 +31,7 @@ #include "ocfs2.h" +#include "alloc.h" #include "dir.h" #include "dlmglue.h" #include "dcache.h" @@ -38,6 +39,7 @@ #include "inode.h" #include "buffer_head_io.h" +#include "suballoc.h" struct ocfs2_inode_handle { @@ -49,29 +51,97 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, struct ocfs2_inode_handle *handle) { struct inode *inode; + struct ocfs2_super *osb = OCFS2_SB(sb); + u64 blkno = handle->ih_blkno; + int status, set; struct dentry *result; mlog_entry("(0x%p, 0x%p)\n", sb, handle); - if (handle->ih_blkno == 0) { - mlog_errno(-ESTALE); - return ERR_PTR(-ESTALE); + if (blkno == 0) { + mlog(0, "nfs wants inode with blkno: 0\n"); + result = ERR_PTR(-ESTALE); + goto bail; + } + + inode = ocfs2_ilookup(sb, blkno); + /* + * If the inode exists in memory, we only need to check it's + * generation number + */ + if (inode) + goto check_gen; + + /* + * This will synchronize us against ocfs2_delete_inode() on + * all nodes + */ + status = ocfs2_nfs_sync_lock(osb, 1); + if (status < 0) { + mlog(ML_ERROR, "getting nfs sync lock(EX) failed %d\n", status); + goto check_err; + } + + status = ocfs2_test_inode_bit(osb, blkno, &set); + if (status < 0) { + if (status == -EINVAL) { + /* + * The blkno NFS gave us doesn't even show up + * as an inode, we return -ESTALE to be + * nice + */ + mlog(0, "test inode bit failed %d\n", status); + status = -ESTALE; + } else { + mlog(ML_ERROR, "test inode bit failed %d\n", status); + } + goto unlock_nfs_sync; + } + + /* If the inode allocator bit is clear, this inode must be stale */ + if (!set) { + mlog(0, "inode %llu suballoc bit is clear\n", blkno); + status = -ESTALE; + goto unlock_nfs_sync; } - inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0, 0); + inode = ocfs2_iget(osb, blkno, 0, 0); - if (IS_ERR(inode)) - return (void *)inode; +unlock_nfs_sync: + ocfs2_nfs_sync_unlock(osb, 1); +check_err: + if (status < 0) { + if (status == -ESTALE) { + mlog(0, "stale inode ino: %llu generation: %u\n", + blkno, handle->ih_generation); + } + result = ERR_PTR(status); + goto bail; + } + + if (IS_ERR(inode)) { + mlog_errno(PTR_ERR(inode)); + result = (void *)inode; + goto bail; + } + +check_gen: if (handle->ih_generation != inode->i_generation) { iput(inode); - return ERR_PTR(-ESTALE); + mlog(0, "stale inode ino: %llu generation: %u\n", blkno, + handle->ih_generation); + result = ERR_PTR(-ESTALE); + goto bail; } result = d_obtain_alias(inode); if (!IS_ERR(result)) result->d_op = &ocfs2_dentry_ops; + else + mlog_errno(PTR_ERR(result)); +bail: mlog_exit_ptr(result); return result; } -- cgit v1.2.3-59-g8ed1b From 5b09b507daaa882d888b6cd78ee89ba9caace44b Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 21 Apr 2009 16:31:20 -0700 Subject: ocfs2: Fix some printk() warnings. The old %llu vs u64 battle. Cast them correctly. Signed-off-by: Joel Becker --- fs/ocfs2/export.c | 9 +++++---- fs/ocfs2/suballoc.c | 19 ++++++++++++------- 2 files changed, 17 insertions(+), 11 deletions(-) (limited to 'fs/ocfs2/export.c') diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index de3da8eb558c..15713cbb865c 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -100,7 +100,8 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, /* If the inode allocator bit is clear, this inode must be stale */ if (!set) { - mlog(0, "inode %llu suballoc bit is clear\n", blkno); + mlog(0, "inode %llu suballoc bit is clear\n", + (unsigned long long)blkno); status = -ESTALE; goto unlock_nfs_sync; } @@ -114,7 +115,7 @@ check_err: if (status < 0) { if (status == -ESTALE) { mlog(0, "stale inode ino: %llu generation: %u\n", - blkno, handle->ih_generation); + (unsigned long long)blkno, handle->ih_generation); } result = ERR_PTR(status); goto bail; @@ -129,8 +130,8 @@ check_err: check_gen: if (handle->ih_generation != inode->i_generation) { iput(inode); - mlog(0, "stale inode ino: %llu generation: %u\n", blkno, - handle->ih_generation); + mlog(0, "stale inode ino: %llu generation: %u\n", + (unsigned long long)blkno, handle->ih_generation); result = ERR_PTR(-ESTALE); goto bail; } diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index eb21dbb0ee0b..8439f6b324b9 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -2197,18 +2197,20 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, struct buffer_head *inode_bh = NULL; struct ocfs2_dinode *inode_fe; - mlog_entry("blkno: %llu\n", blkno); + mlog_entry("blkno: %llu\n", (unsigned long long)blkno); /* dirty read disk */ status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh); if (status < 0) { - mlog(ML_ERROR, "read block %llu failed %d\n", blkno, status); + mlog(ML_ERROR, "read block %llu failed %d\n", + (unsigned long long)blkno, status); goto bail; } inode_fe = (struct ocfs2_dinode *) inode_bh->b_data; if (!OCFS2_IS_VALID_DINODE(inode_fe)) { - mlog(ML_ERROR, "invalid inode %llu requested\n", blkno); + mlog(ML_ERROR, "invalid inode %llu requested\n", + (unsigned long long)blkno); status = -EINVAL; goto bail; } @@ -2216,7 +2218,8 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, if (le16_to_cpu(inode_fe->i_suballoc_slot) != (u16)OCFS2_INVALID_SLOT && (u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) { mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n", - blkno, (u32)le16_to_cpu(inode_fe->i_suballoc_slot)); + (unsigned long long)blkno, + (u32)le16_to_cpu(inode_fe->i_suballoc_slot)); status = -EINVAL; goto bail; } @@ -2251,7 +2254,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, u64 bg_blkno; int status; - mlog_entry("blkno: %llu bit: %u\n", blkno, (unsigned int)bit); + mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno, + (unsigned int)bit); alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data; if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) { @@ -2266,7 +2270,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno, &group_bh); if (status < 0) { - mlog(ML_ERROR, "read group %llu failed %d\n", bg_blkno, status); + mlog(ML_ERROR, "read group %llu failed %d\n", + (unsigned long long)bg_blkno, status); goto bail; } @@ -2300,7 +2305,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) struct inode *inode_alloc_inode; struct buffer_head *alloc_bh = NULL; - mlog_entry("blkno: %llu", blkno); + mlog_entry("blkno: %llu", (unsigned long long)blkno); status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, &suballoc_bit); -- cgit v1.2.3-59-g8ed1b