From 7d4fb40ad7efe4586d1341d4731377fb4530836f Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Fri, 9 Jun 2006 15:27:16 +1000 Subject: [XFS] Start writeout earlier (on last close) in the case where we have a truncate down followed by delayed allocation (buffered writes) - worst case scenario for the notorious NULL files problem. This reduces the window where we are exposed to that problem significantly. SGI-PV: 917976 SGI-Modid: xfs-linux-melb:xfs-kern:26100a Signed-off-by: Nathan Scott --- fs/xfs/linux-2.6/xfs_aops.c | 13 +++++++ fs/xfs/linux-2.6/xfs_file.c | 15 +++++++ fs/xfs/linux-2.6/xfs_fs_subr.c | 53 +++++-------------------- fs/xfs/linux-2.6/xfs_vnode.h | 39 ++++++++++++------- fs/xfs/xfs_vnodeops.c | 88 +++++++++++++++++++++++++----------------- 5 files changed, 114 insertions(+), 94 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 5835e699a7fc..c0a904316854 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1157,6 +1157,18 @@ out_unlock: return error; } +STATIC int +xfs_vm_writepages( + struct address_space *mapping, + struct writeback_control *wbc) +{ + struct vnode *vp = vn_from_inode(mapping->host); + + if (VN_TRUNC(vp)) + VUNTRUNCATE(vp); + return generic_writepages(mapping, wbc); +} + /* * Called to move a page into cleanable state - and from there * to be released. Possibly the page is already clean. We always @@ -1451,6 +1463,7 @@ struct address_space_operations xfs_address_space_operations = { .readpage = xfs_vm_readpage, .readpages = xfs_vm_readpages, .writepage = xfs_vm_writepage, + .writepages = xfs_vm_writepages, .sync_page = block_sync_page, .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 7c9f7598807f..97615cc74ef5 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -323,6 +323,17 @@ xfs_file_open( return -error; } +STATIC int +xfs_file_close( + struct file *filp) +{ + vnode_t *vp = vn_from_inode(filp->f_dentry->d_inode); + int error; + + VOP_CLOSE(vp, 0, file_count(filp) > 1 ? L_FALSE : L_TRUE, NULL, error); + return -error; +} + STATIC int xfs_file_release( struct inode *inode, @@ -349,6 +360,8 @@ xfs_file_fsync( if (datasync) flags |= FSYNC_DATA; + if (VN_TRUNC(vp)) + VUNTRUNCATE(vp); VOP_FSYNC(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1, error); return -error; } @@ -578,6 +591,7 @@ const struct file_operations xfs_file_operations = { #endif .mmap = xfs_file_mmap, .open = xfs_file_open, + .flush = xfs_file_close, .release = xfs_file_release, .fsync = xfs_file_fsync, #ifdef HAVE_FOP_OPEN_EXEC @@ -602,6 +616,7 @@ const struct file_operations xfs_invis_file_operations = { #endif .mmap = xfs_file_mmap, .open = xfs_file_open, + .flush = xfs_file_close, .release = xfs_file_release, .fsync = xfs_file_fsync, }; diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 575f2a790f31..f0c56daf4d6d 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -15,40 +15,12 @@ * along with this program; if not, write the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ - #include "xfs.h" -/* - * Stub for no-op vnode operations that return error status. - */ -int -fs_noerr(void) -{ - return 0; -} +int fs_noerr(void) { return 0; } +int fs_nosys(void) { return ENOSYS; } +void fs_noval(void) { return; } -/* - * Operation unsupported under this file system. - */ -int -fs_nosys(void) -{ - return ENOSYS; -} - -/* - * Stub for inactive, strategy, and read/write lock/unlock. Does nothing. - */ -/* ARGSUSED */ -void -fs_noval(void) -{ -} - -/* - * vnode pcache layer for vnode_tosspages. - * 'last' parameter unused but left in for IRIX compatibility - */ void fs_tosspages( bhv_desc_t *bdp, @@ -63,11 +35,6 @@ fs_tosspages( truncate_inode_pages(ip->i_mapping, first); } - -/* - * vnode pcache layer for vnode_flushinval_pages. - * 'last' parameter unused but left in for IRIX compatibility - */ void fs_flushinval_pages( bhv_desc_t *bdp, @@ -79,16 +46,13 @@ fs_flushinval_pages( struct inode *ip = vn_to_inode(vp); if (VN_CACHED(vp)) { + if (VN_TRUNC(vp)) + VUNTRUNCATE(vp); filemap_write_and_wait(ip->i_mapping); - truncate_inode_pages(ip->i_mapping, first); } } -/* - * vnode pcache layer for vnode_flush_pages. - * 'last' parameter unused but left in for IRIX compatibility - */ int fs_flush_pages( bhv_desc_t *bdp, @@ -100,12 +64,13 @@ fs_flush_pages( vnode_t *vp = BHV_TO_VNODE(bdp); struct inode *ip = vn_to_inode(vp); - if (VN_CACHED(vp)) { + if (VN_DIRTY(vp)) { + if (VN_TRUNC(vp)) + VUNTRUNCATE(vp); filemap_fdatawrite(ip->i_mapping); if (flags & XFS_B_ASYNC) return 0; filemap_fdatawait(ip->i_mapping); } - return 0; } diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index a64b7db67003..569a4e7b5cc1 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -56,12 +56,18 @@ typedef xfs_ino_t vnumber_t; typedef struct dentry vname_t; typedef bhv_head_t vn_bhv_head_t; +typedef enum vflags { + VMODIFIED = 0x08, /* XFS inode state possibly differs */ + /* to the Linux inode state. */ + VTRUNCATED = 0x40, /* truncated down so flush-on-close */ +} vflags_t; + /* * MP locking protocols: * v_flag, v_vfsp VN_LOCK/VN_UNLOCK */ typedef struct vnode { - __u32 v_flag; /* vnode flags (see below) */ + vflags_t v_flag; /* vnode flags (see above) */ struct vfs *v_vfsp; /* ptr to containing VFS */ vnumber_t v_number; /* in-core vnode number */ vn_bhv_head_t v_bh; /* behavior head */ @@ -125,12 +131,6 @@ static inline struct inode *vn_to_inode(struct vnode *vnode) return &vnode->v_inode; } -/* - * Vnode flags. - */ -#define VMODIFIED 0x8 /* XFS inode state possibly differs */ - /* to the Linux inode state. */ - /* * Values for the VOP_RWLOCK and VOP_RWUNLOCK flags parameter. */ @@ -162,8 +162,10 @@ typedef enum vchange { VCHANGE_FLAGS_IOEXCL_COUNT = 4 } vchange_t; +typedef enum { L_FALSE, L_TRUE } lastclose_t; typedef int (*vop_open_t)(bhv_desc_t *, struct cred *); +typedef int (*vop_close_t)(bhv_desc_t *, int, lastclose_t, struct cred *); typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *, const struct iovec *, unsigned int, loff_t *, int, struct cred *); @@ -234,6 +236,7 @@ typedef int (*vop_iflush_t)(bhv_desc_t *, int); typedef struct vnodeops { bhv_position_t vn_position; /* position within behavior chain */ vop_open_t vop_open; + vop_close_t vop_close; vop_read_t vop_read; vop_write_t vop_write; vop_sendfile_t vop_sendfile; @@ -278,6 +281,10 @@ typedef struct vnodeops { */ #define _VOP_(op, vp) (*((vnodeops_t *)(vp)->v_fops)->op) +#define VOP_OPEN(vp, cr, rv) \ + rv = _VOP_(vop_open, vp)((vp)->v_fbhv, cr) +#define VOP_CLOSE(vp, f, last, cr, rv) \ + rv = _VOP_(vop_close, vp)((vp)->v_fbhv, f, last, cr) #define VOP_READ(vp,file,iov,segs,offset,ioflags,cr,rv) \ rv = _VOP_(vop_read, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr) #define VOP_WRITE(vp,file,iov,segs,offset,ioflags,cr,rv) \ @@ -290,8 +297,6 @@ typedef struct vnodeops { rv = _VOP_(vop_splice_write, vp)((vp)->v_fbhv,f,o,pipe,cnt,fl,iofl,cr) #define VOP_BMAP(vp,of,sz,rw,b,n,rv) \ rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n) -#define VOP_OPEN(vp, cr, rv) \ - rv = _VOP_(vop_open, vp)((vp)->v_fbhv, cr) #define VOP_GETATTR(vp, vap, f, cr, rv) \ rv = _VOP_(vop_getattr, vp)((vp)->v_fbhv, vap, f, cr) #define VOP_SETATTR(vp, vap, f, cr, rv) \ @@ -556,8 +561,6 @@ static inline struct vnode *vn_grab(struct vnode *vp) */ #define VN_LOCK(vp) mutex_spinlock(&(vp)->v_lock) #define VN_UNLOCK(vp, s) mutex_spinunlock(&(vp)->v_lock, s) -#define VN_FLAGSET(vp,b) vn_flagset(vp,b) -#define VN_FLAGCLR(vp,b) vn_flagclr(vp,b) static __inline__ void vn_flagset(struct vnode *vp, uint flag) { @@ -566,13 +569,22 @@ static __inline__ void vn_flagset(struct vnode *vp, uint flag) spin_unlock(&vp->v_lock); } -static __inline__ void vn_flagclr(struct vnode *vp, uint flag) +static __inline__ uint vn_flagclr(struct vnode *vp, uint flag) { + uint cleared; + spin_lock(&vp->v_lock); + cleared = (vp->v_flag & flag); vp->v_flag &= ~flag; spin_unlock(&vp->v_lock); + return cleared; } +#define VMODIFY(vp) vn_flagset(vp, VMODIFIED) +#define VUNMODIFY(vp) vn_flagclr(vp, VMODIFIED) +#define VTRUNCATE(vp) vn_flagset(vp, VTRUNCATED) +#define VUNTRUNCATE(vp) vn_flagclr(vp, VTRUNCATED) + /* * Dealing with bad inodes */ @@ -612,8 +624,7 @@ static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt) #define VN_CACHED(vp) (vn_to_inode(vp)->i_mapping->nrpages) #define VN_DIRTY(vp) mapping_tagged(vn_to_inode(vp)->i_mapping, \ PAGECACHE_TAG_DIRTY) -#define VMODIFY(vp) VN_FLAGSET(vp, VMODIFIED) -#define VUNMODIFY(vp) VN_FLAGCLR(vp, VMODIFIED) +#define VN_TRUNC(vp) ((vp)->v_flag & VTRUNCATED) /* * Flags to VOP_SETATTR/VOP_GETATTR. diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index cb36a56392e7..35906bae92e1 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -16,8 +16,6 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#include - #include "xfs.h" #include "xfs_fs.h" #include "xfs_types.h" @@ -58,32 +56,14 @@ #include "xfs_log_priv.h" #include "xfs_mac.h" - -/* - * The maximum pathlen is 1024 bytes. Since the minimum file system - * blocksize is 512 bytes, we can get a max of 2 extents back from - * bmapi. - */ -#define SYMLINK_MAPS 2 - -/* - * For xfs, we check that the file isn't too big to be opened by this kernel. - * No other open action is required for regular files. Devices are handled - * through the specfs file system, pipes through fifofs. Device and - * fifo vnodes are "wrapped" by specfs and fifofs vnodes, respectively, - * when a new vnode is first looked up or created. - */ STATIC int xfs_open( bhv_desc_t *bdp, cred_t *credp) { int mode; - vnode_t *vp; - xfs_inode_t *ip; - - vp = BHV_TO_VNODE(bdp); - ip = XFS_BHVTOI(bdp); + vnode_t *vp = BHV_TO_VNODE(bdp); + xfs_inode_t *ip = XFS_BHVTOI(bdp); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return XFS_ERROR(EIO); @@ -101,6 +81,36 @@ xfs_open( return 0; } +STATIC int +xfs_close( + bhv_desc_t *bdp, + int flags, + lastclose_t lastclose, + cred_t *credp) +{ + vnode_t *vp = BHV_TO_VNODE(bdp); + xfs_inode_t *ip = XFS_BHVTOI(bdp); + int error = 0; + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return XFS_ERROR(EIO); + + if (lastclose != L_TRUE || !VN_ISREG(vp)) + return 0; + + /* + * If we previously truncated this file and removed old data in + * the process, we want to initiate "early" writeout on the last + * close. This is an attempt to combat the notorious NULL files + * problem which is particularly noticable from a truncate down, + * buffered (re-)write (delalloc), followed by a crash. What we + * are effectively doing here is significantly reducing the time + * window where we'd otherwise be exposed to that problem. + */ + if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0) + VOP_FLUSH_PAGES(vp, 0, -1, XFS_B_ASYNC, FI_NONE, error); + return error; +} /* * xfs_getattr @@ -665,9 +675,17 @@ xfs_setattr( ((ip->i_d.di_nlink != 0 || !(mp->m_flags & XFS_MOUNT_WSYNC)) ? 1 : 0)); - if (code) { + if (code) goto abort_return; - } + /* + * Truncated "down", so we're removing references + * to old data here - if we now delay flushing for + * a long time, we expose ourselves unduly to the + * notorious NULL files problem. So, we mark this + * vnode and flush it when the file is closed, and + * do not wait the usual (long) time for writeout. + */ + VTRUNCATE(vp); } /* * Have to do this even if the file's size doesn't change. @@ -936,6 +954,13 @@ xfs_access( } +/* + * The maximum pathlen is 1024 bytes. Since the minimum file system + * blocksize is 512 bytes, we can get a max of 2 extents back from + * bmapi. + */ +#define SYMLINK_MAPS 2 + /* * xfs_readlink * @@ -1470,9 +1495,6 @@ xfs_inactive_symlink_local( return 0; } -/* - * - */ STATIC int xfs_inactive_attrs( xfs_inode_t *ip, @@ -1531,10 +1553,10 @@ xfs_release( vp = BHV_TO_VNODE(bdp); ip = XFS_BHVTOI(bdp); + mp = ip->i_mount; - if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) { + if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) return 0; - } /* If this is a read-only mount, don't do this (would generate I/O) */ if (vp->v_vfsp->vfs_flag & VFS_RDONLY) @@ -1546,8 +1568,6 @@ xfs_release( return 0; #endif - mp = ip->i_mount; - if (ip->i_d.di_nlink != 0) { if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || @@ -3745,7 +3765,6 @@ xfs_inode_flush( return error; } - int xfs_set_dmattrs ( bhv_desc_t *bdp, @@ -3786,10 +3805,6 @@ xfs_set_dmattrs ( return error; } - -/* - * xfs_reclaim - */ STATIC int xfs_reclaim( bhv_desc_t *bdp) @@ -4645,6 +4660,7 @@ xfs_change_file_space( vnodeops_t xfs_vnodeops = { BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS), .vop_open = xfs_open, + .vop_close = xfs_close, .vop_read = xfs_read, #ifdef HAVE_SENDFILE .vop_sendfile = xfs_sendfile, -- cgit v1.2.3-59-g8ed1b