diff options
Diffstat (limited to 'fs/xfs/xfs_iops.c')
-rw-r--r-- | fs/xfs/xfs_iops.c | 572 |
1 files changed, 264 insertions, 308 deletions
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 81f2f93caec0..2e10e1c66ad6 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -13,6 +13,8 @@ #include "xfs_inode.h" #include "xfs_acl.h" #include "xfs_quota.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" #include "xfs_attr.h" #include "xfs_trans.h" #include "xfs_trace.h" @@ -21,18 +23,20 @@ #include "xfs_dir2.h" #include "xfs_iomap.h" #include "xfs_error.h" +#include "xfs_ioctl.h" +#include "xfs_xattr.h" -#include <linux/xattr.h> #include <linux/posix_acl.h> #include <linux/security.h> #include <linux/iversion.h> +#include <linux/fiemap.h> /* - * Directories have different lock order w.r.t. mmap_sem compared to regular + * Directories have different lock order w.r.t. mmap_lock compared to regular * files. This is due to readdir potentially triggering page faults on a user * buffer inside filldir(), and this happens with the ilock on the directory * held. For regular files, the lock order is the other way around - the - * mmap_sem is taken during the page fault, and then we lock the ilock to do + * mmap_lock is taken during the page fault, and then we lock the ilock to do * block mapping. Hence we need a different class for the directory ilock so * that lockdep can tell them apart. */ @@ -50,10 +54,15 @@ xfs_initxattrs( int error = 0; for (xattr = xattr_array; xattr->name != NULL; xattr++) { - error = xfs_attr_set(ip, xattr->name, - strlen(xattr->name), - xattr->value, xattr->value_len, - ATTR_SECURE); + struct xfs_da_args args = { + .dp = ip, + .attr_filter = XFS_ATTR_SECURE, + .name = xattr->name, + .namelen = strlen(xattr->name), + .value = xattr->value, + .valuelen = xattr->value_len, + }; + error = xfs_attr_change(&args); if (error < 0) break; } @@ -66,9 +75,8 @@ xfs_initxattrs( * these attrs can be journalled at inode creation time (along with the * inode, of course, such that log replay can't cause these to be lost). */ - -STATIC int -xfs_init_security( +int +xfs_inode_init_security( struct inode *inode, struct inode *dir, const struct qstr *qstr) @@ -113,7 +121,7 @@ xfs_cleanup_inode( /* Oh, the horror. * If we can't add the ACL or we fail in - * xfs_init_security we must back out. + * xfs_inode_init_security we must back out. * ENOSPC can hit here, among other things. */ xfs_dentry_to_name(&teardown, dentry); @@ -121,13 +129,45 @@ xfs_cleanup_inode( xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); } +/* + * Check to see if we are likely to need an extended attribute to be added to + * the inode we are about to allocate. This allows the attribute fork to be + * created during the inode allocation, reducing the number of transactions we + * need to do in this fast path. + * + * The security checks are optimistic, but not guaranteed. The two LSMs that + * require xattrs to be added here (selinux and smack) are also the only two + * LSMs that add a sb->s_security structure to the superblock. Hence if security + * is enabled and sb->s_security is set, we have a pretty good idea that we are + * going to be asked to add a security xattr immediately after allocating the + * xfs inode and instantiating the VFS inode. + */ +static inline bool +xfs_create_need_xattr( + struct inode *dir, + struct posix_acl *default_acl, + struct posix_acl *acl) +{ + if (acl) + return true; + if (default_acl) + return true; +#if IS_ENABLED(CONFIG_SECURITY) + if (dir->i_sb->s_security) + return true; +#endif + return false; +} + + STATIC int xfs_generic_create( + struct user_namespace *mnt_userns, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev, - bool tmpfile) /* unnamed file */ + struct file *tmpfile) /* unnamed file */ { struct inode *inode; struct xfs_inode *ip = NULL; @@ -156,20 +196,21 @@ xfs_generic_create( goto out_free_acl; if (!tmpfile) { - error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); + error = xfs_create(mnt_userns, XFS_I(dir), &name, mode, rdev, + xfs_create_need_xattr(dir, default_acl, acl), + &ip); } else { - error = xfs_create_tmpfile(XFS_I(dir), mode, &ip); + error = xfs_create_tmpfile(mnt_userns, XFS_I(dir), mode, &ip); } if (unlikely(error)) goto out_free_acl; inode = VFS_I(ip); - error = xfs_init_security(inode, dir, &dentry->d_name); + error = xfs_inode_init_security(inode, dir, &dentry->d_name); if (unlikely(error)) goto out_cleanup_inode; -#ifdef CONFIG_XFS_POSIX_ACL if (default_acl) { error = __xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); if (error) @@ -180,7 +221,6 @@ xfs_generic_create( if (error) goto out_cleanup_inode; } -#endif xfs_setup_iops(ip); @@ -194,17 +234,15 @@ xfs_generic_create( * d_tmpfile can immediately set it back to zero. */ set_nlink(inode, 1); - d_tmpfile(dentry, inode); + d_tmpfile(tmpfile, inode); } else d_instantiate(dentry, inode); xfs_finish_inode_setup(ip); out_free_acl: - if (default_acl) - posix_acl_release(default_acl); - if (acl) - posix_acl_release(acl); + posix_acl_release(default_acl); + posix_acl_release(acl); return error; out_cleanup_inode: @@ -217,31 +255,35 @@ xfs_generic_create( STATIC int xfs_vn_mknod( - struct inode *dir, - struct dentry *dentry, - umode_t mode, - dev_t rdev) + struct user_namespace *mnt_userns, + struct inode *dir, + struct dentry *dentry, + umode_t mode, + dev_t rdev) { - return xfs_generic_create(dir, dentry, mode, rdev, false); + return xfs_generic_create(mnt_userns, dir, dentry, mode, rdev, NULL); } STATIC int xfs_vn_create( - struct inode *dir, - struct dentry *dentry, - umode_t mode, - bool flags) + struct user_namespace *mnt_userns, + struct inode *dir, + struct dentry *dentry, + umode_t mode, + bool flags) { - return xfs_vn_mknod(dir, dentry, mode, 0); + return xfs_generic_create(mnt_userns, dir, dentry, mode, 0, NULL); } STATIC int xfs_vn_mkdir( - struct inode *dir, - struct dentry *dentry, - umode_t mode) + struct user_namespace *mnt_userns, + struct inode *dir, + struct dentry *dentry, + umode_t mode) { - return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0); + return xfs_generic_create(mnt_userns, dir, dentry, mode | S_IFDIR, 0, + NULL); } STATIC struct dentry * @@ -351,16 +393,17 @@ xfs_vn_unlink( * but still hashed. This is incompatible with case-insensitive * mode, so invalidate (unhash) the dentry in CI-mode. */ - if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb)) + if (xfs_has_asciici(XFS_M(dir->i_sb))) d_invalidate(dentry); return 0; } STATIC int xfs_vn_symlink( - struct inode *dir, - struct dentry *dentry, - const char *symname) + struct user_namespace *mnt_userns, + struct inode *dir, + struct dentry *dentry, + const char *symname) { struct inode *inode; struct xfs_inode *cip = NULL; @@ -374,13 +417,13 @@ xfs_vn_symlink( if (unlikely(error)) goto out; - error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip); + error = xfs_symlink(mnt_userns, XFS_I(dir), &name, symname, mode, &cip); if (unlikely(error)) goto out; inode = VFS_I(cip); - error = xfs_init_security(inode, dir, &dentry->d_name); + error = xfs_inode_init_security(inode, dir, &dentry->d_name); if (unlikely(error)) goto out_cleanup_inode; @@ -400,11 +443,12 @@ xfs_vn_symlink( STATIC int xfs_vn_rename( - struct inode *odir, - struct dentry *odentry, - struct inode *ndir, - struct dentry *ndentry, - unsigned int flags) + struct user_namespace *mnt_userns, + struct inode *odir, + struct dentry *odentry, + struct inode *ndir, + struct dentry *ndentry, + unsigned int flags) { struct inode *new_inode = d_inode(ndentry); int omode = 0; @@ -428,8 +472,8 @@ xfs_vn_rename( if (unlikely(error)) return error; - return xfs_rename(XFS_I(odir), &oname, XFS_I(d_inode(odentry)), - XFS_I(ndir), &nname, + return xfs_rename(mnt_userns, XFS_I(odir), &oname, + XFS_I(d_inode(odentry)), XFS_I(ndir), &nname, new_inode ? XFS_I(new_inode) : NULL, flags); } @@ -467,27 +511,6 @@ xfs_vn_get_link( return ERR_PTR(error); } -STATIC const char * -xfs_vn_get_link_inline( - struct dentry *dentry, - struct inode *inode, - struct delayed_call *done) -{ - struct xfs_inode *ip = XFS_I(inode); - char *link; - - ASSERT(ip->i_df.if_flags & XFS_IFINLINE); - - /* - * The VFS crashes on a NULL pointer, so return -EFSCORRUPTED if - * if_data is junk. - */ - link = ip->i_df.if_u1.if_data; - if (XFS_IS_CORRUPT(ip->i_mount, !link)) - return ERR_PTR(-EFSCORRUPTED); - return link; -} - static uint32_t xfs_stat_blksize( struct xfs_inode *ip) @@ -499,7 +522,7 @@ xfs_stat_blksize( * always return the realtime extent size. */ if (XFS_IS_REALTIME_INODE(ip)) - return xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; + return XFS_FSB_TO_B(mp, xfs_get_extsz_hint(ip)); /* * Allow large block sizes to be reported to userspace programs if the @@ -514,10 +537,10 @@ xfs_stat_blksize( * default buffered I/O size, return that, otherwise return the compat * default. */ - if (mp->m_flags & XFS_MOUNT_LARGEIO) { + if (xfs_has_large_iosize(mp)) { if (mp->m_swidth) - return mp->m_swidth << mp->m_sb.sb_blocklog; - if (mp->m_flags & XFS_MOUNT_ALLOCSIZE) + return XFS_FSB_TO_B(mp, mp->m_swidth); + if (xfs_has_allocsize(mp)) return 1U << mp->m_allocsize_log; } @@ -526,6 +549,7 @@ xfs_stat_blksize( STATIC int xfs_vn_getattr( + struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat, u32 request_mask, @@ -534,29 +558,30 @@ xfs_vn_getattr( struct inode *inode = d_inode(path->dentry); struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; + vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode); + vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode); trace_xfs_getattr(ip); - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; stat->size = XFS_ISIZE(ip); stat->dev = inode->i_sb->s_dev; stat->mode = inode->i_mode; stat->nlink = inode->i_nlink; - stat->uid = inode->i_uid; - stat->gid = inode->i_gid; + stat->uid = vfsuid_into_kuid(vfsuid); + stat->gid = vfsgid_into_kgid(vfsgid); stat->ino = ip->i_ino; stat->atime = inode->i_atime; stat->mtime = inode->i_mtime; stat->ctime = inode->i_ctime; - stat->blocks = - XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); + stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks); - if (ip->i_d.di_version == 3) { + if (xfs_has_v3inodes(mp)) { if (request_mask & STATX_BTIME) { stat->result_mask |= STATX_BTIME; - stat->btime = ip->i_d.di_crtime; + stat->btime = ip->i_crtime; } } @@ -564,11 +589,11 @@ xfs_vn_getattr( * Note: If you add another clause to set an attribute flag, please * update attributes_mask below. */ - if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) + if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE) stat->attributes |= STATX_ATTR_IMMUTABLE; - if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) + if (ip->i_diflags & XFS_DIFLAG_APPEND) stat->attributes |= STATX_ATTR_APPEND; - if (ip->i_d.di_flags & XFS_DIFLAG_NODUMP) + if (ip->i_diflags & XFS_DIFLAG_NODUMP) stat->attributes |= STATX_ATTR_NODUMP; stat->attributes_mask |= (STATX_ATTR_IMMUTABLE | @@ -581,6 +606,16 @@ xfs_vn_getattr( stat->blksize = BLKDEV_IOSIZE; stat->rdev = inode->i_rdev; break; + case S_IFREG: + if (request_mask & STATX_DIOALIGN) { + struct xfs_buftarg *target = xfs_inode_buftarg(ip); + struct block_device *bdev = target->bt_bdev; + + stat->result_mask |= STATX_DIOALIGN; + stat->dio_mem_align = bdev_dma_alignment(bdev) + 1; + stat->dio_offset_align = bdev_logical_block_size(bdev); + } + fallthrough; default: stat->blksize = xfs_stat_blksize(ip); stat->rdev = 0; @@ -590,51 +625,21 @@ xfs_vn_getattr( return 0; } -static void -xfs_setattr_mode( - struct xfs_inode *ip, - struct iattr *iattr) -{ - struct inode *inode = VFS_I(ip); - umode_t mode = iattr->ia_mode; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - - inode->i_mode &= S_IFMT; - inode->i_mode |= mode & ~S_IFMT; -} - -void -xfs_setattr_time( - struct xfs_inode *ip, - struct iattr *iattr) -{ - struct inode *inode = VFS_I(ip); - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - - if (iattr->ia_valid & ATTR_ATIME) - inode->i_atime = iattr->ia_atime; - if (iattr->ia_valid & ATTR_CTIME) - inode->i_ctime = iattr->ia_ctime; - if (iattr->ia_valid & ATTR_MTIME) - inode->i_mtime = iattr->ia_mtime; -} - static int xfs_vn_change_ok( - struct dentry *dentry, - struct iattr *iattr) + struct user_namespace *mnt_userns, + struct dentry *dentry, + struct iattr *iattr) { struct xfs_mount *mp = XFS_I(d_inode(dentry))->i_mount; - if (mp->m_flags & XFS_MOUNT_RDONLY) + if (xfs_is_readonly(mp)) return -EROFS; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; - return setattr_prepare(dentry, iattr); + return setattr_prepare(mnt_userns, dentry, iattr); } /* @@ -643,21 +648,21 @@ xfs_vn_change_ok( * Caution: The caller of this function is responsible for calling * setattr_prepare() or otherwise verifying the change is fine. */ -int +static int xfs_setattr_nonsize( + struct user_namespace *mnt_userns, struct xfs_inode *ip, - struct iattr *iattr, - int flags) + struct iattr *iattr) { xfs_mount_t *mp = ip->i_mount; struct inode *inode = VFS_I(ip); int mask = iattr->ia_valid; xfs_trans_t *tp; int error; - kuid_t uid = GLOBAL_ROOT_UID, iuid = GLOBAL_ROOT_UID; - kgid_t gid = GLOBAL_ROOT_GID, igid = GLOBAL_ROOT_GID; + kuid_t uid = GLOBAL_ROOT_UID; + kgid_t gid = GLOBAL_ROOT_GID; struct xfs_dquot *udqp = NULL, *gdqp = NULL; - struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL; + struct xfs_dquot *old_udqp = NULL, *old_gdqp = NULL; ASSERT((mask & ATTR_SIZE) == 0); @@ -673,13 +678,15 @@ xfs_setattr_nonsize( uint qflags = 0; if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) { - uid = iattr->ia_uid; + uid = from_vfsuid(mnt_userns, i_user_ns(inode), + iattr->ia_vfsuid); qflags |= XFS_QMOPT_UQUOTA; } else { uid = inode->i_uid; } if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { - gid = iattr->ia_gid; + gid = from_vfsgid(mnt_userns, i_user_ns(inode), + iattr->ia_vfsgid); qflags |= XFS_QMOPT_GQUOTA; } else { gid = inode->i_gid; @@ -692,114 +699,50 @@ xfs_setattr_nonsize( */ ASSERT(udqp == NULL); ASSERT(gdqp == NULL); - error = xfs_qm_vop_dqalloc(ip, xfs_kuid_to_uid(uid), - xfs_kgid_to_gid(gid), - ip->i_d.di_projid, + error = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_projid, qflags, &udqp, &gdqp, NULL); if (error) return error; } - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); + error = xfs_trans_alloc_ichange(ip, udqp, gdqp, NULL, + has_capability_noaudit(current, CAP_FOWNER), &tp); if (error) goto out_dqrele; - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, 0); - /* - * Change file ownership. Must be the owner or privileged. + * Register quota modifications in the transaction. Must be the owner + * or privileged. These IDs could have changed since we last looked at + * them. But, we're assured that if the ownership did change while we + * didn't have the inode locked, inode's dquot(s) would have changed + * also. */ - if (mask & (ATTR_UID|ATTR_GID)) { - /* - * These IDs could have changed since we last looked at them. - * But, we're assured that if the ownership did change - * while we didn't have the inode locked, inode's dquot(s) - * would have changed also. - */ - iuid = inode->i_uid; - igid = inode->i_gid; - gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; - uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; - - /* - * Do a quota reservation only if uid/gid is actually - * going to change. - */ - if (XFS_IS_QUOTA_RUNNING(mp) && - ((XFS_IS_UQUOTA_ON(mp) && !uid_eq(iuid, uid)) || - (XFS_IS_GQUOTA_ON(mp) && !gid_eq(igid, gid)))) { - ASSERT(tp); - error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, - NULL, capable(CAP_FOWNER) ? - XFS_QMOPT_FORCE_RES : 0); - if (error) /* out of quota */ - goto out_cancel; - } + if (XFS_IS_UQUOTA_ON(mp) && + i_uid_needs_update(mnt_userns, iattr, inode)) { + ASSERT(udqp); + old_udqp = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp); } - - /* - * Change file ownership. Must be the owner or privileged. - */ - if (mask & (ATTR_UID|ATTR_GID)) { - /* - * CAP_FSETID overrides the following restrictions: - * - * The set-user-ID and set-group-ID bits of a file will be - * cleared upon successful return from chown() - */ - if ((inode->i_mode & (S_ISUID|S_ISGID)) && - !capable(CAP_FSETID)) - inode->i_mode &= ~(S_ISUID|S_ISGID); - - /* - * Change the ownerships and register quota modifications - * in the transaction. - */ - if (!uid_eq(iuid, uid)) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) { - ASSERT(mask & ATTR_UID); - ASSERT(udqp); - olddquot1 = xfs_qm_vop_chown(tp, ip, - &ip->i_udquot, udqp); - } - ip->i_d.di_uid = xfs_kuid_to_uid(uid); - inode->i_uid = uid; - } - if (!gid_eq(igid, gid)) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { - ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) || - !XFS_IS_PQUOTA_ON(mp)); - ASSERT(mask & ATTR_GID); - ASSERT(gdqp); - olddquot2 = xfs_qm_vop_chown(tp, ip, - &ip->i_gdquot, gdqp); - } - ip->i_d.di_gid = xfs_kgid_to_gid(gid); - inode->i_gid = gid; - } + if (XFS_IS_GQUOTA_ON(mp) && + i_gid_needs_update(mnt_userns, iattr, inode)) { + ASSERT(xfs_has_pquotino(mp) || !XFS_IS_PQUOTA_ON(mp)); + ASSERT(gdqp); + old_gdqp = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp); } - if (mask & ATTR_MODE) - xfs_setattr_mode(ip, iattr); - if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) - xfs_setattr_time(ip, iattr); - + setattr_copy(mnt_userns, inode, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(mp, xs_ig_attrchg); - if (mp->m_flags & XFS_MOUNT_WSYNC) + if (xfs_has_wsync(mp)) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - /* * Release any dquot(s) the inode had kept before chown. */ - xfs_qm_dqrele(olddquot1); - xfs_qm_dqrele(olddquot2); + xfs_qm_dqrele(old_udqp); + xfs_qm_dqrele(old_gdqp); xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); @@ -813,39 +756,20 @@ xfs_setattr_nonsize( * to attr_set. No previous user of the generic * Posix ACL code seems to care about this issue either. */ - if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { - error = posix_acl_chmod(inode, inode->i_mode); + if (mask & ATTR_MODE) { + error = posix_acl_chmod(mnt_userns, inode, inode->i_mode); if (error) return error; } return 0; -out_cancel: - xfs_trans_cancel(tp); - xfs_iunlock(ip, XFS_ILOCK_EXCL); out_dqrele: xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); return error; } -int -xfs_vn_setattr_nonsize( - struct dentry *dentry, - struct iattr *iattr) -{ - struct xfs_inode *ip = XFS_I(d_inode(dentry)); - int error; - - trace_xfs_setattr(ip); - - error = xfs_vn_change_ok(dentry, iattr); - if (error) - return error; - return xfs_setattr_nonsize(ip, iattr, 0); -} - /* * Truncate file. Must have write permission and not be a directory. * @@ -854,6 +778,7 @@ xfs_vn_setattr_nonsize( */ STATIC int xfs_setattr_size( + struct user_namespace *mnt_userns, struct xfs_inode *ip, struct iattr *iattr) { @@ -869,7 +794,7 @@ xfs_setattr_size( ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL)); ASSERT(S_ISREG(inode->i_mode)); ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| - ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); + ATTR_MTIME_SET|ATTR_TIMES_SET)) == 0); oldsize = inode->i_size; newsize = iattr->ia_size; @@ -877,7 +802,7 @@ xfs_setattr_size( /* * Short circuit the truncate case for zero length files. */ - if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) { + if (newsize == 0 && oldsize == 0 && ip->i_df.if_nextents == 0) { if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME))) return 0; @@ -885,7 +810,7 @@ xfs_setattr_size( * Use the regular setattr path to update the timestamps. */ iattr->ia_valid &= ~ATTR_SIZE; - return xfs_setattr_nonsize(ip, iattr, 0); + return xfs_setattr_nonsize(mnt_userns, ip, iattr); } /* @@ -912,11 +837,20 @@ xfs_setattr_size( */ if (newsize > oldsize) { trace_xfs_zero_eof(ip, oldsize, newsize - oldsize); - error = iomap_zero_range(inode, oldsize, newsize - oldsize, - &did_zeroing, &xfs_buffered_write_iomap_ops); + error = xfs_zero_range(ip, oldsize, newsize - oldsize, + &did_zeroing); } else { - error = iomap_truncate_page(inode, newsize, &did_zeroing, - &xfs_buffered_write_iomap_ops); + /* + * iomap won't detect a dirty page over an unwritten block (or a + * cow block over a hole) and subsequently skips zeroing the + * newly post-EOF portion of the page. Flush the new EOF to + * convert the block before the pagecache truncate. + */ + error = filemap_write_and_wait_range(inode->i_mapping, newsize, + newsize); + if (error) + return error; + error = xfs_truncate_page(ip, newsize, &did_zeroing); } if (error) @@ -940,8 +874,8 @@ xfs_setattr_size( * operation. * * And we update in-core i_size and truncate page cache beyond newsize - * before writeback the [di_size, newsize] range, so we're guaranteed - * not to write stale data past the new EOF on truncate down. + * before writeback the [i_disk_size, newsize] range, so we're + * guaranteed not to write stale data past the new EOF on truncate down. */ truncate_setsize(inode, newsize); @@ -954,9 +888,9 @@ xfs_setattr_size( * otherwise those blocks may not be zeroed after a crash. */ if (did_zeroing || - (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) { + (newsize > ip->i_disk_size && oldsize != ip->i_disk_size)) { error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, - ip->i_d.di_size, newsize - 1); + ip->i_disk_size, newsize - 1); if (error) return error; } @@ -998,7 +932,7 @@ xfs_setattr_size( * permanent before actually freeing any blocks it doesn't matter if * they get written to. */ - ip->i_d.di_size = newsize; + ip->i_disk_size = newsize; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); if (newsize <= oldsize) { @@ -1019,16 +953,13 @@ xfs_setattr_size( xfs_inode_clear_eofblocks_tag(ip); } - if (iattr->ia_valid & ATTR_MODE) - xfs_setattr_mode(ip, iattr); - if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) - xfs_setattr_time(ip, iattr); - + ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID))); + setattr_copy(mnt_userns, inode, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(mp, xs_ig_attrchg); - if (mp->m_flags & XFS_MOUNT_WSYNC) + if (xfs_has_wsync(mp)) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp); @@ -1044,6 +975,7 @@ out_trans_cancel: int xfs_vn_setattr_size( + struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *iattr) { @@ -1052,22 +984,23 @@ xfs_vn_setattr_size( trace_xfs_setattr(ip); - error = xfs_vn_change_ok(dentry, iattr); + error = xfs_vn_change_ok(mnt_userns, dentry, iattr); if (error) return error; - return xfs_setattr_size(ip, iattr); + return xfs_setattr_size(mnt_userns, ip, iattr); } STATIC int xfs_vn_setattr( + struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *iattr) { + struct inode *inode = d_inode(dentry); + struct xfs_inode *ip = XFS_I(inode); int error; if (iattr->ia_valid & ATTR_SIZE) { - struct inode *inode = d_inode(dentry); - struct xfs_inode *ip = XFS_I(inode); uint iolock; xfs_ilock(ip, XFS_MMAPLOCK_EXCL); @@ -1079,10 +1012,14 @@ xfs_vn_setattr( return error; } - error = xfs_vn_setattr_size(dentry, iattr); + error = xfs_vn_setattr_size(mnt_userns, dentry, iattr); xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); } else { - error = xfs_vn_setattr_nonsize(dentry, iattr); + trace_xfs_setattr(ip); + + error = xfs_vn_change_ok(mnt_userns, dentry, iattr); + if (!error) + error = xfs_setattr_nonsize(mnt_userns, ip, iattr); } return error; @@ -1153,11 +1090,14 @@ xfs_vn_fiemap( STATIC int xfs_vn_tmpfile( - struct inode *dir, - struct dentry *dentry, - umode_t mode) + struct user_namespace *mnt_userns, + struct inode *dir, + struct file *file, + umode_t mode) { - return xfs_generic_create(dir, dentry, mode, 0, true); + int err = xfs_generic_create(mnt_userns, dir, file->f_path.dentry, mode, 0, file); + + return finish_open_simple(file, err); } static const struct inode_operations xfs_inode_operations = { @@ -1168,6 +1108,8 @@ static const struct inode_operations xfs_inode_operations = { .listxattr = xfs_vn_listxattr, .fiemap = xfs_vn_fiemap, .update_time = xfs_vn_update_time, + .fileattr_get = xfs_fileattr_get, + .fileattr_set = xfs_fileattr_set, }; static const struct inode_operations xfs_dir_inode_operations = { @@ -1193,6 +1135,8 @@ static const struct inode_operations xfs_dir_inode_operations = { .listxattr = xfs_vn_listxattr, .update_time = xfs_vn_update_time, .tmpfile = xfs_vn_tmpfile, + .fileattr_get = xfs_fileattr_get, + .fileattr_set = xfs_fileattr_set, }; static const struct inode_operations xfs_dir_ci_inode_operations = { @@ -1218,6 +1162,8 @@ static const struct inode_operations xfs_dir_ci_inode_operations = { .listxattr = xfs_vn_listxattr, .update_time = xfs_vn_update_time, .tmpfile = xfs_vn_tmpfile, + .fileattr_get = xfs_fileattr_get, + .fileattr_set = xfs_fileattr_set, }; static const struct inode_operations xfs_symlink_inode_operations = { @@ -1228,14 +1174,6 @@ static const struct inode_operations xfs_symlink_inode_operations = { .update_time = xfs_vn_update_time, }; -static const struct inode_operations xfs_inline_symlink_inode_operations = { - .get_link = xfs_vn_get_link_inline, - .getattr = xfs_vn_getattr, - .setattr = xfs_vn_setattr, - .listxattr = xfs_vn_listxattr, - .update_time = xfs_vn_update_time, -}; - /* Figure out if this file actually supports DAX. */ static bool xfs_inode_supports_dax( @@ -1243,13 +1181,12 @@ xfs_inode_supports_dax( { struct xfs_mount *mp = ip->i_mount; - /* Only supported on non-reflinked files. */ - if (!S_ISREG(VFS_I(ip)->i_mode) || xfs_is_reflink_inode(ip)) + /* Only supported on regular files. */ + if (!S_ISREG(VFS_I(ip)->i_mode)) return false; - /* DAX mount option or DAX iflag must be set. */ - if (!(mp->m_flags & XFS_MOUNT_DAX) && - !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) + /* Only supported on non-reflinked files. */ + if (xfs_is_reflink_inode(ip)) return false; /* Block size must match page size */ @@ -1260,35 +1197,60 @@ xfs_inode_supports_dax( return xfs_inode_buftarg(ip)->bt_daxdev != NULL; } -STATIC void +static bool +xfs_inode_should_enable_dax( + struct xfs_inode *ip) +{ + if (!IS_ENABLED(CONFIG_FS_DAX)) + return false; + if (xfs_has_dax_never(ip->i_mount)) + return false; + if (!xfs_inode_supports_dax(ip)) + return false; + if (xfs_has_dax_always(ip->i_mount)) + return true; + if (ip->i_diflags2 & XFS_DIFLAG2_DAX) + return true; + return false; +} + +void xfs_diflags_to_iflags( - struct inode *inode, - struct xfs_inode *ip) + struct xfs_inode *ip, + bool init) { - uint16_t flags = ip->i_d.di_flags; - - inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC | - S_NOATIME | S_DAX); - - if (flags & XFS_DIFLAG_IMMUTABLE) - inode->i_flags |= S_IMMUTABLE; - if (flags & XFS_DIFLAG_APPEND) - inode->i_flags |= S_APPEND; - if (flags & XFS_DIFLAG_SYNC) - inode->i_flags |= S_SYNC; - if (flags & XFS_DIFLAG_NOATIME) - inode->i_flags |= S_NOATIME; - if (xfs_inode_supports_dax(ip)) - inode->i_flags |= S_DAX; + struct inode *inode = VFS_I(ip); + unsigned int xflags = xfs_ip2xflags(ip); + unsigned int flags = 0; + + ASSERT(!(IS_DAX(inode) && init)); + + if (xflags & FS_XFLAG_IMMUTABLE) + flags |= S_IMMUTABLE; + if (xflags & FS_XFLAG_APPEND) + flags |= S_APPEND; + if (xflags & FS_XFLAG_SYNC) + flags |= S_SYNC; + if (xflags & FS_XFLAG_NOATIME) + flags |= S_NOATIME; + if (init && xfs_inode_should_enable_dax(ip)) + flags |= S_DAX; + + /* + * S_DAX can only be set during inode initialization and is never set by + * the VFS, so we cannot mask off S_DAX in i_flags. + */ + inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC | S_NOATIME); + inode->i_flags |= flags; } /* * Initialize the Linux inode. * * When reading existing inodes from disk this is called directly from xfs_iget, - * when creating a new inode it is called from xfs_ialloc after setting up the - * inode. These callers have different criteria for clearing XFS_INEW, so leave - * it up to the caller to deal with unlocking the inode appropriately. + * when creating a new inode it is called from xfs_init_new_inode after setting + * up the inode. These callers have different criteria for clearing XFS_INEW, so + * leave it up to the caller to deal with unlocking the inode appropriately. */ void xfs_setup_inode( @@ -1298,17 +1260,14 @@ xfs_setup_inode( gfp_t gfp_mask; inode->i_ino = ip->i_ino; - inode->i_state = I_NEW; + inode->i_state |= I_NEW; inode_sb_list_add(inode); /* make the inode look hashed for the writeback code */ inode_fake_hash(inode); - inode->i_uid = xfs_uid_to_kuid(ip->i_d.di_uid); - inode->i_gid = xfs_gid_to_kgid(ip->i_d.di_gid); - - i_size_write(inode, ip->i_d.di_size); - xfs_diflags_to_iflags(inode, ip); + i_size_write(inode, ip->i_disk_size); + xfs_diflags_to_iflags(ip, true); if (S_ISDIR(inode->i_mode)) { /* @@ -1336,7 +1295,7 @@ xfs_setup_inode( * If there is no attribute fork no ACL can exist on this inode, * and it can't have any file capabilities attached to it either. */ - if (!XFS_IFORK_Q(ip)) { + if (!xfs_inode_has_attr_fork(ip)) { inode_has_no_xattr(inode); cache_no_acl(inode); } @@ -1358,17 +1317,14 @@ xfs_setup_iops( inode->i_mapping->a_ops = &xfs_address_space_operations; break; case S_IFDIR: - if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) + if (xfs_has_asciici(XFS_M(inode->i_sb))) inode->i_op = &xfs_dir_ci_inode_operations; else inode->i_op = &xfs_dir_inode_operations; inode->i_fop = &xfs_dir_file_operations; break; case S_IFLNK: - if (ip->i_df.if_flags & XFS_IFINLINE) - inode->i_op = &xfs_inline_symlink_inode_operations; - else - inode->i_op = &xfs_symlink_inode_operations; + inode->i_op = &xfs_symlink_inode_operations; break; default: inode->i_op = &xfs_inode_operations; |