diff options
Diffstat (limited to 'fs/xfs/xfs_ioctl.c')
-rw-r--r-- | fs/xfs/xfs_ioctl.c | 1340 |
1 files changed, 613 insertions, 727 deletions
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index d42de92cb283..1f783e979629 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -15,6 +15,8 @@ #include "xfs_iwalk.h" #include "xfs_itable.h" #include "xfs_error.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" #include "xfs_attr.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" @@ -35,9 +37,11 @@ #include "xfs_health.h" #include "xfs_reflink.h" #include "xfs_ioctl.h" +#include "xfs_xattr.h" #include <linux/mount.h> #include <linux/namei.h> +#include <linux/fileattr.h> /* * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to @@ -292,62 +296,173 @@ xfs_readlink_by_handle( return error; } -STATIC int -xfs_attrlist_by_handle( - struct file *parfilp, - void __user *arg) +/* + * Format an attribute and copy it out to the user's buffer. + * Take care to check values and protect against them changing later, + * we may be reading them directly out of a user buffer. + */ +static void +xfs_ioc_attr_put_listent( + struct xfs_attr_list_context *context, + int flags, + unsigned char *name, + int namelen, + int valuelen) { - int error = -ENOMEM; - attrlist_cursor_kern_t *cursor; - struct xfs_fsop_attrlist_handlereq __user *p = arg; - xfs_fsop_attrlist_handlereq_t al_hreq; - struct dentry *dentry; - char *kbuf; + struct xfs_attrlist *alist = context->buffer; + struct xfs_attrlist_ent *aep; + int arraytop; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) - return -EFAULT; - if (al_hreq.buflen < sizeof(struct attrlist) || - al_hreq.buflen > XFS_XATTR_LIST_MAX) + ASSERT(!context->seen_enough); + ASSERT(context->count >= 0); + ASSERT(context->count < (ATTR_MAX_VALUELEN/8)); + ASSERT(context->firstu >= sizeof(*alist)); + ASSERT(context->firstu <= context->bufsize); + + /* + * Only list entries in the right namespace. + */ + if (context->attr_filter != (flags & XFS_ATTR_NSP_ONDISK_MASK)) + return; + + arraytop = sizeof(*alist) + + context->count * sizeof(alist->al_offset[0]); + + /* decrement by the actual bytes used by the attr */ + context->firstu -= round_up(offsetof(struct xfs_attrlist_ent, a_name) + + namelen + 1, sizeof(uint32_t)); + if (context->firstu < arraytop) { + trace_xfs_attr_list_full(context); + alist->al_more = 1; + context->seen_enough = 1; + return; + } + + aep = context->buffer + context->firstu; + aep->a_valuelen = valuelen; + memcpy(aep->a_name, name, namelen); + aep->a_name[namelen] = 0; + alist->al_offset[context->count++] = context->firstu; + alist->al_count = context->count; + trace_xfs_attr_list_add(context); +} + +static unsigned int +xfs_attr_filter( + u32 ioc_flags) +{ + if (ioc_flags & XFS_IOC_ATTR_ROOT) + return XFS_ATTR_ROOT; + if (ioc_flags & XFS_IOC_ATTR_SECURE) + return XFS_ATTR_SECURE; + return 0; +} + +static unsigned int +xfs_attr_flags( + u32 ioc_flags) +{ + if (ioc_flags & XFS_IOC_ATTR_CREATE) + return XATTR_CREATE; + if (ioc_flags & XFS_IOC_ATTR_REPLACE) + return XATTR_REPLACE; + return 0; +} + +int +xfs_ioc_attr_list( + struct xfs_inode *dp, + void __user *ubuf, + size_t bufsize, + int flags, + struct xfs_attrlist_cursor __user *ucursor) +{ + struct xfs_attr_list_context context = { }; + struct xfs_attrlist *alist; + void *buffer; + int error; + + if (bufsize < sizeof(struct xfs_attrlist) || + bufsize > XFS_XATTR_LIST_MAX) return -EINVAL; /* * Reject flags, only allow namespaces. */ - if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) + if (flags & ~(XFS_IOC_ATTR_ROOT | XFS_IOC_ATTR_SECURE)) + return -EINVAL; + if (flags == (XFS_IOC_ATTR_ROOT | XFS_IOC_ATTR_SECURE)) return -EINVAL; - dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); + /* + * Validate the cursor. + */ + if (copy_from_user(&context.cursor, ucursor, sizeof(context.cursor))) + return -EFAULT; + if (context.cursor.pad1 || context.cursor.pad2) + return -EINVAL; + if (!context.cursor.initted && + (context.cursor.hashval || context.cursor.blkno || + context.cursor.offset)) + return -EINVAL; - kbuf = kmem_zalloc_large(al_hreq.buflen, 0); - if (!kbuf) - goto out_dput; + buffer = kvzalloc(bufsize, GFP_KERNEL); + if (!buffer) + return -ENOMEM; - cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; - error = xfs_attr_list(XFS_I(d_inode(dentry)), kbuf, al_hreq.buflen, - al_hreq.flags, cursor); + /* + * Initialize the output buffer. + */ + context.dp = dp; + context.resynch = 1; + context.attr_filter = xfs_attr_filter(flags); + context.buffer = buffer; + context.bufsize = round_down(bufsize, sizeof(uint32_t)); + context.firstu = context.bufsize; + context.put_listent = xfs_ioc_attr_put_listent; + + alist = context.buffer; + alist->al_count = 0; + alist->al_more = 0; + alist->al_offset[0] = context.bufsize; + + error = xfs_attr_list(&context); if (error) - goto out_kfree; + goto out_free; - if (copy_to_user(&p->pos, cursor, sizeof(attrlist_cursor_kern_t))) { + if (copy_to_user(ubuf, buffer, bufsize) || + copy_to_user(ucursor, &context.cursor, sizeof(context.cursor))) error = -EFAULT; - goto out_kfree; - } +out_free: + kmem_free(buffer); + return error; +} - if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen)) - error = -EFAULT; +STATIC int +xfs_attrlist_by_handle( + struct file *parfilp, + struct xfs_fsop_attrlist_handlereq __user *p) +{ + struct xfs_fsop_attrlist_handlereq al_hreq; + struct dentry *dentry; + int error = -ENOMEM; -out_kfree: - kmem_free(kbuf); -out_dput: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user(&al_hreq, p, sizeof(al_hreq))) + return -EFAULT; + + dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + error = xfs_ioc_attr_list(XFS_I(d_inode(dentry)), al_hreq.buffer, + al_hreq.buflen, al_hreq.flags, &p->pos); dput(dentry); return error; } -int +static int xfs_attrmulti_attr_get( struct inode *inode, unsigned char *name, @@ -355,31 +470,33 @@ xfs_attrmulti_attr_get( uint32_t *len, uint32_t flags) { - unsigned char *kbuf; - int error = -EFAULT; - size_t namelen; + struct xfs_da_args args = { + .dp = XFS_I(inode), + .attr_filter = xfs_attr_filter(flags), + .attr_flags = xfs_attr_flags(flags), + .name = name, + .namelen = strlen(name), + .valuelen = *len, + }; + int error; if (*len > XFS_XATTR_SIZE_MAX) return -EINVAL; - kbuf = kmem_zalloc_large(*len, 0); - if (!kbuf) - return -ENOMEM; - namelen = strlen(name); - error = xfs_attr_get(XFS_I(inode), name, namelen, &kbuf, (int *)len, - flags); + error = xfs_attr_get(&args); if (error) goto out_kfree; - if (copy_to_user(ubuf, kbuf, *len)) + *len = args.valuelen; + if (copy_to_user(ubuf, args.value, args.valuelen)) error = -EFAULT; out_kfree: - kmem_free(kbuf); + kmem_free(args.value); return error; } -int +static int xfs_attrmulti_attr_set( struct inode *inode, unsigned char *name, @@ -387,42 +504,75 @@ xfs_attrmulti_attr_set( uint32_t len, uint32_t flags) { - unsigned char *kbuf; + struct xfs_da_args args = { + .dp = XFS_I(inode), + .attr_filter = xfs_attr_filter(flags), + .attr_flags = xfs_attr_flags(flags), + .name = name, + .namelen = strlen(name), + }; int error; - size_t namelen; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; - if (len > XFS_XATTR_SIZE_MAX) - return -EINVAL; - kbuf = memdup_user(ubuf, len); - if (IS_ERR(kbuf)) - return PTR_ERR(kbuf); + if (ubuf) { + if (len > XFS_XATTR_SIZE_MAX) + return -EINVAL; + args.value = memdup_user(ubuf, len); + if (IS_ERR(args.value)) + return PTR_ERR(args.value); + args.valuelen = len; + } - namelen = strlen(name); - error = xfs_attr_set(XFS_I(inode), name, namelen, kbuf, len, flags); - if (!error) - xfs_forget_acl(inode, name, flags); - kfree(kbuf); + error = xfs_attr_change(&args); + if (!error && (flags & XFS_IOC_ATTR_ROOT)) + xfs_forget_acl(inode, name); + kfree(args.value); return error; } int -xfs_attrmulti_attr_remove( +xfs_ioc_attrmulti_one( + struct file *parfilp, struct inode *inode, - unsigned char *name, + uint32_t opcode, + void __user *uname, + void __user *value, + uint32_t *len, uint32_t flags) { + unsigned char *name; int error; - size_t namelen; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return -EPERM; - namelen = strlen(name); - error = xfs_attr_remove(XFS_I(inode), name, namelen, flags); - if (!error) - xfs_forget_acl(inode, name, flags); + if ((flags & XFS_IOC_ATTR_ROOT) && (flags & XFS_IOC_ATTR_SECURE)) + return -EINVAL; + + name = strndup_user(uname, MAXNAMELEN); + if (IS_ERR(name)) + return PTR_ERR(name); + + switch (opcode) { + case ATTR_OP_GET: + error = xfs_attrmulti_attr_get(inode, name, value, len, flags); + break; + case ATTR_OP_REMOVE: + value = NULL; + *len = 0; + fallthrough; + case ATTR_OP_SET: + error = mnt_want_write_file(parfilp); + if (error) + break; + error = xfs_attrmulti_attr_set(inode, name, value, *len, flags); + mnt_drop_write_file(parfilp); + break; + default: + error = -EINVAL; + break; + } + + kfree(name); return error; } @@ -436,7 +586,6 @@ xfs_attrmulti_by_handle( xfs_fsop_attrmulti_handlereq_t am_hreq; struct dentry *dentry; unsigned int i, size; - unsigned char *attr_name; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -462,148 +611,23 @@ xfs_attrmulti_by_handle( goto out_dput; } - error = -ENOMEM; - attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); - if (!attr_name) - goto out_kfree_ops; - error = 0; for (i = 0; i < am_hreq.opcount; i++) { - if ((ops[i].am_flags & ATTR_ROOT) && - (ops[i].am_flags & ATTR_SECURE)) { - ops[i].am_error = -EINVAL; - continue; - } - ops[i].am_flags &= ~ATTR_KERNEL_FLAGS; - - ops[i].am_error = strncpy_from_user((char *)attr_name, - ops[i].am_attrname, MAXNAMELEN); - if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) - error = -ERANGE; - if (ops[i].am_error < 0) - break; - - switch (ops[i].am_opcode) { - case ATTR_OP_GET: - ops[i].am_error = xfs_attrmulti_attr_get( - d_inode(dentry), attr_name, - ops[i].am_attrvalue, &ops[i].am_length, - ops[i].am_flags); - break; - case ATTR_OP_SET: - ops[i].am_error = mnt_want_write_file(parfilp); - if (ops[i].am_error) - break; - ops[i].am_error = xfs_attrmulti_attr_set( - d_inode(dentry), attr_name, - ops[i].am_attrvalue, ops[i].am_length, - ops[i].am_flags); - mnt_drop_write_file(parfilp); - break; - case ATTR_OP_REMOVE: - ops[i].am_error = mnt_want_write_file(parfilp); - if (ops[i].am_error) - break; - ops[i].am_error = xfs_attrmulti_attr_remove( - d_inode(dentry), attr_name, - ops[i].am_flags); - mnt_drop_write_file(parfilp); - break; - default: - ops[i].am_error = -EINVAL; - } + ops[i].am_error = xfs_ioc_attrmulti_one(parfilp, + d_inode(dentry), ops[i].am_opcode, + ops[i].am_attrname, ops[i].am_attrvalue, + &ops[i].am_length, ops[i].am_flags); } if (copy_to_user(am_hreq.ops, ops, size)) error = -EFAULT; - kfree(attr_name); - out_kfree_ops: kfree(ops); out_dput: dput(dentry); return error; } -int -xfs_ioc_space( - struct file *filp, - xfs_flock64_t *bf) -{ - struct inode *inode = file_inode(filp); - struct xfs_inode *ip = XFS_I(inode); - struct iattr iattr; - enum xfs_prealloc_flags flags = XFS_PREALLOC_CLEAR; - uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; - int error; - - if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) - return -EPERM; - - if (!(filp->f_mode & FMODE_WRITE)) - return -EBADF; - - if (!S_ISREG(inode->i_mode)) - return -EINVAL; - - if (xfs_is_always_cow_inode(ip)) - return -EOPNOTSUPP; - - if (filp->f_flags & O_DSYNC) - flags |= XFS_PREALLOC_SYNC; - if (filp->f_mode & FMODE_NOCMTIME) - flags |= XFS_PREALLOC_INVISIBLE; - - error = mnt_want_write_file(filp); - if (error) - return error; - - xfs_ilock(ip, iolock); - error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP); - if (error) - goto out_unlock; - inode_dio_wait(inode); - - switch (bf->l_whence) { - case 0: /*SEEK_SET*/ - break; - case 1: /*SEEK_CUR*/ - bf->l_start += filp->f_pos; - break; - case 2: /*SEEK_END*/ - bf->l_start += XFS_ISIZE(ip); - break; - default: - error = -EINVAL; - goto out_unlock; - } - - if (bf->l_start < 0 || bf->l_start > inode->i_sb->s_maxbytes) { - error = -EINVAL; - goto out_unlock; - } - - if (bf->l_start > XFS_ISIZE(ip)) { - error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), - bf->l_start - XFS_ISIZE(ip), 0); - if (error) - goto out_unlock; - } - - iattr.ia_valid = ATTR_SIZE; - iattr.ia_size = bf->l_start; - error = xfs_vn_setattr_size(file_dentry(filp), &iattr); - if (error) - goto out_unlock; - - error = xfs_update_prealloc_flags(ip, flags); - -out_unlock: - xfs_iunlock(ip, iolock); - mnt_drop_write_file(filp); - return error; -} - /* Return 0 on success or positive error */ int xfs_fsbulkstat_one_fmt( @@ -633,13 +657,15 @@ xfs_fsinumbers_fmt( STATIC int xfs_ioc_fsbulkstat( - xfs_mount_t *mp, + struct file *file, unsigned int cmd, void __user *arg) { + struct xfs_mount *mp = XFS_I(file_inode(file))->i_mount; struct xfs_fsop_bulkreq bulkreq; struct xfs_ibulk breq = { .mp = mp, + .mnt_userns = file_mnt_user_ns(file), .ocount = 0, }; xfs_ino_t lastino; @@ -651,7 +677,7 @@ xfs_ioc_fsbulkstat( if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; if (copy_from_user(&bulkreq, arg, sizeof(struct xfs_fsop_bulkreq))) @@ -788,6 +814,9 @@ xfs_bulk_ireq_setup( if (XFS_INO_TO_AGNO(mp, breq->startino) >= mp->m_sb.sb_agcount) return -ECANCELED; + if (hdr->flags & XFS_BULK_IREQ_NREXT64) + breq->flags |= XFS_IBULK_NREXT64; + return 0; } @@ -807,20 +836,22 @@ xfs_bulk_ireq_teardown( /* Handle the v5 bulkstat ioctl. */ STATIC int xfs_ioc_bulkstat( - struct xfs_mount *mp, + struct file *file, unsigned int cmd, struct xfs_bulkstat_req __user *arg) { + struct xfs_mount *mp = XFS_I(file_inode(file))->i_mount; struct xfs_bulk_ireq hdr; struct xfs_ibulk breq = { .mp = mp, + .mnt_userns = file_mnt_user_ns(file), }; int error; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr))) @@ -870,7 +901,7 @@ xfs_ioc_inumbers( if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr))) @@ -903,7 +934,7 @@ xfs_ioc_fsgeometry( struct xfs_fsop_geom fsgeo; size_t len; - xfs_fs_geometry(&mp->m_sb, &fsgeo, struct_version); + xfs_fs_geometry(mp, &fsgeo, struct_version); if (struct_version <= 3) len = sizeof(struct xfs_fsop_geom_v1); @@ -924,6 +955,7 @@ xfs_ioc_ag_geometry( struct xfs_mount *mp, void __user *arg) { + struct xfs_perag *pag; struct xfs_ag_geometry ageo; int error; @@ -934,7 +966,12 @@ xfs_ioc_ag_geometry( if (memchr_inv(&ageo.ag_reserved, 0, sizeof(ageo.ag_reserved))) return -EINVAL; - error = xfs_ag_get_geometry(mp, ageo.ag_number, &ageo); + pag = xfs_perag_get(mp, ageo.ag_number); + if (!pag) + return -EINVAL; + + error = xfs_ag_get_geometry(pag, &ageo); + xfs_perag_put(pag); if (error) return error; @@ -947,98 +984,72 @@ xfs_ioc_ag_geometry( * Linux extended inode flags interface. */ -STATIC unsigned int -xfs_merge_ioc_xflags( - unsigned int flags, - unsigned int start) -{ - unsigned int xflags = start; - - if (flags & FS_IMMUTABLE_FL) - xflags |= FS_XFLAG_IMMUTABLE; - else - xflags &= ~FS_XFLAG_IMMUTABLE; - if (flags & FS_APPEND_FL) - xflags |= FS_XFLAG_APPEND; - else - xflags &= ~FS_XFLAG_APPEND; - if (flags & FS_SYNC_FL) - xflags |= FS_XFLAG_SYNC; - else - xflags &= ~FS_XFLAG_SYNC; - if (flags & FS_NOATIME_FL) - xflags |= FS_XFLAG_NOATIME; - else - xflags &= ~FS_XFLAG_NOATIME; - if (flags & FS_NODUMP_FL) - xflags |= FS_XFLAG_NODUMP; - else - xflags &= ~FS_XFLAG_NODUMP; - - return xflags; -} - -STATIC unsigned int -xfs_di2lxflags( - uint16_t di_flags) -{ - unsigned int flags = 0; - - if (di_flags & XFS_DIFLAG_IMMUTABLE) - flags |= FS_IMMUTABLE_FL; - if (di_flags & XFS_DIFLAG_APPEND) - flags |= FS_APPEND_FL; - if (di_flags & XFS_DIFLAG_SYNC) - flags |= FS_SYNC_FL; - if (di_flags & XFS_DIFLAG_NOATIME) - flags |= FS_NOATIME_FL; - if (di_flags & XFS_DIFLAG_NODUMP) - flags |= FS_NODUMP_FL; - return flags; -} - static void xfs_fill_fsxattr( struct xfs_inode *ip, - bool attr, - struct fsxattr *fa) + int whichfork, + struct fileattr *fa) { - simple_fill_fsxattr(fa, xfs_ip2xflags(ip)); - fa->fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; - fa->fsx_cowextsize = ip->i_d.di_cowextsize << - ip->i_mount->m_sb.sb_blocklog; - fa->fsx_projid = ip->i_d.di_projid; - - if (attr) { - if (ip->i_afp) { - if (ip->i_afp->if_flags & XFS_IFEXTENTS) - fa->fsx_nextents = xfs_iext_count(ip->i_afp); - else - fa->fsx_nextents = ip->i_d.di_anextents; - } else - fa->fsx_nextents = 0; - } else { - if (ip->i_df.if_flags & XFS_IFEXTENTS) - fa->fsx_nextents = xfs_iext_count(&ip->i_df); - else - fa->fsx_nextents = ip->i_d.di_nextents; + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); + + fileattr_fill_xflags(fa, xfs_ip2xflags(ip)); + + if (ip->i_diflags & XFS_DIFLAG_EXTSIZE) { + fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize); + } else if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { + /* + * Don't let a misaligned extent size hint on a directory + * escape to userspace if it won't pass the setattr checks + * later. + */ + if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) && + ip->i_extsize % mp->m_sb.sb_rextsize > 0) { + fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | + FS_XFLAG_EXTSZINHERIT); + fa->fsx_extsize = 0; + } else { + fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize); + } } + + if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) + fa->fsx_cowextsize = XFS_FSB_TO_B(mp, ip->i_cowextsize); + fa->fsx_projid = ip->i_projid; + if (ifp && !xfs_need_iread_extents(ifp)) + fa->fsx_nextents = xfs_iext_count(ifp); + else + fa->fsx_nextents = xfs_ifork_nextents(ifp); } STATIC int -xfs_ioc_fsgetxattr( +xfs_ioc_fsgetxattra( xfs_inode_t *ip, - int attr, void __user *arg) { - struct fsxattr fa; + struct fileattr fa; xfs_ilock(ip, XFS_ILOCK_SHARED); - xfs_fill_fsxattr(ip, attr, &fa); + xfs_fill_fsxattr(ip, XFS_ATTR_FORK, &fa); + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + return copy_fsxattr_to_user(&fa, arg); +} + +int +xfs_fileattr_get( + struct dentry *dentry, + struct fileattr *fa) +{ + struct xfs_inode *ip = XFS_I(d_inode(dentry)); + + if (d_is_special(dentry)) + return -ENOTTY; + + xfs_ilock(ip, XFS_ILOCK_SHARED); + xfs_fill_fsxattr(ip, XFS_DATA_FORK, fa); xfs_iunlock(ip, XFS_ILOCK_SHARED); - if (copy_to_user(arg, &fa, sizeof(fa))) - return -EFAULT; return 0; } @@ -1049,7 +1060,7 @@ xfs_flags2diflags( { /* can't set PREALLOC this way, just preserve it */ uint16_t di_flags = - (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); + (ip->i_diflags & XFS_DIFLAG_PREALLOC); if (xflags & FS_XFLAG_IMMUTABLE) di_flags |= XFS_DIFLAG_IMMUTABLE; @@ -1090,7 +1101,9 @@ xfs_flags2diflags2( unsigned int xflags) { uint64_t di_flags2 = - (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK); + (ip->i_diflags2 & (XFS_DIFLAG2_REFLINK | + XFS_DIFLAG2_BIGTIME | + XFS_DIFLAG2_NREXT64)); if (xflags & FS_XFLAG_DAX) di_flags2 |= XFS_DIFLAG2_DAX; @@ -1100,139 +1113,69 @@ xfs_flags2diflags2( return di_flags2; } -STATIC void -xfs_diflags_to_linux( - struct xfs_inode *ip) -{ - struct inode *inode = VFS_I(ip); - unsigned int xflags = xfs_ip2xflags(ip); - - if (xflags & FS_XFLAG_IMMUTABLE) - inode->i_flags |= S_IMMUTABLE; - else - inode->i_flags &= ~S_IMMUTABLE; - if (xflags & FS_XFLAG_APPEND) - inode->i_flags |= S_APPEND; - else - inode->i_flags &= ~S_APPEND; - if (xflags & FS_XFLAG_SYNC) - inode->i_flags |= S_SYNC; - else - inode->i_flags &= ~S_SYNC; - if (xflags & FS_XFLAG_NOATIME) - inode->i_flags |= S_NOATIME; - else - inode->i_flags &= ~S_NOATIME; -#if 0 /* disabled until the flag switching races are sorted out */ - if (xflags & FS_XFLAG_DAX) - inode->i_flags |= S_DAX; - else - inode->i_flags &= ~S_DAX; -#endif -} - static int xfs_ioctl_setattr_xflags( struct xfs_trans *tp, struct xfs_inode *ip, - struct fsxattr *fa) + struct fileattr *fa) { struct xfs_mount *mp = ip->i_mount; - uint64_t di_flags2; + uint64_t i_flags2; /* Can't change realtime flag if any extents are allocated. */ - if ((ip->i_d.di_nextents || ip->i_delayed_blks) && + if ((ip->i_df.if_nextents || ip->i_delayed_blks) && XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME)) return -EINVAL; /* If realtime flag is set then must have realtime device */ if (fa->fsx_xflags & FS_XFLAG_REALTIME) { if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 || - (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) + (ip->i_extsize % mp->m_sb.sb_rextsize)) return -EINVAL; } /* Clear reflink if we are actually able to set the rt flag. */ if ((fa->fsx_xflags & FS_XFLAG_REALTIME) && xfs_is_reflink_inode(ip)) - ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; + ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; /* Don't allow us to set DAX mode for a reflinked file for now. */ if ((fa->fsx_xflags & FS_XFLAG_DAX) && xfs_is_reflink_inode(ip)) return -EINVAL; /* diflags2 only valid for v3 inodes. */ - di_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags); - if (di_flags2 && ip->i_d.di_version < 3) + i_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags); + if (i_flags2 && !xfs_has_v3inodes(mp)) return -EINVAL; - ip->i_d.di_flags = xfs_flags2diflags(ip, fa->fsx_xflags); - ip->i_d.di_flags2 = di_flags2; + ip->i_diflags = xfs_flags2diflags(ip, fa->fsx_xflags); + ip->i_diflags2 = i_flags2; - xfs_diflags_to_linux(ip); + xfs_diflags_to_iflags(ip, false); xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(mp, xs_ig_attrchg); return 0; } -/* - * If we are changing DAX flags, we have to ensure the file is clean and any - * cached objects in the address space are invalidated and removed. This - * requires us to lock out other IO and page faults similar to a truncate - * operation. The locks need to be held until the transaction has been committed - * so that the cache invalidation is atomic with respect to the DAX flag - * manipulation. - */ -static int -xfs_ioctl_setattr_dax_invalidate( +static void +xfs_ioctl_setattr_prepare_dax( struct xfs_inode *ip, - struct fsxattr *fa, - int *join_flags) + struct fileattr *fa) { - struct inode *inode = VFS_I(ip); - struct super_block *sb = inode->i_sb; - int error; - - *join_flags = 0; - - /* - * It is only valid to set the DAX flag on regular files and - * directories on filesystems where the block size is equal to the page - * size. On directories it serves as an inherited hint so we don't - * have to check the device for dax support or flush pagecache. - */ - if (fa->fsx_xflags & FS_XFLAG_DAX) { - struct xfs_buftarg *target = xfs_inode_buftarg(ip); - - if (!bdev_dax_supported(target->bt_bdev, sb->s_blocksize)) - return -EINVAL; - } - - /* If the DAX state is not changing, we have nothing to do here. */ - if ((fa->fsx_xflags & FS_XFLAG_DAX) && IS_DAX(inode)) - return 0; - if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode)) - return 0; + struct xfs_mount *mp = ip->i_mount; + struct inode *inode = VFS_I(ip); if (S_ISDIR(inode->i_mode)) - return 0; + return; - /* lock, flush and invalidate mapping in preparation for flag change */ - xfs_ilock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL); - error = filemap_write_and_wait(inode->i_mapping); - if (error) - goto out_unlock; - error = invalidate_inode_pages2(inode->i_mapping); - if (error) - goto out_unlock; - - *join_flags = XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL; - return 0; - -out_unlock: - xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL); - return error; + if (xfs_has_dax_always(mp) || xfs_has_dax_never(mp)) + return; + if (((fa->fsx_xflags & FS_XFLAG_DAX) && + !(ip->i_diflags2 & XFS_DIFLAG2_DAX)) || + (!(fa->fsx_xflags & FS_XFLAG_DAX) && + (ip->i_diflags2 & XFS_DIFLAG2_DAX))) + d_mark_dontcache(inode); } /* @@ -1240,190 +1183,150 @@ out_unlock: * have permission to do so. On success, return a clean transaction and the * inode locked exclusively ready for further operation specific checks. On * failure, return an error without modifying or locking the inode. - * - * The inode might already be IO locked on call. If this is the case, it is - * indicated in @join_flags and we take full responsibility for ensuring they - * are unlocked from now on. Hence if we have an error here, we still have to - * unlock them. Otherwise, once they are joined to the transaction, they will - * be unlocked on commit/cancel. */ static struct xfs_trans * xfs_ioctl_setattr_get_trans( struct xfs_inode *ip, - int join_flags) + struct xfs_dquot *pdqp) { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; int error = -EROFS; - if (mp->m_flags & XFS_MOUNT_RDONLY) - goto out_unlock; + if (xfs_is_readonly(mp)) + goto out_error; error = -EIO; - if (XFS_FORCED_SHUTDOWN(mp)) - goto out_unlock; + if (xfs_is_shutdown(mp)) + goto out_error; - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); + error = xfs_trans_alloc_ichange(ip, NULL, NULL, pdqp, + has_capability_noaudit(current, CAP_FOWNER), &tp); if (error) - goto out_unlock; + goto out_error; - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags); - join_flags = 0; - - /* - * CAP_FOWNER overrides the following restrictions: - * - * The user ID of the calling process must be equal to the file owner - * ID, except in cases where the CAP_FSETID capability is applicable. - */ - if (!inode_owner_or_capable(VFS_I(ip))) { - error = -EPERM; - goto out_cancel; - } - - if (mp->m_flags & XFS_MOUNT_WSYNC) + if (xfs_has_wsync(mp)) xfs_trans_set_sync(tp); return tp; -out_cancel: - xfs_trans_cancel(tp); -out_unlock: - if (join_flags) - xfs_iunlock(ip, join_flags); +out_error: return ERR_PTR(error); } /* - * extent size hint validation is somewhat cumbersome. Rules are: - * - * 1. extent size hint is only valid for directories and regular files - * 2. FS_XFLAG_EXTSIZE is only valid for regular files - * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories. - * 4. can only be changed on regular files if no extents are allocated - * 5. can be changed on directories at any time - * 6. extsize hint of 0 turns off hints, clears inode flags. - * 7. Extent size must be a multiple of the appropriate block size. - * 8. for non-realtime files, the extent size hint must be limited - * to half the AG size to avoid alignment extending the extent beyond the - * limits of the AG. - * - * Please keep this function in sync with xfs_scrub_inode_extsize. + * Validate a proposed extent size hint. For regular files, the hint can only + * be changed if no extents are allocated. */ static int xfs_ioctl_setattr_check_extsize( struct xfs_inode *ip, - struct fsxattr *fa) + struct fileattr *fa) { struct xfs_mount *mp = ip->i_mount; - xfs_extlen_t size; - xfs_fsblock_t extsize_fsb; + xfs_failaddr_t failaddr; + uint16_t new_diflags; - if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_d.di_nextents && - ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) - return -EINVAL; - - if (fa->fsx_extsize == 0) + if (!fa->fsx_valid) return 0; - extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); - if (extsize_fsb > MAXEXTLEN) + if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents && + XFS_FSB_TO_B(mp, ip->i_extsize) != fa->fsx_extsize) return -EINVAL; - if (XFS_IS_REALTIME_INODE(ip) || - (fa->fsx_xflags & FS_XFLAG_REALTIME)) { - size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; - } else { - size = mp->m_sb.sb_blocksize; - if (extsize_fsb > mp->m_sb.sb_agblocks / 2) + if (fa->fsx_extsize & mp->m_blockmask) + return -EINVAL; + + new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags); + + /* + * Inode verifiers do not check that the extent size hint is an integer + * multiple of the rt extent size on a directory with both rtinherit + * and extszinherit flags set. Don't let sysadmins misconfigure + * directories. + */ + if ((new_diflags & XFS_DIFLAG_RTINHERIT) && + (new_diflags & XFS_DIFLAG_EXTSZINHERIT)) { + unsigned int rtextsize_bytes; + + rtextsize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize); + if (fa->fsx_extsize % rtextsize_bytes) return -EINVAL; } - if (fa->fsx_extsize % size) - return -EINVAL; - - return 0; + failaddr = xfs_inode_validate_extsize(ip->i_mount, + XFS_B_TO_FSB(mp, fa->fsx_extsize), + VFS_I(ip)->i_mode, new_diflags); + return failaddr != NULL ? -EINVAL : 0; } -/* - * CoW extent size hint validation rules are: - * - * 1. CoW extent size hint can only be set if reflink is enabled on the fs. - * The inode does not have to have any shared blocks, but it must be a v3. - * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files; - * for a directory, the hint is propagated to new files. - * 3. Can be changed on files & directories at any time. - * 4. CoW extsize hint of 0 turns off hints, clears inode flags. - * 5. Extent size must be a multiple of the appropriate block size. - * 6. The extent size hint must be limited to half the AG size to avoid - * alignment extending the extent beyond the limits of the AG. - * - * Please keep this function in sync with xfs_scrub_inode_cowextsize. - */ static int xfs_ioctl_setattr_check_cowextsize( struct xfs_inode *ip, - struct fsxattr *fa) + struct fileattr *fa) { struct xfs_mount *mp = ip->i_mount; - xfs_extlen_t size; - xfs_fsblock_t cowextsize_fsb; - - if (!(fa->fsx_xflags & FS_XFLAG_COWEXTSIZE)) - return 0; + xfs_failaddr_t failaddr; + uint64_t new_diflags2; + uint16_t new_diflags; - if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb) || - ip->i_d.di_version != 3) - return -EINVAL; - - if (fa->fsx_cowextsize == 0) + if (!fa->fsx_valid) return 0; - cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize); - if (cowextsize_fsb > MAXEXTLEN) - return -EINVAL; - - size = mp->m_sb.sb_blocksize; - if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2) + if (fa->fsx_cowextsize & mp->m_blockmask) return -EINVAL; - if (fa->fsx_cowextsize % size) - return -EINVAL; + new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags); + new_diflags2 = xfs_flags2diflags2(ip, fa->fsx_xflags); - return 0; + failaddr = xfs_inode_validate_cowextsize(ip->i_mount, + XFS_B_TO_FSB(mp, fa->fsx_cowextsize), + VFS_I(ip)->i_mode, new_diflags, new_diflags2); + return failaddr != NULL ? -EINVAL : 0; } static int xfs_ioctl_setattr_check_projid( struct xfs_inode *ip, - struct fsxattr *fa) + struct fileattr *fa) { - /* Disallow 32bit project ids if projid32bit feature is not enabled. */ + if (!fa->fsx_valid) + return 0; + + /* Disallow 32bit project ids if 32bit IDs are not enabled. */ if (fa->fsx_projid > (uint16_t)-1 && - !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) + !xfs_has_projid32(ip->i_mount)) return -EINVAL; return 0; } -STATIC int -xfs_ioctl_setattr( - xfs_inode_t *ip, - struct fsxattr *fa) +int +xfs_fileattr_set( + struct user_namespace *mnt_userns, + struct dentry *dentry, + struct fileattr *fa) { - struct fsxattr old_fa; + struct xfs_inode *ip = XFS_I(d_inode(dentry)); struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; - struct xfs_dquot *udqp = NULL; struct xfs_dquot *pdqp = NULL; struct xfs_dquot *olddquot = NULL; - int code; - int join_flags = 0; + int error; trace_xfs_ioctl_setattr(ip); - code = xfs_ioctl_setattr_check_projid(ip, fa); - if (code) - return code; + if (d_is_special(dentry)) + return -ENOTTY; + + if (!fa->fsx_valid) { + if (fa->flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | + FS_NOATIME_FL | FS_NODUMP_FL | + FS_SYNC_FL | FS_DAX_FL | FS_PROJINHERIT_FL)) + return -EOPNOTSUPP; + } + + error = xfs_ioctl_setattr_check_projid(ip, fa); + if (error) + return error; /* * If disk quotas is on, we make sure that the dquots do exist on disk, @@ -1433,56 +1336,36 @@ xfs_ioctl_setattr( * If the IDs do change before we take the ilock, we're covered * because the i_*dquot fields will get updated anyway. */ - if (XFS_IS_QUOTA_ON(mp)) { - code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, - ip->i_d.di_gid, fa->fsx_projid, - XFS_QMOPT_PQUOTA, &udqp, NULL, &pdqp); - if (code) - return code; + if (fa->fsx_valid && XFS_IS_QUOTA_ON(mp)) { + error = xfs_qm_vop_dqalloc(ip, VFS_I(ip)->i_uid, + VFS_I(ip)->i_gid, fa->fsx_projid, + XFS_QMOPT_PQUOTA, NULL, NULL, &pdqp); + if (error) + return error; } - /* - * Changing DAX config may require inode locking for mapping - * invalidation. These need to be held all the way to transaction commit - * or cancel time, so need to be passed through to - * xfs_ioctl_setattr_get_trans() so it can apply them to the join call - * appropriately. - */ - code = xfs_ioctl_setattr_dax_invalidate(ip, fa, &join_flags); - if (code) - goto error_free_dquots; + xfs_ioctl_setattr_prepare_dax(ip, fa); - tp = xfs_ioctl_setattr_get_trans(ip, join_flags); + tp = xfs_ioctl_setattr_get_trans(ip, pdqp); if (IS_ERR(tp)) { - code = PTR_ERR(tp); + error = PTR_ERR(tp); goto error_free_dquots; } - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) && - ip->i_d.di_projid != fa->fsx_projid) { - code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, pdqp, - capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); - if (code) /* out of quota */ - goto error_trans_cancel; - } - - xfs_fill_fsxattr(ip, false, &old_fa); - code = vfs_ioc_fssetxattr_check(VFS_I(ip), &old_fa, fa); - if (code) - goto error_trans_cancel; - - code = xfs_ioctl_setattr_check_extsize(ip, fa); - if (code) + error = xfs_ioctl_setattr_check_extsize(ip, fa); + if (error) goto error_trans_cancel; - code = xfs_ioctl_setattr_check_cowextsize(ip, fa); - if (code) + error = xfs_ioctl_setattr_check_cowextsize(ip, fa); + if (error) goto error_trans_cancel; - code = xfs_ioctl_setattr_xflags(tp, ip, fa); - if (code) + error = xfs_ioctl_setattr_xflags(tp, ip, fa); + if (error) goto error_trans_cancel; + if (!fa->fsx_valid) + goto skip_xattr; /* * Change file ownership. Must be the owner or privileged. CAP_FSETID * overrides the following restrictions: @@ -1492,17 +1375,16 @@ xfs_ioctl_setattr( */ if ((VFS_I(ip)->i_mode & (S_ISUID|S_ISGID)) && - !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID)) + !capable_wrt_inode_uidgid(mnt_userns, VFS_I(ip), CAP_FSETID)) VFS_I(ip)->i_mode &= ~(S_ISUID|S_ISGID); /* Change the ownerships and register project quota modifications */ - if (ip->i_d.di_projid != fa->fsx_projid) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { + if (ip->i_projid != fa->fsx_projid) { + if (XFS_IS_PQUOTA_ON(mp)) { olddquot = xfs_qm_vop_chown(tp, ip, &ip->i_pdquot, pdqp); } - ASSERT(ip->i_d.di_version > 1); - ip->i_d.di_projid = fa->fsx_projid; + ip->i_projid = fa->fsx_projid; } /* @@ -1510,129 +1392,33 @@ xfs_ioctl_setattr( * extent size hint should be set on the inode. If no extent size flags * are set on the inode then unconditionally clear the extent size hint. */ - if (ip->i_d.di_flags & (XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT)) - ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; + if (ip->i_diflags & (XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT)) + ip->i_extsize = XFS_B_TO_FSB(mp, fa->fsx_extsize); else - ip->i_d.di_extsize = 0; - if (ip->i_d.di_version == 3 && - (ip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) - ip->i_d.di_cowextsize = fa->fsx_cowextsize >> - mp->m_sb.sb_blocklog; - else - ip->i_d.di_cowextsize = 0; + ip->i_extsize = 0; - code = xfs_trans_commit(tp); + if (xfs_has_v3inodes(mp)) { + if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) + ip->i_cowextsize = XFS_B_TO_FSB(mp, fa->fsx_cowextsize); + else + ip->i_cowextsize = 0; + } + +skip_xattr: + error = xfs_trans_commit(tp); /* * Release any dquot(s) the inode had kept before chown. */ xfs_qm_dqrele(olddquot); - xfs_qm_dqrele(udqp); xfs_qm_dqrele(pdqp); - return code; + return error; error_trans_cancel: xfs_trans_cancel(tp); error_free_dquots: - xfs_qm_dqrele(udqp); xfs_qm_dqrele(pdqp); - return code; -} - -STATIC int -xfs_ioc_fssetxattr( - xfs_inode_t *ip, - struct file *filp, - void __user *arg) -{ - struct fsxattr fa; - int error; - - if (copy_from_user(&fa, arg, sizeof(fa))) - return -EFAULT; - - error = mnt_want_write_file(filp); - if (error) - return error; - error = xfs_ioctl_setattr(ip, &fa); - mnt_drop_write_file(filp); - return error; -} - -STATIC int -xfs_ioc_getxflags( - xfs_inode_t *ip, - void __user *arg) -{ - unsigned int flags; - - flags = xfs_di2lxflags(ip->i_d.di_flags); - if (copy_to_user(arg, &flags, sizeof(flags))) - return -EFAULT; - return 0; -} - -STATIC int -xfs_ioc_setxflags( - struct xfs_inode *ip, - struct file *filp, - void __user *arg) -{ - struct xfs_trans *tp; - struct fsxattr fa; - struct fsxattr old_fa; - unsigned int flags; - int join_flags = 0; - int error; - - if (copy_from_user(&flags, arg, sizeof(flags))) - return -EFAULT; - - if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ - FS_NOATIME_FL | FS_NODUMP_FL | \ - FS_SYNC_FL)) - return -EOPNOTSUPP; - - fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); - - error = mnt_want_write_file(filp); - if (error) - return error; - - /* - * Changing DAX config may require inode locking for mapping - * invalidation. These need to be held all the way to transaction commit - * or cancel time, so need to be passed through to - * xfs_ioctl_setattr_get_trans() so it can apply them to the join call - * appropriately. - */ - error = xfs_ioctl_setattr_dax_invalidate(ip, &fa, &join_flags); - if (error) - goto out_drop_write; - - tp = xfs_ioctl_setattr_get_trans(ip, join_flags); - if (IS_ERR(tp)) { - error = PTR_ERR(tp); - goto out_drop_write; - } - - xfs_fill_fsxattr(ip, false, &old_fa); - error = vfs_ioc_fssetxattr_check(VFS_I(ip), &old_fa, &fa); - if (error) { - xfs_trans_cancel(tp); - goto out_drop_write; - } - - error = xfs_ioctl_setattr_xflags(tp, ip, &fa); - if (error) { - xfs_trans_cancel(tp); - goto out_drop_write; - } - - error = xfs_trans_commit(tp); -out_drop_write: - mnt_drop_write_file(filp); return error; } @@ -1672,10 +1458,8 @@ xfs_ioc_getbmap( switch (cmd) { case XFS_IOC_GETBMAPA: bmx.bmv_iflags = BMV_IF_ATTRFORK; - /*FALLTHRU*/ + fallthrough; case XFS_IOC_GETBMAP: - if (file->f_mode & FMODE_NOCMTIME) - bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ; /* struct getbmap is a strict subset of struct getbmapx. */ recsize = sizeof(struct getbmap); break; @@ -1691,10 +1475,10 @@ xfs_ioc_getbmap( if (bmx.bmv_count < 2) return -EINVAL; - if (bmx.bmv_count > ULONG_MAX / recsize) + if (bmx.bmv_count >= INT_MAX / recsize) return -ENOMEM; - buf = kmem_zalloc_large(bmx.bmv_count * sizeof(*buf), 0); + buf = kvcalloc(bmx.bmv_count, sizeof(*buf), GFP_KERNEL); if (!buf) return -ENOMEM; @@ -1719,39 +1503,17 @@ out_free_buf: return error; } -struct getfsmap_info { - struct xfs_mount *mp; - struct fsmap_head __user *data; - unsigned int idx; - __u32 last_flags; -}; - -STATIC int -xfs_getfsmap_format(struct xfs_fsmap *xfm, void *priv) -{ - struct getfsmap_info *info = priv; - struct fsmap fm; - - trace_xfs_getfsmap_mapping(info->mp, xfm); - - info->last_flags = xfm->fmr_flags; - xfs_fsmap_from_internal(&fm, xfm); - if (copy_to_user(&info->data->fmh_recs[info->idx++], &fm, - sizeof(struct fsmap))) - return -EFAULT; - - return 0; -} - STATIC int xfs_ioc_getfsmap( struct xfs_inode *ip, struct fsmap_head __user *arg) { - struct getfsmap_info info = { NULL }; struct xfs_fsmap_head xhead = {0}; struct fsmap_head head; - bool aborted = false; + struct fsmap *recs; + unsigned int count; + __u32 last_flags = 0; + bool done = false; int error; if (copy_from_user(&head, arg, sizeof(struct fsmap_head))) @@ -1763,43 +1525,117 @@ xfs_ioc_getfsmap( sizeof(head.fmh_keys[1].fmr_reserved))) return -EINVAL; + /* + * Use an internal memory buffer so that we don't have to copy fsmap + * data to userspace while holding locks. Start by trying to allocate + * up to 128k for the buffer, but fall back to a single page if needed. + */ + count = min_t(unsigned int, head.fmh_count, + 131072 / sizeof(struct fsmap)); + recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL); + if (!recs) { + count = min_t(unsigned int, head.fmh_count, + PAGE_SIZE / sizeof(struct fsmap)); + recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL); + if (!recs) + return -ENOMEM; + } + xhead.fmh_iflags = head.fmh_iflags; - xhead.fmh_count = head.fmh_count; xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]); xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]); trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]); - info.mp = ip->i_mount; - info.data = arg; - error = xfs_getfsmap(ip->i_mount, &xhead, xfs_getfsmap_format, &info); - if (error == -ECANCELED) { - error = 0; - aborted = true; - } else if (error) - return error; + head.fmh_entries = 0; + do { + struct fsmap __user *user_recs; + struct fsmap *last_rec; + + user_recs = &arg->fmh_recs[head.fmh_entries]; + xhead.fmh_entries = 0; + xhead.fmh_count = min_t(unsigned int, count, + head.fmh_count - head.fmh_entries); + + /* Run query, record how many entries we got. */ + error = xfs_getfsmap(ip->i_mount, &xhead, recs); + switch (error) { + case 0: + /* + * There are no more records in the result set. Copy + * whatever we got to userspace and break out. + */ + done = true; + break; + case -ECANCELED: + /* + * The internal memory buffer is full. Copy whatever + * records we got to userspace and go again if we have + * not yet filled the userspace buffer. + */ + error = 0; + break; + default: + goto out_free; + } + head.fmh_entries += xhead.fmh_entries; + head.fmh_oflags = xhead.fmh_oflags; - /* If we didn't abort, set the "last" flag in the last fmx */ - if (!aborted && info.idx) { - info.last_flags |= FMR_OF_LAST; - if (copy_to_user(&info.data->fmh_recs[info.idx - 1].fmr_flags, - &info.last_flags, sizeof(info.last_flags))) - return -EFAULT; + /* + * If the caller wanted a record count or there aren't any + * new records to return, we're done. + */ + if (head.fmh_count == 0 || xhead.fmh_entries == 0) + break; + + /* Copy all the records we got out to userspace. */ + if (copy_to_user(user_recs, recs, + xhead.fmh_entries * sizeof(struct fsmap))) { + error = -EFAULT; + goto out_free; + } + + /* Remember the last record flags we copied to userspace. */ + last_rec = &recs[xhead.fmh_entries - 1]; + last_flags = last_rec->fmr_flags; + + /* Set up the low key for the next iteration. */ + xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec); + trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); + } while (!done && head.fmh_entries < head.fmh_count); + + /* + * If there are no more records in the query result set and we're not + * in counting mode, mark the last record returned with the LAST flag. + */ + if (done && head.fmh_count > 0 && head.fmh_entries > 0) { + struct fsmap __user *user_rec; + + last_flags |= FMR_OF_LAST; + user_rec = &arg->fmh_recs[head.fmh_entries - 1]; + + if (copy_to_user(&user_rec->fmr_flags, &last_flags, + sizeof(last_flags))) { + error = -EFAULT; + goto out_free; + } } /* copy back header */ - head.fmh_entries = xhead.fmh_entries; - head.fmh_oflags = xhead.fmh_oflags; - if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) - return -EFAULT; + if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) { + error = -EFAULT; + goto out_free; + } - return 0; +out_free: + kmem_free(recs); + return error; } STATIC int xfs_ioc_scrub_metadata( - struct xfs_inode *ip, + struct file *file, void __user *arg) { struct xfs_scrub_metadata scrub; @@ -1811,7 +1647,7 @@ xfs_ioc_scrub_metadata( if (copy_from_user(&scrub, arg, sizeof(scrub))) return -EFAULT; - error = xfs_scrub_metadata(ip, &scrub); + error = xfs_scrub_metadata(file, &scrub); if (error) return error; @@ -1886,7 +1722,7 @@ xfs_ioc_swapext( goto out_put_tmp_file; } - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + if (xfs_is_shutdown(ip->i_mount)) { error = -EIO; goto out_put_tmp_file; } @@ -1983,6 +1819,61 @@ out: return error; } +static inline int +xfs_fs_eofblocks_from_user( + struct xfs_fs_eofblocks *src, + struct xfs_icwalk *dst) +{ + if (src->eof_version != XFS_EOFBLOCKS_VERSION) + return -EINVAL; + + if (src->eof_flags & ~XFS_EOF_FLAGS_VALID) + return -EINVAL; + + if (memchr_inv(&src->pad32, 0, sizeof(src->pad32)) || + memchr_inv(src->pad64, 0, sizeof(src->pad64))) + return -EINVAL; + + dst->icw_flags = 0; + if (src->eof_flags & XFS_EOF_FLAGS_SYNC) + dst->icw_flags |= XFS_ICWALK_FLAG_SYNC; + if (src->eof_flags & XFS_EOF_FLAGS_UID) + dst->icw_flags |= XFS_ICWALK_FLAG_UID; + if (src->eof_flags & XFS_EOF_FLAGS_GID) + dst->icw_flags |= XFS_ICWALK_FLAG_GID; + if (src->eof_flags & XFS_EOF_FLAGS_PRID) + dst->icw_flags |= XFS_ICWALK_FLAG_PRID; + if (src->eof_flags & XFS_EOF_FLAGS_MINFILESIZE) + dst->icw_flags |= XFS_ICWALK_FLAG_MINFILESIZE; + + dst->icw_prid = src->eof_prid; + dst->icw_min_file_size = src->eof_min_file_size; + + dst->icw_uid = INVALID_UID; + if (src->eof_flags & XFS_EOF_FLAGS_UID) { + dst->icw_uid = make_kuid(current_user_ns(), src->eof_uid); + if (!uid_valid(dst->icw_uid)) + return -EINVAL; + } + + dst->icw_gid = INVALID_GID; + if (src->eof_flags & XFS_EOF_FLAGS_GID) { + dst->icw_gid = make_kgid(current_user_ns(), src->eof_gid); + if (!gid_valid(dst->icw_gid)) + return -EINVAL; + } + return 0; +} + +/* + * These long-unused ioctls were removed from the official ioctl API in 5.17, + * but retain these definitions so that we can log warnings about them. + */ +#define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64) +#define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64) +#define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64) +#define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64) + /* * Note: some of the ioctl's return positive numbers as a * byte count indicating success, such as readlink_by_handle. @@ -2013,13 +1904,11 @@ xfs_file_ioctl( case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: case XFS_IOC_ALLOCSP64: - case XFS_IOC_FREESP64: { - xfs_flock64_t bf; - - if (copy_from_user(&bf, arg, sizeof(bf))) - return -EFAULT; - return xfs_ioc_space(filp, &bf); - } + case XFS_IOC_FREESP64: + xfs_warn_once(mp, + "%s should use fallocate; XFS_IOC_{ALLOC,FREE}SP ioctl unsupported", + current->comm); + return -ENOTTY; case XFS_IOC_DIOINFO: { struct xfs_buftarg *target = xfs_inode_buftarg(ip); struct dioattr da; @@ -2035,10 +1924,10 @@ xfs_file_ioctl( case XFS_IOC_FSBULKSTAT_SINGLE: case XFS_IOC_FSBULKSTAT: case XFS_IOC_FSINUMBERS: - return xfs_ioc_fsbulkstat(mp, cmd, arg); + return xfs_ioc_fsbulkstat(filp, cmd, arg); case XFS_IOC_BULKSTAT: - return xfs_ioc_bulkstat(mp, cmd, arg); + return xfs_ioc_bulkstat(filp, cmd, arg); case XFS_IOC_INUMBERS: return xfs_ioc_inumbers(mp, cmd, arg); @@ -2055,16 +1944,8 @@ xfs_file_ioctl( case XFS_IOC_GETVERSION: return put_user(inode->i_generation, (int __user *)arg); - case XFS_IOC_FSGETXATTR: - return xfs_ioc_fsgetxattr(ip, 0, arg); case XFS_IOC_FSGETXATTRA: - return xfs_ioc_fsgetxattr(ip, 1, arg); - case XFS_IOC_FSSETXATTR: - return xfs_ioc_fssetxattr(ip, filp, arg); - case XFS_IOC_GETXFLAGS: - return xfs_ioc_getxflags(ip, arg); - case XFS_IOC_SETXFLAGS: - return xfs_ioc_setxflags(ip, filp, arg); + return xfs_ioc_fsgetxattra(ip, arg); case XFS_IOC_GETBMAP: case XFS_IOC_GETBMAPA: @@ -2075,7 +1956,7 @@ xfs_file_ioctl( return xfs_ioc_getfsmap(ip, arg); case XFS_IOC_SCRUB_METADATA: - return xfs_ioc_scrub_metadata(ip, arg); + return xfs_ioc_scrub_metadata(filp, arg); case XFS_IOC_FD_TO_HANDLE: case XFS_IOC_PATH_TO_HANDLE: @@ -2137,7 +2018,7 @@ xfs_file_ioctl( if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (mp->m_flags & XFS_MOUNT_RDONLY) + if (xfs_is_readonly(mp)) return -EROFS; if (copy_from_user(&inout, arg, sizeof(inout))) @@ -2176,7 +2057,7 @@ xfs_file_ioctl( } case XFS_IOC_FSGROWFSDATA: { - xfs_growfs_data_t in; + struct xfs_growfs_data in; if (copy_from_user(&in, arg, sizeof(in))) return -EFAULT; @@ -2190,7 +2071,7 @@ xfs_file_ioctl( } case XFS_IOC_FSGROWFSLOG: { - xfs_growfs_log_t in; + struct xfs_growfs_log in; if (copy_from_user(&in, arg, sizeof(in))) return -EFAULT; @@ -2248,23 +2129,28 @@ xfs_file_ioctl( return xfs_errortag_clearall(mp); case XFS_IOC_FREE_EOFBLOCKS: { - struct xfs_fs_eofblocks eofb; - struct xfs_eofblocks keofb; + struct xfs_fs_eofblocks eofb; + struct xfs_icwalk icw; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (mp->m_flags & XFS_MOUNT_RDONLY) + if (xfs_is_readonly(mp)) return -EROFS; if (copy_from_user(&eofb, arg, sizeof(eofb))) return -EFAULT; - error = xfs_fs_eofblocks_from_user(&eofb, &keofb); + error = xfs_fs_eofblocks_from_user(&eofb, &icw); if (error) return error; - return xfs_icache_free_eofblocks(mp, &keofb); + trace_xfs_ioc_free_eofblocks(mp, &icw, _RET_IP_); + + sb_start_write(mp->m_super); + error = xfs_blockgc_free_space(mp, &icw); + sb_end_write(mp->m_super); + return error; } default: |