From 00acb28d96746f78389f23a7b5309a917b45c12f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 15 Apr 2024 14:54:09 -0700 Subject: xfs: declare xfs_file.c symbols in xfs_file.h Move the two public symbols in xfs_file.c to xfs_file.h. We're about to add more public symbols in that source file, so let's finally create the header file. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_ioctl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/xfs/xfs_ioctl.c') diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index d0e2cec6210d..1397edea20f1 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -39,6 +39,7 @@ #include "xfs_ioctl.h" #include "xfs_xattr.h" #include "xfs_rtbitmap.h" +#include "xfs_file.h" #include #include -- cgit v1.2.3-59-g8ed1b From 9a64d9b3109d01cca0b83c1d36538b7a37c5284e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 15 Apr 2024 14:54:14 -0700 Subject: xfs: introduce new file range exchange ioctl Introduce a new ioctl to handle exchanging ranges of bytes between files. The goal here is to perform the exchange atomically with respect to applications -- either they see the file contents before the exchange or they see that A-B is now B-A, even if the kernel crashes. My original goal with all this code was to make it so that online repair can build a replacement directory or xattr structure in a temporary file and commit the repair by atomically exchanging all the data blocks between the two files. However, I needed a way to test this mechanism thoroughly, so I've been evolving an ioctl interface since then. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/Makefile | 1 + fs/xfs/libxfs/xfs_fs.h | 41 ++++++ fs/xfs/xfs_exchrange.c | 339 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_exchrange.h | 30 +++++ fs/xfs/xfs_ioctl.c | 4 + 5 files changed, 415 insertions(+) create mode 100644 fs/xfs/xfs_exchrange.c create mode 100644 fs/xfs/xfs_exchrange.h (limited to 'fs/xfs/xfs_ioctl.c') diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 76674ad5833e..2474242f5a05 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -67,6 +67,7 @@ xfs-y += xfs_aops.o \ xfs_dir2_readdir.o \ xfs_discard.o \ xfs_error.o \ + xfs_exchrange.o \ xfs_export.o \ xfs_extent_busy.o \ xfs_file.o \ diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index ca1b17d01437..8a1e30cf4dc8 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -772,6 +772,46 @@ struct xfs_scrub_metadata { # define XFS_XATTR_LIST_MAX 65536 #endif +/* + * Exchange part of file1 with part of the file that this ioctl that is being + * called against (which we'll call file2). Filesystems must be able to + * restart and complete the operation even after the system goes down. + */ +struct xfs_exchange_range { + __s32 file1_fd; + __u32 pad; /* must be zeroes */ + __u64 file1_offset; /* file1 offset, bytes */ + __u64 file2_offset; /* file2 offset, bytes */ + __u64 length; /* bytes to exchange */ + + __u64 flags; /* see XFS_EXCHANGE_RANGE_* below */ +}; + +/* + * Exchange file data all the way to the ends of both files, and then exchange + * the file sizes. This flag can be used to replace a file's contents with a + * different amount of data. length will be ignored. + */ +#define XFS_EXCHANGE_RANGE_TO_EOF (1ULL << 0) + +/* Flush all changes in file data and file metadata to disk before returning. */ +#define XFS_EXCHANGE_RANGE_DSYNC (1ULL << 1) + +/* Dry run; do all the parameter verification but do not change anything. */ +#define XFS_EXCHANGE_RANGE_DRY_RUN (1ULL << 2) + +/* + * Exchange only the parts of the two files where the file allocation units + * mapped to file1's range have been written to. This can accelerate + * scatter-gather atomic writes with a temp file if all writes are aligned to + * the file allocation unit. + */ +#define XFS_EXCHANGE_RANGE_FILE1_WRITTEN (1ULL << 3) + +#define XFS_EXCHANGE_RANGE_ALL_FLAGS (XFS_EXCHANGE_RANGE_TO_EOF | \ + XFS_EXCHANGE_RANGE_DSYNC | \ + XFS_EXCHANGE_RANGE_DRY_RUN | \ + XFS_EXCHANGE_RANGE_FILE1_WRITTEN) /* * ioctl commands that are used by Linux filesystems @@ -843,6 +883,7 @@ struct xfs_scrub_metadata { #define XFS_IOC_FSGEOMETRY _IOR ('X', 126, struct xfs_fsop_geom) #define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req) #define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req) +#define XFS_IOC_EXCHANGE_RANGE _IOWR('X', 129, struct xfs_exchange_range) /* XFS_IOC_GETFSUUID ---------- deprecated 140 */ diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c new file mode 100644 index 000000000000..4cd824e47f75 --- /dev/null +++ b/fs/xfs/xfs_exchrange.c @@ -0,0 +1,339 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2020-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include "xfs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_inode.h" +#include "xfs_trans.h" +#include "xfs_exchrange.h" +#include + +/* + * Generic code for exchanging ranges of two files via XFS_IOC_EXCHANGE_RANGE. + * This part deals with struct file objects and byte ranges and does not deal + * with XFS-specific data structures such as xfs_inodes and block ranges. This + * separation may some day facilitate porting to another filesystem. + * + * The goal is to exchange fxr.length bytes starting at fxr.file1_offset in + * file1 with the same number of bytes starting at fxr.file2_offset in file2. + * Implementations must call xfs_exchange_range_prep to prepare the two + * files prior to taking locks; and they must update the inode change and mod + * times of both files as part of the metadata update. The timestamp update + * and freshness checks must be done atomically as part of the data exchange + * operation to ensure correctness of the freshness check. + * xfs_exchange_range_finish must be called after the operation completes + * successfully but before locks are dropped. + */ + +/* Verify that we have security clearance to perform this operation. */ +static int +xfs_exchange_range_verify_area( + struct xfs_exchrange *fxr) +{ + int ret; + + ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length, + true); + if (ret) + return ret; + + return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length, + true); +} + +/* + * Performs necessary checks before doing a range exchange, having stabilized + * mutable inode attributes via i_rwsem. + */ +static inline int +xfs_exchange_range_checks( + struct xfs_exchrange *fxr, + unsigned int alloc_unit) +{ + struct inode *inode1 = file_inode(fxr->file1); + struct inode *inode2 = file_inode(fxr->file2); + uint64_t allocmask = alloc_unit - 1; + int64_t test_len; + uint64_t blen; + loff_t size1, size2, tmp; + int error; + + /* Don't touch certain kinds of inodes */ + if (IS_IMMUTABLE(inode1) || IS_IMMUTABLE(inode2)) + return -EPERM; + if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2)) + return -ETXTBSY; + + size1 = i_size_read(inode1); + size2 = i_size_read(inode2); + + /* Ranges cannot start after EOF. */ + if (fxr->file1_offset > size1 || fxr->file2_offset > size2) + return -EINVAL; + + /* + * If the caller said to exchange to EOF, we set the length of the + * request large enough to cover everything to the end of both files. + */ + if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) { + fxr->length = max_t(int64_t, size1 - fxr->file1_offset, + size2 - fxr->file2_offset); + + error = xfs_exchange_range_verify_area(fxr); + if (error) + return error; + } + + /* + * The start of both ranges must be aligned to the file allocation + * unit. + */ + if (!IS_ALIGNED(fxr->file1_offset, alloc_unit) || + !IS_ALIGNED(fxr->file2_offset, alloc_unit)) + return -EINVAL; + + /* Ensure offsets don't wrap. */ + if (check_add_overflow(fxr->file1_offset, fxr->length, &tmp) || + check_add_overflow(fxr->file2_offset, fxr->length, &tmp)) + return -EINVAL; + + /* + * We require both ranges to end within EOF, unless we're exchanging + * to EOF. + */ + if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) && + (fxr->file1_offset + fxr->length > size1 || + fxr->file2_offset + fxr->length > size2)) + return -EINVAL; + + /* + * Make sure we don't hit any file size limits. If we hit any size + * limits such that test_length was adjusted, we abort the whole + * operation. + */ + test_len = fxr->length; + error = generic_write_check_limits(fxr->file2, fxr->file2_offset, + &test_len); + if (error) + return error; + error = generic_write_check_limits(fxr->file1, fxr->file1_offset, + &test_len); + if (error) + return error; + if (test_len != fxr->length) + return -EINVAL; + + /* + * If the user wanted us to exchange up to the infile's EOF, round up + * to the next allocation unit boundary for this check. Do the same + * for the outfile. + * + * Otherwise, reject the range length if it's not aligned to an + * allocation unit. + */ + if (fxr->file1_offset + fxr->length == size1) + blen = ALIGN(size1, alloc_unit) - fxr->file1_offset; + else if (fxr->file2_offset + fxr->length == size2) + blen = ALIGN(size2, alloc_unit) - fxr->file2_offset; + else if (!IS_ALIGNED(fxr->length, alloc_unit)) + return -EINVAL; + else + blen = fxr->length; + + /* Don't allow overlapped exchanges within the same file. */ + if (inode1 == inode2 && + fxr->file2_offset + blen > fxr->file1_offset && + fxr->file1_offset + blen > fxr->file2_offset) + return -EINVAL; + + /* + * Ensure that we don't exchange a partial EOF block into the middle of + * another file. + */ + if ((fxr->length & allocmask) == 0) + return 0; + + blen = fxr->length; + if (fxr->file2_offset + blen < size2) + blen &= ~allocmask; + + if (fxr->file1_offset + blen < size1) + blen &= ~allocmask; + + return blen == fxr->length ? 0 : -EINVAL; +} + +/* + * Check that the two inodes are eligible for range exchanges, the ranges make + * sense, and then flush all dirty data. Caller must ensure that the inodes + * have been locked against any other modifications. + */ +static inline int +xfs_exchange_range_prep( + struct xfs_exchrange *fxr, + unsigned int alloc_unit) +{ + struct inode *inode1 = file_inode(fxr->file1); + struct inode *inode2 = file_inode(fxr->file2); + bool same_inode = (inode1 == inode2); + int error; + + /* Check that we don't violate system file offset limits. */ + error = xfs_exchange_range_checks(fxr, alloc_unit); + if (error || fxr->length == 0) + return error; + + /* Wait for the completion of any pending IOs on both files */ + inode_dio_wait(inode1); + if (!same_inode) + inode_dio_wait(inode2); + + error = filemap_write_and_wait_range(inode1->i_mapping, + fxr->file1_offset, + fxr->file1_offset + fxr->length - 1); + if (error) + return error; + + error = filemap_write_and_wait_range(inode2->i_mapping, + fxr->file2_offset, + fxr->file2_offset + fxr->length - 1); + if (error) + return error; + + /* + * If the files or inodes involved require synchronous writes, amend + * the request to force the filesystem to flush all data and metadata + * to disk after the operation completes. + */ + if (((fxr->file1->f_flags | fxr->file2->f_flags) & O_SYNC) || + IS_SYNC(inode1) || IS_SYNC(inode2)) + fxr->flags |= XFS_EXCHANGE_RANGE_DSYNC; + + return 0; +} + +/* + * Finish a range exchange operation, if it was successful. Caller must ensure + * that the inodes are still locked against any other modifications. + */ +static inline int +xfs_exchange_range_finish( + struct xfs_exchrange *fxr) +{ + int error; + + error = file_remove_privs(fxr->file1); + if (error) + return error; + if (file_inode(fxr->file1) == file_inode(fxr->file2)) + return 0; + + return file_remove_privs(fxr->file2); +} + +/* Exchange parts of two files. */ +static int +xfs_exchange_range( + struct xfs_exchrange *fxr) +{ + struct inode *inode1 = file_inode(fxr->file1); + struct inode *inode2 = file_inode(fxr->file2); + int ret; + + BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS & + XFS_EXCHANGE_RANGE_PRIV_FLAGS); + + /* Both files must be on the same mount/filesystem. */ + if (fxr->file1->f_path.mnt != fxr->file2->f_path.mnt) + return -EXDEV; + + if (fxr->flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS) + return -EINVAL; + + /* Userspace requests only honored for regular files. */ + if (S_ISDIR(inode1->i_mode) || S_ISDIR(inode2->i_mode)) + return -EISDIR; + if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode)) + return -EINVAL; + + /* Both files must be opened for read and write. */ + if (!(fxr->file1->f_mode & FMODE_READ) || + !(fxr->file1->f_mode & FMODE_WRITE) || + !(fxr->file2->f_mode & FMODE_READ) || + !(fxr->file2->f_mode & FMODE_WRITE)) + return -EBADF; + + /* Neither file can be opened append-only. */ + if ((fxr->file1->f_flags & O_APPEND) || + (fxr->file2->f_flags & O_APPEND)) + return -EBADF; + + /* + * If we're not exchanging to EOF, we can check the areas before + * stabilizing both files' i_size. + */ + if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) { + ret = xfs_exchange_range_verify_area(fxr); + if (ret) + return ret; + } + + /* Update cmtime if the fd/inode don't forbid it. */ + if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1)) + fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME1; + if (!(fxr->file2->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode2)) + fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME2; + + file_start_write(fxr->file2); + ret = -EOPNOTSUPP; /* XXX call out to lower level code */ + file_end_write(fxr->file2); + if (ret) + return ret; + + fsnotify_modify(fxr->file1); + if (fxr->file2 != fxr->file1) + fsnotify_modify(fxr->file2); + return 0; +} + +/* Collect exchange-range arguments from userspace. */ +long +xfs_ioc_exchange_range( + struct file *file, + struct xfs_exchange_range __user *argp) +{ + struct xfs_exchrange fxr = { + .file2 = file, + }; + struct xfs_exchange_range args; + struct fd file1; + int error; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + if (memchr_inv(&args.pad, 0, sizeof(args.pad))) + return -EINVAL; + if (args.flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS) + return -EINVAL; + + fxr.file1_offset = args.file1_offset; + fxr.file2_offset = args.file2_offset; + fxr.length = args.length; + fxr.flags = args.flags; + + file1 = fdget(args.file1_fd); + if (!file1.file) + return -EBADF; + fxr.file1 = file1.file; + + error = xfs_exchange_range(&fxr); + fdput(file1); + return error; +} diff --git a/fs/xfs/xfs_exchrange.h b/fs/xfs/xfs_exchrange.h new file mode 100644 index 000000000000..f80369c7df5d --- /dev/null +++ b/fs/xfs/xfs_exchrange.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2020-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef __XFS_EXCHRANGE_H__ +#define __XFS_EXCHRANGE_H__ + +/* Update the mtime/cmtime of file1 and file2 */ +#define __XFS_EXCHANGE_RANGE_UPD_CMTIME1 (1ULL << 63) +#define __XFS_EXCHANGE_RANGE_UPD_CMTIME2 (1ULL << 62) + +#define XFS_EXCHANGE_RANGE_PRIV_FLAGS (__XFS_EXCHANGE_RANGE_UPD_CMTIME1 | \ + __XFS_EXCHANGE_RANGE_UPD_CMTIME2) + +struct xfs_exchrange { + struct file *file1; + struct file *file2; + + loff_t file1_offset; + loff_t file2_offset; + u64 length; + + u64 flags; /* XFS_EXCHANGE_RANGE flags */ +}; + +long xfs_ioc_exchange_range(struct file *file, + struct xfs_exchange_range __user *argp); + +#endif /* __XFS_EXCHRANGE_H__ */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 1397edea20f1..efa95892655d 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -40,6 +40,7 @@ #include "xfs_xattr.h" #include "xfs_rtbitmap.h" #include "xfs_file.h" +#include "xfs_exchrange.h" #include #include @@ -2170,6 +2171,9 @@ xfs_file_ioctl( return error; } + case XFS_IOC_EXCHANGE_RANGE: + return xfs_ioc_exchange_range(filp, arg); + default: return -ENOTTY; } -- cgit v1.2.3-59-g8ed1b From 54275d8496f3e4764302cebc0e9517d950ba6589 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:21 -0700 Subject: xfs: remove xfs_da_args.attr_flags This field only ever contains XATTR_{CREATE,REPLACE}, and it only goes as deep as xfs_attr_set. Remove the field from the structure and replace it with an enum specifying exactly what kind of change we want to make to the xattr structure. Upsert is the name that we'll give to the flags==0 operation, because we're either updating an existing value or inserting it, and the caller doesn't care. Note: The "UPSERTR" name created here is to make userspace porting easier. It will be removed in the next patch. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 7 ++++--- fs/xfs/libxfs/xfs_attr.h | 9 ++++++++- fs/xfs/libxfs/xfs_da_btree.h | 1 - fs/xfs/scrub/attr_repair.c | 3 +-- fs/xfs/xfs_acl.c | 2 +- fs/xfs/xfs_ioctl.c | 14 ++++++-------- fs/xfs/xfs_iops.c | 2 +- fs/xfs/xfs_trace.h | 7 +------ fs/xfs/xfs_xattr.c | 19 +++++++++++++++---- fs/xfs/xfs_xattr.h | 3 ++- 10 files changed, 39 insertions(+), 28 deletions(-) (limited to 'fs/xfs/xfs_ioctl.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 30e6084122d8..b04e09143869 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -922,7 +922,8 @@ xfs_attr_defer_add( */ int xfs_attr_set( - struct xfs_da_args *args) + struct xfs_da_args *args, + enum xfs_attr_update op) { struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; @@ -1008,7 +1009,7 @@ xfs_attr_set( } /* Pure create fails if the attr already exists */ - if (args->attr_flags & XATTR_CREATE) + if (op == XFS_ATTRUPDATE_CREATE) goto out_trans_cancel; xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_REPLACE); break; @@ -1018,7 +1019,7 @@ xfs_attr_set( goto out_trans_cancel; /* Pure replace fails if no existing attr to replace. */ - if (args->attr_flags & XATTR_REPLACE) + if (op == XFS_ATTRUPDATE_REPLACE) goto out_trans_cancel; xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_SET); break; diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 670ab2a613fc..228360f7c85c 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -544,7 +544,14 @@ int xfs_inode_hasattr(struct xfs_inode *ip); bool xfs_attr_is_leaf(struct xfs_inode *ip); int xfs_attr_get_ilocked(struct xfs_da_args *args); int xfs_attr_get(struct xfs_da_args *args); -int xfs_attr_set(struct xfs_da_args *args); + +enum xfs_attr_update { + XFS_ATTRUPDATE_UPSERTR, /* set/remove value, replace any existing attr */ + XFS_ATTRUPDATE_CREATE, /* set value, fail if attr already exists */ + XFS_ATTRUPDATE_REPLACE, /* set value, fail if attr does not exist */ +}; + +int xfs_attr_set(struct xfs_da_args *args, enum xfs_attr_update op); int xfs_attr_set_iter(struct xfs_attr_intent *attr); int xfs_attr_remove_iter(struct xfs_attr_intent *attr); bool xfs_attr_namecheck(const void *name, size_t length); diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index b04a3290ffac..706b529a81fe 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -60,7 +60,6 @@ typedef struct xfs_da_args { void *value; /* set of bytes (maybe contain NULLs) */ int valuelen; /* length of value */ unsigned int attr_filter; /* XFS_ATTR_{ROOT,SECURE,INCOMPLETE} */ - unsigned int attr_flags; /* XATTR_{CREATE,REPLACE} */ xfs_dahash_t hashval; /* hash value of name */ xfs_ino_t inumber; /* input/output inode number */ struct xfs_inode *dp; /* directory inode to manipulate */ diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c index 7b4318764d03..3066d662ea13 100644 --- a/fs/xfs/scrub/attr_repair.c +++ b/fs/xfs/scrub/attr_repair.c @@ -557,7 +557,6 @@ xrep_xattr_insert_rec( struct xfs_da_args args = { .dp = rx->sc->tempip, .attr_filter = key->flags, - .attr_flags = XATTR_CREATE, .namelen = key->namelen, .valuelen = key->valuelen, .owner = rx->sc->ip->i_ino, @@ -605,7 +604,7 @@ xrep_xattr_insert_rec( * xfs_attr_set creates and commits its own transaction. If the attr * already exists, we'll just drop it during the rebuild. */ - error = xfs_attr_set(&args); + error = xfs_attr_set(&args, XFS_ATTRUPDATE_CREATE); if (error == -EEXIST) error = 0; diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 4bf69c9c088e..12f98af9e709 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -203,7 +203,7 @@ __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) xfs_acl_to_disk(args.value, acl); } - error = xfs_attr_change(&args); + error = xfs_attr_change(&args, XFS_ATTRUPDATE_UPSERTR); kvfree(args.value); /* diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index efa95892655d..0eca7a9096e2 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -361,15 +361,15 @@ xfs_attr_filter( return 0; } -static unsigned int -xfs_attr_flags( +static inline enum xfs_attr_update +xfs_xattr_flags( u32 ioc_flags) { if (ioc_flags & XFS_IOC_ATTR_CREATE) - return XATTR_CREATE; + return XFS_ATTRUPDATE_CREATE; if (ioc_flags & XFS_IOC_ATTR_REPLACE) - return XATTR_REPLACE; - return 0; + return XFS_ATTRUPDATE_REPLACE; + return XFS_ATTRUPDATE_UPSERTR; } int @@ -476,7 +476,6 @@ xfs_attrmulti_attr_get( struct xfs_da_args args = { .dp = XFS_I(inode), .attr_filter = xfs_attr_filter(flags), - .attr_flags = xfs_attr_flags(flags), .name = name, .namelen = strlen(name), .valuelen = *len, @@ -510,7 +509,6 @@ xfs_attrmulti_attr_set( struct xfs_da_args args = { .dp = XFS_I(inode), .attr_filter = xfs_attr_filter(flags), - .attr_flags = xfs_attr_flags(flags), .name = name, .namelen = strlen(name), }; @@ -528,7 +526,7 @@ xfs_attrmulti_attr_set( args.valuelen = len; } - error = xfs_attr_change(&args); + error = xfs_attr_change(&args, xfs_xattr_flags(flags)); if (!error && (flags & XFS_IOC_ATTR_ROOT)) xfs_forget_acl(inode, name); kfree(args.value); diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ad76704ab133..d02ca2248bbc 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -63,7 +63,7 @@ xfs_initxattrs( .value = xattr->value, .valuelen = xattr->value_len, }; - error = xfs_attr_change(&args); + error = xfs_attr_change(&args, XFS_ATTRUPDATE_UPSERTR); if (error < 0) break; } diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 08df5bb70a6c..57f225ba7e8a 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1999,7 +1999,6 @@ DECLARE_EVENT_CLASS(xfs_attr_class, __field(int, valuelen) __field(xfs_dahash_t, hashval) __field(unsigned int, attr_filter) - __field(unsigned int, attr_flags) __field(uint32_t, op_flags) ), TP_fast_assign( @@ -2011,11 +2010,10 @@ DECLARE_EVENT_CLASS(xfs_attr_class, __entry->valuelen = args->valuelen; __entry->hashval = args->hashval; __entry->attr_filter = args->attr_filter; - __entry->attr_flags = args->attr_flags; __entry->op_flags = args->op_flags; ), TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d valuelen %d " - "hashval 0x%x filter %s flags %s op_flags %s", + "hashval 0x%x filter %s op_flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->namelen, @@ -2025,9 +2023,6 @@ DECLARE_EVENT_CLASS(xfs_attr_class, __entry->hashval, __print_flags(__entry->attr_filter, "|", XFS_ATTR_FILTER_FLAGS), - __print_flags(__entry->attr_flags, "|", - { XATTR_CREATE, "CREATE" }, - { XATTR_REPLACE, "REPLACE" }), __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS)) ) diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 4ebf7052eb67..6ce1e06f650a 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -73,7 +73,8 @@ xfs_attr_want_log_assist( */ int xfs_attr_change( - struct xfs_da_args *args) + struct xfs_da_args *args, + enum xfs_attr_update op) { struct xfs_mount *mp = args->dp->i_mount; int error; @@ -88,7 +89,7 @@ xfs_attr_change( args->op_flags |= XFS_DA_OP_LOGGED; } - return xfs_attr_set(args); + return xfs_attr_set(args, op); } @@ -115,6 +116,17 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused, return args.valuelen; } +static inline enum xfs_attr_update +xfs_xattr_flags_to_op( + int flags) +{ + if (flags & XATTR_CREATE) + return XFS_ATTRUPDATE_CREATE; + if (flags & XATTR_REPLACE) + return XFS_ATTRUPDATE_REPLACE; + return XFS_ATTRUPDATE_UPSERTR; +} + static int xfs_xattr_set(const struct xattr_handler *handler, struct mnt_idmap *idmap, struct dentry *unused, @@ -124,7 +136,6 @@ xfs_xattr_set(const struct xattr_handler *handler, struct xfs_da_args args = { .dp = XFS_I(inode), .attr_filter = handler->flags, - .attr_flags = flags, .name = name, .namelen = strlen(name), .value = (void *)value, @@ -132,7 +143,7 @@ xfs_xattr_set(const struct xattr_handler *handler, }; int error; - error = xfs_attr_change(&args); + error = xfs_attr_change(&args, xfs_xattr_flags_to_op(flags)); if (!error && (handler->flags & XFS_ATTR_ROOT)) xfs_forget_acl(inode, name); return error; diff --git a/fs/xfs/xfs_xattr.h b/fs/xfs/xfs_xattr.h index cec766cad26c..c3eb858fb59e 100644 --- a/fs/xfs/xfs_xattr.h +++ b/fs/xfs/xfs_xattr.h @@ -6,7 +6,8 @@ #ifndef __XFS_XATTR_H__ #define __XFS_XATTR_H__ -int xfs_attr_change(struct xfs_da_args *args); +enum xfs_attr_update; +int xfs_attr_change(struct xfs_da_args *args, enum xfs_attr_update op); extern const struct xattr_handler * const xfs_xattr_handlers[]; -- cgit v1.2.3-59-g8ed1b From c27411d4c640037d70e2fa5751616e175e52ca5e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:22 -0700 Subject: xfs: make attr removal an explicit operation Parent pointers match attrs on name+value, unlike everything else which matches on only the name. Therefore, we cannot keep using the heuristic that !value means remove. Make this an explicit operation code. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 19 ++++++++++--------- fs/xfs/libxfs/xfs_attr.h | 3 ++- fs/xfs/xfs_acl.c | 17 +++++++++-------- fs/xfs/xfs_ioctl.c | 9 ++++++--- fs/xfs/xfs_iops.c | 2 +- fs/xfs/xfs_xattr.c | 9 ++++++--- 6 files changed, 34 insertions(+), 25 deletions(-) (limited to 'fs/xfs/xfs_ioctl.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index b04e09143869..f8f7445b063c 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -916,10 +916,6 @@ xfs_attr_defer_add( trace_xfs_attr_defer_add(new->xattri_dela_state, args->dp); } -/* - * Note: If args->value is NULL the attribute will be removed, just like the - * Linux ->setattr API. - */ int xfs_attr_set( struct xfs_da_args *args, @@ -955,7 +951,10 @@ xfs_attr_set( args->op_flags = XFS_DA_OP_OKNOENT | (args->op_flags & XFS_DA_OP_LOGGED); - if (args->value) { + switch (op) { + case XFS_ATTRUPDATE_UPSERT: + case XFS_ATTRUPDATE_CREATE: + case XFS_ATTRUPDATE_REPLACE: XFS_STATS_INC(mp, xs_attr_set); args->total = xfs_attr_calc_size(args, &local); @@ -975,9 +974,11 @@ xfs_attr_set( if (!local) rmt_blks = xfs_attr3_rmt_blocks(mp, args->valuelen); - } else { + break; + case XFS_ATTRUPDATE_REMOVE: XFS_STATS_INC(mp, xs_attr_remove); rmt_blks = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX); + break; } /* @@ -989,7 +990,7 @@ xfs_attr_set( if (error) return error; - if (args->value || xfs_inode_hasattr(dp)) { + if (op != XFS_ATTRUPDATE_REMOVE || xfs_inode_hasattr(dp)) { error = xfs_iext_count_may_overflow(dp, XFS_ATTR_FORK, XFS_IEXT_ATTR_MANIP_CNT(rmt_blks)); if (error == -EFBIG) @@ -1002,7 +1003,7 @@ xfs_attr_set( error = xfs_attr_lookup(args); switch (error) { case -EEXIST: - if (!args->value) { + if (op == XFS_ATTRUPDATE_REMOVE) { /* if no value, we are performing a remove operation */ xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_REMOVE); break; @@ -1015,7 +1016,7 @@ xfs_attr_set( break; case -ENOATTR: /* Can't remove what isn't there. */ - if (!args->value) + if (op == XFS_ATTRUPDATE_REMOVE) goto out_trans_cancel; /* Pure replace fails if no existing attr to replace. */ diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 228360f7c85c..c8005f52102a 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -546,7 +546,8 @@ int xfs_attr_get_ilocked(struct xfs_da_args *args); int xfs_attr_get(struct xfs_da_args *args); enum xfs_attr_update { - XFS_ATTRUPDATE_UPSERTR, /* set/remove value, replace any existing attr */ + XFS_ATTRUPDATE_REMOVE, /* remove attr */ + XFS_ATTRUPDATE_UPSERT, /* set value, replace any existing attr */ XFS_ATTRUPDATE_CREATE, /* set value, fail if attr already exists */ XFS_ATTRUPDATE_REPLACE, /* set value, fail if attr does not exist */ }; diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 12f98af9e709..c7c3dcfa2718 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -201,16 +201,17 @@ __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) if (!args.value) return -ENOMEM; xfs_acl_to_disk(args.value, acl); + error = xfs_attr_change(&args, XFS_ATTRUPDATE_UPSERT); + kvfree(args.value); + } else { + error = xfs_attr_change(&args, XFS_ATTRUPDATE_REMOVE); + /* + * If the attribute didn't exist to start with that's fine. + */ + if (error == -ENOATTR) + error = 0; } - error = xfs_attr_change(&args, XFS_ATTRUPDATE_UPSERTR); - kvfree(args.value); - - /* - * If the attribute didn't exist to start with that's fine. - */ - if (!acl && error == -ENOATTR) - error = 0; if (!error) set_cached_acl(inode, type, acl); return error; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 0eca7a9096e2..e30f9f40f086 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -363,13 +363,16 @@ xfs_attr_filter( static inline enum xfs_attr_update xfs_xattr_flags( - u32 ioc_flags) + u32 ioc_flags, + void *value) { + if (!value) + return XFS_ATTRUPDATE_REMOVE; if (ioc_flags & XFS_IOC_ATTR_CREATE) return XFS_ATTRUPDATE_CREATE; if (ioc_flags & XFS_IOC_ATTR_REPLACE) return XFS_ATTRUPDATE_REPLACE; - return XFS_ATTRUPDATE_UPSERTR; + return XFS_ATTRUPDATE_UPSERT; } int @@ -526,7 +529,7 @@ xfs_attrmulti_attr_set( args.valuelen = len; } - error = xfs_attr_change(&args, xfs_xattr_flags(flags)); + error = xfs_attr_change(&args, xfs_xattr_flags(flags, args.value)); if (!error && (flags & XFS_IOC_ATTR_ROOT)) xfs_forget_acl(inode, name); kfree(args.value); diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index d02ca2248bbc..659fd10c0cda 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -63,7 +63,7 @@ xfs_initxattrs( .value = xattr->value, .valuelen = xattr->value_len, }; - error = xfs_attr_change(&args, XFS_ATTRUPDATE_UPSERTR); + error = xfs_attr_change(&args, XFS_ATTRUPDATE_UPSERT); if (error < 0) break; } diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 6ce1e06f650a..0cbb93cf2869 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -118,13 +118,16 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused, static inline enum xfs_attr_update xfs_xattr_flags_to_op( - int flags) + int flags, + const void *value) { + if (!value) + return XFS_ATTRUPDATE_REMOVE; if (flags & XATTR_CREATE) return XFS_ATTRUPDATE_CREATE; if (flags & XATTR_REPLACE) return XFS_ATTRUPDATE_REPLACE; - return XFS_ATTRUPDATE_UPSERTR; + return XFS_ATTRUPDATE_UPSERT; } static int @@ -143,7 +146,7 @@ xfs_xattr_set(const struct xattr_handler *handler, }; int error; - error = xfs_attr_change(&args, xfs_xattr_flags_to_op(flags)); + error = xfs_attr_change(&args, xfs_xattr_flags_to_op(flags, value)); if (!error && (handler->flags & XFS_ATTR_ROOT)) xfs_forget_acl(inode, name); return error; -- cgit v1.2.3-59-g8ed1b From 8f4b980ee67fe53a77b70b1fdd8e15f2fe37180c Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Mon, 22 Apr 2024 09:47:53 -0700 Subject: xfs: pass the attr value to put_listent when possible Pass the attr value to put_listent when we have local xattrs or shortform xattrs. This will enable the GETPARENTS ioctl to use xfs_attr_list as its backend. Signed-off-by: Allison Henderson Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.h | 5 +++-- fs/xfs/libxfs/xfs_attr_sf.h | 1 + fs/xfs/xfs_attr_list.c | 8 +++++++- fs/xfs/xfs_ioctl.c | 1 + fs/xfs/xfs_xattr.c | 1 + 5 files changed, 13 insertions(+), 3 deletions(-) (limited to 'fs/xfs/xfs_ioctl.c') diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index d0ed7ea58ab0..d12583dd7eec 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -47,8 +47,9 @@ struct xfs_attrlist_cursor_kern { /* void; state communicated via *context */ -typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int, - unsigned char *, int, int); +typedef void (*put_listent_func_t)(struct xfs_attr_list_context *context, + int flags, unsigned char *name, int namelen, void *value, + int valuelen); struct xfs_attr_list_context { struct xfs_trans *tp; diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h index bc4422223024..73bdc0e55682 100644 --- a/fs/xfs/libxfs/xfs_attr_sf.h +++ b/fs/xfs/libxfs/xfs_attr_sf.h @@ -16,6 +16,7 @@ typedef struct xfs_attr_sf_sort { uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */ xfs_dahash_t hash; /* this entry's hash value */ unsigned char *name; /* name value, pointer into buffer */ + void *value; } xfs_attr_sf_sort_t; #define XFS_ATTR_SF_ENTSIZE_MAX /* max space for name&value */ \ diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 9bc4b5322539..5c947e5ce8b8 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -92,6 +92,7 @@ xfs_attr_shortform_list( sfe->flags, sfe->nameval, (int)sfe->namelen, + &sfe->nameval[sfe->namelen], (int)sfe->valuelen); /* * Either search callback finished early or @@ -138,6 +139,7 @@ xfs_attr_shortform_list( sbp->name = sfe->nameval; sbp->namelen = sfe->namelen; /* These are bytes, and both on-disk, don't endian-flip */ + sbp->value = &sfe->nameval[sfe->namelen], sbp->valuelen = sfe->valuelen; sbp->flags = sfe->flags; sbp->hash = xfs_attr_hashval(dp->i_mount, sfe->flags, @@ -192,6 +194,7 @@ xfs_attr_shortform_list( sbp->flags, sbp->name, sbp->namelen, + sbp->value, sbp->valuelen); if (context->seen_enough) break; @@ -479,6 +482,7 @@ xfs_attr3_leaf_list_int( */ for (; i < ichdr.count; entry++, i++) { char *name; + void *value; int namelen, valuelen; if (be32_to_cpu(entry->hashval) != cursor->hashval) { @@ -496,6 +500,7 @@ xfs_attr3_leaf_list_int( name_loc = xfs_attr3_leaf_name_local(leaf, i); name = name_loc->nameval; namelen = name_loc->namelen; + value = &name_loc->nameval[name_loc->namelen]; valuelen = be16_to_cpu(name_loc->valuelen); } else { xfs_attr_leaf_name_remote_t *name_rmt; @@ -503,6 +508,7 @@ xfs_attr3_leaf_list_int( name_rmt = xfs_attr3_leaf_name_remote(leaf, i); name = name_rmt->name; namelen = name_rmt->namelen; + value = NULL; valuelen = be32_to_cpu(name_rmt->valuelen); } @@ -513,7 +519,7 @@ xfs_attr3_leaf_list_int( return -EFSCORRUPTED; } context->put_listent(context, entry->flags, - name, namelen, valuelen); + name, namelen, value, valuelen); if (context->seen_enough) break; cursor->offset++; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index e30f9f40f086..7a2a5cf06a5c 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -310,6 +310,7 @@ xfs_ioc_attr_put_listent( int flags, unsigned char *name, int namelen, + void *value, int valuelen) { struct xfs_attrlist *alist = context->buffer; diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 1e82d11d980f..b43f7081b0f4 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -222,6 +222,7 @@ xfs_xattr_put_listent( int flags, unsigned char *name, int namelen, + void *value, int valuelen) { char *prefix; -- cgit v1.2.3-59-g8ed1b From af69d852dfe62b925d0df401eafad40698c889c6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:54 -0700 Subject: xfs: move handle ioctl code to xfs_handle.c Move the handle managemnet code (and the attrmulti code that uses it) to xfs_handle.c. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/Makefile | 1 + fs/xfs/xfs_handle.c | 618 +++++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_handle.h | 28 +++ fs/xfs/xfs_ioctl.c | 592 +----------------------------------------------- fs/xfs/xfs_ioctl.h | 28 --- fs/xfs/xfs_ioctl32.c | 1 + 6 files changed, 649 insertions(+), 619 deletions(-) create mode 100644 fs/xfs/xfs_handle.c create mode 100644 fs/xfs/xfs_handle.h (limited to 'fs/xfs/xfs_ioctl.c') diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 0c1a0b67af93..c969b11ce0f4 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -78,6 +78,7 @@ xfs-y += xfs_aops.o \ xfs_fsmap.o \ xfs_fsops.o \ xfs_globals.o \ + xfs_handle.o \ xfs_health.o \ xfs_icache.o \ xfs_ioctl.o \ diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c new file mode 100644 index 000000000000..13c2479a3053 --- /dev/null +++ b/fs/xfs/xfs_handle.c @@ -0,0 +1,618 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All rights reserved. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_trans.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_attr.h" +#include "xfs_ioctl.h" +#include "xfs_parent.h" +#include "xfs_da_btree.h" +#include "xfs_handle.h" +#include "xfs_health.h" +#include "xfs_icache.h" +#include "xfs_export.h" +#include "xfs_xattr.h" +#include "xfs_acl.h" + +#include + +/* + * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to + * a file or fs handle. + * + * XFS_IOC_PATH_TO_FSHANDLE + * returns fs handle for a mount point or path within that mount point + * XFS_IOC_FD_TO_HANDLE + * returns full handle for a FD opened in user space + * XFS_IOC_PATH_TO_HANDLE + * returns full handle for a path + */ +int +xfs_find_handle( + unsigned int cmd, + xfs_fsop_handlereq_t *hreq) +{ + int hsize; + xfs_handle_t handle; + struct inode *inode; + struct fd f = {NULL}; + struct path path; + int error; + struct xfs_inode *ip; + + if (cmd == XFS_IOC_FD_TO_HANDLE) { + f = fdget(hreq->fd); + if (!f.file) + return -EBADF; + inode = file_inode(f.file); + } else { + error = user_path_at(AT_FDCWD, hreq->path, 0, &path); + if (error) + return error; + inode = d_inode(path.dentry); + } + ip = XFS_I(inode); + + /* + * We can only generate handles for inodes residing on a XFS filesystem, + * and only for regular files, directories or symbolic links. + */ + error = -EINVAL; + if (inode->i_sb->s_magic != XFS_SB_MAGIC) + goto out_put; + + error = -EBADF; + if (!S_ISREG(inode->i_mode) && + !S_ISDIR(inode->i_mode) && + !S_ISLNK(inode->i_mode)) + goto out_put; + + + memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t)); + + if (cmd == XFS_IOC_PATH_TO_FSHANDLE) { + /* + * This handle only contains an fsid, zero the rest. + */ + memset(&handle.ha_fid, 0, sizeof(handle.ha_fid)); + hsize = sizeof(xfs_fsid_t); + } else { + handle.ha_fid.fid_len = sizeof(xfs_fid_t) - + sizeof(handle.ha_fid.fid_len); + handle.ha_fid.fid_pad = 0; + handle.ha_fid.fid_gen = inode->i_generation; + handle.ha_fid.fid_ino = ip->i_ino; + hsize = sizeof(xfs_handle_t); + } + + error = -EFAULT; + if (copy_to_user(hreq->ohandle, &handle, hsize) || + copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) + goto out_put; + + error = 0; + + out_put: + if (cmd == XFS_IOC_FD_TO_HANDLE) + fdput(f); + else + path_put(&path); + return error; +} + +/* + * No need to do permission checks on the various pathname components + * as the handle operations are privileged. + */ +STATIC int +xfs_handle_acceptable( + void *context, + struct dentry *dentry) +{ + return 1; +} + +/* + * Convert userspace handle data into a dentry. + */ +struct dentry * +xfs_handle_to_dentry( + struct file *parfilp, + void __user *uhandle, + u32 hlen) +{ + xfs_handle_t handle; + struct xfs_fid64 fid; + + /* + * Only allow handle opens under a directory. + */ + if (!S_ISDIR(file_inode(parfilp)->i_mode)) + return ERR_PTR(-ENOTDIR); + + if (hlen != sizeof(xfs_handle_t)) + return ERR_PTR(-EINVAL); + if (copy_from_user(&handle, uhandle, hlen)) + return ERR_PTR(-EFAULT); + if (handle.ha_fid.fid_len != + sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len)) + return ERR_PTR(-EINVAL); + + memset(&fid, 0, sizeof(struct fid)); + fid.ino = handle.ha_fid.fid_ino; + fid.gen = handle.ha_fid.fid_gen; + + return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3, + FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG, + xfs_handle_acceptable, NULL); +} + +STATIC struct dentry * +xfs_handlereq_to_dentry( + struct file *parfilp, + xfs_fsop_handlereq_t *hreq) +{ + return xfs_handle_to_dentry(parfilp, hreq->ihandle, hreq->ihandlen); +} + +int +xfs_open_by_handle( + struct file *parfilp, + xfs_fsop_handlereq_t *hreq) +{ + const struct cred *cred = current_cred(); + int error; + int fd; + int permflag; + struct file *filp; + struct inode *inode; + struct dentry *dentry; + fmode_t fmode; + struct path path; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + dentry = xfs_handlereq_to_dentry(parfilp, hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + inode = d_inode(dentry); + + /* Restrict xfs_open_by_handle to directories & regular files. */ + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) { + error = -EPERM; + goto out_dput; + } + +#if BITS_PER_LONG != 32 + hreq->oflags |= O_LARGEFILE; +#endif + + permflag = hreq->oflags; + fmode = OPEN_FMODE(permflag); + if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && + (fmode & FMODE_WRITE) && IS_APPEND(inode)) { + error = -EPERM; + goto out_dput; + } + + if ((fmode & FMODE_WRITE) && IS_IMMUTABLE(inode)) { + error = -EPERM; + goto out_dput; + } + + /* Can't write directories. */ + if (S_ISDIR(inode->i_mode) && (fmode & FMODE_WRITE)) { + error = -EISDIR; + goto out_dput; + } + + fd = get_unused_fd_flags(0); + if (fd < 0) { + error = fd; + goto out_dput; + } + + path.mnt = parfilp->f_path.mnt; + path.dentry = dentry; + filp = dentry_open(&path, hreq->oflags, cred); + dput(dentry); + if (IS_ERR(filp)) { + put_unused_fd(fd); + return PTR_ERR(filp); + } + + if (S_ISREG(inode->i_mode)) { + filp->f_flags |= O_NOATIME; + filp->f_mode |= FMODE_NOCMTIME; + } + + fd_install(fd, filp); + return fd; + + out_dput: + dput(dentry); + return error; +} + +int +xfs_readlink_by_handle( + struct file *parfilp, + xfs_fsop_handlereq_t *hreq) +{ + struct dentry *dentry; + __u32 olen; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + dentry = xfs_handlereq_to_dentry(parfilp, hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + /* Restrict this handle operation to symlinks only. */ + if (!d_is_symlink(dentry)) { + error = -EINVAL; + goto out_dput; + } + + if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) { + error = -EFAULT; + goto out_dput; + } + + error = vfs_readlink(dentry, hreq->ohandle, olen); + + out_dput: + dput(dentry); + return error; +} + +/* + * Format an attribute and copy it out to the user's buffer. + * Take care to check values and protect against them changing later, + * we may be reading them directly out of a user buffer. + */ +static void +xfs_ioc_attr_put_listent( + struct xfs_attr_list_context *context, + int flags, + unsigned char *name, + int namelen, + void *value, + int valuelen) +{ + struct xfs_attrlist *alist = context->buffer; + struct xfs_attrlist_ent *aep; + int arraytop; + + ASSERT(!context->seen_enough); + ASSERT(context->count >= 0); + ASSERT(context->count < (ATTR_MAX_VALUELEN/8)); + ASSERT(context->firstu >= sizeof(*alist)); + ASSERT(context->firstu <= context->bufsize); + + /* + * Only list entries in the right namespace. + */ + if (context->attr_filter != (flags & XFS_ATTR_NSP_ONDISK_MASK)) + return; + + arraytop = sizeof(*alist) + + context->count * sizeof(alist->al_offset[0]); + + /* decrement by the actual bytes used by the attr */ + context->firstu -= round_up(offsetof(struct xfs_attrlist_ent, a_name) + + namelen + 1, sizeof(uint32_t)); + if (context->firstu < arraytop) { + trace_xfs_attr_list_full(context); + alist->al_more = 1; + context->seen_enough = 1; + return; + } + + aep = context->buffer + context->firstu; + aep->a_valuelen = valuelen; + memcpy(aep->a_name, name, namelen); + aep->a_name[namelen] = 0; + alist->al_offset[context->count++] = context->firstu; + alist->al_count = context->count; + trace_xfs_attr_list_add(context); +} + +static unsigned int +xfs_attr_filter( + u32 ioc_flags) +{ + if (ioc_flags & XFS_IOC_ATTR_ROOT) + return XFS_ATTR_ROOT; + if (ioc_flags & XFS_IOC_ATTR_SECURE) + return XFS_ATTR_SECURE; + return 0; +} + +static inline enum xfs_attr_update +xfs_xattr_flags( + u32 ioc_flags, + void *value) +{ + if (!value) + return XFS_ATTRUPDATE_REMOVE; + if (ioc_flags & XFS_IOC_ATTR_CREATE) + return XFS_ATTRUPDATE_CREATE; + if (ioc_flags & XFS_IOC_ATTR_REPLACE) + return XFS_ATTRUPDATE_REPLACE; + return XFS_ATTRUPDATE_UPSERT; +} + +int +xfs_ioc_attr_list( + struct xfs_inode *dp, + void __user *ubuf, + size_t bufsize, + int flags, + struct xfs_attrlist_cursor __user *ucursor) +{ + struct xfs_attr_list_context context = { }; + struct xfs_attrlist *alist; + void *buffer; + int error; + + if (bufsize < sizeof(struct xfs_attrlist) || + bufsize > XFS_XATTR_LIST_MAX) + return -EINVAL; + + /* + * Reject flags, only allow namespaces. + */ + if (flags & ~(XFS_IOC_ATTR_ROOT | XFS_IOC_ATTR_SECURE)) + return -EINVAL; + if (flags == (XFS_IOC_ATTR_ROOT | XFS_IOC_ATTR_SECURE)) + return -EINVAL; + + /* + * Validate the cursor. + */ + if (copy_from_user(&context.cursor, ucursor, sizeof(context.cursor))) + return -EFAULT; + if (context.cursor.pad1 || context.cursor.pad2) + return -EINVAL; + if (!context.cursor.initted && + (context.cursor.hashval || context.cursor.blkno || + context.cursor.offset)) + return -EINVAL; + + buffer = kvzalloc(bufsize, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + /* + * Initialize the output buffer. + */ + context.dp = dp; + context.resynch = 1; + context.attr_filter = xfs_attr_filter(flags); + context.buffer = buffer; + context.bufsize = round_down(bufsize, sizeof(uint32_t)); + context.firstu = context.bufsize; + context.put_listent = xfs_ioc_attr_put_listent; + + alist = context.buffer; + alist->al_count = 0; + alist->al_more = 0; + alist->al_offset[0] = context.bufsize; + + error = xfs_attr_list(&context); + if (error) + goto out_free; + + if (copy_to_user(ubuf, buffer, bufsize) || + copy_to_user(ucursor, &context.cursor, sizeof(context.cursor))) + error = -EFAULT; +out_free: + kvfree(buffer); + return error; +} + +int +xfs_attrlist_by_handle( + struct file *parfilp, + struct xfs_fsop_attrlist_handlereq __user *p) +{ + struct xfs_fsop_attrlist_handlereq al_hreq; + struct dentry *dentry; + int error = -ENOMEM; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user(&al_hreq, p, sizeof(al_hreq))) + return -EFAULT; + + dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + error = xfs_ioc_attr_list(XFS_I(d_inode(dentry)), al_hreq.buffer, + al_hreq.buflen, al_hreq.flags, &p->pos); + dput(dentry); + return error; +} + +static int +xfs_attrmulti_attr_get( + struct inode *inode, + unsigned char *name, + unsigned char __user *ubuf, + uint32_t *len, + uint32_t flags) +{ + struct xfs_da_args args = { + .dp = XFS_I(inode), + .attr_filter = xfs_attr_filter(flags), + .name = name, + .namelen = strlen(name), + .valuelen = *len, + }; + int error; + + if (*len > XFS_XATTR_SIZE_MAX) + return -EINVAL; + + error = xfs_attr_get(&args); + if (error) + goto out_kfree; + + *len = args.valuelen; + if (copy_to_user(ubuf, args.value, args.valuelen)) + error = -EFAULT; + +out_kfree: + kvfree(args.value); + return error; +} + +static int +xfs_attrmulti_attr_set( + struct inode *inode, + unsigned char *name, + const unsigned char __user *ubuf, + uint32_t len, + uint32_t flags) +{ + struct xfs_da_args args = { + .dp = XFS_I(inode), + .attr_filter = xfs_attr_filter(flags), + .name = name, + .namelen = strlen(name), + }; + int error; + + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + return -EPERM; + + if (ubuf) { + if (len > XFS_XATTR_SIZE_MAX) + return -EINVAL; + args.value = memdup_user(ubuf, len); + if (IS_ERR(args.value)) + return PTR_ERR(args.value); + args.valuelen = len; + } + + error = xfs_attr_change(&args, xfs_xattr_flags(flags, args.value)); + if (!error && (flags & XFS_IOC_ATTR_ROOT)) + xfs_forget_acl(inode, name); + kfree(args.value); + return error; +} + +int +xfs_ioc_attrmulti_one( + struct file *parfilp, + struct inode *inode, + uint32_t opcode, + void __user *uname, + void __user *value, + uint32_t *len, + uint32_t flags) +{ + unsigned char *name; + int error; + + if ((flags & XFS_IOC_ATTR_ROOT) && (flags & XFS_IOC_ATTR_SECURE)) + return -EINVAL; + + name = strndup_user(uname, MAXNAMELEN); + if (IS_ERR(name)) + return PTR_ERR(name); + + switch (opcode) { + case ATTR_OP_GET: + error = xfs_attrmulti_attr_get(inode, name, value, len, flags); + break; + case ATTR_OP_REMOVE: + value = NULL; + *len = 0; + fallthrough; + case ATTR_OP_SET: + error = mnt_want_write_file(parfilp); + if (error) + break; + error = xfs_attrmulti_attr_set(inode, name, value, *len, flags); + mnt_drop_write_file(parfilp); + break; + default: + error = -EINVAL; + break; + } + + kfree(name); + return error; +} + +int +xfs_attrmulti_by_handle( + struct file *parfilp, + void __user *arg) +{ + int error; + xfs_attr_multiop_t *ops; + xfs_fsop_attrmulti_handlereq_t am_hreq; + struct dentry *dentry; + unsigned int i, size; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t))) + return -EFAULT; + + /* overflow check */ + if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t)) + return -E2BIG; + + dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + error = -E2BIG; + size = am_hreq.opcount * sizeof(xfs_attr_multiop_t); + if (!size || size > 16 * PAGE_SIZE) + goto out_dput; + + ops = memdup_user(am_hreq.ops, size); + if (IS_ERR(ops)) { + error = PTR_ERR(ops); + goto out_dput; + } + + error = 0; + for (i = 0; i < am_hreq.opcount; i++) { + ops[i].am_error = xfs_ioc_attrmulti_one(parfilp, + d_inode(dentry), ops[i].am_opcode, + ops[i].am_attrname, ops[i].am_attrvalue, + &ops[i].am_length, ops[i].am_flags); + } + + if (copy_to_user(am_hreq.ops, ops, size)) + error = -EFAULT; + + kfree(ops); + out_dput: + dput(dentry); + return error; +} diff --git a/fs/xfs/xfs_handle.h b/fs/xfs/xfs_handle.h new file mode 100644 index 000000000000..e39eaf4689da --- /dev/null +++ b/fs/xfs/xfs_handle.h @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All rights reserved. + */ +#ifndef __XFS_HANDLE_H__ +#define __XFS_HANDLE_H__ + +int xfs_attrlist_by_handle(struct file *parfilp, + struct xfs_fsop_attrlist_handlereq __user *p); +int xfs_attrmulti_by_handle(struct file *parfilp, void __user *arg); + +int xfs_find_handle(unsigned int cmd, struct xfs_fsop_handlereq *hreq); +int xfs_open_by_handle(struct file *parfilp, struct xfs_fsop_handlereq *hreq); +int xfs_readlink_by_handle(struct file *parfilp, + struct xfs_fsop_handlereq *hreq); + +int xfs_ioc_attrmulti_one(struct file *parfilp, struct inode *inode, + uint32_t opcode, void __user *uname, void __user *value, + uint32_t *len, uint32_t flags); +int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf, + size_t bufsize, int flags, + struct xfs_attrlist_cursor __user *ucursor); + +struct dentry *xfs_handle_to_dentry(struct file *parfilp, void __user *uhandle, + u32 hlen); + +#endif /* __XFS_HANDLE_H__ */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 7a2a5cf06a5c..ed05fcd6261d 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -23,11 +23,9 @@ #include "xfs_fsops.h" #include "xfs_discard.h" #include "xfs_quota.h" -#include "xfs_export.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_trans.h" -#include "xfs_acl.h" #include "xfs_btree.h" #include #include "xfs_fsmap.h" @@ -37,602 +35,14 @@ #include "xfs_health.h" #include "xfs_reflink.h" #include "xfs_ioctl.h" -#include "xfs_xattr.h" #include "xfs_rtbitmap.h" #include "xfs_file.h" #include "xfs_exchrange.h" +#include "xfs_handle.h" #include -#include #include -/* - * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to - * a file or fs handle. - * - * XFS_IOC_PATH_TO_FSHANDLE - * returns fs handle for a mount point or path within that mount point - * XFS_IOC_FD_TO_HANDLE - * returns full handle for a FD opened in user space - * XFS_IOC_PATH_TO_HANDLE - * returns full handle for a path - */ -int -xfs_find_handle( - unsigned int cmd, - xfs_fsop_handlereq_t *hreq) -{ - int hsize; - xfs_handle_t handle; - struct inode *inode; - struct fd f = {NULL}; - struct path path; - int error; - struct xfs_inode *ip; - - if (cmd == XFS_IOC_FD_TO_HANDLE) { - f = fdget(hreq->fd); - if (!f.file) - return -EBADF; - inode = file_inode(f.file); - } else { - error = user_path_at(AT_FDCWD, hreq->path, 0, &path); - if (error) - return error; - inode = d_inode(path.dentry); - } - ip = XFS_I(inode); - - /* - * We can only generate handles for inodes residing on a XFS filesystem, - * and only for regular files, directories or symbolic links. - */ - error = -EINVAL; - if (inode->i_sb->s_magic != XFS_SB_MAGIC) - goto out_put; - - error = -EBADF; - if (!S_ISREG(inode->i_mode) && - !S_ISDIR(inode->i_mode) && - !S_ISLNK(inode->i_mode)) - goto out_put; - - - memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t)); - - if (cmd == XFS_IOC_PATH_TO_FSHANDLE) { - /* - * This handle only contains an fsid, zero the rest. - */ - memset(&handle.ha_fid, 0, sizeof(handle.ha_fid)); - hsize = sizeof(xfs_fsid_t); - } else { - handle.ha_fid.fid_len = sizeof(xfs_fid_t) - - sizeof(handle.ha_fid.fid_len); - handle.ha_fid.fid_pad = 0; - handle.ha_fid.fid_gen = inode->i_generation; - handle.ha_fid.fid_ino = ip->i_ino; - hsize = sizeof(xfs_handle_t); - } - - error = -EFAULT; - if (copy_to_user(hreq->ohandle, &handle, hsize) || - copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) - goto out_put; - - error = 0; - - out_put: - if (cmd == XFS_IOC_FD_TO_HANDLE) - fdput(f); - else - path_put(&path); - return error; -} - -/* - * No need to do permission checks on the various pathname components - * as the handle operations are privileged. - */ -STATIC int -xfs_handle_acceptable( - void *context, - struct dentry *dentry) -{ - return 1; -} - -/* - * Convert userspace handle data into a dentry. - */ -struct dentry * -xfs_handle_to_dentry( - struct file *parfilp, - void __user *uhandle, - u32 hlen) -{ - xfs_handle_t handle; - struct xfs_fid64 fid; - - /* - * Only allow handle opens under a directory. - */ - if (!S_ISDIR(file_inode(parfilp)->i_mode)) - return ERR_PTR(-ENOTDIR); - - if (hlen != sizeof(xfs_handle_t)) - return ERR_PTR(-EINVAL); - if (copy_from_user(&handle, uhandle, hlen)) - return ERR_PTR(-EFAULT); - if (handle.ha_fid.fid_len != - sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len)) - return ERR_PTR(-EINVAL); - - memset(&fid, 0, sizeof(struct fid)); - fid.ino = handle.ha_fid.fid_ino; - fid.gen = handle.ha_fid.fid_gen; - - return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3, - FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG, - xfs_handle_acceptable, NULL); -} - -STATIC struct dentry * -xfs_handlereq_to_dentry( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq) -{ - return xfs_handle_to_dentry(parfilp, hreq->ihandle, hreq->ihandlen); -} - -int -xfs_open_by_handle( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq) -{ - const struct cred *cred = current_cred(); - int error; - int fd; - int permflag; - struct file *filp; - struct inode *inode; - struct dentry *dentry; - fmode_t fmode; - struct path path; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - dentry = xfs_handlereq_to_dentry(parfilp, hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - inode = d_inode(dentry); - - /* Restrict xfs_open_by_handle to directories & regular files. */ - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) { - error = -EPERM; - goto out_dput; - } - -#if BITS_PER_LONG != 32 - hreq->oflags |= O_LARGEFILE; -#endif - - permflag = hreq->oflags; - fmode = OPEN_FMODE(permflag); - if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && - (fmode & FMODE_WRITE) && IS_APPEND(inode)) { - error = -EPERM; - goto out_dput; - } - - if ((fmode & FMODE_WRITE) && IS_IMMUTABLE(inode)) { - error = -EPERM; - goto out_dput; - } - - /* Can't write directories. */ - if (S_ISDIR(inode->i_mode) && (fmode & FMODE_WRITE)) { - error = -EISDIR; - goto out_dput; - } - - fd = get_unused_fd_flags(0); - if (fd < 0) { - error = fd; - goto out_dput; - } - - path.mnt = parfilp->f_path.mnt; - path.dentry = dentry; - filp = dentry_open(&path, hreq->oflags, cred); - dput(dentry); - if (IS_ERR(filp)) { - put_unused_fd(fd); - return PTR_ERR(filp); - } - - if (S_ISREG(inode->i_mode)) { - filp->f_flags |= O_NOATIME; - filp->f_mode |= FMODE_NOCMTIME; - } - - fd_install(fd, filp); - return fd; - - out_dput: - dput(dentry); - return error; -} - -int -xfs_readlink_by_handle( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq) -{ - struct dentry *dentry; - __u32 olen; - int error; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - dentry = xfs_handlereq_to_dentry(parfilp, hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - /* Restrict this handle operation to symlinks only. */ - if (!d_is_symlink(dentry)) { - error = -EINVAL; - goto out_dput; - } - - if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) { - error = -EFAULT; - goto out_dput; - } - - error = vfs_readlink(dentry, hreq->ohandle, olen); - - out_dput: - dput(dentry); - return error; -} - -/* - * Format an attribute and copy it out to the user's buffer. - * Take care to check values and protect against them changing later, - * we may be reading them directly out of a user buffer. - */ -static void -xfs_ioc_attr_put_listent( - struct xfs_attr_list_context *context, - int flags, - unsigned char *name, - int namelen, - void *value, - int valuelen) -{ - struct xfs_attrlist *alist = context->buffer; - struct xfs_attrlist_ent *aep; - int arraytop; - - ASSERT(!context->seen_enough); - ASSERT(context->count >= 0); - ASSERT(context->count < (ATTR_MAX_VALUELEN/8)); - ASSERT(context->firstu >= sizeof(*alist)); - ASSERT(context->firstu <= context->bufsize); - - /* - * Only list entries in the right namespace. - */ - if (context->attr_filter != (flags & XFS_ATTR_NSP_ONDISK_MASK)) - return; - - arraytop = sizeof(*alist) + - context->count * sizeof(alist->al_offset[0]); - - /* decrement by the actual bytes used by the attr */ - context->firstu -= round_up(offsetof(struct xfs_attrlist_ent, a_name) + - namelen + 1, sizeof(uint32_t)); - if (context->firstu < arraytop) { - trace_xfs_attr_list_full(context); - alist->al_more = 1; - context->seen_enough = 1; - return; - } - - aep = context->buffer + context->firstu; - aep->a_valuelen = valuelen; - memcpy(aep->a_name, name, namelen); - aep->a_name[namelen] = 0; - alist->al_offset[context->count++] = context->firstu; - alist->al_count = context->count; - trace_xfs_attr_list_add(context); -} - -static unsigned int -xfs_attr_filter( - u32 ioc_flags) -{ - if (ioc_flags & XFS_IOC_ATTR_ROOT) - return XFS_ATTR_ROOT; - if (ioc_flags & XFS_IOC_ATTR_SECURE) - return XFS_ATTR_SECURE; - return 0; -} - -static inline enum xfs_attr_update -xfs_xattr_flags( - u32 ioc_flags, - void *value) -{ - if (!value) - return XFS_ATTRUPDATE_REMOVE; - if (ioc_flags & XFS_IOC_ATTR_CREATE) - return XFS_ATTRUPDATE_CREATE; - if (ioc_flags & XFS_IOC_ATTR_REPLACE) - return XFS_ATTRUPDATE_REPLACE; - return XFS_ATTRUPDATE_UPSERT; -} - -int -xfs_ioc_attr_list( - struct xfs_inode *dp, - void __user *ubuf, - size_t bufsize, - int flags, - struct xfs_attrlist_cursor __user *ucursor) -{ - struct xfs_attr_list_context context = { }; - struct xfs_attrlist *alist; - void *buffer; - int error; - - if (bufsize < sizeof(struct xfs_attrlist) || - bufsize > XFS_XATTR_LIST_MAX) - return -EINVAL; - - /* - * Reject flags, only allow namespaces. - */ - if (flags & ~(XFS_IOC_ATTR_ROOT | XFS_IOC_ATTR_SECURE)) - return -EINVAL; - if (flags == (XFS_IOC_ATTR_ROOT | XFS_IOC_ATTR_SECURE)) - return -EINVAL; - - /* - * Validate the cursor. - */ - if (copy_from_user(&context.cursor, ucursor, sizeof(context.cursor))) - return -EFAULT; - if (context.cursor.pad1 || context.cursor.pad2) - return -EINVAL; - if (!context.cursor.initted && - (context.cursor.hashval || context.cursor.blkno || - context.cursor.offset)) - return -EINVAL; - - buffer = kvzalloc(bufsize, GFP_KERNEL); - if (!buffer) - return -ENOMEM; - - /* - * Initialize the output buffer. - */ - context.dp = dp; - context.resynch = 1; - context.attr_filter = xfs_attr_filter(flags); - context.buffer = buffer; - context.bufsize = round_down(bufsize, sizeof(uint32_t)); - context.firstu = context.bufsize; - context.put_listent = xfs_ioc_attr_put_listent; - - alist = context.buffer; - alist->al_count = 0; - alist->al_more = 0; - alist->al_offset[0] = context.bufsize; - - error = xfs_attr_list(&context); - if (error) - goto out_free; - - if (copy_to_user(ubuf, buffer, bufsize) || - copy_to_user(ucursor, &context.cursor, sizeof(context.cursor))) - error = -EFAULT; -out_free: - kvfree(buffer); - return error; -} - -STATIC int -xfs_attrlist_by_handle( - struct file *parfilp, - struct xfs_fsop_attrlist_handlereq __user *p) -{ - struct xfs_fsop_attrlist_handlereq al_hreq; - struct dentry *dentry; - int error = -ENOMEM; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&al_hreq, p, sizeof(al_hreq))) - return -EFAULT; - - dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - error = xfs_ioc_attr_list(XFS_I(d_inode(dentry)), al_hreq.buffer, - al_hreq.buflen, al_hreq.flags, &p->pos); - dput(dentry); - return error; -} - -static int -xfs_attrmulti_attr_get( - struct inode *inode, - unsigned char *name, - unsigned char __user *ubuf, - uint32_t *len, - uint32_t flags) -{ - struct xfs_da_args args = { - .dp = XFS_I(inode), - .attr_filter = xfs_attr_filter(flags), - .name = name, - .namelen = strlen(name), - .valuelen = *len, - }; - int error; - - if (*len > XFS_XATTR_SIZE_MAX) - return -EINVAL; - - error = xfs_attr_get(&args); - if (error) - goto out_kfree; - - *len = args.valuelen; - if (copy_to_user(ubuf, args.value, args.valuelen)) - error = -EFAULT; - -out_kfree: - kvfree(args.value); - return error; -} - -static int -xfs_attrmulti_attr_set( - struct inode *inode, - unsigned char *name, - const unsigned char __user *ubuf, - uint32_t len, - uint32_t flags) -{ - struct xfs_da_args args = { - .dp = XFS_I(inode), - .attr_filter = xfs_attr_filter(flags), - .name = name, - .namelen = strlen(name), - }; - int error; - - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return -EPERM; - - if (ubuf) { - if (len > XFS_XATTR_SIZE_MAX) - return -EINVAL; - args.value = memdup_user(ubuf, len); - if (IS_ERR(args.value)) - return PTR_ERR(args.value); - args.valuelen = len; - } - - error = xfs_attr_change(&args, xfs_xattr_flags(flags, args.value)); - if (!error && (flags & XFS_IOC_ATTR_ROOT)) - xfs_forget_acl(inode, name); - kfree(args.value); - return error; -} - -int -xfs_ioc_attrmulti_one( - struct file *parfilp, - struct inode *inode, - uint32_t opcode, - void __user *uname, - void __user *value, - uint32_t *len, - uint32_t flags) -{ - unsigned char *name; - int error; - - if ((flags & XFS_IOC_ATTR_ROOT) && (flags & XFS_IOC_ATTR_SECURE)) - return -EINVAL; - - name = strndup_user(uname, MAXNAMELEN); - if (IS_ERR(name)) - return PTR_ERR(name); - - switch (opcode) { - case ATTR_OP_GET: - error = xfs_attrmulti_attr_get(inode, name, value, len, flags); - break; - case ATTR_OP_REMOVE: - value = NULL; - *len = 0; - fallthrough; - case ATTR_OP_SET: - error = mnt_want_write_file(parfilp); - if (error) - break; - error = xfs_attrmulti_attr_set(inode, name, value, *len, flags); - mnt_drop_write_file(parfilp); - break; - default: - error = -EINVAL; - break; - } - - kfree(name); - return error; -} - -STATIC int -xfs_attrmulti_by_handle( - struct file *parfilp, - void __user *arg) -{ - int error; - xfs_attr_multiop_t *ops; - xfs_fsop_attrmulti_handlereq_t am_hreq; - struct dentry *dentry; - unsigned int i, size; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t))) - return -EFAULT; - - /* overflow check */ - if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t)) - return -E2BIG; - - dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - error = -E2BIG; - size = am_hreq.opcount * sizeof(xfs_attr_multiop_t); - if (!size || size > 16 * PAGE_SIZE) - goto out_dput; - - ops = memdup_user(am_hreq.ops, size); - if (IS_ERR(ops)) { - error = PTR_ERR(ops); - goto out_dput; - } - - error = 0; - for (i = 0; i < am_hreq.opcount; i++) { - ops[i].am_error = xfs_ioc_attrmulti_one(parfilp, - d_inode(dentry), ops[i].am_opcode, - ops[i].am_attrname, ops[i].am_attrvalue, - &ops[i].am_length, ops[i].am_flags); - } - - if (copy_to_user(am_hreq.ops, ops, size)) - error = -EFAULT; - - kfree(ops); - out_dput: - dput(dentry); - return error; -} - /* Return 0 on success or positive error */ int xfs_fsbulkstat_one_fmt( diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h index 38be600b5e1e..12124946f347 100644 --- a/fs/xfs/xfs_ioctl.h +++ b/fs/xfs/xfs_ioctl.h @@ -14,34 +14,6 @@ int xfs_ioc_swapext( xfs_swapext_t *sxp); -extern int -xfs_find_handle( - unsigned int cmd, - xfs_fsop_handlereq_t *hreq); - -extern int -xfs_open_by_handle( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq); - -extern int -xfs_readlink_by_handle( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq); - -int xfs_ioc_attrmulti_one(struct file *parfilp, struct inode *inode, - uint32_t opcode, void __user *uname, void __user *value, - uint32_t *len, uint32_t flags); -int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf, - size_t bufsize, int flags, - struct xfs_attrlist_cursor __user *ucursor); - -extern struct dentry * -xfs_handle_to_dentry( - struct file *parfilp, - void __user *uhandle, - u32 hlen); - extern int xfs_fileattr_get( struct dentry *dentry, diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index ee35eea1ecce..b64785dc4354 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -24,6 +24,7 @@ #include "xfs_ioctl32.h" #include "xfs_trace.h" #include "xfs_sb.h" +#include "xfs_handle.h" #define _NATIVE_IOC(cmd, type) \ _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) -- cgit v1.2.3-59-g8ed1b From 233f4e12bbb2c5fb1588b857336a26e8bb6942af Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:55 -0700 Subject: xfs: add parent pointer ioctls This patch adds a pair of new file ioctls to retrieve the parent pointer of a given inode. They both return the same results, but one operates on the file descriptor passed to ioctl() whereas the other allows the caller to specify a file handle for which the caller wants results. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_fs.h | 74 +++++++++++ fs/xfs/libxfs/xfs_ondisk.h | 5 + fs/xfs/libxfs/xfs_parent.c | 34 ++++++ fs/xfs/libxfs/xfs_parent.h | 5 + fs/xfs/xfs_export.c | 2 +- fs/xfs/xfs_export.h | 2 + fs/xfs/xfs_handle.c | 298 +++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_handle.h | 5 + fs/xfs/xfs_ioctl.c | 6 +- fs/xfs/xfs_trace.c | 1 + fs/xfs/xfs_trace.h | 92 ++++++++++++++ 11 files changed, 522 insertions(+), 2 deletions(-) (limited to 'fs/xfs/xfs_ioctl.c') diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 97384ab95de4..ea654df0505f 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -816,6 +816,78 @@ struct xfs_exchange_range { XFS_EXCHANGE_RANGE_DRY_RUN | \ XFS_EXCHANGE_RANGE_FILE1_WRITTEN) +/* Iterating parent pointers of files. */ + +/* target was the root directory */ +#define XFS_GETPARENTS_OFLAG_ROOT (1U << 0) + +/* Cursor is done iterating pptrs */ +#define XFS_GETPARENTS_OFLAG_DONE (1U << 1) + +#define XFS_GETPARENTS_OFLAGS_ALL (XFS_GETPARENTS_OFLAG_ROOT | \ + XFS_GETPARENTS_OFLAG_DONE) + +#define XFS_GETPARENTS_IFLAGS_ALL (0) + +struct xfs_getparents_rec { + struct xfs_handle gpr_parent; /* Handle to parent */ + __u32 gpr_reclen; /* Length of entire record */ + __u32 gpr_reserved; /* zero */ + char gpr_name[]; /* Null-terminated filename */ +}; + +/* Iterate through this file's directory parent pointers */ +struct xfs_getparents { + /* + * Structure to track progress in iterating the parent pointers. + * Must be initialized to zeroes before the first ioctl call, and + * not touched by callers after that. + */ + struct xfs_attrlist_cursor gp_cursor; + + /* Input flags: XFS_GETPARENTS_IFLAG* */ + __u16 gp_iflags; + + /* Output flags: XFS_GETPARENTS_OFLAG* */ + __u16 gp_oflags; + + /* Size of the gp_buffer in bytes */ + __u32 gp_bufsize; + + /* Must be set to zero */ + __u64 gp_reserved; + + /* Pointer to a buffer in which to place xfs_getparents_rec */ + __u64 gp_buffer; +}; + +static inline struct xfs_getparents_rec * +xfs_getparents_first_rec(struct xfs_getparents *gp) +{ + return (struct xfs_getparents_rec *)(uintptr_t)gp->gp_buffer; +} + +static inline struct xfs_getparents_rec * +xfs_getparents_next_rec(struct xfs_getparents *gp, + struct xfs_getparents_rec *gpr) +{ + void *next = ((void *)gpr + gpr->gpr_reclen); + void *end = (void *)(uintptr_t)(gp->gp_buffer + gp->gp_bufsize); + + if (next >= end) + return NULL; + + return next; +} + +/* Iterate through this file handle's directory parent pointers. */ +struct xfs_getparents_by_handle { + /* Handle to file whose parents we want. */ + struct xfs_handle gph_handle; + + struct xfs_getparents gph_request; +}; + /* * ioctl commands that are used by Linux filesystems */ @@ -851,6 +923,8 @@ struct xfs_exchange_range { /* XFS_IOC_GETFSMAP ------ hoisted 59 */ #define XFS_IOC_SCRUB_METADATA _IOWR('X', 60, struct xfs_scrub_metadata) #define XFS_IOC_AG_GEOMETRY _IOWR('X', 61, struct xfs_ag_geometry) +#define XFS_IOC_GETPARENTS _IOWR('X', 62, struct xfs_getparents) +#define XFS_IOC_GETPARENTS_BY_HANDLE _IOWR('X', 63, struct xfs_getparents_by_handle) /* * ioctl commands that replace IRIX syssgi()'s diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h index 25952ef584ee..e8cdd77d03fa 100644 --- a/fs/xfs/libxfs/xfs_ondisk.h +++ b/fs/xfs/libxfs/xfs_ondisk.h @@ -156,6 +156,11 @@ xfs_check_ondisk_structs(void) XFS_CHECK_OFFSET(struct xfs_efi_log_format_32, efi_extents, 16); XFS_CHECK_OFFSET(struct xfs_efi_log_format_64, efi_extents, 16); + /* parent pointer ioctls */ + XFS_CHECK_STRUCT_SIZE(struct xfs_getparents_rec, 32); + XFS_CHECK_STRUCT_SIZE(struct xfs_getparents, 40); + XFS_CHECK_STRUCT_SIZE(struct xfs_getparents_by_handle, 64); + /* * The v5 superblock format extended several v4 header structures with * additional data. While new fields are only accessible on v5 diff --git a/fs/xfs/libxfs/xfs_parent.c b/fs/xfs/libxfs/xfs_parent.c index fdf643bfde4d..504de1ef3387 100644 --- a/fs/xfs/libxfs/xfs_parent.c +++ b/fs/xfs/libxfs/xfs_parent.c @@ -257,3 +257,37 @@ xfs_parent_replacename( xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_REPLACE); return 0; } + +/* + * Extract parent pointer information from any parent pointer xattr into + * @parent_ino/gen. The last two parameters can be NULL pointers. + * + * Returns 0 if this is not a parent pointer xattr at all; or -EFSCORRUPTED for + * garbage. + */ +int +xfs_parent_from_attr( + struct xfs_mount *mp, + unsigned int attr_flags, + const unsigned char *name, + unsigned int namelen, + const void *value, + unsigned int valuelen, + xfs_ino_t *parent_ino, + uint32_t *parent_gen) +{ + const struct xfs_parent_rec *rec = value; + + ASSERT(attr_flags & XFS_ATTR_PARENT); + + if (!xfs_parent_namecheck(attr_flags, name, namelen)) + return -EFSCORRUPTED; + if (!xfs_parent_valuecheck(mp, value, valuelen)) + return -EFSCORRUPTED; + + if (parent_ino) + *parent_ino = be64_to_cpu(rec->p_ino); + if (parent_gen) + *parent_gen = be32_to_cpu(rec->p_gen); + return 0; +} diff --git a/fs/xfs/libxfs/xfs_parent.h b/fs/xfs/libxfs/xfs_parent.h index 768633b31367..d7ab09e738ad 100644 --- a/fs/xfs/libxfs/xfs_parent.h +++ b/fs/xfs/libxfs/xfs_parent.h @@ -91,4 +91,9 @@ int xfs_parent_replacename(struct xfs_trans *tp, struct xfs_inode *new_dp, const struct xfs_name *new_name, struct xfs_inode *child); +int xfs_parent_from_attr(struct xfs_mount *mp, unsigned int attr_flags, + const unsigned char *name, unsigned int namelen, + const void *value, unsigned int valuelen, + xfs_ino_t *parent_ino, uint32_t *parent_gen); + #endif /* __XFS_PARENT_H__ */ diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 4b03221351c0..201489d3de08 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -102,7 +102,7 @@ xfs_fs_encode_fh( return fileid_type; } -STATIC struct inode * +struct inode * xfs_nfs_get_inode( struct super_block *sb, u64 ino, diff --git a/fs/xfs/xfs_export.h b/fs/xfs/xfs_export.h index 64471a3ddb04..3cd85e8901a5 100644 --- a/fs/xfs/xfs_export.h +++ b/fs/xfs/xfs_export.h @@ -57,4 +57,6 @@ struct xfs_fid64 { /* This flag goes on the wire. Don't play with it. */ #define XFS_FILEID_TYPE_64FLAG 0x80 /* NFS fileid has 64bit inodes */ +struct inode *xfs_nfs_get_inode(struct super_block *sb, u64 ino, u32 gen); + #endif /* __XFS_EXPORT_H__ */ diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c index b9f4d9860682..c8785ed59543 100644 --- a/fs/xfs/xfs_handle.c +++ b/fs/xfs/xfs_handle.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2022-2024 Oracle. * All rights reserved. */ #include "xfs.h" @@ -178,6 +179,30 @@ xfs_khandle_to_dentry( xfs_handle_acceptable, NULL); } +/* Convert handle already copied to kernel space into an xfs_inode. */ +static struct xfs_inode * +xfs_khandle_to_inode( + struct file *file, + struct xfs_handle *handle) +{ + struct xfs_inode *ip = XFS_I(file_inode(file)); + struct xfs_mount *mp = ip->i_mount; + struct inode *inode; + + if (!S_ISDIR(VFS_I(ip)->i_mode)) + return ERR_PTR(-ENOTDIR); + + if (handle->ha_fid.fid_len != xfs_filehandle_fid_len()) + return ERR_PTR(-EINVAL); + + inode = xfs_nfs_get_inode(mp->m_super, handle->ha_fid.fid_ino, + handle->ha_fid.fid_gen); + if (IS_ERR(inode)) + return ERR_CAST(inode); + + return XFS_I(inode); +} + /* * Convert userspace handle data into a dentry. */ @@ -652,3 +677,276 @@ xfs_attrmulti_by_handle( dput(dentry); return error; } + +struct xfs_getparents_ctx { + struct xfs_attr_list_context context; + struct xfs_getparents_by_handle gph; + + /* File to target */ + struct xfs_inode *ip; + + /* Internal buffer where we format records */ + void *krecords; + + /* Last record filled out */ + struct xfs_getparents_rec *lastrec; + + unsigned int count; +}; + +static inline unsigned int +xfs_getparents_rec_sizeof( + unsigned int namelen) +{ + return round_up(sizeof(struct xfs_getparents_rec) + namelen + 1, + sizeof(uint64_t)); +} + +static void +xfs_getparents_put_listent( + struct xfs_attr_list_context *context, + int flags, + unsigned char *name, + int namelen, + void *value, + int valuelen) +{ + struct xfs_getparents_ctx *gpx = + container_of(context, struct xfs_getparents_ctx, context); + struct xfs_inode *ip = context->dp; + struct xfs_mount *mp = ip->i_mount; + struct xfs_getparents *gp = &gpx->gph.gph_request; + struct xfs_getparents_rec *gpr = gpx->krecords + context->firstu; + unsigned short reclen = + xfs_getparents_rec_sizeof(namelen); + xfs_ino_t ino; + uint32_t gen; + int error; + + if (!(flags & XFS_ATTR_PARENT)) + return; + + error = xfs_parent_from_attr(mp, flags, name, namelen, value, valuelen, + &ino, &gen); + if (error) { + xfs_inode_mark_sick(ip, XFS_SICK_INO_PARENT); + context->seen_enough = -EFSCORRUPTED; + return; + } + + /* + * We found a parent pointer, but we've filled up the buffer. Signal + * to the caller that we did /not/ reach the end of the parent pointer + * recordset. + */ + if (context->firstu > context->bufsize - reclen) { + context->seen_enough = 1; + return; + } + + /* Format the parent pointer directly into the caller buffer. */ + gpr->gpr_reclen = reclen; + xfs_filehandle_init(mp, ino, gen, &gpr->gpr_parent); + memcpy(gpr->gpr_name, name, namelen); + gpr->gpr_name[namelen] = 0; + + trace_xfs_getparents_put_listent(ip, gp, context, gpr); + + context->firstu += reclen; + gpx->count++; + gpx->lastrec = gpr; +} + +/* Expand the last record to fill the rest of the caller's buffer. */ +static inline void +xfs_getparents_expand_lastrec( + struct xfs_getparents_ctx *gpx) +{ + struct xfs_getparents *gp = &gpx->gph.gph_request; + struct xfs_getparents_rec *gpr = gpx->lastrec; + + if (!gpx->lastrec) + gpr = gpx->krecords; + + gpr->gpr_reclen = gp->gp_bufsize - ((void *)gpr - gpx->krecords); + + trace_xfs_getparents_expand_lastrec(gpx->ip, gp, &gpx->context, gpr); +} + +static inline void __user *u64_to_uptr(u64 val) +{ + return (void __user *)(uintptr_t)val; +} + +/* Retrieve the parent pointers for a given inode. */ +STATIC int +xfs_getparents( + struct xfs_getparents_ctx *gpx) +{ + struct xfs_getparents *gp = &gpx->gph.gph_request; + struct xfs_inode *ip = gpx->ip; + struct xfs_mount *mp = ip->i_mount; + size_t bufsize; + int error; + + /* Check size of buffer requested by user */ + if (gp->gp_bufsize > XFS_XATTR_LIST_MAX) + return -ENOMEM; + if (gp->gp_bufsize < xfs_getparents_rec_sizeof(1)) + return -EINVAL; + + if (gp->gp_iflags & ~XFS_GETPARENTS_IFLAGS_ALL) + return -EINVAL; + if (gp->gp_reserved) + return -EINVAL; + + bufsize = round_down(gp->gp_bufsize, sizeof(uint64_t)); + gpx->krecords = kvzalloc(bufsize, GFP_KERNEL); + if (!gpx->krecords) { + bufsize = min(bufsize, PAGE_SIZE); + gpx->krecords = kvzalloc(bufsize, GFP_KERNEL); + if (!gpx->krecords) + return -ENOMEM; + } + + gpx->context.dp = ip; + gpx->context.resynch = 1; + gpx->context.put_listent = xfs_getparents_put_listent; + gpx->context.bufsize = bufsize; + /* firstu is used to track the bytes filled in the buffer */ + gpx->context.firstu = 0; + + /* Copy the cursor provided by caller */ + memcpy(&gpx->context.cursor, &gp->gp_cursor, + sizeof(struct xfs_attrlist_cursor)); + gpx->count = 0; + gp->gp_oflags = 0; + + trace_xfs_getparents_begin(ip, gp, &gpx->context.cursor); + + error = xfs_attr_list(&gpx->context); + if (error) + goto out_free_buf; + if (gpx->context.seen_enough < 0) { + error = gpx->context.seen_enough; + goto out_free_buf; + } + xfs_getparents_expand_lastrec(gpx); + + /* Update the caller with the current cursor position */ + memcpy(&gp->gp_cursor, &gpx->context.cursor, + sizeof(struct xfs_attrlist_cursor)); + + /* Is this the root directory? */ + if (ip->i_ino == mp->m_sb.sb_rootino) + gp->gp_oflags |= XFS_GETPARENTS_OFLAG_ROOT; + + if (gpx->context.seen_enough == 0) { + /* + * If we did not run out of buffer space, then we reached the + * end of the pptr recordset, so set the DONE flag. + */ + gp->gp_oflags |= XFS_GETPARENTS_OFLAG_DONE; + } else if (gpx->count == 0) { + /* + * If we ran out of buffer space before copying any parent + * pointers at all, the caller's buffer was too short. Tell + * userspace that, erm, the message is too long. + */ + error = -EMSGSIZE; + goto out_free_buf; + } + + trace_xfs_getparents_end(ip, gp, &gpx->context.cursor); + + ASSERT(gpx->context.firstu <= gpx->gph.gph_request.gp_bufsize); + + /* Copy the records to userspace. */ + if (copy_to_user(u64_to_uptr(gpx->gph.gph_request.gp_buffer), + gpx->krecords, gpx->context.firstu)) + error = -EFAULT; + +out_free_buf: + kvfree(gpx->krecords); + gpx->krecords = NULL; + return error; +} + +/* Retrieve the parents of this file and pass them back to userspace. */ +int +xfs_ioc_getparents( + struct file *file, + struct xfs_getparents __user *ureq) +{ + struct xfs_getparents_ctx gpx = { + .ip = XFS_I(file_inode(file)), + }; + struct xfs_getparents *kreq = &gpx.gph.gph_request; + struct xfs_mount *mp = gpx.ip->i_mount; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!xfs_has_parent(mp)) + return -EOPNOTSUPP; + if (copy_from_user(kreq, ureq, sizeof(*kreq))) + return -EFAULT; + + error = xfs_getparents(&gpx); + if (error) + return error; + + if (copy_to_user(ureq, kreq, sizeof(*kreq))) + return -EFAULT; + + return 0; +} + +/* Retrieve the parents of this file handle and pass them back to userspace. */ +int +xfs_ioc_getparents_by_handle( + struct file *file, + struct xfs_getparents_by_handle __user *ureq) +{ + struct xfs_getparents_ctx gpx = { }; + struct xfs_inode *ip = XFS_I(file_inode(file)); + struct xfs_mount *mp = ip->i_mount; + struct xfs_getparents_by_handle *kreq = &gpx.gph; + struct xfs_handle *handle = &kreq->gph_handle; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!xfs_has_parent(mp)) + return -EOPNOTSUPP; + if (copy_from_user(kreq, ureq, sizeof(*kreq))) + return -EFAULT; + + /* + * We don't use exportfs_decode_fh because it does too much work here. + * If the handle refers to a directory, the exportfs code will walk + * upwards through the directory tree to connect the dentries to the + * root directory dentry. For GETPARENTS we don't care about that + * because we're not actually going to open a file descriptor; we only + * want to open an inode and read its parent pointers. + * + * Note that xfs_scrub uses GETPARENTS to log that it will try to fix a + * corrupted file's metadata. For this usecase we would really rather + * userspace single-step the path reconstruction to avoid loops or + * other strange things if the directory tree is corrupt. + */ + gpx.ip = xfs_khandle_to_inode(file, handle); + if (IS_ERR(gpx.ip)) + return PTR_ERR(gpx.ip); + + error = xfs_getparents(&gpx); + if (error) + goto out_rele; + + if (copy_to_user(ureq, kreq, sizeof(*kreq))) + error = -EFAULT; + +out_rele: + xfs_irele(gpx.ip); + return error; +} diff --git a/fs/xfs/xfs_handle.h b/fs/xfs/xfs_handle.h index e39eaf4689da..6799a86d8565 100644 --- a/fs/xfs/xfs_handle.h +++ b/fs/xfs/xfs_handle.h @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2022-2024 Oracle. * All rights reserved. */ #ifndef __XFS_HANDLE_H__ @@ -25,4 +26,8 @@ int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf, struct dentry *xfs_handle_to_dentry(struct file *parfilp, void __user *uhandle, u32 hlen); +int xfs_ioc_getparents(struct file *file, struct xfs_getparents __user *arg); +int xfs_ioc_getparents_by_handle(struct file *file, + struct xfs_getparents_by_handle __user *arg); + #endif /* __XFS_HANDLE_H__ */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index ed05fcd6261d..0e97070abe80 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -35,6 +35,7 @@ #include "xfs_health.h" #include "xfs_reflink.h" #include "xfs_ioctl.h" +#include "xfs_xattr.h" #include "xfs_rtbitmap.h" #include "xfs_file.h" #include "xfs_exchrange.h" @@ -1424,7 +1425,10 @@ xfs_file_ioctl( case XFS_IOC_FSGETXATTRA: return xfs_ioc_fsgetxattra(ip, arg); - + case XFS_IOC_GETPARENTS: + return xfs_ioc_getparents(filp, arg); + case XFS_IOC_GETPARENTS_BY_HANDLE: + return xfs_ioc_getparents_by_handle(filp, arg); case XFS_IOC_GETBMAP: case XFS_IOC_GETBMAPA: case XFS_IOC_GETBMAPX: diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index cf92a3bd56c7..9c7fbaae2717 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -41,6 +41,7 @@ #include "xfs_bmap.h" #include "xfs_exchmaps.h" #include "xfs_exchrange.h" +#include "xfs_parent.h" /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 5621db48e763..05cb59bd0b80 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -87,6 +87,9 @@ struct xfs_bmap_intent; struct xfs_exchmaps_intent; struct xfs_exchmaps_req; struct xfs_exchrange; +struct xfs_getparents; +struct xfs_parent_irec; +struct xfs_attrlist_cursor_kern; #define XFS_ATTR_FILTER_FLAGS \ { XFS_ATTR_ROOT, "ROOT" }, \ @@ -5096,6 +5099,95 @@ TRACE_EVENT(xfs_exchmaps_delta_nextents, __entry->d_nexts1, __entry->d_nexts2) ); +DECLARE_EVENT_CLASS(xfs_getparents_rec_class, + TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi, + const struct xfs_attr_list_context *context, + const struct xfs_getparents_rec *pptr), + TP_ARGS(ip, ppi, context, pptr), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(unsigned int, firstu) + __field(unsigned short, reclen) + __field(unsigned int, bufsize) + __field(xfs_ino_t, parent_ino) + __field(unsigned int, parent_gen) + __string(name, pptr->gpr_name) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->firstu = context->firstu; + __entry->reclen = pptr->gpr_reclen; + __entry->bufsize = ppi->gp_bufsize; + __entry->parent_ino = pptr->gpr_parent.ha_fid.fid_ino; + __entry->parent_gen = pptr->gpr_parent.ha_fid.fid_gen; + __assign_str(name, pptr->gpr_name); + ), + TP_printk("dev %d:%d ino 0x%llx firstu %u reclen %u bufsize %u parent_ino 0x%llx parent_gen 0x%x name '%s'", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->firstu, + __entry->reclen, + __entry->bufsize, + __entry->parent_ino, + __entry->parent_gen, + __get_str(name)) +) +#define DEFINE_XFS_GETPARENTS_REC_EVENT(name) \ +DEFINE_EVENT(xfs_getparents_rec_class, name, \ + TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi, \ + const struct xfs_attr_list_context *context, \ + const struct xfs_getparents_rec *pptr), \ + TP_ARGS(ip, ppi, context, pptr)) +DEFINE_XFS_GETPARENTS_REC_EVENT(xfs_getparents_put_listent); +DEFINE_XFS_GETPARENTS_REC_EVENT(xfs_getparents_expand_lastrec); + +DECLARE_EVENT_CLASS(xfs_getparents_class, + TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi, + const struct xfs_attrlist_cursor_kern *cur), + TP_ARGS(ip, ppi, cur), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(unsigned short, iflags) + __field(unsigned short, oflags) + __field(unsigned int, bufsize) + __field(unsigned int, hashval) + __field(unsigned int, blkno) + __field(unsigned int, offset) + __field(int, initted) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->iflags = ppi->gp_iflags; + __entry->oflags = ppi->gp_oflags; + __entry->bufsize = ppi->gp_bufsize; + __entry->hashval = cur->hashval; + __entry->blkno = cur->blkno; + __entry->offset = cur->offset; + __entry->initted = cur->initted; + ), + TP_printk("dev %d:%d ino 0x%llx iflags 0x%x oflags 0x%x bufsize %u cur_init? %d hashval 0x%x blkno %u offset %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->iflags, + __entry->oflags, + __entry->bufsize, + __entry->initted, + __entry->hashval, + __entry->blkno, + __entry->offset) +) +#define DEFINE_XFS_GETPARENTS_EVENT(name) \ +DEFINE_EVENT(xfs_getparents_class, name, \ + TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi, \ + const struct xfs_attrlist_cursor_kern *cur), \ + TP_ARGS(ip, ppi, cur)) +DEFINE_XFS_GETPARENTS_EVENT(xfs_getparents_begin); +DEFINE_XFS_GETPARENTS_EVENT(xfs_getparents_end); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3-59-g8ed1b From be7cf174e908b1f350dd3ae4fbdf335f22af3273 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:48:24 -0700 Subject: xfs: move xfs_ioc_scrub_metadata to scrub.c Move the scrub ioctl handler to scrub.c to keep the code together and to reduce unnecessary code when CONFIG_XFS_ONLINE_SCRUB=n. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/scrub.c | 27 ++++++++++++++++++++++++++- fs/xfs/scrub/xfs_scrub.h | 4 ++-- fs/xfs/xfs_ioctl.c | 24 ------------------------ 3 files changed, 28 insertions(+), 27 deletions(-) (limited to 'fs/xfs/xfs_ioctl.c') diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 4a81f828f9f1..1456cc11c406 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -578,7 +578,7 @@ xchk_scrub_create_subord( } /* Dispatch metadata scrubbing. */ -int +STATIC int xfs_scrub_metadata( struct file *file, struct xfs_scrub_metadata *sm) @@ -724,3 +724,28 @@ try_harder: run.retries++; goto retry_op; } + +/* Scrub one aspect of one piece of metadata. */ +int +xfs_ioc_scrub_metadata( + struct file *file, + void __user *arg) +{ + struct xfs_scrub_metadata scrub; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&scrub, arg, sizeof(scrub))) + return -EFAULT; + + error = xfs_scrub_metadata(file, &scrub); + if (error) + return error; + + if (copy_to_user(arg, &scrub, sizeof(scrub))) + return -EFAULT; + + return 0; +} diff --git a/fs/xfs/scrub/xfs_scrub.h b/fs/xfs/scrub/xfs_scrub.h index a39befa743ce..02c930f175d0 100644 --- a/fs/xfs/scrub/xfs_scrub.h +++ b/fs/xfs/scrub/xfs_scrub.h @@ -7,9 +7,9 @@ #define __XFS_SCRUB_H__ #ifndef CONFIG_XFS_ONLINE_SCRUB -# define xfs_scrub_metadata(file, sm) (-ENOTTY) +# define xfs_ioc_scrub_metadata(f, a) (-ENOTTY) #else -int xfs_scrub_metadata(struct file *file, struct xfs_scrub_metadata *sm); +int xfs_ioc_scrub_metadata(struct file *file, void __user *arg); #endif /* CONFIG_XFS_ONLINE_SCRUB */ #endif /* __XFS_SCRUB_H__ */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 0e97070abe80..857b19428984 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1055,30 +1055,6 @@ out_free: return error; } -STATIC int -xfs_ioc_scrub_metadata( - struct file *file, - void __user *arg) -{ - struct xfs_scrub_metadata scrub; - int error; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (copy_from_user(&scrub, arg, sizeof(scrub))) - return -EFAULT; - - error = xfs_scrub_metadata(file, &scrub); - if (error) - return error; - - if (copy_to_user(arg, &scrub, sizeof(scrub))) - return -EFAULT; - - return 0; -} - int xfs_ioc_swapext( xfs_swapext_t *sxp) -- cgit v1.2.3-59-g8ed1b From c77b37584c2d1054452853e47e42c7350b8fe687 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:48:25 -0700 Subject: xfs: introduce vectored scrub mode Introduce a variant on XFS_SCRUB_METADATA that allows for a vectored mode. The caller specifies the principal metadata object that they want to scrub (allocation group, inode, etc.) once, followed by an array of scrub types they want called on that object. The kernel runs the scrub operations and writes the output flags and errno code to the corresponding array element. A new pseudo scrub type BARRIER is introduced to force the kernel to return to userspace if any corruptions have been found when scrubbing the previous scrub types in the array. This enables userspace to schedule, for example, the sequence: 1. data fork 2. barrier 3. directory If the data fork scrub is clean, then the kernel will perform the directory scrub. If not, the barrier in 2 will exit back to userspace. The alternative would have been an interface where userspace passes a pointer to an empty buffer, and the kernel formats that with xfs_scrub_vecs that tell userspace what it scrubbed and what the outcome was. With that the kernel would have to communicate that the buffer needed to have been at least X size, even though for our cases XFS_SCRUB_TYPE_NR + 2 would always be enough. Compared to that, this design keeps all the dependency policy and ordering logic in userspace where it already resides instead of duplicating it in the kernel. The downside of that is that it needs the barrier logic. When running fstests in "rebuild all metadata after each test" mode, I observed a 10% reduction in runtime due to fewer transitions across the system call boundary. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_fs.h | 33 +++++++++++ fs/xfs/scrub/scrub.c | 149 +++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/trace.h | 79 ++++++++++++++++++++++++- fs/xfs/scrub/xfs_scrub.h | 2 + fs/xfs/xfs_ioctl.c | 2 + 5 files changed, 264 insertions(+), 1 deletion(-) (limited to 'fs/xfs/xfs_ioctl.c') diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 7ae1912cdbfc..97996cb79aaa 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -725,6 +725,15 @@ struct xfs_scrub_metadata { /* Number of scrub subcommands. */ #define XFS_SCRUB_TYPE_NR 29 +/* + * This special type code only applies to the vectored scrub implementation. + * + * If any of the previous scrub vectors recorded runtime errors or have + * sv_flags bits set that match the OFLAG bits in the barrier vector's + * sv_flags, set the barrier's sv_ret to -ECANCELED and return to userspace. + */ +#define XFS_SCRUB_TYPE_BARRIER (0xFFFFFFFF) + /* i: Repair this metadata. */ #define XFS_SCRUB_IFLAG_REPAIR (1u << 0) @@ -769,6 +778,29 @@ struct xfs_scrub_metadata { XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED) #define XFS_SCRUB_FLAGS_ALL (XFS_SCRUB_FLAGS_IN | XFS_SCRUB_FLAGS_OUT) +/* Vectored scrub calls to reduce the number of kernel transitions. */ + +struct xfs_scrub_vec { + __u32 sv_type; /* XFS_SCRUB_TYPE_* */ + __u32 sv_flags; /* XFS_SCRUB_FLAGS_* */ + __s32 sv_ret; /* 0 or a negative error code */ + __u32 sv_reserved; /* must be zero */ +}; + +/* Vectored metadata scrub control structure. */ +struct xfs_scrub_vec_head { + __u64 svh_ino; /* inode number. */ + __u32 svh_gen; /* inode generation. */ + __u32 svh_agno; /* ag number. */ + __u32 svh_flags; /* XFS_SCRUB_VEC_FLAGS_* */ + __u16 svh_rest_us; /* wait this much time between vector items */ + __u16 svh_nr; /* number of svh_vectors */ + __u64 svh_reserved; /* must be zero */ + __u64 svh_vectors; /* pointer to buffer of xfs_scrub_vec */ +}; + +#define XFS_SCRUB_VEC_FLAGS_ALL (0) + /* * ioctl limits */ @@ -928,6 +960,7 @@ struct xfs_getparents_by_handle { #define XFS_IOC_AG_GEOMETRY _IOWR('X', 61, struct xfs_ag_geometry) #define XFS_IOC_GETPARENTS _IOWR('X', 62, struct xfs_getparents) #define XFS_IOC_GETPARENTS_BY_HANDLE _IOWR('X', 63, struct xfs_getparents_by_handle) +#define XFS_IOC_SCRUBV_METADATA _IOWR('X', 64, struct xfs_scrub_vec_head) /* * ioctl commands that replace IRIX syssgi()'s diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 1456cc11c406..78b00ab85c9c 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -21,6 +21,7 @@ #include "xfs_exchmaps.h" #include "xfs_dir2.h" #include "xfs_parent.h" +#include "xfs_icache.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" @@ -749,3 +750,151 @@ xfs_ioc_scrub_metadata( return 0; } + +/* Decide if there have been any scrub failures up to this point. */ +static inline int +xfs_scrubv_check_barrier( + struct xfs_mount *mp, + const struct xfs_scrub_vec *vectors, + const struct xfs_scrub_vec *stop_vec) +{ + const struct xfs_scrub_vec *v; + __u32 failmask; + + failmask = stop_vec->sv_flags & XFS_SCRUB_FLAGS_OUT; + + for (v = vectors; v < stop_vec; v++) { + if (v->sv_type == XFS_SCRUB_TYPE_BARRIER) + continue; + + /* + * Runtime errors count as a previous failure, except the ones + * used to ask userspace to retry. + */ + switch (v->sv_ret) { + case -EBUSY: + case -ENOENT: + case -EUSERS: + case 0: + break; + default: + return -ECANCELED; + } + + /* + * If any of the out-flags on the scrub vector match the mask + * that was set on the barrier vector, that's a previous fail. + */ + if (v->sv_flags & failmask) + return -ECANCELED; + } + + return 0; +} + +/* Vectored scrub implementation to reduce ioctl calls. */ +int +xfs_ioc_scrubv_metadata( + struct file *file, + void __user *arg) +{ + struct xfs_scrub_vec_head head; + struct xfs_scrub_vec_head __user *uhead = arg; + struct xfs_scrub_vec *vectors; + struct xfs_scrub_vec __user *uvectors; + struct xfs_inode *ip_in = XFS_I(file_inode(file)); + struct xfs_mount *mp = ip_in->i_mount; + struct xfs_scrub_vec *v; + size_t vec_bytes; + unsigned int i; + int error = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&head, uhead, sizeof(head))) + return -EFAULT; + + if (head.svh_reserved) + return -EINVAL; + if (head.svh_flags & ~XFS_SCRUB_VEC_FLAGS_ALL) + return -EINVAL; + if (head.svh_nr == 0) + return 0; + + vec_bytes = array_size(head.svh_nr, sizeof(struct xfs_scrub_vec)); + if (vec_bytes > PAGE_SIZE) + return -ENOMEM; + + uvectors = (void __user *)(uintptr_t)head.svh_vectors; + vectors = memdup_user(uvectors, vec_bytes); + if (IS_ERR(vectors)) + return PTR_ERR(vectors); + + trace_xchk_scrubv_start(ip_in, &head); + + for (i = 0, v = vectors; i < head.svh_nr; i++, v++) { + if (v->sv_reserved) { + error = -EINVAL; + goto out_free; + } + + if (v->sv_type == XFS_SCRUB_TYPE_BARRIER && + (v->sv_flags & ~XFS_SCRUB_FLAGS_OUT)) { + error = -EINVAL; + goto out_free; + } + + trace_xchk_scrubv_item(mp, &head, i, v); + } + + /* Run all the scrubbers. */ + for (i = 0, v = vectors; i < head.svh_nr; i++, v++) { + struct xfs_scrub_metadata sm = { + .sm_type = v->sv_type, + .sm_flags = v->sv_flags, + .sm_ino = head.svh_ino, + .sm_gen = head.svh_gen, + .sm_agno = head.svh_agno, + }; + + if (v->sv_type == XFS_SCRUB_TYPE_BARRIER) { + v->sv_ret = xfs_scrubv_check_barrier(mp, vectors, v); + if (v->sv_ret) { + trace_xchk_scrubv_barrier_fail(mp, &head, i, v); + break; + } + + continue; + } + + v->sv_ret = xfs_scrub_metadata(file, &sm); + v->sv_flags = sm.sm_flags; + + trace_xchk_scrubv_outcome(mp, &head, i, v); + + if (head.svh_rest_us) { + ktime_t expires; + + expires = ktime_add_ns(ktime_get(), + head.svh_rest_us * 1000); + set_current_state(TASK_KILLABLE); + schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); + } + + if (fatal_signal_pending(current)) { + error = -EINTR; + goto out_free; + } + } + + if (copy_to_user(uvectors, vectors, vec_bytes) || + copy_to_user(uhead, &head, sizeof(head))) { + error = -EFAULT; + goto out_free; + } + +out_free: + kfree(vectors); + return error; +} diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index b3756722bee1..8ce74bd8530a 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -69,6 +69,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_QUOTACHECK); TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_NLINKS); TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_HEALTHY); TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_DIRTREE); +TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BARRIER); #define XFS_SCRUB_TYPE_STRINGS \ { XFS_SCRUB_TYPE_PROBE, "probe" }, \ @@ -99,7 +100,8 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_DIRTREE); { XFS_SCRUB_TYPE_QUOTACHECK, "quotacheck" }, \ { XFS_SCRUB_TYPE_NLINKS, "nlinks" }, \ { XFS_SCRUB_TYPE_HEALTHY, "healthy" }, \ - { XFS_SCRUB_TYPE_DIRTREE, "dirtree" } + { XFS_SCRUB_TYPE_DIRTREE, "dirtree" }, \ + { XFS_SCRUB_TYPE_BARRIER, "barrier" } #define XFS_SCRUB_FLAG_STRINGS \ { XFS_SCRUB_IFLAG_REPAIR, "repair" }, \ @@ -208,6 +210,81 @@ DEFINE_EVENT(xchk_fsgate_class, name, \ DEFINE_SCRUB_FSHOOK_EVENT(xchk_fsgates_enable); DEFINE_SCRUB_FSHOOK_EVENT(xchk_fsgates_disable); +DECLARE_EVENT_CLASS(xchk_vector_head_class, + TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_vec_head *vhead), + TP_ARGS(ip, vhead), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_agnumber_t, agno) + __field(xfs_ino_t, inum) + __field(unsigned int, gen) + __field(unsigned int, flags) + __field(unsigned short, rest_us) + __field(unsigned short, nr_vecs) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->agno = vhead->svh_agno; + __entry->inum = vhead->svh_ino; + __entry->gen = vhead->svh_gen; + __entry->flags = vhead->svh_flags; + __entry->rest_us = vhead->svh_rest_us; + __entry->nr_vecs = vhead->svh_nr; + ), + TP_printk("dev %d:%d ino 0x%llx agno 0x%x inum 0x%llx gen 0x%x flags 0x%x rest_us %u nr_vecs %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->agno, + __entry->inum, + __entry->gen, + __entry->flags, + __entry->rest_us, + __entry->nr_vecs) +) +#define DEFINE_SCRUBV_HEAD_EVENT(name) \ +DEFINE_EVENT(xchk_vector_head_class, name, \ + TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_vec_head *vhead), \ + TP_ARGS(ip, vhead)) + +DEFINE_SCRUBV_HEAD_EVENT(xchk_scrubv_start); + +DECLARE_EVENT_CLASS(xchk_vector_class, + TP_PROTO(struct xfs_mount *mp, struct xfs_scrub_vec_head *vhead, + unsigned int vec_nr, struct xfs_scrub_vec *v), + TP_ARGS(mp, vhead, vec_nr, v), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, vec_nr) + __field(unsigned int, vec_type) + __field(unsigned int, vec_flags) + __field(int, vec_ret) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->vec_nr = vec_nr; + __entry->vec_type = v->sv_type; + __entry->vec_flags = v->sv_flags; + __entry->vec_ret = v->sv_ret; + ), + TP_printk("dev %d:%d vec[%u] type %s flags %s ret %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->vec_nr, + __print_symbolic(__entry->vec_type, XFS_SCRUB_TYPE_STRINGS), + __print_flags(__entry->vec_flags, "|", XFS_SCRUB_FLAG_STRINGS), + __entry->vec_ret) +) +#define DEFINE_SCRUBV_EVENT(name) \ +DEFINE_EVENT(xchk_vector_class, name, \ + TP_PROTO(struct xfs_mount *mp, struct xfs_scrub_vec_head *vhead, \ + unsigned int vec_nr, struct xfs_scrub_vec *v), \ + TP_ARGS(mp, vhead, vec_nr, v)) + +DEFINE_SCRUBV_EVENT(xchk_scrubv_barrier_fail); +DEFINE_SCRUBV_EVENT(xchk_scrubv_item); +DEFINE_SCRUBV_EVENT(xchk_scrubv_outcome); + TRACE_EVENT(xchk_op_error, TP_PROTO(struct xfs_scrub *sc, xfs_agnumber_t agno, xfs_agblock_t bno, int error, void *ret_ip), diff --git a/fs/xfs/scrub/xfs_scrub.h b/fs/xfs/scrub/xfs_scrub.h index 02c930f175d0..f17173b83e6f 100644 --- a/fs/xfs/scrub/xfs_scrub.h +++ b/fs/xfs/scrub/xfs_scrub.h @@ -8,8 +8,10 @@ #ifndef CONFIG_XFS_ONLINE_SCRUB # define xfs_ioc_scrub_metadata(f, a) (-ENOTTY) +# define xfs_ioc_scrubv_metadata(f, a) (-ENOTTY) #else int xfs_ioc_scrub_metadata(struct file *file, void __user *arg); +int xfs_ioc_scrubv_metadata(struct file *file, void __user *arg); #endif /* CONFIG_XFS_ONLINE_SCRUB */ #endif /* __XFS_SCRUB_H__ */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 857b19428984..f0117188f302 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1413,6 +1413,8 @@ xfs_file_ioctl( case FS_IOC_GETFSMAP: return xfs_ioc_getfsmap(ip, arg); + case XFS_IOC_SCRUBV_METADATA: + return xfs_ioc_scrubv_metadata(filp, arg); case XFS_IOC_SCRUB_METADATA: return xfs_ioc_scrub_metadata(filp, arg); -- cgit v1.2.3-59-g8ed1b