Diffstat (limited to 'fs')
-rw-r--r--  fs/cachefiles/ondemand.c            |  3
-rw-r--r--  fs/ceph/caps.c                      |  1
-rw-r--r--  fs/fscache/cookie.c                 | 26
-rw-r--r--  fs/fscache/volume.c                 |  4
-rw-r--r--  fs/io_uring.c                       | 24
-rw-r--r--  fs/ksmbd/smb2pdu.c                  | 48
-rw-r--r--  fs/ksmbd/transport_rdma.c           | 10
-rw-r--r--  fs/ksmbd/transport_tcp.c            |  2
-rw-r--r--  fs/ksmbd/vfs.c                      | 12
-rw-r--r--  fs/nfs/nfs4proc.c                   | 19
-rw-r--r--  fs/nfs/nfs4state.c                  |  1
-rw-r--r--  fs/nfsd/vfs.c                       | 11
-rw-r--r--  fs/notify/fanotify/fanotify_user.c  | 34
-rw-r--r--  fs/read_write.c                     | 77
-rw-r--r--  fs/xfs/libxfs/xfs_attr.c            | 38
-rw-r--r--  fs/xfs/libxfs/xfs_attr.h            |  5
-rw-r--r--  fs/xfs/libxfs/xfs_attr_leaf.c       | 35
-rw-r--r--  fs/xfs/libxfs/xfs_attr_leaf.h       |  3
-rw-r--r--  fs/xfs/xfs_attr_item.c              | 27
-rw-r--r--  fs/xfs/xfs_bmap_util.c              |  2
-rw-r--r--  fs/xfs/xfs_icache.c                 | 56
-rw-r--r--  fs/xfs/xfs_icache.h                 |  1
-rw-r--r--  fs/xfs/xfs_inode.c                  | 64
-rw-r--r--  fs/xfs/xfs_log.c                    |  9
-rw-r--r--  fs/xfs/xfs_mount.h                  |  2
-rw-r--r--  fs/xfs/xfs_qm_syscalls.c            |  9
-rw-r--r--  fs/xfs/xfs_super.c                  |  9
-rw-r--r--  fs/xfs/xfs_trace.h                  |  1
28 files changed, 300 insertions, 233 deletions
diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
index a41ae6efc545..1fee702d5529 100644
--- a/fs/cachefiles/ondemand.c
+++ b/fs/cachefiles/ondemand.c
@@ -21,7 +21,8 @@ static int cachefiles_ondemand_fd_release(struct inode *inode,
* anon_fd.
*/
xas_for_each(&xas, req, ULONG_MAX) {
- if (req->msg.opcode == CACHEFILES_OP_READ) {
+ if (req->msg.object_id == object_id &&
+ req->msg.opcode == CACHEFILES_OP_READ) {
req->error = -EIO;
complete(&req->done);
xas_store(&xas, NULL);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 38c930384d41..ac8fd5e7f540 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4377,6 +4377,7 @@ static void flush_dirty_session_caps(struct ceph_mds_session *s)
ihold(inode);
dout("flush_dirty_caps %llx.%llx\n", ceph_vinop(inode));
spin_unlock(&mdsc->cap_dirty_lock);
+ ceph_wait_on_async_create(inode);
ceph_check_caps(ci, CHECK_CAPS_FLUSH, NULL);
iput(inode);
spin_lock(&mdsc->cap_dirty_lock);
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 9d3cf0111709..74920826d8f6 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -372,17 +372,22 @@ nomem:
return NULL;
}
+static inline bool fscache_cookie_is_dropped(struct fscache_cookie *cookie)
+{
+ return READ_ONCE(cookie->state) == FSCACHE_COOKIE_STATE_DROPPED;
+}
+
static void fscache_wait_on_collision(struct fscache_cookie *candidate,
struct fscache_cookie *wait_for)
{
enum fscache_cookie_state *statep = &wait_for->state;
- wait_var_event_timeout(statep, READ_ONCE(*statep) == FSCACHE_COOKIE_STATE_DROPPED,
+ wait_var_event_timeout(statep, fscache_cookie_is_dropped(wait_for),
20 * HZ);
- if (READ_ONCE(*statep) != FSCACHE_COOKIE_STATE_DROPPED) {
+ if (!fscache_cookie_is_dropped(wait_for)) {
pr_notice("Potential collision c=%08x old: c=%08x",
candidate->debug_id, wait_for->debug_id);
- wait_var_event(statep, READ_ONCE(*statep) == FSCACHE_COOKIE_STATE_DROPPED);
+ wait_var_event(statep, fscache_cookie_is_dropped(wait_for));
}
}
@@ -517,7 +522,14 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie)
}
fscache_see_cookie(cookie, fscache_cookie_see_active);
- fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_ACTIVE);
+ spin_lock(&cookie->lock);
+ if (test_and_clear_bit(FSCACHE_COOKIE_DO_INVALIDATE, &cookie->flags))
+ __fscache_set_cookie_state(cookie,
+ FSCACHE_COOKIE_STATE_INVALIDATING);
+ else
+ __fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_ACTIVE);
+ spin_unlock(&cookie->lock);
+ wake_up_cookie_state(cookie);
trace = fscache_access_lookup_cookie_end;
out:
@@ -752,6 +764,9 @@ again_locked:
spin_lock(&cookie->lock);
}
+ if (test_and_clear_bit(FSCACHE_COOKIE_DO_INVALIDATE, &cookie->flags))
+ fscache_end_cookie_access(cookie, fscache_access_invalidate_cookie_end);
+
switch (state) {
case FSCACHE_COOKIE_STATE_RELINQUISHING:
fscache_see_cookie(cookie, fscache_cookie_see_relinquish);
@@ -1048,6 +1063,9 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
return;
case FSCACHE_COOKIE_STATE_LOOKING_UP:
+ __fscache_begin_cookie_access(cookie, fscache_access_invalidate_cookie);
+ set_bit(FSCACHE_COOKIE_DO_INVALIDATE, &cookie->flags);
+ fallthrough;
case FSCACHE_COOKIE_STATE_CREATING:
spin_unlock(&cookie->lock);
_leave(" [look %x]", cookie->inval_counter);
diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c
index f2aa7dbad766..a058e0136bfe 100644
--- a/fs/fscache/volume.c
+++ b/fs/fscache/volume.c
@@ -143,7 +143,7 @@ static void fscache_wait_on_volume_collision(struct fscache_volume *candidate,
{
wait_var_event_timeout(&candidate->flags,
!fscache_is_acquire_pending(candidate), 20 * HZ);
- if (!fscache_is_acquire_pending(candidate)) {
+ if (fscache_is_acquire_pending(candidate)) {
pr_notice("Potential volume collision new=%08x old=%08x",
candidate->debug_id, collidee_debug_id);
fscache_stat(&fscache_n_volumes_collision);
@@ -182,7 +182,7 @@ static bool fscache_hash_volume(struct fscache_volume *candidate)
hlist_bl_add_head(&candidate->hash_link, h);
hlist_bl_unlock(h);
- if (test_bit(FSCACHE_VOLUME_ACQUIRE_PENDING, &candidate->flags))
+ if (fscache_is_acquire_pending(candidate))
fscache_wait_on_volume_collision(candidate, collidee_debug_id);
return true;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5ff2cdb425bc..a01ea49f3017 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1183,6 +1183,7 @@ static const struct io_op_def io_op_defs[] = {
.unbound_nonreg_file = 1,
.pollout = 1,
.needs_async_setup = 1,
+ .ioprio = 1,
.async_size = sizeof(struct io_async_msghdr),
},
[IORING_OP_RECVMSG] = {
@@ -1191,6 +1192,7 @@ static const struct io_op_def io_op_defs[] = {
.pollin = 1,
.buffer_select = 1,
.needs_async_setup = 1,
+ .ioprio = 1,
.async_size = sizeof(struct io_async_msghdr),
},
[IORING_OP_TIMEOUT] = {
@@ -1266,6 +1268,7 @@ static const struct io_op_def io_op_defs[] = {
.unbound_nonreg_file = 1,
.pollout = 1,
.audit_skip = 1,
+ .ioprio = 1,
},
[IORING_OP_RECV] = {
.needs_file = 1,
@@ -1273,6 +1276,7 @@ static const struct io_op_def io_op_defs[] = {
.pollin = 1,
.buffer_select = 1,
.audit_skip = 1,
+ .ioprio = 1,
},
[IORING_OP_OPENAT2] = {
},
@@ -4314,18 +4318,19 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
if (unlikely(ret < 0))
return ret;
} else {
+ rw = req->async_data;
+ s = &rw->s;
+
/*
* Safe and required to re-import if we're using provided
* buffers, as we dropped the selected one before retry.
*/
- if (req->flags & REQ_F_BUFFER_SELECT) {
+ if (io_do_buffer_select(req)) {
ret = io_import_iovec(READ, req, &iovec, s, issue_flags);
if (unlikely(ret < 0))
return ret;
}
- rw = req->async_data;
- s = &rw->s;
/*
* We come here from an earlier attempt, restore our state to
* match in case it doesn't. It's cheap enough that we don't
@@ -5061,7 +5066,7 @@ static int io_uring_cmd_prep(struct io_kiocb *req,
{
struct io_uring_cmd *ioucmd = &req->uring_cmd;
- if (sqe->rw_flags)
+ if (sqe->rw_flags || sqe->__pad1)
return -EINVAL;
ioucmd->cmd = sqe->cmd;
ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
@@ -6075,12 +6080,12 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_sr_msg *sr = &req->sr_msg;
- if (unlikely(sqe->file_index))
+ if (unlikely(sqe->file_index || sqe->addr2))
return -EINVAL;
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
- sr->flags = READ_ONCE(sqe->addr2);
+ sr->flags = READ_ONCE(sqe->ioprio);
if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
return -EINVAL;
sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
@@ -6311,12 +6316,12 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_sr_msg *sr = &req->sr_msg;
- if (unlikely(sqe->file_index))
+ if (unlikely(sqe->file_index || sqe->addr2))
return -EINVAL;
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
- sr->flags = READ_ONCE(sqe->addr2);
+ sr->flags = READ_ONCE(sqe->ioprio);
if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
return -EINVAL;
sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
@@ -7968,6 +7973,9 @@ static int io_files_update_with_index_alloc(struct io_kiocb *req,
struct file *file;
int ret, fd;
+ if (!req->ctx->file_data)
+ return -ENXIO;
+
for (done = 0; done < req->rsrc_update.nr_args; done++) {
if (copy_from_user(&fd, &fds[done], sizeof(fd))) {
ret = -EFAULT;
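
The io_uring hunks move the send/recv flags out of sqe->addr2 into sqe->ioprio and mark those opcodes as accepting ioprio. From userspace with liburing, the per-op flag is set directly on the SQE's ioprio field; a minimal sketch, assuming liburing is installed and sockfd is a connected socket (hypothetical names):

#include <liburing.h>

/* Queue a send that waits for POLLOUT readiness before attempting
 * the socket op (IORING_RECVSEND_POLL_FIRST, kernels 5.19+). */
static int queue_poll_first_send(struct io_uring *ring, int sockfd,
				 const void *buf, size_t len)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

	if (!sqe)
		return -1;
	io_uring_prep_send(sqe, sockfd, buf, len, 0);
	/* Per-op send/recv flags now live in sqe->ioprio, not addr2 */
	sqe->ioprio |= IORING_RECVSEND_POLL_FIRST;
	return io_uring_submit(ring);
}

The prep helpers now also reject a non-zero addr2, which is why stale users of the old field location get -EINVAL instead of silently losing their flags.
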
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index e6f4ccc12f49..353f047e783c 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -6490,6 +6490,7 @@ int smb2_write(struct ksmbd_work *work)
goto out;
}
+ ksmbd_debug(SMB, "flags %u\n", le32_to_cpu(req->Flags));
if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH)
writethrough = true;
@@ -6505,10 +6506,6 @@ int smb2_write(struct ksmbd_work *work)
data_buf = (char *)(((char *)&req->hdr.ProtocolId) +
le16_to_cpu(req->DataOffset));
- ksmbd_debug(SMB, "flags %u\n", le32_to_cpu(req->Flags));
- if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH)
- writethrough = true;
-
ksmbd_debug(SMB, "filename %pd, offset %lld, len %zu\n",
fp->filp->f_path.dentry, offset, length);
err = ksmbd_vfs_write(work, fp, data_buf, length, &offset,
@@ -7703,7 +7700,7 @@ int smb2_ioctl(struct ksmbd_work *work)
{
struct file_zero_data_information *zero_data;
struct ksmbd_file *fp;
- loff_t off, len;
+ loff_t off, len, bfz;
if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
ksmbd_debug(SMB,
@@ -7720,19 +7717,26 @@ int smb2_ioctl(struct ksmbd_work *work)
zero_data =
(struct file_zero_data_information *)&req->Buffer[0];
- fp = ksmbd_lookup_fd_fast(work, id);
- if (!fp) {
- ret = -ENOENT;
+ off = le64_to_cpu(zero_data->FileOffset);
+ bfz = le64_to_cpu(zero_data->BeyondFinalZero);
+ if (off > bfz) {
+ ret = -EINVAL;
goto out;
}
- off = le64_to_cpu(zero_data->FileOffset);
- len = le64_to_cpu(zero_data->BeyondFinalZero) - off;
+ len = bfz - off;
+ if (len) {
+ fp = ksmbd_lookup_fd_fast(work, id);
+ if (!fp) {
+ ret = -ENOENT;
+ goto out;
+ }
- ret = ksmbd_vfs_zero_data(work, fp, off, len);
- ksmbd_fd_put(work, fp);
- if (ret < 0)
- goto out;
+ ret = ksmbd_vfs_zero_data(work, fp, off, len);
+ ksmbd_fd_put(work, fp);
+ if (ret < 0)
+ goto out;
+ }
break;
}
case FSCTL_QUERY_ALLOCATED_RANGES:
@@ -7806,14 +7810,24 @@ int smb2_ioctl(struct ksmbd_work *work)
src_off = le64_to_cpu(dup_ext->SourceFileOffset);
dst_off = le64_to_cpu(dup_ext->TargetFileOffset);
length = le64_to_cpu(dup_ext->ByteCount);
- cloned = vfs_clone_file_range(fp_in->filp, src_off, fp_out->filp,
- dst_off, length, 0);
+ /*
+ * XXX: It is not clear whether FSCTL_DUPLICATE_EXTENTS_TO_FILE
+ * should fall back to vfs_copy_file_range(). That could be
+ * beneficial when re-exporting an nfs/smb mount, but note that
+ * it can result in a partial copy that returns an error status.
+ * If/when FSCTL_DUPLICATE_EXTENTS_TO_FILE_EX is implemented, the
+ * fallback to vfs_copy_file_range() should be avoided when the
+ * DUPLICATE_EXTENTS_DATA_EX_SOURCE_ATOMIC flag is set.
+ */
+ cloned = vfs_clone_file_range(fp_in->filp, src_off,
+ fp_out->filp, dst_off, length, 0);
if (cloned == -EXDEV || cloned == -EOPNOTSUPP) {
ret = -EOPNOTSUPP;
goto dup_ext_out;
} else if (cloned != length) {
cloned = vfs_copy_file_range(fp_in->filp, src_off,
- fp_out->filp, dst_off, length, 0);
+ fp_out->filp, dst_off,
+ length, 0);
if (cloned != length) {
if (cloned < 0)
ret = cloned;
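
The duplicate-extents path above tries a reflink-style clone first and only then falls back to a byte copy. Userspace can express the same clone-then-copy pattern with the FICLONERANGE ioctl and copy_file_range(); a hedged sketch with abbreviated error handling (src_fd/dst_fd assumed open, names hypothetical):

#define _GNU_SOURCE
#include <sys/ioctl.h>
#include <linux/fs.h>		/* FICLONERANGE, struct file_clone_range */
#include <unistd.h>
#include <errno.h>

/* Try to share blocks via reflink; fall back to copying bytes if the
 * filesystem rejects the clone (unsupported, unaligned, cross-device). */
static ssize_t clone_or_copy(int src_fd, long long src_off,
			     int dst_fd, long long dst_off, long long len)
{
	struct file_clone_range fcr = {
		.src_fd      = src_fd,
		.src_offset  = src_off,
		.src_length  = len,
		.dest_offset = dst_off,
	};

	if (ioctl(dst_fd, FICLONERANGE, &fcr) == 0)
		return len;
	if (errno != EOPNOTSUPP && errno != EINVAL && errno != EXDEV)
		return -1;

	off64_t in = src_off, out = dst_off;
	return copy_file_range(src_fd, &in, dst_fd, &out, len, 0);
}

As the XXX comment notes, a byte-copy fallback is not always safe on the server side: a short or failed copy leaves the destination partially written, which a clone never does.
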
diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c
index d035e060c2f0..35b55ee94fe5 100644
--- a/fs/ksmbd/transport_rdma.c
+++ b/fs/ksmbd/transport_rdma.c
@@ -5,16 +5,6 @@
*
* Author(s): Long Li <longli@microsoft.com>,
* Hyunchul Lee <hyc.lee@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
- * the GNU General Public License for more details.
*/
#define SUBMOD_NAME "smb_direct"
diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c
index 8fef9de787d3..143bba4e4db8 100644
--- a/fs/ksmbd/transport_tcp.c
+++ b/fs/ksmbd/transport_tcp.c
@@ -230,7 +230,7 @@ static int ksmbd_kthread_fn(void *p)
break;
}
ret = kernel_accept(iface->ksmbd_socket, &client_sk,
- O_NONBLOCK);
+ SOCK_NONBLOCK);
mutex_unlock(&iface->sock_release_lock);
if (ret) {
if (ret == -EAGAIN)
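
The one-character fix above matters because kernel_accept() takes SOCK_* flags, and O_NONBLOCK has a different value than SOCK_NONBLOCK, so the accepted socket was never actually non-blocking. The userspace analogue of the corrected call is accept4(); a minimal sketch (hypothetical listen_fd):

#define _GNU_SOURCE
#include <sys/socket.h>
#include <errno.h>

/* accept4() takes SOCK_NONBLOCK / SOCK_CLOEXEC, not O_* file flags */
static int accept_nonblocking(int listen_fd)
{
	int fd = accept4(listen_fd, NULL, NULL,
			 SOCK_NONBLOCK | SOCK_CLOEXEC);

	if (fd < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
		return -1;	/* nothing pending; retry later */
	return fd;
}
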
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
index dcdd07c6efff..05efcdf7a4a7 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/ksmbd/vfs.c
@@ -1015,7 +1015,9 @@ int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp,
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
off, len);
- return vfs_fallocate(fp->filp, FALLOC_FL_ZERO_RANGE, off, len);
+ return vfs_fallocate(fp->filp,
+ FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE,
+ off, len);
}
int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
@@ -1046,7 +1048,7 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
*out_count = 0;
end = start + length;
while (start < end && *out_count < in_count) {
- extent_start = f->f_op->llseek(f, start, SEEK_DATA);
+ extent_start = vfs_llseek(f, start, SEEK_DATA);
if (extent_start < 0) {
if (extent_start != -ENXIO)
ret = (int)extent_start;
@@ -1056,7 +1058,7 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
if (extent_start >= end)
break;
- extent_end = f->f_op->llseek(f, extent_start, SEEK_HOLE);
+ extent_end = vfs_llseek(f, extent_start, SEEK_HOLE);
if (extent_end < 0) {
if (extent_end != -ENXIO)
ret = (int)extent_end;
@@ -1777,6 +1779,10 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work,
ret = vfs_copy_file_range(src_fp->filp, src_off,
dst_fp->filp, dst_off, len, 0);
+ if (ret == -EOPNOTSUPP || ret == -EXDEV)
+ ret = generic_copy_file_range(src_fp->filp, src_off,
+ dst_fp->filp, dst_off,
+ len, 0);
if (ret < 0)
return ret;
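
The llseek hunks switch ksmbd to vfs_llseek(), which copes with files whose f_op does not provide ->llseek. The SEEK_DATA/SEEK_HOLE walk it performs to enumerate allocated ranges maps directly onto the userspace lseek() API; a sketch of the same scan, printing each data extent in [start, end) (hypothetical helper, fd assumed open):

#define _GNU_SOURCE
#include <unistd.h>
#include <stdio.h>
#include <errno.h>

static void print_extents(int fd, off_t start, off_t end)
{
	while (start < end) {
		off_t data = lseek(fd, start, SEEK_DATA);

		if (data < 0) {
			if (errno != ENXIO)	/* ENXIO: no more data */
				perror("SEEK_DATA");
			return;
		}
		if (data >= end)
			return;

		off_t hole = lseek(fd, data, SEEK_HOLE);
		if (hole < 0)
			return;
		printf("extent: %lld..%lld\n",
		       (long long)data, (long long)hole);
		start = hole;
	}
}
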
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c0fdcf8c0032..bb0e84a46d61 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4012,22 +4012,29 @@ static int _nfs4_discover_trunking(struct nfs_server *server,
}
page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
- if (page == NULL || locations == NULL)
- goto out;
+ if (!locations)
+ goto out_free;
+ locations->fattr = nfs_alloc_fattr();
+ if (!locations->fattr)
+ goto out_free_2;
status = nfs4_proc_get_locations(server, fhandle, locations, page,
cred);
if (status)
- goto out;
+ goto out_free_3;
for (i = 0; i < locations->nlocations; i++)
test_fs_location_for_trunking(&locations->locations[i], clp,
server);
-out:
- if (page)
- __free_page(page);
+out_free_3:
+ kfree(locations->fattr);
+out_free_2:
kfree(locations);
+out_free:
+ __free_page(page);
return status;
}
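
The trunking-discovery fix restructures the error path into the kernel's usual "goto ladder": each label frees exactly what was allocated before the failure point, and the success path falls through the same frees. The shape, in a minimal standalone C sketch (hypothetical page/locations/fattr stand-ins, malloc in place of the kernel allocators):

#include <stdlib.h>
#include <errno.h>

static int demo_alloc_ladder(void)
{
	void *page, *locations, *fattr;
	int ret = -ENOMEM;

	page = malloc(4096);
	if (!page)
		return -ENOMEM;
	locations = malloc(256);
	if (!locations)
		goto out_free;
	fattr = malloc(128);
	if (!fattr)
		goto out_free_2;

	ret = 0;		/* real work would happen here */

	free(fattr);		/* success path falls through, deepest first */
out_free_2:
	free(locations);
out_free:
	free(page);
	return ret;
}

This replaces the buggy original, which jumped to a single "out:" label that could kfree() a pointer that was never allocated.
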
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 2540b35ec187..9bab3e9c702a 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2753,5 +2753,6 @@ again:
goto again;
nfs_put_client(clp);
+ module_put_and_kthread_exit(0);
return 0;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 840e3af63a6f..d79db56475d4 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -577,6 +577,7 @@ out_err:
ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
u64 dst_pos, u64 count)
{
+ ssize_t ret;
/*
* Limit copy to 4MB to prevent indefinitely blocking an nfsd
@@ -587,7 +588,12 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
* limit like this and pipeline multiple COPY requests.
*/
count = min_t(u64, count, 1 << 22);
- return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
+ ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
+
+ if (ret == -EOPNOTSUPP || ret == -EXDEV)
+ ret = generic_copy_file_range(src, src_pos, dst, dst_pos,
+ count, 0);
+ return ret;
}
__be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
@@ -1173,6 +1179,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
nfsd_copy_write_verifier(verf, nn);
err2 = filemap_check_wb_err(nf->nf_file->f_mapping,
since);
+ err = nfserrno(err2);
break;
case -EINVAL:
err = nfserr_notsupp;
@@ -1180,8 +1187,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
default:
nfsd_reset_write_verifier(nn);
trace_nfsd_writeverf_reset(nn, rqstp, err2);
+ err = nfserrno(err2);
}
- err = nfserrno(err2);
} else
nfsd_copy_write_verifier(verf, nn);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index c2255b440df9..b08ce0d821a7 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1513,8 +1513,15 @@ static int fanotify_test_fid(struct dentry *dentry)
return 0;
}
-static int fanotify_events_supported(struct path *path, __u64 mask)
+static int fanotify_events_supported(struct fsnotify_group *group,
+ struct path *path, __u64 mask,
+ unsigned int flags)
{
+ unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
+ /* Strict validation of events in non-dir inode mask with v5.17+ APIs */
+ bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) ||
+ (mask & FAN_RENAME);
+
/*
* Some filesystems such as 'proc' acquire unusual locks when opening
* files. For them fanotify permission events have high chances of
@@ -1526,6 +1533,16 @@ static int fanotify_events_supported(struct path *path, __u64 mask)
if (mask & FANOTIFY_PERM_EVENTS &&
path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM)
return -EINVAL;
+
+ /*
+ * We shouldn't have allowed setting dirent events and the directory
+ * flags FAN_ONDIR and FAN_EVENT_ON_CHILD in the mask of a non-dir
+ * inode, but because we have always allowed it, error only when the
+ * new APIs are used.
+ */
+ if (strict_dir_events && mark_type == FAN_MARK_INODE &&
+ !d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS))
+ return -ENOTDIR;
+
return 0;
}
@@ -1672,7 +1689,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
goto fput_and_out;
if (flags & FAN_MARK_ADD) {
- ret = fanotify_events_supported(&path, mask);
+ ret = fanotify_events_supported(group, &path, mask, flags);
if (ret)
goto path_put_and_out;
}
@@ -1695,19 +1712,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
else
mnt = path.mnt;
- /*
- * FAN_RENAME is not allowed on non-dir (for now).
- * We shouldn't have allowed setting any dirent events in mask of
- * non-dir, but because we always allowed it, error only if group
- * was initialized with the new flag FAN_REPORT_TARGET_FID.
- */
- ret = -ENOTDIR;
- if (inode && !S_ISDIR(inode->i_mode) &&
- ((mask & FAN_RENAME) ||
- ((mask & FANOTIFY_DIRENT_EVENTS) &&
- FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID))))
- goto path_put_and_out;
-
/* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
if (mnt || !S_ISDIR(inode->i_mode)) {
mask &= ~FAN_EVENT_ON_CHILD;
diff --git a/fs/read_write.c b/fs/read_write.c
index b1b1cdfee9d3..e0777eefd846 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1397,28 +1397,6 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
}
EXPORT_SYMBOL(generic_copy_file_range);
-static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- size_t len, unsigned int flags)
-{
- /*
- * Although we now allow filesystems to handle cross sb copy, passing
- * a file of the wrong filesystem type to filesystem driver can result
- * in an attempt to dereference the wrong type of ->private_data, so
- * avoid doing that until we really have a good reason. NFS defines
- * several different file_system_type structures, but they all end up
- * using the same ->copy_file_range() function pointer.
- */
- if (file_out->f_op->copy_file_range &&
- file_out->f_op->copy_file_range == file_in->f_op->copy_file_range)
- return file_out->f_op->copy_file_range(file_in, pos_in,
- file_out, pos_out,
- len, flags);
-
- return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
- flags);
-}
-
/*
* Performs necessary checks before doing a file copy
*
@@ -1440,6 +1418,24 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
if (ret)
return ret;
+ /*
+ * We allow some filesystems to handle cross sb copy, but passing
+ * a file of the wrong filesystem type to filesystem driver can result
+ * in an attempt to dereference the wrong type of ->private_data, so
+ * avoid doing that until we really have a good reason.
+ *
+ * nfs and cifs define several different file_system_type structures
+ * and several different sets of file_operations, but they all end up
+ * using the same ->copy_file_range() function pointer.
+ */
+ if (file_out->f_op->copy_file_range) {
+ if (file_in->f_op->copy_file_range !=
+ file_out->f_op->copy_file_range)
+ return -EXDEV;
+ } else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) {
+ return -EXDEV;
+ }
+
/* Don't touch certain kinds of inodes */
if (IS_IMMUTABLE(inode_out))
return -EPERM;
@@ -1505,26 +1501,41 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
file_start_write(file_out);
/*
- * Try cloning first, this is supported by more file systems, and
- * more efficient if both clone and copy are supported (e.g. NFS).
+ * Cloning is supported by more file systems, so we implement copy on
+ * same sb using clone, but for filesystems where both clone and copy
+ * are supported (e.g. nfs,cifs), we only call the copy method.
*/
+ if (file_out->f_op->copy_file_range) {
+ ret = file_out->f_op->copy_file_range(file_in, pos_in,
+ file_out, pos_out,
+ len, flags);
+ goto done;
+ }
+
if (file_in->f_op->remap_file_range &&
file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
- loff_t cloned;
-
- cloned = file_in->f_op->remap_file_range(file_in, pos_in,
+ ret = file_in->f_op->remap_file_range(file_in, pos_in,
file_out, pos_out,
min_t(loff_t, MAX_RW_COUNT, len),
REMAP_FILE_CAN_SHORTEN);
- if (cloned > 0) {
- ret = cloned;
+ if (ret > 0)
goto done;
- }
}
- ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len,
- flags);
- WARN_ON_ONCE(ret == -EOPNOTSUPP);
+ /*
+ * We can get here for a same-sb copy on filesystems that do not
+ * implement ->copy_file_range(), either because the filesystem
+ * does not support clone or because it supports clone but rejected
+ * the clone request (e.g. the range was not block aligned).
+ *
+ * In both cases, fall back to a kernel copy so that we can maintain
+ * a consistent story about which filesystems support copy_file_range()
+ * and which do not, allowing userspace tools to make consistent
+ * decisions w.r.t. using copy_file_range().
+ */
+ ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
+ flags);
+
done:
if (ret > 0) {
fsnotify_access(file_in);
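
The net effect of the read_write.c changes is a consistent contract for userspace: copy_file_range() between filesystems that don't share a ->copy_file_range() method now fails with EXDEV instead of silently bouncing through the in-kernel generic copy. Callers are expected to fall back themselves; a minimal sketch of that fallback (hypothetical helper, offsets tracked by the file positions):

#define _GNU_SOURCE
#include <unistd.h>
#include <errno.h>

/* Copy len bytes, falling back to a read/write loop on EXDEV/EOPNOTSUPP */
static ssize_t copy_with_fallback(int in_fd, int out_fd, size_t len)
{
	ssize_t n = copy_file_range(in_fd, NULL, out_fd, NULL, len, 0);

	if (n >= 0 || (errno != EXDEV && errno != EOPNOTSUPP))
		return n;

	char buf[65536];
	size_t total = 0;
	while (total < len) {
		size_t chunk = len - total;
		if (chunk > sizeof(buf))
			chunk = sizeof(buf);
		ssize_t r = read(in_fd, buf, chunk);
		if (r <= 0)
			return r < 0 ? -1 : (ssize_t)total;
		if (write(out_fd, buf, (size_t)r) != r)
			return -1;
		total += (size_t)r;
	}
	return (ssize_t)total;
}

This is the same pattern coreutils-style tools already use, which is what the comment means by letting userspace make consistent decisions.
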
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 1824f61621a2..224649a76cbb 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -50,7 +50,7 @@ STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_hasname(struct xfs_da_args *args, struct xfs_buf **bp);
-STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args, struct xfs_buf *bp);
+STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args);
/*
* Internal routines when attribute list is more than one block.
@@ -393,16 +393,10 @@ xfs_attr_sf_addname(
* It won't fit in the shortform, transform to a leaf block. GROT:
* another possible req'mt for a double-split btree op.
*/
- error = xfs_attr_shortform_to_leaf(args, &attr->xattri_leaf_bp);
+ error = xfs_attr_shortform_to_leaf(args);
if (error)
return error;
- /*
- * Prevent the leaf buffer from being unlocked so that a concurrent AIL
- * push cannot grab the half-baked leaf buffer and run into problems
- * with the write verifier.
- */
- xfs_trans_bhold(args->trans, attr->xattri_leaf_bp);
attr->xattri_dela_state = XFS_DAS_LEAF_ADD;
out:
trace_xfs_attr_sf_addname_return(attr->xattri_dela_state, args->dp);
@@ -447,11 +441,9 @@ xfs_attr_leaf_addname(
/*
* Use the leaf buffer we may already hold locked as a result of
- * a sf-to-leaf conversion. The held buffer is no longer valid
- * after this call, regardless of the result.
+ * a sf-to-leaf conversion.
*/
- error = xfs_attr_leaf_try_add(args, attr->xattri_leaf_bp);
- attr->xattri_leaf_bp = NULL;
+ error = xfs_attr_leaf_try_add(args);
if (error == -ENOSPC) {
error = xfs_attr3_leaf_to_node(args);
@@ -497,8 +489,6 @@ xfs_attr_node_addname(
struct xfs_da_args *args = attr->xattri_da_args;
int error;
- ASSERT(!attr->xattri_leaf_bp);
-
error = xfs_attr_node_addname_find_attr(attr);
if (error)
return error;
@@ -1215,24 +1205,14 @@ xfs_attr_restore_rmt_blk(
*/
STATIC int
xfs_attr_leaf_try_add(
- struct xfs_da_args *args,
- struct xfs_buf *bp)
+ struct xfs_da_args *args)
{
+ struct xfs_buf *bp;
int error;
- /*
- * If the caller provided a buffer to us, it is locked and held in
- * the transaction because it just did a shortform to leaf conversion.
- * Hence we don't need to read it again. Otherwise read in the leaf
- * buffer.
- */
- if (bp) {
- xfs_trans_bhold_release(args->trans, bp);
- } else {
- error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
- if (error)
- return error;
- }
+ error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
+ if (error)
+ return error;
/*
* Look up the xattr name to set the insertion point for the new xattr.
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index b4a2fc77017e..dfb47fa63c6d 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -515,11 +515,6 @@ struct xfs_attr_intent {
*/
struct xfs_attri_log_nameval *xattri_nameval;
- /*
- * Used by xfs_attr_set to hold a leaf buffer across a transaction roll
- */
- struct xfs_buf *xattri_leaf_bp;
-
/* Used to keep track of current state of delayed operation */
enum xfs_delattr_state xattri_dela_state;
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 37e7c33f6283..8f47396f8dd2 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -289,6 +289,23 @@ xfs_attr3_leaf_verify_entry(
return NULL;
}
+/*
+ * Validate an attribute leaf block.
+ *
+ * Empty leaf blocks can occur under the following circumstances:
+ *
+ * 1. setxattr adds a new extended attribute to a file;
+ * 2. The file has zero existing attributes;
+ * 3. The attribute is too large to fit in the attribute fork;
+ * 4. The attribute is small enough to fit in a leaf block;
+ * 5. A log flush occurs after committing the transaction that creates
+ * the (empty) leaf block; and
+ * 6. The filesystem goes down after the log flush but before the new
+ * attribute can be committed to the leaf block.
+ *
+ * Hence we need to ensure that we don't fail the validation purely
+ * because the leaf is empty.
+ */
static xfs_failaddr_t
xfs_attr3_leaf_verify(
struct xfs_buf *bp)
@@ -311,15 +328,6 @@ xfs_attr3_leaf_verify(
return fa;
/*
- * Empty leaf blocks should never occur; they imply the existence of a
- * software bug that needs fixing. xfs_repair also flags them as a
- * corruption that needs fixing, so we should never let these go to
- * disk.
- */
- if (ichdr.count == 0)
- return __this_address;
-
- /*
* firstused is the block offset of the first name info structure.
* Make sure it doesn't go off the block or crash into the header.
*/
@@ -922,14 +930,10 @@ xfs_attr_shortform_getvalue(
return -ENOATTR;
}
-/*
- * Convert from using the shortform to the leaf. On success, return the
- * buffer so that we can keep it locked until we're totally done with it.
- */
+/* Convert from using the shortform to the leaf format. */
int
xfs_attr_shortform_to_leaf(
- struct xfs_da_args *args,
- struct xfs_buf **leaf_bp)
+ struct xfs_da_args *args)
{
struct xfs_inode *dp;
struct xfs_attr_shortform *sf;
@@ -991,7 +995,6 @@ xfs_attr_shortform_to_leaf(
sfe = xfs_attr_sf_nextentry(sfe);
}
error = 0;
- *leaf_bp = bp;
out:
kmem_free(tmpbuffer);
return error;
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index efa757f1e912..368f4d9fa1d5 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -49,8 +49,7 @@ void xfs_attr_shortform_create(struct xfs_da_args *args);
void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
int xfs_attr_shortform_lookup(struct xfs_da_args *args);
int xfs_attr_shortform_getvalue(struct xfs_da_args *args);
-int xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
- struct xfs_buf **leaf_bp);
+int xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
int xfs_attr_sf_removename(struct xfs_da_args *args);
int xfs_attr_sf_findname(struct xfs_da_args *args,
struct xfs_attr_sf_entry **sfep,
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
index 135d44133477..5077a7ad5646 100644
--- a/fs/xfs/xfs_attr_item.c
+++ b/fs/xfs/xfs_attr_item.c
@@ -576,7 +576,7 @@ xfs_attri_item_recover(
struct xfs_trans_res tres;
struct xfs_attri_log_format *attrp;
struct xfs_attri_log_nameval *nv = attrip->attri_nameval;
- int error, ret = 0;
+ int error;
int total;
int local;
struct xfs_attrd_log_item *done_item = NULL;
@@ -655,29 +655,32 @@ xfs_attri_item_recover(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
- ret = xfs_xattri_finish_update(attr, done_item);
- if (ret == -EAGAIN) {
- /* There's more work to do, so add it to this transaction */
+ error = xfs_xattri_finish_update(attr, done_item);
+ if (error == -EAGAIN) {
+ /*
+ * There's more work to do, so add the intent item to this
+ * transaction so that we can continue it later.
+ */
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_ATTR, &attr->xattri_list);
- } else
- error = ret;
+ error = xfs_defer_ops_capture_and_commit(tp, capture_list);
+ if (error)
+ goto out_unlock;
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_irele(ip);
+ return 0;
+ }
if (error) {
xfs_trans_cancel(tp);
goto out_unlock;
}
error = xfs_defer_ops_capture_and_commit(tp, capture_list);
-
out_unlock:
- if (attr->xattri_leaf_bp)
- xfs_buf_relse(attr->xattri_leaf_bp);
-
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_irele(ip);
out:
- if (ret != -EAGAIN)
- xfs_attr_free_item(attr);
+ xfs_attr_free_item(attr);
return error;
}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 52be58372c63..85e1a26c92e8 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -686,6 +686,8 @@ xfs_can_free_eofblocks(
* forever.
*/
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
+ if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1)
+ end_fsb = roundup_64(end_fsb, mp->m_sb.sb_rextsize);
last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
if (last_fsb <= end_fsb)
return false;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 5269354b1b69..2609825d53ee 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -440,7 +440,7 @@ xfs_inodegc_queue_all(
for_each_online_cpu(cpu) {
gc = per_cpu_ptr(mp->m_inodegc, cpu);
if (!llist_empty(&gc->list))
- queue_work_on(cpu, mp->m_inodegc_wq, &gc->work);
+ mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
}
}
@@ -1841,8 +1841,8 @@ void
xfs_inodegc_worker(
struct work_struct *work)
{
- struct xfs_inodegc *gc = container_of(work, struct xfs_inodegc,
- work);
+ struct xfs_inodegc *gc = container_of(to_delayed_work(work),
+ struct xfs_inodegc, work);
struct llist_node *node = llist_del_all(&gc->list);
struct xfs_inode *ip, *n;
@@ -1862,19 +1862,29 @@ xfs_inodegc_worker(
}
/*
- * Force all currently queued inode inactivation work to run immediately and
- * wait for the work to finish.
+ * Expedite all pending inodegc work to run immediately. This does not wait for
+ * completion of the work.
*/
void
-xfs_inodegc_flush(
+xfs_inodegc_push(
struct xfs_mount *mp)
{
if (!xfs_is_inodegc_enabled(mp))
return;
+ trace_xfs_inodegc_push(mp, __return_address);
+ xfs_inodegc_queue_all(mp);
+}
+/*
+ * Force all currently queued inode inactivation work to run immediately and
+ * wait for the work to finish.
+ */
+void
+xfs_inodegc_flush(
+ struct xfs_mount *mp)
+{
+ xfs_inodegc_push(mp);
trace_xfs_inodegc_flush(mp, __return_address);
-
- xfs_inodegc_queue_all(mp);
flush_workqueue(mp->m_inodegc_wq);
}
@@ -2014,6 +2024,7 @@ xfs_inodegc_queue(
struct xfs_inodegc *gc;
int items;
unsigned int shrinker_hits;
+ unsigned long queue_delay = 1;
trace_xfs_inode_set_need_inactive(ip);
spin_lock(&ip->i_flags_lock);
@@ -2025,19 +2036,26 @@ xfs_inodegc_queue(
items = READ_ONCE(gc->items);
WRITE_ONCE(gc->items, items + 1);
shrinker_hits = READ_ONCE(gc->shrinker_hits);
- put_cpu_ptr(gc);
- if (!xfs_is_inodegc_enabled(mp))
+ /*
+ * We queue the work while holding the current CPU so that the work
+ * is scheduled to run on this CPU.
+ */
+ if (!xfs_is_inodegc_enabled(mp)) {
+ put_cpu_ptr(gc);
return;
-
- if (xfs_inodegc_want_queue_work(ip, items)) {
- trace_xfs_inodegc_queue(mp, __return_address);
- queue_work(mp->m_inodegc_wq, &gc->work);
}
+ if (xfs_inodegc_want_queue_work(ip, items))
+ queue_delay = 0;
+
+ trace_xfs_inodegc_queue(mp, __return_address);
+ mod_delayed_work(mp->m_inodegc_wq, &gc->work, queue_delay);
+ put_cpu_ptr(gc);
+
if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) {
trace_xfs_inodegc_throttle(mp, __return_address);
- flush_work(&gc->work);
+ flush_delayed_work(&gc->work);
}
}
@@ -2054,7 +2072,7 @@ xfs_inodegc_cpu_dead(
unsigned int count = 0;
dead_gc = per_cpu_ptr(mp->m_inodegc, dead_cpu);
- cancel_work_sync(&dead_gc->work);
+ cancel_delayed_work_sync(&dead_gc->work);
if (llist_empty(&dead_gc->list))
return;
@@ -2073,12 +2091,12 @@ xfs_inodegc_cpu_dead(
llist_add_batch(first, last, &gc->list);
count += READ_ONCE(gc->items);
WRITE_ONCE(gc->items, count);
- put_cpu_ptr(gc);
if (xfs_is_inodegc_enabled(mp)) {
trace_xfs_inodegc_queue(mp, __return_address);
- queue_work(mp->m_inodegc_wq, &gc->work);
+ mod_delayed_work(mp->m_inodegc_wq, &gc->work, 0);
}
+ put_cpu_ptr(gc);
}
/*
@@ -2173,7 +2191,7 @@ xfs_inodegc_shrinker_scan(
unsigned int h = READ_ONCE(gc->shrinker_hits);
WRITE_ONCE(gc->shrinker_hits, h + 1);
- queue_work_on(cpu, mp->m_inodegc_wq, &gc->work);
+ mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
no_items = false;
}
}
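
The inodegc rework swaps struct work_struct for struct delayed_work so that normal queueing can add a short batching delay while the push/flush/shrinker paths expedite with a zero delay. The relevant workqueue API in a minimal kernel-style sketch (hypothetical my_gc example on system_unbound_wq, not the XFS code itself):

#include <linux/workqueue.h>

static void my_gc_worker(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);

	(void)dwork;	/* container lookup shown for illustration */
	/* ... drain the deferred-work list here ... */
}

static DECLARE_DELAYED_WORK(my_gc_work, my_gc_worker);

static void my_gc_queue(bool busy)
{
	/* A 1-jiffy delay batches bursts; 0 runs it ASAP.  Unlike
	 * queue_delayed_work(), mod_delayed_work() also shortens the
	 * timer when the work is already queued. */
	mod_delayed_work(system_unbound_wq, &my_gc_work, busy ? 0 : 1);
}

static void my_gc_flush(void)
{
	/* Expedite, then wait for completion */
	mod_delayed_work(system_unbound_wq, &my_gc_work, 0);
	flush_delayed_work(&my_gc_work);
}

That split is exactly why xfs_inodegc_push() (expedite, don't wait) could be separated from xfs_inodegc_flush() (expedite and wait), letting statfs() and quota scans avoid blocking behind large truncates.
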
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 2e4cfddf8b8e..6cd180721659 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -76,6 +76,7 @@ void xfs_blockgc_stop(struct xfs_mount *mp);
void xfs_blockgc_start(struct xfs_mount *mp);
void xfs_inodegc_worker(struct work_struct *work);
+void xfs_inodegc_push(struct xfs_mount *mp);
void xfs_inodegc_flush(struct xfs_mount *mp);
void xfs_inodegc_stop(struct xfs_mount *mp);
void xfs_inodegc_start(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 52d6f2c7d58b..3e1c62ffa4f7 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -132,6 +132,26 @@ xfs_ilock_attr_map_shared(
}
/*
+ * You can't set both SHARED and EXCL for the same lock,
+ * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_MMAPLOCK_SHARED,
+ * XFS_MMAPLOCK_EXCL, XFS_ILOCK_SHARED, XFS_ILOCK_EXCL are valid values
+ * to set in lock_flags.
+ */
+static inline void
+xfs_lock_flags_assert(
+ uint lock_flags)
+{
+ ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
+ (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+ ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+ (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
+ ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
+ (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
+ ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
+ ASSERT(lock_flags != 0);
+}
+
+/*
* In addition to i_rwsem in the VFS inode, the xfs inode contains 2
* multi-reader locks: invalidate_lock and the i_lock. This routine allows
* various combinations of the locks to be obtained.
@@ -168,18 +188,7 @@ xfs_ilock(
{
trace_xfs_ilock(ip, lock_flags, _RET_IP_);
- /*
- * You can't set both SHARED and EXCL for the same lock,
- * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
- * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
- */
- ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
- (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
- ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
- (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
- ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
- (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
+ xfs_lock_flags_assert(lock_flags);
if (lock_flags & XFS_IOLOCK_EXCL) {
down_write_nested(&VFS_I(ip)->i_rwsem,
@@ -222,18 +231,7 @@ xfs_ilock_nowait(
{
trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);
- /*
- * You can't set both SHARED and EXCL for the same lock,
- * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
- * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
- */
- ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
- (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
- ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
- (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
- ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
- (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
+ xfs_lock_flags_assert(lock_flags);
if (lock_flags & XFS_IOLOCK_EXCL) {
if (!down_write_trylock(&VFS_I(ip)->i_rwsem))
@@ -291,19 +289,7 @@ xfs_iunlock(
xfs_inode_t *ip,
uint lock_flags)
{
- /*
- * You can't set both SHARED and EXCL for the same lock,
- * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
- * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
- */
- ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
- (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
- ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
- (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
- ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
- (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
- ASSERT(lock_flags != 0);
+ xfs_lock_flags_assert(lock_flags);
if (lock_flags & XFS_IOLOCK_EXCL)
up_write(&VFS_I(ip)->i_rwsem);
@@ -379,8 +365,8 @@ xfs_isilocked(
}
if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
- return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem,
- (lock_flags & XFS_IOLOCK_SHARED));
+ return __xfs_rwsem_islocked(&VFS_I(ip)->i_mapping->invalidate_lock,
+ (lock_flags & XFS_MMAPLOCK_SHARED));
}
if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) {
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 1e972f884a81..ae904b21e9cc 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2092,8 +2092,6 @@ xlog_dealloc_log(
xlog_in_core_t *iclog, *next_iclog;
int i;
- xlog_cil_destroy(log);
-
/*
* Cycle all the iclogbuf locks to make sure all log IO completion
* is done before we tear down these buffers.
@@ -2105,6 +2103,13 @@ xlog_dealloc_log(
iclog = iclog->ic_next;
}
+ /*
+ * Destroy the CIL after waiting for iclog IO completion because an
+ * iclog EIO error will try to shut down the log, which accesses the
+ * CIL to wake up the waiters.
+ */
+ xlog_cil_destroy(log);
+
iclog = log->l_iclog;
for (i = 0; i < log->l_iclog_bufs; i++) {
next_iclog = iclog->ic_next;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index ba5d42abf66e..d2eaebd85abf 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -61,7 +61,7 @@ struct xfs_error_cfg {
*/
struct xfs_inodegc {
struct llist_head list;
- struct work_struct work;
+ struct delayed_work work;
/* approximate count of inodes in the list */
unsigned int items;
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 74ac9ca9e119..392cb39cc10c 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -454,9 +454,12 @@ xfs_qm_scall_getquota(
struct xfs_dquot *dqp;
int error;
- /* Flush inodegc work at the start of a quota reporting scan. */
+ /*
+ * Expedite pending inodegc work at the start of a quota reporting
+ * scan but don't block waiting for it to complete.
+ */
if (id == 0)
- xfs_inodegc_flush(mp);
+ xfs_inodegc_push(mp);
/*
* Try to get the dquot. We don't want it allocated on disk, so don't
@@ -498,7 +501,7 @@ xfs_qm_scall_getquota_next(
/* Flush inodegc work at the start of a quota reporting scan. */
if (*id == 0)
- xfs_inodegc_flush(mp);
+ xfs_inodegc_push(mp);
error = xfs_qm_dqget_next(mp, *id, type, &dqp);
if (error)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ed18160e6181..aa977c7ea370 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -797,8 +797,11 @@ xfs_fs_statfs(
xfs_extlen_t lsize;
int64_t ffree;
- /* Wait for whatever inactivations are in progress. */
- xfs_inodegc_flush(mp);
+ /*
+ * Expedite background inodegc but don't wait. We do not want to block
+ * here waiting hours for a billion extent file to be truncated.
+ */
+ xfs_inodegc_push(mp);
statp->f_type = XFS_SUPER_MAGIC;
statp->f_namelen = MAXNAMELEN - 1;
@@ -1074,7 +1077,7 @@ xfs_inodegc_init_percpu(
gc = per_cpu_ptr(mp->m_inodegc, cpu);
init_llist_head(&gc->list);
gc->items = 0;
- INIT_WORK(&gc->work, xfs_inodegc_worker);
+ INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
}
return 0;
}
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index d32026585c1b..0fa1b7a2918c 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -240,6 +240,7 @@ DEFINE_EVENT(xfs_fs_class, name, \
TP_PROTO(struct xfs_mount *mp, void *caller_ip), \
TP_ARGS(mp, caller_ip))
DEFINE_FS_EVENT(xfs_inodegc_flush);
+DEFINE_FS_EVENT(xfs_inodegc_push);
DEFINE_FS_EVENT(xfs_inodegc_start);
DEFINE_FS_EVENT(xfs_inodegc_stop);
DEFINE_FS_EVENT(xfs_inodegc_queue);