aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/ceph/addr.c42
-rw-r--r--fs/ceph/caps.c75
-rw-r--r--fs/ceph/inode.c35
-rw-r--r--fs/ceph/mds_client.c121
-rw-r--r--fs/ceph/mds_client.h2
-rw-r--r--fs/ceph/quota.c19
-rw-r--r--fs/ceph/super.c1
-rw-r--r--fs/ceph/super.h29
-rw-r--r--fs/ceph/xattr.c10
-rw-r--r--fs/cifs/Makefile4
-rw-r--r--fs/cifs/cifs_swn.c4
-rw-r--r--fs/cifs/cifsencrypt.c8
-rw-r--r--fs/cifs/cifsfs.c10
-rw-r--r--fs/cifs/cifsfs.h5
-rw-r--r--fs/cifs/cifsglob.h24
-rw-r--r--fs/cifs/connect.c27
-rw-r--r--fs/cifs/dfs_cache.c90
-rw-r--r--fs/cifs/sess.c6
-rw-r--r--fs/cifs/smb1ops.c6
-rw-r--r--fs/cifs/smb2ops.c9
-rw-r--r--fs/cifs/smb2pdu.c6
-rw-r--r--fs/cifs/smbdirect.c4
-rw-r--r--fs/cifs/transport.c40
-rw-r--r--fs/exec.c8
-rw-r--r--fs/file.c110
-rw-r--r--fs/file_table.c9
-rw-r--r--fs/freevxfs/vxfs.h27
-rw-r--r--fs/freevxfs/vxfs_bmap.c26
-rw-r--r--fs/freevxfs/vxfs_dir.h27
-rw-r--r--fs/freevxfs/vxfs_extern.h27
-rw-r--r--fs/freevxfs/vxfs_fshead.c26
-rw-r--r--fs/freevxfs/vxfs_fshead.h27
-rw-r--r--fs/freevxfs/vxfs_immed.c26
-rw-r--r--fs/freevxfs/vxfs_inode.c26
-rw-r--r--fs/freevxfs/vxfs_inode.h27
-rw-r--r--fs/freevxfs/vxfs_lookup.c26
-rw-r--r--fs/freevxfs/vxfs_olt.c26
-rw-r--r--fs/freevxfs/vxfs_olt.h27
-rw-r--r--fs/freevxfs/vxfs_subr.c26
-rw-r--r--fs/freevxfs/vxfs_super.c26
-rw-r--r--fs/fsopen.c4
-rw-r--r--fs/internal.h3
-rw-r--r--fs/io_uring.c356
-rw-r--r--fs/jffs2/erase.c6
-rw-r--r--fs/jffs2/fs.c1
-rw-r--r--fs/kernfs/dir.c31
-rw-r--r--fs/kernfs/file.c47
-rw-r--r--fs/namei.c11
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nfs/nfs4file.c4
-rw-r--r--fs/ntfs3/file.c12
-rw-r--r--fs/ntfs3/frecord.c10
-rw-r--r--fs/ntfs3/fslog.c12
-rw-r--r--fs/ntfs3/inode.c9
-rw-r--r--fs/ntfs3/xattr.c136
-rw-r--r--fs/ubifs/budget.c7
-rw-r--r--fs/ubifs/xattr.c2
-rw-r--r--fs/xfs/libxfs/xfs_ag.c3
-rw-r--r--fs/xfs/libxfs/xfs_attr.c198
-rw-r--r--fs/xfs/libxfs/xfs_attr.h63
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c6
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.h6
-rw-r--r--fs/xfs/libxfs/xfs_btree.c63
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c11
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.h1
-rw-r--r--fs/xfs/libxfs/xfs_defer.c67
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h18
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h14
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c2
-rw-r--r--fs/xfs/scrub/scrub.c17
-rw-r--r--fs/xfs/xfs_acl.c3
-rw-r--r--fs/xfs/xfs_attr_item.c364
-rw-r--r--fs/xfs/xfs_attr_item.h22
-rw-r--r--fs/xfs/xfs_buf_item_recover.c66
-rw-r--r--fs/xfs/xfs_file.c2
-rw-r--r--fs/xfs/xfs_fsops.c7
-rw-r--r--fs/xfs/xfs_inode.c2
-rw-r--r--fs/xfs/xfs_ioctl.c3
-rw-r--r--fs/xfs/xfs_iops.c3
-rw-r--r--fs/xfs/xfs_log.c41
-rw-r--r--fs/xfs/xfs_log.h7
-rw-r--r--fs/xfs/xfs_log_priv.h3
-rw-r--r--fs/xfs/xfs_log_recover.c93
-rw-r--r--fs/xfs/xfs_message.h6
-rw-r--r--fs/xfs/xfs_mount.c1
-rw-r--r--fs/xfs/xfs_mount.h18
-rw-r--r--fs/xfs/xfs_qm.c9
-rw-r--r--fs/xfs/xfs_super.c20
-rw-r--r--fs/xfs/xfs_super.h1
-rw-r--r--fs/xfs/xfs_xattr.c79
-rw-r--r--fs/xfs/xfs_xattr.h13
91 files changed, 1602 insertions, 1357 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 7584aa6e5025..e5221be6eb55 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -256,6 +256,7 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
struct iov_iter iter;
ssize_t err = 0;
size_t len;
+ int mode;
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
@@ -264,7 +265,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
goto out;
/* We need to fetch the inline data. */
- req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
+ mode = ceph_try_to_choose_auth_mds(inode, CEPH_STAT_CAP_INLINE_DATA);
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, mode);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto out;
@@ -604,8 +606,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc,
ceph_wbc.truncate_seq, ceph_wbc.truncate_size,
true);
- if (IS_ERR(req))
+ if (IS_ERR(req)) {
+ redirty_page_for_writepage(wbc, page);
return PTR_ERR(req);
+ }
set_page_writeback(page);
if (caching)
@@ -1644,7 +1648,7 @@ int ceph_uninline_data(struct file *file)
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
- struct ceph_osd_request *req;
+ struct ceph_osd_request *req = NULL;
struct ceph_cap_flush *prealloc_cf;
struct folio *folio = NULL;
u64 inline_version = CEPH_INLINE_NONE;
@@ -1652,10 +1656,23 @@ int ceph_uninline_data(struct file *file)
int err = 0;
u64 len;
+ spin_lock(&ci->i_ceph_lock);
+ inline_version = ci->i_inline_version;
+ spin_unlock(&ci->i_ceph_lock);
+
+ dout("uninline_data %p %llx.%llx inline_version %llu\n",
+ inode, ceph_vinop(inode), inline_version);
+
+ if (inline_version == CEPH_INLINE_NONE)
+ return 0;
+
prealloc_cf = ceph_alloc_cap_flush();
if (!prealloc_cf)
return -ENOMEM;
+ if (inline_version == 1) /* initial version, no data */
+ goto out_uninline;
+
folio = read_mapping_folio(inode->i_mapping, 0, file);
if (IS_ERR(folio)) {
err = PTR_ERR(folio);
@@ -1664,17 +1681,6 @@ int ceph_uninline_data(struct file *file)
folio_lock(folio);
- spin_lock(&ci->i_ceph_lock);
- inline_version = ci->i_inline_version;
- spin_unlock(&ci->i_ceph_lock);
-
- dout("uninline_data %p %llx.%llx inline_version %llu\n",
- inode, ceph_vinop(inode), inline_version);
-
- if (inline_version == 1 || /* initial version, no data */
- inline_version == CEPH_INLINE_NONE)
- goto out_unlock;
-
len = i_size_read(inode);
if (len > folio_size(folio))
len = folio_size(folio);
@@ -1739,6 +1745,7 @@ int ceph_uninline_data(struct file *file)
ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, len, err);
+out_uninline:
if (!err) {
int dirty;
@@ -1757,8 +1764,10 @@ out_put_req:
if (err == -ECANCELED)
err = 0;
out_unlock:
- folio_unlock(folio);
- folio_put(folio);
+ if (folio) {
+ folio_unlock(folio);
+ folio_put(folio);
+ }
out:
ceph_free_cap_flush(prealloc_cf);
dout("uninline_data %p %llx.%llx inline_version %llu = %d\n",
@@ -1777,7 +1786,6 @@ int ceph_mmap(struct file *file, struct vm_area_struct *vma)
if (!mapping->a_ops->read_folio)
return -ENOEXEC;
- file_accessed(file);
vma->vm_ops = &ceph_vmops;
return 0;
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 5c14ef04e474..bf2e94005598 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1577,7 +1577,7 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci,
while (first_tid <= last_tid) {
struct ceph_cap *cap = ci->i_auth_cap;
- struct ceph_cap_flush *cf;
+ struct ceph_cap_flush *cf = NULL, *iter;
int ret;
if (!(cap && cap->session == session)) {
@@ -1587,8 +1587,9 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci,
}
ret = -ENOENT;
- list_for_each_entry(cf, &ci->i_cap_flush_list, i_list) {
- if (cf->tid >= first_tid) {
+ list_for_each_entry(iter, &ci->i_cap_flush_list, i_list) {
+ if (iter->tid >= first_tid) {
+ cf = iter;
ret = 0;
break;
}
@@ -1910,6 +1911,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct rb_node *p;
bool queue_invalidate = false;
bool tried_invalidate = false;
+ bool queue_writeback = false;
if (session)
ceph_get_mds_session(session);
@@ -2062,10 +2064,27 @@ retry:
}
/* completed revocation? going down and there are no caps? */
- if (revoking && (revoking & cap_used) == 0) {
- dout("completed revocation of %s\n",
- ceph_cap_string(cap->implemented & ~cap->issued));
- goto ack;
+ if (revoking) {
+ if ((revoking & cap_used) == 0) {
+ dout("completed revocation of %s\n",
+ ceph_cap_string(cap->implemented & ~cap->issued));
+ goto ack;
+ }
+
+ /*
+ * If the "i_wrbuffer_ref" was increased by mmap or generic
+ * cache write just before the ceph_check_caps() is called,
+ * the Fb capability revoking will fail this time. Then we
+ * must wait for the BDI's delayed work to flush the dirty
+ * pages and to release the "i_wrbuffer_ref", which will cost
+ * at most 5 seconds. That means the MDS needs to wait at
+ * most 5 seconds to finish the Fb capability's revocation.
+ *
+ * Let's queue a writeback for it.
+ */
+ if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref &&
+ (revoking & CEPH_CAP_FILE_BUFFER))
+ queue_writeback = true;
}
/* want more caps from mds? */
@@ -2135,6 +2154,8 @@ ack:
spin_unlock(&ci->i_ceph_lock);
ceph_put_mds_session(session);
+ if (queue_writeback)
+ ceph_queue_writeback(inode);
if (queue_invalidate)
ceph_queue_invalidate(inode);
}
@@ -2218,9 +2239,9 @@ static int caps_are_flushed(struct inode *inode, u64 flush_tid)
}
/*
- * wait for any unsafe requests to complete.
+ * flush the mdlog and wait for any unsafe requests to complete.
*/
-static int unsafe_request_wait(struct inode *inode)
+static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
{
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
@@ -2336,7 +2357,7 @@ retry:
kfree(sessions);
}
- dout("unsafe_request_wait %p wait on tid %llu %llu\n",
+ dout("%s %p wait on tid %llu %llu\n", __func__,
inode, req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL);
if (req1) {
ret = !wait_for_completion_timeout(&req1->r_safe_completion,
@@ -2380,7 +2401,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
dirty = try_flush_caps(inode, &flush_tid);
dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
- err = unsafe_request_wait(inode);
+ err = flush_mdlog_and_wait_inode_unsafe_requests(inode);
/*
* only wait on non-file metadata writeback (the mds
@@ -3182,10 +3203,9 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
struct ceph_snap_context *snapc)
{
struct inode *inode = &ci->vfs_inode;
- struct ceph_cap_snap *capsnap = NULL;
+ struct ceph_cap_snap *capsnap = NULL, *iter;
int put = 0;
bool last = false;
- bool found = false;
bool flush_snaps = false;
bool complete_capsnap = false;
@@ -3212,14 +3232,14 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
last ? " LAST" : "");
} else {
- list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
- if (capsnap->context == snapc) {
- found = true;
+ list_for_each_entry(iter, &ci->i_cap_snaps, ci_item) {
+ if (iter->context == snapc) {
+ capsnap = iter;
break;
}
}
- if (!found) {
+ if (!capsnap) {
/*
* The capsnap should already be removed when removing
* auth cap in the case of a forced unmount.
@@ -3769,8 +3789,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
u64 follows = le64_to_cpu(m->snap_follows);
- struct ceph_cap_snap *capsnap;
- bool flushed = false;
+ struct ceph_cap_snap *capsnap = NULL, *iter;
bool wake_ci = false;
bool wake_mdsc = false;
@@ -3778,26 +3797,26 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
inode, ci, session->s_mds, follows);
spin_lock(&ci->i_ceph_lock);
- list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
- if (capsnap->follows == follows) {
- if (capsnap->cap_flush.tid != flush_tid) {
+ list_for_each_entry(iter, &ci->i_cap_snaps, ci_item) {
+ if (iter->follows == follows) {
+ if (iter->cap_flush.tid != flush_tid) {
dout(" cap_snap %p follows %lld tid %lld !="
- " %lld\n", capsnap, follows,
- flush_tid, capsnap->cap_flush.tid);
+ " %lld\n", iter, follows,
+ flush_tid, iter->cap_flush.tid);
break;
}
- flushed = true;
+ capsnap = iter;
break;
} else {
dout(" skipping cap_snap %p follows %lld\n",
- capsnap, capsnap->follows);
+ iter, iter->follows);
}
}
- if (flushed)
+ if (capsnap)
ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc);
spin_unlock(&ci->i_ceph_lock);
- if (flushed) {
+ if (capsnap) {
ceph_put_snap_context(capsnap->context);
ceph_put_cap_snap(capsnap);
if (wake_ci)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 63113e2a4890..b7e9cac3aeef 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -578,7 +578,7 @@ void ceph_evict_inode(struct inode *inode)
__ceph_remove_caps(ci);
- if (__ceph_has_any_quota(ci))
+ if (__ceph_has_quota(ci, QUOTA_GET_ANY))
ceph_adjust_quota_realms_count(inode, false);
/*
@@ -1466,10 +1466,12 @@ retry_lookup:
} else if (have_lease) {
if (d_unhashed(dn))
d_add(dn, NULL);
+ }
+
+ if (!d_unhashed(dn) && have_lease)
update_dentry_lease(dir, dn,
rinfo->dlease, session,
req->r_request_started);
- }
goto done;
}
@@ -1884,7 +1886,6 @@ static void ceph_do_invalidate_pages(struct inode *inode)
orig_gen = ci->i_rdcache_gen;
spin_unlock(&ci->i_ceph_lock);
- ceph_fscache_invalidate(inode, false);
if (invalidate_inode_pages2(inode->i_mapping) < 0) {
pr_err("invalidate_inode_pages2 %llx.%llx failed\n",
ceph_vinop(inode));
@@ -2258,6 +2259,30 @@ int ceph_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
return err;
}
+int ceph_try_to_choose_auth_mds(struct inode *inode, int mask)
+{
+ int issued = ceph_caps_issued(ceph_inode(inode));
+
+ /*
+ * If any 'x' caps are issued we can just choose the auth MDS
+ * instead of a random replica MDS, because the loner client
+ * can only get the 'x' caps when the Locker is in the
+ * LOCK_EXEC state. If we send the getattr request to a
+ * replica MDS, it must auth pin and try to rdlock from the
+ * auth MDS, and then the auth MDS needs to do the Locker
+ * state transition to LOCK_SYNC. After that the lock state
+ * will change back.
+ *
+ * The Locker state transition is expensive and usually
+ * requires revoking caps from clients.
+ */
+ if (((mask & CEPH_CAP_ANY_SHARED) && (issued & CEPH_CAP_ANY_EXCL))
+ || (mask & CEPH_STAT_RSTAT))
+ return USE_AUTH_MDS;
+ else
+ return USE_ANY_MDS;
+}
+
/*
* Verify that we have a lease on the given mask. If not,
* do a getattr against an mds.
@@ -2281,7 +2306,7 @@ int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
if (!force && ceph_caps_issued_mask_metric(ceph_inode(inode), mask, 1))
return 0;
- mode = (mask & CEPH_STAT_RSTAT) ? USE_AUTH_MDS : USE_ANY_MDS;
+ mode = ceph_try_to_choose_auth_mds(inode, mask);
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, mode);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -2423,7 +2448,7 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path,
return -ESTALE;
/* Skip the getattr altogether if we're asked not to sync */
- if (!(flags & AT_STATX_DONT_SYNC)) {
+ if ((flags & AT_STATX_SYNC_TYPE) != AT_STATX_DONT_SYNC) {
err = ceph_do_getattr(inode,
statx_to_caps(request_mask, inode->i_mode),
flags & AT_STATX_FORCE_SYNC);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 00c3de177dd6..f5d110d90b77 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -437,7 +437,7 @@ static int ceph_parse_deleg_inos(void **p, void *end,
ceph_decode_32_safe(p, end, sets, bad);
dout("got %u sets of delegated inodes\n", sets);
while (sets--) {
- u64 start, len, ino;
+ u64 start, len;
ceph_decode_64_safe(p, end, start, bad);
ceph_decode_64_safe(p, end, len, bad);
@@ -449,7 +449,7 @@ static int ceph_parse_deleg_inos(void **p, void *end,
continue;
}
while (len--) {
- int err = xa_insert(&s->s_delegated_inos, ino = start++,
+ int err = xa_insert(&s->s_delegated_inos, start++,
DELEGATED_INO_AVAILABLE,
GFP_KERNEL);
if (!err) {
@@ -2651,7 +2651,28 @@ static int __prepare_send_request(struct ceph_mds_session *session,
struct ceph_mds_client *mdsc = session->s_mdsc;
struct ceph_mds_request_head_old *rhead;
struct ceph_msg *msg;
- int flags = 0;
+ int flags = 0, max_retry;
+
+ /*
+ * The type of 'r_attempts' in kernel 'ceph_mds_request'
+ * is 'int', while in 'ceph_mds_request_head' the type of
+ * 'num_retry' is '__u8'. So if the request is retried
+ * more than 256 times, the MDS will receive an incorrect
+ * retry seq.
+ *
+ * In this case it's usually a bug in the MDS, and continuing
+ * to retry the request makes no sense.
+ *
+ * In future this could be fixed in the ceph code, so avoid
+ * hardcoding the limit here.
+ */
+ max_retry = sizeof_field(struct ceph_mds_request_head, num_retry);
+ max_retry = 1 << (max_retry * BITS_PER_BYTE);
+ if (req->r_attempts >= max_retry) {
+ pr_warn_ratelimited("%s request tid %llu seq overflow\n",
+ __func__, req->r_tid);
+ return -EMULTIHOP;
+ }
req->r_attempts++;
if (req->r_inode) {
@@ -2663,7 +2684,7 @@ static int __prepare_send_request(struct ceph_mds_session *session,
else
req->r_sent_on_mseq = -1;
}
- dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req,
+ dout("%s %p tid %lld %s (attempt %d)\n", __func__, req,
req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
@@ -3265,6 +3286,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
int err = -EINVAL;
void *p = msg->front.iov_base;
void *end = p + msg->front.iov_len;
+ bool aborted = false;
ceph_decode_need(&p, end, 2*sizeof(u32), bad);
next_mds = ceph_decode_32(&p);
@@ -3273,16 +3295,41 @@ static void handle_forward(struct ceph_mds_client *mdsc,
mutex_lock(&mdsc->mutex);
req = lookup_get_request(mdsc, tid);
if (!req) {
+ mutex_unlock(&mdsc->mutex);
dout("forward tid %llu to mds%d - req dne\n", tid, next_mds);
- goto out; /* dup reply? */
+ return; /* dup reply? */
}
if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
dout("forward tid %llu aborted, unregistering\n", tid);
__unregister_request(mdsc, req);
} else if (fwd_seq <= req->r_num_fwd) {
- dout("forward tid %llu to mds%d - old seq %d <= %d\n",
- tid, next_mds, req->r_num_fwd, fwd_seq);
+ /*
+ * The type of 'num_fwd' in ceph 'MClientRequestForward'
+ * is 'int32_t', while in 'ceph_mds_request_head' the
+ * type is '__u8'. So if the request bounces between
+ * MDSes more than 256 times, the client will get stuck.
+ *
+ * In this case it's usually a bug in the MDS, and continuing
+ * to bounce the request makes no sense.
+ *
+ * In future this could be fixed in the ceph code, so avoid
+ * hardcoding the limit here.
+ */
+ int max = sizeof_field(struct ceph_mds_request_head, num_fwd);
+ max = 1 << (max * BITS_PER_BYTE);
+ if (req->r_num_fwd >= max) {
+ mutex_lock(&req->r_fill_mutex);
+ req->r_err = -EMULTIHOP;
+ set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
+ mutex_unlock(&req->r_fill_mutex);
+ aborted = true;
+ pr_warn_ratelimited("forward tid %llu seq overflow\n",
+ tid);
+ } else {
+ dout("forward tid %llu to mds%d - old seq %d <= %d\n",
+ tid, next_mds, req->r_num_fwd, fwd_seq);
+ }
} else {
/* resend. forward race not possible; mds would drop */
dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
@@ -3294,9 +3341,12 @@ static void handle_forward(struct ceph_mds_client *mdsc,
put_request_session(req);
__do_request(mdsc, req);
}
- ceph_mdsc_put_request(req);
-out:
mutex_unlock(&mdsc->mutex);
+
+ /* kick calling process */
+ if (aborted)
+ complete_request(mdsc, req);
+ ceph_mdsc_put_request(req);
return;
bad:
@@ -3375,13 +3425,17 @@ static void handle_session(struct ceph_mds_session *session,
}
if (msg_version >= 5) {
- u32 flags;
- /* version >= 4, struct_v, struct_cv, len, metric_spec */
- ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 2, bad);
+ u32 flags, len;
+
+ /* version >= 4 */
+ ceph_decode_skip_16(&p, end, bad); /* struct_v, struct_cv */
+ ceph_decode_32_safe(&p, end, len, bad); /* len */
+ ceph_decode_skip_n(&p, end, len, bad); /* metric_spec */
+
/* version >= 5, flags */
- ceph_decode_32_safe(&p, end, flags, bad);
+ ceph_decode_32_safe(&p, end, flags, bad);
if (flags & CEPH_SESSION_BLOCKLISTED) {
- pr_warn("mds%d session blocklisted\n", session->s_mds);
+ pr_warn("mds%d session blocklisted\n", session->s_mds);
blocklisted = true;
}
}
@@ -4396,12 +4450,6 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
memcpy((void *)(lease + 1) + 4,
dentry->d_name.name, dentry->d_name.len);
spin_unlock(&dentry->d_lock);
- /*
- * if this is a preemptive lease RELEASE, no need to
- * flush request stream, since the actual request will
- * soon follow.
- */
- msg->more_to_follow = (action == CEPH_MDS_LEASE_RELEASE);
ceph_con_send(&session->s_con, msg);
}
@@ -4696,15 +4744,17 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
}
/*
- * wait for all write mds requests to flush.
+ * flush the mdlog and wait for all write mds requests to flush.
*/
-static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
+static void flush_mdlog_and_wait_mdsc_unsafe_requests(struct ceph_mds_client *mdsc,
+ u64 want_tid)
{
struct ceph_mds_request *req = NULL, *nextreq;
+ struct ceph_mds_session *last_session = NULL;
struct rb_node *n;
mutex_lock(&mdsc->mutex);
- dout("wait_unsafe_requests want %lld\n", want_tid);
+ dout("%s want %lld\n", __func__, want_tid);
restart:
req = __get_oldest_req(mdsc);
while (req && req->r_tid <= want_tid) {
@@ -4716,14 +4766,32 @@ restart:
nextreq = NULL;
if (req->r_op != CEPH_MDS_OP_SETFILELOCK &&
(req->r_op & CEPH_MDS_OP_WRITE)) {
+ struct ceph_mds_session *s = req->r_session;
+
+ if (!s) {
+ req = nextreq;
+ continue;
+ }
+
/* write op */
ceph_mdsc_get_request(req);
if (nextreq)
ceph_mdsc_get_request(nextreq);
+ s = ceph_get_mds_session(s);
mutex_unlock(&mdsc->mutex);
- dout("wait_unsafe_requests wait on %llu (want %llu)\n",
+
+ /* send flush mdlog request to MDS */
+ if (last_session != s) {
+ send_flush_mdlog(s);
+ ceph_put_mds_session(last_session);
+ last_session = s;
+ } else {
+ ceph_put_mds_session(s);
+ }
+ dout("%s wait on %llu (want %llu)\n", __func__,
req->r_tid, want_tid);
wait_for_completion(&req->r_safe_completion);
+
mutex_lock(&mdsc->mutex);
ceph_mdsc_put_request(req);
if (!nextreq)
@@ -4738,7 +4806,8 @@ restart:
req = nextreq;
}
mutex_unlock(&mdsc->mutex);
- dout("wait_unsafe_requests done\n");
+ ceph_put_mds_session(last_session);
+ dout("%s done\n", __func__);
}
void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
@@ -4767,7 +4836,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
dout("sync want tid %lld flush_seq %lld\n",
want_tid, want_flush);
- wait_unsafe_requests(mdsc, want_tid);
+ flush_mdlog_and_wait_mdsc_unsafe_requests(mdsc, want_tid);
wait_caps_flush(mdsc, want_flush);
}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 33497846e47e..1140aecd82ce 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -579,7 +579,7 @@ static inline int ceph_wait_on_async_create(struct inode *inode)
struct ceph_inode_info *ci = ceph_inode(inode);
return wait_on_bit(&ci->i_ceph_flags, CEPH_ASYNC_CREATE_BIT,
- TASK_INTERRUPTIBLE);
+ TASK_KILLABLE);
}
extern u64 ceph_get_deleg_ino(struct ceph_mds_session *session);
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index a338a3ec0dc4..64592adfe48f 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -195,9 +195,9 @@ void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
/*
* This function walks through the snaprealm for an inode and returns the
- * ceph_snap_realm for the first snaprealm that has quotas set (either max_files
- * or max_bytes). If the root is reached, return the root ceph_snap_realm
- * instead.
+ * ceph_snap_realm for the first snaprealm that has quotas set (max_files,
+ * max_bytes, or any, depending on the 'which_quota' argument). If the root is
+ * reached, return the root ceph_snap_realm instead.
*
* Note that the caller is responsible for calling ceph_put_snap_realm() on the
* returned realm.
@@ -209,7 +209,9 @@ void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
* will be restarted.
*/
static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
- struct inode *inode, bool retry)
+ struct inode *inode,
+ enum quota_get_realm which_quota,
+ bool retry)
{
struct ceph_inode_info *ci = NULL;
struct ceph_snap_realm *realm, *next;
@@ -248,7 +250,7 @@ restart:
}
ci = ceph_inode(in);
- has_quota = __ceph_has_any_quota(ci);
+ has_quota = __ceph_has_quota(ci, which_quota);
iput(in);
next = realm->parent;
@@ -279,8 +281,8 @@ restart:
* dropped and we can then restart the whole operation.
*/
down_read(&mdsc->snap_rwsem);
- old_realm = get_quota_realm(mdsc, old, true);
- new_realm = get_quota_realm(mdsc, new, false);
+ old_realm = get_quota_realm(mdsc, old, QUOTA_GET_ANY, true);
+ new_realm = get_quota_realm(mdsc, new, QUOTA_GET_ANY, false);
if (PTR_ERR(new_realm) == -EAGAIN) {
up_read(&mdsc->snap_rwsem);
if (old_realm)
@@ -483,7 +485,8 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
bool is_updated = false;
down_read(&mdsc->snap_rwsem);
- realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), true);
+ realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root),
+ QUOTA_GET_MAX_BYTES, true);
up_read(&mdsc->snap_rwsem);
if (!realm)
return false;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index e6987d295079..b73b4f75462c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1119,6 +1119,7 @@ static int ceph_set_super(struct super_block *s, struct fs_context *fc)
s->s_time_gran = 1;
s->s_time_min = 0;
s->s_time_max = U32_MAX;
+ s->s_flags |= SB_NODIRATIME | SB_NOATIME;
ret = set_anon_super_fc(s, fc);
if (ret != 0)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 20ceab74e871..dd7dac0f984a 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1022,6 +1022,7 @@ static inline void ceph_queue_flush_snaps(struct inode *inode)
ceph_queue_inode_work(inode, CEPH_I_WORK_FLUSH_SNAPS);
}
+extern int ceph_try_to_choose_auth_mds(struct inode *inode, int mask);
extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
int mask, bool force);
static inline int ceph_do_getattr(struct inode *inode, int mask, bool force)
@@ -1278,9 +1279,29 @@ extern void ceph_fs_debugfs_init(struct ceph_fs_client *client);
extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client);
/* quota.c */
-static inline bool __ceph_has_any_quota(struct ceph_inode_info *ci)
+
+enum quota_get_realm {
+ QUOTA_GET_MAX_FILES,
+ QUOTA_GET_MAX_BYTES,
+ QUOTA_GET_ANY
+};
+
+static inline bool __ceph_has_quota(struct ceph_inode_info *ci,
+ enum quota_get_realm which)
{
- return ci->i_max_files || ci->i_max_bytes;
+ bool has_quota = false;
+
+ switch (which) {
+ case QUOTA_GET_MAX_BYTES:
+ has_quota = !!ci->i_max_bytes;
+ break;
+ case QUOTA_GET_MAX_FILES:
+ has_quota = !!ci->i_max_files;
+ break;
+ default:
+ has_quota = !!(ci->i_max_files || ci->i_max_bytes);
+ }
+ return has_quota;
}
extern void ceph_adjust_quota_realms_count(struct inode *inode, bool inc);
@@ -1289,10 +1310,10 @@ static inline void __ceph_update_quota(struct ceph_inode_info *ci,
u64 max_bytes, u64 max_files)
{
bool had_quota, has_quota;
- had_quota = __ceph_has_any_quota(ci);
+ had_quota = __ceph_has_quota(ci, QUOTA_GET_ANY);
ci->i_max_bytes = max_bytes;
ci->i_max_files = max_files;
- has_quota = __ceph_has_any_quota(ci);
+ has_quota = __ceph_has_quota(ci, QUOTA_GET_ANY);
if (had_quota != has_quota)
ceph_adjust_quota_realms_count(&ci->vfs_inode, has_quota);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index afec84088471..8c2dc2c762a4 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -366,6 +366,14 @@ static ssize_t ceph_vxattrcb_auth_mds(struct ceph_inode_info *ci,
}
#define XATTR_RSTAT_FIELD(_type, _name) \
XATTR_NAME_CEPH(_type, _name, VXATTR_FLAG_RSTAT)
+#define XATTR_RSTAT_FIELD_UPDATABLE(_type, _name) \
+ { \
+ .name = CEPH_XATTR_NAME(_type, _name), \
+ .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
+ .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
+ .exists_cb = NULL, \
+ .flags = VXATTR_FLAG_RSTAT, \
+ }
#define XATTR_LAYOUT_FIELD(_type, _name, _field) \
{ \
.name = CEPH_XATTR_NAME2(_type, _name, _field), \
@@ -404,7 +412,7 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
XATTR_RSTAT_FIELD(dir, rsubdirs),
XATTR_RSTAT_FIELD(dir, rsnaps),
XATTR_RSTAT_FIELD(dir, rbytes),
- XATTR_RSTAT_FIELD(dir, rctime),
+ XATTR_RSTAT_FIELD_UPDATABLE(dir, rctime),
{
.name = "ceph.dir.pin",
.name_size = sizeof("ceph.dir.pin"),
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index cc8fdcb35b71..8c9f2c00be72 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_CIFS) += cifs.o
cifs-y := trace.o cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o \
inode.o link.o misc.o netmisc.o smbencrypt.o transport.o \
cifs_unicode.o nterr.o cifsencrypt.o \
- readdir.o ioctl.o sess.o export.o smb1ops.o unc.o winucase.o \
+ readdir.o ioctl.o sess.o export.o unc.o winucase.o \
smb2ops.o smb2maperror.o smb2transport.o \
smb2misc.o smb2pdu.o smb2inode.o smb2file.o cifsacl.o fs_context.o \
dns_resolve.o cifs_spnego_negtokeninit.asn1.o asn1.o
@@ -30,3 +30,5 @@ cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o
cifs-$(CONFIG_CIFS_SMB_DIRECT) += smbdirect.o
cifs-$(CONFIG_CIFS_ROOT) += cifsroot.o
+
+cifs-$(CONFIG_CIFS_ALLOW_INSECURE_LEGACY) += smb1ops.o
diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c
index 180c234c2f46..1e4c7cc5287f 100644
--- a/fs/cifs/cifs_swn.c
+++ b/fs/cifs/cifs_swn.c
@@ -465,7 +465,7 @@ static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *a
int ret = 0;
/* Store the reconnect address */
- mutex_lock(&tcon->ses->server->srv_mutex);
+ cifs_server_lock(tcon->ses->server);
if (cifs_sockaddr_equal(&tcon->ses->server->dstaddr, addr))
goto unlock;
@@ -501,7 +501,7 @@ static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *a
cifs_signal_cifsd_for_reconnect(tcon->ses->server, false);
unlock:
- mutex_unlock(&tcon->ses->server->srv_mutex);
+ cifs_server_unlock(tcon->ses->server);
return ret;
}
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 0912d8bbbac1..663cb9db4908 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -236,9 +236,9 @@ int cifs_verify_signature(struct smb_rqst *rqst,
cpu_to_le32(expected_sequence_number);
cifs_pdu->Signature.Sequence.Reserved = 0;
- mutex_lock(&server->srv_mutex);
+ cifs_server_lock(server);
rc = cifs_calc_signature(rqst, server, what_we_think_sig_should_be);
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
if (rc)
return rc;
@@ -626,7 +626,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
memcpy(ses->auth_key.response + baselen, tiblob, tilen);
- mutex_lock(&ses->server->srv_mutex);
+ cifs_server_lock(ses->server);
rc = cifs_alloc_hash("hmac(md5)",
&ses->server->secmech.hmacmd5,
@@ -678,7 +678,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
unlock:
- mutex_unlock(&ses->server->srv_mutex);
+ cifs_server_unlock(ses->server);
setup_ntlmv2_rsp_ret:
kfree(tiblob);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index f539a39d47f5..12c872800326 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -838,7 +838,7 @@ cifs_smb3_do_mount(struct file_system_type *fs_type,
int flags, struct smb3_fs_context *old_ctx)
{
int rc;
- struct super_block *sb;
+ struct super_block *sb = NULL;
struct cifs_sb_info *cifs_sb = NULL;
struct cifs_mnt_data mnt_data;
struct dentry *root;
@@ -934,9 +934,11 @@ out_super:
return root;
out:
if (cifs_sb) {
- kfree(cifs_sb->prepath);
- smb3_cleanup_fs_context(cifs_sb->ctx);
- kfree(cifs_sb);
+ if (!sb || IS_ERR(sb)) { /* otherwise kill_sb will handle */
+ kfree(cifs_sb->prepath);
+ smb3_cleanup_fs_context(cifs_sb->ctx);
+ kfree(cifs_sb);
+ }
}
return root;
}
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index c0542bdcd06b..dd7e070ca243 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -152,6 +152,7 @@ extern struct dentry *cifs_smb3_do_mount(struct file_system_type *fs_type,
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define SMB3_PRODUCT_BUILD 35
-#define CIFS_VERSION "2.36"
+/* when changing internal version - update following two lines at same time */
+#define SMB3_PRODUCT_BUILD 37
+#define CIFS_VERSION "2.37"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 68da230c7f11..f873379066c7 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -16,6 +16,7 @@
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <linux/utsname.h>
+#include <linux/sched/mm.h>
#include <linux/netfs.h>
#include "cifs_fs_sb.h"
#include "cifsacl.h"
@@ -628,7 +629,8 @@ struct TCP_Server_Info {
unsigned int in_flight; /* number of requests on the wire to server */
unsigned int max_in_flight; /* max number of requests that were on wire */
spinlock_t req_lock; /* protect the two values above */
- struct mutex srv_mutex;
+ struct mutex _srv_mutex;
+ unsigned int nofs_flag;
struct task_struct *tsk;
char server_GUID[16];
__u16 sec_mode;
@@ -743,6 +745,22 @@ struct TCP_Server_Info {
#endif
};
+static inline void cifs_server_lock(struct TCP_Server_Info *server)
+{
+ unsigned int nofs_flag = memalloc_nofs_save();
+
+ mutex_lock(&server->_srv_mutex);
+ server->nofs_flag = nofs_flag;
+}
+
+static inline void cifs_server_unlock(struct TCP_Server_Info *server)
+{
+ unsigned int nofs_flag = server->nofs_flag;
+
+ mutex_unlock(&server->_srv_mutex);
+ memalloc_nofs_restore(nofs_flag);
+}
+
struct cifs_credits {
unsigned int value;
unsigned int instance;
@@ -1945,11 +1963,13 @@ extern mempool_t *cifs_mid_poolp;
/* Operations for different SMB versions */
#define SMB1_VERSION_STRING "1.0"
+#define SMB20_VERSION_STRING "2.0"
+#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
extern struct smb_version_operations smb1_operations;
extern struct smb_version_values smb1_values;
-#define SMB20_VERSION_STRING "2.0"
extern struct smb_version_operations smb20_operations;
extern struct smb_version_values smb20_values;
+#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
#define SMB21_VERSION_STRING "2.1"
extern struct smb_version_operations smb21_operations;
extern struct smb_version_values smb21_values;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 53373a3649e1..d46702f5a663 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -148,7 +148,7 @@ static void cifs_resolve_server(struct work_struct *work)
struct TCP_Server_Info *server = container_of(work,
struct TCP_Server_Info, resolve.work);
- mutex_lock(&server->srv_mutex);
+ cifs_server_lock(server);
/*
* Resolve the hostname again to make sure that IP address is up-to-date.
@@ -159,7 +159,7 @@ static void cifs_resolve_server(struct work_struct *work)
__func__, rc);
}
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
}
/*
@@ -267,7 +267,7 @@ cifs_abort_connection(struct TCP_Server_Info *server)
/* do not want to be sending data on a socket we are freeing */
cifs_dbg(FYI, "%s: tearing down socket\n", __func__);
- mutex_lock(&server->srv_mutex);
+ cifs_server_lock(server);
if (server->ssocket) {
cifs_dbg(FYI, "State: 0x%x Flags: 0x%lx\n", server->ssocket->state,
server->ssocket->flags);
@@ -296,7 +296,7 @@ cifs_abort_connection(struct TCP_Server_Info *server)
mid->mid_flags |= MID_DELETED;
}
spin_unlock(&GlobalMid_Lock);
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__);
list_for_each_entry_safe(mid, nmid, &retry_list, qhead) {
@@ -306,9 +306,9 @@ cifs_abort_connection(struct TCP_Server_Info *server)
}
if (cifs_rdma_enabled(server)) {
- mutex_lock(&server->srv_mutex);
+ cifs_server_lock(server);
smbd_destroy(server);
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
}
}
@@ -359,7 +359,7 @@ static int __cifs_reconnect(struct TCP_Server_Info *server,
do {
try_to_freeze();
- mutex_lock(&server->srv_mutex);
+ cifs_server_lock(server);
if (!cifs_swn_set_server_dstaddr(server)) {
/* resolve the hostname again to make sure that IP address is up-to-date */
@@ -372,7 +372,7 @@ static int __cifs_reconnect(struct TCP_Server_Info *server,
else
rc = generic_ip_connect(server);
if (rc) {
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
cifs_dbg(FYI, "%s: reconnect error %d\n", __func__, rc);
msleep(3000);
} else {
@@ -383,7 +383,7 @@ static int __cifs_reconnect(struct TCP_Server_Info *server,
server->tcpStatus = CifsNeedNegotiate;
spin_unlock(&cifs_tcp_ses_lock);
cifs_swn_reset_server_dstaddr(server);
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
}
} while (server->tcpStatus == CifsNeedReconnect);
@@ -488,12 +488,12 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server)
do {
try_to_freeze();
- mutex_lock(&server->srv_mutex);
+ cifs_server_lock(server);
rc = reconnect_target_unlocked(server, &tl, &target_hint);
if (rc) {
/* Failed to reconnect socket */
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
cifs_dbg(FYI, "%s: reconnect error %d\n", __func__, rc);
msleep(3000);
continue;
@@ -510,7 +510,7 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server)
server->tcpStatus = CifsNeedNegotiate;
spin_unlock(&cifs_tcp_ses_lock);
cifs_swn_reset_server_dstaddr(server);
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
} while (server->tcpStatus == CifsNeedReconnect);
@@ -1565,7 +1565,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx,
init_waitqueue_head(&tcp_ses->response_q);
init_waitqueue_head(&tcp_ses->request_q);
INIT_LIST_HEAD(&tcp_ses->pending_mid_q);
- mutex_init(&tcp_ses->srv_mutex);
+ mutex_init(&tcp_ses->_srv_mutex);
memcpy(tcp_ses->workstation_RFC1001_name,
ctx->source_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
memcpy(tcp_ses->server_RFC1001_name,
@@ -1845,7 +1845,6 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
unsigned int rc, xid;
unsigned int chan_count;
struct TCP_Server_Info *server = ses->server;
- cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count);
spin_lock(&cifs_tcp_ses_lock);
if (ses->ses_status == SES_EXITING) {
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index c5dd6f7305bd..34a8f3baed5e 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -1229,6 +1229,30 @@ void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id)
kref_put(&mg->refcount, mount_group_release);
}
+/* Extract share from DFS target and return a pointer to prefix path or an ERR_PTR() on failure */
+static const char *parse_target_share(const char *target, char **share)
+{
+ const char *s, *seps = "/\\";
+ size_t len;
+
+ s = strpbrk(target + 1, seps);
+ if (!s)
+ return ERR_PTR(-EINVAL);
+
+ len = strcspn(s + 1, seps);
+ if (!len)
+ return ERR_PTR(-EINVAL);
+ s += len;
+
+ len = s - target + 1;
+ *share = kstrndup(target, len, GFP_KERNEL);
+ if (!*share)
+ return ERR_PTR(-ENOMEM);
+
+ s = target + len;
+ return s + strspn(s, seps);
+}
+
/**
* dfs_cache_get_tgt_share - parse a DFS target
*
@@ -1242,56 +1266,46 @@ void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id)
int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it, char **share,
char **prefix)
{
- char *s, sep, *p;
- size_t len;
- size_t plen1, plen2;
+ char sep;
+ char *target_share;
+ char *ppath = NULL;
+ const char *target_ppath, *dfsref_ppath;
+ size_t target_pplen, dfsref_pplen;
+ size_t len, c;
if (!it || !path || !share || !prefix || strlen(path) < it->it_path_consumed)
return -EINVAL;
- *share = NULL;
- *prefix = NULL;
-
sep = it->it_name[0];
if (sep != '\\' && sep != '/')
return -EINVAL;
- s = strchr(it->it_name + 1, sep);
- if (!s)
- return -EINVAL;
+ target_ppath = parse_target_share(it->it_name, &target_share);
+ if (IS_ERR(target_ppath))
+ return PTR_ERR(target_ppath);
- /* point to prefix in target node */
- s = strchrnul(s + 1, sep);
+ /* point to prefix in DFS referral path */
+ dfsref_ppath = path + it->it_path_consumed;
+ dfsref_ppath += strspn(dfsref_ppath, "/\\");
- /* extract target share */
- *share = kstrndup(it->it_name, s - it->it_name, GFP_KERNEL);
- if (!*share)
- return -ENOMEM;
+ target_pplen = strlen(target_ppath);
+ dfsref_pplen = strlen(dfsref_ppath);
- /* skip separator */
- if (*s)
- s++;
- /* point to prefix in DFS path */
- p = path + it->it_path_consumed;
- if (*p == sep)
- p++;
-
- /* merge prefix paths from DFS path and target node */
- plen1 = it->it_name + strlen(it->it_name) - s;
- plen2 = path + strlen(path) - p;
- if (plen1 || plen2) {
- len = plen1 + plen2 + 2;
- *prefix = kmalloc(len, GFP_KERNEL);
- if (!*prefix) {
- kfree(*share);
- *share = NULL;
+ /* merge prefix paths from DFS referral path and target node */
+ if (target_pplen || dfsref_pplen) {
+ len = target_pplen + dfsref_pplen + 2;
+ ppath = kzalloc(len, GFP_KERNEL);
+ if (!ppath) {
+ kfree(target_share);
return -ENOMEM;
}
- if (plen1)
- scnprintf(*prefix, len, "%.*s%c%.*s", (int)plen1, s, sep, (int)plen2, p);
- else
- strscpy(*prefix, p, len);
+ c = strscpy(ppath, target_ppath, len);
+ if (c && dfsref_pplen)
+ ppath[c] = sep;
+ strlcat(ppath, dfsref_ppath, len);
}
+ *share = target_share;
+ *prefix = ppath;
return 0;
}
@@ -1327,9 +1341,9 @@ static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, c
cifs_dbg(VFS, "%s: failed to convert address \'%s\'. skip address matching.\n",
__func__, ip);
} else {
- mutex_lock(&server->srv_mutex);
+ cifs_server_lock(server);
match = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, &sa);
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
}
kfree(ip);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index c6214cfc575f..3b7915af1f62 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -1120,14 +1120,14 @@ sess_establish_session(struct sess_data *sess_data)
struct cifs_ses *ses = sess_data->ses;
struct TCP_Server_Info *server = sess_data->server;
- mutex_lock(&server->srv_mutex);
+ cifs_server_lock(server);
if (!server->session_estab) {
if (server->sign) {
server->session_key.response =
kmemdup(ses->auth_key.response,
ses->auth_key.len, GFP_KERNEL);
if (!server->session_key.response) {
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
return -ENOMEM;
}
server->session_key.len =
@@ -1136,7 +1136,7 @@ sess_establish_session(struct sess_data *sess_data)
server->sequence_number = 0x2;
server->session_estab = true;
}
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
cifs_dbg(FYI, "CIFS session established successfully\n");
return 0;
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index c71c9a44bef4..2e20ee4dab7b 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -38,10 +38,10 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_rqst *rqst,
in_buf->WordCount = 0;
put_bcc(0, in_buf);
- mutex_lock(&server->srv_mutex);
+ cifs_server_lock(server);
rc = cifs_sign_smb(in_buf, server, &mid->sequence_number);
if (rc) {
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
return rc;
}
@@ -55,7 +55,7 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_rqst *rqst,
if (rc < 0)
server->sequence_number--;
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
cifs_dbg(FYI, "issued NT_CANCEL for mid %u, rc = %d\n",
get_mid(in_buf), rc);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index d7ade739cde1..98a76fa791c0 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3859,7 +3859,7 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
if (rc)
goto out;
- if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) == 0)
+ if (cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE)
smb2_set_sparse(xid, tcon, cfile, inode, false);
eof = cpu_to_le64(off + len);
@@ -4345,11 +4345,13 @@ smb3_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
}
}
+#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static bool
smb2_is_read_op(__u32 oplock)
{
return oplock == SMB2_OPLOCK_LEVEL_II;
}
+#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
static bool
smb21_is_read_op(__u32 oplock)
@@ -5448,7 +5450,7 @@ out:
return rc;
}
-
+#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
struct smb_version_operations smb20_operations = {
.compare_fids = smb2_compare_fids,
.setup_request = smb2_setup_request,
@@ -5547,6 +5549,7 @@ struct smb_version_operations smb20_operations = {
.is_status_io_timeout = smb2_is_status_io_timeout,
.is_network_name_deleted = smb2_is_network_name_deleted,
};
+#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
struct smb_version_operations smb21_operations = {
.compare_fids = smb2_compare_fids,
@@ -5878,6 +5881,7 @@ struct smb_version_operations smb311_operations = {
.is_network_name_deleted = smb2_is_network_name_deleted,
};
+#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
struct smb_version_values smb20_values = {
.version_string = SMB20_VERSION_STRING,
.protocol_id = SMB20_PROT_ID,
@@ -5898,6 +5902,7 @@ struct smb_version_values smb20_values = {
.signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
.create_lease_size = sizeof(struct create_lease),
};
+#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
struct smb_version_values smb21_values = {
.version_string = SMB21_VERSION_STRING,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 084be3a90198..0e8c85249579 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1369,13 +1369,13 @@ SMB2_sess_establish_session(struct SMB2_sess_data *sess_data)
struct cifs_ses *ses = sess_data->ses;
struct TCP_Server_Info *server = sess_data->server;
- mutex_lock(&server->srv_mutex);
+ cifs_server_lock(server);
if (server->ops->generate_signingkey) {
rc = server->ops->generate_signingkey(ses, server);
if (rc) {
cifs_dbg(FYI,
"SMB3 session key generation failed\n");
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
return rc;
}
}
@@ -1383,7 +1383,7 @@ SMB2_sess_establish_session(struct SMB2_sess_data *sess_data)
server->sequence_number = 0x2;
server->session_estab = true;
}
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
cifs_dbg(FYI, "SMB2/3 session established successfully\n");
return rc;
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index c3278db1cade..5fbbec22bcc8 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -1382,9 +1382,9 @@ void smbd_destroy(struct TCP_Server_Info *server)
log_rdma_event(INFO, "freeing mr list\n");
wake_up_interruptible_all(&info->wait_mr);
while (atomic_read(&info->mr_used_count)) {
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
msleep(1000);
- mutex_lock(&server->srv_mutex);
+ cifs_server_lock(server);
}
destroy_mr_list(info);
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 05eca41e3b1e..bfc9bd55870a 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -822,7 +822,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
} else
instance = exist_credits->instance;
- mutex_lock(&server->srv_mutex);
+ cifs_server_lock(server);
/*
* We can't use credits obtained from the previous session to send this
@@ -830,14 +830,14 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
* return -EAGAIN in such cases to let callers handle it.
*/
if (instance != server->reconnect_instance) {
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
add_credits_and_wake_if(server, &credits, optype);
return -EAGAIN;
}
mid = server->ops->setup_async_request(server, rqst);
if (IS_ERR(mid)) {
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
add_credits_and_wake_if(server, &credits, optype);
return PTR_ERR(mid);
}
@@ -868,7 +868,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
cifs_delete_mid(mid);
}
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
if (rc == 0)
return 0;
@@ -1109,7 +1109,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
* of smb data.
*/
- mutex_lock(&server->srv_mutex);
+ cifs_server_lock(server);
/*
* All the parts of the compound chain belong obtained credits from the
@@ -1119,7 +1119,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
* handle it.
*/
if (instance != server->reconnect_instance) {
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
for (j = 0; j < num_rqst; j++)
add_credits(server, &credits[j], optype);
return -EAGAIN;
@@ -1131,7 +1131,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
revert_current_mid(server, i);
for (j = 0; j < i; j++)
cifs_delete_mid(midQ[j]);
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
/* Update # of requests on wire to server */
for (j = 0; j < num_rqst; j++)
@@ -1163,7 +1163,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
server->sequence_number -= 2;
}
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
/*
* If sending failed for some reason or it is an oplock break that we
@@ -1190,9 +1190,9 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
spin_unlock(&cifs_tcp_ses_lock);
- mutex_lock(&server->srv_mutex);
+ cifs_server_lock(server);
smb311_update_preauth_hash(ses, server, rqst[0].rq_iov, rqst[0].rq_nvec);
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
spin_lock(&cifs_tcp_ses_lock);
}
@@ -1266,9 +1266,9 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
.iov_len = resp_iov[0].iov_len
};
spin_unlock(&cifs_tcp_ses_lock);
- mutex_lock(&server->srv_mutex);
+ cifs_server_lock(server);
smb311_update_preauth_hash(ses, server, &iov, 1);
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
spin_lock(&cifs_tcp_ses_lock);
}
spin_unlock(&cifs_tcp_ses_lock);
@@ -1385,11 +1385,11 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
and avoid races inside tcp sendmsg code that could cause corruption
of smb data */
- mutex_lock(&server->srv_mutex);
+ cifs_server_lock(server);
rc = allocate_mid(ses, in_buf, &midQ);
if (rc) {
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
/* Update # of requests on wire to server */
add_credits(server, &credits, 0);
return rc;
@@ -1397,7 +1397,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number);
if (rc) {
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
goto out;
}
@@ -1411,7 +1411,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
if (rc < 0)
server->sequence_number -= 2;
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
if (rc < 0)
goto out;
@@ -1530,18 +1530,18 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
and avoid races inside tcp sendmsg code that could cause corruption
of smb data */
- mutex_lock(&server->srv_mutex);
+ cifs_server_lock(server);
rc = allocate_mid(ses, in_buf, &midQ);
if (rc) {
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
return rc;
}
rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number);
if (rc) {
cifs_delete_mid(midQ);
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
return rc;
}
@@ -1554,7 +1554,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
if (rc < 0)
server->sequence_number -= 2;
- mutex_unlock(&server->srv_mutex);
+ cifs_server_unlock(server);
if (rc < 0) {
cifs_delete_mid(midQ);
diff --git a/fs/exec.c b/fs/exec.c
index 14b4b3755580..0989fb8472a1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1312,9 +1312,7 @@ int begin_new_exec(struct linux_binprm * bprm)
if (retval)
goto out_unlock;
- if (me->flags & PF_KTHREAD)
- free_kthread_struct(me);
- me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
+ me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC |
PF_NOFREEZE | PF_NO_SETAFFINITY);
flush_thread();
me->personality &= ~bprm->per_clear;
@@ -1959,6 +1957,10 @@ int kernel_execve(const char *kernel_filename,
int fd = AT_FDCWD;
int retval;
+ /* It is nonsense for kernel threads to call execve */
+ if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
+ return -EINVAL;
+
filename = getname_kernel(kernel_filename);
if (IS_ERR(filename))
return PTR_ERR(filename);
diff --git a/fs/file.c b/fs/file.c
index ee9317346702..dd6692048f4f 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -630,32 +630,23 @@ EXPORT_SYMBOL(fd_install);
* @files: file struct to retrieve file from
* @fd: file descriptor to retrieve file for
*
- * If this functions returns an EINVAL error pointer the fd was beyond the
- * current maximum number of file descriptors for that fdtable.
+ * Context: files->file_lock must be held.
*
- * Returns: The file associated with @fd, on error returns an error pointer.
+ * Returns: The file associated with @fd (NULL if @fd is not open)
*/
static struct file *pick_file(struct files_struct *files, unsigned fd)
{
+ struct fdtable *fdt = files_fdtable(files);
struct file *file;
- struct fdtable *fdt;
- spin_lock(&files->file_lock);
- fdt = files_fdtable(files);
- if (fd >= fdt->max_fds) {
- file = ERR_PTR(-EINVAL);
- goto out_unlock;
- }
+ if (fd >= fdt->max_fds)
+ return NULL;
+
file = fdt->fd[fd];
- if (!file) {
- file = ERR_PTR(-EBADF);
- goto out_unlock;
+ if (file) {
+ rcu_assign_pointer(fdt->fd[fd], NULL);
+ __put_unused_fd(files, fd);
}
- rcu_assign_pointer(fdt->fd[fd], NULL);
- __put_unused_fd(files, fd);
-
-out_unlock:
- spin_unlock(&files->file_lock);
return file;
}
@@ -664,8 +655,10 @@ int close_fd(unsigned fd)
struct files_struct *files = current->files;
struct file *file;
+ spin_lock(&files->file_lock);
file = pick_file(files, fd);
- if (IS_ERR(file))
+ spin_unlock(&files->file_lock);
+ if (!file)
return -EBADF;
return filp_close(file, files);
@@ -702,20 +695,25 @@ static inline void __range_cloexec(struct files_struct *cur_fds,
static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
unsigned int max_fd)
{
+ unsigned n;
+
+ rcu_read_lock();
+ n = last_fd(files_fdtable(cur_fds));
+ rcu_read_unlock();
+ max_fd = min(max_fd, n);
+
while (fd <= max_fd) {
struct file *file;
+ spin_lock(&cur_fds->file_lock);
file = pick_file(cur_fds, fd++);
- if (!IS_ERR(file)) {
+ spin_unlock(&cur_fds->file_lock);
+
+ if (file) {
/* found a valid file to close */
filp_close(file, cur_fds);
cond_resched();
- continue;
}
-
- /* beyond the last fd in that table */
- if (PTR_ERR(file) == -EINVAL)
- return;
}
}
@@ -795,26 +793,9 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
* See close_fd_get_file() below, this variant assumes current->files->file_lock
* is held.
*/
-int __close_fd_get_file(unsigned int fd, struct file **res)
+struct file *__close_fd_get_file(unsigned int fd)
{
- struct files_struct *files = current->files;
- struct file *file;
- struct fdtable *fdt;
-
- fdt = files_fdtable(files);
- if (fd >= fdt->max_fds)
- goto out_err;
- file = fdt->fd[fd];
- if (!file)
- goto out_err;
- rcu_assign_pointer(fdt->fd[fd], NULL);
- __put_unused_fd(files, fd);
- get_file(file);
- *res = file;
- return 0;
-out_err:
- *res = NULL;
- return -ENOENT;
+ return pick_file(current->files, fd);
}
/*
@@ -822,16 +803,16 @@ out_err:
* The caller must ensure that filp_close() called on the file, and then
* an fput().
*/
-int close_fd_get_file(unsigned int fd, struct file **res)
+struct file *close_fd_get_file(unsigned int fd)
{
struct files_struct *files = current->files;
- int ret;
+ struct file *file;
spin_lock(&files->file_lock);
- ret = __close_fd_get_file(fd, res);
+ file = pick_file(files, fd);
spin_unlock(&files->file_lock);
- return ret;
+ return file;
}
void do_close_on_exec(struct files_struct *files)
@@ -871,7 +852,7 @@ void do_close_on_exec(struct files_struct *files)
}
static inline struct file *__fget_files_rcu(struct files_struct *files,
- unsigned int fd, fmode_t mask, unsigned int refs)
+ unsigned int fd, fmode_t mask)
{
for (;;) {
struct file *file;
@@ -897,10 +878,9 @@ static inline struct file *__fget_files_rcu(struct files_struct *files,
* Such a race can take two forms:
*
* (a) the file ref already went down to zero,
- * and get_file_rcu_many() fails. Just try
- * again:
+ * and get_file_rcu() fails. Just try again:
*/
- if (unlikely(!get_file_rcu_many(file, refs)))
+ if (unlikely(!get_file_rcu(file)))
continue;
/*
@@ -909,11 +889,11 @@ static inline struct file *__fget_files_rcu(struct files_struct *files,
* pointer having changed, because it always goes
* hand-in-hand with 'fdt'.
*
- * If so, we need to put our refs and try again.
+ * If so, we need to put our ref and try again.
*/
if (unlikely(rcu_dereference_raw(files->fdt) != fdt) ||
unlikely(rcu_dereference_raw(*fdentry) != file)) {
- fput_many(file, refs);
+ fput(file);
continue;
}
@@ -926,37 +906,31 @@ static inline struct file *__fget_files_rcu(struct files_struct *files,
}
static struct file *__fget_files(struct files_struct *files, unsigned int fd,
- fmode_t mask, unsigned int refs)
+ fmode_t mask)
{
struct file *file;
rcu_read_lock();
- file = __fget_files_rcu(files, fd, mask, refs);
+ file = __fget_files_rcu(files, fd, mask);
rcu_read_unlock();
return file;
}
-static inline struct file *__fget(unsigned int fd, fmode_t mask,
- unsigned int refs)
-{
- return __fget_files(current->files, fd, mask, refs);
-}
-
-struct file *fget_many(unsigned int fd, unsigned int refs)
+static inline struct file *__fget(unsigned int fd, fmode_t mask)
{
- return __fget(fd, FMODE_PATH, refs);
+ return __fget_files(current->files, fd, mask);
}
struct file *fget(unsigned int fd)
{
- return __fget(fd, FMODE_PATH, 1);
+ return __fget(fd, FMODE_PATH);
}
EXPORT_SYMBOL(fget);
struct file *fget_raw(unsigned int fd)
{
- return __fget(fd, 0, 1);
+ return __fget(fd, 0);
}
EXPORT_SYMBOL(fget_raw);
@@ -966,7 +940,7 @@ struct file *fget_task(struct task_struct *task, unsigned int fd)
task_lock(task);
if (task->files)
- file = __fget_files(task->files, fd, 0, 1);
+ file = __fget_files(task->files, fd, 0);
task_unlock(task);
return file;
@@ -1035,7 +1009,7 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask)
return 0;
return (unsigned long)file;
} else {
- file = __fget(fd, mask, 1);
+ file = __fget(fd, mask);
if (!file)
return 0;
return FDPUT_FPUT | (unsigned long)file;
diff --git a/fs/file_table.c b/fs/file_table.c
index ada8fe814db9..5424e3a8df5f 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -368,9 +368,9 @@ EXPORT_SYMBOL_GPL(flush_delayed_fput);
static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
-void fput_many(struct file *file, unsigned int refs)
+void fput(struct file *file)
{
- if (atomic_long_sub_and_test(refs, &file->f_count)) {
+ if (atomic_long_dec_and_test(&file->f_count)) {
struct task_struct *task = current;
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
@@ -389,11 +389,6 @@ void fput_many(struct file *file, unsigned int refs)
}
}
-void fput(struct file *file)
-{
- fput_many(file, 1);
-}
-
/*
* synchronous analog of fput(); for kernel threads that might be needed
* in some umount() (and thus can't use flush_delayed_fput() without
diff --git a/fs/freevxfs/vxfs.h b/fs/freevxfs/vxfs.h
index a41ea0ba6943..bffd156d6434 100644
--- a/fs/freevxfs/vxfs.h
+++ b/fs/freevxfs/vxfs.h
@@ -1,32 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2000-2001 Christoph Hellwig.
* Copyright (c) 2016 Krzysztof Blaszkowski
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL").
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
*/
#ifndef _VXFS_SUPER_H_
#define _VXFS_SUPER_H_
diff --git a/fs/freevxfs/vxfs_bmap.c b/fs/freevxfs/vxfs_bmap.c
index 1fd41cf98b9f..de2a5bccb930 100644
--- a/fs/freevxfs/vxfs_bmap.c
+++ b/fs/freevxfs/vxfs_bmap.c
@@ -1,30 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2000-2001 Christoph Hellwig.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL").
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
*/
/*
diff --git a/fs/freevxfs/vxfs_dir.h b/fs/freevxfs/vxfs_dir.h
index acc5477b3f23..fbcd603365ad 100644
--- a/fs/freevxfs/vxfs_dir.h
+++ b/fs/freevxfs/vxfs_dir.h
@@ -1,31 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2000-2001 Christoph Hellwig.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL").
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
*/
#ifndef _VXFS_DIR_H_
#define _VXFS_DIR_H_
diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h
index f5c428e21024..3a2180c5e208 100644
--- a/fs/freevxfs/vxfs_extern.h
+++ b/fs/freevxfs/vxfs_extern.h
@@ -1,31 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2000-2001 Christoph Hellwig.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL").
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
*/
#ifndef _VXFS_EXTERN_H_
#define _VXFS_EXTERN_H_
diff --git a/fs/freevxfs/vxfs_fshead.c b/fs/freevxfs/vxfs_fshead.c
index a4610a77649e..c1174a3f8990 100644
--- a/fs/freevxfs/vxfs_fshead.c
+++ b/fs/freevxfs/vxfs_fshead.c
@@ -1,31 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2000-2001 Christoph Hellwig.
* Copyright (c) 2016 Krzysztof Blaszkowski
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL").
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
*/
/*
diff --git a/fs/freevxfs/vxfs_fshead.h b/fs/freevxfs/vxfs_fshead.h
index e026f0c49159..dfd2147599c4 100644
--- a/fs/freevxfs/vxfs_fshead.h
+++ b/fs/freevxfs/vxfs_fshead.h
@@ -1,32 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2000-2001 Christoph Hellwig.
* Copyright (c) 2016 Krzysztof Blaszkowski
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL").
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
*/
#ifndef _VXFS_FSHEAD_H_
#define _VXFS_FSHEAD_H_
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index a37431e443d3..c2ef9f0debbd 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -1,30 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2000-2001 Christoph Hellwig.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL").
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
*/
/*
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 1f41b25ef38b..ceb6a12649ba 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -1,31 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2000-2001 Christoph Hellwig.
* Copyright (c) 2016 Krzysztof Blaszkowski
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL").
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
*/
/*
diff --git a/fs/freevxfs/vxfs_inode.h b/fs/freevxfs/vxfs_inode.h
index f012abed125d..1e9e138d2b33 100644
--- a/fs/freevxfs/vxfs_inode.h
+++ b/fs/freevxfs/vxfs_inode.h
@@ -1,32 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2000-2001 Christoph Hellwig.
* Copyright (c) 2016 Krzysztof Blaszkowski
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL").
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
*/
#ifndef _VXFS_INODE_H_
#define _VXFS_INODE_H_
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index a51425634f65..f04ba2ed1e1a 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -1,31 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2000-2001 Christoph Hellwig.
* Copyright (c) 2016 Krzysztof Blaszkowski
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL").
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
*/
/*
diff --git a/fs/freevxfs/vxfs_olt.c b/fs/freevxfs/vxfs_olt.c
index 813da6685151..23f35187c289 100644
--- a/fs/freevxfs/vxfs_olt.c
+++ b/fs/freevxfs/vxfs_olt.c
@@ -1,30 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2000-2001 Christoph Hellwig.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL").
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
*/
/*
diff --git a/fs/freevxfs/vxfs_olt.h b/fs/freevxfs/vxfs_olt.h
index 0c0b0c9fa557..53afba08d617 100644
--- a/fs/freevxfs/vxfs_olt.h
+++ b/fs/freevxfs/vxfs_olt.h
@@ -1,31 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2000-2001 Christoph Hellwig.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL").
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
*/
#ifndef _VXFS_OLT_H_
#define _VXFS_OLT_H_
diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c
index 6143ebab940d..0e633d2bfc7d 100644
--- a/fs/freevxfs/vxfs_subr.c
+++ b/fs/freevxfs/vxfs_subr.c
@@ -1,30 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2000-2001 Christoph Hellwig.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL").
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
*/
/*
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 22eed5a73ac2..c3b82f716f9a 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -1,31 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2000-2001 Christoph Hellwig.
* Copyright (c) 2016 Krzysztof Blaszkowski
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL").
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
*/
/*
diff --git a/fs/fsopen.c b/fs/fsopen.c
index 27a890aa493a..fc9d2d9fd234 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -119,7 +119,7 @@ SYSCALL_DEFINE2(fsopen, const char __user *, _fs_name, unsigned int, flags)
const char *fs_name;
int ret;
- if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ if (!may_mount())
return -EPERM;
if (flags & ~FSOPEN_CLOEXEC)
@@ -162,7 +162,7 @@ SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags
unsigned int lookup_flags;
int ret;
- if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ if (!may_mount())
return -EPERM;
if ((flags & ~(FSPICK_CLOEXEC |
diff --git a/fs/internal.h b/fs/internal.h
index 9a6c233ee7f1..87e96b9024ce 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -84,6 +84,7 @@ extern int __mnt_want_write_file(struct file *);
extern void __mnt_drop_write_file(struct file *);
extern void dissolve_on_fput(struct vfsmount *);
+extern bool may_mount(void);
int path_mount(const char *dev_name, struct path *path,
const char *type_page, unsigned long flags, void *data_page);
@@ -125,7 +126,7 @@ extern struct file *do_file_open_root(const struct path *,
const char *, const struct open_flags *);
extern struct open_how build_open_how(int flags, umode_t mode);
extern int build_open_flags(const struct open_how *how, struct open_flags *op);
-extern int __close_fd_get_file(unsigned int fd, struct file **res);
+extern struct file *__close_fd_get_file(unsigned int fd);
long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
int chmod_common(const struct path *path, umode_t mode);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9f1c682d7caf..86f9df56526b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -112,7 +112,8 @@
IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS)
#define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \
- REQ_F_POLLED | REQ_F_CREDS | REQ_F_ASYNC_DATA)
+ REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \
+ REQ_F_ASYNC_DATA)
#define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\
IO_REQ_CLEAN_FLAGS)
@@ -540,6 +541,7 @@ struct io_uring_task {
const struct io_ring_ctx *last;
struct io_wq *io_wq;
struct percpu_counter inflight;
+ atomic_t inflight_tracked;
atomic_t in_idle;
spinlock_t task_lock;
@@ -574,6 +576,7 @@ struct io_close {
struct file *file;
int fd;
u32 file_slot;
+ u32 flags;
};
struct io_timeout_data {
@@ -1355,8 +1358,6 @@ static void io_clean_op(struct io_kiocb *req);
static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
unsigned issue_flags);
static struct file *io_file_get_normal(struct io_kiocb *req, int fd);
-static void io_drop_inflight_file(struct io_kiocb *req);
-static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags);
static void io_queue_sqe(struct io_kiocb *req);
static void io_rsrc_put_work(struct work_struct *work);
@@ -1366,7 +1367,9 @@ static int io_req_prep_async(struct io_kiocb *req);
static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
unsigned int issue_flags, u32 slot_index);
-static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
+static int __io_close_fixed(struct io_kiocb *req, unsigned int issue_flags,
+ unsigned int offset);
+static inline int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
static void io_eventfd_signal(struct io_ring_ctx *ctx);
@@ -1757,9 +1760,29 @@ static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
bool cancel_all)
__must_hold(&req->ctx->timeout_lock)
{
+ struct io_kiocb *req;
+
if (task && head->task != task)
return false;
- return cancel_all;
+ if (cancel_all)
+ return true;
+
+ io_for_each_link(req, head) {
+ if (req->flags & REQ_F_INFLIGHT)
+ return true;
+ }
+ return false;
+}
+
+static bool io_match_linked(struct io_kiocb *head)
+{
+ struct io_kiocb *req;
+
+ io_for_each_link(req, head) {
+ if (req->flags & REQ_F_INFLIGHT)
+ return true;
+ }
+ return false;
}
/*
@@ -1769,9 +1792,24 @@ static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
bool cancel_all)
{
+ bool matched;
+
if (task && head->task != task)
return false;
- return cancel_all;
+ if (cancel_all)
+ return true;
+
+ if (head->flags & REQ_F_LINK_TIMEOUT) {
+ struct io_ring_ctx *ctx = head->ctx;
+
+ /* protect against races with linked timeouts */
+ spin_lock_irq(&ctx->timeout_lock);
+ matched = io_match_linked(head);
+ spin_unlock_irq(&ctx->timeout_lock);
+ } else {
+ matched = io_match_linked(head);
+ }
+ return matched;
}
static inline bool req_has_async_data(struct io_kiocb *req)
@@ -1927,6 +1965,14 @@ static inline bool io_req_ffs_set(struct io_kiocb *req)
return req->flags & REQ_F_FIXED_FILE;
}
+static inline void io_req_track_inflight(struct io_kiocb *req)
+{
+ if (!(req->flags & REQ_F_INFLIGHT)) {
+ req->flags |= REQ_F_INFLIGHT;
+ atomic_inc(&current->io_uring->inflight_tracked);
+ }
+}
+
static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req)
{
if (WARN_ON_ONCE(!req->link))
@@ -2988,8 +3034,6 @@ static void __io_req_task_work_add(struct io_kiocb *req,
unsigned long flags;
bool running;
- io_drop_inflight_file(req);
-
spin_lock_irqsave(&tctx->task_lock, flags);
wq_list_add_tail(&req->io_task_work.node, list);
running = tctx->task_running;
@@ -4176,6 +4220,16 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw)
return 0;
}
+static int io_readv_prep_async(struct io_kiocb *req)
+{
+ return io_rw_prep_async(req, READ);
+}
+
+static int io_writev_prep_async(struct io_kiocb *req)
+{
+ return io_rw_prep_async(req, WRITE);
+}
+
/*
* This is our waitqueue callback handler, registered through __folio_lock_async()
* when we initially tried to do the IO with the iocb armed our waitqueue.
@@ -5103,42 +5157,6 @@ static int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
return 0;
}
-static int io_shutdown_prep(struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
-{
-#if defined(CONFIG_NET)
- if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
- sqe->buf_index || sqe->splice_fd_in))
- return -EINVAL;
-
- req->shutdown.how = READ_ONCE(sqe->len);
- return 0;
-#else
- return -EOPNOTSUPP;
-#endif
-}
-
-static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
-{
-#if defined(CONFIG_NET)
- struct socket *sock;
- int ret;
-
- if (issue_flags & IO_URING_F_NONBLOCK)
- return -EAGAIN;
-
- sock = sock_from_file(req->file);
- if (unlikely(!sock))
- return -ENOTSOCK;
-
- ret = __sys_shutdown_sock(sock, req->shutdown.how);
- io_req_complete(req, ret);
- return 0;
-#else
- return -EOPNOTSUPP;
-#endif
-}
-
static int __io_splice_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
@@ -5445,15 +5463,11 @@ static int io_file_bitmap_get(struct io_ring_ctx *ctx)
unsigned long nr = ctx->nr_user_files;
int ret;
- if (table->alloc_hint >= nr)
- table->alloc_hint = 0;
-
do {
ret = find_next_zero_bit(table->bitmap, nr, table->alloc_hint);
- if (ret != nr) {
- table->alloc_hint = ret + 1;
+ if (ret != nr)
return ret;
- }
+
if (!table->alloc_hint)
break;
@@ -5464,6 +5478,10 @@ static int io_file_bitmap_get(struct io_ring_ctx *ctx)
return -ENFILE;
}
+/*
+ * Note when io_fixed_fd_install() returns error value, it will ensure
+ * fput() is called correspondingly.
+ */
static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
struct file *file, unsigned int file_slot)
{
@@ -5471,26 +5489,24 @@ static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
struct io_ring_ctx *ctx = req->ctx;
int ret;
+ io_ring_submit_lock(ctx, issue_flags);
+
if (alloc_slot) {
- io_ring_submit_lock(ctx, issue_flags);
ret = io_file_bitmap_get(ctx);
- if (unlikely(ret < 0)) {
- io_ring_submit_unlock(ctx, issue_flags);
- return ret;
- }
-
+ if (unlikely(ret < 0))
+ goto err;
file_slot = ret;
} else {
file_slot--;
}
ret = io_install_fixed_file(req, file, issue_flags, file_slot);
- if (alloc_slot) {
- io_ring_submit_unlock(ctx, issue_flags);
- if (!ret)
- return file_slot;
- }
-
+ if (!ret && alloc_slot)
+ ret = file_slot;
+err:
+ io_ring_submit_unlock(ctx, issue_flags);
+ if (unlikely(ret < 0))
+ fput(file);
return ret;
}
@@ -5972,14 +5988,18 @@ static int io_statx(struct io_kiocb *req, unsigned int issue_flags)
static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index)
+ if (sqe->off || sqe->addr || sqe->len || sqe->buf_index)
return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
req->close.fd = READ_ONCE(sqe->fd);
req->close.file_slot = READ_ONCE(sqe->file_index);
- if (req->close.file_slot && req->close.fd)
+ req->close.flags = READ_ONCE(sqe->close_flags);
+ if (req->close.flags & ~IORING_CLOSE_FD_AND_FILE_SLOT)
+ return -EINVAL;
+ if (!(req->close.flags & IORING_CLOSE_FD_AND_FILE_SLOT) &&
+ req->close.file_slot && req->close.fd)
return -EINVAL;
return 0;
@@ -5995,7 +6015,8 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
if (req->close.file_slot) {
ret = io_close_fixed(req, issue_flags);
- goto err;
+ if (ret || !(req->close.flags & IORING_CLOSE_FD_AND_FILE_SLOT))
+ goto err;
}
spin_lock(&files->file_lock);
@@ -6018,13 +6039,10 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
return -EAGAIN;
}
- ret = __close_fd_get_file(close->fd, &file);
+ file = __close_fd_get_file(close->fd);
spin_unlock(&files->file_lock);
- if (ret < 0) {
- if (ret == -ENOENT)
- ret = -EBADF;
+ if (!file)
goto err;
- }
/* No ->flush() or already async, safely close from here */
ret = filp_close(file, current->files);
@@ -6063,6 +6081,34 @@ static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags)
}
#if defined(CONFIG_NET)
+static int io_shutdown_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
+ sqe->buf_index || sqe->splice_fd_in))
+ return -EINVAL;
+
+ req->shutdown.how = READ_ONCE(sqe->len);
+ return 0;
+}
+
+static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct socket *sock;
+ int ret;
+
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ return -EAGAIN;
+
+ sock = sock_from_file(req->file);
+ if (unlikely(!sock))
+ return -ENOTSOCK;
+
+ ret = __sys_shutdown_sock(sock, req->shutdown.how);
+ io_req_complete(req, ret);
+ return 0;
+}
+
static bool io_net_retry(struct socket *sock, int flags)
{
if (!(flags & MSG_WAITALL))
@@ -6674,8 +6720,8 @@ static int io_socket(struct io_kiocb *req, unsigned int issue_flags)
fd_install(fd, file);
ret = fd;
} else {
- ret = io_install_fixed_file(req, file, issue_flags,
- sock->file_slot - 1);
+ ret = io_fixed_fd_install(req, issue_flags, file,
+ sock->file_slot);
}
__io_req_complete(req, issue_flags, ret, 0);
return 0;
@@ -6767,6 +6813,7 @@ IO_NETOP_PREP_ASYNC(recvmsg);
IO_NETOP_PREP_ASYNC(connect);
IO_NETOP_PREP(accept);
IO_NETOP_PREP(socket);
+IO_NETOP_PREP(shutdown);
IO_NETOP_FN(send);
IO_NETOP_FN(recv);
#endif /* CONFIG_NET */
@@ -6905,10 +6952,6 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
if (!req->cqe.res) {
struct poll_table_struct pt = { ._key = req->apoll_events };
- unsigned flags = locked ? 0 : IO_URING_F_UNLOCKED;
-
- if (unlikely(!io_assign_file(req, flags)))
- return -EBADF;
req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
}
@@ -7390,7 +7433,7 @@ static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
return demangle_poll(events) | (events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
}
-static int io_poll_update_prep(struct io_kiocb *req,
+static int io_poll_remove_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
struct io_poll_update *upd = &req->poll_update;
@@ -7454,7 +7497,7 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
return 0;
}
-static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
+static int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_cancel_data cd = { .data = req->poll_update.old_user_data, };
struct io_ring_ctx *ctx = req->ctx;
@@ -7698,8 +7741,9 @@ static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
return 0;
}
-static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
- bool is_timeout_link)
+static int __io_timeout_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe,
+ bool is_timeout_link)
{
struct io_timeout_data *data;
unsigned flags;
@@ -7754,6 +7798,18 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
return 0;
}
+static int io_timeout_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ return __io_timeout_prep(req, sqe, false);
+}
+
+static int io_link_timeout_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ return __io_timeout_prep(req, sqe, true);
+}
+
static int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -7970,7 +8026,7 @@ done:
return 0;
}
-static int io_rsrc_update_prep(struct io_kiocb *req,
+static int io_files_update_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
@@ -7986,6 +8042,41 @@ static int io_rsrc_update_prep(struct io_kiocb *req,
return 0;
}
+/*
+ * Handle a files-update request whose offset is IORING_FILE_INDEX_ALLOC.
+ *
+ * For each of the rsrc_update.nr_args descriptors in the user array at
+ * rsrc_update.arg: read the fd, take a reference with fget(), install it
+ * via io_fixed_fd_install(..., IORING_FILE_INDEX_ALLOC) and write the
+ * non-negative value that call returned back over the same array entry.
+ * That value is what gets handed to __io_close_fixed() as the slot offset
+ * if the write-back fails.
+ *
+ * Processing stops at the first failure. Returns the number of entries
+ * completed when at least one succeeded; otherwise the error of the first
+ * entry (-EFAULT, -EBADF, or the error from io_fixed_fd_install()).
+ *
+ * NOTE(review): ret is returned uninitialised if nr_args is 0; presumably
+ * the prep handler rejects that case - confirm.
+ */
+static int io_files_update_with_index_alloc(struct io_kiocb *req,
+					    unsigned int issue_flags)
+{
+	__s32 __user *fds = u64_to_user_ptr(req->rsrc_update.arg);
+	unsigned int done;
+	struct file *file;
+	int ret, fd;
+
+	for (done = 0; done < req->rsrc_update.nr_args; done++) {
+		if (copy_from_user(&fd, &fds[done], sizeof(fd))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		file = fget(fd);
+		if (!file) {
+			ret = -EBADF;
+			break;
+		}
+		ret = io_fixed_fd_install(req, issue_flags, file,
+					  IORING_FILE_INDEX_ALLOC);
+		if (ret < 0)
+			break;
+		if (copy_to_user(&fds[done], &ret, sizeof(ret))) {
+			/*
+			 * ret still holds the slot that was just installed.
+			 * Release that slot before replacing ret with the
+			 * error code; doing it the other way round passes
+			 * -EFAULT as the offset and leaves the slot occupied.
+			 */
+			__io_close_fixed(req, issue_flags, ret);
+			ret = -EFAULT;
+			break;
+		}
+	}
+
+	if (done)
+		return done;
+	return ret;
+}
+
static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -7999,10 +8090,14 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
up.resv = 0;
up.resv2 = 0;
- io_ring_submit_lock(ctx, issue_flags);
- ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
- &up, req->rsrc_update.nr_args);
- io_ring_submit_unlock(ctx, issue_flags);
+ if (req->rsrc_update.offset == IORING_FILE_INDEX_ALLOC) {
+ ret = io_files_update_with_index_alloc(req, issue_flags);
+ } else {
+ io_ring_submit_lock(ctx, issue_flags);
+ ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
+ &up, req->rsrc_update.nr_args);
+ io_ring_submit_unlock(ctx, issue_flags);
+ }
if (ret < 0)
req_set_fail(req);
@@ -8025,7 +8120,7 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
case IORING_OP_POLL_ADD:
return io_poll_add_prep(req, sqe);
case IORING_OP_POLL_REMOVE:
- return io_poll_update_prep(req, sqe);
+ return io_poll_remove_prep(req, sqe);
case IORING_OP_FSYNC:
return io_fsync_prep(req, sqe);
case IORING_OP_SYNC_FILE_RANGE:
@@ -8039,13 +8134,13 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
case IORING_OP_CONNECT:
return io_connect_prep(req, sqe);
case IORING_OP_TIMEOUT:
- return io_timeout_prep(req, sqe, false);
+ return io_timeout_prep(req, sqe);
case IORING_OP_TIMEOUT_REMOVE:
return io_timeout_remove_prep(req, sqe);
case IORING_OP_ASYNC_CANCEL:
return io_async_cancel_prep(req, sqe);
case IORING_OP_LINK_TIMEOUT:
- return io_timeout_prep(req, sqe, true);
+ return io_link_timeout_prep(req, sqe);
case IORING_OP_ACCEPT:
return io_accept_prep(req, sqe);
case IORING_OP_FALLOCATE:
@@ -8055,7 +8150,7 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
case IORING_OP_CLOSE:
return io_close_prep(req, sqe);
case IORING_OP_FILES_UPDATE:
- return io_rsrc_update_prep(req, sqe);
+ return io_files_update_prep(req, sqe);
case IORING_OP_STATX:
return io_statx_prep(req, sqe);
case IORING_OP_FADVISE:
@@ -8123,9 +8218,9 @@ static int io_req_prep_async(struct io_kiocb *req)
switch (req->opcode) {
case IORING_OP_READV:
- return io_rw_prep_async(req, READ);
+ return io_readv_prep_async(req);
case IORING_OP_WRITEV:
- return io_rw_prep_async(req, WRITE);
+ return io_writev_prep_async(req);
case IORING_OP_SENDMSG:
return io_sendmsg_prep_async(req);
case IORING_OP_RECVMSG:
@@ -8264,6 +8359,11 @@ static void io_clean_op(struct io_kiocb *req)
kfree(req->apoll);
req->apoll = NULL;
}
+ if (req->flags & REQ_F_INFLIGHT) {
+ struct io_uring_task *tctx = req->task->io_uring;
+
+ atomic_dec(&tctx->inflight_tracked);
+ }
if (req->flags & REQ_F_CREDS)
put_cred(req->creds);
if (req->flags & REQ_F_ASYNC_DATA) {
@@ -8288,6 +8388,7 @@ static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags)
static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
{
+ const struct io_op_def *def = &io_op_defs[req->opcode];
const struct cred *creds = NULL;
int ret;
@@ -8297,7 +8398,7 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred()))
creds = override_creds(req->creds);
- if (!io_op_defs[req->opcode].audit_skip)
+ if (!def->audit_skip)
audit_uring_entry(req->opcode);
switch (req->opcode) {
@@ -8321,7 +8422,7 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
ret = io_poll_add(req, issue_flags);
break;
case IORING_OP_POLL_REMOVE:
- ret = io_poll_update(req, issue_flags);
+ ret = io_poll_remove(req, issue_flags);
break;
case IORING_OP_SYNC_FILE_RANGE:
ret = io_sync_file_range(req, issue_flags);
@@ -8436,7 +8537,7 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
break;
}
- if (!io_op_defs[req->opcode].audit_skip)
+ if (!def->audit_skip)
audit_uring_exit(!ret, ret);
if (creds)
@@ -8569,19 +8670,6 @@ out:
return file;
}
-/*
- * Drop the file for requeue operations. Only used of req->file is the
- * io_uring descriptor itself.
- */
-static void io_drop_inflight_file(struct io_kiocb *req)
-{
- if (unlikely(req->flags & REQ_F_INFLIGHT)) {
- fput(req->file);
- req->file = NULL;
- req->flags &= ~REQ_F_INFLIGHT;
- }
-}
-
static struct file *io_file_get_normal(struct io_kiocb *req, int fd)
{
struct file *file = fget(fd);
@@ -8590,7 +8678,7 @@ static struct file *io_file_get_normal(struct io_kiocb *req, int fd)
/* we don't allow fixed io_uring files */
if (file && file->f_op == &io_uring_fops)
- req->flags |= REQ_F_INFLIGHT;
+ io_req_track_inflight(req);
return file;
}
@@ -8788,6 +8876,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
const struct io_uring_sqe *sqe)
__must_hold(&ctx->uring_lock)
{
+ const struct io_op_def *def;
unsigned int sqe_flags;
int personality;
u8 opcode;
@@ -8805,12 +8894,13 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
req->opcode = 0;
return -EINVAL;
}
+ def = &io_op_defs[opcode];
if (unlikely(sqe_flags & ~SQE_COMMON_FLAGS)) {
/* enforce forwards compatibility on users */
if (sqe_flags & ~SQE_VALID_FLAGS)
return -EINVAL;
if (sqe_flags & IOSQE_BUFFER_SELECT) {
- if (!io_op_defs[opcode].buffer_select)
+ if (!def->buffer_select)
return -EOPNOTSUPP;
req->buf_index = READ_ONCE(sqe->buf_group);
}
@@ -8836,12 +8926,12 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
}
}
- if (!io_op_defs[opcode].ioprio && sqe->ioprio)
+ if (!def->ioprio && sqe->ioprio)
return -EINVAL;
- if (!io_op_defs[opcode].iopoll && (ctx->flags & IORING_SETUP_IOPOLL))
+ if (!def->iopoll && (ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (io_op_defs[opcode].needs_file) {
+ if (def->needs_file) {
struct io_submit_state *state = &ctx->submit_state;
req->cqe.fd = READ_ONCE(sqe->fd);
@@ -8850,7 +8940,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
* Plug now if we have more than 2 IO left after this, and the
* target is potentially a read/write to block based storage.
*/
- if (state->need_plug && io_op_defs[opcode].plug) {
+ if (state->need_plug && def->plug) {
state->plug_started = true;
state->need_plug = false;
blk_start_plug_nr_ios(&state->plug, state->submit_nr);
@@ -9658,8 +9748,7 @@ static inline void io_file_bitmap_set(struct io_file_table *table, int bit)
{
WARN_ON_ONCE(test_bit(bit, table->bitmap));
__set_bit(bit, table->bitmap);
- if (bit == table->alloc_hint)
- table->alloc_hint++;
+ table->alloc_hint = bit + 1;
}
static inline void io_file_bitmap_clear(struct io_file_table *table, int bit)
@@ -10113,21 +10202,19 @@ static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
unsigned int issue_flags, u32 slot_index)
+ __must_hold(&req->ctx->uring_lock)
{
struct io_ring_ctx *ctx = req->ctx;
bool needs_switch = false;
struct io_fixed_file *file_slot;
- int ret = -EBADF;
+ int ret;
- io_ring_submit_lock(ctx, issue_flags);
if (file->f_op == &io_uring_fops)
- goto err;
- ret = -ENXIO;
+ return -EBADF;
if (!ctx->file_data)
- goto err;
- ret = -EINVAL;
+ return -ENXIO;
if (slot_index >= ctx->nr_user_files)
- goto err;
+ return -EINVAL;
slot_index = array_index_nospec(slot_index, ctx->nr_user_files);
file_slot = io_fixed_file_slot(&ctx->file_table, slot_index);
@@ -10158,15 +10245,14 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
err:
if (needs_switch)
io_rsrc_node_switch(ctx, ctx->file_data);
- io_ring_submit_unlock(ctx, issue_flags);
if (ret)
fput(file);
return ret;
}
-static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
+static int __io_close_fixed(struct io_kiocb *req, unsigned int issue_flags,
+ unsigned int offset)
{
- unsigned int offset = req->close.file_slot - 1;
struct io_ring_ctx *ctx = req->ctx;
struct io_fixed_file *file_slot;
struct file *file;
@@ -10203,6 +10289,11 @@ out:
return ret;
}
+/*
+ * Close the fixed file named by the request's own close.file_slot.
+ * The table offset handed to __io_close_fixed() is file_slot - 1.
+ */
+static inline int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
+{
+	unsigned int offset = req->close.file_slot - 1;
+
+	return __io_close_fixed(req, issue_flags, offset);
+}
+
static int __io_sqe_files_update(struct io_ring_ctx *ctx,
struct io_uring_rsrc_update2 *up,
unsigned nr_args)
@@ -10351,6 +10442,7 @@ static __cold int io_uring_alloc_task_context(struct task_struct *task,
xa_init(&tctx->xa);
init_waitqueue_head(&tctx->wait);
atomic_set(&tctx->in_idle, 0);
+ atomic_set(&tctx->inflight_tracked, 0);
task->io_uring = tctx;
spin_lock_init(&tctx->task_lock);
INIT_WQ_LIST(&tctx->task_list);
@@ -11046,6 +11138,7 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx)
xa_for_each(&ctx->io_bl_xa, index, bl) {
xa_erase(&ctx->io_bl_xa, bl->bgid);
__io_remove_buffers(ctx, bl, -1U);
+ kfree(bl);
}
while (!list_empty(&ctx->io_buffers_pages)) {
@@ -11581,7 +11674,7 @@ static __cold void io_uring_clean_tctx(struct io_uring_task *tctx)
static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
{
if (tracked)
- return 0;
+ return atomic_read(&tctx->inflight_tracked);
return percpu_counter_sum(&tctx->inflight);
}
@@ -11957,14 +12050,14 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
return -EINVAL;
fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
f.file = tctx->registered_rings[fd];
- if (unlikely(!f.file))
- return -EBADF;
+ f.flags = 0;
} else {
f = fdget(fd);
- if (unlikely(!f.file))
- return -EBADF;
}
+ if (unlikely(!f.file))
+ return -EBADF;
+
ret = -EOPNOTSUPP;
if (unlikely(f.file->f_op != &io_uring_fops))
goto out_fput;
@@ -12062,8 +12155,7 @@ iopoll_locked:
out:
percpu_ref_put(&ctx->refs);
out_fput:
- if (!(flags & IORING_ENTER_REGISTERED_RING))
- fdput(f);
+ fdput(f);
return ret;
}
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 7e9abdb89712..acd32f05b519 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -43,9 +43,9 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
jffs2_dbg(1, "%s(): erase block %#08x (range %#08x-%#08x)\n",
__func__,
jeb->offset, jeb->offset, jeb->offset + c->sector_size);
- instr = kmalloc(sizeof(struct erase_info), GFP_KERNEL);
+ instr = kzalloc(sizeof(struct erase_info), GFP_KERNEL);
if (!instr) {
- pr_warn("kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n");
+ pr_warn("kzalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n");
mutex_lock(&c->erase_free_sem);
spin_lock(&c->erase_completion_lock);
list_move(&jeb->list, &c->erase_pending_list);
@@ -57,8 +57,6 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
return;
}
- memset(instr, 0, sizeof(*instr));
-
instr->addr = jeb->offset;
instr->len = c->sector_size;
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 00a110f40e10..39cec28096a7 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -604,6 +604,7 @@ out_root:
jffs2_free_raw_node_refs(c);
kvfree(c->blocks);
jffs2_clear_xattr_subsystem(c);
+ jffs2_sum_exit(c);
out_inohash:
kfree(c->inocache_list);
out_wbuf:
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index e205fde7163a..6eca72cfa1f2 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -18,7 +18,15 @@
#include "kernfs-internal.h"
static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */
-static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */
+/*
+ * Don't use rename_lock to piggy back on pr_cont_buf. We don't want to
+ * call pr_cont() while holding rename_lock, because pr_cont() sometimes
+ * performs wakeups when releasing console_sem. Holding rename_lock
+ * would then introduce a deadlock if the scheduler reads the kernfs_name
+ * in the wakeup path.
+ */
+static DEFINE_SPINLOCK(kernfs_pr_cont_lock);
+static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */
static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */
#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
@@ -229,12 +237,12 @@ void pr_cont_kernfs_name(struct kernfs_node *kn)
{
unsigned long flags;
- spin_lock_irqsave(&kernfs_rename_lock, flags);
+ spin_lock_irqsave(&kernfs_pr_cont_lock, flags);
- kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
+ kernfs_name(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
pr_cont("%s", kernfs_pr_cont_buf);
- spin_unlock_irqrestore(&kernfs_rename_lock, flags);
+ spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags);
}
/**
@@ -248,10 +256,10 @@ void pr_cont_kernfs_path(struct kernfs_node *kn)
unsigned long flags;
int sz;
- spin_lock_irqsave(&kernfs_rename_lock, flags);
+ spin_lock_irqsave(&kernfs_pr_cont_lock, flags);
- sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf,
- sizeof(kernfs_pr_cont_buf));
+ sz = kernfs_path_from_node(kn, NULL, kernfs_pr_cont_buf,
+ sizeof(kernfs_pr_cont_buf));
if (sz < 0) {
pr_cont("(error)");
goto out;
@@ -265,7 +273,7 @@ void pr_cont_kernfs_path(struct kernfs_node *kn)
pr_cont("%s", kernfs_pr_cont_buf);
out:
- spin_unlock_irqrestore(&kernfs_rename_lock, flags);
+ spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags);
}
/**
@@ -823,13 +831,12 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
lockdep_assert_held_read(&kernfs_root(parent)->kernfs_rwsem);
- /* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */
- spin_lock_irq(&kernfs_rename_lock);
+ spin_lock_irq(&kernfs_pr_cont_lock);
len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));
if (len >= sizeof(kernfs_pr_cont_buf)) {
- spin_unlock_irq(&kernfs_rename_lock);
+ spin_unlock_irq(&kernfs_pr_cont_lock);
return NULL;
}
@@ -841,7 +848,7 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
parent = kernfs_find_ns(parent, name, ns);
}
- spin_unlock_irq(&kernfs_rename_lock);
+ spin_unlock_irq(&kernfs_pr_cont_lock);
return parent;
}
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 88423069407c..e3abfa843879 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -33,7 +33,6 @@ static DEFINE_SPINLOCK(kernfs_open_node_lock);
static DEFINE_MUTEX(kernfs_open_file_mutex);
struct kernfs_open_node {
- atomic_t refcnt;
atomic_t event;
wait_queue_head_t poll;
struct list_head files; /* goes through kernfs_open_file.list */
@@ -530,10 +529,8 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
}
on = kn->attr.open;
- if (on) {
- atomic_inc(&on->refcnt);
+ if (on)
list_add_tail(&of->list, &on->files);
- }
spin_unlock_irq(&kernfs_open_node_lock);
mutex_unlock(&kernfs_open_file_mutex);
@@ -548,7 +545,6 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
if (!new_on)
return -ENOMEM;
- atomic_set(&new_on->refcnt, 0);
atomic_set(&new_on->event, 1);
init_waitqueue_head(&new_on->poll);
INIT_LIST_HEAD(&new_on->files);
@@ -556,17 +552,19 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
}
/**
- * kernfs_put_open_node - put kernfs_open_node
- * @kn: target kernfs_nodet
+ * kernfs_unlink_open_file - Unlink @of from @kn.
+ *
+ * @kn: target kernfs_node
* @of: associated kernfs_open_file
*
- * Put @kn->attr.open and unlink @of from the files list. If
- * reference count reaches zero, disassociate and free it.
+ * Unlink @of from list of @kn's associated open files. If list of
+ * associated open files becomes empty, disassociate and free
+ * kernfs_open_node.
*
* LOCKING:
* None.
*/
-static void kernfs_put_open_node(struct kernfs_node *kn,
+static void kernfs_unlink_open_file(struct kernfs_node *kn,
struct kernfs_open_file *of)
{
struct kernfs_open_node *on = kn->attr.open;
@@ -578,7 +576,7 @@ static void kernfs_put_open_node(struct kernfs_node *kn,
if (of)
list_del(&of->list);
- if (atomic_dec_and_test(&on->refcnt))
+ if (list_empty(&on->files))
kn->attr.open = NULL;
else
on = NULL;
@@ -706,7 +704,7 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
return 0;
err_put_node:
- kernfs_put_open_node(kn, of);
+ kernfs_unlink_open_file(kn, of);
err_seq_release:
seq_release(inode, file);
err_free:
@@ -752,7 +750,7 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
mutex_unlock(&kernfs_open_file_mutex);
}
- kernfs_put_open_node(kn, of);
+ kernfs_unlink_open_file(kn, of);
seq_release(inode, filp);
kfree(of->prealloc_buf);
kfree(of);
@@ -768,15 +766,24 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
return;
- spin_lock_irq(&kernfs_open_node_lock);
- on = kn->attr.open;
- if (on)
- atomic_inc(&on->refcnt);
- spin_unlock_irq(&kernfs_open_node_lock);
- if (!on)
+ /*
+ * The lockless opportunistic check below is safe because no one is adding
+ * to ->attr.open at this point in time. It allows an early bail out if
+ * ->attr.open is already NULL. kernfs_unlink_open_file() makes
+ * ->attr.open NULL only while holding kernfs_open_file_mutex, so the
+ * check under kernfs_open_file_mutex below ensures bailing out if
+ * ->attr.open became NULL while waiting for the mutex.
+ */
+ if (!kn->attr.open)
return;
mutex_lock(&kernfs_open_file_mutex);
+ if (!kn->attr.open) {
+ mutex_unlock(&kernfs_open_file_mutex);
+ return;
+ }
+
+ on = kn->attr.open;
list_for_each_entry(of, &on->files, list) {
struct inode *inode = file_inode(of->file);
@@ -789,8 +796,6 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
}
mutex_unlock(&kernfs_open_file_mutex);
-
- kernfs_put_open_node(kn, NULL);
}
/*
diff --git a/fs/namei.c b/fs/namei.c
index 776ecf679965..1f28d3f463c3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -730,13 +730,6 @@ static bool legitimize_links(struct nameidata *nd)
static bool legitimize_root(struct nameidata *nd)
{
- /*
- * For scoped-lookups (where nd->root has been zeroed), we need to
- * restart the whole lookup from scratch -- because set_root() is wrong
- * for these lookups (nd->dfd is the root, not the filesystem root).
- */
- if (!nd->root.mnt && (nd->flags & LOOKUP_IS_SCOPED))
- return false;
/* Nothing to do if nd->root is zero or is managed by the VFS user. */
if (!nd->root.mnt || (nd->state & ND_ROOT_PRESET))
return true;
@@ -798,7 +791,7 @@ out:
* @seq: seq number to check @dentry against
* Returns: true on success, false on failure
*
- * Similar to to try_to_unlazy(), but here we have the next dentry already
+ * Similar to try_to_unlazy(), but here we have the next dentry already
* picked by rcu-walk and want to legitimize that in addition to the current
* nd->path and nd->root for ref-walk mode. Must be called from rcu-walk context.
* Nothing should touch nameidata between try_to_unlazy_next() failure and
@@ -1755,7 +1748,7 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
// unlazy even if we fail to grab the link - cleanup needs it
bool grabbed_link = legitimize_path(nd, link, seq);
- if (!try_to_unlazy(nd) != 0 || !grabbed_link)
+ if (!try_to_unlazy(nd) || !grabbed_link)
return -ECHILD;
if (nd_alloc_stack(nd))
diff --git a/fs/namespace.c b/fs/namespace.c
index 41461f55c039..e6a7e769d25d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1760,7 +1760,7 @@ out_unlock:
/*
* Is the caller allowed to modify his namespace?
*/
-static inline bool may_mount(void)
+bool may_mount(void)
{
return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 7b861e4f0533..03d3a270eff4 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -328,7 +328,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
char *read_name = NULL;
int len, status = 0;
- server = NFS_SERVER(ss_mnt->mnt_root->d_inode);
+ server = NFS_SB(ss_mnt->mnt_sb);
if (!fattr)
return ERR_PTR(-ENOMEM);
@@ -346,7 +346,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
goto out;
snprintf(read_name, len, SSC_READ_NAME_BODY, read_name_gen++);
- r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, fattr);
+ r_ino = nfs_fhget(ss_mnt->mnt_sb, src_fh, fattr);
if (IS_ERR(r_ino)) {
res = ERR_CAST(r_ino);
goto out_free_name;
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index a4fcdc7927ca..8e9d2b35175f 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -492,7 +492,7 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size)
down_write(&ni->file.run_lock);
err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size,
- &new_valid, true, NULL);
+ &new_valid, ni->mi.sbi->options->prealloc, NULL);
up_write(&ni->file.run_lock);
if (new_valid < ni->i_valid)
@@ -659,7 +659,13 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
/*
* Normal file: Allocate clusters, do not change 'valid' size.
*/
- err = ntfs_set_size(inode, max(end, i_size));
+ loff_t new_size = max(end, i_size);
+
+ err = inode_newsize_ok(inode, new_size);
+ if (err)
+ goto out;
+
+ err = ntfs_set_size(inode, new_size);
if (err)
goto out;
@@ -759,7 +765,7 @@ int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
}
inode_dio_wait(inode);
- if (attr->ia_size < oldsize)
+ if (attr->ia_size <= oldsize)
err = ntfs_truncate(inode, attr->ia_size);
else if (attr->ia_size > oldsize)
err = ntfs_extend(inode, attr->ia_size, 0, NULL);
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
index 6f47a9c17f89..18842998c8fa 100644
--- a/fs/ntfs3/frecord.c
+++ b/fs/ntfs3/frecord.c
@@ -1964,10 +1964,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
vcn += clen;
- if (vbo + bytes >= end) {
+ if (vbo + bytes >= end)
bytes = end - vbo;
- flags |= FIEMAP_EXTENT_LAST;
- }
if (vbo + bytes <= valid) {
;
@@ -1977,6 +1975,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
/* vbo < valid && valid < vbo + bytes */
u64 dlen = valid - vbo;
+ if (vbo + dlen >= end)
+ flags |= FIEMAP_EXTENT_LAST;
+
err = fiemap_fill_next_extent(fieinfo, vbo, lbo, dlen,
flags);
if (err < 0)
@@ -1995,6 +1996,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
flags |= FIEMAP_EXTENT_UNWRITTEN;
}
+ if (vbo + bytes >= end)
+ flags |= FIEMAP_EXTENT_LAST;
+
err = fiemap_fill_next_extent(fieinfo, vbo, lbo, bytes, flags);
if (err < 0)
break;
diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c
index 06492f088d60..49b7df616778 100644
--- a/fs/ntfs3/fslog.c
+++ b/fs/ntfs3/fslog.c
@@ -1185,8 +1185,6 @@ static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first,
if (!r_page)
return -ENOMEM;
- memset(info, 0, sizeof(struct restart_info));
-
/* Determine which restart area we are looking for. */
if (first) {
vbo = 0;
@@ -3791,10 +3789,11 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
if (!log)
return -ENOMEM;
+ memset(&rst_info, 0, sizeof(struct restart_info));
+
log->ni = ni;
log->l_size = l_size;
log->one_page_buf = kmalloc(page_size, GFP_NOFS);
-
if (!log->one_page_buf) {
err = -ENOMEM;
goto out;
@@ -3842,6 +3841,7 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
if (rst_info.vbo)
goto check_restart_area;
+ memset(&rst_info2, 0, sizeof(struct restart_info));
err = log_read_rst(log, l_size, false, &rst_info2);
/* Determine which restart area to use. */
@@ -4085,8 +4085,10 @@ process_log:
if (client == LFS_NO_CLIENT_LE) {
/* Insert "NTFS" client LogFile. */
client = ra->client_idx[0];
- if (client == LFS_NO_CLIENT_LE)
- return -EINVAL;
+ if (client == LFS_NO_CLIENT_LE) {
+ err = -EINVAL;
+ goto out;
+ }
t16 = le16_to_cpu(client);
cr = ca + t16;
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 74f60c457f28..be4ebdd8048b 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -758,6 +758,7 @@ static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
loff_t vbo = iocb->ki_pos;
loff_t end;
int wr = iov_iter_rw(iter) & WRITE;
+ size_t iter_count = iov_iter_count(iter);
loff_t valid;
ssize_t ret;
@@ -771,10 +772,13 @@ static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
wr ? ntfs_get_block_direct_IO_W
: ntfs_get_block_direct_IO_R);
- if (ret <= 0)
+ if (ret > 0)
+ end = vbo + ret;
+ else if (wr && ret == -EIOCBQUEUED)
+ end = vbo + iter_count;
+ else
goto out;
- end = vbo + ret;
valid = ni->i_valid;
if (wr) {
if (end > valid && !S_ISBLK(inode->i_mode)) {
@@ -1950,6 +1954,7 @@ const struct address_space_operations ntfs_aops = {
.direct_IO = ntfs_direct_IO,
.bmap = ntfs_bmap,
.dirty_folio = block_dirty_folio,
+ .invalidate_folio = block_invalidate_folio,
};
const struct address_space_operations ntfs_aops_cmpr = {
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index afd0ddad826f..5e0e0280e70d 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -112,7 +112,7 @@ static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea,
return -ENOMEM;
if (!size) {
- ;
+ /* EA info persists, but xattr is empty. Looks like EA problem. */
} else if (attr_ea->non_res) {
struct runs_tree run;
@@ -259,7 +259,7 @@ out:
static noinline int ntfs_set_ea(struct inode *inode, const char *name,
size_t name_len, const void *value,
- size_t val_size, int flags)
+ size_t val_size, int flags, bool locked)
{
struct ntfs_inode *ni = ntfs_i(inode);
struct ntfs_sb_info *sbi = ni->mi.sbi;
@@ -278,7 +278,8 @@ static noinline int ntfs_set_ea(struct inode *inode, const char *name,
u64 new_sz;
void *p;
- ni_lock(ni);
+ if (!locked)
+ ni_lock(ni);
run_init(&ea_run);
@@ -467,7 +468,8 @@ update_ea:
mark_inode_dirty(&ni->vfs_inode);
out:
- ni_unlock(ni);
+ if (!locked)
+ ni_unlock(ni);
run_close(&ea_run);
kfree(ea_all);
@@ -541,7 +543,7 @@ struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu)
static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
struct inode *inode, struct posix_acl *acl,
- int type)
+ int type, bool init_acl)
{
const char *name;
size_t size, name_len;
@@ -554,8 +556,9 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
switch (type) {
case ACL_TYPE_ACCESS:
- if (acl) {
- umode_t mode = inode->i_mode;
+ /* Do not change i_mode if we are in init_acl */
+ if (acl && !init_acl) {
+ umode_t mode;
err = posix_acl_update_mode(mnt_userns, inode, &mode,
&acl);
@@ -598,7 +601,7 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
flags = 0;
}
- err = ntfs_set_ea(inode, name, name_len, value, size, flags);
+ err = ntfs_set_ea(inode, name, name_len, value, size, flags, 0);
if (err == -ENODATA && !size)
err = 0; /* Removing non existed xattr. */
if (!err)
@@ -616,7 +619,68 @@ out:
int ntfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
struct posix_acl *acl, int type)
{
- return ntfs_set_acl_ex(mnt_userns, inode, acl, type);
+ return ntfs_set_acl_ex(mnt_userns, inode, acl, type, false);
+}
+
+/*
+ * Read the POSIX ACL of the given @type from @inode and convert it to its
+ * xattr representation in @buffer (capacity @size).
+ *
+ * Returns whatever posix_acl_to_xattr() returns on success,
+ * -EOPNOTSUPP (with a warning) when the superblock lacks SB_POSIXACL,
+ * -ENODATA when the inode has no such ACL, or the error carried by
+ * ntfs_get_acl().
+ */
+static int ntfs_xattr_get_acl(struct user_namespace *mnt_userns,
+			      struct inode *inode, int type, void *buffer,
+			      size_t size)
+{
+	const bool acl_enabled = inode->i_sb->s_flags & SB_POSIXACL;
+	struct posix_acl *acl;
+	int ret;
+
+	if (!acl_enabled) {
+		ntfs_inode_warn(inode, "add mount option \"acl\" to use acl");
+		return -EOPNOTSUPP;
+	}
+
+	acl = ntfs_get_acl(inode, type, false);
+	/* An error pointer carries its own errno; NULL means "no ACL". */
+	if (IS_ERR_OR_NULL(acl))
+		return acl ? PTR_ERR(acl) : -ENODATA;
+
+	ret = posix_acl_to_xattr(mnt_userns, acl, buffer, size);
+	posix_acl_release(acl);
+
+	return ret;
+}
+
+/*
+ * Parse an ACL from its xattr representation (@value, @size) and apply it
+ * to @inode as the ACL of the given @type. A NULL @value is passed on to
+ * ntfs_set_acl() as a NULL ACL.
+ *
+ * Returns 0 or the error from ntfs_set_acl(); -EOPNOTSUPP (with a warning)
+ * when the superblock lacks SB_POSIXACL; -EPERM when the caller fails
+ * inode_owner_or_capable(); or the error from parsing/validating the ACL.
+ */
+static int ntfs_xattr_set_acl(struct user_namespace *mnt_userns,
+			      struct inode *inode, int type, const void *value,
+			      size_t size)
+{
+	struct posix_acl *acl = NULL;
+	int err;
+
+	if (!(inode->i_sb->s_flags & SB_POSIXACL)) {
+		ntfs_inode_warn(inode, "add mount option \"acl\" to use acl");
+		return -EOPNOTSUPP;
+	}
+
+	if (!inode_owner_or_capable(mnt_userns, inode))
+		return -EPERM;
+
+	if (value) {
+		acl = posix_acl_from_xattr(mnt_userns, value, size);
+		if (IS_ERR(acl))
+			return PTR_ERR(acl);
+	}
+
+	/* Only a parsed, non-NULL ACL needs validating before it is set. */
+	err = acl ? posix_acl_valid(mnt_userns, acl) : 0;
+	if (!err)
+		err = ntfs_set_acl(mnt_userns, inode, acl, type);
+
+	posix_acl_release(acl);
+	return err;
}
/*
@@ -636,7 +700,7 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode,
if (default_acl) {
err = ntfs_set_acl_ex(mnt_userns, inode, default_acl,
- ACL_TYPE_DEFAULT);
+ ACL_TYPE_DEFAULT, true);
posix_acl_release(default_acl);
} else {
inode->i_default_acl = NULL;
@@ -647,7 +711,7 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode,
else {
if (!err)
err = ntfs_set_acl_ex(mnt_userns, inode, acl,
- ACL_TYPE_ACCESS);
+ ACL_TYPE_ACCESS, true);
posix_acl_release(acl);
}
@@ -785,6 +849,23 @@ static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de,
goto out;
}
+#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+ if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 &&
+ !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
+ sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) ||
+ (name_len == sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1 &&
+ !memcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
+ sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)))) {
+ /* TODO: init_user_ns? */
+ err = ntfs_xattr_get_acl(
+ &init_user_ns, inode,
+ name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1
+ ? ACL_TYPE_ACCESS
+ : ACL_TYPE_DEFAULT,
+ buffer, size);
+ goto out;
+ }
+#endif
/* Deal with NTFS extended attribute. */
err = ntfs_get_ea(inode, name, name_len, buffer, size, NULL);
@@ -897,10 +978,29 @@ set_new_fa:
goto out;
}
+#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+ if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 &&
+ !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
+ sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) ||
+ (name_len == sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1 &&
+ !memcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
+ sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)))) {
+ err = ntfs_xattr_set_acl(
+ mnt_userns, inode,
+ name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1
+ ? ACL_TYPE_ACCESS
+ : ACL_TYPE_DEFAULT,
+ value, size);
+ goto out;
+ }
+#endif
/* Deal with NTFS extended attribute. */
- err = ntfs_set_ea(inode, name, name_len, value, size, flags);
+ err = ntfs_set_ea(inode, name, name_len, value, size, flags, 0);
out:
+ inode->i_ctime = current_time(inode);
+ mark_inode_dirty(inode);
+
return err;
}
@@ -913,35 +1013,37 @@ int ntfs_save_wsl_perm(struct inode *inode)
{
int err;
__le32 value;
+ struct ntfs_inode *ni = ntfs_i(inode);
- /* TODO: refactor this, so we don't lock 4 times in ntfs_set_ea */
+ ni_lock(ni);
value = cpu_to_le32(i_uid_read(inode));
err = ntfs_set_ea(inode, "$LXUID", sizeof("$LXUID") - 1, &value,
- sizeof(value), 0);
+ sizeof(value), 0, true); /* true == already locked. */
if (err)
goto out;
value = cpu_to_le32(i_gid_read(inode));
err = ntfs_set_ea(inode, "$LXGID", sizeof("$LXGID") - 1, &value,
- sizeof(value), 0);
+ sizeof(value), 0, true);
if (err)
goto out;
value = cpu_to_le32(inode->i_mode);
err = ntfs_set_ea(inode, "$LXMOD", sizeof("$LXMOD") - 1, &value,
- sizeof(value), 0);
+ sizeof(value), 0, true);
if (err)
goto out;
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
value = cpu_to_le32(inode->i_rdev);
err = ntfs_set_ea(inode, "$LXDEV", sizeof("$LXDEV") - 1, &value,
- sizeof(value), 0);
+ sizeof(value), 0, true);
if (err)
goto out;
}
out:
+ ni_unlock(ni);
/* In case of error should we delete all WSL xattr? */
return err;
}
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index c0b84e960b20..e8b9b756f0ac 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -65,7 +65,7 @@ static void shrink_liability(struct ubifs_info *c, int nr_to_write)
*/
static int run_gc(struct ubifs_info *c)
{
- int err, lnum;
+ int lnum;
/* Make some free space by garbage-collecting dirty space */
down_read(&c->commit_sem);
@@ -76,10 +76,7 @@ static int run_gc(struct ubifs_info *c)
/* GC freed one LEB, return it to lprops */
dbg_budg("GC freed LEB %d", lnum);
- err = ubifs_return_leb(c, lnum);
- if (err)
- return err;
- return 0;
+ return ubifs_return_leb(c, lnum);
}
/**
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index e4f193eae4b2..e4c4761aff7f 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -677,7 +677,7 @@ int ubifs_init_security(struct inode *dentry, struct inode *inode,
int err;
err = security_inode_init_security(inode, dentry, qstr,
- &init_xattrs, 0);
+ &init_xattrs, NULL);
if (err) {
struct ubifs_info *c = dentry->i_sb->s_fs_info;
ubifs_err(c, "cannot initialize security for inode %lu, error %d",
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 1e4ee042d52f..3e920cf1b454 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -173,7 +173,6 @@ __xfs_free_perag(
struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
ASSERT(!delayed_work_pending(&pag->pag_blockgc_work));
- ASSERT(atomic_read(&pag->pag_ref) == 0);
kmem_free(pag);
}
@@ -192,7 +191,7 @@ xfs_free_perag(
pag = radix_tree_delete(&mp->m_perag_tree, agno);
spin_unlock(&mp->m_perag_lock);
ASSERT(pag);
- ASSERT(atomic_read(&pag->pag_ref) == 0);
+ XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0);
cancel_delayed_work_sync(&pag->pag_blockgc_work);
xfs_iunlink_destroy(pag);
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 14ae0826bc15..836ab1b8ed7b 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -25,10 +25,9 @@
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_attr_item.h"
-#include "xfs_log.h"
+#include "xfs_xattr.h"
-struct kmem_cache *xfs_attri_cache;
-struct kmem_cache *xfs_attrd_cache;
+struct kmem_cache *xfs_attr_intent_cache;
/*
* xfs_attr.c
@@ -58,11 +57,11 @@ STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args, struct xfs_buf *bp);
*/
STATIC int xfs_attr_node_get(xfs_da_args_t *args);
STATIC void xfs_attr_restore_rmt_blk(struct xfs_da_args *args);
-static int xfs_attr_node_try_addname(struct xfs_attr_item *attr);
-STATIC int xfs_attr_node_addname_find_attr(struct xfs_attr_item *attr);
-STATIC int xfs_attr_node_remove_attr(struct xfs_attr_item *attr);
-STATIC int xfs_attr_node_hasname(xfs_da_args_t *args,
- struct xfs_da_state **state);
+static int xfs_attr_node_try_addname(struct xfs_attr_intent *attr);
+STATIC int xfs_attr_node_addname_find_attr(struct xfs_attr_intent *attr);
+STATIC int xfs_attr_node_remove_attr(struct xfs_attr_intent *attr);
+STATIC int xfs_attr_node_lookup(struct xfs_da_args *args,
+ struct xfs_da_state *state);
int
xfs_inode_hasattr(
@@ -377,7 +376,7 @@ xfs_attr_try_sf_addname(
static int
xfs_attr_sf_addname(
- struct xfs_attr_item *attr)
+ struct xfs_attr_intent *attr)
{
struct xfs_da_args *args = attr->xattri_da_args;
struct xfs_inode *dp = args->dp;
@@ -423,7 +422,7 @@ out:
*/
static enum xfs_delattr_state
xfs_attr_complete_op(
- struct xfs_attr_item *attr,
+ struct xfs_attr_intent *attr,
enum xfs_delattr_state replace_state)
{
struct xfs_da_args *args = attr->xattri_da_args;
@@ -439,7 +438,7 @@ xfs_attr_complete_op(
static int
xfs_attr_leaf_addname(
- struct xfs_attr_item *attr)
+ struct xfs_attr_intent *attr)
{
struct xfs_da_args *args = attr->xattri_da_args;
int error;
@@ -493,7 +492,7 @@ out:
*/
static int
xfs_attr_node_addname(
- struct xfs_attr_item *attr)
+ struct xfs_attr_intent *attr)
{
struct xfs_da_args *args = attr->xattri_da_args;
int error;
@@ -530,7 +529,7 @@ out:
static int
xfs_attr_rmtval_alloc(
- struct xfs_attr_item *attr)
+ struct xfs_attr_intent *attr)
{
struct xfs_da_args *args = attr->xattri_da_args;
int error = 0;
@@ -594,6 +593,19 @@ xfs_attr_leaf_mark_incomplete(
return xfs_attr3_leaf_setflag(args);
}
+/* Ensure the da state of an xattr deferred work item is ready to go. */
+static inline void
+xfs_attr_item_init_da_state(
+ struct xfs_attr_intent *attr)
+{
+ struct xfs_da_args *args = attr->xattri_da_args;
+
+ if (!attr->xattri_da_state)
+ attr->xattri_da_state = xfs_da_state_alloc(args);
+ else
+ xfs_da_state_reset(attr->xattri_da_state, args);
+}
+
/*
* Initial setup for xfs_attr_node_removename. Make sure the attr is there and
* the blocks are valid. Attr keys with remote blocks will be marked
@@ -601,29 +613,33 @@ xfs_attr_leaf_mark_incomplete(
*/
static
int xfs_attr_node_removename_setup(
- struct xfs_attr_item *attr)
+ struct xfs_attr_intent *attr)
{
struct xfs_da_args *args = attr->xattri_da_args;
- struct xfs_da_state **state = &attr->xattri_da_state;
+ struct xfs_da_state *state;
int error;
- error = xfs_attr_node_hasname(args, state);
+ xfs_attr_item_init_da_state(attr);
+ error = xfs_attr_node_lookup(args, attr->xattri_da_state);
if (error != -EEXIST)
goto out;
error = 0;
- ASSERT((*state)->path.blk[(*state)->path.active - 1].bp != NULL);
- ASSERT((*state)->path.blk[(*state)->path.active - 1].magic ==
+ state = attr->xattri_da_state;
+ ASSERT(state->path.blk[state->path.active - 1].bp != NULL);
+ ASSERT(state->path.blk[state->path.active - 1].magic ==
XFS_ATTR_LEAF_MAGIC);
- error = xfs_attr_leaf_mark_incomplete(args, *state);
+ error = xfs_attr_leaf_mark_incomplete(args, state);
if (error)
goto out;
if (args->rmtblkno > 0)
error = xfs_attr_rmtval_invalidate(args);
out:
- if (error)
- xfs_da_state_free(*state);
+ if (error) {
+ xfs_da_state_free(attr->xattri_da_state);
+ attr->xattri_da_state = NULL;
+ }
return error;
}
@@ -635,7 +651,7 @@ out:
*/
static int
xfs_attr_leaf_remove_attr(
- struct xfs_attr_item *attr)
+ struct xfs_attr_intent *attr)
{
struct xfs_da_args *args = attr->xattri_da_args;
struct xfs_inode *dp = args->dp;
@@ -700,7 +716,7 @@ xfs_attr_leaf_shrink(
*/
int
xfs_attr_set_iter(
- struct xfs_attr_item *attr)
+ struct xfs_attr_intent *attr)
{
struct xfs_da_args *args = attr->xattri_da_args;
int error = 0;
@@ -852,6 +868,7 @@ xfs_attr_lookup(
{
struct xfs_inode *dp = args->dp;
struct xfs_buf *bp = NULL;
+ struct xfs_da_state *state;
int error;
if (!xfs_inode_hasattr(dp))
@@ -869,19 +886,22 @@ xfs_attr_lookup(
return error;
}
- return xfs_attr_node_hasname(args, NULL);
+ state = xfs_da_state_alloc(args);
+ error = xfs_attr_node_lookup(args, state);
+ xfs_da_state_free(state);
+ return error;
}
static int
-xfs_attr_item_init(
+xfs_attr_intent_init(
struct xfs_da_args *args,
unsigned int op_flags, /* op flag (set or remove) */
- struct xfs_attr_item **attr) /* new xfs_attr_item */
+ struct xfs_attr_intent **attr) /* new xfs_attr_intent */
{
- struct xfs_attr_item *new;
+ struct xfs_attr_intent *new;
- new = kmem_zalloc(sizeof(struct xfs_attr_item), KM_NOFS);
+ new = kmem_cache_zalloc(xfs_attr_intent_cache, GFP_NOFS | __GFP_NOFAIL);
new->xattri_op_flags = op_flags;
new->xattri_da_args = args;
@@ -894,10 +914,10 @@ static int
xfs_attr_defer_add(
struct xfs_da_args *args)
{
- struct xfs_attr_item *new;
+ struct xfs_attr_intent *new;
int error = 0;
- error = xfs_attr_item_init(args, XFS_ATTR_OP_FLAGS_SET, &new);
+ error = xfs_attr_intent_init(args, XFS_ATTRI_OP_FLAGS_SET, &new);
if (error)
return error;
@@ -913,10 +933,10 @@ static int
xfs_attr_defer_replace(
struct xfs_da_args *args)
{
- struct xfs_attr_item *new;
+ struct xfs_attr_intent *new;
int error = 0;
- error = xfs_attr_item_init(args, XFS_ATTR_OP_FLAGS_REPLACE, &new);
+ error = xfs_attr_intent_init(args, XFS_ATTRI_OP_FLAGS_REPLACE, &new);
if (error)
return error;
@@ -933,10 +953,10 @@ xfs_attr_defer_remove(
struct xfs_da_args *args)
{
- struct xfs_attr_item *new;
+ struct xfs_attr_intent *new;
int error;
- error = xfs_attr_item_init(args, XFS_ATTR_OP_FLAGS_REMOVE, &new);
+ error = xfs_attr_intent_init(args, XFS_ATTRI_OP_FLAGS_REMOVE, &new);
if (error)
return error;
@@ -962,7 +982,6 @@ xfs_attr_set(
int error, local;
int rmt_blks = 0;
unsigned int total;
- int delayed = xfs_has_larp(mp);
if (xfs_is_shutdown(dp->i_mount))
return -EIO;
@@ -1007,12 +1026,6 @@ xfs_attr_set(
rmt_blks = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
}
- if (delayed) {
- error = xfs_attr_use_log_assist(mp);
- if (error)
- return error;
- }
-
/*
* Root fork attributes can use reserved data blocks for this
* operation if necessary
@@ -1020,7 +1033,7 @@ xfs_attr_set(
xfs_init_attr_trans(args, &tres, &total);
error = xfs_trans_alloc_inode(dp, &tres, total, 0, rsvd, &args->trans);
if (error)
- goto drop_incompat;
+ return error;
if (args->value || xfs_inode_hasattr(dp)) {
error = xfs_iext_count_may_overflow(dp, XFS_ATTR_FORK,
@@ -1080,9 +1093,6 @@ xfs_attr_set(
error = xfs_trans_commit(args->trans);
out_unlock:
xfs_iunlock(dp, XFS_ILOCK_EXCL);
-drop_incompat:
- if (delayed)
- xlog_drop_incompat_feat(mp->m_log);
return error;
out_trans_cancel:
@@ -1091,40 +1101,6 @@ out_trans_cancel:
goto out_unlock;
}
-int __init
-xfs_attri_init_cache(void)
-{
- xfs_attri_cache = kmem_cache_create("xfs_attri",
- sizeof(struct xfs_attri_log_item),
- 0, 0, NULL);
-
- return xfs_attri_cache != NULL ? 0 : -ENOMEM;
-}
-
-void
-xfs_attri_destroy_cache(void)
-{
- kmem_cache_destroy(xfs_attri_cache);
- xfs_attri_cache = NULL;
-}
-
-int __init
-xfs_attrd_init_cache(void)
-{
- xfs_attrd_cache = kmem_cache_create("xfs_attrd",
- sizeof(struct xfs_attrd_log_item),
- 0, 0, NULL);
-
- return xfs_attrd_cache != NULL ? 0 : -ENOMEM;
-}
-
-void
-xfs_attrd_destroy_cache(void)
-{
- kmem_cache_destroy(xfs_attrd_cache);
- xfs_attrd_cache = NULL;
-}
-
/*========================================================================
* External routines when attribute list is inside the inode
*========================================================================*/
@@ -1384,32 +1360,20 @@ xfs_attr_leaf_get(xfs_da_args_t *args)
return error;
}
-/*
- * Return EEXIST if attr is found, or ENOATTR if not
- * statep: If not null is set to point at the found state. Caller will
- * be responsible for freeing the state in this case.
- */
+/* Return EEXIST if attr is found, or ENOATTR if not. */
STATIC int
-xfs_attr_node_hasname(
+xfs_attr_node_lookup(
struct xfs_da_args *args,
- struct xfs_da_state **statep)
+ struct xfs_da_state *state)
{
- struct xfs_da_state *state;
int retval, error;
- state = xfs_da_state_alloc(args);
- if (statep != NULL)
- *statep = state;
-
/*
* Search to see if name exists, and get back a pointer to it.
*/
error = xfs_da3_node_lookup_int(state, &retval);
if (error)
- retval = error;
-
- if (!statep)
- xfs_da_state_free(state);
+ return error;
return retval;
}
@@ -1420,7 +1384,7 @@ xfs_attr_node_hasname(
STATIC int
xfs_attr_node_addname_find_attr(
- struct xfs_attr_item *attr)
+ struct xfs_attr_intent *attr)
{
struct xfs_da_args *args = attr->xattri_da_args;
int error;
@@ -1429,7 +1393,8 @@ xfs_attr_node_addname_find_attr(
* Search to see if name already exists, and get back a pointer
* to where it should go.
*/
- error = xfs_attr_node_hasname(args, &attr->xattri_da_state);
+ xfs_attr_item_init_da_state(attr);
+ error = xfs_attr_node_lookup(args, attr->xattri_da_state);
switch (error) {
case -ENOATTR:
if (args->op_flags & XFS_DA_OP_REPLACE)
@@ -1456,8 +1421,10 @@ xfs_attr_node_addname_find_attr(
return 0;
error:
- if (attr->xattri_da_state)
+ if (attr->xattri_da_state) {
xfs_da_state_free(attr->xattri_da_state);
+ attr->xattri_da_state = NULL;
+ }
return error;
}
@@ -1470,7 +1437,7 @@ error:
*/
static int
xfs_attr_node_try_addname(
- struct xfs_attr_item *attr)
+ struct xfs_attr_intent *attr)
{
struct xfs_da_args *args = attr->xattri_da_args;
struct xfs_da_state *state = attr->xattri_da_state;
@@ -1511,6 +1478,7 @@ xfs_attr_node_try_addname(
out:
xfs_da_state_free(state);
+ attr->xattri_da_state = NULL;
return error;
}
@@ -1535,10 +1503,10 @@ xfs_attr_node_removename(
static int
xfs_attr_node_remove_attr(
- struct xfs_attr_item *attr)
+ struct xfs_attr_intent *attr)
{
struct xfs_da_args *args = attr->xattri_da_args;
- struct xfs_da_state *state = NULL;
+ struct xfs_da_state *state = xfs_da_state_alloc(args);
int retval = 0;
int error = 0;
@@ -1548,8 +1516,6 @@ xfs_attr_node_remove_attr(
* attribute entry after any split ops.
*/
args->attr_filter |= XFS_ATTR_INCOMPLETE;
- state = xfs_da_state_alloc(args);
- state->inleaf = 0;
error = xfs_da3_node_lookup_int(state, &retval);
if (error)
goto out;
@@ -1567,8 +1533,7 @@ xfs_attr_node_remove_attr(
retval = error = 0;
out:
- if (state)
- xfs_da_state_free(state);
+ xfs_da_state_free(state);
if (error)
return error;
return retval;
@@ -1597,7 +1562,8 @@ xfs_attr_node_get(
/*
* Search to see if name exists, and get back a pointer to it.
*/
- error = xfs_attr_node_hasname(args, &state);
+ state = xfs_da_state_alloc(args);
+ error = xfs_attr_node_lookup(args, state);
if (error != -EEXIST)
goto out_release;
@@ -1616,8 +1582,7 @@ out_release:
state->path.blk[i].bp = NULL;
}
- if (state)
- xfs_da_state_free(state);
+ xfs_da_state_free(state);
return error;
}
@@ -1637,3 +1602,20 @@ xfs_attr_namecheck(
/* There shouldn't be any nulls here */
return !memchr(name, 0, length);
}
+
+int __init
+xfs_attr_intent_init_cache(void)
+{
+ xfs_attr_intent_cache = kmem_cache_create("xfs_attr_intent",
+ sizeof(struct xfs_attr_intent),
+ 0, 0, NULL);
+
+ return xfs_attr_intent_cache != NULL ? 0 : -ENOMEM;
+}
+
+void
+xfs_attr_intent_destroy_cache(void)
+{
+ kmem_cache_destroy(xfs_attr_intent_cache);
+ xfs_attr_intent_cache = NULL;
+}
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 1af7abe29eef..e329da3e7afa 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -31,7 +31,8 @@ struct xfs_attr_list_context;
static inline bool xfs_has_larp(struct xfs_mount *mp)
{
#ifdef DEBUG
- return xfs_globals.larp;
+ /* Logged xattrs require a V5 super for log_incompat */
+ return xfs_has_crc(mp) && xfs_globals.larp;
#else
return false;
#endif
@@ -434,7 +435,7 @@ struct xfs_attr_list_context {
*/
/*
- * Enum values for xfs_attr_item.xattri_da_state
+ * Enum values for xfs_attr_intent.xattri_da_state
*
* These values are used by delayed attribute operations to keep track of where
* they were before they returned -EAGAIN. A return code of -EAGAIN signals the
@@ -501,44 +502,46 @@ enum xfs_delattr_state {
{ XFS_DAS_NODE_REMOVE_ATTR, "XFS_DAS_NODE_REMOVE_ATTR" }, \
{ XFS_DAS_DONE, "XFS_DAS_DONE" }
-/*
- * Defines for xfs_attr_item.xattri_flags
- */
-#define XFS_DAC_LEAF_ADDNAME_INIT 0x01 /* xfs_attr_leaf_addname init*/
+struct xfs_attri_log_nameval;
/*
* Context used for keeping track of delayed attribute operations
*/
-struct xfs_attr_item {
+struct xfs_attr_intent {
+ /*
+ * used to log this item to an intent containing a list of attrs to
+ * commit later
+ */
+ struct list_head xattri_list;
+
+ /* Used in xfs_attr_node_removename to roll through removing blocks */
+ struct xfs_da_state *xattri_da_state;
+
struct xfs_da_args *xattri_da_args;
/*
+ * Shared buffer containing the attr name and value so that the logging
+ * code can share large memory buffers between log items.
+ */
+ struct xfs_attri_log_nameval *xattri_nameval;
+
+ /*
* Used by xfs_attr_set to hold a leaf buffer across a transaction roll
*/
struct xfs_buf *xattri_leaf_bp;
- /* Used in xfs_attr_rmtval_set_blk to roll through allocating blocks */
- struct xfs_bmbt_irec xattri_map;
- xfs_dablk_t xattri_lblkno;
- int xattri_blkcnt;
-
- /* Used in xfs_attr_node_removename to roll through removing blocks */
- struct xfs_da_state *xattri_da_state;
-
/* Used to keep track of current state of delayed operation */
- unsigned int xattri_flags;
enum xfs_delattr_state xattri_dela_state;
/*
- * Attr operation being performed - XFS_ATTR_OP_FLAGS_*
+ * Attr operation being performed - XFS_ATTRI_OP_FLAGS_*
*/
unsigned int xattri_op_flags;
- /*
- * used to log this item to an intent containing a list of attrs to
- * commit later
- */
- struct list_head xattri_list;
+ /* Used in xfs_attr_rmtval_set_blk to roll through allocating blocks */
+ xfs_dablk_t xattri_lblkno;
+ int xattri_blkcnt;
+ struct xfs_bmbt_irec xattri_map;
};
@@ -557,21 +560,13 @@ bool xfs_attr_is_leaf(struct xfs_inode *ip);
int xfs_attr_get_ilocked(struct xfs_da_args *args);
int xfs_attr_get(struct xfs_da_args *args);
int xfs_attr_set(struct xfs_da_args *args);
-int xfs_attr_set_iter(struct xfs_attr_item *attr);
-int xfs_attr_remove_iter(struct xfs_attr_item *attr);
+int xfs_attr_set_iter(struct xfs_attr_intent *attr);
+int xfs_attr_remove_iter(struct xfs_attr_intent *attr);
bool xfs_attr_namecheck(const void *name, size_t length);
int xfs_attr_calc_size(struct xfs_da_args *args, int *local);
void xfs_init_attr_trans(struct xfs_da_args *args, struct xfs_trans_res *tres,
unsigned int *total);
-extern struct kmem_cache *xfs_attri_cache;
-extern struct kmem_cache *xfs_attrd_cache;
-
-int __init xfs_attri_init_cache(void);
-void xfs_attri_destroy_cache(void);
-int __init xfs_attrd_init_cache(void);
-void xfs_attrd_destroy_cache(void);
-
/*
* Check to see if the attr should be upgraded from non-existent or shortform to
* single-leaf-block attribute list.
@@ -634,4 +629,8 @@ xfs_attr_init_replace_state(struct xfs_da_args *args)
return xfs_attr_init_add_state(args);
}
+extern struct kmem_cache *xfs_attr_intent_cache;
+int __init xfs_attr_intent_init_cache(void);
+void xfs_attr_intent_destroy_cache(void);
+
#endif /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 4250159ecced..7298c148f848 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -568,7 +568,7 @@ xfs_attr_rmtval_stale(
*/
int
xfs_attr_rmtval_find_space(
- struct xfs_attr_item *attr)
+ struct xfs_attr_intent *attr)
{
struct xfs_da_args *args = attr->xattri_da_args;
struct xfs_bmbt_irec *map = &attr->xattri_map;
@@ -598,7 +598,7 @@ xfs_attr_rmtval_find_space(
*/
int
xfs_attr_rmtval_set_blk(
- struct xfs_attr_item *attr)
+ struct xfs_attr_intent *attr)
{
struct xfs_da_args *args = attr->xattri_da_args;
struct xfs_inode *dp = args->dp;
@@ -674,7 +674,7 @@ xfs_attr_rmtval_invalidate(
*/
int
xfs_attr_rmtval_remove(
- struct xfs_attr_item *attr)
+ struct xfs_attr_intent *attr)
{
struct xfs_da_args *args = attr->xattri_da_args;
int error, done;
diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h
index 62b398edec3f..d097ec6c4dc3 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.h
+++ b/fs/xfs/libxfs/xfs_attr_remote.h
@@ -12,9 +12,9 @@ int xfs_attr_rmtval_get(struct xfs_da_args *args);
int xfs_attr_rmtval_stale(struct xfs_inode *ip, struct xfs_bmbt_irec *map,
xfs_buf_flags_t incore_flags);
int xfs_attr_rmtval_invalidate(struct xfs_da_args *args);
-int xfs_attr_rmtval_remove(struct xfs_attr_item *attr);
+int xfs_attr_rmtval_remove(struct xfs_attr_intent *attr);
int xfs_attr_rmt_find_hole(struct xfs_da_args *args);
int xfs_attr_rmtval_set_value(struct xfs_da_args *args);
-int xfs_attr_rmtval_set_blk(struct xfs_attr_item *attr);
-int xfs_attr_rmtval_find_space(struct xfs_attr_item *attr);
+int xfs_attr_rmtval_set_blk(struct xfs_attr_intent *attr);
+int xfs_attr_rmtval_find_space(struct xfs_attr_intent *attr);
#endif /* __XFS_ATTR_REMOTE_H__ */
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 2aa300f7461f..2eecc49fc1b2 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -51,16 +51,31 @@ xfs_btree_magic(
return magic;
}
-static xfs_failaddr_t
+/*
+ * These sibling pointer checks are optimised for null sibling pointers. This
+ * happens a lot, and we don't need to byte swap at runtime if the sibling
+ * pointer is NULL.
+ *
+ * These are explicitly marked as inline because the cost of calling them as
+ * functions instead of inlining them is about 36 bytes extra code per call site
+ * on x86-64. Yes, gcc-11 fails to inline them, and explicit inlining of these
+ * two sibling check functions reduces the compiled code size by over 300
+ * bytes.
+ */
+static inline xfs_failaddr_t
xfs_btree_check_lblock_siblings(
struct xfs_mount *mp,
struct xfs_btree_cur *cur,
int level,
xfs_fsblock_t fsb,
- xfs_fsblock_t sibling)
+ __be64 dsibling)
{
- if (sibling == NULLFSBLOCK)
+ xfs_fsblock_t sibling;
+
+ if (dsibling == cpu_to_be64(NULLFSBLOCK))
return NULL;
+
+ sibling = be64_to_cpu(dsibling);
if (sibling == fsb)
return __this_address;
if (level >= 0) {
@@ -74,17 +89,21 @@ xfs_btree_check_lblock_siblings(
return NULL;
}
-static xfs_failaddr_t
+static inline xfs_failaddr_t
xfs_btree_check_sblock_siblings(
struct xfs_mount *mp,
struct xfs_btree_cur *cur,
int level,
xfs_agnumber_t agno,
xfs_agblock_t agbno,
- xfs_agblock_t sibling)
+ __be32 dsibling)
{
- if (sibling == NULLAGBLOCK)
+ xfs_agblock_t sibling;
+
+ if (dsibling == cpu_to_be32(NULLAGBLOCK))
return NULL;
+
+ sibling = be32_to_cpu(dsibling);
if (sibling == agbno)
return __this_address;
if (level >= 0) {
@@ -136,10 +155,10 @@ __xfs_btree_check_lblock(
fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
- be64_to_cpu(block->bb_u.l.bb_leftsib));
+ block->bb_u.l.bb_leftsib);
if (!fa)
fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
- be64_to_cpu(block->bb_u.l.bb_rightsib));
+ block->bb_u.l.bb_rightsib);
return fa;
}
@@ -204,10 +223,10 @@ __xfs_btree_check_sblock(
}
fa = xfs_btree_check_sblock_siblings(mp, cur, level, agno, agbno,
- be32_to_cpu(block->bb_u.s.bb_leftsib));
+ block->bb_u.s.bb_leftsib);
if (!fa)
fa = xfs_btree_check_sblock_siblings(mp, cur, level, agno,
- agbno, be32_to_cpu(block->bb_u.s.bb_rightsib));
+ agbno, block->bb_u.s.bb_rightsib);
return fa;
}
@@ -426,8 +445,14 @@ xfs_btree_del_cursor(
break;
}
+ /*
+ * If we are doing a BMBT update, the number of unaccounted blocks
+ * allocated during this cursor life time should be zero. If it's not
+ * zero, then we should be shut down or on our way to shutdown due to
+ * cancelling a dirty transaction on error.
+ */
ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
- xfs_is_shutdown(cur->bc_mp));
+ xfs_is_shutdown(cur->bc_mp) || error != 0);
if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
kmem_free(cur->bc_ops);
if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && cur->bc_ag.pag)
@@ -3247,7 +3272,7 @@ xfs_btree_insrec(
struct xfs_btree_block *block; /* btree block */
struct xfs_buf *bp; /* buffer for block */
union xfs_btree_ptr nptr; /* new block ptr */
- struct xfs_btree_cur *ncur; /* new btree cursor */
+ struct xfs_btree_cur *ncur = NULL; /* new btree cursor */
union xfs_btree_key nkey; /* new block key */
union xfs_btree_key *lkey;
int optr; /* old key/record index */
@@ -3327,7 +3352,7 @@ xfs_btree_insrec(
#ifdef DEBUG
error = xfs_btree_check_block(cur, block, level, bp);
if (error)
- return error;
+ goto error0;
#endif
/*
@@ -3347,7 +3372,7 @@ xfs_btree_insrec(
for (i = numrecs - ptr; i >= 0; i--) {
error = xfs_btree_debug_check_ptr(cur, pp, i, level);
if (error)
- return error;
+ goto error0;
}
xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
@@ -3432,6 +3457,8 @@ xfs_btree_insrec(
return 0;
error0:
+ if (ncur)
+ xfs_btree_del_cursor(ncur, error);
return error;
}
@@ -4523,10 +4550,10 @@ xfs_btree_lblock_verify(
/* sibling pointer verification */
fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
- be64_to_cpu(block->bb_u.l.bb_leftsib));
+ block->bb_u.l.bb_leftsib);
if (!fa)
fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
- be64_to_cpu(block->bb_u.l.bb_rightsib));
+ block->bb_u.l.bb_rightsib);
return fa;
}
@@ -4580,10 +4607,10 @@ xfs_btree_sblock_verify(
agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp));
agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
fa = xfs_btree_check_sblock_siblings(mp, NULL, -1, agno, agbno,
- be32_to_cpu(block->bb_u.s.bb_leftsib));
+ block->bb_u.s.bb_leftsib);
if (!fa)
fa = xfs_btree_check_sblock_siblings(mp, NULL, -1, agno, agbno,
- be32_to_cpu(block->bb_u.s.bb_rightsib));
+ block->bb_u.s.bb_rightsib);
return fa;
}
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index aa74f3fdb571..e7201dc68f43 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -117,6 +117,17 @@ xfs_da_state_free(xfs_da_state_t *state)
kmem_cache_free(xfs_da_state_cache, state);
}
+void
+xfs_da_state_reset(
+ struct xfs_da_state *state,
+ struct xfs_da_args *args)
+{
+ xfs_da_state_kill_altpath(state);
+ memset(state, 0, sizeof(struct xfs_da_state));
+ state->args = args;
+ state->mp = state->args->dp->i_mount;
+}
+
static inline int xfs_dabuf_nfsb(struct xfs_mount *mp, int whichfork)
{
if (whichfork == XFS_DATA_FORK)
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index ed2303e4d46a..d33b7686a0b3 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -225,6 +225,7 @@ enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
struct xfs_da_state *xfs_da_state_alloc(struct xfs_da_args *args);
void xfs_da_state_free(xfs_da_state_t *state);
+void xfs_da_state_reset(struct xfs_da_state *state, struct xfs_da_args *args);
void xfs_da3_node_hdr_from_disk(struct xfs_mount *mp,
struct xfs_da3_icnode_hdr *to, struct xfs_da_intnode *from);
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index ceb222b4f261..5a321b783398 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -191,35 +191,56 @@ static const struct xfs_defer_op_type *defer_op_types[] = {
[XFS_DEFER_OPS_TYPE_ATTR] = &xfs_attr_defer_type,
};
-static bool
+/*
+ * Ensure there's a log intent item associated with this deferred work item if
+ * the operation must be restarted on crash. Returns 1 if there's a log item;
+ * 0 if there isn't; or a negative errno.
+ */
+static int
xfs_defer_create_intent(
struct xfs_trans *tp,
struct xfs_defer_pending *dfp,
bool sort)
{
const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type];
+ struct xfs_log_item *lip;
+
+ if (dfp->dfp_intent)
+ return 1;
- if (!dfp->dfp_intent)
- dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work,
- dfp->dfp_count, sort);
- return dfp->dfp_intent != NULL;
+ lip = ops->create_intent(tp, &dfp->dfp_work, dfp->dfp_count, sort);
+ if (!lip)
+ return 0;
+ if (IS_ERR(lip))
+ return PTR_ERR(lip);
+
+ dfp->dfp_intent = lip;
+ return 1;
}
/*
* For each pending item in the intake list, log its intent item and the
* associated extents, then add the entire intake list to the end of
* the pending list.
+ *
+ * Returns 1 if at least one log item was associated with the deferred work;
+ * 0 if there are no log items; or a negative errno.
*/
-static bool
+static int
xfs_defer_create_intents(
struct xfs_trans *tp)
{
struct xfs_defer_pending *dfp;
- bool ret = false;
+ int ret = 0;
list_for_each_entry(dfp, &tp->t_dfops, dfp_list) {
+ int ret2;
+
trace_xfs_defer_create_intent(tp->t_mountp, dfp);
- ret |= xfs_defer_create_intent(tp, dfp, true);
+ ret2 = xfs_defer_create_intent(tp, dfp, true);
+ if (ret2 < 0)
+ return ret2;
+ ret |= ret2;
}
return ret;
}
@@ -457,6 +478,8 @@ xfs_defer_finish_one(
dfp->dfp_count--;
error = ops->finish_item(tp, dfp->dfp_done, li, &state);
if (error == -EAGAIN) {
+ int ret;
+
/*
* Caller wants a fresh transaction; put the work item
* back on the list and log a new log intent item to
@@ -467,7 +490,9 @@ xfs_defer_finish_one(
dfp->dfp_count++;
dfp->dfp_done = NULL;
dfp->dfp_intent = NULL;
- xfs_defer_create_intent(tp, dfp, false);
+ ret = xfs_defer_create_intent(tp, dfp, false);
+ if (ret < 0)
+ error = ret;
}
if (error)
@@ -514,10 +539,14 @@ xfs_defer_finish_noroll(
* of time that any one intent item can stick around in memory,
* pinning the log tail.
*/
- bool has_intents = xfs_defer_create_intents(*tp);
+ int has_intents = xfs_defer_create_intents(*tp);
list_splice_init(&(*tp)->t_dfops, &dop_pending);
+ if (has_intents < 0) {
+ error = has_intents;
+ goto out_shutdown;
+ }
if (has_intents || dfp) {
error = xfs_defer_trans_roll(tp);
if (error)
@@ -676,13 +705,15 @@ xfs_defer_ops_capture(
if (list_empty(&tp->t_dfops))
return NULL;
+ error = xfs_defer_create_intents(tp);
+ if (error < 0)
+ return ERR_PTR(error);
+
/* Create an object to capture the defer ops. */
dfc = kmem_zalloc(sizeof(*dfc), KM_NOFS);
INIT_LIST_HEAD(&dfc->dfc_list);
INIT_LIST_HEAD(&dfc->dfc_dfops);
- xfs_defer_create_intents(tp);
-
/* Move the dfops chain and transaction state to the capture struct. */
list_splice_init(&tp->t_dfops, &dfc->dfc_dfops);
dfc->dfc_tpflags = tp->t_flags & XFS_TRANS_LOWMODE;
@@ -759,6 +790,10 @@ xfs_defer_ops_capture_and_commit(
/* If we don't capture anything, commit transaction and exit. */
dfc = xfs_defer_ops_capture(tp);
+ if (IS_ERR(dfc)) {
+ xfs_trans_cancel(tp);
+ return PTR_ERR(dfc);
+ }
if (!dfc)
return xfs_trans_commit(tp);
@@ -873,10 +908,7 @@ xfs_defer_init_item_caches(void)
error = xfs_extfree_intent_init_cache();
if (error)
goto err;
- error = xfs_attri_init_cache();
- if (error)
- goto err;
- error = xfs_attrd_init_cache();
+ error = xfs_attr_intent_init_cache();
if (error)
goto err;
return 0;
@@ -889,8 +921,7 @@ err:
void
xfs_defer_destroy_item_caches(void)
{
- xfs_attri_destroy_cache();
- xfs_attrd_destroy_cache();
+ xfs_attr_intent_destroy_cache();
xfs_extfree_intent_destroy_cache();
xfs_bmap_intent_destroy_cache();
xfs_refcount_intent_destroy_cache();
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index f7edd1ecf6d9..b351b9dc6561 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -906,10 +906,18 @@ struct xfs_icreate_log {
* Flags for deferred attribute operations.
* Upper bits are flags, lower byte is type code
*/
-#define XFS_ATTR_OP_FLAGS_SET 1 /* Set the attribute */
-#define XFS_ATTR_OP_FLAGS_REMOVE 2 /* Remove the attribute */
-#define XFS_ATTR_OP_FLAGS_REPLACE 3 /* Replace the attribute */
-#define XFS_ATTR_OP_FLAGS_TYPE_MASK 0xFF /* Flags type mask */
+#define XFS_ATTRI_OP_FLAGS_SET 1 /* Set the attribute */
+#define XFS_ATTRI_OP_FLAGS_REMOVE 2 /* Remove the attribute */
+#define XFS_ATTRI_OP_FLAGS_REPLACE 3 /* Replace the attribute */
+#define XFS_ATTRI_OP_FLAGS_TYPE_MASK 0xFF /* Flags type mask */
+
+/*
+ * alfi_attr_filter captures the state of xfs_da_args.attr_filter, so it should
+ * never have any other bits set.
+ */
+#define XFS_ATTRI_FILTER_MASK (XFS_ATTR_ROOT | \
+ XFS_ATTR_SECURE | \
+ XFS_ATTR_INCOMPLETE)
/*
* This is the structure used to lay out an attr log item in the
@@ -924,7 +932,7 @@ struct xfs_attri_log_format {
uint32_t alfi_op_flags; /* marks the op as a set or remove */
uint32_t alfi_name_len; /* attr name length */
uint32_t alfi_value_len; /* attr value length */
- uint32_t alfi_attr_flags;/* attr flags */
+ uint32_t alfi_attr_filter;/* attr filter flags */
};
struct xfs_attrd_log_format {
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 32e216255cb0..2420865f3007 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -110,12 +110,6 @@ struct xlog_recover {
#define ITEM_TYPE(i) (*(unsigned short *)(i)->ri_buf[0].i_addr)
-/*
- * This is the number of entries in the l_buf_cancel_table used during
- * recovery.
- */
-#define XLOG_BC_TABLE_SIZE 64
-
#define XLOG_RECOVER_CRCPASS 0
#define XLOG_RECOVER_PASS1 1
#define XLOG_RECOVER_PASS2 2
@@ -128,5 +122,13 @@ int xlog_recover_iget(struct xfs_mount *mp, xfs_ino_t ino,
struct xfs_inode **ipp);
void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type,
uint64_t intent_id);
+int xlog_alloc_buf_cancel_table(struct xlog *log);
+void xlog_free_buf_cancel_table(struct xlog *log);
+
+#ifdef DEBUG
+void xlog_check_buf_cancel_table(struct xlog *log);
+#else
+#define xlog_check_buf_cancel_table(log) do { } while (0)
+#endif
#endif /* __XFS_LOG_RECOVER_H__ */
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index f0b38f4aba80..8b9bd178a487 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -213,7 +213,7 @@ xfs_symlink_shortform_verify(
/*
* Zero length symlinks should never occur in memory as they are
- * never alllowed to exist on disk.
+ * never allowed to exist on disk.
*/
if (!size)
return __this_address;
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index b11870d07c56..2e8e400f10a9 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -340,20 +340,6 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
},
};
-/* This isn't a stable feature, warn once per day. */
-static inline void
-xchk_experimental_warning(
- struct xfs_mount *mp)
-{
- static struct ratelimit_state scrub_warning = RATELIMIT_STATE_INIT(
- "xchk_warning", 86400 * HZ, 1);
- ratelimit_set_flags(&scrub_warning, RATELIMIT_MSG_ON_RELEASE);
-
- if (__ratelimit(&scrub_warning))
- xfs_alert(mp,
-"EXPERIMENTAL online scrub feature in use. Use at your own risk!");
-}
-
static int
xchk_validate_inputs(
struct xfs_mount *mp,
@@ -478,7 +464,8 @@ xfs_scrub_metadata(
if (error)
goto out;
- xchk_experimental_warning(mp);
+ xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SCRUB,
+ "EXPERIMENTAL online scrub feature in use. Use at your own risk!");
sc = kmem_zalloc(sizeof(struct xfs_scrub), KM_NOFS | KM_MAYFAIL);
if (!sc) {
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 3df9c1782ead..b744c62052b6 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -17,6 +17,7 @@
#include "xfs_error.h"
#include "xfs_acl.h"
#include "xfs_trans.h"
+#include "xfs_xattr.h"
#include <linux/posix_acl_xattr.h>
@@ -202,7 +203,7 @@ __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
xfs_acl_to_disk(args.value, acl);
}
- error = xfs_attr_set(&args);
+ error = xfs_attr_change(&args);
kmem_free(args.value);
/*
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
index e8ac88d9fd14..4a28c2d77070 100644
--- a/fs/xfs/xfs_attr_item.c
+++ b/fs/xfs/xfs_attr_item.c
@@ -22,13 +22,15 @@
#include "xfs_attr.h"
#include "xfs_attr_item.h"
#include "xfs_trace.h"
-#include "xfs_inode.h"
#include "xfs_trans_space.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_log_priv.h"
#include "xfs_log_recover.h"
+struct kmem_cache *xfs_attri_cache;
+struct kmem_cache *xfs_attrd_cache;
+
static const struct xfs_item_ops xfs_attri_item_ops;
static const struct xfs_item_ops xfs_attrd_item_ops;
static struct xfs_attrd_log_item *xfs_trans_get_attrd(struct xfs_trans *tp,
@@ -39,12 +41,80 @@ static inline struct xfs_attri_log_item *ATTRI_ITEM(struct xfs_log_item *lip)
return container_of(lip, struct xfs_attri_log_item, attri_item);
}
+/*
+ * Shared xattr name/value buffers for logged extended attribute operations
+ *
+ * When logging updates to extended attributes, we can create quite a few
+ * attribute log intent items for a single xattr update. To avoid cycling the
+ * memory allocator and incurring memcpy overhead, the name (and value, for
+ * setxattr) are kept in a refcounted object that is shared across all related
+ * log items and the upper-level deferred work state structure. The shared
+ * buffer has a control structure, followed by the name, and then the value.
+
+static inline struct xfs_attri_log_nameval *
+xfs_attri_log_nameval_get(
+ struct xfs_attri_log_nameval *nv)
+{
+ if (!refcount_inc_not_zero(&nv->refcount))
+ return NULL;
+ return nv;
+}
+
+static inline void
+xfs_attri_log_nameval_put(
+ struct xfs_attri_log_nameval *nv)
+{
+ if (!nv)
+ return;
+ if (refcount_dec_and_test(&nv->refcount))
+ kvfree(nv);
+}
+
+static inline struct xfs_attri_log_nameval *
+xfs_attri_log_nameval_alloc(
+ const void *name,
+ unsigned int name_len,
+ const void *value,
+ unsigned int value_len)
+{
+ struct xfs_attri_log_nameval *nv;
+
+ /*
+ * The name and value together could be over 64kB in length, so we need a
+ * kvmalloc()-style allocation for this. kvmalloc() itself behaves poorly
+ * here, so we use the log's own version, xlog_kvmalloc().
+ */
+ nv = xlog_kvmalloc(sizeof(struct xfs_attri_log_nameval) +
+ name_len + value_len);
+ if (!nv)
+ return nv;
+
+ nv->name.i_addr = nv + 1;
+ nv->name.i_len = name_len;
+ nv->name.i_type = XLOG_REG_TYPE_ATTR_NAME;
+ memcpy(nv->name.i_addr, name, name_len);
+
+ if (value_len) {
+ nv->value.i_addr = nv->name.i_addr + name_len;
+ nv->value.i_len = value_len;
+ memcpy(nv->value.i_addr, value, value_len);
+ } else {
+ nv->value.i_addr = NULL;
+ nv->value.i_len = 0;
+ }
+ nv->value.i_type = XLOG_REG_TYPE_ATTR_VALUE;
+
+ refcount_set(&nv->refcount, 1);
+ return nv;
+}
+
STATIC void
xfs_attri_item_free(
struct xfs_attri_log_item *attrip)
{
kmem_free(attrip->attri_item.li_lv_shadow);
- kvfree(attrip);
+ xfs_attri_log_nameval_put(attrip->attri_nameval);
+ kmem_cache_free(xfs_attri_cache, attrip);
}
/*
@@ -73,16 +143,17 @@ xfs_attri_item_size(
int *nbytes)
{
struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip);
+ struct xfs_attri_log_nameval *nv = attrip->attri_nameval;
*nvecs += 2;
*nbytes += sizeof(struct xfs_attri_log_format) +
- xlog_calc_iovec_len(attrip->attri_name_len);
+ xlog_calc_iovec_len(nv->name.i_len);
- if (!attrip->attri_value_len)
+ if (!nv->value.i_len)
return;
*nvecs += 1;
- *nbytes += xlog_calc_iovec_len(attrip->attri_value_len);
+ *nbytes += xlog_calc_iovec_len(nv->value.i_len);
}
/*
@@ -97,6 +168,7 @@ xfs_attri_item_format(
{
struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip);
struct xfs_log_iovec *vecp = NULL;
+ struct xfs_attri_log_nameval *nv = attrip->attri_nameval;
attrip->attri_format.alfi_type = XFS_LI_ATTRI;
attrip->attri_format.alfi_size = 1;
@@ -108,22 +180,18 @@ xfs_attri_item_format(
* the log recovery.
*/
- ASSERT(attrip->attri_name_len > 0);
+ ASSERT(nv->name.i_len > 0);
attrip->attri_format.alfi_size++;
- if (attrip->attri_value_len > 0)
+ if (nv->value.i_len > 0)
attrip->attri_format.alfi_size++;
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTRI_FORMAT,
&attrip->attri_format,
sizeof(struct xfs_attri_log_format));
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NAME,
- attrip->attri_name,
- attrip->attri_name_len);
- if (attrip->attri_value_len > 0)
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_VALUE,
- attrip->attri_value,
- attrip->attri_value_len);
+ xlog_copy_from_iovec(lv, &vecp, &nv->name);
+ if (nv->value.i_len > 0)
+ xlog_copy_from_iovec(lv, &vecp, &nv->value);
}
/*
@@ -158,41 +226,18 @@ xfs_attri_item_release(
STATIC struct xfs_attri_log_item *
xfs_attri_init(
struct xfs_mount *mp,
- uint32_t name_len,
- uint32_t value_len)
-
+ struct xfs_attri_log_nameval *nv)
{
struct xfs_attri_log_item *attrip;
- uint32_t buffer_size = name_len + value_len;
- if (buffer_size) {
- /*
- * This could be over 64kB in length, so we have to use
- * kvmalloc() for this. But kvmalloc() utterly sucks, so we
- * use own version.
- */
- attrip = xlog_kvmalloc(sizeof(struct xfs_attri_log_item) +
- buffer_size);
- } else {
- attrip = kmem_cache_alloc(xfs_attri_cache,
- GFP_NOFS | __GFP_NOFAIL);
- }
- memset(attrip, 0, sizeof(struct xfs_attri_log_item));
+ attrip = kmem_cache_zalloc(xfs_attri_cache, GFP_NOFS | __GFP_NOFAIL);
- attrip->attri_name_len = name_len;
- if (name_len)
- attrip->attri_name = ((char *)attrip) +
- sizeof(struct xfs_attri_log_item);
- else
- attrip->attri_name = NULL;
-
- attrip->attri_value_len = value_len;
- if (value_len)
- attrip->attri_value = ((char *)attrip) +
- sizeof(struct xfs_attri_log_item) +
- name_len;
- else
- attrip->attri_value = NULL;
+ /*
+ * Grab an extra reference to the name/value buffer for this log item.
+ * The caller retains its own reference!
+ */
+ attrip->attri_nameval = xfs_attri_log_nameval_get(nv);
+ ASSERT(attrip->attri_nameval);
xfs_log_item_init(mp, &attrip->attri_item, XFS_LI_ATTRI,
&xfs_attri_item_ops);
@@ -233,7 +278,7 @@ STATIC void
xfs_attrd_item_free(struct xfs_attrd_log_item *attrdp)
{
kmem_free(attrdp->attrd_item.li_lv_shadow);
- kmem_free(attrdp);
+ kmem_cache_free(xfs_attrd_cache, attrdp);
}
STATIC void
@@ -297,7 +342,7 @@ xfs_attrd_item_intent(
*/
STATIC int
xfs_xattri_finish_update(
- struct xfs_attr_item *attr,
+ struct xfs_attr_intent *attr,
struct xfs_attrd_log_item *attrdp)
{
struct xfs_da_args *args = attr->xattri_da_args;
@@ -335,7 +380,7 @@ STATIC void
xfs_attr_log_item(
struct xfs_trans *tp,
struct xfs_attri_log_item *attrip,
- struct xfs_attr_item *attr)
+ const struct xfs_attr_intent *attr)
{
struct xfs_attri_log_format *attrp;
@@ -343,23 +388,18 @@ xfs_attr_log_item(
set_bit(XFS_LI_DIRTY, &attrip->attri_item.li_flags);
/*
- * At this point the xfs_attr_item has been constructed, and we've
+ * At this point the xfs_attr_intent has been constructed, and we've
* created the log intent. Fill in the attri log item and log format
- * structure with fields from this xfs_attr_item
+ * structure with fields from this xfs_attr_intent
*/
attrp = &attrip->attri_format;
attrp->alfi_ino = attr->xattri_da_args->dp->i_ino;
+ ASSERT(!(attr->xattri_op_flags & ~XFS_ATTRI_OP_FLAGS_TYPE_MASK));
attrp->alfi_op_flags = attr->xattri_op_flags;
- attrp->alfi_value_len = attr->xattri_da_args->valuelen;
- attrp->alfi_name_len = attr->xattri_da_args->namelen;
- attrp->alfi_attr_flags = attr->xattri_da_args->attr_filter;
-
- memcpy(attrip->attri_name, attr->xattri_da_args->name,
- attr->xattri_da_args->namelen);
- memcpy(attrip->attri_value, attr->xattri_da_args->value,
- attr->xattri_da_args->valuelen);
- attrip->attri_name_len = attr->xattri_da_args->namelen;
- attrip->attri_value_len = attr->xattri_da_args->valuelen;
+ attrp->alfi_value_len = attr->xattri_nameval->value.i_len;
+ attrp->alfi_name_len = attr->xattri_nameval->name.i_len;
+ ASSERT(!(attr->xattri_da_args->attr_filter & ~XFS_ATTRI_FILTER_MASK));
+ attrp->alfi_attr_filter = attr->xattri_da_args->attr_filter;
}
/* Get an ATTRI. */
@@ -372,7 +412,7 @@ xfs_attr_create_intent(
{
struct xfs_mount *mp = tp->t_mountp;
struct xfs_attri_log_item *attrip;
- struct xfs_attr_item *attr;
+ struct xfs_attr_intent *attr;
ASSERT(count == 1);
@@ -383,19 +423,47 @@ xfs_attr_create_intent(
* Each attr item only performs one attribute operation at a time, so
* this is a list of one
*/
- list_for_each_entry(attr, items, xattri_list) {
- attrip = xfs_attri_init(mp, attr->xattri_da_args->namelen,
- attr->xattri_da_args->valuelen);
- if (attrip == NULL)
- return NULL;
-
- xfs_trans_add_item(tp, &attrip->attri_item);
- xfs_attr_log_item(tp, attrip, attr);
+ attr = list_first_entry_or_null(items, struct xfs_attr_intent,
+ xattri_list);
+
+ /*
+ * Create a buffer to store the attribute name and value. This buffer
+ * will be shared between the higher level deferred xattr work state
+ * and the lower level xattr log items.
+ */
+ if (!attr->xattri_nameval) {
+ struct xfs_da_args *args = attr->xattri_da_args;
+
+ /*
+ * Transfer our reference to the name/value buffer to the
+ * deferred work state structure.
+ */
+ attr->xattri_nameval = xfs_attri_log_nameval_alloc(args->name,
+ args->namelen, args->value, args->valuelen);
}
+ if (!attr->xattri_nameval)
+ return ERR_PTR(-ENOMEM);
+
+ attrip = xfs_attri_init(mp, attr->xattri_nameval);
+ xfs_trans_add_item(tp, &attrip->attri_item);
+ xfs_attr_log_item(tp, attrip, attr);
return &attrip->attri_item;
}
+static inline void
+xfs_attr_free_item(
+ struct xfs_attr_intent *attr)
+{
+ if (attr->xattri_da_state)
+ xfs_da_state_free(attr->xattri_da_state);
+ xfs_attri_log_nameval_put(attr->xattri_nameval);
+ if (attr->xattri_da_args->op_flags & XFS_DA_OP_RECOVERY)
+ kmem_free(attr);
+ else
+ kmem_cache_free(xfs_attr_intent_cache, attr);
+}
+
/* Process an attr. */
STATIC int
xfs_attr_finish_item(
@@ -404,11 +472,11 @@ xfs_attr_finish_item(
struct list_head *item,
struct xfs_btree_cur **state)
{
- struct xfs_attr_item *attr;
+ struct xfs_attr_intent *attr;
struct xfs_attrd_log_item *done_item = NULL;
int error;
- attr = container_of(item, struct xfs_attr_item, xattri_list);
+ attr = container_of(item, struct xfs_attr_intent, xattri_list);
if (done)
done_item = ATTRD_ITEM(done);
@@ -420,7 +488,7 @@ xfs_attr_finish_item(
error = xfs_xattri_finish_update(attr, done_item);
if (error != -EAGAIN)
- kmem_free(attr);
+ xfs_attr_free_item(attr);
return error;
}
@@ -438,33 +506,10 @@ STATIC void
xfs_attr_cancel_item(
struct list_head *item)
{
- struct xfs_attr_item *attr;
-
- attr = container_of(item, struct xfs_attr_item, xattri_list);
- kmem_free(attr);
-}
-
-STATIC xfs_lsn_t
-xfs_attri_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
- struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip);
-
- /*
- * The attrip refers to xfs_attr_item memory to log the name and value
- * with the intent item. This already occurred when the intent was
- * committed so these fields are no longer accessed. Clear them out of
- * caution since we're about to free the xfs_attr_item.
- */
- attrip->attri_name = NULL;
- attrip->attri_value = NULL;
+ struct xfs_attr_intent *attr;
- /*
- * The ATTRI is logged only once and cannot be moved in the log, so
- * simply return the lsn at which it's been logged.
- */
- return lsn;
+ attr = container_of(item, struct xfs_attr_intent, xattri_list);
+ xfs_attr_free_item(attr);
}
STATIC bool
@@ -482,16 +527,22 @@ xfs_attri_validate(
struct xfs_attri_log_format *attrp)
{
unsigned int op = attrp->alfi_op_flags &
- XFS_ATTR_OP_FLAGS_TYPE_MASK;
+ XFS_ATTRI_OP_FLAGS_TYPE_MASK;
if (attrp->__pad != 0)
return false;
+ if (attrp->alfi_op_flags & ~XFS_ATTRI_OP_FLAGS_TYPE_MASK)
+ return false;
+
+ if (attrp->alfi_attr_filter & ~XFS_ATTRI_FILTER_MASK)
+ return false;
+
/* alfi_op_flags should be a set, replace, or remove */
switch (op) {
- case XFS_ATTR_OP_FLAGS_SET:
- case XFS_ATTR_OP_FLAGS_REPLACE:
- case XFS_ATTR_OP_FLAGS_REMOVE:
+ case XFS_ATTRI_OP_FLAGS_SET:
+ case XFS_ATTRI_OP_FLAGS_REPLACE:
+ case XFS_ATTRI_OP_FLAGS_REMOVE:
break;
default:
return false;
@@ -517,13 +568,14 @@ xfs_attri_item_recover(
struct list_head *capture_list)
{
struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip);
- struct xfs_attr_item *attr;
+ struct xfs_attr_intent *attr;
struct xfs_mount *mp = lip->li_log->l_mp;
struct xfs_inode *ip;
struct xfs_da_args *args;
struct xfs_trans *tp;
struct xfs_trans_res tres;
struct xfs_attri_log_format *attrp;
+ struct xfs_attri_log_nameval *nv = attrip->attri_nameval;
int error, ret = 0;
int total;
int local;
@@ -535,41 +587,50 @@ xfs_attri_item_recover(
*/
attrp = &attrip->attri_format;
if (!xfs_attri_validate(mp, attrp) ||
- !xfs_attr_namecheck(attrip->attri_name, attrip->attri_name_len))
+ !xfs_attr_namecheck(nv->name.i_addr, nv->name.i_len))
return -EFSCORRUPTED;
error = xlog_recover_iget(mp, attrp->alfi_ino, &ip);
if (error)
return error;
- attr = kmem_zalloc(sizeof(struct xfs_attr_item) +
+ attr = kmem_zalloc(sizeof(struct xfs_attr_intent) +
sizeof(struct xfs_da_args), KM_NOFS);
args = (struct xfs_da_args *)(attr + 1);
attr->xattri_da_args = args;
- attr->xattri_op_flags = attrp->alfi_op_flags;
+ attr->xattri_op_flags = attrp->alfi_op_flags &
+ XFS_ATTRI_OP_FLAGS_TYPE_MASK;
+
+ /*
+ * We're reconstructing the deferred work state structure from the
+ * recovered log item. Grab a reference to the name/value buffer and
+ * attach it to the new work state.
+ */
+ attr->xattri_nameval = xfs_attri_log_nameval_get(nv);
+ ASSERT(attr->xattri_nameval);
args->dp = ip;
args->geo = mp->m_attr_geo;
args->whichfork = XFS_ATTR_FORK;
- args->name = attrip->attri_name;
- args->namelen = attrp->alfi_name_len;
+ args->name = nv->name.i_addr;
+ args->namelen = nv->name.i_len;
args->hashval = xfs_da_hashname(args->name, args->namelen);
- args->attr_filter = attrp->alfi_attr_flags;
+ args->attr_filter = attrp->alfi_attr_filter & XFS_ATTRI_FILTER_MASK;
args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT;
- switch (attrp->alfi_op_flags & XFS_ATTR_OP_FLAGS_TYPE_MASK) {
- case XFS_ATTR_OP_FLAGS_SET:
- case XFS_ATTR_OP_FLAGS_REPLACE:
- args->value = attrip->attri_value;
- args->valuelen = attrp->alfi_value_len;
+ switch (attr->xattri_op_flags) {
+ case XFS_ATTRI_OP_FLAGS_SET:
+ case XFS_ATTRI_OP_FLAGS_REPLACE:
+ args->value = nv->value.i_addr;
+ args->valuelen = nv->value.i_len;
args->total = xfs_attr_calc_size(args, &local);
if (xfs_inode_hasattr(args->dp))
attr->xattri_dela_state = xfs_attr_init_replace_state(args);
else
attr->xattri_dela_state = xfs_attr_init_add_state(args);
break;
- case XFS_ATTR_OP_FLAGS_REMOVE:
+ case XFS_ATTRI_OP_FLAGS_REMOVE:
if (!xfs_inode_hasattr(args->dp))
goto out;
attr->xattri_dela_state = xfs_attr_init_remove_state(args);
@@ -613,7 +674,7 @@ out_unlock:
xfs_irele(ip);
out:
if (ret != -EAGAIN)
- kmem_free(attr);
+ xfs_attr_free_item(attr);
return error;
}
@@ -636,22 +697,18 @@ xfs_attri_item_relog(
attrdp = xfs_trans_get_attrd(tp, old_attrip);
set_bit(XFS_LI_DIRTY, &attrdp->attrd_item.li_flags);
- new_attrip = xfs_attri_init(tp->t_mountp, old_attrp->alfi_name_len,
- old_attrp->alfi_value_len);
+ /*
+ * Create a new log item that shares the same name/value buffer as the
+ * old log item.
+ */
+ new_attrip = xfs_attri_init(tp->t_mountp, old_attrip->attri_nameval);
new_attrp = &new_attrip->attri_format;
new_attrp->alfi_ino = old_attrp->alfi_ino;
new_attrp->alfi_op_flags = old_attrp->alfi_op_flags;
new_attrp->alfi_value_len = old_attrp->alfi_value_len;
new_attrp->alfi_name_len = old_attrp->alfi_name_len;
- new_attrp->alfi_attr_flags = old_attrp->alfi_attr_flags;
-
- memcpy(new_attrip->attri_name, old_attrip->attri_name,
- new_attrip->attri_name_len);
-
- if (new_attrip->attri_value_len > 0)
- memcpy(new_attrip->attri_value, old_attrip->attri_value,
- new_attrip->attri_value_len);
+ new_attrp->alfi_attr_filter = old_attrp->alfi_attr_filter;
xfs_trans_add_item(tp, &new_attrip->attri_item);
set_bit(XFS_LI_DIRTY, &new_attrip->attri_item.li_flags);
@@ -666,46 +723,46 @@ xlog_recover_attri_commit_pass2(
struct xlog_recover_item *item,
xfs_lsn_t lsn)
{
- int error;
struct xfs_mount *mp = log->l_mp;
struct xfs_attri_log_item *attrip;
struct xfs_attri_log_format *attri_formatp;
- int region = 0;
+ struct xfs_attri_log_nameval *nv;
+ const void *attr_value = NULL;
+ const void *attr_name;
+ int error;
- attri_formatp = item->ri_buf[region].i_addr;
+ attri_formatp = item->ri_buf[0].i_addr;
+ attr_name = item->ri_buf[1].i_addr;
- /* Validate xfs_attri_log_format */
+ /* Validate xfs_attri_log_format before the large memory allocation */
if (!xfs_attri_validate(mp, attri_formatp)) {
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
return -EFSCORRUPTED;
}
- /* memory alloc failure will cause replay to abort */
- attrip = xfs_attri_init(mp, attri_formatp->alfi_name_len,
- attri_formatp->alfi_value_len);
- if (attrip == NULL)
- return -ENOMEM;
+ if (!xfs_attr_namecheck(attr_name, attri_formatp->alfi_name_len)) {
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+ return -EFSCORRUPTED;
+ }
- error = xfs_attri_copy_format(&item->ri_buf[region],
- &attrip->attri_format);
- if (error)
- goto out;
+ if (attri_formatp->alfi_value_len)
+ attr_value = item->ri_buf[2].i_addr;
- region++;
- memcpy(attrip->attri_name, item->ri_buf[region].i_addr,
- attrip->attri_name_len);
+ /*
+ * Memory alloc failure will cause replay to abort. We attach the
+ * name/value buffer to the recovered incore log item and drop our
+ * reference.
+ */
+ nv = xfs_attri_log_nameval_alloc(attr_name,
+ attri_formatp->alfi_name_len, attr_value,
+ attri_formatp->alfi_value_len);
+ if (!nv)
+ return -ENOMEM;
- if (!xfs_attr_namecheck(attrip->attri_name, attrip->attri_name_len)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
- error = -EFSCORRUPTED;
+ attrip = xfs_attri_init(mp, nv);
+ error = xfs_attri_copy_format(&item->ri_buf[0], &attrip->attri_format);
+ if (error)
goto out;
- }
-
- if (attrip->attri_value_len > 0) {
- region++;
- memcpy(attrip->attri_value, item->ri_buf[region].i_addr,
- attrip->attri_value_len);
- }
/*
* The ATTRI has two references. One for the ATTRD and one for ATTRI to
@@ -715,9 +772,11 @@ xlog_recover_attri_commit_pass2(
*/
xfs_trans_ail_insert(log->l_ailp, &attrip->attri_item, lsn);
xfs_attri_release(attrip);
+ xfs_attri_log_nameval_put(nv);
return 0;
out:
xfs_attri_item_free(attrip);
+ xfs_attri_log_nameval_put(nv);
return error;
}
@@ -797,7 +856,6 @@ static const struct xfs_item_ops xfs_attri_item_ops = {
.iop_size = xfs_attri_item_size,
.iop_format = xfs_attri_item_format,
.iop_unpin = xfs_attri_item_unpin,
- .iop_committed = xfs_attri_item_committed,
.iop_release = xfs_attri_item_release,
.iop_recover = xfs_attri_item_recover,
.iop_match = xfs_attri_item_match,
diff --git a/fs/xfs/xfs_attr_item.h b/fs/xfs/xfs_attr_item.h
index c3b779f82adb..3280a7930287 100644
--- a/fs/xfs/xfs_attr_item.h
+++ b/fs/xfs/xfs_attr_item.h
@@ -11,25 +11,30 @@
struct xfs_mount;
struct kmem_zone;
+struct xfs_attri_log_nameval {
+ struct xfs_log_iovec name;
+ struct xfs_log_iovec value;
+ refcount_t refcount;
+
+ /* name and value follow the end of this struct */
+};
+
/*
* This is the "attr intention" log item. It is used to log the fact that some
* extended attribute operations need to be processed. An operation is
* currently either a set or remove. Set or remove operations are described by
- * the xfs_attr_item which may be logged to this intent.
+ * the xfs_attr_intent which may be logged to this intent.
*
* During a normal attr operation, name and value point to the name and value
* fields of the caller's xfs_da_args structure. During a recovery, the name
* and value buffers are copied from the log, and stored in a trailing buffer
- * attached to the xfs_attr_item until they are committed. They are freed when
- * the xfs_attr_item itself is freed when the work is done.
+ * attached to the xfs_attr_intent until they are committed. They are freed
+ * when the xfs_attr_intent itself is freed when the work is done.
*/
struct xfs_attri_log_item {
struct xfs_log_item attri_item;
atomic_t attri_refcount;
- int attri_name_len;
- int attri_value_len;
- void *attri_name;
- void *attri_value;
+ struct xfs_attri_log_nameval *attri_nameval;
struct xfs_attri_log_format attri_format;
};
@@ -43,4 +48,7 @@ struct xfs_attrd_log_item {
struct xfs_attrd_log_format attrd_format;
};
+extern struct kmem_cache *xfs_attri_cache;
+extern struct kmem_cache *xfs_attrd_cache;
+
#endif /* __XFS_ATTR_ITEM_H__ */
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index e484251dc9c8..ffa94102094d 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -24,6 +24,15 @@
#include "xfs_quota.h"
/*
+ * This is the number of entries in the l_buf_cancel_table used during
+ * recovery.
+ */
+#define XLOG_BC_TABLE_SIZE 64
+
+#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
+ ((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE))
+
+/*
* This structure is used during recovery to record the buf log items which
* have been canceled and should not be replayed.
*/
@@ -993,3 +1002,60 @@ const struct xlog_recover_item_ops xlog_buf_item_ops = {
.commit_pass1 = xlog_recover_buf_commit_pass1,
.commit_pass2 = xlog_recover_buf_commit_pass2,
};
+
+#ifdef DEBUG
+void
+xlog_check_buf_cancel_table(
+ struct xlog *log)
+{
+ int i;
+
+ for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
+ ASSERT(list_empty(&log->l_buf_cancel_table[i]));
+}
+#endif
+
+int
+xlog_alloc_buf_cancel_table(
+ struct xlog *log)
+{
+ void *p;
+ int i;
+
+ ASSERT(log->l_buf_cancel_table == NULL);
+
+ p = kmalloc_array(XLOG_BC_TABLE_SIZE, sizeof(struct list_head),
+ GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ log->l_buf_cancel_table = p;
+ for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
+ INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
+
+ return 0;
+}
+
+void
+xlog_free_buf_cancel_table(
+ struct xlog *log)
+{
+ int i;
+
+ if (!log->l_buf_cancel_table)
+ return;
+
+ for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) {
+ struct xfs_buf_cancel *bc;
+
+ while ((bc = list_first_entry_or_null(
+ &log->l_buf_cancel_table[i],
+ struct xfs_buf_cancel, bc_list))) {
+ list_del(&bc->bc_list);
+ kmem_free(bc);
+ }
+ }
+
+ kmem_free(log->l_buf_cancel_table);
+ log->l_buf_cancel_table = NULL;
+}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a60632ecc3f0..5a171c0b244b 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -576,9 +576,9 @@ xfs_file_dio_write_unaligned(
* don't even bother trying the fast path in this case.
*/
if (iocb->ki_pos > isize || iocb->ki_pos + count >= isize) {
-retry_exclusive:
if (iocb->ki_flags & IOCB_NOWAIT)
return -EAGAIN;
+retry_exclusive:
iolock = XFS_IOLOCK_EXCL;
flags = IOMAP_DIO_FORCE_WAIT;
}
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 888839e75d11..d4a77c53f94b 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -149,12 +149,7 @@ xfs_growfs_data_private(
error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount,
delta, &lastag_extended);
} else {
- static struct ratelimit_state shrink_warning = \
- RATELIMIT_STATE_INIT("shrink_warning", 86400 * HZ, 1);
- ratelimit_set_flags(&shrink_warning, RATELIMIT_MSG_ON_RELEASE);
-
- if (__ratelimit(&shrink_warning))
- xfs_alert(mp,
+ xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SHRINK,
"EXPERIMENTAL online shrink feature in use. Use at your own risk!");
error = xfs_ag_shrink_space(mp, &tp, nagcount - 1, -delta);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b2879870a17e..52d6f2c7d58b 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2622,7 +2622,7 @@ xfs_ifree(
*/
error = xfs_difree(tp, pag, ip->i_ino, &xic);
if (error)
- return error;
+ goto out;
error = xfs_iunlink_remove(tp, pag, ip);
if (error)
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 0e5cb7936206..5a364a7d58fd 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -37,6 +37,7 @@
#include "xfs_health.h"
#include "xfs_reflink.h"
#include "xfs_ioctl.h"
+#include "xfs_xattr.h"
#include <linux/mount.h>
#include <linux/namei.h>
@@ -524,7 +525,7 @@ xfs_attrmulti_attr_set(
args.valuelen = len;
}
- error = xfs_attr_set(&args);
+ error = xfs_attr_change(&args);
if (!error && (flags & XFS_IOC_ATTR_ROOT))
xfs_forget_acl(inode, name);
kfree(args.value);
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index e912b7fee714..29f5b8b8aca6 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -24,6 +24,7 @@
#include "xfs_iomap.h"
#include "xfs_error.h"
#include "xfs_ioctl.h"
+#include "xfs_xattr.h"
#include <linux/posix_acl.h>
#include <linux/security.h>
@@ -61,7 +62,7 @@ xfs_initxattrs(
.value = xattr->value,
.valuelen = xattr->value_len,
};
- error = xfs_attr_set(&args);
+ error = xfs_attr_change(&args);
if (error < 0)
break;
}
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 9dc748abdf33..1e972f884a81 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3877,44 +3877,3 @@ xlog_drop_incompat_feat(
{
up_read(&log->l_incompat_users);
}
-
-/*
- * Get permission to use log-assisted atomic exchange of file extents.
- *
- * Callers must not be running any transactions or hold any inode locks, and
- * they must release the permission by calling xlog_drop_incompat_feat
- * when they're done.
- */
-int
-xfs_attr_use_log_assist(
- struct xfs_mount *mp)
-{
- int error = 0;
-
- /*
- * Protect ourselves from an idle log clearing the logged xattrs log
- * incompat feature bit.
- */
- xlog_use_incompat_feat(mp->m_log);
-
- /*
- * If log-assisted xattrs are already enabled, the caller can use the
- * log assisted swap functions with the log-incompat reference we got.
- */
- if (xfs_sb_version_haslogxattrs(&mp->m_sb))
- return 0;
-
- /* Enable log-assisted xattrs. */
- error = xfs_add_incompat_log_feature(mp,
- XFS_SB_FEAT_INCOMPAT_LOG_XATTRS);
- if (error)
- goto drop_incompat;
-
- xfs_warn_once(mp,
-"EXPERIMENTAL logged extended attributes feature added. Use at your own risk!");
-
- return 0;
-drop_incompat:
- xlog_drop_incompat_feat(mp->m_log);
- return error;
-}
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 252b098cde1f..f3ce046a7d45 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -86,6 +86,13 @@ xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
return buf;
}
+static inline void *
+xlog_copy_from_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
+ const struct xfs_log_iovec *src)
+{
+ return xlog_copy_iovec(lv, vecp, src->i_type, src->i_addr, src->i_len);
+}
+
/*
* By comparing each component, we don't have to worry about extra
* endian issues in treating two 32 bit numbers as one 64 bit number
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 67fd9789e69a..686c01eb3661 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -428,9 +428,6 @@ struct xlog {
struct rw_semaphore l_incompat_users;
};
-#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
- ((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE))
-
/*
* Bits for operational state
*/
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 97b941c07957..5f7e4e6e33ce 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -39,13 +39,6 @@ STATIC int
xlog_clear_stale_blocks(
struct xlog *,
xfs_lsn_t);
-#if defined(DEBUG)
-STATIC void
-xlog_recover_check_summary(
- struct xlog *);
-#else
-#define xlog_recover_check_summary(log)
-#endif
STATIC int
xlog_do_recovery_pass(
struct xlog *, xfs_daddr_t, xfs_daddr_t, int, xfs_daddr_t *);
@@ -3230,7 +3223,7 @@ xlog_do_log_recovery(
xfs_daddr_t head_blk,
xfs_daddr_t tail_blk)
{
- int error, i;
+ int error;
ASSERT(head_blk != tail_blk);
@@ -3238,37 +3231,25 @@ xlog_do_log_recovery(
* First do a pass to find all of the cancelled buf log items.
* Store them in the buf_cancel_table for use in the second pass.
*/
- log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE *
- sizeof(struct list_head),
- 0);
- for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
- INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
+ error = xlog_alloc_buf_cancel_table(log);
+ if (error)
+ return error;
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
XLOG_RECOVER_PASS1, NULL);
- if (error != 0) {
- kmem_free(log->l_buf_cancel_table);
- log->l_buf_cancel_table = NULL;
- return error;
- }
+ if (error != 0)
+ goto out_cancel;
+
/*
* Then do a second pass to actually recover the items in the log.
* When it is complete free the table of buf cancel items.
*/
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
XLOG_RECOVER_PASS2, NULL);
-#ifdef DEBUG
- if (!error) {
- int i;
-
- for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
- ASSERT(list_empty(&log->l_buf_cancel_table[i]));
- }
-#endif /* DEBUG */
-
- kmem_free(log->l_buf_cancel_table);
- log->l_buf_cancel_table = NULL;
-
+ if (!error)
+ xlog_check_buf_cancel_table(log);
+out_cancel:
+ xlog_free_buf_cancel_table(log);
return error;
}
@@ -3339,8 +3320,6 @@ xlog_do_recover(
}
mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
- xlog_recover_check_summary(log);
-
/* Normal transactions can now occur */
clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate);
return 0;
@@ -3483,7 +3462,6 @@ xlog_recover_finish(
}
xlog_recover_process_iunlinks(log);
- xlog_recover_check_summary(log);
/*
* Recover any CoW staging blocks that are still referenced by the
@@ -3517,52 +3495,3 @@ xlog_recover_cancel(
xlog_recover_cancel_intents(log);
}
-#if defined(DEBUG)
-/*
- * Read all of the agf and agi counters and check that they
- * are consistent with the superblock counters.
- */
-STATIC void
-xlog_recover_check_summary(
- struct xlog *log)
-{
- struct xfs_mount *mp = log->l_mp;
- struct xfs_perag *pag;
- struct xfs_buf *agfbp;
- struct xfs_buf *agibp;
- xfs_agnumber_t agno;
- uint64_t freeblks;
- uint64_t itotal;
- uint64_t ifree;
- int error;
-
- freeblks = 0LL;
- itotal = 0LL;
- ifree = 0LL;
- for_each_perag(mp, agno, pag) {
- error = xfs_read_agf(mp, NULL, pag->pag_agno, 0, &agfbp);
- if (error) {
- xfs_alert(mp, "%s agf read failed agno %d error %d",
- __func__, pag->pag_agno, error);
- } else {
- struct xfs_agf *agfp = agfbp->b_addr;
-
- freeblks += be32_to_cpu(agfp->agf_freeblks) +
- be32_to_cpu(agfp->agf_flcount);
- xfs_buf_relse(agfbp);
- }
-
- error = xfs_read_agi(mp, NULL, pag->pag_agno, &agibp);
- if (error) {
- xfs_alert(mp, "%s agi read failed agno %d error %d",
- __func__, pag->pag_agno, error);
- } else {
- struct xfs_agi *agi = agibp->b_addr;
-
- itotal += be32_to_cpu(agi->agi_count);
- ifree += be32_to_cpu(agi->agi_freecount);
- xfs_buf_relse(agibp);
- }
- }
-}
-#endif /* DEBUG */
diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h
index 55ee464ab59f..cc323775a12c 100644
--- a/fs/xfs/xfs_message.h
+++ b/fs/xfs/xfs_message.h
@@ -75,6 +75,12 @@ do { \
#define xfs_debug_ratelimited(dev, fmt, ...) \
xfs_printk_ratelimited(xfs_debug, dev, fmt, ##__VA_ARGS__)
+#define xfs_warn_mount(mp, warntag, fmt, ...) \
+do { \
+ if (xfs_should_warn((mp), (warntag))) \
+ xfs_warn((mp), (fmt), ##__VA_ARGS__); \
+} while (0)
+
#define xfs_warn_once(dev, fmt, ...) \
xfs_printk_once(xfs_warn, dev, fmt, ##__VA_ARGS__)
#define xfs_notice_once(dev, fmt, ...) \
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 0c0bcbd4949d..daa8d29c46b4 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1356,7 +1356,6 @@ xfs_clear_incompat_log_features(
if (xfs_sb_has_incompat_log_feature(&mp->m_sb,
XFS_SB_FEAT_INCOMPAT_LOG_ALL)) {
- xfs_info(mp, "Clearing log incompat feature flags.");
xfs_sb_remove_incompat_log_features(&mp->m_sb);
ret = true;
}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 8c42786e4942..ba5d42abf66e 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -391,6 +391,13 @@ __XFS_HAS_FEAT(nouuid, NOUUID)
*/
#define XFS_OPSTATE_BLOCKGC_ENABLED 6
+/* Kernel has logged a warning about online fsck being used on this fs. */
+#define XFS_OPSTATE_WARNED_SCRUB 7
+/* Kernel has logged a warning about shrink being used on this fs. */
+#define XFS_OPSTATE_WARNED_SHRINK 8
+/* Kernel has logged a warning about logged xattr updates being used. */
+#define XFS_OPSTATE_WARNED_LARP 9
+
#define __XFS_IS_OPSTATE(name, NAME) \
static inline bool xfs_is_ ## name (struct xfs_mount *mp) \
{ \
@@ -413,6 +420,12 @@ __XFS_IS_OPSTATE(readonly, READONLY)
__XFS_IS_OPSTATE(inodegc_enabled, INODEGC_ENABLED)
__XFS_IS_OPSTATE(blockgc_enabled, BLOCKGC_ENABLED)
+static inline bool
+xfs_should_warn(struct xfs_mount *mp, long nr)
+{
+ return !test_and_set_bit(nr, &mp->m_opstate);
+}
+
#define XFS_OPSTATE_STRINGS \
{ (1UL << XFS_OPSTATE_UNMOUNTING), "unmounting" }, \
{ (1UL << XFS_OPSTATE_CLEAN), "clean" }, \
@@ -420,7 +433,10 @@ __XFS_IS_OPSTATE(blockgc_enabled, BLOCKGC_ENABLED)
{ (1UL << XFS_OPSTATE_INODE32), "inode32" }, \
{ (1UL << XFS_OPSTATE_READONLY), "read_only" }, \
{ (1UL << XFS_OPSTATE_INODEGC_ENABLED), "inodegc" }, \
- { (1UL << XFS_OPSTATE_BLOCKGC_ENABLED), "blockgc" }
+ { (1UL << XFS_OPSTATE_BLOCKGC_ENABLED), "blockgc" }, \
+ { (1UL << XFS_OPSTATE_WARNED_SCRUB), "wscrub" }, \
+ { (1UL << XFS_OPSTATE_WARNED_SHRINK), "wshrink" }, \
+ { (1UL << XFS_OPSTATE_WARNED_LARP), "wlarp" }
/*
* Max and min values for mount-option defined I/O
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 8fc813cb6011..abf08bbf34a9 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1308,8 +1308,15 @@ xfs_qm_quotacheck(
error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true,
NULL);
- if (error)
+ if (error) {
+ /*
+ * The inode walk may have partially populated the dquot
+ * caches. We must purge them before disabling quota and
+ * tearing down the quotainfo, or else the dquots will leak.
+ */
+ xfs_qm_dqpurge_all(mp);
goto error_return;
+ }
/*
* We've made all the changes that we need to make incore. Flush them
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 8495ef076ffc..ed18160e6181 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -38,6 +38,8 @@
#include "xfs_pwork.h"
#include "xfs_ag.h"
#include "xfs_defer.h"
+#include "xfs_attr_item.h"
+#include "xfs_xattr.h"
#include <linux/magic.h>
#include <linux/fs_context.h>
@@ -2079,8 +2081,24 @@ xfs_init_caches(void)
if (!xfs_bui_cache)
goto out_destroy_bud_cache;
+ xfs_attrd_cache = kmem_cache_create("xfs_attrd_item",
+ sizeof(struct xfs_attrd_log_item),
+ 0, 0, NULL);
+ if (!xfs_attrd_cache)
+ goto out_destroy_bui_cache;
+
+ xfs_attri_cache = kmem_cache_create("xfs_attri_item",
+ sizeof(struct xfs_attri_log_item),
+ 0, 0, NULL);
+ if (!xfs_attri_cache)
+ goto out_destroy_attrd_cache;
+
return 0;
+ out_destroy_attrd_cache:
+ kmem_cache_destroy(xfs_attrd_cache);
+ out_destroy_bui_cache:
+ kmem_cache_destroy(xfs_bui_cache);
out_destroy_bud_cache:
kmem_cache_destroy(xfs_bud_cache);
out_destroy_cui_cache:
@@ -2127,6 +2145,8 @@ xfs_destroy_caches(void)
* destroy caches.
*/
rcu_barrier();
+ kmem_cache_destroy(xfs_attri_cache);
+ kmem_cache_destroy(xfs_attrd_cache);
kmem_cache_destroy(xfs_bui_cache);
kmem_cache_destroy(xfs_bud_cache);
kmem_cache_destroy(xfs_cui_cache);
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 167d23f92ffe..3cd5a51bace1 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -91,7 +91,6 @@ extern xfs_agnumber_t xfs_set_inode_alloc(struct xfs_mount *,
xfs_agnumber_t agcount);
extern const struct export_operations xfs_export_operations;
-extern const struct xattr_handler *xfs_xattr_handlers[];
extern const struct quotactl_ops xfs_quotactl_operations;
extern void xfs_reinit_percpu_counters(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 7a044afd4c46..35e13e125ec6 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -15,9 +15,86 @@
#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_acl.h"
+#include "xfs_log.h"
+#include "xfs_xattr.h"
#include <linux/posix_acl_xattr.h>
+/*
+ * Get permission to use log-assisted extended attribute (xattr) updates.
+ *
+ * Callers must not be running any transactions or hold any inode locks, and
+ * they must release the permission by calling xlog_drop_incompat_feat
+ * when they're done.
+ */
+static inline int
+xfs_attr_grab_log_assist(
+ struct xfs_mount *mp)
+{
+ int error = 0;
+
+ /*
+ * Protect ourselves from an idle log clearing the logged xattrs log
+ * incompat feature bit.
+ */
+ xlog_use_incompat_feat(mp->m_log);
+
+ /*
+ * If log-assisted xattrs are already enabled, the caller can perform
+ * logged xattr updates with the log-incompat reference we got.
+ */
+ if (xfs_sb_version_haslogxattrs(&mp->m_sb))
+ return 0;
+
+ /* Enable log-assisted xattrs. */
+ error = xfs_add_incompat_log_feature(mp,
+ XFS_SB_FEAT_INCOMPAT_LOG_XATTRS);
+ if (error)
+ goto drop_incompat;
+
+ xfs_warn_mount(mp, XFS_OPSTATE_WARNED_LARP,
+ "EXPERIMENTAL logged extended attributes feature in use. Use at your own risk!");
+
+ return 0;
+drop_incompat:
+ xlog_drop_incompat_feat(mp->m_log);
+ return error;
+}
+
+static inline void
+xfs_attr_rele_log_assist(
+ struct xfs_mount *mp)
+{
+ xlog_drop_incompat_feat(mp->m_log);
+}
+
+/*
+ * Set or remove an xattr, having grabbed the appropriate logging resources
+ * prior to calling libxfs.
+ */
+int
+xfs_attr_change(
+ struct xfs_da_args *args)
+{
+ struct xfs_mount *mp = args->dp->i_mount;
+ bool use_logging = false;
+ int error;
+
+ if (xfs_has_larp(mp)) {
+ error = xfs_attr_grab_log_assist(mp);
+ if (error)
+ return error;
+
+ use_logging = true;
+ }
+
+ error = xfs_attr_set(args);
+
+ if (use_logging)
+ xfs_attr_rele_log_assist(mp);
+ return error;
+}
+
static int
xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused,
@@ -56,7 +133,7 @@ xfs_xattr_set(const struct xattr_handler *handler,
};
int error;
- error = xfs_attr_set(&args);
+ error = xfs_attr_change(&args);
if (!error && (handler->flags & XFS_ATTR_ROOT))
xfs_forget_acl(inode, name);
return error;
diff --git a/fs/xfs/xfs_xattr.h b/fs/xfs/xfs_xattr.h
new file mode 100644
index 000000000000..2b09133b1b9b
--- /dev/null
+++ b/fs/xfs/xfs_xattr.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#ifndef __XFS_XATTR_H__
+#define __XFS_XATTR_H__
+
+int xfs_attr_change(struct xfs_da_args *args);
+
+extern const struct xattr_handler *xfs_xattr_handlers[];
+
+#endif /* __XFS_XATTR_H__ */