diff options
Diffstat (limited to 'fs')
91 files changed, 1602 insertions, 1357 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 7584aa6e5025..e5221be6eb55 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -256,6 +256,7 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) struct iov_iter iter; ssize_t err = 0; size_t len; + int mode; __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); @@ -264,7 +265,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) goto out; /* We need to fetch the inline data. */ - req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); + mode = ceph_try_to_choose_auth_mds(inode, CEPH_STAT_CAP_INLINE_DATA); + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, mode); if (IS_ERR(req)) { err = PTR_ERR(req); goto out; @@ -604,8 +606,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc, ceph_wbc.truncate_seq, ceph_wbc.truncate_size, true); - if (IS_ERR(req)) + if (IS_ERR(req)) { + redirty_page_for_writepage(wbc, page); return PTR_ERR(req); + } set_page_writeback(page); if (caching) @@ -1644,7 +1648,7 @@ int ceph_uninline_data(struct file *file) struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); - struct ceph_osd_request *req; + struct ceph_osd_request *req = NULL; struct ceph_cap_flush *prealloc_cf; struct folio *folio = NULL; u64 inline_version = CEPH_INLINE_NONE; @@ -1652,10 +1656,23 @@ int ceph_uninline_data(struct file *file) int err = 0; u64 len; + spin_lock(&ci->i_ceph_lock); + inline_version = ci->i_inline_version; + spin_unlock(&ci->i_ceph_lock); + + dout("uninline_data %p %llx.%llx inline_version %llu\n", + inode, ceph_vinop(inode), inline_version); + + if (inline_version == CEPH_INLINE_NONE) + return 0; + prealloc_cf = ceph_alloc_cap_flush(); if (!prealloc_cf) return -ENOMEM; + if (inline_version == 1) /* initial version, no data */ + goto out_uninline; + folio = read_mapping_folio(inode->i_mapping, 0, file); if (IS_ERR(folio)) { err = PTR_ERR(folio); @@ -1664,17 +1681,6 @@ int ceph_uninline_data(struct file *file) folio_lock(folio); - spin_lock(&ci->i_ceph_lock); - inline_version = ci->i_inline_version; - spin_unlock(&ci->i_ceph_lock); - - dout("uninline_data %p %llx.%llx inline_version %llu\n", - inode, ceph_vinop(inode), inline_version); - - if (inline_version == 1 || /* initial version, no data */ - inline_version == CEPH_INLINE_NONE) - goto out_unlock; - len = i_size_read(inode); if (len > folio_size(folio)) len = folio_size(folio); @@ -1739,6 +1745,7 @@ int ceph_uninline_data(struct file *file) ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency, req->r_end_latency, len, err); +out_uninline: if (!err) { int dirty; @@ -1757,8 +1764,10 @@ out_put_req: if (err == -ECANCELED) err = 0; out_unlock: - folio_unlock(folio); - folio_put(folio); + if (folio) { + folio_unlock(folio); + folio_put(folio); + } out: ceph_free_cap_flush(prealloc_cf); dout("uninline_data %p %llx.%llx inline_version %llu = %d\n", @@ -1777,7 +1786,6 @@ int ceph_mmap(struct file *file, struct vm_area_struct *vma) if (!mapping->a_ops->read_folio) return -ENOEXEC; - file_accessed(file); vma->vm_ops = &ceph_vmops; return 0; } diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 5c14ef04e474..bf2e94005598 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1577,7 +1577,7 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci, while (first_tid <= last_tid) { struct ceph_cap *cap = ci->i_auth_cap; - struct ceph_cap_flush *cf; + struct ceph_cap_flush *cf = NULL, *iter; int ret; if (!(cap && cap->session == session)) { @@ -1587,8 +1587,9 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci, } ret = -ENOENT; - list_for_each_entry(cf, &ci->i_cap_flush_list, i_list) { - if (cf->tid >= first_tid) { + list_for_each_entry(iter, &ci->i_cap_flush_list, i_list) { + if (iter->tid >= first_tid) { + cf = iter; ret = 0; break; } @@ -1910,6 +1911,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct rb_node *p; bool queue_invalidate = false; bool tried_invalidate = false; + bool queue_writeback = false; if (session) ceph_get_mds_session(session); @@ -2062,10 +2064,27 @@ retry: } /* completed revocation? going down and there are no caps? */ - if (revoking && (revoking & cap_used) == 0) { - dout("completed revocation of %s\n", - ceph_cap_string(cap->implemented & ~cap->issued)); - goto ack; + if (revoking) { + if ((revoking & cap_used) == 0) { + dout("completed revocation of %s\n", + ceph_cap_string(cap->implemented & ~cap->issued)); + goto ack; + } + + /* + * If the "i_wrbuffer_ref" was increased by mmap or generic + * cache write just before the ceph_check_caps() is called, + * the Fb capability revoking will fail this time. Then we + * must wait for the BDI's delayed work to flush the dirty + * pages and to release the "i_wrbuffer_ref", which will cost + * at most 5 seconds. That means the MDS needs to wait at + * most 5 seconds to finished the Fb capability's revocation. + * + * Let's queue a writeback for it. + */ + if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref && + (revoking & CEPH_CAP_FILE_BUFFER)) + queue_writeback = true; } /* want more caps from mds? */ @@ -2135,6 +2154,8 @@ ack: spin_unlock(&ci->i_ceph_lock); ceph_put_mds_session(session); + if (queue_writeback) + ceph_queue_writeback(inode); if (queue_invalidate) ceph_queue_invalidate(inode); } @@ -2218,9 +2239,9 @@ static int caps_are_flushed(struct inode *inode, u64 flush_tid) } /* - * wait for any unsafe requests to complete. + * flush the mdlog and wait for any unsafe requests to complete. */ -static int unsafe_request_wait(struct inode *inode) +static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) { struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); @@ -2336,7 +2357,7 @@ retry: kfree(sessions); } - dout("unsafe_request_wait %p wait on tid %llu %llu\n", + dout("%s %p wait on tid %llu %llu\n", __func__, inode, req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL); if (req1) { ret = !wait_for_completion_timeout(&req1->r_safe_completion, @@ -2380,7 +2401,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) dirty = try_flush_caps(inode, &flush_tid); dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); - err = unsafe_request_wait(inode); + err = flush_mdlog_and_wait_inode_unsafe_requests(inode); /* * only wait on non-file metadata writeback (the mds @@ -3182,10 +3203,9 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, struct ceph_snap_context *snapc) { struct inode *inode = &ci->vfs_inode; - struct ceph_cap_snap *capsnap = NULL; + struct ceph_cap_snap *capsnap = NULL, *iter; int put = 0; bool last = false; - bool found = false; bool flush_snaps = false; bool complete_capsnap = false; @@ -3212,14 +3232,14 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head, last ? " LAST" : ""); } else { - list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { - if (capsnap->context == snapc) { - found = true; + list_for_each_entry(iter, &ci->i_cap_snaps, ci_item) { + if (iter->context == snapc) { + capsnap = iter; break; } } - if (!found) { + if (!capsnap) { /* * The capsnap should already be removed when removing * auth cap in the case of a forced unmount. @@ -3769,8 +3789,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; u64 follows = le64_to_cpu(m->snap_follows); - struct ceph_cap_snap *capsnap; - bool flushed = false; + struct ceph_cap_snap *capsnap = NULL, *iter; bool wake_ci = false; bool wake_mdsc = false; @@ -3778,26 +3797,26 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, inode, ci, session->s_mds, follows); spin_lock(&ci->i_ceph_lock); - list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { - if (capsnap->follows == follows) { - if (capsnap->cap_flush.tid != flush_tid) { + list_for_each_entry(iter, &ci->i_cap_snaps, ci_item) { + if (iter->follows == follows) { + if (iter->cap_flush.tid != flush_tid) { dout(" cap_snap %p follows %lld tid %lld !=" - " %lld\n", capsnap, follows, - flush_tid, capsnap->cap_flush.tid); + " %lld\n", iter, follows, + flush_tid, iter->cap_flush.tid); break; } - flushed = true; + capsnap = iter; break; } else { dout(" skipping cap_snap %p follows %lld\n", - capsnap, capsnap->follows); + iter, iter->follows); } } - if (flushed) + if (capsnap) ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc); spin_unlock(&ci->i_ceph_lock); - if (flushed) { + if (capsnap) { ceph_put_snap_context(capsnap->context); ceph_put_cap_snap(capsnap); if (wake_ci) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 63113e2a4890..b7e9cac3aeef 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -578,7 +578,7 @@ void ceph_evict_inode(struct inode *inode) __ceph_remove_caps(ci); - if (__ceph_has_any_quota(ci)) + if (__ceph_has_quota(ci, QUOTA_GET_ANY)) ceph_adjust_quota_realms_count(inode, false); /* @@ -1466,10 +1466,12 @@ retry_lookup: } else if (have_lease) { if (d_unhashed(dn)) d_add(dn, NULL); + } + + if (!d_unhashed(dn) && have_lease) update_dentry_lease(dir, dn, rinfo->dlease, session, req->r_request_started); - } goto done; } @@ -1884,7 +1886,6 @@ static void ceph_do_invalidate_pages(struct inode *inode) orig_gen = ci->i_rdcache_gen; spin_unlock(&ci->i_ceph_lock); - ceph_fscache_invalidate(inode, false); if (invalidate_inode_pages2(inode->i_mapping) < 0) { pr_err("invalidate_inode_pages2 %llx.%llx failed\n", ceph_vinop(inode)); @@ -2258,6 +2259,30 @@ int ceph_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, return err; } +int ceph_try_to_choose_auth_mds(struct inode *inode, int mask) +{ + int issued = ceph_caps_issued(ceph_inode(inode)); + + /* + * If any 'x' caps is issued we can just choose the auth MDS + * instead of the random replica MDSes. Because only when the + * Locker is in LOCK_EXEC state will the loner client could + * get the 'x' caps. And if we send the getattr requests to + * any replica MDS it must auth pin and tries to rdlock from + * the auth MDS, and then the auth MDS need to do the Locker + * state transition to LOCK_SYNC. And after that the lock state + * will change back. + * + * This cost much when doing the Locker state transition and + * usually will need to revoke caps from clients. + */ + if (((mask & CEPH_CAP_ANY_SHARED) && (issued & CEPH_CAP_ANY_EXCL)) + || (mask & CEPH_STAT_RSTAT)) + return USE_AUTH_MDS; + else + return USE_ANY_MDS; +} + /* * Verify that we have a lease on the given mask. If not, * do a getattr against an mds. @@ -2281,7 +2306,7 @@ int __ceph_do_getattr(struct inode *inode, struct page *locked_page, if (!force && ceph_caps_issued_mask_metric(ceph_inode(inode), mask, 1)) return 0; - mode = (mask & CEPH_STAT_RSTAT) ? USE_AUTH_MDS : USE_ANY_MDS; + mode = ceph_try_to_choose_auth_mds(inode, mask); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, mode); if (IS_ERR(req)) return PTR_ERR(req); @@ -2423,7 +2448,7 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path, return -ESTALE; /* Skip the getattr altogether if we're asked not to sync */ - if (!(flags & AT_STATX_DONT_SYNC)) { + if ((flags & AT_STATX_SYNC_TYPE) != AT_STATX_DONT_SYNC) { err = ceph_do_getattr(inode, statx_to_caps(request_mask, inode->i_mode), flags & AT_STATX_FORCE_SYNC); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 00c3de177dd6..f5d110d90b77 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -437,7 +437,7 @@ static int ceph_parse_deleg_inos(void **p, void *end, ceph_decode_32_safe(p, end, sets, bad); dout("got %u sets of delegated inodes\n", sets); while (sets--) { - u64 start, len, ino; + u64 start, len; ceph_decode_64_safe(p, end, start, bad); ceph_decode_64_safe(p, end, len, bad); @@ -449,7 +449,7 @@ static int ceph_parse_deleg_inos(void **p, void *end, continue; } while (len--) { - int err = xa_insert(&s->s_delegated_inos, ino = start++, + int err = xa_insert(&s->s_delegated_inos, start++, DELEGATED_INO_AVAILABLE, GFP_KERNEL); if (!err) { @@ -2651,7 +2651,28 @@ static int __prepare_send_request(struct ceph_mds_session *session, struct ceph_mds_client *mdsc = session->s_mdsc; struct ceph_mds_request_head_old *rhead; struct ceph_msg *msg; - int flags = 0; + int flags = 0, max_retry; + + /* + * The type of 'r_attempts' in kernel 'ceph_mds_request' + * is 'int', while in 'ceph_mds_request_head' the type of + * 'num_retry' is '__u8'. So in case the request retries + * exceeding 256 times, the MDS will receive a incorrect + * retry seq. + * + * In this case it's ususally a bug in MDS and continue + * retrying the request makes no sense. + * + * In future this could be fixed in ceph code, so avoid + * using the hardcode here. + */ + max_retry = sizeof_field(struct ceph_mds_request_head, num_retry); + max_retry = 1 << (max_retry * BITS_PER_BYTE); + if (req->r_attempts >= max_retry) { + pr_warn_ratelimited("%s request tid %llu seq overflow\n", + __func__, req->r_tid); + return -EMULTIHOP; + } req->r_attempts++; if (req->r_inode) { @@ -2663,7 +2684,7 @@ static int __prepare_send_request(struct ceph_mds_session *session, else req->r_sent_on_mseq = -1; } - dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req, + dout("%s %p tid %lld %s (attempt %d)\n", __func__, req, req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { @@ -3265,6 +3286,7 @@ static void handle_forward(struct ceph_mds_client *mdsc, int err = -EINVAL; void *p = msg->front.iov_base; void *end = p + msg->front.iov_len; + bool aborted = false; ceph_decode_need(&p, end, 2*sizeof(u32), bad); next_mds = ceph_decode_32(&p); @@ -3273,16 +3295,41 @@ static void handle_forward(struct ceph_mds_client *mdsc, mutex_lock(&mdsc->mutex); req = lookup_get_request(mdsc, tid); if (!req) { + mutex_unlock(&mdsc->mutex); dout("forward tid %llu to mds%d - req dne\n", tid, next_mds); - goto out; /* dup reply? */ + return; /* dup reply? */ } if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { dout("forward tid %llu aborted, unregistering\n", tid); __unregister_request(mdsc, req); } else if (fwd_seq <= req->r_num_fwd) { - dout("forward tid %llu to mds%d - old seq %d <= %d\n", - tid, next_mds, req->r_num_fwd, fwd_seq); + /* + * The type of 'num_fwd' in ceph 'MClientRequestForward' + * is 'int32_t', while in 'ceph_mds_request_head' the + * type is '__u8'. So in case the request bounces between + * MDSes exceeding 256 times, the client will get stuck. + * + * In this case it's ususally a bug in MDS and continue + * bouncing the request makes no sense. + * + * In future this could be fixed in ceph code, so avoid + * using the hardcode here. + */ + int max = sizeof_field(struct ceph_mds_request_head, num_fwd); + max = 1 << (max * BITS_PER_BYTE); + if (req->r_num_fwd >= max) { + mutex_lock(&req->r_fill_mutex); + req->r_err = -EMULTIHOP; + set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags); + mutex_unlock(&req->r_fill_mutex); + aborted = true; + pr_warn_ratelimited("forward tid %llu seq overflow\n", + tid); + } else { + dout("forward tid %llu to mds%d - old seq %d <= %d\n", + tid, next_mds, req->r_num_fwd, fwd_seq); + } } else { /* resend. forward race not possible; mds would drop */ dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds); @@ -3294,9 +3341,12 @@ static void handle_forward(struct ceph_mds_client *mdsc, put_request_session(req); __do_request(mdsc, req); } - ceph_mdsc_put_request(req); -out: mutex_unlock(&mdsc->mutex); + + /* kick calling process */ + if (aborted) + complete_request(mdsc, req); + ceph_mdsc_put_request(req); return; bad: @@ -3375,13 +3425,17 @@ static void handle_session(struct ceph_mds_session *session, } if (msg_version >= 5) { - u32 flags; - /* version >= 4, struct_v, struct_cv, len, metric_spec */ - ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 2, bad); + u32 flags, len; + + /* version >= 4 */ + ceph_decode_skip_16(&p, end, bad); /* struct_v, struct_cv */ + ceph_decode_32_safe(&p, end, len, bad); /* len */ + ceph_decode_skip_n(&p, end, len, bad); /* metric_spec */ + /* version >= 5, flags */ - ceph_decode_32_safe(&p, end, flags, bad); + ceph_decode_32_safe(&p, end, flags, bad); if (flags & CEPH_SESSION_BLOCKLISTED) { - pr_warn("mds%d session blocklisted\n", session->s_mds); + pr_warn("mds%d session blocklisted\n", session->s_mds); blocklisted = true; } } @@ -4396,12 +4450,6 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, memcpy((void *)(lease + 1) + 4, dentry->d_name.name, dentry->d_name.len); spin_unlock(&dentry->d_lock); - /* - * if this is a preemptive lease RELEASE, no need to - * flush request stream, since the actual request will - * soon follow. - */ - msg->more_to_follow = (action == CEPH_MDS_LEASE_RELEASE); ceph_con_send(&session->s_con, msg); } @@ -4696,15 +4744,17 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) } /* - * wait for all write mds requests to flush. + * flush the mdlog and wait for all write mds requests to flush. */ -static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) +static void flush_mdlog_and_wait_mdsc_unsafe_requests(struct ceph_mds_client *mdsc, + u64 want_tid) { struct ceph_mds_request *req = NULL, *nextreq; + struct ceph_mds_session *last_session = NULL; struct rb_node *n; mutex_lock(&mdsc->mutex); - dout("wait_unsafe_requests want %lld\n", want_tid); + dout("%s want %lld\n", __func__, want_tid); restart: req = __get_oldest_req(mdsc); while (req && req->r_tid <= want_tid) { @@ -4716,14 +4766,32 @@ restart: nextreq = NULL; if (req->r_op != CEPH_MDS_OP_SETFILELOCK && (req->r_op & CEPH_MDS_OP_WRITE)) { + struct ceph_mds_session *s = req->r_session; + + if (!s) { + req = nextreq; + continue; + } + /* write op */ ceph_mdsc_get_request(req); if (nextreq) ceph_mdsc_get_request(nextreq); + s = ceph_get_mds_session(s); mutex_unlock(&mdsc->mutex); - dout("wait_unsafe_requests wait on %llu (want %llu)\n", + + /* send flush mdlog request to MDS */ + if (last_session != s) { + send_flush_mdlog(s); + ceph_put_mds_session(last_session); + last_session = s; + } else { + ceph_put_mds_session(s); + } + dout("%s wait on %llu (want %llu)\n", __func__, req->r_tid, want_tid); wait_for_completion(&req->r_safe_completion); + mutex_lock(&mdsc->mutex); ceph_mdsc_put_request(req); if (!nextreq) @@ -4738,7 +4806,8 @@ restart: req = nextreq; } mutex_unlock(&mdsc->mutex); - dout("wait_unsafe_requests done\n"); + ceph_put_mds_session(last_session); + dout("%s done\n", __func__); } void ceph_mdsc_sync(struct ceph_mds_client *mdsc) @@ -4767,7 +4836,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush); - wait_unsafe_requests(mdsc, want_tid); + flush_mdlog_and_wait_mdsc_unsafe_requests(mdsc, want_tid); wait_caps_flush(mdsc, want_flush); } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 33497846e47e..1140aecd82ce 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -579,7 +579,7 @@ static inline int ceph_wait_on_async_create(struct inode *inode) struct ceph_inode_info *ci = ceph_inode(inode); return wait_on_bit(&ci->i_ceph_flags, CEPH_ASYNC_CREATE_BIT, - TASK_INTERRUPTIBLE); + TASK_KILLABLE); } extern u64 ceph_get_deleg_ino(struct ceph_mds_session *session); diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index a338a3ec0dc4..64592adfe48f 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -195,9 +195,9 @@ void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc) /* * This function walks through the snaprealm for an inode and returns the - * ceph_snap_realm for the first snaprealm that has quotas set (either max_files - * or max_bytes). If the root is reached, return the root ceph_snap_realm - * instead. + * ceph_snap_realm for the first snaprealm that has quotas set (max_files, + * max_bytes, or any, depending on the 'which_quota' argument). If the root is + * reached, return the root ceph_snap_realm instead. * * Note that the caller is responsible for calling ceph_put_snap_realm() on the * returned realm. @@ -209,7 +209,9 @@ void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc) * will be restarted. */ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, - struct inode *inode, bool retry) + struct inode *inode, + enum quota_get_realm which_quota, + bool retry) { struct ceph_inode_info *ci = NULL; struct ceph_snap_realm *realm, *next; @@ -248,7 +250,7 @@ restart: } ci = ceph_inode(in); - has_quota = __ceph_has_any_quota(ci); + has_quota = __ceph_has_quota(ci, which_quota); iput(in); next = realm->parent; @@ -279,8 +281,8 @@ restart: * dropped and we can then restart the whole operation. */ down_read(&mdsc->snap_rwsem); - old_realm = get_quota_realm(mdsc, old, true); - new_realm = get_quota_realm(mdsc, new, false); + old_realm = get_quota_realm(mdsc, old, QUOTA_GET_ANY, true); + new_realm = get_quota_realm(mdsc, new, QUOTA_GET_ANY, false); if (PTR_ERR(new_realm) == -EAGAIN) { up_read(&mdsc->snap_rwsem); if (old_realm) @@ -483,7 +485,8 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf) bool is_updated = false; down_read(&mdsc->snap_rwsem); - realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), true); + realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), + QUOTA_GET_MAX_BYTES, true); up_read(&mdsc->snap_rwsem); if (!realm) return false; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index e6987d295079..b73b4f75462c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1119,6 +1119,7 @@ static int ceph_set_super(struct super_block *s, struct fs_context *fc) s->s_time_gran = 1; s->s_time_min = 0; s->s_time_max = U32_MAX; + s->s_flags |= SB_NODIRATIME | SB_NOATIME; ret = set_anon_super_fc(s, fc); if (ret != 0) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 20ceab74e871..dd7dac0f984a 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1022,6 +1022,7 @@ static inline void ceph_queue_flush_snaps(struct inode *inode) ceph_queue_inode_work(inode, CEPH_I_WORK_FLUSH_SNAPS); } +extern int ceph_try_to_choose_auth_mds(struct inode *inode, int mask); extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page, int mask, bool force); static inline int ceph_do_getattr(struct inode *inode, int mask, bool force) @@ -1278,9 +1279,29 @@ extern void ceph_fs_debugfs_init(struct ceph_fs_client *client); extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); /* quota.c */ -static inline bool __ceph_has_any_quota(struct ceph_inode_info *ci) + +enum quota_get_realm { + QUOTA_GET_MAX_FILES, + QUOTA_GET_MAX_BYTES, + QUOTA_GET_ANY +}; + +static inline bool __ceph_has_quota(struct ceph_inode_info *ci, + enum quota_get_realm which) { - return ci->i_max_files || ci->i_max_bytes; + bool has_quota = false; + + switch (which) { + case QUOTA_GET_MAX_BYTES: + has_quota = !!ci->i_max_bytes; + break; + case QUOTA_GET_MAX_FILES: + has_quota = !!ci->i_max_files; + break; + default: + has_quota = !!(ci->i_max_files || ci->i_max_bytes); + } + return has_quota; } extern void ceph_adjust_quota_realms_count(struct inode *inode, bool inc); @@ -1289,10 +1310,10 @@ static inline void __ceph_update_quota(struct ceph_inode_info *ci, u64 max_bytes, u64 max_files) { bool had_quota, has_quota; - had_quota = __ceph_has_any_quota(ci); + had_quota = __ceph_has_quota(ci, QUOTA_GET_ANY); ci->i_max_bytes = max_bytes; ci->i_max_files = max_files; - has_quota = __ceph_has_any_quota(ci); + has_quota = __ceph_has_quota(ci, QUOTA_GET_ANY); if (had_quota != has_quota) ceph_adjust_quota_realms_count(&ci->vfs_inode, has_quota); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index afec84088471..8c2dc2c762a4 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -366,6 +366,14 @@ static ssize_t ceph_vxattrcb_auth_mds(struct ceph_inode_info *ci, } #define XATTR_RSTAT_FIELD(_type, _name) \ XATTR_NAME_CEPH(_type, _name, VXATTR_FLAG_RSTAT) +#define XATTR_RSTAT_FIELD_UPDATABLE(_type, _name) \ + { \ + .name = CEPH_XATTR_NAME(_type, _name), \ + .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ + .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ + .exists_cb = NULL, \ + .flags = VXATTR_FLAG_RSTAT, \ + } #define XATTR_LAYOUT_FIELD(_type, _name, _field) \ { \ .name = CEPH_XATTR_NAME2(_type, _name, _field), \ @@ -404,7 +412,7 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = { XATTR_RSTAT_FIELD(dir, rsubdirs), XATTR_RSTAT_FIELD(dir, rsnaps), XATTR_RSTAT_FIELD(dir, rbytes), - XATTR_RSTAT_FIELD(dir, rctime), + XATTR_RSTAT_FIELD_UPDATABLE(dir, rctime), { .name = "ceph.dir.pin", .name_size = sizeof("ceph.dir.pin"), diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index cc8fdcb35b71..8c9f2c00be72 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_CIFS) += cifs.o cifs-y := trace.o cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o \ inode.o link.o misc.o netmisc.o smbencrypt.o transport.o \ cifs_unicode.o nterr.o cifsencrypt.o \ - readdir.o ioctl.o sess.o export.o smb1ops.o unc.o winucase.o \ + readdir.o ioctl.o sess.o export.o unc.o winucase.o \ smb2ops.o smb2maperror.o smb2transport.o \ smb2misc.o smb2pdu.o smb2inode.o smb2file.o cifsacl.o fs_context.o \ dns_resolve.o cifs_spnego_negtokeninit.asn1.o asn1.o @@ -30,3 +30,5 @@ cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cifs-$(CONFIG_CIFS_SMB_DIRECT) += smbdirect.o cifs-$(CONFIG_CIFS_ROOT) += cifsroot.o + +cifs-$(CONFIG_CIFS_ALLOW_INSECURE_LEGACY) += smb1ops.o diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c index 180c234c2f46..1e4c7cc5287f 100644 --- a/fs/cifs/cifs_swn.c +++ b/fs/cifs/cifs_swn.c @@ -465,7 +465,7 @@ static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *a int ret = 0; /* Store the reconnect address */ - mutex_lock(&tcon->ses->server->srv_mutex); + cifs_server_lock(tcon->ses->server); if (cifs_sockaddr_equal(&tcon->ses->server->dstaddr, addr)) goto unlock; @@ -501,7 +501,7 @@ static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *a cifs_signal_cifsd_for_reconnect(tcon->ses->server, false); unlock: - mutex_unlock(&tcon->ses->server->srv_mutex); + cifs_server_unlock(tcon->ses->server); return ret; } diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 0912d8bbbac1..663cb9db4908 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -236,9 +236,9 @@ int cifs_verify_signature(struct smb_rqst *rqst, cpu_to_le32(expected_sequence_number); cifs_pdu->Signature.Sequence.Reserved = 0; - mutex_lock(&server->srv_mutex); + cifs_server_lock(server); rc = cifs_calc_signature(rqst, server, what_we_think_sig_should_be); - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); if (rc) return rc; @@ -626,7 +626,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) memcpy(ses->auth_key.response + baselen, tiblob, tilen); - mutex_lock(&ses->server->srv_mutex); + cifs_server_lock(ses->server); rc = cifs_alloc_hash("hmac(md5)", &ses->server->secmech.hmacmd5, @@ -678,7 +678,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__); unlock: - mutex_unlock(&ses->server->srv_mutex); + cifs_server_unlock(ses->server); setup_ntlmv2_rsp_ret: kfree(tiblob); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f539a39d47f5..12c872800326 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -838,7 +838,7 @@ cifs_smb3_do_mount(struct file_system_type *fs_type, int flags, struct smb3_fs_context *old_ctx) { int rc; - struct super_block *sb; + struct super_block *sb = NULL; struct cifs_sb_info *cifs_sb = NULL; struct cifs_mnt_data mnt_data; struct dentry *root; @@ -934,9 +934,11 @@ out_super: return root; out: if (cifs_sb) { - kfree(cifs_sb->prepath); - smb3_cleanup_fs_context(cifs_sb->ctx); - kfree(cifs_sb); + if (!sb || IS_ERR(sb)) { /* otherwise kill_sb will handle */ + kfree(cifs_sb->prepath); + smb3_cleanup_fs_context(cifs_sb->ctx); + kfree(cifs_sb); + } } return root; } diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index c0542bdcd06b..dd7e070ca243 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -152,6 +152,7 @@ extern struct dentry *cifs_smb3_do_mount(struct file_system_type *fs_type, extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define SMB3_PRODUCT_BUILD 35 -#define CIFS_VERSION "2.36" +/* when changing internal version - update following two lines at same time */ +#define SMB3_PRODUCT_BUILD 37 +#define CIFS_VERSION "2.37" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 68da230c7f11..f873379066c7 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -16,6 +16,7 @@ #include <linux/mempool.h> #include <linux/workqueue.h> #include <linux/utsname.h> +#include <linux/sched/mm.h> #include <linux/netfs.h> #include "cifs_fs_sb.h" #include "cifsacl.h" @@ -628,7 +629,8 @@ struct TCP_Server_Info { unsigned int in_flight; /* number of requests on the wire to server */ unsigned int max_in_flight; /* max number of requests that were on wire */ spinlock_t req_lock; /* protect the two values above */ - struct mutex srv_mutex; + struct mutex _srv_mutex; + unsigned int nofs_flag; struct task_struct *tsk; char server_GUID[16]; __u16 sec_mode; @@ -743,6 +745,22 @@ struct TCP_Server_Info { #endif }; +static inline void cifs_server_lock(struct TCP_Server_Info *server) +{ + unsigned int nofs_flag = memalloc_nofs_save(); + + mutex_lock(&server->_srv_mutex); + server->nofs_flag = nofs_flag; +} + +static inline void cifs_server_unlock(struct TCP_Server_Info *server) +{ + unsigned int nofs_flag = server->nofs_flag; + + mutex_unlock(&server->_srv_mutex); + memalloc_nofs_restore(nofs_flag); +} + struct cifs_credits { unsigned int value; unsigned int instance; @@ -1945,11 +1963,13 @@ extern mempool_t *cifs_mid_poolp; /* Operations for different SMB versions */ #define SMB1_VERSION_STRING "1.0" +#define SMB20_VERSION_STRING "2.0" +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY extern struct smb_version_operations smb1_operations; extern struct smb_version_values smb1_values; -#define SMB20_VERSION_STRING "2.0" extern struct smb_version_operations smb20_operations; extern struct smb_version_values smb20_values; +#endif /* CIFS_ALLOW_INSECURE_LEGACY */ #define SMB21_VERSION_STRING "2.1" extern struct smb_version_operations smb21_operations; extern struct smb_version_values smb21_values; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 53373a3649e1..d46702f5a663 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -148,7 +148,7 @@ static void cifs_resolve_server(struct work_struct *work) struct TCP_Server_Info *server = container_of(work, struct TCP_Server_Info, resolve.work); - mutex_lock(&server->srv_mutex); + cifs_server_lock(server); /* * Resolve the hostname again to make sure that IP address is up-to-date. @@ -159,7 +159,7 @@ static void cifs_resolve_server(struct work_struct *work) __func__, rc); } - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); } /* @@ -267,7 +267,7 @@ cifs_abort_connection(struct TCP_Server_Info *server) /* do not want to be sending data on a socket we are freeing */ cifs_dbg(FYI, "%s: tearing down socket\n", __func__); - mutex_lock(&server->srv_mutex); + cifs_server_lock(server); if (server->ssocket) { cifs_dbg(FYI, "State: 0x%x Flags: 0x%lx\n", server->ssocket->state, server->ssocket->flags); @@ -296,7 +296,7 @@ cifs_abort_connection(struct TCP_Server_Info *server) mid->mid_flags |= MID_DELETED; } spin_unlock(&GlobalMid_Lock); - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__); list_for_each_entry_safe(mid, nmid, &retry_list, qhead) { @@ -306,9 +306,9 @@ cifs_abort_connection(struct TCP_Server_Info *server) } if (cifs_rdma_enabled(server)) { - mutex_lock(&server->srv_mutex); + cifs_server_lock(server); smbd_destroy(server); - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); } } @@ -359,7 +359,7 @@ static int __cifs_reconnect(struct TCP_Server_Info *server, do { try_to_freeze(); - mutex_lock(&server->srv_mutex); + cifs_server_lock(server); if (!cifs_swn_set_server_dstaddr(server)) { /* resolve the hostname again to make sure that IP address is up-to-date */ @@ -372,7 +372,7 @@ static int __cifs_reconnect(struct TCP_Server_Info *server, else rc = generic_ip_connect(server); if (rc) { - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); cifs_dbg(FYI, "%s: reconnect error %d\n", __func__, rc); msleep(3000); } else { @@ -383,7 +383,7 @@ static int __cifs_reconnect(struct TCP_Server_Info *server, server->tcpStatus = CifsNeedNegotiate; spin_unlock(&cifs_tcp_ses_lock); cifs_swn_reset_server_dstaddr(server); - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); mod_delayed_work(cifsiod_wq, &server->reconnect, 0); } } while (server->tcpStatus == CifsNeedReconnect); @@ -488,12 +488,12 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server) do { try_to_freeze(); - mutex_lock(&server->srv_mutex); + cifs_server_lock(server); rc = reconnect_target_unlocked(server, &tl, &target_hint); if (rc) { /* Failed to reconnect socket */ - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); cifs_dbg(FYI, "%s: reconnect error %d\n", __func__, rc); msleep(3000); continue; @@ -510,7 +510,7 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server) server->tcpStatus = CifsNeedNegotiate; spin_unlock(&cifs_tcp_ses_lock); cifs_swn_reset_server_dstaddr(server); - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); mod_delayed_work(cifsiod_wq, &server->reconnect, 0); } while (server->tcpStatus == CifsNeedReconnect); @@ -1565,7 +1565,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, init_waitqueue_head(&tcp_ses->response_q); init_waitqueue_head(&tcp_ses->request_q); INIT_LIST_HEAD(&tcp_ses->pending_mid_q); - mutex_init(&tcp_ses->srv_mutex); + mutex_init(&tcp_ses->_srv_mutex); memcpy(tcp_ses->workstation_RFC1001_name, ctx->source_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL); memcpy(tcp_ses->server_RFC1001_name, @@ -1845,7 +1845,6 @@ void cifs_put_smb_ses(struct cifs_ses *ses) unsigned int rc, xid; unsigned int chan_count; struct TCP_Server_Info *server = ses->server; - cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count); spin_lock(&cifs_tcp_ses_lock); if (ses->ses_status == SES_EXITING) { diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index c5dd6f7305bd..34a8f3baed5e 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -1229,6 +1229,30 @@ void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id) kref_put(&mg->refcount, mount_group_release); } +/* Extract share from DFS target and return a pointer to prefix path or NULL */ +static const char *parse_target_share(const char *target, char **share) +{ + const char *s, *seps = "/\\"; + size_t len; + + s = strpbrk(target + 1, seps); + if (!s) + return ERR_PTR(-EINVAL); + + len = strcspn(s + 1, seps); + if (!len) + return ERR_PTR(-EINVAL); + s += len; + + len = s - target + 1; + *share = kstrndup(target, len, GFP_KERNEL); + if (!*share) + return ERR_PTR(-ENOMEM); + + s = target + len; + return s + strspn(s, seps); +} + /** * dfs_cache_get_tgt_share - parse a DFS target * @@ -1242,56 +1266,46 @@ void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id) int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it, char **share, char **prefix) { - char *s, sep, *p; - size_t len; - size_t plen1, plen2; + char sep; + char *target_share; + char *ppath = NULL; + const char *target_ppath, *dfsref_ppath; + size_t target_pplen, dfsref_pplen; + size_t len, c; if (!it || !path || !share || !prefix || strlen(path) < it->it_path_consumed) return -EINVAL; - *share = NULL; - *prefix = NULL; - sep = it->it_name[0]; if (sep != '\\' && sep != '/') return -EINVAL; - s = strchr(it->it_name + 1, sep); - if (!s) - return -EINVAL; + target_ppath = parse_target_share(it->it_name, &target_share); + if (IS_ERR(target_ppath)) + return PTR_ERR(target_ppath); - /* point to prefix in target node */ - s = strchrnul(s + 1, sep); + /* point to prefix in DFS referral path */ + dfsref_ppath = path + it->it_path_consumed; + dfsref_ppath += strspn(dfsref_ppath, "/\\"); - /* extract target share */ - *share = kstrndup(it->it_name, s - it->it_name, GFP_KERNEL); - if (!*share) - return -ENOMEM; + target_pplen = strlen(target_ppath); + dfsref_pplen = strlen(dfsref_ppath); - /* skip separator */ - if (*s) - s++; - /* point to prefix in DFS path */ - p = path + it->it_path_consumed; - if (*p == sep) - p++; - - /* merge prefix paths from DFS path and target node */ - plen1 = it->it_name + strlen(it->it_name) - s; - plen2 = path + strlen(path) - p; - if (plen1 || plen2) { - len = plen1 + plen2 + 2; - *prefix = kmalloc(len, GFP_KERNEL); - if (!*prefix) { - kfree(*share); - *share = NULL; + /* merge prefix paths from DFS referral path and target node */ + if (target_pplen || dfsref_pplen) { + len = target_pplen + dfsref_pplen + 2; + ppath = kzalloc(len, GFP_KERNEL); + if (!ppath) { + kfree(target_share); return -ENOMEM; } - if (plen1) - scnprintf(*prefix, len, "%.*s%c%.*s", (int)plen1, s, sep, (int)plen2, p); - else - strscpy(*prefix, p, len); + c = strscpy(ppath, target_ppath, len); + if (c && dfsref_pplen) + ppath[c] = sep; + strlcat(ppath, dfsref_ppath, len); } + *share = target_share; + *prefix = ppath; return 0; } @@ -1327,9 +1341,9 @@ static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, c cifs_dbg(VFS, "%s: failed to convert address \'%s\'. skip address matching.\n", __func__, ip); } else { - mutex_lock(&server->srv_mutex); + cifs_server_lock(server); match = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, &sa); - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); } kfree(ip); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index c6214cfc575f..3b7915af1f62 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -1120,14 +1120,14 @@ sess_establish_session(struct sess_data *sess_data) struct cifs_ses *ses = sess_data->ses; struct TCP_Server_Info *server = sess_data->server; - mutex_lock(&server->srv_mutex); + cifs_server_lock(server); if (!server->session_estab) { if (server->sign) { server->session_key.response = kmemdup(ses->auth_key.response, ses->auth_key.len, GFP_KERNEL); if (!server->session_key.response) { - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); return -ENOMEM; } server->session_key.len = @@ -1136,7 +1136,7 @@ sess_establish_session(struct sess_data *sess_data) server->sequence_number = 0x2; server->session_estab = true; } - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); cifs_dbg(FYI, "CIFS session established successfully\n"); return 0; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index c71c9a44bef4..2e20ee4dab7b 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -38,10 +38,10 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_rqst *rqst, in_buf->WordCount = 0; put_bcc(0, in_buf); - mutex_lock(&server->srv_mutex); + cifs_server_lock(server); rc = cifs_sign_smb(in_buf, server, &mid->sequence_number); if (rc) { - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); return rc; } @@ -55,7 +55,7 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_rqst *rqst, if (rc < 0) server->sequence_number--; - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); cifs_dbg(FYI, "issued NT_CANCEL for mid %u, rc = %d\n", get_mid(in_buf), rc); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index d7ade739cde1..98a76fa791c0 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3859,7 +3859,7 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon, if (rc) goto out; - if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) == 0) + if (cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) smb2_set_sparse(xid, tcon, cfile, inode, false); eof = cpu_to_le64(off + len); @@ -4345,11 +4345,13 @@ smb3_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, } } +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY static bool smb2_is_read_op(__u32 oplock) { return oplock == SMB2_OPLOCK_LEVEL_II; } +#endif /* CIFS_ALLOW_INSECURE_LEGACY */ static bool smb21_is_read_op(__u32 oplock) @@ -5448,7 +5450,7 @@ out: return rc; } - +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY struct smb_version_operations smb20_operations = { .compare_fids = smb2_compare_fids, .setup_request = smb2_setup_request, @@ -5547,6 +5549,7 @@ struct smb_version_operations smb20_operations = { .is_status_io_timeout = smb2_is_status_io_timeout, .is_network_name_deleted = smb2_is_network_name_deleted, }; +#endif /* CIFS_ALLOW_INSECURE_LEGACY */ struct smb_version_operations smb21_operations = { .compare_fids = smb2_compare_fids, @@ -5878,6 +5881,7 @@ struct smb_version_operations smb311_operations = { .is_network_name_deleted = smb2_is_network_name_deleted, }; +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY struct smb_version_values smb20_values = { .version_string = SMB20_VERSION_STRING, .protocol_id = SMB20_PROT_ID, @@ -5898,6 +5902,7 @@ struct smb_version_values smb20_values = { .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, .create_lease_size = sizeof(struct create_lease), }; +#endif /* ALLOW_INSECURE_LEGACY */ struct smb_version_values smb21_values = { .version_string = SMB21_VERSION_STRING, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 084be3a90198..0e8c85249579 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1369,13 +1369,13 @@ SMB2_sess_establish_session(struct SMB2_sess_data *sess_data) struct cifs_ses *ses = sess_data->ses; struct TCP_Server_Info *server = sess_data->server; - mutex_lock(&server->srv_mutex); + cifs_server_lock(server); if (server->ops->generate_signingkey) { rc = server->ops->generate_signingkey(ses, server); if (rc) { cifs_dbg(FYI, "SMB3 session key generation failed\n"); - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); return rc; } } @@ -1383,7 +1383,7 @@ SMB2_sess_establish_session(struct SMB2_sess_data *sess_data) server->sequence_number = 0x2; server->session_estab = true; } - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); cifs_dbg(FYI, "SMB2/3 session established successfully\n"); return rc; diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index c3278db1cade..5fbbec22bcc8 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -1382,9 +1382,9 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "freeing mr list\n"); wake_up_interruptible_all(&info->wait_mr); while (atomic_read(&info->mr_used_count)) { - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); msleep(1000); - mutex_lock(&server->srv_mutex); + cifs_server_lock(server); } destroy_mr_list(info); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 05eca41e3b1e..bfc9bd55870a 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -822,7 +822,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, } else instance = exist_credits->instance; - mutex_lock(&server->srv_mutex); + cifs_server_lock(server); /* * We can't use credits obtained from the previous session to send this @@ -830,14 +830,14 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, * return -EAGAIN in such cases to let callers handle it. */ if (instance != server->reconnect_instance) { - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); add_credits_and_wake_if(server, &credits, optype); return -EAGAIN; } mid = server->ops->setup_async_request(server, rqst); if (IS_ERR(mid)) { - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); add_credits_and_wake_if(server, &credits, optype); return PTR_ERR(mid); } @@ -868,7 +868,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, cifs_delete_mid(mid); } - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); if (rc == 0) return 0; @@ -1109,7 +1109,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, * of smb data. */ - mutex_lock(&server->srv_mutex); + cifs_server_lock(server); /* * All the parts of the compound chain belong obtained credits from the @@ -1119,7 +1119,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, * handle it. */ if (instance != server->reconnect_instance) { - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); for (j = 0; j < num_rqst; j++) add_credits(server, &credits[j], optype); return -EAGAIN; @@ -1131,7 +1131,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, revert_current_mid(server, i); for (j = 0; j < i; j++) cifs_delete_mid(midQ[j]); - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); /* Update # of requests on wire to server */ for (j = 0; j < num_rqst; j++) @@ -1163,7 +1163,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, server->sequence_number -= 2; } - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); /* * If sending failed for some reason or it is an oplock break that we @@ -1190,9 +1190,9 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) { spin_unlock(&cifs_tcp_ses_lock); - mutex_lock(&server->srv_mutex); + cifs_server_lock(server); smb311_update_preauth_hash(ses, server, rqst[0].rq_iov, rqst[0].rq_nvec); - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); spin_lock(&cifs_tcp_ses_lock); } @@ -1266,9 +1266,9 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, .iov_len = resp_iov[0].iov_len }; spin_unlock(&cifs_tcp_ses_lock); - mutex_lock(&server->srv_mutex); + cifs_server_lock(server); smb311_update_preauth_hash(ses, server, &iov, 1); - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); spin_lock(&cifs_tcp_ses_lock); } spin_unlock(&cifs_tcp_ses_lock); @@ -1385,11 +1385,11 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, and avoid races inside tcp sendmsg code that could cause corruption of smb data */ - mutex_lock(&server->srv_mutex); + cifs_server_lock(server); rc = allocate_mid(ses, in_buf, &midQ); if (rc) { - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); /* Update # of requests on wire to server */ add_credits(server, &credits, 0); return rc; @@ -1397,7 +1397,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number); if (rc) { - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); goto out; } @@ -1411,7 +1411,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, if (rc < 0) server->sequence_number -= 2; - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); if (rc < 0) goto out; @@ -1530,18 +1530,18 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, and avoid races inside tcp sendmsg code that could cause corruption of smb data */ - mutex_lock(&server->srv_mutex); + cifs_server_lock(server); rc = allocate_mid(ses, in_buf, &midQ); if (rc) { - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); return rc; } rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number); if (rc) { cifs_delete_mid(midQ); - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); return rc; } @@ -1554,7 +1554,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, if (rc < 0) server->sequence_number -= 2; - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); if (rc < 0) { cifs_delete_mid(midQ); diff --git a/fs/exec.c b/fs/exec.c index 14b4b3755580..0989fb8472a1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1312,9 +1312,7 @@ int begin_new_exec(struct linux_binprm * bprm) if (retval) goto out_unlock; - if (me->flags & PF_KTHREAD) - free_kthread_struct(me); - me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | + me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_NOFREEZE | PF_NO_SETAFFINITY); flush_thread(); me->personality &= ~bprm->per_clear; @@ -1959,6 +1957,10 @@ int kernel_execve(const char *kernel_filename, int fd = AT_FDCWD; int retval; + /* It is non-sense for kernel threads to call execve */ + if (WARN_ON_ONCE(current->flags & PF_KTHREAD)) + return -EINVAL; + filename = getname_kernel(kernel_filename); if (IS_ERR(filename)) return PTR_ERR(filename); diff --git a/fs/file.c b/fs/file.c index ee9317346702..dd6692048f4f 100644 --- a/fs/file.c +++ b/fs/file.c @@ -630,32 +630,23 @@ EXPORT_SYMBOL(fd_install); * @files: file struct to retrieve file from * @fd: file descriptor to retrieve file for * - * If this functions returns an EINVAL error pointer the fd was beyond the - * current maximum number of file descriptors for that fdtable. + * Context: files_lock must be held. * - * Returns: The file associated with @fd, on error returns an error pointer. + * Returns: The file associated with @fd (NULL if @fd is not open) */ static struct file *pick_file(struct files_struct *files, unsigned fd) { + struct fdtable *fdt = files_fdtable(files); struct file *file; - struct fdtable *fdt; - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - if (fd >= fdt->max_fds) { - file = ERR_PTR(-EINVAL); - goto out_unlock; - } + if (fd >= fdt->max_fds) + return NULL; + file = fdt->fd[fd]; - if (!file) { - file = ERR_PTR(-EBADF); - goto out_unlock; + if (file) { + rcu_assign_pointer(fdt->fd[fd], NULL); + __put_unused_fd(files, fd); } - rcu_assign_pointer(fdt->fd[fd], NULL); - __put_unused_fd(files, fd); - -out_unlock: - spin_unlock(&files->file_lock); return file; } @@ -664,8 +655,10 @@ int close_fd(unsigned fd) struct files_struct *files = current->files; struct file *file; + spin_lock(&files->file_lock); file = pick_file(files, fd); - if (IS_ERR(file)) + spin_unlock(&files->file_lock); + if (!file) return -EBADF; return filp_close(file, files); @@ -702,20 +695,25 @@ static inline void __range_cloexec(struct files_struct *cur_fds, static inline void __range_close(struct files_struct *cur_fds, unsigned int fd, unsigned int max_fd) { + unsigned n; + + rcu_read_lock(); + n = last_fd(files_fdtable(cur_fds)); + rcu_read_unlock(); + max_fd = min(max_fd, n); + while (fd <= max_fd) { struct file *file; + spin_lock(&cur_fds->file_lock); file = pick_file(cur_fds, fd++); - if (!IS_ERR(file)) { + spin_unlock(&cur_fds->file_lock); + + if (file) { /* found a valid file to close */ filp_close(file, cur_fds); cond_resched(); - continue; } - - /* beyond the last fd in that table */ - if (PTR_ERR(file) == -EINVAL) - return; } } @@ -795,26 +793,9 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags) * See close_fd_get_file() below, this variant assumes current->files->file_lock * is held. */ -int __close_fd_get_file(unsigned int fd, struct file **res) +struct file *__close_fd_get_file(unsigned int fd) { - struct files_struct *files = current->files; - struct file *file; - struct fdtable *fdt; - - fdt = files_fdtable(files); - if (fd >= fdt->max_fds) - goto out_err; - file = fdt->fd[fd]; - if (!file) - goto out_err; - rcu_assign_pointer(fdt->fd[fd], NULL); - __put_unused_fd(files, fd); - get_file(file); - *res = file; - return 0; -out_err: - *res = NULL; - return -ENOENT; + return pick_file(current->files, fd); } /* @@ -822,16 +803,16 @@ out_err: * The caller must ensure that filp_close() called on the file, and then * an fput(). */ -int close_fd_get_file(unsigned int fd, struct file **res) +struct file *close_fd_get_file(unsigned int fd) { struct files_struct *files = current->files; - int ret; + struct file *file; spin_lock(&files->file_lock); - ret = __close_fd_get_file(fd, res); + file = pick_file(files, fd); spin_unlock(&files->file_lock); - return ret; + return file; } void do_close_on_exec(struct files_struct *files) @@ -871,7 +852,7 @@ void do_close_on_exec(struct files_struct *files) } static inline struct file *__fget_files_rcu(struct files_struct *files, - unsigned int fd, fmode_t mask, unsigned int refs) + unsigned int fd, fmode_t mask) { for (;;) { struct file *file; @@ -897,10 +878,9 @@ static inline struct file *__fget_files_rcu(struct files_struct *files, * Such a race can take two forms: * * (a) the file ref already went down to zero, - * and get_file_rcu_many() fails. Just try - * again: + * and get_file_rcu() fails. Just try again: */ - if (unlikely(!get_file_rcu_many(file, refs))) + if (unlikely(!get_file_rcu(file))) continue; /* @@ -909,11 +889,11 @@ static inline struct file *__fget_files_rcu(struct files_struct *files, * pointer having changed, because it always goes * hand-in-hand with 'fdt'. * - * If so, we need to put our refs and try again. + * If so, we need to put our ref and try again. */ if (unlikely(rcu_dereference_raw(files->fdt) != fdt) || unlikely(rcu_dereference_raw(*fdentry) != file)) { - fput_many(file, refs); + fput(file); continue; } @@ -926,37 +906,31 @@ static inline struct file *__fget_files_rcu(struct files_struct *files, } static struct file *__fget_files(struct files_struct *files, unsigned int fd, - fmode_t mask, unsigned int refs) + fmode_t mask) { struct file *file; rcu_read_lock(); - file = __fget_files_rcu(files, fd, mask, refs); + file = __fget_files_rcu(files, fd, mask); rcu_read_unlock(); return file; } -static inline struct file *__fget(unsigned int fd, fmode_t mask, - unsigned int refs) -{ - return __fget_files(current->files, fd, mask, refs); -} - -struct file *fget_many(unsigned int fd, unsigned int refs) +static inline struct file *__fget(unsigned int fd, fmode_t mask) { - return __fget(fd, FMODE_PATH, refs); + return __fget_files(current->files, fd, mask); } struct file *fget(unsigned int fd) { - return __fget(fd, FMODE_PATH, 1); + return __fget(fd, FMODE_PATH); } EXPORT_SYMBOL(fget); struct file *fget_raw(unsigned int fd) { - return __fget(fd, 0, 1); + return __fget(fd, 0); } EXPORT_SYMBOL(fget_raw); @@ -966,7 +940,7 @@ struct file *fget_task(struct task_struct *task, unsigned int fd) task_lock(task); if (task->files) - file = __fget_files(task->files, fd, 0, 1); + file = __fget_files(task->files, fd, 0); task_unlock(task); return file; @@ -1035,7 +1009,7 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask) return 0; return (unsigned long)file; } else { - file = __fget(fd, mask, 1); + file = __fget(fd, mask); if (!file) return 0; return FDPUT_FPUT | (unsigned long)file; diff --git a/fs/file_table.c b/fs/file_table.c index ada8fe814db9..5424e3a8df5f 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -368,9 +368,9 @@ EXPORT_SYMBOL_GPL(flush_delayed_fput); static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput); -void fput_many(struct file *file, unsigned int refs) +void fput(struct file *file) { - if (atomic_long_sub_and_test(refs, &file->f_count)) { + if (atomic_long_dec_and_test(&file->f_count)) { struct task_struct *task = current; if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) { @@ -389,11 +389,6 @@ void fput_many(struct file *file, unsigned int refs) } } -void fput(struct file *file) -{ - fput_many(file, 1); -} - /* * synchronous analog of fput(); for kernel threads that might be needed * in some umount() (and thus can't use flush_delayed_fput() without diff --git a/fs/freevxfs/vxfs.h b/fs/freevxfs/vxfs.h index a41ea0ba6943..bffd156d6434 100644 --- a/fs/freevxfs/vxfs.h +++ b/fs/freevxfs/vxfs.h @@ -1,32 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2000-2001 Christoph Hellwig. * Copyright (c) 2016 Krzysztof Blaszkowski - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification. - * 2. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL"). - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * */ #ifndef _VXFS_SUPER_H_ #define _VXFS_SUPER_H_ diff --git a/fs/freevxfs/vxfs_bmap.c b/fs/freevxfs/vxfs_bmap.c index 1fd41cf98b9f..de2a5bccb930 100644 --- a/fs/freevxfs/vxfs_bmap.c +++ b/fs/freevxfs/vxfs_bmap.c @@ -1,30 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2000-2001 Christoph Hellwig. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification. - * 2. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL"). - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. */ /* diff --git a/fs/freevxfs/vxfs_dir.h b/fs/freevxfs/vxfs_dir.h index acc5477b3f23..fbcd603365ad 100644 --- a/fs/freevxfs/vxfs_dir.h +++ b/fs/freevxfs/vxfs_dir.h @@ -1,31 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2000-2001 Christoph Hellwig. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification. - * 2. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL"). - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * */ #ifndef _VXFS_DIR_H_ #define _VXFS_DIR_H_ diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h index f5c428e21024..3a2180c5e208 100644 --- a/fs/freevxfs/vxfs_extern.h +++ b/fs/freevxfs/vxfs_extern.h @@ -1,31 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2000-2001 Christoph Hellwig. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification. - * 2. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL"). - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * */ #ifndef _VXFS_EXTERN_H_ #define _VXFS_EXTERN_H_ diff --git a/fs/freevxfs/vxfs_fshead.c b/fs/freevxfs/vxfs_fshead.c index a4610a77649e..c1174a3f8990 100644 --- a/fs/freevxfs/vxfs_fshead.c +++ b/fs/freevxfs/vxfs_fshead.c @@ -1,31 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2000-2001 Christoph Hellwig. * Copyright (c) 2016 Krzysztof Blaszkowski - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification. - * 2. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL"). - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. */ /* diff --git a/fs/freevxfs/vxfs_fshead.h b/fs/freevxfs/vxfs_fshead.h index e026f0c49159..dfd2147599c4 100644 --- a/fs/freevxfs/vxfs_fshead.h +++ b/fs/freevxfs/vxfs_fshead.h @@ -1,32 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2000-2001 Christoph Hellwig. * Copyright (c) 2016 Krzysztof Blaszkowski - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification. - * 2. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL"). - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * */ #ifndef _VXFS_FSHEAD_H_ #define _VXFS_FSHEAD_H_ diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c index a37431e443d3..c2ef9f0debbd 100644 --- a/fs/freevxfs/vxfs_immed.c +++ b/fs/freevxfs/vxfs_immed.c @@ -1,30 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2000-2001 Christoph Hellwig. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification. - * 2. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL"). - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. */ /* diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 1f41b25ef38b..ceb6a12649ba 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -1,31 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2000-2001 Christoph Hellwig. * Copyright (c) 2016 Krzysztof Blaszkowski - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification. - * 2. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL"). - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. */ /* diff --git a/fs/freevxfs/vxfs_inode.h b/fs/freevxfs/vxfs_inode.h index f012abed125d..1e9e138d2b33 100644 --- a/fs/freevxfs/vxfs_inode.h +++ b/fs/freevxfs/vxfs_inode.h @@ -1,32 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2000-2001 Christoph Hellwig. * Copyright (c) 2016 Krzysztof Blaszkowski - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification. - * 2. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL"). - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * */ #ifndef _VXFS_INODE_H_ #define _VXFS_INODE_H_ diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index a51425634f65..f04ba2ed1e1a 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c @@ -1,31 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2000-2001 Christoph Hellwig. * Copyright (c) 2016 Krzysztof Blaszkowski - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification. - * 2. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL"). - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. */ /* diff --git a/fs/freevxfs/vxfs_olt.c b/fs/freevxfs/vxfs_olt.c index 813da6685151..23f35187c289 100644 --- a/fs/freevxfs/vxfs_olt.c +++ b/fs/freevxfs/vxfs_olt.c @@ -1,30 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2000-2001 Christoph Hellwig. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification. - * 2. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL"). - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. */ /* diff --git a/fs/freevxfs/vxfs_olt.h b/fs/freevxfs/vxfs_olt.h index 0c0b0c9fa557..53afba08d617 100644 --- a/fs/freevxfs/vxfs_olt.h +++ b/fs/freevxfs/vxfs_olt.h @@ -1,31 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2000-2001 Christoph Hellwig. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification. - * 2. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL"). - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * */ #ifndef _VXFS_OLT_H_ #define _VXFS_OLT_H_ diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c index 6143ebab940d..0e633d2bfc7d 100644 --- a/fs/freevxfs/vxfs_subr.c +++ b/fs/freevxfs/vxfs_subr.c @@ -1,30 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2000-2001 Christoph Hellwig. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification. - * 2. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL"). - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. */ /* diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 22eed5a73ac2..c3b82f716f9a 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -1,31 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2000-2001 Christoph Hellwig. * Copyright (c) 2016 Krzysztof Blaszkowski - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification. - * 2. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL"). - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. */ /* diff --git a/fs/fsopen.c b/fs/fsopen.c index 27a890aa493a..fc9d2d9fd234 100644 --- a/fs/fsopen.c +++ b/fs/fsopen.c @@ -119,7 +119,7 @@ SYSCALL_DEFINE2(fsopen, const char __user *, _fs_name, unsigned int, flags) const char *fs_name; int ret; - if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN)) + if (!may_mount()) return -EPERM; if (flags & ~FSOPEN_CLOEXEC) @@ -162,7 +162,7 @@ SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags unsigned int lookup_flags; int ret; - if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN)) + if (!may_mount()) return -EPERM; if ((flags & ~(FSPICK_CLOEXEC | diff --git a/fs/internal.h b/fs/internal.h index 9a6c233ee7f1..87e96b9024ce 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -84,6 +84,7 @@ extern int __mnt_want_write_file(struct file *); extern void __mnt_drop_write_file(struct file *); extern void dissolve_on_fput(struct vfsmount *); +extern bool may_mount(void); int path_mount(const char *dev_name, struct path *path, const char *type_page, unsigned long flags, void *data_page); @@ -125,7 +126,7 @@ extern struct file *do_file_open_root(const struct path *, const char *, const struct open_flags *); extern struct open_how build_open_how(int flags, umode_t mode); extern int build_open_flags(const struct open_how *how, struct open_flags *op); -extern int __close_fd_get_file(unsigned int fd, struct file **res); +extern struct file *__close_fd_get_file(unsigned int fd); long do_sys_ftruncate(unsigned int fd, loff_t length, int small); int chmod_common(const struct path *path, umode_t mode); diff --git a/fs/io_uring.c b/fs/io_uring.c index 9f1c682d7caf..86f9df56526b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -112,7 +112,8 @@ IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS) #define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \ - REQ_F_POLLED | REQ_F_CREDS | REQ_F_ASYNC_DATA) + REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \ + REQ_F_ASYNC_DATA) #define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\ IO_REQ_CLEAN_FLAGS) @@ -540,6 +541,7 @@ struct io_uring_task { const struct io_ring_ctx *last; struct io_wq *io_wq; struct percpu_counter inflight; + atomic_t inflight_tracked; atomic_t in_idle; spinlock_t task_lock; @@ -574,6 +576,7 @@ struct io_close { struct file *file; int fd; u32 file_slot; + u32 flags; }; struct io_timeout_data { @@ -1355,8 +1358,6 @@ static void io_clean_op(struct io_kiocb *req); static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd, unsigned issue_flags); static struct file *io_file_get_normal(struct io_kiocb *req, int fd); -static void io_drop_inflight_file(struct io_kiocb *req); -static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags); static void io_queue_sqe(struct io_kiocb *req); static void io_rsrc_put_work(struct work_struct *work); @@ -1366,7 +1367,9 @@ static int io_req_prep_async(struct io_kiocb *req); static int io_install_fixed_file(struct io_kiocb *req, struct file *file, unsigned int issue_flags, u32 slot_index); -static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags); +static int __io_close_fixed(struct io_kiocb *req, unsigned int issue_flags, + unsigned int offset); +static inline int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags); static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer); static void io_eventfd_signal(struct io_ring_ctx *ctx); @@ -1757,9 +1760,29 @@ static bool io_match_task(struct io_kiocb *head, struct task_struct *task, bool cancel_all) __must_hold(&req->ctx->timeout_lock) { + struct io_kiocb *req; + if (task && head->task != task) return false; - return cancel_all; + if (cancel_all) + return true; + + io_for_each_link(req, head) { + if (req->flags & REQ_F_INFLIGHT) + return true; + } + return false; +} + +static bool io_match_linked(struct io_kiocb *head) +{ + struct io_kiocb *req; + + io_for_each_link(req, head) { + if (req->flags & REQ_F_INFLIGHT) + return true; + } + return false; } /* @@ -1769,9 +1792,24 @@ static bool io_match_task(struct io_kiocb *head, struct task_struct *task, static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, bool cancel_all) { + bool matched; + if (task && head->task != task) return false; - return cancel_all; + if (cancel_all) + return true; + + if (head->flags & REQ_F_LINK_TIMEOUT) { + struct io_ring_ctx *ctx = head->ctx; + + /* protect against races with linked timeouts */ + spin_lock_irq(&ctx->timeout_lock); + matched = io_match_linked(head); + spin_unlock_irq(&ctx->timeout_lock); + } else { + matched = io_match_linked(head); + } + return matched; } static inline bool req_has_async_data(struct io_kiocb *req) @@ -1927,6 +1965,14 @@ static inline bool io_req_ffs_set(struct io_kiocb *req) return req->flags & REQ_F_FIXED_FILE; } +static inline void io_req_track_inflight(struct io_kiocb *req) +{ + if (!(req->flags & REQ_F_INFLIGHT)) { + req->flags |= REQ_F_INFLIGHT; + atomic_inc(¤t->io_uring->inflight_tracked); + } +} + static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req) { if (WARN_ON_ONCE(!req->link)) @@ -2988,8 +3034,6 @@ static void __io_req_task_work_add(struct io_kiocb *req, unsigned long flags; bool running; - io_drop_inflight_file(req); - spin_lock_irqsave(&tctx->task_lock, flags); wq_list_add_tail(&req->io_task_work.node, list); running = tctx->task_running; @@ -4176,6 +4220,16 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw) return 0; } +static int io_readv_prep_async(struct io_kiocb *req) +{ + return io_rw_prep_async(req, READ); +} + +static int io_writev_prep_async(struct io_kiocb *req) +{ + return io_rw_prep_async(req, WRITE); +} + /* * This is our waitqueue callback handler, registered through __folio_lock_async() * when we initially tried to do the IO with the iocb armed our waitqueue. @@ -5103,42 +5157,6 @@ static int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) return 0; } -static int io_shutdown_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ -#if defined(CONFIG_NET) - if (unlikely(sqe->off || sqe->addr || sqe->rw_flags || - sqe->buf_index || sqe->splice_fd_in)) - return -EINVAL; - - req->shutdown.how = READ_ONCE(sqe->len); - return 0; -#else - return -EOPNOTSUPP; -#endif -} - -static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags) -{ -#if defined(CONFIG_NET) - struct socket *sock; - int ret; - - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - - sock = sock_from_file(req->file); - if (unlikely(!sock)) - return -ENOTSOCK; - - ret = __sys_shutdown_sock(sock, req->shutdown.how); - io_req_complete(req, ret); - return 0; -#else - return -EOPNOTSUPP; -#endif -} - static int __io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { @@ -5445,15 +5463,11 @@ static int io_file_bitmap_get(struct io_ring_ctx *ctx) unsigned long nr = ctx->nr_user_files; int ret; - if (table->alloc_hint >= nr) - table->alloc_hint = 0; - do { ret = find_next_zero_bit(table->bitmap, nr, table->alloc_hint); - if (ret != nr) { - table->alloc_hint = ret + 1; + if (ret != nr) return ret; - } + if (!table->alloc_hint) break; @@ -5464,6 +5478,10 @@ static int io_file_bitmap_get(struct io_ring_ctx *ctx) return -ENFILE; } +/* + * Note when io_fixed_fd_install() returns error value, it will ensure + * fput() is called correspondingly. + */ static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags, struct file *file, unsigned int file_slot) { @@ -5471,26 +5489,24 @@ static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags, struct io_ring_ctx *ctx = req->ctx; int ret; + io_ring_submit_lock(ctx, issue_flags); + if (alloc_slot) { - io_ring_submit_lock(ctx, issue_flags); ret = io_file_bitmap_get(ctx); - if (unlikely(ret < 0)) { - io_ring_submit_unlock(ctx, issue_flags); - return ret; - } - + if (unlikely(ret < 0)) + goto err; file_slot = ret; } else { file_slot--; } ret = io_install_fixed_file(req, file, issue_flags, file_slot); - if (alloc_slot) { - io_ring_submit_unlock(ctx, issue_flags); - if (!ret) - return file_slot; - } - + if (!ret && alloc_slot) + ret = file_slot; +err: + io_ring_submit_unlock(ctx, issue_flags); + if (unlikely(ret < 0)) + fput(file); return ret; } @@ -5972,14 +5988,18 @@ static int io_statx(struct io_kiocb *req, unsigned int issue_flags) static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index) + if (sqe->off || sqe->addr || sqe->len || sqe->buf_index) return -EINVAL; if (req->flags & REQ_F_FIXED_FILE) return -EBADF; req->close.fd = READ_ONCE(sqe->fd); req->close.file_slot = READ_ONCE(sqe->file_index); - if (req->close.file_slot && req->close.fd) + req->close.flags = READ_ONCE(sqe->close_flags); + if (req->close.flags & ~IORING_CLOSE_FD_AND_FILE_SLOT) + return -EINVAL; + if (!(req->close.flags & IORING_CLOSE_FD_AND_FILE_SLOT) && + req->close.file_slot && req->close.fd) return -EINVAL; return 0; @@ -5995,7 +6015,8 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags) if (req->close.file_slot) { ret = io_close_fixed(req, issue_flags); - goto err; + if (ret || !(req->close.flags & IORING_CLOSE_FD_AND_FILE_SLOT)) + goto err; } spin_lock(&files->file_lock); @@ -6018,13 +6039,10 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags) return -EAGAIN; } - ret = __close_fd_get_file(close->fd, &file); + file = __close_fd_get_file(close->fd); spin_unlock(&files->file_lock); - if (ret < 0) { - if (ret == -ENOENT) - ret = -EBADF; + if (!file) goto err; - } /* No ->flush() or already async, safely close from here */ ret = filp_close(file, current->files); @@ -6063,6 +6081,34 @@ static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags) } #if defined(CONFIG_NET) +static int io_shutdown_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + if (unlikely(sqe->off || sqe->addr || sqe->rw_flags || + sqe->buf_index || sqe->splice_fd_in)) + return -EINVAL; + + req->shutdown.how = READ_ONCE(sqe->len); + return 0; +} + +static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags) +{ + struct socket *sock; + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + + sock = sock_from_file(req->file); + if (unlikely(!sock)) + return -ENOTSOCK; + + ret = __sys_shutdown_sock(sock, req->shutdown.how); + io_req_complete(req, ret); + return 0; +} + static bool io_net_retry(struct socket *sock, int flags) { if (!(flags & MSG_WAITALL)) @@ -6674,8 +6720,8 @@ static int io_socket(struct io_kiocb *req, unsigned int issue_flags) fd_install(fd, file); ret = fd; } else { - ret = io_install_fixed_file(req, file, issue_flags, - sock->file_slot - 1); + ret = io_fixed_fd_install(req, issue_flags, file, + sock->file_slot); } __io_req_complete(req, issue_flags, ret, 0); return 0; @@ -6767,6 +6813,7 @@ IO_NETOP_PREP_ASYNC(recvmsg); IO_NETOP_PREP_ASYNC(connect); IO_NETOP_PREP(accept); IO_NETOP_PREP(socket); +IO_NETOP_PREP(shutdown); IO_NETOP_FN(send); IO_NETOP_FN(recv); #endif /* CONFIG_NET */ @@ -6905,10 +6952,6 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) if (!req->cqe.res) { struct poll_table_struct pt = { ._key = req->apoll_events }; - unsigned flags = locked ? 0 : IO_URING_F_UNLOCKED; - - if (unlikely(!io_assign_file(req, flags))) - return -EBADF; req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events; } @@ -7390,7 +7433,7 @@ static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe, return demangle_poll(events) | (events & (EPOLLEXCLUSIVE|EPOLLONESHOT)); } -static int io_poll_update_prep(struct io_kiocb *req, +static int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_poll_update *upd = &req->poll_update; @@ -7454,7 +7497,7 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags) return 0; } -static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags) +static int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags) { struct io_cancel_data cd = { .data = req->poll_update.old_user_data, }; struct io_ring_ctx *ctx = req->ctx; @@ -7698,8 +7741,9 @@ static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags) return 0; } -static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, - bool is_timeout_link) +static int __io_timeout_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe, + bool is_timeout_link) { struct io_timeout_data *data; unsigned flags; @@ -7754,6 +7798,18 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, return 0; } +static int io_timeout_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + return __io_timeout_prep(req, sqe, false); +} + +static int io_link_timeout_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + return __io_timeout_prep(req, sqe, true); +} + static int io_timeout(struct io_kiocb *req, unsigned int issue_flags) { struct io_ring_ctx *ctx = req->ctx; @@ -7970,7 +8026,7 @@ done: return 0; } -static int io_rsrc_update_prep(struct io_kiocb *req, +static int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) @@ -7986,6 +8042,41 @@ static int io_rsrc_update_prep(struct io_kiocb *req, return 0; } +static int io_files_update_with_index_alloc(struct io_kiocb *req, + unsigned int issue_flags) +{ + __s32 __user *fds = u64_to_user_ptr(req->rsrc_update.arg); + unsigned int done; + struct file *file; + int ret, fd; + + for (done = 0; done < req->rsrc_update.nr_args; done++) { + if (copy_from_user(&fd, &fds[done], sizeof(fd))) { + ret = -EFAULT; + break; + } + + file = fget(fd); + if (!file) { + ret = -EBADF; + break; + } + ret = io_fixed_fd_install(req, issue_flags, file, + IORING_FILE_INDEX_ALLOC); + if (ret < 0) + break; + if (copy_to_user(&fds[done], &ret, sizeof(ret))) { + ret = -EFAULT; + __io_close_fixed(req, issue_flags, ret); + break; + } + } + + if (done) + return done; + return ret; +} + static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) { struct io_ring_ctx *ctx = req->ctx; @@ -7999,10 +8090,14 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) up.resv = 0; up.resv2 = 0; - io_ring_submit_lock(ctx, issue_flags); - ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE, - &up, req->rsrc_update.nr_args); - io_ring_submit_unlock(ctx, issue_flags); + if (req->rsrc_update.offset == IORING_FILE_INDEX_ALLOC) { + ret = io_files_update_with_index_alloc(req, issue_flags); + } else { + io_ring_submit_lock(ctx, issue_flags); + ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE, + &up, req->rsrc_update.nr_args); + io_ring_submit_unlock(ctx, issue_flags); + } if (ret < 0) req_set_fail(req); @@ -8025,7 +8120,7 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) case IORING_OP_POLL_ADD: return io_poll_add_prep(req, sqe); case IORING_OP_POLL_REMOVE: - return io_poll_update_prep(req, sqe); + return io_poll_remove_prep(req, sqe); case IORING_OP_FSYNC: return io_fsync_prep(req, sqe); case IORING_OP_SYNC_FILE_RANGE: @@ -8039,13 +8134,13 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) case IORING_OP_CONNECT: return io_connect_prep(req, sqe); case IORING_OP_TIMEOUT: - return io_timeout_prep(req, sqe, false); + return io_timeout_prep(req, sqe); case IORING_OP_TIMEOUT_REMOVE: return io_timeout_remove_prep(req, sqe); case IORING_OP_ASYNC_CANCEL: return io_async_cancel_prep(req, sqe); case IORING_OP_LINK_TIMEOUT: - return io_timeout_prep(req, sqe, true); + return io_link_timeout_prep(req, sqe); case IORING_OP_ACCEPT: return io_accept_prep(req, sqe); case IORING_OP_FALLOCATE: @@ -8055,7 +8150,7 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) case IORING_OP_CLOSE: return io_close_prep(req, sqe); case IORING_OP_FILES_UPDATE: - return io_rsrc_update_prep(req, sqe); + return io_files_update_prep(req, sqe); case IORING_OP_STATX: return io_statx_prep(req, sqe); case IORING_OP_FADVISE: @@ -8123,9 +8218,9 @@ static int io_req_prep_async(struct io_kiocb *req) switch (req->opcode) { case IORING_OP_READV: - return io_rw_prep_async(req, READ); + return io_readv_prep_async(req); case IORING_OP_WRITEV: - return io_rw_prep_async(req, WRITE); + return io_writev_prep_async(req); case IORING_OP_SENDMSG: return io_sendmsg_prep_async(req); case IORING_OP_RECVMSG: @@ -8264,6 +8359,11 @@ static void io_clean_op(struct io_kiocb *req) kfree(req->apoll); req->apoll = NULL; } + if (req->flags & REQ_F_INFLIGHT) { + struct io_uring_task *tctx = req->task->io_uring; + + atomic_dec(&tctx->inflight_tracked); + } if (req->flags & REQ_F_CREDS) put_cred(req->creds); if (req->flags & REQ_F_ASYNC_DATA) { @@ -8288,6 +8388,7 @@ static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags) static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) { + const struct io_op_def *def = &io_op_defs[req->opcode]; const struct cred *creds = NULL; int ret; @@ -8297,7 +8398,7 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred())) creds = override_creds(req->creds); - if (!io_op_defs[req->opcode].audit_skip) + if (!def->audit_skip) audit_uring_entry(req->opcode); switch (req->opcode) { @@ -8321,7 +8422,7 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) ret = io_poll_add(req, issue_flags); break; case IORING_OP_POLL_REMOVE: - ret = io_poll_update(req, issue_flags); + ret = io_poll_remove(req, issue_flags); break; case IORING_OP_SYNC_FILE_RANGE: ret = io_sync_file_range(req, issue_flags); @@ -8436,7 +8537,7 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) break; } - if (!io_op_defs[req->opcode].audit_skip) + if (!def->audit_skip) audit_uring_exit(!ret, ret); if (creds) @@ -8569,19 +8670,6 @@ out: return file; } -/* - * Drop the file for requeue operations. Only used of req->file is the - * io_uring descriptor itself. - */ -static void io_drop_inflight_file(struct io_kiocb *req) -{ - if (unlikely(req->flags & REQ_F_INFLIGHT)) { - fput(req->file); - req->file = NULL; - req->flags &= ~REQ_F_INFLIGHT; - } -} - static struct file *io_file_get_normal(struct io_kiocb *req, int fd) { struct file *file = fget(fd); @@ -8590,7 +8678,7 @@ static struct file *io_file_get_normal(struct io_kiocb *req, int fd) /* we don't allow fixed io_uring files */ if (file && file->f_op == &io_uring_fops) - req->flags |= REQ_F_INFLIGHT; + io_req_track_inflight(req); return file; } @@ -8788,6 +8876,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, const struct io_uring_sqe *sqe) __must_hold(&ctx->uring_lock) { + const struct io_op_def *def; unsigned int sqe_flags; int personality; u8 opcode; @@ -8805,12 +8894,13 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, req->opcode = 0; return -EINVAL; } + def = &io_op_defs[opcode]; if (unlikely(sqe_flags & ~SQE_COMMON_FLAGS)) { /* enforce forwards compatibility on users */ if (sqe_flags & ~SQE_VALID_FLAGS) return -EINVAL; if (sqe_flags & IOSQE_BUFFER_SELECT) { - if (!io_op_defs[opcode].buffer_select) + if (!def->buffer_select) return -EOPNOTSUPP; req->buf_index = READ_ONCE(sqe->buf_group); } @@ -8836,12 +8926,12 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, } } - if (!io_op_defs[opcode].ioprio && sqe->ioprio) + if (!def->ioprio && sqe->ioprio) return -EINVAL; - if (!io_op_defs[opcode].iopoll && (ctx->flags & IORING_SETUP_IOPOLL)) + if (!def->iopoll && (ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; - if (io_op_defs[opcode].needs_file) { + if (def->needs_file) { struct io_submit_state *state = &ctx->submit_state; req->cqe.fd = READ_ONCE(sqe->fd); @@ -8850,7 +8940,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, * Plug now if we have more than 2 IO left after this, and the * target is potentially a read/write to block based storage. */ - if (state->need_plug && io_op_defs[opcode].plug) { + if (state->need_plug && def->plug) { state->plug_started = true; state->need_plug = false; blk_start_plug_nr_ios(&state->plug, state->submit_nr); @@ -9658,8 +9748,7 @@ static inline void io_file_bitmap_set(struct io_file_table *table, int bit) { WARN_ON_ONCE(test_bit(bit, table->bitmap)); __set_bit(bit, table->bitmap); - if (bit == table->alloc_hint) - table->alloc_hint++; + table->alloc_hint = bit + 1; } static inline void io_file_bitmap_clear(struct io_file_table *table, int bit) @@ -10113,21 +10202,19 @@ static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, static int io_install_fixed_file(struct io_kiocb *req, struct file *file, unsigned int issue_flags, u32 slot_index) + __must_hold(&req->ctx->uring_lock) { struct io_ring_ctx *ctx = req->ctx; bool needs_switch = false; struct io_fixed_file *file_slot; - int ret = -EBADF; + int ret; - io_ring_submit_lock(ctx, issue_flags); if (file->f_op == &io_uring_fops) - goto err; - ret = -ENXIO; + return -EBADF; if (!ctx->file_data) - goto err; - ret = -EINVAL; + return -ENXIO; if (slot_index >= ctx->nr_user_files) - goto err; + return -EINVAL; slot_index = array_index_nospec(slot_index, ctx->nr_user_files); file_slot = io_fixed_file_slot(&ctx->file_table, slot_index); @@ -10158,15 +10245,14 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file, err: if (needs_switch) io_rsrc_node_switch(ctx, ctx->file_data); - io_ring_submit_unlock(ctx, issue_flags); if (ret) fput(file); return ret; } -static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) +static int __io_close_fixed(struct io_kiocb *req, unsigned int issue_flags, + unsigned int offset) { - unsigned int offset = req->close.file_slot - 1; struct io_ring_ctx *ctx = req->ctx; struct io_fixed_file *file_slot; struct file *file; @@ -10203,6 +10289,11 @@ out: return ret; } +static inline int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) +{ + return __io_close_fixed(req, issue_flags, req->close.file_slot - 1); +} + static int __io_sqe_files_update(struct io_ring_ctx *ctx, struct io_uring_rsrc_update2 *up, unsigned nr_args) @@ -10351,6 +10442,7 @@ static __cold int io_uring_alloc_task_context(struct task_struct *task, xa_init(&tctx->xa); init_waitqueue_head(&tctx->wait); atomic_set(&tctx->in_idle, 0); + atomic_set(&tctx->inflight_tracked, 0); task->io_uring = tctx; spin_lock_init(&tctx->task_lock); INIT_WQ_LIST(&tctx->task_list); @@ -11046,6 +11138,7 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx) xa_for_each(&ctx->io_bl_xa, index, bl) { xa_erase(&ctx->io_bl_xa, bl->bgid); __io_remove_buffers(ctx, bl, -1U); + kfree(bl); } while (!list_empty(&ctx->io_buffers_pages)) { @@ -11581,7 +11674,7 @@ static __cold void io_uring_clean_tctx(struct io_uring_task *tctx) static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked) { if (tracked) - return 0; + return atomic_read(&tctx->inflight_tracked); return percpu_counter_sum(&tctx->inflight); } @@ -11957,14 +12050,14 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, return -EINVAL; fd = array_index_nospec(fd, IO_RINGFD_REG_MAX); f.file = tctx->registered_rings[fd]; - if (unlikely(!f.file)) - return -EBADF; + f.flags = 0; } else { f = fdget(fd); - if (unlikely(!f.file)) - return -EBADF; } + if (unlikely(!f.file)) + return -EBADF; + ret = -EOPNOTSUPP; if (unlikely(f.file->f_op != &io_uring_fops)) goto out_fput; @@ -12062,8 +12155,7 @@ iopoll_locked: out: percpu_ref_put(&ctx->refs); out_fput: - if (!(flags & IORING_ENTER_REGISTERED_RING)) - fdput(f); + fdput(f); return ret; } diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index 7e9abdb89712..acd32f05b519 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -43,9 +43,9 @@ static void jffs2_erase_block(struct jffs2_sb_info *c, jffs2_dbg(1, "%s(): erase block %#08x (range %#08x-%#08x)\n", __func__, jeb->offset, jeb->offset, jeb->offset + c->sector_size); - instr = kmalloc(sizeof(struct erase_info), GFP_KERNEL); + instr = kzalloc(sizeof(struct erase_info), GFP_KERNEL); if (!instr) { - pr_warn("kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n"); + pr_warn("kzalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n"); mutex_lock(&c->erase_free_sem); spin_lock(&c->erase_completion_lock); list_move(&jeb->list, &c->erase_pending_list); @@ -57,8 +57,6 @@ static void jffs2_erase_block(struct jffs2_sb_info *c, return; } - memset(instr, 0, sizeof(*instr)); - instr->addr = jeb->offset; instr->len = c->sector_size; diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 00a110f40e10..39cec28096a7 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -604,6 +604,7 @@ out_root: jffs2_free_raw_node_refs(c); kvfree(c->blocks); jffs2_clear_xattr_subsystem(c); + jffs2_sum_exit(c); out_inohash: kfree(c->inocache_list); out_wbuf: diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index e205fde7163a..6eca72cfa1f2 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -18,7 +18,15 @@ #include "kernfs-internal.h" static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */ -static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */ +/* + * Don't use rename_lock to piggy back on pr_cont_buf. We don't want to + * call pr_cont() while holding rename_lock. Because sometimes pr_cont() + * will perform wakeups when releasing console_sem. Holding rename_lock + * will introduce deadlock if the scheduler reads the kernfs_name in the + * wakeup path. + */ +static DEFINE_SPINLOCK(kernfs_pr_cont_lock); +static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */ static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) @@ -229,12 +237,12 @@ void pr_cont_kernfs_name(struct kernfs_node *kn) { unsigned long flags; - spin_lock_irqsave(&kernfs_rename_lock, flags); + spin_lock_irqsave(&kernfs_pr_cont_lock, flags); - kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); + kernfs_name(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); pr_cont("%s", kernfs_pr_cont_buf); - spin_unlock_irqrestore(&kernfs_rename_lock, flags); + spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); } /** @@ -248,10 +256,10 @@ void pr_cont_kernfs_path(struct kernfs_node *kn) unsigned long flags; int sz; - spin_lock_irqsave(&kernfs_rename_lock, flags); + spin_lock_irqsave(&kernfs_pr_cont_lock, flags); - sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf, - sizeof(kernfs_pr_cont_buf)); + sz = kernfs_path_from_node(kn, NULL, kernfs_pr_cont_buf, + sizeof(kernfs_pr_cont_buf)); if (sz < 0) { pr_cont("(error)"); goto out; @@ -265,7 +273,7 @@ void pr_cont_kernfs_path(struct kernfs_node *kn) pr_cont("%s", kernfs_pr_cont_buf); out: - spin_unlock_irqrestore(&kernfs_rename_lock, flags); + spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); } /** @@ -823,13 +831,12 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, lockdep_assert_held_read(&kernfs_root(parent)->kernfs_rwsem); - /* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */ - spin_lock_irq(&kernfs_rename_lock); + spin_lock_irq(&kernfs_pr_cont_lock); len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf)); if (len >= sizeof(kernfs_pr_cont_buf)) { - spin_unlock_irq(&kernfs_rename_lock); + spin_unlock_irq(&kernfs_pr_cont_lock); return NULL; } @@ -841,7 +848,7 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, parent = kernfs_find_ns(parent, name, ns); } - spin_unlock_irq(&kernfs_rename_lock); + spin_unlock_irq(&kernfs_pr_cont_lock); return parent; } diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 88423069407c..e3abfa843879 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -33,7 +33,6 @@ static DEFINE_SPINLOCK(kernfs_open_node_lock); static DEFINE_MUTEX(kernfs_open_file_mutex); struct kernfs_open_node { - atomic_t refcnt; atomic_t event; wait_queue_head_t poll; struct list_head files; /* goes through kernfs_open_file.list */ @@ -530,10 +529,8 @@ static int kernfs_get_open_node(struct kernfs_node *kn, } on = kn->attr.open; - if (on) { - atomic_inc(&on->refcnt); + if (on) list_add_tail(&of->list, &on->files); - } spin_unlock_irq(&kernfs_open_node_lock); mutex_unlock(&kernfs_open_file_mutex); @@ -548,7 +545,6 @@ static int kernfs_get_open_node(struct kernfs_node *kn, if (!new_on) return -ENOMEM; - atomic_set(&new_on->refcnt, 0); atomic_set(&new_on->event, 1); init_waitqueue_head(&new_on->poll); INIT_LIST_HEAD(&new_on->files); @@ -556,17 +552,19 @@ static int kernfs_get_open_node(struct kernfs_node *kn, } /** - * kernfs_put_open_node - put kernfs_open_node - * @kn: target kernfs_nodet + * kernfs_unlink_open_file - Unlink @of from @kn. + * + * @kn: target kernfs_node * @of: associated kernfs_open_file * - * Put @kn->attr.open and unlink @of from the files list. If - * reference count reaches zero, disassociate and free it. + * Unlink @of from list of @kn's associated open files. If list of + * associated open files becomes empty, disassociate and free + * kernfs_open_node. * * LOCKING: * None. */ -static void kernfs_put_open_node(struct kernfs_node *kn, +static void kernfs_unlink_open_file(struct kernfs_node *kn, struct kernfs_open_file *of) { struct kernfs_open_node *on = kn->attr.open; @@ -578,7 +576,7 @@ static void kernfs_put_open_node(struct kernfs_node *kn, if (of) list_del(&of->list); - if (atomic_dec_and_test(&on->refcnt)) + if (list_empty(&on->files)) kn->attr.open = NULL; else on = NULL; @@ -706,7 +704,7 @@ static int kernfs_fop_open(struct inode *inode, struct file *file) return 0; err_put_node: - kernfs_put_open_node(kn, of); + kernfs_unlink_open_file(kn, of); err_seq_release: seq_release(inode, file); err_free: @@ -752,7 +750,7 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp) mutex_unlock(&kernfs_open_file_mutex); } - kernfs_put_open_node(kn, of); + kernfs_unlink_open_file(kn, of); seq_release(inode, filp); kfree(of->prealloc_buf); kfree(of); @@ -768,15 +766,24 @@ void kernfs_drain_open_files(struct kernfs_node *kn) if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE))) return; - spin_lock_irq(&kernfs_open_node_lock); - on = kn->attr.open; - if (on) - atomic_inc(&on->refcnt); - spin_unlock_irq(&kernfs_open_node_lock); - if (!on) + /* + * lockless opportunistic check is safe below because no one is adding to + * ->attr.open at this point of time. This check allows early bail out + * if ->attr.open is already NULL. kernfs_unlink_open_file makes + * ->attr.open NULL only while holding kernfs_open_file_mutex so below + * check under kernfs_open_file_mutex will ensure bailing out if + * ->attr.open became NULL while waiting for the mutex. + */ + if (!kn->attr.open) return; mutex_lock(&kernfs_open_file_mutex); + if (!kn->attr.open) { + mutex_unlock(&kernfs_open_file_mutex); + return; + } + + on = kn->attr.open; list_for_each_entry(of, &on->files, list) { struct inode *inode = file_inode(of->file); @@ -789,8 +796,6 @@ void kernfs_drain_open_files(struct kernfs_node *kn) } mutex_unlock(&kernfs_open_file_mutex); - - kernfs_put_open_node(kn, NULL); } /* diff --git a/fs/namei.c b/fs/namei.c index 776ecf679965..1f28d3f463c3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -730,13 +730,6 @@ static bool legitimize_links(struct nameidata *nd) static bool legitimize_root(struct nameidata *nd) { - /* - * For scoped-lookups (where nd->root has been zeroed), we need to - * restart the whole lookup from scratch -- because set_root() is wrong - * for these lookups (nd->dfd is the root, not the filesystem root). - */ - if (!nd->root.mnt && (nd->flags & LOOKUP_IS_SCOPED)) - return false; /* Nothing to do if nd->root is zero or is managed by the VFS user. */ if (!nd->root.mnt || (nd->state & ND_ROOT_PRESET)) return true; @@ -798,7 +791,7 @@ out: * @seq: seq number to check @dentry against * Returns: true on success, false on failure * - * Similar to to try_to_unlazy(), but here we have the next dentry already + * Similar to try_to_unlazy(), but here we have the next dentry already * picked by rcu-walk and want to legitimize that in addition to the current * nd->path and nd->root for ref-walk mode. Must be called from rcu-walk context. * Nothing should touch nameidata between try_to_unlazy_next() failure and @@ -1755,7 +1748,7 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq) // unlazy even if we fail to grab the link - cleanup needs it bool grabbed_link = legitimize_path(nd, link, seq); - if (!try_to_unlazy(nd) != 0 || !grabbed_link) + if (!try_to_unlazy(nd) || !grabbed_link) return -ECHILD; if (nd_alloc_stack(nd)) diff --git a/fs/namespace.c b/fs/namespace.c index 41461f55c039..e6a7e769d25d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1760,7 +1760,7 @@ out_unlock: /* * Is the caller allowed to modify his namespace? */ -static inline bool may_mount(void) +bool may_mount(void) { return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN); } diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 7b861e4f0533..03d3a270eff4 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -328,7 +328,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, char *read_name = NULL; int len, status = 0; - server = NFS_SERVER(ss_mnt->mnt_root->d_inode); + server = NFS_SB(ss_mnt->mnt_sb); if (!fattr) return ERR_PTR(-ENOMEM); @@ -346,7 +346,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, goto out; snprintf(read_name, len, SSC_READ_NAME_BODY, read_name_gen++); - r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, fattr); + r_ino = nfs_fhget(ss_mnt->mnt_sb, src_fh, fattr); if (IS_ERR(r_ino)) { res = ERR_CAST(r_ino); goto out_free_name; diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index a4fcdc7927ca..8e9d2b35175f 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -492,7 +492,7 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) down_write(&ni->file.run_lock); err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, - &new_valid, true, NULL); + &new_valid, ni->mi.sbi->options->prealloc, NULL); up_write(&ni->file.run_lock); if (new_valid < ni->i_valid) @@ -659,7 +659,13 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) /* * Normal file: Allocate clusters, do not change 'valid' size. */ - err = ntfs_set_size(inode, max(end, i_size)); + loff_t new_size = max(end, i_size); + + err = inode_newsize_ok(inode, new_size); + if (err) + goto out; + + err = ntfs_set_size(inode, new_size); if (err) goto out; @@ -759,7 +765,7 @@ int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, } inode_dio_wait(inode); - if (attr->ia_size < oldsize) + if (attr->ia_size <= oldsize) err = ntfs_truncate(inode, attr->ia_size); else if (attr->ia_size > oldsize) err = ntfs_extend(inode, attr->ia_size, 0, NULL); diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 6f47a9c17f89..18842998c8fa 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -1964,10 +1964,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, vcn += clen; - if (vbo + bytes >= end) { + if (vbo + bytes >= end) bytes = end - vbo; - flags |= FIEMAP_EXTENT_LAST; - } if (vbo + bytes <= valid) { ; @@ -1977,6 +1975,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, /* vbo < valid && valid < vbo + bytes */ u64 dlen = valid - vbo; + if (vbo + dlen >= end) + flags |= FIEMAP_EXTENT_LAST; + err = fiemap_fill_next_extent(fieinfo, vbo, lbo, dlen, flags); if (err < 0) @@ -1995,6 +1996,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, flags |= FIEMAP_EXTENT_UNWRITTEN; } + if (vbo + bytes >= end) + flags |= FIEMAP_EXTENT_LAST; + err = fiemap_fill_next_extent(fieinfo, vbo, lbo, bytes, flags); if (err < 0) break; diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 06492f088d60..49b7df616778 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -1185,8 +1185,6 @@ static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first, if (!r_page) return -ENOMEM; - memset(info, 0, sizeof(struct restart_info)); - /* Determine which restart area we are looking for. */ if (first) { vbo = 0; @@ -3791,10 +3789,11 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) if (!log) return -ENOMEM; + memset(&rst_info, 0, sizeof(struct restart_info)); + log->ni = ni; log->l_size = l_size; log->one_page_buf = kmalloc(page_size, GFP_NOFS); - if (!log->one_page_buf) { err = -ENOMEM; goto out; @@ -3842,6 +3841,7 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) if (rst_info.vbo) goto check_restart_area; + memset(&rst_info2, 0, sizeof(struct restart_info)); err = log_read_rst(log, l_size, false, &rst_info2); /* Determine which restart area to use. */ @@ -4085,8 +4085,10 @@ process_log: if (client == LFS_NO_CLIENT_LE) { /* Insert "NTFS" client LogFile. */ client = ra->client_idx[0]; - if (client == LFS_NO_CLIENT_LE) - return -EINVAL; + if (client == LFS_NO_CLIENT_LE) { + err = -EINVAL; + goto out; + } t16 = le16_to_cpu(client); cr = ca + t16; diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 74f60c457f28..be4ebdd8048b 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -758,6 +758,7 @@ static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) loff_t vbo = iocb->ki_pos; loff_t end; int wr = iov_iter_rw(iter) & WRITE; + size_t iter_count = iov_iter_count(iter); loff_t valid; ssize_t ret; @@ -771,10 +772,13 @@ static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) wr ? ntfs_get_block_direct_IO_W : ntfs_get_block_direct_IO_R); - if (ret <= 0) + if (ret > 0) + end = vbo + ret; + else if (wr && ret == -EIOCBQUEUED) + end = vbo + iter_count; + else goto out; - end = vbo + ret; valid = ni->i_valid; if (wr) { if (end > valid && !S_ISBLK(inode->i_mode)) { @@ -1950,6 +1954,7 @@ const struct address_space_operations ntfs_aops = { .direct_IO = ntfs_direct_IO, .bmap = ntfs_bmap, .dirty_folio = block_dirty_folio, + .invalidate_folio = block_invalidate_folio, }; const struct address_space_operations ntfs_aops_cmpr = { diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index afd0ddad826f..5e0e0280e70d 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -112,7 +112,7 @@ static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea, return -ENOMEM; if (!size) { - ; + /* EA info persists, but xattr is empty. Looks like EA problem. */ } else if (attr_ea->non_res) { struct runs_tree run; @@ -259,7 +259,7 @@ out: static noinline int ntfs_set_ea(struct inode *inode, const char *name, size_t name_len, const void *value, - size_t val_size, int flags) + size_t val_size, int flags, bool locked) { struct ntfs_inode *ni = ntfs_i(inode); struct ntfs_sb_info *sbi = ni->mi.sbi; @@ -278,7 +278,8 @@ static noinline int ntfs_set_ea(struct inode *inode, const char *name, u64 new_sz; void *p; - ni_lock(ni); + if (!locked) + ni_lock(ni); run_init(&ea_run); @@ -467,7 +468,8 @@ update_ea: mark_inode_dirty(&ni->vfs_inode); out: - ni_unlock(ni); + if (!locked) + ni_unlock(ni); run_close(&ea_run); kfree(ea_all); @@ -541,7 +543,7 @@ struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu) static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, - int type) + int type, bool init_acl) { const char *name; size_t size, name_len; @@ -554,8 +556,9 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, switch (type) { case ACL_TYPE_ACCESS: - if (acl) { - umode_t mode = inode->i_mode; + /* Do not change i_mode if we are in init_acl */ + if (acl && !init_acl) { + umode_t mode; err = posix_acl_update_mode(mnt_userns, inode, &mode, &acl); @@ -598,7 +601,7 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, flags = 0; } - err = ntfs_set_ea(inode, name, name_len, value, size, flags); + err = ntfs_set_ea(inode, name, name_len, value, size, flags, 0); if (err == -ENODATA && !size) err = 0; /* Removing non existed xattr. */ if (!err) @@ -616,7 +619,68 @@ out: int ntfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type) { - return ntfs_set_acl_ex(mnt_userns, inode, acl, type); + return ntfs_set_acl_ex(mnt_userns, inode, acl, type, false); +} + +static int ntfs_xattr_get_acl(struct user_namespace *mnt_userns, + struct inode *inode, int type, void *buffer, + size_t size) +{ + struct posix_acl *acl; + int err; + + if (!(inode->i_sb->s_flags & SB_POSIXACL)) { + ntfs_inode_warn(inode, "add mount option \"acl\" to use acl"); + return -EOPNOTSUPP; + } + + acl = ntfs_get_acl(inode, type, false); + if (IS_ERR(acl)) + return PTR_ERR(acl); + + if (!acl) + return -ENODATA; + + err = posix_acl_to_xattr(mnt_userns, acl, buffer, size); + posix_acl_release(acl); + + return err; +} + +static int ntfs_xattr_set_acl(struct user_namespace *mnt_userns, + struct inode *inode, int type, const void *value, + size_t size) +{ + struct posix_acl *acl; + int err; + + if (!(inode->i_sb->s_flags & SB_POSIXACL)) { + ntfs_inode_warn(inode, "add mount option \"acl\" to use acl"); + return -EOPNOTSUPP; + } + + if (!inode_owner_or_capable(mnt_userns, inode)) + return -EPERM; + + if (!value) { + acl = NULL; + } else { + acl = posix_acl_from_xattr(mnt_userns, value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + + if (acl) { + err = posix_acl_valid(mnt_userns, acl); + if (err) + goto release_and_out; + } + } + + err = ntfs_set_acl(mnt_userns, inode, acl, type); + +release_and_out: + posix_acl_release(acl); + return err; } /* @@ -636,7 +700,7 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode, if (default_acl) { err = ntfs_set_acl_ex(mnt_userns, inode, default_acl, - ACL_TYPE_DEFAULT); + ACL_TYPE_DEFAULT, true); posix_acl_release(default_acl); } else { inode->i_default_acl = NULL; @@ -647,7 +711,7 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode, else { if (!err) err = ntfs_set_acl_ex(mnt_userns, inode, acl, - ACL_TYPE_ACCESS); + ACL_TYPE_ACCESS, true); posix_acl_release(acl); } @@ -785,6 +849,23 @@ static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de, goto out; } +#ifdef CONFIG_NTFS3_FS_POSIX_ACL + if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 && + !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS, + sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) || + (name_len == sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1 && + !memcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, + sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)))) { + /* TODO: init_user_ns? */ + err = ntfs_xattr_get_acl( + &init_user_ns, inode, + name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 + ? ACL_TYPE_ACCESS + : ACL_TYPE_DEFAULT, + buffer, size); + goto out; + } +#endif /* Deal with NTFS extended attribute. */ err = ntfs_get_ea(inode, name, name_len, buffer, size, NULL); @@ -897,10 +978,29 @@ set_new_fa: goto out; } +#ifdef CONFIG_NTFS3_FS_POSIX_ACL + if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 && + !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS, + sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) || + (name_len == sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1 && + !memcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, + sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)))) { + err = ntfs_xattr_set_acl( + mnt_userns, inode, + name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 + ? ACL_TYPE_ACCESS + : ACL_TYPE_DEFAULT, + value, size); + goto out; + } +#endif /* Deal with NTFS extended attribute. */ - err = ntfs_set_ea(inode, name, name_len, value, size, flags); + err = ntfs_set_ea(inode, name, name_len, value, size, flags, 0); out: + inode->i_ctime = current_time(inode); + mark_inode_dirty(inode); + return err; } @@ -913,35 +1013,37 @@ int ntfs_save_wsl_perm(struct inode *inode) { int err; __le32 value; + struct ntfs_inode *ni = ntfs_i(inode); - /* TODO: refactor this, so we don't lock 4 times in ntfs_set_ea */ + ni_lock(ni); value = cpu_to_le32(i_uid_read(inode)); err = ntfs_set_ea(inode, "$LXUID", sizeof("$LXUID") - 1, &value, - sizeof(value), 0); + sizeof(value), 0, true); /* true == already locked. */ if (err) goto out; value = cpu_to_le32(i_gid_read(inode)); err = ntfs_set_ea(inode, "$LXGID", sizeof("$LXGID") - 1, &value, - sizeof(value), 0); + sizeof(value), 0, true); if (err) goto out; value = cpu_to_le32(inode->i_mode); err = ntfs_set_ea(inode, "$LXMOD", sizeof("$LXMOD") - 1, &value, - sizeof(value), 0); + sizeof(value), 0, true); if (err) goto out; if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { value = cpu_to_le32(inode->i_rdev); err = ntfs_set_ea(inode, "$LXDEV", sizeof("$LXDEV") - 1, &value, - sizeof(value), 0); + sizeof(value), 0, true); if (err) goto out; } out: + ni_unlock(ni); /* In case of error should we delete all WSL xattr? */ return err; } diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index c0b84e960b20..e8b9b756f0ac 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -65,7 +65,7 @@ static void shrink_liability(struct ubifs_info *c, int nr_to_write) */ static int run_gc(struct ubifs_info *c) { - int err, lnum; + int lnum; /* Make some free space by garbage-collecting dirty space */ down_read(&c->commit_sem); @@ -76,10 +76,7 @@ static int run_gc(struct ubifs_info *c) /* GC freed one LEB, return it to lprops */ dbg_budg("GC freed LEB %d", lnum); - err = ubifs_return_leb(c, lnum); - if (err) - return err; - return 0; + return ubifs_return_leb(c, lnum); } /** diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index e4f193eae4b2..e4c4761aff7f 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -677,7 +677,7 @@ int ubifs_init_security(struct inode *dentry, struct inode *inode, int err; err = security_inode_init_security(inode, dentry, qstr, - &init_xattrs, 0); + &init_xattrs, NULL); if (err) { struct ubifs_info *c = dentry->i_sb->s_fs_info; ubifs_err(c, "cannot initialize security for inode %lu, error %d", diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 1e4ee042d52f..3e920cf1b454 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -173,7 +173,6 @@ __xfs_free_perag( struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head); ASSERT(!delayed_work_pending(&pag->pag_blockgc_work)); - ASSERT(atomic_read(&pag->pag_ref) == 0); kmem_free(pag); } @@ -192,7 +191,7 @@ xfs_free_perag( pag = radix_tree_delete(&mp->m_perag_tree, agno); spin_unlock(&mp->m_perag_lock); ASSERT(pag); - ASSERT(atomic_read(&pag->pag_ref) == 0); + XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0); cancel_delayed_work_sync(&pag->pag_blockgc_work); xfs_iunlink_destroy(pag); diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 14ae0826bc15..836ab1b8ed7b 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -25,10 +25,9 @@ #include "xfs_trans_space.h" #include "xfs_trace.h" #include "xfs_attr_item.h" -#include "xfs_log.h" +#include "xfs_xattr.h" -struct kmem_cache *xfs_attri_cache; -struct kmem_cache *xfs_attrd_cache; +struct kmem_cache *xfs_attr_intent_cache; /* * xfs_attr.c @@ -58,11 +57,11 @@ STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args, struct xfs_buf *bp); */ STATIC int xfs_attr_node_get(xfs_da_args_t *args); STATIC void xfs_attr_restore_rmt_blk(struct xfs_da_args *args); -static int xfs_attr_node_try_addname(struct xfs_attr_item *attr); -STATIC int xfs_attr_node_addname_find_attr(struct xfs_attr_item *attr); -STATIC int xfs_attr_node_remove_attr(struct xfs_attr_item *attr); -STATIC int xfs_attr_node_hasname(xfs_da_args_t *args, - struct xfs_da_state **state); +static int xfs_attr_node_try_addname(struct xfs_attr_intent *attr); +STATIC int xfs_attr_node_addname_find_attr(struct xfs_attr_intent *attr); +STATIC int xfs_attr_node_remove_attr(struct xfs_attr_intent *attr); +STATIC int xfs_attr_node_lookup(struct xfs_da_args *args, + struct xfs_da_state *state); int xfs_inode_hasattr( @@ -377,7 +376,7 @@ xfs_attr_try_sf_addname( static int xfs_attr_sf_addname( - struct xfs_attr_item *attr) + struct xfs_attr_intent *attr) { struct xfs_da_args *args = attr->xattri_da_args; struct xfs_inode *dp = args->dp; @@ -423,7 +422,7 @@ out: */ static enum xfs_delattr_state xfs_attr_complete_op( - struct xfs_attr_item *attr, + struct xfs_attr_intent *attr, enum xfs_delattr_state replace_state) { struct xfs_da_args *args = attr->xattri_da_args; @@ -439,7 +438,7 @@ xfs_attr_complete_op( static int xfs_attr_leaf_addname( - struct xfs_attr_item *attr) + struct xfs_attr_intent *attr) { struct xfs_da_args *args = attr->xattri_da_args; int error; @@ -493,7 +492,7 @@ out: */ static int xfs_attr_node_addname( - struct xfs_attr_item *attr) + struct xfs_attr_intent *attr) { struct xfs_da_args *args = attr->xattri_da_args; int error; @@ -530,7 +529,7 @@ out: static int xfs_attr_rmtval_alloc( - struct xfs_attr_item *attr) + struct xfs_attr_intent *attr) { struct xfs_da_args *args = attr->xattri_da_args; int error = 0; @@ -594,6 +593,19 @@ xfs_attr_leaf_mark_incomplete( return xfs_attr3_leaf_setflag(args); } +/* Ensure the da state of an xattr deferred work item is ready to go. */ +static inline void +xfs_attr_item_init_da_state( + struct xfs_attr_intent *attr) +{ + struct xfs_da_args *args = attr->xattri_da_args; + + if (!attr->xattri_da_state) + attr->xattri_da_state = xfs_da_state_alloc(args); + else + xfs_da_state_reset(attr->xattri_da_state, args); +} + /* * Initial setup for xfs_attr_node_removename. Make sure the attr is there and * the blocks are valid. Attr keys with remote blocks will be marked @@ -601,29 +613,33 @@ xfs_attr_leaf_mark_incomplete( */ static int xfs_attr_node_removename_setup( - struct xfs_attr_item *attr) + struct xfs_attr_intent *attr) { struct xfs_da_args *args = attr->xattri_da_args; - struct xfs_da_state **state = &attr->xattri_da_state; + struct xfs_da_state *state; int error; - error = xfs_attr_node_hasname(args, state); + xfs_attr_item_init_da_state(attr); + error = xfs_attr_node_lookup(args, attr->xattri_da_state); if (error != -EEXIST) goto out; error = 0; - ASSERT((*state)->path.blk[(*state)->path.active - 1].bp != NULL); - ASSERT((*state)->path.blk[(*state)->path.active - 1].magic == + state = attr->xattri_da_state; + ASSERT(state->path.blk[state->path.active - 1].bp != NULL); + ASSERT(state->path.blk[state->path.active - 1].magic == XFS_ATTR_LEAF_MAGIC); - error = xfs_attr_leaf_mark_incomplete(args, *state); + error = xfs_attr_leaf_mark_incomplete(args, state); if (error) goto out; if (args->rmtblkno > 0) error = xfs_attr_rmtval_invalidate(args); out: - if (error) - xfs_da_state_free(*state); + if (error) { + xfs_da_state_free(attr->xattri_da_state); + attr->xattri_da_state = NULL; + } return error; } @@ -635,7 +651,7 @@ out: */ static int xfs_attr_leaf_remove_attr( - struct xfs_attr_item *attr) + struct xfs_attr_intent *attr) { struct xfs_da_args *args = attr->xattri_da_args; struct xfs_inode *dp = args->dp; @@ -700,7 +716,7 @@ xfs_attr_leaf_shrink( */ int xfs_attr_set_iter( - struct xfs_attr_item *attr) + struct xfs_attr_intent *attr) { struct xfs_da_args *args = attr->xattri_da_args; int error = 0; @@ -852,6 +868,7 @@ xfs_attr_lookup( { struct xfs_inode *dp = args->dp; struct xfs_buf *bp = NULL; + struct xfs_da_state *state; int error; if (!xfs_inode_hasattr(dp)) @@ -869,19 +886,22 @@ xfs_attr_lookup( return error; } - return xfs_attr_node_hasname(args, NULL); + state = xfs_da_state_alloc(args); + error = xfs_attr_node_lookup(args, state); + xfs_da_state_free(state); + return error; } static int -xfs_attr_item_init( +xfs_attr_intent_init( struct xfs_da_args *args, unsigned int op_flags, /* op flag (set or remove) */ - struct xfs_attr_item **attr) /* new xfs_attr_item */ + struct xfs_attr_intent **attr) /* new xfs_attr_intent */ { - struct xfs_attr_item *new; + struct xfs_attr_intent *new; - new = kmem_zalloc(sizeof(struct xfs_attr_item), KM_NOFS); + new = kmem_cache_zalloc(xfs_attr_intent_cache, GFP_NOFS | __GFP_NOFAIL); new->xattri_op_flags = op_flags; new->xattri_da_args = args; @@ -894,10 +914,10 @@ static int xfs_attr_defer_add( struct xfs_da_args *args) { - struct xfs_attr_item *new; + struct xfs_attr_intent *new; int error = 0; - error = xfs_attr_item_init(args, XFS_ATTR_OP_FLAGS_SET, &new); + error = xfs_attr_intent_init(args, XFS_ATTRI_OP_FLAGS_SET, &new); if (error) return error; @@ -913,10 +933,10 @@ static int xfs_attr_defer_replace( struct xfs_da_args *args) { - struct xfs_attr_item *new; + struct xfs_attr_intent *new; int error = 0; - error = xfs_attr_item_init(args, XFS_ATTR_OP_FLAGS_REPLACE, &new); + error = xfs_attr_intent_init(args, XFS_ATTRI_OP_FLAGS_REPLACE, &new); if (error) return error; @@ -933,10 +953,10 @@ xfs_attr_defer_remove( struct xfs_da_args *args) { - struct xfs_attr_item *new; + struct xfs_attr_intent *new; int error; - error = xfs_attr_item_init(args, XFS_ATTR_OP_FLAGS_REMOVE, &new); + error = xfs_attr_intent_init(args, XFS_ATTRI_OP_FLAGS_REMOVE, &new); if (error) return error; @@ -962,7 +982,6 @@ xfs_attr_set( int error, local; int rmt_blks = 0; unsigned int total; - int delayed = xfs_has_larp(mp); if (xfs_is_shutdown(dp->i_mount)) return -EIO; @@ -1007,12 +1026,6 @@ xfs_attr_set( rmt_blks = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX); } - if (delayed) { - error = xfs_attr_use_log_assist(mp); - if (error) - return error; - } - /* * Root fork attributes can use reserved data blocks for this * operation if necessary @@ -1020,7 +1033,7 @@ xfs_attr_set( xfs_init_attr_trans(args, &tres, &total); error = xfs_trans_alloc_inode(dp, &tres, total, 0, rsvd, &args->trans); if (error) - goto drop_incompat; + return error; if (args->value || xfs_inode_hasattr(dp)) { error = xfs_iext_count_may_overflow(dp, XFS_ATTR_FORK, @@ -1080,9 +1093,6 @@ xfs_attr_set( error = xfs_trans_commit(args->trans); out_unlock: xfs_iunlock(dp, XFS_ILOCK_EXCL); -drop_incompat: - if (delayed) - xlog_drop_incompat_feat(mp->m_log); return error; out_trans_cancel: @@ -1091,40 +1101,6 @@ out_trans_cancel: goto out_unlock; } -int __init -xfs_attri_init_cache(void) -{ - xfs_attri_cache = kmem_cache_create("xfs_attri", - sizeof(struct xfs_attri_log_item), - 0, 0, NULL); - - return xfs_attri_cache != NULL ? 0 : -ENOMEM; -} - -void -xfs_attri_destroy_cache(void) -{ - kmem_cache_destroy(xfs_attri_cache); - xfs_attri_cache = NULL; -} - -int __init -xfs_attrd_init_cache(void) -{ - xfs_attrd_cache = kmem_cache_create("xfs_attrd", - sizeof(struct xfs_attrd_log_item), - 0, 0, NULL); - - return xfs_attrd_cache != NULL ? 0 : -ENOMEM; -} - -void -xfs_attrd_destroy_cache(void) -{ - kmem_cache_destroy(xfs_attrd_cache); - xfs_attrd_cache = NULL; -} - /*======================================================================== * External routines when attribute list is inside the inode *========================================================================*/ @@ -1384,32 +1360,20 @@ xfs_attr_leaf_get(xfs_da_args_t *args) return error; } -/* - * Return EEXIST if attr is found, or ENOATTR if not - * statep: If not null is set to point at the found state. Caller will - * be responsible for freeing the state in this case. - */ +/* Return EEXIST if attr is found, or ENOATTR if not. */ STATIC int -xfs_attr_node_hasname( +xfs_attr_node_lookup( struct xfs_da_args *args, - struct xfs_da_state **statep) + struct xfs_da_state *state) { - struct xfs_da_state *state; int retval, error; - state = xfs_da_state_alloc(args); - if (statep != NULL) - *statep = state; - /* * Search to see if name exists, and get back a pointer to it. */ error = xfs_da3_node_lookup_int(state, &retval); if (error) - retval = error; - - if (!statep) - xfs_da_state_free(state); + return error; return retval; } @@ -1420,7 +1384,7 @@ xfs_attr_node_hasname( STATIC int xfs_attr_node_addname_find_attr( - struct xfs_attr_item *attr) + struct xfs_attr_intent *attr) { struct xfs_da_args *args = attr->xattri_da_args; int error; @@ -1429,7 +1393,8 @@ xfs_attr_node_addname_find_attr( * Search to see if name already exists, and get back a pointer * to where it should go. */ - error = xfs_attr_node_hasname(args, &attr->xattri_da_state); + xfs_attr_item_init_da_state(attr); + error = xfs_attr_node_lookup(args, attr->xattri_da_state); switch (error) { case -ENOATTR: if (args->op_flags & XFS_DA_OP_REPLACE) @@ -1456,8 +1421,10 @@ xfs_attr_node_addname_find_attr( return 0; error: - if (attr->xattri_da_state) + if (attr->xattri_da_state) { xfs_da_state_free(attr->xattri_da_state); + attr->xattri_da_state = NULL; + } return error; } @@ -1470,7 +1437,7 @@ error: */ static int xfs_attr_node_try_addname( - struct xfs_attr_item *attr) + struct xfs_attr_intent *attr) { struct xfs_da_args *args = attr->xattri_da_args; struct xfs_da_state *state = attr->xattri_da_state; @@ -1511,6 +1478,7 @@ xfs_attr_node_try_addname( out: xfs_da_state_free(state); + attr->xattri_da_state = NULL; return error; } @@ -1535,10 +1503,10 @@ xfs_attr_node_removename( static int xfs_attr_node_remove_attr( - struct xfs_attr_item *attr) + struct xfs_attr_intent *attr) { struct xfs_da_args *args = attr->xattri_da_args; - struct xfs_da_state *state = NULL; + struct xfs_da_state *state = xfs_da_state_alloc(args); int retval = 0; int error = 0; @@ -1548,8 +1516,6 @@ xfs_attr_node_remove_attr( * attribute entry after any split ops. */ args->attr_filter |= XFS_ATTR_INCOMPLETE; - state = xfs_da_state_alloc(args); - state->inleaf = 0; error = xfs_da3_node_lookup_int(state, &retval); if (error) goto out; @@ -1567,8 +1533,7 @@ xfs_attr_node_remove_attr( retval = error = 0; out: - if (state) - xfs_da_state_free(state); + xfs_da_state_free(state); if (error) return error; return retval; @@ -1597,7 +1562,8 @@ xfs_attr_node_get( /* * Search to see if name exists, and get back a pointer to it. */ - error = xfs_attr_node_hasname(args, &state); + state = xfs_da_state_alloc(args); + error = xfs_attr_node_lookup(args, state); if (error != -EEXIST) goto out_release; @@ -1616,8 +1582,7 @@ out_release: state->path.blk[i].bp = NULL; } - if (state) - xfs_da_state_free(state); + xfs_da_state_free(state); return error; } @@ -1637,3 +1602,20 @@ xfs_attr_namecheck( /* There shouldn't be any nulls here */ return !memchr(name, 0, length); } + +int __init +xfs_attr_intent_init_cache(void) +{ + xfs_attr_intent_cache = kmem_cache_create("xfs_attr_intent", + sizeof(struct xfs_attr_intent), + 0, 0, NULL); + + return xfs_attr_intent_cache != NULL ? 0 : -ENOMEM; +} + +void +xfs_attr_intent_destroy_cache(void) +{ + kmem_cache_destroy(xfs_attr_intent_cache); + xfs_attr_intent_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 1af7abe29eef..e329da3e7afa 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -31,7 +31,8 @@ struct xfs_attr_list_context; static inline bool xfs_has_larp(struct xfs_mount *mp) { #ifdef DEBUG - return xfs_globals.larp; + /* Logged xattrs require a V5 super for log_incompat */ + return xfs_has_crc(mp) && xfs_globals.larp; #else return false; #endif @@ -434,7 +435,7 @@ struct xfs_attr_list_context { */ /* - * Enum values for xfs_attr_item.xattri_da_state + * Enum values for xfs_attr_intent.xattri_da_state * * These values are used by delayed attribute operations to keep track of where * they were before they returned -EAGAIN. A return code of -EAGAIN signals the @@ -501,44 +502,46 @@ enum xfs_delattr_state { { XFS_DAS_NODE_REMOVE_ATTR, "XFS_DAS_NODE_REMOVE_ATTR" }, \ { XFS_DAS_DONE, "XFS_DAS_DONE" } -/* - * Defines for xfs_attr_item.xattri_flags - */ -#define XFS_DAC_LEAF_ADDNAME_INIT 0x01 /* xfs_attr_leaf_addname init*/ +struct xfs_attri_log_nameval; /* * Context used for keeping track of delayed attribute operations */ -struct xfs_attr_item { +struct xfs_attr_intent { + /* + * used to log this item to an intent containing a list of attrs to + * commit later + */ + struct list_head xattri_list; + + /* Used in xfs_attr_node_removename to roll through removing blocks */ + struct xfs_da_state *xattri_da_state; + struct xfs_da_args *xattri_da_args; /* + * Shared buffer containing the attr name and value so that the logging + * code can share large memory buffers between log items. + */ + struct xfs_attri_log_nameval *xattri_nameval; + + /* * Used by xfs_attr_set to hold a leaf buffer across a transaction roll */ struct xfs_buf *xattri_leaf_bp; - /* Used in xfs_attr_rmtval_set_blk to roll through allocating blocks */ - struct xfs_bmbt_irec xattri_map; - xfs_dablk_t xattri_lblkno; - int xattri_blkcnt; - - /* Used in xfs_attr_node_removename to roll through removing blocks */ - struct xfs_da_state *xattri_da_state; - /* Used to keep track of current state of delayed operation */ - unsigned int xattri_flags; enum xfs_delattr_state xattri_dela_state; /* - * Attr operation being performed - XFS_ATTR_OP_FLAGS_* + * Attr operation being performed - XFS_ATTRI_OP_FLAGS_* */ unsigned int xattri_op_flags; - /* - * used to log this item to an intent containing a list of attrs to - * commit later - */ - struct list_head xattri_list; + /* Used in xfs_attr_rmtval_set_blk to roll through allocating blocks */ + xfs_dablk_t xattri_lblkno; + int xattri_blkcnt; + struct xfs_bmbt_irec xattri_map; }; @@ -557,21 +560,13 @@ bool xfs_attr_is_leaf(struct xfs_inode *ip); int xfs_attr_get_ilocked(struct xfs_da_args *args); int xfs_attr_get(struct xfs_da_args *args); int xfs_attr_set(struct xfs_da_args *args); -int xfs_attr_set_iter(struct xfs_attr_item *attr); -int xfs_attr_remove_iter(struct xfs_attr_item *attr); +int xfs_attr_set_iter(struct xfs_attr_intent *attr); +int xfs_attr_remove_iter(struct xfs_attr_intent *attr); bool xfs_attr_namecheck(const void *name, size_t length); int xfs_attr_calc_size(struct xfs_da_args *args, int *local); void xfs_init_attr_trans(struct xfs_da_args *args, struct xfs_trans_res *tres, unsigned int *total); -extern struct kmem_cache *xfs_attri_cache; -extern struct kmem_cache *xfs_attrd_cache; - -int __init xfs_attri_init_cache(void); -void xfs_attri_destroy_cache(void); -int __init xfs_attrd_init_cache(void); -void xfs_attrd_destroy_cache(void); - /* * Check to see if the attr should be upgraded from non-existent or shortform to * single-leaf-block attribute list. @@ -634,4 +629,8 @@ xfs_attr_init_replace_state(struct xfs_da_args *args) return xfs_attr_init_add_state(args); } +extern struct kmem_cache *xfs_attr_intent_cache; +int __init xfs_attr_intent_init_cache(void); +void xfs_attr_intent_destroy_cache(void); + #endif /* __XFS_ATTR_H__ */ diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 4250159ecced..7298c148f848 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -568,7 +568,7 @@ xfs_attr_rmtval_stale( */ int xfs_attr_rmtval_find_space( - struct xfs_attr_item *attr) + struct xfs_attr_intent *attr) { struct xfs_da_args *args = attr->xattri_da_args; struct xfs_bmbt_irec *map = &attr->xattri_map; @@ -598,7 +598,7 @@ xfs_attr_rmtval_find_space( */ int xfs_attr_rmtval_set_blk( - struct xfs_attr_item *attr) + struct xfs_attr_intent *attr) { struct xfs_da_args *args = attr->xattri_da_args; struct xfs_inode *dp = args->dp; @@ -674,7 +674,7 @@ xfs_attr_rmtval_invalidate( */ int xfs_attr_rmtval_remove( - struct xfs_attr_item *attr) + struct xfs_attr_intent *attr) { struct xfs_da_args *args = attr->xattri_da_args; int error, done; diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h index 62b398edec3f..d097ec6c4dc3 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.h +++ b/fs/xfs/libxfs/xfs_attr_remote.h @@ -12,9 +12,9 @@ int xfs_attr_rmtval_get(struct xfs_da_args *args); int xfs_attr_rmtval_stale(struct xfs_inode *ip, struct xfs_bmbt_irec *map, xfs_buf_flags_t incore_flags); int xfs_attr_rmtval_invalidate(struct xfs_da_args *args); -int xfs_attr_rmtval_remove(struct xfs_attr_item *attr); +int xfs_attr_rmtval_remove(struct xfs_attr_intent *attr); int xfs_attr_rmt_find_hole(struct xfs_da_args *args); int xfs_attr_rmtval_set_value(struct xfs_da_args *args); -int xfs_attr_rmtval_set_blk(struct xfs_attr_item *attr); -int xfs_attr_rmtval_find_space(struct xfs_attr_item *attr); +int xfs_attr_rmtval_set_blk(struct xfs_attr_intent *attr); +int xfs_attr_rmtval_find_space(struct xfs_attr_intent *attr); #endif /* __XFS_ATTR_REMOTE_H__ */ diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 2aa300f7461f..2eecc49fc1b2 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -51,16 +51,31 @@ xfs_btree_magic( return magic; } -static xfs_failaddr_t +/* + * These sibling pointer checks are optimised for null sibling pointers. This + * happens a lot, and we don't need to byte swap at runtime if the sibling + * pointer is NULL. + * + * These are explicitly marked at inline because the cost of calling them as + * functions instead of inlining them is about 36 bytes extra code per call site + * on x86-64. Yes, gcc-11 fails to inline them, and explicit inlining of these + * two sibling check functions reduces the compiled code size by over 300 + * bytes. + */ +static inline xfs_failaddr_t xfs_btree_check_lblock_siblings( struct xfs_mount *mp, struct xfs_btree_cur *cur, int level, xfs_fsblock_t fsb, - xfs_fsblock_t sibling) + __be64 dsibling) { - if (sibling == NULLFSBLOCK) + xfs_fsblock_t sibling; + + if (dsibling == cpu_to_be64(NULLFSBLOCK)) return NULL; + + sibling = be64_to_cpu(dsibling); if (sibling == fsb) return __this_address; if (level >= 0) { @@ -74,17 +89,21 @@ xfs_btree_check_lblock_siblings( return NULL; } -static xfs_failaddr_t +static inline xfs_failaddr_t xfs_btree_check_sblock_siblings( struct xfs_mount *mp, struct xfs_btree_cur *cur, int level, xfs_agnumber_t agno, xfs_agblock_t agbno, - xfs_agblock_t sibling) + __be32 dsibling) { - if (sibling == NULLAGBLOCK) + xfs_agblock_t sibling; + + if (dsibling == cpu_to_be32(NULLAGBLOCK)) return NULL; + + sibling = be32_to_cpu(dsibling); if (sibling == agbno) return __this_address; if (level >= 0) { @@ -136,10 +155,10 @@ __xfs_btree_check_lblock( fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb, - be64_to_cpu(block->bb_u.l.bb_leftsib)); + block->bb_u.l.bb_leftsib); if (!fa) fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb, - be64_to_cpu(block->bb_u.l.bb_rightsib)); + block->bb_u.l.bb_rightsib); return fa; } @@ -204,10 +223,10 @@ __xfs_btree_check_sblock( } fa = xfs_btree_check_sblock_siblings(mp, cur, level, agno, agbno, - be32_to_cpu(block->bb_u.s.bb_leftsib)); + block->bb_u.s.bb_leftsib); if (!fa) fa = xfs_btree_check_sblock_siblings(mp, cur, level, agno, - agbno, be32_to_cpu(block->bb_u.s.bb_rightsib)); + agbno, block->bb_u.s.bb_rightsib); return fa; } @@ -426,8 +445,14 @@ xfs_btree_del_cursor( break; } + /* + * If we are doing a BMBT update, the number of unaccounted blocks + * allocated during this cursor life time should be zero. If it's not + * zero, then we should be shut down or on our way to shutdown due to + * cancelling a dirty transaction on error. + */ ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 || - xfs_is_shutdown(cur->bc_mp)); + xfs_is_shutdown(cur->bc_mp) || error != 0); if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) kmem_free(cur->bc_ops); if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && cur->bc_ag.pag) @@ -3247,7 +3272,7 @@ xfs_btree_insrec( struct xfs_btree_block *block; /* btree block */ struct xfs_buf *bp; /* buffer for block */ union xfs_btree_ptr nptr; /* new block ptr */ - struct xfs_btree_cur *ncur; /* new btree cursor */ + struct xfs_btree_cur *ncur = NULL; /* new btree cursor */ union xfs_btree_key nkey; /* new block key */ union xfs_btree_key *lkey; int optr; /* old key/record index */ @@ -3327,7 +3352,7 @@ xfs_btree_insrec( #ifdef DEBUG error = xfs_btree_check_block(cur, block, level, bp); if (error) - return error; + goto error0; #endif /* @@ -3347,7 +3372,7 @@ xfs_btree_insrec( for (i = numrecs - ptr; i >= 0; i--) { error = xfs_btree_debug_check_ptr(cur, pp, i, level); if (error) - return error; + goto error0; } xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1); @@ -3432,6 +3457,8 @@ xfs_btree_insrec( return 0; error0: + if (ncur) + xfs_btree_del_cursor(ncur, error); return error; } @@ -4523,10 +4550,10 @@ xfs_btree_lblock_verify( /* sibling pointer verification */ fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb, - be64_to_cpu(block->bb_u.l.bb_leftsib)); + block->bb_u.l.bb_leftsib); if (!fa) fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb, - be64_to_cpu(block->bb_u.l.bb_rightsib)); + block->bb_u.l.bb_rightsib); return fa; } @@ -4580,10 +4607,10 @@ xfs_btree_sblock_verify( agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp)); agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp)); fa = xfs_btree_check_sblock_siblings(mp, NULL, -1, agno, agbno, - be32_to_cpu(block->bb_u.s.bb_leftsib)); + block->bb_u.s.bb_leftsib); if (!fa) fa = xfs_btree_check_sblock_siblings(mp, NULL, -1, agno, agbno, - be32_to_cpu(block->bb_u.s.bb_rightsib)); + block->bb_u.s.bb_rightsib); return fa; } diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index aa74f3fdb571..e7201dc68f43 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -117,6 +117,17 @@ xfs_da_state_free(xfs_da_state_t *state) kmem_cache_free(xfs_da_state_cache, state); } +void +xfs_da_state_reset( + struct xfs_da_state *state, + struct xfs_da_args *args) +{ + xfs_da_state_kill_altpath(state); + memset(state, 0, sizeof(struct xfs_da_state)); + state->args = args; + state->mp = state->args->dp->i_mount; +} + static inline int xfs_dabuf_nfsb(struct xfs_mount *mp, int whichfork) { if (whichfork == XFS_DATA_FORK) diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index ed2303e4d46a..d33b7686a0b3 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -225,6 +225,7 @@ enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args, struct xfs_da_state *xfs_da_state_alloc(struct xfs_da_args *args); void xfs_da_state_free(xfs_da_state_t *state); +void xfs_da_state_reset(struct xfs_da_state *state, struct xfs_da_args *args); void xfs_da3_node_hdr_from_disk(struct xfs_mount *mp, struct xfs_da3_icnode_hdr *to, struct xfs_da_intnode *from); diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index ceb222b4f261..5a321b783398 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -191,35 +191,56 @@ static const struct xfs_defer_op_type *defer_op_types[] = { [XFS_DEFER_OPS_TYPE_ATTR] = &xfs_attr_defer_type, }; -static bool +/* + * Ensure there's a log intent item associated with this deferred work item if + * the operation must be restarted on crash. Returns 1 if there's a log item; + * 0 if there isn't; or a negative errno. + */ +static int xfs_defer_create_intent( struct xfs_trans *tp, struct xfs_defer_pending *dfp, bool sort) { const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type]; + struct xfs_log_item *lip; + + if (dfp->dfp_intent) + return 1; - if (!dfp->dfp_intent) - dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work, - dfp->dfp_count, sort); - return dfp->dfp_intent != NULL; + lip = ops->create_intent(tp, &dfp->dfp_work, dfp->dfp_count, sort); + if (!lip) + return 0; + if (IS_ERR(lip)) + return PTR_ERR(lip); + + dfp->dfp_intent = lip; + return 1; } /* * For each pending item in the intake list, log its intent item and the * associated extents, then add the entire intake list to the end of * the pending list. + * + * Returns 1 if at least one log item was associated with the deferred work; + * 0 if there are no log items; or a negative errno. */ -static bool +static int xfs_defer_create_intents( struct xfs_trans *tp) { struct xfs_defer_pending *dfp; - bool ret = false; + int ret = 0; list_for_each_entry(dfp, &tp->t_dfops, dfp_list) { + int ret2; + trace_xfs_defer_create_intent(tp->t_mountp, dfp); - ret |= xfs_defer_create_intent(tp, dfp, true); + ret2 = xfs_defer_create_intent(tp, dfp, true); + if (ret2 < 0) + return ret2; + ret |= ret2; } return ret; } @@ -457,6 +478,8 @@ xfs_defer_finish_one( dfp->dfp_count--; error = ops->finish_item(tp, dfp->dfp_done, li, &state); if (error == -EAGAIN) { + int ret; + /* * Caller wants a fresh transaction; put the work item * back on the list and log a new log intent item to @@ -467,7 +490,9 @@ xfs_defer_finish_one( dfp->dfp_count++; dfp->dfp_done = NULL; dfp->dfp_intent = NULL; - xfs_defer_create_intent(tp, dfp, false); + ret = xfs_defer_create_intent(tp, dfp, false); + if (ret < 0) + error = ret; } if (error) @@ -514,10 +539,14 @@ xfs_defer_finish_noroll( * of time that any one intent item can stick around in memory, * pinning the log tail. */ - bool has_intents = xfs_defer_create_intents(*tp); + int has_intents = xfs_defer_create_intents(*tp); list_splice_init(&(*tp)->t_dfops, &dop_pending); + if (has_intents < 0) { + error = has_intents; + goto out_shutdown; + } if (has_intents || dfp) { error = xfs_defer_trans_roll(tp); if (error) @@ -676,13 +705,15 @@ xfs_defer_ops_capture( if (list_empty(&tp->t_dfops)) return NULL; + error = xfs_defer_create_intents(tp); + if (error < 0) + return ERR_PTR(error); + /* Create an object to capture the defer ops. */ dfc = kmem_zalloc(sizeof(*dfc), KM_NOFS); INIT_LIST_HEAD(&dfc->dfc_list); INIT_LIST_HEAD(&dfc->dfc_dfops); - xfs_defer_create_intents(tp); - /* Move the dfops chain and transaction state to the capture struct. */ list_splice_init(&tp->t_dfops, &dfc->dfc_dfops); dfc->dfc_tpflags = tp->t_flags & XFS_TRANS_LOWMODE; @@ -759,6 +790,10 @@ xfs_defer_ops_capture_and_commit( /* If we don't capture anything, commit transaction and exit. */ dfc = xfs_defer_ops_capture(tp); + if (IS_ERR(dfc)) { + xfs_trans_cancel(tp); + return PTR_ERR(dfc); + } if (!dfc) return xfs_trans_commit(tp); @@ -873,10 +908,7 @@ xfs_defer_init_item_caches(void) error = xfs_extfree_intent_init_cache(); if (error) goto err; - error = xfs_attri_init_cache(); - if (error) - goto err; - error = xfs_attrd_init_cache(); + error = xfs_attr_intent_init_cache(); if (error) goto err; return 0; @@ -889,8 +921,7 @@ err: void xfs_defer_destroy_item_caches(void) { - xfs_attri_destroy_cache(); - xfs_attrd_destroy_cache(); + xfs_attr_intent_destroy_cache(); xfs_extfree_intent_destroy_cache(); xfs_bmap_intent_destroy_cache(); xfs_refcount_intent_destroy_cache(); diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index f7edd1ecf6d9..b351b9dc6561 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -906,10 +906,18 @@ struct xfs_icreate_log { * Flags for deferred attribute operations. * Upper bits are flags, lower byte is type code */ -#define XFS_ATTR_OP_FLAGS_SET 1 /* Set the attribute */ -#define XFS_ATTR_OP_FLAGS_REMOVE 2 /* Remove the attribute */ -#define XFS_ATTR_OP_FLAGS_REPLACE 3 /* Replace the attribute */ -#define XFS_ATTR_OP_FLAGS_TYPE_MASK 0xFF /* Flags type mask */ +#define XFS_ATTRI_OP_FLAGS_SET 1 /* Set the attribute */ +#define XFS_ATTRI_OP_FLAGS_REMOVE 2 /* Remove the attribute */ +#define XFS_ATTRI_OP_FLAGS_REPLACE 3 /* Replace the attribute */ +#define XFS_ATTRI_OP_FLAGS_TYPE_MASK 0xFF /* Flags type mask */ + +/* + * alfi_attr_filter captures the state of xfs_da_args.attr_filter, so it should + * never have any other bits set. + */ +#define XFS_ATTRI_FILTER_MASK (XFS_ATTR_ROOT | \ + XFS_ATTR_SECURE | \ + XFS_ATTR_INCOMPLETE) /* * This is the structure used to lay out an attr log item in the @@ -924,7 +932,7 @@ struct xfs_attri_log_format { uint32_t alfi_op_flags; /* marks the op as a set or remove */ uint32_t alfi_name_len; /* attr name length */ uint32_t alfi_value_len; /* attr value length */ - uint32_t alfi_attr_flags;/* attr flags */ + uint32_t alfi_attr_filter;/* attr filter flags */ }; struct xfs_attrd_log_format { diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h index 32e216255cb0..2420865f3007 100644 --- a/fs/xfs/libxfs/xfs_log_recover.h +++ b/fs/xfs/libxfs/xfs_log_recover.h @@ -110,12 +110,6 @@ struct xlog_recover { #define ITEM_TYPE(i) (*(unsigned short *)(i)->ri_buf[0].i_addr) -/* - * This is the number of entries in the l_buf_cancel_table used during - * recovery. - */ -#define XLOG_BC_TABLE_SIZE 64 - #define XLOG_RECOVER_CRCPASS 0 #define XLOG_RECOVER_PASS1 1 #define XLOG_RECOVER_PASS2 2 @@ -128,5 +122,13 @@ int xlog_recover_iget(struct xfs_mount *mp, xfs_ino_t ino, struct xfs_inode **ipp); void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type, uint64_t intent_id); +int xlog_alloc_buf_cancel_table(struct xlog *log); +void xlog_free_buf_cancel_table(struct xlog *log); + +#ifdef DEBUG +void xlog_check_buf_cancel_table(struct xlog *log); +#else +#define xlog_check_buf_cancel_table(log) do { } while (0) +#endif #endif /* __XFS_LOG_RECOVER_H__ */ diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index f0b38f4aba80..8b9bd178a487 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -213,7 +213,7 @@ xfs_symlink_shortform_verify( /* * Zero length symlinks should never occur in memory as they are - * never alllowed to exist on disk. + * never allowed to exist on disk. */ if (!size) return __this_address; diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index b11870d07c56..2e8e400f10a9 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -340,20 +340,6 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { }, }; -/* This isn't a stable feature, warn once per day. */ -static inline void -xchk_experimental_warning( - struct xfs_mount *mp) -{ - static struct ratelimit_state scrub_warning = RATELIMIT_STATE_INIT( - "xchk_warning", 86400 * HZ, 1); - ratelimit_set_flags(&scrub_warning, RATELIMIT_MSG_ON_RELEASE); - - if (__ratelimit(&scrub_warning)) - xfs_alert(mp, -"EXPERIMENTAL online scrub feature in use. Use at your own risk!"); -} - static int xchk_validate_inputs( struct xfs_mount *mp, @@ -478,7 +464,8 @@ xfs_scrub_metadata( if (error) goto out; - xchk_experimental_warning(mp); + xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SCRUB, + "EXPERIMENTAL online scrub feature in use. Use at your own risk!"); sc = kmem_zalloc(sizeof(struct xfs_scrub), KM_NOFS | KM_MAYFAIL); if (!sc) { diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 3df9c1782ead..b744c62052b6 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -17,6 +17,7 @@ #include "xfs_error.h" #include "xfs_acl.h" #include "xfs_trans.h" +#include "xfs_xattr.h" #include <linux/posix_acl_xattr.h> @@ -202,7 +203,7 @@ __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) xfs_acl_to_disk(args.value, acl); } - error = xfs_attr_set(&args); + error = xfs_attr_change(&args); kmem_free(args.value); /* diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index e8ac88d9fd14..4a28c2d77070 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -22,13 +22,15 @@ #include "xfs_attr.h" #include "xfs_attr_item.h" #include "xfs_trace.h" -#include "xfs_inode.h" #include "xfs_trans_space.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_log_priv.h" #include "xfs_log_recover.h" +struct kmem_cache *xfs_attri_cache; +struct kmem_cache *xfs_attrd_cache; + static const struct xfs_item_ops xfs_attri_item_ops; static const struct xfs_item_ops xfs_attrd_item_ops; static struct xfs_attrd_log_item *xfs_trans_get_attrd(struct xfs_trans *tp, @@ -39,12 +41,80 @@ static inline struct xfs_attri_log_item *ATTRI_ITEM(struct xfs_log_item *lip) return container_of(lip, struct xfs_attri_log_item, attri_item); } +/* + * Shared xattr name/value buffers for logged extended attribute operations + * + * When logging updates to extended attributes, we can create quite a few + * attribute log intent items for a single xattr update. To avoid cycling the + * memory allocator and memcpy overhead, the name (and value, for setxattr) + * are kept in a refcounted object that is shared across all related log items + * and the upper-level deferred work state structure. The shared buffer has + * a control structure, followed by the name, and then the value. + */ + +static inline struct xfs_attri_log_nameval * +xfs_attri_log_nameval_get( + struct xfs_attri_log_nameval *nv) +{ + if (!refcount_inc_not_zero(&nv->refcount)) + return NULL; + return nv; +} + +static inline void +xfs_attri_log_nameval_put( + struct xfs_attri_log_nameval *nv) +{ + if (!nv) + return; + if (refcount_dec_and_test(&nv->refcount)) + kvfree(nv); +} + +static inline struct xfs_attri_log_nameval * +xfs_attri_log_nameval_alloc( + const void *name, + unsigned int name_len, + const void *value, + unsigned int value_len) +{ + struct xfs_attri_log_nameval *nv; + + /* + * This could be over 64kB in length, so we have to use kvmalloc() for + * this. But kvmalloc() utterly sucks, so we use our own version. + */ + nv = xlog_kvmalloc(sizeof(struct xfs_attri_log_nameval) + + name_len + value_len); + if (!nv) + return nv; + + nv->name.i_addr = nv + 1; + nv->name.i_len = name_len; + nv->name.i_type = XLOG_REG_TYPE_ATTR_NAME; + memcpy(nv->name.i_addr, name, name_len); + + if (value_len) { + nv->value.i_addr = nv->name.i_addr + name_len; + nv->value.i_len = value_len; + memcpy(nv->value.i_addr, value, value_len); + } else { + nv->value.i_addr = NULL; + nv->value.i_len = 0; + } + nv->value.i_type = XLOG_REG_TYPE_ATTR_VALUE; + + refcount_set(&nv->refcount, 1); + return nv; +} + STATIC void xfs_attri_item_free( struct xfs_attri_log_item *attrip) { kmem_free(attrip->attri_item.li_lv_shadow); - kvfree(attrip); + xfs_attri_log_nameval_put(attrip->attri_nameval); + kmem_cache_free(xfs_attri_cache, attrip); } /* @@ -73,16 +143,17 @@ xfs_attri_item_size( int *nbytes) { struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip); + struct xfs_attri_log_nameval *nv = attrip->attri_nameval; *nvecs += 2; *nbytes += sizeof(struct xfs_attri_log_format) + - xlog_calc_iovec_len(attrip->attri_name_len); + xlog_calc_iovec_len(nv->name.i_len); - if (!attrip->attri_value_len) + if (!nv->value.i_len) return; *nvecs += 1; - *nbytes += xlog_calc_iovec_len(attrip->attri_value_len); + *nbytes += xlog_calc_iovec_len(nv->value.i_len); } /* @@ -97,6 +168,7 @@ xfs_attri_item_format( { struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip); struct xfs_log_iovec *vecp = NULL; + struct xfs_attri_log_nameval *nv = attrip->attri_nameval; attrip->attri_format.alfi_type = XFS_LI_ATTRI; attrip->attri_format.alfi_size = 1; @@ -108,22 +180,18 @@ xfs_attri_item_format( * the log recovery. */ - ASSERT(attrip->attri_name_len > 0); + ASSERT(nv->name.i_len > 0); attrip->attri_format.alfi_size++; - if (attrip->attri_value_len > 0) + if (nv->value.i_len > 0) attrip->attri_format.alfi_size++; xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTRI_FORMAT, &attrip->attri_format, sizeof(struct xfs_attri_log_format)); - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NAME, - attrip->attri_name, - attrip->attri_name_len); - if (attrip->attri_value_len > 0) - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_VALUE, - attrip->attri_value, - attrip->attri_value_len); + xlog_copy_from_iovec(lv, &vecp, &nv->name); + if (nv->value.i_len > 0) + xlog_copy_from_iovec(lv, &vecp, &nv->value); } /* @@ -158,41 +226,18 @@ xfs_attri_item_release( STATIC struct xfs_attri_log_item * xfs_attri_init( struct xfs_mount *mp, - uint32_t name_len, - uint32_t value_len) - + struct xfs_attri_log_nameval *nv) { struct xfs_attri_log_item *attrip; - uint32_t buffer_size = name_len + value_len; - if (buffer_size) { - /* - * This could be over 64kB in length, so we have to use - * kvmalloc() for this. But kvmalloc() utterly sucks, so we - * use own version. - */ - attrip = xlog_kvmalloc(sizeof(struct xfs_attri_log_item) + - buffer_size); - } else { - attrip = kmem_cache_alloc(xfs_attri_cache, - GFP_NOFS | __GFP_NOFAIL); - } - memset(attrip, 0, sizeof(struct xfs_attri_log_item)); + attrip = kmem_cache_zalloc(xfs_attri_cache, GFP_NOFS | __GFP_NOFAIL); - attrip->attri_name_len = name_len; - if (name_len) - attrip->attri_name = ((char *)attrip) + - sizeof(struct xfs_attri_log_item); - else - attrip->attri_name = NULL; - - attrip->attri_value_len = value_len; - if (value_len) - attrip->attri_value = ((char *)attrip) + - sizeof(struct xfs_attri_log_item) + - name_len; - else - attrip->attri_value = NULL; + /* + * Grab an extra reference to the name/value buffer for this log item. + * The caller retains its own reference! + */ + attrip->attri_nameval = xfs_attri_log_nameval_get(nv); + ASSERT(attrip->attri_nameval); xfs_log_item_init(mp, &attrip->attri_item, XFS_LI_ATTRI, &xfs_attri_item_ops); @@ -233,7 +278,7 @@ STATIC void xfs_attrd_item_free(struct xfs_attrd_log_item *attrdp) { kmem_free(attrdp->attrd_item.li_lv_shadow); - kmem_free(attrdp); + kmem_cache_free(xfs_attrd_cache, attrdp); } STATIC void @@ -297,7 +342,7 @@ xfs_attrd_item_intent( */ STATIC int xfs_xattri_finish_update( - struct xfs_attr_item *attr, + struct xfs_attr_intent *attr, struct xfs_attrd_log_item *attrdp) { struct xfs_da_args *args = attr->xattri_da_args; @@ -335,7 +380,7 @@ STATIC void xfs_attr_log_item( struct xfs_trans *tp, struct xfs_attri_log_item *attrip, - struct xfs_attr_item *attr) + const struct xfs_attr_intent *attr) { struct xfs_attri_log_format *attrp; @@ -343,23 +388,18 @@ xfs_attr_log_item( set_bit(XFS_LI_DIRTY, &attrip->attri_item.li_flags); /* - * At this point the xfs_attr_item has been constructed, and we've + * At this point the xfs_attr_intent has been constructed, and we've * created the log intent. Fill in the attri log item and log format - * structure with fields from this xfs_attr_item + * structure with fields from this xfs_attr_intent */ attrp = &attrip->attri_format; attrp->alfi_ino = attr->xattri_da_args->dp->i_ino; + ASSERT(!(attr->xattri_op_flags & ~XFS_ATTRI_OP_FLAGS_TYPE_MASK)); attrp->alfi_op_flags = attr->xattri_op_flags; - attrp->alfi_value_len = attr->xattri_da_args->valuelen; - attrp->alfi_name_len = attr->xattri_da_args->namelen; - attrp->alfi_attr_flags = attr->xattri_da_args->attr_filter; - - memcpy(attrip->attri_name, attr->xattri_da_args->name, - attr->xattri_da_args->namelen); - memcpy(attrip->attri_value, attr->xattri_da_args->value, - attr->xattri_da_args->valuelen); - attrip->attri_name_len = attr->xattri_da_args->namelen; - attrip->attri_value_len = attr->xattri_da_args->valuelen; + attrp->alfi_value_len = attr->xattri_nameval->value.i_len; + attrp->alfi_name_len = attr->xattri_nameval->name.i_len; + ASSERT(!(attr->xattri_da_args->attr_filter & ~XFS_ATTRI_FILTER_MASK)); + attrp->alfi_attr_filter = attr->xattri_da_args->attr_filter; } /* Get an ATTRI. */ @@ -372,7 +412,7 @@ xfs_attr_create_intent( { struct xfs_mount *mp = tp->t_mountp; struct xfs_attri_log_item *attrip; - struct xfs_attr_item *attr; + struct xfs_attr_intent *attr; ASSERT(count == 1); @@ -383,19 +423,47 @@ xfs_attr_create_intent( * Each attr item only performs one attribute operation at a time, so * this is a list of one */ - list_for_each_entry(attr, items, xattri_list) { - attrip = xfs_attri_init(mp, attr->xattri_da_args->namelen, - attr->xattri_da_args->valuelen); - if (attrip == NULL) - return NULL; - - xfs_trans_add_item(tp, &attrip->attri_item); - xfs_attr_log_item(tp, attrip, attr); + attr = list_first_entry_or_null(items, struct xfs_attr_intent, + xattri_list); + + /* + * Create a buffer to store the attribute name and value. This buffer + * will be shared between the higher level deferred xattr work state + * and the lower level xattr log items. + */ + if (!attr->xattri_nameval) { + struct xfs_da_args *args = attr->xattri_da_args; + + /* + * Transfer our reference to the name/value buffer to the + * deferred work state structure. + */ + attr->xattri_nameval = xfs_attri_log_nameval_alloc(args->name, + args->namelen, args->value, args->valuelen); } + if (!attr->xattri_nameval) + return ERR_PTR(-ENOMEM); + + attrip = xfs_attri_init(mp, attr->xattri_nameval); + xfs_trans_add_item(tp, &attrip->attri_item); + xfs_attr_log_item(tp, attrip, attr); return &attrip->attri_item; } +static inline void +xfs_attr_free_item( + struct xfs_attr_intent *attr) +{ + if (attr->xattri_da_state) + xfs_da_state_free(attr->xattri_da_state); + xfs_attri_log_nameval_put(attr->xattri_nameval); + if (attr->xattri_da_args->op_flags & XFS_DA_OP_RECOVERY) + kmem_free(attr); + else + kmem_cache_free(xfs_attr_intent_cache, attr); +} + /* Process an attr. */ STATIC int xfs_attr_finish_item( @@ -404,11 +472,11 @@ xfs_attr_finish_item( struct list_head *item, struct xfs_btree_cur **state) { - struct xfs_attr_item *attr; + struct xfs_attr_intent *attr; struct xfs_attrd_log_item *done_item = NULL; int error; - attr = container_of(item, struct xfs_attr_item, xattri_list); + attr = container_of(item, struct xfs_attr_intent, xattri_list); if (done) done_item = ATTRD_ITEM(done); @@ -420,7 +488,7 @@ xfs_attr_finish_item( error = xfs_xattri_finish_update(attr, done_item); if (error != -EAGAIN) - kmem_free(attr); + xfs_attr_free_item(attr); return error; } @@ -438,33 +506,10 @@ STATIC void xfs_attr_cancel_item( struct list_head *item) { - struct xfs_attr_item *attr; - - attr = container_of(item, struct xfs_attr_item, xattri_list); - kmem_free(attr); -} - -STATIC xfs_lsn_t -xfs_attri_item_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip); - - /* - * The attrip refers to xfs_attr_item memory to log the name and value - * with the intent item. This already occurred when the intent was - * committed so these fields are no longer accessed. Clear them out of - * caution since we're about to free the xfs_attr_item. - */ - attrip->attri_name = NULL; - attrip->attri_value = NULL; + struct xfs_attr_intent *attr; - /* - * The ATTRI is logged only once and cannot be moved in the log, so - * simply return the lsn at which it's been logged. - */ - return lsn; + attr = container_of(item, struct xfs_attr_intent, xattri_list); + xfs_attr_free_item(attr); } STATIC bool @@ -482,16 +527,22 @@ xfs_attri_validate( struct xfs_attri_log_format *attrp) { unsigned int op = attrp->alfi_op_flags & - XFS_ATTR_OP_FLAGS_TYPE_MASK; + XFS_ATTRI_OP_FLAGS_TYPE_MASK; if (attrp->__pad != 0) return false; + if (attrp->alfi_op_flags & ~XFS_ATTRI_OP_FLAGS_TYPE_MASK) + return false; + + if (attrp->alfi_attr_filter & ~XFS_ATTRI_FILTER_MASK) + return false; + /* alfi_op_flags should be either a set or remove */ switch (op) { - case XFS_ATTR_OP_FLAGS_SET: - case XFS_ATTR_OP_FLAGS_REPLACE: - case XFS_ATTR_OP_FLAGS_REMOVE: + case XFS_ATTRI_OP_FLAGS_SET: + case XFS_ATTRI_OP_FLAGS_REPLACE: + case XFS_ATTRI_OP_FLAGS_REMOVE: break; default: return false; @@ -517,13 +568,14 @@ xfs_attri_item_recover( struct list_head *capture_list) { struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip); - struct xfs_attr_item *attr; + struct xfs_attr_intent *attr; struct xfs_mount *mp = lip->li_log->l_mp; struct xfs_inode *ip; struct xfs_da_args *args; struct xfs_trans *tp; struct xfs_trans_res tres; struct xfs_attri_log_format *attrp; + struct xfs_attri_log_nameval *nv = attrip->attri_nameval; int error, ret = 0; int total; int local; @@ -535,41 +587,50 @@ xfs_attri_item_recover( */ attrp = &attrip->attri_format; if (!xfs_attri_validate(mp, attrp) || - !xfs_attr_namecheck(attrip->attri_name, attrip->attri_name_len)) + !xfs_attr_namecheck(nv->name.i_addr, nv->name.i_len)) return -EFSCORRUPTED; error = xlog_recover_iget(mp, attrp->alfi_ino, &ip); if (error) return error; - attr = kmem_zalloc(sizeof(struct xfs_attr_item) + + attr = kmem_zalloc(sizeof(struct xfs_attr_intent) + sizeof(struct xfs_da_args), KM_NOFS); args = (struct xfs_da_args *)(attr + 1); attr->xattri_da_args = args; - attr->xattri_op_flags = attrp->alfi_op_flags; + attr->xattri_op_flags = attrp->alfi_op_flags & + XFS_ATTRI_OP_FLAGS_TYPE_MASK; + + /* + * We're reconstructing the deferred work state structure from the + * recovered log item. Grab a reference to the name/value buffer and + * attach it to the new work state. + */ + attr->xattri_nameval = xfs_attri_log_nameval_get(nv); + ASSERT(attr->xattri_nameval); args->dp = ip; args->geo = mp->m_attr_geo; args->whichfork = XFS_ATTR_FORK; - args->name = attrip->attri_name; - args->namelen = attrp->alfi_name_len; + args->name = nv->name.i_addr; + args->namelen = nv->name.i_len; args->hashval = xfs_da_hashname(args->name, args->namelen); - args->attr_filter = attrp->alfi_attr_flags; + args->attr_filter = attrp->alfi_attr_filter & XFS_ATTRI_FILTER_MASK; args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT; - switch (attrp->alfi_op_flags & XFS_ATTR_OP_FLAGS_TYPE_MASK) { - case XFS_ATTR_OP_FLAGS_SET: - case XFS_ATTR_OP_FLAGS_REPLACE: - args->value = attrip->attri_value; - args->valuelen = attrp->alfi_value_len; + switch (attr->xattri_op_flags) { + case XFS_ATTRI_OP_FLAGS_SET: + case XFS_ATTRI_OP_FLAGS_REPLACE: + args->value = nv->value.i_addr; + args->valuelen = nv->value.i_len; args->total = xfs_attr_calc_size(args, &local); if (xfs_inode_hasattr(args->dp)) attr->xattri_dela_state = xfs_attr_init_replace_state(args); else attr->xattri_dela_state = xfs_attr_init_add_state(args); break; - case XFS_ATTR_OP_FLAGS_REMOVE: + case XFS_ATTRI_OP_FLAGS_REMOVE: if (!xfs_inode_hasattr(args->dp)) goto out; attr->xattri_dela_state = xfs_attr_init_remove_state(args); @@ -613,7 +674,7 @@ out_unlock: xfs_irele(ip); out: if (ret != -EAGAIN) - kmem_free(attr); + xfs_attr_free_item(attr); return error; } @@ -636,22 +697,18 @@ xfs_attri_item_relog( attrdp = xfs_trans_get_attrd(tp, old_attrip); set_bit(XFS_LI_DIRTY, &attrdp->attrd_item.li_flags); - new_attrip = xfs_attri_init(tp->t_mountp, old_attrp->alfi_name_len, - old_attrp->alfi_value_len); + /* + * Create a new log item that shares the same name/value buffer as the + * old log item. + */ + new_attrip = xfs_attri_init(tp->t_mountp, old_attrip->attri_nameval); new_attrp = &new_attrip->attri_format; new_attrp->alfi_ino = old_attrp->alfi_ino; new_attrp->alfi_op_flags = old_attrp->alfi_op_flags; new_attrp->alfi_value_len = old_attrp->alfi_value_len; new_attrp->alfi_name_len = old_attrp->alfi_name_len; - new_attrp->alfi_attr_flags = old_attrp->alfi_attr_flags; - - memcpy(new_attrip->attri_name, old_attrip->attri_name, - new_attrip->attri_name_len); - - if (new_attrip->attri_value_len > 0) - memcpy(new_attrip->attri_value, old_attrip->attri_value, - new_attrip->attri_value_len); + new_attrp->alfi_attr_filter = old_attrp->alfi_attr_filter; xfs_trans_add_item(tp, &new_attrip->attri_item); set_bit(XFS_LI_DIRTY, &new_attrip->attri_item.li_flags); @@ -666,46 +723,46 @@ xlog_recover_attri_commit_pass2( struct xlog_recover_item *item, xfs_lsn_t lsn) { - int error; struct xfs_mount *mp = log->l_mp; struct xfs_attri_log_item *attrip; struct xfs_attri_log_format *attri_formatp; - int region = 0; + struct xfs_attri_log_nameval *nv; + const void *attr_value = NULL; + const void *attr_name; + int error; - attri_formatp = item->ri_buf[region].i_addr; + attri_formatp = item->ri_buf[0].i_addr; + attr_name = item->ri_buf[1].i_addr; - /* Validate xfs_attri_log_format */ + /* Validate xfs_attri_log_format before the large memory allocation */ if (!xfs_attri_validate(mp, attri_formatp)) { XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); return -EFSCORRUPTED; } - /* memory alloc failure will cause replay to abort */ - attrip = xfs_attri_init(mp, attri_formatp->alfi_name_len, - attri_formatp->alfi_value_len); - if (attrip == NULL) - return -ENOMEM; + if (!xfs_attr_namecheck(attr_name, attri_formatp->alfi_name_len)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } - error = xfs_attri_copy_format(&item->ri_buf[region], - &attrip->attri_format); - if (error) - goto out; + if (attri_formatp->alfi_value_len) + attr_value = item->ri_buf[2].i_addr; - region++; - memcpy(attrip->attri_name, item->ri_buf[region].i_addr, - attrip->attri_name_len); + /* + * Memory alloc failure will cause replay to abort. We attach the + * name/value buffer to the recovered incore log item and drop our + * reference. + */ + nv = xfs_attri_log_nameval_alloc(attr_name, + attri_formatp->alfi_name_len, attr_value, + attri_formatp->alfi_value_len); + if (!nv) + return -ENOMEM; - if (!xfs_attr_namecheck(attrip->attri_name, attrip->attri_name_len)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); - error = -EFSCORRUPTED; + attrip = xfs_attri_init(mp, nv); + error = xfs_attri_copy_format(&item->ri_buf[0], &attrip->attri_format); + if (error) goto out; - } - - if (attrip->attri_value_len > 0) { - region++; - memcpy(attrip->attri_value, item->ri_buf[region].i_addr, - attrip->attri_value_len); - } /* * The ATTRI has two references. One for the ATTRD and one for ATTRI to @@ -715,9 +772,11 @@ xlog_recover_attri_commit_pass2( */ xfs_trans_ail_insert(log->l_ailp, &attrip->attri_item, lsn); xfs_attri_release(attrip); + xfs_attri_log_nameval_put(nv); return 0; out: xfs_attri_item_free(attrip); + xfs_attri_log_nameval_put(nv); return error; } @@ -797,7 +856,6 @@ static const struct xfs_item_ops xfs_attri_item_ops = { .iop_size = xfs_attri_item_size, .iop_format = xfs_attri_item_format, .iop_unpin = xfs_attri_item_unpin, - .iop_committed = xfs_attri_item_committed, .iop_release = xfs_attri_item_release, .iop_recover = xfs_attri_item_recover, .iop_match = xfs_attri_item_match, diff --git a/fs/xfs/xfs_attr_item.h b/fs/xfs/xfs_attr_item.h index c3b779f82adb..3280a7930287 100644 --- a/fs/xfs/xfs_attr_item.h +++ b/fs/xfs/xfs_attr_item.h @@ -11,25 +11,30 @@ struct xfs_mount; struct kmem_zone; +struct xfs_attri_log_nameval { + struct xfs_log_iovec name; + struct xfs_log_iovec value; + refcount_t refcount; + + /* name and value follow the end of this struct */ +}; + /* * This is the "attr intention" log item. It is used to log the fact that some * extended attribute operations need to be processed. An operation is * currently either a set or remove. Set or remove operations are described by - * the xfs_attr_item which may be logged to this intent. + * the xfs_attr_intent which may be logged to this intent. * * During a normal attr operation, name and value point to the name and value * fields of the caller's xfs_da_args structure. During a recovery, the name * and value buffers are copied from the log, and stored in a trailing buffer - * attached to the xfs_attr_item until they are committed. They are freed when - * the xfs_attr_item itself is freed when the work is done. + * attached to the xfs_attr_intent until they are committed. They are freed + * when the xfs_attr_intent itself is freed when the work is done. */ struct xfs_attri_log_item { struct xfs_log_item attri_item; atomic_t attri_refcount; - int attri_name_len; - int attri_value_len; - void *attri_name; - void *attri_value; + struct xfs_attri_log_nameval *attri_nameval; struct xfs_attri_log_format attri_format; }; @@ -43,4 +48,7 @@ struct xfs_attrd_log_item { struct xfs_attrd_log_format attrd_format; }; +extern struct kmem_cache *xfs_attri_cache; +extern struct kmem_cache *xfs_attrd_cache; + #endif /* __XFS_ATTR_ITEM_H__ */ diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index e484251dc9c8..ffa94102094d 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -24,6 +24,15 @@ #include "xfs_quota.h" /* + * This is the number of entries in the l_buf_cancel_table used during + * recovery. + */ +#define XLOG_BC_TABLE_SIZE 64 + +#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \ + ((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE)) + +/* * This structure is used during recovery to record the buf log items which * have been canceled and should not be replayed. */ @@ -993,3 +1002,60 @@ const struct xlog_recover_item_ops xlog_buf_item_ops = { .commit_pass1 = xlog_recover_buf_commit_pass1, .commit_pass2 = xlog_recover_buf_commit_pass2, }; + +#ifdef DEBUG +void +xlog_check_buf_cancel_table( + struct xlog *log) +{ + int i; + + for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) + ASSERT(list_empty(&log->l_buf_cancel_table[i])); +} +#endif + +int +xlog_alloc_buf_cancel_table( + struct xlog *log) +{ + void *p; + int i; + + ASSERT(log->l_buf_cancel_table == NULL); + + p = kmalloc_array(XLOG_BC_TABLE_SIZE, sizeof(struct list_head), + GFP_KERNEL); + if (!p) + return -ENOMEM; + + log->l_buf_cancel_table = p; + for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) + INIT_LIST_HEAD(&log->l_buf_cancel_table[i]); + + return 0; +} + +void +xlog_free_buf_cancel_table( + struct xlog *log) +{ + int i; + + if (!log->l_buf_cancel_table) + return; + + for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) { + struct xfs_buf_cancel *bc; + + while ((bc = list_first_entry_or_null( + &log->l_buf_cancel_table[i], + struct xfs_buf_cancel, bc_list))) { + list_del(&bc->bc_list); + kmem_free(bc); + } + } + + kmem_free(log->l_buf_cancel_table); + log->l_buf_cancel_table = NULL; +} diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index a60632ecc3f0..5a171c0b244b 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -576,9 +576,9 @@ xfs_file_dio_write_unaligned( * don't even bother trying the fast path in this case. */ if (iocb->ki_pos > isize || iocb->ki_pos + count >= isize) { -retry_exclusive: if (iocb->ki_flags & IOCB_NOWAIT) return -EAGAIN; +retry_exclusive: iolock = XFS_IOLOCK_EXCL; flags = IOMAP_DIO_FORCE_WAIT; } diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 888839e75d11..d4a77c53f94b 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -149,12 +149,7 @@ xfs_growfs_data_private( error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount, delta, &lastag_extended); } else { - static struct ratelimit_state shrink_warning = \ - RATELIMIT_STATE_INIT("shrink_warning", 86400 * HZ, 1); - ratelimit_set_flags(&shrink_warning, RATELIMIT_MSG_ON_RELEASE); - - if (__ratelimit(&shrink_warning)) - xfs_alert(mp, + xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SHRINK, "EXPERIMENTAL online shrink feature in use. Use at your own risk!"); error = xfs_ag_shrink_space(mp, &tp, nagcount - 1, -delta); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b2879870a17e..52d6f2c7d58b 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2622,7 +2622,7 @@ xfs_ifree( */ error = xfs_difree(tp, pag, ip->i_ino, &xic); if (error) - return error; + goto out; error = xfs_iunlink_remove(tp, pag, ip); if (error) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 0e5cb7936206..5a364a7d58fd 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -37,6 +37,7 @@ #include "xfs_health.h" #include "xfs_reflink.h" #include "xfs_ioctl.h" +#include "xfs_xattr.h" #include <linux/mount.h> #include <linux/namei.h> @@ -524,7 +525,7 @@ xfs_attrmulti_attr_set( args.valuelen = len; } - error = xfs_attr_set(&args); + error = xfs_attr_change(&args); if (!error && (flags & XFS_IOC_ATTR_ROOT)) xfs_forget_acl(inode, name); kfree(args.value); diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index e912b7fee714..29f5b8b8aca6 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -24,6 +24,7 @@ #include "xfs_iomap.h" #include "xfs_error.h" #include "xfs_ioctl.h" +#include "xfs_xattr.h" #include <linux/posix_acl.h> #include <linux/security.h> @@ -61,7 +62,7 @@ xfs_initxattrs( .value = xattr->value, .valuelen = xattr->value_len, }; - error = xfs_attr_set(&args); + error = xfs_attr_change(&args); if (error < 0) break; } diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 9dc748abdf33..1e972f884a81 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3877,44 +3877,3 @@ xlog_drop_incompat_feat( { up_read(&log->l_incompat_users); } - -/* - * Get permission to use log-assisted atomic exchange of file extents. - * - * Callers must not be running any transactions or hold any inode locks, and - * they must release the permission by calling xlog_drop_incompat_feat - * when they're done. - */ -int -xfs_attr_use_log_assist( - struct xfs_mount *mp) -{ - int error = 0; - - /* - * Protect ourselves from an idle log clearing the logged xattrs log - * incompat feature bit. - */ - xlog_use_incompat_feat(mp->m_log); - - /* - * If log-assisted xattrs are already enabled, the caller can use the - * log assisted swap functions with the log-incompat reference we got. - */ - if (xfs_sb_version_haslogxattrs(&mp->m_sb)) - return 0; - - /* Enable log-assisted xattrs. */ - error = xfs_add_incompat_log_feature(mp, - XFS_SB_FEAT_INCOMPAT_LOG_XATTRS); - if (error) - goto drop_incompat; - - xfs_warn_once(mp, -"EXPERIMENTAL logged extended attributes feature added. Use at your own risk!"); - - return 0; -drop_incompat: - xlog_drop_incompat_feat(mp->m_log); - return error; -} diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 252b098cde1f..f3ce046a7d45 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -86,6 +86,13 @@ xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, return buf; } +static inline void * +xlog_copy_from_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, + const struct xfs_log_iovec *src) +{ + return xlog_copy_iovec(lv, vecp, src->i_type, src->i_addr, src->i_len); +} + /* * By comparing each component, we don't have to worry about extra * endian issues in treating two 32 bit numbers as one 64 bit number diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 67fd9789e69a..686c01eb3661 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -428,9 +428,6 @@ struct xlog { struct rw_semaphore l_incompat_users; }; -#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \ - ((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE)) - /* * Bits for operational state */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 97b941c07957..5f7e4e6e33ce 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -39,13 +39,6 @@ STATIC int xlog_clear_stale_blocks( struct xlog *, xfs_lsn_t); -#if defined(DEBUG) -STATIC void -xlog_recover_check_summary( - struct xlog *); -#else -#define xlog_recover_check_summary(log) -#endif STATIC int xlog_do_recovery_pass( struct xlog *, xfs_daddr_t, xfs_daddr_t, int, xfs_daddr_t *); @@ -3230,7 +3223,7 @@ xlog_do_log_recovery( xfs_daddr_t head_blk, xfs_daddr_t tail_blk) { - int error, i; + int error; ASSERT(head_blk != tail_blk); @@ -3238,37 +3231,25 @@ xlog_do_log_recovery( * First do a pass to find all of the cancelled buf log items. * Store them in the buf_cancel_table for use in the second pass. */ - log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE * - sizeof(struct list_head), - 0); - for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) - INIT_LIST_HEAD(&log->l_buf_cancel_table[i]); + error = xlog_alloc_buf_cancel_table(log); + if (error) + return error; error = xlog_do_recovery_pass(log, head_blk, tail_blk, XLOG_RECOVER_PASS1, NULL); - if (error != 0) { - kmem_free(log->l_buf_cancel_table); - log->l_buf_cancel_table = NULL; - return error; - } + if (error != 0) + goto out_cancel; + /* * Then do a second pass to actually recover the items in the log. * When it is complete free the table of buf cancel items. */ error = xlog_do_recovery_pass(log, head_blk, tail_blk, XLOG_RECOVER_PASS2, NULL); -#ifdef DEBUG - if (!error) { - int i; - - for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) - ASSERT(list_empty(&log->l_buf_cancel_table[i])); - } -#endif /* DEBUG */ - - kmem_free(log->l_buf_cancel_table); - log->l_buf_cancel_table = NULL; - + if (!error) + xlog_check_buf_cancel_table(log); +out_cancel: + xlog_free_buf_cancel_table(log); return error; } @@ -3339,8 +3320,6 @@ xlog_do_recover( } mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); - xlog_recover_check_summary(log); - /* Normal transactions can now occur */ clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate); return 0; @@ -3483,7 +3462,6 @@ xlog_recover_finish( } xlog_recover_process_iunlinks(log); - xlog_recover_check_summary(log); /* * Recover any CoW staging blocks that are still referenced by the @@ -3517,52 +3495,3 @@ xlog_recover_cancel( xlog_recover_cancel_intents(log); } -#if defined(DEBUG) -/* - * Read all of the agf and agi counters and check that they - * are consistent with the superblock counters. - */ -STATIC void -xlog_recover_check_summary( - struct xlog *log) -{ - struct xfs_mount *mp = log->l_mp; - struct xfs_perag *pag; - struct xfs_buf *agfbp; - struct xfs_buf *agibp; - xfs_agnumber_t agno; - uint64_t freeblks; - uint64_t itotal; - uint64_t ifree; - int error; - - freeblks = 0LL; - itotal = 0LL; - ifree = 0LL; - for_each_perag(mp, agno, pag) { - error = xfs_read_agf(mp, NULL, pag->pag_agno, 0, &agfbp); - if (error) { - xfs_alert(mp, "%s agf read failed agno %d error %d", - __func__, pag->pag_agno, error); - } else { - struct xfs_agf *agfp = agfbp->b_addr; - - freeblks += be32_to_cpu(agfp->agf_freeblks) + - be32_to_cpu(agfp->agf_flcount); - xfs_buf_relse(agfbp); - } - - error = xfs_read_agi(mp, NULL, pag->pag_agno, &agibp); - if (error) { - xfs_alert(mp, "%s agi read failed agno %d error %d", - __func__, pag->pag_agno, error); - } else { - struct xfs_agi *agi = agibp->b_addr; - - itotal += be32_to_cpu(agi->agi_count); - ifree += be32_to_cpu(agi->agi_freecount); - xfs_buf_relse(agibp); - } - } -} -#endif /* DEBUG */ diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h index 55ee464ab59f..cc323775a12c 100644 --- a/fs/xfs/xfs_message.h +++ b/fs/xfs/xfs_message.h @@ -75,6 +75,12 @@ do { \ #define xfs_debug_ratelimited(dev, fmt, ...) \ xfs_printk_ratelimited(xfs_debug, dev, fmt, ##__VA_ARGS__) +#define xfs_warn_mount(mp, warntag, fmt, ...) \ +do { \ + if (xfs_should_warn((mp), (warntag))) \ + xfs_warn((mp), (fmt), ##__VA_ARGS__); \ +} while (0) + #define xfs_warn_once(dev, fmt, ...) \ xfs_printk_once(xfs_warn, dev, fmt, ##__VA_ARGS__) #define xfs_notice_once(dev, fmt, ...) \ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 0c0bcbd4949d..daa8d29c46b4 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1356,7 +1356,6 @@ xfs_clear_incompat_log_features( if (xfs_sb_has_incompat_log_feature(&mp->m_sb, XFS_SB_FEAT_INCOMPAT_LOG_ALL)) { - xfs_info(mp, "Clearing log incompat feature flags."); xfs_sb_remove_incompat_log_features(&mp->m_sb); ret = true; } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 8c42786e4942..ba5d42abf66e 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -391,6 +391,13 @@ __XFS_HAS_FEAT(nouuid, NOUUID) */ #define XFS_OPSTATE_BLOCKGC_ENABLED 6 +/* Kernel has logged a warning about online fsck being used on this fs. */ +#define XFS_OPSTATE_WARNED_SCRUB 7 +/* Kernel has logged a warning about shrink being used on this fs. */ +#define XFS_OPSTATE_WARNED_SHRINK 8 +/* Kernel has logged a warning about logged xattr updates being used. */ +#define XFS_OPSTATE_WARNED_LARP 9 + #define __XFS_IS_OPSTATE(name, NAME) \ static inline bool xfs_is_ ## name (struct xfs_mount *mp) \ { \ @@ -413,6 +420,12 @@ __XFS_IS_OPSTATE(readonly, READONLY) __XFS_IS_OPSTATE(inodegc_enabled, INODEGC_ENABLED) __XFS_IS_OPSTATE(blockgc_enabled, BLOCKGC_ENABLED) +static inline bool +xfs_should_warn(struct xfs_mount *mp, long nr) +{ + return !test_and_set_bit(nr, &mp->m_opstate); +} + #define XFS_OPSTATE_STRINGS \ { (1UL << XFS_OPSTATE_UNMOUNTING), "unmounting" }, \ { (1UL << XFS_OPSTATE_CLEAN), "clean" }, \ @@ -420,7 +433,10 @@ __XFS_IS_OPSTATE(blockgc_enabled, BLOCKGC_ENABLED) { (1UL << XFS_OPSTATE_INODE32), "inode32" }, \ { (1UL << XFS_OPSTATE_READONLY), "read_only" }, \ { (1UL << XFS_OPSTATE_INODEGC_ENABLED), "inodegc" }, \ - { (1UL << XFS_OPSTATE_BLOCKGC_ENABLED), "blockgc" } + { (1UL << XFS_OPSTATE_BLOCKGC_ENABLED), "blockgc" }, \ + { (1UL << XFS_OPSTATE_WARNED_SCRUB), "wscrub" }, \ + { (1UL << XFS_OPSTATE_WARNED_SHRINK), "wshrink" }, \ + { (1UL << XFS_OPSTATE_WARNED_LARP), "wlarp" } /* * Max and min values for mount-option defined I/O diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 8fc813cb6011..abf08bbf34a9 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1308,8 +1308,15 @@ xfs_qm_quotacheck( error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true, NULL); - if (error) + if (error) { + /* + * The inode walk may have partially populated the dquot + * caches. We must purge them before disabling quota and + * tearing down the quotainfo, or else the dquots will leak. + */ + xfs_qm_dqpurge_all(mp); goto error_return; + } /* * We've made all the changes that we need to make incore. Flush them diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 8495ef076ffc..ed18160e6181 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -38,6 +38,8 @@ #include "xfs_pwork.h" #include "xfs_ag.h" #include "xfs_defer.h" +#include "xfs_attr_item.h" +#include "xfs_xattr.h" #include <linux/magic.h> #include <linux/fs_context.h> @@ -2079,8 +2081,24 @@ xfs_init_caches(void) if (!xfs_bui_cache) goto out_destroy_bud_cache; + xfs_attrd_cache = kmem_cache_create("xfs_attrd_item", + sizeof(struct xfs_attrd_log_item), + 0, 0, NULL); + if (!xfs_attrd_cache) + goto out_destroy_bui_cache; + + xfs_attri_cache = kmem_cache_create("xfs_attri_item", + sizeof(struct xfs_attri_log_item), + 0, 0, NULL); + if (!xfs_attri_cache) + goto out_destroy_attrd_cache; + return 0; + out_destroy_attrd_cache: + kmem_cache_destroy(xfs_attrd_cache); + out_destroy_bui_cache: + kmem_cache_destroy(xfs_bui_cache); out_destroy_bud_cache: kmem_cache_destroy(xfs_bud_cache); out_destroy_cui_cache: @@ -2127,6 +2145,8 @@ xfs_destroy_caches(void) * destroy caches. */ rcu_barrier(); + kmem_cache_destroy(xfs_attri_cache); + kmem_cache_destroy(xfs_attrd_cache); kmem_cache_destroy(xfs_bui_cache); kmem_cache_destroy(xfs_bud_cache); kmem_cache_destroy(xfs_cui_cache); diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index 167d23f92ffe..3cd5a51bace1 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h @@ -91,7 +91,6 @@ extern xfs_agnumber_t xfs_set_inode_alloc(struct xfs_mount *, xfs_agnumber_t agcount); extern const struct export_operations xfs_export_operations; -extern const struct xattr_handler *xfs_xattr_handlers[]; extern const struct quotactl_ops xfs_quotactl_operations; extern void xfs_reinit_percpu_counters(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 7a044afd4c46..35e13e125ec6 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -15,9 +15,86 @@ #include "xfs_da_btree.h" #include "xfs_attr.h" #include "xfs_acl.h" +#include "xfs_log.h" +#include "xfs_xattr.h" #include <linux/posix_acl_xattr.h> +/* + * Get permission to use log-assisted atomic exchange of file extents. + * + * Callers must not be running any transactions or hold any inode locks, and + * they must release the permission by calling xlog_drop_incompat_feat + * when they're done. + */ +static inline int +xfs_attr_grab_log_assist( + struct xfs_mount *mp) +{ + int error = 0; + + /* + * Protect ourselves from an idle log clearing the logged xattrs log + * incompat feature bit. + */ + xlog_use_incompat_feat(mp->m_log); + + /* + * If log-assisted xattrs are already enabled, the caller can use the + * log assisted swap functions with the log-incompat reference we got. + */ + if (xfs_sb_version_haslogxattrs(&mp->m_sb)) + return 0; + + /* Enable log-assisted xattrs. */ + error = xfs_add_incompat_log_feature(mp, + XFS_SB_FEAT_INCOMPAT_LOG_XATTRS); + if (error) + goto drop_incompat; + + xfs_warn_mount(mp, XFS_OPSTATE_WARNED_LARP, + "EXPERIMENTAL logged extended attributes feature in use. Use at your own risk!"); + + return 0; +drop_incompat: + xlog_drop_incompat_feat(mp->m_log); + return error; +} + +static inline void +xfs_attr_rele_log_assist( + struct xfs_mount *mp) +{ + xlog_drop_incompat_feat(mp->m_log); +} + +/* + * Set or remove an xattr, having grabbed the appropriate logging resources + * prior to calling libxfs. + */ +int +xfs_attr_change( + struct xfs_da_args *args) +{ + struct xfs_mount *mp = args->dp->i_mount; + bool use_logging = false; + int error; + + if (xfs_has_larp(mp)) { + error = xfs_attr_grab_log_assist(mp); + if (error) + return error; + + use_logging = true; + } + + error = xfs_attr_set(args); + + if (use_logging) + xfs_attr_rele_log_assist(mp); + return error; +} + static int xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused, @@ -56,7 +133,7 @@ xfs_xattr_set(const struct xattr_handler *handler, }; int error; - error = xfs_attr_set(&args); + error = xfs_attr_change(&args); if (!error && (handler->flags & XFS_ATTR_ROOT)) xfs_forget_acl(inode, name); return error; diff --git a/fs/xfs/xfs_xattr.h b/fs/xfs/xfs_xattr.h new file mode 100644 index 000000000000..2b09133b1b9b --- /dev/null +++ b/fs/xfs/xfs_xattr.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + */ +#ifndef __XFS_XATTR_H__ +#define __XFS_XATTR_H__ + +int xfs_attr_change(struct xfs_da_args *args); + +extern const struct xattr_handler *xfs_xattr_handlers[]; + +#endif /* __XFS_XATTR_H__ */ |