From ad8c28a9eb81ca90fda29f48cfb4d19305943737 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 9 Sep 2019 15:58:55 -0400 Subject: ceph: convert int fields in ceph_mount_options to unsigned int Most of these values should never be negative, so convert them to unsigned values. Add some sanity checking to the parsed values, and clean up some unneeded casts. Note that while caps_max should never be negative, this patch leaves it signed, since this value ends up later being compared to a signed counter. Just ensure that userland never passes in a negative value for caps_max. Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 5 +++-- fs/ceph/super.c | 28 +++++++++++++++------------- fs/ceph/super.h | 16 ++++++++-------- 3 files changed, 26 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 068b029cf073..a3110d216aae 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2032,12 +2032,13 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options; size_t size = sizeof(struct ceph_mds_reply_dir_entry); - int order, num_entries; + unsigned int num_entries; + int order; spin_lock(&ci->i_ceph_lock); num_entries = ci->i_files + ci->i_subdirs; spin_unlock(&ci->i_ceph_lock); - num_entries = max(num_entries, 1); + num_entries = max(num_entries, 1U); num_entries = min(num_entries, opt->max_readdir); order = get_order(size * num_entries); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 9c9a7c68eea3..29a795f975df 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -172,10 +172,10 @@ static const struct fs_parameter_enum ceph_mount_param_enums[] = { static const struct fs_parameter_spec ceph_mount_param_specs[] = { fsparam_flag_no ("acl", Opt_acl), fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir), - fsparam_u32 ("caps_max", Opt_caps_max), + fsparam_s32 ("caps_max", Opt_caps_max), fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max), fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min), - fsparam_s32 ("write_congestion_kb", Opt_congestion_kb), + fsparam_u32 ("write_congestion_kb", Opt_congestion_kb), fsparam_flag_no ("copyfrom", Opt_copyfrom), fsparam_flag_no ("dcache", Opt_dcache), fsparam_flag_no ("dirstat", Opt_dirstat), @@ -187,8 +187,8 @@ static const struct fs_parameter_spec ceph_mount_param_specs[] = { fsparam_flag_no ("quotadf", Opt_quotadf), fsparam_u32 ("rasize", Opt_rasize), fsparam_flag_no ("rbytes", Opt_rbytes), - fsparam_s32 ("readdir_max_bytes", Opt_readdir_max_bytes), - fsparam_s32 ("readdir_max_entries", Opt_readdir_max_entries), + fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes), + fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries), fsparam_enum ("recover_session", Opt_recover_session), fsparam_flag_no ("require_active_mds", Opt_require_active_mds), fsparam_u32 ("rsize", Opt_rsize), @@ -328,7 +328,9 @@ static int ceph_parse_mount_param(struct fs_context *fc, fsopt->caps_wanted_delay_max = result.uint_32; break; case Opt_caps_max: - fsopt->caps_max = result.uint_32; + if (result.int_32 < 0) + goto out_of_range; + fsopt->caps_max = result.int_32; break; case Opt_readdir_max_entries: if (result.uint_32 < 1) @@ -547,25 +549,25 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) seq_show_option(m, "recover_session", "clean"); if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) - seq_printf(m, ",wsize=%d", fsopt->wsize); + seq_printf(m, ",wsize=%u", fsopt->wsize); if (fsopt->rsize != CEPH_MAX_READ_SIZE) - seq_printf(m, ",rsize=%d", fsopt->rsize); + seq_printf(m, ",rsize=%u", fsopt->rsize); if (fsopt->rasize != CEPH_RASIZE_DEFAULT) - seq_printf(m, ",rasize=%d", fsopt->rasize); + seq_printf(m, ",rasize=%u", fsopt->rasize); if (fsopt->congestion_kb != default_congestion_kb()) - seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); + seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb); if (fsopt->caps_max) seq_printf(m, ",caps_max=%d", fsopt->caps_max); if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) - seq_printf(m, ",caps_wanted_delay_min=%d", + seq_printf(m, ",caps_wanted_delay_min=%u", fsopt->caps_wanted_delay_min); if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) - seq_printf(m, ",caps_wanted_delay_max=%d", + seq_printf(m, ",caps_wanted_delay_max=%u", fsopt->caps_wanted_delay_max); if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) - seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir); + seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir); if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) - seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); + seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes); if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) seq_show_option(m, "snapdirname", fsopt->snapdir_name); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index f0f9cb7447ac..3bf1a01cd536 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -73,16 +73,16 @@ #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ struct ceph_mount_options { - int flags; + unsigned int flags; - int wsize; /* max write size */ - int rsize; /* max read size */ - int rasize; /* max readahead */ - int congestion_kb; /* max writeback in flight */ - int caps_wanted_delay_min, caps_wanted_delay_max; + unsigned int wsize; /* max write size */ + unsigned int rsize; /* max read size */ + unsigned int rasize; /* max readahead */ + unsigned int congestion_kb; /* max writeback in flight */ + unsigned int caps_wanted_delay_min, caps_wanted_delay_max; int caps_max; - int max_readdir; /* max readdir result (entires) */ - int max_readdir_bytes; /* max readdir result (bytes) */ + unsigned int max_readdir; /* max readdir result (entries) */ + unsigned int max_readdir_bytes; /* max readdir result (bytes) */ /* * everything above this point can be memcmp'd; everything below -- cgit v1.2.3-59-g8ed1b From 3a3430affce5de301fc8e6e50fa3543d7597820e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 20 Nov 2019 12:00:59 -0500 Subject: ceph: show tasks waiting on caps in debugfs caps file Add some visibility of tasks that are waiting for caps to the "caps" debugfs file. Display the tgid of the waiting task, inode number, and the caps the task needs and wants. Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 17 +++++++++++++++++ fs/ceph/debugfs.c | 13 +++++++++++++ fs/ceph/mds_client.c | 1 + fs/ceph/mds_client.h | 9 +++++++++ 4 files changed, 40 insertions(+) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index f5a38910a82b..271d8283d263 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2764,7 +2764,19 @@ int ceph_get_caps(struct file *filp, int need, int want, if (ret == -EAGAIN) continue; if (!ret) { + struct ceph_mds_client *mdsc = fsc->mdsc; + struct cap_wait cw; DEFINE_WAIT_FUNC(wait, woken_wake_function); + + cw.ino = inode->i_ino; + cw.tgid = current->tgid; + cw.need = need; + cw.want = want; + + spin_lock(&mdsc->caps_list_lock); + list_add(&cw.list, &mdsc->cap_wait_list); + spin_unlock(&mdsc->caps_list_lock); + add_wait_queue(&ci->i_cap_wq, &wait); flags |= NON_BLOCKING; @@ -2778,6 +2790,11 @@ int ceph_get_caps(struct file *filp, int need, int want, } remove_wait_queue(&ci->i_cap_wq, &wait); + + spin_lock(&mdsc->caps_list_lock); + list_del(&cw.list); + spin_unlock(&mdsc->caps_list_lock); + if (ret == -EAGAIN) continue; } diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index facb387c2735..c281f32b54f7 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -139,6 +139,7 @@ static int caps_show(struct seq_file *s, void *p) struct ceph_fs_client *fsc = s->private; struct ceph_mds_client *mdsc = fsc->mdsc; int total, avail, used, reserved, min, i; + struct cap_wait *cw; ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min); seq_printf(s, "total\t\t%d\n" @@ -166,6 +167,18 @@ static int caps_show(struct seq_file *s, void *p) } mutex_unlock(&mdsc->mutex); + seq_printf(s, "\n\nWaiters:\n--------\n"); + seq_printf(s, "tgid ino need want\n"); + seq_printf(s, "-----------------------------------------------------\n"); + + spin_lock(&mdsc->caps_list_lock); + list_for_each_entry(cw, &mdsc->cap_wait_list, list) { + seq_printf(s, "%-13d0x%-17lx%-17s%-17s\n", cw->tgid, cw->ino, + ceph_cap_string(cw->need), + ceph_cap_string(cw->want)); + } + spin_unlock(&mdsc->caps_list_lock); + return 0; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a3110d216aae..f8735fc9f9d6 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4169,6 +4169,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work); mdsc->last_renew_caps = jiffies; INIT_LIST_HEAD(&mdsc->cap_delay_list); + INIT_LIST_HEAD(&mdsc->cap_wait_list); spin_lock_init(&mdsc->cap_delay_lock); INIT_LIST_HEAD(&mdsc->snap_flush_list); spin_lock_init(&mdsc->snap_flush_lock); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 5cd131b41d84..14c7e8c49970 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -340,6 +340,14 @@ struct ceph_quotarealm_inode { struct inode *inode; }; +struct cap_wait { + struct list_head list; + unsigned long ino; + pid_t tgid; + int need; + int want; +}; + /* * mds client state */ @@ -416,6 +424,7 @@ struct ceph_mds_client { spinlock_t caps_list_lock; struct list_head caps_list; /* unused (reserved or unreserved) */ + struct list_head cap_wait_list; int caps_total_count; /* total caps allocated */ int caps_use_count; /* in use */ int caps_use_max; /* max used caps */ -- cgit v1.2.3-59-g8ed1b From bba1560bd4a46aa0d16bb7d81abd9d0eb47dea36 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 26 Nov 2019 07:32:22 -0500 Subject: ceph: trigger the reclaim work once there has enough pending caps The nr in ceph_reclaim_caps_nr() is very possibly larger than 1, so we may miss it and the reclaim work couldn't triggered as expected. Signed-off-by: Xiubo Li Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index f8735fc9f9d6..374db1bd57d1 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2015,7 +2015,7 @@ void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr) if (!nr) return; val = atomic_add_return(nr, &mdsc->cap_reclaim_pending); - if (!(val % CEPH_CAPS_PER_RELEASE)) { + if ((val % CEPH_CAPS_PER_RELEASE) < nr) { atomic_set(&mdsc->cap_reclaim_pending, 0); ceph_queue_cap_reclaim_work(mdsc); } -- cgit v1.2.3-59-g8ed1b From bd84fbcb319852f7aeaabc0db5bdeb87e3337bf3 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 3 Dec 2019 03:00:51 -0500 Subject: ceph: switch to global cap helper __ceph_is_any_caps is a duplicate helper. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 271d8283d263..9d09bb53c1ab 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1011,18 +1011,13 @@ static int __ceph_is_single_caps(struct ceph_inode_info *ci) return rb_first(&ci->i_caps) == rb_last(&ci->i_caps); } -static int __ceph_is_any_caps(struct ceph_inode_info *ci) -{ - return !RB_EMPTY_ROOT(&ci->i_caps); -} - int ceph_is_any_caps(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); int ret; spin_lock(&ci->i_ceph_lock); - ret = __ceph_is_any_caps(ci); + ret = __ceph_is_any_real_caps(ci); spin_unlock(&ci->i_ceph_lock); return ret; @@ -1099,15 +1094,16 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) if (removed) ceph_put_cap(mdsc, cap); - /* when reconnect denied, we remove session caps forcibly, - * i_wr_ref can be non-zero. If there are ongoing write, - * keep i_snap_realm. - */ - if (!__ceph_is_any_caps(ci) && ci->i_wr_ref == 0 && ci->i_snap_realm) - drop_inode_snap_realm(ci); + if (!__ceph_is_any_real_caps(ci)) { + /* when reconnect denied, we remove session caps forcibly, + * i_wr_ref can be non-zero. If there are ongoing write, + * keep i_snap_realm. + */ + if (ci->i_wr_ref == 0 && ci->i_snap_realm) + drop_inode_snap_realm(ci); - if (!__ceph_is_any_real_caps(ci)) __cap_delay_cancel(mdsc, ci); + } } struct cap_msg_args { @@ -2945,7 +2941,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) ci->i_head_snapc = NULL; } /* see comment in __ceph_remove_cap() */ - if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) + if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm) drop_inode_snap_realm(ci); } spin_unlock(&ci->i_ceph_lock); -- cgit v1.2.3-59-g8ed1b From da08e1e1d7c3f805f8771ad6a6fd3a7a30ba4fe2 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 26 Nov 2019 07:24:20 -0500 Subject: ceph: add more debug info when decoding mdsmap Show the laggy state. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mdsmap.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index aeec1d6e3769..471bac335fae 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -158,6 +158,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) void *pexport_targets = NULL; struct ceph_timespec laggy_since; struct ceph_mds_info *info; + bool laggy; ceph_decode_need(p, end, sizeof(u64) + 1, bad); global_id = ceph_decode_64(p); @@ -190,6 +191,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) if (err) goto corrupt; ceph_decode_copy(p, &laggy_since, sizeof(laggy_since)); + laggy = laggy_since.tv_sec != 0 || laggy_since.tv_nsec != 0; *p += sizeof(u32); ceph_decode_32_safe(p, end, namelen, bad); *p += namelen; @@ -207,10 +209,11 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) *p = info_end; } - dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", + dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s%s\n", i+1, n, global_id, mds, inc, ceph_pr_addr(&addr), - ceph_mds_state_name(state)); + ceph_mds_state_name(state), + laggy ? "(laggy)" : ""); if (mds < 0 || state <= 0) continue; @@ -230,8 +233,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) info->global_id = global_id; info->state = state; info->addr = addr; - info->laggy = (laggy_since.tv_sec != 0 || - laggy_since.tv_nsec != 0); + info->laggy = laggy; info->num_export_targets = num_export_targets; if (num_export_targets) { info->export_targets = kcalloc(num_export_targets, @@ -355,6 +357,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) m->m_damaged = false; } bad_ext: + dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n", + !!m->m_enabled, !!m->m_damaged, m->m_num_laggy); *p = end; dout("mdsmap_decode success epoch %u\n", m->m_epoch); return m; -- cgit v1.2.3-59-g8ed1b