aboutsummaryrefslogtreecommitdiffstats
path: root/net/ceph/ceph_common.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-08 12:49:18 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-08 12:49:18 -0700
commit95288a9b3beee8dd69d73b7691e36f2f231b7903 (patch)
treeb7bb598a516e5d7fa7dba040ac34bb0608b6388e /net/ceph/ceph_common.c
parentMerge tag 'gfs2-for-5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2 (diff)
parentrbd: compression_hint option (diff)
downloadlinux-dev-95288a9b3beee8dd69d73b7691e36f2f231b7903.tar.xz
linux-dev-95288a9b3beee8dd69d73b7691e36f2f231b7903.zip
Merge tag 'ceph-for-5.8-rc1' of git://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov: "The highlights are: - OSD/MDS latency and caps cache metrics infrastructure for the filesytem (Xiubo Li). Currently available through debugfs and will be periodically sent to the MDS in the future. - support for replica reads (balanced and localized reads) for rbd and the filesystem (myself). The default remains to always read from primary, users can opt-in with the new crush_location and read_from_replica options. Note that reading from replica is safe for general use only since Octopus. - support for RADOS allocation hint flags (myself). Currently used by rbd to propagate the compressible/incompressible hint given with the new compression_hint map option and ready for passing on more advanced hints, e.g. based on fadvise() from the filesystem. - support for efficient cross-quota-realm renames (Luis Henriques) - assorted cap handling improvements and cleanups, particularly untangling some of the locking (Jeff Layton)" * tag 'ceph-for-5.8-rc1' of git://github.com/ceph/ceph-client: (29 commits) rbd: compression_hint option libceph: support for alloc hint flags libceph: read_from_replica option libceph: support for balanced and localized reads libceph: crush_location infrastructure libceph: decode CRUSH device/bucket types and names libceph: add non-asserting rbtree insertion helper ceph: skip checking caps when session reconnecting and releasing reqs ceph: make sure mdsc->mutex is nested in s->s_mutex to fix dead lock ceph: don't return -ESTALE if there's still an open file libceph, rbd: replace zero-length array with flexible-array ceph: allow rename operation under different quota realms ceph: normalize 'delta' parameter usage in check_quota_exceeded ceph: ceph_kick_flushing_caps needs the s_mutex ceph: request expedited service on session's last cap flush ceph: convert mdsc->cap_dirty to a per-session list ceph: reset i_requested_max_size if file write is not wanted ceph: throw a warning if we destroy session with mutex still locked ceph: fix potential race in ceph_check_caps ceph: document what protects i_dirty_item and i_flushing_item ...
Diffstat (limited to 'net/ceph/ceph_common.c')
-rw-r--r--net/ceph/ceph_common.c75
1 files changed, 75 insertions, 0 deletions
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 66f22e8aa529..afe0e8184c23 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -176,6 +176,10 @@ int ceph_compare_options(struct ceph_options *new_opt,
}
}
+ ret = ceph_compare_crush_locs(&opt1->crush_locs, &opt2->crush_locs);
+ if (ret)
+ return ret;
+
/* any matching mon ip implies a match */
for (i = 0; i < opt1->num_mon; i++) {
if (ceph_monmap_contains(client->monc.monmap,
@@ -259,6 +263,8 @@ enum {
Opt_secret,
Opt_key,
Opt_ip,
+ Opt_crush_location,
+ Opt_read_from_replica,
/* string args above */
Opt_share,
Opt_crc,
@@ -268,11 +274,25 @@ enum {
Opt_abort_on_full,
};
+enum {
+ Opt_read_from_replica_no,
+ Opt_read_from_replica_balance,
+ Opt_read_from_replica_localize,
+};
+
+static const struct constant_table ceph_param_read_from_replica[] = {
+ {"no", Opt_read_from_replica_no},
+ {"balance", Opt_read_from_replica_balance},
+ {"localize", Opt_read_from_replica_localize},
+ {}
+};
+
static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_flag ("abort_on_full", Opt_abort_on_full),
fsparam_flag_no ("cephx_require_signatures", Opt_cephx_require_signatures),
fsparam_flag_no ("cephx_sign_messages", Opt_cephx_sign_messages),
fsparam_flag_no ("crc", Opt_crc),
+ fsparam_string ("crush_location", Opt_crush_location),
fsparam_string ("fsid", Opt_fsid),
fsparam_string ("ip", Opt_ip),
fsparam_string ("key", Opt_key),
@@ -283,6 +303,8 @@ static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_u32 ("osdkeepalive", Opt_osdkeepalivetimeout),
__fsparam (fs_param_is_s32, "osdtimeout", Opt_osdtimeout,
fs_param_deprecated, NULL),
+ fsparam_enum ("read_from_replica", Opt_read_from_replica,
+ ceph_param_read_from_replica),
fsparam_string ("secret", Opt_secret),
fsparam_flag_no ("share", Opt_share),
fsparam_flag_no ("tcp_nodelay", Opt_tcp_nodelay),
@@ -297,6 +319,7 @@ struct ceph_options *ceph_alloc_options(void)
if (!opt)
return NULL;
+ opt->crush_locs = RB_ROOT;
opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr),
GFP_KERNEL);
if (!opt->mon_addr) {
@@ -319,6 +342,7 @@ void ceph_destroy_options(struct ceph_options *opt)
if (!opt)
return;
+ ceph_clear_crush_locs(&opt->crush_locs);
kfree(opt->name);
if (opt->key) {
ceph_crypto_key_destroy(opt->key);
@@ -453,6 +477,34 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
if (!opt->key)
return -ENOMEM;
return get_secret(opt->key, param->string, &log);
+ case Opt_crush_location:
+ ceph_clear_crush_locs(&opt->crush_locs);
+ err = ceph_parse_crush_location(param->string,
+ &opt->crush_locs);
+ if (err) {
+ error_plog(&log, "Failed to parse CRUSH location: %d",
+ err);
+ return err;
+ }
+ break;
+ case Opt_read_from_replica:
+ switch (result.uint_32) {
+ case Opt_read_from_replica_no:
+ opt->osd_req_flags &= ~(CEPH_OSD_FLAG_BALANCE_READS |
+ CEPH_OSD_FLAG_LOCALIZE_READS);
+ break;
+ case Opt_read_from_replica_balance:
+ opt->osd_req_flags |= CEPH_OSD_FLAG_BALANCE_READS;
+ opt->osd_req_flags &= ~CEPH_OSD_FLAG_LOCALIZE_READS;
+ break;
+ case Opt_read_from_replica_localize:
+ opt->osd_req_flags |= CEPH_OSD_FLAG_LOCALIZE_READS;
+ opt->osd_req_flags &= ~CEPH_OSD_FLAG_BALANCE_READS;
+ break;
+ default:
+ BUG();
+ }
+ break;
case Opt_osdtimeout:
warn_plog(&log, "Ignoring osdtimeout");
@@ -535,6 +587,7 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
{
struct ceph_options *opt = client->options;
size_t pos = m->count;
+ struct rb_node *n;
if (opt->name) {
seq_puts(m, "name=");
@@ -544,6 +597,28 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
if (opt->key)
seq_puts(m, "secret=<hidden>,");
+ if (!RB_EMPTY_ROOT(&opt->crush_locs)) {
+ seq_puts(m, "crush_location=");
+ for (n = rb_first(&opt->crush_locs); ; ) {
+ struct crush_loc_node *loc =
+ rb_entry(n, struct crush_loc_node, cl_node);
+
+ seq_printf(m, "%s:%s", loc->cl_loc.cl_type_name,
+ loc->cl_loc.cl_name);
+ n = rb_next(n);
+ if (!n)
+ break;
+
+ seq_putc(m, '|');
+ }
+ seq_putc(m, ',');
+ }
+ if (opt->osd_req_flags & CEPH_OSD_FLAG_BALANCE_READS) {
+ seq_puts(m, "read_from_replica=balance,");
+ } else if (opt->osd_req_flags & CEPH_OSD_FLAG_LOCALIZE_READS) {
+ seq_puts(m, "read_from_replica=localize,");
+ }
+
if (opt->flags & CEPH_OPT_FSID)
seq_printf(m, "fsid=%pU,", &opt->fsid);
if (opt->flags & CEPH_OPT_NOSHARE)