aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph/super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph/super.c')
-rw-r--r--fs/ceph/super.c209
1 files changed, 182 insertions, 27 deletions
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index bab61232dc5a..3fc48b43cab0 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -27,6 +27,8 @@
#include <linux/ceph/auth.h>
#include <linux/ceph/debugfs.h>
+#include <uapi/linux/magic.h>
+
static DEFINE_SPINLOCK(ceph_fsc_lock);
static LIST_HEAD(ceph_fsc_list);
@@ -70,15 +72,9 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_type = CEPH_SUPER_MAGIC; /* ?? */
/*
- * express utilization in terms of large blocks to avoid
+ * Express utilization in terms of large blocks to avoid
* overflow on 32-bit machines.
- *
- * NOTE: for the time being, we make bsize == frsize to humor
- * not-yet-ancient versions of glibc that are broken.
- * Someday, we will probably want to report a real block
- * size... whatever that may mean for a network file system!
*/
- buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
/*
@@ -93,6 +89,14 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
}
+ /*
+ * NOTE: for the time being, we make bsize == frsize to humor
+ * not-yet-ancient versions of glibc that are broken.
+ * Someday, we will probably want to report a real block
+ * size... whatever that may mean for a network file system!
+ */
+ buf->f_bsize = buf->f_frsize;
+
buf->f_files = le64_to_cpu(st.num_objects);
buf->f_ffree = -1;
buf->f_namelen = NAME_MAX;
@@ -146,6 +150,7 @@ enum {
Opt_mds_namespace,
Opt_recover_session,
Opt_source,
+ Opt_mon_addr,
/* string args above */
Opt_dirstat,
Opt_rbytes,
@@ -159,6 +164,7 @@ enum {
Opt_quotadf,
Opt_copyfrom,
Opt_wsync,
+ Opt_pagecache,
};
enum ceph_recover_session_mode {
@@ -197,8 +203,10 @@ static const struct fs_parameter_spec ceph_mount_parameters[] = {
fsparam_u32 ("rsize", Opt_rsize),
fsparam_string ("snapdirname", Opt_snapdirname),
fsparam_string ("source", Opt_source),
+ fsparam_string ("mon_addr", Opt_mon_addr),
fsparam_u32 ("wsize", Opt_wsize),
fsparam_flag_no ("wsync", Opt_wsync),
+ fsparam_flag_no ("pagecache", Opt_pagecache),
{}
};
@@ -228,9 +236,92 @@ static void canonicalize_path(char *path)
}
/*
- * Parse the source parameter. Distinguish the server list from the path.
+ * Check if the mds namespace in ceph_mount_options matches
+ * the passed in namespace string. First time match (when
+ * ->mds_namespace is NULL) is treated specially, since
+ * ->mds_namespace needs to be initialized by the caller.
+ */
+static int namespace_equals(struct ceph_mount_options *fsopt,
+			    const char *namespace, size_t len)
+{
+	const char *current_ns = fsopt->mds_namespace;
+
+	/* Nothing recorded yet: every candidate counts as a match. */
+	if (!current_ns)
+		return 1;
+
+	/* A different length can never be the same name. */
+	if (strlen(current_ns) != len)
+		return 0;
+
+	/* Same length: equal only if the first len bytes agree. */
+	return strncmp(current_ns, namespace, len) == 0;
+}
+
+static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end,
+				 struct fs_context *fc)
+{
+	struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+	int err;
+
+	/* The old syntax terminates its server list with ':'. */
+	if (*dev_name_end != ':')
+		return invalfc(fc, "separator ':' missing in source");
+
+	/* Hand [dev_name, dev_name_end) to the monitor parser, ',' delimited. */
+	err = ceph_parse_mon_ips(dev_name, dev_name_end - dev_name,
+				 pctx->copts, fc->log.log, ',');
+	if (!err)
+		pctx->opts->new_dev_syntax = false;
+
+	return err;
+}
+
+/*
+ * Parse a new-style device spec, name@fsid.fsname, that ends at the '='
+ * separator pointed to by dev_name_end.
+ *
+ * Validates the fsid and records the file system name as
+ * fsopt->mds_namespace, refusing a name that conflicts with one that is
+ * already set.  Returns 0 on success, -ENOMEM on allocation failure, and
+ * -EINVAL when '=' is absent; the other failures return whatever
+ * invalfc() yields.
+ */
+static int ceph_parse_new_source(const char *dev_name, const char *dev_name_end,
+				 struct fs_context *fc)
+{
+	size_t len;
+	struct ceph_fsid fsid;
+	struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+	struct ceph_mount_options *fsopt = pctx->opts;
+	const char *fsid_start, *fs_name_start;
+
+	if (*dev_name_end != '=') {
+		dout("separator '=' missing in source");
+		return -EINVAL;
+	}
+
+	/*
+	 * strchr() is not bounded by dev_name_end, so it can match a
+	 * character that only occurs in the path after the separator.
+	 * Such a hit is not part of the device spec and must be rejected;
+	 * otherwise the name length computed below would go negative and
+	 * wrap around as a size_t.
+	 */
+	fsid_start = strchr(dev_name, '@');
+	if (!fsid_start || fsid_start >= dev_name_end)
+		return invalfc(fc, "missing cluster fsid");
+	++fsid_start; /* start of cluster fsid */
+
+	fs_name_start = strchr(fsid_start, '.');
+	if (!fs_name_start || fs_name_start >= dev_name_end)
+		return invalfc(fc, "missing file system name");
+
+	if (ceph_parse_fsid(fsid_start, &fsid))
+		return invalfc(fc, "Invalid FSID");
+
+	++fs_name_start; /* start of file system name */
+	len = dev_name_end - fs_name_start; /* >= 0 thanks to the checks above */
+
+	if (!namespace_equals(fsopt, fs_name_start, len))
+		return invalfc(fc, "Mismatching mds_namespace");
+	kfree(fsopt->mds_namespace);
+	fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL);
+	if (!fsopt->mds_namespace)
+		return -ENOMEM;
+	dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace);
+
+	fsopt->new_dev_syntax = true;
+	return 0;
+}
+
+/*
+ * Parse the source parameter for new device format. Distinguish the device
+ * spec from the path. Try parsing the new device format and fall back to
+ * the old format if needed.
+ *
+ * New device syntax looks like:
+ * <device_spec>=/<path>
+ * where
+ * <device_spec> is name@fsid.fsname
+ * <path> is optional, but if present must begin with '/'
+ * (monitor addresses are passed via mount option)
*
- * The source will look like:
+ * Old device syntax is:
* <server_spec>[,<server_spec>...]:[<path>]
* where
* <server_spec> is <ip>[:<port>]
@@ -263,24 +354,44 @@ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc)
dev_name_end = dev_name + strlen(dev_name);
}
- dev_name_end--; /* back up to ':' separator */
- if (dev_name_end < dev_name || *dev_name_end != ':')
- return invalfc(fc, "No path or : separator in source");
+ dev_name_end--; /* back up to separator */
+ if (dev_name_end < dev_name)
+ return invalfc(fc, "Path missing in source");
dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
if (fsopt->server_path)
dout("server path '%s'\n", fsopt->server_path);
- ret = ceph_parse_mon_ips(param->string, dev_name_end - dev_name,
- pctx->copts, fc->log.log);
- if (ret)
- return ret;
+ dout("trying new device syntax");
+ ret = ceph_parse_new_source(dev_name, dev_name_end, fc);
+ if (ret) {
+ if (ret != -EINVAL)
+ return ret;
+ dout("trying old device syntax");
+ ret = ceph_parse_old_source(dev_name, dev_name_end, fc);
+ if (ret)
+ return ret;
+ }
fc->source = param->string;
param->string = NULL;
return 0;
}
+static int ceph_parse_mon_addr(struct fs_parameter *param,
+			       struct fs_context *fc)
+{
+	struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+	struct ceph_mount_options *fsopt = pctx->opts;
+	char *addrs = param->string;
+
+	/* Take the string over from param, replacing any earlier mon_addr. */
+	param->string = NULL;
+	kfree(fsopt->mon_addr);
+	fsopt->mon_addr = addrs;
+
+	/* The stored string is parsed with '/' as the delimiter argument. */
+	return ceph_parse_mon_ips(addrs, strlen(addrs), pctx->copts,
+				  fc->log.log, '/');
+}
+
static int ceph_parse_mount_param(struct fs_context *fc,
struct fs_parameter *param)
{
@@ -306,6 +417,8 @@ static int ceph_parse_mount_param(struct fs_context *fc,
param->string = NULL;
break;
case Opt_mds_namespace:
+ if (!namespace_equals(fsopt, param->string, strlen(param->string)))
+ return invalfc(fc, "Mismatching mds_namespace");
kfree(fsopt->mds_namespace);
fsopt->mds_namespace = param->string;
param->string = NULL;
@@ -323,6 +436,8 @@ static int ceph_parse_mount_param(struct fs_context *fc,
if (fc->source)
return invalfc(fc, "Multiple sources specified");
return ceph_parse_source(param, fc);
+ case Opt_mon_addr:
+ return ceph_parse_mon_addr(param, fc);
case Opt_wsize:
if (result.uint_32 < PAGE_SIZE ||
result.uint_32 > CEPH_MAX_WRITE_SIZE)
@@ -455,6 +570,12 @@ static int ceph_parse_mount_param(struct fs_context *fc,
else
fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS;
break;
+ case Opt_pagecache:
+ if (result.negated)
+ fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE;
+ else
+ fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE;
+ break;
default:
BUG();
}
@@ -474,6 +595,7 @@ static void destroy_mount_options(struct ceph_mount_options *args)
kfree(args->mds_namespace);
kfree(args->server_path);
kfree(args->fscache_uniq);
+ kfree(args->mon_addr);
kfree(args);
}
@@ -517,6 +639,10 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt,
if (ret)
return ret;
+ ret = strcmp_null(fsopt1->mon_addr, fsopt2->mon_addr);
+ if (ret)
+ return ret;
+
return ceph_compare_options(new_opt, fsc->client);
}
@@ -572,15 +698,22 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0)
seq_puts(m, ",copyfrom");
- if (fsopt->mds_namespace)
+ /* dump mds_namespace when old device syntax is in use */
+ if (fsopt->mds_namespace && !fsopt->new_dev_syntax)
seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
+ if (fsopt->mon_addr)
+ seq_printf(m, ",mon_addr=%s", fsopt->mon_addr);
+
if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER)
seq_show_option(m, "recover_session", "clean");
if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS))
seq_puts(m, ",wsync");
+ if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
+ seq_puts(m, ",nopagecache");
+
if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
seq_printf(m, ",wsize=%u", fsopt->wsize);
if (fsopt->rsize != CEPH_MAX_READ_SIZE)
@@ -671,6 +804,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
fsc->have_copy_from2 = true;
atomic_long_set(&fsc->writeback_count, 0);
+ fsc->write_congested = false;
err = -ENOMEM;
/*
@@ -684,6 +818,9 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
if (!fsc->cap_wq)
goto fail_inode_wq;
+ hash_init(fsc->async_unlink_conflict);
+ spin_lock_init(&fsc->async_unlink_conflict_lock);
+
spin_lock(&ceph_fsc_lock);
list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list);
spin_unlock(&ceph_fsc_lock);
@@ -733,6 +870,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
*/
struct kmem_cache *ceph_inode_cachep;
struct kmem_cache *ceph_cap_cachep;
+struct kmem_cache *ceph_cap_snap_cachep;
struct kmem_cache *ceph_cap_flush_cachep;
struct kmem_cache *ceph_dentry_cachep;
struct kmem_cache *ceph_file_cachep;
@@ -743,7 +881,7 @@ mempool_t *ceph_wb_pagevec_pool;
static void ceph_inode_init_once(void *foo)
{
struct ceph_inode_info *ci = foo;
- inode_init_once(&ci->vfs_inode);
+ inode_init_once(&ci->netfs.inode);
}
static int __init init_caches(void)
@@ -761,6 +899,9 @@ static int __init init_caches(void)
ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD);
if (!ceph_cap_cachep)
goto bad_cap;
+ ceph_cap_snap_cachep = KMEM_CACHE(ceph_cap_snap, SLAB_MEM_SPREAD);
+ if (!ceph_cap_snap_cachep)
+ goto bad_cap_snap;
ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
if (!ceph_cap_flush_cachep)
@@ -787,16 +928,10 @@ static int __init init_caches(void)
if (!ceph_wb_pagevec_pool)
goto bad_pagevec_pool;
- error = ceph_fscache_register();
- if (error)
- goto bad_fscache;
-
return 0;
-bad_fscache:
- kmem_cache_destroy(ceph_mds_request_cachep);
bad_pagevec_pool:
- mempool_destroy(ceph_wb_pagevec_pool);
+ kmem_cache_destroy(ceph_mds_request_cachep);
bad_mds_req:
kmem_cache_destroy(ceph_dir_file_cachep);
bad_dir_file:
@@ -806,6 +941,8 @@ bad_file:
bad_dentry:
kmem_cache_destroy(ceph_cap_flush_cachep);
bad_cap_flush:
+ kmem_cache_destroy(ceph_cap_snap_cachep);
+bad_cap_snap:
kmem_cache_destroy(ceph_cap_cachep);
bad_cap:
kmem_cache_destroy(ceph_inode_cachep);
@@ -822,14 +959,13 @@ static void destroy_caches(void)
kmem_cache_destroy(ceph_inode_cachep);
kmem_cache_destroy(ceph_cap_cachep);
+ kmem_cache_destroy(ceph_cap_snap_cachep);
kmem_cache_destroy(ceph_cap_flush_cachep);
kmem_cache_destroy(ceph_dentry_cachep);
kmem_cache_destroy(ceph_file_cachep);
kmem_cache_destroy(ceph_dir_file_cachep);
kmem_cache_destroy(ceph_mds_request_cachep);
mempool_destroy(ceph_wb_pagevec_pool);
-
- ceph_fscache_unregister();
}
static void __ceph_umount_begin(struct ceph_fs_client *fsc)
@@ -988,6 +1124,7 @@ static int ceph_set_super(struct super_block *s, struct fs_context *fc)
s->s_time_gran = 1;
s->s_time_min = 0;
s->s_time_max = U32_MAX;
+ s->s_flags |= SB_NODIRATIME | SB_NOATIME;
ret = set_anon_super_fc(s, fc);
if (ret != 0)
@@ -1060,6 +1197,7 @@ static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc)
static int ceph_get_tree(struct fs_context *fc)
{
struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+ struct ceph_mount_options *fsopt = pctx->opts;
struct super_block *sb;
struct ceph_fs_client *fsc;
struct dentry *res;
@@ -1071,6 +1209,8 @@ static int ceph_get_tree(struct fs_context *fc)
if (!fc->source)
return invalfc(fc, "No source");
+ if (fsopt->new_dev_syntax && !fsopt->mon_addr)
+ return invalfc(fc, "No monitor address");
/* create client (which we may/may not use) */
fsc = create_fs_client(pctx->opts, pctx->copts);
@@ -1156,6 +1296,13 @@ static int ceph_reconfigure_fc(struct fs_context *fc)
else
ceph_clear_mount_opt(fsc, ASYNC_DIROPS);
+ if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) {
+ kfree(fsc->mount_options->mon_addr);
+ fsc->mount_options->mon_addr = fsopt->mon_addr;
+ fsopt->mon_addr = NULL;
+ pr_notice("ceph: monitor addresses recorded, but not used for reconnection");
+ }
+
sync_filesystem(fc->root->d_sb);
return 0;
}
@@ -1333,6 +1480,14 @@ bool disable_send_metrics = false;
module_param_cb(disable_send_metrics, &param_ops_metrics, &disable_send_metrics, 0644);
MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)");
+/*
+ * Module parameters mount_syntax_v1 and mount_syntax_v2, for both v1 and
+ * v2 syntax.  Both are backed by the same flag, which is initialised to
+ * true.  Only a .get handler is supplied and the mode is 0444, so the
+ * values can be read but no setter is offered here.
+ * NOTE(review): presumably these let userspace mount helpers probe which
+ * device syntaxes the module accepts -- confirm against the helper.
+ */
+static bool mount_support = true;
+static const struct kernel_param_ops param_ops_mount_syntax = {
+.get = param_get_bool,
+};
+module_param_cb(mount_syntax_v1, &param_ops_mount_syntax, &mount_support, 0444);
+module_param_cb(mount_syntax_v2, &param_ops_mount_syntax, &mount_support, 0444);
+
module_init(init_ceph);
module_exit(exit_ceph);