aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/Kconfig4
-rw-r--r--fs/ceph/addr.c50
-rw-r--r--fs/ceph/caps.c49
-rw-r--r--fs/ceph/dir.c6
-rw-r--r--fs/ceph/export.c4
-rw-r--r--fs/ceph/file.c18
-rw-r--r--fs/ceph/inode.c63
-rw-r--r--fs/ceph/ioctl.c22
-rw-r--r--fs/ceph/locks.c2
-rw-r--r--fs/ceph/mds_client.c37
-rw-r--r--fs/ceph/mds_client.h10
-rw-r--r--fs/ceph/mdsmap.c12
-rw-r--r--fs/ceph/strings.c4
-rw-r--r--fs/ceph/super.c7
-rw-r--r--fs/ceph/super.h14
-rw-r--r--fs/ceph/xattr.c214
16 files changed, 359 insertions, 157 deletions
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 9eb134ea6eb2..49bc78243db9 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -1,6 +1,6 @@
config CEPH_FS
- tristate "Ceph distributed file system (EXPERIMENTAL)"
- depends on INET && EXPERIMENTAL
+ tristate "Ceph distributed file system"
+ depends on INET
select CEPH_LIB
select LIBCRC32C
select CRYPTO_AES
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 064d1a68d2c1..a60ea977af6f 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -195,7 +195,7 @@ static int ceph_releasepage(struct page *page, gfp_t g)
*/
static int readpage_nounlock(struct file *filp, struct page *page)
{
- struct inode *inode = filp->f_dentry->d_inode;
+ struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_client *osdc =
&ceph_inode_to_client(inode)->client->osdc;
@@ -236,16 +236,10 @@ static int ceph_readpage(struct file *filp, struct page *page)
static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
{
struct inode *inode = req->r_inode;
- struct ceph_osd_reply_head *replyhead;
- int rc, bytes;
+ int rc = req->r_result;
+ int bytes = le32_to_cpu(msg->hdr.data_len);
int i;
- /* parse reply */
- replyhead = msg->front.iov_base;
- WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
- rc = le32_to_cpu(replyhead->result);
- bytes = le32_to_cpu(msg->hdr.data_len);
-
dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
/* unlock all pages, zeroing any data we didn't read */
@@ -315,7 +309,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
NULL, 0,
ci->i_truncate_seq, ci->i_truncate_size,
- NULL, false, 1, 0);
+ NULL, false, 0);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -370,7 +364,7 @@ out:
static int ceph_readpages(struct file *file, struct address_space *mapping,
struct list_head *page_list, unsigned nr_pages)
{
- struct inode *inode = file->f_dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
int rc = 0;
int max = 0;
@@ -492,8 +486,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
&ci->i_layout, snapc,
page_off, len,
ci->i_truncate_seq, ci->i_truncate_size,
- &inode->i_mtime,
- &page, 1, 0, 0, true);
+ &inode->i_mtime, &page, 1);
if (err < 0) {
dout("writepage setting page/mapping error %d %p\n", err, page);
SetPageError(page);
@@ -554,27 +547,18 @@ static void writepages_finish(struct ceph_osd_request *req,
struct ceph_msg *msg)
{
struct inode *inode = req->r_inode;
- struct ceph_osd_reply_head *replyhead;
- struct ceph_osd_op *op;
struct ceph_inode_info *ci = ceph_inode(inode);
unsigned wrote;
struct page *page;
int i;
struct ceph_snap_context *snapc = req->r_snapc;
struct address_space *mapping = inode->i_mapping;
- __s32 rc = -EIO;
- u64 bytes = 0;
+ int rc = req->r_result;
+ u64 bytes = le64_to_cpu(req->r_request_ops[0].extent.length);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
long writeback_stat;
unsigned issued = ceph_caps_issued(ci);
- /* parse reply */
- replyhead = msg->front.iov_base;
- WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
- op = (void *)(replyhead + 1);
- rc = le32_to_cpu(replyhead->result);
- bytes = le64_to_cpu(op->extent.length);
-
if (rc >= 0) {
/*
* Assume we wrote the pages we originally sent. The
@@ -741,8 +725,6 @@ retry:
struct page *page;
int want;
u64 offset, len;
- struct ceph_osd_request_head *reqhead;
- struct ceph_osd_op *op;
long writeback_stat;
next = 0;
@@ -838,7 +820,7 @@ get_more_pages:
snapc, do_sync,
ci->i_truncate_seq,
ci->i_truncate_size,
- &inode->i_mtime, true, 1, 0);
+ &inode->i_mtime, true, 0);
if (IS_ERR(req)) {
rc = PTR_ERR(req);
@@ -906,10 +888,8 @@ get_more_pages:
/* revise final length, page count */
req->r_num_pages = locked_pages;
- reqhead = req->r_request->front.iov_base;
- op = (void *)(reqhead + 1);
- op->extent.length = cpu_to_le64(len);
- op->payload_len = cpu_to_le32(len);
+ req->r_request_ops[0].extent.length = cpu_to_le64(len);
+ req->r_request_ops[0].payload_len = cpu_to_le32(len);
req->r_request->hdr.data_len = cpu_to_le32(len);
rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
@@ -977,7 +957,7 @@ static int ceph_update_writeable_page(struct file *file,
loff_t pos, unsigned len,
struct page *page)
{
- struct inode *inode = file->f_dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
loff_t page_off = pos & PAGE_CACHE_MASK;
@@ -1086,7 +1066,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
- struct inode *inode = file->f_dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = file->private_data;
struct page *page;
@@ -1144,7 +1124,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
- struct inode *inode = file->f_dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -1228,7 +1208,7 @@ const struct address_space_operations ceph_aops = {
*/
static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- struct inode *inode = vma->vm_file->f_dentry->d_inode;
+ struct inode *inode = file_inode(vma->vm_file);
struct page *page = vmf->page;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
loff_t off = page_offset(page);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index a1d9bb30c1bf..78e2f575247d 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -611,8 +611,16 @@ retry:
if (flags & CEPH_CAP_FLAG_AUTH)
ci->i_auth_cap = cap;
- else if (ci->i_auth_cap == cap)
+ else if (ci->i_auth_cap == cap) {
ci->i_auth_cap = NULL;
+ spin_lock(&mdsc->cap_dirty_lock);
+ if (!list_empty(&ci->i_dirty_item)) {
+ dout(" moving %p to cap_dirty_migrating\n", inode);
+ list_move(&ci->i_dirty_item,
+ &mdsc->cap_dirty_migrating);
+ }
+ spin_unlock(&mdsc->cap_dirty_lock);
+ }
dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n",
inode, ceph_vinop(inode), cap, ceph_cap_string(issued),
@@ -930,7 +938,7 @@ static int send_cap_msg(struct ceph_mds_session *session,
u64 size, u64 max_size,
struct timespec *mtime, struct timespec *atime,
u64 time_warp_seq,
- uid_t uid, gid_t gid, umode_t mode,
+ kuid_t uid, kgid_t gid, umode_t mode,
u64 xattr_version,
struct ceph_buffer *xattrs_buf,
u64 follows)
@@ -974,8 +982,8 @@ static int send_cap_msg(struct ceph_mds_session *session,
ceph_encode_timespec(&fc->atime, atime);
fc->time_warp_seq = cpu_to_le32(time_warp_seq);
- fc->uid = cpu_to_le32(uid);
- fc->gid = cpu_to_le32(gid);
+ fc->uid = cpu_to_le32(from_kuid(&init_user_ns, uid));
+ fc->gid = cpu_to_le32(from_kgid(&init_user_ns, gid));
fc->mode = cpu_to_le32(mode);
fc->xattr_version = cpu_to_le64(xattr_version);
@@ -1081,8 +1089,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
struct timespec mtime, atime;
int wake = 0;
umode_t mode;
- uid_t uid;
- gid_t gid;
+ kuid_t uid;
+ kgid_t gid;
struct ceph_mds_session *session;
u64 xattr_version = 0;
struct ceph_buffer *xattr_blob = NULL;
@@ -1460,7 +1468,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct ceph_mds_client *mdsc = fsc->mdsc;
struct inode *inode = &ci->vfs_inode;
struct ceph_cap *cap;
- int file_wanted, used;
+ int file_wanted, used, cap_used;
int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */
int issued, implemented, want, retain, revoking, flushing = 0;
int mds = -1; /* keep track of how far we've gone through i_caps list
@@ -1563,9 +1571,14 @@ retry_locked:
/* NOTE: no side-effects allowed, until we take s_mutex */
+ cap_used = used;
+ if (ci->i_auth_cap && cap != ci->i_auth_cap)
+ cap_used &= ~ci->i_auth_cap->issued;
+
revoking = cap->implemented & ~cap->issued;
- dout(" mds%d cap %p issued %s implemented %s revoking %s\n",
+ dout(" mds%d cap %p used %s issued %s implemented %s revoking %s\n",
cap->mds, cap, ceph_cap_string(cap->issued),
+ ceph_cap_string(cap_used),
ceph_cap_string(cap->implemented),
ceph_cap_string(revoking));
@@ -1593,7 +1606,7 @@ retry_locked:
}
/* completed revocation? going down and there are no caps? */
- if (revoking && (revoking & used) == 0) {
+ if (revoking && (revoking & cap_used) == 0) {
dout("completed revocation of %s\n",
ceph_cap_string(cap->implemented & ~cap->issued));
goto ack;
@@ -1670,8 +1683,8 @@ ack:
sent++;
/* __send_cap drops i_ceph_lock */
- delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want,
- retain, flushing, NULL);
+ delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used,
+ want, retain, flushing, NULL);
goto retry; /* retake i_ceph_lock and restart our cap scan. */
}
@@ -2359,10 +2372,11 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
inode->i_mode = le32_to_cpu(grant->mode);
- inode->i_uid = le32_to_cpu(grant->uid);
- inode->i_gid = le32_to_cpu(grant->gid);
+ inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid));
+ inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid));
dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
- inode->i_uid, inode->i_gid);
+ from_kuid(&init_user_ns, inode->i_uid),
+ from_kgid(&init_user_ns, inode->i_gid));
}
if ((issued & CEPH_CAP_LINK_EXCL) == 0)
@@ -2416,7 +2430,9 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
dout("mds wanted %s -> %s\n",
ceph_cap_string(le32_to_cpu(grant->wanted)),
ceph_cap_string(wanted));
- grant->wanted = cpu_to_le32(wanted);
+ /* imported cap may not have correct mds_wanted */
+ if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT)
+ check_caps = 1;
}
cap->seq = seq;
@@ -2820,6 +2836,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
(unsigned)seq);
+ if (op == CEPH_CAP_OP_IMPORT)
+ ceph_add_cap_releases(mdsc, session);
+
/* lookup ino */
inode = ceph_find_inode(sb, vino);
ci = ceph_inode(inode);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 8c1aabe93b67..6d797f46d772 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -238,7 +238,7 @@ static int note_last_dentry(struct ceph_file_info *fi, const char *name,
static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
struct ceph_file_info *fi = filp->private_data;
- struct inode *inode = filp->f_dentry->d_inode;
+ struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -1138,7 +1138,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
loff_t *ppos)
{
struct ceph_file_info *cf = file->private_data;
- struct inode *inode = file->f_dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
int left;
const int bufsize = 1024;
@@ -1188,7 +1188,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
int datasync)
{
- struct inode *inode = file->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct list_head *head = &ci->i_unsafe_dirops;
struct ceph_mds_request *req;
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index ca3ab3f9ca70..16796be53ca5 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -81,7 +81,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
if (parent_inode) {
/* nfsd wants connectable */
*max_len = connected_handle_length;
- type = 255;
+ type = FILEID_INVALID;
} else {
dout("encode_fh %p\n", dentry);
fh->ino = ceph_ino(inode);
@@ -90,7 +90,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
}
} else {
*max_len = handle_length;
- type = 255;
+ type = FILEID_INVALID;
}
if (dentry)
dput(dentry);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index e51558fca3a3..bf338d9b67e3 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -243,6 +243,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
err = ceph_mdsc_do_request(mdsc,
(flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
req);
+ if (err)
+ goto out_err;
+
err = ceph_handle_snapdir(req, dentry, err);
if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
err = ceph_handle_notrace_create(dir, dentry);
@@ -263,6 +266,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
err = finish_no_open(file, dn);
} else {
dout("atomic_open finish_open on dn %p\n", dn);
+ if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
+ *opened |= FILE_CREATED;
+ }
err = finish_open(file, dentry, ceph_open, opened);
}
@@ -393,7 +399,7 @@ more:
static ssize_t ceph_sync_read(struct file *file, char __user *data,
unsigned len, loff_t *poff, int *checkeof)
{
- struct inode *inode = file->f_dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct page **pages;
u64 off = *poff;
int num_pages, ret;
@@ -466,7 +472,7 @@ static void sync_write_commit(struct ceph_osd_request *req,
static ssize_t ceph_sync_write(struct file *file, const char __user *data,
size_t left, loff_t *offset)
{
- struct inode *inode = file->f_dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_osd_request *req;
@@ -483,7 +489,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
int ret;
struct timespec mtime = CURRENT_TIME;
- if (ceph_snap(file->f_dentry->d_inode) != CEPH_NOSNAP)
+ if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
return -EROFS;
dout("sync_write on file %p %lld~%u %s\n", file, *offset,
@@ -535,7 +541,7 @@ more:
ci->i_snap_realm->cached_context,
do_sync,
ci->i_truncate_seq, ci->i_truncate_size,
- &mtime, false, 2, page_align);
+ &mtime, false, page_align);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -637,7 +643,7 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
struct ceph_file_info *fi = filp->private_data;
loff_t *ppos = &iocb->ki_pos;
size_t len = iov->iov_len;
- struct inode *inode = filp->f_dentry->d_inode;
+ struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
void __user *base = iov->iov_base;
ssize_t ret;
@@ -707,7 +713,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
{
struct file *file = iocb->ki_filp;
struct ceph_file_info *fi = file->private_data;
- struct inode *inode = file->f_dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_client *osdc =
&ceph_sb_to_client(inode->i_sb)->client->osdc;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 2971eaa65cdc..851814d951cd 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -612,10 +612,11 @@ static int fill_inode(struct inode *inode,
if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
inode->i_mode = le32_to_cpu(info->mode);
- inode->i_uid = le32_to_cpu(info->uid);
- inode->i_gid = le32_to_cpu(info->gid);
+ inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid));
+ inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid));
dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
- inode->i_uid, inode->i_gid);
+ from_kuid(&init_user_ns, inode->i_uid),
+ from_kgid(&init_user_ns, inode->i_gid));
}
if ((issued & CEPH_CAP_LINK_EXCL) == 0)
@@ -1130,8 +1131,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
req->r_request_started);
dout(" final dn %p\n", dn);
i++;
- } else if (req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
- req->r_op == CEPH_MDS_OP_MKSNAP) {
+ } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
+ req->r_op == CEPH_MDS_OP_MKSNAP) && !req->r_aborted) {
struct dentry *dn = req->r_dentry;
/* fill out a snapdir LOOKUPSNAP dentry */
@@ -1195,6 +1196,39 @@ done:
/*
* Prepopulate our cache with readdir results, leases, etc.
*/
+static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
+ struct ceph_mds_session *session)
+{
+ struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
+ int i, err = 0;
+
+ for (i = 0; i < rinfo->dir_nr; i++) {
+ struct ceph_vino vino;
+ struct inode *in;
+ int rc;
+
+ vino.ino = le64_to_cpu(rinfo->dir_in[i].in->ino);
+ vino.snap = le64_to_cpu(rinfo->dir_in[i].in->snapid);
+
+ in = ceph_get_inode(req->r_dentry->d_sb, vino);
+ if (IS_ERR(in)) {
+ err = PTR_ERR(in);
+ dout("new_inode badness got %d\n", err);
+ continue;
+ }
+ rc = fill_inode(in, &rinfo->dir_in[i], NULL, session,
+ req->r_request_started, -1,
+ &req->r_caps_reservation);
+ if (rc < 0) {
+ pr_err("fill_inode badness on %p got %d\n", in, rc);
+ err = rc;
+ continue;
+ }
+ }
+
+ return err;
+}
+
int ceph_readdir_prepopulate(struct ceph_mds_request *req,
struct ceph_mds_session *session)
{
@@ -1209,6 +1243,9 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
u64 frag = le32_to_cpu(rhead->args.readdir.frag);
struct ceph_dentry_info *di;
+ if (req->r_aborted)
+ return readdir_prepopulate_inodes_only(req, session);
+
if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) {
snapdir = ceph_get_snapdir(parent->d_inode);
parent = d_find_alias(snapdir);
@@ -1565,26 +1602,30 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
if (ia_valid & ATTR_UID) {
dout("setattr %p uid %d -> %d\n", inode,
- inode->i_uid, attr->ia_uid);
+ from_kuid(&init_user_ns, inode->i_uid),
+ from_kuid(&init_user_ns, attr->ia_uid));
if (issued & CEPH_CAP_AUTH_EXCL) {
inode->i_uid = attr->ia_uid;
dirtied |= CEPH_CAP_AUTH_EXCL;
} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
- attr->ia_uid != inode->i_uid) {
- req->r_args.setattr.uid = cpu_to_le32(attr->ia_uid);
+ !uid_eq(attr->ia_uid, inode->i_uid)) {
+ req->r_args.setattr.uid = cpu_to_le32(
+ from_kuid(&init_user_ns, attr->ia_uid));
mask |= CEPH_SETATTR_UID;
release |= CEPH_CAP_AUTH_SHARED;
}
}
if (ia_valid & ATTR_GID) {
dout("setattr %p gid %d -> %d\n", inode,
- inode->i_gid, attr->ia_gid);
+ from_kgid(&init_user_ns, inode->i_gid),
+ from_kgid(&init_user_ns, attr->ia_gid));
if (issued & CEPH_CAP_AUTH_EXCL) {
inode->i_gid = attr->ia_gid;
dirtied |= CEPH_CAP_AUTH_EXCL;
} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
- attr->ia_gid != inode->i_gid) {
- req->r_args.setattr.gid = cpu_to_le32(attr->ia_gid);
+ !gid_eq(attr->ia_gid, inode->i_gid)) {
+ req->r_args.setattr.gid = cpu_to_le32(
+ from_kgid(&init_user_ns, attr->ia_gid));
mask |= CEPH_SETATTR_GID;
release |= CEPH_CAP_AUTH_SHARED;
}
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 36549a46e311..4a989345b37b 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -16,11 +16,11 @@
*/
static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
{
- struct ceph_inode_info *ci = ceph_inode(file->f_dentry->d_inode);
+ struct ceph_inode_info *ci = ceph_inode(file_inode(file));
struct ceph_ioctl_layout l;
int err;
- err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT);
+ err = ceph_do_getattr(file_inode(file), CEPH_STAT_CAP_LAYOUT);
if (!err) {
l.stripe_unit = ceph_file_layout_su(ci->i_layout);
l.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
@@ -63,12 +63,12 @@ static long __validate_layout(struct ceph_mds_client *mdsc,
static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
{
- struct inode *inode = file->f_dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct inode *parent_inode;
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_mds_request *req;
struct ceph_ioctl_layout l;
- struct ceph_inode_info *ci = ceph_inode(file->f_dentry->d_inode);
+ struct ceph_inode_info *ci = ceph_inode(file_inode(file));
struct ceph_ioctl_layout nl;
int err;
@@ -76,7 +76,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
return -EFAULT;
/* validate changed params against current layout */
- err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT);
+ err = ceph_do_getattr(file_inode(file), CEPH_STAT_CAP_LAYOUT);
if (err)
return err;
@@ -136,7 +136,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
*/
static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
{
- struct inode *inode = file->f_dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct ceph_mds_request *req;
struct ceph_ioctl_layout l;
int err;
@@ -179,13 +179,12 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
{
struct ceph_ioctl_dataloc dl;
- struct inode *inode = file->f_dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_client *osdc =
&ceph_sb_to_client(inode->i_sb)->client->osdc;
u64 len = 1, olen;
u64 tmp;
- struct ceph_object_layout ol;
struct ceph_pg pgid;
int r;
@@ -194,7 +193,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
return -EFAULT;
down_read(&osdc->map_sem);
- r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len,
+ r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
&dl.object_no, &dl.object_offset,
&olen);
if (r < 0)
@@ -209,10 +208,9 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
ceph_ino(inode), dl.object_no);
- ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout,
+ ceph_calc_object_layout(&pgid, dl.object_name, &ci->i_layout,
osdc->osdmap);
- pgid = ol.ol_pgid;
dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
if (dl.osd >= 0) {
struct ceph_entity_addr *a =
@@ -234,7 +232,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
static long ceph_ioctl_lazyio(struct file *file)
{
struct ceph_file_info *fi = file->private_data;
- struct inode *inode = file->f_dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 80576d05d687..202dd3d68be0 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -13,7 +13,7 @@
static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
int cmd, u8 wait, struct file_lock *fl)
{
- struct inode *inode = file->f_dentry->d_inode;
+ struct inode *inode = file_inode(file);
struct ceph_mds_client *mdsc =
ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_mds_request *req;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9165eb8309eb..442880d099c9 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -233,6 +233,30 @@ bad:
}
/*
+ * parse create results
+ */
+static int parse_reply_info_create(void **p, void *end,
+ struct ceph_mds_reply_info_parsed *info,
+ int features)
+{
+ if (features & CEPH_FEATURE_REPLY_CREATE_INODE) {
+ if (*p == end) {
+ info->has_create_ino = false;
+ } else {
+ info->has_create_ino = true;
+ info->ino = ceph_decode_64(p);
+ }
+ }
+
+ if (unlikely(*p != end))
+ goto bad;
+ return 0;
+
+bad:
+ return -EIO;
+}
+
+/*
* parse extra results
*/
static int parse_reply_info_extra(void **p, void *end,
@@ -241,8 +265,12 @@ static int parse_reply_info_extra(void **p, void *end,
{
if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
return parse_reply_info_filelock(p, end, info, features);
- else
+ else if (info->head->op == CEPH_MDS_OP_READDIR)
return parse_reply_info_dir(p, end, info, features);
+ else if (info->head->op == CEPH_MDS_OP_CREATE)
+ return parse_reply_info_create(p, end, info, features);
+ else
+ return -EIO;
}
/*
@@ -1658,8 +1686,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
head->op = cpu_to_le32(req->r_op);
- head->caller_uid = cpu_to_le32(req->r_uid);
- head->caller_gid = cpu_to_le32(req->r_gid);
+ head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, req->r_uid));
+ head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, req->r_gid));
head->args = req->r_args;
ceph_encode_filepath(&p, end, ino1, path1);
@@ -2170,7 +2198,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
mutex_lock(&req->r_fill_mutex);
err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
if (err == 0) {
- if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK &&
+ if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
+ req->r_op == CEPH_MDS_OP_LSSNAP) &&
rinfo->dir_nr)
ceph_readdir_prepopulate(req, req->r_session);
ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index dd26846dd71d..c2a19fbbe517 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -74,6 +74,12 @@ struct ceph_mds_reply_info_parsed {
struct ceph_mds_reply_info_in *dir_in;
u8 dir_complete, dir_end;
};
+
+ /* for create results */
+ struct {
+ bool has_create_ino;
+ u64 ino;
+ };
};
/* encoded blob describing snapshot contexts for certain
@@ -184,8 +190,8 @@ struct ceph_mds_request {
union ceph_mds_request_args r_args;
int r_fmode; /* file mode, if expecting cap */
- uid_t r_uid;
- gid_t r_gid;
+ kuid_t r_uid;
+ kgid_t r_gid;
/* for choosing which mds to send this request to */
int r_direct_mode;
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 73b7d44e8a35..0d3c9240c61b 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -59,6 +59,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
return ERR_PTR(-ENOMEM);
ceph_decode_16_safe(p, end, version, bad);
+ if (version > 3) {
+ pr_warning("got mdsmap version %d > 3, failing", version);
+ goto bad;
+ }
ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad);
m->m_epoch = ceph_decode_32(p);
@@ -144,13 +148,13 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
/* pg_pools */
ceph_decode_32_safe(p, end, n, bad);
m->m_num_data_pg_pools = n;
- m->m_data_pg_pools = kcalloc(n, sizeof(u32), GFP_NOFS);
+ m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS);
if (!m->m_data_pg_pools)
goto badmem;
- ceph_decode_need(p, end, sizeof(u32)*(n+1), bad);
+ ceph_decode_need(p, end, sizeof(u64)*(n+1), bad);
for (i = 0; i < n; i++)
- m->m_data_pg_pools[i] = ceph_decode_32(p);
- m->m_cas_pg_pool = ceph_decode_32(p);
+ m->m_data_pg_pools[i] = ceph_decode_64(p);
+ m->m_cas_pg_pool = ceph_decode_64(p);
/* ok, we don't care about the rest. */
dout("mdsmap_decode success epoch %u\n", m->m_epoch);
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c
index cd5097d7c804..89fa4a940a0f 100644
--- a/fs/ceph/strings.c
+++ b/fs/ceph/strings.c
@@ -15,6 +15,7 @@ const char *ceph_mds_state_name(int s)
case CEPH_MDS_STATE_BOOT: return "up:boot";
case CEPH_MDS_STATE_STANDBY: return "up:standby";
case CEPH_MDS_STATE_STANDBY_REPLAY: return "up:standby-replay";
+ case CEPH_MDS_STATE_REPLAYONCE: return "up:oneshot-replay";
case CEPH_MDS_STATE_CREATING: return "up:creating";
case CEPH_MDS_STATE_STARTING: return "up:starting";
/* up and in */
@@ -50,10 +51,13 @@ const char *ceph_mds_op_name(int op)
case CEPH_MDS_OP_LOOKUP: return "lookup";
case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash";
case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent";
+ case CEPH_MDS_OP_LOOKUPINO: return "lookupino";
case CEPH_MDS_OP_GETATTR: return "getattr";
case CEPH_MDS_OP_SETXATTR: return "setxattr";
case CEPH_MDS_OP_SETATTR: return "setattr";
case CEPH_MDS_OP_RMXATTR: return "rmxattr";
+ case CEPH_MDS_OP_SETLAYOUT: return "setlayou";
+ case CEPH_MDS_OP_SETDIRLAYOUT: return "setdirlayout";
case CEPH_MDS_OP_READDIR: return "readdir";
case CEPH_MDS_OP_MKNOD: return "mknod";
case CEPH_MDS_OP_LINK: return "link";
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index e86aa9948124..9fe17c6c2876 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -71,8 +71,14 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
/*
* express utilization in terms of large blocks to avoid
* overflow on 32-bit machines.
+ *
+ * NOTE: for the time being, we make bsize == frsize to humor
+ * not-yet-ancient versions of glibc that are broken.
+ * Someday, we will probably want to report a real block
+ * size... whatever that may mean for a network file system!
*/
buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
+ buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
@@ -80,7 +86,6 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = le64_to_cpu(st.num_objects);
buf->f_ffree = -1;
buf->f_namelen = NAME_MAX;
- buf->f_frsize = PAGE_CACHE_SIZE;
/* leave fsid little-endian, regardless of host endianness */
fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 66ebe720e40d..c7b309723dcc 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -21,7 +21,7 @@
/* large granularity for statfs utilization stats to facilitate
* large volume sizes on 32-bit machines. */
-#define CEPH_BLOCK_SHIFT 20 /* 1 MB */
+#define CEPH_BLOCK_SHIFT 22 /* 4 MB */
#define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT)
#define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */
@@ -138,8 +138,8 @@ struct ceph_cap_snap {
struct ceph_snap_context *context;
umode_t mode;
- uid_t uid;
- gid_t gid;
+ kuid_t uid;
+ kgid_t gid;
struct ceph_buffer *xattr_blob;
u64 xattr_version;
@@ -798,13 +798,7 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
/* file.c */
extern const struct file_operations ceph_file_fops;
extern const struct address_space_operations ceph_aops;
-extern int ceph_copy_to_page_vector(struct page **pages,
- const char *data,
- loff_t off, size_t len);
-extern int ceph_copy_from_page_vector(struct page **pages,
- char *data,
- loff_t off, size_t len);
-extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
+
extern int ceph_open(struct inode *inode, struct file *file);
extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned flags, umode_t mode,
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 2c2ae5be9902..9b6b2b6dd164 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -29,9 +29,94 @@ struct ceph_vxattr {
size_t name_size; /* strlen(name) + 1 (for '\0') */
size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
size_t size);
- bool readonly;
+ bool readonly, hidden;
+ bool (*exists_cb)(struct ceph_inode_info *ci);
};
+/* layouts */
+
+static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
+{
+ size_t s;
+ char *p = (char *)&ci->i_layout;
+
+ for (s = 0; s < sizeof(ci->i_layout); s++, p++)
+ if (*p)
+ return true;
+ return false;
+}
+
+static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ int ret;
+ struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+ struct ceph_osd_client *osdc = &fsc->client->osdc;
+ s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
+ const char *pool_name;
+
+ dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
+ down_read(&osdc->map_sem);
+ pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
+ if (pool_name)
+ ret = snprintf(val, size,
+ "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s",
+ (unsigned long long)ceph_file_layout_su(ci->i_layout),
+ (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
+ (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
+ pool_name);
+ else
+ ret = snprintf(val, size,
+ "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
+ (unsigned long long)ceph_file_layout_su(ci->i_layout),
+ (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
+ (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
+ (unsigned long long)pool);
+
+ up_read(&osdc->map_sem);
+ return ret;
+}
+
+static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
+ char *val, size_t size)
+{
+ return snprintf(val, size, "%lld",
+ (unsigned long long)ceph_file_layout_su(ci->i_layout));
+}
+
+static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
+ char *val, size_t size)
+{
+ return snprintf(val, size, "%lld",
+ (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout));
+}
+
+static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
+ char *val, size_t size)
+{
+ return snprintf(val, size, "%lld",
+ (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
+}
+
+static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
+ char *val, size_t size)
+{
+ int ret;
+ struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+ struct ceph_osd_client *osdc = &fsc->client->osdc;
+ s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
+ const char *pool_name;
+
+ down_read(&osdc->map_sem);
+ pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
+ if (pool_name)
+ ret = snprintf(val, size, "%s", pool_name);
+ else
+ ret = snprintf(val, size, "%lld", (unsigned long long)pool);
+ up_read(&osdc->map_sem);
+ return ret;
+}
+
/* directories */
static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
@@ -83,17 +168,43 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
(long)ci->i_rctime.tv_nsec);
}
-#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
-#define XATTR_NAME_CEPH(_type, _name) \
- { \
- .name = CEPH_XATTR_NAME(_type, _name), \
- .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
- .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
- .readonly = true, \
- }
+#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
+#define CEPH_XATTR_NAME2(_type, _name, _name2) \
+ XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
+
+#define XATTR_NAME_CEPH(_type, _name) \
+ { \
+ .name = CEPH_XATTR_NAME(_type, _name), \
+ .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
+ .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
+ .readonly = true, \
+ .hidden = false, \
+ .exists_cb = NULL, \
+ }
+#define XATTR_LAYOUT_FIELD(_type, _name, _field) \
+ { \
+ .name = CEPH_XATTR_NAME2(_type, _name, _field), \
+ .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
+ .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \
+ .readonly = false, \
+ .hidden = true, \
+ .exists_cb = ceph_vxattrcb_layout_exists, \
+ }
static struct ceph_vxattr ceph_dir_vxattrs[] = {
+ {
+ .name = "ceph.dir.layout",
+ .name_size = sizeof("ceph.dir.layout"),
+ .getxattr_cb = ceph_vxattrcb_layout,
+ .readonly = false,
+ .hidden = false,
+ .exists_cb = ceph_vxattrcb_layout_exists,
+ },
+ XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
+ XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
+ XATTR_LAYOUT_FIELD(dir, layout, object_size),
+ XATTR_LAYOUT_FIELD(dir, layout, pool),
XATTR_NAME_CEPH(dir, entries),
XATTR_NAME_CEPH(dir, files),
XATTR_NAME_CEPH(dir, subdirs),
@@ -102,35 +213,26 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
XATTR_NAME_CEPH(dir, rsubdirs),
XATTR_NAME_CEPH(dir, rbytes),
XATTR_NAME_CEPH(dir, rctime),
- { 0 } /* Required table terminator */
+ { .name = NULL, 0 } /* Required table terminator */
};
static size_t ceph_dir_vxattrs_name_size; /* total size of all names */
/* files */
-static size_t ceph_vxattrcb_file_layout(struct ceph_inode_info *ci, char *val,
- size_t size)
-{
- int ret;
-
- ret = snprintf(val, size,
- "chunk_bytes=%lld\nstripe_count=%lld\nobject_size=%lld\n",
- (unsigned long long)ceph_file_layout_su(ci->i_layout),
- (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
- (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
- return ret;
-}
-
static struct ceph_vxattr ceph_file_vxattrs[] = {
- XATTR_NAME_CEPH(file, layout),
- /* The following extended attribute name is deprecated */
{
- .name = XATTR_CEPH_PREFIX "layout",
- .name_size = sizeof (XATTR_CEPH_PREFIX "layout"),
- .getxattr_cb = ceph_vxattrcb_file_layout,
- .readonly = true,
+ .name = "ceph.file.layout",
+ .name_size = sizeof("ceph.file.layout"),
+ .getxattr_cb = ceph_vxattrcb_layout,
+ .readonly = false,
+ .hidden = false,
+ .exists_cb = ceph_vxattrcb_layout_exists,
},
- { 0 } /* Required table terminator */
+ XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
+ XATTR_LAYOUT_FIELD(file, layout, stripe_count),
+ XATTR_LAYOUT_FIELD(file, layout, object_size),
+ XATTR_LAYOUT_FIELD(file, layout, pool),
+ { .name = NULL, 0 } /* Required table terminator */
};
static size_t ceph_file_vxattrs_name_size; /* total size of all names */
@@ -164,7 +266,8 @@ static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
size_t size = 0;
for (vxattr = vxattrs; vxattr->name; vxattr++)
- size += vxattr->name_size;
+ if (!vxattr->hidden)
+ size += vxattr->name_size;
return size;
}
@@ -572,13 +675,17 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
if (!ceph_is_valid_xattr(name))
return -ENODATA;
- /* let's see if a virtual xattr was requested */
- vxattr = ceph_match_vxattr(inode, name);
-
spin_lock(&ci->i_ceph_lock);
dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
ci->i_xattrs.version, ci->i_xattrs.index_version);
+ /* let's see if a virtual xattr was requested */
+ vxattr = ceph_match_vxattr(inode, name);
+ if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
+ err = vxattr->getxattr_cb(ci, value, size);
+ goto out;
+ }
+
if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
(ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
goto get_xattr;
@@ -592,11 +699,6 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
spin_lock(&ci->i_ceph_lock);
- if (vxattr && vxattr->readonly) {
- err = vxattr->getxattr_cb(ci, value, size);
- goto out;
- }
-
err = __build_xattrs(inode);
if (err < 0)
goto out;
@@ -604,11 +706,8 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
get_xattr:
err = -ENODATA; /* == ENOATTR */
xattr = __get_xattr(ci, name);
- if (!xattr) {
- if (vxattr)
- err = vxattr->getxattr_cb(ci, value, size);
+ if (!xattr)
goto out;
- }
err = -ERANGE;
if (size && size < xattr->val_len)
@@ -664,23 +763,30 @@ list_xattr:
vir_namelen = ceph_vxattrs_name_size(vxattrs);
/* adding 1 byte per each variable due to the null termination */
- namelen = vir_namelen + ci->i_xattrs.names_size + ci->i_xattrs.count;
+ namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
err = -ERANGE;
- if (size && namelen > size)
+ if (size && vir_namelen + namelen > size)
goto out;
- err = namelen;
+ err = namelen + vir_namelen;
if (size == 0)
goto out;
names = __copy_xattr_names(ci, names);
/* virtual xattr names, too */
- if (vxattrs)
+ err = namelen;
+ if (vxattrs) {
for (i = 0; vxattrs[i].name; i++) {
- len = sprintf(names, "%s", vxattrs[i].name);
- names += len + 1;
+ if (!vxattrs[i].hidden &&
+ !(vxattrs[i].exists_cb &&
+ !vxattrs[i].exists_cb(ci))) {
+ len = sprintf(names, "%s", vxattrs[i].name);
+ names += len + 1;
+ err += len + 1;
+ }
}
+ }
out:
spin_unlock(&ci->i_ceph_lock);
@@ -782,6 +888,10 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
if (vxattr && vxattr->readonly)
return -EOPNOTSUPP;
+ /* pass any unhandled ceph.* xattrs through to the MDS */
+ if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
+ goto do_sync_unlocked;
+
/* preallocate memory for xattr name, value, index node */
err = -ENOMEM;
newname = kmemdup(name, name_len + 1, GFP_NOFS);
@@ -838,6 +948,7 @@ retry:
do_sync:
spin_unlock(&ci->i_ceph_lock);
+do_sync_unlocked:
err = ceph_sync_setxattr(dentry, name, value, size, flags);
out:
kfree(newname);
@@ -892,6 +1003,10 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
if (vxattr && vxattr->readonly)
return -EOPNOTSUPP;
+ /* pass any unhandled ceph.* xattrs through to the MDS */
+ if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
+ goto do_sync_unlocked;
+
err = -ENOMEM;
spin_lock(&ci->i_ceph_lock);
retry:
@@ -931,6 +1046,7 @@ retry:
return err;
do_sync:
spin_unlock(&ci->i_ceph_lock);
+do_sync_unlocked:
err = ceph_send_removexattr(dentry, name);
out:
return err;