From 70db4f3629b3476cf506be869ef9d15688d2d44a Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 21 Oct 2014 18:09:56 -0700 Subject: ceph: introduce a new inode flag indicating if cached dentries are ordered After creating/deleting/renaming file, offsets of sibling dentries may change. So we can not use cached dentries to satisfy readdir. But we can still use the cached dentries to conclude -ENOENT for lookup. This patch introduces a new inode flag indicating if child dentries are ordered. The flag is set at the same time marking a directory complete. After creating/deleting/renaming file, we clear the flag on directory inode. This prevents ceph_readdir() from using cached dentries to satisfy readdir syscall. Signed-off-by: Yan, Zheng --- fs/ceph/inode.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'fs/ceph/inode.c') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 7b6139004401..72607c17e6fd 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -389,6 +389,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_version = 0; ci->i_time_warp_seq = 0; ci->i_ceph_flags = 0; + ci->i_ordered_count = 0; atomic_set(&ci->i_release_count, 1); atomic_set(&ci->i_complete_count, 0); ci->i_symlink = NULL; @@ -845,7 +846,8 @@ static int fill_inode(struct inode *inode, (issued & CEPH_CAP_FILE_EXCL) == 0 && !__ceph_dir_is_complete(ci)) { dout(" marking %p complete (empty)\n", inode); - __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); + __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count), + ci->i_ordered_count); } /* were we issued a capability? */ @@ -1206,8 +1208,8 @@ retry_lookup: ceph_invalidate_dentry_lease(dn); /* d_move screws up sibling dentries' offsets */ - ceph_dir_clear_complete(dir); - ceph_dir_clear_complete(olddir); + ceph_dir_clear_ordered(dir); + ceph_dir_clear_ordered(olddir); dout("dn %p gets new offset %lld\n", req->r_old_dentry, ceph_dentry(req->r_old_dentry)->offset); @@ -1219,6 +1221,7 @@ retry_lookup: if (!rinfo->head->is_target) { dout("fill_trace null dentry\n"); if (dn->d_inode) { + ceph_dir_clear_ordered(dir); dout("d_delete %p\n", dn); d_delete(dn); } else { @@ -1235,7 +1238,7 @@ retry_lookup: /* attach proper inode */ if (!dn->d_inode) { - ceph_dir_clear_complete(dir); + ceph_dir_clear_ordered(dir); ihold(in); dn = splice_dentry(dn, in, &have_lease); if (IS_ERR(dn)) { @@ -1265,7 +1268,7 @@ retry_lookup: BUG_ON(!dir); BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR); dout(" linking snapped dir %p to dn %p\n", in, dn); - ceph_dir_clear_complete(dir); + ceph_dir_clear_ordered(dir); ihold(in); dn = splice_dentry(dn, in, NULL); if (IS_ERR(dn)) { -- cgit v1.2.3-59-g8ed1b From 31c542a199d79f0f402c2f3e04229464510d47ec Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 14 Nov 2014 21:41:55 +0800 Subject: ceph: add inline data to pagecache Request reply and cap message can contain inline data. add inline data to the page cache if there is Fc cap. Signed-off-by: Yan, Zheng --- fs/ceph/addr.c | 43 +++++++++++++++++++++++++++++++++++++++++++ fs/ceph/caps.c | 11 +++++++++++ fs/ceph/inode.c | 16 ++++++++++++++++ fs/ceph/super.h | 5 ++++- fs/ceph/super.h.rej | 10 ++++++++++ include/linux/ceph/ceph_fs.h | 1 + 6 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 fs/ceph/super.h.rej (limited to 'fs/ceph/inode.c') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index f2c7aa878aa4..4a3f55f27ab4 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1318,6 +1318,49 @@ out: return ret; } +void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, + char *data, size_t len) +{ + struct address_space *mapping = inode->i_mapping; + struct page *page; + + if (locked_page) { + page = locked_page; + } else { + if (i_size_read(inode) == 0) + return; + page = find_or_create_page(mapping, 0, + mapping_gfp_mask(mapping) & ~__GFP_FS); + if (!page) + return; + if (PageUptodate(page)) { + unlock_page(page); + page_cache_release(page); + return; + } + } + + dout("fill_inline_data %p %llx.%llx len %lu locked_page %p\n", + inode, ceph_vinop(inode), len, locked_page); + + if (len > 0) { + void *kaddr = kmap_atomic(page); + memcpy(kaddr, data, len); + kunmap_atomic(kaddr); + } + + if (page != locked_page) { + if (len < PAGE_CACHE_SIZE) + zero_user_segment(page, len, PAGE_CACHE_SIZE); + else + flush_dcache_page(page); + + SetPageUptodate(page); + unlock_page(page); + page_cache_release(page); + } +} + static struct vm_operations_struct ceph_vmops = { .fault = ceph_filemap_fault, .page_mkwrite = ceph_page_mkwrite, diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index b88ae601f309..6372eb9ce491 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2405,6 +2405,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, bool queue_invalidate = false; bool queue_revalidate = false; bool deleted_inode = false; + bool fill_inline = false; dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", inode, cap, mds, seq, ceph_cap_string(newcaps)); @@ -2578,6 +2579,13 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, } BUG_ON(cap->issued & ~cap->implemented); + if (inline_version > 0 && inline_version >= ci->i_inline_version) { + ci->i_inline_version = inline_version; + if (ci->i_inline_version != CEPH_INLINE_NONE && + (newcaps & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO))) + fill_inline = true; + } + spin_unlock(&ci->i_ceph_lock); if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) { @@ -2591,6 +2599,9 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, wake = true; } + if (fill_inline) + ceph_fill_inline_data(inode, NULL, inline_data, inline_len); + if (queue_trunc) { ceph_queue_vmtruncate(inode); ceph_queue_revalidate(inode); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 72607c17e6fd..feea6a8f88ae 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -387,6 +387,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) spin_lock_init(&ci->i_ceph_lock); ci->i_version = 0; + ci->i_inline_version = 0; ci->i_time_warp_seq = 0; ci->i_ceph_flags = 0; ci->i_ordered_count = 0; @@ -676,6 +677,7 @@ static int fill_inode(struct inode *inode, bool wake = false; bool queue_trunc = false; bool new_version = false; + bool fill_inline = false; dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", inode, ceph_vinop(inode), le64_to_cpu(info->version), @@ -875,8 +877,22 @@ static int fill_inode(struct inode *inode, ceph_vinop(inode)); __ceph_get_fmode(ci, cap_fmode); } + + if (iinfo->inline_version > 0 && + iinfo->inline_version >= ci->i_inline_version) { + int cache_caps = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; + ci->i_inline_version = iinfo->inline_version; + if (ci->i_inline_version != CEPH_INLINE_NONE && + (le32_to_cpu(info->cap.caps) & cache_caps)) + fill_inline = true; + } + spin_unlock(&ci->i_ceph_lock); + if (fill_inline) + ceph_fill_inline_data(inode, NULL, + iinfo->inline_data, iinfo->inline_len); + if (wake) wake_up_all(&ci->i_cap_wq); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index fc1c8255dead..b0de9162758b 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -253,6 +253,7 @@ struct ceph_inode_info { spinlock_t i_ceph_lock; u64 i_version; + u64 i_inline_version; u32 i_time_warp_seq; unsigned i_ceph_flags; @@ -858,7 +859,7 @@ extern int ceph_encode_dentry_release(void **p, struct dentry *dn, int mds, int drop, int unless); extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, - int *got, loff_t endoff); + loff_t endoff, int *got, struct page **pinned_page); /* for counting open files by mode */ static inline void __ceph_get_fmode(struct ceph_inode_info *ci, int mode) @@ -880,6 +881,8 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned flags, umode_t mode, int *opened); extern int ceph_release(struct inode *inode, struct file *filp); +extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, + char *data, size_t len); /* dir.c */ extern const struct file_operations ceph_dir_fops; diff --git a/fs/ceph/super.h.rej b/fs/ceph/super.h.rej new file mode 100644 index 000000000000..88fe3dfadb29 --- /dev/null +++ b/fs/ceph/super.h.rej @@ -0,0 +1,10 @@ +--- fs/ceph/super.h ++++ fs/ceph/super.h +@@ -254,6 +255,7 @@ + spinlock_t i_ceph_lock; + + u64 i_version; ++ u64 i_inline_version; + u32 i_time_warp_seq; + + unsigned i_ceph_flags; diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 31d8b98b7f96..2d4acfa2c7b7 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -552,6 +552,7 @@ struct ceph_filelock { int ceph_flags_to_mode(int flags); +#define CEPH_INLINE_NONE ((__u64)-1) /* capability bits */ #define CEPH_CAP_PIN 1 /* no specific capabilities beyond the pin */ -- cgit v1.2.3-59-g8ed1b From 01deead041e03c9a6b4e1b2dd165dee4cced6112 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 14 Nov 2014 21:56:29 +0800 Subject: ceph: use getattr request to fetch inline data Add a new parameter 'locked_page' to ceph_do_getattr(). If inline data in getattr reply will be copied to the page. Signed-off-by: Yan, Zheng --- fs/ceph/inode.c | 34 +++++++++++++++++++++++++--------- fs/ceph/mds_client.h | 1 + fs/ceph/super.h | 7 ++++++- include/linux/ceph/ceph_fs.h | 2 ++ 4 files changed, 34 insertions(+), 10 deletions(-) (limited to 'fs/ceph/inode.c') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index feea6a8f88ae..0bdabd1bff8a 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -659,7 +659,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, * Populate an inode based on info from mds. May be called on new or * existing inodes. */ -static int fill_inode(struct inode *inode, +static int fill_inode(struct inode *inode, struct page *locked_page, struct ceph_mds_reply_info_in *iinfo, struct ceph_mds_reply_dirfrag *dirinfo, struct ceph_mds_session *session, @@ -883,14 +883,15 @@ static int fill_inode(struct inode *inode, int cache_caps = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; ci->i_inline_version = iinfo->inline_version; if (ci->i_inline_version != CEPH_INLINE_NONE && - (le32_to_cpu(info->cap.caps) & cache_caps)) + (locked_page || + (le32_to_cpu(info->cap.caps) & cache_caps))) fill_inline = true; } spin_unlock(&ci->i_ceph_lock); if (fill_inline) - ceph_fill_inline_data(inode, NULL, + ceph_fill_inline_data(inode, locked_page, iinfo->inline_data, iinfo->inline_len); if (wake) @@ -1080,7 +1081,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, struct inode *dir = req->r_locked_dir; if (dir) { - err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag, + err = fill_inode(dir, NULL, + &rinfo->diri, rinfo->dirfrag, session, req->r_request_started, -1, &req->r_caps_reservation); if (err < 0) @@ -1150,7 +1152,7 @@ retry_lookup: } req->r_target_inode = in; - err = fill_inode(in, &rinfo->targeti, NULL, + err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL, session, req->r_request_started, (!req->r_aborted && rinfo->head->result == 0) ? req->r_fmode : -1, @@ -1321,7 +1323,7 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, dout("new_inode badness got %d\n", err); continue; } - rc = fill_inode(in, &rinfo->dir_in[i], NULL, session, + rc = fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session, req->r_request_started, -1, &req->r_caps_reservation); if (rc < 0) { @@ -1437,7 +1439,7 @@ retry_lookup: } } - if (fill_inode(in, &rinfo->dir_in[i], NULL, session, + if (fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session, req->r_request_started, -1, &req->r_caps_reservation) < 0) { pr_err("fill_inode badness on %p\n", in); @@ -1920,7 +1922,8 @@ out_put: * Verify that we have a lease on the given mask. If not, * do a getattr against an mds. */ -int ceph_do_getattr(struct inode *inode, int mask, bool force) +int __ceph_do_getattr(struct inode *inode, struct page *locked_page, + int mask, bool force) { struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; @@ -1932,7 +1935,8 @@ int ceph_do_getattr(struct inode *inode, int mask, bool force) return 0; } - dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode); + dout("do_getattr inode %p mask %s mode 0%o\n", + inode, ceph_cap_string(mask), inode->i_mode); if (!force && ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) return 0; @@ -1943,7 +1947,19 @@ int ceph_do_getattr(struct inode *inode, int mask, bool force) ihold(inode); req->r_num_caps = 1; req->r_args.getattr.mask = cpu_to_le32(mask); + req->r_locked_page = locked_page; err = ceph_mdsc_do_request(mdsc, NULL, req); + if (locked_page && err == 0) { + u64 inline_version = req->r_reply_info.targeti.inline_version; + if (inline_version == 0) { + /* the reply is supposed to contain inline data */ + err = -EINVAL; + } else if (inline_version == CEPH_INLINE_NONE) { + err = -ENODATA; + } else { + err = req->r_reply_info.targeti.inline_len; + } + } ceph_mdsc_put_request(req); dout("do_getattr result=%d\n", err); return err; diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 01f5e4cfd684..e2817d00f7d9 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -223,6 +223,7 @@ struct ceph_mds_request { int r_request_release_offset; struct ceph_msg *r_reply; struct ceph_mds_reply_info_parsed r_reply_info; + struct page *r_locked_page; int r_err; bool r_aborted; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b0de9162758b..6d56fae863ca 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -744,7 +744,12 @@ extern void ceph_queue_vmtruncate(struct inode *inode); extern void ceph_queue_invalidate(struct inode *inode); extern void ceph_queue_writeback(struct inode *inode); -extern int ceph_do_getattr(struct inode *inode, int mask, bool force); +extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page, + int mask, bool force); +static inline int ceph_do_getattr(struct inode *inode, int mask, bool force) +{ + return __ceph_do_getattr(inode, NULL, mask, force); +} extern int ceph_permission(struct inode *inode, int mask); extern int ceph_setattr(struct dentry *dentry, struct iattr *attr); extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 2d4acfa2c7b7..c0dadaac26e3 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -617,6 +617,8 @@ int ceph_flags_to_mode(int flags); CEPH_CAP_LINK_SHARED | \ CEPH_CAP_FILE_SHARED | \ CEPH_CAP_XATTR_SHARED) +#define CEPH_STAT_CAP_INLINE_DATA (CEPH_CAP_FILE_SHARED | \ + CEPH_CAP_FILE_RD) #define CEPH_CAP_ANY_SHARED (CEPH_CAP_AUTH_SHARED | \ CEPH_CAP_LINK_SHARED | \ -- cgit v1.2.3-59-g8ed1b