diff options
Diffstat (limited to 'drivers/staging/lustre/lustre/llite')
29 files changed, 3254 insertions, 1530 deletions
diff --git a/drivers/staging/lustre/lustre/llite/Makefile b/drivers/staging/lustre/lustre/llite/Makefile index 9ac29e718da3..2ce10ff01b80 100644 --- a/drivers/staging/lustre/lustre/llite/Makefile +++ b/drivers/staging/lustre/lustre/llite/Makefile @@ -4,7 +4,8 @@ lustre-y := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o \ rw.o namei.o symlink.o llite_mmap.o \ xattr.o xattr_cache.o remote_perm.o llite_rmtacl.o \ rw26.o super25.o statahead.o \ - ../lclient/glimpse.o ../lclient/lcommon_cl.o ../lclient/lcommon_misc.o \ - vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o lproc_llite.o + glimpse.o lcommon_cl.o lcommon_misc.o \ + vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o vvp_req.o \ + lproc_llite.o llite_lloop-y := lloop.o diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c index dd1c827013b9..1b6f82a1a435 100644 --- a/drivers/staging/lustre/lustre/llite/dcache.c +++ b/drivers/staging/lustre/lustre/llite/dcache.c @@ -108,11 +108,8 @@ static int ll_dcompare(const struct dentry *parent, const struct dentry *dentry, static inline int return_if_equal(struct ldlm_lock *lock, void *data) { - if ((lock->l_flags & - (LDLM_FL_CANCELING | LDLM_FL_DISCARD_DATA)) == - (LDLM_FL_CANCELING | LDLM_FL_DISCARD_DATA)) - return LDLM_ITER_CONTINUE; - return LDLM_ITER_STOP; + return (ldlm_is_canceling(lock) && ldlm_is_discard_data(lock)) ? + LDLM_ITER_CONTINUE : LDLM_ITER_STOP; } /* find any ldlm lock of the inode in mdc and lov @@ -253,8 +250,8 @@ void ll_invalidate_aliases(struct inode *inode) { struct dentry *dentry; - CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n", - inode->i_ino, inode->i_generation, inode); + CDEBUG(D_INODE, "marking dentries for ino "DFID"(%p) invalid\n", + PFID(ll_inode2fid(inode)), inode); ll_lock_dcache(inode); hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { @@ -289,8 +286,8 @@ void ll_lookup_finish_locks(struct lookup_intent *it, struct inode *inode) if (it->d.lustre.it_lock_mode && inode) { struct ll_sb_info *sbi = ll_i2sbi(inode); - CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n", - inode, inode->i_ino, inode->i_generation); + CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"(%p)\n", + PFID(ll_inode2fid(inode)), inode); ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL); } diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c index e4c82883e580..4b00d1ac84fb 100644 --- a/drivers/staging/lustre/lustre/llite/dir.c +++ b/drivers/staging/lustre/lustre/llite/dir.c @@ -158,11 +158,16 @@ static int ll_dir_filler(void *_hash, struct page *page0) int i; int rc; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) hash %llu\n", - inode->i_ino, inode->i_generation, inode, hash); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) hash %llu\n", + PFID(ll_inode2fid(inode)), inode, hash); LASSERT(max_pages > 0 && max_pages <= MD_MAX_BRW_PAGES); + op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, + LUSTRE_OPC_ANY, NULL); + if (IS_ERR(op_data)) + return PTR_ERR(op_data); + page_pool = kcalloc(max_pages, sizeof(page), GFP_NOFS); if (page_pool) { page_pool[0] = page0; @@ -177,8 +182,6 @@ static int ll_dir_filler(void *_hash, struct page *page0) page_pool[npages] = page; } - op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, - LUSTRE_OPC_ANY, NULL); op_data->op_npages = npages; op_data->op_offset = hash; rc = md_readpage(exp, op_data, page_pool, &request); @@ -190,7 +193,7 @@ static int ll_dir_filler(void *_hash, struct page *page0) body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY); /* Checked by mdc_readpage() */ if (body->valid & OBD_MD_FLSIZE) - cl_isize_write(inode, body->size); + i_size_write(inode, body->size); nrdpgs = (request->rq_bulk->bd_nob_transferred+PAGE_SIZE-1) >> PAGE_SHIFT; @@ -372,8 +375,8 @@ struct page *ll_get_dir_page(struct inode *dir, __u64 hash, return ERR_PTR(rc); } - CDEBUG(D_INODE, "setting lr_lvb_inode to inode %p (%lu/%u)\n", - dir, dir->i_ino, dir->i_generation); + CDEBUG(D_INODE, "setting lr_lvb_inode to inode "DFID"(%p)\n", + PFID(ll_inode2fid(dir)), dir); md_set_lock_data(ll_i2sbi(dir)->ll_md_exp, &it.d.lustre.it_lock_handle, dir, NULL); } else { @@ -468,6 +471,28 @@ fail: goto out_unlock; } +/** + * return IF_* type for given lu_dirent entry. + * IF_* flag shld be converted to particular OS file type in + * platform llite module. + */ +static __u16 ll_dirent_type_get(struct lu_dirent *ent) +{ + __u16 type = 0; + struct luda_type *lt; + int len = 0; + + if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) { + const unsigned int align = sizeof(struct luda_type) - 1; + + len = le16_to_cpu(ent->lde_namelen); + len = (len + align) & ~align; + lt = (void *)ent->lde_name + len; + type = IFTODT(le16_to_cpu(lt->lt_type)); + } + return type; +} + int ll_dir_read(struct inode *inode, struct dir_context *ctx) { struct ll_inode_info *info = ll_i2info(inode); @@ -589,15 +614,16 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx) struct inode *inode = file_inode(filp); struct ll_file_data *lfd = LUSTRE_FPRIVATE(filp); struct ll_sb_info *sbi = ll_i2sbi(inode); + __u64 pos = lfd ? lfd->lfd_pos : 0; int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH; int api32 = ll_need_32bit_api(sbi); int rc; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu 32bit_api %d\n", - inode->i_ino, inode->i_generation, - inode, (unsigned long)lfd->lfd_pos, i_size_read(inode), api32); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) pos %lu/%llu 32bit_api %d\n", + PFID(ll_inode2fid(inode)), inode, (unsigned long)pos, + i_size_read(inode), api32); - if (lfd->lfd_pos == MDS_DIR_END_OFF) { + if (pos == MDS_DIR_END_OFF) { /* * end-of-file. */ @@ -605,9 +631,10 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx) goto out; } - ctx->pos = lfd->lfd_pos; + ctx->pos = pos; rc = ll_dir_read(inode, ctx); - lfd->lfd_pos = ctx->pos; + if (lfd) + lfd->lfd_pos = ctx->pos; if (ctx->pos == MDS_DIR_END_OFF) { if (api32) ctx->pos = LL_DIR_END_OFF_32BIT; @@ -804,9 +831,8 @@ int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp, rc = md_getattr(sbi->ll_md_exp, op_data, &req); ll_finish_md_op_data(op_data); if (rc < 0) { - CDEBUG(D_INFO, "md_getattr failed on inode %lu/%u: rc %d\n", - inode->i_ino, - inode->i_generation, rc); + CDEBUG(D_INFO, "md_getattr failed on inode "DFID": rc %d\n", + PFID(ll_inode2fid(inode)), rc); goto out; } @@ -916,7 +942,7 @@ static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy) } /* Read current file data version */ - rc = ll_data_version(inode, &data_version, 1); + rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH); iput(inode); if (rc != 0) { CDEBUG(D_HSM, "Could not read file data version of " @@ -936,6 +962,9 @@ static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy) } progress: + /* On error, the request should be considered as completed */ + if (hpk.hpk_errval > 0) + hpk.hpk_flags |= HP_FLAG_COMPLETED; rc = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk), &hpk, NULL); @@ -997,8 +1026,7 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy) goto progress; } - rc = ll_data_version(inode, &data_version, - copy->hc_hai.hai_action == HSMA_ARCHIVE); + rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH); iput(inode); if (rc) { CDEBUG(D_HSM, "Could not read file data version. Request could not be confirmed.\n"); @@ -1033,7 +1061,6 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy) /* hpk_errval must be >= 0 */ hpk.hpk_errval = EBUSY; } - } progress: @@ -1242,8 +1269,8 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct obd_ioctl_data *data; int rc = 0; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), cmd=%#x\n", - inode->i_ino, inode->i_generation, inode, cmd); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%#x\n", + PFID(ll_inode2fid(inode)), inode, cmd); /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */ if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */ @@ -1362,7 +1389,6 @@ out_free: lmv_out_free: obd_ioctl_freedata(buf, len); return rc; - } case LL_IOC_LOV_SETSTRIPE: { struct lov_user_md_v3 lumv3; @@ -1474,8 +1500,9 @@ free_lmv: cmd == LL_IOC_MDC_GETINFO)) { rc = 0; goto skip_lmm; - } else + } else { goto out_req; + } } if (cmd == IOC_MDC_GETFILESTRIPE || @@ -1688,15 +1715,16 @@ out_quotactl: return ll_flush_ctx(inode); #ifdef CONFIG_FS_POSIX_ACL case LL_IOC_RMTACL: { - if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) { - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) { + struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - rc = rct_add(&sbi->ll_rct, current_pid(), arg); - if (!rc) - fd->fd_flags |= LL_FILE_RMTACL; - return rc; - } else - return 0; + rc = rct_add(&sbi->ll_rct, current_pid(), arg); + if (!rc) + fd->fd_flags |= LL_FILE_RMTACL; + return rc; + } else { + return 0; + } } #endif case LL_IOC_GETOBDCOUNT: { @@ -1817,6 +1845,9 @@ out_quotactl: return rc; } case LL_IOC_HSM_CT_START: + if (!capable(CFS_CAP_SYS_ADMIN)) + return -EPERM; + rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg, sizeof(struct lustre_kernelcomm)); return rc; @@ -1865,7 +1896,6 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) int api32 = ll_need_32bit_api(sbi); loff_t ret = -EINVAL; - inode_lock(inode); switch (origin) { case SEEK_SET: break; @@ -1903,7 +1933,6 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) goto out; out: - inode_unlock(inode); return ret; } @@ -1922,7 +1951,7 @@ const struct file_operations ll_dir_operations = { .open = ll_dir_open, .release = ll_dir_release, .read = generic_read_dir, - .iterate = ll_readdir, + .iterate_shared = ll_readdir, .unlocked_ioctl = ll_dir_ioctl, .fsync = ll_fsync, }; diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index cf619af3caf5..f47f2acaf90c 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -45,6 +45,7 @@ #include "../include/lustre_lite.h" #include <linux/pagemap.h> #include <linux/file.h> +#include <linux/mount.h> #include "llite_internal.h" #include "../include/lustre/ll_fiemap.h" @@ -87,8 +88,7 @@ void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data, op_data->op_attr.ia_ctime = inode->i_ctime; op_data->op_attr.ia_size = i_size_read(inode); op_data->op_attr_blocks = inode->i_blocks; - ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = - ll_inode_to_ext_flags(inode->i_flags); + op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags); op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch; if (fh) op_data->op_handle = *fh; @@ -170,13 +170,15 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, */ rc = ll_som_update(inode, op_data); if (rc) { - CERROR("inode %lu mdc Size-on-MDS update failed: rc = %d\n", - inode->i_ino, rc); + CERROR("%s: inode "DFID" mdc Size-on-MDS update failed: rc = %d\n", + ll_i2mdexp(inode)->exp_obd->obd_name, + PFID(ll_inode2fid(inode)), rc); rc = 0; } } else if (rc) { - CERROR("inode %lu mdc close failed: rc = %d\n", - inode->i_ino, rc); + CERROR("%s: inode "DFID" mdc close failed: rc = %d\n", + ll_i2mdexp(inode)->exp_obd->obd_name, + PFID(ll_inode2fid(inode)), rc); } /* DATA_MODIFIED flag was successfully sent on close, cancel data @@ -278,7 +280,7 @@ static int ll_md_close(struct obd_export *md_exp, struct inode *inode, /* clear group lock, if present */ if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) - ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid); + ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid); if (fd->fd_lease_och) { bool lease_broken; @@ -343,8 +345,8 @@ int ll_file_release(struct inode *inode, struct file *file) struct ll_inode_info *lli = ll_i2info(inode); int rc; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", + PFID(ll_inode2fid(inode)), inode); #ifdef CONFIG_FS_POSIX_ACL if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) { @@ -543,8 +545,8 @@ int ll_file_open(struct inode *inode, struct file *file) struct ll_file_data *fd; int rc = 0, opendir_set = 0; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino, - inode->i_generation, inode, file->f_flags); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n", + PFID(ll_inode2fid(inode)), inode, file->f_flags); it = file->private_data; /* XXX: compat macro */ file->private_data = NULL; /* prevent ll_local_open assertion */ @@ -677,7 +679,9 @@ restart: if (rc) goto out_och_free; - LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF)); + LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF), + "inode %p: disposition %x, status %d\n", inode, + it_disposition(it, ~0), it->d.lustre.it_status); rc = ll_local_open(file, it, fd, *och_p); if (rc) @@ -875,16 +879,19 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode, return och; out_close: - rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL); - if (rc2) - CERROR("Close openhandle returned %d\n", rc2); - - /* cancel open lock */ + /* Cancel open lock */ if (it.d.lustre.it_lock_mode != 0) { ldlm_lock_decref_and_cancel(&och->och_lease_handle, it.d.lustre.it_lock_mode); it.d.lustre.it_lock_mode = 0; + och->och_lease_handle.cookie = 0ULL; } + rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL); + if (rc2 < 0) + CERROR("%s: error closing file "DFID": %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(&ll_i2info(inode)->lli_fid), rc2); + och = NULL; /* och has been freed in ll_close_inode_openhandle() */ out_release_it: ll_intent_release(&it); out: @@ -908,7 +915,7 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode, lock_res_and_lock(lock); cancelled = ldlm_is_cancel(lock); unlock_res_and_lock(lock); - ldlm_lock_put(lock); + LDLM_LOCK_PUT(lock); } CDEBUG(D_INODE, "lease for " DFID " broken? %d\n", @@ -926,7 +933,7 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode, /* Fills the obdo with the attributes for the lsm */ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp, - struct obdo *obdo, __u64 ioepoch, int sync) + struct obdo *obdo, __u64 ioepoch, int dv_flags) { struct ptlrpc_request_set *set; struct obd_info oinfo = { }; @@ -945,9 +952,11 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp, OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLGROUP | OBD_MD_FLEPOCH | OBD_MD_FLDATAVERSION; - if (sync) { + if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) { oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS; oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK; + if (dv_flags & LL_DV_WR_FLUSH) + oinfo.oi_oa->o_flags |= OBD_FL_FLUSH; } set = ptlrpc_prep_set(); @@ -960,11 +969,16 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp, rc = ptlrpc_set_wait(set); ptlrpc_set_destroy(set); } - if (rc == 0) + if (rc == 0) { oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLSIZE | - OBD_MD_FLDATAVERSION); + OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS); + if (dv_flags & LL_DV_WR_FLUSH && + !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS && + oinfo.oi_oa->o_flags & OBD_FL_FLUSH)) + return -ENOTSUPP; + } return rc; } @@ -980,7 +994,7 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo, lsm = ccc_inode_lsm_get(inode); rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode), - obdo, ioepoch, sync); + obdo, ioepoch, sync ? LL_DV_RD_FLUSH : 0); if (rc == 0) { struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi; @@ -994,50 +1008,57 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo, return rc; } -int ll_merge_lvb(const struct lu_env *env, struct inode *inode) +int ll_merge_attr(const struct lu_env *env, struct inode *inode) { struct ll_inode_info *lli = ll_i2info(inode); struct cl_object *obj = lli->lli_clob; - struct cl_attr *attr = ccc_env_thread_attr(env); - struct ost_lvb lvb; + struct cl_attr *attr = vvp_env_thread_attr(env); + s64 atime; + s64 mtime; + s64 ctime; int rc = 0; ll_inode_size_lock(inode); + /* merge timestamps the most recently obtained from mds with * timestamps obtained from osts */ - LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime; - LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime; - LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime; + LTIME_S(inode->i_atime) = lli->lli_atime; + LTIME_S(inode->i_mtime) = lli->lli_mtime; + LTIME_S(inode->i_ctime) = lli->lli_ctime; - lvb.lvb_size = i_size_read(inode); - lvb.lvb_blocks = inode->i_blocks; - lvb.lvb_mtime = LTIME_S(inode->i_mtime); - lvb.lvb_atime = LTIME_S(inode->i_atime); - lvb.lvb_ctime = LTIME_S(inode->i_ctime); + mtime = LTIME_S(inode->i_mtime); + atime = LTIME_S(inode->i_atime); + ctime = LTIME_S(inode->i_ctime); cl_object_attr_lock(obj); rc = cl_object_attr_get(env, obj, attr); cl_object_attr_unlock(obj); - if (rc == 0) { - if (lvb.lvb_atime < attr->cat_atime) - lvb.lvb_atime = attr->cat_atime; - if (lvb.lvb_ctime < attr->cat_ctime) - lvb.lvb_ctime = attr->cat_ctime; - if (lvb.lvb_mtime < attr->cat_mtime) - lvb.lvb_mtime = attr->cat_mtime; + if (rc != 0) + goto out_size_unlock; - CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n", - PFID(&lli->lli_fid), attr->cat_size); - cl_isize_write_nolock(inode, attr->cat_size); + if (atime < attr->cat_atime) + atime = attr->cat_atime; - inode->i_blocks = attr->cat_blocks; + if (ctime < attr->cat_ctime) + ctime = attr->cat_ctime; - LTIME_S(inode->i_mtime) = lvb.lvb_mtime; - LTIME_S(inode->i_atime) = lvb.lvb_atime; - LTIME_S(inode->i_ctime) = lvb.lvb_ctime; - } + if (mtime < attr->cat_mtime) + mtime = attr->cat_mtime; + + CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n", + PFID(&lli->lli_fid), attr->cat_size); + + i_size_write(inode, attr->cat_size); + + inode->i_blocks = attr->cat_blocks; + + LTIME_S(inode->i_mtime) = mtime; + LTIME_S(inode->i_atime) = atime; + LTIME_S(inode->i_ctime) = ctime; + +out_size_unlock: ll_inode_size_unlock(inode); return rc; @@ -1120,47 +1141,48 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args, struct cl_io *io; ssize_t result; + CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: %llu, count: %zd\n", + file->f_path.dentry->d_name.name, iot, *ppos, count); + restart: - io = ccc_env_thread_io(env); + io = vvp_env_thread_io(env); ll_io_init(io, file, iot == CIT_WRITE); if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) { struct vvp_io *vio = vvp_env_io(env); - struct ccc_io *cio = ccc_env_io(env); int write_mutex_locked = 0; - cio->cui_fd = LUSTRE_FPRIVATE(file); - vio->cui_io_subtype = args->via_io_subtype; + vio->vui_fd = LUSTRE_FPRIVATE(file); + vio->vui_io_subtype = args->via_io_subtype; - switch (vio->cui_io_subtype) { + switch (vio->vui_io_subtype) { case IO_NORMAL: - cio->cui_iter = args->u.normal.via_iter; - cio->cui_iocb = args->u.normal.via_iocb; + vio->vui_iter = args->u.normal.via_iter; + vio->vui_iocb = args->u.normal.via_iocb; if ((iot == CIT_WRITE) && - !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { + !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { if (mutex_lock_interruptible(&lli-> lli_write_mutex)) { result = -ERESTARTSYS; goto out; } write_mutex_locked = 1; - } else if (iot == CIT_READ) { - down_read(&lli->lli_trunc_sem); } + down_read(&lli->lli_trunc_sem); break; case IO_SPLICE: - vio->u.splice.cui_pipe = args->u.splice.via_pipe; - vio->u.splice.cui_flags = args->u.splice.via_flags; + vio->u.splice.vui_pipe = args->u.splice.via_pipe; + vio->u.splice.vui_flags = args->u.splice.via_flags; break; default: - CERROR("Unknown IO type - %u\n", vio->cui_io_subtype); + CERROR("Unknown IO type - %u\n", vio->vui_io_subtype); LBUG(); } result = cl_io_loop(env, io); + if (args->via_io_subtype == IO_NORMAL) + up_read(&lli->lli_trunc_sem); if (write_mutex_locked) mutex_unlock(&lli->lli_write_mutex); - else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ) - up_read(&lli->lli_trunc_sem); } else { /* cl_io_rw_init() handled IO */ result = io->ci_result; @@ -1197,6 +1219,7 @@ out: fd->fd_write_failed = true; } } + CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result); return result; } @@ -1212,7 +1235,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (IS_ERR(env)) return PTR_ERR(env); - args = vvp_env_args(env, IO_NORMAL); + args = ll_env_args(env, IO_NORMAL); args->u.normal.via_iter = to; args->u.normal.via_iocb = iocb; @@ -1236,7 +1259,7 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (IS_ERR(env)) return PTR_ERR(env); - args = vvp_env_args(env, IO_NORMAL); + args = ll_env_args(env, IO_NORMAL); args->u.normal.via_iter = from; args->u.normal.via_iocb = iocb; @@ -1262,7 +1285,7 @@ static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos, if (IS_ERR(env)) return PTR_ERR(env); - args = vvp_env_args(env, IO_SPLICE); + args = ll_env_args(env, IO_SPLICE); args->u.splice.via_pipe = pipe; args->u.splice.via_flags = flags; @@ -1354,7 +1377,8 @@ static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg) } int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, - int flags, struct lov_user_md *lum, int lum_size) + __u64 flags, struct lov_user_md *lum, + int lum_size) { struct lov_stripe_md *lsm = NULL; struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags}; @@ -1363,8 +1387,8 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, lsm = ccc_inode_lsm_get(inode); if (lsm) { ccc_inode_lsm_put(inode, lsm); - CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n", - inode->i_ino); + CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n", + PFID(ll_inode2fid(inode))); rc = -EEXIST; goto out; } @@ -1478,7 +1502,7 @@ out: static int ll_lov_setea(struct inode *inode, struct file *file, unsigned long arg) { - int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE; + __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE; struct lov_user_md *lump; int lum_size = sizeof(struct lov_user_md) + sizeof(struct lov_user_ost_data); @@ -1512,7 +1536,7 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file, struct lov_user_md_v1 __user *lumv1p = (void __user *)arg; struct lov_user_md_v3 __user *lumv3p = (void __user *)arg; int lum_size, rc; - int flags = FMODE_WRITE; + __u64 flags = FMODE_WRITE; /* first try with v1 which is smaller than v3 */ lum_size = sizeof(struct lov_user_md_v1); @@ -1561,7 +1585,7 @@ ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg) { struct ll_inode_info *lli = ll_i2info(inode); struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - struct ccc_grouplock grouplock; + struct ll_grouplock grouplock; int rc; if (arg == 0) { @@ -1575,14 +1599,14 @@ ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg) spin_lock(&lli->lli_lock); if (fd->fd_flags & LL_FILE_GROUP_LOCKED) { CWARN("group lock already existed with gid %lu\n", - fd->fd_grouplock.cg_gid); + fd->fd_grouplock.lg_gid); spin_unlock(&lli->lli_lock); return -EINVAL; } - LASSERT(!fd->fd_grouplock.cg_lock); + LASSERT(!fd->fd_grouplock.lg_lock); spin_unlock(&lli->lli_lock); - rc = cl_get_grouplock(cl_i2info(inode)->lli_clob, + rc = cl_get_grouplock(ll_i2info(inode)->lli_clob, arg, (file->f_flags & O_NONBLOCK), &grouplock); if (rc) return rc; @@ -1608,7 +1632,7 @@ static int ll_put_grouplock(struct inode *inode, struct file *file, { struct ll_inode_info *lli = ll_i2info(inode); struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - struct ccc_grouplock grouplock; + struct ll_grouplock grouplock; spin_lock(&lli->lli_lock); if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) { @@ -1616,11 +1640,11 @@ static int ll_put_grouplock(struct inode *inode, struct file *file, CWARN("no group lock held\n"); return -EINVAL; } - LASSERT(fd->fd_grouplock.cg_lock); + LASSERT(fd->fd_grouplock.lg_lock); - if (fd->fd_grouplock.cg_gid != arg) { + if (fd->fd_grouplock.lg_gid != arg) { CWARN("group lock %lu doesn't match current id %lu\n", - arg, fd->fd_grouplock.cg_gid); + arg, fd->fd_grouplock.lg_gid); spin_unlock(&lli->lli_lock); return -EINVAL; } @@ -1861,11 +1885,12 @@ error: * This value is computed using stripe object version on OST. * Version is computed using server side locking. * - * @param extent_lock Take extent lock. Not needed if a process is already - * holding the OST object group locks. + * @param sync if do sync on the OST side; + * 0: no sync + * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs + * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs */ -int ll_data_version(struct inode *inode, __u64 *data_version, - int extent_lock) +int ll_data_version(struct inode *inode, __u64 *data_version, int flags) { struct lov_stripe_md *lsm = NULL; struct ll_sb_info *sbi = ll_i2sbi(inode); @@ -1887,7 +1912,7 @@ int ll_data_version(struct inode *inode, __u64 *data_version, goto out; } - rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, obdo, 0, extent_lock); + rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, obdo, 0, flags); if (rc == 0) { if (!(obdo->o_valid & OBD_MD_FLDATAVERSION)) rc = -EOPNOTSUPP; @@ -1923,7 +1948,7 @@ int ll_hsm_release(struct inode *inode) } /* Grab latest data_version and [am]time values */ - rc = ll_data_version(inode, &data_version, 1); + rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH); if (rc != 0) goto out; @@ -1933,7 +1958,7 @@ int ll_hsm_release(struct inode *inode) goto out; } - ll_merge_lvb(env, inode); + ll_merge_attr(env, inode); cl_env_nested_put(&nest, env); /* Release the file. @@ -2227,8 +2252,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct ll_file_data *fd = LUSTRE_FPRIVATE(file); int flags, rc; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino, - inode->i_generation, inode, cmd); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),cmd=%x\n", + PFID(ll_inode2fid(inode)), inode, cmd); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1); /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */ @@ -2331,9 +2356,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (copy_from_user(&idv, (char __user *)arg, sizeof(idv))) return -EFAULT; - rc = ll_data_version(inode, &idv.idv_version, - !(idv.idv_flags & LL_DV_NOFLUSH)); - + idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH; + rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags); if (rc == 0 && copy_to_user((char __user *)arg, &idv, sizeof(idv))) return -EFAULT; @@ -2499,7 +2523,7 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) rc = och->och_flags & (FMODE_READ | FMODE_WRITE); unlock_res_and_lock(lock); - ldlm_lock_put(lock); + LDLM_LOCK_PUT(lock); } } mutex_unlock(&lli->lli_och_mutex); @@ -2537,9 +2561,8 @@ static loff_t ll_file_seek(struct file *file, loff_t offset, int origin) retval = offset + ((origin == SEEK_END) ? i_size_read(inode) : (origin == SEEK_CUR) ? file->f_pos : 0); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n", - inode->i_ino, inode->i_generation, inode, retval, retval, - origin); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n", + PFID(ll_inode2fid(inode)), inode, retval, retval, origin); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1); if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) { @@ -2603,8 +2626,8 @@ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, if (IS_ERR(env)) return PTR_ERR(env); - io = ccc_env_thread_io(env); - io->ci_obj = cl_i2info(inode)->lli_clob; + io = vvp_env_thread_io(env); + io->ci_obj = ll_i2info(inode)->lli_clob; io->ci_ignore_layout = ignore_layout; /* initialize parameters for sync */ @@ -2634,8 +2657,8 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync) struct ptlrpc_request *req; int rc, err; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", + PFID(ll_inode2fid(inode)), inode); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1); rc = filemap_write_and_wait_range(inode->i_mapping, start, end); @@ -2693,8 +2716,8 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) int rc; int rc2 = 0; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n", - inode->i_ino, file_lock); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n", + PFID(ll_inode2fid(inode)), file_lock); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1); @@ -2777,9 +2800,9 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) if (IS_ERR(op_data)) return PTR_ERR(op_data); - CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n", - inode->i_ino, flock.l_flock.pid, flags, einfo.ei_mode, - flock.l_flock.start, flock.l_flock.end); + CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n", + PFID(ll_inode2fid(inode)), flock.l_flock.pid, flags, + einfo.ei_mode, flock.l_flock.start, flock.l_flock.end); rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, op_data, &lockh, &flock, 0, NULL /* req */, flags); @@ -2901,8 +2924,8 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits) struct obd_export *exp; int rc = 0; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%pd\n", - inode->i_ino, inode->i_generation, inode, dentry); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%pd\n", + PFID(ll_inode2fid(inode)), inode, dentry); exp = ll_i2mdexp(inode); @@ -2998,9 +3021,9 @@ static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits) /* if object isn't regular file, don't validate size */ if (!S_ISREG(inode->i_mode)) { - LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime; - LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime; - LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime; + LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime; + LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime; + LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime; } else { /* In case of restore, the MDT has the right size and has * already send it back without granting the layout lock, @@ -3124,8 +3147,8 @@ int ll_inode_permission(struct inode *inode, int mask) return rc; } - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n", - inode->i_ino, inode->i_generation, inode, inode->i_mode, mask); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n", + PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask); if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT) return lustre_check_remote_perm(inode, mask); @@ -3335,10 +3358,10 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock) int rc; CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n", - PFID(ll_inode2fid(inode)), !!(lock->l_flags & LDLM_FL_LVB_READY), + PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock), lock->l_lvb_data, lock->l_lvb_len); - if (lock->l_lvb_data && (lock->l_flags & LDLM_FL_LVB_READY)) + if (lock->l_lvb_data && ldlm_is_lvb_ready(lock)) return 0; /* if layout lock was granted right away, the layout is returned @@ -3415,14 +3438,14 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode, LASSERT(lock); LASSERT(ldlm_has_layout(lock)); - LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d", - inode, PFID(&lli->lli_fid), reconf); + LDLM_DEBUG(lock, "File "DFID"(%p) being reconfigured: %d", + PFID(&lli->lli_fid), inode, reconf); /* in case this is a caching lock and reinstate with new inode */ md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL); lock_res_and_lock(lock); - lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY); + lvb_ready = ldlm_is_lvb_ready(lock); unlock_res_and_lock(lock); /* checking lvb_ready is racy but this is okay. The worst case is * that multi processes may configure the file on the same time. @@ -3487,9 +3510,9 @@ out: /* wait for IO to complete if it's still being used. */ if (wait_layout) { - CDEBUG(D_INODE, "%s: %p/" DFID " wait for layout reconf.\n", + CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n", ll_get_fsname(inode->i_sb, NULL, 0), - inode, PFID(&lli->lli_fid)); + PFID(&lli->lli_fid), inode); memset(&conf, 0, sizeof(conf)); conf.coc_opc = OBJECT_CONF_WAIT; @@ -3498,7 +3521,8 @@ out: if (rc == 0) rc = -EAGAIN; - CDEBUG(D_INODE, "file: " DFID " waiting layout return: %d.\n", + CDEBUG(D_INODE, "%s: file="DFID" waiting layout return: %d.\n", + ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), rc); } return rc; @@ -3571,9 +3595,9 @@ again: it.it_op = IT_LAYOUT; lockh.cookie = 0ULL; - LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/" DFID "", - ll_get_fsname(inode->i_sb, NULL, 0), inode, - PFID(&lli->lli_fid)); + LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(&lli->lli_fid), inode); rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh, NULL, 0, NULL, 0); @@ -3601,7 +3625,7 @@ again: /** * This function send a restore request to the MDT */ -int ll_layout_restore(struct inode *inode) +int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length) { struct hsm_user_request *hur; int len, rc; @@ -3617,9 +3641,10 @@ int ll_layout_restore(struct inode *inode) hur->hur_request.hr_flags = 0; memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid, sizeof(hur->hur_user_item[0].hui_fid)); - hur->hur_user_item[0].hui_extent.length = -1; + hur->hur_user_item[0].hui_extent.offset = offset; + hur->hur_user_item[0].hui_extent.length = length; hur->hur_request.hr_itemcount = 1; - rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp, + rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp, len, hur, NULL); kfree(hur); return rc; diff --git a/drivers/staging/lustre/lustre/llite/glimpse.c b/drivers/staging/lustre/lustre/llite/glimpse.c new file mode 100644 index 000000000000..d8ea75424e2f --- /dev/null +++ b/drivers/staging/lustre/lustre/llite/glimpse.c @@ -0,0 +1,255 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * glimpse code shared between vvp and liblustre (and other Lustre clients in + * the future). + * + * Author: Nikita Danilov <nikita.danilov@sun.com> + * Author: Oleg Drokin <oleg.drokin@sun.com> + */ + +#include "../../include/linux/libcfs/libcfs.h" +#include "../include/obd_class.h" +#include "../include/obd_support.h" +#include "../include/obd.h" + +#include "../include/lustre_dlm.h" +#include "../include/lustre_lite.h" +#include "../include/lustre_mdc.h" +#include <linux/pagemap.h> +#include <linux/file.h> + +#include "../include/cl_object.h" +#include "../llite/llite_internal.h" + +static const struct cl_lock_descr whole_file = { + .cld_start = 0, + .cld_end = CL_PAGE_EOF, + .cld_mode = CLM_READ +}; + +/* + * Check whether file has possible unwriten pages. + * + * \retval 1 file is mmap-ed or has dirty pages + * 0 otherwise + */ +blkcnt_t dirty_cnt(struct inode *inode) +{ + blkcnt_t cnt = 0; + struct vvp_object *vob = cl_inode2vvp(inode); + void *results[1]; + + if (inode->i_mapping) + cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->page_tree, + results, 0, 1, + PAGECACHE_TAG_DIRTY); + if (cnt == 0 && atomic_read(&vob->vob_mmap_cnt) > 0) + cnt = 1; + + return (cnt > 0) ? 1 : 0; +} + +int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io, + struct inode *inode, struct cl_object *clob, int agl) +{ + struct ll_inode_info *lli = ll_i2info(inode); + const struct lu_fid *fid = lu_object_fid(&clob->co_lu); + int result; + + result = 0; + if (!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)) { + CDEBUG(D_DLMTRACE, "Glimpsing inode " DFID "\n", PFID(fid)); + if (lli->lli_has_smd) { + struct cl_lock *lock = vvp_env_lock(env); + struct cl_lock_descr *descr = &lock->cll_descr; + + /* NOTE: this looks like DLM lock request, but it may + * not be one. Due to CEF_ASYNC flag (translated + * to LDLM_FL_HAS_INTENT by osc), this is + * glimpse request, that won't revoke any + * conflicting DLM locks held. Instead, + * ll_glimpse_callback() will be called on each + * client holding a DLM lock against this file, + * and resulting size will be returned for each + * stripe. DLM lock on [0, EOF] is acquired only + * if there were no conflicting locks. If there + * were conflicting locks, enqueuing or waiting + * fails with -ENAVAIL, but valid inode + * attributes are returned anyway. + */ + *descr = whole_file; + descr->cld_obj = clob; + descr->cld_mode = CLM_READ; + descr->cld_enq_flags = CEF_ASYNC | CEF_MUST; + if (agl) + descr->cld_enq_flags |= CEF_AGL; + /* + * CEF_ASYNC is used because glimpse sub-locks cannot + * deadlock (because they never conflict with other + * locks) and, hence, can be enqueued out-of-order. + * + * CEF_MUST protects glimpse lock from conversion into + * a lockless mode. + */ + result = cl_lock_request(env, io, lock); + if (result < 0) + return result; + + if (!agl) { + ll_merge_attr(env, inode); + if (i_size_read(inode) > 0 && + inode->i_blocks == 0) { + /* + * LU-417: Add dirty pages block count + * lest i_blocks reports 0, some "cp" or + * "tar" may think it's a completely + * sparse file and skip it. + */ + inode->i_blocks = dirty_cnt(inode); + } + } + cl_lock_release(env, lock); + } else { + CDEBUG(D_DLMTRACE, "No objects for inode\n"); + ll_merge_attr(env, inode); + } + } + + return result; +} + +static int cl_io_get(struct inode *inode, struct lu_env **envout, + struct cl_io **ioout, int *refcheck) +{ + struct lu_env *env; + struct cl_io *io; + struct ll_inode_info *lli = ll_i2info(inode); + struct cl_object *clob = lli->lli_clob; + int result; + + if (S_ISREG(inode->i_mode)) { + env = cl_env_get(refcheck); + if (!IS_ERR(env)) { + io = vvp_env_thread_io(env); + io->ci_obj = clob; + *envout = env; + *ioout = io; + result = 1; + } else { + result = PTR_ERR(env); + } + } else { + result = 0; + } + return result; +} + +int cl_glimpse_size0(struct inode *inode, int agl) +{ + /* + * We don't need ast_flags argument to cl_glimpse_size(), because + * osc_lock_enqueue() takes care of the possible deadlock that said + * argument was introduced to avoid. + */ + /* + * XXX but note that ll_file_seek() passes LDLM_FL_BLOCK_NOWAIT to + * cl_glimpse_size(), which doesn't make sense: glimpse locks are not + * blocking anyway. + */ + struct lu_env *env = NULL; + struct cl_io *io = NULL; + int result; + int refcheck; + + result = cl_io_get(inode, &env, &io, &refcheck); + if (result > 0) { +again: + io->ci_verify_layout = 1; + result = cl_io_init(env, io, CIT_MISC, io->ci_obj); + if (result > 0) + /* + * nothing to do for this io. This currently happens + * when stripe sub-object's are not yet created. + */ + result = io->ci_result; + else if (result == 0) + result = cl_glimpse_lock(env, io, inode, io->ci_obj, + agl); + + OBD_FAIL_TIMEOUT(OBD_FAIL_GLIMPSE_DELAY, 2); + cl_io_fini(env, io); + if (unlikely(io->ci_need_restart)) + goto again; + cl_env_put(env, &refcheck); + } + return result; +} + +int cl_local_size(struct inode *inode) +{ + struct lu_env *env = NULL; + struct cl_io *io = NULL; + struct cl_object *clob; + int result; + int refcheck; + + if (!ll_i2info(inode)->lli_has_smd) + return 0; + + result = cl_io_get(inode, &env, &io, &refcheck); + if (result <= 0) + return result; + + clob = io->ci_obj; + result = cl_io_init(env, io, CIT_MISC, clob); + if (result > 0) { + result = io->ci_result; + } else if (result == 0) { + struct cl_lock *lock = vvp_env_lock(env); + + lock->cll_descr = whole_file; + lock->cll_descr.cld_enq_flags = CEF_PEEK; + lock->cll_descr.cld_obj = clob; + result = cl_lock_request(env, io, lock); + if (result == 0) { + ll_merge_attr(env, inode); + cl_lock_release(env, lock); + } + } + cl_io_fini(env, io); + cl_env_put(env, &refcheck); + return result; +} diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c new file mode 100644 index 000000000000..6c00715b438f --- /dev/null +++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c @@ -0,0 +1,327 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2015, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * cl code shared between vvp and liblustre (and other Lustre clients in the + * future). + * + * Author: Nikita Danilov <nikita.danilov@sun.com> + */ + +#define DEBUG_SUBSYSTEM S_LLITE + +#include "../../include/linux/libcfs/libcfs.h" +# include <linux/fs.h> +# include <linux/sched.h> +# include <linux/mm.h> +# include <linux/quotaops.h> +# include <linux/highmem.h> +# include <linux/pagemap.h> +# include <linux/rbtree.h> + +#include "../include/obd.h" +#include "../include/obd_support.h" +#include "../include/lustre_fid.h" +#include "../include/lustre_lite.h" +#include "../include/lustre_dlm.h" +#include "../include/lustre_ver.h" +#include "../include/lustre_mdc.h" +#include "../include/cl_object.h" + +#include "../llite/llite_internal.h" + +/* + * ccc_ prefix stands for "Common Client Code". + */ + +/***************************************************************************** + * + * Vvp device and device type functions. + * + */ + +/** + * An `emergency' environment used by cl_inode_fini() when cl_env_get() + * fails. Access to this environment is serialized by cl_inode_fini_guard + * mutex. + */ +struct lu_env *cl_inode_fini_env; +int cl_inode_fini_refcheck; + +/** + * A mutex serializing calls to slp_inode_fini() under extreme memory + * pressure, when environments cannot be allocated. + */ +static DEFINE_MUTEX(cl_inode_fini_guard); + +int cl_setattr_ost(struct inode *inode, const struct iattr *attr) +{ + struct lu_env *env; + struct cl_io *io; + int result; + int refcheck; + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + return PTR_ERR(env); + + io = vvp_env_thread_io(env); + io->ci_obj = ll_i2info(inode)->lli_clob; + + io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime); + io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime); + io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime); + io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size; + io->u.ci_setattr.sa_valid = attr->ia_valid; + +again: + if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) { + struct vvp_io *vio = vvp_env_io(env); + + if (attr->ia_valid & ATTR_FILE) + /* populate the file descriptor for ftruncate to honor + * group lock - see LU-787 + */ + vio->vui_fd = LUSTRE_FPRIVATE(attr->ia_file); + + result = cl_io_loop(env, io); + } else { + result = io->ci_result; + } + cl_io_fini(env, io); + if (unlikely(io->ci_need_restart)) + goto again; + /* HSM import case: file is released, cannot be restored + * no need to fail except if restore registration failed + * with -ENODATA + */ + if (result == -ENODATA && io->ci_restore_needed && + io->ci_result != -ENODATA) + result = 0; + cl_env_put(env, &refcheck); + return result; +} + +/** + * Initialize or update CLIO structures for regular files when new + * meta-data arrives from the server. + * + * \param inode regular file inode + * \param md new file metadata from MDS + * - allocates cl_object if necessary, + * - updated layout, if object was already here. + */ +int cl_file_inode_init(struct inode *inode, struct lustre_md *md) +{ + struct lu_env *env; + struct ll_inode_info *lli; + struct cl_object *clob; + struct lu_site *site; + struct lu_fid *fid; + struct cl_object_conf conf = { + .coc_inode = inode, + .u = { + .coc_md = md + } + }; + int result = 0; + int refcheck; + + LASSERT(md->body->valid & OBD_MD_FLID); + LASSERT(S_ISREG(inode->i_mode)); + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + return PTR_ERR(env); + + site = ll_i2sbi(inode)->ll_site; + lli = ll_i2info(inode); + fid = &lli->lli_fid; + LASSERT(fid_is_sane(fid)); + + if (!lli->lli_clob) { + /* clob is slave of inode, empty lli_clob means for new inode, + * there is no clob in cache with the given fid, so it is + * unnecessary to perform lookup-alloc-lookup-insert, just + * alloc and insert directly. + */ + LASSERT(inode->i_state & I_NEW); + conf.coc_lu.loc_flags = LOC_F_NEW; + clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev), + fid, &conf); + if (!IS_ERR(clob)) { + /* + * No locking is necessary, as new inode is + * locked by I_NEW bit. + */ + lli->lli_clob = clob; + lli->lli_has_smd = lsm_has_objects(md->lsm); + lu_object_ref_add(&clob->co_lu, "inode", inode); + } else { + result = PTR_ERR(clob); + } + } else { + result = cl_conf_set(env, lli->lli_clob, &conf); + } + + cl_env_put(env, &refcheck); + + if (result != 0) + CERROR("Failure to initialize cl object " DFID ": %d\n", + PFID(fid), result); + return result; +} + +/** + * Wait for others drop their references of the object at first, then we drop + * the last one, which will lead to the object be destroyed immediately. + * Must be called after cl_object_kill() against this object. + * + * The reason we want to do this is: destroying top object will wait for sub + * objects being destroyed first, so we can't let bottom layer (e.g. from ASTs) + * to initiate top object destroying which may deadlock. See bz22520. + */ +static void cl_object_put_last(struct lu_env *env, struct cl_object *obj) +{ + struct lu_object_header *header = obj->co_lu.lo_header; + wait_queue_t waiter; + + if (unlikely(atomic_read(&header->loh_ref) != 1)) { + struct lu_site *site = obj->co_lu.lo_dev->ld_site; + struct lu_site_bkt_data *bkt; + + bkt = lu_site_bkt_from_fid(site, &header->loh_fid); + + init_waitqueue_entry(&waiter, current); + add_wait_queue(&bkt->lsb_marche_funebre, &waiter); + + while (1) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (atomic_read(&header->loh_ref) == 1) + break; + schedule(); + } + + set_current_state(TASK_RUNNING); + remove_wait_queue(&bkt->lsb_marche_funebre, &waiter); + } + + cl_object_put(env, obj); +} + +void cl_inode_fini(struct inode *inode) +{ + struct lu_env *env; + struct ll_inode_info *lli = ll_i2info(inode); + struct cl_object *clob = lli->lli_clob; + int refcheck; + int emergency; + + if (clob) { + void *cookie; + + cookie = cl_env_reenter(); + env = cl_env_get(&refcheck); + emergency = IS_ERR(env); + if (emergency) { + mutex_lock(&cl_inode_fini_guard); + LASSERT(cl_inode_fini_env); + cl_env_implant(cl_inode_fini_env, &refcheck); + env = cl_inode_fini_env; + } + /* + * cl_object cache is a slave to inode cache (which, in turn + * is a slave to dentry cache), don't keep cl_object in memory + * when its master is evicted. + */ + cl_object_kill(env, clob); + lu_object_ref_del(&clob->co_lu, "inode", inode); + cl_object_put_last(env, clob); + lli->lli_clob = NULL; + if (emergency) { + cl_env_unplant(cl_inode_fini_env, &refcheck); + mutex_unlock(&cl_inode_fini_guard); + } else { + cl_env_put(env, &refcheck); + } + cl_env_reexit(cookie); + } +} + +/** + * build inode number from passed @fid + */ +__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32) +{ + if (BITS_PER_LONG == 32 || api32) + return fid_flatten32(fid); + else + return fid_flatten(fid); +} + +/** + * build inode generation from passed @fid. If our FID overflows the 32-bit + * inode number then return a non-zero generation to distinguish them. + */ +__u32 cl_fid_build_gen(const struct lu_fid *fid) +{ + __u32 gen; + + if (fid_is_igif(fid)) { + gen = lu_igif_gen(fid); + return gen; + } + + gen = fid_flatten(fid) >> 32; + return gen; +} + +/* lsm is unreliable after hsm implementation as layout can be changed at + * any time. This is only to support old, non-clio-ized interfaces. It will + * cause deadlock if clio operations are called with this extra layout refcount + * because in case the layout changed during the IO, ll_layout_refresh() will + * have to wait for the refcount to become zero to destroy the older layout. + * + * Notice that the lsm returned by this function may not be valid unless called + * inside layout lock - MDS_INODELOCK_LAYOUT. + */ +struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode) +{ + return lov_lsm_get(ll_i2info(inode)->lli_clob); +} + +inline void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm) +{ + lov_lsm_put(ll_i2info(inode)->lli_clob, lsm); +} diff --git a/drivers/staging/lustre/lustre/llite/lcommon_misc.c b/drivers/staging/lustre/lustre/llite/lcommon_misc.c new file mode 100644 index 000000000000..12f3e71f48c2 --- /dev/null +++ b/drivers/staging/lustre/lustre/llite/lcommon_misc.c @@ -0,0 +1,201 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * cl code shared between vvp and liblustre (and other Lustre clients in the + * future). + * + */ +#include "../include/obd_class.h" +#include "../include/obd_support.h" +#include "../include/obd.h" +#include "../include/cl_object.h" + +#include "../include/lustre_lite.h" +#include "llite_internal.h" + +/* Initialize the default and maximum LOV EA and cookie sizes. This allows + * us to make MDS RPCs with large enough reply buffers to hold the + * maximum-sized (= maximum striped) EA and cookie without having to + * calculate this (via a call into the LOV + OSCs) each time we make an RPC. + */ +int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp) +{ + struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC_V3 }; + __u32 valsize = sizeof(struct lov_desc); + int rc, easize, def_easize, cookiesize; + struct lov_desc desc; + __u16 stripes, def_stripes; + + rc = obd_get_info(NULL, dt_exp, sizeof(KEY_LOVDESC), KEY_LOVDESC, + &valsize, &desc, NULL); + if (rc) + return rc; + + stripes = min_t(__u32, desc.ld_tgt_count, LOV_MAX_STRIPE_COUNT); + lsm.lsm_stripe_count = stripes; + easize = obd_size_diskmd(dt_exp, &lsm); + + def_stripes = min_t(__u32, desc.ld_default_stripe_count, + LOV_MAX_STRIPE_COUNT); + lsm.lsm_stripe_count = def_stripes; + def_easize = obd_size_diskmd(dt_exp, &lsm); + + cookiesize = stripes * sizeof(struct llog_cookie); + + /* default cookiesize is 0 because from 2.4 server doesn't send + * llog cookies to client. + */ + CDEBUG(D_HA, + "updating def/max_easize: %d/%d def/max_cookiesize: 0/%d\n", + def_easize, easize, cookiesize); + + rc = md_init_ea_size(md_exp, easize, def_easize, cookiesize, 0); + return rc; +} + +/** + * This function is used as an upcall-callback hooked by liblustre and llite + * clients into obd_notify() listeners chain to handle notifications about + * change of import connect_flags. See llu_fsswop_mount() and + * lustre_common_fill_super(). + */ +int cl_ocd_update(struct obd_device *host, + struct obd_device *watched, + enum obd_notify_event ev, void *owner, void *data) +{ + struct lustre_client_ocd *lco; + struct client_obd *cli; + __u64 flags; + int result; + + if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) { + cli = &watched->u.cli; + lco = owner; + flags = cli->cl_import->imp_connect_data.ocd_connect_flags; + CDEBUG(D_SUPER, "Changing connect_flags: %#llx -> %#llx\n", + lco->lco_flags, flags); + mutex_lock(&lco->lco_lock); + lco->lco_flags &= flags; + /* for each osc event update ea size */ + if (lco->lco_dt_exp) + cl_init_ea_size(lco->lco_md_exp, lco->lco_dt_exp); + + mutex_unlock(&lco->lco_lock); + result = 0; + } else { + CERROR("unexpected notification from %s %s!\n", + watched->obd_type->typ_name, + watched->obd_name); + result = -EINVAL; + } + return result; +} + +#define GROUPLOCK_SCOPE "grouplock" + +int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock, + struct ll_grouplock *cg) +{ + struct lu_env *env; + struct cl_io *io; + struct cl_lock *lock; + struct cl_lock_descr *descr; + __u32 enqflags; + int refcheck; + int rc; + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + return PTR_ERR(env); + + io = vvp_env_thread_io(env); + io->ci_obj = obj; + io->ci_ignore_layout = 1; + + rc = cl_io_init(env, io, CIT_MISC, io->ci_obj); + if (rc != 0) { + cl_io_fini(env, io); + cl_env_put(env, &refcheck); + /* Does not make sense to take GL for released layout */ + if (rc > 0) + rc = -ENOTSUPP; + return rc; + } + + lock = vvp_env_lock(env); + descr = &lock->cll_descr; + descr->cld_obj = obj; + descr->cld_start = 0; + descr->cld_end = CL_PAGE_EOF; + descr->cld_gid = gid; + descr->cld_mode = CLM_GROUP; + + enqflags = CEF_MUST | (nonblock ? CEF_NONBLOCK : 0); + descr->cld_enq_flags = enqflags; + + rc = cl_lock_request(env, io, lock); + if (rc < 0) { + cl_io_fini(env, io); + cl_env_put(env, &refcheck); + return rc; + } + + cg->lg_env = cl_env_get(&refcheck); + cg->lg_io = io; + cg->lg_lock = lock; + cg->lg_gid = gid; + LASSERT(cg->lg_env == env); + + cl_env_unplant(env, &refcheck); + return 0; +} + +void cl_put_grouplock(struct ll_grouplock *cg) +{ + struct lu_env *env = cg->lg_env; + struct cl_io *io = cg->lg_io; + struct cl_lock *lock = cg->lg_lock; + int refcheck; + + LASSERT(cg->lg_env); + LASSERT(cg->lg_gid); + + cl_env_implant(env, &refcheck); + cl_env_put(env, &refcheck); + + cl_lock_release(env, lock); + cl_io_fini(env, io); + cl_env_put(env, NULL); +} diff --git a/drivers/staging/lustre/lustre/llite/llite_close.c b/drivers/staging/lustre/lustre/llite/llite_close.c index a55ac4dccd90..2df551d3ae6c 100644 --- a/drivers/staging/lustre/lustre/llite/llite_close.c +++ b/drivers/staging/lustre/lustre/llite/llite_close.c @@ -46,31 +46,31 @@ #include "llite_internal.h" /** records that a write is in flight */ -void vvp_write_pending(struct ccc_object *club, struct ccc_page *page) +void vvp_write_pending(struct vvp_object *club, struct vvp_page *page) { - struct ll_inode_info *lli = ll_i2info(club->cob_inode); + struct ll_inode_info *lli = ll_i2info(club->vob_inode); spin_lock(&lli->lli_lock); lli->lli_flags |= LLIF_SOM_DIRTY; - if (page && list_empty(&page->cpg_pending_linkage)) - list_add(&page->cpg_pending_linkage, &club->cob_pending_list); + if (page && list_empty(&page->vpg_pending_linkage)) + list_add(&page->vpg_pending_linkage, &club->vob_pending_list); spin_unlock(&lli->lli_lock); } /** records that a write has completed */ -void vvp_write_complete(struct ccc_object *club, struct ccc_page *page) +void vvp_write_complete(struct vvp_object *club, struct vvp_page *page) { - struct ll_inode_info *lli = ll_i2info(club->cob_inode); + struct ll_inode_info *lli = ll_i2info(club->vob_inode); int rc = 0; spin_lock(&lli->lli_lock); - if (page && !list_empty(&page->cpg_pending_linkage)) { - list_del_init(&page->cpg_pending_linkage); + if (page && !list_empty(&page->vpg_pending_linkage)) { + list_del_init(&page->vpg_pending_linkage); rc = 1; } spin_unlock(&lli->lli_lock); if (rc) - ll_queue_done_writing(club->cob_inode, 0); + ll_queue_done_writing(club->vob_inode, 0); } /** Queues DONE_WRITING if @@ -80,25 +80,25 @@ void vvp_write_complete(struct ccc_object *club, struct ccc_page *page) void ll_queue_done_writing(struct inode *inode, unsigned long flags) { struct ll_inode_info *lli = ll_i2info(inode); - struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob); + struct vvp_object *club = cl2vvp(ll_i2info(inode)->lli_clob); spin_lock(&lli->lli_lock); lli->lli_flags |= flags; if ((lli->lli_flags & LLIF_DONE_WRITING) && - list_empty(&club->cob_pending_list)) { + list_empty(&club->vob_pending_list)) { struct ll_close_queue *lcq = ll_i2sbi(inode)->ll_lcq; if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) - CWARN("ino %lu/%u(flags %u) som valid it just after recovery\n", - inode->i_ino, inode->i_generation, - lli->lli_flags); + CWARN("%s: file "DFID"(flags %u) Size-on-MDS valid, done writing allowed and no diry pages\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), lli->lli_flags); /* DONE_WRITING is allowed and inode has no dirty page. */ spin_lock(&lcq->lcq_lock); LASSERT(list_empty(&lli->lli_close_list)); - CDEBUG(D_INODE, "adding inode %lu/%u to close list\n", - inode->i_ino, inode->i_generation); + CDEBUG(D_INODE, "adding inode "DFID" to close list\n", + PFID(ll_inode2fid(inode))); list_add_tail(&lli->lli_close_list, &lcq->lcq_head); /* Avoid a concurrent insertion into the close thread queue: @@ -124,9 +124,9 @@ void ll_done_writing_attr(struct inode *inode, struct md_op_data *op_data) op_data->op_flags |= MF_SOM_CHANGE; /* Check if Size-on-MDS attributes are valid. */ if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) - CERROR("ino %lu/%u(flags %u) som valid it just after recovery\n", - inode->i_ino, inode->i_generation, - lli->lli_flags); + CERROR("%s: inode "DFID"(flags %u) MDS holds lock on Size-on-MDS attributes\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), lli->lli_flags); if (!cl_local_size(inode)) { /* Send Size-on-MDS Attributes if valid. */ @@ -140,10 +140,10 @@ void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data, struct obd_client_handle **och, unsigned long flags) { struct ll_inode_info *lli = ll_i2info(inode); - struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob); + struct vvp_object *club = cl2vvp(ll_i2info(inode)->lli_clob); spin_lock(&lli->lli_lock); - if (!(list_empty(&club->cob_pending_list))) { + if (!(list_empty(&club->vob_pending_list))) { if (!(lli->lli_flags & LLIF_EPOCH_PENDING)) { LASSERT(*och); LASSERT(!lli->lli_pending_och); @@ -198,7 +198,7 @@ void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data, } } - LASSERT(list_empty(&club->cob_pending_list)); + LASSERT(list_empty(&club->vob_pending_list)); lli->lli_flags &= ~LLIF_SOM_DIRTY; spin_unlock(&lli->lli_lock); ll_done_writing_attr(inode, op_data); @@ -221,9 +221,9 @@ int ll_som_update(struct inode *inode, struct md_op_data *op_data) LASSERT(op_data); if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) - CERROR("ino %lu/%u(flags %u) som valid it just after recovery\n", - inode->i_ino, inode->i_generation, - lli->lli_flags); + CERROR("%s: inode "DFID"(flags %u) MDS holds lock on Size-on-MDS attributes\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), lli->lli_flags); oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS); if (!oa) { @@ -241,9 +241,9 @@ int ll_som_update(struct inode *inode, struct md_op_data *op_data) if (rc) { oa->o_valid = 0; if (rc != -ENOENT) - CERROR("inode_getattr failed (%d): unable to send a Size-on-MDS attribute update for inode %lu/%u\n", - rc, inode->i_ino, - inode->i_generation); + CERROR("%s: inode_getattr failed - unable to send a Size-on-MDS attribute update for inode "DFID": rc = %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), rc); } else { CDEBUG(D_INODE, "Size-on-MDS update on "DFID"\n", PFID(&lli->lli_fid)); @@ -302,9 +302,11 @@ static void ll_done_writing(struct inode *inode) * OSTs and send setattr to back to MDS. */ rc = ll_som_update(inode, op_data); - else if (rc) - CERROR("inode %lu mdc done_writing failed: rc = %d\n", - inode->i_ino, rc); + else if (rc) { + CERROR("%s: inode "DFID" mdc done_writing failed: rc = %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), rc); + } out: ll_finish_md_op_data(op_data); if (och) { @@ -323,8 +325,9 @@ static struct ll_inode_info *ll_close_next_lli(struct ll_close_queue *lcq) lli = list_entry(lcq->lcq_head.next, struct ll_inode_info, lli_close_list); list_del_init(&lli->lli_close_list); - } else if (atomic_read(&lcq->lcq_stop)) + } else if (atomic_read(&lcq->lcq_stop)) { lli = ERR_PTR(-EALREADY); + } spin_unlock(&lcq->lcq_lock); return lli; @@ -348,8 +351,8 @@ static int ll_close_thread(void *arg) break; inode = ll_info2i(lli); - CDEBUG(D_INFO, "done_writing for inode %lu/%u\n", - inode->i_ino, inode->i_generation); + CDEBUG(D_INFO, "done_writing for inode "DFID"\n", + PFID(ll_inode2fid(inode))); ll_done_writing(inode); iput(inode); } diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h index e3c0f1dd4d31..3f2f30b6542c 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h @@ -43,11 +43,11 @@ /* for struct cl_lock_descr and struct cl_io */ #include "../include/cl_object.h" -#include "../include/lclient.h" #include "../include/lustre_mdc.h" #include "../include/lustre_intent.h" #include <linux/compat.h> #include <linux/posix_acl_xattr.h> +#include "vvp_internal.h" #ifndef FMODE_EXEC #define FMODE_EXEC 0 @@ -99,6 +99,13 @@ struct ll_remote_perm { */ }; +struct ll_grouplock { + struct lu_env *lg_env; + struct cl_io *lg_io; + struct cl_lock *lg_lock; + unsigned long lg_gid; +}; + enum lli_flags { /* MDS has an authority for the Size-on-MDS attributes. */ LLIF_MDS_SIZE_LOCK = (1 << 0), @@ -161,7 +168,9 @@ struct ll_inode_info { struct inode lli_vfs_inode; /* the most recent timestamps obtained from mds */ - struct ost_lvb lli_lvb; + s64 lli_atime; + s64 lli_mtime; + s64 lli_ctime; spinlock_t lli_agl_lock; /* Try to make the d::member and f::member are aligned. Before using @@ -328,6 +337,7 @@ enum ra_stat { RA_STAT_EOF, RA_STAT_MAX_IN_FLIGHT, RA_STAT_WRONG_GRAB_PAGE, + RA_STAT_FAILED_REACH_END, _NR_RA_STAT, }; @@ -481,6 +491,12 @@ struct ll_sb_info { struct lprocfs_stats *ll_stats; /* lprocfs stats counter */ + /* + * Used to track "unstable" pages on a client, and maintain a + * LRU list of clean pages. An "unstable" page is defined as + * any page which is sent to a server as part of a bulk request, + * but is uncommitted to stable storage. + */ struct cl_client_cache ll_cache; struct lprocfs_stats *ll_ra_stats; @@ -525,13 +541,6 @@ struct ll_sb_info { struct completion ll_kobj_unregister; }; -struct ll_ra_read { - pgoff_t lrr_start; - pgoff_t lrr_count; - struct task_struct *lrr_reader; - struct list_head lrr_linkage; -}; - /* * per file-descriptor read-ahead data. */ @@ -590,12 +599,6 @@ struct ll_readahead_state { */ unsigned long ras_request_index; /* - * list of struct ll_ra_read's one per read(2) call current in - * progress against this file descriptor. Used by read-ahead code, - * protected by ->ras_lock. - */ - struct list_head ras_read_beads; - /* * The following 3 items are used for detecting the stride I/O * mode. * In stride I/O mode, @@ -622,7 +625,7 @@ extern struct kmem_cache *ll_file_data_slab; struct lustre_handle; struct ll_file_data { struct ll_readahead_state fd_ras; - struct ccc_grouplock fd_grouplock; + struct ll_grouplock fd_grouplock; __u64 lfd_pos; __u32 fd_flags; fmode_t fd_omode; @@ -663,8 +666,16 @@ static inline int ll_need_32bit_api(struct ll_sb_info *sbi) #endif } -void ll_ra_read_in(struct file *f, struct ll_ra_read *rar); -void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar); +void ll_ras_enter(struct file *f); + +/* llite/lcommon_misc.c */ +int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp); +int cl_ocd_update(struct obd_device *host, + struct obd_device *watched, + enum obd_notify_event ev, void *owner, void *data); +int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock, + struct ll_grouplock *cg); +void cl_put_grouplock(struct ll_grouplock *cg); /* llite/lproc_llite.c */ int ldebugfs_register_mountpoint(struct dentry *parent, @@ -697,15 +708,15 @@ int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *, struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de); /* llite/rw.c */ -int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to); -int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to); int ll_writepage(struct page *page, struct writeback_control *wbc); int ll_writepages(struct address_space *, struct writeback_control *wbc); int ll_readpage(struct file *file, struct page *page); void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras); int ll_readahead(const struct lu_env *env, struct cl_io *io, - struct ll_readahead_state *ras, struct address_space *mapping, - struct cl_page_list *queue, int flags); + struct cl_page_list *queue, struct ll_readahead_state *ras, + bool hit); +struct ll_cl_context *ll_cl_init(struct file *file, struct page *vmpage); +void ll_cl_fini(struct ll_cl_context *lcc); extern const struct address_space_operations ll_aops; @@ -740,7 +751,7 @@ struct posix_acl *ll_get_acl(struct inode *inode, int type); int ll_inode_permission(struct inode *inode, int mask); int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, - int flags, struct lov_user_md *lum, + __u64 flags, struct lov_user_md *lum, int lum_size); int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, struct lov_mds_md **lmm, int *lmm_size, @@ -750,9 +761,9 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump, int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp, int *lmm_size, struct ptlrpc_request **request); int ll_fsync(struct file *file, loff_t start, loff_t end, int data); -int ll_merge_lvb(const struct lu_env *env, struct inode *inode); +int ll_merge_attr(const struct lu_env *env, struct inode *inode); int ll_fid2path(struct inode *inode, void __user *arg); -int ll_data_version(struct inode *inode, __u64 *data_version, int extent_lock); +int ll_data_version(struct inode *inode, __u64 *data_version, int flags); int ll_hsm_release(struct inode *inode); /* llite/dcache.c */ @@ -824,65 +835,8 @@ struct ll_close_queue { atomic_t lcq_stop; }; -struct ccc_object *cl_inode2ccc(struct inode *inode); - -void vvp_write_pending (struct ccc_object *club, struct ccc_page *page); -void vvp_write_complete(struct ccc_object *club, struct ccc_page *page); - -/* specific architecture can implement only part of this list */ -enum vvp_io_subtype { - /** normal IO */ - IO_NORMAL, - /** io started from splice_{read|write} */ - IO_SPLICE -}; - -/* IO subtypes */ -struct vvp_io { - /** io subtype */ - enum vvp_io_subtype cui_io_subtype; - - union { - struct { - struct pipe_inode_info *cui_pipe; - unsigned int cui_flags; - } splice; - struct vvp_fault_io { - /** - * Inode modification time that is checked across DLM - * lock request. - */ - time64_t ft_mtime; - struct vm_area_struct *ft_vma; - /** - * locked page returned from vvp_io - */ - struct page *ft_vmpage; - struct vm_fault_api { - /** - * kernel fault info - */ - struct vm_fault *ft_vmf; - /** - * fault API used bitflags for return code. - */ - unsigned int ft_flags; - /** - * check that flags are from filemap_fault - */ - bool ft_flags_valid; - } fault; - } fault; - } u; - /** - * Read-ahead state used by read and page-fault IO contexts. - */ - struct ll_ra_read cui_bead; - /** - * Set when cui_bead has been initialized. - */ - int cui_ra_window_set; -}; +void vvp_write_pending(struct vvp_object *club, struct vvp_page *page); +void vvp_write_complete(struct vvp_object *club, struct vvp_page *page); /** * IO arguments for various VFS I/O interfaces. @@ -911,54 +865,32 @@ struct ll_cl_context { int lcc_refcheck; }; -struct vvp_thread_info { - struct vvp_io_args vti_args; - struct ra_io_arg vti_ria; - struct ll_cl_context vti_io_ctx; +struct ll_thread_info { + struct vvp_io_args lti_args; + struct ra_io_arg lti_ria; + struct ll_cl_context lti_io_ctx; }; -static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env) -{ - extern struct lu_context_key vvp_key; - struct vvp_thread_info *info; - - info = lu_context_key_get(&env->le_ctx, &vvp_key); - LASSERT(info); - return info; -} - -static inline struct vvp_io_args *vvp_env_args(const struct lu_env *env, - enum vvp_io_subtype type) +extern struct lu_context_key ll_thread_key; +static inline struct ll_thread_info *ll_env_info(const struct lu_env *env) { - struct vvp_io_args *ret = &vvp_env_info(env)->vti_args; - - ret->via_io_subtype = type; + struct ll_thread_info *lti; - return ret; + lti = lu_context_key_get(&env->le_ctx, &ll_thread_key); + LASSERT(lti); + return lti; } -struct vvp_session { - struct vvp_io vs_ios; -}; - -static inline struct vvp_session *vvp_env_session(const struct lu_env *env) +static inline struct vvp_io_args *ll_env_args(const struct lu_env *env, + enum vvp_io_subtype type) { - extern struct lu_context_key vvp_session_key; - struct vvp_session *ses; + struct vvp_io_args *via = &ll_env_info(env)->lti_args; - ses = lu_context_key_get(env->le_ses, &vvp_session_key); - LASSERT(ses); - return ses; -} + via->via_io_subtype = type; -static inline struct vvp_io *vvp_env_io(const struct lu_env *env) -{ - return &vvp_env_session(env)->vs_ios; + return via; } -int vvp_global_init(void); -void vvp_global_fini(void); - void ll_queue_done_writing(struct inode *inode, unsigned long flags); void ll_close_thread_shutdown(struct ll_close_queue *lcq); int ll_close_thread_start(struct ll_close_queue **lcq_ret); @@ -981,6 +913,10 @@ static inline void ll_invalidate_page(struct page *vmpage) if (!mapping) return; + /* + * truncate_complete_page() calls + * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete(). + */ ll_teardown_mmaps(mapping, offset, offset + PAGE_SIZE); truncate_complete_page(mapping, vmpage); } @@ -1040,10 +976,10 @@ static inline __u64 ll_file_maxbytes(struct inode *inode) } /* llite/xattr.c */ -int ll_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags); -ssize_t ll_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size); +int ll_setxattr(struct dentry *dentry, struct inode *inode, + const char *name, const void *value, size_t size, int flags); +ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size); ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size); int ll_removexattr(struct dentry *dentry, const char *name); @@ -1055,9 +991,6 @@ void free_rmtperm_hash(struct hlist_head *hash); int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm); int lustre_check_remote_perm(struct inode *inode, int mask); -/* llite/llite_cl.c */ -extern struct lu_device_type vvp_device_type; - /** * Common IO arguments for various VFS I/O interfaces. */ @@ -1069,7 +1002,7 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode, struct ll_readahead_state *ras, unsigned long index, unsigned hit); void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len); -void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which); +void ll_ra_stats_inc(struct inode *inode, enum ra_stat which); /* llite/llite_rmtacl.c */ #ifdef CONFIG_FS_POSIX_ACL @@ -1163,6 +1096,22 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentry, int only_unplug); void ll_stop_statahead(struct inode *dir, void *key); +blkcnt_t dirty_cnt(struct inode *inode); + +int cl_glimpse_size0(struct inode *inode, int agl); +int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io, + struct inode *inode, struct cl_object *clob, int agl); + +static inline int cl_glimpse_size(struct inode *inode) +{ + return cl_glimpse_size0(inode, 0); +} + +static inline int cl_agl(struct inode *inode) +{ + return cl_glimpse_size0(inode, 1); +} + static inline int ll_glimpse_size(struct inode *inode) { struct ll_inode_info *lli = ll_i2info(inode); @@ -1285,43 +1234,6 @@ typedef enum llioc_iter (*llioc_callback_t)(struct inode *inode, void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd); void ll_iocontrol_unregister(void *magic); -/* lclient compat stuff */ -#define cl_inode_info ll_inode_info -#define cl_i2info(info) ll_i2info(info) -#define cl_inode_mode(inode) ((inode)->i_mode) -#define cl_i2sbi ll_i2sbi - -static inline struct ll_file_data *cl_iattr2fd(struct inode *inode, - const struct iattr *attr) -{ - LASSERT(attr->ia_valid & ATTR_FILE); - return LUSTRE_FPRIVATE(attr->ia_file); -} - -static inline void cl_isize_write_nolock(struct inode *inode, loff_t kms) -{ - LASSERT(mutex_is_locked(&ll_i2info(inode)->lli_size_mutex)); - i_size_write(inode, kms); -} - -static inline void cl_isize_write(struct inode *inode, loff_t kms) -{ - ll_inode_size_lock(inode); - i_size_write(inode, kms); - ll_inode_size_unlock(inode); -} - -#define cl_isize_read(inode) i_size_read(inode) - -static inline int cl_merge_lvb(const struct lu_env *env, struct inode *inode) -{ - return ll_merge_lvb(env, inode); -} - -#define cl_inode_atime(inode) LTIME_S((inode)->i_atime) -#define cl_inode_ctime(inode) LTIME_S((inode)->i_ctime) -#define cl_inode_mtime(inode) LTIME_S((inode)->i_mtime) - int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, enum cl_fsync_mode mode, int ignore_layout); @@ -1350,7 +1262,7 @@ static inline void cl_stats_tally(struct cl_device *dev, enum cl_req_type crt, int opc = (crt == CRT_READ) ? LPROC_LL_OSC_READ : LPROC_LL_OSC_WRITE; - ll_stats_ops_tally(ll_s2sbi(cl2ccc_dev(dev)->cdv_sb), opc, rc); + ll_stats_ops_tally(ll_s2sbi(cl2vvp_dev(dev)->vdv_sb), opc, rc); } ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, @@ -1382,18 +1294,16 @@ static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode, */ if (it->d.lustre.it_remote_lock_mode) { handle.cookie = it->d.lustre.it_remote_lock_handle; - CDEBUG(D_DLMTRACE, "setting l_data to inode %p(%lu/%u) for remote lock %#llx\n", - inode, - inode->i_ino, inode->i_generation, + CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"%p for remote lock %#llx\n", + PFID(ll_inode2fid(inode)), inode, handle.cookie); md_set_lock_data(exp, &handle.cookie, inode, NULL); } handle.cookie = it->d.lustre.it_lock_handle; - CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u) for lock %#llx\n", - inode, inode->i_ino, - inode->i_generation, handle.cookie); + CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"%p for lock %#llx\n", + PFID(ll_inode2fid(inode)), inode, handle.cookie); md_set_lock_data(exp, &handle.cookie, inode, &it->d.lustre.it_lock_bits); @@ -1471,9 +1381,25 @@ enum { int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf); int ll_layout_refresh(struct inode *inode, __u32 *gen); -int ll_layout_restore(struct inode *inode); +int ll_layout_restore(struct inode *inode, loff_t start, __u64 length); int ll_xattr_init(void); void ll_xattr_fini(void); +int ll_page_sync_io(const struct lu_env *env, struct cl_io *io, + struct cl_page *page, enum cl_req_type crt); + +/* lcommon_cl.c */ +int cl_setattr_ost(struct inode *inode, const struct iattr *attr); + +extern struct lu_env *cl_inode_fini_env; +extern int cl_inode_fini_refcheck; + +int cl_file_inode_init(struct inode *inode, struct lustre_md *md); +void cl_inode_fini(struct inode *inode); +int cl_local_size(struct inode *inode); + +__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32); +__u32 cl_fid_build_gen(const struct lu_fid *fid); + #endif /* LLITE_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c index b57a992688a8..96c7e9fc6e5f 100644 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ b/drivers/staging/lustre/lustre/llite/llite_lib.c @@ -85,18 +85,18 @@ static struct ll_sb_info *ll_init_sbi(struct super_block *sb) si_meminfo(&si); pages = si.totalram - si.totalhigh; - if (pages >> (20 - PAGE_SHIFT) < 512) - lru_page_max = pages / 2; - else - lru_page_max = (pages / 4) * 3; + lru_page_max = pages / 2; - /* initialize lru data */ + /* initialize ll_cache data */ atomic_set(&sbi->ll_cache.ccc_users, 0); sbi->ll_cache.ccc_lru_max = lru_page_max; atomic_set(&sbi->ll_cache.ccc_lru_left, lru_page_max); spin_lock_init(&sbi->ll_cache.ccc_lru_lock); INIT_LIST_HEAD(&sbi->ll_cache.ccc_lru); + atomic_set(&sbi->ll_cache.ccc_unstable_nr, 0); + init_waitqueue_head(&sbi->ll_cache.ccc_unstable_waitq); + sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32, SBI_DEFAULT_READAHEAD_MAX); sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file; @@ -169,12 +169,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, return -ENOMEM; } - if (llite_root) { - err = ldebugfs_register_mountpoint(llite_root, sb, dt, md); - if (err < 0) - CERROR("could not register mount in <debugfs>/lustre/llite\n"); - } - /* indicate the features supported by this client */ data->ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH | OBD_CONNECT_ATTRFID | @@ -337,10 +331,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, else sbi->ll_md_brw_size = PAGE_SIZE; - if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK) { - LCONSOLE_INFO("Layout lock feature supported.\n"); + if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK) sbi->ll_flags |= LL_SBI_LAYOUT_LOCK; - } if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) { if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) { @@ -453,7 +445,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, /* make root inode * XXX: move this to after cbd setup? */ - valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS; + valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMODEASIZE; if (sbi->ll_flags & LL_SBI_RMT_CLIENT) valid |= OBD_MD_FLRMTPERM; else if (sbi->ll_flags & LL_SBI_ACL) @@ -555,6 +547,15 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, kfree(data); kfree(osfs); + if (llite_root) { + err = ldebugfs_register_mountpoint(llite_root, sb, dt, md); + if (err < 0) { + CERROR("%s: could not register mount in debugfs: " + "rc = %d\n", ll_get_fsname(sb, NULL, 0), err); + err = 0; + } + } + return err; out_root: iput(root); @@ -573,7 +574,6 @@ out_md: out: kfree(data); kfree(osfs); - ldebugfs_unregister_mountpoint(sbi); return err; } @@ -897,10 +897,8 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt) cfg->cfg_callback = class_config_llog_handler; /* set up client obds */ err = lustre_process_log(sb, profilenm, cfg); - if (err < 0) { - CERROR("Unable to process log: %d\n", err); + if (err < 0) goto out_free; - } /* Profile set with LCFG_MOUNTOPT so we can find our mdc and osc obds */ lprof = class_get_profile(profilenm); @@ -947,7 +945,7 @@ void ll_put_super(struct super_block *sb) struct lustre_sb_info *lsi = s2lsi(sb); struct ll_sb_info *sbi = ll_s2sbi(sb); char *profilenm = get_profile_name(sb); - int next, force = 1; + int ccc_count, next, force = 1, rc = 0; CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm); @@ -963,6 +961,19 @@ void ll_put_super(struct super_block *sb) force = obd->obd_force; } + /* Wait for unstable pages to be committed to stable storage */ + if (!force) { + struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); + + rc = l_wait_event(sbi->ll_cache.ccc_unstable_waitq, + !atomic_read(&sbi->ll_cache.ccc_unstable_nr), + &lwi); + } + + ccc_count = atomic_read(&sbi->ll_cache.ccc_unstable_nr); + if (!force && rc != -EINTR) + LASSERTF(!ccc_count, "count: %i\n", ccc_count); + /* We need to set force before the lov_disconnect in * lustre_common_put_super, since l_d cleans up osc's as well. */ @@ -999,6 +1010,8 @@ void ll_put_super(struct super_block *sb) lustre_common_put_super(sb); + cl_env_cache_purge(~0); + module_put(THIS_MODULE); } /* client_put_super */ @@ -1032,8 +1045,8 @@ void ll_clear_inode(struct inode *inode) struct ll_inode_info *lli = ll_i2info(inode); struct ll_sb_info *sbi = ll_i2sbi(inode); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", + PFID(ll_inode2fid(inode)), inode); if (S_ISDIR(inode->i_mode)) { /* these should have been cleared in ll_file_release */ @@ -1180,9 +1193,11 @@ static int ll_setattr_done_writing(struct inode *inode, * from OSTs and send setattr to back to MDS. */ rc = ll_som_update(inode, op_data); - else if (rc) - CERROR("inode %lu mdc truncate failed: rc = %d\n", - inode->i_ino, rc); + else if (rc) { + CERROR("%s: inode "DFID" mdc truncate failed: rc = %d\n", + ll_i2sbi(inode)->ll_md_exp->exp_obd->obd_name, + PFID(ll_inode2fid(inode)), rc); + } return rc; } @@ -1210,12 +1225,9 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) bool file_is_released = false; int rc = 0, rc1 = 0; - CDEBUG(D_VFSTRACE, - "%s: setattr inode %p/fid:" DFID - " from %llu to %llu, valid %x, hsm_import %d\n", - ll_get_fsname(inode->i_sb, NULL, 0), inode, - PFID(&lli->lli_fid), i_size_read(inode), attr->ia_size, - attr->ia_valid, hsm_import); + CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, valid %x, hsm_import %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), inode, + i_size_read(inode), attr->ia_size, attr->ia_valid, hsm_import); if (attr->ia_valid & ATTR_SIZE) { /* Check new size against VFS/VM file size limit and rlimit */ @@ -1265,14 +1277,6 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime), (s64)ktime_get_real_seconds()); - /* If we are changing file size, file content is modified, flag it. */ - if (attr->ia_valid & ATTR_SIZE) { - attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; - spin_lock(&lli->lli_lock); - lli->lli_flags |= LLIF_DATA_MODIFIED; - spin_unlock(&lli->lli_lock); - } - /* We always do an MDS RPC, even if we're only changing the size; * only the MDS knows whether truncate() should fail with -ETXTBUSY */ @@ -1284,13 +1288,6 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) if (!S_ISDIR(inode->i_mode)) inode_unlock(inode); - memcpy(&op_data->op_attr, attr, sizeof(*attr)); - - /* Open epoch for truncate. */ - if (exp_connect_som(ll_i2mdexp(inode)) && - (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET))) - op_data->op_flags = MF_EPOCH_OPEN; - /* truncate on a released file must failed with -ENODATA, * so size must not be set on MDS for released file * but other attributes must be set @@ -1304,29 +1301,40 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) if (lsm && lsm->lsm_pattern & LOV_PATTERN_F_RELEASED) file_is_released = true; ccc_inode_lsm_put(inode, lsm); + + if (!hsm_import && attr->ia_valid & ATTR_SIZE) { + if (file_is_released) { + rc = ll_layout_restore(inode, 0, attr->ia_size); + if (rc < 0) + goto out; + + file_is_released = false; + ll_layout_refresh(inode, &gen); + } + + /* + * If we are changing file size, file content is + * modified, flag it. + */ + attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; + spin_lock(&lli->lli_lock); + lli->lli_flags |= LLIF_DATA_MODIFIED; + spin_unlock(&lli->lli_lock); + op_data->op_bias |= MDS_DATA_MODIFIED; + } } - /* if not in HSM import mode, clear size attr for released file - * we clear the attribute send to MDT in op_data, not the original - * received from caller in attr which is used later to - * decide return code - */ - if (file_is_released && (attr->ia_valid & ATTR_SIZE) && !hsm_import) - op_data->op_attr.ia_valid &= ~ATTR_SIZE; + memcpy(&op_data->op_attr, attr, sizeof(*attr)); + + /* Open epoch for truncate. */ + if (exp_connect_som(ll_i2mdexp(inode)) && !hsm_import && + (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET))) + op_data->op_flags = MF_EPOCH_OPEN; rc = ll_md_setattr(dentry, op_data, &mod); if (rc) goto out; - /* truncate failed (only when non HSM import), others succeed */ - if (file_is_released) { - if ((attr->ia_valid & ATTR_SIZE) && !hsm_import) - rc = -ENODATA; - else - rc = 0; - goto out; - } - /* RPC to MDT is sent, cancel data modification flag */ if (op_data->op_bias & MDS_DATA_MODIFIED) { spin_lock(&lli->lli_lock); @@ -1335,7 +1343,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) } ll_ioepoch_open(lli, op_data->op_ioepoch); - if (!S_ISREG(inode->i_mode)) { + if (!S_ISREG(inode->i_mode) || file_is_released) { rc = 0; goto out; } @@ -1552,7 +1560,7 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) if (body->valid & OBD_MD_FLATIME) { if (body->atime > LTIME_S(inode->i_atime)) LTIME_S(inode->i_atime) = body->atime; - lli->lli_lvb.lvb_atime = body->atime; + lli->lli_atime = body->atime; } if (body->valid & OBD_MD_FLMTIME) { if (body->mtime > LTIME_S(inode->i_mtime)) { @@ -1561,12 +1569,12 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) body->mtime); LTIME_S(inode->i_mtime) = body->mtime; } - lli->lli_lvb.lvb_mtime = body->mtime; + lli->lli_mtime = body->mtime; } if (body->valid & OBD_MD_FLCTIME) { if (body->ctime > LTIME_S(inode->i_ctime)) LTIME_S(inode->i_ctime) = body->ctime; - lli->lli_lvb.lvb_ctime = body->ctime; + lli->lli_ctime = body->ctime; } if (body->valid & OBD_MD_FLMODE) inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT); @@ -1593,12 +1601,12 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) /* FID shouldn't be changed! */ if (fid_is_sane(&lli->lli_fid)) { LASSERTF(lu_fid_eq(&lli->lli_fid, &body->fid1), - "Trying to change FID "DFID - " to the "DFID", inode %lu/%u(%p)\n", + "Trying to change FID "DFID" to the "DFID", inode "DFID"(%p)\n", PFID(&lli->lli_fid), PFID(&body->fid1), - inode->i_ino, inode->i_generation, inode); - } else + PFID(ll_inode2fid(inode)), inode); + } else { lli->lli_fid = body->fid1; + } } LASSERT(fid_seq(&lli->lli_fid) != 0); @@ -1622,8 +1630,10 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) if (lli->lli_flags & (LLIF_DONE_WRITING | LLIF_EPOCH_PENDING | LLIF_SOM_DIRTY)) { - CERROR("ino %lu flags %u still has size authority! do not trust the size got from MDS\n", - inode->i_ino, lli->lli_flags); + CERROR("%s: inode "DFID" flags %u still has size authority! do not trust the size got from MDS\n", + sbi->ll_md_exp->exp_obd->obd_name, + PFID(ll_inode2fid(inode)), + lli->lli_flags); } else { /* Use old size assignment to avoid * deadlock bz14138 & bz14326 @@ -1699,7 +1709,7 @@ void ll_read_inode2(struct inode *inode, void *opaque) void ll_delete_inode(struct inode *inode) { - struct cl_inode_info *lli = cl_i2info(inode); + struct ll_inode_info *lli = ll_i2info(inode); if (S_ISREG(inode->i_mode) && lli->lli_clob) /* discard all dirty pages before truncating them, required by @@ -1715,8 +1725,8 @@ void ll_delete_inode(struct inode *inode) spin_lock_irq(&inode->i_data.tree_lock); spin_unlock_irq(&inode->i_data.tree_lock); LASSERTF(inode->i_data.nrpages == 0, - "inode=%lu/%u(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n", - inode->i_ino, inode->i_generation, inode, + "inode="DFID"(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n", + PFID(ll_inode2fid(inode)), inode, inode->i_data.nrpages); } /* Workaround end */ @@ -1747,7 +1757,9 @@ int ll_iocontrol(struct inode *inode, struct file *file, rc = md_getattr(sbi->ll_md_exp, op_data, &req); ll_finish_md_op_data(op_data); if (rc) { - CERROR("failure %d inode %lu\n", rc, inode->i_ino); + CERROR("%s: failure inode "DFID": rc = %d\n", + sbi->ll_md_exp->exp_obd->obd_name, + PFID(ll_inode2fid(inode)), rc); return -abs(rc); } @@ -1772,7 +1784,7 @@ int ll_iocontrol(struct inode *inode, struct file *file, if (IS_ERR(op_data)) return PTR_ERR(op_data); - ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = flags; + op_data->op_attr_flags = flags; op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG; rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0, &req, NULL); @@ -2066,11 +2078,11 @@ int ll_obd_statfs(struct inode *inode, void __user *arg) } memcpy(&type, data->ioc_inlbuf1, sizeof(__u32)); - if (type & LL_STATFS_LMV) + if (type & LL_STATFS_LMV) { exp = sbi->ll_md_exp; - else if (type & LL_STATFS_LOV) + } else if (type & LL_STATFS_LOV) { exp = sbi->ll_dt_exp; - else { + } else { rc = -ENODEV; goto out_statfs; } @@ -2271,7 +2283,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret) { char *buf, *path = NULL; struct dentry *dentry = NULL; - struct ccc_object *obj = cl_inode2ccc(page->mapping->host); + struct vvp_object *obj = cl_inode2vvp(page->mapping->host); /* this can be called inside spin lock so use GFP_ATOMIC. */ buf = (char *)__get_free_page(GFP_ATOMIC); @@ -2285,7 +2297,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret) "%s: dirty page discard: %s/fid: " DFID "/%s may get corrupted (rc %d)\n", ll_get_fsname(page->mapping->host->i_sb, NULL, 0), s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev, - PFID(&obj->cob_header.coh_lu.loh_fid), + PFID(&obj->vob_header.coh_lu.loh_fid), (path && !IS_ERR(path)) ? path : "", ioret); if (dentry) diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c index 5b484e62ffd0..88ef1cac9e0f 100644 --- a/drivers/staging/lustre/lustre/llite/llite_mmap.c +++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c @@ -57,10 +57,10 @@ void policy_from_vma(ldlm_policy_data_t *policy, struct vm_area_struct *vma, unsigned long addr, size_t count) { - policy->l_extent.start = ((addr - vma->vm_start) & CFS_PAGE_MASK) + + policy->l_extent.start = ((addr - vma->vm_start) & PAGE_MASK) + (vma->vm_pgoff << PAGE_SHIFT); policy->l_extent.end = (policy->l_extent.start + count - 1) | - ~CFS_PAGE_MASK; + ~PAGE_MASK; } struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr, @@ -123,7 +123,8 @@ ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret, *env_ret = env; - io = ccc_env_thread_io(env); +restart: + io = vvp_env_thread_io(env); io->ci_obj = ll_i2info(inode)->lli_clob; LASSERT(io->ci_obj); @@ -146,17 +147,20 @@ ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret, rc = cl_io_init(env, io, CIT_FAULT, io->ci_obj); if (rc == 0) { - struct ccc_io *cio = ccc_env_io(env); + struct vvp_io *vio = vvp_env_io(env); struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - LASSERT(cio->cui_cl.cis_io == io); + LASSERT(vio->vui_cl.cis_io == io); /* mmap lock must be MANDATORY it has to cache pages. */ io->ci_lockreq = CILR_MANDATORY; - cio->cui_fd = fd; + vio->vui_fd = fd; } else { LASSERT(rc < 0); cl_io_fini(env, io); + if (io->ci_need_restart) + goto restart; + cl_env_nested_put(nest, env); io = ERR_PTR(rc); } @@ -200,7 +204,7 @@ static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage, * Otherwise, we could add dirty pages into osc cache * while truncate is on-going. */ - inode = ccc_object_inode(io->ci_obj); + inode = vvp_object_inode(io->ci_obj); lli = ll_i2info(inode); down_read(&lli->lli_trunc_sem); @@ -307,17 +311,17 @@ static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf) vio = vvp_env_io(env); vio->u.fault.ft_vma = vma; vio->u.fault.ft_vmpage = NULL; - vio->u.fault.fault.ft_vmf = vmf; - vio->u.fault.fault.ft_flags = 0; - vio->u.fault.fault.ft_flags_valid = false; + vio->u.fault.ft_vmf = vmf; + vio->u.fault.ft_flags = 0; + vio->u.fault.ft_flags_valid = false; result = cl_io_loop(env, io); /* ft_flags are only valid if we reached * the call to filemap_fault */ - if (vio->u.fault.fault.ft_flags_valid) - fault_ret = vio->u.fault.fault.ft_flags; + if (vio->u.fault.ft_flags_valid) + fault_ret = vio->u.fault.ft_flags; vmpage = vio->u.fault.ft_vmpage; if (result != 0 && vmpage) { @@ -390,9 +394,11 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) result = ll_page_mkwrite0(vma, vmf->page, &retry); if (!printed && ++count > 16) { - CWARN("app(%s): the page %lu of file %lu is under heavy contention.\n", + const struct dentry *de = vma->vm_file->f_path.dentry; + + CWARN("app(%s): the page %lu of file "DFID" is under heavy contention\n", current->comm, vmf->pgoff, - file_inode(vma->vm_file)->i_ino); + PFID(ll_inode2fid(de->d_inode))); printed = true; } } while (retry); @@ -422,16 +428,16 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) /** * To avoid cancel the locks covering mmapped region for lock cache pressure, - * we track the mapped vma count in ccc_object::cob_mmap_cnt. + * we track the mapped vma count in vvp_object::vob_mmap_cnt. */ static void ll_vm_open(struct vm_area_struct *vma) { struct inode *inode = file_inode(vma->vm_file); - struct ccc_object *vob = cl_inode2ccc(inode); + struct vvp_object *vob = cl_inode2vvp(inode); LASSERT(vma->vm_file); - LASSERT(atomic_read(&vob->cob_mmap_cnt) >= 0); - atomic_inc(&vob->cob_mmap_cnt); + LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0); + atomic_inc(&vob->vob_mmap_cnt); } /** @@ -440,11 +446,11 @@ static void ll_vm_open(struct vm_area_struct *vma) static void ll_vm_close(struct vm_area_struct *vma) { struct inode *inode = file_inode(vma->vm_file); - struct ccc_object *vob = cl_inode2ccc(inode); + struct vvp_object *vob = cl_inode2vvp(inode); LASSERT(vma->vm_file); - atomic_dec(&vob->cob_mmap_cnt); - LASSERT(atomic_read(&vob->cob_mmap_cnt) >= 0); + atomic_dec(&vob->vob_mmap_cnt); + LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0); } /* XXX put nice comment here. talk about __free_pte -> dirty pages and diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c index 193aab879709..c1eef6198b25 100644 --- a/drivers/staging/lustre/lustre/llite/llite_nfs.c +++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c @@ -119,7 +119,7 @@ struct inode *search_inode_for_lustre(struct super_block *sb, rc = md_getattr(sbi->ll_md_exp, op_data, &req); kfree(op_data); if (rc) { - CERROR("can't get object attrs, fid "DFID", rc %d\n", + CDEBUG(D_INFO, "can't get object attrs, fid "DFID", rc %d\n", PFID(fid), rc); return ERR_PTR(rc); } @@ -191,8 +191,9 @@ static int ll_encode_fh(struct inode *inode, __u32 *fh, int *plen, int fileid_len = sizeof(struct lustre_nfs_fid) / 4; struct lustre_nfs_fid *nfs_fid = (void *)fh; - CDEBUG(D_INFO, "encoding for (%lu," DFID ") maxlen=%d minlen=%d\n", - inode->i_ino, PFID(ll_inode2fid(inode)), *plen, fileid_len); + CDEBUG(D_INFO, "%s: encoding for ("DFID") maxlen=%d minlen=%d\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), *plen, fileid_len); if (*plen < fileid_len) { *plen = fileid_len; @@ -298,8 +299,9 @@ static struct dentry *ll_get_parent(struct dentry *dchild) sbi = ll_s2sbi(dir->i_sb); - CDEBUG(D_INFO, "getting parent for (%lu," DFID ")\n", - dir->i_ino, PFID(ll_inode2fid(dir))); + CDEBUG(D_INFO, "%s: getting parent for ("DFID")\n", + ll_get_fsname(dir->i_sb, NULL, 0), + PFID(ll_inode2fid(dir))); rc = ll_get_default_mdsize(sbi, &lmmsize); if (rc != 0) @@ -314,15 +316,20 @@ static struct dentry *ll_get_parent(struct dentry *dchild) rc = md_getattr_name(sbi->ll_md_exp, op_data, &req); ll_finish_md_op_data(op_data); if (rc) { - CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino); + CERROR("%s: failure inode "DFID" get parent: rc = %d\n", + ll_get_fsname(dir->i_sb, NULL, 0), + PFID(ll_inode2fid(dir)), rc); return ERR_PTR(rc); } body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - LASSERT(body->valid & OBD_MD_FLID); - - CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n", - PFID(ll_inode2fid(dir)), PFID(&body->fid1)); - + /* + * LU-3952: MDT may lost the FID of its parent, we should not crash + * the NFS server, ll_iget_for_nfs() will handle the error. + */ + if (body->valid & OBD_MD_FLID) { + CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n", + PFID(ll_inode2fid(dir)), PFID(&body->fid1)); + } result = ll_iget_for_nfs(dir->i_sb, &body->fid1, NULL); ptlrpc_req_finished(req); diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c index f169c0db63b4..813a9a354e5f 100644 --- a/drivers/staging/lustre/lustre/llite/lloop.c +++ b/drivers/staging/lustre/lustre/llite/lloop.c @@ -274,8 +274,9 @@ static void loop_add_bio(struct lloop_device *lo, struct bio *bio) if (lo->lo_biotail) { lo->lo_biotail->bi_next = bio; lo->lo_biotail = bio; - } else + } else { lo->lo_bio = lo->lo_biotail = bio; + } spin_unlock_irqrestore(&lo->lo_lock, flags); atomic_inc(&lo->lo_pending); diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c index 27ab1261400e..55d62eb11957 100644 --- a/drivers/staging/lustre/lustre/llite/lproc_llite.c +++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c @@ -254,7 +254,6 @@ static ssize_t max_read_ahead_mb_store(struct kobject *kobj, pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */ if (pages_number > totalram_pages / 2) { - CERROR("can't set file readahead more than %lu MB\n", totalram_pages >> (20 - PAGE_SHIFT + 1)); /*1/2 of RAM*/ return -ERANGE; @@ -393,6 +392,8 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file, struct super_block *sb = ((struct seq_file *)file->private_data)->private; struct ll_sb_info *sbi = ll_s2sbi(sb); struct cl_client_cache *cache = &sbi->ll_cache; + struct lu_env *env; + int refcheck; int mult, rc, pages_number; int diff = 0; int nrpages = 0; @@ -430,6 +431,10 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file, goto out; } + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + return 0; + diff = -diff; while (diff > 0) { int tmp; @@ -455,19 +460,20 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file, break; if (!sbi->ll_dt_exp) { /* being initialized */ - rc = -ENODEV; - break; + rc = 0; + goto out; } /* difficult - have to ask OSCs to drop LRU slots. */ tmp = diff << 1; - rc = obd_set_info_async(NULL, sbi->ll_dt_exp, + rc = obd_set_info_async(env, sbi->ll_dt_exp, sizeof(KEY_CACHE_LRU_SHRINK), KEY_CACHE_LRU_SHRINK, sizeof(tmp), &tmp, NULL); if (rc < 0) break; } + cl_env_put(env, &refcheck); out: if (rc >= 0) { @@ -818,6 +824,23 @@ static ssize_t xattr_cache_store(struct kobject *kobj, } LUSTRE_RW_ATTR(xattr_cache); +static ssize_t unstable_stats_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, + ll_kobj); + struct cl_client_cache *cache = &sbi->ll_cache; + int pages, mb; + + pages = atomic_read(&cache->ccc_unstable_nr); + mb = (pages * PAGE_SIZE) >> 20; + + return sprintf(buf, "unstable_pages: %8d\n" + "unstable_mb: %8d\n", pages, mb); +} +LUSTRE_RO_ATTR(unstable_stats); + static struct lprocfs_vars lprocfs_llite_obd_vars[] = { /* { "mntpt_path", ll_rd_path, 0, 0 }, */ { "site", &ll_site_stats_fops, NULL, 0 }, @@ -853,6 +876,7 @@ static struct attribute *llite_attrs[] = { &lustre_attr_max_easize.attr, &lustre_attr_default_easize.attr, &lustre_attr_xattr_cache.attr, + &lustre_attr_unstable_stats.attr, NULL, }; @@ -953,6 +977,7 @@ static const char *ra_stat_string[] = { [RA_STAT_EOF] = "read-ahead to EOF", [RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue", [RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page", + [RA_STAT_FAILED_REACH_END] = "failed to reach end" }; int ldebugfs_register_mountpoint(struct dentry *parent, diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c index f8f98e4e8258..5eba0ebae10f 100644 --- a/drivers/staging/lustre/lustre/llite/namei.c +++ b/drivers/staging/lustre/lustre/llite/namei.c @@ -128,12 +128,14 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash, if (rc != 0) { iget_failed(inode); inode = NULL; - } else + } else { unlock_new_inode(inode); - } else if (!(inode->i_state & (I_FREEING | I_CLEAR))) + } + } else if (!(inode->i_state & (I_FREEING | I_CLEAR))) { ll_update_inode(inode, md); - CDEBUG(D_VFSTRACE, "got inode: %p for "DFID"\n", - inode, PFID(&md->body->fid1)); + CDEBUG(D_VFSTRACE, "got inode: "DFID"(%p)\n", + PFID(&md->body->fid1), inode); + } } return inode; } @@ -188,7 +190,7 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, break; /* Invalidate all dentries associated with this inode */ - LASSERT(lock->l_flags & LDLM_FL_CANCELING); + LASSERT(ldlm_is_canceling(lock)); if (!fid_res_name_eq(ll_inode2fid(inode), &lock->l_resource->lr_name)) { @@ -255,8 +257,8 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, } if ((bits & MDS_INODELOCK_UPDATE) && S_ISDIR(inode->i_mode)) { - CDEBUG(D_INODE, "invalidating inode %lu\n", - inode->i_ino); + CDEBUG(D_INODE, "invalidating inode "DFID"\n", + PFID(ll_inode2fid(inode))); truncate_inode_pages(inode->i_mapping, 0); ll_invalidate_negative_children(inode); } @@ -476,9 +478,8 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, if (dentry->d_name.len > ll_i2sbi(parent)->ll_namelen) return ERR_PTR(-ENAMETOOLONG); - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),intent=%s\n", - dentry, parent->i_ino, - parent->i_generation, parent, LL_IT2STR(it)); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),intent=%s\n", + dentry, PFID(ll_inode2fid(parent)), parent, LL_IT2STR(it)); if (d_mountpoint(dentry)) CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it)); @@ -553,9 +554,8 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, struct lookup_intent *itp, it = { .it_op = IT_GETATTR }; struct dentry *de; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),flags=%u\n", - dentry, parent->i_ino, - parent->i_generation, parent, flags); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),flags=%u\n", + dentry, PFID(ll_inode2fid(parent)), parent, flags); /* Optimize away (CREATE && !OPEN). Let .create handle the race. */ if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN)) @@ -586,10 +586,9 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, long long lookup_flags = LOOKUP_OPEN; int rc = 0; - CDEBUG(D_VFSTRACE, - "VFS Op:name=%pd,dir=%lu/%u(%p),file %p,open_flags %x,mode %x opened %d\n", - dentry, dir->i_ino, - dir->i_generation, dir, file, open_flags, mode, *opened); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),file %p,open_flags %x,mode %x opened %d\n", + dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode, + *opened); it = kzalloc(sizeof(*it), GFP_NOFS); if (!it) @@ -680,8 +679,8 @@ static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it) * lock on the inode. Since we finally have an inode pointer, * stuff it in the lock. */ - CDEBUG(D_DLMTRACE, "setting l_ast_data to inode %p (%lu/%u)\n", - inode, inode->i_ino, inode->i_generation); + CDEBUG(D_DLMTRACE, "setting l_ast_data to inode "DFID"(%p)\n", + PFID(ll_inode2fid(dir)), inode); ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL); out: ptlrpc_req_finished(request); @@ -708,9 +707,8 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode, struct inode *inode; int rc = 0; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),intent=%s\n", - dentry, dir->i_ino, - dir->i_generation, dir, LL_IT2STR(it)); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), intent=%s\n", + dentry, PFID(ll_inode2fid(dir)), dir, LL_IT2STR(it)); rc = it_open_error(DISP_OPEN_CREATE, it); if (rc) @@ -733,8 +731,9 @@ static void ll_update_times(struct ptlrpc_request *request, LASSERT(body); if (body->valid & OBD_MD_FLMTIME && body->mtime > LTIME_S(inode->i_mtime)) { - CDEBUG(D_INODE, "setting ino %lu mtime from %lu to %llu\n", - inode->i_ino, LTIME_S(inode->i_mtime), body->mtime); + CDEBUG(D_INODE, "setting fid "DFID" mtime from %lu to %llu\n", + PFID(ll_inode2fid(inode)), LTIME_S(inode->i_mtime), + body->mtime); LTIME_S(inode->i_mtime) = body->mtime; } if (body->valid & OBD_MD_FLCTIME && @@ -791,9 +790,9 @@ static int ll_mknod(struct inode *dir, struct dentry *dchild, { int err; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p) mode %o dev %x\n", - dchild, dir->i_ino, dir->i_generation, dir, - mode, old_encode_dev(rdev)); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p) mode %o dev %x\n", + dchild, PFID(ll_inode2fid(dir)), dir, mode, + old_encode_dev(rdev)); if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir))) mode &= ~current_umask(); @@ -831,9 +830,8 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry, { int rc; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),flags=%u, excl=%d\n", - dentry, dir->i_ino, - dir->i_generation, dir, mode, want_excl); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), flags=%u, excl=%d\n", + dentry, PFID(ll_inode2fid(dir)), dir, mode, want_excl); rc = ll_mknod(dir, dentry, mode, 0); @@ -845,12 +843,6 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry, return rc; } -static inline void ll_get_child_fid(struct dentry *child, struct lu_fid *fid) -{ - if (d_really_is_positive(child)) - *fid = *ll_inode2fid(d_inode(child)); -} - int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir) { struct mdt_body *body; @@ -927,23 +919,25 @@ out: * is any lock existing. They will recycle dentries and inodes based upon locks * too. b=20433 */ -static int ll_unlink(struct inode *dir, struct dentry *dentry) +static int ll_unlink(struct inode *dir, struct dentry *dchild) { struct ptlrpc_request *request = NULL; struct md_op_data *op_data; int rc; CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n", - dentry, dir->i_ino, dir->i_generation, dir); + dchild, dir->i_ino, dir->i_generation, dir); op_data = ll_prep_md_op_data(NULL, dir, NULL, - dentry->d_name.name, - dentry->d_name.len, + dchild->d_name.name, + dchild->d_name.len, 0, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) return PTR_ERR(op_data); - ll_get_child_fid(dentry, &op_data->op_fid3); + if (dchild && dchild->d_inode) + op_data->op_fid3 = *ll_inode2fid(dchild->d_inode); + op_data->op_fid2 = op_data->op_fid3; rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request); ll_finish_md_op_data(op_data); @@ -963,8 +957,8 @@ static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { int err; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n", - dentry, dir->i_ino, dir->i_generation, dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir"DFID"(%p)\n", + dentry, PFID(ll_inode2fid(dir)), dir); if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir))) mode &= ~current_umask(); @@ -977,23 +971,25 @@ static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) return err; } -static int ll_rmdir(struct inode *dir, struct dentry *dentry) +static int ll_rmdir(struct inode *dir, struct dentry *dchild) { struct ptlrpc_request *request = NULL; struct md_op_data *op_data; int rc; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n", - dentry, dir->i_ino, dir->i_generation, dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p)\n", + dchild, PFID(ll_inode2fid(dir)), dir); op_data = ll_prep_md_op_data(NULL, dir, NULL, - dentry->d_name.name, - dentry->d_name.len, + dchild->d_name.name, + dchild->d_name.len, S_IFDIR, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) return PTR_ERR(op_data); - ll_get_child_fid(dentry, &op_data->op_fid3); + if (dchild && dchild->d_inode) + op_data->op_fid3 = *ll_inode2fid(dchild->d_inode); + op_data->op_fid2 = op_data->op_fid3; rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request); ll_finish_md_op_data(op_data); @@ -1011,9 +1007,8 @@ static int ll_symlink(struct inode *dir, struct dentry *dentry, { int err; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),target=%.*s\n", - dentry, dir->i_ino, dir->i_generation, - dir, 3000, oldname); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),target=%.*s\n", + dentry, PFID(ll_inode2fid(dir)), dir, 3000, oldname); err = ll_new_node(dir, dentry, oldname, S_IFLNK | S_IRWXUGO, 0, LUSTRE_OPC_SYMLINK); @@ -1033,10 +1028,9 @@ static int ll_link(struct dentry *old_dentry, struct inode *dir, struct md_op_data *op_data; int err; - CDEBUG(D_VFSTRACE, - "VFS Op: inode=%lu/%u(%p), dir=%lu/%u(%p), target=%pd\n", - src->i_ino, src->i_generation, src, dir->i_ino, - dir->i_generation, dir, new_dentry); + CDEBUG(D_VFSTRACE, "VFS Op: inode="DFID"(%p), dir="DFID"(%p), target=%pd\n", + PFID(ll_inode2fid(src)), src, PFID(ll_inode2fid(dir)), dir, + new_dentry); op_data = ll_prep_md_op_data(NULL, src, dir, new_dentry->d_name.name, new_dentry->d_name.len, @@ -1056,42 +1050,45 @@ out: return err; } -static int ll_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) +static int ll_rename(struct inode *src, struct dentry *src_dchild, + struct inode *tgt, struct dentry *tgt_dchild) { struct ptlrpc_request *request = NULL; - struct ll_sb_info *sbi = ll_i2sbi(old_dir); + struct ll_sb_info *sbi = ll_i2sbi(src); struct md_op_data *op_data; int err; CDEBUG(D_VFSTRACE, - "VFS Op:oldname=%pd,src_dir=%lu/%u(%p),newname=%pd,tgt_dir=%lu/%u(%p)\n", - old_dentry, old_dir->i_ino, old_dir->i_generation, old_dir, - new_dentry, new_dir->i_ino, new_dir->i_generation, new_dir); + "VFS Op:oldname=%pd, src_dir="DFID"(%p), newname=%pd, tgt_dir="DFID"(%p)\n", + src_dchild, PFID(ll_inode2fid(src)), src, + tgt_dchild, PFID(ll_inode2fid(tgt)), tgt); - op_data = ll_prep_md_op_data(NULL, old_dir, new_dir, NULL, 0, 0, + op_data = ll_prep_md_op_data(NULL, src, tgt, NULL, 0, 0, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) return PTR_ERR(op_data); - ll_get_child_fid(old_dentry, &op_data->op_fid3); - ll_get_child_fid(new_dentry, &op_data->op_fid4); + if (src_dchild && src_dchild->d_inode) + op_data->op_fid3 = *ll_inode2fid(src_dchild->d_inode); + if (tgt_dchild && tgt_dchild->d_inode) + op_data->op_fid4 = *ll_inode2fid(tgt_dchild->d_inode); + err = md_rename(sbi->ll_md_exp, op_data, - old_dentry->d_name.name, - old_dentry->d_name.len, - new_dentry->d_name.name, - new_dentry->d_name.len, &request); + src_dchild->d_name.name, + src_dchild->d_name.len, + tgt_dchild->d_name.name, + tgt_dchild->d_name.len, &request); ll_finish_md_op_data(op_data); if (!err) { - ll_update_times(request, old_dir); - ll_update_times(request, new_dir); + ll_update_times(request, src); + ll_update_times(request, tgt); ll_stats_ops_tally(sbi, LPROC_LL_RENAME, 1); - err = ll_objects_destroy(request, old_dir); + err = ll_objects_destroy(request, src); } ptlrpc_req_finished(request); if (!err) - d_move(old_dentry, new_dentry); + d_move(src_dchild, tgt_dchild); return err; } diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c index edab6c5b7e50..336397773fbb 100644 --- a/drivers/staging/lustre/lustre/llite/rw.c +++ b/drivers/staging/lustre/lustre/llite/rw.c @@ -63,7 +63,7 @@ * Finalizes cl-data before exiting typical address_space operation. Dual to * ll_cl_init(). */ -static void ll_cl_fini(struct ll_cl_context *lcc) +void ll_cl_fini(struct ll_cl_context *lcc) { struct lu_env *env = lcc->lcc_env; struct cl_io *io = lcc->lcc_io; @@ -84,200 +84,59 @@ static void ll_cl_fini(struct ll_cl_context *lcc) * Initializes common cl-data at the typical address_space operation entry * point. */ -static struct ll_cl_context *ll_cl_init(struct file *file, - struct page *vmpage, int create) +struct ll_cl_context *ll_cl_init(struct file *file, struct page *vmpage) { struct ll_cl_context *lcc; struct lu_env *env; struct cl_io *io; struct cl_object *clob; - struct ccc_io *cio; + struct vvp_io *vio; int refcheck; int result = 0; - clob = ll_i2info(vmpage->mapping->host)->lli_clob; + clob = ll_i2info(file_inode(file))->lli_clob; LASSERT(clob); env = cl_env_get(&refcheck); if (IS_ERR(env)) return ERR_CAST(env); - lcc = &vvp_env_info(env)->vti_io_ctx; + lcc = &ll_env_info(env)->lti_io_ctx; memset(lcc, 0, sizeof(*lcc)); lcc->lcc_env = env; lcc->lcc_refcheck = refcheck; lcc->lcc_cookie = current; - cio = ccc_env_io(env); - io = cio->cui_cl.cis_io; - if (!io && create) { - struct inode *inode = vmpage->mapping->host; - loff_t pos; - - if (inode_trylock(inode)) { - inode_unlock((inode)); - - /* this is too bad. Someone is trying to write the - * page w/o holding inode mutex. This means we can - * add dirty pages into cache during truncate - */ - CERROR("Proc %s is dirtying page w/o inode lock, this will break truncate\n", - current->comm); - dump_stack(); - LBUG(); - return ERR_PTR(-EIO); - } - - /* - * Loop-back driver calls ->prepare_write(). - * methods directly, bypassing file system ->write() operation, - * so cl_io has to be created here. - */ - io = ccc_env_thread_io(env); - ll_io_init(io, file, 1); - - /* No lock at all for this kind of IO - we can't do it because - * we have held page lock, it would cause deadlock. - * XXX: This causes poor performance to loop device - One page - * per RPC. - * In order to get better performance, users should use - * lloop driver instead. - */ - io->ci_lockreq = CILR_NEVER; - - pos = vmpage->index << PAGE_SHIFT; - - /* Create a temp IO to serve write. */ - result = cl_io_rw_init(env, io, CIT_WRITE, pos, PAGE_SIZE); - if (result == 0) { - cio->cui_fd = LUSTRE_FPRIVATE(file); - cio->cui_iter = NULL; - result = cl_io_iter_init(env, io); - if (result == 0) { - result = cl_io_lock(env, io); - if (result == 0) - result = cl_io_start(env, io); - } - } else - result = io->ci_result; - } - + vio = vvp_env_io(env); + io = vio->vui_cl.cis_io; lcc->lcc_io = io; if (!io) result = -EIO; - if (result == 0) { + + if (result == 0 && vmpage) { struct cl_page *page; LASSERT(io->ci_state == CIS_IO_GOING); - LASSERT(cio->cui_fd == LUSTRE_FPRIVATE(file)); + LASSERT(vio->vui_fd == LUSTRE_FPRIVATE(file)); page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE); if (!IS_ERR(page)) { lcc->lcc_page = page; lu_ref_add(&page->cp_reference, "cl_io", io); result = 0; - } else + } else { result = PTR_ERR(page); + } } if (result) { ll_cl_fini(lcc); lcc = ERR_PTR(result); } - CDEBUG(D_VFSTRACE, "%lu@"DFID" -> %d %p %p\n", - vmpage->index, PFID(lu_object_fid(&clob->co_lu)), result, - env, io); - return lcc; -} - -static struct ll_cl_context *ll_cl_get(void) -{ - struct ll_cl_context *lcc; - struct lu_env *env; - int refcheck; - - env = cl_env_get(&refcheck); - LASSERT(!IS_ERR(env)); - lcc = &vvp_env_info(env)->vti_io_ctx; - LASSERT(env == lcc->lcc_env); - LASSERT(current == lcc->lcc_cookie); - cl_env_put(env, &refcheck); - - /* env has got in ll_cl_init, so it is still usable. */ return lcc; } -/** - * ->prepare_write() address space operation called by generic_file_write() - * for every page during write. - */ -int ll_prepare_write(struct file *file, struct page *vmpage, unsigned from, - unsigned to) -{ - struct ll_cl_context *lcc; - int result; - - lcc = ll_cl_init(file, vmpage, 1); - if (!IS_ERR(lcc)) { - struct lu_env *env = lcc->lcc_env; - struct cl_io *io = lcc->lcc_io; - struct cl_page *page = lcc->lcc_page; - - cl_page_assume(env, io, page); - - result = cl_io_prepare_write(env, io, page, from, to); - if (result == 0) { - /* - * Add a reference, so that page is not evicted from - * the cache until ->commit_write() is called. - */ - cl_page_get(page); - lu_ref_add(&page->cp_reference, "prepare_write", - current); - } else { - cl_page_unassume(env, io, page); - ll_cl_fini(lcc); - } - /* returning 0 in prepare assumes commit must be called - * afterwards - */ - } else { - result = PTR_ERR(lcc); - } - return result; -} - -int ll_commit_write(struct file *file, struct page *vmpage, unsigned from, - unsigned to) -{ - struct ll_cl_context *lcc; - struct lu_env *env; - struct cl_io *io; - struct cl_page *page; - int result = 0; - - lcc = ll_cl_get(); - env = lcc->lcc_env; - page = lcc->lcc_page; - io = lcc->lcc_io; - - LASSERT(cl_page_is_owned(page, io)); - LASSERT(from <= to); - if (from != to) /* handle short write case. */ - result = cl_io_commit_write(env, io, page, from, to); - if (cl_page_is_owned(page, io)) - cl_page_unassume(env, io, page); - - /* - * Release reference acquired by ll_prepare_write(). - */ - lu_ref_del(&page->cp_reference, "prepare_write", current); - cl_page_put(env, page); - ll_cl_fini(lcc); - return result; -} - static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which); /** @@ -301,7 +160,7 @@ static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which); */ static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, struct ra_io_arg *ria, - unsigned long pages) + unsigned long pages, unsigned long min) { struct ll_ra_info *ra = &sbi->ll_ra_info; long ret; @@ -341,6 +200,11 @@ static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, } out: + if (ret < min) { + /* override ra limit for maximum performance */ + atomic_add(min - ret, &ra->ra_cur_pages); + ret = min; + } return ret; } @@ -357,9 +221,9 @@ static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which) lprocfs_counter_incr(sbi->ll_ra_stats, which); } -void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which) +void ll_ra_stats_inc(struct inode *inode, enum ra_stat which) { - struct ll_sb_info *sbi = ll_i2sbi(mapping->host); + struct ll_sb_info *sbi = ll_i2sbi(inode); ll_ra_stats_inc_sbi(sbi, which); } @@ -388,61 +252,42 @@ static int index_in_window(unsigned long index, unsigned long point, return start <= index && index <= end; } -static struct ll_readahead_state *ll_ras_get(struct file *f) +void ll_ras_enter(struct file *f) { - struct ll_file_data *fd; - - fd = LUSTRE_FPRIVATE(f); - return &fd->fd_ras; -} - -void ll_ra_read_in(struct file *f, struct ll_ra_read *rar) -{ - struct ll_readahead_state *ras; - - ras = ll_ras_get(f); + struct ll_file_data *fd = LUSTRE_FPRIVATE(f); + struct ll_readahead_state *ras = &fd->fd_ras; spin_lock(&ras->ras_lock); ras->ras_requests++; ras->ras_request_index = 0; ras->ras_consecutive_requests++; - rar->lrr_reader = current; - - list_add(&rar->lrr_linkage, &ras->ras_read_beads); - spin_unlock(&ras->ras_lock); -} - -void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar) -{ - struct ll_readahead_state *ras; - - ras = ll_ras_get(f); - - spin_lock(&ras->ras_lock); - list_del_init(&rar->lrr_linkage); spin_unlock(&ras->ras_lock); } static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io, struct cl_page_list *queue, struct cl_page *page, - struct page *vmpage) + struct cl_object *clob, pgoff_t *max_index) { - struct ccc_page *cp; + struct page *vmpage = page->cp_vmpage; + struct vvp_page *vpg; int rc; rc = 0; cl_page_assume(env, io, page); lu_ref_add(&page->cp_reference, "ra", current); - cp = cl2ccc_page(cl_page_at(page, &vvp_device_type)); - if (!cp->cpg_defer_uptodate && !PageUptodate(vmpage)) { - rc = cl_page_is_under_lock(env, io, page); - if (rc == -EBUSY) { - cp->cpg_defer_uptodate = 1; - cp->cpg_ra_used = 0; + vpg = cl2vvp_page(cl_object_page_slice(clob, page)); + if (!vpg->vpg_defer_uptodate && !PageUptodate(vmpage)) { + CDEBUG(D_READA, "page index %lu, max_index: %lu\n", + vvp_index(vpg), *max_index); + if (*max_index == 0 || vvp_index(vpg) > *max_index) + rc = cl_page_is_under_lock(env, io, page, max_index); + if (rc == 0) { + vpg->vpg_defer_uptodate = 1; + vpg->vpg_ra_used = 0; cl_page_list_add(queue, page); rc = 1; } else { - cl_page_delete(env, page); + cl_page_discard(env, io, page); rc = -ENOLCK; } } else { @@ -466,24 +311,25 @@ static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io, */ static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io, struct cl_page_list *queue, - pgoff_t index, struct address_space *mapping) + pgoff_t index, pgoff_t *max_index) { + struct cl_object *clob = io->ci_obj; + struct inode *inode = vvp_object_inode(clob); struct page *vmpage; - struct cl_object *clob = ll_i2info(mapping->host)->lli_clob; struct cl_page *page; enum ra_stat which = _NR_RA_STAT; /* keep gcc happy */ int rc = 0; const char *msg = NULL; - vmpage = grab_cache_page_nowait(mapping, index); + vmpage = grab_cache_page_nowait(inode->i_mapping, index); if (vmpage) { /* Check if vmpage was truncated or reclaimed */ - if (vmpage->mapping == mapping) { + if (vmpage->mapping == inode->i_mapping) { page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE); if (!IS_ERR(page)) { rc = cl_read_ahead_page(env, io, queue, - page, vmpage); + page, clob, max_index); if (rc == -ENOLCK) { which = RA_STAT_FAILED_MATCH; msg = "lock match failed"; @@ -504,7 +350,7 @@ static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io, msg = "g_c_p_n failed"; } if (msg) { - ll_ra_stats_inc(mapping, which); + ll_ra_stats_inc(inode, which); CDEBUG(D_READA, "%s\n", msg); } return rc; @@ -616,11 +462,12 @@ static int ll_read_ahead_pages(const struct lu_env *env, struct cl_io *io, struct cl_page_list *queue, struct ra_io_arg *ria, unsigned long *reserved_pages, - struct address_space *mapping, unsigned long *ra_end) { - int rc, count = 0, stride_ria; - unsigned long page_idx; + int rc, count = 0; + bool stride_ria; + pgoff_t page_idx; + pgoff_t max_index = 0; LASSERT(ria); RIA_DEBUG(ria); @@ -631,12 +478,13 @@ static int ll_read_ahead_pages(const struct lu_env *env, if (ras_inside_ra_window(page_idx, ria)) { /* If the page is inside the read-ahead window*/ rc = ll_read_ahead_page(env, io, queue, - page_idx, mapping); + page_idx, &max_index); if (rc == 1) { (*reserved_pages)--; count++; - } else if (rc == -ENOLCK) + } else if (rc == -ENOLCK) { break; + } } else if (stride_ria) { /* If it is not in the read-ahead window, and it is * read-ahead mode, then check whether it should skip @@ -666,25 +514,22 @@ static int ll_read_ahead_pages(const struct lu_env *env, } int ll_readahead(const struct lu_env *env, struct cl_io *io, - struct ll_readahead_state *ras, struct address_space *mapping, - struct cl_page_list *queue, int flags) + struct cl_page_list *queue, struct ll_readahead_state *ras, + bool hit) { struct vvp_io *vio = vvp_env_io(env); - struct vvp_thread_info *vti = vvp_env_info(env); - struct cl_attr *attr = ccc_env_thread_attr(env); + struct ll_thread_info *lti = ll_env_info(env); + struct cl_attr *attr = vvp_env_thread_attr(env); unsigned long start = 0, end = 0, reserved; - unsigned long ra_end, len; + unsigned long ra_end, len, mlen = 0; struct inode *inode; - struct ll_ra_read *bead; - struct ra_io_arg *ria = &vti->vti_ria; - struct ll_inode_info *lli; + struct ra_io_arg *ria = <i->lti_ria; struct cl_object *clob; int ret = 0; __u64 kms; - inode = mapping->host; - lli = ll_i2info(inode); - clob = lli->lli_clob; + clob = io->ci_obj; + inode = vvp_object_inode(clob); memset(ria, 0, sizeof(*ria)); @@ -696,22 +541,20 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io, return ret; kms = attr->cat_kms; if (kms == 0) { - ll_ra_stats_inc(mapping, RA_STAT_ZERO_LEN); + ll_ra_stats_inc(inode, RA_STAT_ZERO_LEN); return 0; } spin_lock(&ras->ras_lock); - if (vio->cui_ra_window_set) - bead = &vio->cui_bead; - else - bead = NULL; /* Enlarge the RA window to encompass the full read */ - if (bead && ras->ras_window_start + ras->ras_window_len < - bead->lrr_start + bead->lrr_count) { - ras->ras_window_len = bead->lrr_start + bead->lrr_count - + if (vio->vui_ra_valid && + ras->ras_window_start + ras->ras_window_len < + vio->vui_ra_start + vio->vui_ra_count) { + ras->ras_window_len = vio->vui_ra_start + vio->vui_ra_count - ras->ras_window_start; } + /* Reserve a part of the read-ahead window that we'll be issuing */ if (ras->ras_window_len) { start = ras->ras_next_readahead; @@ -755,29 +598,48 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io, spin_unlock(&ras->ras_lock); if (end == 0) { - ll_ra_stats_inc(mapping, RA_STAT_ZERO_WINDOW); + ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW); return 0; } len = ria_page_count(ria); - if (len == 0) + if (len == 0) { + ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW); return 0; + } + + CDEBUG(D_READA, DFID ": ria: %lu/%lu, bead: %lu/%lu, hit: %d\n", + PFID(lu_object_fid(&clob->co_lu)), + ria->ria_start, ria->ria_end, + vio->vui_ra_valid ? vio->vui_ra_start : 0, + vio->vui_ra_valid ? vio->vui_ra_count : 0, + hit); + + /* at least to extend the readahead window to cover current read */ + if (!hit && vio->vui_ra_valid && + vio->vui_ra_start + vio->vui_ra_count > ria->ria_start) { + /* to the end of current read window. */ + mlen = vio->vui_ra_start + vio->vui_ra_count - ria->ria_start; + /* trim to RPC boundary */ + start = ria->ria_start & (PTLRPC_MAX_BRW_PAGES - 1); + mlen = min(mlen, PTLRPC_MAX_BRW_PAGES - start); + } - reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len); + reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen); if (reserved < len) - ll_ra_stats_inc(mapping, RA_STAT_MAX_IN_FLIGHT); + ll_ra_stats_inc(inode, RA_STAT_MAX_IN_FLIGHT); - CDEBUG(D_READA, "reserved page %lu ra_cur %d ra_max %lu\n", reserved, + CDEBUG(D_READA, "reserved pages %lu/%lu/%lu, ra_cur %d, ra_max %lu\n", + reserved, len, mlen, atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages), ll_i2sbi(inode)->ll_ra_info.ra_max_pages); - ret = ll_read_ahead_pages(env, io, queue, - ria, &reserved, mapping, &ra_end); + ret = ll_read_ahead_pages(env, io, queue, ria, &reserved, &ra_end); if (reserved != 0) ll_ra_count_put(ll_i2sbi(inode), reserved); if (ra_end == end + 1 && ra_end == (kms >> PAGE_SHIFT)) - ll_ra_stats_inc(mapping, RA_STAT_EOF); + ll_ra_stats_inc(inode, RA_STAT_EOF); /* if we didn't get to the end of the region we reserved from * the ras we need to go back and update the ras so that the @@ -789,6 +651,7 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io, ra_end, end, ria->ria_end); if (ra_end != end + 1) { + ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END); spin_lock(&ras->ras_lock); if (ra_end < ras->ras_next_readahead && index_in_window(ra_end, ras->ras_window_start, 0, @@ -836,7 +699,6 @@ void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras) spin_lock_init(&ras->ras_lock); ras_reset(inode, ras, 0); ras->ras_requests = 0; - INIT_LIST_HEAD(&ras->ras_read_beads); } /* @@ -1059,15 +921,18 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode, ras->ras_last_readpage = index; ras_set_start(inode, ras, index); - if (stride_io_mode(ras)) + if (stride_io_mode(ras)) { /* Since stride readahead is sensitive to the offset * of read-ahead, so we use original offset here, * instead of ras_window_start, which is RPC aligned */ ras->ras_next_readahead = max(index, ras->ras_next_readahead); - else - ras->ras_next_readahead = max(ras->ras_window_start, - ras->ras_next_readahead); + } else { + if (ras->ras_next_readahead < ras->ras_window_start) + ras->ras_next_readahead = ras->ras_window_start; + if (!hit) + ras->ras_next_readahead = index + 1; + } RAS_CDEBUG(ras); /* Trigger RA in the mmap case where ras_consecutive_requests @@ -1129,7 +994,7 @@ int ll_writepage(struct page *vmpage, struct writeback_control *wbc) clob = ll_i2info(inode)->lli_clob; LASSERT(clob); - io = ccc_env_thread_io(env); + io = vvp_env_thread_io(env); io->ci_obj = clob; io->ci_ignore_layout = 1; result = cl_io_init(env, io, CIT_MISC, clob); @@ -1240,8 +1105,9 @@ int ll_writepages(struct address_space *mapping, struct writeback_control *wbc) if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) { if (end == OBD_OBJECT_EOF) - end = i_size_read(inode); - mapping->writeback_index = (end >> PAGE_SHIFT) + 1; + mapping->writeback_index = 0; + else + mapping->writeback_index = (end >> PAGE_SHIFT) + 1; } return result; } @@ -1251,7 +1117,7 @@ int ll_readpage(struct file *file, struct page *vmpage) struct ll_cl_context *lcc; int result; - lcc = ll_cl_init(file, vmpage, 0); + lcc = ll_cl_init(file, vmpage); if (!IS_ERR(lcc)) { struct lu_env *env = lcc->lcc_env; struct cl_io *io = lcc->lcc_io; @@ -1273,3 +1139,28 @@ int ll_readpage(struct file *file, struct page *vmpage) } return result; } + +int ll_page_sync_io(const struct lu_env *env, struct cl_io *io, + struct cl_page *page, enum cl_req_type crt) +{ + struct cl_2queue *queue; + int result; + + LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); + + queue = &io->ci_queue; + cl_2queue_init_page(queue, page); + + result = cl_io_submit_sync(env, io, crt, queue, 0); + LASSERT(cl_page_is_owned(page, io)); + + if (crt == CRT_READ) + /* + * in CRT_WRITE case page is left locked even in case of + * error. + */ + cl_page_list_disown(env, io, &queue->c2_qin); + cl_2queue_fini(env, queue); + + return result; +} diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c index 69aa15e8e3ef..c12a048fce59 100644 --- a/drivers/staging/lustre/lustre/llite/rw26.c +++ b/drivers/staging/lustre/lustre/llite/rw26.c @@ -95,15 +95,12 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset, if (obj) { page = cl_vmpage_page(vmpage, obj); if (page) { - lu_ref_add(&page->cp_reference, - "delete", vmpage); cl_page_delete(env, page); - lu_ref_del(&page->cp_reference, - "delete", vmpage); cl_page_put(env, page); } - } else + } else { LASSERT(vmpage->private == 0); + } cl_env_put(env, &refcheck); } } @@ -111,12 +108,12 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset, static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask) { - struct cl_env_nest nest; struct lu_env *env; + void *cookie; struct cl_object *obj; struct cl_page *page; struct address_space *mapping; - int result; + int result = 0; LASSERT(PageLocked(vmpage)); if (PageWriteback(vmpage) || PageDirty(vmpage)) @@ -130,53 +127,42 @@ static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask) if (!obj) return 1; - /* 1 for page allocator, 1 for cl_page and 1 for page cache */ + /* 1 for caller, 1 for cl_page and 1 for page cache */ if (page_count(vmpage) > 3) return 0; - /* TODO: determine what gfp should be used by @gfp_mask. */ - env = cl_env_nested_get(&nest); - if (IS_ERR(env)) - /* If we can't allocate an env we won't call cl_page_put() - * later on which further means it's impossible to drop - * page refcount by cl_page, so ask kernel to not free - * this page. - */ - return 0; - page = cl_vmpage_page(vmpage, obj); - result = !page; - if (page) { - if (!cl_page_in_use(page)) { - result = 1; - cl_page_delete(env, page); - } - cl_page_put(env, page); - } - cl_env_nested_put(&nest, env); - return result; -} + if (!page) + return 1; -static int ll_set_page_dirty(struct page *vmpage) -{ -#if 0 - struct cl_page *page = vvp_vmpage_page_transient(vmpage); - struct vvp_object *obj = cl_inode2vvp(vmpage->mapping->host); - struct vvp_page *cpg; + cookie = cl_env_reenter(); + env = cl_env_percpu_get(); + LASSERT(!IS_ERR(env)); - /* - * XXX should page method be called here? - */ - LASSERT(&obj->co_cl == page->cp_obj); - cpg = cl2vvp_page(cl_page_at(page, &vvp_device_type)); - /* - * XXX cannot do much here, because page is possibly not locked: - * sys_munmap()->... - * ->unmap_page_range()->zap_pte_range()->set_page_dirty(). + if (!cl_page_in_use(page)) { + result = 1; + cl_page_delete(env, page); + } + + /* To use percpu env array, the call path can not be rescheduled; + * otherwise percpu array will be messed if ll_releaspage() called + * again on the same CPU. + * + * If this page holds the last refc of cl_object, the following + * call path may cause reschedule: + * cl_page_put -> cl_page_free -> cl_object_put -> + * lu_object_put -> lu_object_free -> lov_delete_raid0. + * + * However, the kernel can't get rid of this inode until all pages have + * been cleaned up. Now that we hold page lock here, it's pretty safe + * that we won't get into object delete path. */ - vvp_write_pending(obj, cpg); -#endif - return __set_page_dirty_nobuffers(vmpage); + LASSERT(cl_object_refc(obj) > 1); + cl_page_put(env, page); + + cl_env_percpu_put(env); + cl_env_reexit(cookie); + return result; } #define MAX_DIRECTIO_SIZE (2*1024*1024*1024UL) @@ -266,7 +252,7 @@ ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, * write directly */ if (clp->cp_type == CPT_CACHEABLE) { - struct page *vmpage = cl_page_vmpage(env, clp); + struct page *vmpage = cl_page_vmpage(clp); struct page *src_page; struct page *dst_page; void *src; @@ -358,14 +344,14 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io, */ #define MAX_DIO_SIZE ((KMALLOC_MAX_SIZE / sizeof(struct brw_page) * \ PAGE_SIZE) & ~(DT_MAX_BRW_SIZE - 1)) -static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, - loff_t file_offset) +static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter) { struct lu_env *env; struct cl_io *io; struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - struct ccc_object *obj = cl_inode2ccc(inode); + struct vvp_object *obj = cl_inode2vvp(inode); + loff_t file_offset = iocb->ki_pos; ssize_t count = iov_iter_count(iter); ssize_t tot_bytes = 0, result = 0; struct ll_inode_info *lli = ll_i2info(inode); @@ -376,22 +362,21 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, return -EBADF; /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */ - if ((file_offset & ~CFS_PAGE_MASK) || (count & ~CFS_PAGE_MASK)) + if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK)) return -EINVAL; - CDEBUG(D_VFSTRACE, - "VFS Op:inode=%lu/%u(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n", - inode->i_ino, inode->i_generation, inode, count, MAX_DIO_SIZE, + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n", + PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE, file_offset, file_offset, count >> PAGE_SHIFT, MAX_DIO_SIZE >> PAGE_SHIFT); /* Check that all user buffers are aligned as well */ - if (iov_iter_alignment(iter) & ~CFS_PAGE_MASK) + if (iov_iter_alignment(iter) & ~PAGE_MASK) return -EINVAL; env = cl_env_get(&refcheck); LASSERT(!IS_ERR(env)); - io = ccc_env_io(env)->cui_cl.cis_io; + io = vvp_env_io(env)->vui_cl.cis_io; LASSERT(io); /* 0. Need locking between buffered and direct access. and race with @@ -401,7 +386,7 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, if (iov_iter_rw(iter) == READ) inode_lock(inode); - LASSERT(obj->cob_transient_pages == 0); + LASSERT(obj->vob_transient_pages == 0); while (iov_iter_count(iter)) { struct page **pages; size_t offs; @@ -435,8 +420,8 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, size > (PAGE_SIZE / sizeof(*pages)) * PAGE_SIZE) { size = ((((size / 2) - 1) | - ~CFS_PAGE_MASK) + 1) & - CFS_PAGE_MASK; + ~PAGE_MASK) + 1) & + PAGE_MASK; CDEBUG(D_VFSTRACE, "DIO size now %lu\n", size); continue; @@ -449,62 +434,213 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, file_offset += result; } out: - LASSERT(obj->cob_transient_pages == 0); + LASSERT(obj->vob_transient_pages == 0); if (iov_iter_rw(iter) == READ) inode_unlock(inode); if (tot_bytes > 0) { - if (iov_iter_rw(iter) == WRITE) { - struct lov_stripe_md *lsm; - - lsm = ccc_inode_lsm_get(inode); - LASSERT(lsm); - lov_stripe_lock(lsm); - obd_adjust_kms(ll_i2dtexp(inode), lsm, file_offset, 0); - lov_stripe_unlock(lsm); - ccc_inode_lsm_put(inode, lsm); - } + struct vvp_io *vio = vvp_env_io(env); + + /* no commit async for direct IO */ + vio->u.write.vui_written += tot_bytes; } cl_env_put(env, &refcheck); - return tot_bytes ? : result; + return tot_bytes ? tot_bytes : result; +} + +/** + * Prepare partially written-to page for a write. + */ +static int ll_prepare_partial_page(const struct lu_env *env, struct cl_io *io, + struct cl_page *pg) +{ + struct cl_attr *attr = vvp_env_thread_attr(env); + struct cl_object *obj = io->ci_obj; + struct vvp_page *vpg = cl_object_page_slice(obj, pg); + loff_t offset = cl_offset(obj, vvp_index(vpg)); + int result; + + cl_object_attr_lock(obj); + result = cl_object_attr_get(env, obj, attr); + cl_object_attr_unlock(obj); + if (result == 0) { + /* + * If are writing to a new page, no need to read old data. + * The extent locking will have updated the KMS, and for our + * purposes here we can treat it like i_size. + */ + if (attr->cat_kms <= offset) { + char *kaddr = kmap_atomic(vpg->vpg_page); + + memset(kaddr, 0, cl_page_size(obj)); + kunmap_atomic(kaddr); + } else if (vpg->vpg_defer_uptodate) { + vpg->vpg_ra_used = 1; + } else { + result = ll_page_sync_io(env, io, pg, CRT_READ); + } + } + return result; } static int ll_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { + struct ll_cl_context *lcc; + struct lu_env *env; + struct cl_io *io; + struct cl_page *page; + struct cl_object *clob = ll_i2info(mapping->host)->lli_clob; pgoff_t index = pos >> PAGE_SHIFT; - struct page *page; - int rc; - unsigned from = pos & (PAGE_SIZE - 1); + struct page *vmpage = NULL; + unsigned int from = pos & (PAGE_SIZE - 1); + unsigned int to = from + len; + int result = 0; - page = grab_cache_page_write_begin(mapping, index, flags); - if (!page) - return -ENOMEM; + CDEBUG(D_VFSTRACE, "Writing %lu of %d to %d bytes\n", index, from, len); - *pagep = page; + lcc = ll_cl_init(file, NULL); + if (IS_ERR(lcc)) { + result = PTR_ERR(lcc); + goto out; + } - rc = ll_prepare_write(file, page, from, from + len); - if (rc) { - unlock_page(page); - put_page(page); + env = lcc->lcc_env; + io = lcc->lcc_io; + + /* To avoid deadlock, try to lock page first. */ + vmpage = grab_cache_page_nowait(mapping, index); + if (unlikely(!vmpage || PageDirty(vmpage) || PageWriteback(vmpage))) { + struct vvp_io *vio = vvp_env_io(env); + struct cl_page_list *plist = &vio->u.write.vui_queue; + + /* if the page is already in dirty cache, we have to commit + * the pages right now; otherwise, it may cause deadlock + * because it holds page lock of a dirty page and request for + * more grants. It's okay for the dirty page to be the first + * one in commit page list, though. + */ + if (vmpage && plist->pl_nr > 0) { + unlock_page(vmpage); + put_page(vmpage); + vmpage = NULL; + } + + /* commit pages and then wait for page lock */ + result = vvp_io_write_commit(env, io); + if (result < 0) + goto out; + + if (!vmpage) { + vmpage = grab_cache_page_write_begin(mapping, index, + flags); + if (!vmpage) { + result = -ENOMEM; + goto out; + } + } } - return rc; + + page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE); + if (IS_ERR(page)) { + result = PTR_ERR(page); + goto out; + } + + lcc->lcc_page = page; + lu_ref_add(&page->cp_reference, "cl_io", io); + + cl_page_assume(env, io, page); + if (!PageUptodate(vmpage)) { + /* + * We're completely overwriting an existing page, + * so _don't_ set it up to date until commit_write + */ + if (from == 0 && to == PAGE_SIZE) { + CL_PAGE_HEADER(D_PAGE, env, page, "full page write\n"); + POISON_PAGE(vmpage, 0x11); + } else { + /* TODO: can be optimized at OSC layer to check if it + * is a lockless IO. In that case, it's not necessary + * to read the data. + */ + result = ll_prepare_partial_page(env, io, page); + if (result == 0) + SetPageUptodate(vmpage); + } + } + if (result < 0) + cl_page_unassume(env, io, page); +out: + if (result < 0) { + if (vmpage) { + unlock_page(vmpage); + put_page(vmpage); + } + if (!IS_ERR(lcc)) + ll_cl_fini(lcc); + } else { + *pagep = vmpage; + *fsdata = lcc; + } + return result; } static int ll_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct page *vmpage, void *fsdata) { + struct ll_cl_context *lcc = fsdata; + struct lu_env *env; + struct cl_io *io; + struct vvp_io *vio; + struct cl_page *page; unsigned from = pos & (PAGE_SIZE - 1); - int rc; + bool unplug = false; + int result = 0; + + put_page(vmpage); + + env = lcc->lcc_env; + page = lcc->lcc_page; + io = lcc->lcc_io; + vio = vvp_env_io(env); + + LASSERT(cl_page_is_owned(page, io)); + if (copied > 0) { + struct cl_page_list *plist = &vio->u.write.vui_queue; + + lcc->lcc_page = NULL; /* page will be queued */ + + /* Add it into write queue */ + cl_page_list_add(plist, page); + if (plist->pl_nr == 1) /* first page */ + vio->u.write.vui_from = from; + else + LASSERT(from == 0); + vio->u.write.vui_to = from + copied; + + /* We may have one full RPC, commit it soon */ + if (plist->pl_nr >= PTLRPC_MAX_BRW_PAGES) + unplug = true; + + CL_PAGE_DEBUG(D_VFSTRACE, env, page, + "queued page: %d.\n", plist->pl_nr); + } else { + cl_page_disown(env, io, page); + + /* page list is not contiguous now, commit it now */ + unplug = true; + } - rc = ll_commit_write(file, page, from, from + copied); - unlock_page(page); - put_page(page); + if (unplug || + file->f_flags & O_SYNC || IS_SYNC(file_inode(file))) + result = vvp_io_write_commit(env, io); - return rc ?: copied; + ll_cl_fini(lcc); + return result >= 0 ? copied : result; } #ifdef CONFIG_MIGRATION @@ -523,7 +659,7 @@ const struct address_space_operations ll_aops = { .direct_IO = ll_direct_IO_26, .writepage = ll_writepage, .writepages = ll_writepages, - .set_page_dirty = ll_set_page_dirty, + .set_page_dirty = __set_page_dirty_nobuffers, .write_begin = ll_write_begin, .write_end = ll_write_end, .invalidatepage = ll_invalidatepage, diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c index 99ffd1589df8..6322f88661e8 100644 --- a/drivers/staging/lustre/lustre/llite/statahead.c +++ b/drivers/staging/lustre/lustre/llite/statahead.c @@ -661,8 +661,9 @@ static void ll_post_statahead(struct ll_statahead_info *sai) if (rc) goto out; - CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n", - child, child->i_ino, child->i_generation); + CDEBUG(D_DLMTRACE, "%s: setting l_data to inode "DFID"%p\n", + ll_get_fsname(child->i_sb, NULL, 0), + PFID(ll_inode2fid(child)), child); ll_set_lock_data(ll_i2sbi(dir)->ll_md_exp, child, it, NULL); entry->se_inode = child; @@ -1591,13 +1592,11 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, *dentryp = alias; } else if (d_inode(*dentryp) != inode) { /* revalidate, but inode is recreated */ - CDEBUG(D_READA, - "stale dentry %pd inode %lu/%u, statahead inode %lu/%u\n", - *dentryp, - d_inode(*dentryp)->i_ino, - d_inode(*dentryp)->i_generation, - inode->i_ino, - inode->i_generation); + CDEBUG(D_READA, "%s: stale dentry %pd inode "DFID", statahead inode "DFID"\n", + ll_get_fsname(d_inode(*dentryp)->i_sb, NULL, 0), + *dentryp, + PFID(ll_inode2fid(d_inode(*dentryp))), + PFID(ll_inode2fid(inode))); ll_sai_unplug(sai, entry); return -ESTALE; } else { diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c index 61856d37afc5..415750b0bff4 100644 --- a/drivers/staging/lustre/lustre/llite/super25.c +++ b/drivers/staging/lustre/lustre/llite/super25.c @@ -164,9 +164,18 @@ static int __init lustre_init(void) if (rc != 0) goto out_sysfs; + cl_inode_fini_env = cl_env_alloc(&cl_inode_fini_refcheck, + LCT_REMEMBER | LCT_NOREF); + if (IS_ERR(cl_inode_fini_env)) { + rc = PTR_ERR(cl_inode_fini_env); + goto out_vvp; + } + + cl_inode_fini_env->le_ctx.lc_cookie = 0x4; + rc = ll_xattr_init(); if (rc != 0) - goto out_vvp; + goto out_inode_fini_env; lustre_register_client_fill_super(ll_fill_super); lustre_register_kill_super_cb(ll_kill_super); @@ -174,6 +183,8 @@ static int __init lustre_init(void) return 0; +out_inode_fini_env: + cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck); out_vvp: vvp_global_fini(); out_sysfs: @@ -198,6 +209,7 @@ static void __exit lustre_exit(void) kset_unregister(llite_kset); ll_xattr_fini(); + cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck); vvp_global_fini(); kmem_cache_destroy(ll_inode_cachep); diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c index 46d03ea48352..3fc736ccf85e 100644 --- a/drivers/staging/lustre/lustre/llite/symlink.c +++ b/drivers/staging/lustre/lustre/llite/symlink.c @@ -77,7 +77,9 @@ static int ll_readlink_internal(struct inode *inode, ll_finish_md_op_data(op_data); if (rc) { if (rc != -ENOENT) - CERROR("inode %lu: rc = %d\n", inode->i_ino, rc); + CERROR("%s: inode "DFID": rc = %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), rc); goto failed; } @@ -90,8 +92,10 @@ static int ll_readlink_internal(struct inode *inode, LASSERT(symlen != 0); if (body->eadatasize != symlen) { - CERROR("inode %lu: symlink length %d not expected %d\n", - inode->i_ino, body->eadatasize - 1, symlen - 1); + CERROR("%s: inode "DFID": symlink length %d not expected %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), body->eadatasize - 1, + symlen - 1); rc = -EPROTO; goto failed; } diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c index 282b70b776da..47101de1c020 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_dev.c +++ b/drivers/staging/lustre/lustre/llite/vvp_dev.c @@ -36,6 +36,7 @@ * cl_device and cl_device_type implementation for VVP layer. * * Author: Nikita Danilov <nikita.danilov@sun.com> + * Author: Jinshan Xiong <jinshan.xiong@intel.com> */ #define DEBUG_SUBSYSTEM S_LLITE @@ -56,13 +57,33 @@ * "llite_" (var. "ll_") prefix. */ -static struct kmem_cache *vvp_thread_kmem; +static struct kmem_cache *ll_thread_kmem; +struct kmem_cache *vvp_lock_kmem; +struct kmem_cache *vvp_object_kmem; +struct kmem_cache *vvp_req_kmem; static struct kmem_cache *vvp_session_kmem; +static struct kmem_cache *vvp_thread_kmem; + static struct lu_kmem_descr vvp_caches[] = { { - .ckd_cache = &vvp_thread_kmem, - .ckd_name = "vvp_thread_kmem", - .ckd_size = sizeof(struct vvp_thread_info), + .ckd_cache = &ll_thread_kmem, + .ckd_name = "ll_thread_kmem", + .ckd_size = sizeof(struct ll_thread_info), + }, + { + .ckd_cache = &vvp_lock_kmem, + .ckd_name = "vvp_lock_kmem", + .ckd_size = sizeof(struct vvp_lock), + }, + { + .ckd_cache = &vvp_object_kmem, + .ckd_name = "vvp_object_kmem", + .ckd_size = sizeof(struct vvp_object), + }, + { + .ckd_cache = &vvp_req_kmem, + .ckd_name = "vvp_req_kmem", + .ckd_size = sizeof(struct vvp_req), }, { .ckd_cache = &vvp_session_kmem, @@ -70,29 +91,40 @@ static struct lu_kmem_descr vvp_caches[] = { .ckd_size = sizeof(struct vvp_session) }, { + .ckd_cache = &vvp_thread_kmem, + .ckd_name = "vvp_thread_kmem", + .ckd_size = sizeof(struct vvp_thread_info), + }, + { .ckd_cache = NULL } }; -static void *vvp_key_init(const struct lu_context *ctx, - struct lu_context_key *key) +static void *ll_thread_key_init(const struct lu_context *ctx, + struct lu_context_key *key) { struct vvp_thread_info *info; - info = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS); + info = kmem_cache_zalloc(ll_thread_kmem, GFP_NOFS); if (!info) info = ERR_PTR(-ENOMEM); return info; } -static void vvp_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data) +static void ll_thread_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data) { struct vvp_thread_info *info = data; - kmem_cache_free(vvp_thread_kmem, info); + kmem_cache_free(ll_thread_kmem, info); } +struct lu_context_key ll_thread_key = { + .lct_tags = LCT_CL_THREAD, + .lct_init = ll_thread_key_init, + .lct_fini = ll_thread_key_fini +}; + static void *vvp_session_key_init(const struct lu_context *ctx, struct lu_context_key *key) { @@ -112,34 +144,127 @@ static void vvp_session_key_fini(const struct lu_context *ctx, kmem_cache_free(vvp_session_kmem, session); } -struct lu_context_key vvp_key = { - .lct_tags = LCT_CL_THREAD, - .lct_init = vvp_key_init, - .lct_fini = vvp_key_fini -}; - struct lu_context_key vvp_session_key = { .lct_tags = LCT_SESSION, .lct_init = vvp_session_key_init, .lct_fini = vvp_session_key_fini }; +void *vvp_thread_key_init(const struct lu_context *ctx, + struct lu_context_key *key) +{ + struct vvp_thread_info *vti; + + vti = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS); + if (!vti) + vti = ERR_PTR(-ENOMEM); + return vti; +} + +void vvp_thread_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data) +{ + struct vvp_thread_info *vti = data; + + kmem_cache_free(vvp_thread_kmem, vti); +} + +struct lu_context_key vvp_thread_key = { + .lct_tags = LCT_CL_THREAD, + .lct_init = vvp_thread_key_init, + .lct_fini = vvp_thread_key_fini +}; + /* type constructor/destructor: vvp_type_{init,fini,start,stop}(). */ -LU_TYPE_INIT_FINI(vvp, &ccc_key, &ccc_session_key, &vvp_key, &vvp_session_key); +LU_TYPE_INIT_FINI(vvp, &vvp_thread_key, &ll_thread_key, &vvp_session_key); static const struct lu_device_operations vvp_lu_ops = { .ldo_object_alloc = vvp_object_alloc }; static const struct cl_device_operations vvp_cl_ops = { - .cdo_req_init = ccc_req_init + .cdo_req_init = vvp_req_init }; +static struct lu_device *vvp_device_free(const struct lu_env *env, + struct lu_device *d) +{ + struct vvp_device *vdv = lu2vvp_dev(d); + struct cl_site *site = lu2cl_site(d->ld_site); + struct lu_device *next = cl2lu_dev(vdv->vdv_next); + + if (d->ld_site) { + cl_site_fini(site); + kfree(site); + } + cl_device_fini(lu2cl_dev(d)); + kfree(vdv); + return next; +} + static struct lu_device *vvp_device_alloc(const struct lu_env *env, struct lu_device_type *t, struct lustre_cfg *cfg) { - return ccc_device_alloc(env, t, cfg, &vvp_lu_ops, &vvp_cl_ops); + struct vvp_device *vdv; + struct lu_device *lud; + struct cl_site *site; + int rc; + + vdv = kzalloc(sizeof(*vdv), GFP_NOFS); + if (!vdv) + return ERR_PTR(-ENOMEM); + + lud = &vdv->vdv_cl.cd_lu_dev; + cl_device_init(&vdv->vdv_cl, t); + vvp2lu_dev(vdv)->ld_ops = &vvp_lu_ops; + vdv->vdv_cl.cd_ops = &vvp_cl_ops; + + site = kzalloc(sizeof(*site), GFP_NOFS); + if (site) { + rc = cl_site_init(site, &vdv->vdv_cl); + if (rc == 0) { + rc = lu_site_init_finish(&site->cs_lu); + } else { + LASSERT(!lud->ld_site); + CERROR("Cannot init lu_site, rc %d.\n", rc); + kfree(site); + } + } else { + rc = -ENOMEM; + } + if (rc != 0) { + vvp_device_free(env, lud); + lud = ERR_PTR(rc); + } + return lud; +} + +static int vvp_device_init(const struct lu_env *env, struct lu_device *d, + const char *name, struct lu_device *next) +{ + struct vvp_device *vdv; + int rc; + + vdv = lu2vvp_dev(d); + vdv->vdv_next = lu2cl_dev(next); + + LASSERT(d->ld_site && next->ld_type); + next->ld_site = d->ld_site; + rc = next->ld_type->ldt_ops->ldto_device_init(env, next, + next->ld_type->ldt_name, + NULL); + if (rc == 0) { + lu_device_get(next); + lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init); + } + return rc; +} + +static struct lu_device *vvp_device_fini(const struct lu_env *env, + struct lu_device *d) +{ + return cl2lu_dev(lu2vvp_dev(d)->vdv_next); } static const struct lu_device_type_operations vvp_device_type_ops = { @@ -150,9 +275,9 @@ static const struct lu_device_type_operations vvp_device_type_ops = { .ldto_stop = vvp_type_stop, .ldto_device_alloc = vvp_device_alloc, - .ldto_device_free = ccc_device_free, - .ldto_device_init = ccc_device_init, - .ldto_device_fini = ccc_device_fini + .ldto_device_free = vvp_device_free, + .ldto_device_init = vvp_device_init, + .ldto_device_fini = vvp_device_fini, }; struct lu_device_type vvp_device_type = { @@ -168,20 +293,27 @@ struct lu_device_type vvp_device_type = { */ int vvp_global_init(void) { - int result; + int rc; - result = lu_kmem_init(vvp_caches); - if (result == 0) { - result = ccc_global_init(&vvp_device_type); - if (result != 0) - lu_kmem_fini(vvp_caches); - } - return result; + rc = lu_kmem_init(vvp_caches); + if (rc != 0) + return rc; + + rc = lu_device_type_init(&vvp_device_type); + if (rc != 0) + goto out_kmem; + + return 0; + +out_kmem: + lu_kmem_fini(vvp_caches); + + return rc; } void vvp_global_fini(void) { - ccc_global_fini(&vvp_device_type); + lu_device_type_fini(&vvp_device_type); lu_kmem_fini(vvp_caches); } @@ -205,13 +337,14 @@ int cl_sb_init(struct super_block *sb) cl = cl_type_setup(env, NULL, &vvp_device_type, sbi->ll_dt_exp->exp_obd->obd_lu_dev); if (!IS_ERR(cl)) { - cl2ccc_dev(cl)->cdv_sb = sb; + cl2vvp_dev(cl)->vdv_sb = sb; sbi->ll_cl = cl; sbi->ll_site = cl2lu_dev(cl)->ld_site; } cl_env_put(env, &refcheck); - } else + } else { rc = PTR_ERR(env); + } return rc; } @@ -356,23 +489,18 @@ static loff_t vvp_pgcache_find(const struct lu_env *env, return ~0ULL; clob = vvp_pgcache_obj(env, dev, &id); if (clob) { - struct cl_object_header *hdr; - int nr; - struct cl_page *pg; - - /* got an object. Find next page. */ - hdr = cl_object_header(clob); + struct inode *inode = vvp_object_inode(clob); + struct page *vmpage; + int nr; - spin_lock(&hdr->coh_page_guard); - nr = radix_tree_gang_lookup(&hdr->coh_tree, - (void **)&pg, - id.vpi_index, 1); + nr = find_get_pages_contig(inode->i_mapping, + id.vpi_index, 1, &vmpage); if (nr > 0) { - id.vpi_index = pg->cp_index; + id.vpi_index = vmpage->index; /* Cant support over 16T file */ - nr = !(pg->cp_index > 0xffffffff); + nr = !(vmpage->index > 0xffffffff); + put_page(vmpage); } - spin_unlock(&hdr->coh_page_guard); lu_object_ref_del(&clob->co_lu, "dump", current); cl_object_put(env, clob); @@ -398,21 +526,20 @@ static loff_t vvp_pgcache_find(const struct lu_env *env, static void vvp_pgcache_page_show(const struct lu_env *env, struct seq_file *seq, struct cl_page *page) { - struct ccc_page *cpg; + struct vvp_page *vpg; struct page *vmpage; int has_flags; - cpg = cl2ccc_page(cl_page_at(page, &vvp_device_type)); - vmpage = cpg->cpg_page; - seq_printf(seq, " %5i | %p %p %s %s %s %s | %p %lu/%u(%p) %lu %u [", + vpg = cl2vvp_page(cl_page_at(page, &vvp_device_type)); + vmpage = vpg->vpg_page; + seq_printf(seq, " %5i | %p %p %s %s %s %s | %p "DFID"(%p) %lu %u [", 0 /* gen */, - cpg, page, + vpg, page, "none", - cpg->cpg_write_queued ? "wq" : "- ", - cpg->cpg_defer_uptodate ? "du" : "- ", + vpg->vpg_write_queued ? "wq" : "- ", + vpg->vpg_defer_uptodate ? "du" : "- ", PageWriteback(vmpage) ? "wb" : "-", - vmpage, vmpage->mapping->host->i_ino, - vmpage->mapping->host->i_generation, + vmpage, PFID(ll_inode2fid(vmpage->mapping->host)), vmpage->mapping->host, vmpage->index, page_count(vmpage)); has_flags = 0; @@ -431,8 +558,6 @@ static int vvp_pgcache_show(struct seq_file *f, void *v) struct ll_sb_info *sbi; struct cl_object *clob; struct lu_env *env; - struct cl_page *page; - struct cl_object_header *hdr; struct vvp_pgcache_id id; int refcheck; int result; @@ -444,27 +569,38 @@ static int vvp_pgcache_show(struct seq_file *f, void *v) sbi = f->private; clob = vvp_pgcache_obj(env, &sbi->ll_cl->cd_lu_dev, &id); if (clob) { - hdr = cl_object_header(clob); - - spin_lock(&hdr->coh_page_guard); - page = cl_page_lookup(hdr, id.vpi_index); - spin_unlock(&hdr->coh_page_guard); + struct inode *inode = vvp_object_inode(clob); + struct cl_page *page = NULL; + struct page *vmpage; + + result = find_get_pages_contig(inode->i_mapping, + id.vpi_index, 1, + &vmpage); + if (result > 0) { + lock_page(vmpage); + page = cl_vmpage_page(vmpage, clob); + unlock_page(vmpage); + put_page(vmpage); + } - seq_printf(f, "%8x@"DFID": ", - id.vpi_index, PFID(&hdr->coh_lu.loh_fid)); + seq_printf(f, "%8x@" DFID ": ", id.vpi_index, + PFID(lu_object_fid(&clob->co_lu))); if (page) { vvp_pgcache_page_show(env, f, page); cl_page_put(env, page); - } else + } else { seq_puts(f, "missing\n"); + } lu_object_ref_del(&clob->co_lu, "dump", current); cl_object_put(env, clob); - } else + } else { seq_printf(f, "%llx missing\n", pos); + } cl_env_put(env, &refcheck); result = 0; - } else + } else { result = PTR_ERR(env); + } return result; } diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h index bb393378c9bb..27b9b0a01f32 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_internal.h +++ b/drivers/staging/lustre/lustre/llite/vvp_internal.h @@ -41,21 +41,337 @@ #ifndef VVP_INTERNAL_H #define VVP_INTERNAL_H +#include "../include/lustre/lustre_idl.h" #include "../include/cl_object.h" -#include "llite_internal.h" -int vvp_io_init(const struct lu_env *env, - struct cl_object *obj, struct cl_io *io); -int vvp_lock_init(const struct lu_env *env, - struct cl_object *obj, struct cl_lock *lock, - const struct cl_io *io); +enum obd_notify_event; +struct inode; +struct lov_stripe_md; +struct lustre_md; +struct obd_capa; +struct obd_device; +struct obd_export; +struct page; + +/* specific architecture can implement only part of this list */ +enum vvp_io_subtype { + /** normal IO */ + IO_NORMAL, + /** io started from splice_{read|write} */ + IO_SPLICE +}; + +/** + * IO state private to IO state private to VVP layer. + */ +struct vvp_io { + /** super class */ + struct cl_io_slice vui_cl; + struct cl_io_lock_link vui_link; + /** + * I/O vector information to or from which read/write is going. + */ + struct iov_iter *vui_iter; + /** + * Total size for the left IO. + */ + size_t vui_tot_count; + + union { + struct vvp_fault_io { + /** + * Inode modification time that is checked across DLM + * lock request. + */ + time64_t ft_mtime; + struct vm_area_struct *ft_vma; + /** + * locked page returned from vvp_io + */ + struct page *ft_vmpage; + /** + * kernel fault info + */ + struct vm_fault *ft_vmf; + /** + * fault API used bitflags for return code. + */ + unsigned int ft_flags; + /** + * check that flags are from filemap_fault + */ + bool ft_flags_valid; + } fault; + struct { + struct pipe_inode_info *vui_pipe; + unsigned int vui_flags; + } splice; + struct { + struct cl_page_list vui_queue; + unsigned long vui_written; + int vui_from; + int vui_to; + } write; + } u; + + enum vvp_io_subtype vui_io_subtype; + + /** + * Layout version when this IO is initialized + */ + __u32 vui_layout_gen; + /** + * File descriptor against which IO is done. + */ + struct ll_file_data *vui_fd; + struct kiocb *vui_iocb; + + /* Readahead state. */ + pgoff_t vui_ra_start; + pgoff_t vui_ra_count; + /* Set when vui_ra_{start,count} have been initialized. */ + bool vui_ra_valid; +}; + +extern struct lu_device_type vvp_device_type; + +extern struct lu_context_key vvp_session_key; +extern struct lu_context_key vvp_thread_key; + +extern struct kmem_cache *vvp_lock_kmem; +extern struct kmem_cache *vvp_object_kmem; +extern struct kmem_cache *vvp_req_kmem; + +struct vvp_thread_info { + struct cl_lock vti_lock; + struct cl_lock_descr vti_descr; + struct cl_io vti_io; + struct cl_attr vti_attr; +}; + +static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env) +{ + struct vvp_thread_info *vti; + + vti = lu_context_key_get(&env->le_ctx, &vvp_thread_key); + LASSERT(vti); + + return vti; +} + +static inline struct cl_lock *vvp_env_lock(const struct lu_env *env) +{ + struct cl_lock *lock = &vvp_env_info(env)->vti_lock; + + memset(lock, 0, sizeof(*lock)); + return lock; +} + +static inline struct cl_attr *vvp_env_thread_attr(const struct lu_env *env) +{ + struct cl_attr *attr = &vvp_env_info(env)->vti_attr; + + memset(attr, 0, sizeof(*attr)); + + return attr; +} + +static inline struct cl_io *vvp_env_thread_io(const struct lu_env *env) +{ + struct cl_io *io = &vvp_env_info(env)->vti_io; + + memset(io, 0, sizeof(*io)); + + return io; +} + +struct vvp_session { + struct vvp_io cs_ios; +}; + +static inline struct vvp_session *vvp_env_session(const struct lu_env *env) +{ + struct vvp_session *ses; + + ses = lu_context_key_get(env->le_ses, &vvp_session_key); + LASSERT(ses); + + return ses; +} + +static inline struct vvp_io *vvp_env_io(const struct lu_env *env) +{ + return &vvp_env_session(env)->cs_ios; +} + +/** + * ccc-private object state. + */ +struct vvp_object { + struct cl_object_header vob_header; + struct cl_object vob_cl; + struct inode *vob_inode; + + /** + * A list of dirty pages pending IO in the cache. Used by + * SOM. Protected by ll_inode_info::lli_lock. + * + * \see vvp_page::vpg_pending_linkage + */ + struct list_head vob_pending_list; + + /** + * Access this counter is protected by inode->i_sem. Now that + * the lifetime of transient pages must be covered by inode sem, + * we don't need to hold any lock.. + */ + int vob_transient_pages; + /** + * Number of outstanding mmaps on this file. + * + * \see ll_vm_open(), ll_vm_close(). + */ + atomic_t vob_mmap_cnt; + + /** + * various flags + * vob_discard_page_warned + * if pages belonging to this object are discarded when a client + * is evicted, some debug info will be printed, this flag will be set + * during processing the first discarded page, then avoid flooding + * debug message for lots of discarded pages. + * + * \see ll_dirty_page_discard_warn. + */ + unsigned int vob_discard_page_warned:1; +}; + +/** + * VVP-private page state. + */ +struct vvp_page { + struct cl_page_slice vpg_cl; + int vpg_defer_uptodate; + int vpg_ra_used; + int vpg_write_queued; + /** + * Non-empty iff this page is already counted in + * vvp_object::vob_pending_list. This list is only used as a flag, + * that is, never iterated through, only checked for list_empty(), but + * having a list is useful for debugging. + */ + struct list_head vpg_pending_linkage; + /** VM page */ + struct page *vpg_page; +}; + +static inline struct vvp_page *cl2vvp_page(const struct cl_page_slice *slice) +{ + return container_of(slice, struct vvp_page, vpg_cl); +} + +static inline pgoff_t vvp_index(struct vvp_page *vvp) +{ + return vvp->vpg_cl.cpl_index; +} + +struct vvp_device { + struct cl_device vdv_cl; + struct super_block *vdv_sb; + struct cl_device *vdv_next; +}; + +struct vvp_lock { + struct cl_lock_slice vlk_cl; +}; + +struct vvp_req { + struct cl_req_slice vrq_cl; +}; + +void *ccc_key_init(const struct lu_context *ctx, + struct lu_context_key *key); +void ccc_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data); + +void ccc_umount(const struct lu_env *env, struct cl_device *dev); + +static inline struct lu_device *vvp2lu_dev(struct vvp_device *vdv) +{ + return &vdv->vdv_cl.cd_lu_dev; +} + +static inline struct vvp_device *lu2vvp_dev(const struct lu_device *d) +{ + return container_of0(d, struct vvp_device, vdv_cl.cd_lu_dev); +} + +static inline struct vvp_device *cl2vvp_dev(const struct cl_device *d) +{ + return container_of0(d, struct vvp_device, vdv_cl); +} + +static inline struct vvp_object *cl2vvp(const struct cl_object *obj) +{ + return container_of0(obj, struct vvp_object, vob_cl); +} + +static inline struct vvp_object *lu2vvp(const struct lu_object *obj) +{ + return container_of0(obj, struct vvp_object, vob_cl.co_lu); +} + +static inline struct inode *vvp_object_inode(const struct cl_object *obj) +{ + return cl2vvp(obj)->vob_inode; +} + +int vvp_object_invariant(const struct cl_object *obj); +struct vvp_object *cl_inode2vvp(struct inode *inode); + +static inline struct page *cl2vm_page(const struct cl_page_slice *slice) +{ + return cl2vvp_page(slice)->vpg_page; +} + +static inline struct vvp_lock *cl2vvp_lock(const struct cl_lock_slice *slice) +{ + return container_of(slice, struct vvp_lock, vlk_cl); +} + +# define CLOBINVRNT(env, clob, expr) \ + ((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr))) + +/** + * New interfaces to get and put lov_stripe_md from lov layer. This violates + * layering because lov_stripe_md is supposed to be a private data in lov. + * + * NB: If you find you have to use these interfaces for your new code, please + * think about it again. These interfaces may be removed in the future for + * better layering. + */ +struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj); +void lov_lsm_put(struct cl_object *clobj, struct lov_stripe_md *lsm); +int lov_read_and_clear_async_rc(struct cl_object *clob); + +struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode); +void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm); + +int vvp_io_init(const struct lu_env *env, struct cl_object *obj, + struct cl_io *io); +int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io); +int vvp_lock_init(const struct lu_env *env, struct cl_object *obj, + struct cl_lock *lock, const struct cl_io *io); int vvp_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage); + struct cl_page *page, pgoff_t index); +int vvp_req_init(const struct lu_env *env, struct cl_device *dev, + struct cl_req *req); struct lu_object *vvp_object_alloc(const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *dev); -struct ccc_object *cl_inode2ccc(struct inode *inode); +int vvp_global_init(void); +void vvp_global_fini(void); extern const struct file_operations vvp_dump_pgcache_file_ops; diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c index 85a835976174..5bf9592ae5d2 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_io.c +++ b/drivers/staging/lustre/lustre/llite/vvp_io.c @@ -44,21 +44,30 @@ #include "../include/obd.h" #include "../include/lustre_lite.h" +#include "llite_internal.h" #include "vvp_internal.h" -static struct vvp_io *cl2vvp_io(const struct lu_env *env, - const struct cl_io_slice *slice); +struct vvp_io *cl2vvp_io(const struct lu_env *env, + const struct cl_io_slice *slice) +{ + struct vvp_io *vio; + + vio = container_of(slice, struct vvp_io, vui_cl); + LASSERT(vio == vvp_env_io(env)); + + return vio; +} /** * True, if \a io is a normal io, False for splice_{read,write} */ -int cl_is_normalio(const struct lu_env *env, const struct cl_io *io) +static int cl_is_normalio(const struct lu_env *env, const struct cl_io *io) { struct vvp_io *vio = vvp_env_io(env); LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); - return vio->cui_io_subtype == IO_NORMAL; + return vio->vui_io_subtype == IO_NORMAL; } /** @@ -71,7 +80,7 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io, struct inode *inode) { struct ll_inode_info *lli = ll_i2info(inode); - struct ccc_io *cio = ccc_env_io(env); + struct vvp_io *vio = vvp_env_io(env); bool rc = true; switch (io->ci_type) { @@ -80,7 +89,7 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io, /* don't need lock here to check lli_layout_gen as we have held * extent lock and GROUP lock has to hold to swap layout */ - if (ll_layout_version_get(lli) != cio->cui_layout_gen) { + if (ll_layout_version_get(lli) != vio->vui_layout_gen) { io->ci_need_restart = 1; /* this will return application a short read/write */ io->ci_continue = 0; @@ -95,20 +104,187 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io, return rc; } +static void vvp_object_size_lock(struct cl_object *obj) +{ + struct inode *inode = vvp_object_inode(obj); + + ll_inode_size_lock(inode); + cl_object_attr_lock(obj); +} + +static void vvp_object_size_unlock(struct cl_object *obj) +{ + struct inode *inode = vvp_object_inode(obj); + + cl_object_attr_unlock(obj); + ll_inode_size_unlock(inode); +} + +/** + * Helper function that if necessary adjusts file size (inode->i_size), when + * position at the offset \a pos is accessed. File size can be arbitrary stale + * on a Lustre client, but client at least knows KMS. If accessed area is + * inside [0, KMS], set file size to KMS, otherwise glimpse file size. + * + * Locking: cl_isize_lock is used to serialize changes to inode size and to + * protect consistency between inode size and cl_object + * attributes. cl_object_size_lock() protects consistency between cl_attr's of + * top-object and sub-objects. + */ +static int vvp_prep_size(const struct lu_env *env, struct cl_object *obj, + struct cl_io *io, loff_t start, size_t count, + int *exceed) +{ + struct cl_attr *attr = vvp_env_thread_attr(env); + struct inode *inode = vvp_object_inode(obj); + loff_t pos = start + count - 1; + loff_t kms; + int result; + + /* + * Consistency guarantees: following possibilities exist for the + * relation between region being accessed and real file size at this + * moment: + * + * (A): the region is completely inside of the file; + * + * (B-x): x bytes of region are inside of the file, the rest is + * outside; + * + * (C): the region is completely outside of the file. + * + * This classification is stable under DLM lock already acquired by + * the caller, because to change the class, other client has to take + * DLM lock conflicting with our lock. Also, any updates to ->i_size + * by other threads on this client are serialized by + * ll_inode_size_lock(). This guarantees that short reads are handled + * correctly in the face of concurrent writes and truncates. + */ + vvp_object_size_lock(obj); + result = cl_object_attr_get(env, obj, attr); + if (result == 0) { + kms = attr->cat_kms; + if (pos > kms) { + /* + * A glimpse is necessary to determine whether we + * return a short read (B) or some zeroes at the end + * of the buffer (C) + */ + vvp_object_size_unlock(obj); + result = cl_glimpse_lock(env, io, inode, obj, 0); + if (result == 0 && exceed) { + /* If objective page index exceed end-of-file + * page index, return directly. Do not expect + * kernel will check such case correctly. + * linux-2.6.18-128.1.1 miss to do that. + * --bug 17336 + */ + loff_t size = i_size_read(inode); + loff_t cur_index = start >> PAGE_SHIFT; + loff_t size_index = (size - 1) >> PAGE_SHIFT; + + if ((size == 0 && cur_index != 0) || + size_index < cur_index) + *exceed = 1; + } + return result; + } + /* + * region is within kms and, hence, within real file + * size (A). We need to increase i_size to cover the + * read region so that generic_file_read() will do its + * job, but that doesn't mean the kms size is + * _correct_, it is only the _minimum_ size. If + * someone does a stat they will get the correct size + * which will always be >= the kms value here. + * b=11081 + */ + if (i_size_read(inode) < kms) { + i_size_write(inode, kms); + CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n", + PFID(lu_object_fid(&obj->co_lu)), + (__u64)i_size_read(inode)); + } + } + + vvp_object_size_unlock(obj); + + return result; +} + /***************************************************************************** * * io operations. * */ +static int vvp_io_one_lock_index(const struct lu_env *env, struct cl_io *io, + __u32 enqflags, enum cl_lock_mode mode, + pgoff_t start, pgoff_t end) +{ + struct vvp_io *vio = vvp_env_io(env); + struct cl_lock_descr *descr = &vio->vui_link.cill_descr; + struct cl_object *obj = io->ci_obj; + + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); + + CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end); + + memset(&vio->vui_link, 0, sizeof(vio->vui_link)); + + if (vio->vui_fd && (vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { + descr->cld_mode = CLM_GROUP; + descr->cld_gid = vio->vui_fd->fd_grouplock.lg_gid; + } else { + descr->cld_mode = mode; + } + descr->cld_obj = obj; + descr->cld_start = start; + descr->cld_end = end; + descr->cld_enq_flags = enqflags; + + cl_io_lock_add(env, io, &vio->vui_link); + return 0; +} + +static int vvp_io_one_lock(const struct lu_env *env, struct cl_io *io, + __u32 enqflags, enum cl_lock_mode mode, + loff_t start, loff_t end) +{ + struct cl_object *obj = io->ci_obj; + + return vvp_io_one_lock_index(env, io, enqflags, mode, + cl_index(obj, start), cl_index(obj, end)); +} + +static int vvp_io_write_iter_init(const struct lu_env *env, + const struct cl_io_slice *ios) +{ + struct vvp_io *vio = cl2vvp_io(env, ios); + + cl_page_list_init(&vio->u.write.vui_queue); + vio->u.write.vui_written = 0; + vio->u.write.vui_from = 0; + vio->u.write.vui_to = PAGE_SIZE; + + return 0; +} + +static void vvp_io_write_iter_fini(const struct lu_env *env, + const struct cl_io_slice *ios) +{ + struct vvp_io *vio = cl2vvp_io(env, ios); + + LASSERT(vio->u.write.vui_queue.pl_nr == 0); +} + static int vvp_io_fault_iter_init(const struct lu_env *env, const struct cl_io_slice *ios) { struct vvp_io *vio = cl2vvp_io(env, ios); - struct inode *inode = ccc_object_inode(ios->cis_obj); + struct inode *inode = vvp_object_inode(ios->cis_obj); - LASSERT(inode == - file_inode(cl2ccc_io(env, ios)->cui_fd->fd_file)); + LASSERT(inode == file_inode(vio->vui_fd->fd_file)); vio->u.fault.ft_mtime = inode->i_mtime.tv_sec; return 0; } @@ -117,15 +293,16 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) { struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; - struct ccc_io *cio = cl2ccc_io(env, ios); + struct vvp_io *vio = cl2vvp_io(env, ios); + struct inode *inode = vvp_object_inode(obj); - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); CDEBUG(D_VFSTRACE, DFID " ignore/verify layout %d/%d, layout version %d restore needed %d\n", PFID(lu_object_fid(&obj->co_lu)), io->ci_ignore_layout, io->ci_verify_layout, - cio->cui_layout_gen, io->ci_restore_needed); + vio->vui_layout_gen, io->ci_restore_needed); if (io->ci_restore_needed == 1) { int rc; @@ -133,7 +310,7 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) /* file was detected release, we need to restore it * before finishing the io */ - rc = ll_layout_restore(ccc_object_inode(obj)); + rc = ll_layout_restore(inode, 0, OBD_OBJECT_EOF); /* if restore registration failed, no restart, * we will return -ENODATA */ @@ -159,16 +336,16 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) __u32 gen = 0; /* check layout version */ - ll_layout_refresh(ccc_object_inode(obj), &gen); - io->ci_need_restart = cio->cui_layout_gen != gen; + ll_layout_refresh(inode, &gen); + io->ci_need_restart = vio->vui_layout_gen != gen; if (io->ci_need_restart) { CDEBUG(D_VFSTRACE, DFID" layout changed from %d to %d.\n", PFID(lu_object_fid(&obj->co_lu)), - cio->cui_layout_gen, gen); + vio->vui_layout_gen, gen); /* today successful restore is the only possible case */ /* restore was done, clear restoring state */ - ll_i2info(ccc_object_inode(obj))->lli_flags &= + ll_i2info(vvp_object_inode(obj))->lli_flags &= ~LLIF_FILE_RESTORING; } } @@ -180,7 +357,7 @@ static void vvp_io_fault_fini(const struct lu_env *env, struct cl_io *io = ios->cis_io; struct cl_page *page = io->u.ci_fault.ft_page; - CLOBINVRNT(env, io->ci_obj, ccc_object_invariant(io->ci_obj)); + CLOBINVRNT(env, io->ci_obj, vvp_object_invariant(io->ci_obj)); if (page) { lu_ref_del(&page->cp_reference, "fault", io); @@ -203,16 +380,16 @@ static enum cl_lock_mode vvp_mode_from_vma(struct vm_area_struct *vma) } static int vvp_mmap_locks(const struct lu_env *env, - struct ccc_io *vio, struct cl_io *io) + struct vvp_io *vio, struct cl_io *io) { - struct ccc_thread_info *cti = ccc_env_info(env); + struct vvp_thread_info *cti = vvp_env_info(env); struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - struct cl_lock_descr *descr = &cti->cti_descr; + struct cl_lock_descr *descr = &cti->vti_descr; ldlm_policy_data_t policy; unsigned long addr; ssize_t count; - int result; + int result = 0; struct iov_iter i; struct iovec iov; @@ -221,21 +398,21 @@ static int vvp_mmap_locks(const struct lu_env *env, if (!cl_is_normalio(env, io)) return 0; - if (!vio->cui_iter) /* nfs or loop back device write */ + if (!vio->vui_iter) /* nfs or loop back device write */ return 0; /* No MM (e.g. NFS)? No vmas too. */ if (!mm) return 0; - iov_for_each(iov, i, *(vio->cui_iter)) { + iov_for_each(iov, i, *vio->vui_iter) { addr = (unsigned long)iov.iov_base; count = iov.iov_len; if (count == 0) continue; - count += addr & (~CFS_PAGE_MASK); - addr &= CFS_PAGE_MASK; + count += addr & (~PAGE_MASK); + addr &= PAGE_MASK; down_read(&mm->mmap_sem); while ((vma = our_vma(mm, addr, count)) != NULL) { @@ -244,10 +421,10 @@ static int vvp_mmap_locks(const struct lu_env *env, if (ll_file_nolock(vma->vm_file)) { /* - * For no lock case, a lockless lock will be - * generated. + * For no lock case is not allowed for mmap */ - flags = CEF_NEVER; + result = -EINVAL; + break; } /* @@ -269,10 +446,8 @@ static int vvp_mmap_locks(const struct lu_env *env, descr->cld_mode, descr->cld_start, descr->cld_end); - if (result < 0) { - up_read(&mm->mmap_sem); - return result; - } + if (result < 0) + break; if (vma->vm_end - addr >= count) break; @@ -281,26 +456,55 @@ static int vvp_mmap_locks(const struct lu_env *env, addr = vma->vm_end; } up_read(&mm->mmap_sem); + if (result < 0) + break; } - return 0; + return result; +} + +static void vvp_io_advance(const struct lu_env *env, + const struct cl_io_slice *ios, + size_t nob) +{ + struct vvp_io *vio = cl2vvp_io(env, ios); + struct cl_io *io = ios->cis_io; + struct cl_object *obj = ios->cis_io->ci_obj; + + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); + + if (!cl_is_normalio(env, io)) + return; + + iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count -= nob); +} + +static void vvp_io_update_iov(const struct lu_env *env, + struct vvp_io *vio, struct cl_io *io) +{ + size_t size = io->u.ci_rw.crw_count; + + if (!cl_is_normalio(env, io) || !vio->vui_iter) + return; + + iov_iter_truncate(vio->vui_iter, size); } static int vvp_io_rw_lock(const struct lu_env *env, struct cl_io *io, enum cl_lock_mode mode, loff_t start, loff_t end) { - struct ccc_io *cio = ccc_env_io(env); + struct vvp_io *vio = vvp_env_io(env); int result; int ast_flags = 0; LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); - ccc_io_update_iov(env, cio, io); + vvp_io_update_iov(env, vio, io); if (io->u.ci_rw.crw_nonblock) ast_flags |= CEF_NONBLOCK; - result = vvp_mmap_locks(env, cio, io); + result = vvp_mmap_locks(env, vio, io); if (result == 0) - result = ccc_io_one_lock(env, io, ast_flags, mode, start, end); + result = vvp_io_one_lock(env, io, ast_flags, mode, start, end); return result; } @@ -325,9 +529,11 @@ static int vvp_io_fault_lock(const struct lu_env *env, /* * XXX LDLM_FL_CBPENDING */ - return ccc_io_one_lock_index - (env, io, 0, vvp_mode_from_vma(vio->u.fault.ft_vma), - io->u.ci_fault.ft_index, io->u.ci_fault.ft_index); + return vvp_io_one_lock_index(env, + io, 0, + vvp_mode_from_vma(vio->u.fault.ft_vma), + io->u.ci_fault.ft_index, + io->u.ci_fault.ft_index); } static int vvp_io_write_lock(const struct lu_env *env, @@ -354,14 +560,13 @@ static int vvp_io_setattr_iter_init(const struct lu_env *env, } /** - * Implementation of cl_io_operations::cio_lock() method for CIT_SETATTR io. + * Implementation of cl_io_operations::vio_lock() method for CIT_SETATTR io. * * Handles "lockless io" mode when extent locking is done by server. */ static int vvp_io_setattr_lock(const struct lu_env *env, const struct cl_io_slice *ios) { - struct ccc_io *cio = ccc_env_io(env); struct cl_io *io = ios->cis_io; __u64 new_size; __u32 enqflags = 0; @@ -378,8 +583,8 @@ static int vvp_io_setattr_lock(const struct lu_env *env, return 0; new_size = 0; } - cio->u.setattr.cui_local_lock = SETATTR_EXTENT_LOCK; - return ccc_io_one_lock(env, io, enqflags, CLM_WRITE, + + return vvp_io_one_lock(env, io, enqflags, CLM_WRITE, new_size, OBD_OBJECT_EOF); } @@ -413,7 +618,7 @@ static int vvp_io_setattr_time(const struct lu_env *env, { struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; - struct cl_attr *attr = ccc_env_thread_attr(env); + struct cl_attr *attr = vvp_env_thread_attr(env); int result; unsigned valid = CAT_CTIME; @@ -437,7 +642,7 @@ static int vvp_io_setattr_start(const struct lu_env *env, const struct cl_io_slice *ios) { struct cl_io *io = ios->cis_io; - struct inode *inode = ccc_object_inode(io->ci_obj); + struct inode *inode = vvp_object_inode(io->ci_obj); int result = 0; inode_lock(inode); @@ -453,7 +658,7 @@ static void vvp_io_setattr_end(const struct lu_env *env, const struct cl_io_slice *ios) { struct cl_io *io = ios->cis_io; - struct inode *inode = ccc_object_inode(io->ci_obj); + struct inode *inode = vvp_object_inode(io->ci_obj); if (cl_io_is_trunc(io)) /* Truncate in memory pages - they must be clean pages @@ -474,27 +679,25 @@ static int vvp_io_read_start(const struct lu_env *env, const struct cl_io_slice *ios) { struct vvp_io *vio = cl2vvp_io(env, ios); - struct ccc_io *cio = cl2ccc_io(env, ios); struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; - struct inode *inode = ccc_object_inode(obj); - struct ll_ra_read *bead = &vio->cui_bead; - struct file *file = cio->cui_fd->fd_file; + struct inode *inode = vvp_object_inode(obj); + struct file *file = vio->vui_fd->fd_file; int result; loff_t pos = io->u.ci_rd.rd.crw_pos; long cnt = io->u.ci_rd.rd.crw_count; - long tot = cio->cui_tot_count; + long tot = vio->vui_tot_count; int exceed = 0; - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt); if (!can_populate_pages(env, io, inode)) return 0; - result = ccc_prep_size(env, obj, io, pos, tot, &exceed); + result = vvp_prep_size(env, obj, io, pos, tot, &exceed); if (result != 0) return result; else if (exceed != 0) @@ -505,30 +708,27 @@ static int vvp_io_read_start(const struct lu_env *env, inode->i_ino, cnt, pos, i_size_read(inode)); /* turn off the kernel's read-ahead */ - cio->cui_fd->fd_file->f_ra.ra_pages = 0; + vio->vui_fd->fd_file->f_ra.ra_pages = 0; /* initialize read-ahead window once per syscall */ - if (!vio->cui_ra_window_set) { - vio->cui_ra_window_set = 1; - bead->lrr_start = cl_index(obj, pos); - /* - * XXX: explicit PAGE_SIZE - */ - bead->lrr_count = cl_index(obj, tot + PAGE_SIZE - 1); - ll_ra_read_in(file, bead); + if (!vio->vui_ra_valid) { + vio->vui_ra_valid = true; + vio->vui_ra_start = cl_index(obj, pos); + vio->vui_ra_count = cl_index(obj, tot + PAGE_SIZE - 1); + ll_ras_enter(file); } /* BUG: 5972 */ file_accessed(file); - switch (vio->cui_io_subtype) { + switch (vio->vui_io_subtype) { case IO_NORMAL: - LASSERT(cio->cui_iocb->ki_pos == pos); - result = generic_file_read_iter(cio->cui_iocb, cio->cui_iter); + LASSERT(vio->vui_iocb->ki_pos == pos); + result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter); break; case IO_SPLICE: result = generic_file_splice_read(file, &pos, - vio->u.splice.cui_pipe, cnt, - vio->u.splice.cui_flags); + vio->u.splice.vui_pipe, cnt, + vio->u.splice.vui_flags); /* LU-1109: do splice read stripe by stripe otherwise if it * may make nfsd stuck if this read occupied all internal pipe * buffers. @@ -536,7 +736,7 @@ static int vvp_io_read_start(const struct lu_env *env, io->ci_continue = 0; break; default: - CERROR("Wrong IO type %u\n", vio->cui_io_subtype); + CERROR("Wrong IO type %u\n", vio->vui_io_subtype); LBUG(); } @@ -546,30 +746,201 @@ out: io->ci_continue = 0; io->ci_nob += result; ll_rw_stats_tally(ll_i2sbi(inode), current->pid, - cio->cui_fd, pos, result, READ); + vio->vui_fd, pos, result, READ); result = 0; } return result; } -static void vvp_io_read_fini(const struct lu_env *env, const struct cl_io_slice *ios) +static int vvp_io_commit_sync(const struct lu_env *env, struct cl_io *io, + struct cl_page_list *plist, int from, int to) { - struct vvp_io *vio = cl2vvp_io(env, ios); - struct ccc_io *cio = cl2ccc_io(env, ios); + struct cl_2queue *queue = &io->ci_queue; + struct cl_page *page; + unsigned int bytes = 0; + int rc = 0; - if (vio->cui_ra_window_set) - ll_ra_read_ex(cio->cui_fd->fd_file, &vio->cui_bead); + if (plist->pl_nr == 0) + return 0; - vvp_io_fini(env, ios); + if (from > 0 || to != PAGE_SIZE) { + page = cl_page_list_first(plist); + if (plist->pl_nr == 1) { + cl_page_clip(env, page, from, to); + } else { + if (from > 0) + cl_page_clip(env, page, from, PAGE_SIZE); + if (to != PAGE_SIZE) { + page = cl_page_list_last(plist); + cl_page_clip(env, page, 0, to); + } + } + } + + cl_2queue_init(queue); + cl_page_list_splice(plist, &queue->c2_qin); + rc = cl_io_submit_sync(env, io, CRT_WRITE, queue, 0); + + /* plist is not sorted any more */ + cl_page_list_splice(&queue->c2_qin, plist); + cl_page_list_splice(&queue->c2_qout, plist); + cl_2queue_fini(env, queue); + + if (rc == 0) { + /* calculate bytes */ + bytes = plist->pl_nr << PAGE_SHIFT; + bytes -= from + PAGE_SIZE - to; + + while (plist->pl_nr > 0) { + page = cl_page_list_first(plist); + cl_page_list_del(env, plist, page); + + cl_page_clip(env, page, 0, PAGE_SIZE); + + SetPageUptodate(cl_page_vmpage(page)); + cl_page_disown(env, io, page); + + /* held in ll_cl_init() */ + lu_ref_del(&page->cp_reference, "cl_io", io); + cl_page_put(env, page); + } + } + + return bytes > 0 ? bytes : rc; +} + +static void write_commit_callback(const struct lu_env *env, struct cl_io *io, + struct cl_page *page) +{ + struct vvp_page *vpg; + struct page *vmpage = page->cp_vmpage; + struct cl_object *clob = cl_io_top(io)->ci_obj; + + SetPageUptodate(vmpage); + set_page_dirty(vmpage); + + vpg = cl2vvp_page(cl_object_page_slice(clob, page)); + vvp_write_pending(cl2vvp(clob), vpg); + + cl_page_disown(env, io, page); + + /* held in ll_cl_init() */ + lu_ref_del(&page->cp_reference, "cl_io", io); + cl_page_put(env, page); +} + +/* make sure the page list is contiguous */ +static bool page_list_sanity_check(struct cl_object *obj, + struct cl_page_list *plist) +{ + struct cl_page *page; + pgoff_t index = CL_PAGE_EOF; + + cl_page_list_for_each(page, plist) { + struct vvp_page *vpg = cl_object_page_slice(obj, page); + + if (index == CL_PAGE_EOF) { + index = vvp_index(vpg); + continue; + } + + ++index; + if (index == vvp_index(vpg)) + continue; + + return false; + } + return true; +} + +/* Return how many bytes have queued or written */ +int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io) +{ + struct cl_object *obj = io->ci_obj; + struct inode *inode = vvp_object_inode(obj); + struct vvp_io *vio = vvp_env_io(env); + struct cl_page_list *queue = &vio->u.write.vui_queue; + struct cl_page *page; + int rc = 0; + int bytes = 0; + unsigned int npages = vio->u.write.vui_queue.pl_nr; + + if (npages == 0) + return 0; + + CDEBUG(D_VFSTRACE, "commit async pages: %d, from %d, to %d\n", + npages, vio->u.write.vui_from, vio->u.write.vui_to); + + LASSERT(page_list_sanity_check(obj, queue)); + + /* submit IO with async write */ + rc = cl_io_commit_async(env, io, queue, + vio->u.write.vui_from, vio->u.write.vui_to, + write_commit_callback); + npages -= queue->pl_nr; /* already committed pages */ + if (npages > 0) { + /* calculate how many bytes were written */ + bytes = npages << PAGE_SHIFT; + + /* first page */ + bytes -= vio->u.write.vui_from; + if (queue->pl_nr == 0) /* last page */ + bytes -= PAGE_SIZE - vio->u.write.vui_to; + LASSERTF(bytes > 0, "bytes = %d, pages = %d\n", bytes, npages); + + vio->u.write.vui_written += bytes; + + CDEBUG(D_VFSTRACE, "Committed %d pages %d bytes, tot: %ld\n", + npages, bytes, vio->u.write.vui_written); + + /* the first page must have been written. */ + vio->u.write.vui_from = 0; + } + LASSERT(page_list_sanity_check(obj, queue)); + LASSERT(ergo(rc == 0, queue->pl_nr == 0)); + + /* out of quota, try sync write */ + if (rc == -EDQUOT && !cl_io_is_mkwrite(io)) { + rc = vvp_io_commit_sync(env, io, queue, + vio->u.write.vui_from, + vio->u.write.vui_to); + if (rc > 0) { + vio->u.write.vui_written += rc; + rc = 0; + } + } + + /* update inode size */ + ll_merge_attr(env, inode); + + /* Now the pages in queue were failed to commit, discard them + * unless they were dirtied before. + */ + while (queue->pl_nr > 0) { + page = cl_page_list_first(queue); + cl_page_list_del(env, queue, page); + + if (!PageDirty(cl_page_vmpage(page))) + cl_page_discard(env, io, page); + + cl_page_disown(env, io, page); + + /* held in ll_cl_init() */ + lu_ref_del(&page->cp_reference, "cl_io", io); + cl_page_put(env, page); + } + cl_page_list_fini(env, queue); + + return rc; } static int vvp_io_write_start(const struct lu_env *env, const struct cl_io_slice *ios) { - struct ccc_io *cio = cl2ccc_io(env, ios); + struct vvp_io *vio = cl2vvp_io(env, ios); struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; - struct inode *inode = ccc_object_inode(obj); + struct inode *inode = vvp_object_inode(obj); ssize_t result = 0; loff_t pos = io->u.ci_wr.wr.crw_pos; size_t cnt = io->u.ci_wr.wr.crw_count; @@ -582,25 +953,41 @@ static int vvp_io_write_start(const struct lu_env *env, * PARALLEL IO This has to be changed for parallel IO doing * out-of-order writes. */ + ll_merge_attr(env, inode); pos = io->u.ci_wr.wr.crw_pos = i_size_read(inode); - cio->cui_iocb->ki_pos = pos; + vio->vui_iocb->ki_pos = pos; } else { - LASSERT(cio->cui_iocb->ki_pos == pos); + LASSERT(vio->vui_iocb->ki_pos == pos); } CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt); - if (!cio->cui_iter) /* from a temp io in ll_cl_init(). */ + if (!vio->vui_iter) /* from a temp io in ll_cl_init(). */ result = 0; else - result = generic_file_write_iter(cio->cui_iocb, cio->cui_iter); + result = generic_file_write_iter(vio->vui_iocb, vio->vui_iter); + + if (result > 0) { + result = vvp_io_write_commit(env, io); + if (vio->u.write.vui_written > 0) { + result = vio->u.write.vui_written; + io->ci_nob += result; + CDEBUG(D_VFSTRACE, "write: nob %zd, result: %zd\n", + io->ci_nob, result); + } + } if (result > 0) { + struct ll_inode_info *lli = ll_i2info(inode); + + spin_lock(&lli->lli_lock); + lli->lli_flags |= LLIF_DATA_MODIFIED; + spin_unlock(&lli->lli_lock); + if (result < cnt) io->ci_continue = 0; - io->ci_nob += result; ll_rw_stats_tally(ll_i2sbi(inode), current->pid, - cio->cui_fd, pos, result, WRITE); + vio->vui_fd, pos, result, WRITE); result = 0; } return result; @@ -608,10 +995,10 @@ static int vvp_io_write_start(const struct lu_env *env, static int vvp_io_kernel_fault(struct vvp_fault_io *cfio) { - struct vm_fault *vmf = cfio->fault.ft_vmf; + struct vm_fault *vmf = cfio->ft_vmf; - cfio->fault.ft_flags = filemap_fault(cfio->ft_vma, vmf); - cfio->fault.ft_flags_valid = 1; + cfio->ft_flags = filemap_fault(cfio->ft_vma, vmf); + cfio->ft_flags_valid = 1; if (vmf->page) { CDEBUG(D_PAGE, @@ -619,39 +1006,51 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio) vmf->page, vmf->page->mapping, vmf->page->index, (long)vmf->page->flags, page_count(vmf->page), page_private(vmf->page), vmf->virtual_address); - if (unlikely(!(cfio->fault.ft_flags & VM_FAULT_LOCKED))) { + if (unlikely(!(cfio->ft_flags & VM_FAULT_LOCKED))) { lock_page(vmf->page); - cfio->fault.ft_flags |= VM_FAULT_LOCKED; + cfio->ft_flags |= VM_FAULT_LOCKED; } cfio->ft_vmpage = vmf->page; return 0; } - if (cfio->fault.ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) { + if (cfio->ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) { CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", vmf->virtual_address); return -EFAULT; } - if (cfio->fault.ft_flags & VM_FAULT_OOM) { + if (cfio->ft_flags & VM_FAULT_OOM) { CDEBUG(D_PAGE, "got addr %p - OOM\n", vmf->virtual_address); return -ENOMEM; } - if (cfio->fault.ft_flags & VM_FAULT_RETRY) + if (cfio->ft_flags & VM_FAULT_RETRY) return -EAGAIN; - CERROR("Unknown error in page fault %d!\n", cfio->fault.ft_flags); + CERROR("Unknown error in page fault %d!\n", cfio->ft_flags); return -EINVAL; } +static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io, + struct cl_page *page) +{ + struct vvp_page *vpg; + struct cl_object *clob = cl_io_top(io)->ci_obj; + + set_page_dirty(page->cp_vmpage); + + vpg = cl2vvp_page(cl_object_page_slice(clob, page)); + vvp_write_pending(cl2vvp(clob), vpg); +} + static int vvp_io_fault_start(const struct lu_env *env, const struct cl_io_slice *ios) { struct vvp_io *vio = cl2vvp_io(env, ios); struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; - struct inode *inode = ccc_object_inode(obj); + struct inode *inode = vvp_object_inode(obj); struct cl_fault_io *fio = &io->u.ci_fault; struct vvp_fault_io *cfio = &vio->u.fault; loff_t offset; @@ -659,7 +1058,7 @@ static int vvp_io_fault_start(const struct lu_env *env, struct page *vmpage = NULL; struct cl_page *page; loff_t size; - pgoff_t last; /* last page in a file data region */ + pgoff_t last_index; if (fio->ft_executable && inode->i_mtime.tv_sec != vio->u.fault.ft_mtime) @@ -670,7 +1069,7 @@ static int vvp_io_fault_start(const struct lu_env *env, /* offset of the last byte on the page */ offset = cl_offset(obj, fio->ft_index + 1) - 1; LASSERT(cl_index(obj, offset) == fio->ft_index); - result = ccc_prep_size(env, obj, io, 0, offset + 1, NULL); + result = vvp_prep_size(env, obj, io, 0, offset + 1, NULL); if (result != 0) return result; @@ -705,15 +1104,15 @@ static int vvp_io_fault_start(const struct lu_env *env, goto out; } + last_index = cl_index(obj, size - 1); + if (fio->ft_mkwrite) { - pgoff_t last_index; /* * Capture the size while holding the lli_trunc_sem from above * we want to make sure that we complete the mkwrite action * while holding this lock. We need to make sure that we are * not past the end of the file. */ - last_index = cl_index(obj, size - 1); if (last_index < fio->ft_index) { CDEBUG(D_PAGE, "llite: mkwrite and truncate race happened: %p: 0x%lx 0x%lx\n", @@ -745,25 +1144,32 @@ static int vvp_io_fault_start(const struct lu_env *env, */ if (fio->ft_mkwrite) { wait_on_page_writeback(vmpage); - if (set_page_dirty(vmpage)) { - struct ccc_page *cp; + if (!PageDirty(vmpage)) { + struct cl_page_list *plist = &io->ci_queue.c2_qin; + struct vvp_page *vpg = cl_object_page_slice(obj, page); + int to = PAGE_SIZE; /* vvp_page_assume() calls wait_on_page_writeback(). */ cl_page_assume(env, io, page); - cp = cl2ccc_page(cl_page_at(page, &vvp_device_type)); - vvp_write_pending(cl2ccc(obj), cp); + cl_page_list_init(plist); + cl_page_list_add(plist, page); + + /* size fixup */ + if (last_index == vvp_index(vpg)) + to = size & ~PAGE_MASK; /* Do not set Dirty bit here so that in case IO is * started before the page is really made dirty, we * still have chance to detect it. */ - result = cl_page_cache_add(env, io, page, CRT_WRITE); + result = cl_io_commit_async(env, io, plist, 0, to, + mkwrite_commit_callback); LASSERT(cl_page_is_owned(page, io)); + cl_page_list_fini(env, plist); vmpage = NULL; if (result < 0) { - cl_page_unmap(env, io, page); cl_page_discard(env, io, page); cl_page_disown(env, io, page); @@ -773,20 +1179,20 @@ static int vvp_io_fault_start(const struct lu_env *env, if (result == -EDQUOT) result = -ENOSPC; goto out; - } else + } else { cl_page_disown(env, io, page); + } } } - last = cl_index(obj, size - 1); /* * The ft_index is only used in the case of * a mkwrite action. We need to check * our assertions are correct, since * we should have caught this above */ - LASSERT(!fio->ft_mkwrite || fio->ft_index <= last); - if (fio->ft_index == last) + LASSERT(!fio->ft_mkwrite || fio->ft_index <= last_index); + if (fio->ft_index == last_index) /* * Last page is mapped partially. */ @@ -801,7 +1207,9 @@ out: /* return unlocked vmpage to avoid deadlocking */ if (vmpage) unlock_page(vmpage); - cfio->fault.ft_flags &= ~VM_FAULT_LOCKED; + + cfio->ft_flags &= ~VM_FAULT_LOCKED; + return result; } @@ -820,293 +1228,58 @@ static int vvp_io_read_page(const struct lu_env *env, const struct cl_page_slice *slice) { struct cl_io *io = ios->cis_io; - struct cl_object *obj = slice->cpl_obj; - struct ccc_page *cp = cl2ccc_page(slice); + struct vvp_page *vpg = cl2vvp_page(slice); struct cl_page *page = slice->cpl_page; - struct inode *inode = ccc_object_inode(obj); + struct inode *inode = vvp_object_inode(slice->cpl_obj); struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_file_data *fd = cl2ccc_io(env, ios)->cui_fd; + struct ll_file_data *fd = cl2vvp_io(env, ios)->vui_fd; struct ll_readahead_state *ras = &fd->fd_ras; - struct page *vmpage = cp->cpg_page; struct cl_2queue *queue = &io->ci_queue; - int rc; - - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); - LASSERT(slice->cpl_obj == obj); if (sbi->ll_ra_info.ra_max_pages_per_file && sbi->ll_ra_info.ra_max_pages) - ras_update(sbi, inode, ras, page->cp_index, - cp->cpg_defer_uptodate); - - /* Sanity check whether the page is protected by a lock. */ - rc = cl_page_is_under_lock(env, io, page); - if (rc != -EBUSY) { - CL_PAGE_HEADER(D_WARNING, env, page, "%s: %d\n", - rc == -ENODATA ? "without a lock" : - "match failed", rc); - if (rc != -ENODATA) - return rc; - } + ras_update(sbi, inode, ras, vvp_index(vpg), + vpg->vpg_defer_uptodate); - if (cp->cpg_defer_uptodate) { - cp->cpg_ra_used = 1; + if (vpg->vpg_defer_uptodate) { + vpg->vpg_ra_used = 1; cl_page_export(env, page, 1); } /* * Add page into the queue even when it is marked uptodate above. * this will unlock it automatically as part of cl_page_list_disown(). */ + cl_page_list_add(&queue->c2_qin, page); if (sbi->ll_ra_info.ra_max_pages_per_file && sbi->ll_ra_info.ra_max_pages) - ll_readahead(env, io, ras, - vmpage->mapping, &queue->c2_qin, fd->fd_flags); + ll_readahead(env, io, &queue->c2_qin, ras, + vpg->vpg_defer_uptodate); return 0; } -static int vvp_page_sync_io(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, struct ccc_page *cp, - enum cl_req_type crt) +void vvp_io_end(const struct lu_env *env, const struct cl_io_slice *ios) { - struct cl_2queue *queue; - int result; - - LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); - - queue = &io->ci_queue; - cl_2queue_init_page(queue, page); - - result = cl_io_submit_sync(env, io, crt, queue, 0); - LASSERT(cl_page_is_owned(page, io)); - - if (crt == CRT_READ) - /* - * in CRT_WRITE case page is left locked even in case of - * error. - */ - cl_page_list_disown(env, io, &queue->c2_qin); - cl_2queue_fini(env, queue); - - return result; -} - -/** - * Prepare partially written-to page for a write. - */ -static int vvp_io_prepare_partial(const struct lu_env *env, struct cl_io *io, - struct cl_object *obj, struct cl_page *pg, - struct ccc_page *cp, - unsigned from, unsigned to) -{ - struct cl_attr *attr = ccc_env_thread_attr(env); - loff_t offset = cl_offset(obj, pg->cp_index); - int result; - - cl_object_attr_lock(obj); - result = cl_object_attr_get(env, obj, attr); - cl_object_attr_unlock(obj); - if (result == 0) { - /* - * If are writing to a new page, no need to read old data. - * The extent locking will have updated the KMS, and for our - * purposes here we can treat it like i_size. - */ - if (attr->cat_kms <= offset) { - char *kaddr = kmap_atomic(cp->cpg_page); - - memset(kaddr, 0, cl_page_size(obj)); - kunmap_atomic(kaddr); - } else if (cp->cpg_defer_uptodate) - cp->cpg_ra_used = 1; - else - result = vvp_page_sync_io(env, io, pg, cp, CRT_READ); - /* - * In older implementations, obdo_refresh_inode is called here - * to update the inode because the write might modify the - * object info at OST. However, this has been proven useless, - * since LVB functions will be called when user space program - * tries to retrieve inode attribute. Also, see bug 15909 for - * details. -jay - */ - if (result == 0) - cl_page_export(env, pg, 1); - } - return result; -} - -static int vvp_io_prepare_write(const struct lu_env *env, - const struct cl_io_slice *ios, - const struct cl_page_slice *slice, - unsigned from, unsigned to) -{ - struct cl_object *obj = slice->cpl_obj; - struct ccc_page *cp = cl2ccc_page(slice); - struct cl_page *pg = slice->cpl_page; - struct page *vmpage = cp->cpg_page; - - int result; - - LINVRNT(cl_page_is_vmlocked(env, pg)); - LASSERT(vmpage->mapping->host == ccc_object_inode(obj)); - - result = 0; - - CL_PAGE_HEADER(D_PAGE, env, pg, "preparing: [%d, %d]\n", from, to); - if (!PageUptodate(vmpage)) { - /* - * We're completely overwriting an existing page, so _don't_ - * set it up to date until commit_write - */ - if (from == 0 && to == PAGE_SIZE) { - CL_PAGE_HEADER(D_PAGE, env, pg, "full page write\n"); - POISON_PAGE(page, 0x11); - } else - result = vvp_io_prepare_partial(env, ios->cis_io, obj, - pg, cp, from, to); - } else - CL_PAGE_HEADER(D_PAGE, env, pg, "uptodate\n"); - return result; -} - -static int vvp_io_commit_write(const struct lu_env *env, - const struct cl_io_slice *ios, - const struct cl_page_slice *slice, - unsigned from, unsigned to) -{ - struct cl_object *obj = slice->cpl_obj; - struct cl_io *io = ios->cis_io; - struct ccc_page *cp = cl2ccc_page(slice); - struct cl_page *pg = slice->cpl_page; - struct inode *inode = ccc_object_inode(obj); - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_inode_info *lli = ll_i2info(inode); - struct page *vmpage = cp->cpg_page; - - int result; - int tallyop; - loff_t size; - - LINVRNT(cl_page_is_vmlocked(env, pg)); - LASSERT(vmpage->mapping->host == inode); - - LU_OBJECT_HEADER(D_INODE, env, &obj->co_lu, "committing page write\n"); - CL_PAGE_HEADER(D_PAGE, env, pg, "committing: [%d, %d]\n", from, to); - - /* - * queue a write for some time in the future the first time we - * dirty the page. - * - * This is different from what other file systems do: they usually - * just mark page (and some of its buffers) dirty and rely on - * balance_dirty_pages() to start a write-back. Lustre wants write-back - * to be started earlier for the following reasons: - * - * (1) with a large number of clients we need to limit the amount - * of cached data on the clients a lot; - * - * (2) large compute jobs generally want compute-only then io-only - * and the IO should complete as quickly as possible; - * - * (3) IO is batched up to the RPC size and is async until the - * client max cache is hit - * (/sys/fs/lustre/osc/OSC.../max_dirty_mb) - * - */ - if (!PageDirty(vmpage)) { - tallyop = LPROC_LL_DIRTY_MISSES; - result = cl_page_cache_add(env, io, pg, CRT_WRITE); - if (result == 0) { - /* page was added into cache successfully. */ - set_page_dirty(vmpage); - vvp_write_pending(cl2ccc(obj), cp); - } else if (result == -EDQUOT) { - pgoff_t last_index = i_size_read(inode) >> PAGE_SHIFT; - bool need_clip = true; - - /* - * Client ran out of disk space grant. Possible - * strategies are: - * - * (a) do a sync write, renewing grant; - * - * (b) stop writing on this stripe, switch to the - * next one. - * - * (b) is a part of "parallel io" design that is the - * ultimate goal. (a) is what "old" client did, and - * what the new code continues to do for the time - * being. - */ - if (last_index > pg->cp_index) { - to = PAGE_SIZE; - need_clip = false; - } else if (last_index == pg->cp_index) { - int size_to = i_size_read(inode) & ~CFS_PAGE_MASK; - - if (to < size_to) - to = size_to; - } - if (need_clip) - cl_page_clip(env, pg, 0, to); - result = vvp_page_sync_io(env, io, pg, cp, CRT_WRITE); - if (result) - CERROR("Write page %lu of inode %p failed %d\n", - pg->cp_index, inode, result); - } - } else { - tallyop = LPROC_LL_DIRTY_HITS; - result = 0; - } - ll_stats_ops_tally(sbi, tallyop, 1); - - /* Inode should be marked DIRTY even if no new page was marked DIRTY - * because page could have been not flushed between 2 modifications. - * It is important the file is marked DIRTY as soon as the I/O is done - * Indeed, when cache is flushed, file could be already closed and it - * is too late to warn the MDT. - * It is acceptable that file is marked DIRTY even if I/O is dropped - * for some reasons before being flushed to OST. - */ - if (result == 0) { - spin_lock(&lli->lli_lock); - lli->lli_flags |= LLIF_DATA_MODIFIED; - spin_unlock(&lli->lli_lock); - } - - size = cl_offset(obj, pg->cp_index) + to; - - ll_inode_size_lock(inode); - if (result == 0) { - if (size > i_size_read(inode)) { - cl_isize_write_nolock(inode, size); - CDEBUG(D_VFSTRACE, DFID" updating i_size %lu\n", - PFID(lu_object_fid(&obj->co_lu)), - (unsigned long)size); - } - cl_page_export(env, pg, 1); - } else { - if (size > i_size_read(inode)) - cl_page_discard(env, io, pg); - } - ll_inode_size_unlock(inode); - return result; + CLOBINVRNT(env, ios->cis_io->ci_obj, + vvp_object_invariant(ios->cis_io->ci_obj)); } static const struct cl_io_operations vvp_io_ops = { .op = { [CIT_READ] = { - .cio_fini = vvp_io_read_fini, + .cio_fini = vvp_io_fini, .cio_lock = vvp_io_read_lock, .cio_start = vvp_io_read_start, - .cio_advance = ccc_io_advance + .cio_advance = vvp_io_advance, }, [CIT_WRITE] = { .cio_fini = vvp_io_fini, + .cio_iter_init = vvp_io_write_iter_init, + .cio_iter_fini = vvp_io_write_iter_fini, .cio_lock = vvp_io_write_lock, .cio_start = vvp_io_write_start, - .cio_advance = ccc_io_advance + .cio_advance = vvp_io_advance, }, [CIT_SETATTR] = { .cio_fini = vvp_io_setattr_fini, @@ -1120,7 +1293,7 @@ static const struct cl_io_operations vvp_io_ops = { .cio_iter_init = vvp_io_fault_iter_init, .cio_lock = vvp_io_fault_lock, .cio_start = vvp_io_fault_start, - .cio_end = ccc_io_end + .cio_end = vvp_io_end, }, [CIT_FSYNC] = { .cio_start = vvp_io_fsync_start, @@ -1131,29 +1304,26 @@ static const struct cl_io_operations vvp_io_ops = { } }, .cio_read_page = vvp_io_read_page, - .cio_prepare_write = vvp_io_prepare_write, - .cio_commit_write = vvp_io_commit_write }; int vvp_io_init(const struct lu_env *env, struct cl_object *obj, struct cl_io *io) { struct vvp_io *vio = vvp_env_io(env); - struct ccc_io *cio = ccc_env_io(env); - struct inode *inode = ccc_object_inode(obj); + struct inode *inode = vvp_object_inode(obj); int result; - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); CDEBUG(D_VFSTRACE, DFID " ignore/verify layout %d/%d, layout version %d restore needed %d\n", PFID(lu_object_fid(&obj->co_lu)), io->ci_ignore_layout, io->ci_verify_layout, - cio->cui_layout_gen, io->ci_restore_needed); + vio->vui_layout_gen, io->ci_restore_needed); - CL_IO_SLICE_CLEAN(cio, cui_cl); - cl_io_slice_add(io, &cio->cui_cl, obj, &vvp_io_ops); - vio->cui_ra_window_set = 0; + CL_IO_SLICE_CLEAN(vio, vui_cl); + cl_io_slice_add(io, &vio->vui_cl, obj, &vvp_io_ops); + vio->vui_ra_valid = false; result = 0; if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE) { size_t count; @@ -1166,7 +1336,7 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj, if (count == 0) result = 1; else - cio->cui_tot_count = count; + vio->vui_tot_count = count; /* for read/write, we store the jobid in the inode, and * it'll be fetched by osc when building RPC. @@ -1192,7 +1362,7 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj, * because it might not grant layout lock in IT_OPEN. */ if (result == 0 && !io->ci_ignore_layout) { - result = ll_layout_refresh(inode, &cio->cui_layout_gen); + result = ll_layout_refresh(inode, &vio->vui_layout_gen); if (result == -ENOENT) /* If the inode on MDS has been removed, but the objects * on OSTs haven't been destroyed (async unlink), layout @@ -1208,11 +1378,3 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj, return result; } - -static struct vvp_io *cl2vvp_io(const struct lu_env *env, - const struct cl_io_slice *slice) -{ - /* Calling just for assertion */ - cl2ccc_io(env, slice); - return vvp_env_io(env); -} diff --git a/drivers/staging/lustre/lustre/llite/vvp_lock.c b/drivers/staging/lustre/lustre/llite/vvp_lock.c index ff0948043c7a..f5bd6c22e112 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_lock.c +++ b/drivers/staging/lustre/lustre/llite/vvp_lock.c @@ -40,7 +40,7 @@ #define DEBUG_SUBSYSTEM S_LLITE -#include "../include/obd.h" +#include "../include/obd_support.h" #include "../include/lustre_lite.h" #include "vvp_internal.h" @@ -51,36 +51,41 @@ * */ -/** - * Estimates lock value for the purpose of managing the lock cache during - * memory shortages. - * - * Locks for memory mapped files are almost infinitely precious, others are - * junk. "Mapped locks" are heavy, but not infinitely heavy, so that they are - * ordered within themselves by weights assigned from other layers. - */ -static unsigned long vvp_lock_weigh(const struct lu_env *env, - const struct cl_lock_slice *slice) +static void vvp_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice) +{ + struct vvp_lock *vlk = cl2vvp_lock(slice); + + kmem_cache_free(vvp_lock_kmem, vlk); +} + +static int vvp_lock_enqueue(const struct lu_env *env, + const struct cl_lock_slice *slice, + struct cl_io *unused, struct cl_sync_io *anchor) { - struct ccc_object *cob = cl2ccc(slice->cls_obj); + CLOBINVRNT(env, slice->cls_obj, vvp_object_invariant(slice->cls_obj)); - return atomic_read(&cob->cob_mmap_cnt) > 0 ? ~0UL >> 2 : 0; + return 0; } static const struct cl_lock_operations vvp_lock_ops = { - .clo_delete = ccc_lock_delete, - .clo_fini = ccc_lock_fini, - .clo_enqueue = ccc_lock_enqueue, - .clo_wait = ccc_lock_wait, - .clo_use = ccc_lock_use, - .clo_unuse = ccc_lock_unuse, - .clo_fits_into = ccc_lock_fits_into, - .clo_state = ccc_lock_state, - .clo_weigh = vvp_lock_weigh + .clo_fini = vvp_lock_fini, + .clo_enqueue = vvp_lock_enqueue, }; int vvp_lock_init(const struct lu_env *env, struct cl_object *obj, - struct cl_lock *lock, const struct cl_io *io) + struct cl_lock *lock, const struct cl_io *unused) { - return ccc_lock_init(env, obj, lock, io, &vvp_lock_ops); + struct vvp_lock *vlk; + int result; + + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); + + vlk = kmem_cache_zalloc(vvp_lock_kmem, GFP_NOFS); + if (vlk) { + cl_lock_slice_add(lock, &vlk->vlk_cl, obj, &vvp_lock_ops); + result = 0; + } else { + result = -ENOMEM; + } + return result; } diff --git a/drivers/staging/lustre/lustre/llite/vvp_object.c b/drivers/staging/lustre/lustre/llite/vvp_object.c index 03c887d8ed83..18c9df7ebdda 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_object.c +++ b/drivers/staging/lustre/lustre/llite/vvp_object.c @@ -45,6 +45,7 @@ #include "../include/obd.h" #include "../include/lustre_lite.h" +#include "llite_internal.h" #include "vvp_internal.h" /***************************************************************************** @@ -53,16 +54,25 @@ * */ +int vvp_object_invariant(const struct cl_object *obj) +{ + struct inode *inode = vvp_object_inode(obj); + struct ll_inode_info *lli = ll_i2info(inode); + + return (S_ISREG(inode->i_mode) || inode->i_mode == 0) && + lli->lli_clob == obj; +} + static int vvp_object_print(const struct lu_env *env, void *cookie, lu_printer_t p, const struct lu_object *o) { - struct ccc_object *obj = lu2ccc(o); - struct inode *inode = obj->cob_inode; + struct vvp_object *obj = lu2vvp(o); + struct inode *inode = obj->vob_inode; struct ll_inode_info *lli; (*p)(env, cookie, "(%s %d %d) inode: %p ", - list_empty(&obj->cob_pending_list) ? "-" : "+", - obj->cob_transient_pages, atomic_read(&obj->cob_mmap_cnt), + list_empty(&obj->vob_pending_list) ? "-" : "+", + obj->vob_transient_pages, atomic_read(&obj->vob_mmap_cnt), inode); if (inode) { lli = ll_i2info(inode); @@ -77,7 +87,7 @@ static int vvp_object_print(const struct lu_env *env, void *cookie, static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj, struct cl_attr *attr) { - struct inode *inode = ccc_object_inode(obj); + struct inode *inode = vvp_object_inode(obj); /* * lov overwrites most of these fields in @@ -99,7 +109,7 @@ static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj, static int vvp_attr_set(const struct lu_env *env, struct cl_object *obj, const struct cl_attr *attr, unsigned valid) { - struct inode *inode = ccc_object_inode(obj); + struct inode *inode = vvp_object_inode(obj); if (valid & CAT_UID) inode->i_uid = make_kuid(&init_user_ns, attr->cat_uid); @@ -112,7 +122,7 @@ static int vvp_attr_set(const struct lu_env *env, struct cl_object *obj, if (valid & CAT_CTIME) inode->i_ctime.tv_sec = attr->cat_ctime; if (0 && valid & CAT_SIZE) - cl_isize_write_nolock(inode, attr->cat_size); + i_size_write(inode, attr->cat_size); /* not currently necessary */ if (0 && valid & (CAT_UID|CAT_GID|CAT_SIZE)) mark_inode_dirty(inode); @@ -165,6 +175,40 @@ static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj, return 0; } +static int vvp_prune(const struct lu_env *env, struct cl_object *obj) +{ + struct inode *inode = vvp_object_inode(obj); + int rc; + + rc = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_LOCAL, 1); + if (rc < 0) { + CDEBUG(D_VFSTRACE, DFID ": writeback failed: %d\n", + PFID(lu_object_fid(&obj->co_lu)), rc); + return rc; + } + + truncate_inode_pages(inode->i_mapping, 0); + return 0; +} + +static int vvp_object_glimpse(const struct lu_env *env, + const struct cl_object *obj, struct ost_lvb *lvb) +{ + struct inode *inode = vvp_object_inode(obj); + + lvb->lvb_mtime = LTIME_S(inode->i_mtime); + lvb->lvb_atime = LTIME_S(inode->i_atime); + lvb->lvb_ctime = LTIME_S(inode->i_ctime); + /* + * LU-417: Add dirty pages block count lest i_blocks reports 0, some + * "cp" or "tar" on remote node may think it's a completely sparse file + * and skip it. + */ + if (lvb->lvb_size > 0 && lvb->lvb_blocks == 0) + lvb->lvb_blocks = dirty_cnt(inode); + return 0; +} + static const struct cl_object_operations vvp_ops = { .coo_page_init = vvp_page_init, .coo_lock_init = vvp_lock_init, @@ -172,29 +216,94 @@ static const struct cl_object_operations vvp_ops = { .coo_attr_get = vvp_attr_get, .coo_attr_set = vvp_attr_set, .coo_conf_set = vvp_conf_set, - .coo_glimpse = ccc_object_glimpse + .coo_prune = vvp_prune, + .coo_glimpse = vvp_object_glimpse }; +static int vvp_object_init0(const struct lu_env *env, + struct vvp_object *vob, + const struct cl_object_conf *conf) +{ + vob->vob_inode = conf->coc_inode; + vob->vob_transient_pages = 0; + cl_object_page_init(&vob->vob_cl, sizeof(struct vvp_page)); + return 0; +} + +static int vvp_object_init(const struct lu_env *env, struct lu_object *obj, + const struct lu_object_conf *conf) +{ + struct vvp_device *dev = lu2vvp_dev(obj->lo_dev); + struct vvp_object *vob = lu2vvp(obj); + struct lu_object *below; + struct lu_device *under; + int result; + + under = &dev->vdv_next->cd_lu_dev; + below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under); + if (below) { + const struct cl_object_conf *cconf; + + cconf = lu2cl_conf(conf); + INIT_LIST_HEAD(&vob->vob_pending_list); + lu_object_add(obj, below); + result = vvp_object_init0(env, vob, cconf); + } else { + result = -ENOMEM; + } + + return result; +} + +static void vvp_object_free(const struct lu_env *env, struct lu_object *obj) +{ + struct vvp_object *vob = lu2vvp(obj); + + lu_object_fini(obj); + lu_object_header_fini(obj->lo_header); + kmem_cache_free(vvp_object_kmem, vob); +} + static const struct lu_object_operations vvp_lu_obj_ops = { - .loo_object_init = ccc_object_init, - .loo_object_free = ccc_object_free, - .loo_object_print = vvp_object_print + .loo_object_init = vvp_object_init, + .loo_object_free = vvp_object_free, + .loo_object_print = vvp_object_print, }; -struct ccc_object *cl_inode2ccc(struct inode *inode) +struct vvp_object *cl_inode2vvp(struct inode *inode) { - struct cl_inode_info *lli = cl_i2info(inode); + struct ll_inode_info *lli = ll_i2info(inode); struct cl_object *obj = lli->lli_clob; struct lu_object *lu; lu = lu_object_locate(obj->co_lu.lo_header, &vvp_device_type); LASSERT(lu); - return lu2ccc(lu); + return lu2vvp(lu); } struct lu_object *vvp_object_alloc(const struct lu_env *env, - const struct lu_object_header *hdr, + const struct lu_object_header *unused, struct lu_device *dev) { - return ccc_object_alloc(env, hdr, dev, &vvp_ops, &vvp_lu_obj_ops); + struct vvp_object *vob; + struct lu_object *obj; + + vob = kmem_cache_zalloc(vvp_object_kmem, GFP_NOFS); + if (vob) { + struct cl_object_header *hdr; + + obj = &vob->vob_cl.co_lu; + hdr = &vob->vob_header; + cl_object_header_init(hdr); + hdr->coh_page_bufsize = cfs_size_round(sizeof(struct cl_page)); + + lu_object_init(obj, &hdr->coh_lu, dev); + lu_object_add_top(&hdr->coh_lu, obj); + + vob->vob_cl.co_ops = &vvp_ops; + obj->lo_ops = &vvp_lu_obj_ops; + } else { + obj = NULL; + } + return obj; } diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c index 33ca3eb34965..6cd2af7a958f 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_page.c +++ b/drivers/staging/lustre/lustre/llite/vvp_page.c @@ -41,9 +41,16 @@ #define DEBUG_SUBSYSTEM S_LLITE -#include "../include/obd.h" +#include <linux/atomic.h> +#include <linux/bitops.h> +#include <linux/mm.h> +#include <linux/mutex.h> +#include <linux/page-flags.h> +#include <linux/pagemap.h> + #include "../include/lustre_lite.h" +#include "llite_internal.h" #include "vvp_internal.h" /***************************************************************************** @@ -52,9 +59,9 @@ * */ -static void vvp_page_fini_common(struct ccc_page *cp) +static void vvp_page_fini_common(struct vvp_page *vpg) { - struct page *vmpage = cp->cpg_page; + struct page *vmpage = vpg->vpg_page; LASSERT(vmpage); put_page(vmpage); @@ -63,23 +70,23 @@ static void vvp_page_fini_common(struct ccc_page *cp) static void vvp_page_fini(const struct lu_env *env, struct cl_page_slice *slice) { - struct ccc_page *cp = cl2ccc_page(slice); - struct page *vmpage = cp->cpg_page; + struct vvp_page *vpg = cl2vvp_page(slice); + struct page *vmpage = vpg->vpg_page; /* * vmpage->private was already cleared when page was moved into * VPG_FREEING state. */ LASSERT((struct cl_page *)vmpage->private != slice->cpl_page); - vvp_page_fini_common(cp); + vvp_page_fini_common(vpg); } static int vvp_page_own(const struct lu_env *env, const struct cl_page_slice *slice, struct cl_io *io, int nonblock) { - struct ccc_page *vpg = cl2ccc_page(slice); - struct page *vmpage = vpg->cpg_page; + struct vvp_page *vpg = cl2vvp_page(slice); + struct page *vmpage = vpg->vpg_page; LASSERT(vmpage); if (nonblock) { @@ -96,6 +103,7 @@ static int vvp_page_own(const struct lu_env *env, lock_page(vmpage); wait_on_page_writeback(vmpage); + return 0; } @@ -136,41 +144,15 @@ static void vvp_page_discard(const struct lu_env *env, struct cl_io *unused) { struct page *vmpage = cl2vm_page(slice); - struct address_space *mapping; - struct ccc_page *cpg = cl2ccc_page(slice); + struct vvp_page *vpg = cl2vvp_page(slice); LASSERT(vmpage); LASSERT(PageLocked(vmpage)); - mapping = vmpage->mapping; - - if (cpg->cpg_defer_uptodate && !cpg->cpg_ra_used) - ll_ra_stats_inc(mapping, RA_STAT_DISCARDED); + if (vpg->vpg_defer_uptodate && !vpg->vpg_ra_used) + ll_ra_stats_inc(vmpage->mapping->host, RA_STAT_DISCARDED); - /* - * truncate_complete_page() calls - * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete(). - */ - truncate_complete_page(mapping, vmpage); -} - -static int vvp_page_unmap(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused) -{ - struct page *vmpage = cl2vm_page(slice); - __u64 offset; - - LASSERT(vmpage); - LASSERT(PageLocked(vmpage)); - - offset = vmpage->index << PAGE_SHIFT; - - /* - * XXX is it safe to call this with the page lock held? - */ - ll_teardown_mmaps(vmpage->mapping, offset, offset + PAGE_SIZE); - return 0; + ll_invalidate_page(vmpage); } static void vvp_page_delete(const struct lu_env *env, @@ -179,12 +161,20 @@ static void vvp_page_delete(const struct lu_env *env, struct page *vmpage = cl2vm_page(slice); struct inode *inode = vmpage->mapping->host; struct cl_object *obj = slice->cpl_obj; + struct cl_page *page = slice->cpl_page; + int refc; LASSERT(PageLocked(vmpage)); - LASSERT((struct cl_page *)vmpage->private == slice->cpl_page); - LASSERT(inode == ccc_object_inode(obj)); + LASSERT((struct cl_page *)vmpage->private == page); + LASSERT(inode == vvp_object_inode(obj)); - vvp_write_complete(cl2ccc(obj), cl2ccc_page(slice)); + vvp_write_complete(cl2vvp(obj), cl2vvp_page(slice)); + + /* Drop the reference count held in vvp_page_init */ + refc = atomic_dec_return(&page->cp_ref); + LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc); + + ClearPageUptodate(vmpage); ClearPagePrivate(vmpage); vmpage->private = 0; /* @@ -237,7 +227,7 @@ static int vvp_page_prep_write(const struct lu_env *env, if (!pg->cp_sync_io) set_page_writeback(vmpage); - vvp_write_pending(cl2ccc(slice->cpl_obj), cl2ccc_page(slice)); + vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice)); return 0; } @@ -250,11 +240,11 @@ static int vvp_page_prep_write(const struct lu_env *env, */ static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret) { - struct ccc_object *obj = cl_inode2ccc(inode); + struct vvp_object *obj = cl_inode2vvp(inode); if (ioret == 0) { ClearPageError(vmpage); - obj->cob_discard_page_warned = 0; + obj->vob_discard_page_warned = 0; } else { SetPageError(vmpage); if (ioret == -ENOSPC) @@ -263,8 +253,8 @@ static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret set_bit(AS_EIO, &inode->i_mapping->flags); if ((ioret == -ESHUTDOWN || ioret == -EINTR) && - obj->cob_discard_page_warned == 0) { - obj->cob_discard_page_warned = 1; + obj->vob_discard_page_warned == 0) { + obj->vob_discard_page_warned = 1; ll_dirty_page_discard_warn(vmpage, ioret); } } @@ -274,22 +264,23 @@ static void vvp_page_completion_read(const struct lu_env *env, const struct cl_page_slice *slice, int ioret) { - struct ccc_page *cp = cl2ccc_page(slice); - struct page *vmpage = cp->cpg_page; - struct cl_page *page = cl_page_top(slice->cpl_page); - struct inode *inode = ccc_object_inode(page->cp_obj); + struct vvp_page *vpg = cl2vvp_page(slice); + struct page *vmpage = vpg->vpg_page; + struct cl_page *page = slice->cpl_page; + struct inode *inode = vvp_object_inode(page->cp_obj); LASSERT(PageLocked(vmpage)); CL_PAGE_HEADER(D_PAGE, env, page, "completing READ with %d\n", ioret); - if (cp->cpg_defer_uptodate) + if (vpg->vpg_defer_uptodate) ll_ra_count_put(ll_i2sbi(inode), 1); if (ioret == 0) { - if (!cp->cpg_defer_uptodate) + if (!vpg->vpg_defer_uptodate) cl_page_export(env, page, 1); - } else - cp->cpg_defer_uptodate = 0; + } else { + vpg->vpg_defer_uptodate = 0; + } if (!page->cp_sync_io) unlock_page(vmpage); @@ -299,9 +290,9 @@ static void vvp_page_completion_write(const struct lu_env *env, const struct cl_page_slice *slice, int ioret) { - struct ccc_page *cp = cl2ccc_page(slice); + struct vvp_page *vpg = cl2vvp_page(slice); struct cl_page *pg = slice->cpl_page; - struct page *vmpage = cp->cpg_page; + struct page *vmpage = vpg->vpg_page; CL_PAGE_HEADER(D_PAGE, env, pg, "completing WRITE with %d\n", ioret); @@ -315,8 +306,8 @@ static void vvp_page_completion_write(const struct lu_env *env, * and then re-add the page into pending transfer queue. -jay */ - cp->cpg_write_queued = 0; - vvp_write_complete(cl2ccc(slice->cpl_obj), cp); + vpg->vpg_write_queued = 0; + vvp_write_complete(cl2vvp(slice->cpl_obj), vpg); if (pg->cp_sync_io) { LASSERT(PageLocked(vmpage)); @@ -327,7 +318,7 @@ static void vvp_page_completion_write(const struct lu_env *env, * Only mark the page error only when it's an async write * because applications won't wait for IO to finish. */ - vvp_vmpage_error(ccc_object_inode(pg->cp_obj), vmpage, ioret); + vvp_vmpage_error(vvp_object_inode(pg->cp_obj), vmpage, ioret); end_page_writeback(vmpage); } @@ -359,7 +350,7 @@ static int vvp_page_make_ready(const struct lu_env *env, LASSERT(pg->cp_state == CPS_CACHED); /* This actually clears the dirty bit in the radix tree. */ set_page_writeback(vmpage); - vvp_write_pending(cl2ccc(slice->cpl_obj), cl2ccc_page(slice)); + vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice)); CL_PAGE_HEADER(D_PAGE, env, pg, "readied\n"); } else if (pg->cp_state == CPS_PAGEOUT) { /* is it possible for osc_flush_async_page() to already @@ -375,24 +366,51 @@ static int vvp_page_make_ready(const struct lu_env *env, return result; } +static int vvp_page_is_under_lock(const struct lu_env *env, + const struct cl_page_slice *slice, + struct cl_io *io, pgoff_t *max_index) +{ + if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE || + io->ci_type == CIT_FAULT) { + struct vvp_io *vio = vvp_env_io(env); + + if (unlikely(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) + *max_index = CL_PAGE_EOF; + } + return 0; +} + static int vvp_page_print(const struct lu_env *env, const struct cl_page_slice *slice, void *cookie, lu_printer_t printer) { - struct ccc_page *vp = cl2ccc_page(slice); - struct page *vmpage = vp->cpg_page; + struct vvp_page *vpg = cl2vvp_page(slice); + struct page *vmpage = vpg->vpg_page; (*printer)(env, cookie, LUSTRE_VVP_NAME "-page@%p(%d:%d:%d) vm@%p ", - vp, vp->cpg_defer_uptodate, vp->cpg_ra_used, - vp->cpg_write_queued, vmpage); + vpg, vpg->vpg_defer_uptodate, vpg->vpg_ra_used, + vpg->vpg_write_queued, vmpage); if (vmpage) { (*printer)(env, cookie, "%lx %d:%d %lx %lu %slru", (long)vmpage->flags, page_count(vmpage), page_mapcount(vmpage), vmpage->private, - page_index(vmpage), + vmpage->index, list_empty(&vmpage->lru) ? "not-" : ""); } + (*printer)(env, cookie, "\n"); + + return 0; +} + +static int vvp_page_fail(const struct lu_env *env, + const struct cl_page_slice *slice) +{ + /* + * Cached read? + */ + LBUG(); + return 0; } @@ -401,32 +419,38 @@ static const struct cl_page_operations vvp_page_ops = { .cpo_assume = vvp_page_assume, .cpo_unassume = vvp_page_unassume, .cpo_disown = vvp_page_disown, - .cpo_vmpage = ccc_page_vmpage, .cpo_discard = vvp_page_discard, .cpo_delete = vvp_page_delete, - .cpo_unmap = vvp_page_unmap, .cpo_export = vvp_page_export, .cpo_is_vmlocked = vvp_page_is_vmlocked, .cpo_fini = vvp_page_fini, .cpo_print = vvp_page_print, - .cpo_is_under_lock = ccc_page_is_under_lock, + .cpo_is_under_lock = vvp_page_is_under_lock, .io = { [CRT_READ] = { .cpo_prep = vvp_page_prep_read, .cpo_completion = vvp_page_completion_read, - .cpo_make_ready = ccc_fail, + .cpo_make_ready = vvp_page_fail, }, [CRT_WRITE] = { .cpo_prep = vvp_page_prep_write, .cpo_completion = vvp_page_completion_write, .cpo_make_ready = vvp_page_make_ready, - } - } + }, + }, }; +static int vvp_transient_page_prep(const struct lu_env *env, + const struct cl_page_slice *slice, + struct cl_io *unused) +{ + /* transient page should always be sent. */ + return 0; +} + static void vvp_transient_page_verify(const struct cl_page *page) { - struct inode *inode = ccc_object_inode(page->cp_obj); + struct inode *inode = vvp_object_inode(page->cp_obj); LASSERT(!inode_trylock(inode)); } @@ -477,7 +501,7 @@ static void vvp_transient_page_discard(const struct lu_env *env, static int vvp_transient_page_is_vmlocked(const struct lu_env *env, const struct cl_page_slice *slice) { - struct inode *inode = ccc_object_inode(slice->cpl_obj); + struct inode *inode = vvp_object_inode(slice->cpl_obj); int locked; locked = !inode_trylock(inode); @@ -497,13 +521,13 @@ vvp_transient_page_completion(const struct lu_env *env, static void vvp_transient_page_fini(const struct lu_env *env, struct cl_page_slice *slice) { - struct ccc_page *cp = cl2ccc_page(slice); + struct vvp_page *vpg = cl2vvp_page(slice); struct cl_page *clp = slice->cpl_page; - struct ccc_object *clobj = cl2ccc(clp->cp_obj); + struct vvp_object *clobj = cl2vvp(clp->cp_obj); - vvp_page_fini_common(cp); - LASSERT(!inode_trylock(clobj->cob_inode)); - clobj->cob_transient_pages--; + vvp_page_fini_common(vpg); + LASSERT(!inode_trylock(clobj->vob_inode)); + clobj->vob_transient_pages--; } static const struct cl_page_operations vvp_transient_page_ops = { @@ -512,45 +536,48 @@ static const struct cl_page_operations vvp_transient_page_ops = { .cpo_unassume = vvp_transient_page_unassume, .cpo_disown = vvp_transient_page_disown, .cpo_discard = vvp_transient_page_discard, - .cpo_vmpage = ccc_page_vmpage, .cpo_fini = vvp_transient_page_fini, .cpo_is_vmlocked = vvp_transient_page_is_vmlocked, .cpo_print = vvp_page_print, - .cpo_is_under_lock = ccc_page_is_under_lock, + .cpo_is_under_lock = vvp_page_is_under_lock, .io = { [CRT_READ] = { - .cpo_prep = ccc_transient_page_prep, + .cpo_prep = vvp_transient_page_prep, .cpo_completion = vvp_transient_page_completion, }, [CRT_WRITE] = { - .cpo_prep = ccc_transient_page_prep, + .cpo_prep = vvp_transient_page_prep, .cpo_completion = vvp_transient_page_completion, } } }; int vvp_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage) + struct cl_page *page, pgoff_t index) { - struct ccc_page *cpg = cl_object_page_slice(obj, page); + struct vvp_page *vpg = cl_object_page_slice(obj, page); + struct page *vmpage = page->cp_vmpage; - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); - cpg->cpg_page = vmpage; + vpg->vpg_page = vmpage; get_page(vmpage); - INIT_LIST_HEAD(&cpg->cpg_pending_linkage); + INIT_LIST_HEAD(&vpg->vpg_pending_linkage); if (page->cp_type == CPT_CACHEABLE) { + /* in cache, decref in vvp_page_delete */ + atomic_inc(&page->cp_ref); SetPagePrivate(vmpage); vmpage->private = (unsigned long)page; - cl_page_slice_add(page, &cpg->cpg_cl, obj, &vvp_page_ops); + cl_page_slice_add(page, &vpg->vpg_cl, obj, index, + &vvp_page_ops); } else { - struct ccc_object *clobj = cl2ccc(obj); + struct vvp_object *clobj = cl2vvp(obj); - LASSERT(!inode_trylock(clobj->cob_inode)); - cl_page_slice_add(page, &cpg->cpg_cl, obj, + LASSERT(!inode_trylock(clobj->vob_inode)); + cl_page_slice_add(page, &vpg->vpg_cl, obj, index, &vvp_transient_page_ops); - clobj->cob_transient_pages++; + clobj->vob_transient_pages++; } return 0; } diff --git a/drivers/staging/lustre/lustre/llite/vvp_req.c b/drivers/staging/lustre/lustre/llite/vvp_req.c new file mode 100644 index 000000000000..fb886291a4e2 --- /dev/null +++ b/drivers/staging/lustre/lustre/llite/vvp_req.c @@ -0,0 +1,121 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2014, Intel Corporation. + */ + +#define DEBUG_SUBSYSTEM S_LLITE + +#include "../include/lustre/lustre_idl.h" +#include "../include/cl_object.h" +#include "../include/obd.h" +#include "../include/obd_support.h" +#include "../include/lustre_lite.h" +#include "llite_internal.h" +#include "vvp_internal.h" + +static inline struct vvp_req *cl2vvp_req(const struct cl_req_slice *slice) +{ + return container_of0(slice, struct vvp_req, vrq_cl); +} + +/** + * Implementation of struct cl_req_operations::cro_attr_set() for VVP + * layer. VVP is responsible for + * + * - o_[mac]time + * + * - o_mode + * + * - o_parent_seq + * + * - o_[ug]id + * + * - o_parent_oid + * + * - o_parent_ver + * + * - o_ioepoch, + * + */ +void vvp_req_attr_set(const struct lu_env *env, + const struct cl_req_slice *slice, + const struct cl_object *obj, + struct cl_req_attr *attr, u64 flags) +{ + struct inode *inode; + struct obdo *oa; + u32 valid_flags; + + oa = attr->cra_oa; + inode = vvp_object_inode(obj); + valid_flags = OBD_MD_FLTYPE; + + if (slice->crs_req->crq_type == CRT_WRITE) { + if (flags & OBD_MD_FLEPOCH) { + oa->o_valid |= OBD_MD_FLEPOCH; + oa->o_ioepoch = ll_i2info(inode)->lli_ioepoch; + valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME | + OBD_MD_FLUID | OBD_MD_FLGID; + } + } + obdo_from_inode(oa, inode, valid_flags & flags); + obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid); + memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid, + JOBSTATS_JOBID_SIZE); +} + +void vvp_req_completion(const struct lu_env *env, + const struct cl_req_slice *slice, int ioret) +{ + struct vvp_req *vrq; + + if (ioret > 0) + cl_stats_tally(slice->crs_dev, slice->crs_req->crq_type, ioret); + + vrq = cl2vvp_req(slice); + kmem_cache_free(vvp_req_kmem, vrq); +} + +static const struct cl_req_operations vvp_req_ops = { + .cro_attr_set = vvp_req_attr_set, + .cro_completion = vvp_req_completion +}; + +int vvp_req_init(const struct lu_env *env, struct cl_device *dev, + struct cl_req *req) +{ + struct vvp_req *vrq; + int result; + + vrq = kmem_cache_zalloc(vvp_req_kmem, GFP_NOFS); + if (vrq) { + cl_req_slice_add(req, &vrq->vrq_cl, dev, &vvp_req_ops); + result = 0; + } else { + result = -ENOMEM; + } + return result; +} diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c index b68dcc921ca2..608014b0dbcd 100644 --- a/drivers/staging/lustre/lustre/llite/xattr.c +++ b/drivers/staging/lustre/lustre/llite/xattr.c @@ -181,8 +181,9 @@ int ll_setxattr_common(struct inode *inode, const char *name, size = rc; pv = (const char *)new_value; - } else + } else { return -EOPNOTSUPP; + } valid |= rce_ops2valid(rce->rce_ops); } @@ -210,16 +211,14 @@ int ll_setxattr_common(struct inode *inode, const char *name, return 0; } -int ll_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) +int ll_setxattr(struct dentry *dentry, struct inode *inode, + const char *name, const void *value, size_t size, int flags) { - struct inode *inode = d_inode(dentry); - LASSERT(inode); LASSERT(name); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n", - inode->i_ino, inode->i_generation, inode, name); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n", + PFID(ll_inode2fid(inode)), inode, name); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1); @@ -243,12 +242,12 @@ int ll_setxattr(struct dentry *dentry, const char *name, lump->lmm_stripe_offset = -1; if (lump && S_ISREG(inode->i_mode)) { - int flags = FMODE_WRITE; + __u64 it_flags = FMODE_WRITE; int lum_size = (lump->lmm_magic == LOV_USER_MAGIC_V1) ? sizeof(*lump) : sizeof(struct lov_user_md_v3); - rc = ll_lov_setstripe_ea_info(inode, dentry, flags, lump, - lum_size); + rc = ll_lov_setstripe_ea_info(inode, dentry, it_flags, + lump, lum_size); /* b10667: rc always be 0 here for now */ rc = 0; } else if (S_ISDIR(inode->i_mode)) { @@ -272,8 +271,8 @@ int ll_removexattr(struct dentry *dentry, const char *name) LASSERT(inode); LASSERT(name); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n", - inode->i_ino, inode->i_generation, inode, name); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n", + PFID(ll_inode2fid(inode)), inode, name); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1); return ll_setxattr_common(inode, name, NULL, 0, 0, @@ -292,8 +291,8 @@ int ll_getxattr_common(struct inode *inode, const char *name, struct rmtacl_ctl_entry *rce = NULL; struct ll_inode_info *lli = ll_i2info(inode); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", - inode->i_ino, inode->i_generation, inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", + PFID(ll_inode2fid(inode)), inode); /* listxattr have slightly different behavior from of ext3: * without 'user_xattr' ext3 will list all xattr names but @@ -338,7 +337,6 @@ int ll_getxattr_common(struct inode *inode, const char *name, */ if (xattr_type == XATTR_ACL_ACCESS_T && !(sbi->ll_flags & LL_SBI_RMT_CLIENT)) { - struct posix_acl *acl; spin_lock(&lli->lli_lock); @@ -423,8 +421,7 @@ getxattr_nocache: if (rce && rce->rce_ops == RMT_LSETFACL) { ext_acl_xattr_header *acl; - acl = lustre_posix_acl_xattr_2ext( - (posix_acl_xattr_header *)buffer, rc); + acl = lustre_posix_acl_xattr_2ext(buffer, rc); if (IS_ERR(acl)) { rc = PTR_ERR(acl); goto out; @@ -451,16 +448,14 @@ out: return rc; } -ssize_t ll_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size) +ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) { - struct inode *inode = d_inode(dentry); - LASSERT(inode); LASSERT(name); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n", - inode->i_ino, inode->i_generation, inode, name); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n", + PFID(ll_inode2fid(inode)), inode, name); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1); @@ -554,8 +549,8 @@ ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size) LASSERT(inode); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", - inode->i_ino, inode->i_generation, inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", + PFID(ll_inode2fid(inode)), inode); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LISTXATTR, 1); diff --git a/drivers/staging/lustre/lustre/llite/xattr_cache.c b/drivers/staging/lustre/lustre/llite/xattr_cache.c index 3480ce2bb3cc..d7e17abbe361 100644 --- a/drivers/staging/lustre/lustre/llite/xattr_cache.c +++ b/drivers/staging/lustre/lustre/llite/xattr_cache.c @@ -229,7 +229,6 @@ static int ll_xattr_cache_valid(struct ll_inode_info *lli) */ static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli) { - if (!ll_xattr_cache_valid(lli)) return 0; |