diff options
Diffstat (limited to 'drivers/staging/lustre/lustre/llite')
21 files changed, 342 insertions, 333 deletions
diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c index 65bf0c401b44..966f580e26fb 100644 --- a/drivers/staging/lustre/lustre/llite/dcache.c +++ b/drivers/staging/lustre/lustre/llite/dcache.c @@ -247,17 +247,14 @@ static int ll_revalidate_dentry(struct dentry *dentry, return 1; /* - * if open&create is set, talk to MDS to make sure file is created if - * necessary, because we can't do this in ->open() later since that's - * called on an inode. return 0 here to let lookup to handle this. + * VFS warns us that this is the second go around and previous + * operation failed (most likely open|creat), so this time + * we better talk to the server via the lookup path by name, + * not by fid. */ - if ((lookup_flags & (LOOKUP_OPEN | LOOKUP_CREATE)) == - (LOOKUP_OPEN | LOOKUP_CREATE)) + if (lookup_flags & LOOKUP_REVAL) return 0; - if (lookup_flags & (LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE)) - return 1; - if (!dentry_may_statahead(dir, dentry)) return 1; diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c index ea5d247a3f70..13b35922a4ca 100644 --- a/drivers/staging/lustre/lustre/llite/dir.c +++ b/drivers/staging/lustre/lustre/llite/dir.c @@ -432,7 +432,7 @@ static int ll_dir_setdirstripe(struct inode *parent, struct lmv_user_md *lump, if (!IS_POSIXACL(parent) || !exp_connect_umask(ll_i2mdexp(parent))) mode &= ~current_umask(); - mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR; + mode = (mode & (0777 | S_ISVTX)) | S_IFDIR; op_data = ll_prep_md_op_data(NULL, parent, NULL, dirname, strlen(dirname), mode, LUSTRE_OPC_MKDIR, lump); @@ -521,12 +521,15 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump, rc = md_setattr(sbi->ll_md_exp, op_data, lump, lum_size, &req); ll_finish_md_op_data(op_data); ptlrpc_req_finished(req); - if (rc) { - if (rc != -EPERM && rc != -EACCES) - CERROR("mdc_setattr fails: rc = %d\n", rc); - } + if (rc) + return rc; - /* In the following we use the fact that LOV_USER_MAGIC_V1 and +#if OBD_OCD_VERSION(2, 13, 53, 0) > LUSTRE_VERSION_CODE + /* + * 2.9 server has stored filesystem default stripe in ROOT xattr, + * and it's stored into system config for backward compatibility. + * + * In the following we use the fact that LOV_USER_MAGIC_V1 and * LOV_USER_MAGIC_V3 have the same initial fields so we do not * need to make the distinction between the 2 versions */ @@ -567,6 +570,7 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump, end: kfree(param); } +#endif return rc; } diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index f634c11216e6..10adfcdd7035 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -122,26 +122,25 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, enum mds_op_bias bias, void *data) { - struct obd_export *exp = ll_i2mdexp(inode); + const struct ll_inode_info *lli = ll_i2info(inode); struct md_op_data *op_data; struct ptlrpc_request *req = NULL; - struct obd_device *obd = class_exp2obd(exp); int rc; - if (!obd) { - /* - * XXX: in case of LMV, is this correct to access - * ->exp_handle? - */ - CERROR("Invalid MDC connection handle %#llx\n", - ll_i2mdexp(inode)->exp_handle.h_cookie); + if (!class_exp2obd(md_exp)) { + CERROR("%s: invalid MDC connection handle closing " DFID "\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(&lli->lli_fid)); rc = 0; goto out; } op_data = kzalloc(sizeof(*op_data), GFP_NOFS); + /* + * We leak openhandle and request here on error, but not much to be + * done in OOM case since app won't retry close on error either. + */ if (!op_data) { - /* XXX We leak openhandle and request here. */ rc = -ENOMEM; goto out; } @@ -170,10 +169,9 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, } rc = md_close(md_exp, op_data, och->och_mod, &req); - if (rc) { - CERROR("%s: inode "DFID" mdc close failed: rc = %d\n", - ll_i2mdexp(inode)->exp_obd->obd_name, - PFID(ll_inode2fid(inode)), rc); + if (rc && rc != -EINTR) { + CERROR("%s: inode " DFID " mdc close failed: rc = %d\n", + md_exp->exp_obd->obd_name, PFID(&lli->lli_fid), rc); } if (op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP) && @@ -192,8 +190,7 @@ out: och->och_fh.cookie = DEAD_HANDLE_MAGIC; kfree(och); - if (req) /* This is close request */ - ptlrpc_req_finished(req); + ptlrpc_req_finished(req); return rc; } @@ -420,6 +417,17 @@ out: ptlrpc_req_finished(req); ll_intent_drop_lock(itp); + /* + * We did open by fid, but by the time we got to the server, + * the object disappeared. If this is a create, we cannot really + * tell the userspace that the file it was trying to create + * does not exist. Instead let's return -ESTALE, and the VFS will + * retry the create with LOOKUP_REVAL that we are going to catch + * in ll_revalidate_dentry() and use lookup then. + */ + if (rc == -ENOENT && itp->it_op & IT_CREAT) + rc = -ESTALE; + return rc; } @@ -1016,7 +1024,7 @@ static bool file_is_noatime(const struct file *file) return false; } -void ll_io_init(struct cl_io *io, const struct file *file, int write) +static void ll_io_init(struct cl_io *io, const struct file *file, int write) { struct inode *inode = file_inode(file); @@ -1821,7 +1829,7 @@ free: return rc; } -static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss) +int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss) { struct md_op_data *op_data; int rc; @@ -1883,7 +1891,7 @@ static int ll_hsm_import(struct inode *inode, struct file *file, goto free_hss; } - attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO); + attr->ia_mode = hui->hui_mode & 0777; attr->ia_mode |= S_IFREG; attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid); attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid); @@ -2618,18 +2626,18 @@ int ll_migrate(struct inode *parent, struct file *file, int mdtidx, ll_get_fsname(parent->i_sb, NULL, 0), name, PFID(&op_data->op_fid3)); rc = -EINVAL; - goto out_free; + goto out_unlock; } rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3); if (rc < 0) - goto out_free; + goto out_unlock; if (rc == mdtidx) { CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name, PFID(&op_data->op_fid3), mdtidx); rc = 0; - goto out_free; + goto out_unlock; } again: if (S_ISREG(child_inode->i_mode)) { @@ -2637,13 +2645,13 @@ again: if (IS_ERR(och)) { rc = PTR_ERR(och); och = NULL; - goto out_free; + goto out_unlock; } rc = ll_data_version(child_inode, &data_version, LL_DV_WR_FLUSH); if (rc) - goto out_free; + goto out_close; op_data->op_handle = och->och_fh; op_data->op_data = och->och_mod; @@ -2656,40 +2664,45 @@ again: op_data->op_cli_flags = CLI_MIGRATE; rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name, namelen, name, namelen, &request); - if (!rc) + if (!rc) { + LASSERT(request); ll_update_times(request, parent); - body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY); - if (!body) { - rc = -EPROTO; - goto out_free; + body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY); + LASSERT(body); + + /* + * If the server does release layout lock, then we cleanup + * the client och here, otherwise release it in out_close: + */ + if (och && body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) { + obd_mod_put(och->och_mod); + md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp, + och); + och->och_fh.cookie = DEAD_HANDLE_MAGIC; + kfree(och); + och = NULL; + } } - /* - * If the server does release layout lock, then we cleanup - * the client och here, otherwise release it in out_free: - */ - if (och && body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) { - obd_mod_put(och->och_mod); - md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp, och); - och->och_fh.cookie = DEAD_HANDLE_MAGIC; - kfree(och); - och = NULL; + if (request) { + ptlrpc_req_finished(request); + request = NULL; } - ptlrpc_req_finished(request); /* Try again if the file layout has changed. */ if (rc == -EAGAIN && S_ISREG(child_inode->i_mode)) goto again; -out_free: - if (child_inode) { - if (och) /* close the file */ - ll_lease_close(och, child_inode, NULL); - clear_nlink(child_inode); - inode_unlock(child_inode); - iput(child_inode); - } +out_close: + if (och) /* close the file */ + ll_lease_close(och, child_inode, NULL); + if (!rc) + clear_nlink(child_inode); +out_unlock: + inode_unlock(child_inode); + iput(child_inode); +out_free: ll_finish_md_op_data(op_data); return rc; } diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c index dd1cfd8f5213..f1036f477a51 100644 --- a/drivers/staging/lustre/lustre/llite/lcommon_cl.c +++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c @@ -94,6 +94,7 @@ int cl_setattr_ost(struct cl_object *obj, const struct iattr *attr, io = vvp_env_thread_io(env); io->ci_obj = obj; + io->ci_verify_layout = 1; io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime); io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime); @@ -120,13 +121,7 @@ again: cl_io_fini(env, io); if (unlikely(io->ci_need_restart)) goto again; - /* HSM import case: file is released, cannot be restored - * no need to fail except if restore registration failed - * with -ENODATA - */ - if (result == -ENODATA && io->ci_restore_needed && - io->ci_result != -ENODATA) - result = 0; + cl_env_put(env, &refcheck); return result; } diff --git a/drivers/staging/lustre/lustre/llite/lcommon_misc.c b/drivers/staging/lustre/lustre/llite/lcommon_misc.c index f48660ed350f..f0c132e2cf92 100644 --- a/drivers/staging/lustre/lustre/llite/lcommon_misc.c +++ b/drivers/staging/lustre/lustre/llite/lcommon_misc.c @@ -33,6 +33,7 @@ * future). * */ +#define DEBUG_SUBSYSTEM S_LLITE #include "../include/obd_class.h" #include "../include/obd_support.h" #include "../include/obd.h" @@ -132,7 +133,6 @@ int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock, io = vvp_env_thread_io(env); io->ci_obj = obj; - io->ci_ignore_layout = 1; rc = cl_io_init(env, io, CIT_MISC, io->ci_obj); if (rc != 0) { diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h index 065a9a7e120a..ecdfd0c29b7f 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h @@ -281,10 +281,8 @@ static inline struct ll_inode_info *ll_i2info(struct inode *inode) return container_of(inode, struct ll_inode_info, lli_vfs_inode); } -/* default to about 40meg of readahead on a given system. That much tied - * up in 512k readahead requests serviced at 40ms each is about 1GB/s. - */ -#define SBI_DEFAULT_READAHEAD_MAX (40UL << (20 - PAGE_SHIFT)) +/* default to about 64M of readahead on a given system. */ +#define SBI_DEFAULT_READAHEAD_MAX (64UL << (20 - PAGE_SHIFT)) /* default to read-ahead full files smaller than 2MB on the second read */ #define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - PAGE_SHIFT)) @@ -321,6 +319,9 @@ struct ll_ra_info { struct ra_io_arg { unsigned long ria_start; /* start offset of read-ahead*/ unsigned long ria_end; /* end offset of read-ahead*/ + unsigned long ria_reserved; /* reserved pages for read-ahead */ + unsigned long ria_end_min; /* minimum end to cover current read */ + bool ria_eof; /* reach end of file */ /* If stride read pattern is detected, ria_stoff means where * stride read is started. Note: for normal read-ahead, the * value here is meaningless, and also it will not be accessed @@ -505,6 +506,7 @@ struct ll_sb_info { */ /* root squash */ struct root_squash_info ll_squash; + struct path ll_mnt; __kernel_fsid_t ll_fsid; struct kobject ll_kobj; /* sysfs object */ @@ -551,6 +553,11 @@ struct ll_readahead_state { */ unsigned long ras_window_start, ras_window_len; /* + * Optimal RPC size. It decides how many pages will be sent + * for each read-ahead. + */ + unsigned long ras_rpc_size; + /* * Where next read-ahead should start at. This lies within read-ahead * window. Read-ahead window is read in pieces rather than at once * because: 1. lustre limits total number of pages under read-ahead by @@ -766,6 +773,7 @@ int ll_merge_attr(const struct lu_env *env, struct inode *inode); int ll_fid2path(struct inode *inode, void __user *arg); int ll_data_version(struct inode *inode, __u64 *data_version, int flags); int ll_hsm_release(struct inode *inode); +int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss); /* llite/dcache.c */ diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c index 25f5aed97f63..b229cbc7bb33 100644 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ b/drivers/staging/lustre/lustre/llite/llite_lib.c @@ -103,6 +103,7 @@ static struct ll_sb_info *ll_init_sbi(struct super_block *sb) sbi->ll_flags |= LL_SBI_CHECKSUM; sbi->ll_flags |= LL_SBI_LRU_RESIZE; + sbi->ll_flags |= LL_SBI_LAZYSTATFS; for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) { spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i]. @@ -303,6 +304,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, sb->s_magic = LL_SUPER_MAGIC; sb->s_maxbytes = MAX_LFS_FILESIZE; sbi->ll_namelen = osfs->os_namelen; + sbi->ll_mnt.mnt = current->fs->root.mnt; if ((sbi->ll_flags & LL_SBI_USER_XATTR) && !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) { @@ -1402,7 +1404,11 @@ static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data) * cache is not cleared yet. */ op_data->op_attr.ia_valid &= ~(TIMES_SET_FLAGS | ATTR_SIZE); + if (S_ISREG(inode->i_mode)) + inode_lock(inode); rc = simple_setattr(dentry, &op_data->op_attr); + if (S_ISREG(inode->i_mode)) + inode_unlock(inode); op_data->op_attr.ia_valid = ia_valid; rc = ll_update_inode(inode, &md); @@ -1431,7 +1437,6 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) struct inode *inode = d_inode(dentry); struct ll_inode_info *lli = ll_i2info(inode); struct md_op_data *op_data = NULL; - bool file_is_released = false; int rc = 0; CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, valid %x, hsm_import %d\n", @@ -1486,76 +1491,35 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime), (s64)ktime_get_real_seconds()); - /* We always do an MDS RPC, even if we're only changing the size; - * only the MDS knows whether truncate() should fail with -ETXTBUSY - */ - - op_data = kzalloc(sizeof(*op_data), GFP_NOFS); - if (!op_data) - return -ENOMEM; - - if (!S_ISDIR(inode->i_mode)) + if (S_ISREG(inode->i_mode)) inode_unlock(inode); - /* truncate on a released file must failed with -ENODATA, - * so size must not be set on MDS for released file - * but other attributes must be set + /* + * We always do an MDS RPC, even if we're only changing the size; + * only the MDS knows whether truncate() should fail with -ETXTBUSY */ - if (S_ISREG(inode->i_mode)) { - struct cl_layout cl = { - .cl_is_released = false, - }; - struct lu_env *env; - int refcheck; - __u32 gen; + op_data = kzalloc(sizeof(*op_data), GFP_NOFS); + if (!op_data) { + rc = -ENOMEM; + goto out; + } - rc = ll_layout_refresh(inode, &gen); - if (rc < 0) - goto out; + op_data->op_attr = *attr; + if (!hsm_import && attr->ia_valid & ATTR_SIZE) { /* - * XXX: the only place we need to know the layout type, - * this will be removed by a later patch. -Jinshan + * If we are changing file size, file content is + * modified, flag it. */ - env = cl_env_get(&refcheck); - if (IS_ERR(env)) { - rc = PTR_ERR(env); - goto out; - } - - rc = cl_object_layout_get(env, lli->lli_clob, &cl); - cl_env_put(env, &refcheck); - if (rc < 0) - goto out; - - file_is_released = cl.cl_is_released; - - if (!hsm_import && attr->ia_valid & ATTR_SIZE) { - if (file_is_released) { - rc = ll_layout_restore(inode, 0, attr->ia_size); - if (rc < 0) - goto out; - - file_is_released = false; - ll_layout_refresh(inode, &gen); - } - - /* - * If we are changing file size, file content is - * modified, flag it. - */ - attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; - op_data->op_bias |= MDS_DATA_MODIFIED; - } + attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; + op_data->op_bias |= MDS_DATA_MODIFIED; } - memcpy(&op_data->op_attr, attr, sizeof(*attr)); - rc = ll_md_setattr(dentry, op_data); if (rc) goto out; - if (!S_ISREG(inode->i_mode) || file_is_released) { + if (!S_ISREG(inode->i_mode) || hsm_import) { rc = 0; goto out; } @@ -1572,11 +1536,40 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) */ rc = cl_setattr_ost(ll_i2info(inode)->lli_clob, attr, 0); } + + /* + * If the file was restored, it needs to set dirty flag. + * + * We've already sent MDS_DATA_MODIFIED flag in + * ll_md_setattr() for truncate. However, the MDT refuses to + * set the HS_DIRTY flag on released files, so we have to set + * it again if the file has been restored. Please check how + * LLIF_DATA_MODIFIED is set in vvp_io_setattr_fini(). + * + * Please notice that if the file is not released, the previous + * MDS_DATA_MODIFIED has taken effect and usually + * LLIF_DATA_MODIFIED is not set(see vvp_io_setattr_fini()). + * This way we can save an RPC for common open + trunc + * operation. + */ + if (test_and_clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags)) { + struct hsm_state_set hss = { + .hss_valid = HSS_SETMASK, + .hss_setmask = HS_DIRTY, + }; + int rc2; + + rc2 = ll_hsm_state_set(inode, &hss); + if (rc2 < 0) + CERROR(DFID "HSM set dirty failed: rc2 = %d\n", + PFID(ll_inode2fid(inode)), rc2); + } + out: if (op_data) ll_finish_md_op_data(op_data); - if (!S_ISDIR(inode->i_mode)) { + if (S_ISREG(inode->i_mode)) { inode_lock(inode); if ((attr->ia_valid & ATTR_SIZE) && !hsm_import) inode_dio_wait(inode); @@ -1599,7 +1592,7 @@ int ll_setattr(struct dentry *de, struct iattr *attr) if (((attr->ia_valid & (ATTR_MODE | ATTR_FORCE | ATTR_SIZE)) == (ATTR_SIZE | ATTR_MODE)) && (((mode & S_ISUID) && !(attr->ia_mode & S_ISUID)) || - (((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) && + (((mode & (S_ISGID | 0010)) == (S_ISGID | 0010)) && !(attr->ia_mode & S_ISGID)))) attr->ia_valid |= ATTR_FORCE; @@ -1610,7 +1603,7 @@ int ll_setattr(struct dentry *de, struct iattr *attr) attr->ia_valid |= ATTR_KILL_SUID; if ((attr->ia_valid & ATTR_MODE) && - ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) && + ((mode & (S_ISGID | 0010)) == (S_ISGID | 0010)) && !(attr->ia_mode & S_ISGID) && !(attr->ia_valid & ATTR_KILL_SGID)) attr->ia_valid |= ATTR_KILL_SGID; @@ -1998,6 +1991,8 @@ void ll_umount_begin(struct super_block *sb) struct ll_sb_info *sbi = ll_s2sbi(sb); struct obd_device *obd; struct obd_ioctl_data *ioc_data; + wait_queue_head_t waitq; + struct l_wait_info lwi; CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb, sb->s_count, atomic_read(&sb->s_active)); @@ -2030,9 +2025,14 @@ void ll_umount_begin(struct super_block *sb) } /* Really, we'd like to wait until there are no requests outstanding, - * and then continue. For now, we just invalidate the requests, - * schedule() and sleep one second if needed, and hope. + * and then continue. For now, we just periodically checking for vfs + * to decrement mnt_cnt and hope to finish it within 10sec. */ + init_waitqueue_head(&waitq); + lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(10), + cfs_time_seconds(1), NULL, NULL); + l_wait_event(waitq, may_umount(sbi->ll_mnt.mnt), &lwi); + schedule(); } diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c index ee01f20d8b11..9afa6bec3e6f 100644 --- a/drivers/staging/lustre/lustre/llite/llite_mmap.c +++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c @@ -390,15 +390,13 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) result = VM_FAULT_LOCKED; break; case -ENODATA: + case -EAGAIN: case -EFAULT: result = VM_FAULT_NOPAGE; break; case -ENOMEM: result = VM_FAULT_OOM; break; - case -EAGAIN: - result = VM_FAULT_RETRY; - break; default: result = VM_FAULT_SIGBUS; break; diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c index 03682c10fc9e..f3ee584157e0 100644 --- a/drivers/staging/lustre/lustre/llite/lproc_llite.c +++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c @@ -924,27 +924,29 @@ static ssize_t ll_unstable_stats_seq_write(struct file *file, } LPROC_SEQ_FOPS(ll_unstable_stats); -static ssize_t root_squash_show(struct kobject *kobj, struct attribute *attr, - char *buf) +static int ll_root_squash_seq_show(struct seq_file *m, void *v) { - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); + struct super_block *sb = m->private; + struct ll_sb_info *sbi = ll_s2sbi(sb); struct root_squash_info *squash = &sbi->ll_squash; - return sprintf(buf, "%u:%u\n", squash->rsi_uid, squash->rsi_gid); + seq_printf(m, "%u:%u\n", squash->rsi_uid, squash->rsi_gid); + return 0; } -static ssize_t root_squash_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, size_t count) +static ssize_t ll_root_squash_seq_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *off) { - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); + struct seq_file *m = file->private_data; + struct super_block *sb = m->private; + struct ll_sb_info *sbi = ll_s2sbi(sb); struct root_squash_info *squash = &sbi->ll_squash; return lprocfs_wr_root_squash(buffer, count, squash, - ll_get_fsname(sbi->ll_sb, NULL, 0)); + ll_get_fsname(sb, NULL, 0)); } -LUSTRE_RW_ATTR(root_squash); +LPROC_SEQ_FOPS(ll_root_squash); static int ll_nosquash_nids_seq_show(struct seq_file *m, void *v) { @@ -997,6 +999,8 @@ static struct lprocfs_vars lprocfs_llite_obd_vars[] = { { "statahead_stats", &ll_statahead_stats_fops, NULL, 0 }, { "unstable_stats", &ll_unstable_stats_fops, NULL }, { "sbi_flags", &ll_sbi_flags_fops, NULL, 0 }, + { .name = "root_squash", + .fops = &ll_root_squash_fops }, { .name = "nosquash_nids", .fops = &ll_nosquash_nids_fops }, { NULL } @@ -1027,7 +1031,6 @@ static struct attribute *llite_attrs[] = { &lustre_attr_max_easize.attr, &lustre_attr_default_easize.attr, &lustre_attr_xattr_cache.attr, - &lustre_attr_root_squash.attr, NULL, }; diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c index a8f4e7fb0a46..fc176540bb95 100644 --- a/drivers/staging/lustre/lustre/llite/namei.c +++ b/drivers/staging/lustre/lustre/llite/namei.c @@ -994,11 +994,6 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry, return rc; } -/* ll_unlink() doesn't update the inode with the new link count. - * Instead, ll_ddelete() and ll_d_iput() will update it based upon if there - * is any lock existing. They will recycle dentries and inodes based upon locks - * too. b=20433 - */ static int ll_unlink(struct inode *dir, struct dentry *dchild) { struct ptlrpc_request *request = NULL; @@ -1041,7 +1036,7 @@ static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir))) mode &= ~current_umask(); - mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR; + mode = (mode & (0777 | S_ISVTX)) | S_IFDIR; err = ll_new_node(dir, dentry, NULL, mode, 0, LUSTRE_OPC_MKDIR); if (!err) @@ -1089,7 +1084,7 @@ static int ll_symlink(struct inode *dir, struct dentry *dentry, CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),target=%.*s\n", dentry, PFID(ll_inode2fid(dir)), dir, 3000, oldname); - err = ll_new_node(dir, dentry, oldname, S_IFLNK | S_IRWXUGO, + err = ll_new_node(dir, dentry, oldname, S_IFLNK | 0777, 0, LUSTRE_OPC_SYMLINK); if (!err) diff --git a/drivers/staging/lustre/lustre/llite/range_lock.c b/drivers/staging/lustre/lustre/llite/range_lock.c index 94c818f1478b..14148a097476 100644 --- a/drivers/staging/lustre/lustre/llite/range_lock.c +++ b/drivers/staging/lustre/lustre/llite/range_lock.c @@ -61,17 +61,23 @@ void range_lock_tree_init(struct range_lock_tree *tree) * Pre: Caller should have allocated the range lock node. * Post: The range lock node is meant to cover [start, end] region */ -void range_lock_init(struct range_lock *lock, __u64 start, __u64 end) +int range_lock_init(struct range_lock *lock, __u64 start, __u64 end) { + int rc; + memset(&lock->rl_node, 0, sizeof(lock->rl_node)); if (end != LUSTRE_EOF) end >>= PAGE_SHIFT; - interval_set(&lock->rl_node, start >> PAGE_SHIFT, end); + rc = interval_set(&lock->rl_node, start >> PAGE_SHIFT, end); + if (rc) + return rc; + INIT_LIST_HEAD(&lock->rl_next_lock); lock->rl_task = NULL; lock->rl_lock_count = 0; lock->rl_blocking_ranges = 0; lock->rl_sequence = 0; + return rc; } static inline struct range_lock *next_lock(struct range_lock *lock) diff --git a/drivers/staging/lustre/lustre/llite/range_lock.h b/drivers/staging/lustre/lustre/llite/range_lock.h index c6d04a6f99fd..779091ccec4e 100644 --- a/drivers/staging/lustre/lustre/llite/range_lock.h +++ b/drivers/staging/lustre/lustre/llite/range_lock.h @@ -76,7 +76,7 @@ struct range_lock_tree { }; void range_lock_tree_init(struct range_lock_tree *tree); -void range_lock_init(struct range_lock *lock, __u64 start, __u64 end); +int range_lock_init(struct range_lock *lock, __u64 start, __u64 end); int range_lock(struct range_lock_tree *tree, struct range_lock *lock); void range_unlock(struct range_lock_tree *tree, struct range_lock *lock); #endif diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c index f10e092979fe..50d027e0cfab 100644 --- a/drivers/staging/lustre/lustre/llite/rw.c +++ b/drivers/staging/lustre/lustre/llite/rw.c @@ -92,25 +92,6 @@ static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, goto out; } - /* If the non-strided (ria_pages == 0) readahead window - * (ria_start + ret) has grown across an RPC boundary, then trim - * readahead size by the amount beyond the RPC so it ends on an - * RPC boundary. If the readahead window is already ending on - * an RPC boundary (beyond_rpc == 0), or smaller than a full - * RPC (beyond_rpc < ret) the readahead size is unchanged. - * The (beyond_rpc != 0) check is skipped since the conditional - * branch is more expensive than subtracting zero from the result. - * - * Strided read is left unaligned to avoid small fragments beyond - * the RPC boundary from needing an extra read RPC. - */ - if (ria->ria_pages == 0) { - long beyond_rpc = (ria->ria_start + ret) % PTLRPC_MAX_BRW_PAGES; - - if (/* beyond_rpc != 0 && */ beyond_rpc < ret) - ret -= beyond_rpc; - } - if (atomic_add_return(ret, &ra->ra_cur_pages) > ra->ra_max_pages) { atomic_sub(ret, &ra->ra_cur_pages); ret = 0; @@ -147,11 +128,12 @@ void ll_ra_stats_inc(struct inode *inode, enum ra_stat which) #define RAS_CDEBUG(ras) \ CDEBUG(D_READA, \ - "lrp %lu cr %lu cp %lu ws %lu wl %lu nra %lu r %lu ri %lu" \ - "csr %lu sf %lu sp %lu sl %lu\n", \ + "lrp %lu cr %lu cp %lu ws %lu wl %lu nra %lu rpc %lu " \ + "r %lu ri %lu csr %lu sf %lu sp %lu sl %lu\n", \ ras->ras_last_readpage, ras->ras_consecutive_requests, \ ras->ras_consecutive_pages, ras->ras_window_start, \ ras->ras_window_len, ras->ras_next_readahead, \ + ras->ras_rpc_size, \ ras->ras_requests, ras->ras_request_index, \ ras->ras_consecutive_stride_requests, ras->ras_stride_offset, \ ras->ras_stride_pages, ras->ras_stride_length) @@ -261,20 +243,6 @@ out: ria->ria_start, ria->ria_end, ria->ria_stoff, ria->ria_length,\ ria->ria_pages) -/* Limit this to the blocksize instead of PTLRPC_BRW_MAX_SIZE, since we don't - * know what the actual RPC size is. If this needs to change, it makes more - * sense to tune the i_blkbits value for the file based on the OSTs it is - * striped over, rather than having a constant value for all files here. - */ - -/* RAS_INCREASE_STEP should be (1UL << (inode->i_blkbits - PAGE_SHIFT)). - * Temporarily set RAS_INCREASE_STEP to 1MB. After 4MB RPC is enabled - * by default, this should be adjusted corresponding with max_read_ahead_mb - * and max_read_ahead_per_file_mb otherwise the readahead budget can be used - * up quickly which will affect read performance significantly. See LU-2816 - */ -#define RAS_INCREASE_STEP(inode) (ONE_MB_BRW_SIZE >> PAGE_SHIFT) - static inline int stride_io_mode(struct ll_readahead_state *ras) { return ras->ras_consecutive_stride_requests > 1; @@ -345,6 +313,17 @@ static int ria_page_count(struct ra_io_arg *ria) length); } +static unsigned long ras_align(struct ll_readahead_state *ras, + unsigned long index, + unsigned long *remainder) +{ + unsigned long rem = index % ras->ras_rpc_size; + + if (remainder) + *remainder = rem; + return index - rem; +} + /*Check whether the index is in the defined ra-window */ static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria) { @@ -358,42 +337,63 @@ static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria) ria->ria_length < ria->ria_pages); } -static int ll_read_ahead_pages(const struct lu_env *env, - struct cl_io *io, struct cl_page_list *queue, - struct ra_io_arg *ria, - unsigned long *reserved_pages, - pgoff_t *ra_end) +static unsigned long +ll_read_ahead_pages(const struct lu_env *env, struct cl_io *io, + struct cl_page_list *queue, struct ll_readahead_state *ras, + struct ra_io_arg *ria) { struct cl_read_ahead ra = { 0 }; - int rc, count = 0; + unsigned long ra_end = 0; bool stride_ria; pgoff_t page_idx; + int rc; LASSERT(ria); RIA_DEBUG(ria); stride_ria = ria->ria_length > ria->ria_pages && ria->ria_pages > 0; for (page_idx = ria->ria_start; - page_idx <= ria->ria_end && *reserved_pages > 0; page_idx++) { + page_idx <= ria->ria_end && ria->ria_reserved > 0; page_idx++) { if (ras_inside_ra_window(page_idx, ria)) { if (!ra.cra_end || ra.cra_end < page_idx) { + unsigned long end; + cl_read_ahead_release(env, &ra); rc = cl_io_read_ahead(env, io, page_idx, &ra); if (rc < 0) break; + CDEBUG(D_READA, "idx: %lu, ra: %lu, rpc: %lu\n", + page_idx, ra.cra_end, ra.cra_rpc_size); LASSERTF(ra.cra_end >= page_idx, "object: %p, indcies %lu / %lu\n", io->ci_obj, ra.cra_end, page_idx); + /* + * update read ahead RPC size. + * NB: it's racy but doesn't matter + */ + if (ras->ras_rpc_size > ra.cra_rpc_size && + ra.cra_rpc_size > 0) + ras->ras_rpc_size = ra.cra_rpc_size; + /* trim it to align with optimal RPC size */ + end = ras_align(ras, ria->ria_end + 1, NULL); + if (end > 0 && !ria->ria_eof) + ria->ria_end = end - 1; + if (ria->ria_end < ria->ria_end_min) + ria->ria_end = ria->ria_end_min; + if (ria->ria_end > ra.cra_end) + ria->ria_end = ra.cra_end; } - /* If the page is inside the read-ahead window*/ + /* If the page is inside the read-ahead window */ rc = ll_read_ahead_page(env, io, queue, page_idx); - if (!rc) { - (*reserved_pages)--; - count++; - } + if (rc < 0) + break; + + ra_end = page_idx; + if (!rc) + ria->ria_reserved--; } else if (stride_ria) { /* If it is not in the read-ahead window, and it is * read-ahead mode, then check whether it should skip @@ -420,8 +420,7 @@ static int ll_read_ahead_pages(const struct lu_env *env, } cl_read_ahead_release(env, &ra); - *ra_end = page_idx; - return count; + return ra_end; } static int ll_readahead(const struct lu_env *env, struct cl_io *io, @@ -431,7 +430,7 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io, struct vvp_io *vio = vvp_env_io(env); struct ll_thread_info *lti = ll_env_info(env); struct cl_attr *attr = vvp_env_thread_attr(env); - unsigned long len, mlen = 0, reserved; + unsigned long len, mlen = 0; pgoff_t ra_end, start = 0, end = 0; struct inode *inode; struct ra_io_arg *ria = <i->lti_ria; @@ -478,29 +477,15 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io, end < vio->vui_ra_start + vio->vui_ra_count - 1) end = vio->vui_ra_start + vio->vui_ra_count - 1; - if (end != 0) { - unsigned long rpc_boundary; - /* - * Align RA window to an optimal boundary. - * - * XXX This would be better to align to cl_max_pages_per_rpc - * instead of PTLRPC_MAX_BRW_PAGES, because the RPC size may - * be aligned to the RAID stripe size in the future and that - * is more important than the RPC size. - */ - /* Note: we only trim the RPC, instead of extending the RPC - * to the boundary, so to avoid reading too much pages during - * random reading. - */ - rpc_boundary = (end + 1) & (~(PTLRPC_MAX_BRW_PAGES - 1)); - if (rpc_boundary > 0) - rpc_boundary--; - - if (rpc_boundary > start) - end = rpc_boundary; + if (end) { + unsigned long end_index; /* Truncate RA window to end of file */ - end = min(end, (unsigned long)((kms - 1) >> PAGE_SHIFT)); + end_index = (unsigned long)((kms - 1) >> PAGE_SHIFT); + if (end_index <= end) { + end = end_index; + ria->ria_eof = true; + } ras->ras_next_readahead = max(end, end + 1); RAS_CDEBUG(ras); @@ -535,28 +520,31 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io, /* at least to extend the readahead window to cover current read */ if (!hit && vio->vui_ra_valid && vio->vui_ra_start + vio->vui_ra_count > ria->ria_start) { + unsigned long remainder; + /* to the end of current read window. */ mlen = vio->vui_ra_start + vio->vui_ra_count - ria->ria_start; /* trim to RPC boundary */ - start = ria->ria_start & (PTLRPC_MAX_BRW_PAGES - 1); - mlen = min(mlen, PTLRPC_MAX_BRW_PAGES - start); + ras_align(ras, ria->ria_start, &remainder); + mlen = min(mlen, ras->ras_rpc_size - remainder); + ria->ria_end_min = ria->ria_start + mlen; } - reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen); - if (reserved < len) + ria->ria_reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen); + if (ria->ria_reserved < len) ll_ra_stats_inc(inode, RA_STAT_MAX_IN_FLIGHT); CDEBUG(D_READA, "reserved pages %lu/%lu/%lu, ra_cur %d, ra_max %lu\n", - reserved, len, mlen, + ria->ria_reserved, len, mlen, atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages), ll_i2sbi(inode)->ll_ra_info.ra_max_pages); - ret = ll_read_ahead_pages(env, io, queue, ria, &reserved, &ra_end); + ra_end = ll_read_ahead_pages(env, io, queue, ras, ria); - if (reserved != 0) - ll_ra_count_put(ll_i2sbi(inode), reserved); + if (ria->ria_reserved) + ll_ra_count_put(ll_i2sbi(inode), ria->ria_reserved); - if (ra_end == end + 1 && ra_end == (kms >> PAGE_SHIFT)) + if (ra_end == end && ra_end == (kms >> PAGE_SHIFT)) ll_ra_stats_inc(inode, RA_STAT_EOF); /* if we didn't get to the end of the region we reserved from @@ -568,13 +556,13 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io, CDEBUG(D_READA, "ra_end = %lu end = %lu stride end = %lu pages = %d\n", ra_end, end, ria->ria_end, ret); - if (ra_end != end + 1) { + if (ra_end > 0 && ra_end != end) { ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END); spin_lock(&ras->ras_lock); - if (ra_end < ras->ras_next_readahead && + if (ra_end <= ras->ras_next_readahead && index_in_window(ra_end, ras->ras_window_start, 0, ras->ras_window_len)) { - ras->ras_next_readahead = ra_end; + ras->ras_next_readahead = ra_end + 1; RAS_CDEBUG(ras); } spin_unlock(&ras->ras_lock); @@ -586,7 +574,7 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io, static void ras_set_start(struct inode *inode, struct ll_readahead_state *ras, unsigned long index) { - ras->ras_window_start = index & (~(RAS_INCREASE_STEP(inode) - 1)); + ras->ras_window_start = ras_align(ras, index, NULL); } /* called with the ras_lock held or from places where it doesn't matter */ @@ -615,6 +603,7 @@ static void ras_stride_reset(struct ll_readahead_state *ras) void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras) { spin_lock_init(&ras->ras_lock); + ras->ras_rpc_size = PTLRPC_MAX_BRW_PAGES; ras_reset(inode, ras, 0); ras->ras_requests = 0; } @@ -719,12 +708,15 @@ static void ras_increase_window(struct inode *inode, * but current clio architecture does not support retrieve such * information from lower layer. FIXME later */ - if (stride_io_mode(ras)) - ras_stride_increase_window(ras, ra, RAS_INCREASE_STEP(inode)); - else - ras->ras_window_len = min(ras->ras_window_len + - RAS_INCREASE_STEP(inode), - ra->ra_max_pages_per_file); + if (stride_io_mode(ras)) { + ras_stride_increase_window(ras, ra, ras->ras_rpc_size); + } else { + unsigned long wlen; + + wlen = min(ras->ras_window_len + ras->ras_rpc_size, + ra->ra_max_pages_per_file); + ras->ras_window_len = ras_align(ras, wlen, NULL); + } } static void ras_update(struct ll_sb_info *sbi, struct inode *inode, @@ -737,6 +729,10 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode, spin_lock(&ras->ras_lock); + if (!hit) + CDEBUG(D_READA, DFID " pages at %lu miss.\n", + PFID(ll_inode2fid(inode)), index); + ll_ra_stats_inc_sbi(sbi, hit ? RA_STAT_HIT : RA_STAT_MISS); /* reset the read-ahead window in two cases. First when the app seeks @@ -852,6 +848,8 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode, * instead of ras_window_start, which is RPC aligned */ ras->ras_next_readahead = max(index, ras->ras_next_readahead); + ras->ras_window_start = max(ras->ras_stride_offset, + ras->ras_window_start); } else { if (ras->ras_next_readahead < ras->ras_window_start) ras->ras_next_readahead = ras->ras_window_start; @@ -881,7 +879,7 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode, */ ras->ras_next_readahead = max(index, ras->ras_next_readahead); ras->ras_stride_offset = index; - ras->ras_window_len = RAS_INCREASE_STEP(inode); + ras->ras_window_start = max(index, ras->ras_window_start); } /* The initial ras_window_len is set to the request size. To avoid @@ -1098,38 +1096,39 @@ static int ll_io_read_page(const struct lu_env *env, struct cl_io *io, struct cl_2queue *queue = &io->ci_queue; struct ll_sb_info *sbi = ll_i2sbi(inode); struct vvp_page *vpg; + bool uptodate; int rc = 0; vpg = cl2vvp_page(cl_object_page_slice(page->cp_obj, page)); + uptodate = vpg->vpg_defer_uptodate; + if (sbi->ll_ra_info.ra_max_pages_per_file > 0 && sbi->ll_ra_info.ra_max_pages > 0) { struct vvp_io *vio = vvp_env_io(env); enum ras_update_flags flags = 0; - if (vpg->vpg_defer_uptodate) + if (uptodate) flags |= LL_RAS_HIT; if (!vio->vui_ra_valid) flags |= LL_RAS_MMAP; ras_update(sbi, inode, ras, vvp_index(vpg), flags); } - if (vpg->vpg_defer_uptodate) { + cl_2queue_init(queue); + if (uptodate) { vpg->vpg_ra_used = 1; cl_page_export(env, page, 1); + cl_page_disown(env, io, page); + } else { + cl_page_list_add(&queue->c2_qin, page); } - cl_2queue_init(queue); - /* - * Add page into the queue even when it is marked uptodate above. - * this will unlock it automatically as part of cl_page_list_disown(). - */ - cl_page_list_add(&queue->c2_qin, page); if (sbi->ll_ra_info.ra_max_pages_per_file > 0 && sbi->ll_ra_info.ra_max_pages > 0) { int rc2; rc2 = ll_readahead(env, io, &queue->c2_qin, ras, - vpg->vpg_defer_uptodate); + uptodate); CDEBUG(D_READA, DFID "%d pages read ahead at %lu\n", PFID(ll_inode2fid(inode)), rc2, vvp_index(vpg)); } diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c index 21e06e5b514e..d89e79599199 100644 --- a/drivers/staging/lustre/lustre/llite/rw26.c +++ b/drivers/staging/lustre/lustre/llite/rw26.c @@ -345,6 +345,10 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter) ssize_t tot_bytes = 0, result = 0; long size = MAX_DIO_SIZE; + /* Check EOF by ourselves */ + if (iov_iter_rw(iter) == READ && file_offset >= i_size_read(inode)) + return 0; + /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */ if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK)) return -EINVAL; diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c index f1ee17f9ec0d..fb7c315b33cb 100644 --- a/drivers/staging/lustre/lustre/llite/statahead.c +++ b/drivers/staging/lustre/lustre/llite/statahead.c @@ -79,6 +79,8 @@ struct sa_entry { struct inode *se_inode; /* entry name */ struct qstr se_qstr; + /* entry fid */ + struct lu_fid se_fid; }; static unsigned int sai_generation; @@ -169,7 +171,7 @@ static inline int is_omitted_entry(struct ll_statahead_info *sai, __u64 index) /* allocate sa_entry and hash it to allow scanner process to find it */ static struct sa_entry * sa_alloc(struct dentry *parent, struct ll_statahead_info *sai, __u64 index, - const char *name, int len) + const char *name, int len, const struct lu_fid *fid) { struct ll_inode_info *lli; struct sa_entry *entry; @@ -194,6 +196,7 @@ sa_alloc(struct dentry *parent, struct ll_statahead_info *sai, __u64 index, entry->se_qstr.hash = full_name_hash(parent, name, len); entry->se_qstr.len = len; entry->se_qstr.name = dname; + entry->se_fid = *fid; lli = ll_i2info(sai->sai_dentry->d_inode); spin_lock(&lli->lli_sa_lock); @@ -566,24 +569,8 @@ static void sa_instantiate(struct ll_statahead_info *sai, } child = entry->se_inode; - if (!child) { - /* - * lookup. - */ - LASSERT(fid_is_zero(&minfo->mi_data.op_fid2)); - - /* XXX: No fid in reply, this is probably cross-ref case. - * SA can't handle it yet. - */ - if (body->mbo_valid & OBD_MD_MDS) { - rc = -EAGAIN; - goto out; - } - } else { - /* - * revalidate. - */ - /* unlinked and re-created with the same name */ + if (child) { + /* revalidate; unlinked and re-created with the same name */ if (unlikely(!lu_fid_eq(&minfo->mi_data.op_fid2, &body->mbo_fid1))) { entry->se_inode = NULL; iput(child); @@ -720,50 +707,42 @@ static int ll_statahead_interpret(struct ptlrpc_request *req, } /* finish async stat RPC arguments */ -static void sa_fini_data(struct md_enqueue_info *minfo, - struct ldlm_enqueue_info *einfo) +static void sa_fini_data(struct md_enqueue_info *minfo) { - LASSERT(minfo && einfo); iput(minfo->mi_dir); kfree(minfo); - kfree(einfo); } /** * prepare arguments for async stat RPC. */ -static int sa_prep_data(struct inode *dir, struct inode *child, - struct sa_entry *entry, struct md_enqueue_info **pmi, - struct ldlm_enqueue_info **pei) +static struct md_enqueue_info * +sa_prep_data(struct inode *dir, struct inode *child, struct sa_entry *entry) { - const struct qstr *qstr = &entry->se_qstr; struct md_enqueue_info *minfo; struct ldlm_enqueue_info *einfo; struct md_op_data *op_data; - einfo = kzalloc(sizeof(*einfo), GFP_NOFS); - if (!einfo) - return -ENOMEM; - minfo = kzalloc(sizeof(*minfo), GFP_NOFS); - if (!minfo) { - kfree(einfo); - return -ENOMEM; - } + if (!minfo) + return ERR_PTR(-ENOMEM); - op_data = ll_prep_md_op_data(&minfo->mi_data, dir, child, qstr->name, - qstr->len, 0, LUSTRE_OPC_ANY, NULL); + op_data = ll_prep_md_op_data(&minfo->mi_data, dir, child, NULL, 0, 0, + LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) { - kfree(einfo); kfree(minfo); - return PTR_ERR(op_data); + return (struct md_enqueue_info *)op_data; } + if (!child) + op_data->op_fid2 = entry->se_fid; + minfo->mi_it.it_op = IT_GETATTR; minfo->mi_dir = igrab(dir); minfo->mi_cb = ll_statahead_interpret; minfo->mi_cbdata = entry; + einfo = &minfo->mi_einfo; einfo->ei_type = LDLM_IBITS; einfo->ei_mode = it_to_lock_mode(&minfo->mi_it); einfo->ei_cb_bl = ll_md_blocking_ast; @@ -771,26 +750,22 @@ static int sa_prep_data(struct inode *dir, struct inode *child, einfo->ei_cb_gl = NULL; einfo->ei_cbdata = NULL; - *pmi = minfo; - *pei = einfo; - - return 0; + return minfo; } /* async stat for file not found in dcache */ static int sa_lookup(struct inode *dir, struct sa_entry *entry) { struct md_enqueue_info *minfo; - struct ldlm_enqueue_info *einfo; int rc; - rc = sa_prep_data(dir, NULL, entry, &minfo, &einfo); - if (rc) - return rc; + minfo = sa_prep_data(dir, NULL, entry); + if (IS_ERR(minfo)) + return PTR_ERR(minfo); - rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo, einfo); + rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo); if (rc) - sa_fini_data(minfo, einfo); + sa_fini_data(minfo); return rc; } @@ -809,7 +784,6 @@ static int sa_revalidate(struct inode *dir, struct sa_entry *entry, struct lookup_intent it = { .it_op = IT_GETATTR, .it_lock_handle = 0 }; struct md_enqueue_info *minfo; - struct ldlm_enqueue_info *einfo; int rc; if (unlikely(!inode)) @@ -827,25 +801,26 @@ static int sa_revalidate(struct inode *dir, struct sa_entry *entry, return 1; } - rc = sa_prep_data(dir, inode, entry, &minfo, &einfo); - if (rc) { + minfo = sa_prep_data(dir, inode, entry); + if (IS_ERR(minfo)) { entry->se_inode = NULL; iput(inode); - return rc; + return PTR_ERR(minfo); } - rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo, einfo); + rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo); if (rc) { entry->se_inode = NULL; iput(inode); - sa_fini_data(minfo, einfo); + sa_fini_data(minfo); } return rc; } /* async stat for file with @name */ -static void sa_statahead(struct dentry *parent, const char *name, int len) +static void sa_statahead(struct dentry *parent, const char *name, int len, + const struct lu_fid *fid) { struct inode *dir = d_inode(parent); struct ll_inode_info *lli = ll_i2info(dir); @@ -854,7 +829,7 @@ static void sa_statahead(struct dentry *parent, const char *name, int len) struct sa_entry *entry; int rc; - entry = sa_alloc(parent, sai, sai->sai_index, name, len); + entry = sa_alloc(parent, sai, sai->sai_index, name, len, fid); if (IS_ERR(entry)) return; @@ -1043,6 +1018,7 @@ static int ll_statahead_thread(void *arg) for (ent = lu_dirent_start(dp); ent && thread_is_running(sa_thread) && !sa_low_hit(sai); ent = lu_dirent_next(ent)) { + struct lu_fid fid; __u64 hash; int namelen; char *name; @@ -1088,6 +1064,8 @@ static int ll_statahead_thread(void *arg) if (unlikely(++first == 1)) continue; + fid_le_to_cpu(&fid, &ent->lde_fid); + /* wait for spare statahead window */ do { l_wait_event(sa_thread->t_ctl_waitq, @@ -1117,7 +1095,7 @@ static int ll_statahead_thread(void *arg) } while (sa_sent_full(sai) && thread_is_running(sa_thread)); - sa_statahead(parent, name, namelen); + sa_statahead(parent, name, namelen, &fid); } pos = le64_to_cpu(dp->ldp_hash_end); diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c index 106cd00910a7..4759802e062d 100644 --- a/drivers/staging/lustre/lustre/llite/super25.c +++ b/drivers/staging/lustre/lustre/llite/super25.c @@ -88,7 +88,7 @@ static int __init lustre_init(void) struct timespec64 ts; int i, rc, seed[2]; - CLASSERT(sizeof(LUSTRE_VOLATILE_HDR) == LUSTRE_VOLATILE_HDR_LEN + 1); + BUILD_BUG_ON(sizeof(LUSTRE_VOLATILE_HDR) != LUSTRE_VOLATILE_HDR_LEN + 1); /* print an address of _any_ initialized kernel symbol from this * module, to allow debugging with gdb that doesn't support data diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c index 12c129f7e4ad..3669ea77ee93 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_dev.c +++ b/drivers/staging/lustre/lustre/llite/vvp_dev.c @@ -391,7 +391,7 @@ struct vvp_pgcache_id { static void vvp_pgcache_id_unpack(loff_t pos, struct vvp_pgcache_id *id) { - CLASSERT(sizeof(pos) == sizeof(__u64)); + BUILD_BUG_ON(sizeof(pos) != sizeof(__u64)); id->vpi_index = pos & 0xffffffff; id->vpi_depth = (pos >> PGC_DEPTH_SHIFT) & 0xf; diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h index c60d0414ac25..f40fd7f115d1 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_internal.h +++ b/drivers/staging/lustre/lustre/llite/vvp_internal.h @@ -301,8 +301,6 @@ static inline struct vvp_lock *cl2vvp_lock(const struct cl_lock_slice *slice) # define CLOBINVRNT(env, clob, expr) \ ((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr))) -int lov_read_and_clear_async_rc(struct cl_object *clob); - int vvp_io_init(const struct lu_env *env, struct cl_object *obj, struct cl_io *io); int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io); diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c index 697cbfbe9374..3e9cf710501b 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_io.c +++ b/drivers/staging/lustre/lustre/llite/vvp_io.c @@ -288,7 +288,7 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) io->ci_ignore_layout, io->ci_verify_layout, vio->vui_layout_gen, io->ci_restore_needed); - if (io->ci_restore_needed == 1) { + if (io->ci_restore_needed) { int rc; /* file was detected release, we need to restore it @@ -657,7 +657,15 @@ static void vvp_io_setattr_end(const struct lu_env *env, static void vvp_io_setattr_fini(const struct lu_env *env, const struct cl_io_slice *ios) { + bool restore_needed = ios->cis_io->ci_restore_needed; + struct inode *inode = vvp_object_inode(ios->cis_obj); + vvp_io_fini(env, ios); + + if (restore_needed && !ios->cis_io->ci_restore_needed) { + /* restore finished, set data modified flag for HSM */ + set_bit(LLIF_DATA_MODIFIED, &(ll_i2info(inode))->lli_flags); + } } static int vvp_io_read_start(const struct lu_env *env, @@ -1340,13 +1348,6 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj, io->ci_lockreq = CILR_MANDATORY; } - /* ignore layout change for generic CIT_MISC but not for glimpse. - * io context for glimpse must set ci_verify_layout to true, - * see cl_glimpse_size0() for details. - */ - if (io->ci_type == CIT_MISC && !io->ci_verify_layout) - io->ci_ignore_layout = 1; - /* Enqueue layout lock and get layout version. We need to do this * even for operations requiring to open file, such as read and write, * because it might not grant layout lock in IT_OPEN. diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c index 23d66308ff20..687c0c79d621 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_page.c +++ b/drivers/staging/lustre/lustre/llite/vvp_page.c @@ -227,7 +227,8 @@ static int vvp_page_prep_write(const struct lu_env *env, * This takes inode as a separate argument, because inode on which error is to * be set can be different from \a vmpage inode in case of direct-io. */ -static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret) +static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, + int ioret) { struct vvp_object *obj = cl_inode2vvp(inode); diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c index 7a848ebc57c1..421cc04ecf1e 100644 --- a/drivers/staging/lustre/lustre/llite/xattr.c +++ b/drivers/staging/lustre/lustre/llite/xattr.c @@ -132,6 +132,15 @@ ll_xattr_set_common(const struct xattr_handler *handler, (!strcmp(name, "ima") || !strcmp(name, "evm"))) return -EOPNOTSUPP; + /* + * In user.* namespace, only regular files and directories can have + * extended attributes. + */ + if (handler->flags == XATTR_USER_T) { + if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) + return -EPERM; + } + sprintf(fullname, "%s%s\n", handler->prefix, name); rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid, fullname, pv, size, 0, flags, |