aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/staging/lustre/lustre/llite
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/staging/lustre/lustre/llite')
-rw-r--r--drivers/staging/lustre/lustre/llite/dcache.c13
-rw-r--r--drivers/staging/lustre/lustre/llite/dir.c16
-rw-r--r--drivers/staging/lustre/lustre/llite/file.c109
-rw-r--r--drivers/staging/lustre/lustre/llite/lcommon_cl.c9
-rw-r--r--drivers/staging/lustre/lustre/llite/lcommon_misc.c2
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_internal.h16
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_lib.c126
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_mmap.c11
-rw-r--r--drivers/staging/lustre/lustre/llite/lproc_llite.c27
-rw-r--r--drivers/staging/lustre/lustre/llite/namei.c9
-rw-r--r--drivers/staging/lustre/lustre/llite/range_lock.c10
-rw-r--r--drivers/staging/lustre/lustre/llite/range_lock.h2
-rw-r--r--drivers/staging/lustre/lustre/llite/rw.c199
-rw-r--r--drivers/staging/lustre/lustre/llite/rw26.c4
-rw-r--r--drivers/staging/lustre/lustre/llite/statahead.c94
-rw-r--r--drivers/staging/lustre/lustre/llite/super25.c2
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_dev.c2
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_internal.h2
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_io.c19
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_page.c3
-rw-r--r--drivers/staging/lustre/lustre/llite/xattr.c9
21 files changed, 347 insertions, 337 deletions
diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c
index 65bf0c401b44..966f580e26fb 100644
--- a/drivers/staging/lustre/lustre/llite/dcache.c
+++ b/drivers/staging/lustre/lustre/llite/dcache.c
@@ -247,17 +247,14 @@ static int ll_revalidate_dentry(struct dentry *dentry,
return 1;
/*
- * if open&create is set, talk to MDS to make sure file is created if
- * necessary, because we can't do this in ->open() later since that's
- * called on an inode. return 0 here to let lookup to handle this.
+ * VFS warns us that this is the second go around and previous
+ * operation failed (most likely open|creat), so this time
+ * we better talk to the server via the lookup path by name,
+ * not by fid.
*/
- if ((lookup_flags & (LOOKUP_OPEN | LOOKUP_CREATE)) ==
- (LOOKUP_OPEN | LOOKUP_CREATE))
+ if (lookup_flags & LOOKUP_REVAL)
return 0;
- if (lookup_flags & (LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE))
- return 1;
-
if (!dentry_may_statahead(dir, dentry))
return 1;
diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
index ea5d247a3f70..13b35922a4ca 100644
--- a/drivers/staging/lustre/lustre/llite/dir.c
+++ b/drivers/staging/lustre/lustre/llite/dir.c
@@ -432,7 +432,7 @@ static int ll_dir_setdirstripe(struct inode *parent, struct lmv_user_md *lump,
if (!IS_POSIXACL(parent) || !exp_connect_umask(ll_i2mdexp(parent)))
mode &= ~current_umask();
- mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR;
+ mode = (mode & (0777 | S_ISVTX)) | S_IFDIR;
op_data = ll_prep_md_op_data(NULL, parent, NULL, dirname,
strlen(dirname), mode, LUSTRE_OPC_MKDIR,
lump);
@@ -521,12 +521,15 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
rc = md_setattr(sbi->ll_md_exp, op_data, lump, lum_size, &req);
ll_finish_md_op_data(op_data);
ptlrpc_req_finished(req);
- if (rc) {
- if (rc != -EPERM && rc != -EACCES)
- CERROR("mdc_setattr fails: rc = %d\n", rc);
- }
+ if (rc)
+ return rc;
- /* In the following we use the fact that LOV_USER_MAGIC_V1 and
+#if OBD_OCD_VERSION(2, 13, 53, 0) > LUSTRE_VERSION_CODE
+ /*
+ * A 2.9 server stores the filesystem default stripe in the ROOT xattr,
+ * and it is stored in the system config for backward compatibility.
+ *
+ * In the following we use the fact that LOV_USER_MAGIC_V1 and
* LOV_USER_MAGIC_V3 have the same initial fields so we do not
* need to make the distinction between the 2 versions
*/
@@ -567,6 +570,7 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
end:
kfree(param);
}
+#endif
return rc;
}
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
index f634c11216e6..10adfcdd7035 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -122,26 +122,25 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
enum mds_op_bias bias,
void *data)
{
- struct obd_export *exp = ll_i2mdexp(inode);
+ const struct ll_inode_info *lli = ll_i2info(inode);
struct md_op_data *op_data;
struct ptlrpc_request *req = NULL;
- struct obd_device *obd = class_exp2obd(exp);
int rc;
- if (!obd) {
- /*
- * XXX: in case of LMV, is this correct to access
- * ->exp_handle?
- */
- CERROR("Invalid MDC connection handle %#llx\n",
- ll_i2mdexp(inode)->exp_handle.h_cookie);
+ if (!class_exp2obd(md_exp)) {
+ CERROR("%s: invalid MDC connection handle closing " DFID "\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(&lli->lli_fid));
rc = 0;
goto out;
}
op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
+ /*
+ * We leak openhandle and request here on error, but not much to be
+ * done in OOM case since app won't retry close on error either.
+ */
if (!op_data) {
- /* XXX We leak openhandle and request here. */
rc = -ENOMEM;
goto out;
}
@@ -170,10 +169,9 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
}
rc = md_close(md_exp, op_data, och->och_mod, &req);
- if (rc) {
- CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
- ll_i2mdexp(inode)->exp_obd->obd_name,
- PFID(ll_inode2fid(inode)), rc);
+ if (rc && rc != -EINTR) {
+ CERROR("%s: inode " DFID " mdc close failed: rc = %d\n",
+ md_exp->exp_obd->obd_name, PFID(&lli->lli_fid), rc);
}
if (op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP) &&
@@ -192,8 +190,7 @@ out:
och->och_fh.cookie = DEAD_HANDLE_MAGIC;
kfree(och);
- if (req) /* This is close request */
- ptlrpc_req_finished(req);
+ ptlrpc_req_finished(req);
return rc;
}
@@ -420,6 +417,17 @@ out:
ptlrpc_req_finished(req);
ll_intent_drop_lock(itp);
+ /*
+ * We did open by fid, but by the time we got to the server,
+ * the object disappeared. If this is a create, we cannot really
+ * tell the userspace that the file it was trying to create
+ * does not exist. Instead let's return -ESTALE, and the VFS will
+ * retry the create with LOOKUP_REVAL that we are going to catch
+ * in ll_revalidate_dentry() and use lookup then.
+ */
+ if (rc == -ENOENT && itp->it_op & IT_CREAT)
+ rc = -ESTALE;
+
return rc;
}
@@ -1016,7 +1024,7 @@ static bool file_is_noatime(const struct file *file)
return false;
}
-void ll_io_init(struct cl_io *io, const struct file *file, int write)
+static void ll_io_init(struct cl_io *io, const struct file *file, int write)
{
struct inode *inode = file_inode(file);
@@ -1821,7 +1829,7 @@ free:
return rc;
}
-static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
+int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
{
struct md_op_data *op_data;
int rc;
@@ -1883,7 +1891,7 @@ static int ll_hsm_import(struct inode *inode, struct file *file,
goto free_hss;
}
- attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
+ attr->ia_mode = hui->hui_mode & 0777;
attr->ia_mode |= S_IFREG;
attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
@@ -2618,18 +2626,18 @@ int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
ll_get_fsname(parent->i_sb, NULL, 0), name,
PFID(&op_data->op_fid3));
rc = -EINVAL;
- goto out_free;
+ goto out_unlock;
}
rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
if (rc < 0)
- goto out_free;
+ goto out_unlock;
if (rc == mdtidx) {
CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name,
PFID(&op_data->op_fid3), mdtidx);
rc = 0;
- goto out_free;
+ goto out_unlock;
}
again:
if (S_ISREG(child_inode->i_mode)) {
@@ -2637,13 +2645,13 @@ again:
if (IS_ERR(och)) {
rc = PTR_ERR(och);
och = NULL;
- goto out_free;
+ goto out_unlock;
}
rc = ll_data_version(child_inode, &data_version,
LL_DV_WR_FLUSH);
if (rc)
- goto out_free;
+ goto out_close;
op_data->op_handle = och->och_fh;
op_data->op_data = och->och_mod;
@@ -2656,40 +2664,45 @@ again:
op_data->op_cli_flags = CLI_MIGRATE;
rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
namelen, name, namelen, &request);
- if (!rc)
+ if (!rc) {
+ LASSERT(request);
ll_update_times(request, parent);
- body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
- if (!body) {
- rc = -EPROTO;
- goto out_free;
+ body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
+ LASSERT(body);
+
+ /*
+ * If the server does release layout lock, then we clean up
+ * the client och here, otherwise release it in out_close:
+ */
+ if (och && body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) {
+ obd_mod_put(och->och_mod);
+ md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp,
+ och);
+ och->och_fh.cookie = DEAD_HANDLE_MAGIC;
+ kfree(och);
+ och = NULL;
+ }
}
- /*
- * If the server does release layout lock, then we cleanup
- * the client och here, otherwise release it in out_free:
- */
- if (och && body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) {
- obd_mod_put(och->och_mod);
- md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp, och);
- och->och_fh.cookie = DEAD_HANDLE_MAGIC;
- kfree(och);
- och = NULL;
+ if (request) {
+ ptlrpc_req_finished(request);
+ request = NULL;
}
- ptlrpc_req_finished(request);
/* Try again if the file layout has changed. */
if (rc == -EAGAIN && S_ISREG(child_inode->i_mode))
goto again;
-out_free:
- if (child_inode) {
- if (och) /* close the file */
- ll_lease_close(och, child_inode, NULL);
- clear_nlink(child_inode);
- inode_unlock(child_inode);
- iput(child_inode);
- }
+out_close:
+ if (och) /* close the file */
+ ll_lease_close(och, child_inode, NULL);
+ if (!rc)
+ clear_nlink(child_inode);
+out_unlock:
+ inode_unlock(child_inode);
+ iput(child_inode);
+out_free:
ll_finish_md_op_data(op_data);
return rc;
}
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
index dd1cfd8f5213..f1036f477a51 100644
--- a/drivers/staging/lustre/lustre/llite/lcommon_cl.c
+++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
@@ -94,6 +94,7 @@ int cl_setattr_ost(struct cl_object *obj, const struct iattr *attr,
io = vvp_env_thread_io(env);
io->ci_obj = obj;
+ io->ci_verify_layout = 1;
io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime);
io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime);
@@ -120,13 +121,7 @@ again:
cl_io_fini(env, io);
if (unlikely(io->ci_need_restart))
goto again;
- /* HSM import case: file is released, cannot be restored
- * no need to fail except if restore registration failed
- * with -ENODATA
- */
- if (result == -ENODATA && io->ci_restore_needed &&
- io->ci_result != -ENODATA)
- result = 0;
+
cl_env_put(env, &refcheck);
return result;
}
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_misc.c b/drivers/staging/lustre/lustre/llite/lcommon_misc.c
index f48660ed350f..f0c132e2cf92 100644
--- a/drivers/staging/lustre/lustre/llite/lcommon_misc.c
+++ b/drivers/staging/lustre/lustre/llite/lcommon_misc.c
@@ -33,6 +33,7 @@
* future).
*
*/
+#define DEBUG_SUBSYSTEM S_LLITE
#include "../include/obd_class.h"
#include "../include/obd_support.h"
#include "../include/obd.h"
@@ -132,7 +133,6 @@ int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
io = vvp_env_thread_io(env);
io->ci_obj = obj;
- io->ci_ignore_layout = 1;
rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
if (rc != 0) {
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index 065a9a7e120a..ecdfd0c29b7f 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -281,10 +281,8 @@ static inline struct ll_inode_info *ll_i2info(struct inode *inode)
return container_of(inode, struct ll_inode_info, lli_vfs_inode);
}
-/* default to about 40meg of readahead on a given system. That much tied
- * up in 512k readahead requests serviced at 40ms each is about 1GB/s.
- */
-#define SBI_DEFAULT_READAHEAD_MAX (40UL << (20 - PAGE_SHIFT))
+/* default to about 64M of readahead on a given system. */
+#define SBI_DEFAULT_READAHEAD_MAX (64UL << (20 - PAGE_SHIFT))
/* default to read-ahead full files smaller than 2MB on the second read */
#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - PAGE_SHIFT))
@@ -321,6 +319,9 @@ struct ll_ra_info {
struct ra_io_arg {
unsigned long ria_start; /* start offset of read-ahead*/
unsigned long ria_end; /* end offset of read-ahead*/
+ unsigned long ria_reserved; /* reserved pages for read-ahead */
+ unsigned long ria_end_min; /* minimum end to cover current read */
+ bool ria_eof; /* reach end of file */
/* If stride read pattern is detected, ria_stoff means where
* stride read is started. Note: for normal read-ahead, the
* value here is meaningless, and also it will not be accessed
@@ -505,6 +506,7 @@ struct ll_sb_info {
*/
/* root squash */
struct root_squash_info ll_squash;
+ struct path ll_mnt;
__kernel_fsid_t ll_fsid;
struct kobject ll_kobj; /* sysfs object */
@@ -551,6 +553,11 @@ struct ll_readahead_state {
*/
unsigned long ras_window_start, ras_window_len;
/*
+ * Optimal RPC size. It decides how many pages will be sent
+ * for each read-ahead.
+ */
+ unsigned long ras_rpc_size;
+ /*
* Where next read-ahead should start at. This lies within read-ahead
* window. Read-ahead window is read in pieces rather than at once
* because: 1. lustre limits total number of pages under read-ahead by
@@ -766,6 +773,7 @@ int ll_merge_attr(const struct lu_env *env, struct inode *inode);
int ll_fid2path(struct inode *inode, void __user *arg);
int ll_data_version(struct inode *inode, __u64 *data_version, int flags);
int ll_hsm_release(struct inode *inode);
+int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss);
/* llite/dcache.c */
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
index 25f5aed97f63..b229cbc7bb33 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -103,6 +103,7 @@ static struct ll_sb_info *ll_init_sbi(struct super_block *sb)
sbi->ll_flags |= LL_SBI_CHECKSUM;
sbi->ll_flags |= LL_SBI_LRU_RESIZE;
+ sbi->ll_flags |= LL_SBI_LAZYSTATFS;
for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
@@ -303,6 +304,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
sb->s_magic = LL_SUPER_MAGIC;
sb->s_maxbytes = MAX_LFS_FILESIZE;
sbi->ll_namelen = osfs->os_namelen;
+ sbi->ll_mnt.mnt = current->fs->root.mnt;
if ((sbi->ll_flags & LL_SBI_USER_XATTR) &&
!(data->ocd_connect_flags & OBD_CONNECT_XATTR)) {
@@ -1402,7 +1404,11 @@ static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data)
* cache is not cleared yet.
*/
op_data->op_attr.ia_valid &= ~(TIMES_SET_FLAGS | ATTR_SIZE);
+ if (S_ISREG(inode->i_mode))
+ inode_lock(inode);
rc = simple_setattr(dentry, &op_data->op_attr);
+ if (S_ISREG(inode->i_mode))
+ inode_unlock(inode);
op_data->op_attr.ia_valid = ia_valid;
rc = ll_update_inode(inode, &md);
@@ -1431,7 +1437,6 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
struct inode *inode = d_inode(dentry);
struct ll_inode_info *lli = ll_i2info(inode);
struct md_op_data *op_data = NULL;
- bool file_is_released = false;
int rc = 0;
CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, valid %x, hsm_import %d\n",
@@ -1486,76 +1491,35 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
(s64)ktime_get_real_seconds());
- /* We always do an MDS RPC, even if we're only changing the size;
- * only the MDS knows whether truncate() should fail with -ETXTBUSY
- */
-
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- if (!op_data)
- return -ENOMEM;
-
- if (!S_ISDIR(inode->i_mode))
+ if (S_ISREG(inode->i_mode))
inode_unlock(inode);
- /* truncate on a released file must failed with -ENODATA,
- * so size must not be set on MDS for released file
- * but other attributes must be set
+ /*
+ * We always do an MDS RPC, even if we're only changing the size;
+ * only the MDS knows whether truncate() should fail with -ETXTBUSY
*/
- if (S_ISREG(inode->i_mode)) {
- struct cl_layout cl = {
- .cl_is_released = false,
- };
- struct lu_env *env;
- int refcheck;
- __u32 gen;
+ op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
+ if (!op_data) {
+ rc = -ENOMEM;
+ goto out;
+ }
- rc = ll_layout_refresh(inode, &gen);
- if (rc < 0)
- goto out;
+ op_data->op_attr = *attr;
+ if (!hsm_import && attr->ia_valid & ATTR_SIZE) {
/*
- * XXX: the only place we need to know the layout type,
- * this will be removed by a later patch. -Jinshan
+ * If we are changing file size, file content is
+ * modified, flag it.
*/
- env = cl_env_get(&refcheck);
- if (IS_ERR(env)) {
- rc = PTR_ERR(env);
- goto out;
- }
-
- rc = cl_object_layout_get(env, lli->lli_clob, &cl);
- cl_env_put(env, &refcheck);
- if (rc < 0)
- goto out;
-
- file_is_released = cl.cl_is_released;
-
- if (!hsm_import && attr->ia_valid & ATTR_SIZE) {
- if (file_is_released) {
- rc = ll_layout_restore(inode, 0, attr->ia_size);
- if (rc < 0)
- goto out;
-
- file_is_released = false;
- ll_layout_refresh(inode, &gen);
- }
-
- /*
- * If we are changing file size, file content is
- * modified, flag it.
- */
- attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
- op_data->op_bias |= MDS_DATA_MODIFIED;
- }
+ attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
+ op_data->op_bias |= MDS_DATA_MODIFIED;
}
- memcpy(&op_data->op_attr, attr, sizeof(*attr));
-
rc = ll_md_setattr(dentry, op_data);
if (rc)
goto out;
- if (!S_ISREG(inode->i_mode) || file_is_released) {
+ if (!S_ISREG(inode->i_mode) || hsm_import) {
rc = 0;
goto out;
}
@@ -1572,11 +1536,40 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
*/
rc = cl_setattr_ost(ll_i2info(inode)->lli_clob, attr, 0);
}
+
+ /*
+ * If the file was restored, the dirty flag needs to be set.
+ *
+ * We've already sent MDS_DATA_MODIFIED flag in
+ * ll_md_setattr() for truncate. However, the MDT refuses to
+ * set the HS_DIRTY flag on released files, so we have to set
+ * it again if the file has been restored. Please check how
+ * LLIF_DATA_MODIFIED is set in vvp_io_setattr_fini().
+ *
+ * Please note that if the file is not released, the previous
+ * MDS_DATA_MODIFIED has taken effect and usually
+ * LLIF_DATA_MODIFIED is not set (see vvp_io_setattr_fini()).
+ * This way we can save an RPC for the common open + trunc
+ * operation.
+ */
+ if (test_and_clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags)) {
+ struct hsm_state_set hss = {
+ .hss_valid = HSS_SETMASK,
+ .hss_setmask = HS_DIRTY,
+ };
+ int rc2;
+
+ rc2 = ll_hsm_state_set(inode, &hss);
+ if (rc2 < 0)
+ CERROR(DFID "HSM set dirty failed: rc2 = %d\n",
+ PFID(ll_inode2fid(inode)), rc2);
+ }
+
out:
if (op_data)
ll_finish_md_op_data(op_data);
- if (!S_ISDIR(inode->i_mode)) {
+ if (S_ISREG(inode->i_mode)) {
inode_lock(inode);
if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
inode_dio_wait(inode);
@@ -1599,7 +1592,7 @@ int ll_setattr(struct dentry *de, struct iattr *attr)
if (((attr->ia_valid & (ATTR_MODE | ATTR_FORCE | ATTR_SIZE)) ==
(ATTR_SIZE | ATTR_MODE)) &&
(((mode & S_ISUID) && !(attr->ia_mode & S_ISUID)) ||
- (((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) &&
+ (((mode & (S_ISGID | 0010)) == (S_ISGID | 0010)) &&
!(attr->ia_mode & S_ISGID))))
attr->ia_valid |= ATTR_FORCE;
@@ -1610,7 +1603,7 @@ int ll_setattr(struct dentry *de, struct iattr *attr)
attr->ia_valid |= ATTR_KILL_SUID;
if ((attr->ia_valid & ATTR_MODE) &&
- ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) &&
+ ((mode & (S_ISGID | 0010)) == (S_ISGID | 0010)) &&
!(attr->ia_mode & S_ISGID) &&
!(attr->ia_valid & ATTR_KILL_SGID))
attr->ia_valid |= ATTR_KILL_SGID;
@@ -1998,6 +1991,8 @@ void ll_umount_begin(struct super_block *sb)
struct ll_sb_info *sbi = ll_s2sbi(sb);
struct obd_device *obd;
struct obd_ioctl_data *ioc_data;
+ wait_queue_head_t waitq;
+ struct l_wait_info lwi;
CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb,
sb->s_count, atomic_read(&sb->s_active));
@@ -2030,9 +2025,14 @@ void ll_umount_begin(struct super_block *sb)
}
/* Really, we'd like to wait until there are no requests outstanding,
- * and then continue. For now, we just invalidate the requests,
- * schedule() and sleep one second if needed, and hope.
+ * and then continue. For now, we just periodically check for vfs
+ * to decrement mnt_cnt and hope it finishes within 10 seconds.
*/
+ init_waitqueue_head(&waitq);
+ lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(10),
+ cfs_time_seconds(1), NULL, NULL);
+ l_wait_event(waitq, may_umount(sbi->ll_mnt.mnt), &lwi);
+
schedule();
}
diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c
index ee01f20d8b11..896196c74cd2 100644
--- a/drivers/staging/lustre/lustre/llite/llite_mmap.c
+++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c
@@ -321,7 +321,7 @@ out:
return fault_ret;
}
-static int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ll_fault(struct vm_fault *vmf)
{
int count = 0;
bool printed = false;
@@ -335,7 +335,7 @@ static int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM));
restart:
- result = ll_fault0(vma, vmf);
+ result = ll_fault0(vmf->vma, vmf);
LASSERT(!(result & VM_FAULT_LOCKED));
if (result == 0) {
struct page *vmpage = vmf->page;
@@ -362,8 +362,9 @@ restart:
return result;
}
-static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ll_page_mkwrite(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
int count = 0;
bool printed = false;
bool retry;
@@ -390,15 +391,13 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
result = VM_FAULT_LOCKED;
break;
case -ENODATA:
+ case -EAGAIN:
case -EFAULT:
result = VM_FAULT_NOPAGE;
break;
case -ENOMEM:
result = VM_FAULT_OOM;
break;
- case -EAGAIN:
- result = VM_FAULT_RETRY;
- break;
default:
result = VM_FAULT_SIGBUS;
break;
diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c
index 03682c10fc9e..f3ee584157e0 100644
--- a/drivers/staging/lustre/lustre/llite/lproc_llite.c
+++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c
@@ -924,27 +924,29 @@ static ssize_t ll_unstable_stats_seq_write(struct file *file,
}
LPROC_SEQ_FOPS(ll_unstable_stats);
-static ssize_t root_squash_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
+static int ll_root_squash_seq_show(struct seq_file *m, void *v)
{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
+ struct super_block *sb = m->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
struct root_squash_info *squash = &sbi->ll_squash;
- return sprintf(buf, "%u:%u\n", squash->rsi_uid, squash->rsi_gid);
+ seq_printf(m, "%u:%u\n", squash->rsi_uid, squash->rsi_gid);
+ return 0;
}
-static ssize_t root_squash_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer, size_t count)
+static ssize_t ll_root_squash_seq_write(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *off)
{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
+ struct seq_file *m = file->private_data;
+ struct super_block *sb = m->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
struct root_squash_info *squash = &sbi->ll_squash;
return lprocfs_wr_root_squash(buffer, count, squash,
- ll_get_fsname(sbi->ll_sb, NULL, 0));
+ ll_get_fsname(sb, NULL, 0));
}
-LUSTRE_RW_ATTR(root_squash);
+LPROC_SEQ_FOPS(ll_root_squash);
static int ll_nosquash_nids_seq_show(struct seq_file *m, void *v)
{
@@ -997,6 +999,8 @@ static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
{ "statahead_stats", &ll_statahead_stats_fops, NULL, 0 },
{ "unstable_stats", &ll_unstable_stats_fops, NULL },
{ "sbi_flags", &ll_sbi_flags_fops, NULL, 0 },
+ { .name = "root_squash",
+ .fops = &ll_root_squash_fops },
{ .name = "nosquash_nids",
.fops = &ll_nosquash_nids_fops },
{ NULL }
@@ -1027,7 +1031,6 @@ static struct attribute *llite_attrs[] = {
&lustre_attr_max_easize.attr,
&lustre_attr_default_easize.attr,
&lustre_attr_xattr_cache.attr,
- &lustre_attr_root_squash.attr,
NULL,
};
diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
index a8f4e7fb0a46..fc176540bb95 100644
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ b/drivers/staging/lustre/lustre/llite/namei.c
@@ -994,11 +994,6 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry,
return rc;
}
-/* ll_unlink() doesn't update the inode with the new link count.
- * Instead, ll_ddelete() and ll_d_iput() will update it based upon if there
- * is any lock existing. They will recycle dentries and inodes based upon locks
- * too. b=20433
- */
static int ll_unlink(struct inode *dir, struct dentry *dchild)
{
struct ptlrpc_request *request = NULL;
@@ -1041,7 +1036,7 @@ static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir)))
mode &= ~current_umask();
- mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR;
+ mode = (mode & (0777 | S_ISVTX)) | S_IFDIR;
err = ll_new_node(dir, dentry, NULL, mode, 0, LUSTRE_OPC_MKDIR);
if (!err)
@@ -1089,7 +1084,7 @@ static int ll_symlink(struct inode *dir, struct dentry *dentry,
CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),target=%.*s\n",
dentry, PFID(ll_inode2fid(dir)), dir, 3000, oldname);
- err = ll_new_node(dir, dentry, oldname, S_IFLNK | S_IRWXUGO,
+ err = ll_new_node(dir, dentry, oldname, S_IFLNK | 0777,
0, LUSTRE_OPC_SYMLINK);
if (!err)
diff --git a/drivers/staging/lustre/lustre/llite/range_lock.c b/drivers/staging/lustre/lustre/llite/range_lock.c
index 94c818f1478b..14148a097476 100644
--- a/drivers/staging/lustre/lustre/llite/range_lock.c
+++ b/drivers/staging/lustre/lustre/llite/range_lock.c
@@ -61,17 +61,23 @@ void range_lock_tree_init(struct range_lock_tree *tree)
* Pre: Caller should have allocated the range lock node.
* Post: The range lock node is meant to cover [start, end] region
*/
-void range_lock_init(struct range_lock *lock, __u64 start, __u64 end)
+int range_lock_init(struct range_lock *lock, __u64 start, __u64 end)
{
+ int rc;
+
memset(&lock->rl_node, 0, sizeof(lock->rl_node));
if (end != LUSTRE_EOF)
end >>= PAGE_SHIFT;
- interval_set(&lock->rl_node, start >> PAGE_SHIFT, end);
+ rc = interval_set(&lock->rl_node, start >> PAGE_SHIFT, end);
+ if (rc)
+ return rc;
+
INIT_LIST_HEAD(&lock->rl_next_lock);
lock->rl_task = NULL;
lock->rl_lock_count = 0;
lock->rl_blocking_ranges = 0;
lock->rl_sequence = 0;
+ return rc;
}
static inline struct range_lock *next_lock(struct range_lock *lock)
diff --git a/drivers/staging/lustre/lustre/llite/range_lock.h b/drivers/staging/lustre/lustre/llite/range_lock.h
index c6d04a6f99fd..779091ccec4e 100644
--- a/drivers/staging/lustre/lustre/llite/range_lock.h
+++ b/drivers/staging/lustre/lustre/llite/range_lock.h
@@ -76,7 +76,7 @@ struct range_lock_tree {
};
void range_lock_tree_init(struct range_lock_tree *tree);
-void range_lock_init(struct range_lock *lock, __u64 start, __u64 end);
+int range_lock_init(struct range_lock *lock, __u64 start, __u64 end);
int range_lock(struct range_lock_tree *tree, struct range_lock *lock);
void range_unlock(struct range_lock_tree *tree, struct range_lock *lock);
#endif
diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
index f10e092979fe..50d027e0cfab 100644
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ b/drivers/staging/lustre/lustre/llite/rw.c
@@ -92,25 +92,6 @@ static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
goto out;
}
- /* If the non-strided (ria_pages == 0) readahead window
- * (ria_start + ret) has grown across an RPC boundary, then trim
- * readahead size by the amount beyond the RPC so it ends on an
- * RPC boundary. If the readahead window is already ending on
- * an RPC boundary (beyond_rpc == 0), or smaller than a full
- * RPC (beyond_rpc < ret) the readahead size is unchanged.
- * The (beyond_rpc != 0) check is skipped since the conditional
- * branch is more expensive than subtracting zero from the result.
- *
- * Strided read is left unaligned to avoid small fragments beyond
- * the RPC boundary from needing an extra read RPC.
- */
- if (ria->ria_pages == 0) {
- long beyond_rpc = (ria->ria_start + ret) % PTLRPC_MAX_BRW_PAGES;
-
- if (/* beyond_rpc != 0 && */ beyond_rpc < ret)
- ret -= beyond_rpc;
- }
-
if (atomic_add_return(ret, &ra->ra_cur_pages) > ra->ra_max_pages) {
atomic_sub(ret, &ra->ra_cur_pages);
ret = 0;
@@ -147,11 +128,12 @@ void ll_ra_stats_inc(struct inode *inode, enum ra_stat which)
#define RAS_CDEBUG(ras) \
CDEBUG(D_READA, \
- "lrp %lu cr %lu cp %lu ws %lu wl %lu nra %lu r %lu ri %lu" \
- "csr %lu sf %lu sp %lu sl %lu\n", \
+ "lrp %lu cr %lu cp %lu ws %lu wl %lu nra %lu rpc %lu " \
+ "r %lu ri %lu csr %lu sf %lu sp %lu sl %lu\n", \
ras->ras_last_readpage, ras->ras_consecutive_requests, \
ras->ras_consecutive_pages, ras->ras_window_start, \
ras->ras_window_len, ras->ras_next_readahead, \
+ ras->ras_rpc_size, \
ras->ras_requests, ras->ras_request_index, \
ras->ras_consecutive_stride_requests, ras->ras_stride_offset, \
ras->ras_stride_pages, ras->ras_stride_length)
@@ -261,20 +243,6 @@ out:
ria->ria_start, ria->ria_end, ria->ria_stoff, ria->ria_length,\
ria->ria_pages)
-/* Limit this to the blocksize instead of PTLRPC_BRW_MAX_SIZE, since we don't
- * know what the actual RPC size is. If this needs to change, it makes more
- * sense to tune the i_blkbits value for the file based on the OSTs it is
- * striped over, rather than having a constant value for all files here.
- */
-
-/* RAS_INCREASE_STEP should be (1UL << (inode->i_blkbits - PAGE_SHIFT)).
- * Temporarily set RAS_INCREASE_STEP to 1MB. After 4MB RPC is enabled
- * by default, this should be adjusted corresponding with max_read_ahead_mb
- * and max_read_ahead_per_file_mb otherwise the readahead budget can be used
- * up quickly which will affect read performance significantly. See LU-2816
- */
-#define RAS_INCREASE_STEP(inode) (ONE_MB_BRW_SIZE >> PAGE_SHIFT)
-
static inline int stride_io_mode(struct ll_readahead_state *ras)
{
return ras->ras_consecutive_stride_requests > 1;
@@ -345,6 +313,17 @@ static int ria_page_count(struct ra_io_arg *ria)
length);
}
+static unsigned long ras_align(struct ll_readahead_state *ras,
+ unsigned long index,
+ unsigned long *remainder)
+{
+ unsigned long rem = index % ras->ras_rpc_size;
+
+ if (remainder)
+ *remainder = rem;
+ return index - rem;
+}
+
/*Check whether the index is in the defined ra-window */
static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria)
{
@@ -358,42 +337,63 @@ static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria)
ria->ria_length < ria->ria_pages);
}
-static int ll_read_ahead_pages(const struct lu_env *env,
- struct cl_io *io, struct cl_page_list *queue,
- struct ra_io_arg *ria,
- unsigned long *reserved_pages,
- pgoff_t *ra_end)
+static unsigned long
+ll_read_ahead_pages(const struct lu_env *env, struct cl_io *io,
+ struct cl_page_list *queue, struct ll_readahead_state *ras,
+ struct ra_io_arg *ria)
{
struct cl_read_ahead ra = { 0 };
- int rc, count = 0;
+ unsigned long ra_end = 0;
bool stride_ria;
pgoff_t page_idx;
+ int rc;
LASSERT(ria);
RIA_DEBUG(ria);
stride_ria = ria->ria_length > ria->ria_pages && ria->ria_pages > 0;
for (page_idx = ria->ria_start;
- page_idx <= ria->ria_end && *reserved_pages > 0; page_idx++) {
+ page_idx <= ria->ria_end && ria->ria_reserved > 0; page_idx++) {
if (ras_inside_ra_window(page_idx, ria)) {
if (!ra.cra_end || ra.cra_end < page_idx) {
+ unsigned long end;
+
cl_read_ahead_release(env, &ra);
rc = cl_io_read_ahead(env, io, page_idx, &ra);
if (rc < 0)
break;
+ CDEBUG(D_READA, "idx: %lu, ra: %lu, rpc: %lu\n",
+ page_idx, ra.cra_end, ra.cra_rpc_size);
LASSERTF(ra.cra_end >= page_idx,
"object: %p, indcies %lu / %lu\n",
io->ci_obj, ra.cra_end, page_idx);
+ /*
+ * shrink read ahead RPC size to what cl_io_read_ahead() reports.
+ * NB: the update is racy but that doesn't matter
+ */
+ if (ras->ras_rpc_size > ra.cra_rpc_size &&
+ ra.cra_rpc_size > 0)
+ ras->ras_rpc_size = ra.cra_rpc_size;
+ /* trim it to align with optimal RPC size */
+ end = ras_align(ras, ria->ria_end + 1, NULL);
+ if (end > 0 && !ria->ria_eof)
+ ria->ria_end = end - 1;
+ if (ria->ria_end < ria->ria_end_min)
+ ria->ria_end = ria->ria_end_min;
+ if (ria->ria_end > ra.cra_end)
+ ria->ria_end = ra.cra_end;
}
- /* If the page is inside the read-ahead window*/
+ /* If the page is inside the read-ahead window */
rc = ll_read_ahead_page(env, io, queue, page_idx);
- if (!rc) {
- (*reserved_pages)--;
- count++;
- }
+ if (rc < 0)
+ break;
+
+ ra_end = page_idx;
+ if (!rc)
+ ria->ria_reserved--;
} else if (stride_ria) {
/* If it is not in the read-ahead window, and it is
* read-ahead mode, then check whether it should skip
@@ -420,8 +420,7 @@ static int ll_read_ahead_pages(const struct lu_env *env,
}
cl_read_ahead_release(env, &ra);
- *ra_end = page_idx;
- return count;
+ return ra_end;
}
static int ll_readahead(const struct lu_env *env, struct cl_io *io,
@@ -431,7 +430,7 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io,
struct vvp_io *vio = vvp_env_io(env);
struct ll_thread_info *lti = ll_env_info(env);
struct cl_attr *attr = vvp_env_thread_attr(env);
- unsigned long len, mlen = 0, reserved;
+ unsigned long len, mlen = 0;
pgoff_t ra_end, start = 0, end = 0;
struct inode *inode;
struct ra_io_arg *ria = &lti->lti_ria;
@@ -478,29 +477,15 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io,
end < vio->vui_ra_start + vio->vui_ra_count - 1)
end = vio->vui_ra_start + vio->vui_ra_count - 1;
- if (end != 0) {
- unsigned long rpc_boundary;
- /*
- * Align RA window to an optimal boundary.
- *
- * XXX This would be better to align to cl_max_pages_per_rpc
- * instead of PTLRPC_MAX_BRW_PAGES, because the RPC size may
- * be aligned to the RAID stripe size in the future and that
- * is more important than the RPC size.
- */
- /* Note: we only trim the RPC, instead of extending the RPC
- * to the boundary, so to avoid reading too much pages during
- * random reading.
- */
- rpc_boundary = (end + 1) & (~(PTLRPC_MAX_BRW_PAGES - 1));
- if (rpc_boundary > 0)
- rpc_boundary--;
-
- if (rpc_boundary > start)
- end = rpc_boundary;
+ if (end) {
+ unsigned long end_index;
/* Truncate RA window to end of file */
- end = min(end, (unsigned long)((kms - 1) >> PAGE_SHIFT));
+ end_index = (unsigned long)((kms - 1) >> PAGE_SHIFT);
+ if (end_index <= end) {
+ end = end_index;
+ ria->ria_eof = true;
+ }
ras->ras_next_readahead = max(end, end + 1);
RAS_CDEBUG(ras);
@@ -535,28 +520,31 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io,
/* at least to extend the readahead window to cover current read */
if (!hit && vio->vui_ra_valid &&
vio->vui_ra_start + vio->vui_ra_count > ria->ria_start) {
+ unsigned long remainder;
+
/* to the end of current read window. */
mlen = vio->vui_ra_start + vio->vui_ra_count - ria->ria_start;
/* trim to RPC boundary */
- start = ria->ria_start & (PTLRPC_MAX_BRW_PAGES - 1);
- mlen = min(mlen, PTLRPC_MAX_BRW_PAGES - start);
+ ras_align(ras, ria->ria_start, &remainder);
+ mlen = min(mlen, ras->ras_rpc_size - remainder);
+ ria->ria_end_min = ria->ria_start + mlen;
}
- reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen);
- if (reserved < len)
+ ria->ria_reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen);
+ if (ria->ria_reserved < len)
ll_ra_stats_inc(inode, RA_STAT_MAX_IN_FLIGHT);
CDEBUG(D_READA, "reserved pages %lu/%lu/%lu, ra_cur %d, ra_max %lu\n",
- reserved, len, mlen,
+ ria->ria_reserved, len, mlen,
atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages),
ll_i2sbi(inode)->ll_ra_info.ra_max_pages);
- ret = ll_read_ahead_pages(env, io, queue, ria, &reserved, &ra_end);
+ ra_end = ll_read_ahead_pages(env, io, queue, ras, ria);
- if (reserved != 0)
- ll_ra_count_put(ll_i2sbi(inode), reserved);
+ if (ria->ria_reserved)
+ ll_ra_count_put(ll_i2sbi(inode), ria->ria_reserved);
- if (ra_end == end + 1 && ra_end == (kms >> PAGE_SHIFT))
+ if (ra_end == end && ra_end == (kms >> PAGE_SHIFT))
ll_ra_stats_inc(inode, RA_STAT_EOF);
/* if we didn't get to the end of the region we reserved from
@@ -568,13 +556,13 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io,
CDEBUG(D_READA, "ra_end = %lu end = %lu stride end = %lu pages = %d\n",
ra_end, end, ria->ria_end, ret);
- if (ra_end != end + 1) {
+ if (ra_end > 0 && ra_end != end) {
ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END);
spin_lock(&ras->ras_lock);
- if (ra_end < ras->ras_next_readahead &&
+ if (ra_end <= ras->ras_next_readahead &&
index_in_window(ra_end, ras->ras_window_start, 0,
ras->ras_window_len)) {
- ras->ras_next_readahead = ra_end;
+ ras->ras_next_readahead = ra_end + 1;
RAS_CDEBUG(ras);
}
spin_unlock(&ras->ras_lock);
@@ -586,7 +574,7 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io,
static void ras_set_start(struct inode *inode, struct ll_readahead_state *ras,
unsigned long index)
{
- ras->ras_window_start = index & (~(RAS_INCREASE_STEP(inode) - 1));
+ ras->ras_window_start = ras_align(ras, index, NULL);
}
/* called with the ras_lock held or from places where it doesn't matter */
@@ -615,6 +603,7 @@ static void ras_stride_reset(struct ll_readahead_state *ras)
void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
{
spin_lock_init(&ras->ras_lock);
+ ras->ras_rpc_size = PTLRPC_MAX_BRW_PAGES;
ras_reset(inode, ras, 0);
ras->ras_requests = 0;
}
@@ -719,12 +708,15 @@ static void ras_increase_window(struct inode *inode,
* but current clio architecture does not support retrieve such
* information from lower layer. FIXME later
*/
- if (stride_io_mode(ras))
- ras_stride_increase_window(ras, ra, RAS_INCREASE_STEP(inode));
- else
- ras->ras_window_len = min(ras->ras_window_len +
- RAS_INCREASE_STEP(inode),
- ra->ra_max_pages_per_file);
+ if (stride_io_mode(ras)) {
+ ras_stride_increase_window(ras, ra, ras->ras_rpc_size);
+ } else {
+ unsigned long wlen;
+
+ wlen = min(ras->ras_window_len + ras->ras_rpc_size,
+ ra->ra_max_pages_per_file);
+ ras->ras_window_len = ras_align(ras, wlen, NULL);
+ }
}
static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
@@ -737,6 +729,10 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
spin_lock(&ras->ras_lock);
+ if (!hit)
+ CDEBUG(D_READA, DFID " pages at %lu miss.\n",
+ PFID(ll_inode2fid(inode)), index);
+
ll_ra_stats_inc_sbi(sbi, hit ? RA_STAT_HIT : RA_STAT_MISS);
/* reset the read-ahead window in two cases. First when the app seeks
@@ -852,6 +848,8 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
* instead of ras_window_start, which is RPC aligned
*/
ras->ras_next_readahead = max(index, ras->ras_next_readahead);
+ ras->ras_window_start = max(ras->ras_stride_offset,
+ ras->ras_window_start);
} else {
if (ras->ras_next_readahead < ras->ras_window_start)
ras->ras_next_readahead = ras->ras_window_start;
@@ -881,7 +879,7 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
*/
ras->ras_next_readahead = max(index, ras->ras_next_readahead);
ras->ras_stride_offset = index;
- ras->ras_window_len = RAS_INCREASE_STEP(inode);
+ ras->ras_window_start = max(index, ras->ras_window_start);
}
/* The initial ras_window_len is set to the request size. To avoid
@@ -1098,38 +1096,39 @@ static int ll_io_read_page(const struct lu_env *env, struct cl_io *io,
struct cl_2queue *queue = &io->ci_queue;
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct vvp_page *vpg;
+ bool uptodate;
int rc = 0;
vpg = cl2vvp_page(cl_object_page_slice(page->cp_obj, page));
+ uptodate = vpg->vpg_defer_uptodate;
+
if (sbi->ll_ra_info.ra_max_pages_per_file > 0 &&
sbi->ll_ra_info.ra_max_pages > 0) {
struct vvp_io *vio = vvp_env_io(env);
enum ras_update_flags flags = 0;
- if (vpg->vpg_defer_uptodate)
+ if (uptodate)
flags |= LL_RAS_HIT;
if (!vio->vui_ra_valid)
flags |= LL_RAS_MMAP;
ras_update(sbi, inode, ras, vvp_index(vpg), flags);
}
- if (vpg->vpg_defer_uptodate) {
+ cl_2queue_init(queue);
+ if (uptodate) {
vpg->vpg_ra_used = 1;
cl_page_export(env, page, 1);
+ cl_page_disown(env, io, page);
+ } else {
+ cl_page_list_add(&queue->c2_qin, page);
}
- cl_2queue_init(queue);
- /*
- * Add page into the queue even when it is marked uptodate above.
- * this will unlock it automatically as part of cl_page_list_disown().
- */
- cl_page_list_add(&queue->c2_qin, page);
if (sbi->ll_ra_info.ra_max_pages_per_file > 0 &&
sbi->ll_ra_info.ra_max_pages > 0) {
int rc2;
rc2 = ll_readahead(env, io, &queue->c2_qin, ras,
- vpg->vpg_defer_uptodate);
+ uptodate);
CDEBUG(D_READA, DFID "%d pages read ahead at %lu\n",
PFID(ll_inode2fid(inode)), rc2, vvp_index(vpg));
}
diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
index 21e06e5b514e..d89e79599199 100644
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ b/drivers/staging/lustre/lustre/llite/rw26.c
@@ -345,6 +345,10 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter)
ssize_t tot_bytes = 0, result = 0;
long size = MAX_DIO_SIZE;
+ /* Check for EOF ourselves: a read starting at or beyond i_size returns 0 */
+ if (iov_iter_rw(iter) == READ && file_offset >= i_size_read(inode))
+ return 0;
+
/* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
return -EINVAL;
diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c
index f1ee17f9ec0d..fb7c315b33cb 100644
--- a/drivers/staging/lustre/lustre/llite/statahead.c
+++ b/drivers/staging/lustre/lustre/llite/statahead.c
@@ -79,6 +79,8 @@ struct sa_entry {
struct inode *se_inode;
/* entry name */
struct qstr se_qstr;
+ /* entry fid */
+ struct lu_fid se_fid;
};
static unsigned int sai_generation;
@@ -169,7 +171,7 @@ static inline int is_omitted_entry(struct ll_statahead_info *sai, __u64 index)
/* allocate sa_entry and hash it to allow scanner process to find it */
static struct sa_entry *
sa_alloc(struct dentry *parent, struct ll_statahead_info *sai, __u64 index,
- const char *name, int len)
+ const char *name, int len, const struct lu_fid *fid)
{
struct ll_inode_info *lli;
struct sa_entry *entry;
@@ -194,6 +196,7 @@ sa_alloc(struct dentry *parent, struct ll_statahead_info *sai, __u64 index,
entry->se_qstr.hash = full_name_hash(parent, name, len);
entry->se_qstr.len = len;
entry->se_qstr.name = dname;
+ entry->se_fid = *fid;
lli = ll_i2info(sai->sai_dentry->d_inode);
spin_lock(&lli->lli_sa_lock);
@@ -566,24 +569,8 @@ static void sa_instantiate(struct ll_statahead_info *sai,
}
child = entry->se_inode;
- if (!child) {
- /*
- * lookup.
- */
- LASSERT(fid_is_zero(&minfo->mi_data.op_fid2));
-
- /* XXX: No fid in reply, this is probably cross-ref case.
- * SA can't handle it yet.
- */
- if (body->mbo_valid & OBD_MD_MDS) {
- rc = -EAGAIN;
- goto out;
- }
- } else {
- /*
- * revalidate.
- */
- /* unlinked and re-created with the same name */
+ if (child) {
+ /* revalidate: detect a file unlinked and re-created with the same name */
if (unlikely(!lu_fid_eq(&minfo->mi_data.op_fid2, &body->mbo_fid1))) {
entry->se_inode = NULL;
iput(child);
@@ -720,50 +707,42 @@ static int ll_statahead_interpret(struct ptlrpc_request *req,
}
/* finish async stat RPC arguments */
-static void sa_fini_data(struct md_enqueue_info *minfo,
- struct ldlm_enqueue_info *einfo)
+static void sa_fini_data(struct md_enqueue_info *minfo)
{
- LASSERT(minfo && einfo);
iput(minfo->mi_dir);
kfree(minfo);
- kfree(einfo);
}
/**
* prepare arguments for async stat RPC.
*/
-static int sa_prep_data(struct inode *dir, struct inode *child,
- struct sa_entry *entry, struct md_enqueue_info **pmi,
- struct ldlm_enqueue_info **pei)
+static struct md_enqueue_info *
+sa_prep_data(struct inode *dir, struct inode *child, struct sa_entry *entry)
{
- const struct qstr *qstr = &entry->se_qstr;
struct md_enqueue_info *minfo;
struct ldlm_enqueue_info *einfo;
struct md_op_data *op_data;
- einfo = kzalloc(sizeof(*einfo), GFP_NOFS);
- if (!einfo)
- return -ENOMEM;
-
minfo = kzalloc(sizeof(*minfo), GFP_NOFS);
- if (!minfo) {
- kfree(einfo);
- return -ENOMEM;
- }
+ if (!minfo)
+ return ERR_PTR(-ENOMEM);
- op_data = ll_prep_md_op_data(&minfo->mi_data, dir, child, qstr->name,
- qstr->len, 0, LUSTRE_OPC_ANY, NULL);
+ op_data = ll_prep_md_op_data(&minfo->mi_data, dir, child, NULL, 0, 0,
+ LUSTRE_OPC_ANY, NULL);
if (IS_ERR(op_data)) {
- kfree(einfo);
kfree(minfo);
- return PTR_ERR(op_data);
+ return (struct md_enqueue_info *)op_data;
}
+ if (!child)
+ op_data->op_fid2 = entry->se_fid;
+
minfo->mi_it.it_op = IT_GETATTR;
minfo->mi_dir = igrab(dir);
minfo->mi_cb = ll_statahead_interpret;
minfo->mi_cbdata = entry;
+ einfo = &minfo->mi_einfo;
einfo->ei_type = LDLM_IBITS;
einfo->ei_mode = it_to_lock_mode(&minfo->mi_it);
einfo->ei_cb_bl = ll_md_blocking_ast;
@@ -771,26 +750,22 @@ static int sa_prep_data(struct inode *dir, struct inode *child,
einfo->ei_cb_gl = NULL;
einfo->ei_cbdata = NULL;
- *pmi = minfo;
- *pei = einfo;
-
- return 0;
+ return minfo;
}
/* async stat for file not found in dcache */
static int sa_lookup(struct inode *dir, struct sa_entry *entry)
{
struct md_enqueue_info *minfo;
- struct ldlm_enqueue_info *einfo;
int rc;
- rc = sa_prep_data(dir, NULL, entry, &minfo, &einfo);
- if (rc)
- return rc;
+ minfo = sa_prep_data(dir, NULL, entry);
+ if (IS_ERR(minfo))
+ return PTR_ERR(minfo);
- rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo, einfo);
+ rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo);
if (rc)
- sa_fini_data(minfo, einfo);
+ sa_fini_data(minfo);
return rc;
}
@@ -809,7 +784,6 @@ static int sa_revalidate(struct inode *dir, struct sa_entry *entry,
struct lookup_intent it = { .it_op = IT_GETATTR,
.it_lock_handle = 0 };
struct md_enqueue_info *minfo;
- struct ldlm_enqueue_info *einfo;
int rc;
if (unlikely(!inode))
@@ -827,25 +801,26 @@ static int sa_revalidate(struct inode *dir, struct sa_entry *entry,
return 1;
}
- rc = sa_prep_data(dir, inode, entry, &minfo, &einfo);
- if (rc) {
+ minfo = sa_prep_data(dir, inode, entry);
+ if (IS_ERR(minfo)) {
entry->se_inode = NULL;
iput(inode);
- return rc;
+ return PTR_ERR(minfo);
}
- rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo, einfo);
+ rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo);
if (rc) {
entry->se_inode = NULL;
iput(inode);
- sa_fini_data(minfo, einfo);
+ sa_fini_data(minfo);
}
return rc;
}
/* async stat for file with @name */
-static void sa_statahead(struct dentry *parent, const char *name, int len)
+static void sa_statahead(struct dentry *parent, const char *name, int len,
+ const struct lu_fid *fid)
{
struct inode *dir = d_inode(parent);
struct ll_inode_info *lli = ll_i2info(dir);
@@ -854,7 +829,7 @@ static void sa_statahead(struct dentry *parent, const char *name, int len)
struct sa_entry *entry;
int rc;
- entry = sa_alloc(parent, sai, sai->sai_index, name, len);
+ entry = sa_alloc(parent, sai, sai->sai_index, name, len, fid);
if (IS_ERR(entry))
return;
@@ -1043,6 +1018,7 @@ static int ll_statahead_thread(void *arg)
for (ent = lu_dirent_start(dp);
ent && thread_is_running(sa_thread) && !sa_low_hit(sai);
ent = lu_dirent_next(ent)) {
+ struct lu_fid fid;
__u64 hash;
int namelen;
char *name;
@@ -1088,6 +1064,8 @@ static int ll_statahead_thread(void *arg)
if (unlikely(++first == 1))
continue;
+ fid_le_to_cpu(&fid, &ent->lde_fid);
+
/* wait for spare statahead window */
do {
l_wait_event(sa_thread->t_ctl_waitq,
@@ -1117,7 +1095,7 @@ static int ll_statahead_thread(void *arg)
} while (sa_sent_full(sai) &&
thread_is_running(sa_thread));
- sa_statahead(parent, name, namelen);
+ sa_statahead(parent, name, namelen, &fid);
}
pos = le64_to_cpu(dp->ldp_hash_end);
diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c
index 106cd00910a7..4759802e062d 100644
--- a/drivers/staging/lustre/lustre/llite/super25.c
+++ b/drivers/staging/lustre/lustre/llite/super25.c
@@ -88,7 +88,7 @@ static int __init lustre_init(void)
struct timespec64 ts;
int i, rc, seed[2];
- CLASSERT(sizeof(LUSTRE_VOLATILE_HDR) == LUSTRE_VOLATILE_HDR_LEN + 1);
+ BUILD_BUG_ON(sizeof(LUSTRE_VOLATILE_HDR) != LUSTRE_VOLATILE_HDR_LEN + 1);
/* print an address of _any_ initialized kernel symbol from this
* module, to allow debugging with gdb that doesn't support data
diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c
index 12c129f7e4ad..3669ea77ee93 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_dev.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_dev.c
@@ -391,7 +391,7 @@ struct vvp_pgcache_id {
static void vvp_pgcache_id_unpack(loff_t pos, struct vvp_pgcache_id *id)
{
- CLASSERT(sizeof(pos) == sizeof(__u64));
+ BUILD_BUG_ON(sizeof(pos) != sizeof(__u64));
id->vpi_index = pos & 0xffffffff;
id->vpi_depth = (pos >> PGC_DEPTH_SHIFT) & 0xf;
diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h
index c60d0414ac25..f40fd7f115d1 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_internal.h
+++ b/drivers/staging/lustre/lustre/llite/vvp_internal.h
@@ -301,8 +301,6 @@ static inline struct vvp_lock *cl2vvp_lock(const struct cl_lock_slice *slice)
# define CLOBINVRNT(env, clob, expr) \
((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr)))
-int lov_read_and_clear_async_rc(struct cl_object *clob);
-
int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
struct cl_io *io);
int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io);
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
index 697cbfbe9374..4c57755e06e7 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_io.c
@@ -288,7 +288,7 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
io->ci_ignore_layout, io->ci_verify_layout,
vio->vui_layout_gen, io->ci_restore_needed);
- if (io->ci_restore_needed == 1) {
+ if (io->ci_restore_needed) {
int rc;
/* file was detected release, we need to restore it
@@ -657,7 +657,15 @@ static void vvp_io_setattr_end(const struct lu_env *env,
static void vvp_io_setattr_fini(const struct lu_env *env,
const struct cl_io_slice *ios)
{
+ bool restore_needed = ios->cis_io->ci_restore_needed;
+ struct inode *inode = vvp_object_inode(ios->cis_obj);
+
vvp_io_fini(env, ios);
+
+ if (restore_needed && !ios->cis_io->ci_restore_needed) {
+ /* restore finished, set data modified flag for HSM */
+ set_bit(LLIF_DATA_MODIFIED, &(ll_i2info(inode))->lli_flags);
+ }
}
static int vvp_io_read_start(const struct lu_env *env,
@@ -1006,7 +1014,7 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
{
struct vm_fault *vmf = cfio->ft_vmf;
- cfio->ft_flags = filemap_fault(cfio->ft_vma, vmf);
+ cfio->ft_flags = filemap_fault(vmf);
cfio->ft_flags_valid = 1;
if (vmf->page) {
@@ -1340,13 +1348,6 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
io->ci_lockreq = CILR_MANDATORY;
}
- /* ignore layout change for generic CIT_MISC but not for glimpse.
- * io context for glimpse must set ci_verify_layout to true,
- * see cl_glimpse_size0() for details.
- */
- if (io->ci_type == CIT_MISC && !io->ci_verify_layout)
- io->ci_ignore_layout = 1;
-
/* Enqueue layout lock and get layout version. We need to do this
* even for operations requiring to open file, such as read and write,
* because it might not grant layout lock in IT_OPEN.
diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c
index 23d66308ff20..687c0c79d621 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_page.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_page.c
@@ -227,7 +227,8 @@ static int vvp_page_prep_write(const struct lu_env *env,
* This takes inode as a separate argument, because inode on which error is to
* be set can be different from \a vmpage inode in case of direct-io.
*/
-static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret)
+static void vvp_vmpage_error(struct inode *inode, struct page *vmpage,
+ int ioret)
{
struct vvp_object *obj = cl_inode2vvp(inode);
diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c
index 7a848ebc57c1..421cc04ecf1e 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -132,6 +132,15 @@ ll_xattr_set_common(const struct xattr_handler *handler,
(!strcmp(name, "ima") || !strcmp(name, "evm")))
return -EOPNOTSUPP;
+ /*
+ * In user.* namespace, only regular files and directories can have
+ * extended attributes.
+ */
+ if (handler->flags == XATTR_USER_T) {
+ if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
+ return -EPERM;
+ }
+
sprintf(fullname, "%s%s\n", handler->prefix, name);
rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode),
valid, fullname, pv, size, 0, flags,