aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/staging/lustre/lustre/llite
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/staging/lustre/lustre/llite')
-rw-r--r--drivers/staging/lustre/lustre/llite/Makefile5
-rw-r--r--drivers/staging/lustre/lustre/llite/dcache.c15
-rw-r--r--drivers/staging/lustre/lustre/llite/dir.c99
-rw-r--r--drivers/staging/lustre/lustre/llite/file.c277
-rw-r--r--drivers/staging/lustre/lustre/llite/glimpse.c255
-rw-r--r--drivers/staging/lustre/lustre/llite/lcommon_cl.c327
-rw-r--r--drivers/staging/lustre/lustre/llite/lcommon_misc.c201
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_close.c71
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_internal.h274
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_lib.c176
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_mmap.c48
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_nfs.c29
-rw-r--r--drivers/staging/lustre/lustre/llite/lloop.c3
-rw-r--r--drivers/staging/lustre/lustre/llite/lproc_llite.c33
-rw-r--r--drivers/staging/lustre/lustre/llite/namei.c143
-rw-r--r--drivers/staging/lustre/lustre/llite/rw.c367
-rw-r--r--drivers/staging/lustre/lustre/llite/rw26.c318
-rw-r--r--drivers/staging/lustre/lustre/llite/statahead.c17
-rw-r--r--drivers/staging/lustre/lustre/llite/super25.c14
-rw-r--r--drivers/staging/lustre/lustre/llite/symlink.c10
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_dev.c270
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_internal.h332
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_io.c928
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_lock.c53
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_object.c141
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_page.c211
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_req.c121
-rw-r--r--drivers/staging/lustre/lustre/llite/xattr.c45
-rw-r--r--drivers/staging/lustre/lustre/llite/xattr_cache.c1
29 files changed, 3254 insertions, 1530 deletions
diff --git a/drivers/staging/lustre/lustre/llite/Makefile b/drivers/staging/lustre/lustre/llite/Makefile
index 9ac29e718da3..2ce10ff01b80 100644
--- a/drivers/staging/lustre/lustre/llite/Makefile
+++ b/drivers/staging/lustre/lustre/llite/Makefile
@@ -4,7 +4,8 @@ lustre-y := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o \
rw.o namei.o symlink.o llite_mmap.o \
xattr.o xattr_cache.o remote_perm.o llite_rmtacl.o \
rw26.o super25.o statahead.o \
- ../lclient/glimpse.o ../lclient/lcommon_cl.o ../lclient/lcommon_misc.o \
- vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o lproc_llite.o
+ glimpse.o lcommon_cl.o lcommon_misc.o \
+ vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o vvp_req.o \
+ lproc_llite.o
llite_lloop-y := lloop.o
diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c
index dd1c827013b9..1b6f82a1a435 100644
--- a/drivers/staging/lustre/lustre/llite/dcache.c
+++ b/drivers/staging/lustre/lustre/llite/dcache.c
@@ -108,11 +108,8 @@ static int ll_dcompare(const struct dentry *parent, const struct dentry *dentry,
static inline int return_if_equal(struct ldlm_lock *lock, void *data)
{
- if ((lock->l_flags &
- (LDLM_FL_CANCELING | LDLM_FL_DISCARD_DATA)) ==
- (LDLM_FL_CANCELING | LDLM_FL_DISCARD_DATA))
- return LDLM_ITER_CONTINUE;
- return LDLM_ITER_STOP;
+ return (ldlm_is_canceling(lock) && ldlm_is_discard_data(lock)) ?
+ LDLM_ITER_CONTINUE : LDLM_ITER_STOP;
}
/* find any ldlm lock of the inode in mdc and lov
@@ -253,8 +250,8 @@ void ll_invalidate_aliases(struct inode *inode)
{
struct dentry *dentry;
- CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n",
- inode->i_ino, inode->i_generation, inode);
+ CDEBUG(D_INODE, "marking dentries for ino "DFID"(%p) invalid\n",
+ PFID(ll_inode2fid(inode)), inode);
ll_lock_dcache(inode);
hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
@@ -289,8 +286,8 @@ void ll_lookup_finish_locks(struct lookup_intent *it, struct inode *inode)
if (it->d.lustre.it_lock_mode && inode) {
struct ll_sb_info *sbi = ll_i2sbi(inode);
- CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
- inode, inode->i_ino, inode->i_generation);
+ CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"(%p)\n",
+ PFID(ll_inode2fid(inode)), inode);
ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL);
}
diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
index e4c82883e580..4b00d1ac84fb 100644
--- a/drivers/staging/lustre/lustre/llite/dir.c
+++ b/drivers/staging/lustre/lustre/llite/dir.c
@@ -158,11 +158,16 @@ static int ll_dir_filler(void *_hash, struct page *page0)
int i;
int rc;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) hash %llu\n",
- inode->i_ino, inode->i_generation, inode, hash);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) hash %llu\n",
+ PFID(ll_inode2fid(inode)), inode, hash);
LASSERT(max_pages > 0 && max_pages <= MD_MAX_BRW_PAGES);
+ op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+ LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ return PTR_ERR(op_data);
+
page_pool = kcalloc(max_pages, sizeof(page), GFP_NOFS);
if (page_pool) {
page_pool[0] = page0;
@@ -177,8 +182,6 @@ static int ll_dir_filler(void *_hash, struct page *page0)
page_pool[npages] = page;
}
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
op_data->op_npages = npages;
op_data->op_offset = hash;
rc = md_readpage(exp, op_data, page_pool, &request);
@@ -190,7 +193,7 @@ static int ll_dir_filler(void *_hash, struct page *page0)
body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
/* Checked by mdc_readpage() */
if (body->valid & OBD_MD_FLSIZE)
- cl_isize_write(inode, body->size);
+ i_size_write(inode, body->size);
nrdpgs = (request->rq_bulk->bd_nob_transferred+PAGE_SIZE-1)
>> PAGE_SHIFT;
@@ -372,8 +375,8 @@ struct page *ll_get_dir_page(struct inode *dir, __u64 hash,
return ERR_PTR(rc);
}
- CDEBUG(D_INODE, "setting lr_lvb_inode to inode %p (%lu/%u)\n",
- dir, dir->i_ino, dir->i_generation);
+ CDEBUG(D_INODE, "setting lr_lvb_inode to inode "DFID"(%p)\n",
+ PFID(ll_inode2fid(dir)), dir);
md_set_lock_data(ll_i2sbi(dir)->ll_md_exp,
&it.d.lustre.it_lock_handle, dir, NULL);
} else {
@@ -468,6 +471,28 @@ fail:
goto out_unlock;
}
+/**
+ * return IF_* type for given lu_dirent entry.
+ * IF_* flag shld be converted to particular OS file type in
+ * platform llite module.
+ */
+static __u16 ll_dirent_type_get(struct lu_dirent *ent)
+{
+ __u16 type = 0;
+ struct luda_type *lt;
+ int len = 0;
+
+ if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) {
+ const unsigned int align = sizeof(struct luda_type) - 1;
+
+ len = le16_to_cpu(ent->lde_namelen);
+ len = (len + align) & ~align;
+ lt = (void *)ent->lde_name + len;
+ type = IFTODT(le16_to_cpu(lt->lt_type));
+ }
+ return type;
+}
+
int ll_dir_read(struct inode *inode, struct dir_context *ctx)
{
struct ll_inode_info *info = ll_i2info(inode);
@@ -589,15 +614,16 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx)
struct inode *inode = file_inode(filp);
struct ll_file_data *lfd = LUSTRE_FPRIVATE(filp);
struct ll_sb_info *sbi = ll_i2sbi(inode);
+ __u64 pos = lfd ? lfd->lfd_pos : 0;
int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
int api32 = ll_need_32bit_api(sbi);
int rc;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu 32bit_api %d\n",
- inode->i_ino, inode->i_generation,
- inode, (unsigned long)lfd->lfd_pos, i_size_read(inode), api32);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) pos %lu/%llu 32bit_api %d\n",
+ PFID(ll_inode2fid(inode)), inode, (unsigned long)pos,
+ i_size_read(inode), api32);
- if (lfd->lfd_pos == MDS_DIR_END_OFF) {
+ if (pos == MDS_DIR_END_OFF) {
/*
* end-of-file.
*/
@@ -605,9 +631,10 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx)
goto out;
}
- ctx->pos = lfd->lfd_pos;
+ ctx->pos = pos;
rc = ll_dir_read(inode, ctx);
- lfd->lfd_pos = ctx->pos;
+ if (lfd)
+ lfd->lfd_pos = ctx->pos;
if (ctx->pos == MDS_DIR_END_OFF) {
if (api32)
ctx->pos = LL_DIR_END_OFF_32BIT;
@@ -804,9 +831,8 @@ int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
rc = md_getattr(sbi->ll_md_exp, op_data, &req);
ll_finish_md_op_data(op_data);
if (rc < 0) {
- CDEBUG(D_INFO, "md_getattr failed on inode %lu/%u: rc %d\n",
- inode->i_ino,
- inode->i_generation, rc);
+ CDEBUG(D_INFO, "md_getattr failed on inode "DFID": rc %d\n",
+ PFID(ll_inode2fid(inode)), rc);
goto out;
}
@@ -916,7 +942,7 @@ static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy)
}
/* Read current file data version */
- rc = ll_data_version(inode, &data_version, 1);
+ rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH);
iput(inode);
if (rc != 0) {
CDEBUG(D_HSM, "Could not read file data version of "
@@ -936,6 +962,9 @@ static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy)
}
progress:
+ /* On error, the request should be considered as completed */
+ if (hpk.hpk_errval > 0)
+ hpk.hpk_flags |= HP_FLAG_COMPLETED;
rc = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
&hpk, NULL);
@@ -997,8 +1026,7 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy)
goto progress;
}
- rc = ll_data_version(inode, &data_version,
- copy->hc_hai.hai_action == HSMA_ARCHIVE);
+ rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH);
iput(inode);
if (rc) {
CDEBUG(D_HSM, "Could not read file data version. Request could not be confirmed.\n");
@@ -1033,7 +1061,6 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy)
/* hpk_errval must be >= 0 */
hpk.hpk_errval = EBUSY;
}
-
}
progress:
@@ -1242,8 +1269,8 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct obd_ioctl_data *data;
int rc = 0;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), cmd=%#x\n",
- inode->i_ino, inode->i_generation, inode, cmd);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%#x\n",
+ PFID(ll_inode2fid(inode)), inode, cmd);
/* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
@@ -1362,7 +1389,6 @@ out_free:
lmv_out_free:
obd_ioctl_freedata(buf, len);
return rc;
-
}
case LL_IOC_LOV_SETSTRIPE: {
struct lov_user_md_v3 lumv3;
@@ -1474,8 +1500,9 @@ free_lmv:
cmd == LL_IOC_MDC_GETINFO)) {
rc = 0;
goto skip_lmm;
- } else
+ } else {
goto out_req;
+ }
}
if (cmd == IOC_MDC_GETFILESTRIPE ||
@@ -1688,15 +1715,16 @@ out_quotactl:
return ll_flush_ctx(inode);
#ifdef CONFIG_FS_POSIX_ACL
case LL_IOC_RMTACL: {
- if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) {
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+ if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) {
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- rc = rct_add(&sbi->ll_rct, current_pid(), arg);
- if (!rc)
- fd->fd_flags |= LL_FILE_RMTACL;
- return rc;
- } else
- return 0;
+ rc = rct_add(&sbi->ll_rct, current_pid(), arg);
+ if (!rc)
+ fd->fd_flags |= LL_FILE_RMTACL;
+ return rc;
+ } else {
+ return 0;
+ }
}
#endif
case LL_IOC_GETOBDCOUNT: {
@@ -1817,6 +1845,9 @@ out_quotactl:
return rc;
}
case LL_IOC_HSM_CT_START:
+ if (!capable(CFS_CAP_SYS_ADMIN))
+ return -EPERM;
+
rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg,
sizeof(struct lustre_kernelcomm));
return rc;
@@ -1865,7 +1896,6 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
int api32 = ll_need_32bit_api(sbi);
loff_t ret = -EINVAL;
- inode_lock(inode);
switch (origin) {
case SEEK_SET:
break;
@@ -1903,7 +1933,6 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
goto out;
out:
- inode_unlock(inode);
return ret;
}
@@ -1922,7 +1951,7 @@ const struct file_operations ll_dir_operations = {
.open = ll_dir_open,
.release = ll_dir_release,
.read = generic_read_dir,
- .iterate = ll_readdir,
+ .iterate_shared = ll_readdir,
.unlocked_ioctl = ll_dir_ioctl,
.fsync = ll_fsync,
};
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
index cf619af3caf5..f47f2acaf90c 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -45,6 +45,7 @@
#include "../include/lustre_lite.h"
#include <linux/pagemap.h>
#include <linux/file.h>
+#include <linux/mount.h>
#include "llite_internal.h"
#include "../include/lustre/ll_fiemap.h"
@@ -87,8 +88,7 @@ void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
op_data->op_attr.ia_ctime = inode->i_ctime;
op_data->op_attr.ia_size = i_size_read(inode);
op_data->op_attr_blocks = inode->i_blocks;
- ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
- ll_inode_to_ext_flags(inode->i_flags);
+ op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
if (fh)
op_data->op_handle = *fh;
@@ -170,13 +170,15 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
*/
rc = ll_som_update(inode, op_data);
if (rc) {
- CERROR("inode %lu mdc Size-on-MDS update failed: rc = %d\n",
- inode->i_ino, rc);
+ CERROR("%s: inode "DFID" mdc Size-on-MDS update failed: rc = %d\n",
+ ll_i2mdexp(inode)->exp_obd->obd_name,
+ PFID(ll_inode2fid(inode)), rc);
rc = 0;
}
} else if (rc) {
- CERROR("inode %lu mdc close failed: rc = %d\n",
- inode->i_ino, rc);
+ CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
+ ll_i2mdexp(inode)->exp_obd->obd_name,
+ PFID(ll_inode2fid(inode)), rc);
}
/* DATA_MODIFIED flag was successfully sent on close, cancel data
@@ -278,7 +280,7 @@ static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
/* clear group lock, if present */
if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
- ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
+ ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);
if (fd->fd_lease_och) {
bool lease_broken;
@@ -343,8 +345,8 @@ int ll_file_release(struct inode *inode, struct file *file)
struct ll_inode_info *lli = ll_i2info(inode);
int rc;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
- inode->i_generation, inode);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+ PFID(ll_inode2fid(inode)), inode);
#ifdef CONFIG_FS_POSIX_ACL
if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) {
@@ -543,8 +545,8 @@ int ll_file_open(struct inode *inode, struct file *file)
struct ll_file_data *fd;
int rc = 0, opendir_set = 0;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
- inode->i_generation, inode, file->f_flags);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
+ PFID(ll_inode2fid(inode)), inode, file->f_flags);
it = file->private_data; /* XXX: compat macro */
file->private_data = NULL; /* prevent ll_local_open assertion */
@@ -677,7 +679,9 @@ restart:
if (rc)
goto out_och_free;
- LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));
+ LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
+ "inode %p: disposition %x, status %d\n", inode,
+ it_disposition(it, ~0), it->d.lustre.it_status);
rc = ll_local_open(file, it, fd, *och_p);
if (rc)
@@ -875,16 +879,19 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
return och;
out_close:
- rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
- if (rc2)
- CERROR("Close openhandle returned %d\n", rc2);
-
- /* cancel open lock */
+ /* Cancel open lock */
if (it.d.lustre.it_lock_mode != 0) {
ldlm_lock_decref_and_cancel(&och->och_lease_handle,
it.d.lustre.it_lock_mode);
it.d.lustre.it_lock_mode = 0;
+ och->och_lease_handle.cookie = 0ULL;
}
+ rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
+ if (rc2 < 0)
+ CERROR("%s: error closing file "DFID": %d\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(&ll_i2info(inode)->lli_fid), rc2);
+ och = NULL; /* och has been freed in ll_close_inode_openhandle() */
out_release_it:
ll_intent_release(&it);
out:
@@ -908,7 +915,7 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
lock_res_and_lock(lock);
cancelled = ldlm_is_cancel(lock);
unlock_res_and_lock(lock);
- ldlm_lock_put(lock);
+ LDLM_LOCK_PUT(lock);
}
CDEBUG(D_INODE, "lease for " DFID " broken? %d\n",
@@ -926,7 +933,7 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
/* Fills the obdo with the attributes for the lsm */
static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
- struct obdo *obdo, __u64 ioepoch, int sync)
+ struct obdo *obdo, __u64 ioepoch, int dv_flags)
{
struct ptlrpc_request_set *set;
struct obd_info oinfo = { };
@@ -945,9 +952,11 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
OBD_MD_FLMTIME | OBD_MD_FLCTIME |
OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
OBD_MD_FLDATAVERSION;
- if (sync) {
+ if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) {
oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
+ if (dv_flags & LL_DV_WR_FLUSH)
+ oinfo.oi_oa->o_flags |= OBD_FL_FLUSH;
}
set = ptlrpc_prep_set();
@@ -960,11 +969,16 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
rc = ptlrpc_set_wait(set);
ptlrpc_set_destroy(set);
}
- if (rc == 0)
+ if (rc == 0) {
oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
OBD_MD_FLATIME | OBD_MD_FLMTIME |
OBD_MD_FLCTIME | OBD_MD_FLSIZE |
- OBD_MD_FLDATAVERSION);
+ OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS);
+ if (dv_flags & LL_DV_WR_FLUSH &&
+ !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS &&
+ oinfo.oi_oa->o_flags & OBD_FL_FLUSH))
+ return -ENOTSUPP;
+ }
return rc;
}
@@ -980,7 +994,7 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
lsm = ccc_inode_lsm_get(inode);
rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
- obdo, ioepoch, sync);
+ obdo, ioepoch, sync ? LL_DV_RD_FLUSH : 0);
if (rc == 0) {
struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
@@ -994,50 +1008,57 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
return rc;
}
-int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
+int ll_merge_attr(const struct lu_env *env, struct inode *inode)
{
struct ll_inode_info *lli = ll_i2info(inode);
struct cl_object *obj = lli->lli_clob;
- struct cl_attr *attr = ccc_env_thread_attr(env);
- struct ost_lvb lvb;
+ struct cl_attr *attr = vvp_env_thread_attr(env);
+ s64 atime;
+ s64 mtime;
+ s64 ctime;
int rc = 0;
ll_inode_size_lock(inode);
+
/* merge timestamps the most recently obtained from mds with
* timestamps obtained from osts
*/
- LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
- LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
- LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
+ LTIME_S(inode->i_atime) = lli->lli_atime;
+ LTIME_S(inode->i_mtime) = lli->lli_mtime;
+ LTIME_S(inode->i_ctime) = lli->lli_ctime;
- lvb.lvb_size = i_size_read(inode);
- lvb.lvb_blocks = inode->i_blocks;
- lvb.lvb_mtime = LTIME_S(inode->i_mtime);
- lvb.lvb_atime = LTIME_S(inode->i_atime);
- lvb.lvb_ctime = LTIME_S(inode->i_ctime);
+ mtime = LTIME_S(inode->i_mtime);
+ atime = LTIME_S(inode->i_atime);
+ ctime = LTIME_S(inode->i_ctime);
cl_object_attr_lock(obj);
rc = cl_object_attr_get(env, obj, attr);
cl_object_attr_unlock(obj);
- if (rc == 0) {
- if (lvb.lvb_atime < attr->cat_atime)
- lvb.lvb_atime = attr->cat_atime;
- if (lvb.lvb_ctime < attr->cat_ctime)
- lvb.lvb_ctime = attr->cat_ctime;
- if (lvb.lvb_mtime < attr->cat_mtime)
- lvb.lvb_mtime = attr->cat_mtime;
+ if (rc != 0)
+ goto out_size_unlock;
- CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
- PFID(&lli->lli_fid), attr->cat_size);
- cl_isize_write_nolock(inode, attr->cat_size);
+ if (atime < attr->cat_atime)
+ atime = attr->cat_atime;
- inode->i_blocks = attr->cat_blocks;
+ if (ctime < attr->cat_ctime)
+ ctime = attr->cat_ctime;
- LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
- LTIME_S(inode->i_atime) = lvb.lvb_atime;
- LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
- }
+ if (mtime < attr->cat_mtime)
+ mtime = attr->cat_mtime;
+
+ CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
+ PFID(&lli->lli_fid), attr->cat_size);
+
+ i_size_write(inode, attr->cat_size);
+
+ inode->i_blocks = attr->cat_blocks;
+
+ LTIME_S(inode->i_mtime) = mtime;
+ LTIME_S(inode->i_atime) = atime;
+ LTIME_S(inode->i_ctime) = ctime;
+
+out_size_unlock:
ll_inode_size_unlock(inode);
return rc;
@@ -1120,47 +1141,48 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
struct cl_io *io;
ssize_t result;
+ CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: %llu, count: %zd\n",
+ file->f_path.dentry->d_name.name, iot, *ppos, count);
+
restart:
- io = ccc_env_thread_io(env);
+ io = vvp_env_thread_io(env);
ll_io_init(io, file, iot == CIT_WRITE);
if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
struct vvp_io *vio = vvp_env_io(env);
- struct ccc_io *cio = ccc_env_io(env);
int write_mutex_locked = 0;
- cio->cui_fd = LUSTRE_FPRIVATE(file);
- vio->cui_io_subtype = args->via_io_subtype;
+ vio->vui_fd = LUSTRE_FPRIVATE(file);
+ vio->vui_io_subtype = args->via_io_subtype;
- switch (vio->cui_io_subtype) {
+ switch (vio->vui_io_subtype) {
case IO_NORMAL:
- cio->cui_iter = args->u.normal.via_iter;
- cio->cui_iocb = args->u.normal.via_iocb;
+ vio->vui_iter = args->u.normal.via_iter;
+ vio->vui_iocb = args->u.normal.via_iocb;
if ((iot == CIT_WRITE) &&
- !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
+ !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
if (mutex_lock_interruptible(&lli->
lli_write_mutex)) {
result = -ERESTARTSYS;
goto out;
}
write_mutex_locked = 1;
- } else if (iot == CIT_READ) {
- down_read(&lli->lli_trunc_sem);
}
+ down_read(&lli->lli_trunc_sem);
break;
case IO_SPLICE:
- vio->u.splice.cui_pipe = args->u.splice.via_pipe;
- vio->u.splice.cui_flags = args->u.splice.via_flags;
+ vio->u.splice.vui_pipe = args->u.splice.via_pipe;
+ vio->u.splice.vui_flags = args->u.splice.via_flags;
break;
default:
- CERROR("Unknown IO type - %u\n", vio->cui_io_subtype);
+ CERROR("Unknown IO type - %u\n", vio->vui_io_subtype);
LBUG();
}
result = cl_io_loop(env, io);
+ if (args->via_io_subtype == IO_NORMAL)
+ up_read(&lli->lli_trunc_sem);
if (write_mutex_locked)
mutex_unlock(&lli->lli_write_mutex);
- else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
- up_read(&lli->lli_trunc_sem);
} else {
/* cl_io_rw_init() handled IO */
result = io->ci_result;
@@ -1197,6 +1219,7 @@ out:
fd->fd_write_failed = true;
}
}
+ CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
return result;
}
@@ -1212,7 +1235,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
if (IS_ERR(env))
return PTR_ERR(env);
- args = vvp_env_args(env, IO_NORMAL);
+ args = ll_env_args(env, IO_NORMAL);
args->u.normal.via_iter = to;
args->u.normal.via_iocb = iocb;
@@ -1236,7 +1259,7 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (IS_ERR(env))
return PTR_ERR(env);
- args = vvp_env_args(env, IO_NORMAL);
+ args = ll_env_args(env, IO_NORMAL);
args->u.normal.via_iter = from;
args->u.normal.via_iocb = iocb;
@@ -1262,7 +1285,7 @@ static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
if (IS_ERR(env))
return PTR_ERR(env);
- args = vvp_env_args(env, IO_SPLICE);
+ args = ll_env_args(env, IO_SPLICE);
args->u.splice.via_pipe = pipe;
args->u.splice.via_flags = flags;
@@ -1354,7 +1377,8 @@ static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
}
int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
- int flags, struct lov_user_md *lum, int lum_size)
+ __u64 flags, struct lov_user_md *lum,
+ int lum_size)
{
struct lov_stripe_md *lsm = NULL;
struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
@@ -1363,8 +1387,8 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
lsm = ccc_inode_lsm_get(inode);
if (lsm) {
ccc_inode_lsm_put(inode, lsm);
- CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
- inode->i_ino);
+ CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
+ PFID(ll_inode2fid(inode)));
rc = -EEXIST;
goto out;
}
@@ -1478,7 +1502,7 @@ out:
static int ll_lov_setea(struct inode *inode, struct file *file,
unsigned long arg)
{
- int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
+ __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
struct lov_user_md *lump;
int lum_size = sizeof(struct lov_user_md) +
sizeof(struct lov_user_ost_data);
@@ -1512,7 +1536,7 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file,
struct lov_user_md_v1 __user *lumv1p = (void __user *)arg;
struct lov_user_md_v3 __user *lumv3p = (void __user *)arg;
int lum_size, rc;
- int flags = FMODE_WRITE;
+ __u64 flags = FMODE_WRITE;
/* first try with v1 which is smaller than v3 */
lum_size = sizeof(struct lov_user_md_v1);
@@ -1561,7 +1585,7 @@ ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
{
struct ll_inode_info *lli = ll_i2info(inode);
struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ccc_grouplock grouplock;
+ struct ll_grouplock grouplock;
int rc;
if (arg == 0) {
@@ -1575,14 +1599,14 @@ ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
spin_lock(&lli->lli_lock);
if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
CWARN("group lock already existed with gid %lu\n",
- fd->fd_grouplock.cg_gid);
+ fd->fd_grouplock.lg_gid);
spin_unlock(&lli->lli_lock);
return -EINVAL;
}
- LASSERT(!fd->fd_grouplock.cg_lock);
+ LASSERT(!fd->fd_grouplock.lg_lock);
spin_unlock(&lli->lli_lock);
- rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
+ rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
arg, (file->f_flags & O_NONBLOCK), &grouplock);
if (rc)
return rc;
@@ -1608,7 +1632,7 @@ static int ll_put_grouplock(struct inode *inode, struct file *file,
{
struct ll_inode_info *lli = ll_i2info(inode);
struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ccc_grouplock grouplock;
+ struct ll_grouplock grouplock;
spin_lock(&lli->lli_lock);
if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
@@ -1616,11 +1640,11 @@ static int ll_put_grouplock(struct inode *inode, struct file *file,
CWARN("no group lock held\n");
return -EINVAL;
}
- LASSERT(fd->fd_grouplock.cg_lock);
+ LASSERT(fd->fd_grouplock.lg_lock);
- if (fd->fd_grouplock.cg_gid != arg) {
+ if (fd->fd_grouplock.lg_gid != arg) {
CWARN("group lock %lu doesn't match current id %lu\n",
- arg, fd->fd_grouplock.cg_gid);
+ arg, fd->fd_grouplock.lg_gid);
spin_unlock(&lli->lli_lock);
return -EINVAL;
}
@@ -1861,11 +1885,12 @@ error:
* This value is computed using stripe object version on OST.
* Version is computed using server side locking.
*
- * @param extent_lock Take extent lock. Not needed if a process is already
- * holding the OST object group locks.
+ * @param sync if do sync on the OST side;
+ * 0: no sync
+ * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
+ * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
*/
-int ll_data_version(struct inode *inode, __u64 *data_version,
- int extent_lock)
+int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
{
struct lov_stripe_md *lsm = NULL;
struct ll_sb_info *sbi = ll_i2sbi(inode);
@@ -1887,7 +1912,7 @@ int ll_data_version(struct inode *inode, __u64 *data_version,
goto out;
}
- rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, obdo, 0, extent_lock);
+ rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, obdo, 0, flags);
if (rc == 0) {
if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
rc = -EOPNOTSUPP;
@@ -1923,7 +1948,7 @@ int ll_hsm_release(struct inode *inode)
}
/* Grab latest data_version and [am]time values */
- rc = ll_data_version(inode, &data_version, 1);
+ rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
if (rc != 0)
goto out;
@@ -1933,7 +1958,7 @@ int ll_hsm_release(struct inode *inode)
goto out;
}
- ll_merge_lvb(env, inode);
+ ll_merge_attr(env, inode);
cl_env_nested_put(&nest, env);
/* Release the file.
@@ -2227,8 +2252,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
int flags, rc;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
- inode->i_generation, inode, cmd);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),cmd=%x\n",
+ PFID(ll_inode2fid(inode)), inode, cmd);
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
/* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
@@ -2331,9 +2356,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
return -EFAULT;
- rc = ll_data_version(inode, &idv.idv_version,
- !(idv.idv_flags & LL_DV_NOFLUSH));
-
+ idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
+ rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
if (rc == 0 && copy_to_user((char __user *)arg, &idv,
sizeof(idv)))
return -EFAULT;
@@ -2499,7 +2523,7 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
rc = och->och_flags &
(FMODE_READ | FMODE_WRITE);
unlock_res_and_lock(lock);
- ldlm_lock_put(lock);
+ LDLM_LOCK_PUT(lock);
}
}
mutex_unlock(&lli->lli_och_mutex);
@@ -2537,9 +2561,8 @@ static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
(origin == SEEK_CUR) ? file->f_pos : 0);
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
- inode->i_ino, inode->i_generation, inode, retval, retval,
- origin);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
+ PFID(ll_inode2fid(inode)), inode, retval, retval, origin);
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
@@ -2603,8 +2626,8 @@ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
if (IS_ERR(env))
return PTR_ERR(env);
- io = ccc_env_thread_io(env);
- io->ci_obj = cl_i2info(inode)->lli_clob;
+ io = vvp_env_thread_io(env);
+ io->ci_obj = ll_i2info(inode)->lli_clob;
io->ci_ignore_layout = ignore_layout;
/* initialize parameters for sync */
@@ -2634,8 +2657,8 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
struct ptlrpc_request *req;
int rc, err;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
- inode->i_generation, inode);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+ PFID(ll_inode2fid(inode)), inode);
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
@@ -2693,8 +2716,8 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
int rc;
int rc2 = 0;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
- inode->i_ino, file_lock);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
+ PFID(ll_inode2fid(inode)), file_lock);
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
@@ -2777,9 +2800,9 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
if (IS_ERR(op_data))
return PTR_ERR(op_data);
- CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n",
- inode->i_ino, flock.l_flock.pid, flags, einfo.ei_mode,
- flock.l_flock.start, flock.l_flock.end);
+ CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n",
+ PFID(ll_inode2fid(inode)), flock.l_flock.pid, flags,
+ einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
op_data, &lockh, &flock, 0, NULL /* req */, flags);
@@ -2901,8 +2924,8 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
struct obd_export *exp;
int rc = 0;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%pd\n",
- inode->i_ino, inode->i_generation, inode, dentry);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%pd\n",
+ PFID(ll_inode2fid(inode)), inode, dentry);
exp = ll_i2mdexp(inode);
@@ -2998,9 +3021,9 @@ static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
/* if object isn't regular file, don't validate size */
if (!S_ISREG(inode->i_mode)) {
- LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
- LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
- LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
+ LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
+ LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
+ LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
} else {
/* In case of restore, the MDT has the right size and has
* already send it back without granting the layout lock,
@@ -3124,8 +3147,8 @@ int ll_inode_permission(struct inode *inode, int mask)
return rc;
}
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
- inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
+ PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
return lustre_check_remote_perm(inode, mask);
@@ -3335,10 +3358,10 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
int rc;
CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
- PFID(ll_inode2fid(inode)), !!(lock->l_flags & LDLM_FL_LVB_READY),
+ PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
lock->l_lvb_data, lock->l_lvb_len);
- if (lock->l_lvb_data && (lock->l_flags & LDLM_FL_LVB_READY))
+ if (lock->l_lvb_data && ldlm_is_lvb_ready(lock))
return 0;
/* if layout lock was granted right away, the layout is returned
@@ -3415,14 +3438,14 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
LASSERT(lock);
LASSERT(ldlm_has_layout(lock));
- LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d",
- inode, PFID(&lli->lli_fid), reconf);
+ LDLM_DEBUG(lock, "File "DFID"(%p) being reconfigured: %d",
+ PFID(&lli->lli_fid), inode, reconf);
/* in case this is a caching lock and reinstate with new inode */
md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
lock_res_and_lock(lock);
- lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY);
+ lvb_ready = ldlm_is_lvb_ready(lock);
unlock_res_and_lock(lock);
/* checking lvb_ready is racy but this is okay. The worst case is
* that multi processes may configure the file on the same time.
@@ -3487,9 +3510,9 @@ out:
/* wait for IO to complete if it's still being used. */
if (wait_layout) {
- CDEBUG(D_INODE, "%s: %p/" DFID " wait for layout reconf.\n",
+ CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
ll_get_fsname(inode->i_sb, NULL, 0),
- inode, PFID(&lli->lli_fid));
+ PFID(&lli->lli_fid), inode);
memset(&conf, 0, sizeof(conf));
conf.coc_opc = OBJECT_CONF_WAIT;
@@ -3498,7 +3521,8 @@ out:
if (rc == 0)
rc = -EAGAIN;
- CDEBUG(D_INODE, "file: " DFID " waiting layout return: %d.\n",
+ CDEBUG(D_INODE, "%s: file="DFID" waiting layout return: %d.\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
PFID(&lli->lli_fid), rc);
}
return rc;
@@ -3571,9 +3595,9 @@ again:
it.it_op = IT_LAYOUT;
lockh.cookie = 0ULL;
- LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/" DFID "",
- ll_get_fsname(inode->i_sb, NULL, 0), inode,
- PFID(&lli->lli_fid));
+ LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(&lli->lli_fid), inode);
rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
NULL, 0, NULL, 0);
@@ -3601,7 +3625,7 @@ again:
/**
* This function send a restore request to the MDT
*/
-int ll_layout_restore(struct inode *inode)
+int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
{
struct hsm_user_request *hur;
int len, rc;
@@ -3617,9 +3641,10 @@ int ll_layout_restore(struct inode *inode)
hur->hur_request.hr_flags = 0;
memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
sizeof(hur->hur_user_item[0].hui_fid));
- hur->hur_user_item[0].hui_extent.length = -1;
+ hur->hur_user_item[0].hui_extent.offset = offset;
+ hur->hur_user_item[0].hui_extent.length = length;
hur->hur_request.hr_itemcount = 1;
- rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,
+ rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,
len, hur, NULL);
kfree(hur);
return rc;
diff --git a/drivers/staging/lustre/lustre/llite/glimpse.c b/drivers/staging/lustre/lustre/llite/glimpse.c
new file mode 100644
index 000000000000..d8ea75424e2f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/glimpse.c
@@ -0,0 +1,255 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * glimpse code shared between vvp and liblustre (and other Lustre clients in
+ * the future).
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Oleg Drokin <oleg.drokin@sun.com>
+ */
+
+#include "../../include/linux/libcfs/libcfs.h"
+#include "../include/obd_class.h"
+#include "../include/obd_support.h"
+#include "../include/obd.h"
+
+#include "../include/lustre_dlm.h"
+#include "../include/lustre_lite.h"
+#include "../include/lustre_mdc.h"
+#include <linux/pagemap.h>
+#include <linux/file.h>
+
+#include "../include/cl_object.h"
+#include "../llite/llite_internal.h"
+
+static const struct cl_lock_descr whole_file = {
+ .cld_start = 0,
+ .cld_end = CL_PAGE_EOF,
+ .cld_mode = CLM_READ
+};
+
+/*
+ * Check whether file has possible unwriten pages.
+ *
+ * \retval 1 file is mmap-ed or has dirty pages
+ * 0 otherwise
+ */
+blkcnt_t dirty_cnt(struct inode *inode)
+{
+ blkcnt_t cnt = 0;
+ struct vvp_object *vob = cl_inode2vvp(inode);
+ void *results[1];
+
+ if (inode->i_mapping)
+ cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->page_tree,
+ results, 0, 1,
+ PAGECACHE_TAG_DIRTY);
+ if (cnt == 0 && atomic_read(&vob->vob_mmap_cnt) > 0)
+ cnt = 1;
+
+ return (cnt > 0) ? 1 : 0;
+}
+
+int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
+ struct inode *inode, struct cl_object *clob, int agl)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ const struct lu_fid *fid = lu_object_fid(&clob->co_lu);
+ int result;
+
+ result = 0;
+ if (!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)) {
+ CDEBUG(D_DLMTRACE, "Glimpsing inode " DFID "\n", PFID(fid));
+ if (lli->lli_has_smd) {
+ struct cl_lock *lock = vvp_env_lock(env);
+ struct cl_lock_descr *descr = &lock->cll_descr;
+
+ /* NOTE: this looks like DLM lock request, but it may
+ * not be one. Due to CEF_ASYNC flag (translated
+ * to LDLM_FL_HAS_INTENT by osc), this is
+ * glimpse request, that won't revoke any
+ * conflicting DLM locks held. Instead,
+ * ll_glimpse_callback() will be called on each
+ * client holding a DLM lock against this file,
+ * and resulting size will be returned for each
+ * stripe. DLM lock on [0, EOF] is acquired only
+ * if there were no conflicting locks. If there
+ * were conflicting locks, enqueuing or waiting
+ * fails with -ENAVAIL, but valid inode
+ * attributes are returned anyway.
+ */
+ *descr = whole_file;
+ descr->cld_obj = clob;
+ descr->cld_mode = CLM_READ;
+ descr->cld_enq_flags = CEF_ASYNC | CEF_MUST;
+ if (agl)
+ descr->cld_enq_flags |= CEF_AGL;
+ /*
+ * CEF_ASYNC is used because glimpse sub-locks cannot
+ * deadlock (because they never conflict with other
+ * locks) and, hence, can be enqueued out-of-order.
+ *
+ * CEF_MUST protects glimpse lock from conversion into
+ * a lockless mode.
+ */
+ result = cl_lock_request(env, io, lock);
+ if (result < 0)
+ return result;
+
+ if (!agl) {
+ ll_merge_attr(env, inode);
+ if (i_size_read(inode) > 0 &&
+ inode->i_blocks == 0) {
+ /*
+ * LU-417: Add dirty pages block count
+ * lest i_blocks reports 0, some "cp" or
+ * "tar" may think it's a completely
+ * sparse file and skip it.
+ */
+ inode->i_blocks = dirty_cnt(inode);
+ }
+ }
+ cl_lock_release(env, lock);
+ } else {
+ CDEBUG(D_DLMTRACE, "No objects for inode\n");
+ ll_merge_attr(env, inode);
+ }
+ }
+
+ return result;
+}
+
+static int cl_io_get(struct inode *inode, struct lu_env **envout,
+ struct cl_io **ioout, int *refcheck)
+{
+ struct lu_env *env;
+ struct cl_io *io;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct cl_object *clob = lli->lli_clob;
+ int result;
+
+ if (S_ISREG(inode->i_mode)) {
+ env = cl_env_get(refcheck);
+ if (!IS_ERR(env)) {
+ io = vvp_env_thread_io(env);
+ io->ci_obj = clob;
+ *envout = env;
+ *ioout = io;
+ result = 1;
+ } else {
+ result = PTR_ERR(env);
+ }
+ } else {
+ result = 0;
+ }
+ return result;
+}
+
+int cl_glimpse_size0(struct inode *inode, int agl)
+{
+ /*
+ * We don't need ast_flags argument to cl_glimpse_size(), because
+ * osc_lock_enqueue() takes care of the possible deadlock that said
+ * argument was introduced to avoid.
+ */
+ /*
+ * XXX but note that ll_file_seek() passes LDLM_FL_BLOCK_NOWAIT to
+ * cl_glimpse_size(), which doesn't make sense: glimpse locks are not
+ * blocking anyway.
+ */
+ struct lu_env *env = NULL;
+ struct cl_io *io = NULL;
+ int result;
+ int refcheck;
+
+ result = cl_io_get(inode, &env, &io, &refcheck);
+ if (result > 0) {
+again:
+ io->ci_verify_layout = 1;
+ result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
+ if (result > 0)
+ /*
+ * nothing to do for this io. This currently happens
+ * when stripe sub-object's are not yet created.
+ */
+ result = io->ci_result;
+ else if (result == 0)
+ result = cl_glimpse_lock(env, io, inode, io->ci_obj,
+ agl);
+
+ OBD_FAIL_TIMEOUT(OBD_FAIL_GLIMPSE_DELAY, 2);
+ cl_io_fini(env, io);
+ if (unlikely(io->ci_need_restart))
+ goto again;
+ cl_env_put(env, &refcheck);
+ }
+ return result;
+}
+
+int cl_local_size(struct inode *inode)
+{
+ struct lu_env *env = NULL;
+ struct cl_io *io = NULL;
+ struct cl_object *clob;
+ int result;
+ int refcheck;
+
+ if (!ll_i2info(inode)->lli_has_smd)
+ return 0;
+
+ result = cl_io_get(inode, &env, &io, &refcheck);
+ if (result <= 0)
+ return result;
+
+ clob = io->ci_obj;
+ result = cl_io_init(env, io, CIT_MISC, clob);
+ if (result > 0) {
+ result = io->ci_result;
+ } else if (result == 0) {
+ struct cl_lock *lock = vvp_env_lock(env);
+
+ lock->cll_descr = whole_file;
+ lock->cll_descr.cld_enq_flags = CEF_PEEK;
+ lock->cll_descr.cld_obj = clob;
+ result = cl_lock_request(env, io, lock);
+ if (result == 0) {
+ ll_merge_attr(env, inode);
+ cl_lock_release(env, lock);
+ }
+ }
+ cl_io_fini(env, io);
+ cl_env_put(env, &refcheck);
+ return result;
+}
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
new file mode 100644
index 000000000000..6c00715b438f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
@@ -0,0 +1,327 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2015, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * cl code shared between vvp and liblustre (and other Lustre clients in the
+ * future).
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include "../../include/linux/libcfs/libcfs.h"
+# include <linux/fs.h>
+# include <linux/sched.h>
+# include <linux/mm.h>
+# include <linux/quotaops.h>
+# include <linux/highmem.h>
+# include <linux/pagemap.h>
+# include <linux/rbtree.h>
+
+#include "../include/obd.h"
+#include "../include/obd_support.h"
+#include "../include/lustre_fid.h"
+#include "../include/lustre_lite.h"
+#include "../include/lustre_dlm.h"
+#include "../include/lustre_ver.h"
+#include "../include/lustre_mdc.h"
+#include "../include/cl_object.h"
+
+#include "../llite/llite_internal.h"
+
+/*
+ * ccc_ prefix stands for "Common Client Code".
+ */
+
+/*****************************************************************************
+ *
+ * Vvp device and device type functions.
+ *
+ */
+
+/**
+ * An `emergency' environment used by cl_inode_fini() when cl_env_get()
+ * fails. Access to this environment is serialized by cl_inode_fini_guard
+ * mutex.
+ */
+struct lu_env *cl_inode_fini_env;
+int cl_inode_fini_refcheck;
+
+/**
+ * A mutex serializing calls to slp_inode_fini() under extreme memory
+ * pressure, when environments cannot be allocated.
+ */
+static DEFINE_MUTEX(cl_inode_fini_guard);
+
+int cl_setattr_ost(struct inode *inode, const struct iattr *attr)
+{
+ struct lu_env *env;
+ struct cl_io *io;
+ int result;
+ int refcheck;
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ return PTR_ERR(env);
+
+ io = vvp_env_thread_io(env);
+ io->ci_obj = ll_i2info(inode)->lli_clob;
+
+ io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime);
+ io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime);
+ io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime);
+ io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size;
+ io->u.ci_setattr.sa_valid = attr->ia_valid;
+
+again:
+ if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) {
+ struct vvp_io *vio = vvp_env_io(env);
+
+ if (attr->ia_valid & ATTR_FILE)
+ /* populate the file descriptor for ftruncate to honor
+ * group lock - see LU-787
+ */
+ vio->vui_fd = LUSTRE_FPRIVATE(attr->ia_file);
+
+ result = cl_io_loop(env, io);
+ } else {
+ result = io->ci_result;
+ }
+ cl_io_fini(env, io);
+ if (unlikely(io->ci_need_restart))
+ goto again;
+ /* HSM import case: file is released, cannot be restored
+ * no need to fail except if restore registration failed
+ * with -ENODATA
+ */
+ if (result == -ENODATA && io->ci_restore_needed &&
+ io->ci_result != -ENODATA)
+ result = 0;
+ cl_env_put(env, &refcheck);
+ return result;
+}
+
+/**
+ * Initialize or update CLIO structures for regular files when new
+ * meta-data arrives from the server.
+ *
+ * \param inode regular file inode
+ * \param md new file metadata from MDS
+ * - allocates cl_object if necessary,
+ * - updated layout, if object was already here.
+ */
+int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
+{
+ struct lu_env *env;
+ struct ll_inode_info *lli;
+ struct cl_object *clob;
+ struct lu_site *site;
+ struct lu_fid *fid;
+ struct cl_object_conf conf = {
+ .coc_inode = inode,
+ .u = {
+ .coc_md = md
+ }
+ };
+ int result = 0;
+ int refcheck;
+
+ LASSERT(md->body->valid & OBD_MD_FLID);
+ LASSERT(S_ISREG(inode->i_mode));
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ return PTR_ERR(env);
+
+ site = ll_i2sbi(inode)->ll_site;
+ lli = ll_i2info(inode);
+ fid = &lli->lli_fid;
+ LASSERT(fid_is_sane(fid));
+
+ if (!lli->lli_clob) {
+ /* clob is slave of inode, empty lli_clob means for new inode,
+ * there is no clob in cache with the given fid, so it is
+ * unnecessary to perform lookup-alloc-lookup-insert, just
+ * alloc and insert directly.
+ */
+ LASSERT(inode->i_state & I_NEW);
+ conf.coc_lu.loc_flags = LOC_F_NEW;
+ clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev),
+ fid, &conf);
+ if (!IS_ERR(clob)) {
+ /*
+ * No locking is necessary, as new inode is
+ * locked by I_NEW bit.
+ */
+ lli->lli_clob = clob;
+ lli->lli_has_smd = lsm_has_objects(md->lsm);
+ lu_object_ref_add(&clob->co_lu, "inode", inode);
+ } else {
+ result = PTR_ERR(clob);
+ }
+ } else {
+ result = cl_conf_set(env, lli->lli_clob, &conf);
+ }
+
+ cl_env_put(env, &refcheck);
+
+ if (result != 0)
+ CERROR("Failure to initialize cl object " DFID ": %d\n",
+ PFID(fid), result);
+ return result;
+}
+
+/**
+ * Wait for others drop their references of the object at first, then we drop
+ * the last one, which will lead to the object be destroyed immediately.
+ * Must be called after cl_object_kill() against this object.
+ *
+ * The reason we want to do this is: destroying top object will wait for sub
+ * objects being destroyed first, so we can't let bottom layer (e.g. from ASTs)
+ * to initiate top object destroying which may deadlock. See bz22520.
+ */
+static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
+{
+ struct lu_object_header *header = obj->co_lu.lo_header;
+ wait_queue_t waiter;
+
+ if (unlikely(atomic_read(&header->loh_ref) != 1)) {
+ struct lu_site *site = obj->co_lu.lo_dev->ld_site;
+ struct lu_site_bkt_data *bkt;
+
+ bkt = lu_site_bkt_from_fid(site, &header->loh_fid);
+
+ init_waitqueue_entry(&waiter, current);
+ add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
+
+ while (1) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (atomic_read(&header->loh_ref) == 1)
+ break;
+ schedule();
+ }
+
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
+ }
+
+ cl_object_put(env, obj);
+}
+
+void cl_inode_fini(struct inode *inode)
+{
+ struct lu_env *env;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct cl_object *clob = lli->lli_clob;
+ int refcheck;
+ int emergency;
+
+ if (clob) {
+ void *cookie;
+
+ cookie = cl_env_reenter();
+ env = cl_env_get(&refcheck);
+ emergency = IS_ERR(env);
+ if (emergency) {
+ mutex_lock(&cl_inode_fini_guard);
+ LASSERT(cl_inode_fini_env);
+ cl_env_implant(cl_inode_fini_env, &refcheck);
+ env = cl_inode_fini_env;
+ }
+ /*
+ * cl_object cache is a slave to inode cache (which, in turn
+ * is a slave to dentry cache), don't keep cl_object in memory
+ * when its master is evicted.
+ */
+ cl_object_kill(env, clob);
+ lu_object_ref_del(&clob->co_lu, "inode", inode);
+ cl_object_put_last(env, clob);
+ lli->lli_clob = NULL;
+ if (emergency) {
+ cl_env_unplant(cl_inode_fini_env, &refcheck);
+ mutex_unlock(&cl_inode_fini_guard);
+ } else {
+ cl_env_put(env, &refcheck);
+ }
+ cl_env_reexit(cookie);
+ }
+}
+
+/**
+ * build inode number from passed @fid
+ */
+__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32)
+{
+ if (BITS_PER_LONG == 32 || api32)
+ return fid_flatten32(fid);
+ else
+ return fid_flatten(fid);
+}
+
+/**
+ * build inode generation from passed @fid. If our FID overflows the 32-bit
+ * inode number then return a non-zero generation to distinguish them.
+ */
+__u32 cl_fid_build_gen(const struct lu_fid *fid)
+{
+ __u32 gen;
+
+ if (fid_is_igif(fid)) {
+ gen = lu_igif_gen(fid);
+ return gen;
+ }
+
+ gen = fid_flatten(fid) >> 32;
+ return gen;
+}
+
+/* lsm is unreliable after hsm implementation as layout can be changed at
+ * any time. This is only to support old, non-clio-ized interfaces. It will
+ * cause deadlock if clio operations are called with this extra layout refcount
+ * because in case the layout changed during the IO, ll_layout_refresh() will
+ * have to wait for the refcount to become zero to destroy the older layout.
+ *
+ * Notice that the lsm returned by this function may not be valid unless called
+ * inside layout lock - MDS_INODELOCK_LAYOUT.
+ */
+struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode)
+{
+ return lov_lsm_get(ll_i2info(inode)->lli_clob);
+}
+
+inline void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm)
+{
+ lov_lsm_put(ll_i2info(inode)->lli_clob, lsm);
+}
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_misc.c b/drivers/staging/lustre/lustre/llite/lcommon_misc.c
new file mode 100644
index 000000000000..12f3e71f48c2
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/lcommon_misc.c
@@ -0,0 +1,201 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * cl code shared between vvp and liblustre (and other Lustre clients in the
+ * future).
+ *
+ */
+#include "../include/obd_class.h"
+#include "../include/obd_support.h"
+#include "../include/obd.h"
+#include "../include/cl_object.h"
+
+#include "../include/lustre_lite.h"
+#include "llite_internal.h"
+
+/* Initialize the default and maximum LOV EA and cookie sizes. This allows
+ * us to make MDS RPCs with large enough reply buffers to hold the
+ * maximum-sized (= maximum striped) EA and cookie without having to
+ * calculate this (via a call into the LOV + OSCs) each time we make an RPC.
+ */
+int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
+{
+ struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC_V3 };
+ __u32 valsize = sizeof(struct lov_desc);
+ int rc, easize, def_easize, cookiesize;
+ struct lov_desc desc;
+ __u16 stripes, def_stripes;
+
+ rc = obd_get_info(NULL, dt_exp, sizeof(KEY_LOVDESC), KEY_LOVDESC,
+ &valsize, &desc, NULL);
+ if (rc)
+ return rc;
+
+ stripes = min_t(__u32, desc.ld_tgt_count, LOV_MAX_STRIPE_COUNT);
+ lsm.lsm_stripe_count = stripes;
+ easize = obd_size_diskmd(dt_exp, &lsm);
+
+ def_stripes = min_t(__u32, desc.ld_default_stripe_count,
+ LOV_MAX_STRIPE_COUNT);
+ lsm.lsm_stripe_count = def_stripes;
+ def_easize = obd_size_diskmd(dt_exp, &lsm);
+
+ cookiesize = stripes * sizeof(struct llog_cookie);
+
+ /* default cookiesize is 0 because from 2.4 server doesn't send
+ * llog cookies to client.
+ */
+ CDEBUG(D_HA,
+ "updating def/max_easize: %d/%d def/max_cookiesize: 0/%d\n",
+ def_easize, easize, cookiesize);
+
+ rc = md_init_ea_size(md_exp, easize, def_easize, cookiesize, 0);
+ return rc;
+}
+
+/**
+ * This function is used as an upcall-callback hooked by liblustre and llite
+ * clients into obd_notify() listeners chain to handle notifications about
+ * change of import connect_flags. See llu_fsswop_mount() and
+ * lustre_common_fill_super().
+ */
+int cl_ocd_update(struct obd_device *host,
+ struct obd_device *watched,
+ enum obd_notify_event ev, void *owner, void *data)
+{
+ struct lustre_client_ocd *lco;
+ struct client_obd *cli;
+ __u64 flags;
+ int result;
+
+ if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) {
+ cli = &watched->u.cli;
+ lco = owner;
+ flags = cli->cl_import->imp_connect_data.ocd_connect_flags;
+ CDEBUG(D_SUPER, "Changing connect_flags: %#llx -> %#llx\n",
+ lco->lco_flags, flags);
+ mutex_lock(&lco->lco_lock);
+ lco->lco_flags &= flags;
+ /* for each osc event update ea size */
+ if (lco->lco_dt_exp)
+ cl_init_ea_size(lco->lco_md_exp, lco->lco_dt_exp);
+
+ mutex_unlock(&lco->lco_lock);
+ result = 0;
+ } else {
+ CERROR("unexpected notification from %s %s!\n",
+ watched->obd_type->typ_name,
+ watched->obd_name);
+ result = -EINVAL;
+ }
+ return result;
+}
+
+#define GROUPLOCK_SCOPE "grouplock"
+
+int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
+ struct ll_grouplock *cg)
+{
+ struct lu_env *env;
+ struct cl_io *io;
+ struct cl_lock *lock;
+ struct cl_lock_descr *descr;
+ __u32 enqflags;
+ int refcheck;
+ int rc;
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ return PTR_ERR(env);
+
+ io = vvp_env_thread_io(env);
+ io->ci_obj = obj;
+ io->ci_ignore_layout = 1;
+
+ rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
+ if (rc != 0) {
+ cl_io_fini(env, io);
+ cl_env_put(env, &refcheck);
+ /* Does not make sense to take GL for released layout */
+ if (rc > 0)
+ rc = -ENOTSUPP;
+ return rc;
+ }
+
+ lock = vvp_env_lock(env);
+ descr = &lock->cll_descr;
+ descr->cld_obj = obj;
+ descr->cld_start = 0;
+ descr->cld_end = CL_PAGE_EOF;
+ descr->cld_gid = gid;
+ descr->cld_mode = CLM_GROUP;
+
+ enqflags = CEF_MUST | (nonblock ? CEF_NONBLOCK : 0);
+ descr->cld_enq_flags = enqflags;
+
+ rc = cl_lock_request(env, io, lock);
+ if (rc < 0) {
+ cl_io_fini(env, io);
+ cl_env_put(env, &refcheck);
+ return rc;
+ }
+
+ cg->lg_env = cl_env_get(&refcheck);
+ cg->lg_io = io;
+ cg->lg_lock = lock;
+ cg->lg_gid = gid;
+ LASSERT(cg->lg_env == env);
+
+ cl_env_unplant(env, &refcheck);
+ return 0;
+}
+
+void cl_put_grouplock(struct ll_grouplock *cg)
+{
+ struct lu_env *env = cg->lg_env;
+ struct cl_io *io = cg->lg_io;
+ struct cl_lock *lock = cg->lg_lock;
+ int refcheck;
+
+ LASSERT(cg->lg_env);
+ LASSERT(cg->lg_gid);
+
+ cl_env_implant(env, &refcheck);
+ cl_env_put(env, &refcheck);
+
+ cl_lock_release(env, lock);
+ cl_io_fini(env, io);
+ cl_env_put(env, NULL);
+}
diff --git a/drivers/staging/lustre/lustre/llite/llite_close.c b/drivers/staging/lustre/lustre/llite/llite_close.c
index a55ac4dccd90..2df551d3ae6c 100644
--- a/drivers/staging/lustre/lustre/llite/llite_close.c
+++ b/drivers/staging/lustre/lustre/llite/llite_close.c
@@ -46,31 +46,31 @@
#include "llite_internal.h"
/** records that a write is in flight */
-void vvp_write_pending(struct ccc_object *club, struct ccc_page *page)
+void vvp_write_pending(struct vvp_object *club, struct vvp_page *page)
{
- struct ll_inode_info *lli = ll_i2info(club->cob_inode);
+ struct ll_inode_info *lli = ll_i2info(club->vob_inode);
spin_lock(&lli->lli_lock);
lli->lli_flags |= LLIF_SOM_DIRTY;
- if (page && list_empty(&page->cpg_pending_linkage))
- list_add(&page->cpg_pending_linkage, &club->cob_pending_list);
+ if (page && list_empty(&page->vpg_pending_linkage))
+ list_add(&page->vpg_pending_linkage, &club->vob_pending_list);
spin_unlock(&lli->lli_lock);
}
/** records that a write has completed */
-void vvp_write_complete(struct ccc_object *club, struct ccc_page *page)
+void vvp_write_complete(struct vvp_object *club, struct vvp_page *page)
{
- struct ll_inode_info *lli = ll_i2info(club->cob_inode);
+ struct ll_inode_info *lli = ll_i2info(club->vob_inode);
int rc = 0;
spin_lock(&lli->lli_lock);
- if (page && !list_empty(&page->cpg_pending_linkage)) {
- list_del_init(&page->cpg_pending_linkage);
+ if (page && !list_empty(&page->vpg_pending_linkage)) {
+ list_del_init(&page->vpg_pending_linkage);
rc = 1;
}
spin_unlock(&lli->lli_lock);
if (rc)
- ll_queue_done_writing(club->cob_inode, 0);
+ ll_queue_done_writing(club->vob_inode, 0);
}
/** Queues DONE_WRITING if
@@ -80,25 +80,25 @@ void vvp_write_complete(struct ccc_object *club, struct ccc_page *page)
void ll_queue_done_writing(struct inode *inode, unsigned long flags)
{
struct ll_inode_info *lli = ll_i2info(inode);
- struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob);
+ struct vvp_object *club = cl2vvp(ll_i2info(inode)->lli_clob);
spin_lock(&lli->lli_lock);
lli->lli_flags |= flags;
if ((lli->lli_flags & LLIF_DONE_WRITING) &&
- list_empty(&club->cob_pending_list)) {
+ list_empty(&club->vob_pending_list)) {
struct ll_close_queue *lcq = ll_i2sbi(inode)->ll_lcq;
if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
- CWARN("ino %lu/%u(flags %u) som valid it just after recovery\n",
- inode->i_ino, inode->i_generation,
- lli->lli_flags);
+ CWARN("%s: file "DFID"(flags %u) Size-on-MDS valid, done writing allowed and no diry pages\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(ll_inode2fid(inode)), lli->lli_flags);
/* DONE_WRITING is allowed and inode has no dirty page. */
spin_lock(&lcq->lcq_lock);
LASSERT(list_empty(&lli->lli_close_list));
- CDEBUG(D_INODE, "adding inode %lu/%u to close list\n",
- inode->i_ino, inode->i_generation);
+ CDEBUG(D_INODE, "adding inode "DFID" to close list\n",
+ PFID(ll_inode2fid(inode)));
list_add_tail(&lli->lli_close_list, &lcq->lcq_head);
/* Avoid a concurrent insertion into the close thread queue:
@@ -124,9 +124,9 @@ void ll_done_writing_attr(struct inode *inode, struct md_op_data *op_data)
op_data->op_flags |= MF_SOM_CHANGE;
/* Check if Size-on-MDS attributes are valid. */
if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
- CERROR("ino %lu/%u(flags %u) som valid it just after recovery\n",
- inode->i_ino, inode->i_generation,
- lli->lli_flags);
+ CERROR("%s: inode "DFID"(flags %u) MDS holds lock on Size-on-MDS attributes\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(ll_inode2fid(inode)), lli->lli_flags);
if (!cl_local_size(inode)) {
/* Send Size-on-MDS Attributes if valid. */
@@ -140,10 +140,10 @@ void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data,
struct obd_client_handle **och, unsigned long flags)
{
struct ll_inode_info *lli = ll_i2info(inode);
- struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob);
+ struct vvp_object *club = cl2vvp(ll_i2info(inode)->lli_clob);
spin_lock(&lli->lli_lock);
- if (!(list_empty(&club->cob_pending_list))) {
+ if (!(list_empty(&club->vob_pending_list))) {
if (!(lli->lli_flags & LLIF_EPOCH_PENDING)) {
LASSERT(*och);
LASSERT(!lli->lli_pending_och);
@@ -198,7 +198,7 @@ void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data,
}
}
- LASSERT(list_empty(&club->cob_pending_list));
+ LASSERT(list_empty(&club->vob_pending_list));
lli->lli_flags &= ~LLIF_SOM_DIRTY;
spin_unlock(&lli->lli_lock);
ll_done_writing_attr(inode, op_data);
@@ -221,9 +221,9 @@ int ll_som_update(struct inode *inode, struct md_op_data *op_data)
LASSERT(op_data);
if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
- CERROR("ino %lu/%u(flags %u) som valid it just after recovery\n",
- inode->i_ino, inode->i_generation,
- lli->lli_flags);
+ CERROR("%s: inode "DFID"(flags %u) MDS holds lock on Size-on-MDS attributes\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(ll_inode2fid(inode)), lli->lli_flags);
oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
if (!oa) {
@@ -241,9 +241,9 @@ int ll_som_update(struct inode *inode, struct md_op_data *op_data)
if (rc) {
oa->o_valid = 0;
if (rc != -ENOENT)
- CERROR("inode_getattr failed (%d): unable to send a Size-on-MDS attribute update for inode %lu/%u\n",
- rc, inode->i_ino,
- inode->i_generation);
+ CERROR("%s: inode_getattr failed - unable to send a Size-on-MDS attribute update for inode "DFID": rc = %d\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(ll_inode2fid(inode)), rc);
} else {
CDEBUG(D_INODE, "Size-on-MDS update on "DFID"\n",
PFID(&lli->lli_fid));
@@ -302,9 +302,11 @@ static void ll_done_writing(struct inode *inode)
* OSTs and send setattr to back to MDS.
*/
rc = ll_som_update(inode, op_data);
- else if (rc)
- CERROR("inode %lu mdc done_writing failed: rc = %d\n",
- inode->i_ino, rc);
+ else if (rc) {
+ CERROR("%s: inode "DFID" mdc done_writing failed: rc = %d\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(ll_inode2fid(inode)), rc);
+ }
out:
ll_finish_md_op_data(op_data);
if (och) {
@@ -323,8 +325,9 @@ static struct ll_inode_info *ll_close_next_lli(struct ll_close_queue *lcq)
lli = list_entry(lcq->lcq_head.next, struct ll_inode_info,
lli_close_list);
list_del_init(&lli->lli_close_list);
- } else if (atomic_read(&lcq->lcq_stop))
+ } else if (atomic_read(&lcq->lcq_stop)) {
lli = ERR_PTR(-EALREADY);
+ }
spin_unlock(&lcq->lcq_lock);
return lli;
@@ -348,8 +351,8 @@ static int ll_close_thread(void *arg)
break;
inode = ll_info2i(lli);
- CDEBUG(D_INFO, "done_writing for inode %lu/%u\n",
- inode->i_ino, inode->i_generation);
+ CDEBUG(D_INFO, "done_writing for inode "DFID"\n",
+ PFID(ll_inode2fid(inode)));
ll_done_writing(inode);
iput(inode);
}
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index e3c0f1dd4d31..3f2f30b6542c 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -43,11 +43,11 @@
/* for struct cl_lock_descr and struct cl_io */
#include "../include/cl_object.h"
-#include "../include/lclient.h"
#include "../include/lustre_mdc.h"
#include "../include/lustre_intent.h"
#include <linux/compat.h>
#include <linux/posix_acl_xattr.h>
+#include "vvp_internal.h"
#ifndef FMODE_EXEC
#define FMODE_EXEC 0
@@ -99,6 +99,13 @@ struct ll_remote_perm {
*/
};
+struct ll_grouplock {
+ struct lu_env *lg_env;
+ struct cl_io *lg_io;
+ struct cl_lock *lg_lock;
+ unsigned long lg_gid;
+};
+
enum lli_flags {
/* MDS has an authority for the Size-on-MDS attributes. */
LLIF_MDS_SIZE_LOCK = (1 << 0),
@@ -161,7 +168,9 @@ struct ll_inode_info {
struct inode lli_vfs_inode;
/* the most recent timestamps obtained from mds */
- struct ost_lvb lli_lvb;
+ s64 lli_atime;
+ s64 lli_mtime;
+ s64 lli_ctime;
spinlock_t lli_agl_lock;
/* Try to make the d::member and f::member are aligned. Before using
@@ -328,6 +337,7 @@ enum ra_stat {
RA_STAT_EOF,
RA_STAT_MAX_IN_FLIGHT,
RA_STAT_WRONG_GRAB_PAGE,
+ RA_STAT_FAILED_REACH_END,
_NR_RA_STAT,
};
@@ -481,6 +491,12 @@ struct ll_sb_info {
struct lprocfs_stats *ll_stats; /* lprocfs stats counter */
+ /*
+ * Used to track "unstable" pages on a client, and maintain a
+ * LRU list of clean pages. An "unstable" page is defined as
+ * any page which is sent to a server as part of a bulk request,
+ * but is uncommitted to stable storage.
+ */
struct cl_client_cache ll_cache;
struct lprocfs_stats *ll_ra_stats;
@@ -525,13 +541,6 @@ struct ll_sb_info {
struct completion ll_kobj_unregister;
};
-struct ll_ra_read {
- pgoff_t lrr_start;
- pgoff_t lrr_count;
- struct task_struct *lrr_reader;
- struct list_head lrr_linkage;
-};
-
/*
* per file-descriptor read-ahead data.
*/
@@ -590,12 +599,6 @@ struct ll_readahead_state {
*/
unsigned long ras_request_index;
/*
- * list of struct ll_ra_read's one per read(2) call current in
- * progress against this file descriptor. Used by read-ahead code,
- * protected by ->ras_lock.
- */
- struct list_head ras_read_beads;
- /*
* The following 3 items are used for detecting the stride I/O
* mode.
* In stride I/O mode,
@@ -622,7 +625,7 @@ extern struct kmem_cache *ll_file_data_slab;
struct lustre_handle;
struct ll_file_data {
struct ll_readahead_state fd_ras;
- struct ccc_grouplock fd_grouplock;
+ struct ll_grouplock fd_grouplock;
__u64 lfd_pos;
__u32 fd_flags;
fmode_t fd_omode;
@@ -663,8 +666,16 @@ static inline int ll_need_32bit_api(struct ll_sb_info *sbi)
#endif
}
-void ll_ra_read_in(struct file *f, struct ll_ra_read *rar);
-void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar);
+void ll_ras_enter(struct file *f);
+
+/* llite/lcommon_misc.c */
+int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp);
+int cl_ocd_update(struct obd_device *host,
+ struct obd_device *watched,
+ enum obd_notify_event ev, void *owner, void *data);
+int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
+ struct ll_grouplock *cg);
+void cl_put_grouplock(struct ll_grouplock *cg);
/* llite/lproc_llite.c */
int ldebugfs_register_mountpoint(struct dentry *parent,
@@ -697,15 +708,15 @@ int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de);
/* llite/rw.c */
-int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to);
-int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to);
int ll_writepage(struct page *page, struct writeback_control *wbc);
int ll_writepages(struct address_space *, struct writeback_control *wbc);
int ll_readpage(struct file *file, struct page *page);
void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
int ll_readahead(const struct lu_env *env, struct cl_io *io,
- struct ll_readahead_state *ras, struct address_space *mapping,
- struct cl_page_list *queue, int flags);
+ struct cl_page_list *queue, struct ll_readahead_state *ras,
+ bool hit);
+struct ll_cl_context *ll_cl_init(struct file *file, struct page *vmpage);
+void ll_cl_fini(struct ll_cl_context *lcc);
extern const struct address_space_operations ll_aops;
@@ -740,7 +751,7 @@ struct posix_acl *ll_get_acl(struct inode *inode, int type);
int ll_inode_permission(struct inode *inode, int mask);
int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
- int flags, struct lov_user_md *lum,
+ __u64 flags, struct lov_user_md *lum,
int lum_size);
int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
struct lov_mds_md **lmm, int *lmm_size,
@@ -750,9 +761,9 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
int *lmm_size, struct ptlrpc_request **request);
int ll_fsync(struct file *file, loff_t start, loff_t end, int data);
-int ll_merge_lvb(const struct lu_env *env, struct inode *inode);
+int ll_merge_attr(const struct lu_env *env, struct inode *inode);
int ll_fid2path(struct inode *inode, void __user *arg);
-int ll_data_version(struct inode *inode, __u64 *data_version, int extent_lock);
+int ll_data_version(struct inode *inode, __u64 *data_version, int flags);
int ll_hsm_release(struct inode *inode);
/* llite/dcache.c */
@@ -824,65 +835,8 @@ struct ll_close_queue {
atomic_t lcq_stop;
};
-struct ccc_object *cl_inode2ccc(struct inode *inode);
-
-void vvp_write_pending (struct ccc_object *club, struct ccc_page *page);
-void vvp_write_complete(struct ccc_object *club, struct ccc_page *page);
-
-/* specific architecture can implement only part of this list */
-enum vvp_io_subtype {
- /** normal IO */
- IO_NORMAL,
- /** io started from splice_{read|write} */
- IO_SPLICE
-};
-
-/* IO subtypes */
-struct vvp_io {
- /** io subtype */
- enum vvp_io_subtype cui_io_subtype;
-
- union {
- struct {
- struct pipe_inode_info *cui_pipe;
- unsigned int cui_flags;
- } splice;
- struct vvp_fault_io {
- /**
- * Inode modification time that is checked across DLM
- * lock request.
- */
- time64_t ft_mtime;
- struct vm_area_struct *ft_vma;
- /**
- * locked page returned from vvp_io
- */
- struct page *ft_vmpage;
- struct vm_fault_api {
- /**
- * kernel fault info
- */
- struct vm_fault *ft_vmf;
- /**
- * fault API used bitflags for return code.
- */
- unsigned int ft_flags;
- /**
- * check that flags are from filemap_fault
- */
- bool ft_flags_valid;
- } fault;
- } fault;
- } u;
- /**
- * Read-ahead state used by read and page-fault IO contexts.
- */
- struct ll_ra_read cui_bead;
- /**
- * Set when cui_bead has been initialized.
- */
- int cui_ra_window_set;
-};
+void vvp_write_pending(struct vvp_object *club, struct vvp_page *page);
+void vvp_write_complete(struct vvp_object *club, struct vvp_page *page);
/**
* IO arguments for various VFS I/O interfaces.
@@ -911,54 +865,32 @@ struct ll_cl_context {
int lcc_refcheck;
};
-struct vvp_thread_info {
- struct vvp_io_args vti_args;
- struct ra_io_arg vti_ria;
- struct ll_cl_context vti_io_ctx;
+struct ll_thread_info {
+ struct vvp_io_args lti_args;
+ struct ra_io_arg lti_ria;
+ struct ll_cl_context lti_io_ctx;
};
-static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env)
-{
- extern struct lu_context_key vvp_key;
- struct vvp_thread_info *info;
-
- info = lu_context_key_get(&env->le_ctx, &vvp_key);
- LASSERT(info);
- return info;
-}
-
-static inline struct vvp_io_args *vvp_env_args(const struct lu_env *env,
- enum vvp_io_subtype type)
+extern struct lu_context_key ll_thread_key;
+static inline struct ll_thread_info *ll_env_info(const struct lu_env *env)
{
- struct vvp_io_args *ret = &vvp_env_info(env)->vti_args;
-
- ret->via_io_subtype = type;
+ struct ll_thread_info *lti;
- return ret;
+ lti = lu_context_key_get(&env->le_ctx, &ll_thread_key);
+ LASSERT(lti);
+ return lti;
}
-struct vvp_session {
- struct vvp_io vs_ios;
-};
-
-static inline struct vvp_session *vvp_env_session(const struct lu_env *env)
+static inline struct vvp_io_args *ll_env_args(const struct lu_env *env,
+ enum vvp_io_subtype type)
{
- extern struct lu_context_key vvp_session_key;
- struct vvp_session *ses;
+ struct vvp_io_args *via = &ll_env_info(env)->lti_args;
- ses = lu_context_key_get(env->le_ses, &vvp_session_key);
- LASSERT(ses);
- return ses;
-}
+ via->via_io_subtype = type;
-static inline struct vvp_io *vvp_env_io(const struct lu_env *env)
-{
- return &vvp_env_session(env)->vs_ios;
+ return via;
}
-int vvp_global_init(void);
-void vvp_global_fini(void);
-
void ll_queue_done_writing(struct inode *inode, unsigned long flags);
void ll_close_thread_shutdown(struct ll_close_queue *lcq);
int ll_close_thread_start(struct ll_close_queue **lcq_ret);
@@ -981,6 +913,10 @@ static inline void ll_invalidate_page(struct page *vmpage)
if (!mapping)
return;
+ /*
+ * truncate_complete_page() calls
+ * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete().
+ */
ll_teardown_mmaps(mapping, offset, offset + PAGE_SIZE);
truncate_complete_page(mapping, vmpage);
}
@@ -1040,10 +976,10 @@ static inline __u64 ll_file_maxbytes(struct inode *inode)
}
/* llite/xattr.c */
-int ll_setxattr(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags);
-ssize_t ll_getxattr(struct dentry *dentry, const char *name,
- void *buffer, size_t size);
+int ll_setxattr(struct dentry *dentry, struct inode *inode,
+ const char *name, const void *value, size_t size, int flags);
+ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode,
+ const char *name, void *buffer, size_t size);
ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size);
int ll_removexattr(struct dentry *dentry, const char *name);
@@ -1055,9 +991,6 @@ void free_rmtperm_hash(struct hlist_head *hash);
int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm);
int lustre_check_remote_perm(struct inode *inode, int mask);
-/* llite/llite_cl.c */
-extern struct lu_device_type vvp_device_type;
-
/**
* Common IO arguments for various VFS I/O interfaces.
*/
@@ -1069,7 +1002,7 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
struct ll_readahead_state *ras, unsigned long index,
unsigned hit);
void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len);
-void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which);
+void ll_ra_stats_inc(struct inode *inode, enum ra_stat which);
/* llite/llite_rmtacl.c */
#ifdef CONFIG_FS_POSIX_ACL
@@ -1163,6 +1096,22 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentry,
int only_unplug);
void ll_stop_statahead(struct inode *dir, void *key);
+blkcnt_t dirty_cnt(struct inode *inode);
+
+int cl_glimpse_size0(struct inode *inode, int agl);
+int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
+ struct inode *inode, struct cl_object *clob, int agl);
+
+static inline int cl_glimpse_size(struct inode *inode)
+{
+ return cl_glimpse_size0(inode, 0);
+}
+
+static inline int cl_agl(struct inode *inode)
+{
+ return cl_glimpse_size0(inode, 1);
+}
+
static inline int ll_glimpse_size(struct inode *inode)
{
struct ll_inode_info *lli = ll_i2info(inode);
@@ -1285,43 +1234,6 @@ typedef enum llioc_iter (*llioc_callback_t)(struct inode *inode,
void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd);
void ll_iocontrol_unregister(void *magic);
-/* lclient compat stuff */
-#define cl_inode_info ll_inode_info
-#define cl_i2info(info) ll_i2info(info)
-#define cl_inode_mode(inode) ((inode)->i_mode)
-#define cl_i2sbi ll_i2sbi
-
-static inline struct ll_file_data *cl_iattr2fd(struct inode *inode,
- const struct iattr *attr)
-{
- LASSERT(attr->ia_valid & ATTR_FILE);
- return LUSTRE_FPRIVATE(attr->ia_file);
-}
-
-static inline void cl_isize_write_nolock(struct inode *inode, loff_t kms)
-{
- LASSERT(mutex_is_locked(&ll_i2info(inode)->lli_size_mutex));
- i_size_write(inode, kms);
-}
-
-static inline void cl_isize_write(struct inode *inode, loff_t kms)
-{
- ll_inode_size_lock(inode);
- i_size_write(inode, kms);
- ll_inode_size_unlock(inode);
-}
-
-#define cl_isize_read(inode) i_size_read(inode)
-
-static inline int cl_merge_lvb(const struct lu_env *env, struct inode *inode)
-{
- return ll_merge_lvb(env, inode);
-}
-
-#define cl_inode_atime(inode) LTIME_S((inode)->i_atime)
-#define cl_inode_ctime(inode) LTIME_S((inode)->i_ctime)
-#define cl_inode_mtime(inode) LTIME_S((inode)->i_mtime)
-
int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
enum cl_fsync_mode mode, int ignore_layout);
@@ -1350,7 +1262,7 @@ static inline void cl_stats_tally(struct cl_device *dev, enum cl_req_type crt,
int opc = (crt == CRT_READ) ? LPROC_LL_OSC_READ :
LPROC_LL_OSC_WRITE;
- ll_stats_ops_tally(ll_s2sbi(cl2ccc_dev(dev)->cdv_sb), opc, rc);
+ ll_stats_ops_tally(ll_s2sbi(cl2vvp_dev(dev)->vdv_sb), opc, rc);
}
ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
@@ -1382,18 +1294,16 @@ static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
*/
if (it->d.lustre.it_remote_lock_mode) {
handle.cookie = it->d.lustre.it_remote_lock_handle;
- CDEBUG(D_DLMTRACE, "setting l_data to inode %p(%lu/%u) for remote lock %#llx\n",
- inode,
- inode->i_ino, inode->i_generation,
+ CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"%p for remote lock %#llx\n",
+ PFID(ll_inode2fid(inode)), inode,
handle.cookie);
md_set_lock_data(exp, &handle.cookie, inode, NULL);
}
handle.cookie = it->d.lustre.it_lock_handle;
- CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u) for lock %#llx\n",
- inode, inode->i_ino,
- inode->i_generation, handle.cookie);
+ CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"%p for lock %#llx\n",
+ PFID(ll_inode2fid(inode)), inode, handle.cookie);
md_set_lock_data(exp, &handle.cookie, inode,
&it->d.lustre.it_lock_bits);
@@ -1471,9 +1381,25 @@ enum {
int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf);
int ll_layout_refresh(struct inode *inode, __u32 *gen);
-int ll_layout_restore(struct inode *inode);
+int ll_layout_restore(struct inode *inode, loff_t start, __u64 length);
int ll_xattr_init(void);
void ll_xattr_fini(void);
+int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page, enum cl_req_type crt);
+
+/* lcommon_cl.c */
+int cl_setattr_ost(struct inode *inode, const struct iattr *attr);
+
+extern struct lu_env *cl_inode_fini_env;
+extern int cl_inode_fini_refcheck;
+
+int cl_file_inode_init(struct inode *inode, struct lustre_md *md);
+void cl_inode_fini(struct inode *inode);
+int cl_local_size(struct inode *inode);
+
+__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32);
+__u32 cl_fid_build_gen(const struct lu_fid *fid);
+
#endif /* LLITE_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
index b57a992688a8..96c7e9fc6e5f 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -85,18 +85,18 @@ static struct ll_sb_info *ll_init_sbi(struct super_block *sb)
si_meminfo(&si);
pages = si.totalram - si.totalhigh;
- if (pages >> (20 - PAGE_SHIFT) < 512)
- lru_page_max = pages / 2;
- else
- lru_page_max = (pages / 4) * 3;
+ lru_page_max = pages / 2;
- /* initialize lru data */
+ /* initialize ll_cache data */
atomic_set(&sbi->ll_cache.ccc_users, 0);
sbi->ll_cache.ccc_lru_max = lru_page_max;
atomic_set(&sbi->ll_cache.ccc_lru_left, lru_page_max);
spin_lock_init(&sbi->ll_cache.ccc_lru_lock);
INIT_LIST_HEAD(&sbi->ll_cache.ccc_lru);
+ atomic_set(&sbi->ll_cache.ccc_unstable_nr, 0);
+ init_waitqueue_head(&sbi->ll_cache.ccc_unstable_waitq);
+
sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32,
SBI_DEFAULT_READAHEAD_MAX);
sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file;
@@ -169,12 +169,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
return -ENOMEM;
}
- if (llite_root) {
- err = ldebugfs_register_mountpoint(llite_root, sb, dt, md);
- if (err < 0)
- CERROR("could not register mount in <debugfs>/lustre/llite\n");
- }
-
/* indicate the features supported by this client */
data->ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH |
OBD_CONNECT_ATTRFID |
@@ -337,10 +331,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
else
sbi->ll_md_brw_size = PAGE_SIZE;
- if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK) {
- LCONSOLE_INFO("Layout lock feature supported.\n");
+ if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK)
sbi->ll_flags |= LL_SBI_LAYOUT_LOCK;
- }
if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) {
if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) {
@@ -453,7 +445,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
/* make root inode
* XXX: move this to after cbd setup?
*/
- valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS;
+ valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMODEASIZE;
if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
valid |= OBD_MD_FLRMTPERM;
else if (sbi->ll_flags & LL_SBI_ACL)
@@ -555,6 +547,15 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
kfree(data);
kfree(osfs);
+ if (llite_root) {
+ err = ldebugfs_register_mountpoint(llite_root, sb, dt, md);
+ if (err < 0) {
+ CERROR("%s: could not register mount in debugfs: "
+ "rc = %d\n", ll_get_fsname(sb, NULL, 0), err);
+ err = 0;
+ }
+ }
+
return err;
out_root:
iput(root);
@@ -573,7 +574,6 @@ out_md:
out:
kfree(data);
kfree(osfs);
- ldebugfs_unregister_mountpoint(sbi);
return err;
}
@@ -897,10 +897,8 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
cfg->cfg_callback = class_config_llog_handler;
/* set up client obds */
err = lustre_process_log(sb, profilenm, cfg);
- if (err < 0) {
- CERROR("Unable to process log: %d\n", err);
+ if (err < 0)
goto out_free;
- }
/* Profile set with LCFG_MOUNTOPT so we can find our mdc and osc obds */
lprof = class_get_profile(profilenm);
@@ -947,7 +945,7 @@ void ll_put_super(struct super_block *sb)
struct lustre_sb_info *lsi = s2lsi(sb);
struct ll_sb_info *sbi = ll_s2sbi(sb);
char *profilenm = get_profile_name(sb);
- int next, force = 1;
+ int ccc_count, next, force = 1, rc = 0;
CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
@@ -963,6 +961,19 @@ void ll_put_super(struct super_block *sb)
force = obd->obd_force;
}
+ /* Wait for unstable pages to be committed to stable storage */
+ if (!force) {
+ struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+
+ rc = l_wait_event(sbi->ll_cache.ccc_unstable_waitq,
+ !atomic_read(&sbi->ll_cache.ccc_unstable_nr),
+ &lwi);
+ }
+
+ ccc_count = atomic_read(&sbi->ll_cache.ccc_unstable_nr);
+ if (!force && rc != -EINTR)
+ LASSERTF(!ccc_count, "count: %i\n", ccc_count);
+
/* We need to set force before the lov_disconnect in
* lustre_common_put_super, since l_d cleans up osc's as well.
*/
@@ -999,6 +1010,8 @@ void ll_put_super(struct super_block *sb)
lustre_common_put_super(sb);
+ cl_env_cache_purge(~0);
+
module_put(THIS_MODULE);
} /* client_put_super */
@@ -1032,8 +1045,8 @@ void ll_clear_inode(struct inode *inode)
struct ll_inode_info *lli = ll_i2info(inode);
struct ll_sb_info *sbi = ll_i2sbi(inode);
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
- inode->i_generation, inode);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+ PFID(ll_inode2fid(inode)), inode);
if (S_ISDIR(inode->i_mode)) {
/* these should have been cleared in ll_file_release */
@@ -1180,9 +1193,11 @@ static int ll_setattr_done_writing(struct inode *inode,
* from OSTs and send setattr to back to MDS.
*/
rc = ll_som_update(inode, op_data);
- else if (rc)
- CERROR("inode %lu mdc truncate failed: rc = %d\n",
- inode->i_ino, rc);
+ else if (rc) {
+ CERROR("%s: inode "DFID" mdc truncate failed: rc = %d\n",
+ ll_i2sbi(inode)->ll_md_exp->exp_obd->obd_name,
+ PFID(ll_inode2fid(inode)), rc);
+ }
return rc;
}
@@ -1210,12 +1225,9 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
bool file_is_released = false;
int rc = 0, rc1 = 0;
- CDEBUG(D_VFSTRACE,
- "%s: setattr inode %p/fid:" DFID
- " from %llu to %llu, valid %x, hsm_import %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0), inode,
- PFID(&lli->lli_fid), i_size_read(inode), attr->ia_size,
- attr->ia_valid, hsm_import);
+ CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, valid %x, hsm_import %d\n",
+ ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), inode,
+ i_size_read(inode), attr->ia_size, attr->ia_valid, hsm_import);
if (attr->ia_valid & ATTR_SIZE) {
/* Check new size against VFS/VM file size limit and rlimit */
@@ -1265,14 +1277,6 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
(s64)ktime_get_real_seconds());
- /* If we are changing file size, file content is modified, flag it. */
- if (attr->ia_valid & ATTR_SIZE) {
- attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
- spin_lock(&lli->lli_lock);
- lli->lli_flags |= LLIF_DATA_MODIFIED;
- spin_unlock(&lli->lli_lock);
- }
-
/* We always do an MDS RPC, even if we're only changing the size;
* only the MDS knows whether truncate() should fail with -ETXTBUSY
*/
@@ -1284,13 +1288,6 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
if (!S_ISDIR(inode->i_mode))
inode_unlock(inode);
- memcpy(&op_data->op_attr, attr, sizeof(*attr));
-
- /* Open epoch for truncate. */
- if (exp_connect_som(ll_i2mdexp(inode)) &&
- (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET)))
- op_data->op_flags = MF_EPOCH_OPEN;
-
/* truncate on a released file must failed with -ENODATA,
* so size must not be set on MDS for released file
* but other attributes must be set
@@ -1304,29 +1301,40 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
if (lsm && lsm->lsm_pattern & LOV_PATTERN_F_RELEASED)
file_is_released = true;
ccc_inode_lsm_put(inode, lsm);
+
+ if (!hsm_import && attr->ia_valid & ATTR_SIZE) {
+ if (file_is_released) {
+ rc = ll_layout_restore(inode, 0, attr->ia_size);
+ if (rc < 0)
+ goto out;
+
+ file_is_released = false;
+ ll_layout_refresh(inode, &gen);
+ }
+
+ /*
+ * If we are changing file size, file content is
+ * modified, flag it.
+ */
+ attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
+ spin_lock(&lli->lli_lock);
+ lli->lli_flags |= LLIF_DATA_MODIFIED;
+ spin_unlock(&lli->lli_lock);
+ op_data->op_bias |= MDS_DATA_MODIFIED;
+ }
}
- /* if not in HSM import mode, clear size attr for released file
- * we clear the attribute send to MDT in op_data, not the original
- * received from caller in attr which is used later to
- * decide return code
- */
- if (file_is_released && (attr->ia_valid & ATTR_SIZE) && !hsm_import)
- op_data->op_attr.ia_valid &= ~ATTR_SIZE;
+ memcpy(&op_data->op_attr, attr, sizeof(*attr));
+
+ /* Open epoch for truncate. */
+ if (exp_connect_som(ll_i2mdexp(inode)) && !hsm_import &&
+ (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET)))
+ op_data->op_flags = MF_EPOCH_OPEN;
rc = ll_md_setattr(dentry, op_data, &mod);
if (rc)
goto out;
- /* truncate failed (only when non HSM import), others succeed */
- if (file_is_released) {
- if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
- rc = -ENODATA;
- else
- rc = 0;
- goto out;
- }
-
/* RPC to MDT is sent, cancel data modification flag */
if (op_data->op_bias & MDS_DATA_MODIFIED) {
spin_lock(&lli->lli_lock);
@@ -1335,7 +1343,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
}
ll_ioepoch_open(lli, op_data->op_ioepoch);
- if (!S_ISREG(inode->i_mode)) {
+ if (!S_ISREG(inode->i_mode) || file_is_released) {
rc = 0;
goto out;
}
@@ -1552,7 +1560,7 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
if (body->valid & OBD_MD_FLATIME) {
if (body->atime > LTIME_S(inode->i_atime))
LTIME_S(inode->i_atime) = body->atime;
- lli->lli_lvb.lvb_atime = body->atime;
+ lli->lli_atime = body->atime;
}
if (body->valid & OBD_MD_FLMTIME) {
if (body->mtime > LTIME_S(inode->i_mtime)) {
@@ -1561,12 +1569,12 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
body->mtime);
LTIME_S(inode->i_mtime) = body->mtime;
}
- lli->lli_lvb.lvb_mtime = body->mtime;
+ lli->lli_mtime = body->mtime;
}
if (body->valid & OBD_MD_FLCTIME) {
if (body->ctime > LTIME_S(inode->i_ctime))
LTIME_S(inode->i_ctime) = body->ctime;
- lli->lli_lvb.lvb_ctime = body->ctime;
+ lli->lli_ctime = body->ctime;
}
if (body->valid & OBD_MD_FLMODE)
inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
@@ -1593,12 +1601,12 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
/* FID shouldn't be changed! */
if (fid_is_sane(&lli->lli_fid)) {
LASSERTF(lu_fid_eq(&lli->lli_fid, &body->fid1),
- "Trying to change FID "DFID
- " to the "DFID", inode %lu/%u(%p)\n",
+ "Trying to change FID "DFID" to the "DFID", inode "DFID"(%p)\n",
PFID(&lli->lli_fid), PFID(&body->fid1),
- inode->i_ino, inode->i_generation, inode);
- } else
+ PFID(ll_inode2fid(inode)), inode);
+ } else {
lli->lli_fid = body->fid1;
+ }
}
LASSERT(fid_seq(&lli->lli_fid) != 0);
@@ -1622,8 +1630,10 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
if (lli->lli_flags & (LLIF_DONE_WRITING |
LLIF_EPOCH_PENDING |
LLIF_SOM_DIRTY)) {
- CERROR("ino %lu flags %u still has size authority! do not trust the size got from MDS\n",
- inode->i_ino, lli->lli_flags);
+ CERROR("%s: inode "DFID" flags %u still has size authority! do not trust the size got from MDS\n",
+ sbi->ll_md_exp->exp_obd->obd_name,
+ PFID(ll_inode2fid(inode)),
+ lli->lli_flags);
} else {
/* Use old size assignment to avoid
* deadlock bz14138 & bz14326
@@ -1699,7 +1709,7 @@ void ll_read_inode2(struct inode *inode, void *opaque)
void ll_delete_inode(struct inode *inode)
{
- struct cl_inode_info *lli = cl_i2info(inode);
+ struct ll_inode_info *lli = ll_i2info(inode);
if (S_ISREG(inode->i_mode) && lli->lli_clob)
/* discard all dirty pages before truncating them, required by
@@ -1715,8 +1725,8 @@ void ll_delete_inode(struct inode *inode)
spin_lock_irq(&inode->i_data.tree_lock);
spin_unlock_irq(&inode->i_data.tree_lock);
LASSERTF(inode->i_data.nrpages == 0,
- "inode=%lu/%u(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n",
- inode->i_ino, inode->i_generation, inode,
+ "inode="DFID"(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n",
+ PFID(ll_inode2fid(inode)), inode,
inode->i_data.nrpages);
}
/* Workaround end */
@@ -1747,7 +1757,9 @@ int ll_iocontrol(struct inode *inode, struct file *file,
rc = md_getattr(sbi->ll_md_exp, op_data, &req);
ll_finish_md_op_data(op_data);
if (rc) {
- CERROR("failure %d inode %lu\n", rc, inode->i_ino);
+ CERROR("%s: failure inode "DFID": rc = %d\n",
+ sbi->ll_md_exp->exp_obd->obd_name,
+ PFID(ll_inode2fid(inode)), rc);
return -abs(rc);
}
@@ -1772,7 +1784,7 @@ int ll_iocontrol(struct inode *inode, struct file *file,
if (IS_ERR(op_data))
return PTR_ERR(op_data);
- ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = flags;
+ op_data->op_attr_flags = flags;
op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
rc = md_setattr(sbi->ll_md_exp, op_data,
NULL, 0, NULL, 0, &req, NULL);
@@ -2066,11 +2078,11 @@ int ll_obd_statfs(struct inode *inode, void __user *arg)
}
memcpy(&type, data->ioc_inlbuf1, sizeof(__u32));
- if (type & LL_STATFS_LMV)
+ if (type & LL_STATFS_LMV) {
exp = sbi->ll_md_exp;
- else if (type & LL_STATFS_LOV)
+ } else if (type & LL_STATFS_LOV) {
exp = sbi->ll_dt_exp;
- else {
+ } else {
rc = -ENODEV;
goto out_statfs;
}
@@ -2271,7 +2283,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret)
{
char *buf, *path = NULL;
struct dentry *dentry = NULL;
- struct ccc_object *obj = cl_inode2ccc(page->mapping->host);
+ struct vvp_object *obj = cl_inode2vvp(page->mapping->host);
/* this can be called inside spin lock so use GFP_ATOMIC. */
buf = (char *)__get_free_page(GFP_ATOMIC);
@@ -2285,7 +2297,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret)
"%s: dirty page discard: %s/fid: " DFID "/%s may get corrupted (rc %d)\n",
ll_get_fsname(page->mapping->host->i_sb, NULL, 0),
s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev,
- PFID(&obj->cob_header.coh_lu.loh_fid),
+ PFID(&obj->vob_header.coh_lu.loh_fid),
(path && !IS_ERR(path)) ? path : "", ioret);
if (dentry)
diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c
index 5b484e62ffd0..88ef1cac9e0f 100644
--- a/drivers/staging/lustre/lustre/llite/llite_mmap.c
+++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c
@@ -57,10 +57,10 @@ void policy_from_vma(ldlm_policy_data_t *policy,
struct vm_area_struct *vma, unsigned long addr,
size_t count)
{
- policy->l_extent.start = ((addr - vma->vm_start) & CFS_PAGE_MASK) +
+ policy->l_extent.start = ((addr - vma->vm_start) & PAGE_MASK) +
(vma->vm_pgoff << PAGE_SHIFT);
policy->l_extent.end = (policy->l_extent.start + count - 1) |
- ~CFS_PAGE_MASK;
+ ~PAGE_MASK;
}
struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
@@ -123,7 +123,8 @@ ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret,
*env_ret = env;
- io = ccc_env_thread_io(env);
+restart:
+ io = vvp_env_thread_io(env);
io->ci_obj = ll_i2info(inode)->lli_clob;
LASSERT(io->ci_obj);
@@ -146,17 +147,20 @@ ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret,
rc = cl_io_init(env, io, CIT_FAULT, io->ci_obj);
if (rc == 0) {
- struct ccc_io *cio = ccc_env_io(env);
+ struct vvp_io *vio = vvp_env_io(env);
struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- LASSERT(cio->cui_cl.cis_io == io);
+ LASSERT(vio->vui_cl.cis_io == io);
/* mmap lock must be MANDATORY it has to cache pages. */
io->ci_lockreq = CILR_MANDATORY;
- cio->cui_fd = fd;
+ vio->vui_fd = fd;
} else {
LASSERT(rc < 0);
cl_io_fini(env, io);
+ if (io->ci_need_restart)
+ goto restart;
+
cl_env_nested_put(nest, env);
io = ERR_PTR(rc);
}
@@ -200,7 +204,7 @@ static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
* Otherwise, we could add dirty pages into osc cache
* while truncate is on-going.
*/
- inode = ccc_object_inode(io->ci_obj);
+ inode = vvp_object_inode(io->ci_obj);
lli = ll_i2info(inode);
down_read(&lli->lli_trunc_sem);
@@ -307,17 +311,17 @@ static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf)
vio = vvp_env_io(env);
vio->u.fault.ft_vma = vma;
vio->u.fault.ft_vmpage = NULL;
- vio->u.fault.fault.ft_vmf = vmf;
- vio->u.fault.fault.ft_flags = 0;
- vio->u.fault.fault.ft_flags_valid = false;
+ vio->u.fault.ft_vmf = vmf;
+ vio->u.fault.ft_flags = 0;
+ vio->u.fault.ft_flags_valid = false;
result = cl_io_loop(env, io);
/* ft_flags are only valid if we reached
* the call to filemap_fault
*/
- if (vio->u.fault.fault.ft_flags_valid)
- fault_ret = vio->u.fault.fault.ft_flags;
+ if (vio->u.fault.ft_flags_valid)
+ fault_ret = vio->u.fault.ft_flags;
vmpage = vio->u.fault.ft_vmpage;
if (result != 0 && vmpage) {
@@ -390,9 +394,11 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
result = ll_page_mkwrite0(vma, vmf->page, &retry);
if (!printed && ++count > 16) {
- CWARN("app(%s): the page %lu of file %lu is under heavy contention.\n",
+ const struct dentry *de = vma->vm_file->f_path.dentry;
+
+ CWARN("app(%s): the page %lu of file "DFID" is under heavy contention\n",
current->comm, vmf->pgoff,
- file_inode(vma->vm_file)->i_ino);
+ PFID(ll_inode2fid(de->d_inode)));
printed = true;
}
} while (retry);
@@ -422,16 +428,16 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
/**
* To avoid cancel the locks covering mmapped region for lock cache pressure,
- * we track the mapped vma count in ccc_object::cob_mmap_cnt.
+ * we track the mapped vma count in vvp_object::vob_mmap_cnt.
*/
static void ll_vm_open(struct vm_area_struct *vma)
{
struct inode *inode = file_inode(vma->vm_file);
- struct ccc_object *vob = cl_inode2ccc(inode);
+ struct vvp_object *vob = cl_inode2vvp(inode);
LASSERT(vma->vm_file);
- LASSERT(atomic_read(&vob->cob_mmap_cnt) >= 0);
- atomic_inc(&vob->cob_mmap_cnt);
+ LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0);
+ atomic_inc(&vob->vob_mmap_cnt);
}
/**
@@ -440,11 +446,11 @@ static void ll_vm_open(struct vm_area_struct *vma)
static void ll_vm_close(struct vm_area_struct *vma)
{
struct inode *inode = file_inode(vma->vm_file);
- struct ccc_object *vob = cl_inode2ccc(inode);
+ struct vvp_object *vob = cl_inode2vvp(inode);
LASSERT(vma->vm_file);
- atomic_dec(&vob->cob_mmap_cnt);
- LASSERT(atomic_read(&vob->cob_mmap_cnt) >= 0);
+ atomic_dec(&vob->vob_mmap_cnt);
+ LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0);
}
/* XXX put nice comment here. talk about __free_pte -> dirty pages and
diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c
index 193aab879709..c1eef6198b25 100644
--- a/drivers/staging/lustre/lustre/llite/llite_nfs.c
+++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c
@@ -119,7 +119,7 @@ struct inode *search_inode_for_lustre(struct super_block *sb,
rc = md_getattr(sbi->ll_md_exp, op_data, &req);
kfree(op_data);
if (rc) {
- CERROR("can't get object attrs, fid "DFID", rc %d\n",
+ CDEBUG(D_INFO, "can't get object attrs, fid "DFID", rc %d\n",
PFID(fid), rc);
return ERR_PTR(rc);
}
@@ -191,8 +191,9 @@ static int ll_encode_fh(struct inode *inode, __u32 *fh, int *plen,
int fileid_len = sizeof(struct lustre_nfs_fid) / 4;
struct lustre_nfs_fid *nfs_fid = (void *)fh;
- CDEBUG(D_INFO, "encoding for (%lu," DFID ") maxlen=%d minlen=%d\n",
- inode->i_ino, PFID(ll_inode2fid(inode)), *plen, fileid_len);
+ CDEBUG(D_INFO, "%s: encoding for ("DFID") maxlen=%d minlen=%d\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(ll_inode2fid(inode)), *plen, fileid_len);
if (*plen < fileid_len) {
*plen = fileid_len;
@@ -298,8 +299,9 @@ static struct dentry *ll_get_parent(struct dentry *dchild)
sbi = ll_s2sbi(dir->i_sb);
- CDEBUG(D_INFO, "getting parent for (%lu," DFID ")\n",
- dir->i_ino, PFID(ll_inode2fid(dir)));
+ CDEBUG(D_INFO, "%s: getting parent for ("DFID")\n",
+ ll_get_fsname(dir->i_sb, NULL, 0),
+ PFID(ll_inode2fid(dir)));
rc = ll_get_default_mdsize(sbi, &lmmsize);
if (rc != 0)
@@ -314,15 +316,20 @@ static struct dentry *ll_get_parent(struct dentry *dchild)
rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
ll_finish_md_op_data(op_data);
if (rc) {
- CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino);
+ CERROR("%s: failure inode "DFID" get parent: rc = %d\n",
+ ll_get_fsname(dir->i_sb, NULL, 0),
+ PFID(ll_inode2fid(dir)), rc);
return ERR_PTR(rc);
}
body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- LASSERT(body->valid & OBD_MD_FLID);
-
- CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n",
- PFID(ll_inode2fid(dir)), PFID(&body->fid1));
-
+ /*
+ * LU-3952: MDT may lost the FID of its parent, we should not crash
+ * the NFS server, ll_iget_for_nfs() will handle the error.
+ */
+ if (body->valid & OBD_MD_FLID) {
+ CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n",
+ PFID(ll_inode2fid(dir)), PFID(&body->fid1));
+ }
result = ll_iget_for_nfs(dir->i_sb, &body->fid1, NULL);
ptlrpc_req_finished(req);
diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c
index f169c0db63b4..813a9a354e5f 100644
--- a/drivers/staging/lustre/lustre/llite/lloop.c
+++ b/drivers/staging/lustre/lustre/llite/lloop.c
@@ -274,8 +274,9 @@ static void loop_add_bio(struct lloop_device *lo, struct bio *bio)
if (lo->lo_biotail) {
lo->lo_biotail->bi_next = bio;
lo->lo_biotail = bio;
- } else
+ } else {
lo->lo_bio = lo->lo_biotail = bio;
+ }
spin_unlock_irqrestore(&lo->lo_lock, flags);
atomic_inc(&lo->lo_pending);
diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c
index 27ab1261400e..55d62eb11957 100644
--- a/drivers/staging/lustre/lustre/llite/lproc_llite.c
+++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c
@@ -254,7 +254,6 @@ static ssize_t max_read_ahead_mb_store(struct kobject *kobj,
pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */
if (pages_number > totalram_pages / 2) {
-
CERROR("can't set file readahead more than %lu MB\n",
totalram_pages >> (20 - PAGE_SHIFT + 1)); /*1/2 of RAM*/
return -ERANGE;
@@ -393,6 +392,8 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
struct super_block *sb = ((struct seq_file *)file->private_data)->private;
struct ll_sb_info *sbi = ll_s2sbi(sb);
struct cl_client_cache *cache = &sbi->ll_cache;
+ struct lu_env *env;
+ int refcheck;
int mult, rc, pages_number;
int diff = 0;
int nrpages = 0;
@@ -430,6 +431,10 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
goto out;
}
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ return 0;
+
diff = -diff;
while (diff > 0) {
int tmp;
@@ -455,19 +460,20 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
break;
if (!sbi->ll_dt_exp) { /* being initialized */
- rc = -ENODEV;
- break;
+ rc = 0;
+ goto out;
}
/* difficult - have to ask OSCs to drop LRU slots. */
tmp = diff << 1;
- rc = obd_set_info_async(NULL, sbi->ll_dt_exp,
+ rc = obd_set_info_async(env, sbi->ll_dt_exp,
sizeof(KEY_CACHE_LRU_SHRINK),
KEY_CACHE_LRU_SHRINK,
sizeof(tmp), &tmp, NULL);
if (rc < 0)
break;
}
+ cl_env_put(env, &refcheck);
out:
if (rc >= 0) {
@@ -818,6 +824,23 @@ static ssize_t xattr_cache_store(struct kobject *kobj,
}
LUSTRE_RW_ATTR(xattr_cache);
+static ssize_t unstable_stats_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kobj);
+ struct cl_client_cache *cache = &sbi->ll_cache;
+ int pages, mb;
+
+ pages = atomic_read(&cache->ccc_unstable_nr);
+ mb = (pages * PAGE_SIZE) >> 20;
+
+ return sprintf(buf, "unstable_pages: %8d\n"
+ "unstable_mb: %8d\n", pages, mb);
+}
+LUSTRE_RO_ATTR(unstable_stats);
+
static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
/* { "mntpt_path", ll_rd_path, 0, 0 }, */
{ "site", &ll_site_stats_fops, NULL, 0 },
@@ -853,6 +876,7 @@ static struct attribute *llite_attrs[] = {
&lustre_attr_max_easize.attr,
&lustre_attr_default_easize.attr,
&lustre_attr_xattr_cache.attr,
+ &lustre_attr_unstable_stats.attr,
NULL,
};
@@ -953,6 +977,7 @@ static const char *ra_stat_string[] = {
[RA_STAT_EOF] = "read-ahead to EOF",
[RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue",
[RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page",
+ [RA_STAT_FAILED_REACH_END] = "failed to reach end"
};
int ldebugfs_register_mountpoint(struct dentry *parent,
diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
index f8f98e4e8258..5eba0ebae10f 100644
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ b/drivers/staging/lustre/lustre/llite/namei.c
@@ -128,12 +128,14 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash,
if (rc != 0) {
iget_failed(inode);
inode = NULL;
- } else
+ } else {
unlock_new_inode(inode);
- } else if (!(inode->i_state & (I_FREEING | I_CLEAR)))
+ }
+ } else if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
ll_update_inode(inode, md);
- CDEBUG(D_VFSTRACE, "got inode: %p for "DFID"\n",
- inode, PFID(&md->body->fid1));
+ CDEBUG(D_VFSTRACE, "got inode: "DFID"(%p)\n",
+ PFID(&md->body->fid1), inode);
+ }
}
return inode;
}
@@ -188,7 +190,7 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
break;
/* Invalidate all dentries associated with this inode */
- LASSERT(lock->l_flags & LDLM_FL_CANCELING);
+ LASSERT(ldlm_is_canceling(lock));
if (!fid_res_name_eq(ll_inode2fid(inode),
&lock->l_resource->lr_name)) {
@@ -255,8 +257,8 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
}
if ((bits & MDS_INODELOCK_UPDATE) && S_ISDIR(inode->i_mode)) {
- CDEBUG(D_INODE, "invalidating inode %lu\n",
- inode->i_ino);
+ CDEBUG(D_INODE, "invalidating inode "DFID"\n",
+ PFID(ll_inode2fid(inode)));
truncate_inode_pages(inode->i_mapping, 0);
ll_invalidate_negative_children(inode);
}
@@ -476,9 +478,8 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
if (dentry->d_name.len > ll_i2sbi(parent)->ll_namelen)
return ERR_PTR(-ENAMETOOLONG);
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),intent=%s\n",
- dentry, parent->i_ino,
- parent->i_generation, parent, LL_IT2STR(it));
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),intent=%s\n",
+ dentry, PFID(ll_inode2fid(parent)), parent, LL_IT2STR(it));
if (d_mountpoint(dentry))
CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it));
@@ -553,9 +554,8 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry,
struct lookup_intent *itp, it = { .it_op = IT_GETATTR };
struct dentry *de;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),flags=%u\n",
- dentry, parent->i_ino,
- parent->i_generation, parent, flags);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),flags=%u\n",
+ dentry, PFID(ll_inode2fid(parent)), parent, flags);
/* Optimize away (CREATE && !OPEN). Let .create handle the race. */
if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN))
@@ -586,10 +586,9 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
long long lookup_flags = LOOKUP_OPEN;
int rc = 0;
- CDEBUG(D_VFSTRACE,
- "VFS Op:name=%pd,dir=%lu/%u(%p),file %p,open_flags %x,mode %x opened %d\n",
- dentry, dir->i_ino,
- dir->i_generation, dir, file, open_flags, mode, *opened);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),file %p,open_flags %x,mode %x opened %d\n",
+ dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode,
+ *opened);
it = kzalloc(sizeof(*it), GFP_NOFS);
if (!it)
@@ -680,8 +679,8 @@ static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it)
* lock on the inode. Since we finally have an inode pointer,
* stuff it in the lock.
*/
- CDEBUG(D_DLMTRACE, "setting l_ast_data to inode %p (%lu/%u)\n",
- inode, inode->i_ino, inode->i_generation);
+ CDEBUG(D_DLMTRACE, "setting l_ast_data to inode "DFID"(%p)\n",
+ PFID(ll_inode2fid(dir)), inode);
ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL);
out:
ptlrpc_req_finished(request);
@@ -708,9 +707,8 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode,
struct inode *inode;
int rc = 0;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),intent=%s\n",
- dentry, dir->i_ino,
- dir->i_generation, dir, LL_IT2STR(it));
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), intent=%s\n",
+ dentry, PFID(ll_inode2fid(dir)), dir, LL_IT2STR(it));
rc = it_open_error(DISP_OPEN_CREATE, it);
if (rc)
@@ -733,8 +731,9 @@ static void ll_update_times(struct ptlrpc_request *request,
LASSERT(body);
if (body->valid & OBD_MD_FLMTIME &&
body->mtime > LTIME_S(inode->i_mtime)) {
- CDEBUG(D_INODE, "setting ino %lu mtime from %lu to %llu\n",
- inode->i_ino, LTIME_S(inode->i_mtime), body->mtime);
+ CDEBUG(D_INODE, "setting fid "DFID" mtime from %lu to %llu\n",
+ PFID(ll_inode2fid(inode)), LTIME_S(inode->i_mtime),
+ body->mtime);
LTIME_S(inode->i_mtime) = body->mtime;
}
if (body->valid & OBD_MD_FLCTIME &&
@@ -791,9 +790,9 @@ static int ll_mknod(struct inode *dir, struct dentry *dchild,
{
int err;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p) mode %o dev %x\n",
- dchild, dir->i_ino, dir->i_generation, dir,
- mode, old_encode_dev(rdev));
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p) mode %o dev %x\n",
+ dchild, PFID(ll_inode2fid(dir)), dir, mode,
+ old_encode_dev(rdev));
if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir)))
mode &= ~current_umask();
@@ -831,9 +830,8 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry,
{
int rc;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),flags=%u, excl=%d\n",
- dentry, dir->i_ino,
- dir->i_generation, dir, mode, want_excl);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), flags=%u, excl=%d\n",
+ dentry, PFID(ll_inode2fid(dir)), dir, mode, want_excl);
rc = ll_mknod(dir, dentry, mode, 0);
@@ -845,12 +843,6 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry,
return rc;
}
-static inline void ll_get_child_fid(struct dentry *child, struct lu_fid *fid)
-{
- if (d_really_is_positive(child))
- *fid = *ll_inode2fid(d_inode(child));
-}
-
int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir)
{
struct mdt_body *body;
@@ -927,23 +919,25 @@ out:
* is any lock existing. They will recycle dentries and inodes based upon locks
* too. b=20433
*/
-static int ll_unlink(struct inode *dir, struct dentry *dentry)
+static int ll_unlink(struct inode *dir, struct dentry *dchild)
{
struct ptlrpc_request *request = NULL;
struct md_op_data *op_data;
int rc;
CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n",
- dentry, dir->i_ino, dir->i_generation, dir);
+ dchild, dir->i_ino, dir->i_generation, dir);
op_data = ll_prep_md_op_data(NULL, dir, NULL,
- dentry->d_name.name,
- dentry->d_name.len,
+ dchild->d_name.name,
+ dchild->d_name.len,
0, LUSTRE_OPC_ANY, NULL);
if (IS_ERR(op_data))
return PTR_ERR(op_data);
- ll_get_child_fid(dentry, &op_data->op_fid3);
+ if (dchild && dchild->d_inode)
+ op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
+
op_data->op_fid2 = op_data->op_fid3;
rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
ll_finish_md_op_data(op_data);
@@ -963,8 +957,8 @@ static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
int err;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n",
- dentry, dir->i_ino, dir->i_generation, dir);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir"DFID"(%p)\n",
+ dentry, PFID(ll_inode2fid(dir)), dir);
if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir)))
mode &= ~current_umask();
@@ -977,23 +971,25 @@ static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
return err;
}
-static int ll_rmdir(struct inode *dir, struct dentry *dentry)
+static int ll_rmdir(struct inode *dir, struct dentry *dchild)
{
struct ptlrpc_request *request = NULL;
struct md_op_data *op_data;
int rc;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n",
- dentry, dir->i_ino, dir->i_generation, dir);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p)\n",
+ dchild, PFID(ll_inode2fid(dir)), dir);
op_data = ll_prep_md_op_data(NULL, dir, NULL,
- dentry->d_name.name,
- dentry->d_name.len,
+ dchild->d_name.name,
+ dchild->d_name.len,
S_IFDIR, LUSTRE_OPC_ANY, NULL);
if (IS_ERR(op_data))
return PTR_ERR(op_data);
- ll_get_child_fid(dentry, &op_data->op_fid3);
+ if (dchild && dchild->d_inode)
+ op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
+
op_data->op_fid2 = op_data->op_fid3;
rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
ll_finish_md_op_data(op_data);
@@ -1011,9 +1007,8 @@ static int ll_symlink(struct inode *dir, struct dentry *dentry,
{
int err;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),target=%.*s\n",
- dentry, dir->i_ino, dir->i_generation,
- dir, 3000, oldname);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),target=%.*s\n",
+ dentry, PFID(ll_inode2fid(dir)), dir, 3000, oldname);
err = ll_new_node(dir, dentry, oldname, S_IFLNK | S_IRWXUGO,
0, LUSTRE_OPC_SYMLINK);
@@ -1033,10 +1028,9 @@ static int ll_link(struct dentry *old_dentry, struct inode *dir,
struct md_op_data *op_data;
int err;
- CDEBUG(D_VFSTRACE,
- "VFS Op: inode=%lu/%u(%p), dir=%lu/%u(%p), target=%pd\n",
- src->i_ino, src->i_generation, src, dir->i_ino,
- dir->i_generation, dir, new_dentry);
+ CDEBUG(D_VFSTRACE, "VFS Op: inode="DFID"(%p), dir="DFID"(%p), target=%pd\n",
+ PFID(ll_inode2fid(src)), src, PFID(ll_inode2fid(dir)), dir,
+ new_dentry);
op_data = ll_prep_md_op_data(NULL, src, dir, new_dentry->d_name.name,
new_dentry->d_name.len,
@@ -1056,42 +1050,45 @@ out:
return err;
}
-static int ll_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
+static int ll_rename(struct inode *src, struct dentry *src_dchild,
+ struct inode *tgt, struct dentry *tgt_dchild)
{
struct ptlrpc_request *request = NULL;
- struct ll_sb_info *sbi = ll_i2sbi(old_dir);
+ struct ll_sb_info *sbi = ll_i2sbi(src);
struct md_op_data *op_data;
int err;
CDEBUG(D_VFSTRACE,
- "VFS Op:oldname=%pd,src_dir=%lu/%u(%p),newname=%pd,tgt_dir=%lu/%u(%p)\n",
- old_dentry, old_dir->i_ino, old_dir->i_generation, old_dir,
- new_dentry, new_dir->i_ino, new_dir->i_generation, new_dir);
+ "VFS Op:oldname=%pd, src_dir="DFID"(%p), newname=%pd, tgt_dir="DFID"(%p)\n",
+ src_dchild, PFID(ll_inode2fid(src)), src,
+ tgt_dchild, PFID(ll_inode2fid(tgt)), tgt);
- op_data = ll_prep_md_op_data(NULL, old_dir, new_dir, NULL, 0, 0,
+ op_data = ll_prep_md_op_data(NULL, src, tgt, NULL, 0, 0,
LUSTRE_OPC_ANY, NULL);
if (IS_ERR(op_data))
return PTR_ERR(op_data);
- ll_get_child_fid(old_dentry, &op_data->op_fid3);
- ll_get_child_fid(new_dentry, &op_data->op_fid4);
+ if (src_dchild && src_dchild->d_inode)
+ op_data->op_fid3 = *ll_inode2fid(src_dchild->d_inode);
+ if (tgt_dchild && tgt_dchild->d_inode)
+ op_data->op_fid4 = *ll_inode2fid(tgt_dchild->d_inode);
+
err = md_rename(sbi->ll_md_exp, op_data,
- old_dentry->d_name.name,
- old_dentry->d_name.len,
- new_dentry->d_name.name,
- new_dentry->d_name.len, &request);
+ src_dchild->d_name.name,
+ src_dchild->d_name.len,
+ tgt_dchild->d_name.name,
+ tgt_dchild->d_name.len, &request);
ll_finish_md_op_data(op_data);
if (!err) {
- ll_update_times(request, old_dir);
- ll_update_times(request, new_dir);
+ ll_update_times(request, src);
+ ll_update_times(request, tgt);
ll_stats_ops_tally(sbi, LPROC_LL_RENAME, 1);
- err = ll_objects_destroy(request, old_dir);
+ err = ll_objects_destroy(request, src);
}
ptlrpc_req_finished(request);
if (!err)
- d_move(old_dentry, new_dentry);
+ d_move(src_dchild, tgt_dchild);
return err;
}
diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
index edab6c5b7e50..336397773fbb 100644
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ b/drivers/staging/lustre/lustre/llite/rw.c
@@ -63,7 +63,7 @@
* Finalizes cl-data before exiting typical address_space operation. Dual to
* ll_cl_init().
*/
-static void ll_cl_fini(struct ll_cl_context *lcc)
+void ll_cl_fini(struct ll_cl_context *lcc)
{
struct lu_env *env = lcc->lcc_env;
struct cl_io *io = lcc->lcc_io;
@@ -84,200 +84,59 @@ static void ll_cl_fini(struct ll_cl_context *lcc)
* Initializes common cl-data at the typical address_space operation entry
* point.
*/
-static struct ll_cl_context *ll_cl_init(struct file *file,
- struct page *vmpage, int create)
+struct ll_cl_context *ll_cl_init(struct file *file, struct page *vmpage)
{
struct ll_cl_context *lcc;
struct lu_env *env;
struct cl_io *io;
struct cl_object *clob;
- struct ccc_io *cio;
+ struct vvp_io *vio;
int refcheck;
int result = 0;
- clob = ll_i2info(vmpage->mapping->host)->lli_clob;
+ clob = ll_i2info(file_inode(file))->lli_clob;
LASSERT(clob);
env = cl_env_get(&refcheck);
if (IS_ERR(env))
return ERR_CAST(env);
- lcc = &vvp_env_info(env)->vti_io_ctx;
+ lcc = &ll_env_info(env)->lti_io_ctx;
memset(lcc, 0, sizeof(*lcc));
lcc->lcc_env = env;
lcc->lcc_refcheck = refcheck;
lcc->lcc_cookie = current;
- cio = ccc_env_io(env);
- io = cio->cui_cl.cis_io;
- if (!io && create) {
- struct inode *inode = vmpage->mapping->host;
- loff_t pos;
-
- if (inode_trylock(inode)) {
- inode_unlock((inode));
-
- /* this is too bad. Someone is trying to write the
- * page w/o holding inode mutex. This means we can
- * add dirty pages into cache during truncate
- */
- CERROR("Proc %s is dirtying page w/o inode lock, this will break truncate\n",
- current->comm);
- dump_stack();
- LBUG();
- return ERR_PTR(-EIO);
- }
-
- /*
- * Loop-back driver calls ->prepare_write().
- * methods directly, bypassing file system ->write() operation,
- * so cl_io has to be created here.
- */
- io = ccc_env_thread_io(env);
- ll_io_init(io, file, 1);
-
- /* No lock at all for this kind of IO - we can't do it because
- * we have held page lock, it would cause deadlock.
- * XXX: This causes poor performance to loop device - One page
- * per RPC.
- * In order to get better performance, users should use
- * lloop driver instead.
- */
- io->ci_lockreq = CILR_NEVER;
-
- pos = vmpage->index << PAGE_SHIFT;
-
- /* Create a temp IO to serve write. */
- result = cl_io_rw_init(env, io, CIT_WRITE, pos, PAGE_SIZE);
- if (result == 0) {
- cio->cui_fd = LUSTRE_FPRIVATE(file);
- cio->cui_iter = NULL;
- result = cl_io_iter_init(env, io);
- if (result == 0) {
- result = cl_io_lock(env, io);
- if (result == 0)
- result = cl_io_start(env, io);
- }
- } else
- result = io->ci_result;
- }
-
+ vio = vvp_env_io(env);
+ io = vio->vui_cl.cis_io;
lcc->lcc_io = io;
if (!io)
result = -EIO;
- if (result == 0) {
+
+ if (result == 0 && vmpage) {
struct cl_page *page;
LASSERT(io->ci_state == CIS_IO_GOING);
- LASSERT(cio->cui_fd == LUSTRE_FPRIVATE(file));
+ LASSERT(vio->vui_fd == LUSTRE_FPRIVATE(file));
page = cl_page_find(env, clob, vmpage->index, vmpage,
CPT_CACHEABLE);
if (!IS_ERR(page)) {
lcc->lcc_page = page;
lu_ref_add(&page->cp_reference, "cl_io", io);
result = 0;
- } else
+ } else {
result = PTR_ERR(page);
+ }
}
if (result) {
ll_cl_fini(lcc);
lcc = ERR_PTR(result);
}
- CDEBUG(D_VFSTRACE, "%lu@"DFID" -> %d %p %p\n",
- vmpage->index, PFID(lu_object_fid(&clob->co_lu)), result,
- env, io);
- return lcc;
-}
-
-static struct ll_cl_context *ll_cl_get(void)
-{
- struct ll_cl_context *lcc;
- struct lu_env *env;
- int refcheck;
-
- env = cl_env_get(&refcheck);
- LASSERT(!IS_ERR(env));
- lcc = &vvp_env_info(env)->vti_io_ctx;
- LASSERT(env == lcc->lcc_env);
- LASSERT(current == lcc->lcc_cookie);
- cl_env_put(env, &refcheck);
-
- /* env has got in ll_cl_init, so it is still usable. */
return lcc;
}
-/**
- * ->prepare_write() address space operation called by generic_file_write()
- * for every page during write.
- */
-int ll_prepare_write(struct file *file, struct page *vmpage, unsigned from,
- unsigned to)
-{
- struct ll_cl_context *lcc;
- int result;
-
- lcc = ll_cl_init(file, vmpage, 1);
- if (!IS_ERR(lcc)) {
- struct lu_env *env = lcc->lcc_env;
- struct cl_io *io = lcc->lcc_io;
- struct cl_page *page = lcc->lcc_page;
-
- cl_page_assume(env, io, page);
-
- result = cl_io_prepare_write(env, io, page, from, to);
- if (result == 0) {
- /*
- * Add a reference, so that page is not evicted from
- * the cache until ->commit_write() is called.
- */
- cl_page_get(page);
- lu_ref_add(&page->cp_reference, "prepare_write",
- current);
- } else {
- cl_page_unassume(env, io, page);
- ll_cl_fini(lcc);
- }
- /* returning 0 in prepare assumes commit must be called
- * afterwards
- */
- } else {
- result = PTR_ERR(lcc);
- }
- return result;
-}
-
-int ll_commit_write(struct file *file, struct page *vmpage, unsigned from,
- unsigned to)
-{
- struct ll_cl_context *lcc;
- struct lu_env *env;
- struct cl_io *io;
- struct cl_page *page;
- int result = 0;
-
- lcc = ll_cl_get();
- env = lcc->lcc_env;
- page = lcc->lcc_page;
- io = lcc->lcc_io;
-
- LASSERT(cl_page_is_owned(page, io));
- LASSERT(from <= to);
- if (from != to) /* handle short write case. */
- result = cl_io_commit_write(env, io, page, from, to);
- if (cl_page_is_owned(page, io))
- cl_page_unassume(env, io, page);
-
- /*
- * Release reference acquired by ll_prepare_write().
- */
- lu_ref_del(&page->cp_reference, "prepare_write", current);
- cl_page_put(env, page);
- ll_cl_fini(lcc);
- return result;
-}
-
static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
/**
@@ -301,7 +160,7 @@ static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
*/
static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
struct ra_io_arg *ria,
- unsigned long pages)
+ unsigned long pages, unsigned long min)
{
struct ll_ra_info *ra = &sbi->ll_ra_info;
long ret;
@@ -341,6 +200,11 @@ static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
}
out:
+ if (ret < min) {
+ /* override ra limit for maximum performance */
+ atomic_add(min - ret, &ra->ra_cur_pages);
+ ret = min;
+ }
return ret;
}
@@ -357,9 +221,9 @@ static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which)
lprocfs_counter_incr(sbi->ll_ra_stats, which);
}
-void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which)
+void ll_ra_stats_inc(struct inode *inode, enum ra_stat which)
{
- struct ll_sb_info *sbi = ll_i2sbi(mapping->host);
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
ll_ra_stats_inc_sbi(sbi, which);
}
@@ -388,61 +252,42 @@ static int index_in_window(unsigned long index, unsigned long point,
return start <= index && index <= end;
}
-static struct ll_readahead_state *ll_ras_get(struct file *f)
+void ll_ras_enter(struct file *f)
{
- struct ll_file_data *fd;
-
- fd = LUSTRE_FPRIVATE(f);
- return &fd->fd_ras;
-}
-
-void ll_ra_read_in(struct file *f, struct ll_ra_read *rar)
-{
- struct ll_readahead_state *ras;
-
- ras = ll_ras_get(f);
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(f);
+ struct ll_readahead_state *ras = &fd->fd_ras;
spin_lock(&ras->ras_lock);
ras->ras_requests++;
ras->ras_request_index = 0;
ras->ras_consecutive_requests++;
- rar->lrr_reader = current;
-
- list_add(&rar->lrr_linkage, &ras->ras_read_beads);
- spin_unlock(&ras->ras_lock);
-}
-
-void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar)
-{
- struct ll_readahead_state *ras;
-
- ras = ll_ras_get(f);
-
- spin_lock(&ras->ras_lock);
- list_del_init(&rar->lrr_linkage);
spin_unlock(&ras->ras_lock);
}
static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io,
struct cl_page_list *queue, struct cl_page *page,
- struct page *vmpage)
+ struct cl_object *clob, pgoff_t *max_index)
{
- struct ccc_page *cp;
+ struct page *vmpage = page->cp_vmpage;
+ struct vvp_page *vpg;
int rc;
rc = 0;
cl_page_assume(env, io, page);
lu_ref_add(&page->cp_reference, "ra", current);
- cp = cl2ccc_page(cl_page_at(page, &vvp_device_type));
- if (!cp->cpg_defer_uptodate && !PageUptodate(vmpage)) {
- rc = cl_page_is_under_lock(env, io, page);
- if (rc == -EBUSY) {
- cp->cpg_defer_uptodate = 1;
- cp->cpg_ra_used = 0;
+ vpg = cl2vvp_page(cl_object_page_slice(clob, page));
+ if (!vpg->vpg_defer_uptodate && !PageUptodate(vmpage)) {
+ CDEBUG(D_READA, "page index %lu, max_index: %lu\n",
+ vvp_index(vpg), *max_index);
+ if (*max_index == 0 || vvp_index(vpg) > *max_index)
+ rc = cl_page_is_under_lock(env, io, page, max_index);
+ if (rc == 0) {
+ vpg->vpg_defer_uptodate = 1;
+ vpg->vpg_ra_used = 0;
cl_page_list_add(queue, page);
rc = 1;
} else {
- cl_page_delete(env, page);
+ cl_page_discard(env, io, page);
rc = -ENOLCK;
}
} else {
@@ -466,24 +311,25 @@ static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io,
*/
static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
struct cl_page_list *queue,
- pgoff_t index, struct address_space *mapping)
+ pgoff_t index, pgoff_t *max_index)
{
+ struct cl_object *clob = io->ci_obj;
+ struct inode *inode = vvp_object_inode(clob);
struct page *vmpage;
- struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
struct cl_page *page;
enum ra_stat which = _NR_RA_STAT; /* keep gcc happy */
int rc = 0;
const char *msg = NULL;
- vmpage = grab_cache_page_nowait(mapping, index);
+ vmpage = grab_cache_page_nowait(inode->i_mapping, index);
if (vmpage) {
/* Check if vmpage was truncated or reclaimed */
- if (vmpage->mapping == mapping) {
+ if (vmpage->mapping == inode->i_mapping) {
page = cl_page_find(env, clob, vmpage->index,
vmpage, CPT_CACHEABLE);
if (!IS_ERR(page)) {
rc = cl_read_ahead_page(env, io, queue,
- page, vmpage);
+ page, clob, max_index);
if (rc == -ENOLCK) {
which = RA_STAT_FAILED_MATCH;
msg = "lock match failed";
@@ -504,7 +350,7 @@ static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
msg = "g_c_p_n failed";
}
if (msg) {
- ll_ra_stats_inc(mapping, which);
+ ll_ra_stats_inc(inode, which);
CDEBUG(D_READA, "%s\n", msg);
}
return rc;
@@ -616,11 +462,12 @@ static int ll_read_ahead_pages(const struct lu_env *env,
struct cl_io *io, struct cl_page_list *queue,
struct ra_io_arg *ria,
unsigned long *reserved_pages,
- struct address_space *mapping,
unsigned long *ra_end)
{
- int rc, count = 0, stride_ria;
- unsigned long page_idx;
+ int rc, count = 0;
+ bool stride_ria;
+ pgoff_t page_idx;
+ pgoff_t max_index = 0;
LASSERT(ria);
RIA_DEBUG(ria);
@@ -631,12 +478,13 @@ static int ll_read_ahead_pages(const struct lu_env *env,
if (ras_inside_ra_window(page_idx, ria)) {
/* If the page is inside the read-ahead window*/
rc = ll_read_ahead_page(env, io, queue,
- page_idx, mapping);
+ page_idx, &max_index);
if (rc == 1) {
(*reserved_pages)--;
count++;
- } else if (rc == -ENOLCK)
+ } else if (rc == -ENOLCK) {
break;
+ }
} else if (stride_ria) {
/* If it is not in the read-ahead window, and it is
* read-ahead mode, then check whether it should skip
@@ -666,25 +514,22 @@ static int ll_read_ahead_pages(const struct lu_env *env,
}
int ll_readahead(const struct lu_env *env, struct cl_io *io,
- struct ll_readahead_state *ras, struct address_space *mapping,
- struct cl_page_list *queue, int flags)
+ struct cl_page_list *queue, struct ll_readahead_state *ras,
+ bool hit)
{
struct vvp_io *vio = vvp_env_io(env);
- struct vvp_thread_info *vti = vvp_env_info(env);
- struct cl_attr *attr = ccc_env_thread_attr(env);
+ struct ll_thread_info *lti = ll_env_info(env);
+ struct cl_attr *attr = vvp_env_thread_attr(env);
unsigned long start = 0, end = 0, reserved;
- unsigned long ra_end, len;
+ unsigned long ra_end, len, mlen = 0;
struct inode *inode;
- struct ll_ra_read *bead;
- struct ra_io_arg *ria = &vti->vti_ria;
- struct ll_inode_info *lli;
+ struct ra_io_arg *ria = &lti->lti_ria;
struct cl_object *clob;
int ret = 0;
__u64 kms;
- inode = mapping->host;
- lli = ll_i2info(inode);
- clob = lli->lli_clob;
+ clob = io->ci_obj;
+ inode = vvp_object_inode(clob);
memset(ria, 0, sizeof(*ria));
@@ -696,22 +541,20 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
return ret;
kms = attr->cat_kms;
if (kms == 0) {
- ll_ra_stats_inc(mapping, RA_STAT_ZERO_LEN);
+ ll_ra_stats_inc(inode, RA_STAT_ZERO_LEN);
return 0;
}
spin_lock(&ras->ras_lock);
- if (vio->cui_ra_window_set)
- bead = &vio->cui_bead;
- else
- bead = NULL;
/* Enlarge the RA window to encompass the full read */
- if (bead && ras->ras_window_start + ras->ras_window_len <
- bead->lrr_start + bead->lrr_count) {
- ras->ras_window_len = bead->lrr_start + bead->lrr_count -
+ if (vio->vui_ra_valid &&
+ ras->ras_window_start + ras->ras_window_len <
+ vio->vui_ra_start + vio->vui_ra_count) {
+ ras->ras_window_len = vio->vui_ra_start + vio->vui_ra_count -
ras->ras_window_start;
}
+
/* Reserve a part of the read-ahead window that we'll be issuing */
if (ras->ras_window_len) {
start = ras->ras_next_readahead;
@@ -755,29 +598,48 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
spin_unlock(&ras->ras_lock);
if (end == 0) {
- ll_ra_stats_inc(mapping, RA_STAT_ZERO_WINDOW);
+ ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
return 0;
}
len = ria_page_count(ria);
- if (len == 0)
+ if (len == 0) {
+ ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
return 0;
+ }
+
+ CDEBUG(D_READA, DFID ": ria: %lu/%lu, bead: %lu/%lu, hit: %d\n",
+ PFID(lu_object_fid(&clob->co_lu)),
+ ria->ria_start, ria->ria_end,
+ vio->vui_ra_valid ? vio->vui_ra_start : 0,
+ vio->vui_ra_valid ? vio->vui_ra_count : 0,
+ hit);
+
+ /* at least to extend the readahead window to cover current read */
+ if (!hit && vio->vui_ra_valid &&
+ vio->vui_ra_start + vio->vui_ra_count > ria->ria_start) {
+ /* to the end of current read window. */
+ mlen = vio->vui_ra_start + vio->vui_ra_count - ria->ria_start;
+ /* trim to RPC boundary */
+ start = ria->ria_start & (PTLRPC_MAX_BRW_PAGES - 1);
+ mlen = min(mlen, PTLRPC_MAX_BRW_PAGES - start);
+ }
- reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len);
+ reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen);
if (reserved < len)
- ll_ra_stats_inc(mapping, RA_STAT_MAX_IN_FLIGHT);
+ ll_ra_stats_inc(inode, RA_STAT_MAX_IN_FLIGHT);
- CDEBUG(D_READA, "reserved page %lu ra_cur %d ra_max %lu\n", reserved,
+ CDEBUG(D_READA, "reserved pages %lu/%lu/%lu, ra_cur %d, ra_max %lu\n",
+ reserved, len, mlen,
atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages),
ll_i2sbi(inode)->ll_ra_info.ra_max_pages);
- ret = ll_read_ahead_pages(env, io, queue,
- ria, &reserved, mapping, &ra_end);
+ ret = ll_read_ahead_pages(env, io, queue, ria, &reserved, &ra_end);
if (reserved != 0)
ll_ra_count_put(ll_i2sbi(inode), reserved);
if (ra_end == end + 1 && ra_end == (kms >> PAGE_SHIFT))
- ll_ra_stats_inc(mapping, RA_STAT_EOF);
+ ll_ra_stats_inc(inode, RA_STAT_EOF);
/* if we didn't get to the end of the region we reserved from
* the ras we need to go back and update the ras so that the
@@ -789,6 +651,7 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
ra_end, end, ria->ria_end);
if (ra_end != end + 1) {
+ ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END);
spin_lock(&ras->ras_lock);
if (ra_end < ras->ras_next_readahead &&
index_in_window(ra_end, ras->ras_window_start, 0,
@@ -836,7 +699,6 @@ void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
spin_lock_init(&ras->ras_lock);
ras_reset(inode, ras, 0);
ras->ras_requests = 0;
- INIT_LIST_HEAD(&ras->ras_read_beads);
}
/*
@@ -1059,15 +921,18 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
ras->ras_last_readpage = index;
ras_set_start(inode, ras, index);
- if (stride_io_mode(ras))
+ if (stride_io_mode(ras)) {
/* Since stride readahead is sensitive to the offset
* of read-ahead, so we use original offset here,
* instead of ras_window_start, which is RPC aligned
*/
ras->ras_next_readahead = max(index, ras->ras_next_readahead);
- else
- ras->ras_next_readahead = max(ras->ras_window_start,
- ras->ras_next_readahead);
+ } else {
+ if (ras->ras_next_readahead < ras->ras_window_start)
+ ras->ras_next_readahead = ras->ras_window_start;
+ if (!hit)
+ ras->ras_next_readahead = index + 1;
+ }
RAS_CDEBUG(ras);
/* Trigger RA in the mmap case where ras_consecutive_requests
@@ -1129,7 +994,7 @@ int ll_writepage(struct page *vmpage, struct writeback_control *wbc)
clob = ll_i2info(inode)->lli_clob;
LASSERT(clob);
- io = ccc_env_thread_io(env);
+ io = vvp_env_thread_io(env);
io->ci_obj = clob;
io->ci_ignore_layout = 1;
result = cl_io_init(env, io, CIT_MISC, clob);
@@ -1240,8 +1105,9 @@ int ll_writepages(struct address_space *mapping, struct writeback_control *wbc)
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) {
if (end == OBD_OBJECT_EOF)
- end = i_size_read(inode);
- mapping->writeback_index = (end >> PAGE_SHIFT) + 1;
+ mapping->writeback_index = 0;
+ else
+ mapping->writeback_index = (end >> PAGE_SHIFT) + 1;
}
return result;
}
@@ -1251,7 +1117,7 @@ int ll_readpage(struct file *file, struct page *vmpage)
struct ll_cl_context *lcc;
int result;
- lcc = ll_cl_init(file, vmpage, 0);
+ lcc = ll_cl_init(file, vmpage);
if (!IS_ERR(lcc)) {
struct lu_env *env = lcc->lcc_env;
struct cl_io *io = lcc->lcc_io;
@@ -1273,3 +1139,28 @@ int ll_readpage(struct file *file, struct page *vmpage)
}
return result;
}
+
+int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page, enum cl_req_type crt)
+{
+ struct cl_2queue *queue;
+ int result;
+
+ LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
+
+ queue = &io->ci_queue;
+ cl_2queue_init_page(queue, page);
+
+ result = cl_io_submit_sync(env, io, crt, queue, 0);
+ LASSERT(cl_page_is_owned(page, io));
+
+ if (crt == CRT_READ)
+ /*
+ * in CRT_WRITE case page is left locked even in case of
+ * error.
+ */
+ cl_page_list_disown(env, io, &queue->c2_qin);
+ cl_2queue_fini(env, queue);
+
+ return result;
+}
diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
index 69aa15e8e3ef..c12a048fce59 100644
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ b/drivers/staging/lustre/lustre/llite/rw26.c
@@ -95,15 +95,12 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset,
if (obj) {
page = cl_vmpage_page(vmpage, obj);
if (page) {
- lu_ref_add(&page->cp_reference,
- "delete", vmpage);
cl_page_delete(env, page);
- lu_ref_del(&page->cp_reference,
- "delete", vmpage);
cl_page_put(env, page);
}
- } else
+ } else {
LASSERT(vmpage->private == 0);
+ }
cl_env_put(env, &refcheck);
}
}
@@ -111,12 +108,12 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset,
static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)
{
- struct cl_env_nest nest;
struct lu_env *env;
+ void *cookie;
struct cl_object *obj;
struct cl_page *page;
struct address_space *mapping;
- int result;
+ int result = 0;
LASSERT(PageLocked(vmpage));
if (PageWriteback(vmpage) || PageDirty(vmpage))
@@ -130,53 +127,42 @@ static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)
if (!obj)
return 1;
- /* 1 for page allocator, 1 for cl_page and 1 for page cache */
+ /* 1 for caller, 1 for cl_page and 1 for page cache */
if (page_count(vmpage) > 3)
return 0;
- /* TODO: determine what gfp should be used by @gfp_mask. */
- env = cl_env_nested_get(&nest);
- if (IS_ERR(env))
- /* If we can't allocate an env we won't call cl_page_put()
- * later on which further means it's impossible to drop
- * page refcount by cl_page, so ask kernel to not free
- * this page.
- */
- return 0;
-
page = cl_vmpage_page(vmpage, obj);
- result = !page;
- if (page) {
- if (!cl_page_in_use(page)) {
- result = 1;
- cl_page_delete(env, page);
- }
- cl_page_put(env, page);
- }
- cl_env_nested_put(&nest, env);
- return result;
-}
+ if (!page)
+ return 1;
-static int ll_set_page_dirty(struct page *vmpage)
-{
-#if 0
- struct cl_page *page = vvp_vmpage_page_transient(vmpage);
- struct vvp_object *obj = cl_inode2vvp(vmpage->mapping->host);
- struct vvp_page *cpg;
+ cookie = cl_env_reenter();
+ env = cl_env_percpu_get();
+ LASSERT(!IS_ERR(env));
- /*
- * XXX should page method be called here?
- */
- LASSERT(&obj->co_cl == page->cp_obj);
- cpg = cl2vvp_page(cl_page_at(page, &vvp_device_type));
- /*
- * XXX cannot do much here, because page is possibly not locked:
- * sys_munmap()->...
- * ->unmap_page_range()->zap_pte_range()->set_page_dirty().
+ if (!cl_page_in_use(page)) {
+ result = 1;
+ cl_page_delete(env, page);
+ }
+
+ /* To use percpu env array, the call path can not be rescheduled;
+ * otherwise percpu array will be messed if ll_releaspage() called
+ * again on the same CPU.
+ *
+ * If this page holds the last refc of cl_object, the following
+ * call path may cause reschedule:
+ * cl_page_put -> cl_page_free -> cl_object_put ->
+ * lu_object_put -> lu_object_free -> lov_delete_raid0.
+ *
+ * However, the kernel can't get rid of this inode until all pages have
+ * been cleaned up. Now that we hold page lock here, it's pretty safe
+ * that we won't get into object delete path.
*/
- vvp_write_pending(obj, cpg);
-#endif
- return __set_page_dirty_nobuffers(vmpage);
+ LASSERT(cl_object_refc(obj) > 1);
+ cl_page_put(env, page);
+
+ cl_env_percpu_put(env);
+ cl_env_reexit(cookie);
+ return result;
}
#define MAX_DIRECTIO_SIZE (2*1024*1024*1024UL)
@@ -266,7 +252,7 @@ ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
* write directly
*/
if (clp->cp_type == CPT_CACHEABLE) {
- struct page *vmpage = cl_page_vmpage(env, clp);
+ struct page *vmpage = cl_page_vmpage(clp);
struct page *src_page;
struct page *dst_page;
void *src;
@@ -358,14 +344,14 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
*/
#define MAX_DIO_SIZE ((KMALLOC_MAX_SIZE / sizeof(struct brw_page) * \
PAGE_SIZE) & ~(DT_MAX_BRW_SIZE - 1))
-static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
- loff_t file_offset)
+static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter)
{
struct lu_env *env;
struct cl_io *io;
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
- struct ccc_object *obj = cl_inode2ccc(inode);
+ struct vvp_object *obj = cl_inode2vvp(inode);
+ loff_t file_offset = iocb->ki_pos;
ssize_t count = iov_iter_count(iter);
ssize_t tot_bytes = 0, result = 0;
struct ll_inode_info *lli = ll_i2info(inode);
@@ -376,22 +362,21 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
return -EBADF;
/* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
- if ((file_offset & ~CFS_PAGE_MASK) || (count & ~CFS_PAGE_MASK))
+ if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
return -EINVAL;
- CDEBUG(D_VFSTRACE,
- "VFS Op:inode=%lu/%u(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n",
- inode->i_ino, inode->i_generation, inode, count, MAX_DIO_SIZE,
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n",
+ PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
file_offset, file_offset, count >> PAGE_SHIFT,
MAX_DIO_SIZE >> PAGE_SHIFT);
/* Check that all user buffers are aligned as well */
- if (iov_iter_alignment(iter) & ~CFS_PAGE_MASK)
+ if (iov_iter_alignment(iter) & ~PAGE_MASK)
return -EINVAL;
env = cl_env_get(&refcheck);
LASSERT(!IS_ERR(env));
- io = ccc_env_io(env)->cui_cl.cis_io;
+ io = vvp_env_io(env)->vui_cl.cis_io;
LASSERT(io);
/* 0. Need locking between buffered and direct access. and race with
@@ -401,7 +386,7 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
if (iov_iter_rw(iter) == READ)
inode_lock(inode);
- LASSERT(obj->cob_transient_pages == 0);
+ LASSERT(obj->vob_transient_pages == 0);
while (iov_iter_count(iter)) {
struct page **pages;
size_t offs;
@@ -435,8 +420,8 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
size > (PAGE_SIZE / sizeof(*pages)) *
PAGE_SIZE) {
size = ((((size / 2) - 1) |
- ~CFS_PAGE_MASK) + 1) &
- CFS_PAGE_MASK;
+ ~PAGE_MASK) + 1) &
+ PAGE_MASK;
CDEBUG(D_VFSTRACE, "DIO size now %lu\n",
size);
continue;
@@ -449,62 +434,213 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
file_offset += result;
}
out:
- LASSERT(obj->cob_transient_pages == 0);
+ LASSERT(obj->vob_transient_pages == 0);
if (iov_iter_rw(iter) == READ)
inode_unlock(inode);
if (tot_bytes > 0) {
- if (iov_iter_rw(iter) == WRITE) {
- struct lov_stripe_md *lsm;
-
- lsm = ccc_inode_lsm_get(inode);
- LASSERT(lsm);
- lov_stripe_lock(lsm);
- obd_adjust_kms(ll_i2dtexp(inode), lsm, file_offset, 0);
- lov_stripe_unlock(lsm);
- ccc_inode_lsm_put(inode, lsm);
- }
+ struct vvp_io *vio = vvp_env_io(env);
+
+ /* no commit async for direct IO */
+ vio->u.write.vui_written += tot_bytes;
}
cl_env_put(env, &refcheck);
- return tot_bytes ? : result;
+ return tot_bytes ? tot_bytes : result;
+}
+
+/**
+ * Prepare partially written-to page for a write.
+ */
+static int ll_prepare_partial_page(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *pg)
+{
+ struct cl_attr *attr = vvp_env_thread_attr(env);
+ struct cl_object *obj = io->ci_obj;
+ struct vvp_page *vpg = cl_object_page_slice(obj, pg);
+ loff_t offset = cl_offset(obj, vvp_index(vpg));
+ int result;
+
+ cl_object_attr_lock(obj);
+ result = cl_object_attr_get(env, obj, attr);
+ cl_object_attr_unlock(obj);
+ if (result == 0) {
+ /*
+ * If are writing to a new page, no need to read old data.
+ * The extent locking will have updated the KMS, and for our
+ * purposes here we can treat it like i_size.
+ */
+ if (attr->cat_kms <= offset) {
+ char *kaddr = kmap_atomic(vpg->vpg_page);
+
+ memset(kaddr, 0, cl_page_size(obj));
+ kunmap_atomic(kaddr);
+ } else if (vpg->vpg_defer_uptodate) {
+ vpg->vpg_ra_used = 1;
+ } else {
+ result = ll_page_sync_io(env, io, pg, CRT_READ);
+ }
+ }
+ return result;
}
static int ll_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
+ struct ll_cl_context *lcc;
+ struct lu_env *env;
+ struct cl_io *io;
+ struct cl_page *page;
+ struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
pgoff_t index = pos >> PAGE_SHIFT;
- struct page *page;
- int rc;
- unsigned from = pos & (PAGE_SIZE - 1);
+ struct page *vmpage = NULL;
+ unsigned int from = pos & (PAGE_SIZE - 1);
+ unsigned int to = from + len;
+ int result = 0;
- page = grab_cache_page_write_begin(mapping, index, flags);
- if (!page)
- return -ENOMEM;
+ CDEBUG(D_VFSTRACE, "Writing %lu of %d to %d bytes\n", index, from, len);
- *pagep = page;
+ lcc = ll_cl_init(file, NULL);
+ if (IS_ERR(lcc)) {
+ result = PTR_ERR(lcc);
+ goto out;
+ }
- rc = ll_prepare_write(file, page, from, from + len);
- if (rc) {
- unlock_page(page);
- put_page(page);
+ env = lcc->lcc_env;
+ io = lcc->lcc_io;
+
+ /* To avoid deadlock, try to lock page first. */
+ vmpage = grab_cache_page_nowait(mapping, index);
+ if (unlikely(!vmpage || PageDirty(vmpage) || PageWriteback(vmpage))) {
+ struct vvp_io *vio = vvp_env_io(env);
+ struct cl_page_list *plist = &vio->u.write.vui_queue;
+
+ /* if the page is already in dirty cache, we have to commit
+ * the pages right now; otherwise, it may cause deadlock
+ * because it holds page lock of a dirty page and request for
+ * more grants. It's okay for the dirty page to be the first
+ * one in commit page list, though.
+ */
+ if (vmpage && plist->pl_nr > 0) {
+ unlock_page(vmpage);
+ put_page(vmpage);
+ vmpage = NULL;
+ }
+
+ /* commit pages and then wait for page lock */
+ result = vvp_io_write_commit(env, io);
+ if (result < 0)
+ goto out;
+
+ if (!vmpage) {
+ vmpage = grab_cache_page_write_begin(mapping, index,
+ flags);
+ if (!vmpage) {
+ result = -ENOMEM;
+ goto out;
+ }
+ }
}
- return rc;
+
+ page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
+ if (IS_ERR(page)) {
+ result = PTR_ERR(page);
+ goto out;
+ }
+
+ lcc->lcc_page = page;
+ lu_ref_add(&page->cp_reference, "cl_io", io);
+
+ cl_page_assume(env, io, page);
+ if (!PageUptodate(vmpage)) {
+ /*
+ * We're completely overwriting an existing page,
+ * so _don't_ set it up to date until commit_write
+ */
+ if (from == 0 && to == PAGE_SIZE) {
+ CL_PAGE_HEADER(D_PAGE, env, page, "full page write\n");
+ POISON_PAGE(vmpage, 0x11);
+ } else {
+ /* TODO: can be optimized at OSC layer to check if it
+ * is a lockless IO. In that case, it's not necessary
+ * to read the data.
+ */
+ result = ll_prepare_partial_page(env, io, page);
+ if (result == 0)
+ SetPageUptodate(vmpage);
+ }
+ }
+ if (result < 0)
+ cl_page_unassume(env, io, page);
+out:
+ if (result < 0) {
+ if (vmpage) {
+ unlock_page(vmpage);
+ put_page(vmpage);
+ }
+ if (!IS_ERR(lcc))
+ ll_cl_fini(lcc);
+ } else {
+ *pagep = vmpage;
+ *fsdata = lcc;
+ }
+ return result;
}
static int ll_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct page *vmpage, void *fsdata)
{
+ struct ll_cl_context *lcc = fsdata;
+ struct lu_env *env;
+ struct cl_io *io;
+ struct vvp_io *vio;
+ struct cl_page *page;
unsigned from = pos & (PAGE_SIZE - 1);
- int rc;
+ bool unplug = false;
+ int result = 0;
+
+ put_page(vmpage);
+
+ env = lcc->lcc_env;
+ page = lcc->lcc_page;
+ io = lcc->lcc_io;
+ vio = vvp_env_io(env);
+
+ LASSERT(cl_page_is_owned(page, io));
+ if (copied > 0) {
+ struct cl_page_list *plist = &vio->u.write.vui_queue;
+
+ lcc->lcc_page = NULL; /* page will be queued */
+
+ /* Add it into write queue */
+ cl_page_list_add(plist, page);
+ if (plist->pl_nr == 1) /* first page */
+ vio->u.write.vui_from = from;
+ else
+ LASSERT(from == 0);
+ vio->u.write.vui_to = from + copied;
+
+ /* We may have one full RPC, commit it soon */
+ if (plist->pl_nr >= PTLRPC_MAX_BRW_PAGES)
+ unplug = true;
+
+ CL_PAGE_DEBUG(D_VFSTRACE, env, page,
+ "queued page: %d.\n", plist->pl_nr);
+ } else {
+ cl_page_disown(env, io, page);
+
+ /* page list is not contiguous now, commit it now */
+ unplug = true;
+ }
- rc = ll_commit_write(file, page, from, from + copied);
- unlock_page(page);
- put_page(page);
+ if (unplug ||
+ file->f_flags & O_SYNC || IS_SYNC(file_inode(file)))
+ result = vvp_io_write_commit(env, io);
- return rc ?: copied;
+ ll_cl_fini(lcc);
+ return result >= 0 ? copied : result;
}
#ifdef CONFIG_MIGRATION
@@ -523,7 +659,7 @@ const struct address_space_operations ll_aops = {
.direct_IO = ll_direct_IO_26,
.writepage = ll_writepage,
.writepages = ll_writepages,
- .set_page_dirty = ll_set_page_dirty,
+ .set_page_dirty = __set_page_dirty_nobuffers,
.write_begin = ll_write_begin,
.write_end = ll_write_end,
.invalidatepage = ll_invalidatepage,
diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c
index 99ffd1589df8..6322f88661e8 100644
--- a/drivers/staging/lustre/lustre/llite/statahead.c
+++ b/drivers/staging/lustre/lustre/llite/statahead.c
@@ -661,8 +661,9 @@ static void ll_post_statahead(struct ll_statahead_info *sai)
if (rc)
goto out;
- CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
- child, child->i_ino, child->i_generation);
+ CDEBUG(D_DLMTRACE, "%s: setting l_data to inode "DFID"%p\n",
+ ll_get_fsname(child->i_sb, NULL, 0),
+ PFID(ll_inode2fid(child)), child);
ll_set_lock_data(ll_i2sbi(dir)->ll_md_exp, child, it, NULL);
entry->se_inode = child;
@@ -1591,13 +1592,11 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
*dentryp = alias;
} else if (d_inode(*dentryp) != inode) {
/* revalidate, but inode is recreated */
- CDEBUG(D_READA,
- "stale dentry %pd inode %lu/%u, statahead inode %lu/%u\n",
- *dentryp,
- d_inode(*dentryp)->i_ino,
- d_inode(*dentryp)->i_generation,
- inode->i_ino,
- inode->i_generation);
+ CDEBUG(D_READA, "%s: stale dentry %pd inode "DFID", statahead inode "DFID"\n",
+ ll_get_fsname(d_inode(*dentryp)->i_sb, NULL, 0),
+ *dentryp,
+ PFID(ll_inode2fid(d_inode(*dentryp))),
+ PFID(ll_inode2fid(inode)));
ll_sai_unplug(sai, entry);
return -ESTALE;
} else {
diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c
index 61856d37afc5..415750b0bff4 100644
--- a/drivers/staging/lustre/lustre/llite/super25.c
+++ b/drivers/staging/lustre/lustre/llite/super25.c
@@ -164,9 +164,18 @@ static int __init lustre_init(void)
if (rc != 0)
goto out_sysfs;
+ cl_inode_fini_env = cl_env_alloc(&cl_inode_fini_refcheck,
+ LCT_REMEMBER | LCT_NOREF);
+ if (IS_ERR(cl_inode_fini_env)) {
+ rc = PTR_ERR(cl_inode_fini_env);
+ goto out_vvp;
+ }
+
+ cl_inode_fini_env->le_ctx.lc_cookie = 0x4;
+
rc = ll_xattr_init();
if (rc != 0)
- goto out_vvp;
+ goto out_inode_fini_env;
lustre_register_client_fill_super(ll_fill_super);
lustre_register_kill_super_cb(ll_kill_super);
@@ -174,6 +183,8 @@ static int __init lustre_init(void)
return 0;
+out_inode_fini_env:
+ cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck);
out_vvp:
vvp_global_fini();
out_sysfs:
@@ -198,6 +209,7 @@ static void __exit lustre_exit(void)
kset_unregister(llite_kset);
ll_xattr_fini();
+ cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck);
vvp_global_fini();
kmem_cache_destroy(ll_inode_cachep);
diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c
index 46d03ea48352..3fc736ccf85e 100644
--- a/drivers/staging/lustre/lustre/llite/symlink.c
+++ b/drivers/staging/lustre/lustre/llite/symlink.c
@@ -77,7 +77,9 @@ static int ll_readlink_internal(struct inode *inode,
ll_finish_md_op_data(op_data);
if (rc) {
if (rc != -ENOENT)
- CERROR("inode %lu: rc = %d\n", inode->i_ino, rc);
+ CERROR("%s: inode "DFID": rc = %d\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(ll_inode2fid(inode)), rc);
goto failed;
}
@@ -90,8 +92,10 @@ static int ll_readlink_internal(struct inode *inode,
LASSERT(symlen != 0);
if (body->eadatasize != symlen) {
- CERROR("inode %lu: symlink length %d not expected %d\n",
- inode->i_ino, body->eadatasize - 1, symlen - 1);
+ CERROR("%s: inode "DFID": symlink length %d not expected %d\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(ll_inode2fid(inode)), body->eadatasize - 1,
+ symlen - 1);
rc = -EPROTO;
goto failed;
}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c
index 282b70b776da..47101de1c020 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_dev.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_dev.c
@@ -36,6 +36,7 @@
* cl_device and cl_device_type implementation for VVP layer.
*
* Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@intel.com>
*/
#define DEBUG_SUBSYSTEM S_LLITE
@@ -56,13 +57,33 @@
* "llite_" (var. "ll_") prefix.
*/
-static struct kmem_cache *vvp_thread_kmem;
+static struct kmem_cache *ll_thread_kmem;
+struct kmem_cache *vvp_lock_kmem;
+struct kmem_cache *vvp_object_kmem;
+struct kmem_cache *vvp_req_kmem;
static struct kmem_cache *vvp_session_kmem;
+static struct kmem_cache *vvp_thread_kmem;
+
static struct lu_kmem_descr vvp_caches[] = {
{
- .ckd_cache = &vvp_thread_kmem,
- .ckd_name = "vvp_thread_kmem",
- .ckd_size = sizeof(struct vvp_thread_info),
+ .ckd_cache = &ll_thread_kmem,
+ .ckd_name = "ll_thread_kmem",
+ .ckd_size = sizeof(struct ll_thread_info),
+ },
+ {
+ .ckd_cache = &vvp_lock_kmem,
+ .ckd_name = "vvp_lock_kmem",
+ .ckd_size = sizeof(struct vvp_lock),
+ },
+ {
+ .ckd_cache = &vvp_object_kmem,
+ .ckd_name = "vvp_object_kmem",
+ .ckd_size = sizeof(struct vvp_object),
+ },
+ {
+ .ckd_cache = &vvp_req_kmem,
+ .ckd_name = "vvp_req_kmem",
+ .ckd_size = sizeof(struct vvp_req),
},
{
.ckd_cache = &vvp_session_kmem,
@@ -70,29 +91,40 @@ static struct lu_kmem_descr vvp_caches[] = {
.ckd_size = sizeof(struct vvp_session)
},
{
+ .ckd_cache = &vvp_thread_kmem,
+ .ckd_name = "vvp_thread_kmem",
+ .ckd_size = sizeof(struct vvp_thread_info),
+ },
+ {
.ckd_cache = NULL
}
};
-static void *vvp_key_init(const struct lu_context *ctx,
- struct lu_context_key *key)
+static void *ll_thread_key_init(const struct lu_context *ctx,
+ struct lu_context_key *key)
{
struct vvp_thread_info *info;
- info = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS);
+ info = kmem_cache_zalloc(ll_thread_kmem, GFP_NOFS);
if (!info)
info = ERR_PTR(-ENOMEM);
return info;
}
-static void vvp_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
+static void ll_thread_key_fini(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data)
{
struct vvp_thread_info *info = data;
- kmem_cache_free(vvp_thread_kmem, info);
+ kmem_cache_free(ll_thread_kmem, info);
}
+struct lu_context_key ll_thread_key = {
+ .lct_tags = LCT_CL_THREAD,
+ .lct_init = ll_thread_key_init,
+ .lct_fini = ll_thread_key_fini
+};
+
static void *vvp_session_key_init(const struct lu_context *ctx,
struct lu_context_key *key)
{
@@ -112,34 +144,127 @@ static void vvp_session_key_fini(const struct lu_context *ctx,
kmem_cache_free(vvp_session_kmem, session);
}
-struct lu_context_key vvp_key = {
- .lct_tags = LCT_CL_THREAD,
- .lct_init = vvp_key_init,
- .lct_fini = vvp_key_fini
-};
-
struct lu_context_key vvp_session_key = {
.lct_tags = LCT_SESSION,
.lct_init = vvp_session_key_init,
.lct_fini = vvp_session_key_fini
};
+void *vvp_thread_key_init(const struct lu_context *ctx,
+ struct lu_context_key *key)
+{
+ struct vvp_thread_info *vti;
+
+ vti = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS);
+ if (!vti)
+ vti = ERR_PTR(-ENOMEM);
+ return vti;
+}
+
+void vvp_thread_key_fini(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data)
+{
+ struct vvp_thread_info *vti = data;
+
+ kmem_cache_free(vvp_thread_kmem, vti);
+}
+
+struct lu_context_key vvp_thread_key = {
+ .lct_tags = LCT_CL_THREAD,
+ .lct_init = vvp_thread_key_init,
+ .lct_fini = vvp_thread_key_fini
+};
+
/* type constructor/destructor: vvp_type_{init,fini,start,stop}(). */
-LU_TYPE_INIT_FINI(vvp, &ccc_key, &ccc_session_key, &vvp_key, &vvp_session_key);
+LU_TYPE_INIT_FINI(vvp, &vvp_thread_key, &ll_thread_key, &vvp_session_key);
static const struct lu_device_operations vvp_lu_ops = {
.ldo_object_alloc = vvp_object_alloc
};
static const struct cl_device_operations vvp_cl_ops = {
- .cdo_req_init = ccc_req_init
+ .cdo_req_init = vvp_req_init
};
+static struct lu_device *vvp_device_free(const struct lu_env *env,
+ struct lu_device *d)
+{
+ struct vvp_device *vdv = lu2vvp_dev(d);
+ struct cl_site *site = lu2cl_site(d->ld_site);
+ struct lu_device *next = cl2lu_dev(vdv->vdv_next);
+
+ if (d->ld_site) {
+ cl_site_fini(site);
+ kfree(site);
+ }
+ cl_device_fini(lu2cl_dev(d));
+ kfree(vdv);
+ return next;
+}
+
static struct lu_device *vvp_device_alloc(const struct lu_env *env,
struct lu_device_type *t,
struct lustre_cfg *cfg)
{
- return ccc_device_alloc(env, t, cfg, &vvp_lu_ops, &vvp_cl_ops);
+ struct vvp_device *vdv;
+ struct lu_device *lud;
+ struct cl_site *site;
+ int rc;
+
+ vdv = kzalloc(sizeof(*vdv), GFP_NOFS);
+ if (!vdv)
+ return ERR_PTR(-ENOMEM);
+
+ lud = &vdv->vdv_cl.cd_lu_dev;
+ cl_device_init(&vdv->vdv_cl, t);
+ vvp2lu_dev(vdv)->ld_ops = &vvp_lu_ops;
+ vdv->vdv_cl.cd_ops = &vvp_cl_ops;
+
+ site = kzalloc(sizeof(*site), GFP_NOFS);
+ if (site) {
+ rc = cl_site_init(site, &vdv->vdv_cl);
+ if (rc == 0) {
+ rc = lu_site_init_finish(&site->cs_lu);
+ } else {
+ LASSERT(!lud->ld_site);
+ CERROR("Cannot init lu_site, rc %d.\n", rc);
+ kfree(site);
+ }
+ } else {
+ rc = -ENOMEM;
+ }
+ if (rc != 0) {
+ vvp_device_free(env, lud);
+ lud = ERR_PTR(rc);
+ }
+ return lud;
+}
+
+static int vvp_device_init(const struct lu_env *env, struct lu_device *d,
+ const char *name, struct lu_device *next)
+{
+ struct vvp_device *vdv;
+ int rc;
+
+ vdv = lu2vvp_dev(d);
+ vdv->vdv_next = lu2cl_dev(next);
+
+ LASSERT(d->ld_site && next->ld_type);
+ next->ld_site = d->ld_site;
+ rc = next->ld_type->ldt_ops->ldto_device_init(env, next,
+ next->ld_type->ldt_name,
+ NULL);
+ if (rc == 0) {
+ lu_device_get(next);
+ lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init);
+ }
+ return rc;
+}
+
+static struct lu_device *vvp_device_fini(const struct lu_env *env,
+ struct lu_device *d)
+{
+ return cl2lu_dev(lu2vvp_dev(d)->vdv_next);
}
static const struct lu_device_type_operations vvp_device_type_ops = {
@@ -150,9 +275,9 @@ static const struct lu_device_type_operations vvp_device_type_ops = {
.ldto_stop = vvp_type_stop,
.ldto_device_alloc = vvp_device_alloc,
- .ldto_device_free = ccc_device_free,
- .ldto_device_init = ccc_device_init,
- .ldto_device_fini = ccc_device_fini
+ .ldto_device_free = vvp_device_free,
+ .ldto_device_init = vvp_device_init,
+ .ldto_device_fini = vvp_device_fini,
};
struct lu_device_type vvp_device_type = {
@@ -168,20 +293,27 @@ struct lu_device_type vvp_device_type = {
*/
int vvp_global_init(void)
{
- int result;
+ int rc;
- result = lu_kmem_init(vvp_caches);
- if (result == 0) {
- result = ccc_global_init(&vvp_device_type);
- if (result != 0)
- lu_kmem_fini(vvp_caches);
- }
- return result;
+ rc = lu_kmem_init(vvp_caches);
+ if (rc != 0)
+ return rc;
+
+ rc = lu_device_type_init(&vvp_device_type);
+ if (rc != 0)
+ goto out_kmem;
+
+ return 0;
+
+out_kmem:
+ lu_kmem_fini(vvp_caches);
+
+ return rc;
}
void vvp_global_fini(void)
{
- ccc_global_fini(&vvp_device_type);
+ lu_device_type_fini(&vvp_device_type);
lu_kmem_fini(vvp_caches);
}
@@ -205,13 +337,14 @@ int cl_sb_init(struct super_block *sb)
cl = cl_type_setup(env, NULL, &vvp_device_type,
sbi->ll_dt_exp->exp_obd->obd_lu_dev);
if (!IS_ERR(cl)) {
- cl2ccc_dev(cl)->cdv_sb = sb;
+ cl2vvp_dev(cl)->vdv_sb = sb;
sbi->ll_cl = cl;
sbi->ll_site = cl2lu_dev(cl)->ld_site;
}
cl_env_put(env, &refcheck);
- } else
+ } else {
rc = PTR_ERR(env);
+ }
return rc;
}
@@ -356,23 +489,18 @@ static loff_t vvp_pgcache_find(const struct lu_env *env,
return ~0ULL;
clob = vvp_pgcache_obj(env, dev, &id);
if (clob) {
- struct cl_object_header *hdr;
- int nr;
- struct cl_page *pg;
-
- /* got an object. Find next page. */
- hdr = cl_object_header(clob);
+ struct inode *inode = vvp_object_inode(clob);
+ struct page *vmpage;
+ int nr;
- spin_lock(&hdr->coh_page_guard);
- nr = radix_tree_gang_lookup(&hdr->coh_tree,
- (void **)&pg,
- id.vpi_index, 1);
+ nr = find_get_pages_contig(inode->i_mapping,
+ id.vpi_index, 1, &vmpage);
if (nr > 0) {
- id.vpi_index = pg->cp_index;
+ id.vpi_index = vmpage->index;
/* Cant support over 16T file */
- nr = !(pg->cp_index > 0xffffffff);
+ nr = !(vmpage->index > 0xffffffff);
+ put_page(vmpage);
}
- spin_unlock(&hdr->coh_page_guard);
lu_object_ref_del(&clob->co_lu, "dump", current);
cl_object_put(env, clob);
@@ -398,21 +526,20 @@ static loff_t vvp_pgcache_find(const struct lu_env *env,
static void vvp_pgcache_page_show(const struct lu_env *env,
struct seq_file *seq, struct cl_page *page)
{
- struct ccc_page *cpg;
+ struct vvp_page *vpg;
struct page *vmpage;
int has_flags;
- cpg = cl2ccc_page(cl_page_at(page, &vvp_device_type));
- vmpage = cpg->cpg_page;
- seq_printf(seq, " %5i | %p %p %s %s %s %s | %p %lu/%u(%p) %lu %u [",
+ vpg = cl2vvp_page(cl_page_at(page, &vvp_device_type));
+ vmpage = vpg->vpg_page;
+ seq_printf(seq, " %5i | %p %p %s %s %s %s | %p "DFID"(%p) %lu %u [",
0 /* gen */,
- cpg, page,
+ vpg, page,
"none",
- cpg->cpg_write_queued ? "wq" : "- ",
- cpg->cpg_defer_uptodate ? "du" : "- ",
+ vpg->vpg_write_queued ? "wq" : "- ",
+ vpg->vpg_defer_uptodate ? "du" : "- ",
PageWriteback(vmpage) ? "wb" : "-",
- vmpage, vmpage->mapping->host->i_ino,
- vmpage->mapping->host->i_generation,
+ vmpage, PFID(ll_inode2fid(vmpage->mapping->host)),
vmpage->mapping->host, vmpage->index,
page_count(vmpage));
has_flags = 0;
@@ -431,8 +558,6 @@ static int vvp_pgcache_show(struct seq_file *f, void *v)
struct ll_sb_info *sbi;
struct cl_object *clob;
struct lu_env *env;
- struct cl_page *page;
- struct cl_object_header *hdr;
struct vvp_pgcache_id id;
int refcheck;
int result;
@@ -444,27 +569,38 @@ static int vvp_pgcache_show(struct seq_file *f, void *v)
sbi = f->private;
clob = vvp_pgcache_obj(env, &sbi->ll_cl->cd_lu_dev, &id);
if (clob) {
- hdr = cl_object_header(clob);
-
- spin_lock(&hdr->coh_page_guard);
- page = cl_page_lookup(hdr, id.vpi_index);
- spin_unlock(&hdr->coh_page_guard);
+ struct inode *inode = vvp_object_inode(clob);
+ struct cl_page *page = NULL;
+ struct page *vmpage;
+
+ result = find_get_pages_contig(inode->i_mapping,
+ id.vpi_index, 1,
+ &vmpage);
+ if (result > 0) {
+ lock_page(vmpage);
+ page = cl_vmpage_page(vmpage, clob);
+ unlock_page(vmpage);
+ put_page(vmpage);
+ }
- seq_printf(f, "%8x@"DFID": ",
- id.vpi_index, PFID(&hdr->coh_lu.loh_fid));
+ seq_printf(f, "%8x@" DFID ": ", id.vpi_index,
+ PFID(lu_object_fid(&clob->co_lu)));
if (page) {
vvp_pgcache_page_show(env, f, page);
cl_page_put(env, page);
- } else
+ } else {
seq_puts(f, "missing\n");
+ }
lu_object_ref_del(&clob->co_lu, "dump", current);
cl_object_put(env, clob);
- } else
+ } else {
seq_printf(f, "%llx missing\n", pos);
+ }
cl_env_put(env, &refcheck);
result = 0;
- } else
+ } else {
result = PTR_ERR(env);
+ }
return result;
}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h
index bb393378c9bb..27b9b0a01f32 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_internal.h
+++ b/drivers/staging/lustre/lustre/llite/vvp_internal.h
@@ -41,21 +41,337 @@
#ifndef VVP_INTERNAL_H
#define VVP_INTERNAL_H
+#include "../include/lustre/lustre_idl.h"
#include "../include/cl_object.h"
-#include "llite_internal.h"
-int vvp_io_init(const struct lu_env *env,
- struct cl_object *obj, struct cl_io *io);
-int vvp_lock_init(const struct lu_env *env,
- struct cl_object *obj, struct cl_lock *lock,
- const struct cl_io *io);
+enum obd_notify_event;
+struct inode;
+struct lov_stripe_md;
+struct lustre_md;
+struct obd_capa;
+struct obd_device;
+struct obd_export;
+struct page;
+
+/* specific architecture can implement only part of this list */
+enum vvp_io_subtype {
+ /** normal IO */
+ IO_NORMAL,
+ /** io started from splice_{read|write} */
+ IO_SPLICE
+};
+
+/**
+ * IO state private to IO state private to VVP layer.
+ */
+struct vvp_io {
+ /** super class */
+ struct cl_io_slice vui_cl;
+ struct cl_io_lock_link vui_link;
+ /**
+ * I/O vector information to or from which read/write is going.
+ */
+ struct iov_iter *vui_iter;
+ /**
+ * Total size for the left IO.
+ */
+ size_t vui_tot_count;
+
+ union {
+ struct vvp_fault_io {
+ /**
+ * Inode modification time that is checked across DLM
+ * lock request.
+ */
+ time64_t ft_mtime;
+ struct vm_area_struct *ft_vma;
+ /**
+ * locked page returned from vvp_io
+ */
+ struct page *ft_vmpage;
+ /**
+ * kernel fault info
+ */
+ struct vm_fault *ft_vmf;
+ /**
+ * fault API used bitflags for return code.
+ */
+ unsigned int ft_flags;
+ /**
+ * check that flags are from filemap_fault
+ */
+ bool ft_flags_valid;
+ } fault;
+ struct {
+ struct pipe_inode_info *vui_pipe;
+ unsigned int vui_flags;
+ } splice;
+ struct {
+ struct cl_page_list vui_queue;
+ unsigned long vui_written;
+ int vui_from;
+ int vui_to;
+ } write;
+ } u;
+
+ enum vvp_io_subtype vui_io_subtype;
+
+ /**
+ * Layout version when this IO is initialized
+ */
+ __u32 vui_layout_gen;
+ /**
+ * File descriptor against which IO is done.
+ */
+ struct ll_file_data *vui_fd;
+ struct kiocb *vui_iocb;
+
+ /* Readahead state. */
+ pgoff_t vui_ra_start;
+ pgoff_t vui_ra_count;
+ /* Set when vui_ra_{start,count} have been initialized. */
+ bool vui_ra_valid;
+};
+
+extern struct lu_device_type vvp_device_type;
+
+extern struct lu_context_key vvp_session_key;
+extern struct lu_context_key vvp_thread_key;
+
+extern struct kmem_cache *vvp_lock_kmem;
+extern struct kmem_cache *vvp_object_kmem;
+extern struct kmem_cache *vvp_req_kmem;
+
+struct vvp_thread_info {
+ struct cl_lock vti_lock;
+ struct cl_lock_descr vti_descr;
+ struct cl_io vti_io;
+ struct cl_attr vti_attr;
+};
+
+static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env)
+{
+ struct vvp_thread_info *vti;
+
+ vti = lu_context_key_get(&env->le_ctx, &vvp_thread_key);
+ LASSERT(vti);
+
+ return vti;
+}
+
+static inline struct cl_lock *vvp_env_lock(const struct lu_env *env)
+{
+ struct cl_lock *lock = &vvp_env_info(env)->vti_lock;
+
+ memset(lock, 0, sizeof(*lock));
+ return lock;
+}
+
+static inline struct cl_attr *vvp_env_thread_attr(const struct lu_env *env)
+{
+ struct cl_attr *attr = &vvp_env_info(env)->vti_attr;
+
+ memset(attr, 0, sizeof(*attr));
+
+ return attr;
+}
+
+static inline struct cl_io *vvp_env_thread_io(const struct lu_env *env)
+{
+ struct cl_io *io = &vvp_env_info(env)->vti_io;
+
+ memset(io, 0, sizeof(*io));
+
+ return io;
+}
+
+struct vvp_session {
+ struct vvp_io cs_ios;
+};
+
+static inline struct vvp_session *vvp_env_session(const struct lu_env *env)
+{
+ struct vvp_session *ses;
+
+ ses = lu_context_key_get(env->le_ses, &vvp_session_key);
+ LASSERT(ses);
+
+ return ses;
+}
+
+static inline struct vvp_io *vvp_env_io(const struct lu_env *env)
+{
+ return &vvp_env_session(env)->cs_ios;
+}
+
+/**
+ * ccc-private object state.
+ */
+struct vvp_object {
+ struct cl_object_header vob_header;
+ struct cl_object vob_cl;
+ struct inode *vob_inode;
+
+ /**
+ * A list of dirty pages pending IO in the cache. Used by
+ * SOM. Protected by ll_inode_info::lli_lock.
+ *
+ * \see vvp_page::vpg_pending_linkage
+ */
+ struct list_head vob_pending_list;
+
+ /**
+ * Access this counter is protected by inode->i_sem. Now that
+ * the lifetime of transient pages must be covered by inode sem,
+ * we don't need to hold any lock..
+ */
+ int vob_transient_pages;
+ /**
+ * Number of outstanding mmaps on this file.
+ *
+ * \see ll_vm_open(), ll_vm_close().
+ */
+ atomic_t vob_mmap_cnt;
+
+ /**
+ * various flags
+ * vob_discard_page_warned
+ * if pages belonging to this object are discarded when a client
+ * is evicted, some debug info will be printed, this flag will be set
+ * during processing the first discarded page, then avoid flooding
+ * debug message for lots of discarded pages.
+ *
+ * \see ll_dirty_page_discard_warn.
+ */
+ unsigned int vob_discard_page_warned:1;
+};
+
+/**
+ * VVP-private page state.
+ */
+struct vvp_page {
+ struct cl_page_slice vpg_cl;
+ int vpg_defer_uptodate;
+ int vpg_ra_used;
+ int vpg_write_queued;
+ /**
+ * Non-empty iff this page is already counted in
+ * vvp_object::vob_pending_list. This list is only used as a flag,
+ * that is, never iterated through, only checked for list_empty(), but
+ * having a list is useful for debugging.
+ */
+ struct list_head vpg_pending_linkage;
+ /** VM page */
+ struct page *vpg_page;
+};
+
+static inline struct vvp_page *cl2vvp_page(const struct cl_page_slice *slice)
+{
+ return container_of(slice, struct vvp_page, vpg_cl);
+}
+
+static inline pgoff_t vvp_index(struct vvp_page *vvp)
+{
+ return vvp->vpg_cl.cpl_index;
+}
+
+struct vvp_device {
+ struct cl_device vdv_cl;
+ struct super_block *vdv_sb;
+ struct cl_device *vdv_next;
+};
+
+struct vvp_lock {
+ struct cl_lock_slice vlk_cl;
+};
+
+struct vvp_req {
+ struct cl_req_slice vrq_cl;
+};
+
+void *ccc_key_init(const struct lu_context *ctx,
+ struct lu_context_key *key);
+void ccc_key_fini(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data);
+
+void ccc_umount(const struct lu_env *env, struct cl_device *dev);
+
+static inline struct lu_device *vvp2lu_dev(struct vvp_device *vdv)
+{
+ return &vdv->vdv_cl.cd_lu_dev;
+}
+
+static inline struct vvp_device *lu2vvp_dev(const struct lu_device *d)
+{
+ return container_of0(d, struct vvp_device, vdv_cl.cd_lu_dev);
+}
+
+static inline struct vvp_device *cl2vvp_dev(const struct cl_device *d)
+{
+ return container_of0(d, struct vvp_device, vdv_cl);
+}
+
+static inline struct vvp_object *cl2vvp(const struct cl_object *obj)
+{
+ return container_of0(obj, struct vvp_object, vob_cl);
+}
+
+static inline struct vvp_object *lu2vvp(const struct lu_object *obj)
+{
+ return container_of0(obj, struct vvp_object, vob_cl.co_lu);
+}
+
+static inline struct inode *vvp_object_inode(const struct cl_object *obj)
+{
+ return cl2vvp(obj)->vob_inode;
+}
+
+int vvp_object_invariant(const struct cl_object *obj);
+struct vvp_object *cl_inode2vvp(struct inode *inode);
+
+static inline struct page *cl2vm_page(const struct cl_page_slice *slice)
+{
+ return cl2vvp_page(slice)->vpg_page;
+}
+
+static inline struct vvp_lock *cl2vvp_lock(const struct cl_lock_slice *slice)
+{
+ return container_of(slice, struct vvp_lock, vlk_cl);
+}
+
+# define CLOBINVRNT(env, clob, expr) \
+ ((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr)))
+
+/**
+ * New interfaces to get and put lov_stripe_md from lov layer. This violates
+ * layering because lov_stripe_md is supposed to be a private data in lov.
+ *
+ * NB: If you find you have to use these interfaces for your new code, please
+ * think about it again. These interfaces may be removed in the future for
+ * better layering.
+ */
+struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj);
+void lov_lsm_put(struct cl_object *clobj, struct lov_stripe_md *lsm);
+int lov_read_and_clear_async_rc(struct cl_object *clob);
+
+struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode);
+void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm);
+
+int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
+ struct cl_io *io);
+int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io);
+int vvp_lock_init(const struct lu_env *env, struct cl_object *obj,
+ struct cl_lock *lock, const struct cl_io *io);
int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, struct page *vmpage);
+ struct cl_page *page, pgoff_t index);
+int vvp_req_init(const struct lu_env *env, struct cl_device *dev,
+ struct cl_req *req);
struct lu_object *vvp_object_alloc(const struct lu_env *env,
const struct lu_object_header *hdr,
struct lu_device *dev);
-struct ccc_object *cl_inode2ccc(struct inode *inode);
+int vvp_global_init(void);
+void vvp_global_fini(void);
extern const struct file_operations vvp_dump_pgcache_file_ops;
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
index 85a835976174..5bf9592ae5d2 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_io.c
@@ -44,21 +44,30 @@
#include "../include/obd.h"
#include "../include/lustre_lite.h"
+#include "llite_internal.h"
#include "vvp_internal.h"
-static struct vvp_io *cl2vvp_io(const struct lu_env *env,
- const struct cl_io_slice *slice);
+struct vvp_io *cl2vvp_io(const struct lu_env *env,
+ const struct cl_io_slice *slice)
+{
+ struct vvp_io *vio;
+
+ vio = container_of(slice, struct vvp_io, vui_cl);
+ LASSERT(vio == vvp_env_io(env));
+
+ return vio;
+}
/**
* True, if \a io is a normal io, False for splice_{read,write}
*/
-int cl_is_normalio(const struct lu_env *env, const struct cl_io *io)
+static int cl_is_normalio(const struct lu_env *env, const struct cl_io *io)
{
struct vvp_io *vio = vvp_env_io(env);
LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
- return vio->cui_io_subtype == IO_NORMAL;
+ return vio->vui_io_subtype == IO_NORMAL;
}
/**
@@ -71,7 +80,7 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
struct inode *inode)
{
struct ll_inode_info *lli = ll_i2info(inode);
- struct ccc_io *cio = ccc_env_io(env);
+ struct vvp_io *vio = vvp_env_io(env);
bool rc = true;
switch (io->ci_type) {
@@ -80,7 +89,7 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
/* don't need lock here to check lli_layout_gen as we have held
* extent lock and GROUP lock has to hold to swap layout
*/
- if (ll_layout_version_get(lli) != cio->cui_layout_gen) {
+ if (ll_layout_version_get(lli) != vio->vui_layout_gen) {
io->ci_need_restart = 1;
/* this will return application a short read/write */
io->ci_continue = 0;
@@ -95,20 +104,187 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
return rc;
}
+static void vvp_object_size_lock(struct cl_object *obj)
+{
+ struct inode *inode = vvp_object_inode(obj);
+
+ ll_inode_size_lock(inode);
+ cl_object_attr_lock(obj);
+}
+
+static void vvp_object_size_unlock(struct cl_object *obj)
+{
+ struct inode *inode = vvp_object_inode(obj);
+
+ cl_object_attr_unlock(obj);
+ ll_inode_size_unlock(inode);
+}
+
+/**
+ * Helper function that if necessary adjusts file size (inode->i_size), when
+ * position at the offset \a pos is accessed. File size can be arbitrary stale
+ * on a Lustre client, but client at least knows KMS. If accessed area is
+ * inside [0, KMS], set file size to KMS, otherwise glimpse file size.
+ *
+ * Locking: cl_isize_lock is used to serialize changes to inode size and to
+ * protect consistency between inode size and cl_object
+ * attributes. cl_object_size_lock() protects consistency between cl_attr's of
+ * top-object and sub-objects.
+ */
+static int vvp_prep_size(const struct lu_env *env, struct cl_object *obj,
+ struct cl_io *io, loff_t start, size_t count,
+ int *exceed)
+{
+ struct cl_attr *attr = vvp_env_thread_attr(env);
+ struct inode *inode = vvp_object_inode(obj);
+ loff_t pos = start + count - 1;
+ loff_t kms;
+ int result;
+
+ /*
+ * Consistency guarantees: following possibilities exist for the
+ * relation between region being accessed and real file size at this
+ * moment:
+ *
+ * (A): the region is completely inside of the file;
+ *
+ * (B-x): x bytes of region are inside of the file, the rest is
+ * outside;
+ *
+ * (C): the region is completely outside of the file.
+ *
+ * This classification is stable under DLM lock already acquired by
+ * the caller, because to change the class, other client has to take
+ * DLM lock conflicting with our lock. Also, any updates to ->i_size
+ * by other threads on this client are serialized by
+ * ll_inode_size_lock(). This guarantees that short reads are handled
+ * correctly in the face of concurrent writes and truncates.
+ */
+ vvp_object_size_lock(obj);
+ result = cl_object_attr_get(env, obj, attr);
+ if (result == 0) {
+ kms = attr->cat_kms;
+ if (pos > kms) {
+ /*
+ * A glimpse is necessary to determine whether we
+ * return a short read (B) or some zeroes at the end
+ * of the buffer (C)
+ */
+ vvp_object_size_unlock(obj);
+ result = cl_glimpse_lock(env, io, inode, obj, 0);
+ if (result == 0 && exceed) {
+ /* If objective page index exceed end-of-file
+ * page index, return directly. Do not expect
+ * kernel will check such case correctly.
+ * linux-2.6.18-128.1.1 miss to do that.
+ * --bug 17336
+ */
+ loff_t size = i_size_read(inode);
+ loff_t cur_index = start >> PAGE_SHIFT;
+ loff_t size_index = (size - 1) >> PAGE_SHIFT;
+
+ if ((size == 0 && cur_index != 0) ||
+ size_index < cur_index)
+ *exceed = 1;
+ }
+ return result;
+ }
+ /*
+ * region is within kms and, hence, within real file
+ * size (A). We need to increase i_size to cover the
+ * read region so that generic_file_read() will do its
+ * job, but that doesn't mean the kms size is
+ * _correct_, it is only the _minimum_ size. If
+ * someone does a stat they will get the correct size
+ * which will always be >= the kms value here.
+ * b=11081
+ */
+ if (i_size_read(inode) < kms) {
+ i_size_write(inode, kms);
+ CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
+ PFID(lu_object_fid(&obj->co_lu)),
+ (__u64)i_size_read(inode));
+ }
+ }
+
+ vvp_object_size_unlock(obj);
+
+ return result;
+}
+
/*****************************************************************************
*
* io operations.
*
*/
+static int vvp_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
+ __u32 enqflags, enum cl_lock_mode mode,
+ pgoff_t start, pgoff_t end)
+{
+ struct vvp_io *vio = vvp_env_io(env);
+ struct cl_lock_descr *descr = &vio->vui_link.cill_descr;
+ struct cl_object *obj = io->ci_obj;
+
+ CLOBINVRNT(env, obj, vvp_object_invariant(obj));
+
+ CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end);
+
+ memset(&vio->vui_link, 0, sizeof(vio->vui_link));
+
+ if (vio->vui_fd && (vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
+ descr->cld_mode = CLM_GROUP;
+ descr->cld_gid = vio->vui_fd->fd_grouplock.lg_gid;
+ } else {
+ descr->cld_mode = mode;
+ }
+ descr->cld_obj = obj;
+ descr->cld_start = start;
+ descr->cld_end = end;
+ descr->cld_enq_flags = enqflags;
+
+ cl_io_lock_add(env, io, &vio->vui_link);
+ return 0;
+}
+
+static int vvp_io_one_lock(const struct lu_env *env, struct cl_io *io,
+ __u32 enqflags, enum cl_lock_mode mode,
+ loff_t start, loff_t end)
+{
+ struct cl_object *obj = io->ci_obj;
+
+ return vvp_io_one_lock_index(env, io, enqflags, mode,
+ cl_index(obj, start), cl_index(obj, end));
+}
+
+static int vvp_io_write_iter_init(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct vvp_io *vio = cl2vvp_io(env, ios);
+
+ cl_page_list_init(&vio->u.write.vui_queue);
+ vio->u.write.vui_written = 0;
+ vio->u.write.vui_from = 0;
+ vio->u.write.vui_to = PAGE_SIZE;
+
+ return 0;
+}
+
+static void vvp_io_write_iter_fini(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct vvp_io *vio = cl2vvp_io(env, ios);
+
+ LASSERT(vio->u.write.vui_queue.pl_nr == 0);
+}
+
static int vvp_io_fault_iter_init(const struct lu_env *env,
const struct cl_io_slice *ios)
{
struct vvp_io *vio = cl2vvp_io(env, ios);
- struct inode *inode = ccc_object_inode(ios->cis_obj);
+ struct inode *inode = vvp_object_inode(ios->cis_obj);
- LASSERT(inode ==
- file_inode(cl2ccc_io(env, ios)->cui_fd->fd_file));
+ LASSERT(inode == file_inode(vio->vui_fd->fd_file));
vio->u.fault.ft_mtime = inode->i_mtime.tv_sec;
return 0;
}
@@ -117,15 +293,16 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
{
struct cl_io *io = ios->cis_io;
struct cl_object *obj = io->ci_obj;
- struct ccc_io *cio = cl2ccc_io(env, ios);
+ struct vvp_io *vio = cl2vvp_io(env, ios);
+ struct inode *inode = vvp_object_inode(obj);
- CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+ CLOBINVRNT(env, obj, vvp_object_invariant(obj));
CDEBUG(D_VFSTRACE, DFID
" ignore/verify layout %d/%d, layout version %d restore needed %d\n",
PFID(lu_object_fid(&obj->co_lu)),
io->ci_ignore_layout, io->ci_verify_layout,
- cio->cui_layout_gen, io->ci_restore_needed);
+ vio->vui_layout_gen, io->ci_restore_needed);
if (io->ci_restore_needed == 1) {
int rc;
@@ -133,7 +310,7 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
/* file was detected release, we need to restore it
* before finishing the io
*/
- rc = ll_layout_restore(ccc_object_inode(obj));
+ rc = ll_layout_restore(inode, 0, OBD_OBJECT_EOF);
/* if restore registration failed, no restart,
* we will return -ENODATA
*/
@@ -159,16 +336,16 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
__u32 gen = 0;
/* check layout version */
- ll_layout_refresh(ccc_object_inode(obj), &gen);
- io->ci_need_restart = cio->cui_layout_gen != gen;
+ ll_layout_refresh(inode, &gen);
+ io->ci_need_restart = vio->vui_layout_gen != gen;
if (io->ci_need_restart) {
CDEBUG(D_VFSTRACE,
DFID" layout changed from %d to %d.\n",
PFID(lu_object_fid(&obj->co_lu)),
- cio->cui_layout_gen, gen);
+ vio->vui_layout_gen, gen);
/* today successful restore is the only possible case */
/* restore was done, clear restoring state */
- ll_i2info(ccc_object_inode(obj))->lli_flags &=
+ ll_i2info(vvp_object_inode(obj))->lli_flags &=
~LLIF_FILE_RESTORING;
}
}
@@ -180,7 +357,7 @@ static void vvp_io_fault_fini(const struct lu_env *env,
struct cl_io *io = ios->cis_io;
struct cl_page *page = io->u.ci_fault.ft_page;
- CLOBINVRNT(env, io->ci_obj, ccc_object_invariant(io->ci_obj));
+ CLOBINVRNT(env, io->ci_obj, vvp_object_invariant(io->ci_obj));
if (page) {
lu_ref_del(&page->cp_reference, "fault", io);
@@ -203,16 +380,16 @@ static enum cl_lock_mode vvp_mode_from_vma(struct vm_area_struct *vma)
}
static int vvp_mmap_locks(const struct lu_env *env,
- struct ccc_io *vio, struct cl_io *io)
+ struct vvp_io *vio, struct cl_io *io)
{
- struct ccc_thread_info *cti = ccc_env_info(env);
+ struct vvp_thread_info *cti = vvp_env_info(env);
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- struct cl_lock_descr *descr = &cti->cti_descr;
+ struct cl_lock_descr *descr = &cti->vti_descr;
ldlm_policy_data_t policy;
unsigned long addr;
ssize_t count;
- int result;
+ int result = 0;
struct iov_iter i;
struct iovec iov;
@@ -221,21 +398,21 @@ static int vvp_mmap_locks(const struct lu_env *env,
if (!cl_is_normalio(env, io))
return 0;
- if (!vio->cui_iter) /* nfs or loop back device write */
+ if (!vio->vui_iter) /* nfs or loop back device write */
return 0;
/* No MM (e.g. NFS)? No vmas too. */
if (!mm)
return 0;
- iov_for_each(iov, i, *(vio->cui_iter)) {
+ iov_for_each(iov, i, *vio->vui_iter) {
addr = (unsigned long)iov.iov_base;
count = iov.iov_len;
if (count == 0)
continue;
- count += addr & (~CFS_PAGE_MASK);
- addr &= CFS_PAGE_MASK;
+ count += addr & (~PAGE_MASK);
+ addr &= PAGE_MASK;
down_read(&mm->mmap_sem);
while ((vma = our_vma(mm, addr, count)) != NULL) {
@@ -244,10 +421,10 @@ static int vvp_mmap_locks(const struct lu_env *env,
if (ll_file_nolock(vma->vm_file)) {
/*
- * For no lock case, a lockless lock will be
- * generated.
+ * For no lock case is not allowed for mmap
*/
- flags = CEF_NEVER;
+ result = -EINVAL;
+ break;
}
/*
@@ -269,10 +446,8 @@ static int vvp_mmap_locks(const struct lu_env *env,
descr->cld_mode, descr->cld_start,
descr->cld_end);
- if (result < 0) {
- up_read(&mm->mmap_sem);
- return result;
- }
+ if (result < 0)
+ break;
if (vma->vm_end - addr >= count)
break;
@@ -281,26 +456,55 @@ static int vvp_mmap_locks(const struct lu_env *env,
addr = vma->vm_end;
}
up_read(&mm->mmap_sem);
+ if (result < 0)
+ break;
}
- return 0;
+ return result;
+}
+
+static void vvp_io_advance(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ size_t nob)
+{
+ struct vvp_io *vio = cl2vvp_io(env, ios);
+ struct cl_io *io = ios->cis_io;
+ struct cl_object *obj = ios->cis_io->ci_obj;
+
+ CLOBINVRNT(env, obj, vvp_object_invariant(obj));
+
+ if (!cl_is_normalio(env, io))
+ return;
+
+ iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count -= nob);
+}
+
+static void vvp_io_update_iov(const struct lu_env *env,
+ struct vvp_io *vio, struct cl_io *io)
+{
+ size_t size = io->u.ci_rw.crw_count;
+
+ if (!cl_is_normalio(env, io) || !vio->vui_iter)
+ return;
+
+ iov_iter_truncate(vio->vui_iter, size);
}
static int vvp_io_rw_lock(const struct lu_env *env, struct cl_io *io,
enum cl_lock_mode mode, loff_t start, loff_t end)
{
- struct ccc_io *cio = ccc_env_io(env);
+ struct vvp_io *vio = vvp_env_io(env);
int result;
int ast_flags = 0;
LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
- ccc_io_update_iov(env, cio, io);
+ vvp_io_update_iov(env, vio, io);
if (io->u.ci_rw.crw_nonblock)
ast_flags |= CEF_NONBLOCK;
- result = vvp_mmap_locks(env, cio, io);
+ result = vvp_mmap_locks(env, vio, io);
if (result == 0)
- result = ccc_io_one_lock(env, io, ast_flags, mode, start, end);
+ result = vvp_io_one_lock(env, io, ast_flags, mode, start, end);
return result;
}
@@ -325,9 +529,11 @@ static int vvp_io_fault_lock(const struct lu_env *env,
/*
* XXX LDLM_FL_CBPENDING
*/
- return ccc_io_one_lock_index
- (env, io, 0, vvp_mode_from_vma(vio->u.fault.ft_vma),
- io->u.ci_fault.ft_index, io->u.ci_fault.ft_index);
+ return vvp_io_one_lock_index(env,
+ io, 0,
+ vvp_mode_from_vma(vio->u.fault.ft_vma),
+ io->u.ci_fault.ft_index,
+ io->u.ci_fault.ft_index);
}
static int vvp_io_write_lock(const struct lu_env *env,
@@ -354,14 +560,13 @@ static int vvp_io_setattr_iter_init(const struct lu_env *env,
}
/**
- * Implementation of cl_io_operations::cio_lock() method for CIT_SETATTR io.
+ * Implementation of cl_io_operations::vio_lock() method for CIT_SETATTR io.
*
* Handles "lockless io" mode when extent locking is done by server.
*/
static int vvp_io_setattr_lock(const struct lu_env *env,
const struct cl_io_slice *ios)
{
- struct ccc_io *cio = ccc_env_io(env);
struct cl_io *io = ios->cis_io;
__u64 new_size;
__u32 enqflags = 0;
@@ -378,8 +583,8 @@ static int vvp_io_setattr_lock(const struct lu_env *env,
return 0;
new_size = 0;
}
- cio->u.setattr.cui_local_lock = SETATTR_EXTENT_LOCK;
- return ccc_io_one_lock(env, io, enqflags, CLM_WRITE,
+
+ return vvp_io_one_lock(env, io, enqflags, CLM_WRITE,
new_size, OBD_OBJECT_EOF);
}
@@ -413,7 +618,7 @@ static int vvp_io_setattr_time(const struct lu_env *env,
{
struct cl_io *io = ios->cis_io;
struct cl_object *obj = io->ci_obj;
- struct cl_attr *attr = ccc_env_thread_attr(env);
+ struct cl_attr *attr = vvp_env_thread_attr(env);
int result;
unsigned valid = CAT_CTIME;
@@ -437,7 +642,7 @@ static int vvp_io_setattr_start(const struct lu_env *env,
const struct cl_io_slice *ios)
{
struct cl_io *io = ios->cis_io;
- struct inode *inode = ccc_object_inode(io->ci_obj);
+ struct inode *inode = vvp_object_inode(io->ci_obj);
int result = 0;
inode_lock(inode);
@@ -453,7 +658,7 @@ static void vvp_io_setattr_end(const struct lu_env *env,
const struct cl_io_slice *ios)
{
struct cl_io *io = ios->cis_io;
- struct inode *inode = ccc_object_inode(io->ci_obj);
+ struct inode *inode = vvp_object_inode(io->ci_obj);
if (cl_io_is_trunc(io))
/* Truncate in memory pages - they must be clean pages
@@ -474,27 +679,25 @@ static int vvp_io_read_start(const struct lu_env *env,
const struct cl_io_slice *ios)
{
struct vvp_io *vio = cl2vvp_io(env, ios);
- struct ccc_io *cio = cl2ccc_io(env, ios);
struct cl_io *io = ios->cis_io;
struct cl_object *obj = io->ci_obj;
- struct inode *inode = ccc_object_inode(obj);
- struct ll_ra_read *bead = &vio->cui_bead;
- struct file *file = cio->cui_fd->fd_file;
+ struct inode *inode = vvp_object_inode(obj);
+ struct file *file = vio->vui_fd->fd_file;
int result;
loff_t pos = io->u.ci_rd.rd.crw_pos;
long cnt = io->u.ci_rd.rd.crw_count;
- long tot = cio->cui_tot_count;
+ long tot = vio->vui_tot_count;
int exceed = 0;
- CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+ CLOBINVRNT(env, obj, vvp_object_invariant(obj));
CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt);
if (!can_populate_pages(env, io, inode))
return 0;
- result = ccc_prep_size(env, obj, io, pos, tot, &exceed);
+ result = vvp_prep_size(env, obj, io, pos, tot, &exceed);
if (result != 0)
return result;
else if (exceed != 0)
@@ -505,30 +708,27 @@ static int vvp_io_read_start(const struct lu_env *env,
inode->i_ino, cnt, pos, i_size_read(inode));
/* turn off the kernel's read-ahead */
- cio->cui_fd->fd_file->f_ra.ra_pages = 0;
+ vio->vui_fd->fd_file->f_ra.ra_pages = 0;
/* initialize read-ahead window once per syscall */
- if (!vio->cui_ra_window_set) {
- vio->cui_ra_window_set = 1;
- bead->lrr_start = cl_index(obj, pos);
- /*
- * XXX: explicit PAGE_SIZE
- */
- bead->lrr_count = cl_index(obj, tot + PAGE_SIZE - 1);
- ll_ra_read_in(file, bead);
+ if (!vio->vui_ra_valid) {
+ vio->vui_ra_valid = true;
+ vio->vui_ra_start = cl_index(obj, pos);
+ vio->vui_ra_count = cl_index(obj, tot + PAGE_SIZE - 1);
+ ll_ras_enter(file);
}
/* BUG: 5972 */
file_accessed(file);
- switch (vio->cui_io_subtype) {
+ switch (vio->vui_io_subtype) {
case IO_NORMAL:
- LASSERT(cio->cui_iocb->ki_pos == pos);
- result = generic_file_read_iter(cio->cui_iocb, cio->cui_iter);
+ LASSERT(vio->vui_iocb->ki_pos == pos);
+ result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter);
break;
case IO_SPLICE:
result = generic_file_splice_read(file, &pos,
- vio->u.splice.cui_pipe, cnt,
- vio->u.splice.cui_flags);
+ vio->u.splice.vui_pipe, cnt,
+ vio->u.splice.vui_flags);
/* LU-1109: do splice read stripe by stripe otherwise if it
* may make nfsd stuck if this read occupied all internal pipe
* buffers.
@@ -536,7 +736,7 @@ static int vvp_io_read_start(const struct lu_env *env,
io->ci_continue = 0;
break;
default:
- CERROR("Wrong IO type %u\n", vio->cui_io_subtype);
+ CERROR("Wrong IO type %u\n", vio->vui_io_subtype);
LBUG();
}
@@ -546,30 +746,201 @@ out:
io->ci_continue = 0;
io->ci_nob += result;
ll_rw_stats_tally(ll_i2sbi(inode), current->pid,
- cio->cui_fd, pos, result, READ);
+ vio->vui_fd, pos, result, READ);
result = 0;
}
return result;
}
-static void vvp_io_read_fini(const struct lu_env *env, const struct cl_io_slice *ios)
+static int vvp_io_commit_sync(const struct lu_env *env, struct cl_io *io,
+ struct cl_page_list *plist, int from, int to)
{
- struct vvp_io *vio = cl2vvp_io(env, ios);
- struct ccc_io *cio = cl2ccc_io(env, ios);
+ struct cl_2queue *queue = &io->ci_queue;
+ struct cl_page *page;
+ unsigned int bytes = 0;
+ int rc = 0;
- if (vio->cui_ra_window_set)
- ll_ra_read_ex(cio->cui_fd->fd_file, &vio->cui_bead);
+ if (plist->pl_nr == 0)
+ return 0;
- vvp_io_fini(env, ios);
+ if (from > 0 || to != PAGE_SIZE) {
+ page = cl_page_list_first(plist);
+ if (plist->pl_nr == 1) {
+ cl_page_clip(env, page, from, to);
+ } else {
+ if (from > 0)
+ cl_page_clip(env, page, from, PAGE_SIZE);
+ if (to != PAGE_SIZE) {
+ page = cl_page_list_last(plist);
+ cl_page_clip(env, page, 0, to);
+ }
+ }
+ }
+
+ cl_2queue_init(queue);
+ cl_page_list_splice(plist, &queue->c2_qin);
+ rc = cl_io_submit_sync(env, io, CRT_WRITE, queue, 0);
+
+ /* plist is not sorted any more */
+ cl_page_list_splice(&queue->c2_qin, plist);
+ cl_page_list_splice(&queue->c2_qout, plist);
+ cl_2queue_fini(env, queue);
+
+ if (rc == 0) {
+ /* calculate bytes */
+ bytes = plist->pl_nr << PAGE_SHIFT;
+ bytes -= from + PAGE_SIZE - to;
+
+ while (plist->pl_nr > 0) {
+ page = cl_page_list_first(plist);
+ cl_page_list_del(env, plist, page);
+
+ cl_page_clip(env, page, 0, PAGE_SIZE);
+
+ SetPageUptodate(cl_page_vmpage(page));
+ cl_page_disown(env, io, page);
+
+ /* held in ll_cl_init() */
+ lu_ref_del(&page->cp_reference, "cl_io", io);
+ cl_page_put(env, page);
+ }
+ }
+
+ return bytes > 0 ? bytes : rc;
+}
+
+static void write_commit_callback(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page)
+{
+ struct vvp_page *vpg;
+ struct page *vmpage = page->cp_vmpage;
+ struct cl_object *clob = cl_io_top(io)->ci_obj;
+
+ SetPageUptodate(vmpage);
+ set_page_dirty(vmpage);
+
+ vpg = cl2vvp_page(cl_object_page_slice(clob, page));
+ vvp_write_pending(cl2vvp(clob), vpg);
+
+ cl_page_disown(env, io, page);
+
+ /* held in ll_cl_init() */
+ lu_ref_del(&page->cp_reference, "cl_io", io);
+ cl_page_put(env, page);
+}
+
+/* make sure the page list is contiguous */
+static bool page_list_sanity_check(struct cl_object *obj,
+ struct cl_page_list *plist)
+{
+ struct cl_page *page;
+ pgoff_t index = CL_PAGE_EOF;
+
+ cl_page_list_for_each(page, plist) {
+ struct vvp_page *vpg = cl_object_page_slice(obj, page);
+
+ if (index == CL_PAGE_EOF) {
+ index = vvp_index(vpg);
+ continue;
+ }
+
+ ++index;
+ if (index == vvp_index(vpg))
+ continue;
+
+ return false;
+ }
+ return true;
+}
+
+/* Return how many bytes have queued or written */
+int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io)
+{
+ struct cl_object *obj = io->ci_obj;
+ struct inode *inode = vvp_object_inode(obj);
+ struct vvp_io *vio = vvp_env_io(env);
+ struct cl_page_list *queue = &vio->u.write.vui_queue;
+ struct cl_page *page;
+ int rc = 0;
+ int bytes = 0;
+ unsigned int npages = vio->u.write.vui_queue.pl_nr;
+
+ if (npages == 0)
+ return 0;
+
+ CDEBUG(D_VFSTRACE, "commit async pages: %d, from %d, to %d\n",
+ npages, vio->u.write.vui_from, vio->u.write.vui_to);
+
+ LASSERT(page_list_sanity_check(obj, queue));
+
+ /* submit IO with async write */
+ rc = cl_io_commit_async(env, io, queue,
+ vio->u.write.vui_from, vio->u.write.vui_to,
+ write_commit_callback);
+ npages -= queue->pl_nr; /* already committed pages */
+ if (npages > 0) {
+ /* calculate how many bytes were written */
+ bytes = npages << PAGE_SHIFT;
+
+ /* first page */
+ bytes -= vio->u.write.vui_from;
+ if (queue->pl_nr == 0) /* last page */
+ bytes -= PAGE_SIZE - vio->u.write.vui_to;
+ LASSERTF(bytes > 0, "bytes = %d, pages = %d\n", bytes, npages);
+
+ vio->u.write.vui_written += bytes;
+
+ CDEBUG(D_VFSTRACE, "Committed %d pages %d bytes, tot: %ld\n",
+ npages, bytes, vio->u.write.vui_written);
+
+ /* the first page must have been written. */
+ vio->u.write.vui_from = 0;
+ }
+ LASSERT(page_list_sanity_check(obj, queue));
+ LASSERT(ergo(rc == 0, queue->pl_nr == 0));
+
+ /* out of quota, try sync write */
+ if (rc == -EDQUOT && !cl_io_is_mkwrite(io)) {
+ rc = vvp_io_commit_sync(env, io, queue,
+ vio->u.write.vui_from,
+ vio->u.write.vui_to);
+ if (rc > 0) {
+ vio->u.write.vui_written += rc;
+ rc = 0;
+ }
+ }
+
+ /* update inode size */
+ ll_merge_attr(env, inode);
+
+ /* Now the pages in queue were failed to commit, discard them
+ * unless they were dirtied before.
+ */
+ while (queue->pl_nr > 0) {
+ page = cl_page_list_first(queue);
+ cl_page_list_del(env, queue, page);
+
+ if (!PageDirty(cl_page_vmpage(page)))
+ cl_page_discard(env, io, page);
+
+ cl_page_disown(env, io, page);
+
+ /* held in ll_cl_init() */
+ lu_ref_del(&page->cp_reference, "cl_io", io);
+ cl_page_put(env, page);
+ }
+ cl_page_list_fini(env, queue);
+
+ return rc;
}
static int vvp_io_write_start(const struct lu_env *env,
const struct cl_io_slice *ios)
{
- struct ccc_io *cio = cl2ccc_io(env, ios);
+ struct vvp_io *vio = cl2vvp_io(env, ios);
struct cl_io *io = ios->cis_io;
struct cl_object *obj = io->ci_obj;
- struct inode *inode = ccc_object_inode(obj);
+ struct inode *inode = vvp_object_inode(obj);
ssize_t result = 0;
loff_t pos = io->u.ci_wr.wr.crw_pos;
size_t cnt = io->u.ci_wr.wr.crw_count;
@@ -582,25 +953,41 @@ static int vvp_io_write_start(const struct lu_env *env,
* PARALLEL IO This has to be changed for parallel IO doing
* out-of-order writes.
*/
+ ll_merge_attr(env, inode);
pos = io->u.ci_wr.wr.crw_pos = i_size_read(inode);
- cio->cui_iocb->ki_pos = pos;
+ vio->vui_iocb->ki_pos = pos;
} else {
- LASSERT(cio->cui_iocb->ki_pos == pos);
+ LASSERT(vio->vui_iocb->ki_pos == pos);
}
CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt);
- if (!cio->cui_iter) /* from a temp io in ll_cl_init(). */
+ if (!vio->vui_iter) /* from a temp io in ll_cl_init(). */
result = 0;
else
- result = generic_file_write_iter(cio->cui_iocb, cio->cui_iter);
+ result = generic_file_write_iter(vio->vui_iocb, vio->vui_iter);
+
+ if (result > 0) {
+ result = vvp_io_write_commit(env, io);
+ if (vio->u.write.vui_written > 0) {
+ result = vio->u.write.vui_written;
+ io->ci_nob += result;
+ CDEBUG(D_VFSTRACE, "write: nob %zd, result: %zd\n",
+ io->ci_nob, result);
+ }
+ }
if (result > 0) {
+ struct ll_inode_info *lli = ll_i2info(inode);
+
+ spin_lock(&lli->lli_lock);
+ lli->lli_flags |= LLIF_DATA_MODIFIED;
+ spin_unlock(&lli->lli_lock);
+
if (result < cnt)
io->ci_continue = 0;
- io->ci_nob += result;
ll_rw_stats_tally(ll_i2sbi(inode), current->pid,
- cio->cui_fd, pos, result, WRITE);
+ vio->vui_fd, pos, result, WRITE);
result = 0;
}
return result;
@@ -608,10 +995,10 @@ static int vvp_io_write_start(const struct lu_env *env,
static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
{
- struct vm_fault *vmf = cfio->fault.ft_vmf;
+ struct vm_fault *vmf = cfio->ft_vmf;
- cfio->fault.ft_flags = filemap_fault(cfio->ft_vma, vmf);
- cfio->fault.ft_flags_valid = 1;
+ cfio->ft_flags = filemap_fault(cfio->ft_vma, vmf);
+ cfio->ft_flags_valid = 1;
if (vmf->page) {
CDEBUG(D_PAGE,
@@ -619,39 +1006,51 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
vmf->page, vmf->page->mapping, vmf->page->index,
(long)vmf->page->flags, page_count(vmf->page),
page_private(vmf->page), vmf->virtual_address);
- if (unlikely(!(cfio->fault.ft_flags & VM_FAULT_LOCKED))) {
+ if (unlikely(!(cfio->ft_flags & VM_FAULT_LOCKED))) {
lock_page(vmf->page);
- cfio->fault.ft_flags |= VM_FAULT_LOCKED;
+ cfio->ft_flags |= VM_FAULT_LOCKED;
}
cfio->ft_vmpage = vmf->page;
return 0;
}
- if (cfio->fault.ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
+ if (cfio->ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", vmf->virtual_address);
return -EFAULT;
}
- if (cfio->fault.ft_flags & VM_FAULT_OOM) {
+ if (cfio->ft_flags & VM_FAULT_OOM) {
CDEBUG(D_PAGE, "got addr %p - OOM\n", vmf->virtual_address);
return -ENOMEM;
}
- if (cfio->fault.ft_flags & VM_FAULT_RETRY)
+ if (cfio->ft_flags & VM_FAULT_RETRY)
return -EAGAIN;
- CERROR("Unknown error in page fault %d!\n", cfio->fault.ft_flags);
+ CERROR("Unknown error in page fault %d!\n", cfio->ft_flags);
return -EINVAL;
}
+static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page)
+{
+ struct vvp_page *vpg;
+ struct cl_object *clob = cl_io_top(io)->ci_obj;
+
+ set_page_dirty(page->cp_vmpage);
+
+ vpg = cl2vvp_page(cl_object_page_slice(clob, page));
+ vvp_write_pending(cl2vvp(clob), vpg);
+}
+
static int vvp_io_fault_start(const struct lu_env *env,
const struct cl_io_slice *ios)
{
struct vvp_io *vio = cl2vvp_io(env, ios);
struct cl_io *io = ios->cis_io;
struct cl_object *obj = io->ci_obj;
- struct inode *inode = ccc_object_inode(obj);
+ struct inode *inode = vvp_object_inode(obj);
struct cl_fault_io *fio = &io->u.ci_fault;
struct vvp_fault_io *cfio = &vio->u.fault;
loff_t offset;
@@ -659,7 +1058,7 @@ static int vvp_io_fault_start(const struct lu_env *env,
struct page *vmpage = NULL;
struct cl_page *page;
loff_t size;
- pgoff_t last; /* last page in a file data region */
+ pgoff_t last_index;
if (fio->ft_executable &&
inode->i_mtime.tv_sec != vio->u.fault.ft_mtime)
@@ -670,7 +1069,7 @@ static int vvp_io_fault_start(const struct lu_env *env,
/* offset of the last byte on the page */
offset = cl_offset(obj, fio->ft_index + 1) - 1;
LASSERT(cl_index(obj, offset) == fio->ft_index);
- result = ccc_prep_size(env, obj, io, 0, offset + 1, NULL);
+ result = vvp_prep_size(env, obj, io, 0, offset + 1, NULL);
if (result != 0)
return result;
@@ -705,15 +1104,15 @@ static int vvp_io_fault_start(const struct lu_env *env,
goto out;
}
+ last_index = cl_index(obj, size - 1);
+
if (fio->ft_mkwrite) {
- pgoff_t last_index;
/*
* Capture the size while holding the lli_trunc_sem from above
* we want to make sure that we complete the mkwrite action
* while holding this lock. We need to make sure that we are
* not past the end of the file.
*/
- last_index = cl_index(obj, size - 1);
if (last_index < fio->ft_index) {
CDEBUG(D_PAGE,
"llite: mkwrite and truncate race happened: %p: 0x%lx 0x%lx\n",
@@ -745,25 +1144,32 @@ static int vvp_io_fault_start(const struct lu_env *env,
*/
if (fio->ft_mkwrite) {
wait_on_page_writeback(vmpage);
- if (set_page_dirty(vmpage)) {
- struct ccc_page *cp;
+ if (!PageDirty(vmpage)) {
+ struct cl_page_list *plist = &io->ci_queue.c2_qin;
+ struct vvp_page *vpg = cl_object_page_slice(obj, page);
+ int to = PAGE_SIZE;
/* vvp_page_assume() calls wait_on_page_writeback(). */
cl_page_assume(env, io, page);
- cp = cl2ccc_page(cl_page_at(page, &vvp_device_type));
- vvp_write_pending(cl2ccc(obj), cp);
+ cl_page_list_init(plist);
+ cl_page_list_add(plist, page);
+
+ /* size fixup */
+ if (last_index == vvp_index(vpg))
+ to = size & ~PAGE_MASK;
/* Do not set Dirty bit here so that in case IO is
* started before the page is really made dirty, we
* still have chance to detect it.
*/
- result = cl_page_cache_add(env, io, page, CRT_WRITE);
+ result = cl_io_commit_async(env, io, plist, 0, to,
+ mkwrite_commit_callback);
LASSERT(cl_page_is_owned(page, io));
+ cl_page_list_fini(env, plist);
vmpage = NULL;
if (result < 0) {
- cl_page_unmap(env, io, page);
cl_page_discard(env, io, page);
cl_page_disown(env, io, page);
@@ -773,20 +1179,20 @@ static int vvp_io_fault_start(const struct lu_env *env,
if (result == -EDQUOT)
result = -ENOSPC;
goto out;
- } else
+ } else {
cl_page_disown(env, io, page);
+ }
}
}
- last = cl_index(obj, size - 1);
/*
* The ft_index is only used in the case of
* a mkwrite action. We need to check
* our assertions are correct, since
* we should have caught this above
*/
- LASSERT(!fio->ft_mkwrite || fio->ft_index <= last);
- if (fio->ft_index == last)
+ LASSERT(!fio->ft_mkwrite || fio->ft_index <= last_index);
+ if (fio->ft_index == last_index)
/*
* Last page is mapped partially.
*/
@@ -801,7 +1207,9 @@ out:
/* return unlocked vmpage to avoid deadlocking */
if (vmpage)
unlock_page(vmpage);
- cfio->fault.ft_flags &= ~VM_FAULT_LOCKED;
+
+ cfio->ft_flags &= ~VM_FAULT_LOCKED;
+
return result;
}
@@ -820,293 +1228,58 @@ static int vvp_io_read_page(const struct lu_env *env,
const struct cl_page_slice *slice)
{
struct cl_io *io = ios->cis_io;
- struct cl_object *obj = slice->cpl_obj;
- struct ccc_page *cp = cl2ccc_page(slice);
+ struct vvp_page *vpg = cl2vvp_page(slice);
struct cl_page *page = slice->cpl_page;
- struct inode *inode = ccc_object_inode(obj);
+ struct inode *inode = vvp_object_inode(slice->cpl_obj);
struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ll_file_data *fd = cl2ccc_io(env, ios)->cui_fd;
+ struct ll_file_data *fd = cl2vvp_io(env, ios)->vui_fd;
struct ll_readahead_state *ras = &fd->fd_ras;
- struct page *vmpage = cp->cpg_page;
struct cl_2queue *queue = &io->ci_queue;
- int rc;
-
- CLOBINVRNT(env, obj, ccc_object_invariant(obj));
- LASSERT(slice->cpl_obj == obj);
if (sbi->ll_ra_info.ra_max_pages_per_file &&
sbi->ll_ra_info.ra_max_pages)
- ras_update(sbi, inode, ras, page->cp_index,
- cp->cpg_defer_uptodate);
-
- /* Sanity check whether the page is protected by a lock. */
- rc = cl_page_is_under_lock(env, io, page);
- if (rc != -EBUSY) {
- CL_PAGE_HEADER(D_WARNING, env, page, "%s: %d\n",
- rc == -ENODATA ? "without a lock" :
- "match failed", rc);
- if (rc != -ENODATA)
- return rc;
- }
+ ras_update(sbi, inode, ras, vvp_index(vpg),
+ vpg->vpg_defer_uptodate);
- if (cp->cpg_defer_uptodate) {
- cp->cpg_ra_used = 1;
+ if (vpg->vpg_defer_uptodate) {
+ vpg->vpg_ra_used = 1;
cl_page_export(env, page, 1);
}
/*
* Add page into the queue even when it is marked uptodate above.
* this will unlock it automatically as part of cl_page_list_disown().
*/
+
cl_page_list_add(&queue->c2_qin, page);
if (sbi->ll_ra_info.ra_max_pages_per_file &&
sbi->ll_ra_info.ra_max_pages)
- ll_readahead(env, io, ras,
- vmpage->mapping, &queue->c2_qin, fd->fd_flags);
+ ll_readahead(env, io, &queue->c2_qin, ras,
+ vpg->vpg_defer_uptodate);
return 0;
}
-static int vvp_page_sync_io(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, struct ccc_page *cp,
- enum cl_req_type crt)
+void vvp_io_end(const struct lu_env *env, const struct cl_io_slice *ios)
{
- struct cl_2queue *queue;
- int result;
-
- LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
-
- queue = &io->ci_queue;
- cl_2queue_init_page(queue, page);
-
- result = cl_io_submit_sync(env, io, crt, queue, 0);
- LASSERT(cl_page_is_owned(page, io));
-
- if (crt == CRT_READ)
- /*
- * in CRT_WRITE case page is left locked even in case of
- * error.
- */
- cl_page_list_disown(env, io, &queue->c2_qin);
- cl_2queue_fini(env, queue);
-
- return result;
-}
-
-/**
- * Prepare partially written-to page for a write.
- */
-static int vvp_io_prepare_partial(const struct lu_env *env, struct cl_io *io,
- struct cl_object *obj, struct cl_page *pg,
- struct ccc_page *cp,
- unsigned from, unsigned to)
-{
- struct cl_attr *attr = ccc_env_thread_attr(env);
- loff_t offset = cl_offset(obj, pg->cp_index);
- int result;
-
- cl_object_attr_lock(obj);
- result = cl_object_attr_get(env, obj, attr);
- cl_object_attr_unlock(obj);
- if (result == 0) {
- /*
- * If are writing to a new page, no need to read old data.
- * The extent locking will have updated the KMS, and for our
- * purposes here we can treat it like i_size.
- */
- if (attr->cat_kms <= offset) {
- char *kaddr = kmap_atomic(cp->cpg_page);
-
- memset(kaddr, 0, cl_page_size(obj));
- kunmap_atomic(kaddr);
- } else if (cp->cpg_defer_uptodate)
- cp->cpg_ra_used = 1;
- else
- result = vvp_page_sync_io(env, io, pg, cp, CRT_READ);
- /*
- * In older implementations, obdo_refresh_inode is called here
- * to update the inode because the write might modify the
- * object info at OST. However, this has been proven useless,
- * since LVB functions will be called when user space program
- * tries to retrieve inode attribute. Also, see bug 15909 for
- * details. -jay
- */
- if (result == 0)
- cl_page_export(env, pg, 1);
- }
- return result;
-}
-
-static int vvp_io_prepare_write(const struct lu_env *env,
- const struct cl_io_slice *ios,
- const struct cl_page_slice *slice,
- unsigned from, unsigned to)
-{
- struct cl_object *obj = slice->cpl_obj;
- struct ccc_page *cp = cl2ccc_page(slice);
- struct cl_page *pg = slice->cpl_page;
- struct page *vmpage = cp->cpg_page;
-
- int result;
-
- LINVRNT(cl_page_is_vmlocked(env, pg));
- LASSERT(vmpage->mapping->host == ccc_object_inode(obj));
-
- result = 0;
-
- CL_PAGE_HEADER(D_PAGE, env, pg, "preparing: [%d, %d]\n", from, to);
- if (!PageUptodate(vmpage)) {
- /*
- * We're completely overwriting an existing page, so _don't_
- * set it up to date until commit_write
- */
- if (from == 0 && to == PAGE_SIZE) {
- CL_PAGE_HEADER(D_PAGE, env, pg, "full page write\n");
- POISON_PAGE(page, 0x11);
- } else
- result = vvp_io_prepare_partial(env, ios->cis_io, obj,
- pg, cp, from, to);
- } else
- CL_PAGE_HEADER(D_PAGE, env, pg, "uptodate\n");
- return result;
-}
-
-static int vvp_io_commit_write(const struct lu_env *env,
- const struct cl_io_slice *ios,
- const struct cl_page_slice *slice,
- unsigned from, unsigned to)
-{
- struct cl_object *obj = slice->cpl_obj;
- struct cl_io *io = ios->cis_io;
- struct ccc_page *cp = cl2ccc_page(slice);
- struct cl_page *pg = slice->cpl_page;
- struct inode *inode = ccc_object_inode(obj);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ll_inode_info *lli = ll_i2info(inode);
- struct page *vmpage = cp->cpg_page;
-
- int result;
- int tallyop;
- loff_t size;
-
- LINVRNT(cl_page_is_vmlocked(env, pg));
- LASSERT(vmpage->mapping->host == inode);
-
- LU_OBJECT_HEADER(D_INODE, env, &obj->co_lu, "committing page write\n");
- CL_PAGE_HEADER(D_PAGE, env, pg, "committing: [%d, %d]\n", from, to);
-
- /*
- * queue a write for some time in the future the first time we
- * dirty the page.
- *
- * This is different from what other file systems do: they usually
- * just mark page (and some of its buffers) dirty and rely on
- * balance_dirty_pages() to start a write-back. Lustre wants write-back
- * to be started earlier for the following reasons:
- *
- * (1) with a large number of clients we need to limit the amount
- * of cached data on the clients a lot;
- *
- * (2) large compute jobs generally want compute-only then io-only
- * and the IO should complete as quickly as possible;
- *
- * (3) IO is batched up to the RPC size and is async until the
- * client max cache is hit
- * (/sys/fs/lustre/osc/OSC.../max_dirty_mb)
- *
- */
- if (!PageDirty(vmpage)) {
- tallyop = LPROC_LL_DIRTY_MISSES;
- result = cl_page_cache_add(env, io, pg, CRT_WRITE);
- if (result == 0) {
- /* page was added into cache successfully. */
- set_page_dirty(vmpage);
- vvp_write_pending(cl2ccc(obj), cp);
- } else if (result == -EDQUOT) {
- pgoff_t last_index = i_size_read(inode) >> PAGE_SHIFT;
- bool need_clip = true;
-
- /*
- * Client ran out of disk space grant. Possible
- * strategies are:
- *
- * (a) do a sync write, renewing grant;
- *
- * (b) stop writing on this stripe, switch to the
- * next one.
- *
- * (b) is a part of "parallel io" design that is the
- * ultimate goal. (a) is what "old" client did, and
- * what the new code continues to do for the time
- * being.
- */
- if (last_index > pg->cp_index) {
- to = PAGE_SIZE;
- need_clip = false;
- } else if (last_index == pg->cp_index) {
- int size_to = i_size_read(inode) & ~CFS_PAGE_MASK;
-
- if (to < size_to)
- to = size_to;
- }
- if (need_clip)
- cl_page_clip(env, pg, 0, to);
- result = vvp_page_sync_io(env, io, pg, cp, CRT_WRITE);
- if (result)
- CERROR("Write page %lu of inode %p failed %d\n",
- pg->cp_index, inode, result);
- }
- } else {
- tallyop = LPROC_LL_DIRTY_HITS;
- result = 0;
- }
- ll_stats_ops_tally(sbi, tallyop, 1);
-
- /* Inode should be marked DIRTY even if no new page was marked DIRTY
- * because page could have been not flushed between 2 modifications.
- * It is important the file is marked DIRTY as soon as the I/O is done
- * Indeed, when cache is flushed, file could be already closed and it
- * is too late to warn the MDT.
- * It is acceptable that file is marked DIRTY even if I/O is dropped
- * for some reasons before being flushed to OST.
- */
- if (result == 0) {
- spin_lock(&lli->lli_lock);
- lli->lli_flags |= LLIF_DATA_MODIFIED;
- spin_unlock(&lli->lli_lock);
- }
-
- size = cl_offset(obj, pg->cp_index) + to;
-
- ll_inode_size_lock(inode);
- if (result == 0) {
- if (size > i_size_read(inode)) {
- cl_isize_write_nolock(inode, size);
- CDEBUG(D_VFSTRACE, DFID" updating i_size %lu\n",
- PFID(lu_object_fid(&obj->co_lu)),
- (unsigned long)size);
- }
- cl_page_export(env, pg, 1);
- } else {
- if (size > i_size_read(inode))
- cl_page_discard(env, io, pg);
- }
- ll_inode_size_unlock(inode);
- return result;
+ CLOBINVRNT(env, ios->cis_io->ci_obj,
+ vvp_object_invariant(ios->cis_io->ci_obj));
}
static const struct cl_io_operations vvp_io_ops = {
.op = {
[CIT_READ] = {
- .cio_fini = vvp_io_read_fini,
+ .cio_fini = vvp_io_fini,
.cio_lock = vvp_io_read_lock,
.cio_start = vvp_io_read_start,
- .cio_advance = ccc_io_advance
+ .cio_advance = vvp_io_advance,
},
[CIT_WRITE] = {
.cio_fini = vvp_io_fini,
+ .cio_iter_init = vvp_io_write_iter_init,
+ .cio_iter_fini = vvp_io_write_iter_fini,
.cio_lock = vvp_io_write_lock,
.cio_start = vvp_io_write_start,
- .cio_advance = ccc_io_advance
+ .cio_advance = vvp_io_advance,
},
[CIT_SETATTR] = {
.cio_fini = vvp_io_setattr_fini,
@@ -1120,7 +1293,7 @@ static const struct cl_io_operations vvp_io_ops = {
.cio_iter_init = vvp_io_fault_iter_init,
.cio_lock = vvp_io_fault_lock,
.cio_start = vvp_io_fault_start,
- .cio_end = ccc_io_end
+ .cio_end = vvp_io_end,
},
[CIT_FSYNC] = {
.cio_start = vvp_io_fsync_start,
@@ -1131,29 +1304,26 @@ static const struct cl_io_operations vvp_io_ops = {
}
},
.cio_read_page = vvp_io_read_page,
- .cio_prepare_write = vvp_io_prepare_write,
- .cio_commit_write = vvp_io_commit_write
};
int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
struct cl_io *io)
{
struct vvp_io *vio = vvp_env_io(env);
- struct ccc_io *cio = ccc_env_io(env);
- struct inode *inode = ccc_object_inode(obj);
+ struct inode *inode = vvp_object_inode(obj);
int result;
- CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+ CLOBINVRNT(env, obj, vvp_object_invariant(obj));
CDEBUG(D_VFSTRACE, DFID
" ignore/verify layout %d/%d, layout version %d restore needed %d\n",
PFID(lu_object_fid(&obj->co_lu)),
io->ci_ignore_layout, io->ci_verify_layout,
- cio->cui_layout_gen, io->ci_restore_needed);
+ vio->vui_layout_gen, io->ci_restore_needed);
- CL_IO_SLICE_CLEAN(cio, cui_cl);
- cl_io_slice_add(io, &cio->cui_cl, obj, &vvp_io_ops);
- vio->cui_ra_window_set = 0;
+ CL_IO_SLICE_CLEAN(vio, vui_cl);
+ cl_io_slice_add(io, &vio->vui_cl, obj, &vvp_io_ops);
+ vio->vui_ra_valid = false;
result = 0;
if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE) {
size_t count;
@@ -1166,7 +1336,7 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
if (count == 0)
result = 1;
else
- cio->cui_tot_count = count;
+ vio->vui_tot_count = count;
/* for read/write, we store the jobid in the inode, and
* it'll be fetched by osc when building RPC.
@@ -1192,7 +1362,7 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
* because it might not grant layout lock in IT_OPEN.
*/
if (result == 0 && !io->ci_ignore_layout) {
- result = ll_layout_refresh(inode, &cio->cui_layout_gen);
+ result = ll_layout_refresh(inode, &vio->vui_layout_gen);
if (result == -ENOENT)
/* If the inode on MDS has been removed, but the objects
* on OSTs haven't been destroyed (async unlink), layout
@@ -1208,11 +1378,3 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
return result;
}
-
-static struct vvp_io *cl2vvp_io(const struct lu_env *env,
- const struct cl_io_slice *slice)
-{
- /* Calling just for assertion */
- cl2ccc_io(env, slice);
- return vvp_env_io(env);
-}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_lock.c b/drivers/staging/lustre/lustre/llite/vvp_lock.c
index ff0948043c7a..f5bd6c22e112 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_lock.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_lock.c
@@ -40,7 +40,7 @@
#define DEBUG_SUBSYSTEM S_LLITE
-#include "../include/obd.h"
+#include "../include/obd_support.h"
#include "../include/lustre_lite.h"
#include "vvp_internal.h"
@@ -51,36 +51,41 @@
*
*/
-/**
- * Estimates lock value for the purpose of managing the lock cache during
- * memory shortages.
- *
- * Locks for memory mapped files are almost infinitely precious, others are
- * junk. "Mapped locks" are heavy, but not infinitely heavy, so that they are
- * ordered within themselves by weights assigned from other layers.
- */
-static unsigned long vvp_lock_weigh(const struct lu_env *env,
- const struct cl_lock_slice *slice)
+static void vvp_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice)
+{
+ struct vvp_lock *vlk = cl2vvp_lock(slice);
+
+ kmem_cache_free(vvp_lock_kmem, vlk);
+}
+
+static int vvp_lock_enqueue(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ struct cl_io *unused, struct cl_sync_io *anchor)
{
- struct ccc_object *cob = cl2ccc(slice->cls_obj);
+ CLOBINVRNT(env, slice->cls_obj, vvp_object_invariant(slice->cls_obj));
- return atomic_read(&cob->cob_mmap_cnt) > 0 ? ~0UL >> 2 : 0;
+ return 0;
}
static const struct cl_lock_operations vvp_lock_ops = {
- .clo_delete = ccc_lock_delete,
- .clo_fini = ccc_lock_fini,
- .clo_enqueue = ccc_lock_enqueue,
- .clo_wait = ccc_lock_wait,
- .clo_use = ccc_lock_use,
- .clo_unuse = ccc_lock_unuse,
- .clo_fits_into = ccc_lock_fits_into,
- .clo_state = ccc_lock_state,
- .clo_weigh = vvp_lock_weigh
+ .clo_fini = vvp_lock_fini,
+ .clo_enqueue = vvp_lock_enqueue,
};
int vvp_lock_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_lock *lock, const struct cl_io *io)
+ struct cl_lock *lock, const struct cl_io *unused)
{
- return ccc_lock_init(env, obj, lock, io, &vvp_lock_ops);
+ struct vvp_lock *vlk;
+ int result;
+
+ CLOBINVRNT(env, obj, vvp_object_invariant(obj));
+
+ vlk = kmem_cache_zalloc(vvp_lock_kmem, GFP_NOFS);
+ if (vlk) {
+ cl_lock_slice_add(lock, &vlk->vlk_cl, obj, &vvp_lock_ops);
+ result = 0;
+ } else {
+ result = -ENOMEM;
+ }
+ return result;
}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_object.c b/drivers/staging/lustre/lustre/llite/vvp_object.c
index 03c887d8ed83..18c9df7ebdda 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_object.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_object.c
@@ -45,6 +45,7 @@
#include "../include/obd.h"
#include "../include/lustre_lite.h"
+#include "llite_internal.h"
#include "vvp_internal.h"
/*****************************************************************************
@@ -53,16 +54,25 @@
*
*/
+int vvp_object_invariant(const struct cl_object *obj)
+{
+ struct inode *inode = vvp_object_inode(obj);
+ struct ll_inode_info *lli = ll_i2info(inode);
+
+ return (S_ISREG(inode->i_mode) || inode->i_mode == 0) &&
+ lli->lli_clob == obj;
+}
+
static int vvp_object_print(const struct lu_env *env, void *cookie,
lu_printer_t p, const struct lu_object *o)
{
- struct ccc_object *obj = lu2ccc(o);
- struct inode *inode = obj->cob_inode;
+ struct vvp_object *obj = lu2vvp(o);
+ struct inode *inode = obj->vob_inode;
struct ll_inode_info *lli;
(*p)(env, cookie, "(%s %d %d) inode: %p ",
- list_empty(&obj->cob_pending_list) ? "-" : "+",
- obj->cob_transient_pages, atomic_read(&obj->cob_mmap_cnt),
+ list_empty(&obj->vob_pending_list) ? "-" : "+",
+ obj->vob_transient_pages, atomic_read(&obj->vob_mmap_cnt),
inode);
if (inode) {
lli = ll_i2info(inode);
@@ -77,7 +87,7 @@ static int vvp_object_print(const struct lu_env *env, void *cookie,
static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj,
struct cl_attr *attr)
{
- struct inode *inode = ccc_object_inode(obj);
+ struct inode *inode = vvp_object_inode(obj);
/*
* lov overwrites most of these fields in
@@ -99,7 +109,7 @@ static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj,
static int vvp_attr_set(const struct lu_env *env, struct cl_object *obj,
const struct cl_attr *attr, unsigned valid)
{
- struct inode *inode = ccc_object_inode(obj);
+ struct inode *inode = vvp_object_inode(obj);
if (valid & CAT_UID)
inode->i_uid = make_kuid(&init_user_ns, attr->cat_uid);
@@ -112,7 +122,7 @@ static int vvp_attr_set(const struct lu_env *env, struct cl_object *obj,
if (valid & CAT_CTIME)
inode->i_ctime.tv_sec = attr->cat_ctime;
if (0 && valid & CAT_SIZE)
- cl_isize_write_nolock(inode, attr->cat_size);
+ i_size_write(inode, attr->cat_size);
/* not currently necessary */
if (0 && valid & (CAT_UID|CAT_GID|CAT_SIZE))
mark_inode_dirty(inode);
@@ -165,6 +175,40 @@ static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj,
return 0;
}
+static int vvp_prune(const struct lu_env *env, struct cl_object *obj)
+{
+ struct inode *inode = vvp_object_inode(obj);
+ int rc;
+
+ rc = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_LOCAL, 1);
+ if (rc < 0) {
+ CDEBUG(D_VFSTRACE, DFID ": writeback failed: %d\n",
+ PFID(lu_object_fid(&obj->co_lu)), rc);
+ return rc;
+ }
+
+ truncate_inode_pages(inode->i_mapping, 0);
+ return 0;
+}
+
+static int vvp_object_glimpse(const struct lu_env *env,
+ const struct cl_object *obj, struct ost_lvb *lvb)
+{
+ struct inode *inode = vvp_object_inode(obj);
+
+ lvb->lvb_mtime = LTIME_S(inode->i_mtime);
+ lvb->lvb_atime = LTIME_S(inode->i_atime);
+ lvb->lvb_ctime = LTIME_S(inode->i_ctime);
+ /*
+ * LU-417: Add dirty pages block count lest i_blocks reports 0, some
+ * "cp" or "tar" on remote node may think it's a completely sparse file
+ * and skip it.
+ */
+ if (lvb->lvb_size > 0 && lvb->lvb_blocks == 0)
+ lvb->lvb_blocks = dirty_cnt(inode);
+ return 0;
+}
+
static const struct cl_object_operations vvp_ops = {
.coo_page_init = vvp_page_init,
.coo_lock_init = vvp_lock_init,
@@ -172,29 +216,94 @@ static const struct cl_object_operations vvp_ops = {
.coo_attr_get = vvp_attr_get,
.coo_attr_set = vvp_attr_set,
.coo_conf_set = vvp_conf_set,
- .coo_glimpse = ccc_object_glimpse
+ .coo_prune = vvp_prune,
+ .coo_glimpse = vvp_object_glimpse
};
+static int vvp_object_init0(const struct lu_env *env,
+ struct vvp_object *vob,
+ const struct cl_object_conf *conf)
+{
+ vob->vob_inode = conf->coc_inode;
+ vob->vob_transient_pages = 0;
+ cl_object_page_init(&vob->vob_cl, sizeof(struct vvp_page));
+ return 0;
+}
+
+static int vvp_object_init(const struct lu_env *env, struct lu_object *obj,
+ const struct lu_object_conf *conf)
+{
+ struct vvp_device *dev = lu2vvp_dev(obj->lo_dev);
+ struct vvp_object *vob = lu2vvp(obj);
+ struct lu_object *below;
+ struct lu_device *under;
+ int result;
+
+ under = &dev->vdv_next->cd_lu_dev;
+ below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under);
+ if (below) {
+ const struct cl_object_conf *cconf;
+
+ cconf = lu2cl_conf(conf);
+ INIT_LIST_HEAD(&vob->vob_pending_list);
+ lu_object_add(obj, below);
+ result = vvp_object_init0(env, vob, cconf);
+ } else {
+ result = -ENOMEM;
+ }
+
+ return result;
+}
+
+static void vvp_object_free(const struct lu_env *env, struct lu_object *obj)
+{
+ struct vvp_object *vob = lu2vvp(obj);
+
+ lu_object_fini(obj);
+ lu_object_header_fini(obj->lo_header);
+ kmem_cache_free(vvp_object_kmem, vob);
+}
+
static const struct lu_object_operations vvp_lu_obj_ops = {
- .loo_object_init = ccc_object_init,
- .loo_object_free = ccc_object_free,
- .loo_object_print = vvp_object_print
+ .loo_object_init = vvp_object_init,
+ .loo_object_free = vvp_object_free,
+ .loo_object_print = vvp_object_print,
};
-struct ccc_object *cl_inode2ccc(struct inode *inode)
+struct vvp_object *cl_inode2vvp(struct inode *inode)
{
- struct cl_inode_info *lli = cl_i2info(inode);
+ struct ll_inode_info *lli = ll_i2info(inode);
struct cl_object *obj = lli->lli_clob;
struct lu_object *lu;
lu = lu_object_locate(obj->co_lu.lo_header, &vvp_device_type);
LASSERT(lu);
- return lu2ccc(lu);
+ return lu2vvp(lu);
}
struct lu_object *vvp_object_alloc(const struct lu_env *env,
- const struct lu_object_header *hdr,
+ const struct lu_object_header *unused,
struct lu_device *dev)
{
- return ccc_object_alloc(env, hdr, dev, &vvp_ops, &vvp_lu_obj_ops);
+ struct vvp_object *vob;
+ struct lu_object *obj;
+
+ vob = kmem_cache_zalloc(vvp_object_kmem, GFP_NOFS);
+ if (vob) {
+ struct cl_object_header *hdr;
+
+ obj = &vob->vob_cl.co_lu;
+ hdr = &vob->vob_header;
+ cl_object_header_init(hdr);
+ hdr->coh_page_bufsize = cfs_size_round(sizeof(struct cl_page));
+
+ lu_object_init(obj, &hdr->coh_lu, dev);
+ lu_object_add_top(&hdr->coh_lu, obj);
+
+ vob->vob_cl.co_ops = &vvp_ops;
+ obj->lo_ops = &vvp_lu_obj_ops;
+ } else {
+ obj = NULL;
+ }
+ return obj;
}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c
index 33ca3eb34965..6cd2af7a958f 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_page.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_page.c
@@ -41,9 +41,16 @@
#define DEBUG_SUBSYSTEM S_LLITE
-#include "../include/obd.h"
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/page-flags.h>
+#include <linux/pagemap.h>
+
#include "../include/lustre_lite.h"
+#include "llite_internal.h"
#include "vvp_internal.h"
/*****************************************************************************
@@ -52,9 +59,9 @@
*
*/
-static void vvp_page_fini_common(struct ccc_page *cp)
+static void vvp_page_fini_common(struct vvp_page *vpg)
{
- struct page *vmpage = cp->cpg_page;
+ struct page *vmpage = vpg->vpg_page;
LASSERT(vmpage);
put_page(vmpage);
@@ -63,23 +70,23 @@ static void vvp_page_fini_common(struct ccc_page *cp)
static void vvp_page_fini(const struct lu_env *env,
struct cl_page_slice *slice)
{
- struct ccc_page *cp = cl2ccc_page(slice);
- struct page *vmpage = cp->cpg_page;
+ struct vvp_page *vpg = cl2vvp_page(slice);
+ struct page *vmpage = vpg->vpg_page;
/*
* vmpage->private was already cleared when page was moved into
* VPG_FREEING state.
*/
LASSERT((struct cl_page *)vmpage->private != slice->cpl_page);
- vvp_page_fini_common(cp);
+ vvp_page_fini_common(vpg);
}
static int vvp_page_own(const struct lu_env *env,
const struct cl_page_slice *slice, struct cl_io *io,
int nonblock)
{
- struct ccc_page *vpg = cl2ccc_page(slice);
- struct page *vmpage = vpg->cpg_page;
+ struct vvp_page *vpg = cl2vvp_page(slice);
+ struct page *vmpage = vpg->vpg_page;
LASSERT(vmpage);
if (nonblock) {
@@ -96,6 +103,7 @@ static int vvp_page_own(const struct lu_env *env,
lock_page(vmpage);
wait_on_page_writeback(vmpage);
+
return 0;
}
@@ -136,41 +144,15 @@ static void vvp_page_discard(const struct lu_env *env,
struct cl_io *unused)
{
struct page *vmpage = cl2vm_page(slice);
- struct address_space *mapping;
- struct ccc_page *cpg = cl2ccc_page(slice);
+ struct vvp_page *vpg = cl2vvp_page(slice);
LASSERT(vmpage);
LASSERT(PageLocked(vmpage));
- mapping = vmpage->mapping;
-
- if (cpg->cpg_defer_uptodate && !cpg->cpg_ra_used)
- ll_ra_stats_inc(mapping, RA_STAT_DISCARDED);
+ if (vpg->vpg_defer_uptodate && !vpg->vpg_ra_used)
+ ll_ra_stats_inc(vmpage->mapping->host, RA_STAT_DISCARDED);
- /*
- * truncate_complete_page() calls
- * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete().
- */
- truncate_complete_page(mapping, vmpage);
-}
-
-static int vvp_page_unmap(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- struct page *vmpage = cl2vm_page(slice);
- __u64 offset;
-
- LASSERT(vmpage);
- LASSERT(PageLocked(vmpage));
-
- offset = vmpage->index << PAGE_SHIFT;
-
- /*
- * XXX is it safe to call this with the page lock held?
- */
- ll_teardown_mmaps(vmpage->mapping, offset, offset + PAGE_SIZE);
- return 0;
+ ll_invalidate_page(vmpage);
}
static void vvp_page_delete(const struct lu_env *env,
@@ -179,12 +161,20 @@ static void vvp_page_delete(const struct lu_env *env,
struct page *vmpage = cl2vm_page(slice);
struct inode *inode = vmpage->mapping->host;
struct cl_object *obj = slice->cpl_obj;
+ struct cl_page *page = slice->cpl_page;
+ int refc;
LASSERT(PageLocked(vmpage));
- LASSERT((struct cl_page *)vmpage->private == slice->cpl_page);
- LASSERT(inode == ccc_object_inode(obj));
+ LASSERT((struct cl_page *)vmpage->private == page);
+ LASSERT(inode == vvp_object_inode(obj));
- vvp_write_complete(cl2ccc(obj), cl2ccc_page(slice));
+ vvp_write_complete(cl2vvp(obj), cl2vvp_page(slice));
+
+ /* Drop the reference count held in vvp_page_init */
+ refc = atomic_dec_return(&page->cp_ref);
+ LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc);
+
+ ClearPageUptodate(vmpage);
ClearPagePrivate(vmpage);
vmpage->private = 0;
/*
@@ -237,7 +227,7 @@ static int vvp_page_prep_write(const struct lu_env *env,
if (!pg->cp_sync_io)
set_page_writeback(vmpage);
- vvp_write_pending(cl2ccc(slice->cpl_obj), cl2ccc_page(slice));
+ vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice));
return 0;
}
@@ -250,11 +240,11 @@ static int vvp_page_prep_write(const struct lu_env *env,
*/
static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret)
{
- struct ccc_object *obj = cl_inode2ccc(inode);
+ struct vvp_object *obj = cl_inode2vvp(inode);
if (ioret == 0) {
ClearPageError(vmpage);
- obj->cob_discard_page_warned = 0;
+ obj->vob_discard_page_warned = 0;
} else {
SetPageError(vmpage);
if (ioret == -ENOSPC)
@@ -263,8 +253,8 @@ static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret
set_bit(AS_EIO, &inode->i_mapping->flags);
if ((ioret == -ESHUTDOWN || ioret == -EINTR) &&
- obj->cob_discard_page_warned == 0) {
- obj->cob_discard_page_warned = 1;
+ obj->vob_discard_page_warned == 0) {
+ obj->vob_discard_page_warned = 1;
ll_dirty_page_discard_warn(vmpage, ioret);
}
}
@@ -274,22 +264,23 @@ static void vvp_page_completion_read(const struct lu_env *env,
const struct cl_page_slice *slice,
int ioret)
{
- struct ccc_page *cp = cl2ccc_page(slice);
- struct page *vmpage = cp->cpg_page;
- struct cl_page *page = cl_page_top(slice->cpl_page);
- struct inode *inode = ccc_object_inode(page->cp_obj);
+ struct vvp_page *vpg = cl2vvp_page(slice);
+ struct page *vmpage = vpg->vpg_page;
+ struct cl_page *page = slice->cpl_page;
+ struct inode *inode = vvp_object_inode(page->cp_obj);
LASSERT(PageLocked(vmpage));
CL_PAGE_HEADER(D_PAGE, env, page, "completing READ with %d\n", ioret);
- if (cp->cpg_defer_uptodate)
+ if (vpg->vpg_defer_uptodate)
ll_ra_count_put(ll_i2sbi(inode), 1);
if (ioret == 0) {
- if (!cp->cpg_defer_uptodate)
+ if (!vpg->vpg_defer_uptodate)
cl_page_export(env, page, 1);
- } else
- cp->cpg_defer_uptodate = 0;
+ } else {
+ vpg->vpg_defer_uptodate = 0;
+ }
if (!page->cp_sync_io)
unlock_page(vmpage);
@@ -299,9 +290,9 @@ static void vvp_page_completion_write(const struct lu_env *env,
const struct cl_page_slice *slice,
int ioret)
{
- struct ccc_page *cp = cl2ccc_page(slice);
+ struct vvp_page *vpg = cl2vvp_page(slice);
struct cl_page *pg = slice->cpl_page;
- struct page *vmpage = cp->cpg_page;
+ struct page *vmpage = vpg->vpg_page;
CL_PAGE_HEADER(D_PAGE, env, pg, "completing WRITE with %d\n", ioret);
@@ -315,8 +306,8 @@ static void vvp_page_completion_write(const struct lu_env *env,
* and then re-add the page into pending transfer queue. -jay
*/
- cp->cpg_write_queued = 0;
- vvp_write_complete(cl2ccc(slice->cpl_obj), cp);
+ vpg->vpg_write_queued = 0;
+ vvp_write_complete(cl2vvp(slice->cpl_obj), vpg);
if (pg->cp_sync_io) {
LASSERT(PageLocked(vmpage));
@@ -327,7 +318,7 @@ static void vvp_page_completion_write(const struct lu_env *env,
* Only mark the page error only when it's an async write
* because applications won't wait for IO to finish.
*/
- vvp_vmpage_error(ccc_object_inode(pg->cp_obj), vmpage, ioret);
+ vvp_vmpage_error(vvp_object_inode(pg->cp_obj), vmpage, ioret);
end_page_writeback(vmpage);
}
@@ -359,7 +350,7 @@ static int vvp_page_make_ready(const struct lu_env *env,
LASSERT(pg->cp_state == CPS_CACHED);
/* This actually clears the dirty bit in the radix tree. */
set_page_writeback(vmpage);
- vvp_write_pending(cl2ccc(slice->cpl_obj), cl2ccc_page(slice));
+ vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice));
CL_PAGE_HEADER(D_PAGE, env, pg, "readied\n");
} else if (pg->cp_state == CPS_PAGEOUT) {
/* is it possible for osc_flush_async_page() to already
@@ -375,24 +366,51 @@ static int vvp_page_make_ready(const struct lu_env *env,
return result;
}
+static int vvp_page_is_under_lock(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io, pgoff_t *max_index)
+{
+ if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE ||
+ io->ci_type == CIT_FAULT) {
+ struct vvp_io *vio = vvp_env_io(env);
+
+ if (unlikely(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED))
+ *max_index = CL_PAGE_EOF;
+ }
+ return 0;
+}
+
static int vvp_page_print(const struct lu_env *env,
const struct cl_page_slice *slice,
void *cookie, lu_printer_t printer)
{
- struct ccc_page *vp = cl2ccc_page(slice);
- struct page *vmpage = vp->cpg_page;
+ struct vvp_page *vpg = cl2vvp_page(slice);
+ struct page *vmpage = vpg->vpg_page;
(*printer)(env, cookie, LUSTRE_VVP_NAME "-page@%p(%d:%d:%d) vm@%p ",
- vp, vp->cpg_defer_uptodate, vp->cpg_ra_used,
- vp->cpg_write_queued, vmpage);
+ vpg, vpg->vpg_defer_uptodate, vpg->vpg_ra_used,
+ vpg->vpg_write_queued, vmpage);
if (vmpage) {
(*printer)(env, cookie, "%lx %d:%d %lx %lu %slru",
(long)vmpage->flags, page_count(vmpage),
page_mapcount(vmpage), vmpage->private,
- page_index(vmpage),
+ vmpage->index,
list_empty(&vmpage->lru) ? "not-" : "");
}
+
(*printer)(env, cookie, "\n");
+
+ return 0;
+}
+
+static int vvp_page_fail(const struct lu_env *env,
+ const struct cl_page_slice *slice)
+{
+ /*
+ * Cached read?
+ */
+ LBUG();
+
return 0;
}
@@ -401,32 +419,38 @@ static const struct cl_page_operations vvp_page_ops = {
.cpo_assume = vvp_page_assume,
.cpo_unassume = vvp_page_unassume,
.cpo_disown = vvp_page_disown,
- .cpo_vmpage = ccc_page_vmpage,
.cpo_discard = vvp_page_discard,
.cpo_delete = vvp_page_delete,
- .cpo_unmap = vvp_page_unmap,
.cpo_export = vvp_page_export,
.cpo_is_vmlocked = vvp_page_is_vmlocked,
.cpo_fini = vvp_page_fini,
.cpo_print = vvp_page_print,
- .cpo_is_under_lock = ccc_page_is_under_lock,
+ .cpo_is_under_lock = vvp_page_is_under_lock,
.io = {
[CRT_READ] = {
.cpo_prep = vvp_page_prep_read,
.cpo_completion = vvp_page_completion_read,
- .cpo_make_ready = ccc_fail,
+ .cpo_make_ready = vvp_page_fail,
},
[CRT_WRITE] = {
.cpo_prep = vvp_page_prep_write,
.cpo_completion = vvp_page_completion_write,
.cpo_make_ready = vvp_page_make_ready,
- }
- }
+ },
+ },
};
+static int vvp_transient_page_prep(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ /* transient page should always be sent. */
+ return 0;
+}
+
static void vvp_transient_page_verify(const struct cl_page *page)
{
- struct inode *inode = ccc_object_inode(page->cp_obj);
+ struct inode *inode = vvp_object_inode(page->cp_obj);
LASSERT(!inode_trylock(inode));
}
@@ -477,7 +501,7 @@ static void vvp_transient_page_discard(const struct lu_env *env,
static int vvp_transient_page_is_vmlocked(const struct lu_env *env,
const struct cl_page_slice *slice)
{
- struct inode *inode = ccc_object_inode(slice->cpl_obj);
+ struct inode *inode = vvp_object_inode(slice->cpl_obj);
int locked;
locked = !inode_trylock(inode);
@@ -497,13 +521,13 @@ vvp_transient_page_completion(const struct lu_env *env,
static void vvp_transient_page_fini(const struct lu_env *env,
struct cl_page_slice *slice)
{
- struct ccc_page *cp = cl2ccc_page(slice);
+ struct vvp_page *vpg = cl2vvp_page(slice);
struct cl_page *clp = slice->cpl_page;
- struct ccc_object *clobj = cl2ccc(clp->cp_obj);
+ struct vvp_object *clobj = cl2vvp(clp->cp_obj);
- vvp_page_fini_common(cp);
- LASSERT(!inode_trylock(clobj->cob_inode));
- clobj->cob_transient_pages--;
+ vvp_page_fini_common(vpg);
+ LASSERT(!inode_trylock(clobj->vob_inode));
+ clobj->vob_transient_pages--;
}
static const struct cl_page_operations vvp_transient_page_ops = {
@@ -512,45 +536,48 @@ static const struct cl_page_operations vvp_transient_page_ops = {
.cpo_unassume = vvp_transient_page_unassume,
.cpo_disown = vvp_transient_page_disown,
.cpo_discard = vvp_transient_page_discard,
- .cpo_vmpage = ccc_page_vmpage,
.cpo_fini = vvp_transient_page_fini,
.cpo_is_vmlocked = vvp_transient_page_is_vmlocked,
.cpo_print = vvp_page_print,
- .cpo_is_under_lock = ccc_page_is_under_lock,
+ .cpo_is_under_lock = vvp_page_is_under_lock,
.io = {
[CRT_READ] = {
- .cpo_prep = ccc_transient_page_prep,
+ .cpo_prep = vvp_transient_page_prep,
.cpo_completion = vvp_transient_page_completion,
},
[CRT_WRITE] = {
- .cpo_prep = ccc_transient_page_prep,
+ .cpo_prep = vvp_transient_page_prep,
.cpo_completion = vvp_transient_page_completion,
}
}
};
int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, struct page *vmpage)
+ struct cl_page *page, pgoff_t index)
{
- struct ccc_page *cpg = cl_object_page_slice(obj, page);
+ struct vvp_page *vpg = cl_object_page_slice(obj, page);
+ struct page *vmpage = page->cp_vmpage;
- CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+ CLOBINVRNT(env, obj, vvp_object_invariant(obj));
- cpg->cpg_page = vmpage;
+ vpg->vpg_page = vmpage;
get_page(vmpage);
- INIT_LIST_HEAD(&cpg->cpg_pending_linkage);
+ INIT_LIST_HEAD(&vpg->vpg_pending_linkage);
if (page->cp_type == CPT_CACHEABLE) {
+ /* in cache, decref in vvp_page_delete */
+ atomic_inc(&page->cp_ref);
SetPagePrivate(vmpage);
vmpage->private = (unsigned long)page;
- cl_page_slice_add(page, &cpg->cpg_cl, obj, &vvp_page_ops);
+ cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
+ &vvp_page_ops);
} else {
- struct ccc_object *clobj = cl2ccc(obj);
+ struct vvp_object *clobj = cl2vvp(obj);
- LASSERT(!inode_trylock(clobj->cob_inode));
- cl_page_slice_add(page, &cpg->cpg_cl, obj,
+ LASSERT(!inode_trylock(clobj->vob_inode));
+ cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
&vvp_transient_page_ops);
- clobj->cob_transient_pages++;
+ clobj->vob_transient_pages++;
}
return 0;
}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_req.c b/drivers/staging/lustre/lustre/llite/vvp_req.c
new file mode 100644
index 000000000000..fb886291a4e2
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/vvp_req.c
@@ -0,0 +1,121 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2014, Intel Corporation.
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include "../include/lustre/lustre_idl.h"
+#include "../include/cl_object.h"
+#include "../include/obd.h"
+#include "../include/obd_support.h"
+#include "../include/lustre_lite.h"
+#include "llite_internal.h"
+#include "vvp_internal.h"
+
+static inline struct vvp_req *cl2vvp_req(const struct cl_req_slice *slice)
+{
+ return container_of0(slice, struct vvp_req, vrq_cl);
+}
+
+/**
+ * Implementation of struct cl_req_operations::cro_attr_set() for VVP
+ * layer. VVP is responsible for
+ *
+ * - o_[mac]time
+ *
+ * - o_mode
+ *
+ * - o_parent_seq
+ *
+ * - o_[ug]id
+ *
+ * - o_parent_oid
+ *
+ * - o_parent_ver
+ *
+ * - o_ioepoch,
+ *
+ */
+void vvp_req_attr_set(const struct lu_env *env,
+ const struct cl_req_slice *slice,
+ const struct cl_object *obj,
+ struct cl_req_attr *attr, u64 flags)
+{
+ struct inode *inode;
+ struct obdo *oa;
+ u32 valid_flags;
+
+ oa = attr->cra_oa;
+ inode = vvp_object_inode(obj);
+ valid_flags = OBD_MD_FLTYPE;
+
+ if (slice->crs_req->crq_type == CRT_WRITE) {
+ if (flags & OBD_MD_FLEPOCH) {
+ oa->o_valid |= OBD_MD_FLEPOCH;
+ oa->o_ioepoch = ll_i2info(inode)->lli_ioepoch;
+ valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
+ OBD_MD_FLUID | OBD_MD_FLGID;
+ }
+ }
+ obdo_from_inode(oa, inode, valid_flags & flags);
+ obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
+ memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid,
+ JOBSTATS_JOBID_SIZE);
+}
+
+void vvp_req_completion(const struct lu_env *env,
+ const struct cl_req_slice *slice, int ioret)
+{
+ struct vvp_req *vrq;
+
+ if (ioret > 0)
+ cl_stats_tally(slice->crs_dev, slice->crs_req->crq_type, ioret);
+
+ vrq = cl2vvp_req(slice);
+ kmem_cache_free(vvp_req_kmem, vrq);
+}
+
+static const struct cl_req_operations vvp_req_ops = {
+ .cro_attr_set = vvp_req_attr_set,
+ .cro_completion = vvp_req_completion
+};
+
+int vvp_req_init(const struct lu_env *env, struct cl_device *dev,
+ struct cl_req *req)
+{
+ struct vvp_req *vrq;
+ int result;
+
+ vrq = kmem_cache_zalloc(vvp_req_kmem, GFP_NOFS);
+ if (vrq) {
+ cl_req_slice_add(req, &vrq->vrq_cl, dev, &vvp_req_ops);
+ result = 0;
+ } else {
+ result = -ENOMEM;
+ }
+ return result;
+}
diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c
index b68dcc921ca2..608014b0dbcd 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -181,8 +181,9 @@ int ll_setxattr_common(struct inode *inode, const char *name,
size = rc;
pv = (const char *)new_value;
- } else
+ } else {
return -EOPNOTSUPP;
+ }
valid |= rce_ops2valid(rce->rce_ops);
}
@@ -210,16 +211,14 @@ int ll_setxattr_common(struct inode *inode, const char *name,
return 0;
}
-int ll_setxattr(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags)
+int ll_setxattr(struct dentry *dentry, struct inode *inode,
+ const char *name, const void *value, size_t size, int flags)
{
- struct inode *inode = d_inode(dentry);
-
LASSERT(inode);
LASSERT(name);
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n",
- inode->i_ino, inode->i_generation, inode, name);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
+ PFID(ll_inode2fid(inode)), inode, name);
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1);
@@ -243,12 +242,12 @@ int ll_setxattr(struct dentry *dentry, const char *name,
lump->lmm_stripe_offset = -1;
if (lump && S_ISREG(inode->i_mode)) {
- int flags = FMODE_WRITE;
+ __u64 it_flags = FMODE_WRITE;
int lum_size = (lump->lmm_magic == LOV_USER_MAGIC_V1) ?
sizeof(*lump) : sizeof(struct lov_user_md_v3);
- rc = ll_lov_setstripe_ea_info(inode, dentry, flags, lump,
- lum_size);
+ rc = ll_lov_setstripe_ea_info(inode, dentry, it_flags,
+ lump, lum_size);
/* b10667: rc always be 0 here for now */
rc = 0;
} else if (S_ISDIR(inode->i_mode)) {
@@ -272,8 +271,8 @@ int ll_removexattr(struct dentry *dentry, const char *name)
LASSERT(inode);
LASSERT(name);
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n",
- inode->i_ino, inode->i_generation, inode, name);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
+ PFID(ll_inode2fid(inode)), inode, name);
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1);
return ll_setxattr_common(inode, name, NULL, 0, 0,
@@ -292,8 +291,8 @@ int ll_getxattr_common(struct inode *inode, const char *name,
struct rmtacl_ctl_entry *rce = NULL;
struct ll_inode_info *lli = ll_i2info(inode);
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n",
- inode->i_ino, inode->i_generation, inode);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+ PFID(ll_inode2fid(inode)), inode);
/* listxattr have slightly different behavior from of ext3:
* without 'user_xattr' ext3 will list all xattr names but
@@ -338,7 +337,6 @@ int ll_getxattr_common(struct inode *inode, const char *name,
*/
if (xattr_type == XATTR_ACL_ACCESS_T &&
!(sbi->ll_flags & LL_SBI_RMT_CLIENT)) {
-
struct posix_acl *acl;
spin_lock(&lli->lli_lock);
@@ -423,8 +421,7 @@ getxattr_nocache:
if (rce && rce->rce_ops == RMT_LSETFACL) {
ext_acl_xattr_header *acl;
- acl = lustre_posix_acl_xattr_2ext(
- (posix_acl_xattr_header *)buffer, rc);
+ acl = lustre_posix_acl_xattr_2ext(buffer, rc);
if (IS_ERR(acl)) {
rc = PTR_ERR(acl);
goto out;
@@ -451,16 +448,14 @@ out:
return rc;
}
-ssize_t ll_getxattr(struct dentry *dentry, const char *name,
- void *buffer, size_t size)
+ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode,
+ const char *name, void *buffer, size_t size)
{
- struct inode *inode = d_inode(dentry);
-
LASSERT(inode);
LASSERT(name);
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n",
- inode->i_ino, inode->i_generation, inode, name);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
+ PFID(ll_inode2fid(inode)), inode, name);
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
@@ -554,8 +549,8 @@ ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
LASSERT(inode);
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n",
- inode->i_ino, inode->i_generation, inode);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+ PFID(ll_inode2fid(inode)), inode);
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LISTXATTR, 1);
diff --git a/drivers/staging/lustre/lustre/llite/xattr_cache.c b/drivers/staging/lustre/lustre/llite/xattr_cache.c
index 3480ce2bb3cc..d7e17abbe361 100644
--- a/drivers/staging/lustre/lustre/llite/xattr_cache.c
+++ b/drivers/staging/lustre/lustre/llite/xattr_cache.c
@@ -229,7 +229,6 @@ static int ll_xattr_cache_valid(struct ll_inode_info *lli)
*/
static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli)
{
-
if (!ll_xattr_cache_valid(lli))
return 0;