194 files changed, 8022 insertions, 15655 deletions
diff --git a/drivers/staging/lustre/lustre/Kconfig b/drivers/staging/lustre/lustre/Kconfig
index 8ac7cd4d6fdb..9f5d75f166e7 100644
--- a/drivers/staging/lustre/lustre/Kconfig
+++ b/drivers/staging/lustre/lustre/Kconfig
@@ -54,9 +54,3 @@ config LUSTRE_TRANSLATE_ERRNOS
 	bool
 	depends on LUSTRE_FS && !X86
 	default y
-
-config LUSTRE_LLITE_LLOOP
-	tristate "Lustre virtual block device"
-	depends on LUSTRE_FS && BLOCK
-	depends on !PPC_64K_PAGES && !ARM64_64K_PAGES && !MICROBLAZE_64K_PAGES && !PAGE_SIZE_64KB && !IA64_PAGE_SIZE_64KB && !PARISC_PAGE_SIZE_64KB
-	default m
diff --git a/drivers/staging/lustre/lustre/fid/fid_internal.h b/drivers/staging/lustre/lustre/fid/fid_internal.h
index b79a813977cf..5c53773ecc5a 100644
--- a/drivers/staging/lustre/lustre/fid/fid_internal.h
+++ b/drivers/staging/lustre/lustre/fid/fid_internal.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/fid/fid_lib.c b/drivers/staging/lustre/lustre/fid/fid_lib.c
index dd65159ebb38..99ae7eb6720e 100644
--- a/drivers/staging/lustre/lustre/fid/fid_lib.c
+++ b/drivers/staging/lustre/lustre/fid/fid_lib.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/fid/fid_request.c b/drivers/staging/lustre/lustre/fid/fid_request.c
index 39269c3c56a6..454744d25956 100644
--- a/drivers/staging/lustre/lustre/fid/fid_request.c
+++ b/drivers/staging/lustre/lustre/fid/fid_request.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -66,6 +62,7 @@ static int seq_client_rpc(struct lu_client_seq *seq,
 	unsigned int           debug_mask;
 	int                    rc;
 
+	LASSERT(exp && !IS_ERR(exp));
 	req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), &RQF_SEQ_QUERY,
 					LUSTRE_MDS_VERSION, SEQ_QUERY);
 	if (!req)
@@ -97,23 +94,28 @@ static int seq_client_rpc(struct lu_client_seq *seq,
 		 * request here, otherwise if MDT0 is failed(umounted),
 		 * it can not release the export of MDT0
 		 */
-		if (seq->lcs_type == LUSTRE_SEQ_DATA)
-			req->rq_no_delay = req->rq_no_resend = 1;
+		if (seq->lcs_type == LUSTRE_SEQ_DATA) {
+			req->rq_no_delay = 1;
+			req->rq_no_resend = 1;
+		}
 		debug_mask = D_CONSOLE;
 	} else {
-		if (seq->lcs_type == LUSTRE_SEQ_METADATA)
+		if (seq->lcs_type == LUSTRE_SEQ_METADATA) {
+			req->rq_reply_portal = MDC_REPLY_PORTAL;
 			req->rq_request_portal = SEQ_METADATA_PORTAL;
-		else
+		} else {
+			req->rq_reply_portal = OSC_REPLY_PORTAL;
 			req->rq_request_portal = SEQ_DATA_PORTAL;
+		}
 		debug_mask = D_INFO;
 	}
 
 	ptlrpc_at_set_req_timeout(req);
 
-	if (seq->lcs_type == LUSTRE_SEQ_METADATA)
+	if (opc != SEQ_ALLOC_SUPER && seq->lcs_type == LUSTRE_SEQ_METADATA)
 		mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
 	rc = ptlrpc_queue_wait(req);
-	if (seq->lcs_type == LUSTRE_SEQ_METADATA)
+	if (opc != SEQ_ALLOC_SUPER && seq->lcs_type == LUSTRE_SEQ_METADATA)
 		mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
 	if (rc)
 		goto out_req;
diff --git a/drivers/staging/lustre/lustre/fid/lproc_fid.c b/drivers/staging/lustre/lustre/fid/lproc_fid.c
index 1f0e78686278..81b7ca9ea2fd 100644
--- a/drivers/staging/lustre/lustre/fid/lproc_fid.c
+++ b/drivers/staging/lustre/lustre/fid/lproc_fid.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/fld/fld_cache.c b/drivers/staging/lustre/lustre/fld/fld_cache.c
index 062f388cf38a..0100a935f4ff 100644
--- a/drivers/staging/lustre/lustre/fld/fld_cache.c
+++ b/drivers/staging/lustre/lustre/fld/fld_cache.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -178,8 +174,9 @@ restart_fixup:
 				if (n_range->lsr_end <= c_range->lsr_end) {
 					*n_range = *c_range;
 					fld_cache_entry_delete(cache, f_curr);
-				} else
+				} else {
 					n_range->lsr_start = c_range->lsr_end;
+				}
 			}
 
 			/* we could have overlap over next
diff --git a/drivers/staging/lustre/lustre/fld/fld_internal.h b/drivers/staging/lustre/lustre/fld/fld_internal.h
index e8a3caf20c9b..f0efe5b9fbec 100644
--- a/drivers/staging/lustre/lustre/fld/fld_internal.h
+++ b/drivers/staging/lustre/lustre/fld/fld_internal.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -101,12 +97,6 @@ struct fld_cache {
 	unsigned int		 fci_no_shrink:1;
 };
 
-enum fld_op {
-	FLD_CREATE = 0,
-	FLD_DELETE = 1,
-	FLD_LOOKUP = 2
-};
-
 enum {
 	/* 4M of FLD cache will not hurt client a lot. */
 	FLD_SERVER_CACHE_SIZE      = (4 * 0x100000),
@@ -126,7 +116,8 @@ enum {
 extern struct lu_fld_hash fld_hash[];
 
 int fld_client_rpc(struct obd_export *exp,
-		   struct lu_seq_range *range, __u32 fld_op);
+		   struct lu_seq_range *range, __u32 fld_op,
+		   struct ptlrpc_request **reqp);
 
 extern struct lprocfs_vars fld_client_debugfs_list[];
 
diff --git a/drivers/staging/lustre/lustre/fld/fld_request.c b/drivers/staging/lustre/lustre/fld/fld_request.c
index a3d122d85c8d..e59d626a1548 100644
--- a/drivers/staging/lustre/lustre/fld/fld_request.c
+++ b/drivers/staging/lustre/lustre/fld/fld_request.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -64,9 +60,9 @@ static int fld_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw)
 {
 	int rc;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	rc = list_empty(&mcw->mcw_entry);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 	return rc;
 };
 
@@ -75,15 +71,15 @@ static void fld_enter_request(struct client_obd *cli)
 	struct mdc_cache_waiter mcw;
 	struct l_wait_info lwi = { 0 };
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
 		list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters);
 		init_waitqueue_head(&mcw.mcw_waitq);
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 		l_wait_event(mcw.mcw_waitq, fld_req_avail(cli, &mcw), &lwi);
 	} else {
 		cli->cl_r_in_flight++;
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 	}
 }
 
@@ -92,10 +88,9 @@ static void fld_exit_request(struct client_obd *cli)
 	struct list_head *l, *tmp;
 	struct mdc_cache_waiter *mcw;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	cli->cl_r_in_flight--;
 	list_for_each_safe(l, tmp, &cli->cl_cache_waiters) {
-
 		if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
 			/* No free request slots anymore */
 			break;
@@ -106,7 +101,7 @@ static void fld_exit_request(struct client_obd *cli)
 		cli->cl_r_in_flight++;
 		wake_up(&mcw->mcw_waitq);
 	}
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 }
 
 static int fld_rrb_hash(struct lu_client_fld *fld, u64 seq)
@@ -392,55 +387,82 @@ void fld_client_fini(struct lu_client_fld *fld)
 EXPORT_SYMBOL(fld_client_fini);
 
 int fld_client_rpc(struct obd_export *exp,
-		   struct lu_seq_range *range, __u32 fld_op)
+		   struct lu_seq_range *range, __u32 fld_op,
+		   struct ptlrpc_request **reqp)
 {
-	struct ptlrpc_request *req;
+	struct ptlrpc_request *req = NULL;
 	struct lu_seq_range   *prange;
 	__u32		 *op;
-	int		    rc;
+	int		    rc = 0;
 	struct obd_import     *imp;
 
 	LASSERT(exp);
 
 	imp = class_exp2cliimp(exp);
-	req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_QUERY, LUSTRE_MDS_VERSION,
-					FLD_QUERY);
-	if (!req)
-		return -ENOMEM;
-
-	op = req_capsule_client_get(&req->rq_pill, &RMF_FLD_OPC);
-	*op = fld_op;
+	switch (fld_op) {
+	case FLD_QUERY:
+		req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_QUERY,
+						LUSTRE_MDS_VERSION, FLD_QUERY);
+		if (!req)
+			return -ENOMEM;
+
+		/*
+		 * XXX: only needed when talking to old server(< 2.6), it should
+		 * be removed when < 2.6 server is not supported
+		 */
+		op = req_capsule_client_get(&req->rq_pill, &RMF_FLD_OPC);
+		*op = FLD_LOOKUP;
+
+		if (imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS)
+			req->rq_allow_replay = 1;
+		break;
+	case FLD_READ:
+		req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_READ,
+						LUSTRE_MDS_VERSION, FLD_READ);
+		if (!req)
+			return -ENOMEM;
+
+		req_capsule_set_size(&req->rq_pill, &RMF_GENERIC_DATA,
+				     RCL_SERVER, PAGE_SIZE);
+		break;
+	default:
+		rc = -EINVAL;
+		break;
+	}
+	if (rc)
+		return rc;
 
 	prange = req_capsule_client_get(&req->rq_pill, &RMF_FLD_MDFLD);
 	*prange = *range;
-
 	ptlrpc_request_set_replen(req);
 	req->rq_request_portal = FLD_REQUEST_PORTAL;
 	req->rq_reply_portal = MDC_REPLY_PORTAL;
 	ptlrpc_at_set_req_timeout(req);
 
-	if (fld_op == FLD_LOOKUP &&
-	    imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS)
-		req->rq_allow_replay = 1;
-
-	if (fld_op != FLD_LOOKUP)
-		mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
 	fld_enter_request(&exp->exp_obd->u.cli);
 	rc = ptlrpc_queue_wait(req);
 	fld_exit_request(&exp->exp_obd->u.cli);
-	if (fld_op != FLD_LOOKUP)
-		mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
 	if (rc)
 		goto out_req;
 
-	prange = req_capsule_server_get(&req->rq_pill, &RMF_FLD_MDFLD);
-	if (!prange) {
-		rc = -EFAULT;
-		goto out_req;
+	if (fld_op == FLD_QUERY) {
+		prange = req_capsule_server_get(&req->rq_pill, &RMF_FLD_MDFLD);
+		if (!prange) {
+			rc = -EFAULT;
+			goto out_req;
+		}
+		*range = *prange;
 	}
-	*range = *prange;
+
 out_req:
-	ptlrpc_req_finished(req);
+	if (rc || !reqp) {
+		ptlrpc_req_finished(req);
+		req = NULL;
+	}
+
+	if (reqp)
+		*reqp = req;
+
 	return rc;
 }
 
@@ -468,7 +490,7 @@ int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds,
 
 	res.lsr_start = seq;
 	fld_range_set_type(&res, flags);
-	rc = fld_client_rpc(target->ft_exp, &res, FLD_LOOKUP);
+	rc = fld_client_rpc(target->ft_exp, &res, FLD_QUERY, NULL);
 
 	if (rc == 0) {
 		*mds = res.lsr_index;
diff --git a/drivers/staging/lustre/lustre/fld/lproc_fld.c b/drivers/staging/lustre/lustre/fld/lproc_fld.c
index ca898befeba6..61ac420798af 100644
--- a/drivers/staging/lustre/lustre/fld/lproc_fld.c
+++ b/drivers/staging/lustre/lustre/fld/lproc_fld.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h
index fb971ded5a1b..3cd4a2577d90 100644
--- a/drivers/staging/lustre/lustre/include/cl_object.h
+++ b/drivers/staging/lustre/lustre/include/cl_object.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -82,7 +78,6 @@
  *  - i_mutex
  *      - PG_locked
  *	  - cl_object_header::coh_page_guard
- *	  - cl_object_header::coh_lock_guard
  *	  - lu_site::ls_guard
  *
  * See the top comment in cl_object.c for the description of overall locking and
@@ -98,9 +93,12 @@
  * super-class definitions.
  */
 #include "lu_object.h"
+#include <linux/atomic.h>
 #include "linux/lustre_compat25.h"
 #include <linux/mutex.h>
 #include <linux/radix-tree.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
 
 struct inode;
 
@@ -138,7 +136,7 @@ struct cl_device_operations {
 	 * cl_req_slice_add().
 	 *
 	 * \see osc_req_init(), lov_req_init(), lovsub_req_init()
-	 * \see ccc_req_init()
+	 * \see vvp_req_init()
 	 */
 	int (*cdo_req_init)(const struct lu_env *env, struct cl_device *dev,
 			    struct cl_req *req);
@@ -147,7 +145,7 @@ struct cl_device_operations {
 /**
  * Device in the client stack.
  *
- * \see ccc_device, lov_device, lovsub_device, osc_device
+ * \see vvp_device, lov_device, lovsub_device, osc_device
  */
 struct cl_device {
 	/** Super-class. */
@@ -243,7 +241,7 @@ enum cl_attr_valid {
  *    be discarded from the memory, all its sub-objects are torn-down and
  *    destroyed too.
  *
- * \see ccc_object, lov_object, lovsub_object, osc_object
+ * \see vvp_object, lov_object, lovsub_object, osc_object
  */
 struct cl_object {
 	/** super class */
@@ -322,7 +320,7 @@ struct cl_object_operations {
 	 *	 to be used instead of newly created.
 	 */
 	int  (*coo_page_init)(const struct lu_env *env, struct cl_object *obj,
-			      struct cl_page *page, struct page *vmpage);
+				struct cl_page *page, pgoff_t index);
 	/**
 	 * Initialize lock slice for this layer. Called top-to-bottom through
 	 * every object layer when a new cl_lock is instantiated. Layer
@@ -383,11 +381,17 @@ struct cl_object_operations {
 	 * object. Layers are supposed to fill parts of \a lvb that will be
 	 * shipped to the glimpse originator as a glimpse result.
 	 *
-	 * \see ccc_object_glimpse(), lovsub_object_glimpse(),
+	 * \see vvp_object_glimpse(), lovsub_object_glimpse(),
 	 * \see osc_object_glimpse()
 	 */
 	int (*coo_glimpse)(const struct lu_env *env,
 			   const struct cl_object *obj, struct ost_lvb *lvb);
+	/**
+	 * Object prune method. Called when the layout is going to change on
+	 * this object, therefore each layer has to clean up their cache,
+	 * mainly pages and locks.
+	 */
+	int (*coo_prune)(const struct lu_env *env, struct cl_object *obj);
 };
 
 /**
@@ -398,22 +402,6 @@ struct cl_object_header {
 	 * here.
 	 */
 	struct lu_object_header  coh_lu;
-	/** \name locks
-	 * \todo XXX move locks below to the separate cache-lines, they are
-	 * mostly useless otherwise.
-	 */
-	/** @{ */
-	/** Lock protecting page tree. */
-	spinlock_t		 coh_page_guard;
-	/** Lock protecting lock list. */
-	spinlock_t		 coh_lock_guard;
-	/** @} locks */
-	/** Radix tree of cl_page's, cached for this object. */
-	struct radix_tree_root   coh_tree;
-	/** # of pages in radix tree. */
-	unsigned long	    coh_pages;
-	/** List of cl_lock's granted for this object. */
-	struct list_head	       coh_locks;
 
 	/**
 	 * Parent object. It is assumed that an object has a well-defined
@@ -460,10 +448,6 @@ struct cl_object_header {
 					co_lu.lo_linkage)
 /** @} cl_object */
 
-#ifndef pgoff_t
-#define pgoff_t unsigned long
-#endif
-
 #define CL_PAGE_EOF ((pgoff_t)~0ull)
 
 /** \addtogroup cl_page cl_page
@@ -727,16 +711,10 @@ struct cl_page {
 	atomic_t	     cp_ref;
 	/** An object this page is a part of. Immutable after creation. */
 	struct cl_object	*cp_obj;
-	/** Logical page index within the object. Immutable after creation. */
-	pgoff_t		  cp_index;
 	/** List of slices. Immutable after creation. */
 	struct list_head	       cp_layers;
-	/** Parent page, NULL for top-level page. Immutable after creation. */
-	struct cl_page	  *cp_parent;
-	/** Lower-layer page. NULL for bottommost page. Immutable after
-	 * creation.
-	 */
-	struct cl_page	  *cp_child;
+	/** vmpage */
+	struct page		*cp_vmpage;
 	/**
 	 * Page state. This field is const to avoid accidental update, it is
 	 * modified only internally within cl_page.c. Protected by a VM lock.
@@ -787,10 +765,11 @@ struct cl_page {
 /**
  * Per-layer part of cl_page.
  *
- * \see ccc_page, lov_page, osc_page
+ * \see vvp_page, lov_page, osc_page
  */
 struct cl_page_slice {
 	struct cl_page		  *cpl_page;
+	pgoff_t				 cpl_index;
 	/**
 	 * Object slice corresponding to this page slice. Immutable after
 	 * creation.
@@ -804,16 +783,9 @@ struct cl_page_slice {
 /**
  * Lock mode. For the client extent locks.
  *
- * \warning: cl_lock_mode_match() assumes particular ordering here.
  * \ingroup cl_lock
  */
 enum cl_lock_mode {
-	/**
-	 * Mode of a lock that protects no data, and exists only as a
-	 * placeholder. This is used for `glimpse' requests. A phantom lock
-	 * might get promoted to real lock at some point.
-	 */
-	CLM_PHANTOM,
 	CLM_READ,
 	CLM_WRITE,
 	CLM_GROUP
@@ -846,11 +818,6 @@ struct cl_page_operations {
 	 */
 
 	/**
-	 * \return the underlying VM page. Optional.
-	 */
-	struct page *(*cpo_vmpage)(const struct lu_env *env,
-				   const struct cl_page_slice *slice);
-	/**
 	 * Called when \a io acquires this page into the exclusive
 	 * ownership. When this method returns, it is guaranteed that the is
 	 * not owned by other io, and no transfer is going on against
@@ -897,14 +864,6 @@ struct cl_page_operations {
 	void  (*cpo_export)(const struct lu_env *env,
 			    const struct cl_page_slice *slice, int uptodate);
 	/**
-	 * Unmaps page from the user space (if it is mapped).
-	 *
-	 * \see cl_page_unmap()
-	 * \see vvp_page_unmap()
-	 */
-	int (*cpo_unmap)(const struct lu_env *env,
-			 const struct cl_page_slice *slice, struct cl_io *io);
-	/**
 	 * Checks whether underlying VM page is locked (in the suitable
 	 * sense). Used for assertions.
 	 *
@@ -957,7 +916,7 @@ struct cl_page_operations {
 	 */
 	int (*cpo_is_under_lock)(const struct lu_env *env,
 				 const struct cl_page_slice *slice,
-				 struct cl_io *io);
+				 struct cl_io *io, pgoff_t *max);
 
 	/**
 	 * Optional debugging helper. Prints given page slice.
@@ -1027,26 +986,6 @@ struct cl_page_operations {
 		 */
 		int  (*cpo_make_ready)(const struct lu_env *env,
 				       const struct cl_page_slice *slice);
-		/**
-		 * Announce that this page is to be written out
-		 * opportunistically, that is, page is dirty, it is not
-		 * necessary to start write-out transfer right now, but
-		 * eventually page has to be written out.
-		 *
-		 * Main caller of this is the write path (see
-		 * vvp_io_commit_write()), using this method to build a
-		 * "transfer cache" from which large transfers are then
-		 * constructed by the req-formation engine.
-		 *
-		 * \todo XXX it would make sense to add page-age tracking
-		 * semantics here, and to oblige the req-formation engine to
-		 * send the page out not later than it is too old.
-		 *
-		 * \see cl_page_cache_add()
-		 */
-		int  (*cpo_cache_add)(const struct lu_env *env,
-				      const struct cl_page_slice *slice,
-				      struct cl_io *io);
 	} io[CRT_NR];
 	/**
 	 * Tell transfer engine that only [to, from] part of a page should be
@@ -1098,9 +1037,8 @@ struct cl_page_operations {
  */
 #define CL_PAGE_DEBUG(mask, env, page, format, ...)		     \
 do {								    \
-	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		\
-									\
 	if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {		   \
+		LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);	\
 		cl_page_print(env, &msgdata, lu_cdebug_printer, page);  \
 		CDEBUG(mask, format, ## __VA_ARGS__);		  \
 	}							       \
@@ -1111,9 +1049,8 @@ do {								    \
  */
 #define CL_PAGE_HEADER(mask, env, page, format, ...)			  \
 do {									  \
-	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		      \
-									      \
 	if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {			 \
+		LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		\
 		cl_page_header_print(env, &msgdata, lu_cdebug_printer, page); \
 		CDEBUG(mask, format, ## __VA_ARGS__);			\
 	}								     \
@@ -1130,6 +1067,12 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
 #define cl_page_in_use(pg)       __page_in_use(pg, 1)
 #define cl_page_in_use_noref(pg) __page_in_use(pg, 0)
 
+static inline struct page *cl_page_vmpage(struct cl_page *page)
+{
+	LASSERT(page->cp_vmpage);
+	return page->cp_vmpage;
+}
+
 /** @} cl_page */
 
 /** \addtogroup cl_lock cl_lock
@@ -1150,12 +1093,6 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
  * (struct cl_lock) and a list of layers (struct cl_lock_slice), linked to
  * cl_lock::cll_layers list through cl_lock_slice::cls_linkage.
  *
- * All locks for a given object are linked into cl_object_header::coh_locks
- * list (protected by cl_object_header::coh_lock_guard spin-lock) through
- * cl_lock::cll_linkage. Currently this list is not sorted in any way. We can
- * sort it in starting lock offset, or use altogether different data structure
- * like a tree.
- *
  * Typical cl_lock consists of the two layers:
  *
  *     - vvp_lock (vvp specific data), and
@@ -1177,111 +1114,29 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
  *
  * LIFE CYCLE
  *
- * cl_lock is reference counted. When reference counter drops to 0, lock is
- * placed in the cache, except when lock is in CLS_FREEING state. CLS_FREEING
- * lock is destroyed when last reference is released. Referencing between
- * top-lock and its sub-locks is described in the lov documentation module.
- *
- * STATE MACHINE
- *
- * Also, cl_lock is a state machine. This requires some clarification. One of
- * the goals of client IO re-write was to make IO path non-blocking, or at
- * least to make it easier to make it non-blocking in the future. Here
- * `non-blocking' means that when a system call (read, write, truncate)
- * reaches a situation where it has to wait for a communication with the
- * server, it should --instead of waiting-- remember its current state and
- * switch to some other work.  E.g,. instead of waiting for a lock enqueue,
- * client should proceed doing IO on the next stripe, etc. Obviously this is
- * rather radical redesign, and it is not planned to be fully implemented at
- * this time, instead we are putting some infrastructure in place, that would
- * make it easier to do asynchronous non-blocking IO easier in the
- * future. Specifically, where old locking code goes to sleep (waiting for
- * enqueue, for example), new code returns cl_lock_transition::CLO_WAIT. When
- * enqueue reply comes, its completion handler signals that lock state-machine
- * is ready to transit to the next state. There is some generic code in
- * cl_lock.c that sleeps, waiting for these signals. As a result, for users of
- * this cl_lock.c code, it looks like locking is done in normal blocking
- * fashion, and it the same time it is possible to switch to the non-blocking
- * locking (simply by returning cl_lock_transition::CLO_WAIT from cl_lock.c
- * functions).
- *
- * For a description of state machine states and transitions see enum
- * cl_lock_state.
- *
- * There are two ways to restrict a set of states which lock might move to:
- *
- *     - placing a "hold" on a lock guarantees that lock will not be moved
- *       into cl_lock_state::CLS_FREEING state until hold is released. Hold
- *       can be only acquired on a lock that is not in
- *       cl_lock_state::CLS_FREEING. All holds on a lock are counted in
- *       cl_lock::cll_holds. Hold protects lock from cancellation and
- *       destruction. Requests to cancel and destroy a lock on hold will be
- *       recorded, but only honored when last hold on a lock is released;
- *
- *     - placing a "user" on a lock guarantees that lock will not leave
- *       cl_lock_state::CLS_NEW, cl_lock_state::CLS_QUEUING,
- *       cl_lock_state::CLS_ENQUEUED and cl_lock_state::CLS_HELD set of
- *       states, once it enters this set. That is, if a user is added onto a
- *       lock in a state not from this set, it doesn't immediately enforce
- *       lock to move to this set, but once lock enters this set it will
- *       remain there until all users are removed. Lock users are counted in
- *       cl_lock::cll_users.
- *
- *       User is used to assure that lock is not canceled or destroyed while
- *       it is being enqueued, or actively used by some IO.
- *
- *       Currently, a user always comes with a hold (cl_lock_invariant()
- *       checks that a number of holds is not less than a number of users).
- *
- * CONCURRENCY
- *
- * This is how lock state-machine operates. struct cl_lock contains a mutex
- * cl_lock::cll_guard that protects struct fields.
- *
- *     - mutex is taken, and cl_lock::cll_state is examined.
- *
- *     - for every state there are possible target states where lock can move
- *       into. They are tried in order. Attempts to move into next state are
- *       done by _try() functions in cl_lock.c:cl_{enqueue,unlock,wait}_try().
- *
- *     - if the transition can be performed immediately, state is changed,
- *       and mutex is released.
- *
- *     - if the transition requires blocking, _try() function returns
- *       cl_lock_transition::CLO_WAIT. Caller unlocks mutex and goes to
- *       sleep, waiting for possibility of lock state change. It is woken
- *       up when some event occurs, that makes lock state change possible
- *       (e.g., the reception of the reply from the server), and repeats
- *       the loop.
- *
- * Top-lock and sub-lock has separate mutexes and the latter has to be taken
- * first to avoid dead-lock.
- *
- * To see an example of interaction of all these issues, take a look at the
- * lov_cl.c:lov_lock_enqueue() function. It is called as a part of
- * cl_enqueue_try(), and tries to advance top-lock to ENQUEUED state, by
- * advancing state-machines of its sub-locks (lov_lock_enqueue_one()). Note
- * also, that it uses trylock to grab sub-lock mutex to avoid dead-lock. It
- * also has to handle CEF_ASYNC enqueue, when sub-locks enqueues have to be
- * done in parallel, rather than one after another (this is used for glimpse
- * locks, that cannot dead-lock).
+ * cl_lock is a cacheless data container for the requirements of locks to
+ * complete the IO. cl_lock is created before I/O starts and destroyed when the
+ * I/O is complete.
+ *
+ * cl_lock depends on LDLM lock to fulfill lock semantics. LDLM lock is attached
+ * to cl_lock at OSC layer. LDLM lock is still cacheable.
  *
  * INTERFACE AND USAGE
  *
- * struct cl_lock_operations provide a number of call-backs that are invoked
- * when events of interest occurs. Layers can intercept and handle glimpse,
- * blocking, cancel ASTs and a reception of the reply from the server.
+ * Two major methods are supported for cl_lock: clo_enqueue and clo_cancel.  A
+ * cl_lock is enqueued by cl_lock_request(), which will call clo_enqueue()
+ * methods for each layer to enqueue the lock. At the LOV layer, if a cl_lock
+ * consists of multiple sub cl_locks, each sub locks will be enqueued
+ * correspondingly. At OSC layer, the lock enqueue request will tend to reuse
+ * cached LDLM lock; otherwise a new LDLM lock will have to be requested from
+ * OST side.
  *
- * One important difference with the old client locking model is that new
- * client has a representation for the top-lock, whereas in the old code only
- * sub-locks existed as real data structures and file-level locks are
- * represented by "request sets" that are created and destroyed on each and
- * every lock creation.
+ * cl_lock_cancel() must be called to release a cl_lock after use. clo_cancel()
+ * method will be called for each layer to release the resource held by this
+ * lock. At OSC layer, the reference count of LDLM lock, which is held at
+ * clo_enqueue time, is released.
  *
- * Top-locks are cached, and can be found in the cache by the system calls. It
- * is possible that top-lock is in cache, but some of its sub-locks were
- * canceled and destroyed. In that case top-lock has to be enqueued again
- * before it can be used.
+ * LDLM lock can only be canceled if there is no cl_lock using it.
  *
  * Overall process of the locking during IO operation is as following:
  *
@@ -1294,7 +1149,7 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
  *
  *     - when all locks are acquired, IO is performed;
  *
- *     - locks are released into cache.
+ *     - locks are released after IO is complete.
  *
  * Striping introduces major additional complexity into locking. The
  * fundamental problem is that it is generally unsafe to actively use (hold)
@@ -1316,16 +1171,6 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
  * buf is a part of memory mapped Lustre file, a lock or locks protecting buf
  * has to be held together with the usual lock on [offset, offset + count].
  *
- * As multi-stripe locks have to be allowed, it makes sense to cache them, so
- * that, for example, a sequence of O_APPEND writes can proceed quickly
- * without going down to the individual stripes to do lock matching. On the
- * other hand, multi-stripe locks shouldn't be used by normal read/write
- * calls. To achieve this, every layer can implement ->clo_fits_into() method,
- * that is called by lock matching code (cl_lock_lookup()), and that can be
- * used to selectively disable matching of certain locks for certain IOs. For
- * example, lov layer implements lov_lock_fits_into() that allow multi-stripe
- * locks to be matched only for truncates and O_APPEND writes.
- *
  * Interaction with DLM
  *
  * In the expected setup, cl_lock is ultimately backed up by a collection of
@@ -1356,295 +1201,27 @@ struct cl_lock_descr {
 	__u32	     cld_enq_flags;
 };
 
-#define DDESCR "%s(%d):[%lu, %lu]"
+#define DDESCR "%s(%d):[%lu, %lu]:%x"
 #define PDESCR(descr)						   \
 	cl_lock_mode_name((descr)->cld_mode), (descr)->cld_mode,	\
-	(descr)->cld_start, (descr)->cld_end
+	(descr)->cld_start, (descr)->cld_end, (descr)->cld_enq_flags
 
 const char *cl_lock_mode_name(const enum cl_lock_mode mode);
 
 /**
- * Lock state-machine states.
- *
- * \htmlonly
- * <pre>
- *
- * Possible state transitions:
- *
- *	      +------------------>NEW
- *	      |		    |
- *	      |		    | cl_enqueue_try()
- *	      |		    |
- *	      |    cl_unuse_try()  V
- *	      |  +--------------QUEUING (*)
- *	      |  |		 |
- *	      |  |		 | cl_enqueue_try()
- *	      |  |		 |
- *	      |  | cl_unuse_try()  V
- *    sub-lock  |  +-------------ENQUEUED (*)
- *    canceled  |  |		 |
- *	      |  |		 | cl_wait_try()
- *	      |  |		 |
- *	      |  |		(R)
- *	      |  |		 |
- *	      |  |		 V
- *	      |  |		HELD<---------+
- *	      |  |		 |	    |
- *	      |  |		 |	    | cl_use_try()
- *	      |  |  cl_unuse_try() |	    |
- *	      |  |		 |	    |
- *	      |  |		 V	 ---+
- *	      |  +------------>INTRANSIT (D) <--+
- *	      |		    |	    |
- *	      |     cl_unuse_try() |	    | cached lock found
- *	      |		    |	    | cl_use_try()
- *	      |		    |	    |
- *	      |		    V	    |
- *	      +------------------CACHED---------+
- *				   |
- *				  (C)
- *				   |
- *				   V
- *				FREEING
- *
- * Legend:
- *
- *	 In states marked with (*) transition to the same state (i.e., a loop
- *	 in the diagram) is possible.
- *
- *	 (R) is the point where Receive call-back is invoked: it allows layers
- *	 to handle arrival of lock reply.
- *
- *	 (C) is the point where Cancellation call-back is invoked.
- *
- *	 (D) is the transit state which means the lock is changing.
- *
- *	 Transition to FREEING state is possible from any other state in the
- *	 diagram in case of unrecoverable error.
- * </pre>
- * \endhtmlonly
- *
- * These states are for individual cl_lock object. Top-lock and its sub-locks
- * can be in the different states. Another way to say this is that we have
- * nested state-machines.
- *
- * Separate QUEUING and ENQUEUED states are needed to support non-blocking
- * operation for locks with multiple sub-locks. Imagine lock on a file F, that
- * intersects 3 stripes S0, S1, and S2. To enqueue F client has to send
- * enqueue to S0, wait for its completion, then send enqueue for S1, wait for
- * its completion and at last enqueue lock for S2, and wait for its
- * completion. In that case, top-lock is in QUEUING state while S0, S1 are
- * handled, and is in ENQUEUED state after enqueue to S2 has been sent (note
- * that in this case, sub-locks move from state to state, and top-lock remains
- * in the same state).
- */
-enum cl_lock_state {
-	/**
-	 * Lock that wasn't yet enqueued
-	 */
-	CLS_NEW,
-	/**
-	 * Enqueue is in progress, blocking for some intermediate interaction
-	 * with the other side.
-	 */
-	CLS_QUEUING,
-	/**
-	 * Lock is fully enqueued, waiting for server to reply when it is
-	 * granted.
-	 */
-	CLS_ENQUEUED,
-	/**
-	 * Lock granted, actively used by some IO.
-	 */
-	CLS_HELD,
-	/**
-	 * This state is used to mark the lock is being used, or unused.
-	 * We need this state because the lock may have several sublocks,
-	 * so it's impossible to have an atomic way to bring all sublocks
-	 * into CLS_HELD state at use case, or all sublocks to CLS_CACHED
-	 * at unuse case.
-	 * If a thread is referring to a lock, and it sees the lock is in this
-	 * state, it must wait for the lock.
-	 * See state diagram for details.
-	 */
-	CLS_INTRANSIT,
-	/**
-	 * Lock granted, not used.
-	 */
-	CLS_CACHED,
-	/**
-	 * Lock is being destroyed.
-	 */
-	CLS_FREEING,
-	CLS_NR
-};
-
-enum cl_lock_flags {
-	/**
-	 * lock has been cancelled. This flag is never cleared once set (by
-	 * cl_lock_cancel0()).
-	 */
-	CLF_CANCELLED	= 1 << 0,
-	/** cancellation is pending for this lock. */
-	CLF_CANCELPEND	= 1 << 1,
-	/** destruction is pending for this lock. */
-	CLF_DOOMED	= 1 << 2,
-	/** from enqueue RPC reply upcall. */
-	CLF_FROM_UPCALL	= 1 << 3,
-};
-
-/**
- * Lock closure.
- *
- * Lock closure is a collection of locks (both top-locks and sub-locks) that
- * might be updated in a result of an operation on a certain lock (which lock
- * this is a closure of).
- *
- * Closures are needed to guarantee dead-lock freedom in the presence of
- *
- *     - nested state-machines (top-lock state-machine composed of sub-lock
- *       state-machines), and
- *
- *     - shared sub-locks.
- *
- * Specifically, many operations, such as lock enqueue, wait, unlock,
- * etc. start from a top-lock, and then operate on a sub-locks of this
- * top-lock, holding a top-lock mutex. When sub-lock state changes as a result
- * of such operation, this change has to be propagated to all top-locks that
- * share this sub-lock. Obviously, no natural lock ordering (e.g.,
- * top-to-bottom or bottom-to-top) captures this scenario, so try-locking has
- * to be used. Lock closure systematizes this try-and-repeat logic.
- */
-struct cl_lock_closure {
-	/**
-	 * Lock that is mutexed when closure construction is started. When
-	 * closure in is `wait' mode (cl_lock_closure::clc_wait), mutex on
-	 * origin is released before waiting.
-	 */
-	struct cl_lock   *clc_origin;
-	/**
-	 * List of enclosed locks, so far. Locks are linked here through
-	 * cl_lock::cll_inclosure.
-	 */
-	struct list_head	clc_list;
-	/**
-	 * True iff closure is in a `wait' mode. This determines what
-	 * cl_lock_enclosure() does when a lock L to be added to the closure
-	 * is currently mutexed by some other thread.
-	 *
-	 * If cl_lock_closure::clc_wait is not set, then closure construction
-	 * fails with CLO_REPEAT immediately.
-	 *
-	 * In wait mode, cl_lock_enclosure() waits until next attempt to build
-	 * a closure might succeed. To this end it releases an origin mutex
-	 * (cl_lock_closure::clc_origin), that has to be the only lock mutex
-	 * owned by the current thread, and then waits on L mutex (by grabbing
-	 * it and immediately releasing), before returning CLO_REPEAT to the
-	 * caller.
-	 */
-	int	       clc_wait;
-	/** Number of locks in the closure. */
-	int	       clc_nr;
-};
-
-/**
  * Layered client lock.
  */
 struct cl_lock {
-	/** Reference counter. */
-	atomic_t	  cll_ref;
 	/** List of slices. Immutable after creation. */
 	struct list_head	    cll_layers;
-	/**
-	 * Linkage into cl_lock::cll_descr::cld_obj::coh_locks list. Protected
-	 * by cl_lock::cll_descr::cld_obj::coh_lock_guard.
-	 */
-	struct list_head	    cll_linkage;
-	/**
-	 * Parameters of this lock. Protected by
-	 * cl_lock::cll_descr::cld_obj::coh_lock_guard nested within
-	 * cl_lock::cll_guard. Modified only on lock creation and in
-	 * cl_lock_modify().
-	 */
+	/** lock attribute, extent, cl_object, etc. */
 	struct cl_lock_descr  cll_descr;
-	/** Protected by cl_lock::cll_guard. */
-	enum cl_lock_state    cll_state;
-	/** signals state changes. */
-	wait_queue_head_t	   cll_wq;
-	/**
-	 * Recursive lock, most fields in cl_lock{} are protected by this.
-	 *
-	 * Locking rules: this mutex is never held across network
-	 * communication, except when lock is being canceled.
-	 *
-	 * Lock ordering: a mutex of a sub-lock is taken first, then a mutex
-	 * on a top-lock. Other direction is implemented through a
-	 * try-lock-repeat loop. Mutices of unrelated locks can be taken only
-	 * by try-locking.
-	 *
-	 * \see osc_lock_enqueue_wait(), lov_lock_cancel(), lov_sublock_wait().
-	 */
-	struct mutex		cll_guard;
-	struct task_struct	*cll_guarder;
-	int		   cll_depth;
-
-	/**
-	 * the owner for INTRANSIT state
-	 */
-	struct task_struct	*cll_intransit_owner;
-	int		   cll_error;
-	/**
-	 * Number of holds on a lock. A hold prevents a lock from being
-	 * canceled and destroyed. Protected by cl_lock::cll_guard.
-	 *
-	 * \see cl_lock_hold(), cl_lock_unhold(), cl_lock_release()
-	 */
-	int		   cll_holds;
-	 /**
-	  * Number of lock users. Valid in cl_lock_state::CLS_HELD state
-	  * only. Lock user pins lock in CLS_HELD state. Protected by
-	  * cl_lock::cll_guard.
-	  *
-	  * \see cl_wait(), cl_unuse().
-	  */
-	int		   cll_users;
-	/**
-	 * Flag bit-mask. Values from enum cl_lock_flags. Updates are
-	 * protected by cl_lock::cll_guard.
-	 */
-	unsigned long	 cll_flags;
-	/**
-	 * A linkage into a list of locks in a closure.
-	 *
-	 * \see cl_lock_closure
-	 */
-	struct list_head	    cll_inclosure;
-	/**
-	 * Confict lock at queuing time.
-	 */
-	struct cl_lock       *cll_conflict;
-	/**
-	 * A list of references to this lock, for debugging.
-	 */
-	struct lu_ref	 cll_reference;
-	/**
-	 * A list of holds on this lock, for debugging.
-	 */
-	struct lu_ref	 cll_holders;
-	/**
-	 * A reference for cl_lock::cll_descr::cld_obj. For debugging.
-	 */
-	struct lu_ref_link    cll_obj_ref;
-#ifdef CONFIG_LOCKDEP
-	/* "dep_map" name is assumed by lockdep.h macros. */
-	struct lockdep_map    dep_map;
-#endif
 };
 
 /**
  * Per-layer part of cl_lock
  *
- * \see ccc_lock, lov_lock, lovsub_lock, osc_lock
+ * \see vvp_lock, lov_lock, lovsub_lock, osc_lock
  */
 struct cl_lock_slice {
 	struct cl_lock		  *cls_lock;
@@ -1658,174 +1235,36 @@ struct cl_lock_slice {
 };
 
 /**
- * Possible (non-error) return values of ->clo_{enqueue,wait,unlock}().
- *
- * NOTE: lov_subresult() depends on ordering here.
- */
-enum cl_lock_transition {
-	/** operation cannot be completed immediately. Wait for state change. */
-	CLO_WAIT	= 1,
-	/** operation had to release lock mutex, restart. */
-	CLO_REPEAT      = 2,
-	/** lower layer re-enqueued. */
-	CLO_REENQUEUED  = 3,
-};
-
-/**
  *
  * \see vvp_lock_ops, lov_lock_ops, lovsub_lock_ops, osc_lock_ops
  */
 struct cl_lock_operations {
-	/**
-	 * \name statemachine
-	 *
-	 * State machine transitions. These 3 methods are called to transfer
-	 * lock from one state to another, as described in the commentary
-	 * above enum #cl_lock_state.
-	 *
-	 * \retval 0	  this layer has nothing more to do to before
-	 *		       transition to the target state happens;
-	 *
-	 * \retval CLO_REPEAT method had to release and re-acquire cl_lock
-	 *		    mutex, repeat invocation of transition method
-	 *		    across all layers;
-	 *
-	 * \retval CLO_WAIT   this layer cannot move to the target state
-	 *		    immediately, as it has to wait for certain event
-	 *		    (e.g., the communication with the server). It
-	 *		    is guaranteed, that when the state transfer
-	 *		    becomes possible, cl_lock::cll_wq wait-queue
-	 *		    is signaled. Caller can wait for this event by
-	 *		    calling cl_lock_state_wait();
-	 *
-	 * \retval -ve	failure, abort state transition, move the lock
-	 *		    into cl_lock_state::CLS_FREEING state, and set
-	 *		    cl_lock::cll_error.
-	 *
-	 * Once all layers voted to agree to transition (by returning 0), lock
-	 * is moved into corresponding target state. All state transition
-	 * methods are optional.
-	 */
 	/** @{ */
 	/**
 	 * Attempts to enqueue the lock. Called top-to-bottom.
 	 *
-	 * \see ccc_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(),
+	 * \retval 0	this layer has enqueued the lock successfully
+	 * \retval >0	this layer has enqueued the lock, but need to wait on
+	 *		@anchor for resources
+	 * \retval -ve	failure
+	 *
+	 * \see vvp_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(),
 	 * \see osc_lock_enqueue()
 	 */
 	int  (*clo_enqueue)(const struct lu_env *env,
 			    const struct cl_lock_slice *slice,
-			    struct cl_io *io, __u32 enqflags);
-	/**
-	 * Attempts to wait for enqueue result. Called top-to-bottom.
-	 *
-	 * \see ccc_lock_wait(), lov_lock_wait(), osc_lock_wait()
-	 */
-	int  (*clo_wait)(const struct lu_env *env,
-			 const struct cl_lock_slice *slice);
+			    struct cl_io *io, struct cl_sync_io *anchor);
 	/**
-	 * Attempts to unlock the lock. Called bottom-to-top. In addition to
-	 * usual return values of lock state-machine methods, this can return
-	 * -ESTALE to indicate that lock cannot be returned to the cache, and
-	 * has to be re-initialized.
-	 * unuse is a one-shot operation, so it must NOT return CLO_WAIT.
-	 *
-	 * \see ccc_lock_unuse(), lov_lock_unuse(), osc_lock_unuse()
-	 */
-	int  (*clo_unuse)(const struct lu_env *env,
-			  const struct cl_lock_slice *slice);
-	/**
-	 * Notifies layer that cached lock is started being used.
-	 *
-	 * \pre lock->cll_state == CLS_CACHED
-	 *
-	 * \see lov_lock_use(), osc_lock_use()
-	 */
-	int  (*clo_use)(const struct lu_env *env,
-			const struct cl_lock_slice *slice);
-	/** @} statemachine */
-	/**
-	 * A method invoked when lock state is changed (as a result of state
-	 * transition). This is used, for example, to track when the state of
-	 * a sub-lock changes, to propagate this change to the corresponding
-	 * top-lock. Optional
-	 *
-	 * \see lovsub_lock_state()
-	 */
-	void (*clo_state)(const struct lu_env *env,
-			  const struct cl_lock_slice *slice,
-			  enum cl_lock_state st);
-	/**
-	 * Returns true, iff given lock is suitable for the given io, idea
-	 * being, that there are certain "unsafe" locks, e.g., ones acquired
-	 * for O_APPEND writes, that we don't want to re-use for a normal
-	 * write, to avoid the danger of cascading evictions. Optional. Runs
-	 * under cl_object_header::coh_lock_guard.
-	 *
-	 * XXX this should take more information about lock needed by
-	 * io. Probably lock description or something similar.
-	 *
-	 * \see lov_fits_into()
-	 */
-	int (*clo_fits_into)(const struct lu_env *env,
-			     const struct cl_lock_slice *slice,
-			     const struct cl_lock_descr *need,
-			     const struct cl_io *io);
-	/**
-	 * \name ast
-	 * Asynchronous System Traps. All of then are optional, all are
-	 * executed bottom-to-top.
-	 */
-	/** @{ */
-
-	/**
-	 * Cancellation callback. Cancel a lock voluntarily, or under
-	 * the request of server.
+	 * Cancel a lock, release its DLM lock ref, while does not cancel the
+	 * DLM lock
 	 */
 	void (*clo_cancel)(const struct lu_env *env,
 			   const struct cl_lock_slice *slice);
-	/**
-	 * Lock weighting ast. Executed to estimate how precious this lock
-	 * is. The sum of results across all layers is used to determine
-	 * whether lock worth keeping in cache given present memory usage.
-	 *
-	 * \see osc_lock_weigh(), vvp_lock_weigh(), lovsub_lock_weigh().
-	 */
-	unsigned long (*clo_weigh)(const struct lu_env *env,
-				   const struct cl_lock_slice *slice);
-	/** @} ast */
-
-	/**
-	 * \see lovsub_lock_closure()
-	 */
-	int (*clo_closure)(const struct lu_env *env,
-			   const struct cl_lock_slice *slice,
-			   struct cl_lock_closure *closure);
-	/**
-	 * Executed bottom-to-top when lock description changes (e.g., as a
-	 * result of server granting more generous lock than was requested).
-	 *
-	 * \see lovsub_lock_modify()
-	 */
-	int (*clo_modify)(const struct lu_env *env,
-			  const struct cl_lock_slice *slice,
-			  const struct cl_lock_descr *updated);
-	/**
-	 * Notifies layers (bottom-to-top) that lock is going to be
-	 * destroyed. Responsibility of layers is to prevent new references on
-	 * this lock from being acquired once this method returns.
-	 *
-	 * This can be called multiple times due to the races.
-	 *
-	 * \see cl_lock_delete()
-	 * \see osc_lock_delete(), lovsub_lock_delete()
-	 */
-	void (*clo_delete)(const struct lu_env *env,
-			   const struct cl_lock_slice *slice);
+	/** @} */
 	/**
 	 * Destructor. Frees resources and the slice.
 	 *
-	 * \see ccc_lock_fini(), lov_lock_fini(), lovsub_lock_fini(),
+	 * \see vvp_lock_fini(), lov_lock_fini(), lovsub_lock_fini(),
 	 * \see osc_lock_fini()
 	 */
 	void (*clo_fini)(const struct lu_env *env, struct cl_lock_slice *slice);
@@ -2016,7 +1455,7 @@ enum cl_io_state {
  * This is usually embedded into layer session data, rather than allocated
  * dynamically.
  *
- * \see vvp_io, lov_io, osc_io, ccc_io
+ * \see vvp_io, lov_io, osc_io
  */
 struct cl_io_slice {
 	struct cl_io		  *cis_io;
@@ -2031,6 +1470,8 @@ struct cl_io_slice {
 	struct list_head		     cis_linkage;
 };
 
+typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *,
+			      struct cl_page *);
 /**
  * Per-layer io operations.
  * \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops
@@ -2114,7 +1555,7 @@ struct cl_io_operations {
 		void (*cio_fini)(const struct lu_env *env,
 				 const struct cl_io_slice *slice);
 	} op[CIT_OP_NR];
-	struct {
+
 		/**
 		 * Submit pages from \a queue->c2_qin for IO, and move
 		 * successfully submitted pages into \a queue->c2_qout. Return
@@ -2127,7 +1568,15 @@ struct cl_io_operations {
 				   const struct cl_io_slice *slice,
 				   enum cl_req_type crt,
 				   struct cl_2queue *queue);
-	} req_op[CRT_NR];
+	/**
+	 * Queue async page for write.
+	 * The difference between cio_submit and cio_queue is that
+	 * cio_submit is for urgent request.
+	 */
+	int  (*cio_commit_async)(const struct lu_env *env,
+				 const struct cl_io_slice *slice,
+				 struct cl_page_list *queue, int from, int to,
+				 cl_commit_cbt cb);
 	/**
 	 * Read missing page.
 	 *
@@ -2140,31 +1589,6 @@ struct cl_io_operations {
 			     const struct cl_io_slice *slice,
 			     const struct cl_page_slice *page);
 	/**
-	 * Prepare write of a \a page. Called bottom-to-top by a top-level
-	 * cl_io_operations::op[CIT_WRITE]::cio_start() to prepare page for
-	 * get data from user-level buffer.
-	 *
-	 * \pre io->ci_type == CIT_WRITE
-	 *
-	 * \see vvp_io_prepare_write(), lov_io_prepare_write(),
-	 * osc_io_prepare_write().
-	 */
-	int (*cio_prepare_write)(const struct lu_env *env,
-				 const struct cl_io_slice *slice,
-				 const struct cl_page_slice *page,
-				 unsigned from, unsigned to);
-	/**
-	 *
-	 * \pre io->ci_type == CIT_WRITE
-	 *
-	 * \see vvp_io_commit_write(), lov_io_commit_write(),
-	 * osc_io_commit_write().
-	 */
-	int (*cio_commit_write)(const struct lu_env *env,
-				const struct cl_io_slice *slice,
-				const struct cl_page_slice *page,
-				unsigned from, unsigned to);
-	/**
 	 * Optional debugging helper. Print given io slice.
 	 */
 	int (*cio_print)(const struct lu_env *env, void *cookie,
@@ -2216,9 +1640,13 @@ enum cl_enq_flags {
 	 */
 	CEF_AGL	  = 0x00000020,
 	/**
+	 * enqueue a lock to test DLM lock existence.
+	 */
+	CEF_PEEK	= 0x00000040,
+	/**
 	 * mask of enq_flags.
 	 */
-	CEF_MASK	 = 0x0000003f,
+	CEF_MASK         = 0x0000007f,
 };
 
 /**
@@ -2228,12 +1656,12 @@ enum cl_enq_flags {
 struct cl_io_lock_link {
 	/** linkage into one of cl_lockset lists. */
 	struct list_head	   cill_linkage;
-	struct cl_lock_descr cill_descr;
-	struct cl_lock      *cill_lock;
+	struct cl_lock          cill_lock;
 	/** optional destructor */
 	void	       (*cill_fini)(const struct lu_env *env,
 				    struct cl_io_lock_link *link);
 };
+#define cill_descr	cill_lock.cll_descr
 
 /**
  * Lock-set represents a collection of locks, that io needs at a
@@ -2267,8 +1695,6 @@ struct cl_io_lock_link {
 struct cl_lockset {
 	/** locks to be acquired. */
 	struct list_head  cls_todo;
-	/** locks currently being processed. */
-	struct list_head  cls_curr;
 	/** locks acquired. */
 	struct list_head  cls_done;
 };
@@ -2632,9 +2058,7 @@ struct cl_site {
 	 * and top-locks (and top-pages) are accounted here.
 	 */
 	struct cache_stats    cs_pages;
-	struct cache_stats    cs_locks;
 	atomic_t	  cs_pages_state[CPS_NR];
-	atomic_t	  cs_locks_state[CLS_NR];
 };
 
 int  cl_site_init(struct cl_site *s, struct cl_device *top);
@@ -2725,7 +2149,7 @@ static inline void cl_device_fini(struct cl_device *d)
 }
 
 void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
-		       struct cl_object *obj,
+		       struct cl_object *obj, pgoff_t index,
 		       const struct cl_page_operations *ops);
 void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
 		       struct cl_object *obj,
@@ -2758,7 +2182,7 @@ int  cl_object_glimpse(const struct lu_env *env, struct cl_object *obj,
 		       struct ost_lvb *lvb);
 int  cl_conf_set(const struct lu_env *env, struct cl_object *obj,
 		 const struct cl_object_conf *conf);
-void cl_object_prune(const struct lu_env *env, struct cl_object *obj);
+int cl_object_prune(const struct lu_env *env, struct cl_object *obj);
 void cl_object_kill(const struct lu_env *env, struct cl_object *obj);
 
 /**
@@ -2772,7 +2196,7 @@ static inline int cl_object_same(struct cl_object *o0, struct cl_object *o1)
 static inline void cl_object_page_init(struct cl_object *clob, int size)
 {
 	clob->co_slice_off = cl_object_header(clob)->coh_page_bufsize;
-	cl_object_header(clob)->coh_page_bufsize += ALIGN(size, 8);
+	cl_object_header(clob)->coh_page_bufsize += cfs_size_round(size);
 }
 
 static inline void *cl_object_page_slice(struct cl_object *clob,
@@ -2781,6 +2205,16 @@ static inline void *cl_object_page_slice(struct cl_object *clob,
 	return (void *)((char *)page + clob->co_slice_off);
 }
 
+/**
+ * Return refcount of cl_object.
+ */
+static inline int cl_object_refc(struct cl_object *clob)
+{
+	struct lu_object_header *header = clob->co_lu.lo_header;
+
+	return atomic_read(&header->loh_ref);
+}
+
 /** @} cl_object */
 
 /** \defgroup cl_page cl_page
@@ -2794,28 +2228,20 @@ enum {
 };
 
 /* callback of cl_page_gang_lookup() */
-typedef int   (*cl_page_gang_cb_t)  (const struct lu_env *, struct cl_io *,
-				     struct cl_page *, void *);
-int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj,
-			struct cl_io *io, pgoff_t start, pgoff_t end,
-			cl_page_gang_cb_t cb, void *cbdata);
-struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index);
 struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *obj,
 			     pgoff_t idx, struct page *vmpage,
 			     enum cl_page_type type);
-struct cl_page *cl_page_find_sub(const struct lu_env *env,
-				 struct cl_object *obj,
-				 pgoff_t idx, struct page *vmpage,
-				     struct cl_page *parent);
+struct cl_page *cl_page_alloc(const struct lu_env *env,
+			      struct cl_object *o, pgoff_t ind,
+			      struct page *vmpage,
+			      enum cl_page_type type);
 void cl_page_get(struct cl_page *page);
 void cl_page_put(const struct lu_env *env, struct cl_page *page);
 void cl_page_print(const struct lu_env *env, void *cookie, lu_printer_t printer,
 		   const struct cl_page *pg);
 void cl_page_header_print(const struct lu_env *env, void *cookie,
 			  lu_printer_t printer, const struct cl_page *pg);
-struct page *cl_page_vmpage(const struct lu_env *env, struct cl_page *page);
 struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj);
-struct cl_page *cl_page_top(struct cl_page *page);
 
 const struct cl_page_slice *cl_page_at(const struct cl_page *page,
 				       const struct lu_device_type *dtype);
@@ -2872,12 +2298,10 @@ int cl_page_flush(const struct lu_env *env, struct cl_io *io,
 void cl_page_discard(const struct lu_env *env, struct cl_io *io,
 		     struct cl_page *pg);
 void cl_page_delete(const struct lu_env *env, struct cl_page *pg);
-int cl_page_unmap(const struct lu_env *env, struct cl_io *io,
-		  struct cl_page *pg);
 int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg);
 void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate);
 int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
-			  struct cl_page *page);
+			  struct cl_page *page, pgoff_t *max_index);
 loff_t cl_offset(const struct cl_object *obj, pgoff_t idx);
 pgoff_t cl_index(const struct cl_object *obj, loff_t offset);
 int cl_page_size(const struct cl_object *obj);
@@ -2890,138 +2314,74 @@ void cl_lock_descr_print(const struct lu_env *env, void *cookie,
 			 const struct cl_lock_descr *descr);
 /* @} helper */
 
-/** @} cl_page */
-
-/** \defgroup cl_lock cl_lock
- * @{
+/**
+ * Data structure managing a client's cached pages. A count of
+ * "unstable" pages is maintained, and an LRU of clean pages is
+ * maintained. "unstable" pages are pages pinned by the ptlrpc
+ * layer for recovery purposes.
  */
+struct cl_client_cache {
+	/**
+	 * # of client cache refcount
+	 * # of users (OSCs) + 2 (held by llite and lov)
+	 */
+	atomic_t		ccc_users;
+	/**
+	 * # of threads are doing shrinking
+	 */
+	unsigned int		ccc_lru_shrinkers;
+	/**
+	 * # of LRU entries available
+	 */
+	atomic_t		ccc_lru_left;
+	/**
+	 * List of entities(OSCs) for this LRU cache
+	 */
+	struct list_head	ccc_lru;
+	/**
+	 * Max # of LRU entries
+	 */
+	unsigned long		ccc_lru_max;
+	/**
+	 * Lock to protect ccc_lru list
+	 */
+	spinlock_t		ccc_lru_lock;
+	/**
+	 * # of unstable pages for this mount point
+	 */
+	atomic_t		ccc_unstable_nr;
+	/**
+	 * Waitq for awaiting unstable pages to reach zero.
+	 * Used at umounting time and signaled on BRW commit
+	 */
+        wait_queue_head_t	ccc_unstable_waitq;
 
-struct cl_lock *cl_lock_hold(const struct lu_env *env, const struct cl_io *io,
-			     const struct cl_lock_descr *need,
-			     const char *scope, const void *source);
-struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io,
-			     const struct cl_lock_descr *need,
-			     const char *scope, const void *source);
-struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io,
-				const struct cl_lock_descr *need,
-				const char *scope, const void *source);
-struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env,
-				 struct cl_object *obj, pgoff_t index,
-				 struct cl_lock *except, int pending,
-				 int canceld);
-static inline struct cl_lock *cl_lock_at_page(const struct lu_env *env,
-					      struct cl_object *obj,
-					      struct cl_page *page,
-					      struct cl_lock *except,
-					      int pending, int canceld)
-{
-	LASSERT(cl_object_header(obj) == cl_object_header(page->cp_obj));
-	return cl_lock_at_pgoff(env, obj, page->cp_index, except,
-				pending, canceld);
-}
-
-const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
-				       const struct lu_device_type *dtype);
-
-void cl_lock_get(struct cl_lock *lock);
-void cl_lock_get_trust(struct cl_lock *lock);
-void cl_lock_put(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_hold_add(const struct lu_env *env, struct cl_lock *lock,
-		      const char *scope, const void *source);
-void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock,
-			  const char *scope, const void *source);
-void cl_lock_unhold(const struct lu_env *env, struct cl_lock *lock,
-		    const char *scope, const void *source);
-void cl_lock_release(const struct lu_env *env, struct cl_lock *lock,
-		     const char *scope, const void *source);
-void cl_lock_user_add(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_user_del(const struct lu_env *env, struct cl_lock *lock);
+};
 
-int cl_lock_is_intransit(struct cl_lock *lock);
+/**
+ * cl_cache functions
+ */
+struct cl_client_cache *cl_cache_init(unsigned long lru_page_max);
+void cl_cache_incref(struct cl_client_cache *cache);
+void cl_cache_decref(struct cl_client_cache *cache);
 
-int cl_lock_enqueue_wait(const struct lu_env *env, struct cl_lock *lock,
-			 int keep_mutex);
+/** @} cl_page */
 
-/** \name statemachine statemachine
- * Interface to lock state machine consists of 3 parts:
- *
- *     - "try" functions that attempt to effect a state transition. If state
- *     transition is not possible right now (e.g., if it has to wait for some
- *     asynchronous event to occur), these functions return
- *     cl_lock_transition::CLO_WAIT.
- *
- *     - "non-try" functions that implement synchronous blocking interface on
- *     top of non-blocking "try" functions. These functions repeatedly call
- *     corresponding "try" versions, and if state transition is not possible
- *     immediately, wait for lock state change.
- *
- *     - methods from cl_lock_operations, called by "try" functions. Lock can
- *     be advanced to the target state only when all layers voted that they
- *     are ready for this transition. "Try" functions call methods under lock
- *     mutex. If a layer had to release a mutex, it re-acquires it and returns
- *     cl_lock_transition::CLO_REPEAT, causing "try" function to call all
- *     layers again.
- *
- * TRY	      NON-TRY      METHOD			    FINAL STATE
- *
- * cl_enqueue_try() cl_enqueue() cl_lock_operations::clo_enqueue() CLS_ENQUEUED
- *
- * cl_wait_try()    cl_wait()    cl_lock_operations::clo_wait()    CLS_HELD
- *
- * cl_unuse_try()   cl_unuse()   cl_lock_operations::clo_unuse()   CLS_CACHED
- *
- * cl_use_try()     NONE	 cl_lock_operations::clo_use()     CLS_HELD
- *
+/** \defgroup cl_lock cl_lock
  * @{
  */
 
-int cl_wait(const struct lu_env *env, struct cl_lock *lock);
-void cl_unuse(const struct lu_env *env, struct cl_lock *lock);
-int cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock,
-		   struct cl_io *io, __u32 flags);
-int cl_unuse_try(const struct lu_env *env, struct cl_lock *lock);
-int cl_wait_try(const struct lu_env *env, struct cl_lock *lock);
-int cl_use_try(const struct lu_env *env, struct cl_lock *lock, int atomic);
-
-/** @} statemachine */
-
-void cl_lock_signal(const struct lu_env *env, struct cl_lock *lock);
-int cl_lock_state_wait(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_state_set(const struct lu_env *env, struct cl_lock *lock,
-		       enum cl_lock_state state);
-int cl_queue_match(const struct list_head *queue,
-		   const struct cl_lock_descr *need);
-
-void cl_lock_mutex_get(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_mutex_put(const struct lu_env *env, struct cl_lock *lock);
-int cl_lock_is_mutexed(struct cl_lock *lock);
-int cl_lock_nr_mutexed(const struct lu_env *env);
-int cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock);
-int cl_lock_ext_match(const struct cl_lock_descr *has,
-		      const struct cl_lock_descr *need);
-int cl_lock_descr_match(const struct cl_lock_descr *has,
-			const struct cl_lock_descr *need);
-int cl_lock_mode_match(enum cl_lock_mode has, enum cl_lock_mode need);
-int cl_lock_modify(const struct lu_env *env, struct cl_lock *lock,
-		   const struct cl_lock_descr *desc);
-
-void cl_lock_closure_init(const struct lu_env *env,
-			  struct cl_lock_closure *closure,
-			  struct cl_lock *origin, int wait);
-void cl_lock_closure_fini(struct cl_lock_closure *closure);
-int cl_lock_closure_build(const struct lu_env *env, struct cl_lock *lock,
-			  struct cl_lock_closure *closure);
-void cl_lock_disclosure(const struct lu_env *env,
-			struct cl_lock_closure *closure);
-int cl_lock_enclosure(const struct lu_env *env, struct cl_lock *lock,
-		      struct cl_lock_closure *closure);
-
+int cl_lock_request(const struct lu_env *env, struct cl_io *io,
+		    struct cl_lock *lock);
+int cl_lock_init(const struct lu_env *env, struct cl_lock *lock,
+		 const struct cl_io *io);
+void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock);
+const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
+				       const struct lu_device_type *dtype);
+void cl_lock_release(const struct lu_env *env, struct cl_lock *lock);
+int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io,
+		    struct cl_lock *lock, struct cl_sync_io *anchor);
 void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_error(const struct lu_env *env, struct cl_lock *lock, int error);
-void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int wait);
-
-unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock);
 
 /** @} cl_lock */
 
@@ -3050,15 +2410,14 @@ int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io,
 			 struct cl_lock_descr *descr);
 int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
 		    struct cl_page *page);
-int cl_io_prepare_write(const struct lu_env *env, struct cl_io *io,
-			struct cl_page *page, unsigned from, unsigned to);
-int cl_io_commit_write(const struct lu_env *env, struct cl_io *io,
-		       struct cl_page *page, unsigned from, unsigned to);
 int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
 		    enum cl_req_type iot, struct cl_2queue *queue);
 int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
 		      enum cl_req_type iot, struct cl_2queue *queue,
 		      long timeout);
+int cl_io_commit_async(const struct lu_env *env, struct cl_io *io,
+		       struct cl_page_list *queue, int from, int to,
+		       cl_commit_cbt cb);
 int cl_io_is_going(const struct lu_env *env);
 
 /**
@@ -3114,6 +2473,12 @@ static inline struct cl_page *cl_page_list_last(struct cl_page_list *plist)
 	return list_entry(plist->pl_pages.prev, struct cl_page, cp_batch);
 }
 
+static inline struct cl_page *cl_page_list_first(struct cl_page_list *plist)
+{
+	LASSERT(plist->pl_nr > 0);
+	return list_entry(plist->pl_pages.next, struct cl_page, cp_batch);
+}
+
 /**
  * Iterate over pages in a page list.
  */
@@ -3130,9 +2495,14 @@ void cl_page_list_init(struct cl_page_list *plist);
 void cl_page_list_add(struct cl_page_list *plist, struct cl_page *page);
 void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src,
 		       struct cl_page *page);
+void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src,
+			    struct cl_page *page);
 void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head);
+void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist,
+		      struct cl_page *page);
 void cl_page_list_disown(const struct lu_env *env,
 			 struct cl_io *io, struct cl_page_list *plist);
+void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist);
 
 void cl_2queue_init(struct cl_2queue *queue);
 void cl_2queue_disown(const struct lu_env *env,
@@ -3177,13 +2547,18 @@ struct cl_sync_io {
 	atomic_t		csi_barrier;
 	/** completion to be signaled when transfer is complete. */
 	wait_queue_head_t		csi_waitq;
+	/** callback to invoke when this IO is finished */
+	void			(*csi_end_io)(const struct lu_env *,
+					      struct cl_sync_io *);
 };
 
-void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages);
-int  cl_sync_io_wait(const struct lu_env *env, struct cl_io *io,
-		     struct cl_page_list *queue, struct cl_sync_io *anchor,
+void cl_sync_io_init(struct cl_sync_io *anchor, int nr,
+		     void (*end)(const struct lu_env *, struct cl_sync_io *));
+int  cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor,
 		     long timeout);
-void cl_sync_io_note(struct cl_sync_io *anchor, int ioret);
+void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor,
+		     int ioret);
+void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor);
 
 /** @} cl_sync_io */
 
@@ -3241,6 +2616,9 @@ void *cl_env_reenter(void);
 void cl_env_reexit(void *cookie);
 void cl_env_implant(struct lu_env *env, int *refcheck);
 void cl_env_unplant(struct lu_env *env, int *refcheck);
+unsigned int cl_env_cache_purge(unsigned int nr);
+struct lu_env *cl_env_percpu_get(void);
+void cl_env_percpu_put(struct lu_env *env);
 
 /** @} cl_env */
 
diff --git a/drivers/staging/lustre/lustre/include/interval_tree.h b/drivers/staging/lustre/lustre/include/interval_tree.h
index f6df3f33e770..4a15228b5570 100644
--- a/drivers/staging/lustre/lustre/include/interval_tree.h
+++ b/drivers/staging/lustre/lustre/include/interval_tree.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/include/lclient.h b/drivers/staging/lustre/lustre/include/lclient.h
deleted file mode 100644
index 5d839a9f789f..000000000000
--- a/drivers/staging/lustre/lustre/include/lclient.h
+++ /dev/null
@@ -1,408 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Definitions shared between vvp and liblustre, and other clients in the
- * future.
- *
- *   Author: Oleg Drokin <oleg.drokin@sun.com>
- *   Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#ifndef LCLIENT_H
-#define LCLIENT_H
-
-blkcnt_t dirty_cnt(struct inode *inode);
-
-int cl_glimpse_size0(struct inode *inode, int agl);
-int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
-		    struct inode *inode, struct cl_object *clob, int agl);
-
-static inline int cl_glimpse_size(struct inode *inode)
-{
-	return cl_glimpse_size0(inode, 0);
-}
-
-static inline int cl_agl(struct inode *inode)
-{
-	return cl_glimpse_size0(inode, 1);
-}
-
-/**
- * Locking policy for setattr.
- */
-enum ccc_setattr_lock_type {
-	/** Locking is done by server */
-	SETATTR_NOLOCK,
-	/** Extent lock is enqueued */
-	SETATTR_EXTENT_LOCK,
-	/** Existing local extent lock is used */
-	SETATTR_MATCH_LOCK
-};
-
-/**
- * IO state private to vvp or slp layers.
- */
-struct ccc_io {
-	/** super class */
-	struct cl_io_slice     cui_cl;
-	struct cl_io_lock_link cui_link;
-	/**
-	 * I/O vector information to or from which read/write is going.
-	 */
-	struct iov_iter *cui_iter;
-	/**
-	 * Total size for the left IO.
-	 */
-	size_t cui_tot_count;
-
-	union {
-		struct {
-			enum ccc_setattr_lock_type cui_local_lock;
-		} setattr;
-	} u;
-	/**
-	 * True iff io is processing glimpse right now.
-	 */
-	int		  cui_glimpse;
-	/**
-	 * Layout version when this IO is initialized
-	 */
-	__u32		cui_layout_gen;
-	/**
-	 * File descriptor against which IO is done.
-	 */
-	struct ll_file_data *cui_fd;
-	struct kiocb *cui_iocb;
-};
-
-/**
- * True, if \a io is a normal io, False for splice_{read,write}.
- * must be implemented in arch specific code.
- */
-int cl_is_normalio(const struct lu_env *env, const struct cl_io *io);
-
-extern struct lu_context_key ccc_key;
-extern struct lu_context_key ccc_session_key;
-
-struct ccc_thread_info {
-	struct cl_lock_descr cti_descr;
-	struct cl_io	 cti_io;
-	struct cl_attr       cti_attr;
-};
-
-static inline struct ccc_thread_info *ccc_env_info(const struct lu_env *env)
-{
-	struct ccc_thread_info      *info;
-
-	info = lu_context_key_get(&env->le_ctx, &ccc_key);
-	LASSERT(info);
-	return info;
-}
-
-static inline struct cl_attr *ccc_env_thread_attr(const struct lu_env *env)
-{
-	struct cl_attr *attr = &ccc_env_info(env)->cti_attr;
-
-	memset(attr, 0, sizeof(*attr));
-	return attr;
-}
-
-static inline struct cl_io *ccc_env_thread_io(const struct lu_env *env)
-{
-	struct cl_io *io = &ccc_env_info(env)->cti_io;
-
-	memset(io, 0, sizeof(*io));
-	return io;
-}
-
-struct ccc_session {
-	struct ccc_io cs_ios;
-};
-
-static inline struct ccc_session *ccc_env_session(const struct lu_env *env)
-{
-	struct ccc_session *ses;
-
-	ses = lu_context_key_get(env->le_ses, &ccc_session_key);
-	LASSERT(ses);
-	return ses;
-}
-
-static inline struct ccc_io *ccc_env_io(const struct lu_env *env)
-{
-	return &ccc_env_session(env)->cs_ios;
-}
-
-/**
- * ccc-private object state.
- */
-struct ccc_object {
-	struct cl_object_header cob_header;
-	struct cl_object	cob_cl;
-	struct inode	   *cob_inode;
-
-	/**
-	 * A list of dirty pages pending IO in the cache. Used by
-	 * SOM. Protected by ll_inode_info::lli_lock.
-	 *
-	 * \see ccc_page::cpg_pending_linkage
-	 */
-	struct list_head	     cob_pending_list;
-
-	/**
-	 * Access this counter is protected by inode->i_sem. Now that
-	 * the lifetime of transient pages must be covered by inode sem,
-	 * we don't need to hold any lock..
-	 */
-	int		     cob_transient_pages;
-	/**
-	 * Number of outstanding mmaps on this file.
-	 *
-	 * \see ll_vm_open(), ll_vm_close().
-	 */
-	atomic_t	    cob_mmap_cnt;
-
-	/**
-	 * various flags
-	 * cob_discard_page_warned
-	 *     if pages belonging to this object are discarded when a client
-	 * is evicted, some debug info will be printed, this flag will be set
-	 * during processing the first discarded page, then avoid flooding
-	 * debug message for lots of discarded pages.
-	 *
-	 * \see ll_dirty_page_discard_warn.
-	 */
-	unsigned int		cob_discard_page_warned:1;
-};
-
-/**
- * ccc-private page state.
- */
-struct ccc_page {
-	struct cl_page_slice cpg_cl;
-	int		  cpg_defer_uptodate;
-	int		  cpg_ra_used;
-	int		  cpg_write_queued;
-	/**
-	 * Non-empty iff this page is already counted in
-	 * ccc_object::cob_pending_list. Protected by
-	 * ccc_object::cob_pending_guard. This list is only used as a flag,
-	 * that is, never iterated through, only checked for list_empty(), but
-	 * having a list is useful for debugging.
-	 */
-	struct list_head	   cpg_pending_linkage;
-	/** VM page */
-	struct page	  *cpg_page;
-};
-
-static inline struct ccc_page *cl2ccc_page(const struct cl_page_slice *slice)
-{
-	return container_of(slice, struct ccc_page, cpg_cl);
-}
-
-struct ccc_device {
-	struct cl_device    cdv_cl;
-	struct super_block *cdv_sb;
-	struct cl_device   *cdv_next;
-};
-
-struct ccc_lock {
-	struct cl_lock_slice clk_cl;
-};
-
-struct ccc_req {
-	struct cl_req_slice  crq_cl;
-};
-
-void *ccc_key_init	(const struct lu_context *ctx,
-			   struct lu_context_key *key);
-void  ccc_key_fini	(const struct lu_context *ctx,
-			   struct lu_context_key *key, void *data);
-void *ccc_session_key_init(const struct lu_context *ctx,
-			   struct lu_context_key *key);
-void  ccc_session_key_fini(const struct lu_context *ctx,
-			   struct lu_context_key *key, void *data);
-
-int	      ccc_device_init  (const struct lu_env *env,
-				   struct lu_device *d,
-				   const char *name, struct lu_device *next);
-struct lu_device *ccc_device_fini (const struct lu_env *env,
-				   struct lu_device *d);
-struct lu_device *ccc_device_alloc(const struct lu_env *env,
-				   struct lu_device_type *t,
-				   struct lustre_cfg *cfg,
-				   const struct lu_device_operations *luops,
-				   const struct cl_device_operations *clops);
-struct lu_device *ccc_device_free (const struct lu_env *env,
-				   struct lu_device *d);
-struct lu_object *ccc_object_alloc(const struct lu_env *env,
-				   const struct lu_object_header *hdr,
-				   struct lu_device *dev,
-				   const struct cl_object_operations *clops,
-				   const struct lu_object_operations *luops);
-
-int ccc_req_init(const struct lu_env *env, struct cl_device *dev,
-		 struct cl_req *req);
-void ccc_umount(const struct lu_env *env, struct cl_device *dev);
-int ccc_global_init(struct lu_device_type *device_type);
-void ccc_global_fini(struct lu_device_type *device_type);
-int ccc_object_init0(const struct lu_env *env, struct ccc_object *vob,
-		     const struct cl_object_conf *conf);
-int ccc_object_init(const struct lu_env *env, struct lu_object *obj,
-		    const struct lu_object_conf *conf);
-void ccc_object_free(const struct lu_env *env, struct lu_object *obj);
-int ccc_lock_init(const struct lu_env *env, struct cl_object *obj,
-		  struct cl_lock *lock, const struct cl_io *io,
-		  const struct cl_lock_operations *lkops);
-int ccc_object_glimpse(const struct lu_env *env,
-		       const struct cl_object *obj, struct ost_lvb *lvb);
-struct page *ccc_page_vmpage(const struct lu_env *env,
-			    const struct cl_page_slice *slice);
-int ccc_page_is_under_lock(const struct lu_env *env,
-			   const struct cl_page_slice *slice, struct cl_io *io);
-int ccc_fail(const struct lu_env *env, const struct cl_page_slice *slice);
-int ccc_transient_page_prep(const struct lu_env *env,
-			    const struct cl_page_slice *slice,
-			    struct cl_io *io);
-void ccc_lock_delete(const struct lu_env *env,
-		     const struct cl_lock_slice *slice);
-void ccc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice);
-int ccc_lock_enqueue(const struct lu_env *env,
-		     const struct cl_lock_slice *slice,
-		     struct cl_io *io, __u32 enqflags);
-int ccc_lock_use(const struct lu_env *env, const struct cl_lock_slice *slice);
-int ccc_lock_unuse(const struct lu_env *env, const struct cl_lock_slice *slice);
-int ccc_lock_wait(const struct lu_env *env, const struct cl_lock_slice *slice);
-int ccc_lock_fits_into(const struct lu_env *env,
-		       const struct cl_lock_slice *slice,
-		       const struct cl_lock_descr *need,
-		       const struct cl_io *io);
-void ccc_lock_state(const struct lu_env *env,
-		    const struct cl_lock_slice *slice,
-		    enum cl_lock_state state);
-
-int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
-			  __u32 enqflags, enum cl_lock_mode mode,
-			  pgoff_t start, pgoff_t end);
-int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io,
-		    __u32 enqflags, enum cl_lock_mode mode,
-		    loff_t start, loff_t end);
-void ccc_io_end(const struct lu_env *env, const struct cl_io_slice *ios);
-void ccc_io_advance(const struct lu_env *env, const struct cl_io_slice *ios,
-		    size_t nob);
-void ccc_io_update_iov(const struct lu_env *env, struct ccc_io *cio,
-		       struct cl_io *io);
-int ccc_prep_size(const struct lu_env *env, struct cl_object *obj,
-		  struct cl_io *io, loff_t start, size_t count, int *exceed);
-void ccc_req_completion(const struct lu_env *env,
-			const struct cl_req_slice *slice, int ioret);
-void ccc_req_attr_set(const struct lu_env *env,
-		      const struct cl_req_slice *slice,
-		      const struct cl_object *obj,
-		      struct cl_req_attr *oa, u64 flags);
-
-struct lu_device   *ccc2lu_dev      (struct ccc_device *vdv);
-struct lu_object   *ccc2lu	  (struct ccc_object *vob);
-struct ccc_device  *lu2ccc_dev      (const struct lu_device *d);
-struct ccc_device  *cl2ccc_dev      (const struct cl_device *d);
-struct ccc_object  *lu2ccc	  (const struct lu_object *obj);
-struct ccc_object  *cl2ccc	  (const struct cl_object *obj);
-struct ccc_lock    *cl2ccc_lock     (const struct cl_lock_slice *slice);
-struct ccc_io      *cl2ccc_io       (const struct lu_env *env,
-				     const struct cl_io_slice *slice);
-struct ccc_req     *cl2ccc_req      (const struct cl_req_slice *slice);
-struct page	 *cl2vm_page      (const struct cl_page_slice *slice);
-struct inode       *ccc_object_inode(const struct cl_object *obj);
-struct ccc_object  *cl_inode2ccc    (struct inode *inode);
-
-int cl_setattr_ost(struct inode *inode, const struct iattr *attr);
-
-int ccc_object_invariant(const struct cl_object *obj);
-int cl_file_inode_init(struct inode *inode, struct lustre_md *md);
-void cl_inode_fini(struct inode *inode);
-int cl_local_size(struct inode *inode);
-
-__u16 ll_dirent_type_get(struct lu_dirent *ent);
-__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32);
-__u32 cl_fid_build_gen(const struct lu_fid *fid);
-
-# define CLOBINVRNT(env, clob, expr)					\
-	((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr)))
-
-int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp);
-int cl_ocd_update(struct obd_device *host,
-		  struct obd_device *watched,
-		  enum obd_notify_event ev, void *owner, void *data);
-
-struct ccc_grouplock {
-	struct lu_env   *cg_env;
-	struct cl_io    *cg_io;
-	struct cl_lock  *cg_lock;
-	unsigned long    cg_gid;
-};
-
-int  cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
-		      struct ccc_grouplock *cg);
-void cl_put_grouplock(struct ccc_grouplock *cg);
-
-/**
- * New interfaces to get and put lov_stripe_md from lov layer. This violates
- * layering because lov_stripe_md is supposed to be a private data in lov.
- *
- * NB: If you find you have to use these interfaces for your new code, please
- * think about it again. These interfaces may be removed in the future for
- * better layering.
- */
-struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj);
-void lov_lsm_put(struct cl_object *clobj, struct lov_stripe_md *lsm);
-int lov_read_and_clear_async_rc(struct cl_object *clob);
-
-struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode);
-void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm);
-
-/**
- * Data structure managing a client's cached clean pages. An LRU of
- * pages is maintained, along with other statistics.
- */
-struct cl_client_cache {
-	atomic_t	ccc_users;    /* # of users (OSCs) of this data */
-	struct list_head	ccc_lru;      /* LRU list of cached clean pages */
-	spinlock_t	ccc_lru_lock; /* lock for list */
-	atomic_t	ccc_lru_left; /* # of LRU entries available */
-	unsigned long	ccc_lru_max;  /* Max # of LRU entries possible */
-	unsigned int	ccc_lru_shrinkers; /* # of threads reclaiming */
-};
-
-#endif /*LCLIENT_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
index 79d8f93075d1..1eb64ec4bed4 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_lite.h b/drivers/staging/lustre/lustre/include/linux/lustre_lite.h
index 3420cfd1278d..d18e8a76bb25 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_lite.h
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_lite.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h
index c6c7f54637fb..5842cb18b49e 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_user.h b/drivers/staging/lustre/lustre/include/linux/lustre_user.h
index 9cc2849f3f85..e967950e8536 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_user.h
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_user.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/include/linux/obd.h b/drivers/staging/lustre/lustre/include/linux/obd.h
deleted file mode 100644
index 3907bf4ce07c..000000000000
--- a/drivers/staging/lustre/lustre/include/linux/obd.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __LINUX_OBD_H
-#define __LINUX_OBD_H
-
-#ifndef __OBD_H
-#error Do not #include this file directly. #include <obd.h> instead
-#endif
-
-#include "../obd_support.h"
-
-#include <linux/fs.h>
-#include <linux/list.h>
-#include <linux/sched.h>  /* for struct task_struct, for current.h */
-#include <linux/mount.h>
-
-#include "../lustre_intent.h"
-
-struct ll_iattr {
-	struct iattr	iattr;
-	unsigned int	ia_attr_flags;
-};
-
-#define CLIENT_OBD_LIST_LOCK_DEBUG 1
-
-struct client_obd_lock {
-	spinlock_t		lock;
-
-	unsigned long       time;
-	struct task_struct *task;
-	const char	 *func;
-	int		 line;
-};
-
-static inline void __client_obd_list_lock(struct client_obd_lock *lock,
-					  const char *func, int line)
-{
-	unsigned long cur = jiffies;
-
-	while (1) {
-		if (spin_trylock(&lock->lock)) {
-			LASSERT(!lock->task);
-			lock->task = current;
-			lock->func = func;
-			lock->line = line;
-			lock->time = jiffies;
-			break;
-		}
-
-		if (time_before(cur + 5 * HZ, jiffies) &&
-		    time_before(lock->time + 5 * HZ, jiffies)) {
-			struct task_struct *task = lock->task;
-
-			if (!task)
-				continue;
-
-			LCONSOLE_WARN("%s:%d: lock %p was acquired by <%s:%d:%s:%d> for %lu seconds.\n",
-				      current->comm, current->pid,
-				      lock, task->comm, task->pid,
-				      lock->func, lock->line,
-				      (jiffies - lock->time) / HZ);
-			LCONSOLE_WARN("====== for current process =====\n");
-			dump_stack();
-			LCONSOLE_WARN("====== end =======\n");
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_timeout(1000 * HZ);
-		}
-		cpu_relax();
-	}
-}
-
-#define client_obd_list_lock(lock) \
-	__client_obd_list_lock(lock, __func__, __LINE__)
-
-static inline void client_obd_list_unlock(struct client_obd_lock *lock)
-{
-	LASSERT(lock->task);
-	lock->task = NULL;
-	lock->time = jiffies;
-	spin_unlock(&lock->lock);
-}
-
-static inline void client_obd_list_lock_init(struct client_obd_lock *lock)
-{
-	spin_lock_init(&lock->lock);
-}
-
-static inline void client_obd_list_lock_done(struct client_obd_lock *lock)
-{}
-
-#endif /* __LINUX_OBD_H */
diff --git a/drivers/staging/lustre/lustre/include/lprocfs_status.h b/drivers/staging/lustre/lustre/include/lprocfs_status.h
index 4146c9c3999f..d68e60e7fef7 100644
--- a/drivers/staging/lustre/lustre/include/lprocfs_status.h
+++ b/drivers/staging/lustre/lustre/include/lprocfs_status.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h
index 242bb1ef6245..6e25c1bb6aa3 100644
--- a/drivers/staging/lustre/lustre/include/lu_object.h
+++ b/drivers/staging/lustre/lustre/include/lu_object.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -198,7 +194,6 @@ typedef int (*lu_printer_t)(const struct lu_env *env,
  * Operations specific for particular lu_object.
  */
 struct lu_object_operations {
-
 	/**
 	 * Allocate lower-layer parts of the object by calling
 	 * lu_device_operations::ldo_object_alloc() of the corresponding
@@ -656,21 +651,21 @@ static inline struct seq_server_site *lu_site2seq(const struct lu_site *s)
  * @{
  */
 
-int  lu_site_init	 (struct lu_site *s, struct lu_device *d);
-void lu_site_fini	 (struct lu_site *s);
-int  lu_site_init_finish  (struct lu_site *s);
-void lu_stack_fini	(const struct lu_env *env, struct lu_device *top);
-void lu_device_get	(struct lu_device *d);
-void lu_device_put	(struct lu_device *d);
-int  lu_device_init       (struct lu_device *d, struct lu_device_type *t);
-void lu_device_fini       (struct lu_device *d);
-int  lu_object_header_init(struct lu_object_header *h);
+int lu_site_init(struct lu_site *s, struct lu_device *d);
+void lu_site_fini(struct lu_site *s);
+int lu_site_init_finish(struct lu_site *s);
+void lu_stack_fini(const struct lu_env *env, struct lu_device *top);
+void lu_device_get(struct lu_device *d);
+void lu_device_put(struct lu_device *d);
+int lu_device_init(struct lu_device *d, struct lu_device_type *t);
+void lu_device_fini(struct lu_device *d);
+int lu_object_header_init(struct lu_object_header *h);
 void lu_object_header_fini(struct lu_object_header *h);
-int  lu_object_init       (struct lu_object *o,
-			   struct lu_object_header *h, struct lu_device *d);
-void lu_object_fini       (struct lu_object *o);
-void lu_object_add_top    (struct lu_object_header *h, struct lu_object *o);
-void lu_object_add	(struct lu_object *before, struct lu_object *o);
+int lu_object_init(struct lu_object *o,
+		   struct lu_object_header *h, struct lu_device *d);
+void lu_object_fini(struct lu_object *o);
+void lu_object_add_top(struct lu_object_header *h, struct lu_object *o);
+void lu_object_add(struct lu_object *before, struct lu_object *o);
 
 /**
  * Helpers to initialize and finalize device types.
@@ -781,11 +776,10 @@ int lu_cdebug_printer(const struct lu_env *env,
  */
 #define LU_OBJECT_DEBUG(mask, env, object, format, ...)		   \
 do {								      \
-	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		  \
-									  \
 	if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {		     \
+		LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		\
 		lu_object_print(env, &msgdata, lu_cdebug_printer, object);\
-		CDEBUG(mask, format, ## __VA_ARGS__);		    \
+		CDEBUG(mask, format "\n", ## __VA_ARGS__);		    \
 	}								 \
 } while (0)
 
@@ -794,9 +788,8 @@ do {								      \
  */
 #define LU_OBJECT_HEADER(mask, env, object, format, ...)		\
 do {								    \
-	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		\
-									\
 	if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {		   \
+		LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		\
 		lu_object_header_print(env, &msgdata, lu_cdebug_printer,\
 				       (object)->lo_header);	    \
 		lu_cdebug_printer(env, &msgdata, "\n");		 \
@@ -1007,6 +1000,10 @@ enum lu_context_tag {
 	 */
 	LCT_LOCAL = 1 << 7,
 	/**
+	 * session for server thread
+	 **/
+	LCT_SERVER_SESSION = BIT(8),
+	/**
 	 * Set when at least one of keys, having values in this context has
 	 * non-NULL lu_context_key::lct_exit() method. This is used to
 	 * optimize lu_context_exit() call.
@@ -1118,7 +1115,7 @@ struct lu_context_key {
 	{							 \
 		type *value;				      \
 								  \
-		CLASSERT(PAGE_SIZE >= sizeof (*value));       \
+		CLASSERT(PAGE_SIZE >= sizeof(*value));        \
 								  \
 		value = kzalloc(sizeof(*value), GFP_NOFS);	\
 		if (!value)				\
@@ -1154,12 +1151,12 @@ do {						    \
 	(key)->lct_owner = THIS_MODULE;		 \
 } while (0)
 
-int   lu_context_key_register(struct lu_context_key *key);
-void  lu_context_key_degister(struct lu_context_key *key);
-void *lu_context_key_get     (const struct lu_context *ctx,
-			       const struct lu_context_key *key);
-void  lu_context_key_quiesce (struct lu_context_key *key);
-void  lu_context_key_revive  (struct lu_context_key *key);
+int lu_context_key_register(struct lu_context_key *key);
+void lu_context_key_degister(struct lu_context_key *key);
+void *lu_context_key_get(const struct lu_context *ctx,
+			 const struct lu_context_key *key);
+void lu_context_key_quiesce(struct lu_context_key *key);
+void lu_context_key_revive(struct lu_context_key *key);
 
 /*
  * LU_KEY_INIT_GENERIC() has to be a macro to correctly determine an
@@ -1216,21 +1213,21 @@ void  lu_context_key_revive  (struct lu_context_key *key);
 	LU_TYPE_START(mod, __VA_ARGS__);	\
 	LU_TYPE_STOP(mod, __VA_ARGS__)
 
-int   lu_context_init  (struct lu_context *ctx, __u32 tags);
-void  lu_context_fini  (struct lu_context *ctx);
-void  lu_context_enter (struct lu_context *ctx);
-void  lu_context_exit  (struct lu_context *ctx);
-int   lu_context_refill(struct lu_context *ctx);
+int lu_context_init(struct lu_context *ctx, __u32 tags);
+void lu_context_fini(struct lu_context *ctx);
+void lu_context_enter(struct lu_context *ctx);
+void lu_context_exit(struct lu_context *ctx);
+int lu_context_refill(struct lu_context *ctx);
 
 /*
  * Helper functions to operate on multiple keys. These are used by the default
  * device type operations, defined by LU_TYPE_INIT_FINI().
  */
 
-int  lu_context_key_register_many(struct lu_context_key *k, ...);
+int lu_context_key_register_many(struct lu_context_key *k, ...);
 void lu_context_key_degister_many(struct lu_context_key *k, ...);
-void lu_context_key_revive_many  (struct lu_context_key *k, ...);
-void lu_context_key_quiesce_many (struct lu_context_key *k, ...);
+void lu_context_key_revive_many(struct lu_context_key *k, ...);
+void lu_context_key_quiesce_many(struct lu_context_key *k, ...);
 
 /**
  * Environment.
@@ -1246,9 +1243,9 @@ struct lu_env {
 	struct lu_context *le_ses;
 };
 
-int  lu_env_init  (struct lu_env *env, __u32 tags);
-void lu_env_fini  (struct lu_env *env);
-int  lu_env_refill(struct lu_env *env);
+int lu_env_init(struct lu_env *env, __u32 tags);
+void lu_env_fini(struct lu_env *env);
+int lu_env_refill(struct lu_env *env);
 
 /** @} lu_context */
 
diff --git a/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h
index 07d45de69dd9..c2340d643e84 100644
--- a/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h
+++ b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
index 5aae1d06a5fa..051864c23b5b 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -183,6 +179,12 @@ struct lu_seq_range {
 	__u32 lsr_flags;
 };
 
+struct lu_seq_range_array {
+	__u32 lsra_count;
+	__u32 lsra_padding;
+	struct lu_seq_range lsra_lsr[0];
+};
+
 #define LU_SEQ_RANGE_MDT	0x0
 #define LU_SEQ_RANGE_OST	0x1
 #define LU_SEQ_RANGE_ANY	0x3
@@ -380,7 +382,7 @@ static inline __u64 fid_ver_oid(const struct lu_fid *fid)
  * used for other purposes and not risk collisions with existing inodes.
  *
  * Different FID Format
- * http://arch.lustre.org/index.php?title=Interoperability_fids_zfs#NEW.0
+ * http://wiki.old.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs
  */
 enum fid_seq {
 	FID_SEQ_OST_MDT0	= 0,
@@ -578,7 +580,7 @@ static inline __u64 ostid_seq(const struct ost_id *ostid)
 	if (fid_seq_is_mdt0(ostid->oi.oi_seq))
 		return FID_SEQ_OST_MDT0;
 
-	if (fid_seq_is_default(ostid->oi.oi_seq))
+	if (unlikely(fid_seq_is_default(ostid->oi.oi_seq)))
 		return FID_SEQ_LOV_DEFAULT;
 
 	if (fid_is_idif(&ostid->oi_fid))
@@ -590,9 +592,12 @@ static inline __u64 ostid_seq(const struct ost_id *ostid)
 /* extract OST objid from a wire ost_id (id/seq) pair */
 static inline __u64 ostid_id(const struct ost_id *ostid)
 {
-	if (fid_seq_is_mdt0(ostid_seq(ostid)))
+	if (fid_seq_is_mdt0(ostid->oi.oi_seq))
 		return ostid->oi.oi_id & IDIF_OID_MASK;
 
+	if (unlikely(fid_seq_is_default(ostid->oi.oi_seq)))
+		return ostid->oi.oi_id;
+
 	if (fid_is_idif(&ostid->oi_fid))
 		return fid_idif_id(fid_seq(&ostid->oi_fid),
 				   fid_oid(&ostid->oi_fid), 0);
@@ -636,12 +641,22 @@ static inline void ostid_set_seq_llog(struct ost_id *oi)
  */
 static inline void ostid_set_id(struct ost_id *oi, __u64 oid)
 {
-	if (fid_seq_is_mdt0(ostid_seq(oi))) {
+	if (fid_seq_is_mdt0(oi->oi.oi_seq)) {
 		if (oid >= IDIF_MAX_OID) {
 			CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi));
 			return;
 		}
 		oi->oi.oi_id = oid;
+	} else if (fid_is_idif(&oi->oi_fid)) {
+		if (oid >= IDIF_MAX_OID) {
+			CERROR("Bad %llu to set "DOSTID"\n",
+			       oid, POSTID(oi));
+			return;
+		}
+		oi->oi_fid.f_seq = fid_idif_seq(oid,
+						fid_idif_ost_idx(&oi->oi_fid));
+		oi->oi_fid.f_oid = oid;
+		oi->oi_fid.f_ver = oid >> 48;
 	} else {
 		if (oid > OBIF_MAX_OID) {
 			CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi));
@@ -651,25 +666,31 @@ static inline void ostid_set_id(struct ost_id *oi, __u64 oid)
 	}
 }
 
-static inline void ostid_inc_id(struct ost_id *oi)
+static inline int fid_set_id(struct lu_fid *fid, __u64 oid)
 {
-	if (fid_seq_is_mdt0(ostid_seq(oi))) {
-		if (unlikely(ostid_id(oi) + 1 > IDIF_MAX_OID)) {
-			CERROR("Bad inc "DOSTID"\n", POSTID(oi));
-			return;
+	if (unlikely(fid_seq_is_igif(fid->f_seq))) {
+		CERROR("bad IGIF, "DFID"\n", PFID(fid));
+		return -EBADF;
+	}
+
+	if (fid_is_idif(fid)) {
+		if (oid >= IDIF_MAX_OID) {
+			CERROR("Too large OID %#llx to set IDIF "DFID"\n",
+			       (unsigned long long)oid, PFID(fid));
+			return -EBADF;
 		}
-		oi->oi.oi_id++;
+		fid->f_seq = fid_idif_seq(oid, fid_idif_ost_idx(fid));
+		fid->f_oid = oid;
+		fid->f_ver = oid >> 48;
 	} else {
-		oi->oi_fid.f_oid++;
+		if (oid > OBIF_MAX_OID) {
+			CERROR("Too large OID %#llx to set REG "DFID"\n",
+			       (unsigned long long)oid, PFID(fid));
+			return -EBADF;
+		}
+		fid->f_oid = oid;
 	}
-}
-
-static inline void ostid_dec_id(struct ost_id *oi)
-{
-	if (fid_seq_is_mdt0(ostid_seq(oi)))
-		oi->oi.oi_id--;
-	else
-		oi->oi_fid.f_oid--;
+	return 0;
 }
 
 /**
@@ -679,35 +700,39 @@ static inline void ostid_dec_id(struct ost_id *oi)
  * be passed through unchanged.  Only legacy OST objects in "group 0"
  * will be mapped into the IDIF namespace so that they can fit into the
  * struct lu_fid fields without loss.  For reference see:
- * http://arch.lustre.org/index.php?title=Interoperability_fids_zfs
+ * http://wiki.old.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs
  */
 static inline int ostid_to_fid(struct lu_fid *fid, struct ost_id *ostid,
 			       __u32 ost_idx)
 {
+	__u64 seq = ostid_seq(ostid);
+
 	if (ost_idx > 0xffff) {
 		CERROR("bad ost_idx, "DOSTID" ost_idx:%u\n", POSTID(ostid),
 		       ost_idx);
 		return -EBADF;
 	}
 
-	if (fid_seq_is_mdt0(ostid_seq(ostid))) {
+	if (fid_seq_is_mdt0(seq)) {
+		__u64 oid = ostid_id(ostid);
+
 		/* This is a "legacy" (old 1.x/2.early) OST object in "group 0"
 		 * that we map into the IDIF namespace.  It allows up to 2^48
 		 * objects per OST, as this is the object namespace that has
 		 * been in production for years.  This can handle create rates
 		 * of 1M objects/s/OST for 9 years, or combinations thereof.
 		 */
-		if (ostid_id(ostid) >= IDIF_MAX_OID) {
+		if (oid >= IDIF_MAX_OID) {
 			CERROR("bad MDT0 id, " DOSTID " ost_idx:%u\n",
 			       POSTID(ostid), ost_idx);
 			return -EBADF;
 		}
-		fid->f_seq = fid_idif_seq(ostid_id(ostid), ost_idx);
+		fid->f_seq = fid_idif_seq(oid, ost_idx);
 		/* truncate to 32 bits by assignment */
-		fid->f_oid = ostid_id(ostid);
+		fid->f_oid = oid;
 		/* in theory, not currently used */
-		fid->f_ver = ostid_id(ostid) >> 48;
-	} else /* if (fid_seq_is_idif(seq) || fid_seq_is_norm(seq)) */ {
+		fid->f_ver = oid >> 48;
+	} else if (likely(!fid_seq_is_default(seq))) {
 	       /* This is either an IDIF object, which identifies objects across
 		* all OSTs, or a regular FID.  The IDIF namespace maps legacy
 		* OST objects into the FID namespace.  In both cases, we just
@@ -1001,8 +1026,9 @@ static inline int lu_dirent_calc_size(int namelen, __u16 attr)
 
 		size = (sizeof(struct lu_dirent) + namelen + align) & ~align;
 		size += sizeof(struct luda_type);
-	} else
+	} else {
 		size = sizeof(struct lu_dirent) + namelen;
+	}
 
 	return (size + 7) & ~7;
 }
@@ -1211,8 +1237,16 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
 						  */
 #define OBD_CONNECT_ATTRFID	       0x4000ULL /*Server can GetAttr By Fid*/
 #define OBD_CONNECT_NODEVOH	       0x8000ULL /*No open hndl on specl nodes*/
-#define OBD_CONNECT_RMT_CLIENT	      0x10000ULL /*Remote client */
-#define OBD_CONNECT_RMT_CLIENT_FORCE  0x20000ULL /*Remote client by force */
+#define OBD_CONNECT_RMT_CLIENT	      0x10000ULL /* Remote client, never used
+						  * in production. Removed in
+						  * 2.9. Keep this flag to
+						  * avoid reuse.
+						  */
+#define OBD_CONNECT_RMT_CLIENT_FORCE  0x20000ULL /* Remote client by force,
+						  * never used in production.
+						  * Removed in 2.9. Keep this
+						  * flag to avoid reuse
+						  */
 #define OBD_CONNECT_BRW_SIZE	      0x40000ULL /*Max bytes per rpc */
 #define OBD_CONNECT_QUOTA64	      0x80000ULL /*Not used since 2.4 */
 #define OBD_CONNECT_MDS_CAPA	     0x100000ULL /*MDS capability */
@@ -1256,6 +1290,9 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
 #define OBD_CONNECT_PINGLESS	0x4000000000000ULL/* pings not required */
 #define OBD_CONNECT_FLOCK_DEAD	0x8000000000000ULL/* flock deadlock detection */
 #define OBD_CONNECT_DISP_STRIPE 0x10000000000000ULL/*create stripe disposition*/
+#define OBD_CONNECT_OPEN_BY_FID	0x20000000000000ULL	/* open by fid won't pack
+							 * name in request
+							 */
 
 /* XXX README XXX:
  * Please DO NOT add flag values here before first ensuring that this same
@@ -1428,6 +1465,8 @@ enum obdo_flags {
 					   */
 	OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */
 	OBD_FL_NOSPC_BLK    = 0x00100000, /* no more block space on OST */
+	OBD_FL_FLUSH	    = 0x00200000, /* flush pages on the OST */
+	OBD_FL_SHORT_IO	    = 0x00400000, /* short io request */
 
 	/* Note that while these checksum values are currently separate bits,
 	 * in 2.x we can actually allow all values from 1-31 if we wanted.
@@ -1525,6 +1564,11 @@ static inline void lmm_oi_set_seq(struct ost_id *oi, __u64 seq)
 	oi->oi.oi_seq = seq;
 }
 
+static inline void lmm_oi_set_id(struct ost_id *oi, __u64 oid)
+{
+	oi->oi.oi_id = oid;
+}
+
 static inline __u64 lmm_oi_id(struct ost_id *oi)
 {
 	return oi->oi.oi_id;
@@ -1663,7 +1707,7 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic)
 #define OBD_MD_FLXATTRLS     (0x0000002000000000ULL) /* xattr list */
 #define OBD_MD_FLXATTRRM     (0x0000004000000000ULL) /* xattr remove */
 #define OBD_MD_FLACL	     (0x0000008000000000ULL) /* ACL */
-#define OBD_MD_FLRMTPERM     (0x0000010000000000ULL) /* remote permission */
+/*	OBD_MD_FLRMTPERM     (0x0000010000000000ULL) remote perm, obsolete */
 #define OBD_MD_FLMDSCAPA     (0x0000020000000000ULL) /* MDS capability */
 #define OBD_MD_FLOSSCAPA     (0x0000040000000000ULL) /* OSS capability */
 #define OBD_MD_FLCKSPLIT     (0x0000080000000000ULL) /* Check split on server */
@@ -1675,10 +1719,10 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic)
 						      */
 #define OBD_MD_FLOBJCOUNT    (0x0000400000000000ULL) /* for multiple destroy */
 
-#define OBD_MD_FLRMTLSETFACL (0x0001000000000000ULL) /* lfs lsetfacl case */
-#define OBD_MD_FLRMTLGETFACL (0x0002000000000000ULL) /* lfs lgetfacl case */
-#define OBD_MD_FLRMTRSETFACL (0x0004000000000000ULL) /* lfs rsetfacl case */
-#define OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) /* lfs rgetfacl case */
+/*	OBD_MD_FLRMTLSETFACL (0x0001000000000000ULL) lfs lsetfacl, obsolete */
+/*	OBD_MD_FLRMTLGETFACL (0x0002000000000000ULL) lfs lgetfacl, obsolete */
+/*	OBD_MD_FLRMTRSETFACL (0x0004000000000000ULL) lfs rsetfacl, obsolete */
+/*	OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) lfs rgetfacl, obsolete */
 
 #define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */
 #define OBD_MD_FLRELEASED    (0x0020000000000000ULL) /* file released */
@@ -1732,6 +1776,11 @@ void lustre_swab_obd_statfs(struct obd_statfs *os);
 #define OBD_BRW_MEMALLOC       0x800 /* Client runs in the "kswapd" context */
 #define OBD_BRW_OVER_USRQUOTA 0x1000 /* Running out of user quota */
 #define OBD_BRW_OVER_GRPQUOTA 0x2000 /* Running out of group quota */
+#define OBD_BRW_SOFT_SYNC     0x4000 /* This flag notifies the server
+				      * that the client is running low on
+				      * space for unstable pages; asking
+				      * it to sync quickly
+				      */
 
 #define OBD_OBJECT_EOF 0xffffffffffffffffULL
 
@@ -2114,26 +2163,8 @@ enum {
 	CFS_SETUID_PERM = 0x01,
 	CFS_SETGID_PERM = 0x02,
 	CFS_SETGRP_PERM = 0x04,
-	CFS_RMTACL_PERM = 0x08,
-	CFS_RMTOWN_PERM = 0x10
-};
-
-/* inode access permission for remote user, the inode info are omitted,
- * for client knows them.
- */
-struct mdt_remote_perm {
-	__u32	   rp_uid;
-	__u32	   rp_gid;
-	__u32	   rp_fsuid;
-	__u32	   rp_fsuid_h;
-	__u32	   rp_fsgid;
-	__u32	   rp_fsgid_h;
-	__u32	   rp_access_perm; /* MAY_READ/WRITE/EXEC */
-	__u32	   rp_padding;
 };
 
-void lustre_swab_mdt_remote_perm(struct mdt_remote_perm *p);
-
 struct mdt_rec_setattr {
 	__u32	   sa_opcode;
 	__u32	   sa_cap;
@@ -2436,6 +2467,7 @@ struct mdt_rec_reint {
 
 void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr);
 
+/* lmv structures */
 struct lmv_desc {
 	__u32 ld_tgt_count;		/* how many MDS's */
 	__u32 ld_active_tgt_count;	 /* how many active */
@@ -2460,7 +2492,6 @@ struct lmv_stripe_md {
 	struct lu_fid mea_ids[0];
 };
 
-/* lmv structures */
 #define MEA_MAGIC_LAST_CHAR      0xb2221ca1
 #define MEA_MAGIC_ALL_CHARS      0xb222a11c
 #define MEA_MAGIC_HASH_SEGMENT   0xb222a11b
@@ -2470,9 +2501,10 @@ struct lmv_stripe_md {
 #define MAX_HASH_HIGHEST_BIT     0x1000000000000000ULL
 
 enum fld_rpc_opc {
-	FLD_QUERY		       = 900,
+	FLD_QUERY	= 900,
+	FLD_READ	= 901,
 	FLD_LAST_OPC,
-	FLD_FIRST_OPC		   = FLD_QUERY
+	FLD_FIRST_OPC	= FLD_QUERY
 };
 
 enum seq_rpc_opc {
@@ -2486,6 +2518,12 @@ enum seq_op {
 	SEQ_ALLOC_META = 1
 };
 
+enum fld_op {
+	FLD_CREATE = 0,
+	FLD_DELETE = 1,
+	FLD_LOOKUP = 2,
+};
+
 /*
  *  LOV data structures
  */
@@ -2582,6 +2620,8 @@ struct ldlm_extent {
 	__u64 gid;
 };
 
+#define LDLM_GID_ANY ((__u64)-1)
+
 static inline int ldlm_extent_overlap(struct ldlm_extent *ex1,
 				      struct ldlm_extent *ex2)
 {
@@ -3304,7 +3344,7 @@ struct getinfo_fid2path {
 	char	    gf_path[0];
 } __packed;
 
-void lustre_swab_fid2path (struct getinfo_fid2path *gf);
+void lustre_swab_fid2path(struct getinfo_fid2path *gf);
 
 enum {
 	LAYOUT_INTENT_ACCESS    = 0,
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
index 276906e646f5..ef6f38ff359e 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -193,37 +189,37 @@ struct ost_id {
  * *INFO    - set/get lov_user_mds_data
  */
 /* see <lustre_lib.h> for ioctl numberss 101-150 */
-#define LL_IOC_GETFLAGS		 _IOR ('f', 151, long)
-#define LL_IOC_SETFLAGS		 _IOW ('f', 152, long)
-#define LL_IOC_CLRFLAGS		 _IOW ('f', 153, long)
+#define LL_IOC_GETFLAGS		 _IOR('f', 151, long)
+#define LL_IOC_SETFLAGS		 _IOW('f', 152, long)
+#define LL_IOC_CLRFLAGS		 _IOW('f', 153, long)
 /* LL_IOC_LOV_SETSTRIPE: See also OBD_IOC_LOV_SETSTRIPE */
-#define LL_IOC_LOV_SETSTRIPE	    _IOW ('f', 154, long)
+#define LL_IOC_LOV_SETSTRIPE	    _IOW('f', 154, long)
 /* LL_IOC_LOV_GETSTRIPE: See also OBD_IOC_LOV_GETSTRIPE */
-#define LL_IOC_LOV_GETSTRIPE	    _IOW ('f', 155, long)
+#define LL_IOC_LOV_GETSTRIPE	    _IOW('f', 155, long)
 /* LL_IOC_LOV_SETEA: See also OBD_IOC_LOV_SETEA */
-#define LL_IOC_LOV_SETEA		_IOW ('f', 156, long)
-#define LL_IOC_RECREATE_OBJ	     _IOW ('f', 157, long)
-#define LL_IOC_RECREATE_FID	     _IOW ('f', 157, struct lu_fid)
-#define LL_IOC_GROUP_LOCK	       _IOW ('f', 158, long)
-#define LL_IOC_GROUP_UNLOCK	     _IOW ('f', 159, long)
+#define LL_IOC_LOV_SETEA		_IOW('f', 156, long)
+#define LL_IOC_RECREATE_OBJ	     _IOW('f', 157, long)
+#define LL_IOC_RECREATE_FID	     _IOW('f', 157, struct lu_fid)
+#define LL_IOC_GROUP_LOCK	       _IOW('f', 158, long)
+#define LL_IOC_GROUP_UNLOCK	     _IOW('f', 159, long)
 /* LL_IOC_QUOTACHECK: See also OBD_IOC_QUOTACHECK */
-#define LL_IOC_QUOTACHECK	       _IOW ('f', 160, int)
+#define LL_IOC_QUOTACHECK	       _IOW('f', 160, int)
 /* LL_IOC_POLL_QUOTACHECK: See also OBD_IOC_POLL_QUOTACHECK */
-#define LL_IOC_POLL_QUOTACHECK	  _IOR ('f', 161, struct if_quotacheck *)
+#define LL_IOC_POLL_QUOTACHECK	  _IOR('f', 161, struct if_quotacheck *)
 /* LL_IOC_QUOTACTL: See also OBD_IOC_QUOTACTL */
 #define LL_IOC_QUOTACTL		 _IOWR('f', 162, struct if_quotactl)
 #define IOC_OBD_STATFS		  _IOWR('f', 164, struct obd_statfs *)
 #define IOC_LOV_GETINFO		 _IOWR('f', 165, struct lov_user_mds_data *)
-#define LL_IOC_FLUSHCTX		 _IOW ('f', 166, long)
-#define LL_IOC_RMTACL		   _IOW ('f', 167, long)
-#define LL_IOC_GETOBDCOUNT	      _IOR ('f', 168, long)
+#define LL_IOC_FLUSHCTX		 _IOW('f', 166, long)
+/* LL_IOC_RMTACL			167 obsolete */
+#define LL_IOC_GETOBDCOUNT	      _IOR('f', 168, long)
 #define LL_IOC_LLOOP_ATTACH	     _IOWR('f', 169, long)
 #define LL_IOC_LLOOP_DETACH	     _IOWR('f', 170, long)
 #define LL_IOC_LLOOP_INFO	       _IOWR('f', 171, struct lu_fid)
 #define LL_IOC_LLOOP_DETACH_BYDEV       _IOWR('f', 172, long)
-#define LL_IOC_PATH2FID		 _IOR ('f', 173, long)
+#define LL_IOC_PATH2FID		 _IOR('f', 173, long)
 #define LL_IOC_GET_CONNECT_FLAGS	_IOWR('f', 174, __u64 *)
-#define LL_IOC_GET_MDTIDX	       _IOR ('f', 175, int)
+#define LL_IOC_GET_MDTIDX	       _IOR('f', 175, int)
 
 /* see <lustre_lib.h> for ioctl numbers 177-210 */
 
@@ -542,19 +538,6 @@ struct identity_downcall_data {
 	__u32			    idd_groups[0];
 };
 
-/* for non-mapped uid/gid */
-#define NOBODY_UID      99
-#define NOBODY_GID      99
-
-#define INVALID_ID      (-1)
-
-enum {
-	RMT_LSETFACL    = 1,
-	RMT_LGETFACL    = 2,
-	RMT_RSETFACL    = 3,
-	RMT_RGETFACL    = 4
-};
-
 /* lustre volatile file support
  * file name header: .^L^S^T^R:volatile"
  */
@@ -676,7 +659,12 @@ static inline const char *changelog_type2str(int type)
 #define CLF_UNLINK_HSM_EXISTS 0x0002 /* File has something in HSM */
 				     /* HSM cleaning needed */
 /* Flags for rename */
-#define CLF_RENAME_LAST       0x0001 /* rename unlink last hardlink of target */
+#define CLF_RENAME_LAST		0x0001	/* rename unlink last hardlink of
+					 * target
+					 */
+#define CLF_RENAME_LAST_EXISTS	0x0002	/* rename unlink last hardlink of target
+					 * has an archive in backend
+					 */
 
 /* Flags for HSM */
 /* 12b used (from high weight to low weight):
@@ -833,9 +821,8 @@ struct ioc_data_version {
 	__u64 idv_flags;     /* See LL_DV_xxx */
 };
 
-#define LL_DV_NOFLUSH 0x01   /* Do not take READ EXTENT LOCK before sampling
-			      * version. Dirty caches are left unchanged.
-			      */
+#define LL_DV_RD_FLUSH	BIT(0)	/* Flush dirty pages from clients */
+#define LL_DV_WR_FLUSH	BIT(1)	/* Flush all caching pages from clients */
 
 #ifndef offsetof
 # define offsetof(typ, memb)     ((unsigned long)((char *)&(((typ *)0)->memb)))
@@ -1095,12 +1082,12 @@ struct hsm_action_list {
 	__u32 padding1;
 	char  hal_fsname[0];   /* null-terminated */
 	/* struct hsm_action_item[hal_count] follows, aligned on 8-byte
-	 * boundaries. See hai_zero
+	 * boundaries. See hai_first
 	 */
 } __packed;
 
 #ifndef HAVE_CFS_SIZE_ROUND
-static inline int cfs_size_round (int val)
+static inline int cfs_size_round(int val)
 {
 	return (val + 7) & (~0x7);
 }
@@ -1109,7 +1096,7 @@ static inline int cfs_size_round (int val)
 #endif
 
 /* Return pointer to first hai in action list */
-static inline struct hsm_action_item *hai_zero(struct hsm_action_list *hal)
+static inline struct hsm_action_item *hai_first(struct hsm_action_list *hal)
 {
 	return (struct hsm_action_item *)(hal->hal_fsname +
 					  cfs_size_round(strlen(hal-> \
@@ -1131,7 +1118,7 @@ static inline int hal_size(struct hsm_action_list *hal)
 	struct hsm_action_item *hai;
 
 	sz = sizeof(*hal) + cfs_size_round(strlen(hal->hal_fsname) + 1);
-	hai = hai_zero(hal);
+	hai = hai_first(hal);
 	for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai))
 		sz += cfs_size_round(hai->hai_len);
 
diff --git a/drivers/staging/lustre/lustre/include/lustre_acl.h b/drivers/staging/lustre/lustre/include/lustre_acl.h
index aa4cfa7b749d..fecabe139b1f 100644
--- a/drivers/staging/lustre/lustre/include/lustre_acl.h
+++ b/drivers/staging/lustre/lustre/include/lustre_acl.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/include/lustre_cfg.h b/drivers/staging/lustre/lustre/include/lustre_cfg.h
index bb16ae980b98..95a0be13c0fb 100644
--- a/drivers/staging/lustre/lustre/include/lustre_cfg.h
+++ b/drivers/staging/lustre/lustre/include/lustre_cfg.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -161,7 +157,7 @@ static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, int index)
 	int offset;
 	int bufcount;
 
-	LASSERT (index >= 0);
+	LASSERT(index >= 0);
 
 	bufcount = lcfg->lcfg_bufcount;
 	if (index >= bufcount)
diff --git a/drivers/staging/lustre/lustre/include/lustre_debug.h b/drivers/staging/lustre/lustre/include/lustre_debug.h
index 8a089413c92e..93c1bdaf71a4 100644
--- a/drivers/staging/lustre/lustre/include/lustre_debug.h
+++ b/drivers/staging/lustre/lustre/include/lustre_debug.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/include/lustre_disk.h b/drivers/staging/lustre/lustre/include/lustre_disk.h
index 95fd36063f55..8886458748c1 100644
--- a/drivers/staging/lustre/lustre/include/lustre_disk.h
+++ b/drivers/staging/lustre/lustre/include/lustre_disk.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -130,7 +126,6 @@ struct lustre_sb_info {
 	struct lustre_mount_data *lsi_lmd;     /* mount command info */
 	struct ll_sb_info	*lsi_llsbi;   /* add'l client sbi info */
 	struct dt_device	 *lsi_dt_dev;  /* dt device to access disk fs*/
-	struct vfsmount	  *lsi_srv_mnt; /* the one server mount */
 	atomic_t	      lsi_mounts;  /* references to the srv_mnt */
 	char			  lsi_svname[MTI_NAME_MAXLEN];
 	char			  lsi_osd_obdname[64];
@@ -158,7 +153,6 @@ struct lustre_sb_info {
 struct lustre_mount_info {
 	char		 *lmi_name;
 	struct super_block   *lmi_sb;
-	struct vfsmount      *lmi_mnt;
 	struct list_head	    lmi_list_chain;
 };
 
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h
index 8b0364f71129..60051a5cfe20 100644
--- a/drivers/staging/lustre/lustre/include/lustre_dlm.h
+++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -71,6 +67,7 @@ struct obd_device;
  */
 enum ldlm_error {
 	ELDLM_OK = 0,
+	ELDLM_LOCK_MATCHED = 1,
 
 	ELDLM_LOCK_CHANGED = 300,
 	ELDLM_LOCK_ABORTED = 301,
@@ -269,7 +266,7 @@ struct ldlm_pool {
 	struct completion	 pl_kobj_unregister;
 };
 
-typedef int (*ldlm_cancel_for_recovery)(struct ldlm_lock *lock);
+typedef int (*ldlm_cancel_cbt)(struct ldlm_lock *lock);
 
 /**
  * LVB operations.
@@ -446,8 +443,11 @@ struct ldlm_namespace {
 	/** Limit of parallel AST RPC count. */
 	unsigned		ns_max_parallel_ast;
 
-	/** Callback to cancel locks before replaying it during recovery. */
-	ldlm_cancel_for_recovery ns_cancel_for_recovery;
+	/**
+	 * Callback to check if a lock is good to be canceled by ELC or
+	 * during recovery.
+	 */
+	ldlm_cancel_cbt		ns_cancel;
 
 	/** LDLM lock stats */
 	struct lprocfs_stats	*ns_stats;
@@ -479,9 +479,9 @@ static inline int ns_connect_lru_resize(struct ldlm_namespace *ns)
 }
 
 static inline void ns_register_cancel(struct ldlm_namespace *ns,
-				      ldlm_cancel_for_recovery arg)
+				      ldlm_cancel_cbt arg)
 {
-	ns->ns_cancel_for_recovery = arg;
+	ns->ns_cancel = arg;
 }
 
 struct ldlm_lock;
@@ -1073,7 +1073,7 @@ void ldlm_lock2handle(const struct ldlm_lock *lock,
 struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *, __u64 flags);
 void ldlm_cancel_callback(struct ldlm_lock *);
 int ldlm_lock_remove_from_lru(struct ldlm_lock *);
-int ldlm_lock_set_data(struct lustre_handle *, void *);
+int ldlm_lock_set_data(const struct lustre_handle *lockh, void *data);
 
 /**
  * Obtain a lock reference by its handle.
@@ -1162,10 +1162,10 @@ do {					    \
 struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock);
 void ldlm_lock_put(struct ldlm_lock *lock);
 void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc);
-void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode);
-int  ldlm_lock_addref_try(struct lustre_handle *lockh, __u32 mode);
-void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode);
-void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode);
+void ldlm_lock_addref(const struct lustre_handle *lockh, __u32 mode);
+int  ldlm_lock_addref_try(const struct lustre_handle *lockh, __u32 mode);
+void ldlm_lock_decref(const struct lustre_handle *lockh, __u32 mode);
+void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, __u32 mode);
 void ldlm_lock_fail_match_locked(struct ldlm_lock *lock);
 void ldlm_lock_allow_match(struct ldlm_lock *lock);
 void ldlm_lock_allow_match_locked(struct ldlm_lock *lock);
@@ -1174,10 +1174,10 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
 			       enum ldlm_type type, ldlm_policy_data_t *,
 			       enum ldlm_mode mode, struct lustre_handle *,
 			       int unref);
-enum ldlm_mode ldlm_revalidate_lock_handle(struct lustre_handle *lockh,
+enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh,
 					   __u64 *bits);
 void ldlm_lock_cancel(struct ldlm_lock *lock);
-void ldlm_lock_dump_handle(int level, struct lustre_handle *);
+void ldlm_lock_dump_handle(int level, const struct lustre_handle *);
 void ldlm_unlink_lock_skiplist(struct ldlm_lock *req);
 
 /* resource.c */
@@ -1251,9 +1251,9 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 			  enum ldlm_type type, __u8 with_policy,
 			  enum ldlm_mode mode,
 			  __u64 *flags, void *lvb, __u32 lvb_len,
-			  struct lustre_handle *lockh, int rc);
+			  const struct lustre_handle *lockh, int rc);
 int ldlm_cli_update_pool(struct ptlrpc_request *req);
-int ldlm_cli_cancel(struct lustre_handle *lockh,
+int ldlm_cli_cancel(const struct lustre_handle *lockh,
 		    enum ldlm_cancel_flags cancel_flags);
 int ldlm_cli_cancel_unused(struct ldlm_namespace *, const struct ldlm_res_id *,
 			   enum ldlm_cancel_flags flags, void *opaque);
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
index 7f2ba2ffe0eb..e7e0c21a9b40 100644
--- a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
+++ b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
@@ -37,17 +37,11 @@
 /** l_flags bits marked as "gone" bits */
 #define LDLM_FL_GONE_MASK               0x0006004000000000ULL
 
-/** l_flags bits marked as "hide_lock" bits */
-#define LDLM_FL_HIDE_LOCK_MASK          0x0000206400000000ULL
-
 /** l_flags bits marked as "inherit" bits */
 #define LDLM_FL_INHERIT_MASK            0x0000000000800000ULL
 
-/** l_flags bits marked as "local_only" bits */
-#define LDLM_FL_LOCAL_ONLY_MASK         0x00FFFFFF00000000ULL
-
-/** l_flags bits marked as "on_wire" bits */
-#define LDLM_FL_ON_WIRE_MASK            0x00000000C08F932FULL
+/** l_flags bits marked as "off_wire" bits */
+#define LDLM_FL_OFF_WIRE_MASK           0x00FFFFFF00000000ULL
 
 /** extent, mode, or resource changed */
 #define LDLM_FL_LOCK_CHANGED            0x0000000000000001ULL /* bit 0 */
@@ -204,7 +198,7 @@
 #define ldlm_set_cancel(_l)             LDLM_SET_FLAG((_l), 1ULL << 36)
 #define ldlm_clear_cancel(_l)           LDLM_CLEAR_FLAG((_l), 1ULL << 36)
 
-/** whatever it might mean */
+/** whatever it might mean -- never transmitted? */
 #define LDLM_FL_LOCAL_ONLY              0x0000002000000000ULL /* bit 37 */
 #define ldlm_is_local_only(_l)          LDLM_TEST_FLAG((_l), 1ULL << 37)
 #define ldlm_set_local_only(_l)         LDLM_SET_FLAG((_l), 1ULL << 37)
@@ -287,18 +281,18 @@
  * has canceled this lock and is waiting for rpc_lock which is taken by
  * the first operation. LDLM_FL_BL_AST is set by ldlm_callback_handler() in
  * the lock to prevent the Early Lock Cancel (ELC) code from cancelling it.
- *
- * LDLM_FL_BL_DONE is to be set by ldlm_cancel_callback() when lock cache is
- * dropped to let ldlm_callback_handler() return EINVAL to the server. It
- * is used when ELC RPC is already prepared and is waiting for rpc_lock,
- * too late to send a separate CANCEL RPC.
  */
 #define LDLM_FL_BL_AST                  0x0000400000000000ULL /* bit 46 */
 #define ldlm_is_bl_ast(_l)              LDLM_TEST_FLAG((_l), 1ULL << 46)
 #define ldlm_set_bl_ast(_l)             LDLM_SET_FLAG((_l), 1ULL << 46)
 #define ldlm_clear_bl_ast(_l)           LDLM_CLEAR_FLAG((_l), 1ULL << 46)
 
-/** whatever it might mean */
+/**
+ * Set by ldlm_cancel_callback() when lock cache is dropped to let
+ * ldlm_callback_handler() return EINVAL to the server. It is used when
+ * ELC RPC is already prepared and is waiting for rpc_lock, too late to
+ * send a separate CANCEL RPC.
+ */
 #define LDLM_FL_BL_DONE                 0x0000800000000000ULL /* bit 47 */
 #define ldlm_is_bl_done(_l)             LDLM_TEST_FLAG((_l), 1ULL << 47)
 #define ldlm_set_bl_done(_l)            LDLM_SET_FLAG((_l), 1ULL << 47)
@@ -381,104 +375,16 @@
 /** test for ldlm_lock flag bit set */
 #define LDLM_TEST_FLAG(_l, _b)        (((_l)->l_flags & (_b)) != 0)
 
+/** multi-bit test: are any of mask bits set? */
+#define LDLM_HAVE_MASK(_l, _m)		((_l)->l_flags & LDLM_FL_##_m##_MASK)
+
 /** set a ldlm_lock flag bit */
 #define LDLM_SET_FLAG(_l, _b)         ((_l)->l_flags |= (_b))
 
 /** clear a ldlm_lock flag bit */
 #define LDLM_CLEAR_FLAG(_l, _b)       ((_l)->l_flags &= ~(_b))
 
-/** Mask of flags inherited from parent lock when doing intents. */
-#define LDLM_INHERIT_FLAGS            LDLM_FL_INHERIT_MASK
-
-/** Mask of Flags sent in AST lock_flags to map into the receiving lock. */
-#define LDLM_AST_FLAGS                LDLM_FL_AST_MASK
-
 /** @} subgroup */
 /** @} group */
-#ifdef WIRESHARK_COMPILE
-static int hf_lustre_ldlm_fl_lock_changed        = -1;
-static int hf_lustre_ldlm_fl_block_granted       = -1;
-static int hf_lustre_ldlm_fl_block_conv          = -1;
-static int hf_lustre_ldlm_fl_block_wait          = -1;
-static int hf_lustre_ldlm_fl_ast_sent            = -1;
-static int hf_lustre_ldlm_fl_replay              = -1;
-static int hf_lustre_ldlm_fl_intent_only         = -1;
-static int hf_lustre_ldlm_fl_has_intent          = -1;
-static int hf_lustre_ldlm_fl_flock_deadlock      = -1;
-static int hf_lustre_ldlm_fl_discard_data        = -1;
-static int hf_lustre_ldlm_fl_no_timeout          = -1;
-static int hf_lustre_ldlm_fl_block_nowait        = -1;
-static int hf_lustre_ldlm_fl_test_lock           = -1;
-static int hf_lustre_ldlm_fl_cancel_on_block     = -1;
-static int hf_lustre_ldlm_fl_deny_on_contention  = -1;
-static int hf_lustre_ldlm_fl_ast_discard_data    = -1;
-static int hf_lustre_ldlm_fl_fail_loc            = -1;
-static int hf_lustre_ldlm_fl_skipped             = -1;
-static int hf_lustre_ldlm_fl_cbpending           = -1;
-static int hf_lustre_ldlm_fl_wait_noreproc       = -1;
-static int hf_lustre_ldlm_fl_cancel              = -1;
-static int hf_lustre_ldlm_fl_local_only          = -1;
-static int hf_lustre_ldlm_fl_failed              = -1;
-static int hf_lustre_ldlm_fl_canceling           = -1;
-static int hf_lustre_ldlm_fl_local               = -1;
-static int hf_lustre_ldlm_fl_lvb_ready           = -1;
-static int hf_lustre_ldlm_fl_kms_ignore          = -1;
-static int hf_lustre_ldlm_fl_cp_reqd             = -1;
-static int hf_lustre_ldlm_fl_cleaned             = -1;
-static int hf_lustre_ldlm_fl_atomic_cb           = -1;
-static int hf_lustre_ldlm_fl_bl_ast              = -1;
-static int hf_lustre_ldlm_fl_bl_done             = -1;
-static int hf_lustre_ldlm_fl_no_lru              = -1;
-static int hf_lustre_ldlm_fl_fail_notified       = -1;
-static int hf_lustre_ldlm_fl_destroyed           = -1;
-static int hf_lustre_ldlm_fl_server_lock         = -1;
-static int hf_lustre_ldlm_fl_res_locked          = -1;
-static int hf_lustre_ldlm_fl_waited              = -1;
-static int hf_lustre_ldlm_fl_ns_srv              = -1;
-static int hf_lustre_ldlm_fl_excl                = -1;
-
-const value_string lustre_ldlm_flags_vals[] = {
-	{LDLM_FL_LOCK_CHANGED,        "LDLM_FL_LOCK_CHANGED"},
-	{LDLM_FL_BLOCK_GRANTED,       "LDLM_FL_BLOCK_GRANTED"},
-	{LDLM_FL_BLOCK_CONV,          "LDLM_FL_BLOCK_CONV"},
-	{LDLM_FL_BLOCK_WAIT,          "LDLM_FL_BLOCK_WAIT"},
-	{LDLM_FL_AST_SENT,            "LDLM_FL_AST_SENT"},
-	{LDLM_FL_REPLAY,              "LDLM_FL_REPLAY"},
-	{LDLM_FL_INTENT_ONLY,         "LDLM_FL_INTENT_ONLY"},
-	{LDLM_FL_HAS_INTENT,          "LDLM_FL_HAS_INTENT"},
-	{LDLM_FL_FLOCK_DEADLOCK,      "LDLM_FL_FLOCK_DEADLOCK"},
-	{LDLM_FL_DISCARD_DATA,        "LDLM_FL_DISCARD_DATA"},
-	{LDLM_FL_NO_TIMEOUT,          "LDLM_FL_NO_TIMEOUT"},
-	{LDLM_FL_BLOCK_NOWAIT,        "LDLM_FL_BLOCK_NOWAIT"},
-	{LDLM_FL_TEST_LOCK,           "LDLM_FL_TEST_LOCK"},
-	{LDLM_FL_CANCEL_ON_BLOCK,     "LDLM_FL_CANCEL_ON_BLOCK"},
-	{LDLM_FL_DENY_ON_CONTENTION,  "LDLM_FL_DENY_ON_CONTENTION"},
-	{LDLM_FL_AST_DISCARD_DATA,    "LDLM_FL_AST_DISCARD_DATA"},
-	{LDLM_FL_FAIL_LOC,            "LDLM_FL_FAIL_LOC"},
-	{LDLM_FL_SKIPPED,             "LDLM_FL_SKIPPED"},
-	{LDLM_FL_CBPENDING,           "LDLM_FL_CBPENDING"},
-	{LDLM_FL_WAIT_NOREPROC,       "LDLM_FL_WAIT_NOREPROC"},
-	{LDLM_FL_CANCEL,              "LDLM_FL_CANCEL"},
-	{LDLM_FL_LOCAL_ONLY,          "LDLM_FL_LOCAL_ONLY"},
-	{LDLM_FL_FAILED,              "LDLM_FL_FAILED"},
-	{LDLM_FL_CANCELING,           "LDLM_FL_CANCELING"},
-	{LDLM_FL_LOCAL,               "LDLM_FL_LOCAL"},
-	{LDLM_FL_LVB_READY,           "LDLM_FL_LVB_READY"},
-	{LDLM_FL_KMS_IGNORE,          "LDLM_FL_KMS_IGNORE"},
-	{LDLM_FL_CP_REQD,             "LDLM_FL_CP_REQD"},
-	{LDLM_FL_CLEANED,             "LDLM_FL_CLEANED"},
-	{LDLM_FL_ATOMIC_CB,           "LDLM_FL_ATOMIC_CB"},
-	{LDLM_FL_BL_AST,              "LDLM_FL_BL_AST"},
-	{LDLM_FL_BL_DONE,             "LDLM_FL_BL_DONE"},
-	{LDLM_FL_NO_LRU,              "LDLM_FL_NO_LRU"},
-	{LDLM_FL_FAIL_NOTIFIED,       "LDLM_FL_FAIL_NOTIFIED"},
-	{LDLM_FL_DESTROYED,           "LDLM_FL_DESTROYED"},
-	{LDLM_FL_SERVER_LOCK,         "LDLM_FL_SERVER_LOCK"},
-	{LDLM_FL_RES_LOCKED,          "LDLM_FL_RES_LOCKED"},
-	{LDLM_FL_WAITED,              "LDLM_FL_WAITED"},
-	{LDLM_FL_NS_SRV,              "LDLM_FL_NS_SRV"},
-	{LDLM_FL_EXCL,                "LDLM_FL_EXCL"},
-	{ 0, NULL }
-};
-#endif /*  WIRESHARK_COMPILE */
+
 #endif /* LDLM_ALL_FLAGS_MASK */
diff --git a/drivers/staging/lustre/lustre/include/lustre_eacl.h b/drivers/staging/lustre/lustre/include/lustre_eacl.h
index 0b66593a9526..d1039e1ff70d 100644
--- a/drivers/staging/lustre/lustre/include/lustre_eacl.h
+++ b/drivers/staging/lustre/lustre/include/lustre_eacl.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -70,17 +66,6 @@ typedef struct {
 #define CFS_ACL_XATTR_COUNT(size, prefix) \
 	(((size) - sizeof(prefix ## _header)) / sizeof(prefix ## _entry))
 
-extern ext_acl_xattr_header *
-lustre_posix_acl_xattr_2ext(posix_acl_xattr_header *header, int size);
-extern int
-lustre_posix_acl_xattr_filter(posix_acl_xattr_header *header, size_t size,
-			      posix_acl_xattr_header **out);
-extern void
-lustre_ext_acl_xattr_free(ext_acl_xattr_header *header);
-extern ext_acl_xattr_header *
-lustre_acl_xattr_merge2ext(posix_acl_xattr_header *posix_header, int size,
-			   ext_acl_xattr_header *ext_header);
-
 #endif /* CONFIG_FS_POSIX_ACL */
 
 /** @} eacl */
diff --git a/drivers/staging/lustre/lustre/include/lustre_export.h b/drivers/staging/lustre/lustre/include/lustre_export.h
index 3014d27e6dc2..6e7cc4689fb8 100644
--- a/drivers/staging/lustre/lustre/include/lustre_export.h
+++ b/drivers/staging/lustre/lustre/include/lustre_export.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -180,19 +176,6 @@ static inline int exp_connect_lru_resize(struct obd_export *exp)
 	return !!(exp_connect_flags(exp) & OBD_CONNECT_LRU_RESIZE);
 }
 
-static inline int exp_connect_rmtclient(struct obd_export *exp)
-{
-	return !!(exp_connect_flags(exp) & OBD_CONNECT_RMT_CLIENT);
-}
-
-static inline int client_is_remote(struct obd_export *exp)
-{
-	struct obd_import *imp = class_exp2cliimp(exp);
-
-	return !!(imp->imp_connect_data.ocd_connect_flags &
-		  OBD_CONNECT_RMT_CLIENT);
-}
-
 static inline int exp_connect_vbr(struct obd_export *exp)
 {
 	return !!(exp_connect_flags(exp) & OBD_CONNECT_VBR);
diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h
index ab4a92390a43..743671a547ef 100644
--- a/drivers/staging/lustre/lustre/include/lustre_fid.h
+++ b/drivers/staging/lustre/lustre/include/lustre_fid.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -45,7 +41,7 @@
  *
  * @{
  *
- * http://wiki.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs
+ * http://wiki.old.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs
  * describes the FID namespace and interoperability requirements for FIDs.
  * The important parts of that document are included here for reference.
  *
@@ -308,10 +304,10 @@ static inline int fid_seq_in_fldb(__u64 seq)
 	       fid_seq_is_root(seq) || fid_seq_is_dot(seq);
 }
 
-static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq)
+static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq, __u32 ost_idx)
 {
 	if (fid_seq_is_mdt0(seq)) {
-		fid->f_seq = fid_idif_seq(0, 0);
+		fid->f_seq = fid_idif_seq(0, ost_idx);
 	} else {
 		LASSERTF(fid_seq_is_norm(seq) || fid_seq_is_echo(seq) ||
 			 fid_seq_is_idif(seq), "%#llx\n", seq);
@@ -498,19 +494,6 @@ static inline void ostid_build_res_name(struct ost_id *oi,
 	}
 }
 
-static inline void ostid_res_name_to_id(struct ost_id *oi,
-					struct ldlm_res_id *name)
-{
-	if (fid_seq_is_mdt0(name->name[LUSTRE_RES_ID_SEQ_OFF])) {
-		/* old resid */
-		ostid_set_seq(oi, name->name[LUSTRE_RES_ID_VER_OID_OFF]);
-		ostid_set_id(oi, name->name[LUSTRE_RES_ID_SEQ_OFF]);
-	} else {
-		/* new resid */
-		fid_extract_from_res_name(&oi->oi_fid, name);
-	}
-}
-
 /**
  * Return true if the resource is for the object identified by this id & group.
  */
@@ -546,7 +529,8 @@ static inline void ost_fid_build_resid(const struct lu_fid *fid,
 }
 
 static inline void ost_fid_from_resid(struct lu_fid *fid,
-				      const struct ldlm_res_id *name)
+				      const struct ldlm_res_id *name,
+				      int ost_idx)
 {
 	if (fid_seq_is_mdt0(name->name[LUSTRE_RES_ID_VER_OID_OFF])) {
 		/* old resid */
@@ -554,7 +538,7 @@ static inline void ost_fid_from_resid(struct lu_fid *fid,
 
 		ostid_set_seq(&oi, name->name[LUSTRE_RES_ID_VER_OID_OFF]);
 		ostid_set_id(&oi, name->name[LUSTRE_RES_ID_SEQ_OFF]);
-		ostid_to_fid(fid, &oi, 0);
+		ostid_to_fid(fid, &oi, ost_idx);
 	} else {
 		/* new resid */
 		fid_extract_from_res_name(fid, name);
diff --git a/drivers/staging/lustre/lustre/include/lustre_fld.h b/drivers/staging/lustre/lustre/include/lustre_fld.h
index 4cf2b0e61672..932410d3e3cc 100644
--- a/drivers/staging/lustre/lustre/include/lustre_fld.h
+++ b/drivers/staging/lustre/lustre/include/lustre_fld.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/include/lustre_ha.h b/drivers/staging/lustre/lustre/include/lustre_ha.h
index 5488a698dabd..cde7ed702c86 100644
--- a/drivers/staging/lustre/lustre/include/lustre_ha.h
+++ b/drivers/staging/lustre/lustre/include/lustre_ha.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/include/lustre_handles.h b/drivers/staging/lustre/lustre/include/lustre_handles.h
index 27f169d2ed34..1a63a6b9e116 100644
--- a/drivers/staging/lustre/lustre/include/lustre_handles.h
+++ b/drivers/staging/lustre/lustre/include/lustre_handles.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h
index dac2d84d8266..4445be7a59dd 100644
--- a/drivers/staging/lustre/lustre/include/lustre_import.h
+++ b/drivers/staging/lustre/lustre/include/lustre_import.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -109,7 +105,7 @@ static inline char *ptlrpc_import_state_name(enum lustre_imp_state state)
 		"RECOVER", "FULL", "EVICTED",
 	};
 
-	LASSERT (state <= LUSTRE_IMP_EVICTED);
+	LASSERT(state <= LUSTRE_IMP_EVICTED);
 	return import_state_names[state];
 }
 
diff --git a/drivers/staging/lustre/lustre/include/lustre_intent.h b/drivers/staging/lustre/lustre/include/lustre_intent.h
index c491d52d86a2..ed2b6c674109 100644
--- a/drivers/staging/lustre/lustre/include/lustre_intent.h
+++ b/drivers/staging/lustre/lustre/include/lustre_intent.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -38,7 +34,11 @@
 #define LUSTRE_INTENT_H
 
 /* intent IT_XXX are defined in lustre/include/obd.h */
-struct lustre_intent_data {
+
+struct lookup_intent {
+	int		it_op;
+	int		it_create_mode;
+	__u64		it_flags;
 	int		it_disposition;
 	int		it_status;
 	__u64		it_lock_handle;
@@ -46,17 +46,23 @@ struct lustre_intent_data {
 	int		it_lock_mode;
 	int		it_remote_lock_mode;
 	__u64	   it_remote_lock_handle;
-	void	   *it_data;
+	struct ptlrpc_request *it_request;
 	unsigned int    it_lock_set:1;
 };
 
-struct lookup_intent {
-	int     it_op;
-	int     it_create_mode;
-	__u64   it_flags;
-	union {
-		struct lustre_intent_data lustre;
-	} d;
-};
+static inline int it_disposition(struct lookup_intent *it, int flag)
+{
+	return it->it_disposition & flag;
+}
+
+static inline void it_set_disposition(struct lookup_intent *it, int flag)
+{
+	it->it_disposition |= flag;
+}
+
+static inline void it_clear_disposition(struct lookup_intent *it, int flag)
+{
+	it->it_disposition &= ~flag;
+}
 
 #endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h
index f2223d55850a..06958f217fc8 100644
--- a/drivers/staging/lustre/lustre/include/lustre_lib.h
+++ b/drivers/staging/lustre/lustre/include/lustre_lib.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -280,16 +276,16 @@ static inline void obd_ioctl_freedata(char *buf, int len)
 #define OBD_IOC_DATA_TYPE long
 
 #define OBD_IOC_CREATE		 _IOWR('f', 101, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_DESTROY		_IOW ('f', 104, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_DESTROY		_IOW('f', 104, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_PREALLOCATE	    _IOWR('f', 105, OBD_IOC_DATA_TYPE)
 
-#define OBD_IOC_SETATTR		_IOW ('f', 107, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SETATTR		_IOW('f', 107, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_GETATTR		_IOWR ('f', 108, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_READ		   _IOWR('f', 109, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_WRITE		  _IOWR('f', 110, OBD_IOC_DATA_TYPE)
 
 #define OBD_IOC_STATFS		 _IOWR('f', 113, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SYNC		   _IOW ('f', 114, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SYNC		   _IOW('f', 114, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_READ2		  _IOWR('f', 115, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_FORMAT		 _IOWR('f', 116, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_PARTITION	      _IOWR('f', 117, OBD_IOC_DATA_TYPE)
@@ -308,13 +304,13 @@ static inline void obd_ioctl_freedata(char *buf, int len)
 #define OBD_IOC_GETDTNAME	       OBD_IOC_GETNAME
 
 #define OBD_IOC_LOV_GET_CONFIG	 _IOWR('f', 132, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_CLIENT_RECOVER	 _IOW ('f', 133, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PING_TARGET	    _IOW ('f', 136, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_CLIENT_RECOVER	 _IOW('f', 133, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PING_TARGET	    _IOW('f', 136, OBD_IOC_DATA_TYPE)
 
 #define OBD_IOC_DEC_FS_USE_COUNT       _IO  ('f', 139)
-#define OBD_IOC_NO_TRANSNO	     _IOW ('f', 140, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SET_READONLY	   _IOW ('f', 141, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_ABORT_RECOVERY	 _IOR ('f', 142, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_NO_TRANSNO	     _IOW('f', 140, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SET_READONLY	   _IOW('f', 141, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_ABORT_RECOVERY	 _IOR('f', 142, OBD_IOC_DATA_TYPE)
 
 #define OBD_IOC_ROOT_SQUASH	    _IOWR('f', 143, OBD_IOC_DATA_TYPE)
 
@@ -324,27 +320,27 @@ static inline void obd_ioctl_freedata(char *buf, int len)
 
 #define OBD_IOC_CLOSE_UUID	     _IOWR ('f', 147, OBD_IOC_DATA_TYPE)
 
-#define OBD_IOC_CHANGELOG_SEND	 _IOW ('f', 148, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_CHANGELOG_SEND	 _IOW('f', 148, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_GETDEVICE	      _IOWR ('f', 149, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_FID2PATH	       _IOWR ('f', 150, OBD_IOC_DATA_TYPE)
 /* see also <lustre/lustre_user.h> for ioctls 151-153 */
 /* OBD_IOC_LOV_SETSTRIPE: See also LL_IOC_LOV_SETSTRIPE */
-#define OBD_IOC_LOV_SETSTRIPE	  _IOW ('f', 154, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LOV_SETSTRIPE	  _IOW('f', 154, OBD_IOC_DATA_TYPE)
 /* OBD_IOC_LOV_GETSTRIPE: See also LL_IOC_LOV_GETSTRIPE */
-#define OBD_IOC_LOV_GETSTRIPE	  _IOW ('f', 155, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LOV_GETSTRIPE	  _IOW('f', 155, OBD_IOC_DATA_TYPE)
 /* OBD_IOC_LOV_SETEA: See also LL_IOC_LOV_SETEA */
-#define OBD_IOC_LOV_SETEA	      _IOW ('f', 156, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LOV_SETEA	      _IOW('f', 156, OBD_IOC_DATA_TYPE)
 /* see <lustre/lustre_user.h> for ioctls 157-159 */
 /* OBD_IOC_QUOTACHECK: See also LL_IOC_QUOTACHECK */
-#define OBD_IOC_QUOTACHECK	     _IOW ('f', 160, int)
+#define OBD_IOC_QUOTACHECK	     _IOW('f', 160, int)
 /* OBD_IOC_POLL_QUOTACHECK: See also LL_IOC_POLL_QUOTACHECK */
-#define OBD_IOC_POLL_QUOTACHECK	_IOR ('f', 161, struct if_quotacheck *)
+#define OBD_IOC_POLL_QUOTACHECK	_IOR('f', 161, struct if_quotacheck *)
 /* OBD_IOC_QUOTACTL: See also LL_IOC_QUOTACTL */
 #define OBD_IOC_QUOTACTL	       _IOWR('f', 162, struct if_quotactl)
 /* see  also <lustre/lustre_user.h> for ioctls 163-176 */
-#define OBD_IOC_CHANGELOG_REG	  _IOW ('f', 177, struct obd_ioctl_data)
-#define OBD_IOC_CHANGELOG_DEREG	_IOW ('f', 178, struct obd_ioctl_data)
-#define OBD_IOC_CHANGELOG_CLEAR	_IOW ('f', 179, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_REG	  _IOW('f', 177, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_DEREG	_IOW('f', 178, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_CLEAR	_IOW('f', 179, struct obd_ioctl_data)
 #define OBD_IOC_RECORD		 _IOWR('f', 180, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_ENDRECORD	      _IOWR('f', 181, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_PARSE		  _IOWR('f', 182, OBD_IOC_DATA_TYPE)
@@ -352,7 +348,7 @@ static inline void obd_ioctl_freedata(char *buf, int len)
 #define OBD_IOC_PROCESS_CFG	    _IOWR('f', 184, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_DUMP_LOG	       _IOWR('f', 185, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_CLEAR_LOG	      _IOWR('f', 186, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PARAM		  _IOW ('f', 187, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PARAM		  _IOW('f', 187, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_POOL		   _IOWR('f', 188, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_REPLACE_NIDS	   _IOWR('f', 189, OBD_IOC_DATA_TYPE)
 
@@ -522,6 +518,28 @@ struct l_wait_info {
 			   sigmask(SIGTERM) | sigmask(SIGQUIT) |	\
 			   sigmask(SIGALRM))
 
+/**
+ * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively
+ * waiting threads, which is not always desirable because all threads will
+ * be waken up again and again, even user only needs a few of them to be
+ * active most time. This is not good for performance because cache can
+ * be polluted by different threads.
+ *
+ * LIFO list can resolve this problem because we always wakeup the most
+ * recent active thread by default.
+ *
+ * NB: please don't call non-exclusive & exclusive wait on the same
+ * waitq if add_wait_queue_exclusive_head is used.
+ */
+#define add_wait_queue_exclusive_head(waitq, link)		\
+{								\
+	unsigned long flags;					\
+								\
+	spin_lock_irqsave(&((waitq)->lock), flags);		\
+	__add_wait_queue_exclusive(waitq, link);		\
+	spin_unlock_irqrestore(&((waitq)->lock), flags);	\
+}
+
 /*
  * wait for @condition to become true, but no longer than timeout, specified
  * by @info.
@@ -578,7 +596,7 @@ do {									   \
 									       \
 		if (condition)						 \
 			break;						 \
-		if (cfs_signal_pending()) {				    \
+		if (signal_pending(current)) {				    \
 			if (info->lwi_on_signal &&		     \
 			    (__timeout == 0 || __allow_intr)) {		\
 				if (info->lwi_on_signal != LWI_ON_SIGNAL_NOOP) \
diff --git a/drivers/staging/lustre/lustre/include/lustre_lite.h b/drivers/staging/lustre/lustre/include/lustre_lite.h
index fcc5ebbceed8..b16897702559 100644
--- a/drivers/staging/lustre/lustre/include/lustre_lite.h
+++ b/drivers/staging/lustre/lustre/include/lustre_lite.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/include/lustre_log.h b/drivers/staging/lustre/lustre/include/lustre_log.h
index 49618e186824..b96e02317bfc 100644
--- a/drivers/staging/lustre/lustre/include/lustre_log.h
+++ b/drivers/staging/lustre/lustre/include/lustre_log.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h
index af77eb359c43..fa62b95d351f 100644
--- a/drivers/staging/lustre/lustre/include/lustre_mdc.h
+++ b/drivers/staging/lustre/lustre/include/lustre_mdc.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -64,9 +60,27 @@ struct obd_export;
 struct ptlrpc_request;
 struct obd_device;
 
+/**
+ * Serializes in-flight MDT-modifying RPC requests to preserve idempotency.
+ *
+ * This mutex is used to implement execute-once semantics on the MDT.
+ * The MDT stores the last transaction ID and result for every client in
+ * its last_rcvd file. If the client doesn't get a reply, it can safely
+ * resend the request and the MDT will reconstruct the reply being aware
+ * that the request has already been executed. Without this lock,
+ * execution status of concurrent in-flight requests would be
+ * overwritten.
+ *
+ * This design limits the extent to which we can keep a full pipeline of
+ * in-flight requests from a single client.  This limitation could be
+ * overcome by allowing multiple slots per client in the last_rcvd file.
+ */
 struct mdc_rpc_lock {
+	/** Lock protecting in-flight RPC concurrency. */
 	struct mutex		rpcl_mutex;
+	/** Intent associated with currently executing request. */
 	struct lookup_intent	*rpcl_it;
+	/** Used for MDS/RPC load testing purposes. */
 	int			rpcl_fakes;
 };
 
@@ -171,9 +185,6 @@ struct mdc_cache_waiter {
 };
 
 /* mdc/mdc_locks.c */
-int it_disposition(struct lookup_intent *it, int flag);
-void it_clear_disposition(struct lookup_intent *it, int flag);
-void it_set_disposition(struct lookup_intent *it, int flag);
 int it_open_error(int phase, struct lookup_intent *it);
 
 static inline bool cl_is_lov_delay_create(unsigned int flags)
diff --git a/drivers/staging/lustre/lustre/include/lustre_mds.h b/drivers/staging/lustre/lustre/include/lustre_mds.h
index 95d27ddecfb3..4104bd9bd5c4 100644
--- a/drivers/staging/lustre/lustre/include/lustre_mds.h
+++ b/drivers/staging/lustre/lustre/include/lustre_mds.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h
index 69586a522eb7..d5debd615fdf 100644
--- a/drivers/staging/lustre/lustre/include/lustre_net.h
+++ b/drivers/staging/lustre/lustre/include/lustre_net.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -270,6 +266,11 @@
 /* Macro to hide a typecast. */
 #define ptlrpc_req_async_args(req) ((void *)&req->rq_async_args)
 
+struct ptlrpc_replay_async_args {
+	int		praa_old_state;
+	int		praa_old_status;
+};
+
 /**
  * Structure to single define portal connection.
  */
@@ -479,8 +480,9 @@ enum rq_phase {
 	RQ_PHASE_BULK	   = 0xebc0de02,
 	RQ_PHASE_INTERPRET      = 0xebc0de03,
 	RQ_PHASE_COMPLETE       = 0xebc0de04,
-	RQ_PHASE_UNREGISTERING  = 0xebc0de05,
-	RQ_PHASE_UNDEFINED      = 0xebc0de06
+	RQ_PHASE_UNREG_RPC	= 0xebc0de05,
+	RQ_PHASE_UNREG_BULK	= 0xebc0de06,
+	RQ_PHASE_UNDEFINED	= 0xebc0de07
 };
 
 /** Type of request interpreter call-back */
@@ -1247,22 +1249,103 @@ struct ptlrpc_hpreq_ops {
 	void (*hpreq_fini)(struct ptlrpc_request *);
 };
 
-/**
- * Represents remote procedure call.
- *
- * This is a staple structure used by everybody wanting to send a request
- * in Lustre.
- */
-struct ptlrpc_request {
-	/* Request type: one of PTL_RPC_MSG_* */
-	int rq_type;
-	/** Result of request processing */
-	int rq_status;
+struct ptlrpc_cli_req {
+	/** For bulk requests on client only: bulk descriptor */
+	struct ptlrpc_bulk_desc		*cr_bulk;
+	/** optional time limit for send attempts */
+	long				 cr_delay_limit;
+	/** time request was first queued */
+	time_t				 cr_queued_time;
+	/** request sent timeval */
+	struct timespec64		 cr_sent_tv;
+	/** time for request really sent out */
+	time_t				 cr_sent_out;
+	/** when req reply unlink must finish. */
+	time_t				 cr_reply_deadline;
+	/** when req bulk unlink must finish. */
+	time_t				 cr_bulk_deadline;
+	/** when req unlink must finish. */
+	time_t				 cr_req_deadline;
+	/** Portal to which this request would be sent */
+	short				 cr_req_ptl;
+	/** Portal where to wait for reply and where reply would be sent */
+	short				 cr_rep_ptl;
+	/** request resending number */
+	unsigned int			 cr_resend_nr;
+	/** What was import generation when this request was sent */
+	int				 cr_imp_gen;
+	enum lustre_imp_state		 cr_send_state;
+	/** Per-request waitq introduced by bug 21938 for recovery waiting */
+	wait_queue_head_t		 cr_set_waitq;
+	/** Link item for request set lists */
+	struct list_head		 cr_set_chain;
+	/** link to waited ctx */
+	struct list_head		 cr_ctx_chain;
+
+	/** client's half ctx */
+	struct ptlrpc_cli_ctx		*cr_cli_ctx;
+	/** Link back to the request set */
+	struct ptlrpc_request_set	*cr_set;
+	/** outgoing request MD handle */
+	lnet_handle_md_t		 cr_req_md_h;
+	/** request-out callback parameter */
+	struct ptlrpc_cb_id		 cr_req_cbid;
+	/** incoming reply MD handle */
+	lnet_handle_md_t		 cr_reply_md_h;
+	wait_queue_head_t		 cr_reply_waitq;
+	/** reply callback parameter */
+	struct ptlrpc_cb_id		 cr_reply_cbid;
+	/** Async completion handler, called when reply is received */
+	ptlrpc_interpterer_t		 cr_reply_interp;
+	/** Async completion context */
+	union ptlrpc_async_args		 cr_async_args;
+	/** Opaq data for replay and commit callbacks. */
+	void				*cr_cb_data;
 	/**
-	 * Linkage item through which this request is included into
-	 * sending/delayed lists on client and into rqbd list on server
+	 * Commit callback, called when request is committed and about to be
+	 * freed.
 	 */
-	struct list_head rq_list;
+	void (*cr_commit_cb)(struct ptlrpc_request *);
+	/** Replay callback, called after request is replayed at recovery */
+	void (*cr_replay_cb)(struct ptlrpc_request *);
+};
+
+/** client request member alias */
+/* NB: these alias should NOT be used by any new code, instead they should
+ * be removed step by step to avoid potential abuse
+ */
+#define rq_bulk			rq_cli.cr_bulk
+#define rq_delay_limit		rq_cli.cr_delay_limit
+#define rq_queued_time		rq_cli.cr_queued_time
+#define rq_sent_tv		rq_cli.cr_sent_tv
+#define rq_real_sent		rq_cli.cr_sent_out
+#define rq_reply_deadline	rq_cli.cr_reply_deadline
+#define rq_bulk_deadline	rq_cli.cr_bulk_deadline
+#define rq_req_deadline		rq_cli.cr_req_deadline
+#define rq_nr_resend		rq_cli.cr_resend_nr
+#define rq_request_portal	rq_cli.cr_req_ptl
+#define rq_reply_portal		rq_cli.cr_rep_ptl
+#define rq_import_generation	rq_cli.cr_imp_gen
+#define rq_send_state		rq_cli.cr_send_state
+#define rq_set_chain		rq_cli.cr_set_chain
+#define rq_ctx_chain		rq_cli.cr_ctx_chain
+#define rq_set			rq_cli.cr_set
+#define rq_set_waitq		rq_cli.cr_set_waitq
+#define rq_cli_ctx		rq_cli.cr_cli_ctx
+#define rq_req_md_h		rq_cli.cr_req_md_h
+#define rq_req_cbid		rq_cli.cr_req_cbid
+#define rq_reply_md_h		rq_cli.cr_reply_md_h
+#define rq_reply_waitq		rq_cli.cr_reply_waitq
+#define rq_reply_cbid		rq_cli.cr_reply_cbid
+#define rq_interpret_reply	rq_cli.cr_reply_interp
+#define rq_async_args		rq_cli.cr_async_args
+#define rq_cb_data		rq_cli.cr_cb_data
+#define rq_commit_cb		rq_cli.cr_commit_cb
+#define rq_replay_cb		rq_cli.cr_replay_cb
+
+struct ptlrpc_srv_req {
+	/** initial thread servicing this request */
+	struct ptlrpc_thread		*sr_svc_thread;
 	/**
 	 * Server side list of incoming unserved requests sorted by arrival
 	 * time.  Traversed from time to time to notice about to expire
@@ -1270,32 +1353,86 @@ struct ptlrpc_request {
 	 * know server is alive and well, just very busy to service their
 	 * requests in time
 	 */
-	struct list_head rq_timed_list;
-	/** server-side history, used for debugging purposes. */
-	struct list_head rq_history_list;
+	struct list_head		sr_timed_list;
 	/** server-side per-export list */
-	struct list_head rq_exp_list;
-	/** server-side hp handlers */
-	struct ptlrpc_hpreq_ops *rq_ops;
-
-	/** initial thread servicing this request */
-	struct ptlrpc_thread *rq_svc_thread;
-
+	struct list_head		sr_exp_list;
+	/** server-side history, used for debuging purposes. */
+	struct list_head		sr_hist_list;
 	/** history sequence # */
-	__u64 rq_history_seq;
+	__u64				sr_hist_seq;
+	/** the index of service's srv_at_array into which request is linked */
+	time_t				sr_at_index;
+	/** authed uid */
+	uid_t				sr_auth_uid;
+	/** authed uid mapped to */
+	uid_t				sr_auth_mapped_uid;
+	/** RPC is generated from what part of Lustre */
+	enum lustre_sec_part		sr_sp_from;
+	/** request session context */
+	struct lu_context		sr_ses;
 	/** \addtogroup  nrs
 	 * @{
 	 */
 	/** stub for NRS request */
-	struct ptlrpc_nrs_request rq_nrq;
+	struct ptlrpc_nrs_request	sr_nrq;
 	/** @} nrs */
-	/** the index of service's srv_at_array into which request is linked */
-	u32 rq_at_index;
+	/** request arrival time */
+	struct timespec64		sr_arrival_time;
+	/** server's half ctx */
+	struct ptlrpc_svc_ctx		*sr_svc_ctx;
+	/** (server side), pointed directly into req buffer */
+	struct ptlrpc_user_desc		*sr_user_desc;
+	/** separated reply state */
+	struct ptlrpc_reply_state	*sr_reply_state;
+	/** server-side hp handlers */
+	struct ptlrpc_hpreq_ops		*sr_ops;
+	/** incoming request buffer */
+	struct ptlrpc_request_buffer_desc *sr_rqbd;
+};
+
+/** server request member alias */
+/* NB: these alias should NOT be used by any new code, instead they should
+ * be removed step by step to avoid potential abuse
+ */
+#define rq_svc_thread		rq_srv.sr_svc_thread
+#define rq_timed_list		rq_srv.sr_timed_list
+#define rq_exp_list		rq_srv.sr_exp_list
+#define rq_history_list		rq_srv.sr_hist_list
+#define rq_history_seq		rq_srv.sr_hist_seq
+#define rq_at_index		rq_srv.sr_at_index
+#define rq_auth_uid		rq_srv.sr_auth_uid
+#define rq_auth_mapped_uid	rq_srv.sr_auth_mapped_uid
+#define rq_sp_from		rq_srv.sr_sp_from
+#define rq_session		rq_srv.sr_ses
+#define rq_nrq			rq_srv.sr_nrq
+#define rq_arrival_time		rq_srv.sr_arrival_time
+#define rq_reply_state		rq_srv.sr_reply_state
+#define rq_svc_ctx		rq_srv.sr_svc_ctx
+#define rq_user_desc		rq_srv.sr_user_desc
+#define rq_ops			rq_srv.sr_ops
+#define rq_rqbd			rq_srv.sr_rqbd
+
+/**
+ * Represents remote procedure call.
+ *
+ * This is a staple structure used by everybody wanting to send a request
+ * in Lustre.
+ */
+struct ptlrpc_request {
+	/* Request type: one of PTL_RPC_MSG_* */
+	int				 rq_type;
+	/** Result of request processing */
+	int				 rq_status;
+	/**
+	 * Linkage item through which this request is included into
+	 * sending/delayed lists on client and into rqbd list on server
+	 */
+	struct list_head		 rq_list;
 	/** Lock to protect request flags and some other important bits, like
 	 * rq_list
 	 */
 	spinlock_t rq_lock;
-	/** client-side flags are serialized by rq_lock */
+	/** client-side flags are serialized by rq_lock @{ */
 	unsigned int rq_intr:1, rq_replied:1, rq_err:1,
 		rq_timedout:1, rq_resend:1, rq_restart:1,
 		/**
@@ -1311,37 +1448,40 @@ struct ptlrpc_request {
 		rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1,
 		rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1,
 		rq_early:1,
-		rq_req_unlink:1, rq_reply_unlink:1,
+		rq_req_unlinked:1,	/* unlinked request buffer from lnet */
+		rq_reply_unlinked:1,	/* unlinked reply buffer from lnet */
 		rq_memalloc:1,      /* req originated from "kswapd" */
-		/* server-side flags */
-		rq_packed_final:1,  /* packed final reply */
-		rq_hp:1,	    /* high priority RPC */
-		rq_at_linked:1,     /* link into service's srv_at_array */
-		rq_reply_truncate:1,
 		rq_committed:1,
-		/* whether the "rq_set" is a valid one */
+		rq_reply_truncated:1,
+		/** whether the "rq_set" is a valid one */
 		rq_invalid_rqset:1,
 		rq_generation_set:1,
-		/* do not resend request on -EINPROGRESS */
+		/** do not resend request on -EINPROGRESS */
 		rq_no_retry_einprogress:1,
 		/* allow the req to be sent if the import is in recovery
 		 * status
 		 */
-		rq_allow_replay:1;
-
-	unsigned int rq_nr_resend;
-
-	enum rq_phase rq_phase; /* one of RQ_PHASE_* */
-	enum rq_phase rq_next_phase; /* one of RQ_PHASE_* to be used next */
-	atomic_t rq_refcount; /* client-side refcount for SENT race,
-			       * server-side refcount for multiple replies
-			       */
+		rq_allow_replay:1,
+		/* bulk request, sent to server, but uncommitted */
+		rq_unstable:1;
+	/** @} */
 
-	/** Portal to which this request would be sent */
-	short rq_request_portal;  /* XXX FIXME bug 249 */
-	/** Portal where to wait for reply and where reply would be sent */
-	short rq_reply_portal;    /* XXX FIXME bug 249 */
+	/** server-side flags @{ */
+	unsigned int
+		rq_hp:1,		/**< high priority RPC */
+		rq_at_linked:1,		/**< link into service's srv_at_array */
+		rq_packed_final:1;	/**< packed final reply */
+	/** @} */
 
+	/** one of RQ_PHASE_* */
+	enum rq_phase			rq_phase;
+	/** one of RQ_PHASE_* to be used next */
+	enum rq_phase			rq_next_phase;
+	/**
+	 * client-side refcount for SENT race, server-side refcount
+	 * for multiple replies
+	 */
+	atomic_t			rq_refcount;
 	/**
 	 * client-side:
 	 * !rq_truncate : # reply bytes actually received,
@@ -1352,6 +1492,8 @@ struct ptlrpc_request {
 	int rq_reqlen;
 	/** Reply length */
 	int rq_replen;
+	/** Pool if request is from preallocated list */
+	struct ptlrpc_request_pool     *rq_pool;
 	/** Request message - what client sent */
 	struct lustre_msg *rq_reqmsg;
 	/** Reply message - server response */
@@ -1364,19 +1506,20 @@ struct ptlrpc_request {
 	 * List item to for replay list. Not yet committed requests get linked
 	 * there.
 	 * Also see \a rq_replay comment above.
+	 * It's also link chain on obd_export::exp_req_replay_queue
 	 */
 	struct list_head rq_replay_list;
-
+	/** non-shared members for client & server request*/
+	union {
+		struct ptlrpc_cli_req    rq_cli;
+		struct ptlrpc_srv_req    rq_srv;
+	};
 	/**
 	 * security and encryption data
 	 * @{
 	 */
-	struct ptlrpc_cli_ctx   *rq_cli_ctx;     /**< client's half ctx */
-	struct ptlrpc_svc_ctx   *rq_svc_ctx;     /**< server's half ctx */
-	struct list_head	       rq_ctx_chain;   /**< link to waited ctx */
-
-	struct sptlrpc_flavor    rq_flvr;	/**< for client & server */
-	enum lustre_sec_part     rq_sp_from;
+	/** description of flavors for client & server */
+	struct sptlrpc_flavor		rq_flvr;
 
 	/* client/server security flags */
 	unsigned int
@@ -1386,7 +1529,6 @@ struct ptlrpc_request {
 				 rq_bulk_write:1,    /* request bulk write */
 				 /* server authentication flags */
 				 rq_auth_gss:1,      /* authenticated by gss */
-				 rq_auth_remote:1,   /* authed as remote user */
 				 rq_auth_usr_root:1, /* authed as root */
 				 rq_auth_usr_mdt:1,  /* authed as mdt */
 				 rq_auth_usr_ost:1,  /* authed as ost */
@@ -1395,19 +1537,15 @@ struct ptlrpc_request {
 				 rq_pack_bulk:1,
 				 /* doesn't expect reply FIXME */
 				 rq_no_reply:1,
-				 rq_pill_init:1;     /* pill initialized */
-
-	uid_t		    rq_auth_uid;	/* authed uid */
-	uid_t		    rq_auth_mapped_uid; /* authed uid mapped to */
-
-	/* (server side), pointed directly into req buffer */
-	struct ptlrpc_user_desc *rq_user_desc;
-
-	/* various buffer pointers */
-	struct lustre_msg       *rq_reqbuf;      /* req wrapper */
-	char		    *rq_repbuf;      /* rep buffer */
-	struct lustre_msg       *rq_repdata;     /* rep wrapper msg */
-	struct lustre_msg       *rq_clrbuf;      /* only in priv mode */
+				 rq_pill_init:1, /* pill initialized */
+				 rq_srv_req:1; /* server request */
+
+	/** various buffer pointers */
+	struct lustre_msg       *rq_reqbuf;	/**< req wrapper */
+	char			*rq_repbuf;	/**< rep buffer */
+	struct lustre_msg       *rq_repdata;	/**< rep wrapper msg */
+	/** only in priv mode */
+	struct lustre_msg       *rq_clrbuf;
 	int		      rq_reqbuf_len;  /* req wrapper buf len */
 	int		      rq_reqdata_len; /* req wrapper msg len */
 	int		      rq_repbuf_len;  /* rep buffer len */
@@ -1424,97 +1562,28 @@ struct ptlrpc_request {
 	__u32 rq_req_swab_mask;
 	__u32 rq_rep_swab_mask;
 
-	/** What was import generation when this request was sent */
-	int rq_import_generation;
-	enum lustre_imp_state rq_send_state;
-
 	/** how many early replies (for stats) */
 	int rq_early_count;
 
-	/** client+server request */
-	lnet_handle_md_t     rq_req_md_h;
-	struct ptlrpc_cb_id  rq_req_cbid;
-	/** optional time limit for send attempts */
-	long       rq_delay_limit;
-	/** time request was first queued */
-	unsigned long	   rq_queued_time;
-
-	/* server-side... */
-	/** request arrival time */
-	struct timespec64	rq_arrival_time;
-	/** separated reply state */
-	struct ptlrpc_reply_state *rq_reply_state;
-	/** incoming request buffer */
-	struct ptlrpc_request_buffer_desc *rq_rqbd;
-
-	/** client-only incoming reply */
-	lnet_handle_md_t     rq_reply_md_h;
-	wait_queue_head_t	  rq_reply_waitq;
-	struct ptlrpc_cb_id  rq_reply_cbid;
-
+	/** Server-side, export on which request was received */
+	struct obd_export		*rq_export;
+	/** import where request is being sent */
+	struct obd_import		*rq_import;
 	/** our LNet NID */
 	lnet_nid_t	   rq_self;
 	/** Peer description (the other side) */
 	lnet_process_id_t    rq_peer;
-	/** Server-side, export on which request was received */
-	struct obd_export   *rq_export;
-	/** Client side, import where request is being sent */
-	struct obd_import   *rq_import;
-
-	/** Replay callback, called after request is replayed at recovery */
-	void (*rq_replay_cb)(struct ptlrpc_request *);
 	/**
-	 * Commit callback, called when request is committed and about to be
-	 * freed.
+	 * service time estimate (secs)
+	 * If the request is not served by this time, it is marked as timed out.
 	 */
-	void (*rq_commit_cb)(struct ptlrpc_request *);
-	/** Opaq data for replay and commit callbacks. */
-	void  *rq_cb_data;
-
-	/** For bulk requests on client only: bulk descriptor */
-	struct ptlrpc_bulk_desc *rq_bulk;
-
-	/** client outgoing req */
+	int			rq_timeout;
 	/**
 	 * when request/reply sent (secs), or time when request should be sent
 	 */
 	time64_t rq_sent;
-	/** time for request really sent out */
-	time64_t rq_real_sent;
-
-	/** when request must finish. volatile
-	 * so that servers' early reply updates to the deadline aren't
-	 * kept in per-cpu cache
-	 */
-	volatile time64_t rq_deadline;
-	/** when req reply unlink must finish. */
-	time64_t rq_reply_deadline;
-	/** when req bulk unlink must finish. */
-	time64_t rq_bulk_deadline;
-	/**
-	 * service time estimate (secs)
-	 * If the requestsis not served by this time, it is marked as timed out.
-	 */
-	int    rq_timeout;
-
-	/** Multi-rpc bits */
-	/** Per-request waitq introduced by bug 21938 for recovery waiting */
-	wait_queue_head_t rq_set_waitq;
-	/** Link item for request set lists */
-	struct list_head  rq_set_chain;
-	/** Link back to the request set */
-	struct ptlrpc_request_set *rq_set;
-	/** Async completion handler, called when reply is received */
-	ptlrpc_interpterer_t rq_interpret_reply;
-	/** Async completion context */
-	union ptlrpc_async_args rq_async_args;
-
-	/** Pool if request is from preallocated list */
-	struct ptlrpc_request_pool *rq_pool;
-
-	struct lu_context	   rq_session;
-	struct lu_context	   rq_recov_session;
-
+	/** when request must finish. */
+	time64_t		  rq_deadline;
 	/** request format description */
 	struct req_capsule	  rq_pill;
 };
@@ -1627,8 +1696,10 @@ ptlrpc_phase2str(enum rq_phase phase)
 		return "Interpret";
 	case RQ_PHASE_COMPLETE:
 		return "Complete";
-	case RQ_PHASE_UNREGISTERING:
-		return "Unregistering";
+	case RQ_PHASE_UNREG_RPC:
+		return "UnregRPC";
+	case RQ_PHASE_UNREG_BULK:
+		return "UnregBULK";
 	default:
 		return "?Phase?";
 	}
@@ -1655,7 +1726,7 @@ ptlrpc_rqphase2str(struct ptlrpc_request *req)
 #define DEBUG_REQ_FLAGS(req)						    \
 	ptlrpc_rqphase2str(req),						\
 	FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"),		    \
-	FLAG(req->rq_err, "E"),						 \
+	FLAG(req->rq_err, "E"),	FLAG(req->rq_net_err, "e"),		    \
 	FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"),   \
 	FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"),		  \
 	FLAG(req->rq_no_resend, "N"),					   \
@@ -1663,7 +1734,7 @@ ptlrpc_rqphase2str(struct ptlrpc_request *req)
 	FLAG(req->rq_wait_ctx, "C"), FLAG(req->rq_hp, "H"),		     \
 	FLAG(req->rq_committed, "M")
 
-#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s"
+#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s%s"
 
 void _debug_req(struct ptlrpc_request *req,
 		struct libcfs_debug_msg_data *data, const char *fmt, ...)
@@ -2314,8 +2385,7 @@ static inline int ptlrpc_client_bulk_active(struct ptlrpc_request *req)
 
 	desc = req->rq_bulk;
 
-	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
-	    req->rq_bulk_deadline > ktime_get_real_seconds())
+	if (req->rq_bulk_deadline > ktime_get_real_seconds())
 		return 1;
 
 	if (!desc)
@@ -2662,13 +2732,20 @@ ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase)
 	if (req->rq_phase == new_phase)
 		return;
 
-	if (new_phase == RQ_PHASE_UNREGISTERING) {
+	if (new_phase == RQ_PHASE_UNREG_RPC ||
+	    new_phase == RQ_PHASE_UNREG_BULK) {
+		/* No embedded unregistering phases */
+		if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
+		    req->rq_phase == RQ_PHASE_UNREG_BULK)
+			return;
+
 		req->rq_next_phase = req->rq_phase;
 		if (req->rq_import)
 			atomic_inc(&req->rq_import->imp_unregistering);
 	}
 
-	if (req->rq_phase == RQ_PHASE_UNREGISTERING) {
+	if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
+	    req->rq_phase == RQ_PHASE_UNREG_BULK) {
 		if (req->rq_import)
 			atomic_dec(&req->rq_import->imp_unregistering);
 	}
@@ -2685,9 +2762,6 @@ ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase)
 static inline int
 ptlrpc_client_early(struct ptlrpc_request *req)
 {
-	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-	    req->rq_reply_deadline > ktime_get_real_seconds())
-		return 0;
 	return req->rq_early;
 }
 
@@ -2697,8 +2771,7 @@ ptlrpc_client_early(struct ptlrpc_request *req)
 static inline int
 ptlrpc_client_replied(struct ptlrpc_request *req)
 {
-	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-	    req->rq_reply_deadline > ktime_get_real_seconds())
+	if (req->rq_reply_deadline > ktime_get_real_seconds())
 		return 0;
 	return req->rq_replied;
 }
@@ -2707,8 +2780,7 @@ ptlrpc_client_replied(struct ptlrpc_request *req)
 static inline int
 ptlrpc_client_recv(struct ptlrpc_request *req)
 {
-	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-	    req->rq_reply_deadline > ktime_get_real_seconds())
+	if (req->rq_reply_deadline > ktime_get_real_seconds())
 		return 1;
 	return req->rq_receiving_reply;
 }
@@ -2719,13 +2791,16 @@ ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req)
 	int rc;
 
 	spin_lock(&req->rq_lock);
-	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-	    req->rq_reply_deadline > ktime_get_real_seconds()) {
+	if (req->rq_reply_deadline > ktime_get_real_seconds()) {
+		spin_unlock(&req->rq_lock);
+		return 1;
+	}
+	if (req->rq_req_deadline > ktime_get_real_seconds()) {
 		spin_unlock(&req->rq_lock);
 		return 1;
 	}
-	rc = req->rq_receiving_reply;
-	rc = rc || req->rq_req_unlink || req->rq_reply_unlink;
+	rc = !req->rq_req_unlinked || !req->rq_reply_unlinked ||
+	     req->rq_receiving_reply;
 	spin_unlock(&req->rq_lock);
 	return rc;
 }
diff --git a/drivers/staging/lustre/lustre/include/lustre_param.h b/drivers/staging/lustre/lustre/include/lustre_param.h
index 383fe6febe4b..82aadd32c2b8 100644
--- a/drivers/staging/lustre/lustre/include/lustre_param.h
+++ b/drivers/staging/lustre/lustre/include/lustre_param.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -89,6 +85,7 @@ int class_parse_nid_quiet(char *buf, lnet_nid_t *nid, char **endh);
 
 /* Prefixes for parameters handled by obd's proc methods (XXX_process_config) */
 #define PARAM_OST		  "ost."
+#define PARAM_OSD		"osd."
 #define PARAM_OSC		  "osc."
 #define PARAM_MDT		  "mdt."
 #define PARAM_MDD		  "mdd."
diff --git a/drivers/staging/lustre/lustre/include/lustre_req_layout.h b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
index b2e67fcf9ef1..544a43c862b9 100644
--- a/drivers/staging/lustre/lustre/include/lustre_req_layout.h
+++ b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -137,6 +133,7 @@ extern struct req_format RQF_MGS_CONFIG_READ;
 /* fid/fld req_format */
 extern struct req_format RQF_SEQ_QUERY;
 extern struct req_format RQF_FLD_QUERY;
+extern struct req_format RQF_FLD_READ;
 /* MDS req_format */
 extern struct req_format RQF_MDS_CONNECT;
 extern struct req_format RQF_MDS_DISCONNECT;
@@ -163,7 +160,7 @@ extern struct req_format RQF_MDS_IS_SUBDIR;
 extern struct req_format RQF_MDS_DONE_WRITING;
 extern struct req_format RQF_MDS_REINT;
 extern struct req_format RQF_MDS_REINT_CREATE;
-extern struct req_format RQF_MDS_REINT_CREATE_RMT_ACL;
+extern struct req_format RQF_MDS_REINT_CREATE_ACL;
 extern struct req_format RQF_MDS_REINT_CREATE_SLAVE;
 extern struct req_format RQF_MDS_REINT_CREATE_SYM;
 extern struct req_format RQF_MDS_REINT_OPEN;
@@ -199,7 +196,7 @@ extern struct req_format RQF_OST_BRW_READ;
 extern struct req_format RQF_OST_BRW_WRITE;
 extern struct req_format RQF_OST_STATFS;
 extern struct req_format RQF_OST_SET_GRANT_INFO;
-extern struct req_format RQF_OST_GET_INFO_GENERIC;
+extern struct req_format RQF_OST_GET_INFO;
 extern struct req_format RQF_OST_GET_INFO_LAST_ID;
 extern struct req_format RQF_OST_GET_INFO_LAST_FID;
 extern struct req_format RQF_OST_SET_INFO_LAST_FID;
diff --git a/drivers/staging/lustre/lustre/include/lustre_sec.h b/drivers/staging/lustre/lustre/include/lustre_sec.h
index 01b4e6726a68..90c183424802 100644
--- a/drivers/staging/lustre/lustre/include/lustre_sec.h
+++ b/drivers/staging/lustre/lustre/include/lustre_sec.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -221,13 +217,13 @@ enum sptlrpc_bulk_service {
 
 #define SPTLRPC_FLVR_DEFAULT	    SPTLRPC_FLVR_NULL
 
-#define SPTLRPC_FLVR_INVALID	    ((__u32) 0xFFFFFFFF)
-#define SPTLRPC_FLVR_ANY		((__u32) 0xFFF00000)
+#define SPTLRPC_FLVR_INVALID	    ((__u32)0xFFFFFFFF)
+#define SPTLRPC_FLVR_ANY		((__u32)0xFFF00000)
 
 /**
  * extract the useful part from wire flavor
  */
-#define WIRE_FLVR(wflvr)		(((__u32) (wflvr)) & 0x000FFFFF)
+#define WIRE_FLVR(wflvr)		(((__u32)(wflvr)) & 0x000FFFFF)
 
 /** @} flavor */
 
diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h
index 4264d97650ec..a1bc2c478ff9 100644
--- a/drivers/staging/lustre/lustre/include/obd.h
+++ b/drivers/staging/lustre/lustre/include/obd.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -37,7 +33,7 @@
 #ifndef __OBD_H
 #define __OBD_H
 
-#include "linux/obd.h"
+#include <linux/spinlock.h>
 
 #define IOC_OSC_TYPE	 'h'
 #define IOC_OSC_MIN_NR       20
@@ -54,6 +50,7 @@
 #include "lustre_export.h"
 #include "lustre_fid.h"
 #include "lustre_fld.h"
+#include "lustre_intent.h"
 
 #define MAX_OBD_DEVICES 8192
 
@@ -165,9 +162,6 @@ struct obd_info {
 	obd_enqueue_update_f    oi_cb_up;
 };
 
-void lov_stripe_lock(struct lov_stripe_md *md);
-void lov_stripe_unlock(struct lov_stripe_md *md);
-
 struct obd_type {
 	struct list_head typ_chain;
 	struct obd_ops *typ_dt_ops;
@@ -234,6 +228,12 @@ enum {
 #define MDC_MAX_RIF_DEFAULT       8
 #define MDC_MAX_RIF_MAX	 512
 
+enum obd_cl_sem_lock_class {
+	OBD_CLI_SEM_NORMAL,
+	OBD_CLI_SEM_MGC,
+	OBD_CLI_SEM_MDCOSC,
+};
+
 struct mdc_rpc_lock;
 struct obd_import;
 struct client_obd {
@@ -293,14 +293,10 @@ struct client_obd {
 	 * blocking everywhere, but we don't want to slow down fast-path of
 	 * our main platform.)
 	 *
-	 * Exact type of ->cl_loi_list_lock is defined in arch/obd.h together
-	 * with client_obd_list_{un,}lock() and
-	 * client_obd_list_lock_{init,done}() functions.
-	 *
 	 * NB by Jinshan: though field names are still _loi_, but actually
 	 * osc_object{}s are in the list.
 	 */
-	struct client_obd_lock	       cl_loi_list_lock;
+	spinlock_t		       cl_loi_list_lock;
 	struct list_head	       cl_loi_ready_list;
 	struct list_head	       cl_loi_hp_ready_list;
 	struct list_head	       cl_loi_write_list;
@@ -327,7 +323,8 @@ struct client_obd {
 	atomic_t		 cl_lru_shrinkers;
 	atomic_t		 cl_lru_in_list;
 	struct list_head	 cl_lru_list; /* lru page list */
-	struct client_obd_lock   cl_lru_list_lock; /* page list protector */
+	spinlock_t		 cl_lru_list_lock; /* page list protector */
+	atomic_t		 cl_unstable_count;
 
 	/* number of in flight destroy rpcs is limited to max_rpcs_in_flight */
 	atomic_t	     cl_destroy_in_flight;
@@ -364,6 +361,7 @@ struct client_obd {
 
 	/* ptlrpc work for writeback in ptlrpcd context */
 	void		    *cl_writeback_work;
+	void			*cl_lru_work;
 	/* hash tables for osc_quota_info */
 	struct cfs_hash	      *cl_quota_hash[MAXQUOTAS];
 };
@@ -391,45 +389,9 @@ struct ost_pool {
 	struct rw_semaphore op_rw_sem;     /* to protect ost_pool use */
 };
 
-/* Round-robin allocator data */
-struct lov_qos_rr {
-	__u32	       lqr_start_idx;   /* start index of new inode */
-	__u32	       lqr_offset_idx;  /* aliasing for start_idx  */
-	int		 lqr_start_count; /* reseed counter */
-	struct ost_pool     lqr_pool;	/* round-robin optimized list */
-	unsigned long       lqr_dirty:1;     /* recalc round-robin list */
-};
-
 /* allow statfs data caching for 1 second */
 #define OBD_STATFS_CACHE_SECONDS 1
 
-struct lov_statfs_data {
-	struct obd_info   lsd_oi;
-	struct obd_statfs lsd_statfs;
-};
-
-/* Stripe placement optimization */
-struct lov_qos {
-	struct list_head    lq_oss_list; /* list of OSSs that targets use */
-	struct rw_semaphore lq_rw_sem;
-	__u32		lq_active_oss_count;
-	unsigned int	lq_prio_free;   /* priority for free space */
-	unsigned int	lq_threshold_rr;/* priority for rr */
-	struct lov_qos_rr   lq_rr;	  /* round robin qos data */
-	unsigned long       lq_dirty:1,     /* recalc qos data */
-			    lq_same_space:1,/* the ost's all have approx.
-					     * the same space avail
-					     */
-			    lq_reset:1,     /* zero current penalties */
-			    lq_statfs_in_progress:1; /* statfs op in
-							progress */
-	/* qos statfs data */
-	struct lov_statfs_data *lq_statfs_data;
-	wait_queue_head_t lq_statfs_waitq; /* waitqueue to notify statfs
-					    * requests completion
-					    */
-};
-
 struct lov_tgt_desc {
 	struct list_head	  ltd_kill;
 	struct obd_uuid     ltd_uuid;
@@ -442,25 +404,6 @@ struct lov_tgt_desc {
 			    ltd_reap:1;  /* should this target be deleted */
 };
 
-/* Pool metadata */
-#define pool_tgt_size(_p)   _p->pool_obds.op_size
-#define pool_tgt_count(_p)  _p->pool_obds.op_count
-#define pool_tgt_array(_p)  _p->pool_obds.op_array
-#define pool_tgt_rw_sem(_p) _p->pool_obds.op_rw_sem
-
-struct pool_desc {
-	char		  pool_name[LOV_MAXPOOLNAME + 1]; /* name of pool */
-	struct ost_pool       pool_obds;	      /* pool members */
-	atomic_t	  pool_refcount;	  /* pool ref. counter */
-	struct lov_qos_rr     pool_rr;		/* round robin qos */
-	struct hlist_node      pool_hash;	      /* access by poolname */
-	struct list_head	    pool_list;	      /* serial access */
-	struct dentry		*pool_debugfs_entry;	/* file in debugfs */
-	struct obd_device    *pool_lobd;	/* obd of the lov/lod to which
-						 * this pool belongs
-						 */
-};
-
 struct lov_obd {
 	struct lov_desc	 desc;
 	struct lov_tgt_desc   **lov_tgts;	      /* sparse array */
@@ -468,8 +411,6 @@ struct lov_obd {
 	struct mutex		lov_lock;
 	struct obd_connect_data lov_ocd;
 	atomic_t	    lov_refcount;
-	__u32		   lov_tgt_count;	 /* how many OBD's */
-	__u32		   lov_active_tgt_count;  /* how many active */
 	__u32		   lov_death_row;/* tgts scheduled to be deleted */
 	__u32		   lov_tgt_size;   /* size of tgts array */
 	int		     lov_connects;
@@ -479,8 +420,8 @@ struct lov_obd {
 	struct dentry		*lov_pool_debugfs_entry;
 	enum lustre_sec_part    lov_sp_me;
 
-	/* Cached LRU pages from upper layer */
-	void		       *lov_cache;
+	/* Cached LRU and unstable data from upper layer */
+	struct cl_client_cache *lov_cache;
 
 	struct rw_semaphore     lov_notify_lock;
 
@@ -511,7 +452,7 @@ struct lmv_obd {
 	struct obd_uuid		cluuid;
 	struct obd_export	*exp;
 
-	struct mutex		init_mutex;
+	struct mutex		lmv_init_mutex;
 	int			connected;
 	int			max_easize;
 	int			max_def_easize;
@@ -1180,9 +1121,6 @@ struct md_ops {
 			     ldlm_policy_data_t *, enum ldlm_mode,
 			     enum ldlm_cancel_flags flags, void *opaque);
 
-	int (*get_remote_perm)(struct obd_export *, const struct lu_fid *,
-			       __u32, struct ptlrpc_request **);
-
 	int (*intent_getattr_async)(struct obd_export *,
 				    struct md_enqueue_info *,
 				    struct ldlm_enqueue_info *);
diff --git a/drivers/staging/lustre/lustre/include/obd_cksum.h b/drivers/staging/lustre/lustre/include/obd_cksum.h
index 637fa22110a4..a8a81e662a56 100644
--- a/drivers/staging/lustre/lustre/include/obd_cksum.h
+++ b/drivers/staging/lustre/lustre/include/obd_cksum.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -35,6 +31,7 @@
 #ifndef __OBD_CKSUM
 #define __OBD_CKSUM
 #include "../../include/linux/libcfs/libcfs.h"
+#include "../../include/linux/libcfs/libcfs_crypto.h"
 #include "lustre/lustre_idl.h"
 
 static inline unsigned char cksum_obd2cfs(enum cksum_type cksum_type)
diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h
index 706869f8c98f..6482a937000b 100644
--- a/drivers/staging/lustre/lustre/include/obd_class.h
+++ b/drivers/staging/lustre/lustre/include/obd_class.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -477,7 +473,7 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg)
 		struct lu_context  session_ctx;
 		struct lu_env env;
 
-		lu_context_init(&session_ctx, LCT_SESSION);
+		lu_context_init(&session_ctx, LCT_SESSION | LCT_SERVER_SESSION);
 		session_ctx.lc_thread = NULL;
 		lu_context_enter(&session_ctx);
 
@@ -490,8 +486,9 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg)
 				obd->obd_lu_dev = d;
 				d->ld_obd = obd;
 				rc = 0;
-			} else
+			} else {
 				rc = PTR_ERR(d);
+			}
 		}
 		lu_context_exit(&session_ctx);
 		lu_context_fini(&session_ctx);
@@ -1653,16 +1650,6 @@ static inline int md_init_ea_size(struct obd_export *exp, int easize,
 					       cookiesize, def_cookiesize);
 }
 
-static inline int md_get_remote_perm(struct obd_export *exp,
-				     const struct lu_fid *fid, __u32 suppgid,
-				     struct ptlrpc_request **request)
-{
-	EXP_CHECK_MD_OP(exp, get_remote_perm);
-	EXP_MD_COUNTER_INCREMENT(exp, get_remote_perm);
-	return MDP(exp->exp_obd, get_remote_perm)(exp, fid, suppgid,
-						  request);
-}
-
 static inline int md_intent_getattr_async(struct obd_export *exp,
 					  struct md_enqueue_info *minfo,
 					  struct ldlm_enqueue_info *einfo)
diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h
index f8ee3a3254ba..845e64a56c21 100644
--- a/drivers/staging/lustre/lustre/include/obd_support.h
+++ b/drivers/staging/lustre/lustre/include/obd_support.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -58,6 +54,7 @@ extern int at_early_margin;
 extern int at_extra;
 extern unsigned int obd_sync_filter;
 extern unsigned int obd_max_dirty_pages;
+extern atomic_t obd_unstable_pages;
 extern atomic_t obd_dirty_pages;
 extern atomic_t obd_dirty_transit_pages;
 extern char obd_jobid_var[];
@@ -289,6 +286,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_OST_ENOINO	      0x229
 #define OBD_FAIL_OST_DQACQ_NET	   0x230
 #define OBD_FAIL_OST_STATFS_EINPROGRESS  0x231
+#define OBD_FAIL_OST_SET_INFO_NET		0x232
 
 #define OBD_FAIL_LDLM		    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
@@ -319,6 +317,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_LDLM_AGL_DELAY	  0x31a
 #define OBD_FAIL_LDLM_AGL_NOLOCK	 0x31b
 #define OBD_FAIL_LDLM_OST_LVB		 0x31c
+#define OBD_FAIL_LDLM_ENQUEUE_HANG	 0x31d
 
 /* LOCKLESS IO */
 #define OBD_FAIL_LDLM_SET_CONTENTION     0x385
@@ -365,6 +364,9 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB2  0x515
 #define OBD_FAIL_PTLRPC_DELAY_IMP_FULL   0x516
 #define OBD_FAIL_PTLRPC_CANCEL_RESEND    0x517
+#define OBD_FAIL_PTLRPC_DROP_BULK	 0x51a
+#define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK	 0x51b
+#define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c
 
 #define OBD_FAIL_OBD_PING_NET	    0x600
 #define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
@@ -426,6 +428,7 @@ extern char obd_jobid_var[];
 
 #define OBD_FAIL_FLD		     0x1100
 #define OBD_FAIL_FLD_QUERY_NET	   0x1101
+#define OBD_FAIL_FLD_READ_NET		0x1102
 
 #define OBD_FAIL_SEC_CTX		 0x1200
 #define OBD_FAIL_SEC_CTX_INIT_NET	0x1201
diff --git a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c b/drivers/staging/lustre/lustre/lclient/lcommon_cl.c
deleted file mode 100644
index 96141d17d07f..000000000000
--- a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c
+++ /dev/null
@@ -1,1203 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * cl code shared between vvp and liblustre (and other Lustre clients in the
- * future).
- *
- *   Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include "../../include/linux/libcfs/libcfs.h"
-# include <linux/fs.h>
-# include <linux/sched.h>
-# include <linux/mm.h>
-# include <linux/quotaops.h>
-# include <linux/highmem.h>
-# include <linux/pagemap.h>
-# include <linux/rbtree.h>
-
-#include "../include/obd.h"
-#include "../include/obd_support.h"
-#include "../include/lustre_fid.h"
-#include "../include/lustre_lite.h"
-#include "../include/lustre_dlm.h"
-#include "../include/lustre_ver.h"
-#include "../include/lustre_mdc.h"
-#include "../include/cl_object.h"
-
-#include "../include/lclient.h"
-
-#include "../llite/llite_internal.h"
-
-static const struct cl_req_operations ccc_req_ops;
-
-/*
- * ccc_ prefix stands for "Common Client Code".
- */
-
-static struct kmem_cache *ccc_lock_kmem;
-static struct kmem_cache *ccc_object_kmem;
-static struct kmem_cache *ccc_thread_kmem;
-static struct kmem_cache *ccc_session_kmem;
-static struct kmem_cache *ccc_req_kmem;
-
-static struct lu_kmem_descr ccc_caches[] = {
-	{
-		.ckd_cache = &ccc_lock_kmem,
-		.ckd_name  = "ccc_lock_kmem",
-		.ckd_size  = sizeof(struct ccc_lock)
-	},
-	{
-		.ckd_cache = &ccc_object_kmem,
-		.ckd_name  = "ccc_object_kmem",
-		.ckd_size  = sizeof(struct ccc_object)
-	},
-	{
-		.ckd_cache = &ccc_thread_kmem,
-		.ckd_name  = "ccc_thread_kmem",
-		.ckd_size  = sizeof(struct ccc_thread_info),
-	},
-	{
-		.ckd_cache = &ccc_session_kmem,
-		.ckd_name  = "ccc_session_kmem",
-		.ckd_size  = sizeof(struct ccc_session)
-	},
-	{
-		.ckd_cache = &ccc_req_kmem,
-		.ckd_name  = "ccc_req_kmem",
-		.ckd_size  = sizeof(struct ccc_req)
-	},
-	{
-		.ckd_cache = NULL
-	}
-};
-
-/*****************************************************************************
- *
- * Vvp device and device type functions.
- *
- */
-
-void *ccc_key_init(const struct lu_context *ctx, struct lu_context_key *key)
-{
-	struct ccc_thread_info *info;
-
-	info = kmem_cache_zalloc(ccc_thread_kmem, GFP_NOFS);
-	if (!info)
-		info = ERR_PTR(-ENOMEM);
-	return info;
-}
-
-void ccc_key_fini(const struct lu_context *ctx,
-			 struct lu_context_key *key, void *data)
-{
-	struct ccc_thread_info *info = data;
-
-	kmem_cache_free(ccc_thread_kmem, info);
-}
-
-void *ccc_session_key_init(const struct lu_context *ctx,
-				  struct lu_context_key *key)
-{
-	struct ccc_session *session;
-
-	session = kmem_cache_zalloc(ccc_session_kmem, GFP_NOFS);
-	if (!session)
-		session = ERR_PTR(-ENOMEM);
-	return session;
-}
-
-void ccc_session_key_fini(const struct lu_context *ctx,
-				 struct lu_context_key *key, void *data)
-{
-	struct ccc_session *session = data;
-
-	kmem_cache_free(ccc_session_kmem, session);
-}
-
-struct lu_context_key ccc_key = {
-	.lct_tags = LCT_CL_THREAD,
-	.lct_init = ccc_key_init,
-	.lct_fini = ccc_key_fini
-};
-
-struct lu_context_key ccc_session_key = {
-	.lct_tags = LCT_SESSION,
-	.lct_init = ccc_session_key_init,
-	.lct_fini = ccc_session_key_fini
-};
-
-/* type constructor/destructor: ccc_type_{init,fini,start,stop}(). */
-/* LU_TYPE_INIT_FINI(ccc, &ccc_key, &ccc_session_key); */
-
-int ccc_device_init(const struct lu_env *env, struct lu_device *d,
-			   const char *name, struct lu_device *next)
-{
-	struct ccc_device  *vdv;
-	int rc;
-
-	vdv = lu2ccc_dev(d);
-	vdv->cdv_next = lu2cl_dev(next);
-
-	LASSERT(d->ld_site && next->ld_type);
-	next->ld_site = d->ld_site;
-	rc = next->ld_type->ldt_ops->ldto_device_init(
-			env, next, next->ld_type->ldt_name, NULL);
-	if (rc == 0) {
-		lu_device_get(next);
-		lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init);
-	}
-	return rc;
-}
-
-struct lu_device *ccc_device_fini(const struct lu_env *env,
-					 struct lu_device *d)
-{
-	return cl2lu_dev(lu2ccc_dev(d)->cdv_next);
-}
-
-struct lu_device *ccc_device_alloc(const struct lu_env *env,
-				   struct lu_device_type *t,
-				   struct lustre_cfg *cfg,
-				   const struct lu_device_operations *luops,
-				   const struct cl_device_operations *clops)
-{
-	struct ccc_device *vdv;
-	struct lu_device  *lud;
-	struct cl_site    *site;
-	int rc;
-
-	vdv = kzalloc(sizeof(*vdv), GFP_NOFS);
-	if (!vdv)
-		return ERR_PTR(-ENOMEM);
-
-	lud = &vdv->cdv_cl.cd_lu_dev;
-	cl_device_init(&vdv->cdv_cl, t);
-	ccc2lu_dev(vdv)->ld_ops = luops;
-	vdv->cdv_cl.cd_ops = clops;
-
-	site = kzalloc(sizeof(*site), GFP_NOFS);
-	if (site) {
-		rc = cl_site_init(site, &vdv->cdv_cl);
-		if (rc == 0)
-			rc = lu_site_init_finish(&site->cs_lu);
-		else {
-			LASSERT(!lud->ld_site);
-			CERROR("Cannot init lu_site, rc %d.\n", rc);
-			kfree(site);
-		}
-	} else
-		rc = -ENOMEM;
-	if (rc != 0) {
-		ccc_device_free(env, lud);
-		lud = ERR_PTR(rc);
-	}
-	return lud;
-}
-
-struct lu_device *ccc_device_free(const struct lu_env *env,
-					 struct lu_device *d)
-{
-	struct ccc_device *vdv  = lu2ccc_dev(d);
-	struct cl_site    *site = lu2cl_site(d->ld_site);
-	struct lu_device  *next = cl2lu_dev(vdv->cdv_next);
-
-	if (d->ld_site) {
-		cl_site_fini(site);
-		kfree(site);
-	}
-	cl_device_fini(lu2cl_dev(d));
-	kfree(vdv);
-	return next;
-}
-
-int ccc_req_init(const struct lu_env *env, struct cl_device *dev,
-			struct cl_req *req)
-{
-	struct ccc_req *vrq;
-	int result;
-
-	vrq = kmem_cache_zalloc(ccc_req_kmem, GFP_NOFS);
-	if (vrq) {
-		cl_req_slice_add(req, &vrq->crq_cl, dev, &ccc_req_ops);
-		result = 0;
-	} else
-		result = -ENOMEM;
-	return result;
-}
-
-/**
- * An `emergency' environment used by ccc_inode_fini() when cl_env_get()
- * fails. Access to this environment is serialized by ccc_inode_fini_guard
- * mutex.
- */
-static struct lu_env *ccc_inode_fini_env;
-
-/**
- * A mutex serializing calls to slp_inode_fini() under extreme memory
- * pressure, when environments cannot be allocated.
- */
-static DEFINE_MUTEX(ccc_inode_fini_guard);
-static int dummy_refcheck;
-
-int ccc_global_init(struct lu_device_type *device_type)
-{
-	int result;
-
-	result = lu_kmem_init(ccc_caches);
-	if (result)
-		return result;
-
-	result = lu_device_type_init(device_type);
-	if (result)
-		goto out_kmem;
-
-	ccc_inode_fini_env = cl_env_alloc(&dummy_refcheck,
-					  LCT_REMEMBER|LCT_NOREF);
-	if (IS_ERR(ccc_inode_fini_env)) {
-		result = PTR_ERR(ccc_inode_fini_env);
-		goto out_device;
-	}
-
-	ccc_inode_fini_env->le_ctx.lc_cookie = 0x4;
-	return 0;
-out_device:
-	lu_device_type_fini(device_type);
-out_kmem:
-	lu_kmem_fini(ccc_caches);
-	return result;
-}
-
-void ccc_global_fini(struct lu_device_type *device_type)
-{
-	if (ccc_inode_fini_env) {
-		cl_env_put(ccc_inode_fini_env, &dummy_refcheck);
-		ccc_inode_fini_env = NULL;
-	}
-	lu_device_type_fini(device_type);
-	lu_kmem_fini(ccc_caches);
-}
-
-/*****************************************************************************
- *
- * Object operations.
- *
- */
-
-struct lu_object *ccc_object_alloc(const struct lu_env *env,
-				   const struct lu_object_header *unused,
-				   struct lu_device *dev,
-				   const struct cl_object_operations *clops,
-				   const struct lu_object_operations *luops)
-{
-	struct ccc_object *vob;
-	struct lu_object  *obj;
-
-	vob = kmem_cache_zalloc(ccc_object_kmem, GFP_NOFS);
-	if (vob) {
-		struct cl_object_header *hdr;
-
-		obj = ccc2lu(vob);
-		hdr = &vob->cob_header;
-		cl_object_header_init(hdr);
-		lu_object_init(obj, &hdr->coh_lu, dev);
-		lu_object_add_top(&hdr->coh_lu, obj);
-
-		vob->cob_cl.co_ops = clops;
-		obj->lo_ops = luops;
-	} else
-		obj = NULL;
-	return obj;
-}
-
-int ccc_object_init0(const struct lu_env *env,
-			    struct ccc_object *vob,
-			    const struct cl_object_conf *conf)
-{
-	vob->cob_inode = conf->coc_inode;
-	vob->cob_transient_pages = 0;
-	cl_object_page_init(&vob->cob_cl, sizeof(struct ccc_page));
-	return 0;
-}
-
-int ccc_object_init(const struct lu_env *env, struct lu_object *obj,
-			   const struct lu_object_conf *conf)
-{
-	struct ccc_device *dev = lu2ccc_dev(obj->lo_dev);
-	struct ccc_object *vob = lu2ccc(obj);
-	struct lu_object  *below;
-	struct lu_device  *under;
-	int result;
-
-	under = &dev->cdv_next->cd_lu_dev;
-	below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under);
-	if (below) {
-		const struct cl_object_conf *cconf;
-
-		cconf = lu2cl_conf(conf);
-		INIT_LIST_HEAD(&vob->cob_pending_list);
-		lu_object_add(obj, below);
-		result = ccc_object_init0(env, vob, cconf);
-	} else
-		result = -ENOMEM;
-	return result;
-}
-
-void ccc_object_free(const struct lu_env *env, struct lu_object *obj)
-{
-	struct ccc_object *vob = lu2ccc(obj);
-
-	lu_object_fini(obj);
-	lu_object_header_fini(obj->lo_header);
-	kmem_cache_free(ccc_object_kmem, vob);
-}
-
-int ccc_lock_init(const struct lu_env *env,
-		  struct cl_object *obj, struct cl_lock *lock,
-		  const struct cl_io *unused,
-		  const struct cl_lock_operations *lkops)
-{
-	struct ccc_lock *clk;
-	int result;
-
-	CLOBINVRNT(env, obj, ccc_object_invariant(obj));
-
-	clk = kmem_cache_zalloc(ccc_lock_kmem, GFP_NOFS);
-	if (clk) {
-		cl_lock_slice_add(lock, &clk->clk_cl, obj, lkops);
-		result = 0;
-	} else
-		result = -ENOMEM;
-	return result;
-}
-
-int ccc_object_glimpse(const struct lu_env *env,
-		       const struct cl_object *obj, struct ost_lvb *lvb)
-{
-	struct inode *inode = ccc_object_inode(obj);
-
-	lvb->lvb_mtime = cl_inode_mtime(inode);
-	lvb->lvb_atime = cl_inode_atime(inode);
-	lvb->lvb_ctime = cl_inode_ctime(inode);
-	/*
-	 * LU-417: Add dirty pages block count lest i_blocks reports 0, some
-	 * "cp" or "tar" on remote node may think it's a completely sparse file
-	 * and skip it.
-	 */
-	if (lvb->lvb_size > 0 && lvb->lvb_blocks == 0)
-		lvb->lvb_blocks = dirty_cnt(inode);
-	return 0;
-}
-
-static void ccc_object_size_lock(struct cl_object *obj)
-{
-	struct inode *inode = ccc_object_inode(obj);
-
-	ll_inode_size_lock(inode);
-	cl_object_attr_lock(obj);
-}
-
-static void ccc_object_size_unlock(struct cl_object *obj)
-{
-	struct inode *inode = ccc_object_inode(obj);
-
-	cl_object_attr_unlock(obj);
-	ll_inode_size_unlock(inode);
-}
-
-/*****************************************************************************
- *
- * Page operations.
- *
- */
-
-struct page *ccc_page_vmpage(const struct lu_env *env,
-			    const struct cl_page_slice *slice)
-{
-	return cl2vm_page(slice);
-}
-
-int ccc_page_is_under_lock(const struct lu_env *env,
-			   const struct cl_page_slice *slice,
-			   struct cl_io *io)
-{
-	struct ccc_io	*cio  = ccc_env_io(env);
-	struct cl_lock_descr *desc = &ccc_env_info(env)->cti_descr;
-	struct cl_page       *page = slice->cpl_page;
-
-	int result;
-
-	if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE ||
-	    io->ci_type == CIT_FAULT) {
-		if (cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)
-			result = -EBUSY;
-		else {
-			desc->cld_start = page->cp_index;
-			desc->cld_end   = page->cp_index;
-			desc->cld_obj   = page->cp_obj;
-			desc->cld_mode  = CLM_READ;
-			result = cl_queue_match(&io->ci_lockset.cls_done,
-						desc) ? -EBUSY : 0;
-		}
-	} else
-		result = 0;
-	return result;
-}
-
-int ccc_fail(const struct lu_env *env, const struct cl_page_slice *slice)
-{
-	/*
-	 * Cached read?
-	 */
-	LBUG();
-	return 0;
-}
-
-int ccc_transient_page_prep(const struct lu_env *env,
-				   const struct cl_page_slice *slice,
-				   struct cl_io *unused)
-{
-	/* transient page should always be sent. */
-	return 0;
-}
-
-/*****************************************************************************
- *
- * Lock operations.
- *
- */
-
-void ccc_lock_delete(const struct lu_env *env,
-		     const struct cl_lock_slice *slice)
-{
-	CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
-}
-
-void ccc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice)
-{
-	struct ccc_lock *clk = cl2ccc_lock(slice);
-
-	kmem_cache_free(ccc_lock_kmem, clk);
-}
-
-int ccc_lock_enqueue(const struct lu_env *env,
-		     const struct cl_lock_slice *slice,
-		     struct cl_io *unused, __u32 enqflags)
-{
-	CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
-	return 0;
-}
-
-int ccc_lock_use(const struct lu_env *env, const struct cl_lock_slice *slice)
-{
-	CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
-	return 0;
-}
-
-int ccc_lock_unuse(const struct lu_env *env, const struct cl_lock_slice *slice)
-{
-	CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
-	return 0;
-}
-
-int ccc_lock_wait(const struct lu_env *env, const struct cl_lock_slice *slice)
-{
-	CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
-	return 0;
-}
-
-/**
- * Implementation of cl_lock_operations::clo_fits_into() methods for ccc
- * layer. This function is executed every time io finds an existing lock in
- * the lock cache while creating new lock. This function has to decide whether
- * cached lock "fits" into io.
- *
- * \param slice lock to be checked
- * \param io    IO that wants a lock.
- *
- * \see lov_lock_fits_into().
- */
-int ccc_lock_fits_into(const struct lu_env *env,
-		       const struct cl_lock_slice *slice,
-		       const struct cl_lock_descr *need,
-		       const struct cl_io *io)
-{
-	const struct cl_lock       *lock  = slice->cls_lock;
-	const struct cl_lock_descr *descr = &lock->cll_descr;
-	const struct ccc_io	*cio   = ccc_env_io(env);
-	int			 result;
-
-	/*
-	 * Work around DLM peculiarity: it assumes that glimpse
-	 * (LDLM_FL_HAS_INTENT) lock is always LCK_PR, and returns reads lock
-	 * when asked for LCK_PW lock with LDLM_FL_HAS_INTENT flag set. Make
-	 * sure that glimpse doesn't get CLM_WRITE top-lock, so that it
-	 * doesn't enqueue CLM_WRITE sub-locks.
-	 */
-	if (cio->cui_glimpse)
-		result = descr->cld_mode != CLM_WRITE;
-
-	/*
-	 * Also, don't match incomplete write locks for read, otherwise read
-	 * would enqueue missing sub-locks in the write mode.
-	 */
-	else if (need->cld_mode != descr->cld_mode)
-		result = lock->cll_state >= CLS_ENQUEUED;
-	else
-		result = 1;
-	return result;
-}
-
-/**
- * Implements cl_lock_operations::clo_state() method for ccc layer, invoked
- * whenever lock state changes. Transfers object attributes, that might be
- * updated as a result of lock acquiring into inode.
- */
-void ccc_lock_state(const struct lu_env *env,
-		    const struct cl_lock_slice *slice,
-		    enum cl_lock_state state)
-{
-	struct cl_lock *lock = slice->cls_lock;
-
-	/*
-	 * Refresh inode attributes when the lock is moving into CLS_HELD
-	 * state, and only when this is a result of real enqueue, rather than
-	 * of finding lock in the cache.
-	 */
-	if (state == CLS_HELD && lock->cll_state < CLS_HELD) {
-		struct cl_object *obj;
-		struct inode     *inode;
-
-		obj   = slice->cls_obj;
-		inode = ccc_object_inode(obj);
-
-		/* vmtruncate() sets the i_size
-		 * under both a DLM lock and the
-		 * ll_inode_size_lock().  If we don't get the
-		 * ll_inode_size_lock() here we can match the DLM lock and
-		 * reset i_size.  generic_file_write can then trust the
-		 * stale i_size when doing appending writes and effectively
-		 * cancel the result of the truncate.  Getting the
-		 * ll_inode_size_lock() after the enqueue maintains the DLM
-		 * -> ll_inode_size_lock() acquiring order.
-		 */
-		if (lock->cll_descr.cld_start == 0 &&
-		    lock->cll_descr.cld_end == CL_PAGE_EOF)
-			cl_merge_lvb(env, inode);
-	}
-}
-
-/*****************************************************************************
- *
- * io operations.
- *
- */
-
-int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
-			  __u32 enqflags, enum cl_lock_mode mode,
-			  pgoff_t start, pgoff_t end)
-{
-	struct ccc_io	  *cio   = ccc_env_io(env);
-	struct cl_lock_descr   *descr = &cio->cui_link.cill_descr;
-	struct cl_object       *obj   = io->ci_obj;
-
-	CLOBINVRNT(env, obj, ccc_object_invariant(obj));
-
-	CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end);
-
-	memset(&cio->cui_link, 0, sizeof(cio->cui_link));
-
-	if (cio->cui_fd && (cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
-		descr->cld_mode = CLM_GROUP;
-		descr->cld_gid  = cio->cui_fd->fd_grouplock.cg_gid;
-	} else {
-		descr->cld_mode  = mode;
-	}
-	descr->cld_obj   = obj;
-	descr->cld_start = start;
-	descr->cld_end   = end;
-	descr->cld_enq_flags = enqflags;
-
-	cl_io_lock_add(env, io, &cio->cui_link);
-	return 0;
-}
-
-void ccc_io_update_iov(const struct lu_env *env,
-		       struct ccc_io *cio, struct cl_io *io)
-{
-	size_t size = io->u.ci_rw.crw_count;
-
-	if (!cl_is_normalio(env, io) || !cio->cui_iter)
-		return;
-
-	iov_iter_truncate(cio->cui_iter, size);
-}
-
-int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io,
-		    __u32 enqflags, enum cl_lock_mode mode,
-		    loff_t start, loff_t end)
-{
-	struct cl_object *obj = io->ci_obj;
-
-	return ccc_io_one_lock_index(env, io, enqflags, mode,
-				     cl_index(obj, start), cl_index(obj, end));
-}
-
-void ccc_io_end(const struct lu_env *env, const struct cl_io_slice *ios)
-{
-	CLOBINVRNT(env, ios->cis_io->ci_obj,
-		   ccc_object_invariant(ios->cis_io->ci_obj));
-}
-
-void ccc_io_advance(const struct lu_env *env,
-		    const struct cl_io_slice *ios,
-		    size_t nob)
-{
-	struct ccc_io    *cio = cl2ccc_io(env, ios);
-	struct cl_io     *io  = ios->cis_io;
-	struct cl_object *obj = ios->cis_io->ci_obj;
-
-	CLOBINVRNT(env, obj, ccc_object_invariant(obj));
-
-	if (!cl_is_normalio(env, io))
-		return;
-
-	iov_iter_reexpand(cio->cui_iter, cio->cui_tot_count  -= nob);
-}
-
-/**
- * Helper function that if necessary adjusts file size (inode->i_size), when
- * position at the offset \a pos is accessed. File size can be arbitrary stale
- * on a Lustre client, but client at least knows KMS. If accessed area is
- * inside [0, KMS], set file size to KMS, otherwise glimpse file size.
- *
- * Locking: cl_isize_lock is used to serialize changes to inode size and to
- * protect consistency between inode size and cl_object
- * attributes. cl_object_size_lock() protects consistency between cl_attr's of
- * top-object and sub-objects.
- */
-int ccc_prep_size(const struct lu_env *env, struct cl_object *obj,
-		  struct cl_io *io, loff_t start, size_t count, int *exceed)
-{
-	struct cl_attr *attr  = ccc_env_thread_attr(env);
-	struct inode   *inode = ccc_object_inode(obj);
-	loff_t	  pos   = start + count - 1;
-	loff_t kms;
-	int result;
-
-	/*
-	 * Consistency guarantees: following possibilities exist for the
-	 * relation between region being accessed and real file size at this
-	 * moment:
-	 *
-	 *  (A): the region is completely inside of the file;
-	 *
-	 *  (B-x): x bytes of region are inside of the file, the rest is
-	 *  outside;
-	 *
-	 *  (C): the region is completely outside of the file.
-	 *
-	 * This classification is stable under DLM lock already acquired by
-	 * the caller, because to change the class, other client has to take
-	 * DLM lock conflicting with our lock. Also, any updates to ->i_size
-	 * by other threads on this client are serialized by
-	 * ll_inode_size_lock(). This guarantees that short reads are handled
-	 * correctly in the face of concurrent writes and truncates.
-	 */
-	ccc_object_size_lock(obj);
-	result = cl_object_attr_get(env, obj, attr);
-	if (result == 0) {
-		kms = attr->cat_kms;
-		if (pos > kms) {
-			/*
-			 * A glimpse is necessary to determine whether we
-			 * return a short read (B) or some zeroes at the end
-			 * of the buffer (C)
-			 */
-			ccc_object_size_unlock(obj);
-			result = cl_glimpse_lock(env, io, inode, obj, 0);
-			if (result == 0 && exceed) {
-				/* If objective page index exceed end-of-file
-				 * page index, return directly. Do not expect
-				 * kernel will check such case correctly.
-				 * linux-2.6.18-128.1.1 miss to do that.
-				 * --bug 17336
-				 */
-				loff_t size = cl_isize_read(inode);
-				loff_t cur_index = start >> PAGE_SHIFT;
-				loff_t size_index = (size - 1) >>
-						    PAGE_SHIFT;
-
-				if ((size == 0 && cur_index != 0) ||
-				    size_index < cur_index)
-					*exceed = 1;
-			}
-			return result;
-		}
-		/*
-		 * region is within kms and, hence, within real file
-		 * size (A). We need to increase i_size to cover the
-		 * read region so that generic_file_read() will do its
-		 * job, but that doesn't mean the kms size is
-		 * _correct_, it is only the _minimum_ size. If
-		 * someone does a stat they will get the correct size
-		 * which will always be >= the kms value here.
-		 * b=11081
-		 */
-		if (cl_isize_read(inode) < kms) {
-			cl_isize_write_nolock(inode, kms);
-			CDEBUG(D_VFSTRACE,
-					DFID" updating i_size %llu\n",
-					PFID(lu_object_fid(&obj->co_lu)),
-					(__u64)cl_isize_read(inode));
-
-		}
-	}
-	ccc_object_size_unlock(obj);
-	return result;
-}
-
-/*****************************************************************************
- *
- * Transfer operations.
- *
- */
-
-void ccc_req_completion(const struct lu_env *env,
-			const struct cl_req_slice *slice, int ioret)
-{
-	struct ccc_req *vrq;
-
-	if (ioret > 0)
-		cl_stats_tally(slice->crs_dev, slice->crs_req->crq_type, ioret);
-
-	vrq = cl2ccc_req(slice);
-	kmem_cache_free(ccc_req_kmem, vrq);
-}
-
-/**
- * Implementation of struct cl_req_operations::cro_attr_set() for ccc
- * layer. ccc is responsible for
- *
- *    - o_[mac]time
- *
- *    - o_mode
- *
- *    - o_parent_seq
- *
- *    - o_[ug]id
- *
- *    - o_parent_oid
- *
- *    - o_parent_ver
- *
- *    - o_ioepoch,
- *
- */
-void ccc_req_attr_set(const struct lu_env *env,
-		      const struct cl_req_slice *slice,
-		      const struct cl_object *obj,
-		      struct cl_req_attr *attr, u64 flags)
-{
-	struct inode *inode;
-	struct obdo  *oa;
-	u32	      valid_flags;
-
-	oa = attr->cra_oa;
-	inode = ccc_object_inode(obj);
-	valid_flags = OBD_MD_FLTYPE;
-
-	if (slice->crs_req->crq_type == CRT_WRITE) {
-		if (flags & OBD_MD_FLEPOCH) {
-			oa->o_valid |= OBD_MD_FLEPOCH;
-			oa->o_ioepoch = cl_i2info(inode)->lli_ioepoch;
-			valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
-				       OBD_MD_FLUID | OBD_MD_FLGID;
-		}
-	}
-	obdo_from_inode(oa, inode, valid_flags & flags);
-	obdo_set_parent_fid(oa, &cl_i2info(inode)->lli_fid);
-	memcpy(attr->cra_jobid, cl_i2info(inode)->lli_jobid,
-	       JOBSTATS_JOBID_SIZE);
-}
-
-static const struct cl_req_operations ccc_req_ops = {
-	.cro_attr_set   = ccc_req_attr_set,
-	.cro_completion = ccc_req_completion
-};
-
-int cl_setattr_ost(struct inode *inode, const struct iattr *attr)
-{
-	struct lu_env *env;
-	struct cl_io  *io;
-	int	    result;
-	int	    refcheck;
-
-	env = cl_env_get(&refcheck);
-	if (IS_ERR(env))
-		return PTR_ERR(env);
-
-	io = ccc_env_thread_io(env);
-	io->ci_obj = cl_i2info(inode)->lli_clob;
-
-	io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime);
-	io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime);
-	io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime);
-	io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size;
-	io->u.ci_setattr.sa_valid = attr->ia_valid;
-
-again:
-	if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) {
-		struct ccc_io *cio = ccc_env_io(env);
-
-		if (attr->ia_valid & ATTR_FILE)
-			/* populate the file descriptor for ftruncate to honor
-			 * group lock - see LU-787
-			 */
-			cio->cui_fd = cl_iattr2fd(inode, attr);
-
-		result = cl_io_loop(env, io);
-	} else {
-		result = io->ci_result;
-	}
-	cl_io_fini(env, io);
-	if (unlikely(io->ci_need_restart))
-		goto again;
-	/* HSM import case: file is released, cannot be restored
-	 * no need to fail except if restore registration failed
-	 * with -ENODATA
-	 */
-	if (result == -ENODATA && io->ci_restore_needed &&
-	    io->ci_result != -ENODATA)
-		result = 0;
-	cl_env_put(env, &refcheck);
-	return result;
-}
-
-/*****************************************************************************
- *
- * Type conversions.
- *
- */
-
-struct lu_device *ccc2lu_dev(struct ccc_device *vdv)
-{
-	return &vdv->cdv_cl.cd_lu_dev;
-}
-
-struct ccc_device *lu2ccc_dev(const struct lu_device *d)
-{
-	return container_of0(d, struct ccc_device, cdv_cl.cd_lu_dev);
-}
-
-struct ccc_device *cl2ccc_dev(const struct cl_device *d)
-{
-	return container_of0(d, struct ccc_device, cdv_cl);
-}
-
-struct lu_object *ccc2lu(struct ccc_object *vob)
-{
-	return &vob->cob_cl.co_lu;
-}
-
-struct ccc_object *lu2ccc(const struct lu_object *obj)
-{
-	return container_of0(obj, struct ccc_object, cob_cl.co_lu);
-}
-
-struct ccc_object *cl2ccc(const struct cl_object *obj)
-{
-	return container_of0(obj, struct ccc_object, cob_cl);
-}
-
-struct ccc_lock *cl2ccc_lock(const struct cl_lock_slice *slice)
-{
-	return container_of(slice, struct ccc_lock, clk_cl);
-}
-
-struct ccc_io *cl2ccc_io(const struct lu_env *env,
-			 const struct cl_io_slice *slice)
-{
-	struct ccc_io *cio;
-
-	cio = container_of(slice, struct ccc_io, cui_cl);
-	LASSERT(cio == ccc_env_io(env));
-	return cio;
-}
-
-struct ccc_req *cl2ccc_req(const struct cl_req_slice *slice)
-{
-	return container_of0(slice, struct ccc_req, crq_cl);
-}
-
-struct page *cl2vm_page(const struct cl_page_slice *slice)
-{
-	return cl2ccc_page(slice)->cpg_page;
-}
-
-/*****************************************************************************
- *
- * Accessors.
- *
- */
-int ccc_object_invariant(const struct cl_object *obj)
-{
-	struct inode	 *inode = ccc_object_inode(obj);
-	struct cl_inode_info *lli   = cl_i2info(inode);
-
-	return (S_ISREG(cl_inode_mode(inode)) ||
-		/* i_mode of unlinked inode is zeroed. */
-		cl_inode_mode(inode) == 0) && lli->lli_clob == obj;
-}
-
-struct inode *ccc_object_inode(const struct cl_object *obj)
-{
-	return cl2ccc(obj)->cob_inode;
-}
-
-/**
- * Initialize or update CLIO structures for regular files when new
- * meta-data arrives from the server.
- *
- * \param inode regular file inode
- * \param md    new file metadata from MDS
- * - allocates cl_object if necessary,
- * - updated layout, if object was already here.
- */
-int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
-{
-	struct lu_env	*env;
-	struct cl_inode_info *lli;
-	struct cl_object     *clob;
-	struct lu_site       *site;
-	struct lu_fid	*fid;
-	struct cl_object_conf conf = {
-		.coc_inode = inode,
-		.u = {
-			.coc_md    = md
-		}
-	};
-	int result = 0;
-	int refcheck;
-
-	LASSERT(md->body->valid & OBD_MD_FLID);
-	LASSERT(S_ISREG(cl_inode_mode(inode)));
-
-	env = cl_env_get(&refcheck);
-	if (IS_ERR(env))
-		return PTR_ERR(env);
-
-	site = cl_i2sbi(inode)->ll_site;
-	lli  = cl_i2info(inode);
-	fid  = &lli->lli_fid;
-	LASSERT(fid_is_sane(fid));
-
-	if (!lli->lli_clob) {
-		/* clob is slave of inode, empty lli_clob means for new inode,
-		 * there is no clob in cache with the given fid, so it is
-		 * unnecessary to perform lookup-alloc-lookup-insert, just
-		 * alloc and insert directly.
-		 */
-		LASSERT(inode->i_state & I_NEW);
-		conf.coc_lu.loc_flags = LOC_F_NEW;
-		clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev),
-				      fid, &conf);
-		if (!IS_ERR(clob)) {
-			/*
-			 * No locking is necessary, as new inode is
-			 * locked by I_NEW bit.
-			 */
-			lli->lli_clob = clob;
-			lli->lli_has_smd = lsm_has_objects(md->lsm);
-			lu_object_ref_add(&clob->co_lu, "inode", inode);
-		} else
-			result = PTR_ERR(clob);
-	} else {
-		result = cl_conf_set(env, lli->lli_clob, &conf);
-	}
-
-	cl_env_put(env, &refcheck);
-
-	if (result != 0)
-		CERROR("Failure to initialize cl object "DFID": %d\n",
-		       PFID(fid), result);
-	return result;
-}
-
-/**
- * Wait for others drop their references of the object at first, then we drop
- * the last one, which will lead to the object be destroyed immediately.
- * Must be called after cl_object_kill() against this object.
- *
- * The reason we want to do this is: destroying top object will wait for sub
- * objects being destroyed first, so we can't let bottom layer (e.g. from ASTs)
- * to initiate top object destroying which may deadlock. See bz22520.
- */
-static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
-{
-	struct lu_object_header *header = obj->co_lu.lo_header;
-	wait_queue_t	   waiter;
-
-	if (unlikely(atomic_read(&header->loh_ref) != 1)) {
-		struct lu_site *site = obj->co_lu.lo_dev->ld_site;
-		struct lu_site_bkt_data *bkt;
-
-		bkt = lu_site_bkt_from_fid(site, &header->loh_fid);
-
-		init_waitqueue_entry(&waiter, current);
-		add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
-
-		while (1) {
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			if (atomic_read(&header->loh_ref) == 1)
-				break;
-			schedule();
-		}
-
-		set_current_state(TASK_RUNNING);
-		remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
-	}
-
-	cl_object_put(env, obj);
-}
-
-void cl_inode_fini(struct inode *inode)
-{
-	struct lu_env	   *env;
-	struct cl_inode_info    *lli  = cl_i2info(inode);
-	struct cl_object	*clob = lli->lli_clob;
-	int refcheck;
-	int emergency;
-
-	if (clob) {
-		void		    *cookie;
-
-		cookie = cl_env_reenter();
-		env = cl_env_get(&refcheck);
-		emergency = IS_ERR(env);
-		if (emergency) {
-			mutex_lock(&ccc_inode_fini_guard);
-			LASSERT(ccc_inode_fini_env);
-			cl_env_implant(ccc_inode_fini_env, &refcheck);
-			env = ccc_inode_fini_env;
-		}
-		/*
-		 * cl_object cache is a slave to inode cache (which, in turn
-		 * is a slave to dentry cache), don't keep cl_object in memory
-		 * when its master is evicted.
-		 */
-		cl_object_kill(env, clob);
-		lu_object_ref_del(&clob->co_lu, "inode", inode);
-		cl_object_put_last(env, clob);
-		lli->lli_clob = NULL;
-		if (emergency) {
-			cl_env_unplant(ccc_inode_fini_env, &refcheck);
-			mutex_unlock(&ccc_inode_fini_guard);
-		} else
-			cl_env_put(env, &refcheck);
-		cl_env_reexit(cookie);
-	}
-}
-
-/**
- * return IF_* type for given lu_dirent entry.
- * IF_* flag shld be converted to particular OS file type in
- * platform llite module.
- */
-__u16 ll_dirent_type_get(struct lu_dirent *ent)
-{
-	__u16 type = 0;
-	struct luda_type *lt;
-	int len = 0;
-
-	if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) {
-		const unsigned align = sizeof(struct luda_type) - 1;
-
-		len = le16_to_cpu(ent->lde_namelen);
-		len = (len + align) & ~align;
-		lt = (void *)ent->lde_name + len;
-		type = IFTODT(le16_to_cpu(lt->lt_type));
-	}
-	return type;
-}
-
-/**
- * build inode number from passed @fid
- */
-__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32)
-{
-	if (BITS_PER_LONG == 32 || api32)
-		return fid_flatten32(fid);
-	else
-		return fid_flatten(fid);
-}
-
-/**
- * build inode generation from passed @fid.  If our FID overflows the 32-bit
- * inode number then return a non-zero generation to distinguish them.
- */
-__u32 cl_fid_build_gen(const struct lu_fid *fid)
-{
-	__u32 gen;
-
-	if (fid_is_igif(fid)) {
-		gen = lu_igif_gen(fid);
-		return gen;
-	}
-
-	gen = fid_flatten(fid) >> 32;
-	return gen;
-}
-
-/* lsm is unreliable after hsm implementation as layout can be changed at
- * any time. This is only to support old, non-clio-ized interfaces. It will
- * cause deadlock if clio operations are called with this extra layout refcount
- * because in case the layout changed during the IO, ll_layout_refresh() will
- * have to wait for the refcount to become zero to destroy the older layout.
- *
- * Notice that the lsm returned by this function may not be valid unless called
- * inside layout lock - MDS_INODELOCK_LAYOUT.
- */
-struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode)
-{
-	return lov_lsm_get(cl_i2info(inode)->lli_clob);
-}
-
-inline void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm)
-{
-	lov_lsm_put(cl_i2info(inode)->lli_clob, lsm);
-}
diff --git a/drivers/staging/lustre/lustre/ldlm/interval_tree.c b/drivers/staging/lustre/lustre/ldlm/interval_tree.c
index 323060626fdf..f4a70ebddeaf 100644
--- a/drivers/staging/lustre/lustre/ldlm/interval_tree.c
+++ b/drivers/staging/lustre/lustre/ldlm/interval_tree.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/ldlm/l_lock.c b/drivers/staging/lustre/lustre/ldlm/l_lock.c
index e5d1344e817a..ea8840cb9056 100644
--- a/drivers/staging/lustre/lustre/ldlm/l_lock.c
+++ b/drivers/staging/lustre/lustre/ldlm/l_lock.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -54,7 +50,7 @@ struct ldlm_resource *lock_res_and_lock(struct ldlm_lock *lock)
 
 	lock_res(lock->l_resource);
 
-	lock->l_flags |= LDLM_FL_RES_LOCKED;
+	ldlm_set_res_locked(lock);
 	return lock->l_resource;
 }
 EXPORT_SYMBOL(lock_res_and_lock);
@@ -65,7 +61,7 @@ EXPORT_SYMBOL(lock_res_and_lock);
 void unlock_res_and_lock(struct ldlm_lock *lock)
 {
 	/* on server-side resource of lock doesn't change */
-	lock->l_flags &= ~LDLM_FL_RES_LOCKED;
+	ldlm_clear_res_locked(lock);
 
 	unlock_res(lock->l_resource);
 	spin_unlock(&lock->l_lock);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
index a803e200f206..f5023d9b78f5 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -75,12 +71,12 @@ __u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms)
 	 * just after we finish and take our lock into account in its
 	 * calculation of the kms
 	 */
-	lock->l_flags |= LDLM_FL_KMS_IGNORE;
+	ldlm_set_kms_ignore(lock);
 
 	list_for_each(tmp, &res->lr_granted) {
 		lck = list_entry(tmp, struct ldlm_lock, l_res_link);
 
-		if (lck->l_flags & LDLM_FL_KMS_IGNORE)
+		if (ldlm_is_kms_ignore(lck))
 			continue;
 
 		if (lck->l_policy_data.l_extent.end >= old_kms)
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
index b88b78606aee..d6b61bc39135 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -101,8 +97,7 @@ ldlm_flock_destroy(struct ldlm_lock *lock, enum ldlm_mode mode, __u64 flags)
 	LASSERT(hlist_unhashed(&lock->l_exp_flock_hash));
 
 	list_del_init(&lock->l_res_link);
-	if (flags == LDLM_FL_WAIT_NOREPROC &&
-	    !(lock->l_flags & LDLM_FL_FAILED)) {
+	if (flags == LDLM_FL_WAIT_NOREPROC && !ldlm_is_failed(lock)) {
 		/* client side - set a flag to prevent sending a CANCEL */
 		lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_CBPENDING;
 
@@ -260,14 +255,13 @@ reprocess:
 			 * overflow and underflow.
 			 */
 			if ((new->l_policy_data.l_flock.start >
-			     (lock->l_policy_data.l_flock.end + 1))
-			    && (lock->l_policy_data.l_flock.end !=
-				OBD_OBJECT_EOF))
+			     (lock->l_policy_data.l_flock.end + 1)) &&
+			    (lock->l_policy_data.l_flock.end != OBD_OBJECT_EOF))
 				continue;
 
 			if ((new->l_policy_data.l_flock.end <
-			     (lock->l_policy_data.l_flock.start - 1))
-			    && (lock->l_policy_data.l_flock.start != 0))
+			     (lock->l_policy_data.l_flock.start - 1)) &&
+			    (lock->l_policy_data.l_flock.start != 0))
 				break;
 
 			if (new->l_policy_data.l_flock.start <
@@ -436,7 +430,7 @@ ldlm_flock_interrupted_wait(void *data)
 	lock_res_and_lock(lock);
 
 	/* client side - set flag to prevent lock from being put on LRU list */
-	lock->l_flags |= LDLM_FL_CBPENDING;
+	ldlm_set_cbpending(lock);
 	unlock_res_and_lock(lock);
 }
 
@@ -520,30 +514,29 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
 granted:
 	OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10);
 
-	if (lock->l_flags & LDLM_FL_DESTROYED) {
-		LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
-		return 0;
-	}
-
-	if (lock->l_flags & LDLM_FL_FAILED) {
+	if (ldlm_is_failed(lock)) {
 		LDLM_DEBUG(lock, "client-side enqueue waking up: failed");
 		return -EIO;
 	}
 
-	if (rc) {
-		LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)",
-			   rc);
-		return rc;
-	}
-
 	LDLM_DEBUG(lock, "client-side enqueue granted");
 
 	lock_res_and_lock(lock);
 
+	/*
+	 * Protect against race where lock could have been just destroyed
+	 * due to overlap in ldlm_process_flock_lock().
+	 */
+	if (ldlm_is_destroyed(lock)) {
+		unlock_res_and_lock(lock);
+		LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
+		return 0;
+	}
+
 	/* ldlm_lock_enqueue() has already placed lock on the granted list. */
 	list_del_init(&lock->l_res_link);
 
-	if (lock->l_flags & LDLM_FL_FLOCK_DEADLOCK) {
+	if (ldlm_is_flock_deadlock(lock)) {
 		LDLM_DEBUG(lock, "client-side enqueue deadlock received");
 		rc = -EDEADLK;
 	} else if (flags & LDLM_FL_TEST_LOCK) {
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c b/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c
index b1bed1e17d32..79f4e6fa193e 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
index e21373e7306f..e4cf65d2d3b1 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -95,9 +91,10 @@ enum {
 	LDLM_CANCEL_PASSED = 1 << 1, /* Cancel passed number of locks. */
 	LDLM_CANCEL_SHRINK = 1 << 2, /* Cancel locks from shrinker. */
 	LDLM_CANCEL_LRUR   = 1 << 3, /* Cancel locks from lru resize. */
-	LDLM_CANCEL_NO_WAIT = 1 << 4 /* Cancel locks w/o blocking (neither
-				      * sending nor waiting for any rpcs)
-				      */
+	LDLM_CANCEL_NO_WAIT = 1 << 4, /* Cancel locks w/o blocking (neither
+				       * sending nor waiting for any rpcs)
+				       */
+	LDLM_CANCEL_LRUR_NO_WAIT = 1 << 5, /* LRUR + NO_WAIT */
 };
 
 int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr,
@@ -145,7 +142,8 @@ void ldlm_lock_decref_internal(struct ldlm_lock *, __u32 mode);
 void ldlm_lock_decref_internal_nolock(struct ldlm_lock *, __u32 mode);
 int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list,
 		      enum ldlm_desc_ast_t ast_type);
-int ldlm_lock_remove_from_lru(struct ldlm_lock *lock);
+int ldlm_lock_remove_from_lru_check(struct ldlm_lock *lock, time_t last_use);
+#define ldlm_lock_remove_from_lru(lock) ldlm_lock_remove_from_lru_check(lock, 0)
 int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock);
 void ldlm_lock_destroy_nolock(struct ldlm_lock *lock);
 
@@ -216,8 +214,6 @@ enum ldlm_policy_res {
 	LDLM_POLICY_SKIP_LOCK
 };
 
-typedef enum ldlm_policy_res ldlm_policy_res_t;
-
 #define LDLM_POOL_SYSFS_PRINT_int(v) sprintf(buf, "%d\n", v)
 #define LDLM_POOL_SYSFS_SET_int(a, b) { a = b; }
 #define LDLM_POOL_SYSFS_PRINT_u64(v) sprintf(buf, "%lld\n", v)
@@ -305,9 +301,10 @@ static inline int is_granted_or_cancelled(struct ldlm_lock *lock)
 	int ret = 0;
 
 	lock_res_and_lock(lock);
-	if (((lock->l_req_mode == lock->l_granted_mode) &&
-	     !(lock->l_flags & LDLM_FL_CP_REQD)) ||
-	    (lock->l_flags & (LDLM_FL_FAILED | LDLM_FL_CANCEL)))
+	if ((lock->l_req_mode == lock->l_granted_mode) &&
+	     !ldlm_is_cp_reqd(lock))
+		ret = 1;
+	else if (ldlm_is_failed(lock) || ldlm_is_cancel(lock))
 		ret = 1;
 	unlock_res_and_lock(lock);
 
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
index 7dd7df59aa1f..7c832aae7d5e 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -314,7 +310,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 	INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list);
 	INIT_LIST_HEAD(&cli->cl_loi_write_list);
 	INIT_LIST_HEAD(&cli->cl_loi_read_list);
-	client_obd_list_lock_init(&cli->cl_loi_list_lock);
+	spin_lock_init(&cli->cl_loi_list_lock);
 	atomic_set(&cli->cl_pending_w_pages, 0);
 	atomic_set(&cli->cl_pending_r_pages, 0);
 	cli->cl_r_in_flight = 0;
@@ -333,7 +329,8 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 	atomic_set(&cli->cl_lru_busy, 0);
 	atomic_set(&cli->cl_lru_in_list, 0);
 	INIT_LIST_HEAD(&cli->cl_lru_list);
-	client_obd_list_lock_init(&cli->cl_lru_list_lock);
+	spin_lock_init(&cli->cl_lru_list_lock);
+	atomic_set(&cli->cl_unstable_count, 0);
 
 	init_waitqueue_head(&cli->cl_destroy_waitq);
 	atomic_set(&cli->cl_destroy_in_flight, 0);
@@ -344,7 +341,8 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 	 * Set cl_chksum* to CRC32 for now to avoid returning screwed info
 	 * through procfs.
 	 */
-	cli->cl_cksum_type = cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
+	cli->cl_cksum_type = OBD_CKSUM_CRC32;
+	cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
 	atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS);
 
 	/* This value may be reduced at connect time in
@@ -355,6 +353,12 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 	cli->cl_max_pages_per_rpc = min_t(int, PTLRPC_MAX_BRW_PAGES,
 					  LNET_MTU >> PAGE_SHIFT);
 
+	/*
+	 * set cl_chunkbits default value to PAGE_CACHE_SHIFT,
+	 * it will be updated at OSC connection time.
+	 */
+	cli->cl_chunkbits = PAGE_SHIFT;
+
 	if (!strcmp(name, LUSTRE_MDC_NAME)) {
 		cli->cl_max_rpcs_in_flight = MDC_MAX_RIF_DEFAULT;
 	} else if (totalram_pages >> (20 - PAGE_SHIFT) <= 128 /* MB */) {
@@ -429,7 +433,6 @@ err_ldlm:
 	ldlm_put_ref();
 err:
 	return rc;
-
 }
 EXPORT_SYMBOL(client_obd_setup);
 
@@ -438,6 +441,7 @@ int client_obd_cleanup(struct obd_device *obddev)
 	ldlm_namespace_free_post(obddev->obd_namespace);
 	obddev->obd_namespace = NULL;
 
+	obd_cleanup_client_import(obddev);
 	LASSERT(!obddev->u.cli.cl_import);
 
 	ldlm_put_ref();
@@ -748,6 +752,7 @@ int ldlm_error2errno(enum ldlm_error error)
 
 	switch (error) {
 	case ELDLM_OK:
+	case ELDLM_LOCK_MATCHED:
 		result = 0;
 		break;
 	case ELDLM_LOCK_CHANGED:
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
index ecd65a7a3dc9..a5993f745ebe 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -185,7 +181,7 @@ void ldlm_lock_put(struct ldlm_lock *lock)
 			   "final lock_put on destroyed lock, freeing it.");
 
 		res = lock->l_resource;
-		LASSERT(lock->l_flags & LDLM_FL_DESTROYED);
+		LASSERT(ldlm_is_destroyed(lock));
 		LASSERT(list_empty(&lock->l_res_link));
 		LASSERT(list_empty(&lock->l_pending_chain));
 
@@ -229,15 +225,25 @@ int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock)
 
 /**
  * Removes LDLM lock \a lock from LRU. Obtains the LRU lock first.
+ *
+ * If \a last_use is non-zero, it will remove the lock from LRU only if
+ * it matches lock's l_last_used.
+ *
+ * \retval 0 if \a last_use is set, the lock is not in LRU list or \a last_use
+ *           doesn't match lock's l_last_used;
+ *           otherwise, the lock hasn't been in the LRU list.
+ * \retval 1 the lock was in LRU list and removed.
  */
-int ldlm_lock_remove_from_lru(struct ldlm_lock *lock)
+int ldlm_lock_remove_from_lru_check(struct ldlm_lock *lock, time_t last_use)
 {
 	struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
-	int rc;
+	int rc = 0;
 
 	spin_lock(&ns->ns_lock);
-	rc = ldlm_lock_remove_from_lru_nolock(lock);
+	if (last_use == 0 || last_use == lock->l_last_used)
+		rc = ldlm_lock_remove_from_lru_nolock(lock);
 	spin_unlock(&ns->ns_lock);
+
 	return rc;
 }
 
@@ -252,8 +258,7 @@ static void ldlm_lock_add_to_lru_nolock(struct ldlm_lock *lock)
 	LASSERT(list_empty(&lock->l_lru));
 	LASSERT(lock->l_resource->lr_type != LDLM_FLOCK);
 	list_add_tail(&lock->l_lru, &ns->ns_unused_list);
-	if (lock->l_flags & LDLM_FL_SKIPPED)
-		lock->l_flags &= ~LDLM_FL_SKIPPED;
+	ldlm_clear_skipped(lock);
 	LASSERT(ns->ns_nr_unused >= 0);
 	ns->ns_nr_unused++;
 }
@@ -318,11 +323,11 @@ static int ldlm_lock_destroy_internal(struct ldlm_lock *lock)
 		LBUG();
 	}
 
-	if (lock->l_flags & LDLM_FL_DESTROYED) {
+	if (ldlm_is_destroyed(lock)) {
 		LASSERT(list_empty(&lock->l_lru));
 		return 0;
 	}
-	lock->l_flags |= LDLM_FL_DESTROYED;
+	ldlm_set_destroyed(lock);
 
 	if (lock->l_export && lock->l_export->exp_lock_hash) {
 		/* NB: it's safe to call cfs_hash_del() even lock isn't
@@ -544,7 +549,7 @@ struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle,
 	/* It's unlikely but possible that someone marked the lock as
 	 * destroyed after we did handle2object on it
 	 */
-	if (flags == 0 && ((lock->l_flags & LDLM_FL_DESTROYED) == 0)) {
+	if (flags == 0 && !ldlm_is_destroyed(lock)) {
 		lu_ref_add(&lock->l_reference, "handle", current);
 		return lock;
 	}
@@ -554,21 +559,22 @@ struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle,
 	LASSERT(lock->l_resource);
 
 	lu_ref_add_atomic(&lock->l_reference, "handle", current);
-	if (unlikely(lock->l_flags & LDLM_FL_DESTROYED)) {
+	if (unlikely(ldlm_is_destroyed(lock))) {
 		unlock_res_and_lock(lock);
 		CDEBUG(D_INFO, "lock already destroyed: lock %p\n", lock);
 		LDLM_LOCK_PUT(lock);
 		return NULL;
 	}
 
-	if (flags && (lock->l_flags & flags)) {
-		unlock_res_and_lock(lock);
-		LDLM_LOCK_PUT(lock);
-		return NULL;
-	}
+	if (flags) {
+		if (lock->l_flags & flags) {
+			unlock_res_and_lock(lock);
+			LDLM_LOCK_PUT(lock);
+			return NULL;
+		}
 
-	if (flags)
 		lock->l_flags |= flags;
+	}
 
 	unlock_res_and_lock(lock);
 	return lock;
@@ -599,14 +605,14 @@ EXPORT_SYMBOL(ldlm_lock2desc);
 static void ldlm_add_bl_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
 				  struct list_head *work_list)
 {
-	if ((lock->l_flags & LDLM_FL_AST_SENT) == 0) {
+	if (!ldlm_is_ast_sent(lock)) {
 		LDLM_DEBUG(lock, "lock incompatible; sending blocking AST.");
-		lock->l_flags |= LDLM_FL_AST_SENT;
+		ldlm_set_ast_sent(lock);
 		/* If the enqueuing client said so, tell the AST recipient to
 		 * discard dirty data, rather than writing back.
 		 */
-		if (new->l_flags & LDLM_FL_AST_DISCARD_DATA)
-			lock->l_flags |= LDLM_FL_DISCARD_DATA;
+		if (ldlm_is_ast_discard_data(new))
+			ldlm_set_discard_data(lock);
 		LASSERT(list_empty(&lock->l_bl_ast));
 		list_add(&lock->l_bl_ast, work_list);
 		LDLM_LOCK_GET(lock);
@@ -621,8 +627,8 @@ static void ldlm_add_bl_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
 static void ldlm_add_cp_work_item(struct ldlm_lock *lock,
 				  struct list_head *work_list)
 {
-	if ((lock->l_flags & LDLM_FL_CP_REQD) == 0) {
-		lock->l_flags |= LDLM_FL_CP_REQD;
+	if (!ldlm_is_cp_reqd(lock)) {
+		ldlm_set_cp_reqd(lock);
 		LDLM_DEBUG(lock, "lock granted; sending completion AST.");
 		LASSERT(list_empty(&lock->l_cp_ast));
 		list_add(&lock->l_cp_ast, work_list);
@@ -652,12 +658,12 @@ static void ldlm_add_ast_work_item(struct ldlm_lock *lock,
  * r/w reference type is determined by \a mode
  * Calls ldlm_lock_addref_internal.
  */
-void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode)
+void ldlm_lock_addref(const struct lustre_handle *lockh, __u32 mode)
 {
 	struct ldlm_lock *lock;
 
 	lock = ldlm_handle2lock(lockh);
-	LASSERT(lock);
+	LASSERTF(lock, "Non-existing lock: %llx\n", lockh->cookie);
 	ldlm_lock_addref_internal(lock, mode);
 	LDLM_LOCK_PUT(lock);
 }
@@ -694,7 +700,7 @@ void ldlm_lock_addref_internal_nolock(struct ldlm_lock *lock, __u32 mode)
  *
  * \retval -EAGAIN lock is being canceled.
  */
-int ldlm_lock_addref_try(struct lustre_handle *lockh, __u32 mode)
+int ldlm_lock_addref_try(const struct lustre_handle *lockh, __u32 mode)
 {
 	struct ldlm_lock *lock;
 	int	       result;
@@ -704,7 +710,7 @@ int ldlm_lock_addref_try(struct lustre_handle *lockh, __u32 mode)
 	if (lock) {
 		lock_res_and_lock(lock);
 		if (lock->l_readers != 0 || lock->l_writers != 0 ||
-		    !(lock->l_flags & LDLM_FL_CBPENDING)) {
+		    !ldlm_is_cbpending(lock)) {
 			ldlm_lock_addref_internal_nolock(lock, mode);
 			result = 0;
 		}
@@ -770,17 +776,17 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
 
 	ldlm_lock_decref_internal_nolock(lock, mode);
 
-	if (lock->l_flags & LDLM_FL_LOCAL &&
+	if (ldlm_is_local(lock) &&
 	    !lock->l_readers && !lock->l_writers) {
 		/* If this is a local lock on a server namespace and this was
 		 * the last reference, cancel the lock.
 		 */
 		CDEBUG(D_INFO, "forcing cancel of local lock\n");
-		lock->l_flags |= LDLM_FL_CBPENDING;
+		ldlm_set_cbpending(lock);
 	}
 
 	if (!lock->l_readers && !lock->l_writers &&
-	    (lock->l_flags & LDLM_FL_CBPENDING)) {
+	    ldlm_is_cbpending(lock)) {
 		/* If we received a blocked AST and this was the last reference,
 		 * run the callback.
 		 */
@@ -791,16 +797,14 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
 		ldlm_lock_remove_from_lru(lock);
 		unlock_res_and_lock(lock);
 
-		if (lock->l_flags & LDLM_FL_FAIL_LOC)
+		if (ldlm_is_fail_loc(lock))
 			OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
 
-		if ((lock->l_flags & LDLM_FL_ATOMIC_CB) ||
+		if (ldlm_is_atomic_cb(lock) ||
 		    ldlm_bl_to_thread_lock(ns, NULL, lock) != 0)
 			ldlm_handle_bl_callback(ns, NULL, lock);
 	} else if (!lock->l_readers && !lock->l_writers &&
-		   !(lock->l_flags & LDLM_FL_NO_LRU) &&
-		   !(lock->l_flags & LDLM_FL_BL_AST)) {
-
+		   !ldlm_is_no_lru(lock) && !ldlm_is_bl_ast(lock)) {
 		LDLM_DEBUG(lock, "add lock into lru list");
 
 		/* If this is a client-side namespace and this was the last
@@ -809,7 +813,7 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
 		ldlm_lock_add_to_lru(lock);
 		unlock_res_and_lock(lock);
 
-		if (lock->l_flags & LDLM_FL_FAIL_LOC)
+		if (ldlm_is_fail_loc(lock))
 			OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
 
 		/* Call ldlm_cancel_lru() only if EARLY_CANCEL and LRU RESIZE
@@ -828,7 +832,7 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
 /**
  * Decrease reader/writer refcount for LDLM lock with handle \a lockh
  */
-void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode)
+void ldlm_lock_decref(const struct lustre_handle *lockh, __u32 mode)
 {
 	struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
 
@@ -845,7 +849,7 @@ EXPORT_SYMBOL(ldlm_lock_decref);
  *
  * Typical usage is for GROUP locks which we cannot allow to be cached.
  */
-void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode)
+void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, __u32 mode)
 {
 	struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
 
@@ -853,7 +857,7 @@ void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode)
 
 	LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
 	lock_res_and_lock(lock);
-	lock->l_flags |= LDLM_FL_CBPENDING;
+	ldlm_set_cbpending(lock);
 	unlock_res_and_lock(lock);
 	ldlm_lock_decref_internal(lock, mode);
 	LDLM_LOCK_PUT(lock);
@@ -971,7 +975,7 @@ static void ldlm_granted_list_add_lock(struct ldlm_lock *lock,
 	ldlm_resource_dump(D_INFO, res);
 	LDLM_DEBUG(lock, "About to add lock:");
 
-	if (lock->l_flags & LDLM_FL_DESTROYED) {
+	if (ldlm_is_destroyed(lock)) {
 		CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n");
 		return;
 	}
@@ -1073,10 +1077,9 @@ static struct ldlm_lock *search_queue(struct list_head *queue,
 		 * whose parents already hold a lock so forward progress
 		 * can still happen.
 		 */
-		if (lock->l_flags & LDLM_FL_CBPENDING &&
-		    !(flags & LDLM_FL_CBPENDING))
+		if (ldlm_is_cbpending(lock) && !(flags & LDLM_FL_CBPENDING))
 			continue;
-		if (!unref && lock->l_flags & LDLM_FL_CBPENDING &&
+		if (!unref && ldlm_is_cbpending(lock) &&
 		    lock->l_readers == 0 && lock->l_writers == 0)
 			continue;
 
@@ -1092,6 +1095,7 @@ static struct ldlm_lock *search_queue(struct list_head *queue,
 
 		if (unlikely(match == LCK_GROUP) &&
 		    lock->l_resource->lr_type == LDLM_EXTENT &&
+		    policy->l_extent.gid != LDLM_GID_ANY &&
 		    lock->l_policy_data.l_extent.gid != policy->l_extent.gid)
 			continue;
 
@@ -1104,11 +1108,10 @@ static struct ldlm_lock *search_queue(struct list_head *queue,
 		      policy->l_inodebits.bits))
 			continue;
 
-		if (!unref && (lock->l_flags & LDLM_FL_GONE_MASK))
+		if (!unref && LDLM_HAVE_MASK(lock, GONE))
 			continue;
 
-		if ((flags & LDLM_FL_LOCAL_ONLY) &&
-		    !(lock->l_flags & LDLM_FL_LOCAL))
+		if ((flags & LDLM_FL_LOCAL_ONLY) && !ldlm_is_local(lock))
 			continue;
 
 		if (flags & LDLM_FL_TEST_LOCK) {
@@ -1142,7 +1145,7 @@ EXPORT_SYMBOL(ldlm_lock_fail_match_locked);
  */
 void ldlm_lock_allow_match_locked(struct ldlm_lock *lock)
 {
-	lock->l_flags |= LDLM_FL_LVB_READY;
+	ldlm_set_lvb_ready(lock);
 	wake_up_all(&lock->l_waitq);
 }
 EXPORT_SYMBOL(ldlm_lock_allow_match_locked);
@@ -1243,8 +1246,7 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
 
 	if (lock) {
 		ldlm_lock2handle(lock, lockh);
-		if ((flags & LDLM_FL_LVB_READY) &&
-		    (!(lock->l_flags & LDLM_FL_LVB_READY))) {
+		if ((flags & LDLM_FL_LVB_READY) && !ldlm_is_lvb_ready(lock)) {
 			__u64 wait_flags = LDLM_FL_LVB_READY |
 				LDLM_FL_DESTROYED | LDLM_FL_FAIL_NOTIFIED;
 			struct l_wait_info lwi;
@@ -1271,7 +1273,7 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
 			l_wait_event(lock->l_waitq,
 				     lock->l_flags & wait_flags,
 				     &lwi);
-			if (!(lock->l_flags & LDLM_FL_LVB_READY)) {
+			if (!ldlm_is_lvb_ready(lock)) {
 				if (flags & LDLM_FL_TEST_LOCK)
 					LDLM_LOCK_RELEASE(lock);
 				else
@@ -1316,7 +1318,7 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
 }
 EXPORT_SYMBOL(ldlm_lock_match);
 
-enum ldlm_mode ldlm_revalidate_lock_handle(struct lustre_handle *lockh,
+enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh,
 					   __u64 *bits)
 {
 	struct ldlm_lock *lock;
@@ -1325,10 +1327,10 @@ enum ldlm_mode ldlm_revalidate_lock_handle(struct lustre_handle *lockh,
 	lock = ldlm_handle2lock(lockh);
 	if (lock) {
 		lock_res_and_lock(lock);
-		if (lock->l_flags & LDLM_FL_GONE_MASK)
+		if (LDLM_HAVE_MASK(lock, GONE))
 			goto out;
 
-		if (lock->l_flags & LDLM_FL_CBPENDING &&
+		if (ldlm_is_cbpending(lock) &&
 		    lock->l_readers == 0 && lock->l_writers == 0)
 			goto out;
 
@@ -1438,7 +1440,7 @@ int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
 		memcpy(data, lvb, size);
 		break;
 	default:
-		LDLM_ERROR(lock, "Unknown LVB type: %d\n", lock->l_lvb_type);
+		LDLM_ERROR(lock, "Unknown LVB type: %d", lock->l_lvb_type);
 		dump_stack();
 		return -EINVAL;
 	}
@@ -1542,7 +1544,8 @@ enum ldlm_error ldlm_lock_enqueue(struct ldlm_namespace *ns,
 	/* Some flags from the enqueue want to make it into the AST, via the
 	 * lock's l_flags.
 	 */
-	lock->l_flags |= *flags & LDLM_FL_AST_DISCARD_DATA;
+	if (*flags & LDLM_FL_AST_DISCARD_DATA)
+		ldlm_set_ast_discard_data(lock);
 
 	/*
 	 * This distinction between local lock trees is very important; a client
@@ -1581,7 +1584,7 @@ ldlm_work_bl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
 	lock_res_and_lock(lock);
 	list_del_init(&lock->l_bl_ast);
 
-	LASSERT(lock->l_flags & LDLM_FL_AST_SENT);
+	LASSERT(ldlm_is_ast_sent(lock));
 	LASSERT(lock->l_bl_ast_run == 0);
 	LASSERT(lock->l_blocking_lock);
 	lock->l_bl_ast_run++;
@@ -1628,12 +1631,12 @@ ldlm_work_cp_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
 	/* nobody should touch l_cp_ast */
 	lock_res_and_lock(lock);
 	list_del_init(&lock->l_cp_ast);
-	LASSERT(lock->l_flags & LDLM_FL_CP_REQD);
+	LASSERT(ldlm_is_cp_reqd(lock));
 	/* save l_completion_ast since it can be changed by
 	 * mds_intent_policy(), see bug 14225
 	 */
 	completion_callback = lock->l_completion_ast;
-	lock->l_flags &= ~LDLM_FL_CP_REQD;
+	ldlm_clear_cp_reqd(lock);
 	unlock_res_and_lock(lock);
 
 	if (completion_callback)
@@ -1778,8 +1781,8 @@ out:
 void ldlm_cancel_callback(struct ldlm_lock *lock)
 {
 	check_res_locked(lock->l_resource);
-	if (!(lock->l_flags & LDLM_FL_CANCEL)) {
-		lock->l_flags |= LDLM_FL_CANCEL;
+	if (!ldlm_is_cancel(lock)) {
+		ldlm_set_cancel(lock);
 		if (lock->l_blocking_ast) {
 			unlock_res_and_lock(lock);
 			lock->l_blocking_ast(lock, NULL, lock->l_ast_data,
@@ -1789,7 +1792,7 @@ void ldlm_cancel_callback(struct ldlm_lock *lock)
 			LDLM_DEBUG(lock, "no blocking ast");
 		}
 	}
-	lock->l_flags |= LDLM_FL_BL_DONE;
+	ldlm_set_bl_done(lock);
 }
 
 /**
@@ -1846,7 +1849,7 @@ EXPORT_SYMBOL(ldlm_lock_cancel);
 /**
  * Set opaque data into the lock that only makes sense to upper layer.
  */
-int ldlm_lock_set_data(struct lustre_handle *lockh, void *data)
+int ldlm_lock_set_data(const struct lustre_handle *lockh, void *data)
 {
 	struct ldlm_lock *lock = ldlm_handle2lock(lockh);
 	int rc = -EINVAL;
@@ -1872,7 +1875,7 @@ struct export_cl_data {
  *
  * Used when printing all locks on a resource for debug purposes.
  */
-void ldlm_lock_dump_handle(int level, struct lustre_handle *lockh)
+void ldlm_lock_dump_handle(int level, const struct lustre_handle *lockh)
 {
 	struct ldlm_lock *lock;
 
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
index ebe9042adb25..821939ff2e6b 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -124,10 +120,10 @@ void ldlm_handle_bl_callback(struct ldlm_namespace *ns,
 	LDLM_DEBUG(lock, "client blocking AST callback handler");
 
 	lock_res_and_lock(lock);
-	lock->l_flags |= LDLM_FL_CBPENDING;
+	ldlm_set_cbpending(lock);
 
-	if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)
-		lock->l_flags |= LDLM_FL_CANCEL;
+	if (ldlm_is_cancel_on_block(lock))
+		ldlm_set_cancel(lock);
 
 	do_ast = !lock->l_readers && !lock->l_writers;
 	unlock_res_and_lock(lock);
@@ -172,7 +168,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
 			set_current_state(TASK_INTERRUPTIBLE);
 			schedule_timeout(to);
 			if (lock->l_granted_mode == lock->l_req_mode ||
-			    lock->l_flags & LDLM_FL_DESTROYED)
+			    ldlm_is_destroyed(lock))
 				break;
 		}
 	}
@@ -215,7 +211,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
 	}
 
 	lock_res_and_lock(lock);
-	if ((lock->l_flags & LDLM_FL_DESTROYED) ||
+	if (ldlm_is_destroyed(lock) ||
 	    lock->l_granted_mode == lock->l_req_mode) {
 		/* bug 11300: the lock has already been granted */
 		unlock_res_and_lock(lock);
@@ -291,7 +287,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
 out:
 	if (rc < 0) {
 		lock_res_and_lock(lock);
-		lock->l_flags |= LDLM_FL_FAILED;
+		ldlm_set_failed(lock);
 		unlock_res_and_lock(lock);
 		wake_up(&lock->l_waitq);
 	}
@@ -360,8 +356,7 @@ static int __ldlm_bl_to_thread(struct ldlm_bl_work_item *blwi,
 	struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool;
 
 	spin_lock(&blp->blp_lock);
-	if (blwi->blwi_lock &&
-	    blwi->blwi_lock->l_flags & LDLM_FL_DISCARD_DATA) {
+	if (blwi->blwi_lock && ldlm_is_discard_data(blwi->blwi_lock)) {
 		/* add LDLM_FL_DISCARD_DATA requests to the priority list */
 		list_add_tail(&blwi->blwi_entry, &blp->blp_prio_list);
 	} else {
@@ -504,7 +499,7 @@ static int ldlm_handle_setinfo(struct ptlrpc_request *req)
 
 static inline void ldlm_callback_errmsg(struct ptlrpc_request *req,
 					const char *msg, int rc,
-					struct lustre_handle *handle)
+					const struct lustre_handle *handle)
 {
 	DEBUG_REQ((req->rq_no_reply || rc) ? D_WARNING : D_DLMTRACE, req,
 		  "%s: [nid %s] [rc %d] [lock %#llx]",
@@ -626,24 +621,24 @@ static int ldlm_callback_handler(struct ptlrpc_request *req)
 		return 0;
 	}
 
-	if ((lock->l_flags & LDLM_FL_FAIL_LOC) &&
+	if (ldlm_is_fail_loc(lock) &&
 	    lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK)
 		OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
 
 	/* Copy hints/flags (e.g. LDLM_FL_DISCARD_DATA) from AST. */
 	lock_res_and_lock(lock);
 	lock->l_flags |= ldlm_flags_from_wire(dlm_req->lock_flags &
-					      LDLM_AST_FLAGS);
+					      LDLM_FL_AST_MASK);
 	if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) {
 		/* If somebody cancels lock and cache is already dropped,
 		 * or lock is failed before cp_ast received on client,
 		 * we can tell the server we have no lock. Otherwise, we
 		 * should send cancel after dropping the cache.
 		 */
-		if (((lock->l_flags & LDLM_FL_CANCELING) &&
-		    (lock->l_flags & LDLM_FL_BL_DONE)) ||
-		    (lock->l_flags & LDLM_FL_FAILED)) {
-			LDLM_DEBUG(lock, "callback on lock %#llx - lock disappeared\n",
+		if ((ldlm_is_canceling(lock) && ldlm_is_bl_done(lock)) ||
+		    ldlm_is_failed(lock)) {
+			LDLM_DEBUG(lock,
+				   "callback on lock %#llx - lock disappeared",
 				   dlm_req->lock_handle[0].cookie);
 			unlock_res_and_lock(lock);
 			LDLM_LOCK_RELEASE(lock);
@@ -656,7 +651,7 @@ static int ldlm_callback_handler(struct ptlrpc_request *req)
 		 * Let ldlm_cancel_lru() be fast.
 		 */
 		ldlm_lock_remove_from_lru(lock);
-		lock->l_flags |= LDLM_FL_BL_AST;
+		ldlm_set_bl_ast(lock);
 	}
 	unlock_res_and_lock(lock);
 
@@ -674,7 +669,7 @@ static int ldlm_callback_handler(struct ptlrpc_request *req)
 	case LDLM_BL_CALLBACK:
 		CDEBUG(D_INODE, "blocking ast\n");
 		req_capsule_extend(&req->rq_pill, &RQF_LDLM_BL_CALLBACK);
-		if (!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)) {
+		if (!ldlm_is_cancel_on_block(lock)) {
 			rc = ldlm_callback_reply(req, 0);
 			if (req->rq_no_reply || rc)
 				ldlm_callback_errmsg(req, "Normal process", rc,
@@ -1013,9 +1008,11 @@ static int ldlm_setup(void)
 		blp->blp_min_threads = LDLM_NTHRS_INIT;
 		blp->blp_max_threads = LDLM_NTHRS_MAX;
 	} else {
-		blp->blp_min_threads = blp->blp_max_threads =
-			min_t(int, LDLM_NTHRS_MAX, max_t(int, LDLM_NTHRS_INIT,
-							 ldlm_num_threads));
+		blp->blp_min_threads = min_t(int, LDLM_NTHRS_MAX,
+					     max_t(int, LDLM_NTHRS_INIT,
+						   ldlm_num_threads));
+
+		blp->blp_max_threads = blp->blp_min_threads;
 	}
 
 	for (i = 0; i < blp->blp_min_threads; i++) {
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c b/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c
index 0c1965ddabb9..0aed39c46154 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
index b913ba9cf97c..657ed4012776 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
index 74e193e52cd6..af487f9937f4 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -153,7 +149,7 @@ static int ldlm_completion_tail(struct ldlm_lock *lock)
 	long delay;
 	int  result;
 
-	if (lock->l_flags & (LDLM_FL_DESTROYED | LDLM_FL_FAILED)) {
+	if (ldlm_is_destroyed(lock) || ldlm_is_failed(lock)) {
 		LDLM_DEBUG(lock, "client-side enqueue: destroyed");
 		result = -EIO;
 	} else {
@@ -252,7 +248,7 @@ noreproc:
 
 	lwd.lwd_lock = lock;
 
-	if (lock->l_flags & LDLM_FL_NO_TIMEOUT) {
+	if (ldlm_is_no_timeout(lock)) {
 		LDLM_DEBUG(lock, "waiting indefinitely because of NO_TIMEOUT");
 		lwi = LWI_INTR(interrupted_completion_wait, &lwd);
 	} else {
@@ -269,7 +265,7 @@ noreproc:
 
 	if (OBD_FAIL_CHECK_RESET(OBD_FAIL_LDLM_INTR_CP_AST,
 				 OBD_FAIL_LDLM_CP_BL_RACE | OBD_FAIL_ONCE)) {
-		lock->l_flags |= LDLM_FL_FAIL_LOC;
+		ldlm_set_fail_loc(lock);
 		rc = -EINTR;
 	} else {
 		/* Go to sleep until the lock is granted or cancelled. */
@@ -296,7 +292,7 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns,
 	lock_res_and_lock(lock);
 	/* Check that lock is not granted or failed, we might race. */
 	if ((lock->l_req_mode != lock->l_granted_mode) &&
-	    !(lock->l_flags & LDLM_FL_FAILED)) {
+	    !ldlm_is_failed(lock)) {
 		/* Make sure that this lock will not be found by raced
 		 * bl_ast and -EINVAL reply is sent to server anyways.
 		 * bug 17645
@@ -340,14 +336,13 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 			  enum ldlm_type type, __u8 with_policy,
 			  enum ldlm_mode mode,
 			  __u64 *flags, void *lvb, __u32 lvb_len,
-			  struct lustre_handle *lockh, int rc)
+			  const struct lustre_handle *lockh, int rc)
 {
 	struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
 	int is_replay = *flags & LDLM_FL_REPLAY;
 	struct ldlm_lock *lock;
 	struct ldlm_reply *reply;
 	int cleanup_phase = 1;
-	int size = 0;
 
 	lock = ldlm_handle2lock(lockh);
 	/* ldlm_cli_enqueue is holding a reference on this lock. */
@@ -375,8 +370,8 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 		goto cleanup;
 	}
 
-	if (lvb_len != 0) {
-		LASSERT(lvb);
+	if (lvb_len > 0) {
+		int size = 0;
 
 		size = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB,
 					    RCL_SERVER);
@@ -390,12 +385,13 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 			rc = -EINVAL;
 			goto cleanup;
 		}
+		lvb_len = size;
 	}
 
 	if (rc == ELDLM_LOCK_ABORTED) {
-		if (lvb_len != 0)
+		if (lvb_len > 0 && lvb)
 			rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER,
-					   lvb, size);
+					   lvb, lvb_len);
 		if (rc == 0)
 			rc = ELDLM_LOCK_ABORTED;
 		goto cleanup;
@@ -421,7 +417,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 
 	*flags = ldlm_flags_from_wire(reply->lock_flags);
 	lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags &
-					      LDLM_INHERIT_FLAGS);
+					      LDLM_FL_INHERIT_MASK);
 	/* move NO_TIMEOUT flag to the lock to force ldlm_lock_match()
 	 * to wait with no timeout as well
 	 */
@@ -489,7 +485,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 	/* If the lock has already been granted by a completion AST, don't
 	 * clobber the LVB with an older one.
 	 */
-	if (lvb_len != 0) {
+	if (lvb_len > 0) {
 		/* We must lock or a racing completion might update lvb without
 		 * letting us know and we'll clobber the correct value.
 		 * Cannot unlock after the check either, as that still leaves
@@ -498,7 +494,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 		lock_res_and_lock(lock);
 		if (lock->l_req_mode != lock->l_granted_mode)
 			rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER,
-					   lock->l_lvb_data, size);
+					   lock->l_lvb_data, lvb_len);
 		unlock_res_and_lock(lock);
 		if (rc < 0) {
 			cleanup_phase = 1;
@@ -518,7 +514,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 		}
 	}
 
-	if (lvb_len && lvb) {
+	if (lvb_len > 0 && lvb) {
 		/* Copy the LVB here, and not earlier, because the completion
 		 * AST (if any) can override what we got in the reply
 		 */
@@ -601,7 +597,7 @@ int ldlm_prep_elc_req(struct obd_export *exp, struct ptlrpc_request *req,
 		avail = ldlm_capsule_handles_avail(pill, RCL_CLIENT, canceloff);
 
 		flags = ns_connect_lru_resize(ns) ?
-			LDLM_CANCEL_LRUR : LDLM_CANCEL_AGED;
+			LDLM_CANCEL_LRUR_NO_WAIT : LDLM_CANCEL_AGED;
 		to_free = !ns_connect_lru_resize(ns) &&
 			  opc == LDLM_ENQUEUE ? 1 : 0;
 
@@ -715,7 +711,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
 
 			lock->l_req_extent = policy->l_extent;
 		}
-		LDLM_DEBUG(lock, "client-side enqueue START, flags %llx\n",
+		LDLM_DEBUG(lock, "client-side enqueue START, flags %llx",
 			   *flags);
 	}
 
@@ -821,12 +817,11 @@ static __u64 ldlm_cli_cancel_local(struct ldlm_lock *lock)
 		LDLM_DEBUG(lock, "client-side cancel");
 		/* Set this flag to prevent others from getting new references*/
 		lock_res_and_lock(lock);
-		lock->l_flags |= LDLM_FL_CBPENDING;
+		ldlm_set_cbpending(lock);
 		local_only = !!(lock->l_flags &
 				(LDLM_FL_LOCAL_ONLY|LDLM_FL_CANCEL_ON_BLOCK));
 		ldlm_cancel_callback(lock);
-		rc = (lock->l_flags & LDLM_FL_BL_AST) ?
-			LDLM_FL_BL_AST : LDLM_FL_CANCELING;
+		rc = ldlm_is_bl_ast(lock) ? LDLM_FL_BL_AST : LDLM_FL_CANCELING;
 		unlock_res_and_lock(lock);
 
 		if (local_only) {
@@ -1028,7 +1023,7 @@ EXPORT_SYMBOL(ldlm_cli_update_pool);
  *
  * Lock must not have any readers or writers by this time.
  */
-int ldlm_cli_cancel(struct lustre_handle *lockh,
+int ldlm_cli_cancel(const struct lustre_handle *lockh,
 		    enum ldlm_cancel_flags cancel_flags)
 {
 	struct obd_export *exp;
@@ -1131,31 +1126,30 @@ EXPORT_SYMBOL(ldlm_cli_cancel_list_local);
  * dirty data, to close a file, ...) or waiting for any RPCs in-flight (e.g.
  * readahead requests, ...)
  */
-static ldlm_policy_res_t ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns,
-						    struct ldlm_lock *lock,
-						    int unused, int added,
-						    int count)
+static enum ldlm_policy_res
+ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
+			   int unused, int added, int count)
 {
-	ldlm_policy_res_t result = LDLM_POLICY_CANCEL_LOCK;
-	ldlm_cancel_for_recovery cb = ns->ns_cancel_for_recovery;
-
-	lock_res_and_lock(lock);
+	enum ldlm_policy_res result = LDLM_POLICY_CANCEL_LOCK;
 
 	/* don't check added & count since we want to process all locks
-	 * from unused list
+	 * from unused list.
+	 * It's fine to not take lock to access lock->l_resource since
+	 * the lock has already been granted so it won't change.
 	 */
 	switch (lock->l_resource->lr_type) {
 	case LDLM_EXTENT:
 	case LDLM_IBITS:
-		if (cb && cb(lock))
+		if (ns->ns_cancel && ns->ns_cancel(lock) != 0)
 			break;
 	default:
 		result = LDLM_POLICY_SKIP_LOCK;
-		lock->l_flags |= LDLM_FL_SKIPPED;
+		lock_res_and_lock(lock);
+		ldlm_set_skipped(lock);
+		unlock_res_and_lock(lock);
 		break;
 	}
 
-	unlock_res_and_lock(lock);
 	return result;
 }
 
@@ -1168,10 +1162,10 @@ static ldlm_policy_res_t ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns,
  *
  * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
  */
-static ldlm_policy_res_t ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
-						 struct ldlm_lock *lock,
-						 int unused, int added,
-						 int count)
+static enum ldlm_policy_res ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
+						    struct ldlm_lock *lock,
+						    int unused, int added,
+						    int count)
 {
 	unsigned long cur = cfs_time_current();
 	struct ldlm_pool *pl = &ns->ns_pool;
@@ -1196,8 +1190,13 @@ static ldlm_policy_res_t ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
 	/* Stop when SLV is not yet come from server or lv is smaller than
 	 * it is.
 	 */
-	return (slv == 0 || lv < slv) ?
-		LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
+	if (slv == 0 || lv < slv)
+		return LDLM_POLICY_KEEP_LOCK;
+
+	if (ns->ns_cancel && ns->ns_cancel(lock) == 0)
+		return LDLM_POLICY_KEEP_LOCK;
+
+	return LDLM_POLICY_CANCEL_LOCK;
 }
 
 /**
@@ -1209,10 +1208,10 @@ static ldlm_policy_res_t ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
  *
  * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
  */
-static ldlm_policy_res_t ldlm_cancel_passed_policy(struct ldlm_namespace *ns,
-						   struct ldlm_lock *lock,
-						   int unused, int added,
-						   int count)
+static enum ldlm_policy_res ldlm_cancel_passed_policy(struct ldlm_namespace *ns,
+						      struct ldlm_lock *lock,
+						      int unused, int added,
+						      int count)
 {
 	/* Stop LRU processing when we reach past @count or have checked all
 	 * locks in LRU.
@@ -1230,16 +1229,35 @@ static ldlm_policy_res_t ldlm_cancel_passed_policy(struct ldlm_namespace *ns,
  *
  * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
  */
-static ldlm_policy_res_t ldlm_cancel_aged_policy(struct ldlm_namespace *ns,
-						 struct ldlm_lock *lock,
-						 int unused, int added,
-						 int count)
+static enum ldlm_policy_res ldlm_cancel_aged_policy(struct ldlm_namespace *ns,
+						    struct ldlm_lock *lock,
+						    int unused, int added,
+						    int count)
 {
-	/* Stop LRU processing if young lock is found and we reach past count */
-	return ((added >= count) &&
-		time_before(cfs_time_current(),
-			    cfs_time_add(lock->l_last_used, ns->ns_max_age))) ?
-		LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
+	if ((added >= count) &&
+	    time_before(cfs_time_current(),
+			cfs_time_add(lock->l_last_used, ns->ns_max_age)))
+		return LDLM_POLICY_KEEP_LOCK;
+
+	if (ns->ns_cancel && ns->ns_cancel(lock) == 0)
+		return LDLM_POLICY_KEEP_LOCK;
+
+	return LDLM_POLICY_CANCEL_LOCK;
+}
+
+static enum ldlm_policy_res
+ldlm_cancel_lrur_no_wait_policy(struct ldlm_namespace *ns,
+				struct ldlm_lock *lock,
+				int unused, int added,
+				int count)
+{
+	enum ldlm_policy_res result;
+
+	result = ldlm_cancel_lrur_policy(ns, lock, unused, added, count);
+	if (result == LDLM_POLICY_KEEP_LOCK)
+		return result;
+
+	return ldlm_cancel_no_wait_policy(ns, lock, unused, added, count);
 }
 
 /**
@@ -1251,10 +1269,9 @@ static ldlm_policy_res_t ldlm_cancel_aged_policy(struct ldlm_namespace *ns,
  *
  * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
  */
-static ldlm_policy_res_t ldlm_cancel_default_policy(struct ldlm_namespace *ns,
-						    struct ldlm_lock *lock,
-						    int unused, int added,
-						    int count)
+static enum ldlm_policy_res
+ldlm_cancel_default_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
+			   int unused, int added, int count)
 {
 	/* Stop LRU processing when we reach past count or have checked all
 	 * locks in LRU.
@@ -1263,7 +1280,8 @@ static ldlm_policy_res_t ldlm_cancel_default_policy(struct ldlm_namespace *ns,
 		LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
 }
 
-typedef ldlm_policy_res_t (*ldlm_cancel_lru_policy_t)(struct ldlm_namespace *,
+typedef enum ldlm_policy_res (*ldlm_cancel_lru_policy_t)(
+						      struct ldlm_namespace *,
 						      struct ldlm_lock *, int,
 						      int, int);
 
@@ -1281,6 +1299,8 @@ ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags)
 			return ldlm_cancel_lrur_policy;
 		else if (flags & LDLM_CANCEL_PASSED)
 			return ldlm_cancel_passed_policy;
+		else if (flags & LDLM_CANCEL_LRUR_NO_WAIT)
+			return ldlm_cancel_lrur_no_wait_policy;
 	} else {
 		if (flags & LDLM_CANCEL_AGED)
 			return ldlm_cancel_aged_policy;
@@ -1329,6 +1349,7 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
 	ldlm_cancel_lru_policy_t pf;
 	struct ldlm_lock *lock, *next;
 	int added = 0, unused, remained;
+	int no_wait = flags & (LDLM_CANCEL_NO_WAIT | LDLM_CANCEL_LRUR_NO_WAIT);
 
 	spin_lock(&ns->ns_lock);
 	unused = ns->ns_nr_unused;
@@ -1341,7 +1362,8 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
 	LASSERT(pf);
 
 	while (!list_empty(&ns->ns_unused_list)) {
-		ldlm_policy_res_t result;
+		enum ldlm_policy_res result;
+		time_t last_use = 0;
 
 		/* all unused locks */
 		if (remained-- <= 0)
@@ -1354,17 +1376,20 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
 		list_for_each_entry_safe(lock, next, &ns->ns_unused_list,
 					     l_lru) {
 			/* No locks which got blocking requests. */
-			LASSERT(!(lock->l_flags & LDLM_FL_BL_AST));
+			LASSERT(!ldlm_is_bl_ast(lock));
 
-			if (flags & LDLM_CANCEL_NO_WAIT &&
-			    lock->l_flags & LDLM_FL_SKIPPED)
+			if (no_wait && ldlm_is_skipped(lock))
 				/* already processed */
 				continue;
 
+			last_use = lock->l_last_used;
+			if (last_use == cfs_time_current())
+				continue;
+
 			/* Somebody is already doing CANCEL. No need for this
 			 * lock in LRU, do not traverse it again.
 			 */
-			if (!(lock->l_flags & LDLM_FL_CANCELING))
+			if (!ldlm_is_canceling(lock))
 				break;
 
 			ldlm_lock_remove_from_lru_nolock(lock);
@@ -1407,12 +1432,14 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
 
 		lock_res_and_lock(lock);
 		/* Check flags again under the lock. */
-		if ((lock->l_flags & LDLM_FL_CANCELING) ||
-		    (ldlm_lock_remove_from_lru(lock) == 0)) {
+		if (ldlm_is_canceling(lock) ||
+		    (ldlm_lock_remove_from_lru_check(lock, last_use) == 0)) {
 			/* Another thread is removing lock from LRU, or
 			 * somebody is already doing CANCEL, or there
 			 * is a blocking request which will send cancel
-			 * by itself, or the lock is no longer unused.
+			 * by itself, or the lock is no longer unused or
+			 * the lock has been used since the pf() call and
+			 * pages could be put under it.
 			 */
 			unlock_res_and_lock(lock);
 			lu_ref_del(&lock->l_reference,
@@ -1429,7 +1456,7 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
 		 * where while we are doing cancel here, server is also
 		 * silently cancelling this lock.
 		 */
-		lock->l_flags &= ~LDLM_FL_CANCEL_ON_BLOCK;
+		ldlm_clear_cancel_on_block(lock);
 
 		/* Setting the CBPENDING flag is a little misleading,
 		 * but prevents an important race; namely, once
@@ -1526,8 +1553,7 @@ int ldlm_cancel_resource_local(struct ldlm_resource *res,
 		/* If somebody is already doing CANCEL, or blocking AST came,
 		 * skip this lock.
 		 */
-		if (lock->l_flags & LDLM_FL_BL_AST ||
-		    lock->l_flags & LDLM_FL_CANCELING)
+		if (ldlm_is_bl_ast(lock) || ldlm_is_canceling(lock))
 			continue;
 
 		if (lockmode_compat(lock->l_granted_mode, mode))
@@ -1771,7 +1797,6 @@ static void ldlm_namespace_foreach(struct ldlm_namespace *ns,
 
 	cfs_hash_for_each_nolock(ns->ns_rs_hash,
 				 ldlm_res_iter_helper, &helper);
-
 }
 
 /* non-blocking function to manipulate a lock whose cb_data is being put away.
@@ -1887,7 +1912,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
 	int flags;
 
 	/* Bug 11974: Do not replay a lock which is actively being canceled */
-	if (lock->l_flags & LDLM_FL_CANCELING) {
+	if (ldlm_is_canceling(lock)) {
 		LDLM_DEBUG(lock, "Not replaying canceled lock:");
 		return 0;
 	}
@@ -1896,7 +1921,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
 	 * server might have long dropped it, but notification of that event was
 	 * lost by network. (and server granted conflicting lock already)
 	 */
-	if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) {
+	if (ldlm_is_cancel_on_block(lock)) {
 		LDLM_DEBUG(lock, "Not replaying reply-less lock:");
 		ldlm_lock_cancel(lock);
 		return 0;
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
index 9dede87ad0a3..51a28d96af39 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -124,9 +120,15 @@ int ldlm_debugfs_setup(void)
 	}
 
 	rc = ldebugfs_add_vars(ldlm_debugfs_dir, ldlm_debugfs_list, NULL);
+	if (rc) {
+		CERROR("LProcFS failed in ldlm-init\n");
+		goto err_svc;
+	}
 
 	return 0;
 
+err_svc:
+	ldebugfs_remove(&ldlm_svc_debugfs_dir);
 err_ns:
 	ldebugfs_remove(&ldlm_ns_debugfs_dir);
 err_type:
@@ -758,12 +760,12 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
 		list_for_each(tmp, q) {
 			lock = list_entry(tmp, struct ldlm_lock,
 					      l_res_link);
-			if (lock->l_flags & LDLM_FL_CLEANED) {
+			if (ldlm_is_cleaned(lock)) {
 				lock = NULL;
 				continue;
 			}
 			LDLM_LOCK_GET(lock);
-			lock->l_flags |= LDLM_FL_CLEANED;
+			ldlm_set_cleaned(lock);
 			break;
 		}
 
@@ -775,13 +777,13 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
 		/* Set CBPENDING so nothing in the cancellation path
 		 * can match this lock.
 		 */
-		lock->l_flags |= LDLM_FL_CBPENDING;
-		lock->l_flags |= LDLM_FL_FAILED;
+		ldlm_set_cbpending(lock);
+		ldlm_set_failed(lock);
 		lock->l_flags |= flags;
 
 		/* ... without sending a CANCEL message for local_only. */
 		if (local_only)
-			lock->l_flags |= LDLM_FL_LOCAL_ONLY;
+			ldlm_set_local_only(lock);
 
 		if (local_only && (lock->l_readers || lock->l_writers)) {
 			/* This is a little bit gross, but much better than the
@@ -1273,9 +1275,9 @@ void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head,
 {
 	check_res_locked(res);
 
-	LDLM_DEBUG(lock, "About to add this lock:\n");
+	LDLM_DEBUG(lock, "About to add this lock:");
 
-	if (lock->l_flags & LDLM_FL_DESTROYED) {
+	if (ldlm_is_destroyed(lock)) {
 		CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n");
 		return;
 	}
@@ -1400,3 +1402,4 @@ void ldlm_resource_dump(int level, struct ldlm_resource *res)
 			LDLM_DEBUG_LIMIT(level, lock, "###");
 	}
 }
+EXPORT_SYMBOL(ldlm_resource_dump);
diff --git a/drivers/staging/lustre/lustre/llite/Makefile b/drivers/staging/lustre/lustre/llite/Makefile
index 9ac29e718da3..2cbb1b80bd41 100644
--- a/drivers/staging/lustre/lustre/llite/Makefile
+++ b/drivers/staging/lustre/lustre/llite/Makefile
@@ -1,10 +1,7 @@
 obj-$(CONFIG_LUSTRE_FS) += lustre.o
-obj-$(CONFIG_LUSTRE_LLITE_LLOOP) += llite_lloop.o
 lustre-y := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o \
 	    rw.o namei.o symlink.o llite_mmap.o \
-	    xattr.o xattr_cache.o remote_perm.o llite_rmtacl.o \
-	    rw26.o super25.o statahead.o \
-	    ../lclient/glimpse.o ../lclient/lcommon_cl.o ../lclient/lcommon_misc.o \
-	    vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o lproc_llite.o
-
-llite_lloop-y := lloop.o
+	    xattr.o xattr_cache.o rw26.o super25.o statahead.o \
+	    glimpse.o lcommon_cl.o lcommon_misc.o \
+	    vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o vvp_req.o \
+	    lproc_llite.o
diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c
index dd1c827013b9..463b1a360733 100644
--- a/drivers/staging/lustre/lustre/llite/dcache.c
+++ b/drivers/staging/lustre/lustre/llite/dcache.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -82,7 +78,7 @@ static void ll_release(struct dentry *de)
  * INVALID) so d_lookup() matches it, but we have no lock on it (so
  * lock_match() fails) and we spin around real_lookup().
  */
-static int ll_dcompare(const struct dentry *parent, const struct dentry *dentry,
+static int ll_dcompare(const struct dentry *dentry,
 		       unsigned int len, const char *str,
 		       const struct qstr *name)
 {
@@ -108,11 +104,8 @@ static int ll_dcompare(const struct dentry *parent, const struct dentry *dentry,
 
 static inline int return_if_equal(struct ldlm_lock *lock, void *data)
 {
-	if ((lock->l_flags &
-	     (LDLM_FL_CANCELING | LDLM_FL_DISCARD_DATA)) ==
-	    (LDLM_FL_CANCELING | LDLM_FL_DISCARD_DATA))
-		return LDLM_ITER_CONTINUE;
-	return LDLM_ITER_STOP;
+	return (ldlm_is_canceling(lock) && ldlm_is_discard_data(lock)) ?
+		LDLM_ITER_CONTINUE : LDLM_ITER_STOP;
 }
 
 /* find any ldlm lock of the inode in mdc and lov
@@ -209,27 +202,27 @@ int ll_d_init(struct dentry *de)
 
 void ll_intent_drop_lock(struct lookup_intent *it)
 {
-	if (it->it_op && it->d.lustre.it_lock_mode) {
+	if (it->it_op && it->it_lock_mode) {
 		struct lustre_handle handle;
 
-		handle.cookie = it->d.lustre.it_lock_handle;
+		handle.cookie = it->it_lock_handle;
 
 		CDEBUG(D_DLMTRACE, "releasing lock with cookie %#llx from it %p\n",
 		       handle.cookie, it);
-		ldlm_lock_decref(&handle, it->d.lustre.it_lock_mode);
+		ldlm_lock_decref(&handle, it->it_lock_mode);
 
 		/* bug 494: intent_release may be called multiple times, from
 		 * this thread and we don't want to double-decref this lock
 		 */
-		it->d.lustre.it_lock_mode = 0;
-		if (it->d.lustre.it_remote_lock_mode != 0) {
-			handle.cookie = it->d.lustre.it_remote_lock_handle;
+		it->it_lock_mode = 0;
+		if (it->it_remote_lock_mode != 0) {
+			handle.cookie = it->it_remote_lock_handle;
 
 			CDEBUG(D_DLMTRACE, "releasing remote lock with cookie%#llx from it %p\n",
 			       handle.cookie, it);
 			ldlm_lock_decref(&handle,
-					 it->d.lustre.it_remote_lock_mode);
-			it->d.lustre.it_remote_lock_mode = 0;
+					 it->it_remote_lock_mode);
+			it->it_remote_lock_mode = 0;
 		}
 	}
 }
@@ -240,23 +233,23 @@ void ll_intent_release(struct lookup_intent *it)
 	ll_intent_drop_lock(it);
 	/* We are still holding extra reference on a request, need to free it */
 	if (it_disposition(it, DISP_ENQ_OPEN_REF))
-		ptlrpc_req_finished(it->d.lustre.it_data); /* ll_file_open */
+		ptlrpc_req_finished(it->it_request); /* ll_file_open */
 
 	if (it_disposition(it, DISP_ENQ_CREATE_REF)) /* create rec */
-		ptlrpc_req_finished(it->d.lustre.it_data);
+		ptlrpc_req_finished(it->it_request);
 
-	it->d.lustre.it_disposition = 0;
-	it->d.lustre.it_data = NULL;
+	it->it_disposition = 0;
+	it->it_request = NULL;
 }
 
 void ll_invalidate_aliases(struct inode *inode)
 {
 	struct dentry *dentry;
 
-	CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n",
-	       inode->i_ino, inode->i_generation, inode);
+	CDEBUG(D_INODE, "marking dentries for ino "DFID"(%p) invalid\n",
+	       PFID(ll_inode2fid(inode)), inode);
 
-	ll_lock_dcache(inode);
+	spin_lock(&inode->i_lock);
 	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
 		CDEBUG(D_DENTRY, "dentry in drop %pd (%p) parent %p inode %p flags %d\n",
 		       dentry, dentry, dentry->d_parent,
@@ -264,7 +257,7 @@ void ll_invalidate_aliases(struct inode *inode)
 
 		d_lustre_invalidate(dentry, 0);
 	}
-	ll_unlock_dcache(inode);
+	spin_unlock(&inode->i_lock);
 }
 
 int ll_revalidate_it_finish(struct ptlrpc_request *request,
@@ -286,11 +279,11 @@ int ll_revalidate_it_finish(struct ptlrpc_request *request,
 
 void ll_lookup_finish_locks(struct lookup_intent *it, struct inode *inode)
 {
-	if (it->d.lustre.it_lock_mode && inode) {
+	if (it->it_lock_mode && inode) {
 		struct ll_sb_info *sbi = ll_i2sbi(inode);
 
-		CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
-		       inode, inode->i_ino, inode->i_generation);
+		CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"(%p)\n",
+		       PFID(ll_inode2fid(inode)), inode);
 		ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL);
 	}
 
@@ -309,6 +302,17 @@ static int ll_revalidate_dentry(struct dentry *dentry,
 {
 	struct inode *dir = d_inode(dentry->d_parent);
 
+	/* If this is intermediate component path lookup and we were able to get
+	 * to this dentry, then its lock has not been revoked and the
+	 * path component is valid.
+	 */
+	if (lookup_flags & LOOKUP_PARENT)
+		return 1;
+
+	/* Symlink - always valid as long as the dentry was found */
+	if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode))
+		return 1;
+
 	/*
 	 * if open&create is set, talk to MDS to make sure file is created if
 	 * necessary, because we can't do this in ->open() later since that's
diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
index e4c82883e580..5b381779c827 100644
--- a/drivers/staging/lustre/lustre/llite/dir.c
+++ b/drivers/staging/lustre/lustre/llite/dir.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -158,11 +154,16 @@ static int ll_dir_filler(void *_hash, struct page *page0)
 	int i;
 	int rc;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) hash %llu\n",
-	       inode->i_ino, inode->i_generation, inode, hash);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) hash %llu\n",
+	       PFID(ll_inode2fid(inode)), inode, hash);
 
 	LASSERT(max_pages > 0 && max_pages <= MD_MAX_BRW_PAGES);
 
+	op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+				     LUSTRE_OPC_ANY, NULL);
+	if (IS_ERR(op_data))
+		return PTR_ERR(op_data);
+
 	page_pool = kcalloc(max_pages, sizeof(page), GFP_NOFS);
 	if (page_pool) {
 		page_pool[0] = page0;
@@ -177,8 +178,6 @@ static int ll_dir_filler(void *_hash, struct page *page0)
 		page_pool[npages] = page;
 	}
 
-	op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
-				     LUSTRE_OPC_ANY, NULL);
 	op_data->op_npages = npages;
 	op_data->op_offset = hash;
 	rc = md_readpage(exp, op_data, page_pool, &request);
@@ -190,7 +189,7 @@ static int ll_dir_filler(void *_hash, struct page *page0)
 		body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
 		/* Checked by mdc_readpage() */
 		if (body->valid & OBD_MD_FLSIZE)
-			cl_isize_write(inode, body->size);
+			i_size_write(inode, body->size);
 
 		nrdpgs = (request->rq_bulk->bd_nob_transferred+PAGE_SIZE-1)
 			 >> PAGE_SHIFT;
@@ -363,7 +362,7 @@ struct page *ll_get_dir_page(struct inode *dir, __u64 hash,
 
 		ll_finish_md_op_data(op_data);
 
-		request = (struct ptlrpc_request *)it.d.lustre.it_data;
+		request = (struct ptlrpc_request *)it.it_request;
 		if (request)
 			ptlrpc_req_finished(request);
 		if (rc < 0) {
@@ -372,10 +371,10 @@ struct page *ll_get_dir_page(struct inode *dir, __u64 hash,
 			return ERR_PTR(rc);
 		}
 
-		CDEBUG(D_INODE, "setting lr_lvb_inode to inode %p (%lu/%u)\n",
-		       dir, dir->i_ino, dir->i_generation);
+		CDEBUG(D_INODE, "setting lr_lvb_inode to inode "DFID"(%p)\n",
+		       PFID(ll_inode2fid(dir)), dir);
 		md_set_lock_data(ll_i2sbi(dir)->ll_md_exp,
-				 &it.d.lustre.it_lock_handle, dir, NULL);
+				 &it.it_lock_handle, dir, NULL);
 	} else {
 		/* for cross-ref object, l_ast_data of the lock may not be set,
 		 * we reset it here
@@ -468,6 +467,28 @@ fail:
 	goto out_unlock;
 }
 
+/**
+ * return IF_* type for given lu_dirent entry.
+ * IF_* flag shld be converted to particular OS file type in
+ * platform llite module.
+ */
+static __u16 ll_dirent_type_get(struct lu_dirent *ent)
+{
+	__u16 type = 0;
+	struct luda_type *lt;
+	int len = 0;
+
+	if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) {
+		const unsigned int align = sizeof(struct luda_type) - 1;
+
+		len = le16_to_cpu(ent->lde_namelen);
+		len = (len + align) & ~align;
+		lt = (void *)ent->lde_name + len;
+		type = IFTODT(le16_to_cpu(lt->lt_type));
+	}
+	return type;
+}
+
 int ll_dir_read(struct inode *inode, struct dir_context *ctx)
 {
 	struct ll_inode_info *info       = ll_i2info(inode);
@@ -589,15 +610,16 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx)
 	struct inode		*inode	= file_inode(filp);
 	struct ll_file_data	*lfd	= LUSTRE_FPRIVATE(filp);
 	struct ll_sb_info	*sbi	= ll_i2sbi(inode);
+	__u64 pos = lfd ? lfd->lfd_pos : 0;
 	int			hash64	= sbi->ll_flags & LL_SBI_64BIT_HASH;
 	int			api32	= ll_need_32bit_api(sbi);
 	int			rc;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu 32bit_api %d\n",
-	       inode->i_ino, inode->i_generation,
-	       inode, (unsigned long)lfd->lfd_pos, i_size_read(inode), api32);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) pos %lu/%llu 32bit_api %d\n",
+	       PFID(ll_inode2fid(inode)), inode, (unsigned long)pos,
+	       i_size_read(inode), api32);
 
-	if (lfd->lfd_pos == MDS_DIR_END_OFF) {
+	if (pos == MDS_DIR_END_OFF) {
 		/*
 		 * end-of-file.
 		 */
@@ -605,9 +627,10 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx)
 		goto out;
 	}
 
-	ctx->pos = lfd->lfd_pos;
+	ctx->pos = pos;
 	rc = ll_dir_read(inode, ctx);
-	lfd->lfd_pos = ctx->pos;
+	if (lfd)
+		lfd->lfd_pos = ctx->pos;
 	if (ctx->pos == MDS_DIR_END_OFF) {
 		if (api32)
 			ctx->pos = LL_DIR_END_OFF_32BIT;
@@ -804,9 +827,8 @@ int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
 	rc = md_getattr(sbi->ll_md_exp, op_data, &req);
 	ll_finish_md_op_data(op_data);
 	if (rc < 0) {
-		CDEBUG(D_INFO, "md_getattr failed on inode %lu/%u: rc %d\n",
-		       inode->i_ino,
-		       inode->i_generation, rc);
+		CDEBUG(D_INFO, "md_getattr failed on inode "DFID": rc %d\n",
+		       PFID(ll_inode2fid(inode)), rc);
 		goto out;
 	}
 
@@ -916,7 +938,7 @@ static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy)
 		}
 
 		/* Read current file data version */
-		rc = ll_data_version(inode, &data_version, 1);
+		rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH);
 		iput(inode);
 		if (rc != 0) {
 			CDEBUG(D_HSM, "Could not read file data version of "
@@ -936,6 +958,9 @@ static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy)
 	}
 
 progress:
+	/* On error, the request should be considered as completed */
+	if (hpk.hpk_errval > 0)
+		hpk.hpk_flags |= HP_FLAG_COMPLETED;
 	rc = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
 			   &hpk, NULL);
 
@@ -997,8 +1022,7 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy)
 			goto progress;
 		}
 
-		rc = ll_data_version(inode, &data_version,
-				     copy->hc_hai.hai_action == HSMA_ARCHIVE);
+		rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH);
 		iput(inode);
 		if (rc) {
 			CDEBUG(D_HSM, "Could not read file data version. Request could not be confirmed.\n");
@@ -1033,7 +1057,6 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy)
 			/* hpk_errval must be >= 0 */
 			hpk.hpk_errval = EBUSY;
 		}
-
 	}
 
 progress:
@@ -1049,17 +1072,11 @@ static int copy_and_ioctl(int cmd, struct obd_export *exp,
 	void *copy;
 	int rc;
 
-	copy = kzalloc(size, GFP_NOFS);
-	if (!copy)
-		return -ENOMEM;
-
-	if (copy_from_user(copy, data, size)) {
-		rc = -EFAULT;
-		goto out;
-	}
+	copy = memdup_user(data, size);
+	if (IS_ERR(copy))
+		return PTR_ERR(copy);
 
 	rc = obd_iocontrol(cmd, exp, size, copy, NULL);
-out:
 	kfree(copy);
 
 	return rc;
@@ -1080,8 +1097,7 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl)
 	case Q_QUOTAOFF:
 	case Q_SETQUOTA:
 	case Q_SETINFO:
-		if (!capable(CFS_CAP_SYS_ADMIN) ||
-		    sbi->ll_flags & LL_SBI_RMT_CLIENT)
+		if (!capable(CFS_CAP_SYS_ADMIN))
 			return -EPERM;
 		break;
 	case Q_GETQUOTA:
@@ -1089,8 +1105,7 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl)
 		      !uid_eq(current_euid(), make_kuid(&init_user_ns, id))) ||
 		     (type == GRPQUOTA &&
 		      !in_egroup_p(make_kgid(&init_user_ns, id)))) &&
-		    (!capable(CFS_CAP_SYS_ADMIN) ||
-		     sbi->ll_flags & LL_SBI_RMT_CLIENT))
+		      !capable(CFS_CAP_SYS_ADMIN))
 			return -EPERM;
 		break;
 	case Q_GETINFO:
@@ -1101,9 +1116,6 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl)
 	}
 
 	if (valid != QC_GENERAL) {
-		if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
-			return -EOPNOTSUPP;
-
 		if (cmd == Q_GETINFO)
 			qctl->qc_cmd = Q_GETOINFO;
 		else if (cmd == Q_GETQUOTA)
@@ -1242,8 +1254,8 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	struct obd_ioctl_data *data;
 	int rc = 0;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), cmd=%#x\n",
-	       inode->i_ino, inode->i_generation, inode, cmd);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%#x\n",
+	       PFID(ll_inode2fid(inode)), inode, cmd);
 
 	/* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
 	if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
@@ -1362,7 +1374,6 @@ out_free:
 lmv_out_free:
 		obd_ioctl_freedata(buf, len);
 		return rc;
-
 	}
 	case LL_IOC_LOV_SETSTRIPE: {
 		struct lov_user_md_v3 lumv3;
@@ -1474,8 +1485,9 @@ free_lmv:
 					       cmd == LL_IOC_MDC_GETINFO)) {
 				rc = 0;
 				goto skip_lmm;
-			} else
+			} else {
 				goto out_req;
+			}
 		}
 
 		if (cmd == IOC_MDC_GETFILESTRIPE ||
@@ -1511,7 +1523,9 @@ skip_lmm:
 			st.st_atime   = body->atime;
 			st.st_mtime   = body->mtime;
 			st.st_ctime   = body->ctime;
-			st.st_ino     = inode->i_ino;
+			st.st_ino     = cl_fid_build_ino(&body->fid1,
+							 sbi->ll_flags &
+							 LL_SBI_32BIT_API);
 
 			lmdp = (struct lov_user_mds_data __user *)arg;
 			if (copy_to_user(&lmdp->lmd_st, &st, sizeof(st))) {
@@ -1604,8 +1618,7 @@ free_lmm:
 		struct obd_quotactl *oqctl;
 		int error = 0;
 
-		if (!capable(CFS_CAP_SYS_ADMIN) ||
-		    sbi->ll_flags & LL_SBI_RMT_CLIENT)
+		if (!capable(CFS_CAP_SYS_ADMIN))
 			return -EPERM;
 
 		oqctl = kzalloc(sizeof(*oqctl), GFP_NOFS);
@@ -1628,8 +1641,7 @@ free_lmm:
 	case OBD_IOC_POLL_QUOTACHECK: {
 		struct if_quotacheck *check;
 
-		if (!capable(CFS_CAP_SYS_ADMIN) ||
-		    sbi->ll_flags & LL_SBI_RMT_CLIENT)
+		if (!capable(CFS_CAP_SYS_ADMIN))
 			return -EPERM;
 
 		check = kzalloc(sizeof(*check), GFP_NOFS);
@@ -1686,19 +1698,6 @@ out_quotactl:
 		return ll_get_obd_name(inode, cmd, arg);
 	case LL_IOC_FLUSHCTX:
 		return ll_flush_ctx(inode);
-#ifdef CONFIG_FS_POSIX_ACL
-	case LL_IOC_RMTACL: {
-	    if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) {
-		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-
-		rc = rct_add(&sbi->ll_rct, current_pid(), arg);
-		if (!rc)
-			fd->fd_flags |= LL_FILE_RMTACL;
-		return rc;
-	    } else
-		return 0;
-	}
-#endif
 	case LL_IOC_GETOBDCOUNT: {
 		int count, vallen;
 		struct obd_export *exp;
@@ -1817,6 +1816,9 @@ out_quotactl:
 		return rc;
 	}
 	case LL_IOC_HSM_CT_START:
+		if (!capable(CFS_CAP_SYS_ADMIN))
+			return -EPERM;
+
 		rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg,
 				    sizeof(struct lustre_kernelcomm));
 		return rc;
@@ -1865,7 +1867,6 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
 	int api32 = ll_need_32bit_api(sbi);
 	loff_t ret = -EINVAL;
 
-	inode_lock(inode);
 	switch (origin) {
 	case SEEK_SET:
 		break;
@@ -1903,7 +1904,6 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
 	goto out;
 
 out:
-	inode_unlock(inode);
 	return ret;
 }
 
@@ -1922,7 +1922,7 @@ const struct file_operations ll_dir_operations = {
 	.open     = ll_dir_open,
 	.release  = ll_dir_release,
 	.read     = generic_read_dir,
-	.iterate  = ll_readdir,
+	.iterate_shared  = ll_readdir,
 	.unlocked_ioctl   = ll_dir_ioctl,
 	.fsync    = ll_fsync,
 };
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
index cf619af3caf5..57281b9e31ff 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -45,6 +41,7 @@
 #include "../include/lustre_lite.h"
 #include <linux/pagemap.h>
 #include <linux/file.h>
+#include <linux/mount.h>
 #include "llite_internal.h"
 #include "../include/lustre/ll_fiemap.h"
 
@@ -87,8 +84,7 @@ void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
 	op_data->op_attr.ia_ctime = inode->i_ctime;
 	op_data->op_attr.ia_size = i_size_read(inode);
 	op_data->op_attr_blocks = inode->i_blocks;
-	((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
-					ll_inode_to_ext_flags(inode->i_flags);
+	op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
 	op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
 	if (fh)
 		op_data->op_handle = *fh;
@@ -170,13 +166,15 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
 		 */
 		rc = ll_som_update(inode, op_data);
 		if (rc) {
-			CERROR("inode %lu mdc Size-on-MDS update failed: rc = %d\n",
-			       inode->i_ino, rc);
+			CERROR("%s: inode "DFID" mdc Size-on-MDS update failed: rc = %d\n",
+			       ll_i2mdexp(inode)->exp_obd->obd_name,
+			       PFID(ll_inode2fid(inode)), rc);
 			rc = 0;
 		}
 	} else if (rc) {
-		CERROR("inode %lu mdc close failed: rc = %d\n",
-		       inode->i_ino, rc);
+		CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
+		       ll_i2mdexp(inode)->exp_obd->obd_name,
+		       PFID(ll_inode2fid(inode)), rc);
 	}
 
 	/* DATA_MODIFIED flag was successfully sent on close, cancel data
@@ -278,7 +276,7 @@ static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
 
 	/* clear group lock, if present */
 	if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
-		ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
+		ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);
 
 	if (fd->fd_lease_och) {
 		bool lease_broken;
@@ -343,20 +341,8 @@ int ll_file_release(struct inode *inode, struct file *file)
 	struct ll_inode_info *lli = ll_i2info(inode);
 	int rc;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-	       inode->i_generation, inode);
-
-#ifdef CONFIG_FS_POSIX_ACL
-	if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) {
-		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-
-		if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
-			fd->fd_flags &= ~LL_FILE_RMTACL;
-			rct_del(&sbi->ll_rct, current_pid());
-			et_search_free(&sbi->ll_et, current_pid());
-		}
-	}
-#endif
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+	       PFID(ll_inode2fid(inode)), inode);
 
 	if (!is_root_inode(inode))
 		ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
@@ -413,7 +399,19 @@ static int ll_intent_file_open(struct dentry *dentry, void *lmm,
 	 * parameters. No need for the open lock
 	 */
 	if (!lmm && lmmsize == 0) {
-		itp->it_flags |= MDS_OPEN_LOCK;
+		struct ll_dentry_data *ldd = ll_d2d(dentry);
+		/*
+		 * If we came via ll_iget_for_nfs, then we need to request
+		 * struct ll_dentry_data *ldd = ll_d2d(file->f_dentry);
+		 *
+		 * NB: when ldd is NULL, it must have come via normal
+		 * lookup path only, since ll_iget_for_nfs always calls
+		 * ll_d_init().
+		 */
+		if (ldd && ldd->lld_nfs_dentry) {
+			ldd->lld_nfs_dentry = 0;
+			itp->it_flags |= MDS_OPEN_LOCK;
+		}
 		if (itp->it_flags & FMODE_WRITE)
 			opc = LUSTRE_OPC_CREATE;
 	}
@@ -451,7 +449,7 @@ static int ll_intent_file_open(struct dentry *dentry, void *lmm,
 	}
 
 	rc = ll_prep_inode(&inode, req, NULL, itp);
-	if (!rc && itp->d.lustre.it_lock_mode)
+	if (!rc && itp->it_lock_mode)
 		ll_set_lock_data(sbi->ll_md_exp, inode, itp, NULL);
 
 out:
@@ -478,13 +476,12 @@ void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
 		       struct obd_client_handle *och)
 {
-	struct ptlrpc_request *req = it->d.lustre.it_data;
 	struct mdt_body *body;
 
-	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+	body = req_capsule_server_get(&it->it_request->rq_pill, &RMF_MDT_BODY);
 	och->och_fh = body->handle;
 	och->och_fid = body->fid1;
-	och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
+	och->och_lease_handle.cookie = it->it_lock_handle;
 	och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
 	och->och_flags = it->it_flags;
 
@@ -502,7 +499,6 @@ static int ll_local_open(struct file *file, struct lookup_intent *it,
 	LASSERT(fd);
 
 	if (och) {
-		struct ptlrpc_request *req = it->d.lustre.it_data;
 		struct mdt_body *body;
 		int rc;
 
@@ -510,13 +506,19 @@ static int ll_local_open(struct file *file, struct lookup_intent *it,
 		if (rc != 0)
 			return rc;
 
-		body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+		body = req_capsule_server_get(&it->it_request->rq_pill,
+					      &RMF_MDT_BODY);
 		ll_ioepoch_open(lli, body->ioepoch);
 	}
 
 	LUSTRE_FPRIVATE(file) = fd;
 	ll_readahead_init(inode, &fd->fd_ras);
 	fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
+
+	/* ll_cl_context initialize */
+	rwlock_init(&fd->fd_lock);
+	INIT_LIST_HEAD(&fd->fd_lccs);
+
 	return 0;
 }
 
@@ -543,8 +545,8 @@ int ll_file_open(struct inode *inode, struct file *file)
 	struct ll_file_data *fd;
 	int rc = 0, opendir_set = 0;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
-	       inode->i_generation, inode, file->f_flags);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
+	       PFID(ll_inode2fid(inode)), inode, file->f_flags);
 
 	it = file->private_data; /* XXX: compat macro */
 	file->private_data = NULL; /* prevent ll_local_open assertion */
@@ -572,7 +574,7 @@ int ll_file_open(struct inode *inode, struct file *file)
 		return 0;
 	}
 
-	if (!it || !it->d.lustre.it_disposition) {
+	if (!it || !it->it_disposition) {
 		/* Convert f_flags into access mode. We cannot use file->f_mode,
 		 * because everything but O_ACCMODE mask was stripped from
 		 * there
@@ -642,7 +644,7 @@ restart:
 		}
 	} else {
 		LASSERT(*och_usecount == 0);
-		if (!it->d.lustre.it_disposition) {
+		if (!it->it_disposition) {
 			/* We cannot just request lock handle now, new ELC code
 			 * means that one of other OPEN locks for this file
 			 * could be cancelled, and since blocking ast handler
@@ -677,7 +679,9 @@ restart:
 		if (rc)
 			goto out_och_free;
 
-		LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));
+		LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
+			 "inode %p: disposition %x, status %d\n", inode,
+			 it_disposition(it, ~0), it->it_status);
 
 		rc = ll_local_open(file, it, fd, *och_p);
 		if (rc)
@@ -720,7 +724,7 @@ out_openerr:
 	}
 
 	if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
-		ptlrpc_req_finished(it->d.lustre.it_data);
+		ptlrpc_req_finished(it->it_request);
 		it_clear_disposition(it, DISP_ENQ_OPEN_REF);
 	}
 
@@ -861,12 +865,12 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
 
 	/* already get lease, handle lease lock */
 	ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
-	if (it.d.lustre.it_lock_mode == 0 ||
-	    it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
+	if (it.it_lock_mode == 0 ||
+	    it.it_lock_bits != MDS_INODELOCK_OPEN) {
 		/* open lock must return for lease */
 		CERROR(DFID "lease granted but no open lock, %d/%llu.\n",
-		       PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
-		       it.d.lustre.it_lock_bits);
+		       PFID(ll_inode2fid(inode)), it.it_lock_mode,
+		       it.it_lock_bits);
 		rc = -EPROTO;
 		goto out_close;
 	}
@@ -875,16 +879,19 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
 	return och;
 
 out_close:
-	rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
-	if (rc2)
-		CERROR("Close openhandle returned %d\n", rc2);
-
-	/* cancel open lock */
-	if (it.d.lustre.it_lock_mode != 0) {
+	/* Cancel open lock */
+	if (it.it_lock_mode != 0) {
 		ldlm_lock_decref_and_cancel(&och->och_lease_handle,
-					    it.d.lustre.it_lock_mode);
-		it.d.lustre.it_lock_mode = 0;
+					    it.it_lock_mode);
+		it.it_lock_mode = 0;
+		och->och_lease_handle.cookie = 0ULL;
 	}
+	rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
+	if (rc2 < 0)
+		CERROR("%s: error closing file "DFID": %d\n",
+		       ll_get_fsname(inode->i_sb, NULL, 0),
+		       PFID(&ll_i2info(inode)->lli_fid), rc2);
+	och = NULL; /* och has been freed in ll_close_inode_openhandle() */
 out_release_it:
 	ll_intent_release(&it);
 out:
@@ -908,7 +915,7 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
 		lock_res_and_lock(lock);
 		cancelled = ldlm_is_cancel(lock);
 		unlock_res_and_lock(lock);
-		ldlm_lock_put(lock);
+		LDLM_LOCK_PUT(lock);
 	}
 
 	CDEBUG(D_INODE, "lease for " DFID " broken? %d\n",
@@ -926,7 +933,7 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
 
 /* Fills the obdo with the attributes for the lsm */
 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
-			  struct obdo *obdo, __u64 ioepoch, int sync)
+			  struct obdo *obdo, __u64 ioepoch, int dv_flags)
 {
 	struct ptlrpc_request_set *set;
 	struct obd_info	    oinfo = { };
@@ -945,9 +952,11 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
 			       OBD_MD_FLMTIME | OBD_MD_FLCTIME |
 			       OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
 			       OBD_MD_FLDATAVERSION;
-	if (sync) {
+	if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) {
 		oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
 		oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
+		if (dv_flags & LL_DV_WR_FLUSH)
+			oinfo.oi_oa->o_flags |= OBD_FL_FLUSH;
 	}
 
 	set = ptlrpc_prep_set();
@@ -960,11 +969,16 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
 			rc = ptlrpc_set_wait(set);
 		ptlrpc_set_destroy(set);
 	}
-	if (rc == 0)
+	if (rc == 0) {
 		oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
 					 OBD_MD_FLATIME | OBD_MD_FLMTIME |
 					 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
-					 OBD_MD_FLDATAVERSION);
+					 OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS);
+		if (dv_flags & LL_DV_WR_FLUSH &&
+		    !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS &&
+		      oinfo.oi_oa->o_flags & OBD_FL_FLUSH))
+			return -ENOTSUPP;
+	}
 	return rc;
 }
 
@@ -980,7 +994,7 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
 
 	lsm = ccc_inode_lsm_get(inode);
 	rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
-			    obdo, ioepoch, sync);
+			    obdo, ioepoch, sync ? LL_DV_RD_FLUSH : 0);
 	if (rc == 0) {
 		struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
 
@@ -994,50 +1008,57 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
 	return rc;
 }
 
-int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
+int ll_merge_attr(const struct lu_env *env, struct inode *inode)
 {
 	struct ll_inode_info *lli = ll_i2info(inode);
 	struct cl_object *obj = lli->lli_clob;
-	struct cl_attr *attr = ccc_env_thread_attr(env);
-	struct ost_lvb lvb;
+	struct cl_attr *attr = vvp_env_thread_attr(env);
+	s64 atime;
+	s64 mtime;
+	s64 ctime;
 	int rc = 0;
 
 	ll_inode_size_lock(inode);
+
 	/* merge timestamps the most recently obtained from mds with
 	 * timestamps obtained from osts
 	 */
-	LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
-	LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
-	LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
+	LTIME_S(inode->i_atime) = lli->lli_atime;
+	LTIME_S(inode->i_mtime) = lli->lli_mtime;
+	LTIME_S(inode->i_ctime) = lli->lli_ctime;
 
-	lvb.lvb_size = i_size_read(inode);
-	lvb.lvb_blocks = inode->i_blocks;
-	lvb.lvb_mtime = LTIME_S(inode->i_mtime);
-	lvb.lvb_atime = LTIME_S(inode->i_atime);
-	lvb.lvb_ctime = LTIME_S(inode->i_ctime);
+	mtime = LTIME_S(inode->i_mtime);
+	atime = LTIME_S(inode->i_atime);
+	ctime = LTIME_S(inode->i_ctime);
 
 	cl_object_attr_lock(obj);
 	rc = cl_object_attr_get(env, obj, attr);
 	cl_object_attr_unlock(obj);
 
-	if (rc == 0) {
-		if (lvb.lvb_atime < attr->cat_atime)
-			lvb.lvb_atime = attr->cat_atime;
-		if (lvb.lvb_ctime < attr->cat_ctime)
-			lvb.lvb_ctime = attr->cat_ctime;
-		if (lvb.lvb_mtime < attr->cat_mtime)
-			lvb.lvb_mtime = attr->cat_mtime;
+	if (rc != 0)
+		goto out_size_unlock;
 
-		CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
-		       PFID(&lli->lli_fid), attr->cat_size);
-		cl_isize_write_nolock(inode, attr->cat_size);
+	if (atime < attr->cat_atime)
+		atime = attr->cat_atime;
 
-		inode->i_blocks = attr->cat_blocks;
+	if (ctime < attr->cat_ctime)
+		ctime = attr->cat_ctime;
 
-		LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
-		LTIME_S(inode->i_atime) = lvb.lvb_atime;
-		LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
-	}
+	if (mtime < attr->cat_mtime)
+		mtime = attr->cat_mtime;
+
+	CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
+	       PFID(&lli->lli_fid), attr->cat_size);
+
+	i_size_write(inode, attr->cat_size);
+
+	inode->i_blocks = attr->cat_blocks;
+
+	LTIME_S(inode->i_mtime) = mtime;
+	LTIME_S(inode->i_atime) = atime;
+	LTIME_S(inode->i_ctime) = ctime;
+
+out_size_unlock:
 	ll_inode_size_unlock(inode);
 
 	return rc;
@@ -1120,47 +1141,50 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
 	struct cl_io	 *io;
 	ssize_t	       result;
 
+	CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: %llu, count: %zd\n",
+	       file->f_path.dentry->d_name.name, iot, *ppos, count);
+
 restart:
-	io = ccc_env_thread_io(env);
+	io = vvp_env_thread_io(env);
 	ll_io_init(io, file, iot == CIT_WRITE);
 
 	if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
 		struct vvp_io *vio = vvp_env_io(env);
-		struct ccc_io *cio = ccc_env_io(env);
 		int write_mutex_locked = 0;
 
-		cio->cui_fd  = LUSTRE_FPRIVATE(file);
-		vio->cui_io_subtype = args->via_io_subtype;
+		vio->vui_fd  = LUSTRE_FPRIVATE(file);
+		vio->vui_io_subtype = args->via_io_subtype;
 
-		switch (vio->cui_io_subtype) {
+		switch (vio->vui_io_subtype) {
 		case IO_NORMAL:
-			cio->cui_iter = args->u.normal.via_iter;
-			cio->cui_iocb = args->u.normal.via_iocb;
+			vio->vui_iter = args->u.normal.via_iter;
+			vio->vui_iocb = args->u.normal.via_iocb;
 			if ((iot == CIT_WRITE) &&
-			    !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
+			    !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
 				if (mutex_lock_interruptible(&lli->
 							       lli_write_mutex)) {
 					result = -ERESTARTSYS;
 					goto out;
 				}
 				write_mutex_locked = 1;
-			} else if (iot == CIT_READ) {
-				down_read(&lli->lli_trunc_sem);
 			}
+			down_read(&lli->lli_trunc_sem);
 			break;
 		case IO_SPLICE:
-			vio->u.splice.cui_pipe = args->u.splice.via_pipe;
-			vio->u.splice.cui_flags = args->u.splice.via_flags;
+			vio->u.splice.vui_pipe = args->u.splice.via_pipe;
+			vio->u.splice.vui_flags = args->u.splice.via_flags;
 			break;
 		default:
-			CERROR("Unknown IO type - %u\n", vio->cui_io_subtype);
+			CERROR("Unknown IO type - %u\n", vio->vui_io_subtype);
 			LBUG();
 		}
+		ll_cl_add(file, env, io);
 		result = cl_io_loop(env, io);
+		ll_cl_remove(file, env);
+		if (args->via_io_subtype == IO_NORMAL)
+			up_read(&lli->lli_trunc_sem);
 		if (write_mutex_locked)
 			mutex_unlock(&lli->lli_write_mutex);
-		else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
-			up_read(&lli->lli_trunc_sem);
 	} else {
 		/* cl_io_rw_init() handled IO */
 		result = io->ci_result;
@@ -1197,6 +1221,7 @@ out:
 			fd->fd_write_failed = true;
 		}
 	}
+	CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
 
 	return result;
 }
@@ -1212,7 +1237,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	if (IS_ERR(env))
 		return PTR_ERR(env);
 
-	args = vvp_env_args(env, IO_NORMAL);
+	args = ll_env_args(env, IO_NORMAL);
 	args->u.normal.via_iter = to;
 	args->u.normal.via_iocb = iocb;
 
@@ -1236,7 +1261,7 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (IS_ERR(env))
 		return PTR_ERR(env);
 
-	args = vvp_env_args(env, IO_NORMAL);
+	args = ll_env_args(env, IO_NORMAL);
 	args->u.normal.via_iter = from;
 	args->u.normal.via_iocb = iocb;
 
@@ -1262,7 +1287,7 @@ static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
 	if (IS_ERR(env))
 		return PTR_ERR(env);
 
-	args = vvp_env_args(env, IO_SPLICE);
+	args = ll_env_args(env, IO_SPLICE);
 	args->u.splice.via_pipe = pipe;
 	args->u.splice.via_flags = flags;
 
@@ -1354,7 +1379,8 @@ static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
 }
 
 int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
-			     int flags, struct lov_user_md *lum, int lum_size)
+			     __u64 flags, struct lov_user_md *lum,
+			     int lum_size)
 {
 	struct lov_stripe_md *lsm = NULL;
 	struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
@@ -1363,8 +1389,8 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
 	lsm = ccc_inode_lsm_get(inode);
 	if (lsm) {
 		ccc_inode_lsm_put(inode, lsm);
-		CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
-		       inode->i_ino);
+		CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
+		       PFID(ll_inode2fid(inode)));
 		rc = -EEXIST;
 		goto out;
 	}
@@ -1373,7 +1399,7 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
 	rc = ll_intent_file_open(dentry, lum, lum_size, &oit);
 	if (rc)
 		goto out_unlock;
-	rc = oit.d.lustre.it_status;
+	rc = oit.it_status;
 	if (rc < 0)
 		goto out_req_free;
 
@@ -1386,7 +1412,7 @@ out_unlock:
 out:
 	return rc;
 out_req_free:
-	ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
+	ptlrpc_req_finished((struct ptlrpc_request *)oit.it_request);
 	goto out;
 }
 
@@ -1478,7 +1504,7 @@ out:
 static int ll_lov_setea(struct inode *inode, struct file *file,
 			unsigned long arg)
 {
-	int			 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
+	__u64			 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
 	struct lov_user_md	*lump;
 	int			 lum_size = sizeof(struct lov_user_md) +
 					    sizeof(struct lov_user_ost_data);
@@ -1512,7 +1538,7 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file,
 	struct lov_user_md_v1 __user *lumv1p = (void __user *)arg;
 	struct lov_user_md_v3 __user *lumv3p = (void __user *)arg;
 	int lum_size, rc;
-	int flags = FMODE_WRITE;
+	__u64 flags = FMODE_WRITE;
 
 	/* first try with v1 which is smaller than v3 */
 	lum_size = sizeof(struct lov_user_md_v1);
@@ -1561,7 +1587,7 @@ ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
 {
 	struct ll_inode_info   *lli = ll_i2info(inode);
 	struct ll_file_data    *fd = LUSTRE_FPRIVATE(file);
-	struct ccc_grouplock    grouplock;
+	struct ll_grouplock    grouplock;
 	int		     rc;
 
 	if (arg == 0) {
@@ -1575,14 +1601,14 @@ ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
 	spin_lock(&lli->lli_lock);
 	if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
 		CWARN("group lock already existed with gid %lu\n",
-		      fd->fd_grouplock.cg_gid);
+		      fd->fd_grouplock.lg_gid);
 		spin_unlock(&lli->lli_lock);
 		return -EINVAL;
 	}
-	LASSERT(!fd->fd_grouplock.cg_lock);
+	LASSERT(!fd->fd_grouplock.lg_lock);
 	spin_unlock(&lli->lli_lock);
 
-	rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
+	rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
 			      arg, (file->f_flags & O_NONBLOCK), &grouplock);
 	if (rc)
 		return rc;
@@ -1608,7 +1634,7 @@ static int ll_put_grouplock(struct inode *inode, struct file *file,
 {
 	struct ll_inode_info   *lli = ll_i2info(inode);
 	struct ll_file_data    *fd = LUSTRE_FPRIVATE(file);
-	struct ccc_grouplock    grouplock;
+	struct ll_grouplock    grouplock;
 
 	spin_lock(&lli->lli_lock);
 	if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
@@ -1616,11 +1642,11 @@ static int ll_put_grouplock(struct inode *inode, struct file *file,
 		CWARN("no group lock held\n");
 		return -EINVAL;
 	}
-	LASSERT(fd->fd_grouplock.cg_lock);
+	LASSERT(fd->fd_grouplock.lg_lock);
 
-	if (fd->fd_grouplock.cg_gid != arg) {
+	if (fd->fd_grouplock.lg_gid != arg) {
 		CWARN("group lock %lu doesn't match current id %lu\n",
-		      arg, fd->fd_grouplock.cg_gid);
+		      arg, fd->fd_grouplock.lg_gid);
 		spin_unlock(&lli->lli_lock);
 		return -EINVAL;
 	}
@@ -1674,7 +1700,7 @@ int ll_release_openhandle(struct inode *inode, struct lookup_intent *it)
 out:
 	/* this one is in place of ll_file_open */
 	if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
-		ptlrpc_req_finished(it->d.lustre.it_data);
+		ptlrpc_req_finished(it->it_request);
 		it_clear_disposition(it, DISP_ENQ_OPEN_REF);
 	}
 	return rc;
@@ -1861,11 +1887,12 @@ error:
  * This value is computed using stripe object version on OST.
  * Version is computed using server side locking.
  *
- * @param extent_lock  Take extent lock. Not needed if a process is already
- *		       holding the OST object group locks.
+ * @param sync  if do sync on the OST side;
+ *		0: no sync
+ *		LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
+ *		LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
  */
-int ll_data_version(struct inode *inode, __u64 *data_version,
-		    int extent_lock)
+int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
 {
 	struct lov_stripe_md	*lsm = NULL;
 	struct ll_sb_info	*sbi = ll_i2sbi(inode);
@@ -1887,7 +1914,7 @@ int ll_data_version(struct inode *inode, __u64 *data_version,
 		goto out;
 	}
 
-	rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, obdo, 0, extent_lock);
+	rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, obdo, 0, flags);
 	if (rc == 0) {
 		if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
 			rc = -EOPNOTSUPP;
@@ -1923,7 +1950,7 @@ int ll_hsm_release(struct inode *inode)
 	}
 
 	/* Grab latest data_version and [am]time values */
-	rc = ll_data_version(inode, &data_version, 1);
+	rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
 	if (rc != 0)
 		goto out;
 
@@ -1933,7 +1960,7 @@ int ll_hsm_release(struct inode *inode)
 		goto out;
 	}
 
-	ll_merge_lvb(env, inode);
+	ll_merge_attr(env, inode);
 	cl_env_nested_put(&nest, env);
 
 	/* Release the file.
@@ -2227,8 +2254,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	struct ll_file_data	*fd = LUSTRE_FPRIVATE(file);
 	int			 flags, rc;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
-	       inode->i_generation, inode, cmd);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),cmd=%x\n",
+	       PFID(ll_inode2fid(inode)), inode, cmd);
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
 
 	/* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
@@ -2331,9 +2358,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
 			return -EFAULT;
 
-		rc = ll_data_version(inode, &idv.idv_version,
-				     !(idv.idv_flags & LL_DV_NOFLUSH));
-
+		idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
+		rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
 		if (rc == 0 && copy_to_user((char __user *)arg, &idv,
 					    sizeof(idv)))
 			return -EFAULT;
@@ -2499,7 +2525,7 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 					rc = och->och_flags &
 						(FMODE_READ | FMODE_WRITE);
 				unlock_res_and_lock(lock);
-				ldlm_lock_put(lock);
+				LDLM_LOCK_PUT(lock);
 			}
 		}
 		mutex_unlock(&lli->lli_och_mutex);
@@ -2537,9 +2563,8 @@ static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
 
 	retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
 			   (origin == SEEK_CUR) ? file->f_pos : 0);
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
-	       inode->i_ino, inode->i_generation, inode, retval, retval,
-	       origin);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
+	       PFID(ll_inode2fid(inode)), inode, retval, retval, origin);
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
 
 	if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
@@ -2603,8 +2628,8 @@ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
 	if (IS_ERR(env))
 		return PTR_ERR(env);
 
-	io = ccc_env_thread_io(env);
-	io->ci_obj = cl_i2info(inode)->lli_clob;
+	io = vvp_env_thread_io(env);
+	io->ci_obj = ll_i2info(inode)->lli_clob;
 	io->ci_ignore_layout = ignore_layout;
 
 	/* initialize parameters for sync */
@@ -2634,8 +2659,8 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	struct ptlrpc_request *req;
 	int rc, err;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-	       inode->i_generation, inode);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+	       PFID(ll_inode2fid(inode)), inode);
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
 
 	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
@@ -2693,8 +2718,8 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
 	int rc;
 	int rc2 = 0;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
-	       inode->i_ino, file_lock);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
+	       PFID(ll_inode2fid(inode)), file_lock);
 
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
 
@@ -2777,9 +2802,9 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
 	if (IS_ERR(op_data))
 		return PTR_ERR(op_data);
 
-	CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n",
-	       inode->i_ino, flock.l_flock.pid, flags, einfo.ei_mode,
-	       flock.l_flock.start, flock.l_flock.end);
+	CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n",
+	       PFID(ll_inode2fid(inode)), flock.l_flock.pid, flags,
+	       einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
 
 	rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
 			op_data, &lockh, &flock, 0, NULL /* req */, flags);
@@ -2901,8 +2926,8 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
 	struct obd_export *exp;
 	int rc = 0;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%pd\n",
-	       inode->i_ino, inode->i_generation, inode, dentry);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%pd\n",
+	       PFID(ll_inode2fid(inode)), inode, dentry);
 
 	exp = ll_i2mdexp(inode);
 
@@ -2949,8 +2974,11 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
 		 * here to preserve get_cwd functionality on 2.6.
 		 * Bug 10503
 		 */
-		if (!d_inode(dentry)->i_nlink)
+		if (!d_inode(dentry)->i_nlink) {
+			spin_lock(&inode->i_lock);
 			d_lustre_invalidate(dentry, 0);
+			spin_unlock(&inode->i_lock);
+		}
 
 		ll_lookup_finish_locks(&oit, inode);
 	} else if (!ll_have_md_lock(d_inode(dentry), &ibits, LCK_MINMODE)) {
@@ -2998,9 +3026,9 @@ static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
 
 	/* if object isn't regular file, don't validate size */
 	if (!S_ISREG(inode->i_mode)) {
-		LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
-		LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
-		LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
+		LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
+		LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
+		LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
 	} else {
 		/* In case of restore, the MDT has the right size and has
 		 * already send it back without granting the layout lock,
@@ -3101,6 +3129,9 @@ struct posix_acl *ll_get_acl(struct inode *inode, int type)
 	spin_lock(&lli->lli_lock);
 	/* VFS' acl_permission_check->check_acl will release the refcount */
 	acl = posix_acl_dup(lli->lli_posix_acl);
+#ifdef CONFIG_FS_POSIX_ACL
+	forget_cached_acl(inode, type);
+#endif
 	spin_unlock(&lli->lli_lock);
 
 	return acl;
@@ -3124,11 +3155,8 @@ int ll_inode_permission(struct inode *inode, int mask)
 			return rc;
 	}
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
-	       inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
-
-	if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
-		return lustre_check_remote_perm(inode, mask);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
+	       PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
 
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
 	rc = generic_permission(inode, mask);
@@ -3335,10 +3363,10 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
 	int rc;
 
 	CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
-	       PFID(ll_inode2fid(inode)), !!(lock->l_flags & LDLM_FL_LVB_READY),
+	       PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
 	       lock->l_lvb_data, lock->l_lvb_len);
 
-	if (lock->l_lvb_data && (lock->l_flags & LDLM_FL_LVB_READY))
+	if (lock->l_lvb_data && ldlm_is_lvb_ready(lock))
 		return 0;
 
 	/* if layout lock was granted right away, the layout is returned
@@ -3415,14 +3443,14 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
 	LASSERT(lock);
 	LASSERT(ldlm_has_layout(lock));
 
-	LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d",
-		   inode, PFID(&lli->lli_fid), reconf);
+	LDLM_DEBUG(lock, "File "DFID"(%p) being reconfigured: %d",
+		   PFID(&lli->lli_fid), inode, reconf);
 
 	/* in case this is a caching lock and reinstate with new inode */
 	md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
 
 	lock_res_and_lock(lock);
-	lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY);
+	lvb_ready = ldlm_is_lvb_ready(lock);
 	unlock_res_and_lock(lock);
 	/* checking lvb_ready is racy but this is okay. The worst case is
 	 * that multi processes may configure the file on the same time.
@@ -3487,9 +3515,9 @@ out:
 
 	/* wait for IO to complete if it's still being used. */
 	if (wait_layout) {
-		CDEBUG(D_INODE, "%s: %p/" DFID " wait for layout reconf.\n",
+		CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
 		       ll_get_fsname(inode->i_sb, NULL, 0),
-		       inode, PFID(&lli->lli_fid));
+		       PFID(&lli->lli_fid), inode);
 
 		memset(&conf, 0, sizeof(conf));
 		conf.coc_opc = OBJECT_CONF_WAIT;
@@ -3498,7 +3526,8 @@ out:
 		if (rc == 0)
 			rc = -EAGAIN;
 
-		CDEBUG(D_INODE, "file: " DFID " waiting layout return: %d.\n",
+		CDEBUG(D_INODE, "%s: file="DFID" waiting layout return: %d.\n",
+		       ll_get_fsname(inode->i_sb, NULL, 0),
 		       PFID(&lli->lli_fid), rc);
 	}
 	return rc;
@@ -3571,19 +3600,19 @@ again:
 	it.it_op = IT_LAYOUT;
 	lockh.cookie = 0ULL;
 
-	LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/" DFID "",
-			  ll_get_fsname(inode->i_sb, NULL, 0), inode,
-			PFID(&lli->lli_fid));
+	LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
+			  ll_get_fsname(inode->i_sb, NULL, 0),
+			  PFID(&lli->lli_fid), inode);
 
 	rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
 			NULL, 0, NULL, 0);
-	ptlrpc_req_finished(it.d.lustre.it_data);
-	it.d.lustre.it_data = NULL;
+	ptlrpc_req_finished(it.it_request);
+	it.it_request = NULL;
 
 	ll_finish_md_op_data(op_data);
 
-	mode = it.d.lustre.it_lock_mode;
-	it.d.lustre.it_lock_mode = 0;
+	mode = it.it_lock_mode;
+	it.it_lock_mode = 0;
 	ll_intent_drop_lock(&it);
 
 	if (rc == 0) {
@@ -3601,7 +3630,7 @@ again:
 /**
  *  This function send a restore request to the MDT
  */
-int ll_layout_restore(struct inode *inode)
+int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
 {
 	struct hsm_user_request	*hur;
 	int			 len, rc;
@@ -3617,9 +3646,10 @@ int ll_layout_restore(struct inode *inode)
 	hur->hur_request.hr_flags = 0;
 	memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
 	       sizeof(hur->hur_user_item[0].hui_fid));
-	hur->hur_user_item[0].hui_extent.length = -1;
+	hur->hur_user_item[0].hui_extent.offset = offset;
+	hur->hur_user_item[0].hui_extent.length = length;
 	hur->hur_request.hr_itemcount = 1;
-	rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,
+	rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,
 			   len, hur, NULL);
 	kfree(hur);
 	return rc;
diff --git a/drivers/staging/lustre/lustre/lclient/glimpse.c b/drivers/staging/lustre/lustre/llite/glimpse.c
index c4e8a0878ac8..92004a05f9ee 100644
--- a/drivers/staging/lustre/lustre/lclient/glimpse.c
+++ b/drivers/staging/lustre/lustre/llite/glimpse.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -52,7 +48,6 @@
 #include <linux/file.h>
 
 #include "../include/cl_object.h"
-#include "../include/lclient.h"
 #include "../llite/llite_internal.h"
 
 static const struct cl_lock_descr whole_file = {
@@ -70,14 +65,14 @@ static const struct cl_lock_descr whole_file = {
 blkcnt_t dirty_cnt(struct inode *inode)
 {
 	blkcnt_t cnt = 0;
-	struct ccc_object *vob = cl_inode2ccc(inode);
+	struct vvp_object *vob = cl_inode2vvp(inode);
 	void	      *results[1];
 
 	if (inode->i_mapping)
 		cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->page_tree,
 						  results, 0, 1,
 						  PAGECACHE_TAG_DIRTY);
-	if (cnt == 0 && atomic_read(&vob->cob_mmap_cnt) > 0)
+	if (cnt == 0 && atomic_read(&vob->vob_mmap_cnt) > 0)
 		cnt = 1;
 
 	return (cnt > 0) ? 1 : 0;
@@ -86,17 +81,17 @@ blkcnt_t dirty_cnt(struct inode *inode)
 int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
 		    struct inode *inode, struct cl_object *clob, int agl)
 {
-	struct cl_lock_descr *descr = &ccc_env_info(env)->cti_descr;
-	struct cl_inode_info *lli   = cl_i2info(inode);
+	struct ll_inode_info *lli   = ll_i2info(inode);
 	const struct lu_fid  *fid   = lu_object_fid(&clob->co_lu);
-	struct ccc_io	*cio   = ccc_env_io(env);
-	struct cl_lock       *lock;
 	int result;
 
 	result = 0;
 	if (!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)) {
-		CDEBUG(D_DLMTRACE, "Glimpsing inode "DFID"\n", PFID(fid));
+		CDEBUG(D_DLMTRACE, "Glimpsing inode " DFID "\n", PFID(fid));
 		if (lli->lli_has_smd) {
+			struct cl_lock *lock = vvp_env_lock(env);
+			struct cl_lock_descr *descr = &lock->cll_descr;
+
 			/* NOTE: this looks like DLM lock request, but it may
 			 *       not be one. Due to CEF_ASYNC flag (translated
 			 *       to LDLM_FL_HAS_INTENT by osc), this is
@@ -113,11 +108,10 @@ int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
 			 */
 			*descr = whole_file;
 			descr->cld_obj   = clob;
-			descr->cld_mode  = CLM_PHANTOM;
+			descr->cld_mode  = CLM_READ;
 			descr->cld_enq_flags = CEF_ASYNC | CEF_MUST;
 			if (agl)
 				descr->cld_enq_flags |= CEF_AGL;
-			cio->cui_glimpse = 1;
 			/*
 			 * CEF_ASYNC is used because glimpse sub-locks cannot
 			 * deadlock (because they never conflict with other
@@ -126,21 +120,13 @@ int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
 			 * CEF_MUST protects glimpse lock from conversion into
 			 * a lockless mode.
 			 */
-			lock = cl_lock_request(env, io, descr, "glimpse",
-					       current);
-			cio->cui_glimpse = 0;
-
-			if (!lock)
-				return 0;
+			result = cl_lock_request(env, io, lock);
+			if (result < 0)
+				return result;
 
-			if (IS_ERR(lock))
-				return PTR_ERR(lock);
-
-			LASSERT(agl == 0);
-			result = cl_wait(env, lock);
-			if (result == 0) {
-				cl_merge_lvb(env, inode);
-				if (cl_isize_read(inode) > 0 &&
+			if (!agl) {
+				ll_merge_attr(env, inode);
+				if (i_size_read(inode) > 0 &&
 				    inode->i_blocks == 0) {
 					/*
 					 * LU-417: Add dirty pages block count
@@ -150,12 +136,11 @@ int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
 					 */
 					inode->i_blocks = dirty_cnt(inode);
 				}
-				cl_unuse(env, lock);
 			}
-			cl_lock_release(env, lock, "glimpse", current);
+			cl_lock_release(env, lock);
 		} else {
 			CDEBUG(D_DLMTRACE, "No objects for inode\n");
-			cl_merge_lvb(env, inode);
+			ll_merge_attr(env, inode);
 		}
 	}
 
@@ -167,22 +152,24 @@ static int cl_io_get(struct inode *inode, struct lu_env **envout,
 {
 	struct lu_env	  *env;
 	struct cl_io	   *io;
-	struct cl_inode_info   *lli = cl_i2info(inode);
+	struct ll_inode_info	*lli = ll_i2info(inode);
 	struct cl_object       *clob = lli->lli_clob;
 	int result;
 
-	if (S_ISREG(cl_inode_mode(inode))) {
+	if (S_ISREG(inode->i_mode)) {
 		env = cl_env_get(refcheck);
 		if (!IS_ERR(env)) {
-			io = ccc_env_thread_io(env);
+			io = vvp_env_thread_io(env);
 			io->ci_obj = clob;
 			*envout = env;
 			*ioout  = io;
 			result = 1;
-		} else
+		} else {
 			result = PTR_ERR(env);
-	} else
+		}
+	} else {
 		result = 0;
+	}
 	return result;
 }
 
@@ -231,14 +218,11 @@ int cl_local_size(struct inode *inode)
 {
 	struct lu_env	   *env = NULL;
 	struct cl_io	    *io  = NULL;
-	struct ccc_thread_info  *cti;
 	struct cl_object	*clob;
-	struct cl_lock_descr    *descr;
-	struct cl_lock	  *lock;
 	int		      result;
 	int		      refcheck;
 
-	if (!cl_i2info(inode)->lli_has_smd)
+	if (!ll_i2info(inode)->lli_has_smd)
 		return 0;
 
 	result = cl_io_get(inode, &env, &io, &refcheck);
@@ -247,22 +231,19 @@ int cl_local_size(struct inode *inode)
 
 	clob = io->ci_obj;
 	result = cl_io_init(env, io, CIT_MISC, clob);
-	if (result > 0)
+	if (result > 0) {
 		result = io->ci_result;
-	else if (result == 0) {
-		cti = ccc_env_info(env);
-		descr = &cti->cti_descr;
-
-		*descr = whole_file;
-		descr->cld_obj = clob;
-		lock = cl_lock_peek(env, io, descr, "localsize", current);
-		if (lock) {
-			cl_merge_lvb(env, inode);
-			cl_unuse(env, lock);
-			cl_lock_release(env, lock, "localsize", current);
-			result = 0;
-		} else
-			result = -ENODATA;
+	} else if (result == 0) {
+		struct cl_lock *lock = vvp_env_lock(env);
+
+		lock->cll_descr = whole_file;
+		lock->cll_descr.cld_enq_flags = CEF_PEEK;
+		lock->cll_descr.cld_obj = clob;
+		result = cl_lock_request(env, io, lock);
+		if (result == 0) {
+			ll_merge_attr(env, inode);
+			cl_lock_release(env, lock);
+		}
 	}
 	cl_io_fini(env, io);
 	cl_env_put(env, &refcheck);
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
new file mode 100644
index 000000000000..396e4e4f0715
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
@@ -0,0 +1,323 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2015, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * cl code shared between vvp and liblustre (and other Lustre clients in the
+ * future).
+ *
+ *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include "../../include/linux/libcfs/libcfs.h"
+# include <linux/fs.h>
+# include <linux/sched.h>
+# include <linux/mm.h>
+# include <linux/quotaops.h>
+# include <linux/highmem.h>
+# include <linux/pagemap.h>
+# include <linux/rbtree.h>
+
+#include "../include/obd.h"
+#include "../include/obd_support.h"
+#include "../include/lustre_fid.h"
+#include "../include/lustre_lite.h"
+#include "../include/lustre_dlm.h"
+#include "../include/lustre_ver.h"
+#include "../include/lustre_mdc.h"
+#include "../include/cl_object.h"
+
+#include "../llite/llite_internal.h"
+
+/*
+ * ccc_ prefix stands for "Common Client Code".
+ */
+
+/*****************************************************************************
+ *
+ * Vvp device and device type functions.
+ *
+ */
+
+/**
+ * An `emergency' environment used by cl_inode_fini() when cl_env_get()
+ * fails. Access to this environment is serialized by cl_inode_fini_guard
+ * mutex.
+ */
+struct lu_env *cl_inode_fini_env;
+int cl_inode_fini_refcheck;
+
+/**
+ * A mutex serializing calls to slp_inode_fini() under extreme memory
+ * pressure, when environments cannot be allocated.
+ */
+static DEFINE_MUTEX(cl_inode_fini_guard);
+
+int cl_setattr_ost(struct inode *inode, const struct iattr *attr)
+{
+	struct lu_env *env;
+	struct cl_io  *io;
+	int	    result;
+	int	    refcheck;
+
+	env = cl_env_get(&refcheck);
+	if (IS_ERR(env))
+		return PTR_ERR(env);
+
+	io = vvp_env_thread_io(env);
+	io->ci_obj = ll_i2info(inode)->lli_clob;
+
+	io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime);
+	io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime);
+	io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime);
+	io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size;
+	io->u.ci_setattr.sa_valid = attr->ia_valid;
+
+again:
+	if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) {
+		struct vvp_io *vio = vvp_env_io(env);
+
+		if (attr->ia_valid & ATTR_FILE)
+			/* populate the file descriptor for ftruncate to honor
+			 * group lock - see LU-787
+			 */
+			vio->vui_fd = LUSTRE_FPRIVATE(attr->ia_file);
+
+		result = cl_io_loop(env, io);
+	} else {
+		result = io->ci_result;
+	}
+	cl_io_fini(env, io);
+	if (unlikely(io->ci_need_restart))
+		goto again;
+	/* HSM import case: file is released, cannot be restored
+	 * no need to fail except if restore registration failed
+	 * with -ENODATA
+	 */
+	if (result == -ENODATA && io->ci_restore_needed &&
+	    io->ci_result != -ENODATA)
+		result = 0;
+	cl_env_put(env, &refcheck);
+	return result;
+}
+
+/**
+ * Initialize or update CLIO structures for regular files when new
+ * meta-data arrives from the server.
+ *
+ * \param inode regular file inode
+ * \param md    new file metadata from MDS
+ * - allocates cl_object if necessary,
+ * - updated layout, if object was already here.
+ */
+int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
+{
+	struct lu_env	*env;
+	struct ll_inode_info *lli;
+	struct cl_object     *clob;
+	struct lu_site       *site;
+	struct lu_fid	*fid;
+	struct cl_object_conf conf = {
+		.coc_inode = inode,
+		.u = {
+			.coc_md    = md
+		}
+	};
+	int result = 0;
+	int refcheck;
+
+	LASSERT(md->body->valid & OBD_MD_FLID);
+	LASSERT(S_ISREG(inode->i_mode));
+
+	env = cl_env_get(&refcheck);
+	if (IS_ERR(env))
+		return PTR_ERR(env);
+
+	site = ll_i2sbi(inode)->ll_site;
+	lli  = ll_i2info(inode);
+	fid  = &lli->lli_fid;
+	LASSERT(fid_is_sane(fid));
+
+	if (!lli->lli_clob) {
+		/* clob is slave of inode, empty lli_clob means for new inode,
+		 * there is no clob in cache with the given fid, so it is
+		 * unnecessary to perform lookup-alloc-lookup-insert, just
+		 * alloc and insert directly.
+		 */
+		LASSERT(inode->i_state & I_NEW);
+		conf.coc_lu.loc_flags = LOC_F_NEW;
+		clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev),
+				      fid, &conf);
+		if (!IS_ERR(clob)) {
+			/*
+			 * No locking is necessary, as new inode is
+			 * locked by I_NEW bit.
+			 */
+			lli->lli_clob = clob;
+			lli->lli_has_smd = lsm_has_objects(md->lsm);
+			lu_object_ref_add(&clob->co_lu, "inode", inode);
+		} else {
+			result = PTR_ERR(clob);
+		}
+	} else {
+		result = cl_conf_set(env, lli->lli_clob, &conf);
+	}
+
+	cl_env_put(env, &refcheck);
+
+	if (result != 0)
+		CERROR("Failure to initialize cl object " DFID ": %d\n",
+		       PFID(fid), result);
+	return result;
+}
+
+/**
+ * Wait for others drop their references of the object at first, then we drop
+ * the last one, which will lead to the object be destroyed immediately.
+ * Must be called after cl_object_kill() against this object.
+ *
+ * The reason we want to do this is: destroying top object will wait for sub
+ * objects being destroyed first, so we can't let bottom layer (e.g. from ASTs)
+ * to initiate top object destroying which may deadlock. See bz22520.
+ */
+static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
+{
+	struct lu_object_header *header = obj->co_lu.lo_header;
+	wait_queue_t	   waiter;
+
+	if (unlikely(atomic_read(&header->loh_ref) != 1)) {
+		struct lu_site *site = obj->co_lu.lo_dev->ld_site;
+		struct lu_site_bkt_data *bkt;
+
+		bkt = lu_site_bkt_from_fid(site, &header->loh_fid);
+
+		init_waitqueue_entry(&waiter, current);
+		add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
+
+		while (1) {
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			if (atomic_read(&header->loh_ref) == 1)
+				break;
+			schedule();
+		}
+
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
+	}
+
+	cl_object_put(env, obj);
+}
+
+void cl_inode_fini(struct inode *inode)
+{
+	struct lu_env	   *env;
+	struct ll_inode_info    *lli  = ll_i2info(inode);
+	struct cl_object	*clob = lli->lli_clob;
+	int refcheck;
+	int emergency;
+
+	if (clob) {
+		void		    *cookie;
+
+		cookie = cl_env_reenter();
+		env = cl_env_get(&refcheck);
+		emergency = IS_ERR(env);
+		if (emergency) {
+			mutex_lock(&cl_inode_fini_guard);
+			LASSERT(cl_inode_fini_env);
+			cl_env_implant(cl_inode_fini_env, &refcheck);
+			env = cl_inode_fini_env;
+		}
+		/*
+		 * cl_object cache is a slave to inode cache (which, in turn
+		 * is a slave to dentry cache), don't keep cl_object in memory
+		 * when its master is evicted.
+		 */
+		cl_object_kill(env, clob);
+		lu_object_ref_del(&clob->co_lu, "inode", inode);
+		cl_object_put_last(env, clob);
+		lli->lli_clob = NULL;
+		if (emergency) {
+			cl_env_unplant(cl_inode_fini_env, &refcheck);
+			mutex_unlock(&cl_inode_fini_guard);
+		} else {
+			cl_env_put(env, &refcheck);
+		}
+		cl_env_reexit(cookie);
+	}
+}
+
+/**
+ * build inode number from passed @fid
+ */
+__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32)
+{
+	if (BITS_PER_LONG == 32 || api32)
+		return fid_flatten32(fid);
+	else
+		return fid_flatten(fid);
+}
+
+/**
+ * build inode generation from passed @fid.  If our FID overflows the 32-bit
+ * inode number then return a non-zero generation to distinguish them.
+ */
+__u32 cl_fid_build_gen(const struct lu_fid *fid)
+{
+	__u32 gen;
+
+	if (fid_is_igif(fid)) {
+		gen = lu_igif_gen(fid);
+		return gen;
+	}
+
+	gen = fid_flatten(fid) >> 32;
+	return gen;
+}
+
+/* lsm is unreliable after hsm implementation as layout can be changed at
+ * any time. This is only to support old, non-clio-ized interfaces. It will
+ * cause deadlock if clio operations are called with this extra layout refcount
+ * because in case the layout changed during the IO, ll_layout_refresh() will
+ * have to wait for the refcount to become zero to destroy the older layout.
+ *
+ * Notice that the lsm returned by this function may not be valid unless called
+ * inside layout lock - MDS_INODELOCK_LAYOUT.
+ */
+struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode)
+{
+	return lov_lsm_get(ll_i2info(inode)->lli_clob);
+}
+
+inline void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm)
+{
+	lov_lsm_put(ll_i2info(inode)->lli_clob, lsm);
+}
diff --git a/drivers/staging/lustre/lustre/lclient/lcommon_misc.c b/drivers/staging/lustre/lustre/llite/lcommon_misc.c
index d80bcedd78d1..f6be105eeef7 100644
--- a/drivers/staging/lustre/lustre/lclient/lcommon_misc.c
+++ b/drivers/staging/lustre/lustre/llite/lcommon_misc.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -41,9 +37,9 @@
 #include "../include/obd_support.h"
 #include "../include/obd.h"
 #include "../include/cl_object.h"
-#include "../include/lclient.h"
 
 #include "../include/lustre_lite.h"
+#include "llite_internal.h"
 
 /* Initialize the default and maximum LOV EA and cookie sizes.  This allows
  * us to make MDS RPCs with large enough reply buffers to hold the
@@ -100,7 +96,8 @@ int cl_ocd_update(struct obd_device *host,
 	__u64 flags;
 	int   result;
 
-	if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) {
+	if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME) &&
+	    watched->obd_set_up && !watched->obd_stopping) {
 		cli = &watched->u.cli;
 		lco = owner;
 		flags = cli->cl_import->imp_connect_data.ocd_connect_flags;
@@ -115,9 +112,10 @@ int cl_ocd_update(struct obd_device *host,
 		mutex_unlock(&lco->lco_lock);
 		result = 0;
 	} else {
-		CERROR("unexpected notification from %s %s!\n",
+		CERROR("unexpected notification from %s %s (setup:%d,stopping:%d)!\n",
 		       watched->obd_type->typ_name,
-		       watched->obd_name);
+		       watched->obd_name, watched->obd_set_up,
+		       watched->obd_stopping);
 		result = -EINVAL;
 	}
 	return result;
@@ -126,7 +124,7 @@ int cl_ocd_update(struct obd_device *host,
 #define GROUPLOCK_SCOPE "grouplock"
 
 int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
-		     struct ccc_grouplock *cg)
+		     struct ll_grouplock *cg)
 {
 	struct lu_env	  *env;
 	struct cl_io	   *io;
@@ -140,20 +138,22 @@ int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
 	if (IS_ERR(env))
 		return PTR_ERR(env);
 
-	io = ccc_env_thread_io(env);
+	io = vvp_env_thread_io(env);
 	io->ci_obj = obj;
 	io->ci_ignore_layout = 1;
 
 	rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
-	if (rc) {
+	if (rc != 0) {
+		cl_io_fini(env, io);
+		cl_env_put(env, &refcheck);
 		/* Does not make sense to take GL for released layout */
 		if (rc > 0)
 			rc = -ENOTSUPP;
-		cl_env_put(env, &refcheck);
 		return rc;
 	}
 
-	descr = &ccc_env_info(env)->cti_descr;
+	lock = vvp_env_lock(env);
+	descr = &lock->cll_descr;
 	descr->cld_obj = obj;
 	descr->cld_start = 0;
 	descr->cld_end = CL_PAGE_EOF;
@@ -163,38 +163,37 @@ int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
 	enqflags = CEF_MUST | (nonblock ? CEF_NONBLOCK : 0);
 	descr->cld_enq_flags = enqflags;
 
-	lock = cl_lock_request(env, io, descr, GROUPLOCK_SCOPE, current);
-	if (IS_ERR(lock)) {
+	rc = cl_lock_request(env, io, lock);
+	if (rc < 0) {
 		cl_io_fini(env, io);
 		cl_env_put(env, &refcheck);
-		return PTR_ERR(lock);
+		return rc;
 	}
 
-	cg->cg_env  = cl_env_get(&refcheck);
-	cg->cg_io   = io;
-	cg->cg_lock = lock;
-	cg->cg_gid  = gid;
-	LASSERT(cg->cg_env == env);
+	cg->lg_env  = cl_env_get(&refcheck);
+	cg->lg_io   = io;
+	cg->lg_lock = lock;
+	cg->lg_gid  = gid;
+	LASSERT(cg->lg_env == env);
 
 	cl_env_unplant(env, &refcheck);
 	return 0;
 }
 
-void cl_put_grouplock(struct ccc_grouplock *cg)
+void cl_put_grouplock(struct ll_grouplock *cg)
 {
-	struct lu_env  *env  = cg->cg_env;
-	struct cl_io   *io   = cg->cg_io;
-	struct cl_lock *lock = cg->cg_lock;
+	struct lu_env  *env  = cg->lg_env;
+	struct cl_io   *io   = cg->lg_io;
+	struct cl_lock *lock = cg->lg_lock;
 	int	     refcheck;
 
-	LASSERT(cg->cg_env);
-	LASSERT(cg->cg_gid);
+	LASSERT(cg->lg_env);
+	LASSERT(cg->lg_gid);
 
 	cl_env_implant(env, &refcheck);
 	cl_env_put(env, &refcheck);
 
-	cl_unuse(env, lock);
-	cl_lock_release(env, lock, GROUPLOCK_SCOPE, current);
+	cl_lock_release(env, lock);
 	cl_io_fini(env, io);
 	cl_env_put(env, NULL);
 }
diff --git a/drivers/staging/lustre/lustre/llite/llite_close.c b/drivers/staging/lustre/lustre/llite/llite_close.c
index a55ac4dccd90..2326b40a0870 100644
--- a/drivers/staging/lustre/lustre/llite/llite_close.c
+++ b/drivers/staging/lustre/lustre/llite/llite_close.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -46,31 +42,31 @@
 #include "llite_internal.h"
 
 /** records that a write is in flight */
-void vvp_write_pending(struct ccc_object *club, struct ccc_page *page)
+void vvp_write_pending(struct vvp_object *club, struct vvp_page *page)
 {
-	struct ll_inode_info *lli = ll_i2info(club->cob_inode);
+	struct ll_inode_info *lli = ll_i2info(club->vob_inode);
 
 	spin_lock(&lli->lli_lock);
 	lli->lli_flags |= LLIF_SOM_DIRTY;
-	if (page && list_empty(&page->cpg_pending_linkage))
-		list_add(&page->cpg_pending_linkage, &club->cob_pending_list);
+	if (page && list_empty(&page->vpg_pending_linkage))
+		list_add(&page->vpg_pending_linkage, &club->vob_pending_list);
 	spin_unlock(&lli->lli_lock);
 }
 
 /** records that a write has completed */
-void vvp_write_complete(struct ccc_object *club, struct ccc_page *page)
+void vvp_write_complete(struct vvp_object *club, struct vvp_page *page)
 {
-	struct ll_inode_info *lli = ll_i2info(club->cob_inode);
+	struct ll_inode_info *lli = ll_i2info(club->vob_inode);
 	int rc = 0;
 
 	spin_lock(&lli->lli_lock);
-	if (page && !list_empty(&page->cpg_pending_linkage)) {
-		list_del_init(&page->cpg_pending_linkage);
+	if (page && !list_empty(&page->vpg_pending_linkage)) {
+		list_del_init(&page->vpg_pending_linkage);
 		rc = 1;
 	}
 	spin_unlock(&lli->lli_lock);
 	if (rc)
-		ll_queue_done_writing(club->cob_inode, 0);
+		ll_queue_done_writing(club->vob_inode, 0);
 }
 
 /** Queues DONE_WRITING if
@@ -80,25 +76,25 @@ void vvp_write_complete(struct ccc_object *club, struct ccc_page *page)
 void ll_queue_done_writing(struct inode *inode, unsigned long flags)
 {
 	struct ll_inode_info *lli = ll_i2info(inode);
-	struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob);
+	struct vvp_object *club = cl2vvp(ll_i2info(inode)->lli_clob);
 
 	spin_lock(&lli->lli_lock);
 	lli->lli_flags |= flags;
 
 	if ((lli->lli_flags & LLIF_DONE_WRITING) &&
-	    list_empty(&club->cob_pending_list)) {
+	    list_empty(&club->vob_pending_list)) {
 		struct ll_close_queue *lcq = ll_i2sbi(inode)->ll_lcq;
 
 		if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
-			CWARN("ino %lu/%u(flags %u) som valid it just after recovery\n",
-			      inode->i_ino, inode->i_generation,
-			      lli->lli_flags);
+			CWARN("%s: file "DFID"(flags %u) Size-on-MDS valid, done writing allowed and no diry pages\n",
+			      ll_get_fsname(inode->i_sb, NULL, 0),
+			      PFID(ll_inode2fid(inode)), lli->lli_flags);
 		/* DONE_WRITING is allowed and inode has no dirty page. */
 		spin_lock(&lcq->lcq_lock);
 
 		LASSERT(list_empty(&lli->lli_close_list));
-		CDEBUG(D_INODE, "adding inode %lu/%u to close list\n",
-		       inode->i_ino, inode->i_generation);
+		CDEBUG(D_INODE, "adding inode "DFID" to close list\n",
+		       PFID(ll_inode2fid(inode)));
 		list_add_tail(&lli->lli_close_list, &lcq->lcq_head);
 
 		/* Avoid a concurrent insertion into the close thread queue:
@@ -124,9 +120,9 @@ void ll_done_writing_attr(struct inode *inode, struct md_op_data *op_data)
 	op_data->op_flags |= MF_SOM_CHANGE;
 	/* Check if Size-on-MDS attributes are valid. */
 	if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
-		CERROR("ino %lu/%u(flags %u) som valid it just after recovery\n",
-		       inode->i_ino, inode->i_generation,
-		       lli->lli_flags);
+		CERROR("%s: inode "DFID"(flags %u) MDS holds lock on Size-on-MDS attributes\n",
+		       ll_get_fsname(inode->i_sb, NULL, 0),
+		       PFID(ll_inode2fid(inode)), lli->lli_flags);
 
 	if (!cl_local_size(inode)) {
 		/* Send Size-on-MDS Attributes if valid. */
@@ -140,10 +136,10 @@ void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data,
 		      struct obd_client_handle **och, unsigned long flags)
 {
 	struct ll_inode_info *lli = ll_i2info(inode);
-	struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob);
+	struct vvp_object *club = cl2vvp(ll_i2info(inode)->lli_clob);
 
 	spin_lock(&lli->lli_lock);
-	if (!(list_empty(&club->cob_pending_list))) {
+	if (!(list_empty(&club->vob_pending_list))) {
 		if (!(lli->lli_flags & LLIF_EPOCH_PENDING)) {
 			LASSERT(*och);
 			LASSERT(!lli->lli_pending_och);
@@ -198,7 +194,7 @@ void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data,
 		}
 	}
 
-	LASSERT(list_empty(&club->cob_pending_list));
+	LASSERT(list_empty(&club->vob_pending_list));
 	lli->lli_flags &= ~LLIF_SOM_DIRTY;
 	spin_unlock(&lli->lli_lock);
 	ll_done_writing_attr(inode, op_data);
@@ -221,9 +217,9 @@ int ll_som_update(struct inode *inode, struct md_op_data *op_data)
 
 	LASSERT(op_data);
 	if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
-		CERROR("ino %lu/%u(flags %u) som valid it just after recovery\n",
-		       inode->i_ino, inode->i_generation,
-		       lli->lli_flags);
+		CERROR("%s: inode "DFID"(flags %u) MDS holds lock on Size-on-MDS attributes\n",
+		       ll_get_fsname(inode->i_sb, NULL, 0),
+		       PFID(ll_inode2fid(inode)), lli->lli_flags);
 
 	oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
 	if (!oa) {
@@ -241,9 +237,9 @@ int ll_som_update(struct inode *inode, struct md_op_data *op_data)
 		if (rc) {
 			oa->o_valid = 0;
 			if (rc != -ENOENT)
-				CERROR("inode_getattr failed (%d): unable to send a Size-on-MDS attribute update for inode %lu/%u\n",
-				       rc, inode->i_ino,
-				       inode->i_generation);
+				CERROR("%s: inode_getattr failed - unable to send a Size-on-MDS attribute update for inode "DFID": rc = %d\n",
+				       ll_get_fsname(inode->i_sb, NULL, 0),
+				       PFID(ll_inode2fid(inode)), rc);
 		} else {
 			CDEBUG(D_INODE, "Size-on-MDS update on "DFID"\n",
 			       PFID(&lli->lli_fid));
@@ -302,9 +298,11 @@ static void ll_done_writing(struct inode *inode)
 		 * OSTs and send setattr to back to MDS.
 		 */
 		rc = ll_som_update(inode, op_data);
-	else if (rc)
-		CERROR("inode %lu mdc done_writing failed: rc = %d\n",
-		       inode->i_ino, rc);
+	else if (rc) {
+		CERROR("%s: inode "DFID" mdc done_writing failed: rc = %d\n",
+		       ll_get_fsname(inode->i_sb, NULL, 0),
+		       PFID(ll_inode2fid(inode)), rc);
+	}
 out:
 	ll_finish_md_op_data(op_data);
 	if (och) {
@@ -323,8 +321,9 @@ static struct ll_inode_info *ll_close_next_lli(struct ll_close_queue *lcq)
 		lli = list_entry(lcq->lcq_head.next, struct ll_inode_info,
 				 lli_close_list);
 		list_del_init(&lli->lli_close_list);
-	} else if (atomic_read(&lcq->lcq_stop))
+	} else if (atomic_read(&lcq->lcq_stop)) {
 		lli = ERR_PTR(-EALREADY);
+	}
 
 	spin_unlock(&lcq->lcq_lock);
 	return lli;
@@ -348,8 +347,8 @@ static int ll_close_thread(void *arg)
 			break;
 
 		inode = ll_info2i(lli);
-		CDEBUG(D_INFO, "done_writing for inode %lu/%u\n",
-		       inode->i_ino, inode->i_generation);
+		CDEBUG(D_INFO, "done_writing for inode "DFID"\n",
+		       PFID(ll_inode2fid(inode)));
 		ll_done_writing(inode);
 		iput(inode);
 	}
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index e3c0f1dd4d31..4d6d589a1677 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -43,11 +39,11 @@
 
 /* for struct cl_lock_descr and struct cl_io */
 #include "../include/cl_object.h"
-#include "../include/lclient.h"
 #include "../include/lustre_mdc.h"
 #include "../include/lustre_intent.h"
 #include <linux/compat.h>
 #include <linux/posix_acl_xattr.h>
+#include "vvp_internal.h"
 
 #ifndef FMODE_EXEC
 #define FMODE_EXEC 0
@@ -68,6 +64,7 @@ struct ll_dentry_data {
 	struct lookup_intent		*lld_it;
 	unsigned int			lld_sa_generation;
 	unsigned int			lld_invalid:1;
+	unsigned int			lld_nfs_dentry:1;
 	struct rcu_head			lld_rcu_head;
 };
 
@@ -76,9 +73,6 @@ struct ll_dentry_data {
 #define LLI_INODE_MAGIC		 0x111d0de5
 #define LLI_INODE_DEAD		  0xdeadd00d
 
-/* remote client permission cache */
-#define REMOTE_PERM_HASHSIZE 16
-
 struct ll_getname_data {
 	struct dir_context ctx;
 	char	    *lgd_name;      /* points to a buffer with NAME_MAX+1 size */
@@ -86,17 +80,11 @@ struct ll_getname_data {
 	int	      lgd_found;     /* inode matched? */
 };
 
-/* llite setxid/access permission for user on remote client */
-struct ll_remote_perm {
-	struct hlist_node	lrp_list;
-	uid_t		   lrp_uid;
-	gid_t		   lrp_gid;
-	uid_t		   lrp_fsuid;
-	gid_t		   lrp_fsgid;
-	int		   lrp_access_perm; /* MAY_READ/WRITE/EXEC, this
-					     * is access permission with
-					     * lrp_fsuid/lrp_fsgid.
-					     */
+struct ll_grouplock {
+	struct lu_env	*lg_env;
+	struct cl_io	*lg_io;
+	struct cl_lock	*lg_lock;
+	unsigned long	 lg_gid;
 };
 
 enum lli_flags {
@@ -126,9 +114,6 @@ struct ll_inode_info {
 	spinlock_t			lli_lock;
 	struct posix_acl		*lli_posix_acl;
 
-	struct hlist_head		*lli_remote_perms;
-	struct mutex				lli_rmtperm_mutex;
-
 	/* identifying fields for both metadata and data stacks. */
 	struct lu_fid		   lli_fid;
 	/* Parent fid for accessing default stripe data on parent directory
@@ -138,8 +123,6 @@ struct ll_inode_info {
 
 	struct list_head	      lli_close_list;
 
-	unsigned long		      lli_rmtperm_time;
-
 	/* handle is to be sent to MDS later on done_writing and setattr.
 	 * Open handle data are needed for the recovery to reconstruct
 	 * the inode state on the MDS. XXX: recovery is not ready yet.
@@ -161,7 +144,9 @@ struct ll_inode_info {
 	struct inode			lli_vfs_inode;
 
 	/* the most recent timestamps obtained from mds */
-	struct ost_lvb			lli_lvb;
+	s64				lli_atime;
+	s64				lli_mtime;
+	s64				lli_ctime;
 	spinlock_t			lli_agl_lock;
 
 	/* Try to make the d::member and f::member are aligned. Before using
@@ -328,6 +313,7 @@ enum ra_stat {
 	RA_STAT_EOF,
 	RA_STAT_MAX_IN_FLIGHT,
 	RA_STAT_WRONG_GRAB_PAGE,
+	RA_STAT_FAILED_REACH_END,
 	_NR_RA_STAT,
 };
 
@@ -401,7 +387,7 @@ enum stats_track_type {
 #define LL_SBI_FLOCK	     0x04
 #define LL_SBI_USER_XATTR	0x08 /* support user xattr */
 #define LL_SBI_ACL	       0x10 /* support ACL */
-#define LL_SBI_RMT_CLIENT	0x40 /* remote client */
+/* LL_SBI_RMT_CLIENT		 0x40	 remote client */
 #define LL_SBI_MDS_CAPA		 0x80 /* support mds capa, obsolete */
 #define LL_SBI_OSS_CAPA		0x100 /* support oss capa, obsolete */
 #define LL_SBI_LOCALFLOCK       0x200 /* Local flocks support by kernel */
@@ -423,7 +409,7 @@ enum stats_track_type {
 	"xattr",	\
 	"acl",		\
 	"???",		\
-	"rmt_client",	\
+	"???",		\
 	"mds_capa",	\
 	"oss_capa",	\
 	"flock",	\
@@ -439,26 +425,6 @@ enum stats_track_type {
 	"xattr",	\
 }
 
-#define RCE_HASHES      32
-
-struct rmtacl_ctl_entry {
-	struct list_head       rce_list;
-	pid_t	    rce_key; /* hash key */
-	int	      rce_ops; /* acl operation type */
-};
-
-struct rmtacl_ctl_table {
-	spinlock_t	rct_lock;
-	struct list_head	rct_entries[RCE_HASHES];
-};
-
-#define EE_HASHES       32
-
-struct eacl_table {
-	spinlock_t	et_lock;
-	struct list_head	et_entries[EE_HASHES];
-};
-
 struct ll_sb_info {
 	/* this protects pglist and ra_info.  It isn't safe to
 	 * grab from interrupt contexts
@@ -481,7 +447,13 @@ struct ll_sb_info {
 
 	struct lprocfs_stats     *ll_stats; /* lprocfs stats counter */
 
-	struct cl_client_cache    ll_cache;
+	/*
+	 * Used to track "unstable" pages on a client, and maintain a
+	 * LRU list of clean pages. An "unstable" page is defined as
+	 * any page which is sent to a server as part of a bulk request,
+	 * but is uncommitted to stable storage.
+	 */
+	struct cl_client_cache    *ll_cache;
 
 	struct lprocfs_stats     *ll_ra_stats;
 
@@ -517,21 +489,12 @@ struct ll_sb_info {
 	dev_t			  ll_sdev_orig; /* save s_dev before assign for
 						 * clustered nfs
 						 */
-	struct rmtacl_ctl_table   ll_rct;
-	struct eacl_table	 ll_et;
 	__kernel_fsid_t		  ll_fsid;
 	struct kobject		 ll_kobj; /* sysfs object */
 	struct super_block	*ll_sb; /* struct super_block (for sysfs code)*/
 	struct completion	 ll_kobj_unregister;
 };
 
-struct ll_ra_read {
-	pgoff_t	     lrr_start;
-	pgoff_t	     lrr_count;
-	struct task_struct *lrr_reader;
-	struct list_head	  lrr_linkage;
-};
-
 /*
  * per file-descriptor read-ahead data.
  */
@@ -590,12 +553,6 @@ struct ll_readahead_state {
 	 */
 	unsigned long   ras_request_index;
 	/*
-	 * list of struct ll_ra_read's one per read(2) call current in
-	 * progress against this file descriptor. Used by read-ahead code,
-	 * protected by ->ras_lock.
-	 */
-	struct list_head      ras_read_beads;
-	/*
 	 * The following 3 items are used for detecting the stride I/O
 	 * mode.
 	 * In stride I/O mode,
@@ -622,7 +579,7 @@ extern struct kmem_cache *ll_file_data_slab;
 struct lustre_handle;
 struct ll_file_data {
 	struct ll_readahead_state fd_ras;
-	struct ccc_grouplock fd_grouplock;
+	struct ll_grouplock fd_grouplock;
 	__u64 lfd_pos;
 	__u32 fd_flags;
 	fmode_t fd_omode;
@@ -637,6 +594,8 @@ struct ll_file_data {
 	 * false: unknown failure, should report.
 	 */
 	bool fd_write_failed;
+	rwlock_t fd_lock; /* protect lcc list */
+	struct list_head fd_lccs; /* list of ll_cl_context */
 };
 
 struct lov_stripe_md;
@@ -663,8 +622,16 @@ static inline int ll_need_32bit_api(struct ll_sb_info *sbi)
 #endif
 }
 
-void ll_ra_read_in(struct file *f, struct ll_ra_read *rar);
-void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar);
+void ll_ras_enter(struct file *f);
+
+/* llite/lcommon_misc.c */
+int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp);
+int cl_ocd_update(struct obd_device *host,
+		  struct obd_device *watched,
+		  enum obd_notify_event ev, void *owner, void *data);
+int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
+		     struct ll_grouplock *cg);
+void cl_put_grouplock(struct ll_grouplock *cg);
 
 /* llite/lproc_llite.c */
 int ldebugfs_register_mountpoint(struct dentry *parent,
@@ -697,15 +664,16 @@ int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
 struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de);
 
 /* llite/rw.c */
-int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to);
-int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to);
 int ll_writepage(struct page *page, struct writeback_control *wbc);
 int ll_writepages(struct address_space *, struct writeback_control *wbc);
 int ll_readpage(struct file *file, struct page *page);
 void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
 int ll_readahead(const struct lu_env *env, struct cl_io *io,
-		 struct ll_readahead_state *ras, struct address_space *mapping,
-		 struct cl_page_list *queue, int flags);
+		 struct cl_page_list *queue, struct ll_readahead_state *ras,
+		 bool hit);
+struct ll_cl_context *ll_cl_find(struct file *file);
+void ll_cl_add(struct file *file, const struct lu_env *env, struct cl_io *io);
+void ll_cl_remove(struct file *file, const struct lu_env *env);
 
 extern const struct address_space_operations ll_aops;
 
@@ -740,7 +708,7 @@ struct posix_acl *ll_get_acl(struct inode *inode, int type);
 int ll_inode_permission(struct inode *inode, int mask);
 
 int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
-			     int flags, struct lov_user_md *lum,
+			     __u64 flags, struct lov_user_md *lum,
 			     int lum_size);
 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
 			     struct lov_mds_md **lmm, int *lmm_size,
@@ -750,9 +718,9 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
 int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
 		     int *lmm_size, struct ptlrpc_request **request);
 int ll_fsync(struct file *file, loff_t start, loff_t end, int data);
-int ll_merge_lvb(const struct lu_env *env, struct inode *inode);
+int ll_merge_attr(const struct lu_env *env, struct inode *inode);
 int ll_fid2path(struct inode *inode, void __user *arg);
-int ll_data_version(struct inode *inode, __u64 *data_version, int extent_lock);
+int ll_data_version(struct inode *inode, __u64 *data_version, int flags);
 int ll_hsm_release(struct inode *inode);
 
 /* llite/dcache.c */
@@ -824,65 +792,8 @@ struct ll_close_queue {
 	atomic_t		lcq_stop;
 };
 
-struct ccc_object *cl_inode2ccc(struct inode *inode);
-
-void vvp_write_pending (struct ccc_object *club, struct ccc_page *page);
-void vvp_write_complete(struct ccc_object *club, struct ccc_page *page);
-
-/* specific architecture can implement only part of this list */
-enum vvp_io_subtype {
-	/** normal IO */
-	IO_NORMAL,
-	/** io started from splice_{read|write} */
-	IO_SPLICE
-};
-
-/* IO subtypes */
-struct vvp_io {
-	/** io subtype */
-	enum vvp_io_subtype    cui_io_subtype;
-
-	union {
-		struct {
-			struct pipe_inode_info *cui_pipe;
-			unsigned int	    cui_flags;
-		} splice;
-		struct vvp_fault_io {
-			/**
-			 * Inode modification time that is checked across DLM
-			 * lock request.
-			 */
-			time64_t	    ft_mtime;
-			struct vm_area_struct *ft_vma;
-			/**
-			 *  locked page returned from vvp_io
-			 */
-			struct page	    *ft_vmpage;
-			struct vm_fault_api {
-				/**
-				 * kernel fault info
-				 */
-				struct vm_fault *ft_vmf;
-				/**
-				 * fault API used bitflags for return code.
-				 */
-				unsigned int    ft_flags;
-				/**
-				 * check that flags are from filemap_fault
-				 */
-				bool		ft_flags_valid;
-			} fault;
-		} fault;
-	} u;
-	/**
-	 * Read-ahead state used by read and page-fault IO contexts.
-	 */
-	struct ll_ra_read    cui_bead;
-	/**
-	 * Set when cui_bead has been initialized.
-	 */
-	int		  cui_ra_window_set;
-};
+void vvp_write_pending(struct vvp_object *club, struct vvp_page *page);
+void vvp_write_complete(struct vvp_object *club, struct vvp_page *page);
 
 /**
  * IO arguments for various VFS I/O interfaces.
@@ -904,61 +815,39 @@ struct vvp_io_args {
 };
 
 struct ll_cl_context {
+	struct list_head	 lcc_list;
 	void	   *lcc_cookie;
+	const struct lu_env	*lcc_env;
 	struct cl_io   *lcc_io;
 	struct cl_page *lcc_page;
-	struct lu_env  *lcc_env;
-	int	     lcc_refcheck;
 };
 
-struct vvp_thread_info {
-	struct vvp_io_args   vti_args;
-	struct ra_io_arg     vti_ria;
-	struct ll_cl_context vti_io_ctx;
+struct ll_thread_info {
+	struct vvp_io_args   lti_args;
+	struct ra_io_arg     lti_ria;
+	struct ll_cl_context lti_io_ctx;
 };
 
-static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env)
+extern struct lu_context_key ll_thread_key;
+static inline struct ll_thread_info *ll_env_info(const struct lu_env *env)
 {
-	extern struct lu_context_key vvp_key;
-	struct vvp_thread_info      *info;
+	struct ll_thread_info *lti;
 
-	info = lu_context_key_get(&env->le_ctx, &vvp_key);
-	LASSERT(info);
-	return info;
+	lti = lu_context_key_get(&env->le_ctx, &ll_thread_key);
+	LASSERT(lti);
+	return lti;
 }
 
-static inline struct vvp_io_args *vvp_env_args(const struct lu_env *env,
-					       enum vvp_io_subtype type)
+static inline struct vvp_io_args *ll_env_args(const struct lu_env *env,
+					      enum vvp_io_subtype type)
 {
-	struct vvp_io_args *ret = &vvp_env_info(env)->vti_args;
+	struct vvp_io_args *via = &ll_env_info(env)->lti_args;
 
-	ret->via_io_subtype = type;
+	via->via_io_subtype = type;
 
-	return ret;
+	return via;
 }
 
-struct vvp_session {
-	struct vvp_io	 vs_ios;
-};
-
-static inline struct vvp_session *vvp_env_session(const struct lu_env *env)
-{
-	extern struct lu_context_key vvp_session_key;
-	struct vvp_session *ses;
-
-	ses = lu_context_key_get(env->le_ses, &vvp_session_key);
-	LASSERT(ses);
-	return ses;
-}
-
-static inline struct vvp_io *vvp_env_io(const struct lu_env *env)
-{
-	return &vvp_env_session(env)->vs_ios;
-}
-
-int vvp_global_init(void);
-void vvp_global_fini(void);
-
 void ll_queue_done_writing(struct inode *inode, unsigned long flags);
 void ll_close_thread_shutdown(struct ll_close_queue *lcq);
 int ll_close_thread_start(struct ll_close_queue **lcq_ret);
@@ -981,6 +870,10 @@ static inline void ll_invalidate_page(struct page *vmpage)
 	if (!mapping)
 		return;
 
+	/*
+	 * truncate_complete_page() calls
+	 * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete().
+	 */
 	ll_teardown_mmaps(mapping, offset, offset + PAGE_SIZE);
 	truncate_complete_page(mapping, vmpage);
 }
@@ -1040,24 +933,13 @@ static inline __u64 ll_file_maxbytes(struct inode *inode)
 }
 
 /* llite/xattr.c */
-int ll_setxattr(struct dentry *dentry, const char *name,
-		const void *value, size_t size, int flags);
-ssize_t ll_getxattr(struct dentry *dentry, const char *name,
-		    void *buffer, size_t size);
+int ll_setxattr(struct dentry *dentry, struct inode *inode,
+		const char *name, const void *value, size_t size, int flags);
+ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode,
+		    const char *name, void *buffer, size_t size);
 ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size);
 int ll_removexattr(struct dentry *dentry, const char *name);
 
-/* llite/remote_perm.c */
-extern struct kmem_cache *ll_remote_perm_cachep;
-extern struct kmem_cache *ll_rmtperm_hash_cachep;
-
-void free_rmtperm_hash(struct hlist_head *hash);
-int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm);
-int lustre_check_remote_perm(struct inode *inode, int mask);
-
-/* llite/llite_cl.c */
-extern struct lu_device_type vvp_device_type;
-
 /**
  * Common IO arguments for various VFS I/O interfaces.
  */
@@ -1069,42 +951,9 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
 		struct ll_readahead_state *ras, unsigned long index,
 		unsigned hit);
 void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len);
-void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which);
-
-/* llite/llite_rmtacl.c */
-#ifdef CONFIG_FS_POSIX_ACL
-struct eacl_entry {
-	struct list_head	    ee_list;
-	pid_t		 ee_key; /* hash key */
-	struct lu_fid	 ee_fid;
-	int		   ee_type; /* ACL type for ACCESS or DEFAULT */
-	ext_acl_xattr_header *ee_acl;
-};
-
-u64 rce_ops2valid(int ops);
-struct rmtacl_ctl_entry *rct_search(struct rmtacl_ctl_table *rct, pid_t key);
-int rct_add(struct rmtacl_ctl_table *rct, pid_t key, int ops);
-int rct_del(struct rmtacl_ctl_table *rct, pid_t key);
-void rct_init(struct rmtacl_ctl_table *rct);
-void rct_fini(struct rmtacl_ctl_table *rct);
-
-void ee_free(struct eacl_entry *ee);
-int ee_add(struct eacl_table *et, pid_t key, struct lu_fid *fid, int type,
-	   ext_acl_xattr_header *header);
-struct eacl_entry *et_search_del(struct eacl_table *et, pid_t key,
-				 struct lu_fid *fid, int type);
-void et_search_free(struct eacl_table *et, pid_t key);
-void et_init(struct eacl_table *et);
-void et_fini(struct eacl_table *et);
-#else
-static inline u64 rce_ops2valid(int ops)
-{
-	return 0;
-}
-#endif
+void ll_ra_stats_inc(struct inode *inode, enum ra_stat which);
 
 /* statahead.c */
-
 #define LL_SA_RPC_MIN	   2
 #define LL_SA_RPC_DEF	   32
 #define LL_SA_RPC_MAX	   8192
@@ -1163,6 +1012,22 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentry,
 		       int only_unplug);
 void ll_stop_statahead(struct inode *dir, void *key);
 
+blkcnt_t dirty_cnt(struct inode *inode);
+
+int cl_glimpse_size0(struct inode *inode, int agl);
+int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
+		    struct inode *inode, struct cl_object *clob, int agl);
+
+static inline int cl_glimpse_size(struct inode *inode)
+{
+	return cl_glimpse_size0(inode, 0);
+}
+
+static inline int cl_agl(struct inode *inode)
+{
+	return cl_glimpse_size0(inode, 1);
+}
+
 static inline int ll_glimpse_size(struct inode *inode)
 {
 	struct ll_inode_info *lli = ll_i2info(inode);
@@ -1285,43 +1150,6 @@ typedef enum llioc_iter (*llioc_callback_t)(struct inode *inode,
 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd);
 void ll_iocontrol_unregister(void *magic);
 
-/* lclient compat stuff */
-#define cl_inode_info ll_inode_info
-#define cl_i2info(info) ll_i2info(info)
-#define cl_inode_mode(inode) ((inode)->i_mode)
-#define cl_i2sbi ll_i2sbi
-
-static inline struct ll_file_data *cl_iattr2fd(struct inode *inode,
-					       const struct iattr *attr)
-{
-	LASSERT(attr->ia_valid & ATTR_FILE);
-	return LUSTRE_FPRIVATE(attr->ia_file);
-}
-
-static inline void cl_isize_write_nolock(struct inode *inode, loff_t kms)
-{
-	LASSERT(mutex_is_locked(&ll_i2info(inode)->lli_size_mutex));
-	i_size_write(inode, kms);
-}
-
-static inline void cl_isize_write(struct inode *inode, loff_t kms)
-{
-	ll_inode_size_lock(inode);
-	i_size_write(inode, kms);
-	ll_inode_size_unlock(inode);
-}
-
-#define cl_isize_read(inode)	     i_size_read(inode)
-
-static inline int cl_merge_lvb(const struct lu_env *env, struct inode *inode)
-{
-	return ll_merge_lvb(env, inode);
-}
-
-#define cl_inode_atime(inode) LTIME_S((inode)->i_atime)
-#define cl_inode_ctime(inode) LTIME_S((inode)->i_ctime)
-#define cl_inode_mtime(inode) LTIME_S((inode)->i_mtime)
-
 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
 		       enum cl_fsync_mode mode, int ignore_layout);
 
@@ -1350,7 +1178,7 @@ static inline void cl_stats_tally(struct cl_device *dev, enum cl_req_type crt,
 	int opc = (crt == CRT_READ) ? LPROC_LL_OSC_READ :
 				      LPROC_LL_OSC_WRITE;
 
-	ll_stats_ops_tally(ll_s2sbi(cl2ccc_dev(dev)->cdv_sb), opc, rc);
+	ll_stats_ops_tally(ll_s2sbi(cl2vvp_dev(dev)->vdv_sb), opc, rc);
 }
 
 ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
@@ -1369,7 +1197,7 @@ static inline int ll_file_nolock(const struct file *file)
 static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
 				    struct lookup_intent *it, __u64 *bits)
 {
-	if (!it->d.lustre.it_lock_set) {
+	if (!it->it_lock_set) {
 		struct lustre_handle handle;
 
 		/* If this inode is a remote object, it will get two
@@ -1380,38 +1208,26 @@ static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
 		 * LOOKUP and PERM locks, so revoking either locks will
 		 * case the dcache being cleared
 		 */
-		if (it->d.lustre.it_remote_lock_mode) {
-			handle.cookie = it->d.lustre.it_remote_lock_handle;
-			CDEBUG(D_DLMTRACE, "setting l_data to inode %p(%lu/%u) for remote lock %#llx\n",
-			       inode,
-			       inode->i_ino, inode->i_generation,
+		if (it->it_remote_lock_mode) {
+			handle.cookie = it->it_remote_lock_handle;
+			CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"%p for remote lock %#llx\n",
+			       PFID(ll_inode2fid(inode)), inode,
 			       handle.cookie);
 			md_set_lock_data(exp, &handle.cookie, inode, NULL);
 		}
 
-		handle.cookie = it->d.lustre.it_lock_handle;
+		handle.cookie = it->it_lock_handle;
 
-		CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u) for lock %#llx\n",
-		       inode, inode->i_ino,
-		       inode->i_generation, handle.cookie);
+		CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"%p for lock %#llx\n",
+		       PFID(ll_inode2fid(inode)), inode, handle.cookie);
 
 		md_set_lock_data(exp, &handle.cookie, inode,
-				 &it->d.lustre.it_lock_bits);
-		it->d.lustre.it_lock_set = 1;
+				 &it->it_lock_bits);
+		it->it_lock_set = 1;
 	}
 
 	if (bits)
-		*bits = it->d.lustre.it_lock_bits;
-}
-
-static inline void ll_lock_dcache(struct inode *inode)
-{
-	spin_lock(&inode->i_lock);
-}
-
-static inline void ll_unlock_dcache(struct inode *inode)
-{
-	spin_unlock(&inode->i_lock);
+		*bits = it->it_lock_bits;
 }
 
 static inline int d_lustre_invalid(const struct dentry *dentry)
@@ -1471,9 +1287,25 @@ enum {
 
 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf);
 int ll_layout_refresh(struct inode *inode, __u32 *gen);
-int ll_layout_restore(struct inode *inode);
+int ll_layout_restore(struct inode *inode, loff_t start, __u64 length);
 
 int ll_xattr_init(void);
 void ll_xattr_fini(void);
 
+int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
+		    struct cl_page *page, enum cl_req_type crt);
+
+/* lcommon_cl.c */
+int cl_setattr_ost(struct inode *inode, const struct iattr *attr);
+
+extern struct lu_env *cl_inode_fini_env;
+extern int cl_inode_fini_refcheck;
+
+int cl_file_inode_init(struct inode *inode, struct lustre_md *md);
+void cl_inode_fini(struct inode *inode);
+int cl_local_size(struct inode *inode);
+
+__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32);
+__u32 cl_fid_build_gen(const struct lu_fid *fid);
+
 #endif /* LLITE_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
index b57a992688a8..546063e728db 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -85,17 +81,13 @@ static struct ll_sb_info *ll_init_sbi(struct super_block *sb)
 
 	si_meminfo(&si);
 	pages = si.totalram - si.totalhigh;
-	if (pages >> (20 - PAGE_SHIFT) < 512)
-		lru_page_max = pages / 2;
-	else
-		lru_page_max = (pages / 4) * 3;
+	lru_page_max = pages / 2;
 
-	/* initialize lru data */
-	atomic_set(&sbi->ll_cache.ccc_users, 0);
-	sbi->ll_cache.ccc_lru_max = lru_page_max;
-	atomic_set(&sbi->ll_cache.ccc_lru_left, lru_page_max);
-	spin_lock_init(&sbi->ll_cache.ccc_lru_lock);
-	INIT_LIST_HEAD(&sbi->ll_cache.ccc_lru);
+	sbi->ll_cache = cl_cache_init(lru_page_max);
+	if (!sbi->ll_cache) {
+		kfree(sbi);
+		return NULL;
+	}
 
 	sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32,
 					   SBI_DEFAULT_READAHEAD_MAX);
@@ -135,6 +127,11 @@ static void ll_free_sbi(struct super_block *sb)
 {
 	struct ll_sb_info *sbi = ll_s2sbi(sb);
 
+	if (sbi->ll_cache) {
+		cl_cache_decref(sbi->ll_cache);
+		sbi->ll_cache = NULL;
+	}
+
 	kfree(sbi);
 }
 
@@ -169,20 +166,14 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 		return -ENOMEM;
 	}
 
-	if (llite_root) {
-		err = ldebugfs_register_mountpoint(llite_root, sb, dt, md);
-		if (err < 0)
-			CERROR("could not register mount in <debugfs>/lustre/llite\n");
-	}
-
 	/* indicate the features supported by this client */
 	data->ocd_connect_flags = OBD_CONNECT_IBITS    | OBD_CONNECT_NODEVOH  |
 				  OBD_CONNECT_ATTRFID  |
 				  OBD_CONNECT_VERSION  | OBD_CONNECT_BRW_SIZE |
 				  OBD_CONNECT_CANCELSET | OBD_CONNECT_FID     |
 				  OBD_CONNECT_AT       | OBD_CONNECT_LOV_V3   |
-				  OBD_CONNECT_RMT_CLIENT | OBD_CONNECT_VBR    |
-				  OBD_CONNECT_FULL20   | OBD_CONNECT_64BITHASH|
+				  OBD_CONNECT_VBR	| OBD_CONNECT_FULL20  |
+				  OBD_CONNECT_64BITHASH |
 				  OBD_CONNECT_EINPROGRESS |
 				  OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
 				  OBD_CONNECT_LAYOUTLOCK |
@@ -223,8 +214,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 
 	/* real client */
 	data->ocd_connect_flags |= OBD_CONNECT_REAL;
-	if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
-		data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE;
 
 	data->ocd_brw_size = MD_MAX_BRW_SIZE;
 
@@ -317,18 +306,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 		sbi->ll_flags &= ~LL_SBI_ACL;
 	}
 
-	if (data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT) {
-		if (!(sbi->ll_flags & LL_SBI_RMT_CLIENT)) {
-			sbi->ll_flags |= LL_SBI_RMT_CLIENT;
-			LCONSOLE_INFO("client is set as remote by default.\n");
-		}
-	} else {
-		if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
-			sbi->ll_flags &= ~LL_SBI_RMT_CLIENT;
-			LCONSOLE_INFO("client claims to be remote, but server rejected, forced to be local.\n");
-		}
-	}
-
 	if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH)
 		sbi->ll_flags |= LL_SBI_64BIT_HASH;
 
@@ -337,10 +314,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 	else
 		sbi->ll_md_brw_size = PAGE_SIZE;
 
-	if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK) {
-		LCONSOLE_INFO("Layout lock feature supported.\n");
+	if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK)
 		sbi->ll_flags |= LL_SBI_LAYOUT_LOCK;
-	}
 
 	if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) {
 		if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) {
@@ -364,10 +339,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 				  OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE |
 				  OBD_CONNECT_CANCELSET | OBD_CONNECT_FID      |
 				  OBD_CONNECT_SRVLOCK   | OBD_CONNECT_TRUNCLOCK|
-				  OBD_CONNECT_AT | OBD_CONNECT_RMT_CLIENT |
-				  OBD_CONNECT_OSS_CAPA | OBD_CONNECT_VBR|
-				  OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH |
-				  OBD_CONNECT_MAXBYTES |
+				  OBD_CONNECT_AT	| OBD_CONNECT_OSS_CAPA |
+				  OBD_CONNECT_VBR	| OBD_CONNECT_FULL20   |
+				  OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES |
 				  OBD_CONNECT_EINPROGRESS |
 				  OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
 				  OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS;
@@ -390,8 +364,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 	}
 
 	data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
-	if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
-		data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE;
 
 	CDEBUG(D_RPCTRACE, "ocd_connect_flags: %#llx ocd_version: %d ocd_grant: %d\n",
 	       data->ocd_connect_flags,
@@ -453,10 +425,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 	/* make root inode
 	 * XXX: move this to after cbd setup?
 	 */
-	valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS;
-	if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
-		valid |= OBD_MD_FLRMTPERM;
-	else if (sbi->ll_flags & LL_SBI_ACL)
+	valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMODEASIZE;
+	if (sbi->ll_flags & LL_SBI_ACL)
 		valid |= OBD_MD_FLACL;
 
 	op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
@@ -512,13 +482,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 		goto out_root;
 	}
 
-#ifdef CONFIG_FS_POSIX_ACL
-	if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
-		rct_init(&sbi->ll_rct);
-		et_init(&sbi->ll_et);
-	}
-#endif
-
 	checksum = sbi->ll_flags & LL_SBI_CHECKSUM;
 	err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM),
 				 KEY_CHECKSUM, sizeof(checksum), &checksum,
@@ -526,8 +489,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 	cl_sb_init(sb);
 
 	err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CACHE_SET),
-				 KEY_CACHE_SET, sizeof(sbi->ll_cache),
-				 &sbi->ll_cache, NULL);
+				 KEY_CACHE_SET, sizeof(*sbi->ll_cache),
+				 sbi->ll_cache, NULL);
 
 	sb->s_root = d_make_root(root);
 	if (!sb->s_root) {
@@ -555,6 +518,15 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 	kfree(data);
 	kfree(osfs);
 
+	if (llite_root) {
+		err = ldebugfs_register_mountpoint(llite_root, sb, dt, md);
+		if (err < 0) {
+			CERROR("%s: could not register mount in debugfs: "
+			       "rc = %d\n", ll_get_fsname(sb, NULL, 0), err);
+			err = 0;
+		}
+	}
+
 	return err;
 out_root:
 	iput(root);
@@ -563,8 +535,6 @@ out_lock_cn_cb:
 out_dt:
 	obd_disconnect(sbi->ll_dt_exp);
 	sbi->ll_dt_exp = NULL;
-	/* Make sure all OScs are gone, since cl_cache is accessing sbi. */
-	obd_zombie_barrier();
 out_md_fid:
 	obd_fid_fini(sbi->ll_md_exp->exp_obd);
 out_md:
@@ -573,7 +543,6 @@ out_md:
 out:
 	kfree(data);
 	kfree(osfs);
-	ldebugfs_unregister_mountpoint(sbi);
 	return err;
 }
 
@@ -608,13 +577,6 @@ static void client_common_put_super(struct super_block *sb)
 {
 	struct ll_sb_info *sbi = ll_s2sbi(sb);
 
-#ifdef CONFIG_FS_POSIX_ACL
-	if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
-		et_fini(&sbi->ll_et);
-		rct_fini(&sbi->ll_rct);
-	}
-#endif
-
 	ll_close_thread_shutdown(sbi->ll_lcq);
 
 	cl_sb_fini(sb);
@@ -622,10 +584,6 @@ static void client_common_put_super(struct super_block *sb)
 	obd_fid_fini(sbi->ll_dt_exp->exp_obd);
 	obd_disconnect(sbi->ll_dt_exp);
 	sbi->ll_dt_exp = NULL;
-	/* wait till all OSCs are gone, since cl_cache is accessing sbi.
-	 * see LU-2543.
-	 */
-	obd_zombie_barrier();
 
 	ldebugfs_unregister_mountpoint(sbi);
 
@@ -704,11 +662,6 @@ static int ll_options(char *options, int *flags)
 			*flags &= ~tmp;
 			goto next;
 		}
-		tmp = ll_set_opt("remote_client", s1, LL_SBI_RMT_CLIENT);
-		if (tmp) {
-			*flags |= tmp;
-			goto next;
-		}
 		tmp = ll_set_opt("user_fid2path", s1, LL_SBI_USER_FID2PATH);
 		if (tmp) {
 			*flags |= tmp;
@@ -792,12 +745,9 @@ void ll_lli_init(struct ll_inode_info *lli)
 	lli->lli_maxbytes = MAX_LFS_FILESIZE;
 	spin_lock_init(&lli->lli_lock);
 	lli->lli_posix_acl = NULL;
-	lli->lli_remote_perms = NULL;
-	mutex_init(&lli->lli_rmtperm_mutex);
 	/* Do not set lli_fid, it has been initialized already. */
 	fid_zero(&lli->lli_pfid);
 	INIT_LIST_HEAD(&lli->lli_close_list);
-	lli->lli_rmtperm_time = 0;
 	lli->lli_pending_och = NULL;
 	lli->lli_mds_read_och = NULL;
 	lli->lli_mds_write_och = NULL;
@@ -864,7 +814,8 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
 	try_module_get(THIS_MODULE);
 
 	/* client additional sb info */
-	lsi->lsi_llsbi = sbi = ll_init_sbi(sb);
+	sbi = ll_init_sbi(sb);
+	lsi->lsi_llsbi = sbi;
 	if (!sbi) {
 		module_put(THIS_MODULE);
 		kfree(cfg);
@@ -897,10 +848,8 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
 	cfg->cfg_callback = class_config_llog_handler;
 	/* set up client obds */
 	err = lustre_process_log(sb, profilenm, cfg);
-	if (err < 0) {
-		CERROR("Unable to process log: %d\n", err);
+	if (err < 0)
 		goto out_free;
-	}
 
 	/* Profile set with LCFG_MOUNTOPT so we can find our mdc and osc obds */
 	lprof = class_get_profile(profilenm);
@@ -947,7 +896,7 @@ void ll_put_super(struct super_block *sb)
 	struct lustre_sb_info *lsi = s2lsi(sb);
 	struct ll_sb_info *sbi = ll_s2sbi(sb);
 	char *profilenm = get_profile_name(sb);
-	int next, force = 1;
+	int ccc_count, next, force = 1, rc = 0;
 
 	CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
 
@@ -963,6 +912,19 @@ void ll_put_super(struct super_block *sb)
 			force = obd->obd_force;
 	}
 
+	/* Wait for unstable pages to be committed to stable storage */
+	if (!force) {
+		struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+
+		rc = l_wait_event(sbi->ll_cache->ccc_unstable_waitq,
+				  !atomic_read(&sbi->ll_cache->ccc_unstable_nr),
+				  &lwi);
+	}
+
+	ccc_count = atomic_read(&sbi->ll_cache->ccc_unstable_nr);
+	if (!force && rc != -EINTR)
+		LASSERTF(!ccc_count, "count: %i\n", ccc_count);
+
 	/* We need to set force before the lov_disconnect in
 	 * lustre_common_put_super, since l_d cleans up osc's as well.
 	 */
@@ -999,6 +961,8 @@ void ll_put_super(struct super_block *sb)
 
 	lustre_common_put_super(sb);
 
+	cl_env_cache_purge(~0);
+
 	module_put(THIS_MODULE);
 } /* client_put_super */
 
@@ -1032,8 +996,8 @@ void ll_clear_inode(struct inode *inode)
 	struct ll_inode_info *lli = ll_i2info(inode);
 	struct ll_sb_info *sbi = ll_i2sbi(inode);
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-	       inode->i_generation, inode);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+	       PFID(ll_inode2fid(inode)), inode);
 
 	if (S_ISDIR(inode->i_mode)) {
 		/* these should have been cleared in ll_file_release */
@@ -1065,17 +1029,9 @@ void ll_clear_inode(struct inode *inode)
 
 	ll_xattr_cache_destroy(inode);
 
-	if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
-		LASSERT(!lli->lli_posix_acl);
-		if (lli->lli_remote_perms) {
-			free_rmtperm_hash(lli->lli_remote_perms);
-			lli->lli_remote_perms = NULL;
-		}
-	}
 #ifdef CONFIG_FS_POSIX_ACL
-	else if (lli->lli_posix_acl) {
+	if (lli->lli_posix_acl) {
 		LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1);
-		LASSERT(!lli->lli_remote_perms);
 		posix_acl_release(lli->lli_posix_acl);
 		lli->lli_posix_acl = NULL;
 	}
@@ -1180,9 +1136,11 @@ static int ll_setattr_done_writing(struct inode *inode,
 		 * from OSTs and send setattr to back to MDS.
 		 */
 		rc = ll_som_update(inode, op_data);
-	else if (rc)
-		CERROR("inode %lu mdc truncate failed: rc = %d\n",
-		       inode->i_ino, rc);
+	else if (rc) {
+		CERROR("%s: inode "DFID" mdc truncate failed: rc = %d\n",
+		      ll_i2sbi(inode)->ll_md_exp->exp_obd->obd_name,
+		      PFID(ll_inode2fid(inode)), rc);
+	}
 	return rc;
 }
 
@@ -1210,12 +1168,9 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
 	bool file_is_released = false;
 	int rc = 0, rc1 = 0;
 
-	CDEBUG(D_VFSTRACE,
-	       "%s: setattr inode %p/fid:" DFID
-	       " from %llu to %llu, valid %x, hsm_import %d\n",
-	       ll_get_fsname(inode->i_sb, NULL, 0), inode,
-	       PFID(&lli->lli_fid), i_size_read(inode), attr->ia_size,
-	       attr->ia_valid, hsm_import);
+	CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, valid %x, hsm_import %d\n",
+	       ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), inode,
+	       i_size_read(inode), attr->ia_size, attr->ia_valid, hsm_import);
 
 	if (attr->ia_valid & ATTR_SIZE) {
 		/* Check new size against VFS/VM file size limit and rlimit */
@@ -1265,14 +1220,6 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
 		       LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
 		       (s64)ktime_get_real_seconds());
 
-	/* If we are changing file size, file content is modified, flag it. */
-	if (attr->ia_valid & ATTR_SIZE) {
-		attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
-		spin_lock(&lli->lli_lock);
-		lli->lli_flags |= LLIF_DATA_MODIFIED;
-		spin_unlock(&lli->lli_lock);
-	}
-
 	/* We always do an MDS RPC, even if we're only changing the size;
 	 * only the MDS knows whether truncate() should fail with -ETXTBUSY
 	 */
@@ -1284,13 +1231,6 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
 	if (!S_ISDIR(inode->i_mode))
 		inode_unlock(inode);
 
-	memcpy(&op_data->op_attr, attr, sizeof(*attr));
-
-	/* Open epoch for truncate. */
-	if (exp_connect_som(ll_i2mdexp(inode)) &&
-	    (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET)))
-		op_data->op_flags = MF_EPOCH_OPEN;
-
 	/* truncate on a released file must failed with -ENODATA,
 	 * so size must not be set on MDS for released file
 	 * but other attributes must be set
@@ -1304,29 +1244,40 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
 		if (lsm && lsm->lsm_pattern & LOV_PATTERN_F_RELEASED)
 			file_is_released = true;
 		ccc_inode_lsm_put(inode, lsm);
+
+		if (!hsm_import && attr->ia_valid & ATTR_SIZE) {
+			if (file_is_released) {
+				rc = ll_layout_restore(inode, 0, attr->ia_size);
+				if (rc < 0)
+					goto out;
+
+				file_is_released = false;
+				ll_layout_refresh(inode, &gen);
+			}
+
+			/*
+			 * If we are changing file size, file content is
+			 * modified, flag it.
+			 */
+			attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
+			spin_lock(&lli->lli_lock);
+			lli->lli_flags |= LLIF_DATA_MODIFIED;
+			spin_unlock(&lli->lli_lock);
+			op_data->op_bias |= MDS_DATA_MODIFIED;
+		}
 	}
 
-	/* if not in HSM import mode, clear size attr for released file
-	 * we clear the attribute send to MDT in op_data, not the original
-	 * received from caller in attr which is used later to
-	 * decide return code
-	 */
-	if (file_is_released && (attr->ia_valid & ATTR_SIZE) && !hsm_import)
-		op_data->op_attr.ia_valid &= ~ATTR_SIZE;
+	memcpy(&op_data->op_attr, attr, sizeof(*attr));
+
+	/* Open epoch for truncate. */
+	if (exp_connect_som(ll_i2mdexp(inode)) && !hsm_import &&
+	    (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET)))
+		op_data->op_flags = MF_EPOCH_OPEN;
 
 	rc = ll_md_setattr(dentry, op_data, &mod);
 	if (rc)
 		goto out;
 
-	/* truncate failed (only when non HSM import), others succeed */
-	if (file_is_released) {
-		if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
-			rc = -ENODATA;
-		else
-			rc = 0;
-		goto out;
-	}
-
 	/* RPC to MDT is sent, cancel data modification flag */
 	if (op_data->op_bias & MDS_DATA_MODIFIED) {
 		spin_lock(&lli->lli_lock);
@@ -1335,7 +1286,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
 	}
 
 	ll_ioepoch_open(lli, op_data->op_ioepoch);
-	if (!S_ISREG(inode->i_mode)) {
+	if (!S_ISREG(inode->i_mode) || file_is_released) {
 		rc = 0;
 		goto out;
 	}
@@ -1532,12 +1483,8 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
 			lli->lli_maxbytes = MAX_LFS_FILESIZE;
 	}
 
-	if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
-		if (body->valid & OBD_MD_FLRMTPERM)
-			ll_update_remote_perm(inode, md->remote_perm);
-	}
 #ifdef CONFIG_FS_POSIX_ACL
-	else if (body->valid & OBD_MD_FLACL) {
+	if (body->valid & OBD_MD_FLACL) {
 		spin_lock(&lli->lli_lock);
 		if (lli->lli_posix_acl)
 			posix_acl_release(lli->lli_posix_acl);
@@ -1552,7 +1499,7 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
 	if (body->valid & OBD_MD_FLATIME) {
 		if (body->atime > LTIME_S(inode->i_atime))
 			LTIME_S(inode->i_atime) = body->atime;
-		lli->lli_lvb.lvb_atime = body->atime;
+		lli->lli_atime = body->atime;
 	}
 	if (body->valid & OBD_MD_FLMTIME) {
 		if (body->mtime > LTIME_S(inode->i_mtime)) {
@@ -1561,12 +1508,12 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
 			       body->mtime);
 			LTIME_S(inode->i_mtime) = body->mtime;
 		}
-		lli->lli_lvb.lvb_mtime = body->mtime;
+		lli->lli_mtime = body->mtime;
 	}
 	if (body->valid & OBD_MD_FLCTIME) {
 		if (body->ctime > LTIME_S(inode->i_ctime))
 			LTIME_S(inode->i_ctime) = body->ctime;
-		lli->lli_lvb.lvb_ctime = body->ctime;
+		lli->lli_ctime = body->ctime;
 	}
 	if (body->valid & OBD_MD_FLMODE)
 		inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
@@ -1593,12 +1540,12 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
 		/* FID shouldn't be changed! */
 		if (fid_is_sane(&lli->lli_fid)) {
 			LASSERTF(lu_fid_eq(&lli->lli_fid, &body->fid1),
-				 "Trying to change FID "DFID
-				 " to the "DFID", inode %lu/%u(%p)\n",
+				 "Trying to change FID "DFID" to the "DFID", inode "DFID"(%p)\n",
 				 PFID(&lli->lli_fid), PFID(&body->fid1),
-				 inode->i_ino, inode->i_generation, inode);
-		} else
+				 PFID(ll_inode2fid(inode)), inode);
+		} else {
 			lli->lli_fid = body->fid1;
+		}
 	}
 
 	LASSERT(fid_seq(&lli->lli_fid) != 0);
@@ -1622,8 +1569,10 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
 				if (lli->lli_flags & (LLIF_DONE_WRITING |
 						      LLIF_EPOCH_PENDING |
 						      LLIF_SOM_DIRTY)) {
-					CERROR("ino %lu flags %u still has size authority! do not trust the size got from MDS\n",
-					       inode->i_ino, lli->lli_flags);
+					CERROR("%s: inode "DFID" flags %u still has size authority! do not trust the size got from MDS\n",
+					       sbi->ll_md_exp->exp_obd->obd_name,
+					       PFID(ll_inode2fid(inode)),
+					       lli->lli_flags);
 				} else {
 					/* Use old size assignment to avoid
 					 * deadlock bz14138 & bz14326
@@ -1699,7 +1648,7 @@ void ll_read_inode2(struct inode *inode, void *opaque)
 
 void ll_delete_inode(struct inode *inode)
 {
-	struct cl_inode_info *lli = cl_i2info(inode);
+	struct ll_inode_info *lli = ll_i2info(inode);
 
 	if (S_ISREG(inode->i_mode) && lli->lli_clob)
 		/* discard all dirty pages before truncating them, required by
@@ -1715,8 +1664,8 @@ void ll_delete_inode(struct inode *inode)
 		spin_lock_irq(&inode->i_data.tree_lock);
 		spin_unlock_irq(&inode->i_data.tree_lock);
 		LASSERTF(inode->i_data.nrpages == 0,
-			 "inode=%lu/%u(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n",
-			 inode->i_ino, inode->i_generation, inode,
+			 "inode="DFID"(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n",
+			 PFID(ll_inode2fid(inode)), inode,
 			 inode->i_data.nrpages);
 	}
 	/* Workaround end */
@@ -1747,7 +1696,9 @@ int ll_iocontrol(struct inode *inode, struct file *file,
 		rc = md_getattr(sbi->ll_md_exp, op_data, &req);
 		ll_finish_md_op_data(op_data);
 		if (rc) {
-			CERROR("failure %d inode %lu\n", rc, inode->i_ino);
+			CERROR("%s: failure inode "DFID": rc = %d\n",
+			       sbi->ll_md_exp->exp_obd->obd_name,
+			       PFID(ll_inode2fid(inode)), rc);
 			return -abs(rc);
 		}
 
@@ -1772,7 +1723,7 @@ int ll_iocontrol(struct inode *inode, struct file *file,
 		if (IS_ERR(op_data))
 			return PTR_ERR(op_data);
 
-		((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = flags;
+		op_data->op_attr_flags = flags;
 		op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
 		rc = md_setattr(sbi->ll_md_exp, op_data,
 				NULL, 0, NULL, 0, &req, NULL);
@@ -1967,7 +1918,13 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
 		 * At this point server returns to client's same fid as client
 		 * generated for creating. So using ->fid1 is okay here.
 		 */
-		LASSERT(fid_is_sane(&md.body->fid1));
+		if (!fid_is_sane(&md.body->fid1)) {
+			CERROR("%s: Fid is insane " DFID "\n",
+			       ll_get_fsname(sb, NULL, 0),
+			       PFID(&md.body->fid1));
+			rc = -EINVAL;
+			goto out;
+		}
 
 		*inode = ll_iget(sb, cl_fid_build_ino(&md.body->fid1,
 					     sbi->ll_flags & LL_SBI_32BIT_API),
@@ -1994,11 +1951,11 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
 	 * 3. proc2: refresh layout and layout lock granted
 	 * 4. proc1: to apply a stale layout
 	 */
-	if (it && it->d.lustre.it_lock_mode != 0) {
+	if (it && it->it_lock_mode != 0) {
 		struct lustre_handle lockh;
 		struct ldlm_lock *lock;
 
-		lockh.cookie = it->d.lustre.it_lock_handle;
+		lockh.cookie = it->it_lock_handle;
 		lock = ldlm_handle2lock(&lockh);
 		LASSERT(lock);
 		if (ldlm_has_layout(lock)) {
@@ -2066,11 +2023,11 @@ int ll_obd_statfs(struct inode *inode, void __user *arg)
 	}
 
 	memcpy(&type, data->ioc_inlbuf1, sizeof(__u32));
-	if (type & LL_STATFS_LMV)
+	if (type & LL_STATFS_LMV) {
 		exp = sbi->ll_md_exp;
-	else if (type & LL_STATFS_LOV)
+	} else if (type & LL_STATFS_LOV) {
 		exp = sbi->ll_dt_exp;
-	else {
+	} else {
 		rc = -ENODEV;
 		goto out_statfs;
 	}
@@ -2271,7 +2228,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret)
 {
 	char *buf, *path = NULL;
 	struct dentry *dentry = NULL;
-	struct ccc_object *obj = cl_inode2ccc(page->mapping->host);
+	struct vvp_object *obj = cl_inode2vvp(page->mapping->host);
 
 	/* this can be called inside spin lock so use GFP_ATOMIC. */
 	buf = (char *)__get_free_page(GFP_ATOMIC);
@@ -2285,7 +2242,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret)
 	       "%s: dirty page discard: %s/fid: " DFID "/%s may get corrupted (rc %d)\n",
 	       ll_get_fsname(page->mapping->host->i_sb, NULL, 0),
 	       s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev,
-	       PFID(&obj->cob_header.coh_lu.loh_fid),
+	       PFID(&obj->vob_header.coh_lu.loh_fid),
 	       (path && !IS_ERR(path)) ? path : "", ioret);
 
 	if (dentry)
diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c
index 5b484e62ffd0..66ee5db5fce8 100644
--- a/drivers/staging/lustre/lustre/llite/llite_mmap.c
+++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -57,10 +53,10 @@ void policy_from_vma(ldlm_policy_data_t *policy,
 		     struct vm_area_struct *vma, unsigned long addr,
 		     size_t count)
 {
-	policy->l_extent.start = ((addr - vma->vm_start) & CFS_PAGE_MASK) +
+	policy->l_extent.start = ((addr - vma->vm_start) & PAGE_MASK) +
 				 (vma->vm_pgoff << PAGE_SHIFT);
 	policy->l_extent.end = (policy->l_extent.start + count - 1) |
-			       ~CFS_PAGE_MASK;
+			       ~PAGE_MASK;
 }
 
 struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
@@ -123,7 +119,8 @@ ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret,
 
 	*env_ret = env;
 
-	io = ccc_env_thread_io(env);
+restart:
+	io = vvp_env_thread_io(env);
 	io->ci_obj = ll_i2info(inode)->lli_clob;
 	LASSERT(io->ci_obj);
 
@@ -146,17 +143,20 @@ ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret,
 
 	rc = cl_io_init(env, io, CIT_FAULT, io->ci_obj);
 	if (rc == 0) {
-		struct ccc_io *cio = ccc_env_io(env);
+		struct vvp_io *vio = vvp_env_io(env);
 		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
 
-		LASSERT(cio->cui_cl.cis_io == io);
+		LASSERT(vio->vui_cl.cis_io == io);
 
 		/* mmap lock must be MANDATORY it has to cache pages. */
 		io->ci_lockreq = CILR_MANDATORY;
-		cio->cui_fd = fd;
+		vio->vui_fd = fd;
 	} else {
 		LASSERT(rc < 0);
 		cl_io_fini(env, io);
+		if (io->ci_need_restart)
+			goto restart;
+
 		cl_env_nested_put(nest, env);
 		io = ERR_PTR(rc);
 	}
@@ -196,18 +196,11 @@ static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
 
 	set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM));
 
-	/* we grab lli_trunc_sem to exclude truncate case.
-	 * Otherwise, we could add dirty pages into osc cache
-	 * while truncate is on-going.
-	 */
-	inode = ccc_object_inode(io->ci_obj);
+	inode = vvp_object_inode(io->ci_obj);
 	lli = ll_i2info(inode);
-	down_read(&lli->lli_trunc_sem);
 
 	result = cl_io_loop(env, io);
 
-	up_read(&lli->lli_trunc_sem);
-
 	cfs_restore_sigs(set);
 
 	if (result == 0) {
@@ -307,17 +300,22 @@ static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf)
 		vio = vvp_env_io(env);
 		vio->u.fault.ft_vma       = vma;
 		vio->u.fault.ft_vmpage    = NULL;
-		vio->u.fault.fault.ft_vmf = vmf;
-		vio->u.fault.fault.ft_flags = 0;
-		vio->u.fault.fault.ft_flags_valid = false;
+		vio->u.fault.ft_vmf = vmf;
+		vio->u.fault.ft_flags = 0;
+		vio->u.fault.ft_flags_valid = false;
+
+		/* May call ll_readpage() */
+		ll_cl_add(vma->vm_file, env, io);
 
 		result = cl_io_loop(env, io);
 
+		ll_cl_remove(vma->vm_file, env);
+
 		/* ft_flags are only valid if we reached
 		 * the call to filemap_fault
 		 */
-		if (vio->u.fault.fault.ft_flags_valid)
-			fault_ret = vio->u.fault.fault.ft_flags;
+		if (vio->u.fault.ft_flags_valid)
+			fault_ret = vio->u.fault.ft_flags;
 
 		vmpage = vio->u.fault.ft_vmpage;
 		if (result != 0 && vmpage) {
@@ -390,9 +388,11 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 		result = ll_page_mkwrite0(vma, vmf->page, &retry);
 
 		if (!printed && ++count > 16) {
-			CWARN("app(%s): the page %lu of file %lu is under heavy contention.\n",
+			const struct dentry *de = vma->vm_file->f_path.dentry;
+
+			CWARN("app(%s): the page %lu of file "DFID" is under heavy contention\n",
 			      current->comm, vmf->pgoff,
-			      file_inode(vma->vm_file)->i_ino);
+			      PFID(ll_inode2fid(de->d_inode)));
 			printed = true;
 		}
 	} while (retry);
@@ -422,16 +422,16 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 /**
  *  To avoid cancel the locks covering mmapped region for lock cache pressure,
- *  we track the mapped vma count in ccc_object::cob_mmap_cnt.
+ *  we track the mapped vma count in vvp_object::vob_mmap_cnt.
  */
 static void ll_vm_open(struct vm_area_struct *vma)
 {
 	struct inode *inode    = file_inode(vma->vm_file);
-	struct ccc_object *vob = cl_inode2ccc(inode);
+	struct vvp_object *vob = cl_inode2vvp(inode);
 
 	LASSERT(vma->vm_file);
-	LASSERT(atomic_read(&vob->cob_mmap_cnt) >= 0);
-	atomic_inc(&vob->cob_mmap_cnt);
+	LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0);
+	atomic_inc(&vob->vob_mmap_cnt);
 }
 
 /**
@@ -440,11 +440,11 @@ static void ll_vm_open(struct vm_area_struct *vma)
 static void ll_vm_close(struct vm_area_struct *vma)
 {
 	struct inode      *inode = file_inode(vma->vm_file);
-	struct ccc_object *vob   = cl_inode2ccc(inode);
+	struct vvp_object *vob   = cl_inode2vvp(inode);
 
 	LASSERT(vma->vm_file);
-	atomic_dec(&vob->cob_mmap_cnt);
-	LASSERT(atomic_read(&vob->cob_mmap_cnt) >= 0);
+	atomic_dec(&vob->vob_mmap_cnt);
+	LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0);
 }
 
 /* XXX put nice comment here.  talk about __free_pte -> dirty pages and
diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c
index 193aab879709..65972c892731 100644
--- a/drivers/staging/lustre/lustre/llite/llite_nfs.c
+++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -119,7 +115,7 @@ struct inode *search_inode_for_lustre(struct super_block *sb,
 	rc = md_getattr(sbi->ll_md_exp, op_data, &req);
 	kfree(op_data);
 	if (rc) {
-		CERROR("can't get object attrs, fid "DFID", rc %d\n",
+		CDEBUG(D_INFO, "can't get object attrs, fid "DFID", rc %d\n",
 		       PFID(fid), rc);
 		return ERR_PTR(rc);
 	}
@@ -172,6 +168,24 @@ ll_iget_for_nfs(struct super_block *sb, struct lu_fid *fid, struct lu_fid *paren
 
 	/* N.B. d_obtain_alias() drops inode ref on error */
 	result = d_obtain_alias(inode);
+	if (!IS_ERR(result)) {
+		int rc;
+
+		rc = ll_d_init(result);
+		if (rc < 0) {
+			dput(result);
+			result = ERR_PTR(rc);
+		} else {
+			struct ll_dentry_data *ldd = ll_d2d(result);
+
+			/*
+			 * Need to signal to the ll_intent_file_open that
+			 * we came from NFS and so opencache needs to be
+			 * enabled for this one
+			 */
+			ldd->lld_nfs_dentry = 1;
+		}
+	}
 
 	return result;
 }
@@ -191,8 +205,9 @@ static int ll_encode_fh(struct inode *inode, __u32 *fh, int *plen,
 	int fileid_len = sizeof(struct lustre_nfs_fid) / 4;
 	struct lustre_nfs_fid *nfs_fid = (void *)fh;
 
-	CDEBUG(D_INFO, "encoding for (%lu," DFID ") maxlen=%d minlen=%d\n",
-	       inode->i_ino, PFID(ll_inode2fid(inode)), *plen, fileid_len);
+	CDEBUG(D_INFO, "%s: encoding for ("DFID") maxlen=%d minlen=%d\n",
+	       ll_get_fsname(inode->i_sb, NULL, 0),
+	       PFID(ll_inode2fid(inode)), *plen, fileid_len);
 
 	if (*plen < fileid_len) {
 		*plen = fileid_len;
@@ -298,8 +313,9 @@ static struct dentry *ll_get_parent(struct dentry *dchild)
 
 	sbi = ll_s2sbi(dir->i_sb);
 
-	CDEBUG(D_INFO, "getting parent for (%lu," DFID ")\n",
-	       dir->i_ino, PFID(ll_inode2fid(dir)));
+	CDEBUG(D_INFO, "%s: getting parent for ("DFID")\n",
+	       ll_get_fsname(dir->i_sb, NULL, 0),
+	       PFID(ll_inode2fid(dir)));
 
 	rc = ll_get_default_mdsize(sbi, &lmmsize);
 	if (rc != 0)
@@ -314,15 +330,20 @@ static struct dentry *ll_get_parent(struct dentry *dchild)
 	rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
 	ll_finish_md_op_data(op_data);
 	if (rc) {
-		CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino);
+		CERROR("%s: failure inode "DFID" get parent: rc = %d\n",
+		       ll_get_fsname(dir->i_sb, NULL, 0),
+		       PFID(ll_inode2fid(dir)), rc);
 		return ERR_PTR(rc);
 	}
 	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
-	LASSERT(body->valid & OBD_MD_FLID);
-
-	CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n",
-	       PFID(ll_inode2fid(dir)), PFID(&body->fid1));
-
+	/*
+	 * LU-3952: MDT may lost the FID of its parent, we should not crash
+	 * the NFS server, ll_iget_for_nfs() will handle the error.
+	 */
+	if (body->valid & OBD_MD_FLID) {
+		CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n",
+		       PFID(ll_inode2fid(dir)), PFID(&body->fid1));
+	}
 	result = ll_iget_for_nfs(dir->i_sb, &body->fid1, NULL);
 
 	ptlrpc_req_finished(req);
diff --git a/drivers/staging/lustre/lustre/llite/llite_rmtacl.c b/drivers/staging/lustre/lustre/llite/llite_rmtacl.c
deleted file mode 100644
index 8509b07cb5c7..000000000000
--- a/drivers/staging/lustre/lustre/llite/llite_rmtacl.c
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/llite/llite_rmtacl.c
- *
- * Lustre Remote User Access Control List.
- *
- * Author: Fan Yong <fanyong@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#ifdef CONFIG_FS_POSIX_ACL
-
-#include "../include/lustre_lite.h"
-#include "../include/lustre_eacl.h"
-#include "llite_internal.h"
-
-static inline __u32 rce_hashfunc(uid_t id)
-{
-	return id & (RCE_HASHES - 1);
-}
-
-static inline __u32 ee_hashfunc(uid_t id)
-{
-	return id & (EE_HASHES - 1);
-}
-
-u64 rce_ops2valid(int ops)
-{
-	switch (ops) {
-	case RMT_LSETFACL:
-		return OBD_MD_FLRMTLSETFACL;
-	case RMT_LGETFACL:
-		return OBD_MD_FLRMTLGETFACL;
-	case RMT_RSETFACL:
-		return OBD_MD_FLRMTRSETFACL;
-	case RMT_RGETFACL:
-		return OBD_MD_FLRMTRGETFACL;
-	default:
-		return 0;
-	}
-}
-
-static struct rmtacl_ctl_entry *rce_alloc(pid_t key, int ops)
-{
-	struct rmtacl_ctl_entry *rce;
-
-	rce = kzalloc(sizeof(*rce), GFP_NOFS);
-	if (!rce)
-		return NULL;
-
-	INIT_LIST_HEAD(&rce->rce_list);
-	rce->rce_key = key;
-	rce->rce_ops = ops;
-
-	return rce;
-}
-
-static void rce_free(struct rmtacl_ctl_entry *rce)
-{
-	if (!list_empty(&rce->rce_list))
-		list_del(&rce->rce_list);
-
-	kfree(rce);
-}
-
-static struct rmtacl_ctl_entry *__rct_search(struct rmtacl_ctl_table *rct,
-					     pid_t key)
-{
-	struct rmtacl_ctl_entry *rce;
-	struct list_head *head = &rct->rct_entries[rce_hashfunc(key)];
-
-	list_for_each_entry(rce, head, rce_list)
-		if (rce->rce_key == key)
-			return rce;
-
-	return NULL;
-}
-
-struct rmtacl_ctl_entry *rct_search(struct rmtacl_ctl_table *rct, pid_t key)
-{
-	struct rmtacl_ctl_entry *rce;
-
-	spin_lock(&rct->rct_lock);
-	rce = __rct_search(rct, key);
-	spin_unlock(&rct->rct_lock);
-	return rce;
-}
-
-int rct_add(struct rmtacl_ctl_table *rct, pid_t key, int ops)
-{
-	struct rmtacl_ctl_entry *rce, *e;
-
-	rce = rce_alloc(key, ops);
-	if (!rce)
-		return -ENOMEM;
-
-	spin_lock(&rct->rct_lock);
-	e = __rct_search(rct, key);
-	if (unlikely(e)) {
-		CWARN("Unexpected stale rmtacl_entry found: [key: %d] [ops: %d]\n",
-		      (int)key, ops);
-		rce_free(e);
-	}
-	list_add_tail(&rce->rce_list, &rct->rct_entries[rce_hashfunc(key)]);
-	spin_unlock(&rct->rct_lock);
-
-	return 0;
-}
-
-int rct_del(struct rmtacl_ctl_table *rct, pid_t key)
-{
-	struct rmtacl_ctl_entry *rce;
-
-	spin_lock(&rct->rct_lock);
-	rce = __rct_search(rct, key);
-	if (rce)
-		rce_free(rce);
-	spin_unlock(&rct->rct_lock);
-
-	return rce ? 0 : -ENOENT;
-}
-
-void rct_init(struct rmtacl_ctl_table *rct)
-{
-	int i;
-
-	spin_lock_init(&rct->rct_lock);
-	for (i = 0; i < RCE_HASHES; i++)
-		INIT_LIST_HEAD(&rct->rct_entries[i]);
-}
-
-void rct_fini(struct rmtacl_ctl_table *rct)
-{
-	struct rmtacl_ctl_entry *rce;
-	int i;
-
-	spin_lock(&rct->rct_lock);
-	for (i = 0; i < RCE_HASHES; i++)
-		while (!list_empty(&rct->rct_entries[i])) {
-			rce = list_entry(rct->rct_entries[i].next,
-					 struct rmtacl_ctl_entry, rce_list);
-			rce_free(rce);
-		}
-	spin_unlock(&rct->rct_lock);
-}
-
-static struct eacl_entry *ee_alloc(pid_t key, struct lu_fid *fid, int type,
-				   ext_acl_xattr_header *header)
-{
-	struct eacl_entry *ee;
-
-	ee = kzalloc(sizeof(*ee), GFP_NOFS);
-	if (!ee)
-		return NULL;
-
-	INIT_LIST_HEAD(&ee->ee_list);
-	ee->ee_key = key;
-	ee->ee_fid = *fid;
-	ee->ee_type = type;
-	ee->ee_acl = header;
-
-	return ee;
-}
-
-void ee_free(struct eacl_entry *ee)
-{
-	if (!list_empty(&ee->ee_list))
-		list_del(&ee->ee_list);
-
-	if (ee->ee_acl)
-		lustre_ext_acl_xattr_free(ee->ee_acl);
-
-	kfree(ee);
-}
-
-static struct eacl_entry *__et_search_del(struct eacl_table *et, pid_t key,
-					  struct lu_fid *fid, int type)
-{
-	struct eacl_entry *ee;
-	struct list_head *head = &et->et_entries[ee_hashfunc(key)];
-
-	LASSERT(fid);
-	list_for_each_entry(ee, head, ee_list)
-		if (ee->ee_key == key) {
-			if (lu_fid_eq(&ee->ee_fid, fid) &&
-			    ee->ee_type == type) {
-				list_del_init(&ee->ee_list);
-				return ee;
-			}
-		}
-
-	return NULL;
-}
-
-struct eacl_entry *et_search_del(struct eacl_table *et, pid_t key,
-				 struct lu_fid *fid, int type)
-{
-	struct eacl_entry *ee;
-
-	spin_lock(&et->et_lock);
-	ee = __et_search_del(et, key, fid, type);
-	spin_unlock(&et->et_lock);
-	return ee;
-}
-
-void et_search_free(struct eacl_table *et, pid_t key)
-{
-	struct eacl_entry *ee, *next;
-	struct list_head *head = &et->et_entries[ee_hashfunc(key)];
-
-	spin_lock(&et->et_lock);
-	list_for_each_entry_safe(ee, next, head, ee_list)
-		if (ee->ee_key == key)
-			ee_free(ee);
-
-	spin_unlock(&et->et_lock);
-}
-
-int ee_add(struct eacl_table *et, pid_t key, struct lu_fid *fid, int type,
-	   ext_acl_xattr_header *header)
-{
-	struct eacl_entry *ee, *e;
-
-	ee = ee_alloc(key, fid, type, header);
-	if (!ee)
-		return -ENOMEM;
-
-	spin_lock(&et->et_lock);
-	e = __et_search_del(et, key, fid, type);
-	if (unlikely(e)) {
-		CWARN("Unexpected stale eacl_entry found: [key: %d] [fid: " DFID "] [type: %d]\n",
-		      (int)key, PFID(fid), type);
-		ee_free(e);
-	}
-	list_add_tail(&ee->ee_list, &et->et_entries[ee_hashfunc(key)]);
-	spin_unlock(&et->et_lock);
-
-	return 0;
-}
-
-void et_init(struct eacl_table *et)
-{
-	int i;
-
-	spin_lock_init(&et->et_lock);
-	for (i = 0; i < EE_HASHES; i++)
-		INIT_LIST_HEAD(&et->et_entries[i]);
-}
-
-void et_fini(struct eacl_table *et)
-{
-	struct eacl_entry *ee;
-	int i;
-
-	spin_lock(&et->et_lock);
-	for (i = 0; i < EE_HASHES; i++)
-		while (!list_empty(&et->et_entries[i])) {
-			ee = list_entry(et->et_entries[i].next,
-					struct eacl_entry, ee_list);
-			ee_free(ee);
-		}
-	spin_unlock(&et->et_lock);
-}
-
-#endif
diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c
deleted file mode 100644
index f169c0db63b4..000000000000
--- a/drivers/staging/lustre/lustre/llite/lloop.c
+++ /dev/null
@@ -1,882 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-/*
- *  linux/drivers/block/loop.c
- *
- *  Written by Theodore Ts'o, 3/29/93
- *
- * Copyright 1993 by Theodore Ts'o.  Redistribution of this file is
- * permitted under the GNU General Public License.
- *
- * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
- * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
- *
- * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
- *
- * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
- *
- * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
- *
- * Loadable modules and other fixes by AK, 1998
- *
- * Maximum number of loop devices now dynamic via max_loop module parameter.
- * Russell Kroll <rkroll@exploits.org> 19990701
- *
- * Maximum number of loop devices when compiled-in now selectable by passing
- * max_loop=<1-255> to the kernel on boot.
- * Erik I. Bols?, <eriki@himolde.no>, Oct 31, 1999
- *
- * Completely rewrite request handling to be make_request_fn style and
- * non blocking, pushing work to a helper thread. Lots of fixes from
- * Al Viro too.
- * Jens Axboe <axboe@suse.de>, Nov 2000
- *
- * Support up to 256 loop devices
- * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
- *
- * Support for falling back on the write file operation when the address space
- * operations prepare_write and/or commit_write are not available on the
- * backing filesystem.
- * Anton Altaparmakov, 16 Feb 2005
- *
- * Still To Fix:
- * - Advisory locking is ignored here.
- * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
- *
- */
-
-#include <linux/module.h>
-
-#include <linux/sched.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/major.h>
-#include <linux/wait.h>
-#include <linux/blkdev.h>
-#include <linux/blkpg.h>
-#include <linux/init.h>
-#include <linux/swap.h>
-#include <linux/slab.h>
-#include <linux/suspend.h>
-#include <linux/writeback.h>
-#include <linux/buffer_head.h>		/* for invalidate_bdev() */
-#include <linux/completion.h>
-#include <linux/highmem.h>
-#include <linux/gfp.h>
-#include <linux/pagevec.h>
-#include <linux/uaccess.h>
-
-#include "../include/lustre_lib.h"
-#include "../include/lustre_lite.h"
-#include "llite_internal.h"
-
-#define LLOOP_MAX_SEGMENTS	LNET_MAX_IOV
-
-/* Possible states of device */
-enum {
-	LLOOP_UNBOUND,
-	LLOOP_BOUND,
-	LLOOP_RUNDOWN,
-};
-
-struct lloop_device {
-	int		  lo_number;
-	int		  lo_refcnt;
-	loff_t	       lo_offset;
-	loff_t	       lo_sizelimit;
-	int		  lo_flags;
-	struct file	 *lo_backing_file;
-	struct block_device *lo_device;
-	unsigned	     lo_blocksize;
-
-	gfp_t		  old_gfp_mask;
-
-	spinlock_t		lo_lock;
-	struct bio		*lo_bio;
-	struct bio		*lo_biotail;
-	int			lo_state;
-	struct semaphore	lo_sem;
-	struct mutex		lo_ctl_mutex;
-	atomic_t	 lo_pending;
-	wait_queue_head_t	  lo_bh_wait;
-
-	struct request_queue *lo_queue;
-
-	const struct lu_env *lo_env;
-	struct cl_io	 lo_io;
-	struct ll_dio_pages  lo_pvec;
-
-	/* data to handle bio for lustre. */
-	struct lo_request_data {
-		struct page *lrd_pages[LLOOP_MAX_SEGMENTS];
-		loff_t       lrd_offsets[LLOOP_MAX_SEGMENTS];
-	} lo_requests[1];
-};
-
-/*
- * Loop flags
- */
-enum {
-	LO_FLAGS_READ_ONLY       = 1,
-};
-
-static int lloop_major;
-#define MAX_LOOP_DEFAULT  16
-static int max_loop = MAX_LOOP_DEFAULT;
-static struct lloop_device *loop_dev;
-static struct gendisk **disks;
-static struct mutex lloop_mutex;
-static void *ll_iocontrol_magic;
-
-static loff_t get_loop_size(struct lloop_device *lo, struct file *file)
-{
-	loff_t size, offset, loopsize;
-
-	/* Compute loopsize in bytes */
-	size = i_size_read(file->f_mapping->host);
-	offset = lo->lo_offset;
-	loopsize = size - offset;
-	if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
-		loopsize = lo->lo_sizelimit;
-
-	/*
-	 * Unfortunately, if we want to do I/O on the device,
-	 * the number of 512-byte sectors has to fit into a sector_t.
-	 */
-	return loopsize >> 9;
-}
-
-static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head)
-{
-	const struct lu_env  *env   = lo->lo_env;
-	struct cl_io	 *io    = &lo->lo_io;
-	struct inode	 *inode = file_inode(lo->lo_backing_file);
-	struct cl_object     *obj = ll_i2info(inode)->lli_clob;
-	pgoff_t	       offset;
-	int		   ret;
-	int		   rw;
-	u32		   page_count = 0;
-	struct bio_vec       bvec;
-	struct bvec_iter   iter;
-	struct bio	   *bio;
-	ssize_t	       bytes;
-
-	struct ll_dio_pages  *pvec = &lo->lo_pvec;
-	struct page	 **pages = pvec->ldp_pages;
-	loff_t	       *offsets = pvec->ldp_offsets;
-
-	truncate_inode_pages(inode->i_mapping, 0);
-
-	/* initialize the IO */
-	memset(io, 0, sizeof(*io));
-	io->ci_obj = obj;
-	ret = cl_io_init(env, io, CIT_MISC, obj);
-	if (ret)
-		return io->ci_result;
-	io->ci_lockreq = CILR_NEVER;
-
-	rw = head->bi_rw;
-	for (bio = head; bio ; bio = bio->bi_next) {
-		LASSERT(rw == bio->bi_rw);
-
-		offset = (pgoff_t)(bio->bi_iter.bi_sector << 9) + lo->lo_offset;
-		bio_for_each_segment(bvec, bio, iter) {
-			BUG_ON(bvec.bv_offset != 0);
-			BUG_ON(bvec.bv_len != PAGE_SIZE);
-
-			pages[page_count] = bvec.bv_page;
-			offsets[page_count] = offset;
-			page_count++;
-			offset += bvec.bv_len;
-		}
-		LASSERT(page_count <= LLOOP_MAX_SEGMENTS);
-	}
-
-	ll_stats_ops_tally(ll_i2sbi(inode),
-			(rw == WRITE) ? LPROC_LL_BRW_WRITE : LPROC_LL_BRW_READ,
-			page_count);
-
-	pvec->ldp_size = page_count << PAGE_SHIFT;
-	pvec->ldp_nr = page_count;
-
-	/* FIXME: in ll_direct_rw_pages, it has to allocate many cl_page{}s to
-	 * write those pages into OST. Even worse case is that more pages
-	 * would be asked to write out to swap space, and then finally get here
-	 * again.
-	 * Unfortunately this is NOT easy to fix.
-	 * Thoughts on solution:
-	 * 0. Define a reserved pool for cl_pages, which could be a list of
-	 *    pre-allocated cl_pages;
-	 * 1. Define a new operation in cl_object_operations{}, says clo_depth,
-	 *    which measures how many layers for this lustre object. Generally
-	 *    speaking, the depth would be 2, one for llite, and one for lovsub.
-	 *    However, for SNS, there will be more since we need additional page
-	 *    to store parity;
-	 * 2. Reserve the # of (page_count * depth) cl_pages from the reserved
-	 *    pool. Afterwards, the clio would allocate the pages from reserved
-	 *    pool, this guarantees we needn't allocate the cl_pages from
-	 *    generic cl_page slab cache.
-	 *    Of course, if there is NOT enough pages in the pool, we might
-	 *    be asked to write less pages once, this purely depends on
-	 *    implementation. Anyway, we should be careful to avoid deadlocking.
-	 */
-	inode_lock(inode);
-	bytes = ll_direct_rw_pages(env, io, rw, inode, pvec);
-	inode_unlock(inode);
-	cl_io_fini(env, io);
-	return (bytes == pvec->ldp_size) ? 0 : (int)bytes;
-}
-
-/*
- * Add bio to back of pending list
- */
-static void loop_add_bio(struct lloop_device *lo, struct bio *bio)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&lo->lo_lock, flags);
-	if (lo->lo_biotail) {
-		lo->lo_biotail->bi_next = bio;
-		lo->lo_biotail = bio;
-	} else
-		lo->lo_bio = lo->lo_biotail = bio;
-	spin_unlock_irqrestore(&lo->lo_lock, flags);
-
-	atomic_inc(&lo->lo_pending);
-	if (waitqueue_active(&lo->lo_bh_wait))
-		wake_up(&lo->lo_bh_wait);
-}
-
-/*
- * Grab first pending buffer
- */
-static unsigned int loop_get_bio(struct lloop_device *lo, struct bio **req)
-{
-	struct bio *first;
-	struct bio **bio;
-	unsigned int count = 0;
-	unsigned int page_count = 0;
-	int rw;
-
-	spin_lock_irq(&lo->lo_lock);
-	first = lo->lo_bio;
-	if (unlikely(!first)) {
-		spin_unlock_irq(&lo->lo_lock);
-		return 0;
-	}
-
-	/* TODO: need to split the bio, too bad. */
-	LASSERT(first->bi_vcnt <= LLOOP_MAX_SEGMENTS);
-
-	rw = first->bi_rw;
-	bio = &lo->lo_bio;
-	while (*bio && (*bio)->bi_rw == rw) {
-		CDEBUG(D_INFO, "bio sector %llu size %u count %u vcnt%u\n",
-		       (unsigned long long)(*bio)->bi_iter.bi_sector,
-		       (*bio)->bi_iter.bi_size,
-		       page_count, (*bio)->bi_vcnt);
-		if (page_count + (*bio)->bi_vcnt > LLOOP_MAX_SEGMENTS)
-			break;
-
-		page_count += (*bio)->bi_vcnt;
-		count++;
-		bio = &(*bio)->bi_next;
-	}
-	if (*bio) {
-		/* Some of bios can't be mergeable. */
-		lo->lo_bio = *bio;
-		*bio = NULL;
-	} else {
-		/* Hit the end of queue */
-		lo->lo_biotail = NULL;
-		lo->lo_bio = NULL;
-	}
-	*req = first;
-	spin_unlock_irq(&lo->lo_lock);
-	return count;
-}
-
-static blk_qc_t loop_make_request(struct request_queue *q, struct bio *old_bio)
-{
-	struct lloop_device *lo = q->queuedata;
-	int rw = bio_rw(old_bio);
-	int inactive;
-
-	blk_queue_split(q, &old_bio, q->bio_split);
-
-	if (!lo)
-		goto err;
-
-	CDEBUG(D_INFO, "submit bio sector %llu size %u\n",
-	       (unsigned long long)old_bio->bi_iter.bi_sector,
-	       old_bio->bi_iter.bi_size);
-
-	spin_lock_irq(&lo->lo_lock);
-	inactive = lo->lo_state != LLOOP_BOUND;
-	spin_unlock_irq(&lo->lo_lock);
-	if (inactive)
-		goto err;
-
-	if (rw == WRITE) {
-		if (lo->lo_flags & LO_FLAGS_READ_ONLY)
-			goto err;
-	} else if (rw == READA) {
-		rw = READ;
-	} else if (rw != READ) {
-		CERROR("lloop: unknown command (%x)\n", rw);
-		goto err;
-	}
-	loop_add_bio(lo, old_bio);
-	return BLK_QC_T_NONE;
-err:
-	bio_io_error(old_bio);
-	return BLK_QC_T_NONE;
-}
-
-static inline void loop_handle_bio(struct lloop_device *lo, struct bio *bio)
-{
-	int ret;
-
-	ret = do_bio_lustrebacked(lo, bio);
-	while (bio) {
-		struct bio *tmp = bio->bi_next;
-
-		bio->bi_next = NULL;
-		bio->bi_error = ret;
-		bio_endio(bio);
-		bio = tmp;
-	}
-}
-
-static inline int loop_active(struct lloop_device *lo)
-{
-	return atomic_read(&lo->lo_pending) ||
-		(lo->lo_state == LLOOP_RUNDOWN);
-}
-
-/*
- * worker thread that handles reads/writes to file backed loop devices,
- * to avoid blocking in our make_request_fn.
- */
-static int loop_thread(void *data)
-{
-	struct lloop_device *lo = data;
-	struct bio *bio;
-	unsigned int count;
-	unsigned long times = 0;
-	unsigned long total_count = 0;
-
-	struct lu_env *env;
-	int refcheck;
-	int ret = 0;
-
-	set_user_nice(current, MIN_NICE);
-
-	lo->lo_state = LLOOP_BOUND;
-
-	env = cl_env_get(&refcheck);
-	if (IS_ERR(env)) {
-		ret = PTR_ERR(env);
-		goto out;
-	}
-
-	lo->lo_env = env;
-	memset(&lo->lo_pvec, 0, sizeof(lo->lo_pvec));
-	lo->lo_pvec.ldp_pages   = lo->lo_requests[0].lrd_pages;
-	lo->lo_pvec.ldp_offsets = lo->lo_requests[0].lrd_offsets;
-
-	/*
-	 * up sem, we are running
-	 */
-	up(&lo->lo_sem);
-
-	for (;;) {
-		wait_event(lo->lo_bh_wait, loop_active(lo));
-		if (!atomic_read(&lo->lo_pending)) {
-			int exiting = 0;
-
-			spin_lock_irq(&lo->lo_lock);
-			exiting = (lo->lo_state == LLOOP_RUNDOWN);
-			spin_unlock_irq(&lo->lo_lock);
-			if (exiting)
-				break;
-		}
-
-		bio = NULL;
-		count = loop_get_bio(lo, &bio);
-		if (!count) {
-			CWARN("lloop(minor: %d): missing bio\n", lo->lo_number);
-			continue;
-		}
-
-		total_count += count;
-		if (total_count < count) {     /* overflow */
-			total_count = count;
-			times = 1;
-		} else {
-			times++;
-		}
-		if ((times & 127) == 0) {
-			CDEBUG(D_INFO, "total: %lu, count: %lu, avg: %lu\n",
-			       total_count, times, total_count / times);
-		}
-
-		LASSERT(bio);
-		LASSERT(count <= atomic_read(&lo->lo_pending));
-		loop_handle_bio(lo, bio);
-		atomic_sub(count, &lo->lo_pending);
-	}
-	cl_env_put(env, &refcheck);
-
-out:
-	up(&lo->lo_sem);
-	return ret;
-}
-
-static int loop_set_fd(struct lloop_device *lo, struct file *unused,
-		       struct block_device *bdev, struct file *file)
-{
-	struct inode	 *inode;
-	struct address_space *mapping;
-	int		   lo_flags = 0;
-	int		   error;
-	loff_t		size;
-
-	if (!try_module_get(THIS_MODULE))
-		return -ENODEV;
-
-	error = -EBUSY;
-	if (lo->lo_state != LLOOP_UNBOUND)
-		goto out;
-
-	mapping = file->f_mapping;
-	inode = mapping->host;
-
-	error = -EINVAL;
-	if (!S_ISREG(inode->i_mode) || inode->i_sb->s_magic != LL_SUPER_MAGIC)
-		goto out;
-
-	if (!(file->f_mode & FMODE_WRITE))
-		lo_flags |= LO_FLAGS_READ_ONLY;
-
-	size = get_loop_size(lo, file);
-
-	if ((loff_t)(sector_t)size != size) {
-		error = -EFBIG;
-		goto out;
-	}
-
-	/* remove all pages in cache so as dirty pages not to be existent. */
-	truncate_inode_pages(mapping, 0);
-
-	set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
-
-	lo->lo_blocksize = PAGE_SIZE;
-	lo->lo_device = bdev;
-	lo->lo_flags = lo_flags;
-	lo->lo_backing_file = file;
-	lo->lo_sizelimit = 0;
-	lo->old_gfp_mask = mapping_gfp_mask(mapping);
-	mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
-
-	lo->lo_bio = lo->lo_biotail = NULL;
-
-	/*
-	 * set queue make_request_fn, and add limits based on lower level
-	 * device
-	 */
-	blk_queue_make_request(lo->lo_queue, loop_make_request);
-	lo->lo_queue->queuedata = lo;
-
-	/* queue parameters */
-	CLASSERT(PAGE_SIZE < (1 << (sizeof(unsigned short) * 8)));
-	blk_queue_logical_block_size(lo->lo_queue,
-				     (unsigned short)PAGE_SIZE);
-	blk_queue_max_hw_sectors(lo->lo_queue,
-				 LLOOP_MAX_SEGMENTS << (PAGE_SHIFT - 9));
-	blk_queue_max_segments(lo->lo_queue, LLOOP_MAX_SEGMENTS);
-
-	set_capacity(disks[lo->lo_number], size);
-	bd_set_size(bdev, size << 9);
-
-	set_blocksize(bdev, lo->lo_blocksize);
-
-	kthread_run(loop_thread, lo, "lloop%d", lo->lo_number);
-	down(&lo->lo_sem);
-	return 0;
-
-out:
-	/* This is safe: open() is still holding a reference. */
-	module_put(THIS_MODULE);
-	return error;
-}
-
-static int loop_clr_fd(struct lloop_device *lo, struct block_device *bdev,
-		       int count)
-{
-	struct file *filp = lo->lo_backing_file;
-	gfp_t gfp = lo->old_gfp_mask;
-
-	if (lo->lo_state != LLOOP_BOUND)
-		return -ENXIO;
-
-	if (lo->lo_refcnt > count)	/* we needed one fd for the ioctl */
-		return -EBUSY;
-
-	if (!filp)
-		return -EINVAL;
-
-	spin_lock_irq(&lo->lo_lock);
-	lo->lo_state = LLOOP_RUNDOWN;
-	spin_unlock_irq(&lo->lo_lock);
-	wake_up(&lo->lo_bh_wait);
-
-	down(&lo->lo_sem);
-	lo->lo_backing_file = NULL;
-	lo->lo_device = NULL;
-	lo->lo_offset = 0;
-	lo->lo_sizelimit = 0;
-	lo->lo_flags = 0;
-	invalidate_bdev(bdev);
-	set_capacity(disks[lo->lo_number], 0);
-	bd_set_size(bdev, 0);
-	mapping_set_gfp_mask(filp->f_mapping, gfp);
-	lo->lo_state = LLOOP_UNBOUND;
-	fput(filp);
-	/* This is safe: open() is still holding a reference. */
-	module_put(THIS_MODULE);
-	return 0;
-}
-
-static int lo_open(struct block_device *bdev, fmode_t mode)
-{
-	struct lloop_device *lo = bdev->bd_disk->private_data;
-
-	mutex_lock(&lo->lo_ctl_mutex);
-	lo->lo_refcnt++;
-	mutex_unlock(&lo->lo_ctl_mutex);
-
-	return 0;
-}
-
-static void lo_release(struct gendisk *disk, fmode_t mode)
-{
-	struct lloop_device *lo = disk->private_data;
-
-	mutex_lock(&lo->lo_ctl_mutex);
-	--lo->lo_refcnt;
-	mutex_unlock(&lo->lo_ctl_mutex);
-}
-
-/* lloop device node's ioctl function. */
-static int lo_ioctl(struct block_device *bdev, fmode_t mode,
-		    unsigned int cmd, unsigned long arg)
-{
-	struct lloop_device *lo = bdev->bd_disk->private_data;
-	struct inode *inode = NULL;
-	int err = 0;
-
-	mutex_lock(&lloop_mutex);
-	switch (cmd) {
-	case LL_IOC_LLOOP_DETACH: {
-		err = loop_clr_fd(lo, bdev, 2);
-		if (err == 0)
-			blkdev_put(bdev, 0); /* grabbed in LLOOP_ATTACH */
-		break;
-	}
-
-	case LL_IOC_LLOOP_INFO: {
-		struct lu_fid fid;
-
-		if (!lo->lo_backing_file) {
-			err = -ENOENT;
-			break;
-		}
-		if (!inode)
-			inode = file_inode(lo->lo_backing_file);
-		if (lo->lo_state == LLOOP_BOUND)
-			fid = ll_i2info(inode)->lli_fid;
-		else
-			fid_zero(&fid);
-
-		if (copy_to_user((void __user *)arg, &fid, sizeof(fid)))
-			err = -EFAULT;
-		break;
-	}
-
-	default:
-		err = -EINVAL;
-		break;
-	}
-	mutex_unlock(&lloop_mutex);
-
-	return err;
-}
-
-static struct block_device_operations lo_fops = {
-	.owner =	THIS_MODULE,
-	.open =	 lo_open,
-	.release =      lo_release,
-	.ioctl =	lo_ioctl,
-};
-
-/* dynamic iocontrol callback.
- * This callback is registered in lloop_init and will be called by
- * ll_iocontrol_call.
- *
- * This is a llite regular file ioctl function. It takes the responsibility
- * of attaching or detaching a file by a lloop's device number.
- */
-static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
-				   unsigned int cmd, unsigned long arg,
-				   void *magic, int *rcp)
-{
-	struct lloop_device *lo = NULL;
-	struct block_device *bdev = NULL;
-	int err = 0;
-	dev_t dev;
-
-	if (magic != ll_iocontrol_magic)
-		return LLIOC_CONT;
-
-	if (!disks) {
-		err = -ENODEV;
-		goto out1;
-	}
-
-	CWARN("Enter llop_ioctl\n");
-
-	mutex_lock(&lloop_mutex);
-	switch (cmd) {
-	case LL_IOC_LLOOP_ATTACH: {
-		struct lloop_device *lo_free = NULL;
-		int i;
-
-		for (i = 0; i < max_loop; i++, lo = NULL) {
-			lo = &loop_dev[i];
-			if (lo->lo_state == LLOOP_UNBOUND) {
-				if (!lo_free)
-					lo_free = lo;
-				continue;
-			}
-			if (file_inode(lo->lo_backing_file) == file_inode(file))
-				break;
-		}
-		if (lo || !lo_free) {
-			err = -EBUSY;
-			goto out;
-		}
-
-		lo = lo_free;
-		dev = MKDEV(lloop_major, lo->lo_number);
-
-		/* quit if the used pointer is writable */
-		if (put_user((long)old_encode_dev(dev), (long __user *)arg)) {
-			err = -EFAULT;
-			goto out;
-		}
-
-		bdev = blkdev_get_by_dev(dev, file->f_mode, NULL);
-		if (IS_ERR(bdev)) {
-			err = PTR_ERR(bdev);
-			goto out;
-		}
-
-		get_file(file);
-		err = loop_set_fd(lo, NULL, bdev, file);
-		if (err) {
-			fput(file);
-			blkdev_put(bdev, 0);
-		}
-
-		break;
-	}
-
-	case LL_IOC_LLOOP_DETACH_BYDEV: {
-		int minor;
-
-		dev = old_decode_dev(arg);
-		if (MAJOR(dev) != lloop_major) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		minor = MINOR(dev);
-		if (minor > max_loop - 1) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		lo = &loop_dev[minor];
-		if (lo->lo_state != LLOOP_BOUND) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		bdev = lo->lo_device;
-		err = loop_clr_fd(lo, bdev, 1);
-		if (err == 0)
-			blkdev_put(bdev, 0); /* grabbed in LLOOP_ATTACH */
-
-		break;
-	}
-
-	default:
-		err = -EINVAL;
-		break;
-	}
-
-out:
-	mutex_unlock(&lloop_mutex);
-out1:
-	if (rcp)
-		*rcp = err;
-	return LLIOC_STOP;
-}
-
-static int __init lloop_init(void)
-{
-	int	i;
-	unsigned int cmdlist[] = {
-		LL_IOC_LLOOP_ATTACH,
-		LL_IOC_LLOOP_DETACH_BYDEV,
-	};
-
-	if (max_loop < 1 || max_loop > 256) {
-		max_loop = MAX_LOOP_DEFAULT;
-		CWARN("lloop: invalid max_loop (must be between 1 and 256), using default (%u)\n",
-		      max_loop);
-	}
-
-	lloop_major = register_blkdev(0, "lloop");
-	if (lloop_major < 0)
-		return -EIO;
-
-	CDEBUG(D_CONFIG, "registered lloop major %d with %u minors\n",
-	       lloop_major, max_loop);
-
-	ll_iocontrol_magic = ll_iocontrol_register(lloop_ioctl, 2, cmdlist);
-	if (!ll_iocontrol_magic)
-		goto out_mem1;
-
-	loop_dev = kcalloc(max_loop, sizeof(*loop_dev), GFP_KERNEL);
-	if (!loop_dev)
-		goto out_mem1;
-
-	disks = kcalloc(max_loop, sizeof(*disks), GFP_KERNEL);
-	if (!disks)
-		goto out_mem2;
-
-	for (i = 0; i < max_loop; i++) {
-		disks[i] = alloc_disk(1);
-		if (!disks[i])
-			goto out_mem3;
-	}
-
-	mutex_init(&lloop_mutex);
-
-	for (i = 0; i < max_loop; i++) {
-		struct lloop_device *lo = &loop_dev[i];
-		struct gendisk *disk = disks[i];
-
-		lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
-		if (!lo->lo_queue)
-			goto out_mem4;
-
-		mutex_init(&lo->lo_ctl_mutex);
-		sema_init(&lo->lo_sem, 0);
-		init_waitqueue_head(&lo->lo_bh_wait);
-		lo->lo_number = i;
-		spin_lock_init(&lo->lo_lock);
-		disk->major = lloop_major;
-		disk->first_minor = i;
-		disk->fops = &lo_fops;
-		sprintf(disk->disk_name, "lloop%d", i);
-		disk->private_data = lo;
-		disk->queue = lo->lo_queue;
-	}
-
-	/* We cannot fail after we call this, so another loop!*/
-	for (i = 0; i < max_loop; i++)
-		add_disk(disks[i]);
-	return 0;
-
-out_mem4:
-	while (i--)
-		blk_cleanup_queue(loop_dev[i].lo_queue);
-	i = max_loop;
-out_mem3:
-	while (i--)
-		put_disk(disks[i]);
-	kfree(disks);
-out_mem2:
-	kfree(loop_dev);
-out_mem1:
-	unregister_blkdev(lloop_major, "lloop");
-	ll_iocontrol_unregister(ll_iocontrol_magic);
-	CERROR("lloop: ran out of memory\n");
-	return -ENOMEM;
-}
-
-static void lloop_exit(void)
-{
-	int i;
-
-	ll_iocontrol_unregister(ll_iocontrol_magic);
-	for (i = 0; i < max_loop; i++) {
-		del_gendisk(disks[i]);
-		blk_cleanup_queue(loop_dev[i].lo_queue);
-		put_disk(disks[i]);
-	}
-
-	unregister_blkdev(lloop_major, "lloop");
-
-	kfree(disks);
-	kfree(loop_dev);
-}
-
-module_param(max_loop, int, 0444);
-MODULE_PARM_DESC(max_loop, "maximum of lloop_device");
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre virtual block device");
-MODULE_VERSION(LUSTRE_VERSION_STRING);
-MODULE_LICENSE("GPL");
-
-module_init(lloop_init);
-module_exit(lloop_exit);
diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c
index 27ab1261400e..e86bf3c53be3 100644
--- a/drivers/staging/lustre/lustre/llite/lproc_llite.c
+++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -180,11 +176,7 @@ LUSTRE_RO_ATTR(filesfree);
 static ssize_t client_type_show(struct kobject *kobj, struct attribute *attr,
 				char *buf)
 {
-	struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
-					      ll_kobj);
-
-	return sprintf(buf, "%s client\n",
-			sbi->ll_flags & LL_SBI_RMT_CLIENT ? "remote" : "local");
+	return sprintf(buf, "local client\n");
 }
 LUSTRE_RO_ATTR(client_type);
 
@@ -254,7 +246,6 @@ static ssize_t max_read_ahead_mb_store(struct kobject *kobj,
 	pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */
 
 	if (pages_number > totalram_pages / 2) {
-
 		CERROR("can't set file readahead more than %lu MB\n",
 		       totalram_pages >> (20 - PAGE_SHIFT + 1)); /*1/2 of RAM*/
 		return -ERANGE;
@@ -365,7 +356,7 @@ static int ll_max_cached_mb_seq_show(struct seq_file *m, void *v)
 {
 	struct super_block     *sb    = m->private;
 	struct ll_sb_info      *sbi   = ll_s2sbi(sb);
-	struct cl_client_cache *cache = &sbi->ll_cache;
+	struct cl_client_cache *cache = sbi->ll_cache;
 	int shift = 20 - PAGE_SHIFT;
 	int max_cached_mb;
 	int unused_mb;
@@ -392,7 +383,9 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 {
 	struct super_block *sb = ((struct seq_file *)file->private_data)->private;
 	struct ll_sb_info *sbi = ll_s2sbi(sb);
-	struct cl_client_cache *cache = &sbi->ll_cache;
+	struct cl_client_cache *cache = sbi->ll_cache;
+	struct lu_env *env;
+	int refcheck;
 	int mult, rc, pages_number;
 	int diff = 0;
 	int nrpages = 0;
@@ -430,6 +423,10 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 		goto out;
 	}
 
+	env = cl_env_get(&refcheck);
+	if (IS_ERR(env))
+		return 0;
+
 	diff = -diff;
 	while (diff > 0) {
 		int tmp;
@@ -455,19 +452,20 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 			break;
 
 		if (!sbi->ll_dt_exp) { /* being initialized */
-			rc = -ENODEV;
-			break;
+			rc = 0;
+			goto out;
 		}
 
 		/* difficult - have to ask OSCs to drop LRU slots. */
 		tmp = diff << 1;
-		rc = obd_set_info_async(NULL, sbi->ll_dt_exp,
+		rc = obd_set_info_async(env, sbi->ll_dt_exp,
 					sizeof(KEY_CACHE_LRU_SHRINK),
 					KEY_CACHE_LRU_SHRINK,
 					sizeof(tmp), &tmp, NULL);
 		if (rc < 0)
 			break;
 	}
+	cl_env_put(env, &refcheck);
 
 out:
 	if (rc >= 0) {
@@ -818,6 +816,23 @@ static ssize_t xattr_cache_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(xattr_cache);
 
+static ssize_t unstable_stats_show(struct kobject *kobj,
+				   struct attribute *attr,
+				   char *buf)
+{
+	struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+					      ll_kobj);
+	struct cl_client_cache *cache = sbi->ll_cache;
+	int pages, mb;
+
+	pages = atomic_read(&cache->ccc_unstable_nr);
+	mb = (pages * PAGE_SIZE) >> 20;
+
+	return sprintf(buf, "unstable_pages: %8d\n"
+			    "unstable_mb:    %8d\n", pages, mb);
+}
+LUSTRE_RO_ATTR(unstable_stats);
+
 static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
 	/* { "mntpt_path",   ll_rd_path,	     0, 0 }, */
 	{ "site",	  &ll_site_stats_fops,    NULL, 0 },
@@ -853,6 +868,7 @@ static struct attribute *llite_attrs[] = {
 	&lustre_attr_max_easize.attr,
 	&lustre_attr_default_easize.attr,
 	&lustre_attr_xattr_cache.attr,
+	&lustre_attr_unstable_stats.attr,
 	NULL,
 };
 
@@ -953,6 +969,7 @@ static const char *ra_stat_string[] = {
 	[RA_STAT_EOF] = "read-ahead to EOF",
 	[RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue",
 	[RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page",
+	[RA_STAT_FAILED_REACH_END] = "failed to reach end"
 };
 
 int ldebugfs_register_mountpoint(struct dentry *parent,
diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
index f8f98e4e8258..2c4dc69731e8 100644
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ b/drivers/staging/lustre/lustre/llite/namei.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -128,12 +124,14 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash,
 			if (rc != 0) {
 				iget_failed(inode);
 				inode = NULL;
-			} else
+			} else {
 				unlock_new_inode(inode);
-		} else if (!(inode->i_state & (I_FREEING | I_CLEAR)))
+			}
+		} else if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
 			ll_update_inode(inode, md);
-		CDEBUG(D_VFSTRACE, "got inode: %p for "DFID"\n",
-		       inode, PFID(&md->body->fid1));
+			CDEBUG(D_VFSTRACE, "got inode: "DFID"(%p)\n",
+			       PFID(&md->body->fid1), inode);
+		}
 	}
 	return inode;
 }
@@ -142,7 +140,7 @@ static void ll_invalidate_negative_children(struct inode *dir)
 {
 	struct dentry *dentry, *tmp_subdir;
 
-	ll_lock_dcache(dir);
+	spin_lock(&dir->i_lock);
 	hlist_for_each_entry(dentry, &dir->i_dentry, d_u.d_alias) {
 		spin_lock(&dentry->d_lock);
 		if (!list_empty(&dentry->d_subdirs)) {
@@ -157,7 +155,7 @@ static void ll_invalidate_negative_children(struct inode *dir)
 		}
 		spin_unlock(&dentry->d_lock);
 	}
-	ll_unlock_dcache(dir);
+	spin_unlock(&dir->i_lock);
 }
 
 int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
@@ -188,7 +186,7 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 			break;
 
 		/* Invalidate all dentries associated with this inode */
-		LASSERT(lock->l_flags & LDLM_FL_CANCELING);
+		LASSERT(ldlm_is_canceling(lock));
 
 		if (!fid_res_name_eq(ll_inode2fid(inode),
 				     &lock->l_resource->lr_name)) {
@@ -255,8 +253,8 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 		}
 
 		if ((bits & MDS_INODELOCK_UPDATE) && S_ISDIR(inode->i_mode)) {
-			CDEBUG(D_INODE, "invalidating inode %lu\n",
-			       inode->i_ino);
+			CDEBUG(D_INODE, "invalidating inode "DFID"\n",
+			       PFID(ll_inode2fid(inode)));
 			truncate_inode_pages(inode->i_mapping, 0);
 			ll_invalidate_negative_children(inode);
 		}
@@ -316,9 +314,10 @@ static struct dentry *ll_find_alias(struct inode *inode, struct dentry *dentry)
 	if (hlist_empty(&inode->i_dentry))
 		return NULL;
 
-	discon_alias = invalid_alias = NULL;
+	discon_alias = NULL;
+	invalid_alias = NULL;
 
-	ll_lock_dcache(inode);
+	spin_lock(&inode->i_lock);
 	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
 		LASSERT(alias != dentry);
 
@@ -343,7 +342,7 @@ static struct dentry *ll_find_alias(struct inode *inode, struct dentry *dentry)
 		dget_dlock(alias);
 		spin_unlock(&alias->d_lock);
 	}
-	ll_unlock_dcache(inode);
+	spin_unlock(&inode->i_lock);
 
 	return alias;
 }
@@ -389,12 +388,13 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
 	struct inode *inode = NULL;
 	__u64 bits = 0;
 	int rc = 0;
+	struct dentry *alias;
 
 	/* NB 1 request reference will be taken away by ll_intent_lock()
 	 * when I return
 	 */
 	CDEBUG(D_DENTRY, "it %p it_disposition %x\n", it,
-	       it->d.lustre.it_disposition);
+	       it->it_disposition);
 	if (!it_disposition(it, DISP_LOOKUP_NEG)) {
 		rc = ll_prep_inode(&inode, request, (*de)->d_sb, it);
 		if (rc)
@@ -413,26 +413,12 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
 		 */
 	}
 
-	/* Only hash *de if it is unhashed (new dentry).
-	 * Atoimc_open may passing hashed dentries for open.
-	 */
-	if (d_unhashed(*de)) {
-		struct dentry *alias;
-
-		alias = ll_splice_alias(inode, *de);
-		if (IS_ERR(alias)) {
-			rc = PTR_ERR(alias);
-			goto out;
-		}
-		*de = alias;
-	} else if (!it_disposition(it, DISP_LOOKUP_NEG)  &&
-		   !it_disposition(it, DISP_OPEN_CREATE)) {
-		/* With DISP_OPEN_CREATE dentry will be
-		 * instantiated in ll_create_it.
-		 */
-		LASSERT(!d_inode(*de));
-		d_instantiate(*de, inode);
+	alias = ll_splice_alias(inode, *de);
+	if (IS_ERR(alias)) {
+		rc = PTR_ERR(alias);
+		goto out;
 	}
+	*de = alias;
 
 	if (!it_disposition(it, DISP_LOOKUP_NEG)) {
 		/* we have lookup look - unhide dentry */
@@ -446,7 +432,7 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
 		/* Check that parent has UPDATE lock. */
 		struct lookup_intent parent_it = {
 					.it_op = IT_GETATTR,
-					.d.lustre.it_lock_handle = 0 };
+					.it_lock_handle = 0 };
 
 		if (md_revalidate_lock(ll_i2mdexp(parent), &parent_it,
 				       &ll_i2info(parent)->lli_fid, NULL)) {
@@ -476,9 +462,8 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
 	if (dentry->d_name.len > ll_i2sbi(parent)->ll_namelen)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),intent=%s\n",
-	       dentry, parent->i_ino,
-	       parent->i_generation, parent, LL_IT2STR(it));
+	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),intent=%s\n",
+	       dentry, PFID(ll_inode2fid(parent)), parent, LL_IT2STR(it));
 
 	if (d_mountpoint(dentry))
 		CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it));
@@ -553,9 +538,8 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry,
 	struct lookup_intent *itp, it = { .it_op = IT_GETATTR };
 	struct dentry *de;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),flags=%u\n",
-	       dentry, parent->i_ino,
-	       parent->i_generation, parent, flags);
+	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),flags=%u\n",
+	       dentry, PFID(ll_inode2fid(parent)), parent, flags);
 
 	/* Optimize away (CREATE && !OPEN). Let .create handle the race. */
 	if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN))
@@ -586,10 +570,27 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
 	long long lookup_flags = LOOKUP_OPEN;
 	int rc = 0;
 
-	CDEBUG(D_VFSTRACE,
-	       "VFS Op:name=%pd,dir=%lu/%u(%p),file %p,open_flags %x,mode %x opened %d\n",
-	       dentry, dir->i_ino,
-	       dir->i_generation, dir, file, open_flags, mode, *opened);
+	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),file %p,open_flags %x,mode %x opened %d\n",
+	       dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode,
+	       *opened);
+
+	/* Only negative dentries enter here */
+	LASSERT(!d_inode(dentry));
+
+	if (!d_in_lookup(dentry)) {
+		/* A valid negative dentry that just passed revalidation,
+		 * there's little point to try and open it server-side,
+		 * even though there's a minuscle chance it might succeed.
+		 * Either way it's a valid race to just return -ENOENT here.
+		 */
+		if (!(open_flags & O_CREAT))
+			return -ENOENT;
+
+		/* Otherwise we just unhash it to be rehashed afresh via
+		 * lookup if necessary
+		 */
+		d_drop(dentry);
+	}
 
 	it = kzalloc(sizeof(*it), GFP_NOFS);
 	if (!it)
@@ -626,13 +627,10 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
 		if (d_really_is_positive(dentry) && it_disposition(it, DISP_OPEN_OPEN)) {
 			/* Open dentry. */
 			if (S_ISFIFO(d_inode(dentry)->i_mode)) {
-				/* We cannot call open here as it would
-				 * deadlock.
+				/* We cannot call open here as it might
+				 * deadlock. This case is unreachable in
+				 * practice because of OBD_CONNECT_NODEVOH.
 				 */
-				if (it_disposition(it, DISP_ENQ_OPEN_REF))
-					ptlrpc_req_finished(
-						       (struct ptlrpc_request *)
-							  it->d.lustre.it_data);
 				rc = finish_no_open(file, de);
 			} else {
 				file->private_data = it;
@@ -663,10 +661,10 @@ static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it)
 	struct ll_sb_info *sbi = ll_i2sbi(dir);
 	int rc;
 
-	LASSERT(it && it->d.lustre.it_disposition);
+	LASSERT(it && it->it_disposition);
 
 	LASSERT(it_disposition(it, DISP_ENQ_CREATE_REF));
-	request = it->d.lustre.it_data;
+	request = it->it_request;
 	it_clear_disposition(it, DISP_ENQ_CREATE_REF);
 	rc = ll_prep_inode(&inode, request, dir->i_sb, it);
 	if (rc) {
@@ -680,8 +678,8 @@ static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it)
 	 * lock on the inode.  Since we finally have an inode pointer,
 	 * stuff it in the lock.
 	 */
-	CDEBUG(D_DLMTRACE, "setting l_ast_data to inode %p (%lu/%u)\n",
-	       inode, inode->i_ino, inode->i_generation);
+	CDEBUG(D_DLMTRACE, "setting l_ast_data to inode "DFID"(%p)\n",
+	       PFID(ll_inode2fid(dir)), inode);
 	ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL);
  out:
 	ptlrpc_req_finished(request);
@@ -708,9 +706,8 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode,
 	struct inode *inode;
 	int rc = 0;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),intent=%s\n",
-	       dentry, dir->i_ino,
-	       dir->i_generation, dir, LL_IT2STR(it));
+	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), intent=%s\n",
+	       dentry, PFID(ll_inode2fid(dir)), dir, LL_IT2STR(it));
 
 	rc = it_open_error(DISP_OPEN_CREATE, it);
 	if (rc)
@@ -733,8 +730,9 @@ static void ll_update_times(struct ptlrpc_request *request,
 	LASSERT(body);
 	if (body->valid & OBD_MD_FLMTIME &&
 	    body->mtime > LTIME_S(inode->i_mtime)) {
-		CDEBUG(D_INODE, "setting ino %lu mtime from %lu to %llu\n",
-		       inode->i_ino, LTIME_S(inode->i_mtime), body->mtime);
+		CDEBUG(D_INODE, "setting fid "DFID" mtime from %lu to %llu\n",
+		       PFID(ll_inode2fid(inode)), LTIME_S(inode->i_mtime),
+		       body->mtime);
 		LTIME_S(inode->i_mtime) = body->mtime;
 	}
 	if (body->valid & OBD_MD_FLCTIME &&
@@ -791,9 +789,9 @@ static int ll_mknod(struct inode *dir, struct dentry *dchild,
 {
 	int err;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p) mode %o dev %x\n",
-	       dchild, dir->i_ino, dir->i_generation, dir,
-	       mode, old_encode_dev(rdev));
+	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p) mode %o dev %x\n",
+	       dchild, PFID(ll_inode2fid(dir)), dir, mode,
+	       old_encode_dev(rdev));
 
 	if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir)))
 		mode &= ~current_umask();
@@ -831,9 +829,8 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry,
 {
 	int rc;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),flags=%u, excl=%d\n",
-	       dentry, dir->i_ino,
-	       dir->i_generation, dir, mode, want_excl);
+	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), flags=%u, excl=%d\n",
+	       dentry, PFID(ll_inode2fid(dir)), dir, mode, want_excl);
 
 	rc = ll_mknod(dir, dentry, mode, 0);
 
@@ -845,12 +842,6 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry,
 	return rc;
 }
 
-static inline void ll_get_child_fid(struct dentry *child, struct lu_fid *fid)
-{
-	if (d_really_is_positive(child))
-		*fid = *ll_inode2fid(d_inode(child));
-}
-
 int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir)
 {
 	struct mdt_body *body;
@@ -927,23 +918,25 @@ out:
  * is any lock existing. They will recycle dentries and inodes based upon locks
  * too. b=20433
  */
-static int ll_unlink(struct inode *dir, struct dentry *dentry)
+static int ll_unlink(struct inode *dir, struct dentry *dchild)
 {
 	struct ptlrpc_request *request = NULL;
 	struct md_op_data *op_data;
 	int rc;
 
 	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n",
-	       dentry, dir->i_ino, dir->i_generation, dir);
+	       dchild, dir->i_ino, dir->i_generation, dir);
 
 	op_data = ll_prep_md_op_data(NULL, dir, NULL,
-				     dentry->d_name.name,
-				     dentry->d_name.len,
+				     dchild->d_name.name,
+				     dchild->d_name.len,
 				     0, LUSTRE_OPC_ANY, NULL);
 	if (IS_ERR(op_data))
 		return PTR_ERR(op_data);
 
-	ll_get_child_fid(dentry, &op_data->op_fid3);
+	if (dchild && dchild->d_inode)
+		op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
+
 	op_data->op_fid2 = op_data->op_fid3;
 	rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
 	ll_finish_md_op_data(op_data);
@@ -963,8 +956,8 @@ static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	int err;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n",
-	       dentry, dir->i_ino, dir->i_generation, dir);
+	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir"DFID"(%p)\n",
+	       dentry, PFID(ll_inode2fid(dir)), dir);
 
 	if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir)))
 		mode &= ~current_umask();
@@ -977,23 +970,25 @@ static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	return err;
 }
 
-static int ll_rmdir(struct inode *dir, struct dentry *dentry)
+static int ll_rmdir(struct inode *dir, struct dentry *dchild)
 {
 	struct ptlrpc_request *request = NULL;
 	struct md_op_data *op_data;
 	int rc;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n",
-	       dentry, dir->i_ino, dir->i_generation, dir);
+	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p)\n",
+	       dchild, PFID(ll_inode2fid(dir)), dir);
 
 	op_data = ll_prep_md_op_data(NULL, dir, NULL,
-				     dentry->d_name.name,
-				     dentry->d_name.len,
+				     dchild->d_name.name,
+				     dchild->d_name.len,
 				     S_IFDIR, LUSTRE_OPC_ANY, NULL);
 	if (IS_ERR(op_data))
 		return PTR_ERR(op_data);
 
-	ll_get_child_fid(dentry, &op_data->op_fid3);
+	if (dchild && dchild->d_inode)
+		op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
+
 	op_data->op_fid2 = op_data->op_fid3;
 	rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
 	ll_finish_md_op_data(op_data);
@@ -1011,9 +1006,8 @@ static int ll_symlink(struct inode *dir, struct dentry *dentry,
 {
 	int err;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),target=%.*s\n",
-	       dentry, dir->i_ino, dir->i_generation,
-	       dir, 3000, oldname);
+	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),target=%.*s\n",
+	       dentry, PFID(ll_inode2fid(dir)), dir, 3000, oldname);
 
 	err = ll_new_node(dir, dentry, oldname, S_IFLNK | S_IRWXUGO,
 			  0, LUSTRE_OPC_SYMLINK);
@@ -1033,10 +1027,9 @@ static int ll_link(struct dentry *old_dentry, struct inode *dir,
 	struct md_op_data *op_data;
 	int err;
 
-	CDEBUG(D_VFSTRACE,
-	       "VFS Op: inode=%lu/%u(%p), dir=%lu/%u(%p), target=%pd\n",
-	       src->i_ino, src->i_generation, src, dir->i_ino,
-	       dir->i_generation, dir, new_dentry);
+	CDEBUG(D_VFSTRACE, "VFS Op: inode="DFID"(%p), dir="DFID"(%p), target=%pd\n",
+	       PFID(ll_inode2fid(src)), src, PFID(ll_inode2fid(dir)), dir,
+	       new_dentry);
 
 	op_data = ll_prep_md_op_data(NULL, src, dir, new_dentry->d_name.name,
 				     new_dentry->d_name.len,
@@ -1056,42 +1049,45 @@ out:
 	return err;
 }
 
-static int ll_rename(struct inode *old_dir, struct dentry *old_dentry,
-		     struct inode *new_dir, struct dentry *new_dentry)
+static int ll_rename(struct inode *src, struct dentry *src_dchild,
+		     struct inode *tgt, struct dentry *tgt_dchild)
 {
 	struct ptlrpc_request *request = NULL;
-	struct ll_sb_info *sbi = ll_i2sbi(old_dir);
+	struct ll_sb_info *sbi = ll_i2sbi(src);
 	struct md_op_data *op_data;
 	int err;
 
 	CDEBUG(D_VFSTRACE,
-	       "VFS Op:oldname=%pd,src_dir=%lu/%u(%p),newname=%pd,tgt_dir=%lu/%u(%p)\n",
-	       old_dentry, old_dir->i_ino, old_dir->i_generation, old_dir,
-	       new_dentry, new_dir->i_ino, new_dir->i_generation, new_dir);
+	       "VFS Op:oldname=%pd, src_dir="DFID"(%p), newname=%pd, tgt_dir="DFID"(%p)\n",
+	       src_dchild, PFID(ll_inode2fid(src)), src,
+	       tgt_dchild, PFID(ll_inode2fid(tgt)), tgt);
 
-	op_data = ll_prep_md_op_data(NULL, old_dir, new_dir, NULL, 0, 0,
+	op_data = ll_prep_md_op_data(NULL, src, tgt, NULL, 0, 0,
 				     LUSTRE_OPC_ANY, NULL);
 	if (IS_ERR(op_data))
 		return PTR_ERR(op_data);
 
-	ll_get_child_fid(old_dentry, &op_data->op_fid3);
-	ll_get_child_fid(new_dentry, &op_data->op_fid4);
+	if (src_dchild && src_dchild->d_inode)
+		op_data->op_fid3 = *ll_inode2fid(src_dchild->d_inode);
+	if (tgt_dchild && tgt_dchild->d_inode)
+		op_data->op_fid4 = *ll_inode2fid(tgt_dchild->d_inode);
+
 	err = md_rename(sbi->ll_md_exp, op_data,
-			old_dentry->d_name.name,
-			old_dentry->d_name.len,
-			new_dentry->d_name.name,
-			new_dentry->d_name.len, &request);
+			src_dchild->d_name.name,
+			src_dchild->d_name.len,
+			tgt_dchild->d_name.name,
+			tgt_dchild->d_name.len, &request);
 	ll_finish_md_op_data(op_data);
 	if (!err) {
-		ll_update_times(request, old_dir);
-		ll_update_times(request, new_dir);
+		ll_update_times(request, src);
+		ll_update_times(request, tgt);
 		ll_stats_ops_tally(sbi, LPROC_LL_RENAME, 1);
-		err = ll_objects_destroy(request, old_dir);
+		err = ll_objects_destroy(request, src);
 	}
 
 	ptlrpc_req_finished(request);
 	if (!err)
-		d_move(old_dentry, new_dentry);
+		d_move(src_dchild, tgt_dchild);
 	return err;
 }
 
diff --git a/drivers/staging/lustre/lustre/llite/remote_perm.c b/drivers/staging/lustre/lustre/llite/remote_perm.c
deleted file mode 100644
index e9d25317cd28..000000000000
--- a/drivers/staging/lustre/lustre/llite/remote_perm.c
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/llite/remote_perm.c
- *
- * Lustre Permission Cache for Remote Client
- *
- * Author: Lai Siyao <lsy@clusterfs.com>
- * Author: Fan Yong <fanyong@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/module.h>
-#include <linux/types.h>
-
-#include "../include/lustre_lite.h"
-#include "../include/lustre_ha.h"
-#include "../include/lustre_dlm.h"
-#include "../include/lprocfs_status.h"
-#include "../include/lustre_disk.h"
-#include "../include/lustre_param.h"
-#include "llite_internal.h"
-
-struct kmem_cache *ll_remote_perm_cachep;
-struct kmem_cache *ll_rmtperm_hash_cachep;
-
-static inline struct ll_remote_perm *alloc_ll_remote_perm(void)
-{
-	struct ll_remote_perm *lrp;
-
-	lrp = kmem_cache_zalloc(ll_remote_perm_cachep, GFP_KERNEL);
-	if (lrp)
-		INIT_HLIST_NODE(&lrp->lrp_list);
-	return lrp;
-}
-
-static inline void free_ll_remote_perm(struct ll_remote_perm *lrp)
-{
-	if (!lrp)
-		return;
-
-	if (!hlist_unhashed(&lrp->lrp_list))
-		hlist_del(&lrp->lrp_list);
-	kmem_cache_free(ll_remote_perm_cachep, lrp);
-}
-
-static struct hlist_head *alloc_rmtperm_hash(void)
-{
-	struct hlist_head *hash;
-	int i;
-
-	hash = kmem_cache_zalloc(ll_rmtperm_hash_cachep, GFP_NOFS);
-	if (!hash)
-		return NULL;
-
-	for (i = 0; i < REMOTE_PERM_HASHSIZE; i++)
-		INIT_HLIST_HEAD(hash + i);
-
-	return hash;
-}
-
-void free_rmtperm_hash(struct hlist_head *hash)
-{
-	int i;
-	struct ll_remote_perm *lrp;
-	struct hlist_node *next;
-
-	if (!hash)
-		return;
-
-	for (i = 0; i < REMOTE_PERM_HASHSIZE; i++)
-		hlist_for_each_entry_safe(lrp, next, hash + i, lrp_list)
-			free_ll_remote_perm(lrp);
-	kmem_cache_free(ll_rmtperm_hash_cachep, hash);
-}
-
-static inline int remote_perm_hashfunc(uid_t uid)
-{
-	return uid & (REMOTE_PERM_HASHSIZE - 1);
-}
-
-/* NB: setxid permission is not checked here, instead it's done on
- * MDT when client get remote permission.
- */
-static int do_check_remote_perm(struct ll_inode_info *lli, int mask)
-{
-	struct hlist_head *head;
-	struct ll_remote_perm *lrp;
-	int found = 0, rc;
-
-	if (!lli->lli_remote_perms)
-		return -ENOENT;
-
-	head = lli->lli_remote_perms +
-		remote_perm_hashfunc(from_kuid(&init_user_ns, current_uid()));
-
-	spin_lock(&lli->lli_lock);
-	hlist_for_each_entry(lrp, head, lrp_list) {
-		if (lrp->lrp_uid != from_kuid(&init_user_ns, current_uid()))
-			continue;
-		if (lrp->lrp_gid != from_kgid(&init_user_ns, current_gid()))
-			continue;
-		if (lrp->lrp_fsuid != from_kuid(&init_user_ns, current_fsuid()))
-			continue;
-		if (lrp->lrp_fsgid != from_kgid(&init_user_ns, current_fsgid()))
-			continue;
-		found = 1;
-		break;
-	}
-
-	if (!found) {
-		rc = -ENOENT;
-		goto out;
-	}
-
-	CDEBUG(D_SEC, "found remote perm: %u/%u/%u/%u - %#x\n",
-	       lrp->lrp_uid, lrp->lrp_gid, lrp->lrp_fsuid, lrp->lrp_fsgid,
-	       lrp->lrp_access_perm);
-	rc = ((lrp->lrp_access_perm & mask) == mask) ? 0 : -EACCES;
-
-out:
-	spin_unlock(&lli->lli_lock);
-	return rc;
-}
-
-int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm)
-{
-	struct ll_inode_info *lli = ll_i2info(inode);
-	struct ll_remote_perm *lrp = NULL, *tmp = NULL;
-	struct hlist_head *head, *perm_hash = NULL;
-
-	LASSERT(ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT);
-
-#if 0
-	if (perm->rp_uid != current->uid ||
-	    perm->rp_gid != current->gid ||
-	    perm->rp_fsuid != current->fsuid ||
-	    perm->rp_fsgid != current->fsgid) {
-		/* user might setxid in this small period */
-		CDEBUG(D_SEC,
-		       "remote perm user %u/%u/%u/%u != current %u/%u/%u/%u\n",
-		       perm->rp_uid, perm->rp_gid, perm->rp_fsuid,
-		       perm->rp_fsgid, current->uid, current->gid,
-		       current->fsuid, current->fsgid);
-		return -EAGAIN;
-	}
-#endif
-
-	if (!lli->lli_remote_perms) {
-		perm_hash = alloc_rmtperm_hash();
-		if (!perm_hash) {
-			CERROR("alloc lli_remote_perms failed!\n");
-			return -ENOMEM;
-		}
-	}
-
-	spin_lock(&lli->lli_lock);
-
-	if (!lli->lli_remote_perms)
-		lli->lli_remote_perms = perm_hash;
-	else
-		free_rmtperm_hash(perm_hash);
-
-	head = lli->lli_remote_perms + remote_perm_hashfunc(perm->rp_uid);
-
-again:
-	hlist_for_each_entry(tmp, head, lrp_list) {
-		if (tmp->lrp_uid != perm->rp_uid)
-			continue;
-		if (tmp->lrp_gid != perm->rp_gid)
-			continue;
-		if (tmp->lrp_fsuid != perm->rp_fsuid)
-			continue;
-		if (tmp->lrp_fsgid != perm->rp_fsgid)
-			continue;
-		free_ll_remote_perm(lrp);
-		lrp = tmp;
-		break;
-	}
-
-	if (!lrp) {
-		spin_unlock(&lli->lli_lock);
-		lrp = alloc_ll_remote_perm();
-		if (!lrp) {
-			CERROR("alloc memory for ll_remote_perm failed!\n");
-			return -ENOMEM;
-		}
-		spin_lock(&lli->lli_lock);
-		goto again;
-	}
-
-	lrp->lrp_access_perm = perm->rp_access_perm;
-	if (lrp != tmp) {
-		lrp->lrp_uid	 = perm->rp_uid;
-		lrp->lrp_gid	 = perm->rp_gid;
-		lrp->lrp_fsuid       = perm->rp_fsuid;
-		lrp->lrp_fsgid       = perm->rp_fsgid;
-		hlist_add_head(&lrp->lrp_list, head);
-	}
-	lli->lli_rmtperm_time = cfs_time_current();
-	spin_unlock(&lli->lli_lock);
-
-	CDEBUG(D_SEC, "new remote perm@%p: %u/%u/%u/%u - %#x\n",
-	       lrp, lrp->lrp_uid, lrp->lrp_gid, lrp->lrp_fsuid, lrp->lrp_fsgid,
-	       lrp->lrp_access_perm);
-
-	return 0;
-}
-
-int lustre_check_remote_perm(struct inode *inode, int mask)
-{
-	struct ll_inode_info *lli = ll_i2info(inode);
-	struct ll_sb_info *sbi = ll_i2sbi(inode);
-	struct ptlrpc_request *req = NULL;
-	struct mdt_remote_perm *perm;
-	unsigned long save;
-	int i = 0, rc;
-
-	do {
-		save = lli->lli_rmtperm_time;
-		rc = do_check_remote_perm(lli, mask);
-		if (!rc || (rc != -ENOENT && i))
-			break;
-
-		might_sleep();
-
-		mutex_lock(&lli->lli_rmtperm_mutex);
-		/* check again */
-		if (save != lli->lli_rmtperm_time) {
-			rc = do_check_remote_perm(lli, mask);
-			if (!rc || (rc != -ENOENT && i)) {
-				mutex_unlock(&lli->lli_rmtperm_mutex);
-				break;
-			}
-		}
-
-		if (i++ > 5) {
-			CERROR("check remote perm falls in dead loop!\n");
-			LBUG();
-		}
-
-		rc = md_get_remote_perm(sbi->ll_md_exp, ll_inode2fid(inode),
-					ll_i2suppgid(inode), &req);
-		if (rc) {
-			mutex_unlock(&lli->lli_rmtperm_mutex);
-			break;
-		}
-
-		perm = req_capsule_server_swab_get(&req->rq_pill, &RMF_ACL,
-						   lustre_swab_mdt_remote_perm);
-		if (unlikely(!perm)) {
-			mutex_unlock(&lli->lli_rmtperm_mutex);
-			rc = -EPROTO;
-			break;
-		}
-
-		rc = ll_update_remote_perm(inode, perm);
-		mutex_unlock(&lli->lli_rmtperm_mutex);
-		if (rc == -ENOMEM)
-			break;
-
-		ptlrpc_req_finished(req);
-		req = NULL;
-	} while (1);
-	ptlrpc_req_finished(req);
-	return rc;
-}
-
-#if 0  /* NB: remote perms can't be freed in ll_mdc_blocking_ast of UPDATE lock,
-	* because it will fail sanity test 48.
-	*/
-void ll_free_remote_perms(struct inode *inode)
-{
-	struct ll_inode_info *lli = ll_i2info(inode);
-	struct hlist_head *hash = lli->lli_remote_perms;
-	struct ll_remote_perm *lrp;
-	struct hlist_node *node, *next;
-	int i;
-
-	LASSERT(hash);
-
-	spin_lock(&lli->lli_lock);
-
-	for (i = 0; i < REMOTE_PERM_HASHSIZE; i++) {
-		hlist_for_each_entry_safe(lrp, node, next, hash + i, lrp_list)
-			free_ll_remote_perm(lrp);
-	}
-
-	spin_unlock(&lli->lli_lock);
-}
-#endif
diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
index edab6c5b7e50..87393c4bd51e 100644
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ b/drivers/staging/lustre/lustre/llite/rw.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -59,225 +55,6 @@
 #include "llite_internal.h"
 #include "../include/linux/lustre_compat25.h"
 
-/**
- * Finalizes cl-data before exiting typical address_space operation. Dual to
- * ll_cl_init().
- */
-static void ll_cl_fini(struct ll_cl_context *lcc)
-{
-	struct lu_env  *env  = lcc->lcc_env;
-	struct cl_io   *io   = lcc->lcc_io;
-	struct cl_page *page = lcc->lcc_page;
-
-	LASSERT(lcc->lcc_cookie == current);
-	LASSERT(env);
-
-	if (page) {
-		lu_ref_del(&page->cp_reference, "cl_io", io);
-		cl_page_put(env, page);
-	}
-
-	cl_env_put(env, &lcc->lcc_refcheck);
-}
-
-/**
- * Initializes common cl-data at the typical address_space operation entry
- * point.
- */
-static struct ll_cl_context *ll_cl_init(struct file *file,
-					struct page *vmpage, int create)
-{
-	struct ll_cl_context *lcc;
-	struct lu_env    *env;
-	struct cl_io     *io;
-	struct cl_object *clob;
-	struct ccc_io    *cio;
-
-	int refcheck;
-	int result = 0;
-
-	clob = ll_i2info(vmpage->mapping->host)->lli_clob;
-	LASSERT(clob);
-
-	env = cl_env_get(&refcheck);
-	if (IS_ERR(env))
-		return ERR_CAST(env);
-
-	lcc = &vvp_env_info(env)->vti_io_ctx;
-	memset(lcc, 0, sizeof(*lcc));
-	lcc->lcc_env = env;
-	lcc->lcc_refcheck = refcheck;
-	lcc->lcc_cookie = current;
-
-	cio = ccc_env_io(env);
-	io = cio->cui_cl.cis_io;
-	if (!io && create) {
-		struct inode *inode = vmpage->mapping->host;
-		loff_t pos;
-
-		if (inode_trylock(inode)) {
-			inode_unlock((inode));
-
-			/* this is too bad. Someone is trying to write the
-			 * page w/o holding inode mutex. This means we can
-			 * add dirty pages into cache during truncate
-			 */
-			CERROR("Proc %s is dirtying page w/o inode lock, this will break truncate\n",
-			       current->comm);
-			dump_stack();
-			LBUG();
-			return ERR_PTR(-EIO);
-		}
-
-		/*
-		 * Loop-back driver calls ->prepare_write().
-		 * methods directly, bypassing file system ->write() operation,
-		 * so cl_io has to be created here.
-		 */
-		io = ccc_env_thread_io(env);
-		ll_io_init(io, file, 1);
-
-		/* No lock at all for this kind of IO - we can't do it because
-		 * we have held page lock, it would cause deadlock.
-		 * XXX: This causes poor performance to loop device - One page
-		 *      per RPC.
-		 *      In order to get better performance, users should use
-		 *      lloop driver instead.
-		 */
-		io->ci_lockreq = CILR_NEVER;
-
-		pos = vmpage->index << PAGE_SHIFT;
-
-		/* Create a temp IO to serve write. */
-		result = cl_io_rw_init(env, io, CIT_WRITE, pos, PAGE_SIZE);
-		if (result == 0) {
-			cio->cui_fd = LUSTRE_FPRIVATE(file);
-			cio->cui_iter = NULL;
-			result = cl_io_iter_init(env, io);
-			if (result == 0) {
-				result = cl_io_lock(env, io);
-				if (result == 0)
-					result = cl_io_start(env, io);
-			}
-		} else
-			result = io->ci_result;
-	}
-
-	lcc->lcc_io = io;
-	if (!io)
-		result = -EIO;
-	if (result == 0) {
-		struct cl_page   *page;
-
-		LASSERT(io->ci_state == CIS_IO_GOING);
-		LASSERT(cio->cui_fd == LUSTRE_FPRIVATE(file));
-		page = cl_page_find(env, clob, vmpage->index, vmpage,
-				    CPT_CACHEABLE);
-		if (!IS_ERR(page)) {
-			lcc->lcc_page = page;
-			lu_ref_add(&page->cp_reference, "cl_io", io);
-			result = 0;
-		} else
-			result = PTR_ERR(page);
-	}
-	if (result) {
-		ll_cl_fini(lcc);
-		lcc = ERR_PTR(result);
-	}
-
-	CDEBUG(D_VFSTRACE, "%lu@"DFID" -> %d %p %p\n",
-	       vmpage->index, PFID(lu_object_fid(&clob->co_lu)), result,
-	       env, io);
-	return lcc;
-}
-
-static struct ll_cl_context *ll_cl_get(void)
-{
-	struct ll_cl_context *lcc;
-	struct lu_env *env;
-	int refcheck;
-
-	env = cl_env_get(&refcheck);
-	LASSERT(!IS_ERR(env));
-	lcc = &vvp_env_info(env)->vti_io_ctx;
-	LASSERT(env == lcc->lcc_env);
-	LASSERT(current == lcc->lcc_cookie);
-	cl_env_put(env, &refcheck);
-
-	/* env has got in ll_cl_init, so it is still usable. */
-	return lcc;
-}
-
-/**
- * ->prepare_write() address space operation called by generic_file_write()
- * for every page during write.
- */
-int ll_prepare_write(struct file *file, struct page *vmpage, unsigned from,
-		     unsigned to)
-{
-	struct ll_cl_context *lcc;
-	int result;
-
-	lcc = ll_cl_init(file, vmpage, 1);
-	if (!IS_ERR(lcc)) {
-		struct lu_env  *env = lcc->lcc_env;
-		struct cl_io   *io  = lcc->lcc_io;
-		struct cl_page *page = lcc->lcc_page;
-
-		cl_page_assume(env, io, page);
-
-		result = cl_io_prepare_write(env, io, page, from, to);
-		if (result == 0) {
-			/*
-			 * Add a reference, so that page is not evicted from
-			 * the cache until ->commit_write() is called.
-			 */
-			cl_page_get(page);
-			lu_ref_add(&page->cp_reference, "prepare_write",
-				   current);
-		} else {
-			cl_page_unassume(env, io, page);
-			ll_cl_fini(lcc);
-		}
-		/* returning 0 in prepare assumes commit must be called
-		 * afterwards
-		 */
-	} else {
-		result = PTR_ERR(lcc);
-	}
-	return result;
-}
-
-int ll_commit_write(struct file *file, struct page *vmpage, unsigned from,
-		    unsigned to)
-{
-	struct ll_cl_context *lcc;
-	struct lu_env    *env;
-	struct cl_io     *io;
-	struct cl_page   *page;
-	int result = 0;
-
-	lcc  = ll_cl_get();
-	env  = lcc->lcc_env;
-	page = lcc->lcc_page;
-	io   = lcc->lcc_io;
-
-	LASSERT(cl_page_is_owned(page, io));
-	LASSERT(from <= to);
-	if (from != to) /* handle short write case. */
-		result = cl_io_commit_write(env, io, page, from, to);
-	if (cl_page_is_owned(page, io))
-		cl_page_unassume(env, io, page);
-
-	/*
-	 * Release reference acquired by ll_prepare_write().
-	 */
-	lu_ref_del(&page->cp_reference, "prepare_write", current);
-	cl_page_put(env, page);
-	ll_cl_fini(lcc);
-	return result;
-}
-
 static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
 
 /**
@@ -301,7 +78,7 @@ static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
  */
 static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
 				     struct ra_io_arg *ria,
-				     unsigned long pages)
+				     unsigned long pages, unsigned long min)
 {
 	struct ll_ra_info *ra = &sbi->ll_ra_info;
 	long ret;
@@ -341,6 +118,11 @@ static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
 	}
 
 out:
+	if (ret < min) {
+		/* override ra limit for maximum performance */
+		atomic_add(min - ret, &ra->ra_cur_pages);
+		ret = min;
+	}
 	return ret;
 }
 
@@ -357,9 +139,9 @@ static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which)
 	lprocfs_counter_incr(sbi->ll_ra_stats, which);
 }
 
-void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which)
+void ll_ra_stats_inc(struct inode *inode, enum ra_stat which)
 {
-	struct ll_sb_info *sbi = ll_i2sbi(mapping->host);
+	struct ll_sb_info *sbi = ll_i2sbi(inode);
 
 	ll_ra_stats_inc_sbi(sbi, which);
 }
@@ -388,61 +170,42 @@ static int index_in_window(unsigned long index, unsigned long point,
 	return start <= index && index <= end;
 }
 
-static struct ll_readahead_state *ll_ras_get(struct file *f)
+void ll_ras_enter(struct file *f)
 {
-	struct ll_file_data       *fd;
-
-	fd = LUSTRE_FPRIVATE(f);
-	return &fd->fd_ras;
-}
-
-void ll_ra_read_in(struct file *f, struct ll_ra_read *rar)
-{
-	struct ll_readahead_state *ras;
-
-	ras = ll_ras_get(f);
+	struct ll_file_data *fd = LUSTRE_FPRIVATE(f);
+	struct ll_readahead_state *ras = &fd->fd_ras;
 
 	spin_lock(&ras->ras_lock);
 	ras->ras_requests++;
 	ras->ras_request_index = 0;
 	ras->ras_consecutive_requests++;
-	rar->lrr_reader = current;
-
-	list_add(&rar->lrr_linkage, &ras->ras_read_beads);
-	spin_unlock(&ras->ras_lock);
-}
-
-void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar)
-{
-	struct ll_readahead_state *ras;
-
-	ras = ll_ras_get(f);
-
-	spin_lock(&ras->ras_lock);
-	list_del_init(&rar->lrr_linkage);
 	spin_unlock(&ras->ras_lock);
 }
 
 static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io,
 			      struct cl_page_list *queue, struct cl_page *page,
-			      struct page *vmpage)
+			      struct cl_object *clob, pgoff_t *max_index)
 {
-	struct ccc_page *cp;
+	struct page *vmpage = page->cp_vmpage;
+	struct vvp_page *vpg;
 	int	      rc;
 
 	rc = 0;
 	cl_page_assume(env, io, page);
 	lu_ref_add(&page->cp_reference, "ra", current);
-	cp = cl2ccc_page(cl_page_at(page, &vvp_device_type));
-	if (!cp->cpg_defer_uptodate && !PageUptodate(vmpage)) {
-		rc = cl_page_is_under_lock(env, io, page);
-		if (rc == -EBUSY) {
-			cp->cpg_defer_uptodate = 1;
-			cp->cpg_ra_used = 0;
+	vpg = cl2vvp_page(cl_object_page_slice(clob, page));
+	if (!vpg->vpg_defer_uptodate && !PageUptodate(vmpage)) {
+		CDEBUG(D_READA, "page index %lu, max_index: %lu\n",
+		       vvp_index(vpg), *max_index);
+		if (*max_index == 0 || vvp_index(vpg) > *max_index)
+			rc = cl_page_is_under_lock(env, io, page, max_index);
+		if (rc == 0) {
+			vpg->vpg_defer_uptodate = 1;
+			vpg->vpg_ra_used = 0;
 			cl_page_list_add(queue, page);
 			rc = 1;
 		} else {
-			cl_page_delete(env, page);
+			cl_page_discard(env, io, page);
 			rc = -ENOLCK;
 		}
 	} else {
@@ -466,24 +229,25 @@ static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io,
  */
 static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
 			      struct cl_page_list *queue,
-			      pgoff_t index, struct address_space *mapping)
+			      pgoff_t index, pgoff_t *max_index)
 {
+	struct cl_object *clob  = io->ci_obj;
+	struct inode     *inode = vvp_object_inode(clob);
 	struct page      *vmpage;
-	struct cl_object *clob  = ll_i2info(mapping->host)->lli_clob;
 	struct cl_page   *page;
 	enum ra_stat      which = _NR_RA_STAT; /* keep gcc happy */
 	int	       rc    = 0;
 	const char       *msg   = NULL;
 
-	vmpage = grab_cache_page_nowait(mapping, index);
+	vmpage = grab_cache_page_nowait(inode->i_mapping, index);
 	if (vmpage) {
 		/* Check if vmpage was truncated or reclaimed */
-		if (vmpage->mapping == mapping) {
+		if (vmpage->mapping == inode->i_mapping) {
 			page = cl_page_find(env, clob, vmpage->index,
 					    vmpage, CPT_CACHEABLE);
 			if (!IS_ERR(page)) {
 				rc = cl_read_ahead_page(env, io, queue,
-							page, vmpage);
+							page, clob, max_index);
 				if (rc == -ENOLCK) {
 					which = RA_STAT_FAILED_MATCH;
 					msg   = "lock match failed";
@@ -504,7 +268,7 @@ static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
 		msg   = "g_c_p_n failed";
 	}
 	if (msg) {
-		ll_ra_stats_inc(mapping, which);
+		ll_ra_stats_inc(inode, which);
 		CDEBUG(D_READA, "%s\n", msg);
 	}
 	return rc;
@@ -616,11 +380,12 @@ static int ll_read_ahead_pages(const struct lu_env *env,
 			       struct cl_io *io, struct cl_page_list *queue,
 			       struct ra_io_arg *ria,
 			       unsigned long *reserved_pages,
-			       struct address_space *mapping,
 			       unsigned long *ra_end)
 {
-	int rc, count = 0, stride_ria;
-	unsigned long page_idx;
+	int rc, count = 0;
+	bool stride_ria;
+	pgoff_t page_idx;
+	pgoff_t max_index = 0;
 
 	LASSERT(ria);
 	RIA_DEBUG(ria);
@@ -631,12 +396,13 @@ static int ll_read_ahead_pages(const struct lu_env *env,
 		if (ras_inside_ra_window(page_idx, ria)) {
 			/* If the page is inside the read-ahead window*/
 			rc = ll_read_ahead_page(env, io, queue,
-						page_idx, mapping);
+						page_idx, &max_index);
 			if (rc == 1) {
 				(*reserved_pages)--;
 				count++;
-			} else if (rc == -ENOLCK)
+			} else if (rc == -ENOLCK) {
 				break;
+			}
 		} else if (stride_ria) {
 			/* If it is not in the read-ahead window, and it is
 			 * read-ahead mode, then check whether it should skip
@@ -666,25 +432,22 @@ static int ll_read_ahead_pages(const struct lu_env *env,
 }
 
 int ll_readahead(const struct lu_env *env, struct cl_io *io,
-		 struct ll_readahead_state *ras, struct address_space *mapping,
-		 struct cl_page_list *queue, int flags)
+		 struct cl_page_list *queue, struct ll_readahead_state *ras,
+		 bool hit)
 {
 	struct vvp_io *vio = vvp_env_io(env);
-	struct vvp_thread_info *vti = vvp_env_info(env);
-	struct cl_attr *attr = ccc_env_thread_attr(env);
+	struct ll_thread_info *lti = ll_env_info(env);
+	struct cl_attr *attr = vvp_env_thread_attr(env);
 	unsigned long start = 0, end = 0, reserved;
-	unsigned long ra_end, len;
+	unsigned long ra_end, len, mlen = 0;
 	struct inode *inode;
-	struct ll_ra_read *bead;
-	struct ra_io_arg *ria = &vti->vti_ria;
-	struct ll_inode_info *lli;
+	struct ra_io_arg *ria = &lti->lti_ria;
 	struct cl_object *clob;
 	int ret = 0;
 	__u64 kms;
 
-	inode = mapping->host;
-	lli = ll_i2info(inode);
-	clob = lli->lli_clob;
+	clob = io->ci_obj;
+	inode = vvp_object_inode(clob);
 
 	memset(ria, 0, sizeof(*ria));
 
@@ -696,22 +459,20 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
 		return ret;
 	kms = attr->cat_kms;
 	if (kms == 0) {
-		ll_ra_stats_inc(mapping, RA_STAT_ZERO_LEN);
+		ll_ra_stats_inc(inode, RA_STAT_ZERO_LEN);
 		return 0;
 	}
 
 	spin_lock(&ras->ras_lock);
-	if (vio->cui_ra_window_set)
-		bead = &vio->cui_bead;
-	else
-		bead = NULL;
 
 	/* Enlarge the RA window to encompass the full read */
-	if (bead && ras->ras_window_start + ras->ras_window_len <
-	    bead->lrr_start + bead->lrr_count) {
-		ras->ras_window_len = bead->lrr_start + bead->lrr_count -
+	if (vio->vui_ra_valid &&
+	    ras->ras_window_start + ras->ras_window_len <
+	    vio->vui_ra_start + vio->vui_ra_count) {
+		ras->ras_window_len = vio->vui_ra_start + vio->vui_ra_count -
 				      ras->ras_window_start;
 	}
+
 	/* Reserve a part of the read-ahead window that we'll be issuing */
 	if (ras->ras_window_len) {
 		start = ras->ras_next_readahead;
@@ -755,29 +516,48 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
 	spin_unlock(&ras->ras_lock);
 
 	if (end == 0) {
-		ll_ra_stats_inc(mapping, RA_STAT_ZERO_WINDOW);
+		ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
 		return 0;
 	}
 	len = ria_page_count(ria);
-	if (len == 0)
+	if (len == 0) {
+		ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
 		return 0;
+	}
+
+	CDEBUG(D_READA, DFID ": ria: %lu/%lu, bead: %lu/%lu, hit: %d\n",
+	       PFID(lu_object_fid(&clob->co_lu)),
+	       ria->ria_start, ria->ria_end,
+	       vio->vui_ra_valid ? vio->vui_ra_start : 0,
+	       vio->vui_ra_valid ? vio->vui_ra_count : 0,
+	       hit);
+
+	/* at least to extend the readahead window to cover current read */
+	if (!hit && vio->vui_ra_valid &&
+	    vio->vui_ra_start + vio->vui_ra_count > ria->ria_start) {
+		/* to the end of current read window. */
+		mlen = vio->vui_ra_start + vio->vui_ra_count - ria->ria_start;
+		/* trim to RPC boundary */
+		start = ria->ria_start & (PTLRPC_MAX_BRW_PAGES - 1);
+		mlen = min(mlen, PTLRPC_MAX_BRW_PAGES - start);
+	}
 
-	reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len);
+	reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen);
 	if (reserved < len)
-		ll_ra_stats_inc(mapping, RA_STAT_MAX_IN_FLIGHT);
+		ll_ra_stats_inc(inode, RA_STAT_MAX_IN_FLIGHT);
 
-	CDEBUG(D_READA, "reserved page %lu ra_cur %d ra_max %lu\n", reserved,
+	CDEBUG(D_READA, "reserved pages %lu/%lu/%lu, ra_cur %d, ra_max %lu\n",
+	       reserved, len, mlen,
 	       atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages),
 	       ll_i2sbi(inode)->ll_ra_info.ra_max_pages);
 
-	ret = ll_read_ahead_pages(env, io, queue,
-				  ria, &reserved, mapping, &ra_end);
+	ret = ll_read_ahead_pages(env, io, queue, ria, &reserved, &ra_end);
 
 	if (reserved != 0)
 		ll_ra_count_put(ll_i2sbi(inode), reserved);
 
 	if (ra_end == end + 1 && ra_end == (kms >> PAGE_SHIFT))
-		ll_ra_stats_inc(mapping, RA_STAT_EOF);
+		ll_ra_stats_inc(inode, RA_STAT_EOF);
 
 	/* if we didn't get to the end of the region we reserved from
 	 * the ras we need to go back and update the ras so that the
@@ -789,6 +569,7 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
 	       ra_end, end, ria->ria_end);
 
 	if (ra_end != end + 1) {
+		ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END);
 		spin_lock(&ras->ras_lock);
 		if (ra_end < ras->ras_next_readahead &&
 		    index_in_window(ra_end, ras->ras_window_start, 0,
@@ -836,7 +617,6 @@ void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
 	spin_lock_init(&ras->ras_lock);
 	ras_reset(inode, ras, 0);
 	ras->ras_requests = 0;
-	INIT_LIST_HEAD(&ras->ras_read_beads);
 }
 
 /*
@@ -1059,15 +839,18 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
 	ras->ras_last_readpage = index;
 	ras_set_start(inode, ras, index);
 
-	if (stride_io_mode(ras))
+	if (stride_io_mode(ras)) {
 		/* Since stride readahead is sensitive to the offset
 		 * of read-ahead, so we use original offset here,
 		 * instead of ras_window_start, which is RPC aligned
 		 */
 		ras->ras_next_readahead = max(index, ras->ras_next_readahead);
-	else
-		ras->ras_next_readahead = max(ras->ras_window_start,
-					      ras->ras_next_readahead);
+	} else {
+		if (ras->ras_next_readahead < ras->ras_window_start)
+			ras->ras_next_readahead = ras->ras_window_start;
+		if (!hit)
+			ras->ras_next_readahead = index + 1;
+	}
 	RAS_CDEBUG(ras);
 
 	/* Trigger RA in the mmap case where ras_consecutive_requests
@@ -1129,7 +912,7 @@ int ll_writepage(struct page *vmpage, struct writeback_control *wbc)
 	clob  = ll_i2info(inode)->lli_clob;
 	LASSERT(clob);
 
-	io = ccc_env_thread_io(env);
+	io = vvp_env_thread_io(env);
 	io->ci_obj = clob;
 	io->ci_ignore_layout = 1;
 	result = cl_io_init(env, io, CIT_MISC, clob);
@@ -1240,23 +1023,77 @@ int ll_writepages(struct address_space *mapping, struct writeback_control *wbc)
 
 	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) {
 		if (end == OBD_OBJECT_EOF)
-			end = i_size_read(inode);
-		mapping->writeback_index = (end >> PAGE_SHIFT) + 1;
+			mapping->writeback_index = 0;
+		else
+			mapping->writeback_index = (end >> PAGE_SHIFT) + 1;
 	}
 	return result;
 }
 
+struct ll_cl_context *ll_cl_find(struct file *file)
+{
+	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+	struct ll_cl_context *lcc;
+	struct ll_cl_context *found = NULL;
+
+	read_lock(&fd->fd_lock);
+	list_for_each_entry(lcc, &fd->fd_lccs, lcc_list) {
+		if (lcc->lcc_cookie == current) {
+			found = lcc;
+			break;
+		}
+	}
+	read_unlock(&fd->fd_lock);
+
+	return found;
+}
+
+void ll_cl_add(struct file *file, const struct lu_env *env, struct cl_io *io)
+{
+	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+	struct ll_cl_context *lcc = &ll_env_info(env)->lti_io_ctx;
+
+	memset(lcc, 0, sizeof(*lcc));
+	INIT_LIST_HEAD(&lcc->lcc_list);
+	lcc->lcc_cookie = current;
+	lcc->lcc_env = env;
+	lcc->lcc_io = io;
+
+	write_lock(&fd->fd_lock);
+	list_add(&lcc->lcc_list, &fd->fd_lccs);
+	write_unlock(&fd->fd_lock);
+}
+
+void ll_cl_remove(struct file *file, const struct lu_env *env)
+{
+	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+	struct ll_cl_context *lcc = &ll_env_info(env)->lti_io_ctx;
+
+	write_lock(&fd->fd_lock);
+	list_del_init(&lcc->lcc_list);
+	write_unlock(&fd->fd_lock);
+}
+
 int ll_readpage(struct file *file, struct page *vmpage)
 {
+	struct cl_object *clob = ll_i2info(file_inode(file))->lli_clob;
 	struct ll_cl_context *lcc;
+	const struct lu_env  *env;
+	struct cl_io   *io;
+	struct cl_page *page;
 	int result;
 
-	lcc = ll_cl_init(file, vmpage, 0);
-	if (!IS_ERR(lcc)) {
-		struct lu_env  *env  = lcc->lcc_env;
-		struct cl_io   *io   = lcc->lcc_io;
-		struct cl_page *page = lcc->lcc_page;
+	lcc = ll_cl_find(file);
+	if (!lcc) {
+		unlock_page(vmpage);
+		return -EIO;
+	}
 
+	env = lcc->lcc_env;
+	io = lcc->lcc_io;
+	LASSERT(io->ci_state == CIS_IO_GOING);
+	page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
+	if (!IS_ERR(page)) {
 		LASSERT(page->cp_type == CPT_CACHEABLE);
 		if (likely(!PageUptodate(vmpage))) {
 			cl_page_assume(env, io, page);
@@ -1266,10 +1103,35 @@ int ll_readpage(struct file *file, struct page *vmpage)
 			unlock_page(vmpage);
 			result = 0;
 		}
-		ll_cl_fini(lcc);
+		cl_page_put(env, page);
 	} else {
 		unlock_page(vmpage);
-		result = PTR_ERR(lcc);
+		result = PTR_ERR(page);
 	}
 	return result;
 }
+
+int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
+		    struct cl_page *page, enum cl_req_type crt)
+{
+	struct cl_2queue  *queue;
+	int result;
+
+	LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
+
+	queue = &io->ci_queue;
+	cl_2queue_init_page(queue, page);
+
+	result = cl_io_submit_sync(env, io, crt, queue, 0);
+	LASSERT(cl_page_is_owned(page, io));
+
+	if (crt == CRT_READ)
+		/*
+		 * in CRT_WRITE case page is left locked even in case of
+		 * error.
+		 */
+		cl_page_list_disown(env, io, &queue->c2_qin);
+	cl_2queue_fini(env, queue);
+
+	return result;
+}
diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
index 69aa15e8e3ef..d98c7acc0832 100644
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ b/drivers/staging/lustre/lustre/llite/rw26.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -95,15 +91,12 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset,
 			if (obj) {
 				page = cl_vmpage_page(vmpage, obj);
 				if (page) {
-					lu_ref_add(&page->cp_reference,
-						   "delete", vmpage);
 					cl_page_delete(env, page);
-					lu_ref_del(&page->cp_reference,
-						   "delete", vmpage);
 					cl_page_put(env, page);
 				}
-			} else
+			} else {
 				LASSERT(vmpage->private == 0);
+			}
 			cl_env_put(env, &refcheck);
 		}
 	}
@@ -111,12 +104,12 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset,
 
 static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)
 {
-	struct cl_env_nest nest;
 	struct lu_env     *env;
+	void			*cookie;
 	struct cl_object  *obj;
 	struct cl_page    *page;
 	struct address_space *mapping;
-	int result;
+	int result = 0;
 
 	LASSERT(PageLocked(vmpage));
 	if (PageWriteback(vmpage) || PageDirty(vmpage))
@@ -130,53 +123,42 @@ static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)
 	if (!obj)
 		return 1;
 
-	/* 1 for page allocator, 1 for cl_page and 1 for page cache */
+	/* 1 for caller, 1 for cl_page and 1 for page cache */
 	if (page_count(vmpage) > 3)
 		return 0;
 
-	/* TODO: determine what gfp should be used by @gfp_mask. */
-	env = cl_env_nested_get(&nest);
-	if (IS_ERR(env))
-		/* If we can't allocate an env we won't call cl_page_put()
-		 * later on which further means it's impossible to drop
-		 * page refcount by cl_page, so ask kernel to not free
-		 * this page.
-		 */
-		return 0;
-
 	page = cl_vmpage_page(vmpage, obj);
-	result = !page;
-	if (page) {
-		if (!cl_page_in_use(page)) {
-			result = 1;
-			cl_page_delete(env, page);
-		}
-		cl_page_put(env, page);
-	}
-	cl_env_nested_put(&nest, env);
-	return result;
-}
+	if (!page)
+		return 1;
 
-static int ll_set_page_dirty(struct page *vmpage)
-{
-#if 0
-	struct cl_page    *page = vvp_vmpage_page_transient(vmpage);
-	struct vvp_object *obj  = cl_inode2vvp(vmpage->mapping->host);
-	struct vvp_page   *cpg;
+	cookie = cl_env_reenter();
+	env = cl_env_percpu_get();
+	LASSERT(!IS_ERR(env));
 
-	/*
-	 * XXX should page method be called here?
-	 */
-	LASSERT(&obj->co_cl == page->cp_obj);
-	cpg = cl2vvp_page(cl_page_at(page, &vvp_device_type));
-	/*
-	 * XXX cannot do much here, because page is possibly not locked:
-	 * sys_munmap()->...
-	 *     ->unmap_page_range()->zap_pte_range()->set_page_dirty().
+	if (!cl_page_in_use(page)) {
+		result = 1;
+		cl_page_delete(env, page);
+	}
+
+	/* To use percpu env array, the call path can not be rescheduled;
+	 * otherwise percpu array will be messed if ll_releaspage() called
+	 * again on the same CPU.
+	 *
+	 * If this page holds the last refc of cl_object, the following
+	 * call path may cause reschedule:
+	 *   cl_page_put -> cl_page_free -> cl_object_put ->
+	 *     lu_object_put -> lu_object_free -> lov_delete_raid0.
+	 *
+	 * However, the kernel can't get rid of this inode until all pages have
+	 * been cleaned up. Now that we hold page lock here, it's pretty safe
+	 * that we won't get into object delete path.
 	 */
-	vvp_write_pending(obj, cpg);
-#endif
-	return __set_page_dirty_nobuffers(vmpage);
+	LASSERT(cl_object_refc(obj) > 1);
+	cl_page_put(env, page);
+
+	cl_env_percpu_put(env);
+	cl_env_reexit(cookie);
+	return result;
 }
 
 #define MAX_DIRECTIO_SIZE (2*1024*1024*1024UL)
@@ -266,7 +248,7 @@ ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
 		 * write directly
 		 */
 		if (clp->cp_type == CPT_CACHEABLE) {
-			struct page *vmpage = cl_page_vmpage(env, clp);
+			struct page *vmpage = cl_page_vmpage(clp);
 			struct page *src_page;
 			struct page *dst_page;
 			void       *src;
@@ -358,14 +340,14 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
  */
 #define MAX_DIO_SIZE ((KMALLOC_MAX_SIZE / sizeof(struct brw_page) *	  \
 		       PAGE_SIZE) & ~(DT_MAX_BRW_SIZE - 1))
-static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
-			       loff_t file_offset)
+static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter)
 {
 	struct lu_env *env;
 	struct cl_io *io;
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
-	struct ccc_object *obj = cl_inode2ccc(inode);
+	struct vvp_object *obj = cl_inode2vvp(inode);
+	loff_t file_offset = iocb->ki_pos;
 	ssize_t count = iov_iter_count(iter);
 	ssize_t tot_bytes = 0, result = 0;
 	struct ll_inode_info *lli = ll_i2info(inode);
@@ -376,22 +358,21 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
 		return -EBADF;
 
 	/* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
-	if ((file_offset & ~CFS_PAGE_MASK) || (count & ~CFS_PAGE_MASK))
+	if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
 		return -EINVAL;
 
-	CDEBUG(D_VFSTRACE,
-	       "VFS Op:inode=%lu/%u(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n",
-	       inode->i_ino, inode->i_generation, inode, count, MAX_DIO_SIZE,
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n",
+	       PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
 	       file_offset, file_offset, count >> PAGE_SHIFT,
 	       MAX_DIO_SIZE >> PAGE_SHIFT);
 
 	/* Check that all user buffers are aligned as well */
-	if (iov_iter_alignment(iter) & ~CFS_PAGE_MASK)
+	if (iov_iter_alignment(iter) & ~PAGE_MASK)
 		return -EINVAL;
 
 	env = cl_env_get(&refcheck);
 	LASSERT(!IS_ERR(env));
-	io = ccc_env_io(env)->cui_cl.cis_io;
+	io = vvp_env_io(env)->vui_cl.cis_io;
 	LASSERT(io);
 
 	/* 0. Need locking between buffered and direct access. and race with
@@ -401,7 +382,7 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
 	if (iov_iter_rw(iter) == READ)
 		inode_lock(inode);
 
-	LASSERT(obj->cob_transient_pages == 0);
+	LASSERT(obj->vob_transient_pages == 0);
 	while (iov_iter_count(iter)) {
 		struct page **pages;
 		size_t offs;
@@ -435,8 +416,8 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
 			    size > (PAGE_SIZE / sizeof(*pages)) *
 			    PAGE_SIZE) {
 				size = ((((size / 2) - 1) |
-					 ~CFS_PAGE_MASK) + 1) &
-					CFS_PAGE_MASK;
+					 ~PAGE_MASK) + 1) &
+					PAGE_MASK;
 				CDEBUG(D_VFSTRACE, "DIO size now %lu\n",
 				       size);
 				continue;
@@ -449,62 +430,214 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
 		file_offset += result;
 	}
 out:
-	LASSERT(obj->cob_transient_pages == 0);
+	LASSERT(obj->vob_transient_pages == 0);
 	if (iov_iter_rw(iter) == READ)
 		inode_unlock(inode);
 
 	if (tot_bytes > 0) {
-		if (iov_iter_rw(iter) == WRITE) {
-			struct lov_stripe_md *lsm;
-
-			lsm = ccc_inode_lsm_get(inode);
-			LASSERT(lsm);
-			lov_stripe_lock(lsm);
-			obd_adjust_kms(ll_i2dtexp(inode), lsm, file_offset, 0);
-			lov_stripe_unlock(lsm);
-			ccc_inode_lsm_put(inode, lsm);
-		}
+		struct vvp_io *vio = vvp_env_io(env);
+
+		/* no commit async for direct IO */
+		vio->u.write.vui_written += tot_bytes;
 	}
 
 	cl_env_put(env, &refcheck);
-	return tot_bytes ? : result;
+	return tot_bytes ? tot_bytes : result;
+}
+
+/**
+ * Prepare partially written-to page for a write.
+ */
+static int ll_prepare_partial_page(const struct lu_env *env, struct cl_io *io,
+				   struct cl_page *pg)
+{
+	struct cl_attr *attr   = vvp_env_thread_attr(env);
+	struct cl_object *obj  = io->ci_obj;
+	struct vvp_page *vpg   = cl_object_page_slice(obj, pg);
+	loff_t          offset = cl_offset(obj, vvp_index(vpg));
+	int             result;
+
+	cl_object_attr_lock(obj);
+	result = cl_object_attr_get(env, obj, attr);
+	cl_object_attr_unlock(obj);
+	if (result == 0) {
+		/*
+		 * If are writing to a new page, no need to read old data.
+		 * The extent locking will have updated the KMS, and for our
+		 * purposes here we can treat it like i_size.
+		 */
+		if (attr->cat_kms <= offset) {
+			char *kaddr = kmap_atomic(vpg->vpg_page);
+
+			memset(kaddr, 0, cl_page_size(obj));
+			kunmap_atomic(kaddr);
+		} else if (vpg->vpg_defer_uptodate) {
+			vpg->vpg_ra_used = 1;
+		} else {
+			result = ll_page_sync_io(env, io, pg, CRT_READ);
+		}
+	}
+	return result;
 }
 
 static int ll_write_begin(struct file *file, struct address_space *mapping,
 			  loff_t pos, unsigned len, unsigned flags,
 			  struct page **pagep, void **fsdata)
 {
+	struct ll_cl_context *lcc;
+	const struct lu_env  *env;
+	struct cl_io   *io;
+	struct cl_page *page;
+	struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
 	pgoff_t index = pos >> PAGE_SHIFT;
-	struct page *page;
-	int rc;
-	unsigned from = pos & (PAGE_SIZE - 1);
+	struct page *vmpage = NULL;
+	unsigned int from = pos & (PAGE_SIZE - 1);
+	unsigned int to = from + len;
+	int result = 0;
 
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page)
-		return -ENOMEM;
+	CDEBUG(D_VFSTRACE, "Writing %lu of %d to %d bytes\n", index, from, len);
 
-	*pagep = page;
+	lcc = ll_cl_find(file);
+	if (!lcc) {
+		result = -EIO;
+		goto out;
+	}
+
+	env = lcc->lcc_env;
+	io  = lcc->lcc_io;
+
+	/* To avoid deadlock, try to lock page first. */
+	vmpage = grab_cache_page_nowait(mapping, index);
+	if (unlikely(!vmpage || PageDirty(vmpage) || PageWriteback(vmpage))) {
+		struct vvp_io *vio = vvp_env_io(env);
+		struct cl_page_list *plist = &vio->u.write.vui_queue;
+
+		/* if the page is already in dirty cache, we have to commit
+		 * the pages right now; otherwise, it may cause deadlock
+		 * because it holds page lock of a dirty page and request for
+		 * more grants. It's okay for the dirty page to be the first
+		 * one in commit page list, though.
+		 */
+		if (vmpage && plist->pl_nr > 0) {
+			unlock_page(vmpage);
+			put_page(vmpage);
+			vmpage = NULL;
+		}
+
+		/* commit pages and then wait for page lock */
+		result = vvp_io_write_commit(env, io);
+		if (result < 0)
+			goto out;
+
+		if (!vmpage) {
+			vmpage = grab_cache_page_write_begin(mapping, index,
+							     flags);
+			if (!vmpage) {
+				result = -ENOMEM;
+				goto out;
+			}
+		}
+	}
 
-	rc = ll_prepare_write(file, page, from, from + len);
-	if (rc) {
-		unlock_page(page);
-		put_page(page);
+	page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
+	if (IS_ERR(page)) {
+		result = PTR_ERR(page);
+		goto out;
 	}
-	return rc;
+
+	lcc->lcc_page = page;
+	lu_ref_add(&page->cp_reference, "cl_io", io);
+
+	cl_page_assume(env, io, page);
+	if (!PageUptodate(vmpage)) {
+		/*
+		 * We're completely overwriting an existing page,
+		 * so _don't_ set it up to date until commit_write
+		 */
+		if (from == 0 && to == PAGE_SIZE) {
+			CL_PAGE_HEADER(D_PAGE, env, page, "full page write\n");
+			POISON_PAGE(vmpage, 0x11);
+		} else {
+			/* TODO: can be optimized at OSC layer to check if it
+			 * is a lockless IO. In that case, it's not necessary
+			 * to read the data.
+			 */
+			result = ll_prepare_partial_page(env, io, page);
+			if (result == 0)
+				SetPageUptodate(vmpage);
+		}
+	}
+	if (result < 0)
+		cl_page_unassume(env, io, page);
+out:
+	if (result < 0) {
+		if (vmpage) {
+			unlock_page(vmpage);
+			put_page(vmpage);
+		}
+	} else {
+		*pagep = vmpage;
+		*fsdata = lcc;
+	}
+	return result;
 }
 
 static int ll_write_end(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned copied,
-			struct page *page, void *fsdata)
+			struct page *vmpage, void *fsdata)
 {
+	struct ll_cl_context *lcc = fsdata;
+	const struct lu_env *env;
+	struct cl_io *io;
+	struct vvp_io *vio;
+	struct cl_page *page;
 	unsigned from = pos & (PAGE_SIZE - 1);
-	int rc;
+	bool unplug = false;
+	int result = 0;
+
+	put_page(vmpage);
+
+	env  = lcc->lcc_env;
+	page = lcc->lcc_page;
+	io   = lcc->lcc_io;
+	vio  = vvp_env_io(env);
+
+	LASSERT(cl_page_is_owned(page, io));
+	if (copied > 0) {
+		struct cl_page_list *plist = &vio->u.write.vui_queue;
+
+		lcc->lcc_page = NULL; /* page will be queued */
+
+		/* Add it into write queue */
+		cl_page_list_add(plist, page);
+		if (plist->pl_nr == 1) /* first page */
+			vio->u.write.vui_from = from;
+		else
+			LASSERT(from == 0);
+		vio->u.write.vui_to = from + copied;
+
+		/* We may have one full RPC, commit it soon */
+		if (plist->pl_nr >= PTLRPC_MAX_BRW_PAGES)
+			unplug = true;
+
+		CL_PAGE_DEBUG(D_VFSTRACE, env, page,
+			      "queued page: %d.\n", plist->pl_nr);
+	} else {
+		cl_page_disown(env, io, page);
+
+		lcc->lcc_page = NULL;
+		lu_ref_del(&page->cp_reference, "cl_io", io);
+		cl_page_put(env, page);
+
+		/* page list is not contiguous now, commit it now */
+		unplug = true;
+	}
 
-	rc = ll_commit_write(file, page, from, from + copied);
-	unlock_page(page);
-	put_page(page);
+	if (unplug ||
+	    file->f_flags & O_SYNC || IS_SYNC(file_inode(file)))
+		result = vvp_io_write_commit(env, io);
 
-	return rc ?: copied;
+	return result >= 0 ? copied : result;
 }
 
 #ifdef CONFIG_MIGRATION
@@ -523,7 +656,7 @@ const struct address_space_operations ll_aops = {
 	.direct_IO      = ll_direct_IO_26,
 	.writepage      = ll_writepage,
 	.writepages     = ll_writepages,
-	.set_page_dirty = ll_set_page_dirty,
+	.set_page_dirty = __set_page_dirty_nobuffers,
 	.write_begin    = ll_write_begin,
 	.write_end      = ll_write_end,
 	.invalidatepage = ll_invalidatepage,
diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c
index 99ffd1589df8..c1cb6b19e724 100644
--- a/drivers/staging/lustre/lustre/llite/statahead.c
+++ b/drivers/staging/lustre/lustre/llite/statahead.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -174,7 +170,8 @@ static inline int is_omitted_entry(struct ll_statahead_info *sai, __u64 index)
  * Insert it into sai_entries tail when init.
  */
 static struct ll_sa_entry *
-ll_sa_entry_alloc(struct ll_statahead_info *sai, __u64 index,
+ll_sa_entry_alloc(struct dentry *parent,
+		  struct ll_statahead_info *sai, __u64 index,
 		  const char *name, int len)
 {
 	struct ll_inode_info *lli;
@@ -221,7 +218,8 @@ ll_sa_entry_alloc(struct ll_statahead_info *sai, __u64 index,
 	dname = (char *)entry + sizeof(struct ll_sa_entry);
 	memcpy(dname, name, len);
 	dname[len] = 0;
-	entry->se_qstr.hash = full_name_hash(name, len);
+
+	entry->se_qstr.hash = full_name_hash(parent, name, len);
 	entry->se_qstr.len = len;
 	entry->se_qstr.name = dname;
 
@@ -650,7 +648,7 @@ static void ll_post_statahead(struct ll_statahead_info *sai)
 		}
 	}
 
-	it->d.lustre.it_lock_handle = entry->se_handle;
+	it->it_lock_handle = entry->se_handle;
 	rc = md_revalidate_lock(ll_i2mdexp(dir), it, ll_inode2fid(dir), NULL);
 	if (rc != 1) {
 		rc = -EAGAIN;
@@ -661,8 +659,9 @@ static void ll_post_statahead(struct ll_statahead_info *sai)
 	if (rc)
 		goto out;
 
-	CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
-	       child, child->i_ino, child->i_generation);
+	CDEBUG(D_DLMTRACE, "%s: setting l_data to inode "DFID"%p\n",
+	       ll_get_fsname(child->i_sb, NULL, 0),
+	       PFID(ll_inode2fid(child)), child);
 	ll_set_lock_data(ll_i2sbi(dir)->ll_md_exp, child, it, NULL);
 
 	entry->se_inode = child;
@@ -703,7 +702,7 @@ static int ll_statahead_interpret(struct ptlrpc_request *req,
 		 * process enqueues lock on child with parent lock held, eg.
 		 * unlink.
 		 */
-		handle = it->d.lustre.it_lock_handle;
+		handle = it->it_lock_handle;
 		ll_intent_drop_lock(it);
 	}
 
@@ -782,7 +781,7 @@ static int sa_args_init(struct inode *dir, struct inode *child,
 			struct ll_sa_entry *entry, struct md_enqueue_info **pmi,
 			struct ldlm_enqueue_info **pei)
 {
-	struct qstr	      *qstr = &entry->se_qstr;
+	const struct qstr      *qstr = &entry->se_qstr;
 	struct ll_inode_info     *lli  = ll_i2info(dir);
 	struct md_enqueue_info   *minfo;
 	struct ldlm_enqueue_info *einfo;
@@ -853,7 +852,7 @@ static int do_sa_revalidate(struct inode *dir, struct ll_sa_entry *entry,
 {
 	struct inode	     *inode = d_inode(dentry);
 	struct lookup_intent      it = { .it_op = IT_GETATTR,
-					 .d.lustre.it_lock_handle = 0 };
+					 .it_lock_handle = 0 };
 	struct md_enqueue_info   *minfo;
 	struct ldlm_enqueue_info *einfo;
 	int rc;
@@ -868,7 +867,7 @@ static int do_sa_revalidate(struct inode *dir, struct ll_sa_entry *entry,
 	rc = md_revalidate_lock(ll_i2mdexp(dir), &it, ll_inode2fid(inode),
 				NULL);
 	if (rc == 1) {
-		entry->se_handle = it.d.lustre.it_lock_handle;
+		entry->se_handle = it.it_lock_handle;
 		ll_intent_release(&it);
 		return 1;
 	}
@@ -901,7 +900,7 @@ static void ll_statahead_one(struct dentry *parent, const char *entry_name,
 	int		       rc;
 	int		       rc1;
 
-	entry = ll_sa_entry_alloc(sai, sai->sai_index, entry_name,
+	entry = ll_sa_entry_alloc(parent, sai, sai->sai_index, entry_name,
 				  entry_name_len);
 	if (IS_ERR(entry))
 		return;
@@ -1341,7 +1340,7 @@ enum {
 static int is_first_dirent(struct inode *dir, struct dentry *dentry)
 {
 	struct ll_dir_chain   chain;
-	struct qstr	  *target = &dentry->d_name;
+	const struct qstr  *target = &dentry->d_name;
 	struct page	  *page;
 	__u64		 pos    = 0;
 	int		   dot_de;
@@ -1572,7 +1571,7 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
 		if (entry->se_stat == SA_ENTRY_SUCC && entry->se_inode) {
 			struct inode *inode = entry->se_inode;
 			struct lookup_intent it = { .it_op = IT_GETATTR,
-						    .d.lustre.it_lock_handle =
+						    .it_lock_handle =
 						     entry->se_handle };
 			__u64 bits;
 
@@ -1591,13 +1590,11 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
 					*dentryp = alias;
 				} else if (d_inode(*dentryp) != inode) {
 					/* revalidate, but inode is recreated */
-					CDEBUG(D_READA,
-					       "stale dentry %pd inode %lu/%u, statahead inode %lu/%u\n",
-					      *dentryp,
-					      d_inode(*dentryp)->i_ino,
-					      d_inode(*dentryp)->i_generation,
-					      inode->i_ino,
-					      inode->i_generation);
+					CDEBUG(D_READA, "%s: stale dentry %pd inode "DFID", statahead inode "DFID"\n",
+					       ll_get_fsname(d_inode(*dentryp)->i_sb, NULL, 0),
+					       *dentryp,
+					       PFID(ll_inode2fid(d_inode(*dentryp))),
+					       PFID(ll_inode2fid(inode)));
 					ll_sai_unplug(sai, entry);
 					return -ESTALE;
 				} else {
diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c
index 61856d37afc5..3dd7e0eb0b54 100644
--- a/drivers/staging/lustre/lustre/llite/super25.c
+++ b/drivers/staging/lustre/lustre/llite/super25.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -118,19 +114,6 @@ static int __init lustre_init(void)
 	if (!ll_file_data_slab)
 		goto out_cache;
 
-	ll_remote_perm_cachep = kmem_cache_create("ll_remote_perm_cache",
-						  sizeof(struct ll_remote_perm),
-						      0, 0, NULL);
-	if (!ll_remote_perm_cachep)
-		goto out_cache;
-
-	ll_rmtperm_hash_cachep = kmem_cache_create("ll_rmtperm_hash_cache",
-						   REMOTE_PERM_HASHSIZE *
-						   sizeof(struct list_head),
-						   0, 0, NULL);
-	if (!ll_rmtperm_hash_cachep)
-		goto out_cache;
-
 	llite_root = debugfs_create_dir("llite", debugfs_lustre_root);
 	if (IS_ERR_OR_NULL(llite_root)) {
 		rc = llite_root ? PTR_ERR(llite_root) : -ENOMEM;
@@ -164,9 +147,18 @@ static int __init lustre_init(void)
 	if (rc != 0)
 		goto out_sysfs;
 
+	cl_inode_fini_env = cl_env_alloc(&cl_inode_fini_refcheck,
+					 LCT_REMEMBER | LCT_NOREF);
+	if (IS_ERR(cl_inode_fini_env)) {
+		rc = PTR_ERR(cl_inode_fini_env);
+		goto out_vvp;
+	}
+
+	cl_inode_fini_env->le_ctx.lc_cookie = 0x4;
+
 	rc = ll_xattr_init();
 	if (rc != 0)
-		goto out_vvp;
+		goto out_inode_fini_env;
 
 	lustre_register_client_fill_super(ll_fill_super);
 	lustre_register_kill_super_cb(ll_kill_super);
@@ -174,6 +166,8 @@ static int __init lustre_init(void)
 
 	return 0;
 
+out_inode_fini_env:
+	cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck);
 out_vvp:
 	vvp_global_fini();
 out_sysfs:
@@ -183,8 +177,6 @@ out_debugfs:
 out_cache:
 	kmem_cache_destroy(ll_inode_cachep);
 	kmem_cache_destroy(ll_file_data_slab);
-	kmem_cache_destroy(ll_remote_perm_cachep);
-	kmem_cache_destroy(ll_rmtperm_hash_cachep);
 	return rc;
 }
 
@@ -198,13 +190,10 @@ static void __exit lustre_exit(void)
 	kset_unregister(llite_kset);
 
 	ll_xattr_fini();
+	cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck);
 	vvp_global_fini();
 
 	kmem_cache_destroy(ll_inode_cachep);
-	kmem_cache_destroy(ll_rmtperm_hash_cachep);
-
-	kmem_cache_destroy(ll_remote_perm_cachep);
-
 	kmem_cache_destroy(ll_file_data_slab);
 }
 
diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c
index 46d03ea48352..8c8bdfe1ad71 100644
--- a/drivers/staging/lustre/lustre/llite/symlink.c
+++ b/drivers/staging/lustre/lustre/llite/symlink.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -77,7 +73,9 @@ static int ll_readlink_internal(struct inode *inode,
 	ll_finish_md_op_data(op_data);
 	if (rc) {
 		if (rc != -ENOENT)
-			CERROR("inode %lu: rc = %d\n", inode->i_ino, rc);
+			CERROR("%s: inode "DFID": rc = %d\n",
+			       ll_get_fsname(inode->i_sb, NULL, 0),
+			       PFID(ll_inode2fid(inode)), rc);
 		goto failed;
 	}
 
@@ -90,8 +88,10 @@ static int ll_readlink_internal(struct inode *inode,
 
 	LASSERT(symlen != 0);
 	if (body->eadatasize != symlen) {
-		CERROR("inode %lu: symlink length %d not expected %d\n",
-		       inode->i_ino, body->eadatasize - 1, symlen - 1);
+		CERROR("%s: inode "DFID": symlink length %d not expected %d\n",
+		       ll_get_fsname(inode->i_sb, NULL, 0),
+		       PFID(ll_inode2fid(inode)), body->eadatasize - 1,
+		       symlen - 1);
 		rc = -EPROTO;
 		goto failed;
 	}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c
index 282b70b776da..e623216e962d 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_dev.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_dev.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -36,6 +32,7 @@
  * cl_device and cl_device_type implementation for VVP layer.
  *
  *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  */
 
 #define DEBUG_SUBSYSTEM S_LLITE
@@ -56,13 +53,33 @@
  * "llite_" (var. "ll_") prefix.
  */
 
-static struct kmem_cache *vvp_thread_kmem;
+static struct kmem_cache *ll_thread_kmem;
+struct kmem_cache *vvp_lock_kmem;
+struct kmem_cache *vvp_object_kmem;
+struct kmem_cache *vvp_req_kmem;
 static struct kmem_cache *vvp_session_kmem;
+static struct kmem_cache *vvp_thread_kmem;
+
 static struct lu_kmem_descr vvp_caches[] = {
 	{
-		.ckd_cache = &vvp_thread_kmem,
-		.ckd_name  = "vvp_thread_kmem",
-		.ckd_size  = sizeof(struct vvp_thread_info),
+		.ckd_cache = &ll_thread_kmem,
+		.ckd_name  = "ll_thread_kmem",
+		.ckd_size  = sizeof(struct ll_thread_info),
+	},
+	{
+		.ckd_cache = &vvp_lock_kmem,
+		.ckd_name  = "vvp_lock_kmem",
+		.ckd_size  = sizeof(struct vvp_lock),
+	},
+	{
+		.ckd_cache = &vvp_object_kmem,
+		.ckd_name  = "vvp_object_kmem",
+		.ckd_size  = sizeof(struct vvp_object),
+	},
+	{
+		.ckd_cache = &vvp_req_kmem,
+		.ckd_name  = "vvp_req_kmem",
+		.ckd_size  = sizeof(struct vvp_req),
 	},
 	{
 		.ckd_cache = &vvp_session_kmem,
@@ -70,29 +87,40 @@ static struct lu_kmem_descr vvp_caches[] = {
 		.ckd_size  = sizeof(struct vvp_session)
 	},
 	{
+		.ckd_cache = &vvp_thread_kmem,
+		.ckd_name  = "vvp_thread_kmem",
+		.ckd_size  = sizeof(struct vvp_thread_info),
+	},
+	{
 		.ckd_cache = NULL
 	}
 };
 
-static void *vvp_key_init(const struct lu_context *ctx,
-			  struct lu_context_key *key)
+static void *ll_thread_key_init(const struct lu_context *ctx,
+				struct lu_context_key *key)
 {
 	struct vvp_thread_info *info;
 
-	info = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS);
+	info = kmem_cache_zalloc(ll_thread_kmem, GFP_NOFS);
 	if (!info)
 		info = ERR_PTR(-ENOMEM);
 	return info;
 }
 
-static void vvp_key_fini(const struct lu_context *ctx,
-			 struct lu_context_key *key, void *data)
+static void ll_thread_key_fini(const struct lu_context *ctx,
+			       struct lu_context_key *key, void *data)
 {
 	struct vvp_thread_info *info = data;
 
-	kmem_cache_free(vvp_thread_kmem, info);
+	kmem_cache_free(ll_thread_kmem, info);
 }
 
+struct lu_context_key ll_thread_key = {
+	.lct_tags = LCT_CL_THREAD,
+	.lct_init = ll_thread_key_init,
+	.lct_fini = ll_thread_key_fini
+};
+
 static void *vvp_session_key_init(const struct lu_context *ctx,
 				  struct lu_context_key *key)
 {
@@ -112,34 +140,127 @@ static void vvp_session_key_fini(const struct lu_context *ctx,
 	kmem_cache_free(vvp_session_kmem, session);
 }
 
-struct lu_context_key vvp_key = {
-	.lct_tags = LCT_CL_THREAD,
-	.lct_init = vvp_key_init,
-	.lct_fini = vvp_key_fini
-};
-
 struct lu_context_key vvp_session_key = {
 	.lct_tags = LCT_SESSION,
 	.lct_init = vvp_session_key_init,
 	.lct_fini = vvp_session_key_fini
 };
 
+static void *vvp_thread_key_init(const struct lu_context *ctx,
+				 struct lu_context_key *key)
+{
+	struct vvp_thread_info *vti;
+
+	vti = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS);
+	if (!vti)
+		vti = ERR_PTR(-ENOMEM);
+	return vti;
+}
+
+static void vvp_thread_key_fini(const struct lu_context *ctx,
+				struct lu_context_key *key, void *data)
+{
+	struct vvp_thread_info *vti = data;
+
+	kmem_cache_free(vvp_thread_kmem, vti);
+}
+
+struct lu_context_key vvp_thread_key = {
+	.lct_tags = LCT_CL_THREAD,
+	.lct_init = vvp_thread_key_init,
+	.lct_fini = vvp_thread_key_fini
+};
+
 /* type constructor/destructor: vvp_type_{init,fini,start,stop}(). */
-LU_TYPE_INIT_FINI(vvp, &ccc_key, &ccc_session_key, &vvp_key, &vvp_session_key);
+LU_TYPE_INIT_FINI(vvp, &vvp_thread_key, &ll_thread_key, &vvp_session_key);
 
 static const struct lu_device_operations vvp_lu_ops = {
 	.ldo_object_alloc      = vvp_object_alloc
 };
 
 static const struct cl_device_operations vvp_cl_ops = {
-	.cdo_req_init = ccc_req_init
+	.cdo_req_init = vvp_req_init
 };
 
+static struct lu_device *vvp_device_free(const struct lu_env *env,
+					 struct lu_device *d)
+{
+	struct vvp_device *vdv  = lu2vvp_dev(d);
+	struct cl_site    *site = lu2cl_site(d->ld_site);
+	struct lu_device  *next = cl2lu_dev(vdv->vdv_next);
+
+	if (d->ld_site) {
+		cl_site_fini(site);
+		kfree(site);
+	}
+	cl_device_fini(lu2cl_dev(d));
+	kfree(vdv);
+	return next;
+}
+
 static struct lu_device *vvp_device_alloc(const struct lu_env *env,
 					  struct lu_device_type *t,
 					  struct lustre_cfg *cfg)
 {
-	return ccc_device_alloc(env, t, cfg, &vvp_lu_ops, &vvp_cl_ops);
+	struct vvp_device *vdv;
+	struct lu_device  *lud;
+	struct cl_site    *site;
+	int rc;
+
+	vdv = kzalloc(sizeof(*vdv), GFP_NOFS);
+	if (!vdv)
+		return ERR_PTR(-ENOMEM);
+
+	lud = &vdv->vdv_cl.cd_lu_dev;
+	cl_device_init(&vdv->vdv_cl, t);
+	vvp2lu_dev(vdv)->ld_ops = &vvp_lu_ops;
+	vdv->vdv_cl.cd_ops = &vvp_cl_ops;
+
+	site = kzalloc(sizeof(*site), GFP_NOFS);
+	if (site) {
+		rc = cl_site_init(site, &vdv->vdv_cl);
+		if (rc == 0) {
+			rc = lu_site_init_finish(&site->cs_lu);
+		} else {
+			LASSERT(!lud->ld_site);
+			CERROR("Cannot init lu_site, rc %d.\n", rc);
+			kfree(site);
+		}
+	} else {
+		rc = -ENOMEM;
+	}
+	if (rc != 0) {
+		vvp_device_free(env, lud);
+		lud = ERR_PTR(rc);
+	}
+	return lud;
+}
+
+static int vvp_device_init(const struct lu_env *env, struct lu_device *d,
+			   const char *name, struct lu_device *next)
+{
+	struct vvp_device  *vdv;
+	int rc;
+
+	vdv = lu2vvp_dev(d);
+	vdv->vdv_next = lu2cl_dev(next);
+
+	LASSERT(d->ld_site && next->ld_type);
+	next->ld_site = d->ld_site;
+	rc = next->ld_type->ldt_ops->ldto_device_init(env, next,
+						      next->ld_type->ldt_name,
+						      NULL);
+	if (rc == 0) {
+		lu_device_get(next);
+		lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init);
+	}
+	return rc;
+}
+
+static struct lu_device *vvp_device_fini(const struct lu_env *env,
+					 struct lu_device *d)
+{
+	return cl2lu_dev(lu2vvp_dev(d)->vdv_next);
 }
 
 static const struct lu_device_type_operations vvp_device_type_ops = {
@@ -150,9 +271,9 @@ static const struct lu_device_type_operations vvp_device_type_ops = {
 	.ldto_stop  = vvp_type_stop,
 
 	.ldto_device_alloc = vvp_device_alloc,
-	.ldto_device_free  = ccc_device_free,
-	.ldto_device_init  = ccc_device_init,
-	.ldto_device_fini  = ccc_device_fini
+	.ldto_device_free	= vvp_device_free,
+	.ldto_device_init	= vvp_device_init,
+	.ldto_device_fini	= vvp_device_fini,
 };
 
 struct lu_device_type vvp_device_type = {
@@ -168,20 +289,27 @@ struct lu_device_type vvp_device_type = {
  */
 int vvp_global_init(void)
 {
-	int result;
+	int rc;
 
-	result = lu_kmem_init(vvp_caches);
-	if (result == 0) {
-		result = ccc_global_init(&vvp_device_type);
-		if (result != 0)
-			lu_kmem_fini(vvp_caches);
-	}
-	return result;
+	rc = lu_kmem_init(vvp_caches);
+	if (rc != 0)
+		return rc;
+
+	rc = lu_device_type_init(&vvp_device_type);
+	if (rc != 0)
+		goto out_kmem;
+
+	return 0;
+
+out_kmem:
+	lu_kmem_fini(vvp_caches);
+
+	return rc;
 }
 
 void vvp_global_fini(void)
 {
-	ccc_global_fini(&vvp_device_type);
+	lu_device_type_fini(&vvp_device_type);
 	lu_kmem_fini(vvp_caches);
 }
 
@@ -205,13 +333,14 @@ int cl_sb_init(struct super_block *sb)
 		cl = cl_type_setup(env, NULL, &vvp_device_type,
 				   sbi->ll_dt_exp->exp_obd->obd_lu_dev);
 		if (!IS_ERR(cl)) {
-			cl2ccc_dev(cl)->cdv_sb = sb;
+			cl2vvp_dev(cl)->vdv_sb = sb;
 			sbi->ll_cl = cl;
 			sbi->ll_site = cl2lu_dev(cl)->ld_site;
 		}
 		cl_env_put(env, &refcheck);
-	} else
+	} else {
 		rc = PTR_ERR(env);
+	}
 	return rc;
 }
 
@@ -356,23 +485,18 @@ static loff_t vvp_pgcache_find(const struct lu_env *env,
 			return ~0ULL;
 		clob = vvp_pgcache_obj(env, dev, &id);
 		if (clob) {
-			struct cl_object_header *hdr;
-			int		      nr;
-			struct cl_page	  *pg;
-
-			/* got an object. Find next page. */
-			hdr = cl_object_header(clob);
+			struct inode *inode = vvp_object_inode(clob);
+			struct page *vmpage;
+			int nr;
 
-			spin_lock(&hdr->coh_page_guard);
-			nr = radix_tree_gang_lookup(&hdr->coh_tree,
-						    (void **)&pg,
-						    id.vpi_index, 1);
+			nr = find_get_pages_contig(inode->i_mapping,
+						   id.vpi_index, 1, &vmpage);
 			if (nr > 0) {
-				id.vpi_index = pg->cp_index;
+				id.vpi_index = vmpage->index;
 				/* Cant support over 16T file */
-				nr = !(pg->cp_index > 0xffffffff);
+				nr = !(vmpage->index > 0xffffffff);
+				put_page(vmpage);
 			}
-			spin_unlock(&hdr->coh_page_guard);
 
 			lu_object_ref_del(&clob->co_lu, "dump", current);
 			cl_object_put(env, clob);
@@ -398,21 +522,20 @@ static loff_t vvp_pgcache_find(const struct lu_env *env,
 static void vvp_pgcache_page_show(const struct lu_env *env,
 				  struct seq_file *seq, struct cl_page *page)
 {
-	struct ccc_page *cpg;
+	struct vvp_page *vpg;
 	struct page      *vmpage;
 	int	      has_flags;
 
-	cpg = cl2ccc_page(cl_page_at(page, &vvp_device_type));
-	vmpage = cpg->cpg_page;
-	seq_printf(seq, " %5i | %p %p %s %s %s %s | %p %lu/%u(%p) %lu %u [",
+	vpg = cl2vvp_page(cl_page_at(page, &vvp_device_type));
+	vmpage = vpg->vpg_page;
+	seq_printf(seq, " %5i | %p %p %s %s %s %s | %p "DFID"(%p) %lu %u [",
 		   0 /* gen */,
-		   cpg, page,
+		   vpg, page,
 		   "none",
-		   cpg->cpg_write_queued ? "wq" : "- ",
-		   cpg->cpg_defer_uptodate ? "du" : "- ",
+		   vpg->vpg_write_queued ? "wq" : "- ",
+		   vpg->vpg_defer_uptodate ? "du" : "- ",
 		   PageWriteback(vmpage) ? "wb" : "-",
-		   vmpage, vmpage->mapping->host->i_ino,
-		   vmpage->mapping->host->i_generation,
+		   vmpage, PFID(ll_inode2fid(vmpage->mapping->host)),
 		   vmpage->mapping->host, vmpage->index,
 		   page_count(vmpage));
 	has_flags = 0;
@@ -431,40 +554,49 @@ static int vvp_pgcache_show(struct seq_file *f, void *v)
 	struct ll_sb_info       *sbi;
 	struct cl_object	*clob;
 	struct lu_env	   *env;
-	struct cl_page	  *page;
-	struct cl_object_header *hdr;
 	struct vvp_pgcache_id    id;
 	int		      refcheck;
 	int		      result;
 
 	env = cl_env_get(&refcheck);
 	if (!IS_ERR(env)) {
-		pos = *(loff_t *) v;
+		pos = *(loff_t *)v;
 		vvp_pgcache_id_unpack(pos, &id);
 		sbi = f->private;
 		clob = vvp_pgcache_obj(env, &sbi->ll_cl->cd_lu_dev, &id);
 		if (clob) {
-			hdr = cl_object_header(clob);
-
-			spin_lock(&hdr->coh_page_guard);
-			page = cl_page_lookup(hdr, id.vpi_index);
-			spin_unlock(&hdr->coh_page_guard);
+			struct inode *inode = vvp_object_inode(clob);
+			struct cl_page *page = NULL;
+			struct page *vmpage;
+
+			result = find_get_pages_contig(inode->i_mapping,
+						       id.vpi_index, 1,
+						       &vmpage);
+			if (result > 0) {
+				lock_page(vmpage);
+				page = cl_vmpage_page(vmpage, clob);
+				unlock_page(vmpage);
+				put_page(vmpage);
+			}
 
-			seq_printf(f, "%8x@"DFID": ",
-				   id.vpi_index, PFID(&hdr->coh_lu.loh_fid));
+			seq_printf(f, "%8x@" DFID ": ", id.vpi_index,
+				   PFID(lu_object_fid(&clob->co_lu)));
 			if (page) {
 				vvp_pgcache_page_show(env, f, page);
 				cl_page_put(env, page);
-			} else
+			} else {
 				seq_puts(f, "missing\n");
+			}
 			lu_object_ref_del(&clob->co_lu, "dump", current);
 			cl_object_put(env, clob);
-		} else
+		} else {
 			seq_printf(f, "%llx missing\n", pos);
+		}
 		cl_env_put(env, &refcheck);
 		result = 0;
-	} else
+	} else {
 		result = PTR_ERR(env);
+	}
 	return result;
 }
 
diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h
index bb393378c9bb..79fc428461ed 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_internal.h
+++ b/drivers/staging/lustre/lustre/llite/vvp_internal.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -41,21 +37,337 @@
 #ifndef VVP_INTERNAL_H
 #define VVP_INTERNAL_H
 
+#include "../include/lustre/lustre_idl.h"
 #include "../include/cl_object.h"
-#include "llite_internal.h"
 
-int vvp_io_init(const struct lu_env *env,
-		struct cl_object *obj, struct cl_io *io);
-int vvp_lock_init(const struct lu_env *env,
-		  struct cl_object *obj, struct cl_lock *lock,
-				   const struct cl_io *io);
+enum obd_notify_event;
+struct inode;
+struct lov_stripe_md;
+struct lustre_md;
+struct obd_capa;
+struct obd_device;
+struct obd_export;
+struct page;
+
+/* specific architecture can implement only part of this list */
+enum vvp_io_subtype {
+	/** normal IO */
+	IO_NORMAL,
+	/** io started from splice_{read|write} */
+	IO_SPLICE
+};
+
+/**
+ * IO state private to IO state private to VVP layer.
+ */
+struct vvp_io {
+	/** super class */
+	struct cl_io_slice     vui_cl;
+	struct cl_io_lock_link vui_link;
+	/**
+	 * I/O vector information to or from which read/write is going.
+	 */
+	struct iov_iter *vui_iter;
+	/**
+	 * Total size for the left IO.
+	 */
+	size_t vui_tot_count;
+
+	union {
+		struct vvp_fault_io {
+			/**
+			 * Inode modification time that is checked across DLM
+			 * lock request.
+			 */
+			time64_t	    ft_mtime;
+			struct vm_area_struct *ft_vma;
+			/**
+			 *  locked page returned from vvp_io
+			 */
+			struct page	    *ft_vmpage;
+			/**
+			 * kernel fault info
+			 */
+			struct vm_fault *ft_vmf;
+			/**
+			 * fault API used bitflags for return code.
+			 */
+			unsigned int    ft_flags;
+			/**
+			 * check that flags are from filemap_fault
+			 */
+			bool		ft_flags_valid;
+		} fault;
+		struct {
+			struct pipe_inode_info	*vui_pipe;
+			unsigned int		 vui_flags;
+		} splice;
+		struct {
+			struct cl_page_list vui_queue;
+			unsigned long vui_written;
+			int vui_from;
+			int vui_to;
+		} write;
+	} u;
+
+	enum vvp_io_subtype	vui_io_subtype;
+
+	/**
+	 * Layout version when this IO is initialized
+	 */
+	__u32			vui_layout_gen;
+	/**
+	 * File descriptor against which IO is done.
+	 */
+	struct ll_file_data	*vui_fd;
+	struct kiocb		*vui_iocb;
+
+	/* Readahead state. */
+	pgoff_t	vui_ra_start;
+	pgoff_t	vui_ra_count;
+	/* Set when vui_ra_{start,count} have been initialized. */
+	bool		vui_ra_valid;
+};
+
+extern struct lu_device_type vvp_device_type;
+
+extern struct lu_context_key vvp_session_key;
+extern struct lu_context_key vvp_thread_key;
+
+extern struct kmem_cache *vvp_lock_kmem;
+extern struct kmem_cache *vvp_object_kmem;
+extern struct kmem_cache *vvp_req_kmem;
+
+struct vvp_thread_info {
+	struct cl_lock		vti_lock;
+	struct cl_lock_descr	vti_descr;
+	struct cl_io		vti_io;
+	struct cl_attr		vti_attr;
+};
+
+static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env)
+{
+	struct vvp_thread_info      *vti;
+
+	vti = lu_context_key_get(&env->le_ctx, &vvp_thread_key);
+	LASSERT(vti);
+
+	return vti;
+}
+
+static inline struct cl_lock *vvp_env_lock(const struct lu_env *env)
+{
+	struct cl_lock *lock = &vvp_env_info(env)->vti_lock;
+
+	memset(lock, 0, sizeof(*lock));
+	return lock;
+}
+
+static inline struct cl_attr *vvp_env_thread_attr(const struct lu_env *env)
+{
+	struct cl_attr *attr = &vvp_env_info(env)->vti_attr;
+
+	memset(attr, 0, sizeof(*attr));
+
+	return attr;
+}
+
+static inline struct cl_io *vvp_env_thread_io(const struct lu_env *env)
+{
+	struct cl_io *io = &vvp_env_info(env)->vti_io;
+
+	memset(io, 0, sizeof(*io));
+
+	return io;
+}
+
+struct vvp_session {
+	struct vvp_io cs_ios;
+};
+
+static inline struct vvp_session *vvp_env_session(const struct lu_env *env)
+{
+	struct vvp_session *ses;
+
+	ses = lu_context_key_get(env->le_ses, &vvp_session_key);
+	LASSERT(ses);
+
+	return ses;
+}
+
+static inline struct vvp_io *vvp_env_io(const struct lu_env *env)
+{
+	return &vvp_env_session(env)->cs_ios;
+}
+
+/**
+ * ccc-private object state.
+ */
+struct vvp_object {
+	struct cl_object_header vob_header;
+	struct cl_object        vob_cl;
+	struct inode           *vob_inode;
+
+	/**
+	 * A list of dirty pages pending IO in the cache. Used by
+	 * SOM. Protected by ll_inode_info::lli_lock.
+	 *
+	 * \see vvp_page::vpg_pending_linkage
+	 */
+	struct list_head	vob_pending_list;
+
+	/**
+	 * Access this counter is protected by inode->i_sem. Now that
+	 * the lifetime of transient pages must be covered by inode sem,
+	 * we don't need to hold any lock..
+	 */
+	int			vob_transient_pages;
+	/**
+	 * Number of outstanding mmaps on this file.
+	 *
+	 * \see ll_vm_open(), ll_vm_close().
+	 */
+	atomic_t                vob_mmap_cnt;
+
+	/**
+	 * various flags
+	 * vob_discard_page_warned
+	 *     if pages belonging to this object are discarded when a client
+	 * is evicted, some debug info will be printed, this flag will be set
+	 * during processing the first discarded page, then avoid flooding
+	 * debug message for lots of discarded pages.
+	 *
+	 * \see ll_dirty_page_discard_warn.
+	 */
+	unsigned int		vob_discard_page_warned:1;
+};
+
+/**
+ * VVP-private page state.
+ */
+struct vvp_page {
+	struct cl_page_slice vpg_cl;
+	int		  vpg_defer_uptodate;
+	int		  vpg_ra_used;
+	int		  vpg_write_queued;
+	/**
+	 * Non-empty iff this page is already counted in
+	 * vvp_object::vob_pending_list. This list is only used as a flag,
+	 * that is, never iterated through, only checked for list_empty(), but
+	 * having a list is useful for debugging.
+	 */
+	struct list_head	   vpg_pending_linkage;
+	/** VM page */
+	struct page	  *vpg_page;
+};
+
+static inline struct vvp_page *cl2vvp_page(const struct cl_page_slice *slice)
+{
+	return container_of(slice, struct vvp_page, vpg_cl);
+}
+
+static inline pgoff_t vvp_index(struct vvp_page *vvp)
+{
+	return vvp->vpg_cl.cpl_index;
+}
+
+struct vvp_device {
+	struct cl_device    vdv_cl;
+	struct super_block *vdv_sb;
+	struct cl_device   *vdv_next;
+};
+
+struct vvp_lock {
+	struct cl_lock_slice vlk_cl;
+};
+
+struct vvp_req {
+	struct cl_req_slice  vrq_cl;
+};
+
+void *ccc_key_init(const struct lu_context *ctx,
+		   struct lu_context_key *key);
+void ccc_key_fini(const struct lu_context *ctx,
+		  struct lu_context_key *key, void *data);
+
+void ccc_umount(const struct lu_env *env, struct cl_device *dev);
+
+static inline struct lu_device *vvp2lu_dev(struct vvp_device *vdv)
+{
+	return &vdv->vdv_cl.cd_lu_dev;
+}
+
+static inline struct vvp_device *lu2vvp_dev(const struct lu_device *d)
+{
+	return container_of0(d, struct vvp_device, vdv_cl.cd_lu_dev);
+}
+
+static inline struct vvp_device *cl2vvp_dev(const struct cl_device *d)
+{
+	return container_of0(d, struct vvp_device, vdv_cl);
+}
+
+static inline struct vvp_object *cl2vvp(const struct cl_object *obj)
+{
+	return container_of0(obj, struct vvp_object, vob_cl);
+}
+
+static inline struct vvp_object *lu2vvp(const struct lu_object *obj)
+{
+	return container_of0(obj, struct vvp_object, vob_cl.co_lu);
+}
+
+static inline struct inode *vvp_object_inode(const struct cl_object *obj)
+{
+	return cl2vvp(obj)->vob_inode;
+}
+
+int vvp_object_invariant(const struct cl_object *obj);
+struct vvp_object *cl_inode2vvp(struct inode *inode);
+
+static inline struct page *cl2vm_page(const struct cl_page_slice *slice)
+{
+	return cl2vvp_page(slice)->vpg_page;
+}
+
+static inline struct vvp_lock *cl2vvp_lock(const struct cl_lock_slice *slice)
+{
+	return container_of(slice, struct vvp_lock, vlk_cl);
+}
+
+# define CLOBINVRNT(env, clob, expr)					\
+	((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr)))
+
+/**
+ * New interfaces to get and put lov_stripe_md from lov layer. This violates
+ * layering because lov_stripe_md is supposed to be a private data in lov.
+ *
+ * NB: If you find you have to use these interfaces for your new code, please
+ * think about it again. These interfaces may be removed in the future for
+ * better layering.
+ */
+struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj);
+void lov_lsm_put(struct cl_object *clobj, struct lov_stripe_md *lsm);
+int lov_read_and_clear_async_rc(struct cl_object *clob);
+
+struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode);
+void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm);
+
+int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
+		struct cl_io *io);
+int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io);
+int vvp_lock_init(const struct lu_env *env, struct cl_object *obj,
+		  struct cl_lock *lock, const struct cl_io *io);
 int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
-		  struct cl_page *page, struct page *vmpage);
+		  struct cl_page *page, pgoff_t index);
+int vvp_req_init(const struct lu_env *env, struct cl_device *dev,
+		 struct cl_req *req);
 struct lu_object *vvp_object_alloc(const struct lu_env *env,
 				   const struct lu_object_header *hdr,
 				   struct lu_device *dev);
 
-struct ccc_object *cl_inode2ccc(struct inode *inode);
+int vvp_global_init(void);
+void vvp_global_fini(void);
 
 extern const struct file_operations vvp_dump_pgcache_file_ops;
 
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
index 85a835976174..94916dcc6caa 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_io.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -44,21 +40,30 @@
 #include "../include/obd.h"
 #include "../include/lustre_lite.h"
 
+#include "llite_internal.h"
 #include "vvp_internal.h"
 
 static struct vvp_io *cl2vvp_io(const struct lu_env *env,
-				const struct cl_io_slice *slice);
+				const struct cl_io_slice *slice)
+{
+	struct vvp_io *vio;
+
+	vio = container_of(slice, struct vvp_io, vui_cl);
+	LASSERT(vio == vvp_env_io(env));
+
+	return vio;
+}
 
 /**
  * True, if \a io is a normal io, False for splice_{read,write}
  */
-int cl_is_normalio(const struct lu_env *env, const struct cl_io *io)
+static int cl_is_normalio(const struct lu_env *env, const struct cl_io *io)
 {
 	struct vvp_io *vio = vvp_env_io(env);
 
 	LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
 
-	return vio->cui_io_subtype == IO_NORMAL;
+	return vio->vui_io_subtype == IO_NORMAL;
 }
 
 /**
@@ -71,7 +76,7 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
 			       struct inode *inode)
 {
 	struct ll_inode_info	*lli = ll_i2info(inode);
-	struct ccc_io		*cio = ccc_env_io(env);
+	struct vvp_io		*vio = vvp_env_io(env);
 	bool rc = true;
 
 	switch (io->ci_type) {
@@ -80,7 +85,7 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
 		/* don't need lock here to check lli_layout_gen as we have held
 		 * extent lock and GROUP lock has to hold to swap layout
 		 */
-		if (ll_layout_version_get(lli) != cio->cui_layout_gen) {
+		if (ll_layout_version_get(lli) != vio->vui_layout_gen) {
 			io->ci_need_restart = 1;
 			/* this will return application a short read/write */
 			io->ci_continue = 0;
@@ -95,20 +100,187 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
 	return rc;
 }
 
+static void vvp_object_size_lock(struct cl_object *obj)
+{
+	struct inode *inode = vvp_object_inode(obj);
+
+	ll_inode_size_lock(inode);
+	cl_object_attr_lock(obj);
+}
+
+static void vvp_object_size_unlock(struct cl_object *obj)
+{
+	struct inode *inode = vvp_object_inode(obj);
+
+	cl_object_attr_unlock(obj);
+	ll_inode_size_unlock(inode);
+}
+
+/**
+ * Helper function that if necessary adjusts file size (inode->i_size), when
+ * position at the offset \a pos is accessed. File size can be arbitrary stale
+ * on a Lustre client, but client at least knows KMS. If accessed area is
+ * inside [0, KMS], set file size to KMS, otherwise glimpse file size.
+ *
+ * Locking: cl_isize_lock is used to serialize changes to inode size and to
+ * protect consistency between inode size and cl_object
+ * attributes. cl_object_size_lock() protects consistency between cl_attr's of
+ * top-object and sub-objects.
+ */
+static int vvp_prep_size(const struct lu_env *env, struct cl_object *obj,
+			 struct cl_io *io, loff_t start, size_t count,
+			 int *exceed)
+{
+	struct cl_attr *attr  = vvp_env_thread_attr(env);
+	struct inode   *inode = vvp_object_inode(obj);
+	loff_t	  pos   = start + count - 1;
+	loff_t kms;
+	int result;
+
+	/*
+	 * Consistency guarantees: following possibilities exist for the
+	 * relation between region being accessed and real file size at this
+	 * moment:
+	 *
+	 *  (A): the region is completely inside of the file;
+	 *
+	 *  (B-x): x bytes of region are inside of the file, the rest is
+	 *  outside;
+	 *
+	 *  (C): the region is completely outside of the file.
+	 *
+	 * This classification is stable under DLM lock already acquired by
+	 * the caller, because to change the class, other client has to take
+	 * DLM lock conflicting with our lock. Also, any updates to ->i_size
+	 * by other threads on this client are serialized by
+	 * ll_inode_size_lock(). This guarantees that short reads are handled
+	 * correctly in the face of concurrent writes and truncates.
+	 */
+	vvp_object_size_lock(obj);
+	result = cl_object_attr_get(env, obj, attr);
+	if (result == 0) {
+		kms = attr->cat_kms;
+		if (pos > kms) {
+			/*
+			 * A glimpse is necessary to determine whether we
+			 * return a short read (B) or some zeroes at the end
+			 * of the buffer (C)
+			 */
+			vvp_object_size_unlock(obj);
+			result = cl_glimpse_lock(env, io, inode, obj, 0);
+			if (result == 0 && exceed) {
+				/* If objective page index exceed end-of-file
+				 * page index, return directly. Do not expect
+				 * kernel will check such case correctly.
+				 * linux-2.6.18-128.1.1 miss to do that.
+				 * --bug 17336
+				 */
+				loff_t size = i_size_read(inode);
+				loff_t cur_index = start >> PAGE_SHIFT;
+				loff_t size_index = (size - 1) >> PAGE_SHIFT;
+
+				if ((size == 0 && cur_index != 0) ||
+				    size_index < cur_index)
+					*exceed = 1;
+			}
+			return result;
+		}
+		/*
+		 * region is within kms and, hence, within real file
+		 * size (A). We need to increase i_size to cover the
+		 * read region so that generic_file_read() will do its
+		 * job, but that doesn't mean the kms size is
+		 * _correct_, it is only the _minimum_ size. If
+		 * someone does a stat they will get the correct size
+		 * which will always be >= the kms value here.
+		 * b=11081
+		 */
+		if (i_size_read(inode) < kms) {
+			i_size_write(inode, kms);
+			CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
+			       PFID(lu_object_fid(&obj->co_lu)),
+			       (__u64)i_size_read(inode));
+		}
+	}
+
+	vvp_object_size_unlock(obj);
+
+	return result;
+}
+
 /*****************************************************************************
  *
  * io operations.
  *
  */
 
+static int vvp_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
+				 __u32 enqflags, enum cl_lock_mode mode,
+			  pgoff_t start, pgoff_t end)
+{
+	struct vvp_io          *vio   = vvp_env_io(env);
+	struct cl_lock_descr   *descr = &vio->vui_link.cill_descr;
+	struct cl_object       *obj   = io->ci_obj;
+
+	CLOBINVRNT(env, obj, vvp_object_invariant(obj));
+
+	CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end);
+
+	memset(&vio->vui_link, 0, sizeof(vio->vui_link));
+
+	if (vio->vui_fd && (vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
+		descr->cld_mode = CLM_GROUP;
+		descr->cld_gid  = vio->vui_fd->fd_grouplock.lg_gid;
+	} else {
+		descr->cld_mode  = mode;
+	}
+	descr->cld_obj   = obj;
+	descr->cld_start = start;
+	descr->cld_end   = end;
+	descr->cld_enq_flags = enqflags;
+
+	cl_io_lock_add(env, io, &vio->vui_link);
+	return 0;
+}
+
+static int vvp_io_one_lock(const struct lu_env *env, struct cl_io *io,
+			   __u32 enqflags, enum cl_lock_mode mode,
+			   loff_t start, loff_t end)
+{
+	struct cl_object *obj = io->ci_obj;
+
+	return vvp_io_one_lock_index(env, io, enqflags, mode,
+				     cl_index(obj, start), cl_index(obj, end));
+}
+
+static int vvp_io_write_iter_init(const struct lu_env *env,
+				  const struct cl_io_slice *ios)
+{
+	struct vvp_io *vio = cl2vvp_io(env, ios);
+
+	cl_page_list_init(&vio->u.write.vui_queue);
+	vio->u.write.vui_written = 0;
+	vio->u.write.vui_from = 0;
+	vio->u.write.vui_to = PAGE_SIZE;
+
+	return 0;
+}
+
+static void vvp_io_write_iter_fini(const struct lu_env *env,
+				   const struct cl_io_slice *ios)
+{
+	struct vvp_io *vio = cl2vvp_io(env, ios);
+
+	LASSERT(vio->u.write.vui_queue.pl_nr == 0);
+}
+
 static int vvp_io_fault_iter_init(const struct lu_env *env,
 				  const struct cl_io_slice *ios)
 {
 	struct vvp_io *vio   = cl2vvp_io(env, ios);
-	struct inode  *inode = ccc_object_inode(ios->cis_obj);
+	struct inode  *inode = vvp_object_inode(ios->cis_obj);
 
-	LASSERT(inode ==
-		file_inode(cl2ccc_io(env, ios)->cui_fd->fd_file));
+	LASSERT(inode == file_inode(vio->vui_fd->fd_file));
 	vio->u.fault.ft_mtime = inode->i_mtime.tv_sec;
 	return 0;
 }
@@ -117,15 +289,16 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
 {
 	struct cl_io     *io  = ios->cis_io;
 	struct cl_object *obj = io->ci_obj;
-	struct ccc_io    *cio = cl2ccc_io(env, ios);
+	struct vvp_io    *vio = cl2vvp_io(env, ios);
+	struct inode *inode = vvp_object_inode(obj);
 
-	CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+	CLOBINVRNT(env, obj, vvp_object_invariant(obj));
 
 	CDEBUG(D_VFSTRACE, DFID
 	       " ignore/verify layout %d/%d, layout version %d restore needed %d\n",
 	       PFID(lu_object_fid(&obj->co_lu)),
 	       io->ci_ignore_layout, io->ci_verify_layout,
-	       cio->cui_layout_gen, io->ci_restore_needed);
+	       vio->vui_layout_gen, io->ci_restore_needed);
 
 	if (io->ci_restore_needed == 1) {
 		int	rc;
@@ -133,7 +306,7 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
 		/* file was detected release, we need to restore it
 		 * before finishing the io
 		 */
-		rc = ll_layout_restore(ccc_object_inode(obj));
+		rc = ll_layout_restore(inode, 0, OBD_OBJECT_EOF);
 		/* if restore registration failed, no restart,
 		 * we will return -ENODATA
 		 */
@@ -159,16 +332,16 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
 		__u32 gen = 0;
 
 		/* check layout version */
-		ll_layout_refresh(ccc_object_inode(obj), &gen);
-		io->ci_need_restart = cio->cui_layout_gen != gen;
+		ll_layout_refresh(inode, &gen);
+		io->ci_need_restart = vio->vui_layout_gen != gen;
 		if (io->ci_need_restart) {
 			CDEBUG(D_VFSTRACE,
 			       DFID" layout changed from %d to %d.\n",
 			       PFID(lu_object_fid(&obj->co_lu)),
-			       cio->cui_layout_gen, gen);
+			       vio->vui_layout_gen, gen);
 			/* today successful restore is the only possible case */
 			/* restore was done, clear restoring state */
-			ll_i2info(ccc_object_inode(obj))->lli_flags &=
+			ll_i2info(vvp_object_inode(obj))->lli_flags &=
 				~LLIF_FILE_RESTORING;
 		}
 	}
@@ -180,7 +353,7 @@ static void vvp_io_fault_fini(const struct lu_env *env,
 	struct cl_io   *io   = ios->cis_io;
 	struct cl_page *page = io->u.ci_fault.ft_page;
 
-	CLOBINVRNT(env, io->ci_obj, ccc_object_invariant(io->ci_obj));
+	CLOBINVRNT(env, io->ci_obj, vvp_object_invariant(io->ci_obj));
 
 	if (page) {
 		lu_ref_del(&page->cp_reference, "fault", io);
@@ -203,16 +376,16 @@ static enum cl_lock_mode vvp_mode_from_vma(struct vm_area_struct *vma)
 }
 
 static int vvp_mmap_locks(const struct lu_env *env,
-			  struct ccc_io *vio, struct cl_io *io)
+			  struct vvp_io *vio, struct cl_io *io)
 {
-	struct ccc_thread_info *cti = ccc_env_info(env);
+	struct vvp_thread_info *cti = vvp_env_info(env);
 	struct mm_struct       *mm = current->mm;
 	struct vm_area_struct  *vma;
-	struct cl_lock_descr   *descr = &cti->cti_descr;
+	struct cl_lock_descr   *descr = &cti->vti_descr;
 	ldlm_policy_data_t      policy;
 	unsigned long	   addr;
 	ssize_t		 count;
-	int		     result;
+	int		 result = 0;
 	struct iov_iter i;
 	struct iovec iov;
 
@@ -221,21 +394,21 @@ static int vvp_mmap_locks(const struct lu_env *env,
 	if (!cl_is_normalio(env, io))
 		return 0;
 
-	if (!vio->cui_iter) /* nfs or loop back device write */
+	if (!vio->vui_iter) /* nfs or loop back device write */
 		return 0;
 
 	/* No MM (e.g. NFS)? No vmas too. */
 	if (!mm)
 		return 0;
 
-	iov_for_each(iov, i, *(vio->cui_iter)) {
+	iov_for_each(iov, i, *vio->vui_iter) {
 		addr = (unsigned long)iov.iov_base;
 		count = iov.iov_len;
 		if (count == 0)
 			continue;
 
-		count += addr & (~CFS_PAGE_MASK);
-		addr &= CFS_PAGE_MASK;
+		count += addr & (~PAGE_MASK);
+		addr &= PAGE_MASK;
 
 		down_read(&mm->mmap_sem);
 		while ((vma = our_vma(mm, addr, count)) != NULL) {
@@ -244,10 +417,10 @@ static int vvp_mmap_locks(const struct lu_env *env,
 
 			if (ll_file_nolock(vma->vm_file)) {
 				/*
-				 * For no lock case, a lockless lock will be
-				 * generated.
+				 * For no lock case is not allowed for mmap
 				 */
-				flags = CEF_NEVER;
+				result = -EINVAL;
+				break;
 			}
 
 			/*
@@ -269,10 +442,8 @@ static int vvp_mmap_locks(const struct lu_env *env,
 			       descr->cld_mode, descr->cld_start,
 			       descr->cld_end);
 
-			if (result < 0) {
-				up_read(&mm->mmap_sem);
-				return result;
-			}
+			if (result < 0)
+				break;
 
 			if (vma->vm_end - addr >= count)
 				break;
@@ -281,26 +452,55 @@ static int vvp_mmap_locks(const struct lu_env *env,
 			addr = vma->vm_end;
 		}
 		up_read(&mm->mmap_sem);
+		if (result < 0)
+			break;
 	}
-	return 0;
+	return result;
+}
+
+static void vvp_io_advance(const struct lu_env *env,
+			   const struct cl_io_slice *ios,
+			   size_t nob)
+{
+	struct vvp_io    *vio = cl2vvp_io(env, ios);
+	struct cl_io     *io  = ios->cis_io;
+	struct cl_object *obj = ios->cis_io->ci_obj;
+
+	CLOBINVRNT(env, obj, vvp_object_invariant(obj));
+
+	if (!cl_is_normalio(env, io))
+		return;
+
+	iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count  -= nob);
+}
+
+static void vvp_io_update_iov(const struct lu_env *env,
+			      struct vvp_io *vio, struct cl_io *io)
+{
+	size_t size = io->u.ci_rw.crw_count;
+
+	if (!cl_is_normalio(env, io) || !vio->vui_iter)
+		return;
+
+	iov_iter_truncate(vio->vui_iter, size);
 }
 
 static int vvp_io_rw_lock(const struct lu_env *env, struct cl_io *io,
 			  enum cl_lock_mode mode, loff_t start, loff_t end)
 {
-	struct ccc_io *cio = ccc_env_io(env);
+	struct vvp_io *vio = vvp_env_io(env);
 	int result;
 	int ast_flags = 0;
 
 	LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
 
-	ccc_io_update_iov(env, cio, io);
+	vvp_io_update_iov(env, vio, io);
 
 	if (io->u.ci_rw.crw_nonblock)
 		ast_flags |= CEF_NONBLOCK;
-	result = vvp_mmap_locks(env, cio, io);
+	result = vvp_mmap_locks(env, vio, io);
 	if (result == 0)
-		result = ccc_io_one_lock(env, io, ast_flags, mode, start, end);
+		result = vvp_io_one_lock(env, io, ast_flags, mode, start, end);
 	return result;
 }
 
@@ -325,9 +525,11 @@ static int vvp_io_fault_lock(const struct lu_env *env,
 	/*
 	 * XXX LDLM_FL_CBPENDING
 	 */
-	return ccc_io_one_lock_index
-		(env, io, 0, vvp_mode_from_vma(vio->u.fault.ft_vma),
-		 io->u.ci_fault.ft_index, io->u.ci_fault.ft_index);
+	return vvp_io_one_lock_index(env,
+				     io, 0,
+				     vvp_mode_from_vma(vio->u.fault.ft_vma),
+				     io->u.ci_fault.ft_index,
+				     io->u.ci_fault.ft_index);
 }
 
 static int vvp_io_write_lock(const struct lu_env *env,
@@ -354,14 +556,13 @@ static int vvp_io_setattr_iter_init(const struct lu_env *env,
 }
 
 /**
- * Implementation of cl_io_operations::cio_lock() method for CIT_SETATTR io.
+ * Implementation of cl_io_operations::vio_lock() method for CIT_SETATTR io.
  *
  * Handles "lockless io" mode when extent locking is done by server.
  */
 static int vvp_io_setattr_lock(const struct lu_env *env,
 			       const struct cl_io_slice *ios)
 {
-	struct ccc_io *cio = ccc_env_io(env);
 	struct cl_io  *io  = ios->cis_io;
 	__u64 new_size;
 	__u32 enqflags = 0;
@@ -378,8 +579,8 @@ static int vvp_io_setattr_lock(const struct lu_env *env,
 			return 0;
 		new_size = 0;
 	}
-	cio->u.setattr.cui_local_lock = SETATTR_EXTENT_LOCK;
-	return ccc_io_one_lock(env, io, enqflags, CLM_WRITE,
+
+	return vvp_io_one_lock(env, io, enqflags, CLM_WRITE,
 			       new_size, OBD_OBJECT_EOF);
 }
 
@@ -413,7 +614,7 @@ static int vvp_io_setattr_time(const struct lu_env *env,
 {
 	struct cl_io       *io    = ios->cis_io;
 	struct cl_object   *obj   = io->ci_obj;
-	struct cl_attr     *attr  = ccc_env_thread_attr(env);
+	struct cl_attr     *attr  = vvp_env_thread_attr(env);
 	int result;
 	unsigned valid = CAT_CTIME;
 
@@ -437,7 +638,7 @@ static int vvp_io_setattr_start(const struct lu_env *env,
 				const struct cl_io_slice *ios)
 {
 	struct cl_io	*io    = ios->cis_io;
-	struct inode	*inode = ccc_object_inode(io->ci_obj);
+	struct inode	*inode = vvp_object_inode(io->ci_obj);
 	int result = 0;
 
 	inode_lock(inode);
@@ -453,7 +654,7 @@ static void vvp_io_setattr_end(const struct lu_env *env,
 			       const struct cl_io_slice *ios)
 {
 	struct cl_io *io    = ios->cis_io;
-	struct inode *inode = ccc_object_inode(io->ci_obj);
+	struct inode *inode = vvp_object_inode(io->ci_obj);
 
 	if (cl_io_is_trunc(io))
 		/* Truncate in memory pages - they must be clean pages
@@ -474,27 +675,25 @@ static int vvp_io_read_start(const struct lu_env *env,
 			     const struct cl_io_slice *ios)
 {
 	struct vvp_io     *vio   = cl2vvp_io(env, ios);
-	struct ccc_io     *cio   = cl2ccc_io(env, ios);
 	struct cl_io      *io    = ios->cis_io;
 	struct cl_object  *obj   = io->ci_obj;
-	struct inode      *inode = ccc_object_inode(obj);
-	struct ll_ra_read *bead  = &vio->cui_bead;
-	struct file       *file  = cio->cui_fd->fd_file;
+	struct inode      *inode = vvp_object_inode(obj);
+	struct file       *file  = vio->vui_fd->fd_file;
 
 	int     result;
 	loff_t  pos = io->u.ci_rd.rd.crw_pos;
 	long    cnt = io->u.ci_rd.rd.crw_count;
-	long    tot = cio->cui_tot_count;
+	long    tot = vio->vui_tot_count;
 	int     exceed = 0;
 
-	CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+	CLOBINVRNT(env, obj, vvp_object_invariant(obj));
 
 	CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt);
 
 	if (!can_populate_pages(env, io, inode))
 		return 0;
 
-	result = ccc_prep_size(env, obj, io, pos, tot, &exceed);
+	result = vvp_prep_size(env, obj, io, pos, tot, &exceed);
 	if (result != 0)
 		return result;
 	else if (exceed != 0)
@@ -505,30 +704,27 @@ static int vvp_io_read_start(const struct lu_env *env,
 			 inode->i_ino, cnt, pos, i_size_read(inode));
 
 	/* turn off the kernel's read-ahead */
-	cio->cui_fd->fd_file->f_ra.ra_pages = 0;
+	vio->vui_fd->fd_file->f_ra.ra_pages = 0;
 
 	/* initialize read-ahead window once per syscall */
-	if (!vio->cui_ra_window_set) {
-		vio->cui_ra_window_set = 1;
-		bead->lrr_start = cl_index(obj, pos);
-		/*
-		 * XXX: explicit PAGE_SIZE
-		 */
-		bead->lrr_count = cl_index(obj, tot + PAGE_SIZE - 1);
-		ll_ra_read_in(file, bead);
+	if (!vio->vui_ra_valid) {
+		vio->vui_ra_valid = true;
+		vio->vui_ra_start = cl_index(obj, pos);
+		vio->vui_ra_count = cl_index(obj, tot + PAGE_SIZE - 1);
+		ll_ras_enter(file);
 	}
 
 	/* BUG: 5972 */
 	file_accessed(file);
-	switch (vio->cui_io_subtype) {
+	switch (vio->vui_io_subtype) {
 	case IO_NORMAL:
-		LASSERT(cio->cui_iocb->ki_pos == pos);
-		result = generic_file_read_iter(cio->cui_iocb, cio->cui_iter);
+		LASSERT(vio->vui_iocb->ki_pos == pos);
+		result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter);
 		break;
 	case IO_SPLICE:
 		result = generic_file_splice_read(file, &pos,
-						  vio->u.splice.cui_pipe, cnt,
-						  vio->u.splice.cui_flags);
+						  vio->u.splice.vui_pipe, cnt,
+						  vio->u.splice.vui_flags);
 		/* LU-1109: do splice read stripe by stripe otherwise if it
 		 * may make nfsd stuck if this read occupied all internal pipe
 		 * buffers.
@@ -536,7 +732,7 @@ static int vvp_io_read_start(const struct lu_env *env,
 		io->ci_continue = 0;
 		break;
 	default:
-		CERROR("Wrong IO type %u\n", vio->cui_io_subtype);
+		CERROR("Wrong IO type %u\n", vio->vui_io_subtype);
 		LBUG();
 	}
 
@@ -546,30 +742,201 @@ out:
 			io->ci_continue = 0;
 		io->ci_nob += result;
 		ll_rw_stats_tally(ll_i2sbi(inode), current->pid,
-				  cio->cui_fd, pos, result, READ);
+				  vio->vui_fd, pos, result, READ);
 		result = 0;
 	}
 	return result;
 }
 
-static void vvp_io_read_fini(const struct lu_env *env, const struct cl_io_slice *ios)
+static int vvp_io_commit_sync(const struct lu_env *env, struct cl_io *io,
+			      struct cl_page_list *plist, int from, int to)
 {
-	struct vvp_io *vio = cl2vvp_io(env, ios);
-	struct ccc_io *cio = cl2ccc_io(env, ios);
+	struct cl_2queue *queue = &io->ci_queue;
+	struct cl_page *page;
+	unsigned int bytes = 0;
+	int rc = 0;
 
-	if (vio->cui_ra_window_set)
-		ll_ra_read_ex(cio->cui_fd->fd_file, &vio->cui_bead);
+	if (plist->pl_nr == 0)
+		return 0;
 
-	vvp_io_fini(env, ios);
+	if (from > 0 || to != PAGE_SIZE) {
+		page = cl_page_list_first(plist);
+		if (plist->pl_nr == 1) {
+			cl_page_clip(env, page, from, to);
+		} else {
+			if (from > 0)
+				cl_page_clip(env, page, from, PAGE_SIZE);
+			if (to != PAGE_SIZE) {
+				page = cl_page_list_last(plist);
+				cl_page_clip(env, page, 0, to);
+			}
+		}
+	}
+
+	cl_2queue_init(queue);
+	cl_page_list_splice(plist, &queue->c2_qin);
+	rc = cl_io_submit_sync(env, io, CRT_WRITE, queue, 0);
+
+	/* plist is not sorted any more */
+	cl_page_list_splice(&queue->c2_qin, plist);
+	cl_page_list_splice(&queue->c2_qout, plist);
+	cl_2queue_fini(env, queue);
+
+	if (rc == 0) {
+		/* calculate bytes */
+		bytes = plist->pl_nr << PAGE_SHIFT;
+		bytes -= from + PAGE_SIZE - to;
+
+		while (plist->pl_nr > 0) {
+			page = cl_page_list_first(plist);
+			cl_page_list_del(env, plist, page);
+
+			cl_page_clip(env, page, 0, PAGE_SIZE);
+
+			SetPageUptodate(cl_page_vmpage(page));
+			cl_page_disown(env, io, page);
+
+			/* held in ll_cl_init() */
+			lu_ref_del(&page->cp_reference, "cl_io", io);
+			cl_page_put(env, page);
+		}
+	}
+
+	return bytes > 0 ? bytes : rc;
+}
+
+static void write_commit_callback(const struct lu_env *env, struct cl_io *io,
+				  struct cl_page *page)
+{
+	struct vvp_page *vpg;
+	struct page *vmpage = page->cp_vmpage;
+	struct cl_object *clob = cl_io_top(io)->ci_obj;
+
+	SetPageUptodate(vmpage);
+	set_page_dirty(vmpage);
+
+	vpg = cl2vvp_page(cl_object_page_slice(clob, page));
+	vvp_write_pending(cl2vvp(clob), vpg);
+
+	cl_page_disown(env, io, page);
+
+	/* held in ll_cl_init() */
+	lu_ref_del(&page->cp_reference, "cl_io", io);
+	cl_page_put(env, page);
+}
+
+/* make sure the page list is contiguous */
+static bool page_list_sanity_check(struct cl_object *obj,
+				   struct cl_page_list *plist)
+{
+	struct cl_page *page;
+	pgoff_t index = CL_PAGE_EOF;
+
+	cl_page_list_for_each(page, plist) {
+		struct vvp_page *vpg = cl_object_page_slice(obj, page);
+
+		if (index == CL_PAGE_EOF) {
+			index = vvp_index(vpg);
+			continue;
+		}
+
+		++index;
+		if (index == vvp_index(vpg))
+			continue;
+
+		return false;
+	}
+	return true;
+}
+
+/* Return how many bytes have queued or written */
+int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io)
+{
+	struct cl_object *obj = io->ci_obj;
+	struct inode *inode = vvp_object_inode(obj);
+	struct vvp_io *vio = vvp_env_io(env);
+	struct cl_page_list *queue = &vio->u.write.vui_queue;
+	struct cl_page *page;
+	int rc = 0;
+	int bytes = 0;
+	unsigned int npages = vio->u.write.vui_queue.pl_nr;
+
+	if (npages == 0)
+		return 0;
+
+	CDEBUG(D_VFSTRACE, "commit async pages: %d, from %d, to %d\n",
+	       npages, vio->u.write.vui_from, vio->u.write.vui_to);
+
+	LASSERT(page_list_sanity_check(obj, queue));
+
+	/* submit IO with async write */
+	rc = cl_io_commit_async(env, io, queue,
+				vio->u.write.vui_from, vio->u.write.vui_to,
+				write_commit_callback);
+	npages -= queue->pl_nr; /* already committed pages */
+	if (npages > 0) {
+		/* calculate how many bytes were written */
+		bytes = npages << PAGE_SHIFT;
+
+		/* first page */
+		bytes -= vio->u.write.vui_from;
+		if (queue->pl_nr == 0) /* last page */
+			bytes -= PAGE_SIZE - vio->u.write.vui_to;
+		LASSERTF(bytes > 0, "bytes = %d, pages = %d\n", bytes, npages);
+
+		vio->u.write.vui_written += bytes;
+
+		CDEBUG(D_VFSTRACE, "Committed %d pages %d bytes, tot: %ld\n",
+		       npages, bytes, vio->u.write.vui_written);
+
+		/* the first page must have been written. */
+		vio->u.write.vui_from = 0;
+	}
+	LASSERT(page_list_sanity_check(obj, queue));
+	LASSERT(ergo(rc == 0, queue->pl_nr == 0));
+
+	/* out of quota, try sync write */
+	if (rc == -EDQUOT && !cl_io_is_mkwrite(io)) {
+		rc = vvp_io_commit_sync(env, io, queue,
+					vio->u.write.vui_from,
+					vio->u.write.vui_to);
+		if (rc > 0) {
+			vio->u.write.vui_written += rc;
+			rc = 0;
+		}
+	}
+
+	/* update inode size */
+	ll_merge_attr(env, inode);
+
+	/* Now the pages in queue were failed to commit, discard them
+	 * unless they were dirtied before.
+	 */
+	while (queue->pl_nr > 0) {
+		page = cl_page_list_first(queue);
+		cl_page_list_del(env, queue, page);
+
+		if (!PageDirty(cl_page_vmpage(page)))
+			cl_page_discard(env, io, page);
+
+		cl_page_disown(env, io, page);
+
+		/* held in ll_cl_init() */
+		lu_ref_del(&page->cp_reference, "cl_io", io);
+		cl_page_put(env, page);
+	}
+	cl_page_list_fini(env, queue);
+
+	return rc;
 }
 
 static int vvp_io_write_start(const struct lu_env *env,
 			      const struct cl_io_slice *ios)
 {
-	struct ccc_io      *cio   = cl2ccc_io(env, ios);
+	struct vvp_io      *vio   = cl2vvp_io(env, ios);
 	struct cl_io       *io    = ios->cis_io;
 	struct cl_object   *obj   = io->ci_obj;
-	struct inode       *inode = ccc_object_inode(obj);
+	struct inode       *inode = vvp_object_inode(obj);
 	ssize_t result = 0;
 	loff_t pos = io->u.ci_wr.wr.crw_pos;
 	size_t cnt = io->u.ci_wr.wr.crw_count;
@@ -582,25 +949,42 @@ static int vvp_io_write_start(const struct lu_env *env,
 		 * PARALLEL IO This has to be changed for parallel IO doing
 		 * out-of-order writes.
 		 */
-		pos = io->u.ci_wr.wr.crw_pos = i_size_read(inode);
-		cio->cui_iocb->ki_pos = pos;
+		ll_merge_attr(env, inode);
+		pos = i_size_read(inode);
+		io->u.ci_wr.wr.crw_pos = pos;
+		vio->vui_iocb->ki_pos = pos;
 	} else {
-		LASSERT(cio->cui_iocb->ki_pos == pos);
+		LASSERT(vio->vui_iocb->ki_pos == pos);
 	}
 
 	CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt);
 
-	if (!cio->cui_iter) /* from a temp io in ll_cl_init(). */
+	if (!vio->vui_iter) /* from a temp io in ll_cl_init(). */
 		result = 0;
 	else
-		result = generic_file_write_iter(cio->cui_iocb, cio->cui_iter);
+		result = generic_file_write_iter(vio->vui_iocb, vio->vui_iter);
+
+	if (result > 0) {
+		result = vvp_io_write_commit(env, io);
+		if (vio->u.write.vui_written > 0) {
+			result = vio->u.write.vui_written;
+			io->ci_nob += result;
 
+			CDEBUG(D_VFSTRACE, "write: nob %zd, result: %zd\n",
+			       io->ci_nob, result);
+		}
+	}
 	if (result > 0) {
+		struct ll_inode_info *lli = ll_i2info(inode);
+
+		spin_lock(&lli->lli_lock);
+		lli->lli_flags |= LLIF_DATA_MODIFIED;
+		spin_unlock(&lli->lli_lock);
+
 		if (result < cnt)
 			io->ci_continue = 0;
-		io->ci_nob += result;
 		ll_rw_stats_tally(ll_i2sbi(inode), current->pid,
-				  cio->cui_fd, pos, result, WRITE);
+				  vio->vui_fd, pos, result, WRITE);
 		result = 0;
 	}
 	return result;
@@ -608,10 +992,10 @@ static int vvp_io_write_start(const struct lu_env *env,
 
 static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
 {
-	struct vm_fault *vmf = cfio->fault.ft_vmf;
+	struct vm_fault *vmf = cfio->ft_vmf;
 
-	cfio->fault.ft_flags = filemap_fault(cfio->ft_vma, vmf);
-	cfio->fault.ft_flags_valid = 1;
+	cfio->ft_flags = filemap_fault(cfio->ft_vma, vmf);
+	cfio->ft_flags_valid = 1;
 
 	if (vmf->page) {
 		CDEBUG(D_PAGE,
@@ -619,39 +1003,51 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
 		       vmf->page, vmf->page->mapping, vmf->page->index,
 		       (long)vmf->page->flags, page_count(vmf->page),
 		       page_private(vmf->page), vmf->virtual_address);
-		if (unlikely(!(cfio->fault.ft_flags & VM_FAULT_LOCKED))) {
+		if (unlikely(!(cfio->ft_flags & VM_FAULT_LOCKED))) {
 			lock_page(vmf->page);
-			cfio->fault.ft_flags |= VM_FAULT_LOCKED;
+			cfio->ft_flags |= VM_FAULT_LOCKED;
 		}
 
 		cfio->ft_vmpage = vmf->page;
 		return 0;
 	}
 
-	if (cfio->fault.ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
+	if (cfio->ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
 		CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", vmf->virtual_address);
 		return -EFAULT;
 	}
 
-	if (cfio->fault.ft_flags & VM_FAULT_OOM) {
+	if (cfio->ft_flags & VM_FAULT_OOM) {
 		CDEBUG(D_PAGE, "got addr %p - OOM\n", vmf->virtual_address);
 		return -ENOMEM;
 	}
 
-	if (cfio->fault.ft_flags & VM_FAULT_RETRY)
+	if (cfio->ft_flags & VM_FAULT_RETRY)
 		return -EAGAIN;
 
-	CERROR("Unknown error in page fault %d!\n", cfio->fault.ft_flags);
+	CERROR("Unknown error in page fault %d!\n", cfio->ft_flags);
 	return -EINVAL;
 }
 
+static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io,
+				    struct cl_page *page)
+{
+	struct vvp_page *vpg;
+	struct cl_object *clob = cl_io_top(io)->ci_obj;
+
+	set_page_dirty(page->cp_vmpage);
+
+	vpg = cl2vvp_page(cl_object_page_slice(clob, page));
+	vvp_write_pending(cl2vvp(clob), vpg);
+}
+
 static int vvp_io_fault_start(const struct lu_env *env,
 			      const struct cl_io_slice *ios)
 {
 	struct vvp_io       *vio     = cl2vvp_io(env, ios);
 	struct cl_io	*io      = ios->cis_io;
 	struct cl_object    *obj     = io->ci_obj;
-	struct inode	*inode   = ccc_object_inode(obj);
+	struct inode        *inode   = vvp_object_inode(obj);
 	struct cl_fault_io  *fio     = &io->u.ci_fault;
 	struct vvp_fault_io *cfio    = &vio->u.fault;
 	loff_t	       offset;
@@ -659,7 +1055,7 @@ static int vvp_io_fault_start(const struct lu_env *env,
 	struct page	  *vmpage  = NULL;
 	struct cl_page      *page;
 	loff_t	       size;
-	pgoff_t	      last; /* last page in a file data region */
+	pgoff_t		     last_index;
 
 	if (fio->ft_executable &&
 	    inode->i_mtime.tv_sec != vio->u.fault.ft_mtime)
@@ -670,7 +1066,7 @@ static int vvp_io_fault_start(const struct lu_env *env,
 	/* offset of the last byte on the page */
 	offset = cl_offset(obj, fio->ft_index + 1) - 1;
 	LASSERT(cl_index(obj, offset) == fio->ft_index);
-	result = ccc_prep_size(env, obj, io, 0, offset + 1, NULL);
+	result = vvp_prep_size(env, obj, io, 0, offset + 1, NULL);
 	if (result != 0)
 		return result;
 
@@ -705,15 +1101,15 @@ static int vvp_io_fault_start(const struct lu_env *env,
 		goto out;
 	}
 
+	last_index = cl_index(obj, size - 1);
+
 	if (fio->ft_mkwrite) {
-		pgoff_t last_index;
 		/*
 		 * Capture the size while holding the lli_trunc_sem from above
 		 * we want to make sure that we complete the mkwrite action
 		 * while holding this lock. We need to make sure that we are
 		 * not past the end of the file.
 		 */
-		last_index = cl_index(obj, size - 1);
 		if (last_index < fio->ft_index) {
 			CDEBUG(D_PAGE,
 			       "llite: mkwrite and truncate race happened: %p: 0x%lx 0x%lx\n",
@@ -745,25 +1141,32 @@ static int vvp_io_fault_start(const struct lu_env *env,
 	 */
 	if (fio->ft_mkwrite) {
 		wait_on_page_writeback(vmpage);
-		if (set_page_dirty(vmpage)) {
-			struct ccc_page *cp;
+		if (!PageDirty(vmpage)) {
+			struct cl_page_list *plist = &io->ci_queue.c2_qin;
+			struct vvp_page *vpg = cl_object_page_slice(obj, page);
+			int to = PAGE_SIZE;
 
 			/* vvp_page_assume() calls wait_on_page_writeback(). */
 			cl_page_assume(env, io, page);
 
-			cp = cl2ccc_page(cl_page_at(page, &vvp_device_type));
-			vvp_write_pending(cl2ccc(obj), cp);
+			cl_page_list_init(plist);
+			cl_page_list_add(plist, page);
+
+			/* size fixup */
+			if (last_index == vvp_index(vpg))
+				to = size & ~PAGE_MASK;
 
 			/* Do not set Dirty bit here so that in case IO is
 			 * started before the page is really made dirty, we
 			 * still have chance to detect it.
 			 */
-			result = cl_page_cache_add(env, io, page, CRT_WRITE);
+			result = cl_io_commit_async(env, io, plist, 0, to,
+						    mkwrite_commit_callback);
 			LASSERT(cl_page_is_owned(page, io));
+			cl_page_list_fini(env, plist);
 
 			vmpage = NULL;
 			if (result < 0) {
-				cl_page_unmap(env, io, page);
 				cl_page_discard(env, io, page);
 				cl_page_disown(env, io, page);
 
@@ -773,20 +1176,20 @@ static int vvp_io_fault_start(const struct lu_env *env,
 				if (result == -EDQUOT)
 					result = -ENOSPC;
 				goto out;
-			} else
+			} else {
 				cl_page_disown(env, io, page);
+			}
 		}
 	}
 
-	last = cl_index(obj, size - 1);
 	/*
 	 * The ft_index is only used in the case of
 	 * a mkwrite action. We need to check
 	 * our assertions are correct, since
 	 * we should have caught this above
 	 */
-	LASSERT(!fio->ft_mkwrite || fio->ft_index <= last);
-	if (fio->ft_index == last)
+	LASSERT(!fio->ft_mkwrite || fio->ft_index <= last_index);
+	if (fio->ft_index == last_index)
 		/*
 		 * Last page is mapped partially.
 		 */
@@ -801,7 +1204,9 @@ out:
 	/* return unlocked vmpage to avoid deadlocking */
 	if (vmpage)
 		unlock_page(vmpage);
-	cfio->fault.ft_flags &= ~VM_FAULT_LOCKED;
+
+	cfio->ft_flags &= ~VM_FAULT_LOCKED;
+
 	return result;
 }
 
@@ -820,293 +1225,58 @@ static int vvp_io_read_page(const struct lu_env *env,
 			    const struct cl_page_slice *slice)
 {
 	struct cl_io	      *io     = ios->cis_io;
-	struct cl_object	  *obj    = slice->cpl_obj;
-	struct ccc_page	   *cp     = cl2ccc_page(slice);
+	struct vvp_page           *vpg    = cl2vvp_page(slice);
 	struct cl_page	    *page   = slice->cpl_page;
-	struct inode	      *inode  = ccc_object_inode(obj);
+	struct inode              *inode  = vvp_object_inode(slice->cpl_obj);
 	struct ll_sb_info	 *sbi    = ll_i2sbi(inode);
-	struct ll_file_data       *fd     = cl2ccc_io(env, ios)->cui_fd;
+	struct ll_file_data       *fd     = cl2vvp_io(env, ios)->vui_fd;
 	struct ll_readahead_state *ras    = &fd->fd_ras;
-	struct page		*vmpage = cp->cpg_page;
 	struct cl_2queue	  *queue  = &io->ci_queue;
-	int rc;
-
-	CLOBINVRNT(env, obj, ccc_object_invariant(obj));
-	LASSERT(slice->cpl_obj == obj);
 
 	if (sbi->ll_ra_info.ra_max_pages_per_file &&
 	    sbi->ll_ra_info.ra_max_pages)
-		ras_update(sbi, inode, ras, page->cp_index,
-			   cp->cpg_defer_uptodate);
-
-	/* Sanity check whether the page is protected by a lock. */
-	rc = cl_page_is_under_lock(env, io, page);
-	if (rc != -EBUSY) {
-		CL_PAGE_HEADER(D_WARNING, env, page, "%s: %d\n",
-			       rc == -ENODATA ? "without a lock" :
-			       "match failed", rc);
-		if (rc != -ENODATA)
-			return rc;
-	}
+		ras_update(sbi, inode, ras, vvp_index(vpg),
+			   vpg->vpg_defer_uptodate);
 
-	if (cp->cpg_defer_uptodate) {
-		cp->cpg_ra_used = 1;
+	if (vpg->vpg_defer_uptodate) {
+		vpg->vpg_ra_used = 1;
 		cl_page_export(env, page, 1);
 	}
 	/*
 	 * Add page into the queue even when it is marked uptodate above.
 	 * this will unlock it automatically as part of cl_page_list_disown().
 	 */
+
 	cl_page_list_add(&queue->c2_qin, page);
 	if (sbi->ll_ra_info.ra_max_pages_per_file &&
 	    sbi->ll_ra_info.ra_max_pages)
-		ll_readahead(env, io, ras,
-			     vmpage->mapping, &queue->c2_qin, fd->fd_flags);
+		ll_readahead(env, io, &queue->c2_qin, ras,
+			     vpg->vpg_defer_uptodate);
 
 	return 0;
 }
 
-static int vvp_page_sync_io(const struct lu_env *env, struct cl_io *io,
-			    struct cl_page *page, struct ccc_page *cp,
-			    enum cl_req_type crt)
+static void vvp_io_end(const struct lu_env *env, const struct cl_io_slice *ios)
 {
-	struct cl_2queue  *queue;
-	int result;
-
-	LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
-
-	queue = &io->ci_queue;
-	cl_2queue_init_page(queue, page);
-
-	result = cl_io_submit_sync(env, io, crt, queue, 0);
-	LASSERT(cl_page_is_owned(page, io));
-
-	if (crt == CRT_READ)
-		/*
-		 * in CRT_WRITE case page is left locked even in case of
-		 * error.
-		 */
-		cl_page_list_disown(env, io, &queue->c2_qin);
-	cl_2queue_fini(env, queue);
-
-	return result;
-}
-
-/**
- * Prepare partially written-to page for a write.
- */
-static int vvp_io_prepare_partial(const struct lu_env *env, struct cl_io *io,
-				  struct cl_object *obj, struct cl_page *pg,
-				  struct ccc_page *cp,
-				  unsigned from, unsigned to)
-{
-	struct cl_attr *attr   = ccc_env_thread_attr(env);
-	loff_t	  offset = cl_offset(obj, pg->cp_index);
-	int	     result;
-
-	cl_object_attr_lock(obj);
-	result = cl_object_attr_get(env, obj, attr);
-	cl_object_attr_unlock(obj);
-	if (result == 0) {
-		/*
-		 * If are writing to a new page, no need to read old data.
-		 * The extent locking will have updated the KMS, and for our
-		 * purposes here we can treat it like i_size.
-		 */
-		if (attr->cat_kms <= offset) {
-			char *kaddr = kmap_atomic(cp->cpg_page);
-
-			memset(kaddr, 0, cl_page_size(obj));
-			kunmap_atomic(kaddr);
-		} else if (cp->cpg_defer_uptodate)
-			cp->cpg_ra_used = 1;
-		else
-			result = vvp_page_sync_io(env, io, pg, cp, CRT_READ);
-		/*
-		 * In older implementations, obdo_refresh_inode is called here
-		 * to update the inode because the write might modify the
-		 * object info at OST. However, this has been proven useless,
-		 * since LVB functions will be called when user space program
-		 * tries to retrieve inode attribute.  Also, see bug 15909 for
-		 * details. -jay
-		 */
-		if (result == 0)
-			cl_page_export(env, pg, 1);
-	}
-	return result;
-}
-
-static int vvp_io_prepare_write(const struct lu_env *env,
-				const struct cl_io_slice *ios,
-				const struct cl_page_slice *slice,
-				unsigned from, unsigned to)
-{
-	struct cl_object *obj    = slice->cpl_obj;
-	struct ccc_page  *cp     = cl2ccc_page(slice);
-	struct cl_page   *pg     = slice->cpl_page;
-	struct page       *vmpage = cp->cpg_page;
-
-	int result;
-
-	LINVRNT(cl_page_is_vmlocked(env, pg));
-	LASSERT(vmpage->mapping->host == ccc_object_inode(obj));
-
-	result = 0;
-
-	CL_PAGE_HEADER(D_PAGE, env, pg, "preparing: [%d, %d]\n", from, to);
-	if (!PageUptodate(vmpage)) {
-		/*
-		 * We're completely overwriting an existing page, so _don't_
-		 * set it up to date until commit_write
-		 */
-		if (from == 0 && to == PAGE_SIZE) {
-			CL_PAGE_HEADER(D_PAGE, env, pg, "full page write\n");
-			POISON_PAGE(page, 0x11);
-		} else
-			result = vvp_io_prepare_partial(env, ios->cis_io, obj,
-							pg, cp, from, to);
-	} else
-		CL_PAGE_HEADER(D_PAGE, env, pg, "uptodate\n");
-	return result;
-}
-
-static int vvp_io_commit_write(const struct lu_env *env,
-			       const struct cl_io_slice *ios,
-			       const struct cl_page_slice *slice,
-			       unsigned from, unsigned to)
-{
-	struct cl_object  *obj    = slice->cpl_obj;
-	struct cl_io      *io     = ios->cis_io;
-	struct ccc_page   *cp     = cl2ccc_page(slice);
-	struct cl_page    *pg     = slice->cpl_page;
-	struct inode      *inode  = ccc_object_inode(obj);
-	struct ll_sb_info *sbi    = ll_i2sbi(inode);
-	struct ll_inode_info *lli = ll_i2info(inode);
-	struct page	*vmpage = cp->cpg_page;
-
-	int    result;
-	int    tallyop;
-	loff_t size;
-
-	LINVRNT(cl_page_is_vmlocked(env, pg));
-	LASSERT(vmpage->mapping->host == inode);
-
-	LU_OBJECT_HEADER(D_INODE, env, &obj->co_lu, "committing page write\n");
-	CL_PAGE_HEADER(D_PAGE, env, pg, "committing: [%d, %d]\n", from, to);
-
-	/*
-	 * queue a write for some time in the future the first time we
-	 * dirty the page.
-	 *
-	 * This is different from what other file systems do: they usually
-	 * just mark page (and some of its buffers) dirty and rely on
-	 * balance_dirty_pages() to start a write-back. Lustre wants write-back
-	 * to be started earlier for the following reasons:
-	 *
-	 *     (1) with a large number of clients we need to limit the amount
-	 *     of cached data on the clients a lot;
-	 *
-	 *     (2) large compute jobs generally want compute-only then io-only
-	 *     and the IO should complete as quickly as possible;
-	 *
-	 *     (3) IO is batched up to the RPC size and is async until the
-	 *     client max cache is hit
-	 *     (/sys/fs/lustre/osc/OSC.../max_dirty_mb)
-	 *
-	 */
-	if (!PageDirty(vmpage)) {
-		tallyop = LPROC_LL_DIRTY_MISSES;
-		result = cl_page_cache_add(env, io, pg, CRT_WRITE);
-		if (result == 0) {
-			/* page was added into cache successfully. */
-			set_page_dirty(vmpage);
-			vvp_write_pending(cl2ccc(obj), cp);
-		} else if (result == -EDQUOT) {
-			pgoff_t last_index = i_size_read(inode) >> PAGE_SHIFT;
-			bool need_clip = true;
-
-			/*
-			 * Client ran out of disk space grant. Possible
-			 * strategies are:
-			 *
-			 *     (a) do a sync write, renewing grant;
-			 *
-			 *     (b) stop writing on this stripe, switch to the
-			 *     next one.
-			 *
-			 * (b) is a part of "parallel io" design that is the
-			 * ultimate goal. (a) is what "old" client did, and
-			 * what the new code continues to do for the time
-			 * being.
-			 */
-			if (last_index > pg->cp_index) {
-				to = PAGE_SIZE;
-				need_clip = false;
-			} else if (last_index == pg->cp_index) {
-				int size_to = i_size_read(inode) & ~CFS_PAGE_MASK;
-
-				if (to < size_to)
-					to = size_to;
-			}
-			if (need_clip)
-				cl_page_clip(env, pg, 0, to);
-			result = vvp_page_sync_io(env, io, pg, cp, CRT_WRITE);
-			if (result)
-				CERROR("Write page %lu of inode %p failed %d\n",
-				       pg->cp_index, inode, result);
-		}
-	} else {
-		tallyop = LPROC_LL_DIRTY_HITS;
-		result = 0;
-	}
-	ll_stats_ops_tally(sbi, tallyop, 1);
-
-	/* Inode should be marked DIRTY even if no new page was marked DIRTY
-	 * because page could have been not flushed between 2 modifications.
-	 * It is important the file is marked DIRTY as soon as the I/O is done
-	 * Indeed, when cache is flushed, file could be already closed and it
-	 * is too late to warn the MDT.
-	 * It is acceptable that file is marked DIRTY even if I/O is dropped
-	 * for some reasons before being flushed to OST.
-	 */
-	if (result == 0) {
-		spin_lock(&lli->lli_lock);
-		lli->lli_flags |= LLIF_DATA_MODIFIED;
-		spin_unlock(&lli->lli_lock);
-	}
-
-	size = cl_offset(obj, pg->cp_index) + to;
-
-	ll_inode_size_lock(inode);
-	if (result == 0) {
-		if (size > i_size_read(inode)) {
-			cl_isize_write_nolock(inode, size);
-			CDEBUG(D_VFSTRACE, DFID" updating i_size %lu\n",
-			       PFID(lu_object_fid(&obj->co_lu)),
-			       (unsigned long)size);
-		}
-		cl_page_export(env, pg, 1);
-	} else {
-		if (size > i_size_read(inode))
-			cl_page_discard(env, io, pg);
-	}
-	ll_inode_size_unlock(inode);
-	return result;
+	CLOBINVRNT(env, ios->cis_io->ci_obj,
+		   vvp_object_invariant(ios->cis_io->ci_obj));
 }
 
 static const struct cl_io_operations vvp_io_ops = {
 	.op = {
 		[CIT_READ] = {
-			.cio_fini      = vvp_io_read_fini,
+			.cio_fini	= vvp_io_fini,
 			.cio_lock      = vvp_io_read_lock,
 			.cio_start     = vvp_io_read_start,
-			.cio_advance   = ccc_io_advance
+			.cio_advance	= vvp_io_advance,
 		},
 		[CIT_WRITE] = {
 			.cio_fini      = vvp_io_fini,
+			.cio_iter_init = vvp_io_write_iter_init,
+			.cio_iter_fini = vvp_io_write_iter_fini,
 			.cio_lock      = vvp_io_write_lock,
 			.cio_start     = vvp_io_write_start,
-			.cio_advance   = ccc_io_advance
+			.cio_advance   = vvp_io_advance,
 		},
 		[CIT_SETATTR] = {
 			.cio_fini       = vvp_io_setattr_fini,
@@ -1120,7 +1290,7 @@ static const struct cl_io_operations vvp_io_ops = {
 			.cio_iter_init = vvp_io_fault_iter_init,
 			.cio_lock      = vvp_io_fault_lock,
 			.cio_start     = vvp_io_fault_start,
-			.cio_end       = ccc_io_end
+			.cio_end       = vvp_io_end,
 		},
 		[CIT_FSYNC] = {
 			.cio_start  = vvp_io_fsync_start,
@@ -1131,29 +1301,26 @@ static const struct cl_io_operations vvp_io_ops = {
 		}
 	},
 	.cio_read_page     = vvp_io_read_page,
-	.cio_prepare_write = vvp_io_prepare_write,
-	.cio_commit_write  = vvp_io_commit_write
 };
 
 int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
 		struct cl_io *io)
 {
 	struct vvp_io      *vio   = vvp_env_io(env);
-	struct ccc_io      *cio   = ccc_env_io(env);
-	struct inode       *inode = ccc_object_inode(obj);
+	struct inode       *inode = vvp_object_inode(obj);
 	int		 result;
 
-	CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+	CLOBINVRNT(env, obj, vvp_object_invariant(obj));
 
 	CDEBUG(D_VFSTRACE, DFID
 	       " ignore/verify layout %d/%d, layout version %d restore needed %d\n",
 	       PFID(lu_object_fid(&obj->co_lu)),
 	       io->ci_ignore_layout, io->ci_verify_layout,
-	       cio->cui_layout_gen, io->ci_restore_needed);
+	       vio->vui_layout_gen, io->ci_restore_needed);
 
-	CL_IO_SLICE_CLEAN(cio, cui_cl);
-	cl_io_slice_add(io, &cio->cui_cl, obj, &vvp_io_ops);
-	vio->cui_ra_window_set = 0;
+	CL_IO_SLICE_CLEAN(vio, vui_cl);
+	cl_io_slice_add(io, &vio->vui_cl, obj, &vvp_io_ops);
+	vio->vui_ra_valid = false;
 	result = 0;
 	if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE) {
 		size_t count;
@@ -1166,7 +1333,7 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
 		if (count == 0)
 			result = 1;
 		else
-			cio->cui_tot_count = count;
+			vio->vui_tot_count = count;
 
 		/* for read/write, we store the jobid in the inode, and
 		 * it'll be fetched by osc when building RPC.
@@ -1192,7 +1359,7 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
 	 * because it might not grant layout lock in IT_OPEN.
 	 */
 	if (result == 0 && !io->ci_ignore_layout) {
-		result = ll_layout_refresh(inode, &cio->cui_layout_gen);
+		result = ll_layout_refresh(inode, &vio->vui_layout_gen);
 		if (result == -ENOENT)
 			/* If the inode on MDS has been removed, but the objects
 			 * on OSTs haven't been destroyed (async unlink), layout
@@ -1208,11 +1375,3 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
 
 	return result;
 }
-
-static struct vvp_io *cl2vvp_io(const struct lu_env *env,
-				const struct cl_io_slice *slice)
-{
-	/* Calling just for assertion */
-	cl2ccc_io(env, slice);
-	return vvp_env_io(env);
-}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_lock.c b/drivers/staging/lustre/lustre/llite/vvp_lock.c
index ff0948043c7a..64be0c9df35b 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_lock.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_lock.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -40,7 +36,7 @@
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include "../include/obd.h"
+#include "../include/obd_support.h"
 #include "../include/lustre_lite.h"
 
 #include "vvp_internal.h"
@@ -51,36 +47,41 @@
  *
  */
 
-/**
- * Estimates lock value for the purpose of managing the lock cache during
- * memory shortages.
- *
- * Locks for memory mapped files are almost infinitely precious, others are
- * junk. "Mapped locks" are heavy, but not infinitely heavy, so that they are
- * ordered within themselves by weights assigned from other layers.
- */
-static unsigned long vvp_lock_weigh(const struct lu_env *env,
-				    const struct cl_lock_slice *slice)
+static void vvp_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice)
 {
-	struct ccc_object *cob = cl2ccc(slice->cls_obj);
+	struct vvp_lock *vlk = cl2vvp_lock(slice);
 
-	return atomic_read(&cob->cob_mmap_cnt) > 0 ? ~0UL >> 2 : 0;
+	kmem_cache_free(vvp_lock_kmem, vlk);
+}
+
+static int vvp_lock_enqueue(const struct lu_env *env,
+			    const struct cl_lock_slice *slice,
+			    struct cl_io *unused, struct cl_sync_io *anchor)
+{
+	CLOBINVRNT(env, slice->cls_obj, vvp_object_invariant(slice->cls_obj));
+
+	return 0;
 }
 
 static const struct cl_lock_operations vvp_lock_ops = {
-	.clo_delete    = ccc_lock_delete,
-	.clo_fini      = ccc_lock_fini,
-	.clo_enqueue   = ccc_lock_enqueue,
-	.clo_wait      = ccc_lock_wait,
-	.clo_use       = ccc_lock_use,
-	.clo_unuse     = ccc_lock_unuse,
-	.clo_fits_into = ccc_lock_fits_into,
-	.clo_state     = ccc_lock_state,
-	.clo_weigh     = vvp_lock_weigh
+	.clo_fini	= vvp_lock_fini,
+	.clo_enqueue	= vvp_lock_enqueue,
 };
 
 int vvp_lock_init(const struct lu_env *env, struct cl_object *obj,
-		  struct cl_lock *lock, const struct cl_io *io)
+		  struct cl_lock *lock, const struct cl_io *unused)
 {
-	return ccc_lock_init(env, obj, lock, io, &vvp_lock_ops);
+	struct vvp_lock *vlk;
+	int result;
+
+	CLOBINVRNT(env, obj, vvp_object_invariant(obj));
+
+	vlk = kmem_cache_zalloc(vvp_lock_kmem, GFP_NOFS);
+	if (vlk) {
+		cl_lock_slice_add(lock, &vlk->vlk_cl, obj, &vvp_lock_ops);
+		result = 0;
+	} else {
+		result = -ENOMEM;
+	}
+	return result;
 }
diff --git a/drivers/staging/lustre/lustre/llite/vvp_object.c b/drivers/staging/lustre/lustre/llite/vvp_object.c
index 03c887d8ed83..2c520b0bf6ca 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_object.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_object.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -45,6 +41,7 @@
 #include "../include/obd.h"
 #include "../include/lustre_lite.h"
 
+#include "llite_internal.h"
 #include "vvp_internal.h"
 
 /*****************************************************************************
@@ -53,16 +50,25 @@
  *
  */
 
+int vvp_object_invariant(const struct cl_object *obj)
+{
+	struct inode *inode  = vvp_object_inode(obj);
+	struct ll_inode_info *lli = ll_i2info(inode);
+
+	return (S_ISREG(inode->i_mode) || inode->i_mode == 0) &&
+		lli->lli_clob == obj;
+}
+
 static int vvp_object_print(const struct lu_env *env, void *cookie,
 			    lu_printer_t p, const struct lu_object *o)
 {
-	struct ccc_object    *obj   = lu2ccc(o);
-	struct inode	 *inode = obj->cob_inode;
+	struct vvp_object    *obj   = lu2vvp(o);
+	struct inode	 *inode = obj->vob_inode;
 	struct ll_inode_info *lli;
 
 	(*p)(env, cookie, "(%s %d %d) inode: %p ",
-	     list_empty(&obj->cob_pending_list) ? "-" : "+",
-	     obj->cob_transient_pages, atomic_read(&obj->cob_mmap_cnt),
+	     list_empty(&obj->vob_pending_list) ? "-" : "+",
+	     obj->vob_transient_pages, atomic_read(&obj->vob_mmap_cnt),
 	     inode);
 	if (inode) {
 		lli = ll_i2info(inode);
@@ -77,7 +83,7 @@ static int vvp_object_print(const struct lu_env *env, void *cookie,
 static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj,
 			struct cl_attr *attr)
 {
-	struct inode *inode = ccc_object_inode(obj);
+	struct inode *inode = vvp_object_inode(obj);
 
 	/*
 	 * lov overwrites most of these fields in
@@ -99,7 +105,7 @@ static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj,
 static int vvp_attr_set(const struct lu_env *env, struct cl_object *obj,
 			const struct cl_attr *attr, unsigned valid)
 {
-	struct inode *inode = ccc_object_inode(obj);
+	struct inode *inode = vvp_object_inode(obj);
 
 	if (valid & CAT_UID)
 		inode->i_uid = make_kuid(&init_user_ns, attr->cat_uid);
@@ -112,7 +118,7 @@ static int vvp_attr_set(const struct lu_env *env, struct cl_object *obj,
 	if (valid & CAT_CTIME)
 		inode->i_ctime.tv_sec = attr->cat_ctime;
 	if (0 && valid & CAT_SIZE)
-		cl_isize_write_nolock(inode, attr->cat_size);
+		i_size_write(inode, attr->cat_size);
 	/* not currently necessary */
 	if (0 && valid & (CAT_UID|CAT_GID|CAT_SIZE))
 		mark_inode_dirty(inode);
@@ -165,6 +171,40 @@ static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj,
 	return 0;
 }
 
+static int vvp_prune(const struct lu_env *env, struct cl_object *obj)
+{
+	struct inode *inode = vvp_object_inode(obj);
+	int rc;
+
+	rc = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_LOCAL, 1);
+	if (rc < 0) {
+		CDEBUG(D_VFSTRACE, DFID ": writeback failed: %d\n",
+		       PFID(lu_object_fid(&obj->co_lu)), rc);
+		return rc;
+	}
+
+	truncate_inode_pages(inode->i_mapping, 0);
+	return 0;
+}
+
+static int vvp_object_glimpse(const struct lu_env *env,
+			      const struct cl_object *obj, struct ost_lvb *lvb)
+{
+	struct inode *inode = vvp_object_inode(obj);
+
+	lvb->lvb_mtime = LTIME_S(inode->i_mtime);
+	lvb->lvb_atime = LTIME_S(inode->i_atime);
+	lvb->lvb_ctime = LTIME_S(inode->i_ctime);
+	/*
+	 * LU-417: Add dirty pages block count lest i_blocks reports 0, some
+	 * "cp" or "tar" on remote node may think it's a completely sparse file
+	 * and skip it.
+	 */
+	if (lvb->lvb_size > 0 && lvb->lvb_blocks == 0)
+		lvb->lvb_blocks = dirty_cnt(inode);
+	return 0;
+}
+
 static const struct cl_object_operations vvp_ops = {
 	.coo_page_init = vvp_page_init,
 	.coo_lock_init = vvp_lock_init,
@@ -172,29 +212,94 @@ static const struct cl_object_operations vvp_ops = {
 	.coo_attr_get  = vvp_attr_get,
 	.coo_attr_set  = vvp_attr_set,
 	.coo_conf_set  = vvp_conf_set,
-	.coo_glimpse   = ccc_object_glimpse
+	.coo_prune     = vvp_prune,
+	.coo_glimpse   = vvp_object_glimpse
 };
 
+static int vvp_object_init0(const struct lu_env *env,
+			    struct vvp_object *vob,
+			    const struct cl_object_conf *conf)
+{
+	vob->vob_inode = conf->coc_inode;
+	vob->vob_transient_pages = 0;
+	cl_object_page_init(&vob->vob_cl, sizeof(struct vvp_page));
+	return 0;
+}
+
+static int vvp_object_init(const struct lu_env *env, struct lu_object *obj,
+			   const struct lu_object_conf *conf)
+{
+	struct vvp_device *dev = lu2vvp_dev(obj->lo_dev);
+	struct vvp_object *vob = lu2vvp(obj);
+	struct lu_object  *below;
+	struct lu_device  *under;
+	int result;
+
+	under = &dev->vdv_next->cd_lu_dev;
+	below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under);
+	if (below) {
+		const struct cl_object_conf *cconf;
+
+		cconf = lu2cl_conf(conf);
+		INIT_LIST_HEAD(&vob->vob_pending_list);
+		lu_object_add(obj, below);
+		result = vvp_object_init0(env, vob, cconf);
+	} else {
+		result = -ENOMEM;
+	}
+
+	return result;
+}
+
+static void vvp_object_free(const struct lu_env *env, struct lu_object *obj)
+{
+	struct vvp_object *vob = lu2vvp(obj);
+
+	lu_object_fini(obj);
+	lu_object_header_fini(obj->lo_header);
+	kmem_cache_free(vvp_object_kmem, vob);
+}
+
 static const struct lu_object_operations vvp_lu_obj_ops = {
-	.loo_object_init  = ccc_object_init,
-	.loo_object_free  = ccc_object_free,
-	.loo_object_print = vvp_object_print
+	.loo_object_init	= vvp_object_init,
+	.loo_object_free	= vvp_object_free,
+	.loo_object_print	= vvp_object_print,
 };
 
-struct ccc_object *cl_inode2ccc(struct inode *inode)
+struct vvp_object *cl_inode2vvp(struct inode *inode)
 {
-	struct cl_inode_info *lli = cl_i2info(inode);
+	struct ll_inode_info *lli = ll_i2info(inode);
 	struct cl_object     *obj = lli->lli_clob;
 	struct lu_object     *lu;
 
 	lu = lu_object_locate(obj->co_lu.lo_header, &vvp_device_type);
 	LASSERT(lu);
-	return lu2ccc(lu);
+	return lu2vvp(lu);
 }
 
 struct lu_object *vvp_object_alloc(const struct lu_env *env,
-				   const struct lu_object_header *hdr,
+				   const struct lu_object_header *unused,
 				   struct lu_device *dev)
 {
-	return ccc_object_alloc(env, hdr, dev, &vvp_ops, &vvp_lu_obj_ops);
+	struct vvp_object *vob;
+	struct lu_object  *obj;
+
+	vob = kmem_cache_zalloc(vvp_object_kmem, GFP_NOFS);
+	if (vob) {
+		struct cl_object_header *hdr;
+
+		obj = &vob->vob_cl.co_lu;
+		hdr = &vob->vob_header;
+		cl_object_header_init(hdr);
+		hdr->coh_page_bufsize = cfs_size_round(sizeof(struct cl_page));
+
+		lu_object_init(obj, &hdr->coh_lu, dev);
+		lu_object_add_top(&hdr->coh_lu, obj);
+
+		vob->vob_cl.co_ops = &vvp_ops;
+		obj->lo_ops = &vvp_lu_obj_ops;
+	} else {
+		obj = NULL;
+	}
+	return obj;
 }
diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c
index 33ca3eb34965..2e566d90bb94 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_page.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_page.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -41,9 +37,16 @@
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include "../include/obd.h"
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/page-flags.h>
+#include <linux/pagemap.h>
+
 #include "../include/lustre_lite.h"
 
+#include "llite_internal.h"
 #include "vvp_internal.h"
 
 /*****************************************************************************
@@ -52,9 +55,9 @@
  *
  */
 
-static void vvp_page_fini_common(struct ccc_page *cp)
+static void vvp_page_fini_common(struct vvp_page *vpg)
 {
-	struct page *vmpage = cp->cpg_page;
+	struct page *vmpage = vpg->vpg_page;
 
 	LASSERT(vmpage);
 	put_page(vmpage);
@@ -63,23 +66,23 @@ static void vvp_page_fini_common(struct ccc_page *cp)
 static void vvp_page_fini(const struct lu_env *env,
 			  struct cl_page_slice *slice)
 {
-	struct ccc_page *cp = cl2ccc_page(slice);
-	struct page *vmpage  = cp->cpg_page;
+	struct vvp_page *vpg     = cl2vvp_page(slice);
+	struct page     *vmpage  = vpg->vpg_page;
 
 	/*
 	 * vmpage->private was already cleared when page was moved into
 	 * VPG_FREEING state.
 	 */
 	LASSERT((struct cl_page *)vmpage->private != slice->cpl_page);
-	vvp_page_fini_common(cp);
+	vvp_page_fini_common(vpg);
 }
 
 static int vvp_page_own(const struct lu_env *env,
 			const struct cl_page_slice *slice, struct cl_io *io,
 			int nonblock)
 {
-	struct ccc_page *vpg    = cl2ccc_page(slice);
-	struct page      *vmpage = vpg->cpg_page;
+	struct vvp_page *vpg    = cl2vvp_page(slice);
+	struct page     *vmpage = vpg->vpg_page;
 
 	LASSERT(vmpage);
 	if (nonblock) {
@@ -96,6 +99,7 @@ static int vvp_page_own(const struct lu_env *env,
 
 	lock_page(vmpage);
 	wait_on_page_writeback(vmpage);
+
 	return 0;
 }
 
@@ -136,41 +140,15 @@ static void vvp_page_discard(const struct lu_env *env,
 			     struct cl_io *unused)
 {
 	struct page	   *vmpage  = cl2vm_page(slice);
-	struct address_space *mapping;
-	struct ccc_page      *cpg     = cl2ccc_page(slice);
+	struct vvp_page      *vpg     = cl2vvp_page(slice);
 
 	LASSERT(vmpage);
 	LASSERT(PageLocked(vmpage));
 
-	mapping = vmpage->mapping;
+	if (vpg->vpg_defer_uptodate && !vpg->vpg_ra_used)
+		ll_ra_stats_inc(vmpage->mapping->host, RA_STAT_DISCARDED);
 
-	if (cpg->cpg_defer_uptodate && !cpg->cpg_ra_used)
-		ll_ra_stats_inc(mapping, RA_STAT_DISCARDED);
-
-	/*
-	 * truncate_complete_page() calls
-	 * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete().
-	 */
-	truncate_complete_page(mapping, vmpage);
-}
-
-static int vvp_page_unmap(const struct lu_env *env,
-			  const struct cl_page_slice *slice,
-			  struct cl_io *unused)
-{
-	struct page *vmpage = cl2vm_page(slice);
-	__u64       offset;
-
-	LASSERT(vmpage);
-	LASSERT(PageLocked(vmpage));
-
-	offset = vmpage->index << PAGE_SHIFT;
-
-	/*
-	 * XXX is it safe to call this with the page lock held?
-	 */
-	ll_teardown_mmaps(vmpage->mapping, offset, offset + PAGE_SIZE);
-	return 0;
+	ll_invalidate_page(vmpage);
 }
 
 static void vvp_page_delete(const struct lu_env *env,
@@ -179,12 +157,20 @@ static void vvp_page_delete(const struct lu_env *env,
 	struct page       *vmpage = cl2vm_page(slice);
 	struct inode     *inode  = vmpage->mapping->host;
 	struct cl_object *obj    = slice->cpl_obj;
+	struct cl_page   *page   = slice->cpl_page;
+	int refc;
 
 	LASSERT(PageLocked(vmpage));
-	LASSERT((struct cl_page *)vmpage->private == slice->cpl_page);
-	LASSERT(inode == ccc_object_inode(obj));
+	LASSERT((struct cl_page *)vmpage->private == page);
+	LASSERT(inode == vvp_object_inode(obj));
+
+	vvp_write_complete(cl2vvp(obj), cl2vvp_page(slice));
+
+	/* Drop the reference count held in vvp_page_init */
+	refc = atomic_dec_return(&page->cp_ref);
+	LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc);
 
-	vvp_write_complete(cl2ccc(obj), cl2ccc_page(slice));
+	ClearPageUptodate(vmpage);
 	ClearPagePrivate(vmpage);
 	vmpage->private = 0;
 	/*
@@ -237,7 +223,7 @@ static int vvp_page_prep_write(const struct lu_env *env,
 	if (!pg->cp_sync_io)
 		set_page_writeback(vmpage);
 
-	vvp_write_pending(cl2ccc(slice->cpl_obj), cl2ccc_page(slice));
+	vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice));
 
 	return 0;
 }
@@ -250,11 +236,11 @@ static int vvp_page_prep_write(const struct lu_env *env,
  */
 static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret)
 {
-	struct ccc_object *obj = cl_inode2ccc(inode);
+	struct vvp_object *obj = cl_inode2vvp(inode);
 
 	if (ioret == 0) {
 		ClearPageError(vmpage);
-		obj->cob_discard_page_warned = 0;
+		obj->vob_discard_page_warned = 0;
 	} else {
 		SetPageError(vmpage);
 		if (ioret == -ENOSPC)
@@ -263,8 +249,8 @@ static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret
 			set_bit(AS_EIO, &inode->i_mapping->flags);
 
 		if ((ioret == -ESHUTDOWN || ioret == -EINTR) &&
-		    obj->cob_discard_page_warned == 0) {
-			obj->cob_discard_page_warned = 1;
+		     obj->vob_discard_page_warned == 0) {
+			obj->vob_discard_page_warned = 1;
 			ll_dirty_page_discard_warn(vmpage, ioret);
 		}
 	}
@@ -274,22 +260,23 @@ static void vvp_page_completion_read(const struct lu_env *env,
 				     const struct cl_page_slice *slice,
 				     int ioret)
 {
-	struct ccc_page *cp     = cl2ccc_page(slice);
-	struct page      *vmpage = cp->cpg_page;
-	struct cl_page  *page   = cl_page_top(slice->cpl_page);
-	struct inode    *inode  = ccc_object_inode(page->cp_obj);
+	struct vvp_page *vpg    = cl2vvp_page(slice);
+	struct page     *vmpage = vpg->vpg_page;
+	struct cl_page  *page   = slice->cpl_page;
+	struct inode    *inode  = vvp_object_inode(page->cp_obj);
 
 	LASSERT(PageLocked(vmpage));
 	CL_PAGE_HEADER(D_PAGE, env, page, "completing READ with %d\n", ioret);
 
-	if (cp->cpg_defer_uptodate)
+	if (vpg->vpg_defer_uptodate)
 		ll_ra_count_put(ll_i2sbi(inode), 1);
 
 	if (ioret == 0)  {
-		if (!cp->cpg_defer_uptodate)
+		if (!vpg->vpg_defer_uptodate)
 			cl_page_export(env, page, 1);
-	} else
-		cp->cpg_defer_uptodate = 0;
+	} else {
+		vpg->vpg_defer_uptodate = 0;
+	}
 
 	if (!page->cp_sync_io)
 		unlock_page(vmpage);
@@ -299,9 +286,9 @@ static void vvp_page_completion_write(const struct lu_env *env,
 				      const struct cl_page_slice *slice,
 				      int ioret)
 {
-	struct ccc_page *cp     = cl2ccc_page(slice);
+	struct vvp_page *vpg     = cl2vvp_page(slice);
 	struct cl_page  *pg     = slice->cpl_page;
-	struct page      *vmpage = cp->cpg_page;
+	struct page     *vmpage = vpg->vpg_page;
 
 	CL_PAGE_HEADER(D_PAGE, env, pg, "completing WRITE with %d\n", ioret);
 
@@ -315,8 +302,8 @@ static void vvp_page_completion_write(const struct lu_env *env,
 	 * and then re-add the page into pending transfer queue.  -jay
 	 */
 
-	cp->cpg_write_queued = 0;
-	vvp_write_complete(cl2ccc(slice->cpl_obj), cp);
+	vpg->vpg_write_queued = 0;
+	vvp_write_complete(cl2vvp(slice->cpl_obj), vpg);
 
 	if (pg->cp_sync_io) {
 		LASSERT(PageLocked(vmpage));
@@ -327,7 +314,7 @@ static void vvp_page_completion_write(const struct lu_env *env,
 		 * Only mark the page error only when it's an async write
 		 * because applications won't wait for IO to finish.
 		 */
-		vvp_vmpage_error(ccc_object_inode(pg->cp_obj), vmpage, ioret);
+		vvp_vmpage_error(vvp_object_inode(pg->cp_obj), vmpage, ioret);
 
 		end_page_writeback(vmpage);
 	}
@@ -359,7 +346,7 @@ static int vvp_page_make_ready(const struct lu_env *env,
 		LASSERT(pg->cp_state == CPS_CACHED);
 		/* This actually clears the dirty bit in the radix tree. */
 		set_page_writeback(vmpage);
-		vvp_write_pending(cl2ccc(slice->cpl_obj), cl2ccc_page(slice));
+		vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice));
 		CL_PAGE_HEADER(D_PAGE, env, pg, "readied\n");
 	} else if (pg->cp_state == CPS_PAGEOUT) {
 		/* is it possible for osc_flush_async_page() to already
@@ -375,24 +362,51 @@ static int vvp_page_make_ready(const struct lu_env *env,
 	return result;
 }
 
+static int vvp_page_is_under_lock(const struct lu_env *env,
+				  const struct cl_page_slice *slice,
+				  struct cl_io *io, pgoff_t *max_index)
+{
+	if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE ||
+	    io->ci_type == CIT_FAULT) {
+		struct vvp_io *vio = vvp_env_io(env);
+
+		if (unlikely(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED))
+			*max_index = CL_PAGE_EOF;
+	}
+	return 0;
+}
+
 static int vvp_page_print(const struct lu_env *env,
 			  const struct cl_page_slice *slice,
 			  void *cookie, lu_printer_t printer)
 {
-	struct ccc_page *vp = cl2ccc_page(slice);
-	struct page      *vmpage = vp->cpg_page;
+	struct vvp_page *vpg = cl2vvp_page(slice);
+	struct page     *vmpage = vpg->vpg_page;
 
 	(*printer)(env, cookie, LUSTRE_VVP_NAME "-page@%p(%d:%d:%d) vm@%p ",
-		   vp, vp->cpg_defer_uptodate, vp->cpg_ra_used,
-		   vp->cpg_write_queued, vmpage);
+		   vpg, vpg->vpg_defer_uptodate, vpg->vpg_ra_used,
+		   vpg->vpg_write_queued, vmpage);
 	if (vmpage) {
 		(*printer)(env, cookie, "%lx %d:%d %lx %lu %slru",
 			   (long)vmpage->flags, page_count(vmpage),
 			   page_mapcount(vmpage), vmpage->private,
-			   page_index(vmpage),
+			   vmpage->index,
 			   list_empty(&vmpage->lru) ? "not-" : "");
 	}
+
 	(*printer)(env, cookie, "\n");
+
+	return 0;
+}
+
+static int vvp_page_fail(const struct lu_env *env,
+			 const struct cl_page_slice *slice)
+{
+	/*
+	 * Cached read?
+	 */
+	LBUG();
+
 	return 0;
 }
 
@@ -401,32 +415,38 @@ static const struct cl_page_operations vvp_page_ops = {
 	.cpo_assume	= vvp_page_assume,
 	.cpo_unassume      = vvp_page_unassume,
 	.cpo_disown	= vvp_page_disown,
-	.cpo_vmpage	= ccc_page_vmpage,
 	.cpo_discard       = vvp_page_discard,
 	.cpo_delete	= vvp_page_delete,
-	.cpo_unmap	 = vvp_page_unmap,
 	.cpo_export	= vvp_page_export,
 	.cpo_is_vmlocked   = vvp_page_is_vmlocked,
 	.cpo_fini	  = vvp_page_fini,
 	.cpo_print	 = vvp_page_print,
-	.cpo_is_under_lock = ccc_page_is_under_lock,
+	.cpo_is_under_lock = vvp_page_is_under_lock,
 	.io = {
 		[CRT_READ] = {
 			.cpo_prep	= vvp_page_prep_read,
 			.cpo_completion  = vvp_page_completion_read,
-			.cpo_make_ready  = ccc_fail,
+			.cpo_make_ready = vvp_page_fail,
 		},
 		[CRT_WRITE] = {
 			.cpo_prep	= vvp_page_prep_write,
 			.cpo_completion  = vvp_page_completion_write,
 			.cpo_make_ready  = vvp_page_make_ready,
-		}
-	}
+		},
+	},
 };
 
+static int vvp_transient_page_prep(const struct lu_env *env,
+				   const struct cl_page_slice *slice,
+				   struct cl_io *unused)
+{
+	/* transient page should always be sent. */
+	return 0;
+}
+
 static void vvp_transient_page_verify(const struct cl_page *page)
 {
-	struct inode *inode = ccc_object_inode(page->cp_obj);
+	struct inode *inode = vvp_object_inode(page->cp_obj);
 
 	LASSERT(!inode_trylock(inode));
 }
@@ -477,7 +497,7 @@ static void vvp_transient_page_discard(const struct lu_env *env,
 static int vvp_transient_page_is_vmlocked(const struct lu_env *env,
 					  const struct cl_page_slice *slice)
 {
-	struct inode    *inode = ccc_object_inode(slice->cpl_obj);
+	struct inode    *inode = vvp_object_inode(slice->cpl_obj);
 	int	locked;
 
 	locked = !inode_trylock(inode);
@@ -497,13 +517,13 @@ vvp_transient_page_completion(const struct lu_env *env,
 static void vvp_transient_page_fini(const struct lu_env *env,
 				    struct cl_page_slice *slice)
 {
-	struct ccc_page *cp = cl2ccc_page(slice);
+	struct vvp_page *vpg = cl2vvp_page(slice);
 	struct cl_page *clp = slice->cpl_page;
-	struct ccc_object *clobj = cl2ccc(clp->cp_obj);
+	struct vvp_object *clobj = cl2vvp(clp->cp_obj);
 
-	vvp_page_fini_common(cp);
-	LASSERT(!inode_trylock(clobj->cob_inode));
-	clobj->cob_transient_pages--;
+	vvp_page_fini_common(vpg);
+	LASSERT(!inode_trylock(clobj->vob_inode));
+	clobj->vob_transient_pages--;
 }
 
 static const struct cl_page_operations vvp_transient_page_ops = {
@@ -512,45 +532,48 @@ static const struct cl_page_operations vvp_transient_page_ops = {
 	.cpo_unassume      = vvp_transient_page_unassume,
 	.cpo_disown	= vvp_transient_page_disown,
 	.cpo_discard       = vvp_transient_page_discard,
-	.cpo_vmpage	= ccc_page_vmpage,
 	.cpo_fini	  = vvp_transient_page_fini,
 	.cpo_is_vmlocked   = vvp_transient_page_is_vmlocked,
 	.cpo_print	 = vvp_page_print,
-	.cpo_is_under_lock = ccc_page_is_under_lock,
+	.cpo_is_under_lock	= vvp_page_is_under_lock,
 	.io = {
 		[CRT_READ] = {
-			.cpo_prep	= ccc_transient_page_prep,
+			.cpo_prep	= vvp_transient_page_prep,
 			.cpo_completion  = vvp_transient_page_completion,
 		},
 		[CRT_WRITE] = {
-			.cpo_prep	= ccc_transient_page_prep,
+			.cpo_prep	= vvp_transient_page_prep,
 			.cpo_completion  = vvp_transient_page_completion,
 		}
 	}
 };
 
 int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
-		  struct cl_page *page, struct page *vmpage)
+		struct cl_page *page, pgoff_t index)
 {
-	struct ccc_page *cpg = cl_object_page_slice(obj, page);
+	struct vvp_page *vpg = cl_object_page_slice(obj, page);
+	struct page     *vmpage = page->cp_vmpage;
 
-	CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+	CLOBINVRNT(env, obj, vvp_object_invariant(obj));
 
-	cpg->cpg_page = vmpage;
+	vpg->vpg_page = vmpage;
 	get_page(vmpage);
 
-	INIT_LIST_HEAD(&cpg->cpg_pending_linkage);
+	INIT_LIST_HEAD(&vpg->vpg_pending_linkage);
 	if (page->cp_type == CPT_CACHEABLE) {
+		/* in cache, decref in vvp_page_delete */
+		atomic_inc(&page->cp_ref);
 		SetPagePrivate(vmpage);
 		vmpage->private = (unsigned long)page;
-		cl_page_slice_add(page, &cpg->cpg_cl, obj, &vvp_page_ops);
+		cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
+				  &vvp_page_ops);
 	} else {
-		struct ccc_object *clobj = cl2ccc(obj);
+		struct vvp_object *clobj = cl2vvp(obj);
 
-		LASSERT(!inode_trylock(clobj->cob_inode));
-		cl_page_slice_add(page, &cpg->cpg_cl, obj,
+		LASSERT(!inode_trylock(clobj->vob_inode));
+		cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
 				  &vvp_transient_page_ops);
-		clobj->cob_transient_pages++;
+		clobj->vob_transient_pages++;
 	}
 	return 0;
 }
diff --git a/drivers/staging/lustre/lustre/llite/vvp_req.c b/drivers/staging/lustre/lustre/llite/vvp_req.c
new file mode 100644
index 000000000000..9fe9d6c0a7d1
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/vvp_req.c
@@ -0,0 +1,121 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2014, Intel Corporation.
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include "../include/lustre/lustre_idl.h"
+#include "../include/cl_object.h"
+#include "../include/obd.h"
+#include "../include/obd_support.h"
+#include "../include/lustre_lite.h"
+#include "llite_internal.h"
+#include "vvp_internal.h"
+
+static inline struct vvp_req *cl2vvp_req(const struct cl_req_slice *slice)
+{
+	return container_of0(slice, struct vvp_req, vrq_cl);
+}
+
+/**
+ * Implementation of struct cl_req_operations::cro_attr_set() for VVP
+ * layer. VVP is responsible for
+ *
+ *    - o_[mac]time
+ *
+ *    - o_mode
+ *
+ *    - o_parent_seq
+ *
+ *    - o_[ug]id
+ *
+ *    - o_parent_oid
+ *
+ *    - o_parent_ver
+ *
+ *    - o_ioepoch,
+ *
+ */
+static void vvp_req_attr_set(const struct lu_env *env,
+			     const struct cl_req_slice *slice,
+			     const struct cl_object *obj,
+			     struct cl_req_attr *attr, u64 flags)
+{
+	struct inode *inode;
+	struct obdo  *oa;
+	u32	      valid_flags;
+
+	oa = attr->cra_oa;
+	inode = vvp_object_inode(obj);
+	valid_flags = OBD_MD_FLTYPE;
+
+	if (slice->crs_req->crq_type == CRT_WRITE) {
+		if (flags & OBD_MD_FLEPOCH) {
+			oa->o_valid |= OBD_MD_FLEPOCH;
+			oa->o_ioepoch = ll_i2info(inode)->lli_ioepoch;
+			valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
+				       OBD_MD_FLUID | OBD_MD_FLGID;
+		}
+	}
+	obdo_from_inode(oa, inode, valid_flags & flags);
+	obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
+	memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid,
+	       JOBSTATS_JOBID_SIZE);
+}
+
+static void vvp_req_completion(const struct lu_env *env,
+			       const struct cl_req_slice *slice, int ioret)
+{
+	struct vvp_req *vrq;
+
+	if (ioret > 0)
+		cl_stats_tally(slice->crs_dev, slice->crs_req->crq_type, ioret);
+
+	vrq = cl2vvp_req(slice);
+	kmem_cache_free(vvp_req_kmem, vrq);
+}
+
+static const struct cl_req_operations vvp_req_ops = {
+	.cro_attr_set   = vvp_req_attr_set,
+	.cro_completion = vvp_req_completion
+};
+
+int vvp_req_init(const struct lu_env *env, struct cl_device *dev,
+		 struct cl_req *req)
+{
+	struct vvp_req *vrq;
+	int result;
+
+	vrq = kmem_cache_zalloc(vvp_req_kmem, GFP_NOFS);
+	if (vrq) {
+		cl_req_slice_add(req, &vrq->vrq_cl, dev, &vvp_req_ops);
+		result = 0;
+	} else {
+		result = -ENOMEM;
+	}
+	return result;
+}
diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c
index b68dcc921ca2..98303cf85815 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -111,11 +107,6 @@ int ll_setxattr_common(struct inode *inode, const char *name,
 	struct ll_sb_info *sbi = ll_i2sbi(inode);
 	struct ptlrpc_request *req = NULL;
 	int xattr_type, rc;
-#ifdef CONFIG_FS_POSIX_ACL
-	struct rmtacl_ctl_entry *rce = NULL;
-	posix_acl_xattr_header *new_value = NULL;
-	ext_acl_xattr_header *acl = NULL;
-#endif
 	const char *pv = value;
 
 	xattr_type = get_xattr_type(name);
@@ -143,61 +134,9 @@ int ll_setxattr_common(struct inode *inode, const char *name,
 	    strcmp(name, "security.selinux") == 0)
 		return -EOPNOTSUPP;
 
-#ifdef CONFIG_FS_POSIX_ACL
-	if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
-	    (xattr_type == XATTR_ACL_ACCESS_T ||
-	    xattr_type == XATTR_ACL_DEFAULT_T)) {
-		rce = rct_search(&sbi->ll_rct, current_pid());
-		if (!rce ||
-		    (rce->rce_ops != RMT_LSETFACL &&
-		    rce->rce_ops != RMT_RSETFACL))
-			return -EOPNOTSUPP;
-
-		if (rce->rce_ops == RMT_LSETFACL) {
-			struct eacl_entry *ee;
-
-			ee = et_search_del(&sbi->ll_et, current_pid(),
-					   ll_inode2fid(inode), xattr_type);
-			if (valid & OBD_MD_FLXATTR) {
-				acl = lustre_acl_xattr_merge2ext(
-						(posix_acl_xattr_header *)value,
-						size, ee->ee_acl);
-				if (IS_ERR(acl)) {
-					ee_free(ee);
-					return PTR_ERR(acl);
-				}
-				size =  CFS_ACL_XATTR_SIZE(\
-						le32_to_cpu(acl->a_count), \
-						ext_acl_xattr);
-				pv = (const char *)acl;
-			}
-			ee_free(ee);
-		} else if (rce->rce_ops == RMT_RSETFACL) {
-			rc = lustre_posix_acl_xattr_filter(
-						(posix_acl_xattr_header *)value,
-						size, &new_value);
-			if (unlikely(rc < 0))
-				return rc;
-			size = rc;
-
-			pv = (const char *)new_value;
-		} else
-			return -EOPNOTSUPP;
-
-		valid |= rce_ops2valid(rce->rce_ops);
-	}
-#endif
 	rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode),
 			 valid, name, pv, size, 0, flags,
 			 ll_i2suppgid(inode), &req);
-#ifdef CONFIG_FS_POSIX_ACL
-	/*
-	 * Release the posix ACL space.
-	 */
-	kfree(new_value);
-	if (acl)
-		lustre_ext_acl_xattr_free(acl);
-#endif
 	if (rc) {
 		if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
 			LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n");
@@ -210,16 +149,14 @@ int ll_setxattr_common(struct inode *inode, const char *name,
 	return 0;
 }
 
-int ll_setxattr(struct dentry *dentry, const char *name,
-		const void *value, size_t size, int flags)
+int ll_setxattr(struct dentry *dentry, struct inode *inode,
+		const char *name, const void *value, size_t size, int flags)
 {
-	struct inode *inode = d_inode(dentry);
-
 	LASSERT(inode);
 	LASSERT(name);
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n",
-	       inode->i_ino, inode->i_generation, inode, name);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
+	       PFID(ll_inode2fid(inode)), inode, name);
 
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1);
 
@@ -243,12 +180,12 @@ int ll_setxattr(struct dentry *dentry, const char *name,
 			lump->lmm_stripe_offset = -1;
 
 		if (lump && S_ISREG(inode->i_mode)) {
-			int flags = FMODE_WRITE;
+			__u64 it_flags = FMODE_WRITE;
 			int lum_size = (lump->lmm_magic == LOV_USER_MAGIC_V1) ?
 				sizeof(*lump) : sizeof(struct lov_user_md_v3);
 
-			rc = ll_lov_setstripe_ea_info(inode, dentry, flags, lump,
-						      lum_size);
+			rc = ll_lov_setstripe_ea_info(inode, dentry, it_flags,
+						      lump, lum_size);
 			/* b10667: rc always be 0 here for now */
 			rc = 0;
 		} else if (S_ISDIR(inode->i_mode)) {
@@ -272,8 +209,8 @@ int ll_removexattr(struct dentry *dentry, const char *name)
 	LASSERT(inode);
 	LASSERT(name);
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n",
-	       inode->i_ino, inode->i_generation, inode, name);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
+	       PFID(ll_inode2fid(inode)), inode, name);
 
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1);
 	return ll_setxattr_common(inode, name, NULL, 0, 0,
@@ -289,11 +226,10 @@ int ll_getxattr_common(struct inode *inode, const char *name,
 	struct mdt_body *body;
 	int xattr_type, rc;
 	void *xdata;
-	struct rmtacl_ctl_entry *rce = NULL;
 	struct ll_inode_info *lli = ll_i2info(inode);
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n",
-	       inode->i_ino, inode->i_generation, inode);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+	       PFID(ll_inode2fid(inode)), inode);
 
 	/* listxattr have slightly different behavior from of ext3:
 	 * without 'user_xattr' ext3 will list all xattr names but
@@ -320,25 +256,11 @@ int ll_getxattr_common(struct inode *inode, const char *name,
 		return -EOPNOTSUPP;
 
 #ifdef CONFIG_FS_POSIX_ACL
-	if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
-	    (xattr_type == XATTR_ACL_ACCESS_T ||
-	    xattr_type == XATTR_ACL_DEFAULT_T)) {
-		rce = rct_search(&sbi->ll_rct, current_pid());
-		if (!rce ||
-		    (rce->rce_ops != RMT_LSETFACL &&
-		    rce->rce_ops != RMT_LGETFACL &&
-		    rce->rce_ops != RMT_RSETFACL &&
-		    rce->rce_ops != RMT_RGETFACL))
-			return -EOPNOTSUPP;
-	}
-
 	/* posix acl is under protection of LOOKUP lock. when calling to this,
 	 * we just have path resolution to the target inode, so we have great
 	 * chance that cached ACL is uptodate.
 	 */
-	if (xattr_type == XATTR_ACL_ACCESS_T &&
-	    !(sbi->ll_flags & LL_SBI_RMT_CLIENT)) {
-
+	if (xattr_type == XATTR_ACL_ACCESS_T) {
 		struct posix_acl *acl;
 
 		spin_lock(&lli->lli_lock);
@@ -380,9 +302,7 @@ do_getxattr:
 	} else {
 getxattr_nocache:
 		rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode),
-				valid | (rce ? rce_ops2valid(rce->rce_ops) : 0),
-				name, NULL, 0, size, 0, &req);
-
+				 valid, name, NULL, 0, size, 0, &req);
 		if (rc < 0)
 			goto out_xattr;
 
@@ -419,26 +339,6 @@ getxattr_nocache:
 		rc = body->eadatasize;
 	}
 
-#ifdef CONFIG_FS_POSIX_ACL
-	if (rce && rce->rce_ops == RMT_LSETFACL) {
-		ext_acl_xattr_header *acl;
-
-		acl = lustre_posix_acl_xattr_2ext(
-					(posix_acl_xattr_header *)buffer, rc);
-		if (IS_ERR(acl)) {
-			rc = PTR_ERR(acl);
-			goto out;
-		}
-
-		rc = ee_add(&sbi->ll_et, current_pid(), ll_inode2fid(inode),
-			    xattr_type, acl);
-		if (unlikely(rc < 0)) {
-			lustre_ext_acl_xattr_free(acl);
-			goto out;
-		}
-	}
-#endif
-
 out_xattr:
 	if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
 		LCONSOLE_INFO(
@@ -451,16 +351,14 @@ out:
 	return rc;
 }
 
-ssize_t ll_getxattr(struct dentry *dentry, const char *name,
-		    void *buffer, size_t size)
+ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode,
+		    const char *name, void *buffer, size_t size)
 {
-	struct inode *inode = d_inode(dentry);
-
 	LASSERT(inode);
 	LASSERT(name);
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n",
-	       inode->i_ino, inode->i_generation, inode, name);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
+	       PFID(ll_inode2fid(inode)), inode, name);
 
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
 
@@ -554,8 +452,8 @@ ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
 
 	LASSERT(inode);
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n",
-	       inode->i_ino, inode->i_generation, inode);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+	       PFID(ll_inode2fid(inode)), inode);
 
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LISTXATTR, 1);
 
diff --git a/drivers/staging/lustre/lustre/llite/xattr_cache.c b/drivers/staging/lustre/lustre/llite/xattr_cache.c
index 3480ce2bb3cc..8089da8143d9 100644
--- a/drivers/staging/lustre/lustre/llite/xattr_cache.c
+++ b/drivers/staging/lustre/lustre/llite/xattr_cache.c
@@ -229,7 +229,6 @@ static int ll_xattr_cache_valid(struct ll_inode_info *lli)
  */
 static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli)
 {
-
 	if (!ll_xattr_cache_valid(lli))
 		return 0;
 
@@ -289,8 +288,8 @@ static int ll_xattr_find_get_lock(struct inode *inode,
 				       LCK_PR);
 		if (mode != 0) {
 			/* fake oit in mdc_revalidate_lock() manner */
-			oit->d.lustre.it_lock_handle = lockh.cookie;
-			oit->d.lustre.it_lock_mode = mode;
+			oit->it_lock_handle = lockh.cookie;
+			oit->it_lock_mode = mode;
 			goto out;
 		}
 	}
@@ -316,7 +315,7 @@ static int ll_xattr_find_get_lock(struct inode *inode,
 		return rc;
 	}
 
-	*req = (struct ptlrpc_request *)oit->d.lustre.it_data;
+	*req = oit->it_request;
 out:
 	down_write(&lli->lli_xattrs_list_rwsem);
 	mutex_unlock(&lli->lli_xattrs_enq_lock);
@@ -363,10 +362,10 @@ static int ll_xattr_cache_refill(struct inode *inode, struct lookup_intent *oit)
 		goto out_maybe_drop;
 	}
 
-	if (oit->d.lustre.it_status < 0) {
+	if (oit->it_status < 0) {
 		CDEBUG(D_CACHE, "getxattr intent returned %d for fid "DFID"\n",
-		       oit->d.lustre.it_status, PFID(ll_inode2fid(inode)));
-		rc = oit->d.lustre.it_status;
+		       oit->it_status, PFID(ll_inode2fid(inode)));
+		rc = oit->it_status;
 		/* xattr data is so large that we don't want to cache it */
 		if (rc == -ERANGE)
 			rc = -EAGAIN;
@@ -449,8 +448,8 @@ out_destroy:
 	up_write(&lli->lli_xattrs_list_rwsem);
 
 	ldlm_lock_decref_and_cancel((struct lustre_handle *)
-					&oit->d.lustre.it_lock_handle,
-					oit->d.lustre.it_lock_mode);
+					&oit->it_lock_handle,
+					oit->it_lock_mode);
 
 	goto out_no_unlock;
 }
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_fld.c b/drivers/staging/lustre/lustre/lmv/lmv_fld.c
index 378691b2a062..a3d170aa6fd2 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_fld.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_fld.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_intent.c b/drivers/staging/lustre/lustre/lmv/lmv_intent.c
index e0958eaed054..2f58fdab8d1e 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_intent.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_intent.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -84,11 +80,11 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm,
 	/*
 	 * We got LOOKUP lock, but we really need attrs.
 	 */
-	pmode = it->d.lustre.it_lock_mode;
+	pmode = it->it_lock_mode;
 	if (pmode) {
-		plock.cookie = it->d.lustre.it_lock_handle;
-		it->d.lustre.it_lock_mode = 0;
-		it->d.lustre.it_data = NULL;
+		plock.cookie = it->it_lock_handle;
+		it->it_lock_mode = 0;
+		it->it_request = NULL;
 	}
 
 	LASSERT(fid_is_sane(&body->fid1));
@@ -134,14 +130,14 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm,
 	 * maintain dcache consistency. Thus drop UPDATE|PERM lock here
 	 * and put LOOKUP in request.
 	 */
-	if (it->d.lustre.it_lock_mode != 0) {
-		it->d.lustre.it_remote_lock_handle =
-					it->d.lustre.it_lock_handle;
-		it->d.lustre.it_remote_lock_mode = it->d.lustre.it_lock_mode;
+	if (it->it_lock_mode != 0) {
+		it->it_remote_lock_handle =
+					it->it_lock_handle;
+		it->it_remote_lock_mode = it->it_lock_mode;
 	}
 
-	it->d.lustre.it_lock_handle = plock.cookie;
-	it->d.lustre.it_lock_mode = pmode;
+	it->it_lock_handle = plock.cookie;
+	it->it_lock_mode = pmode;
 
 out_free_op_data:
 	kfree(op_data);
@@ -201,9 +197,9 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
 	 * Nothing is found, do not access body->fid1 as it is zero and thus
 	 * pointless.
 	 */
-	if ((it->d.lustre.it_disposition & DISP_LOOKUP_NEG) &&
-	    !(it->d.lustre.it_disposition & DISP_OPEN_CREATE) &&
-	    !(it->d.lustre.it_disposition & DISP_OPEN_OPEN))
+	if ((it->it_disposition & DISP_LOOKUP_NEG) &&
+	    !(it->it_disposition & DISP_OPEN_CREATE) &&
+	    !(it->it_disposition & DISP_OPEN_OPEN))
 		return rc;
 
 	body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_internal.h b/drivers/staging/lustre/lustre/lmv/lmv_internal.h
index 8a0087190e23..0beafc49b8d2 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_internal.h
+++ b/drivers/staging/lustre/lustre/lmv/lmv_internal.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -42,9 +38,6 @@
 
 #define LMV_MAX_TGT_COUNT 128
 
-#define lmv_init_lock(lmv)   mutex_lock(&lmv->init_mutex)
-#define lmv_init_unlock(lmv) mutex_unlock(&lmv->init_mutex)
-
 #define LL_IT2STR(it)					\
 	((it) ? ldlm_it2str((it)->it_op) : "0")
 
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
index 9abb7c2b9231..0e1588a43187 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -132,8 +128,9 @@ static int lmv_set_mdc_active(struct lmv_obd *lmv, struct obd_uuid *uuid,
 static struct obd_uuid *lmv_get_uuid(struct obd_export *exp)
 {
 	struct lmv_obd *lmv = &exp->exp_obd->u.lmv;
+	struct lmv_tgt_desc *tgt = lmv->tgts[0];
 
-	return obd_get_uuid(lmv->tgts[0]->ltd_exp);
+	return tgt ? obd_get_uuid(tgt->ltd_exp) : NULL;
 }
 
 static int lmv_notify(struct obd_device *obd, struct obd_device *watched,
@@ -249,7 +246,6 @@ static int lmv_connect(const struct lu_env *env,
 
 static void lmv_set_timeouts(struct obd_device *obd)
 {
-	struct lmv_tgt_desc   *tgt;
 	struct lmv_obd	*lmv;
 	int		    i;
 
@@ -261,8 +257,10 @@ static void lmv_set_timeouts(struct obd_device *obd)
 		return;
 
 	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+		struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
 		tgt = lmv->tgts[i];
-		if (!tgt || !tgt->ltd_exp || tgt->ltd_active == 0)
+		if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
 			continue;
 
 		obd_set_info_async(NULL, tgt->ltd_exp, sizeof(KEY_INTERMDS),
@@ -302,13 +300,14 @@ static int lmv_init_ea_size(struct obd_export *exp, int easize,
 		return 0;
 
 	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
-		if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp ||
-		    lmv->tgts[i]->ltd_active == 0) {
+		struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
+		if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) {
 			CWARN("%s: NULL export for %d\n", obd->obd_name, i);
 			continue;
 		}
 
-		rc = md_init_ea_size(lmv->tgts[i]->ltd_exp, easize, def_easize,
+		rc = md_init_ea_size(tgt->ltd_exp, easize, def_easize,
 				     cookiesize, def_cookiesize);
 		if (rc) {
 			CERROR("%s: obd_init_ea_size() failed on MDT target %d: rc = %d\n",
@@ -425,7 +424,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
 
 	CDEBUG(D_CONFIG, "Target uuid: %s. index %d\n", uuidp->uuid, index);
 
-	lmv_init_lock(lmv);
+	mutex_lock(&lmv->lmv_init_mutex);
 
 	if (lmv->desc.ld_tgt_count == 0) {
 		struct obd_device *mdc_obd;
@@ -433,7 +432,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
 		mdc_obd = class_find_client_obd(uuidp, LUSTRE_MDC_NAME,
 						&obd->obd_uuid);
 		if (!mdc_obd) {
-			lmv_init_unlock(lmv);
+			mutex_unlock(&lmv->lmv_init_mutex);
 			CERROR("%s: Target %s not attached: rc = %d\n",
 			       obd->obd_name, uuidp->uuid, -EINVAL);
 			return -EINVAL;
@@ -445,7 +444,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
 		CERROR("%s: UUID %s already assigned at LOV target index %d: rc = %d\n",
 		       obd->obd_name,
 		       obd_uuid2str(&tgt->ltd_uuid), index, -EEXIST);
-		lmv_init_unlock(lmv);
+		mutex_unlock(&lmv->lmv_init_mutex);
 		return -EEXIST;
 	}
 
@@ -459,7 +458,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
 			newsize <<= 1;
 		newtgts = kcalloc(newsize, sizeof(*newtgts), GFP_NOFS);
 		if (!newtgts) {
-			lmv_init_unlock(lmv);
+			mutex_unlock(&lmv->lmv_init_mutex);
 			return -ENOMEM;
 		}
 
@@ -481,7 +480,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
 
 	tgt = kzalloc(sizeof(*tgt), GFP_NOFS);
 	if (!tgt) {
-		lmv_init_unlock(lmv);
+		mutex_unlock(&lmv->lmv_init_mutex);
 		return -ENOMEM;
 	}
 
@@ -507,7 +506,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
 		}
 	}
 
-	lmv_init_unlock(lmv);
+	mutex_unlock(&lmv->lmv_init_mutex);
 	return rc;
 }
 
@@ -522,18 +521,27 @@ int lmv_check_connect(struct obd_device *obd)
 	if (lmv->connected)
 		return 0;
 
-	lmv_init_lock(lmv);
+	mutex_lock(&lmv->lmv_init_mutex);
 	if (lmv->connected) {
-		lmv_init_unlock(lmv);
+		mutex_unlock(&lmv->lmv_init_mutex);
 		return 0;
 	}
 
 	if (lmv->desc.ld_tgt_count == 0) {
-		lmv_init_unlock(lmv);
+		mutex_unlock(&lmv->lmv_init_mutex);
 		CERROR("%s: no targets configured.\n", obd->obd_name);
 		return -EINVAL;
 	}
 
+	LASSERT(lmv->tgts);
+
+	if (!lmv->tgts[0]) {
+		mutex_unlock(&lmv->lmv_init_mutex);
+		CERROR("%s: no target configured for index 0.\n",
+		       obd->obd_name);
+		return -EINVAL;
+	}
+
 	CDEBUG(D_CONFIG, "Time to connect %s to %s\n",
 	       lmv->cluuid.uuid, obd->obd_name);
 
@@ -551,7 +559,7 @@ int lmv_check_connect(struct obd_device *obd)
 	lmv->connected = 1;
 	easize = lmv_get_easize(lmv);
 	lmv_init_ea_size(obd->obd_self_export, easize, 0, 0, 0);
-	lmv_init_unlock(lmv);
+	mutex_unlock(&lmv->lmv_init_mutex);
 	return 0;
 
  out_disc:
@@ -572,7 +580,7 @@ int lmv_check_connect(struct obd_device *obd)
 		}
 	}
 	class_disconnect(lmv->exp);
-	lmv_init_unlock(lmv);
+	mutex_unlock(&lmv->lmv_init_mutex);
 	return rc;
 }
 
@@ -796,6 +804,11 @@ static int lmv_hsm_ct_unregister(struct lmv_obd *lmv, unsigned int cmd, int len,
 
 	/* unregister request (call from llapi_hsm_copytool_fini) */
 	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+		struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
+		if (!tgt || !tgt->ltd_exp)
+			continue;
+
 		/* best effort: try to clean as much as possible
 		 * (continue on error)
 		 */
@@ -825,20 +838,28 @@ static int lmv_hsm_ct_register(struct lmv_obd *lmv, unsigned int cmd, int len,
 	 * except if it because of inactive target.
 	 */
 	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
-		err = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp, len, lk, uarg);
+		struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
+		if (!tgt || !tgt->ltd_exp)
+			continue;
+
+		err = obd_iocontrol(cmd, tgt->ltd_exp, len, lk, uarg);
 		if (err) {
-			if (lmv->tgts[i]->ltd_active) {
+			if (tgt->ltd_active) {
 				/* permanent error */
 				CERROR("error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n",
-				       lmv->tgts[i]->ltd_uuid.uuid,
-				       i, cmd, err);
+				       tgt->ltd_uuid.uuid, i, cmd, err);
 				rc = err;
 				lk->lk_flags |= LK_FLG_STOP;
 				/* unregister from previous MDS */
-				for (j = 0; j < i; j++)
-					obd_iocontrol(cmd,
-						      lmv->tgts[j]->ltd_exp,
-						      len, lk, uarg);
+				for (j = 0; j < i; j++) {
+					tgt = lmv->tgts[j];
+
+					if (!tgt || !tgt->ltd_exp)
+						continue;
+					obd_iocontrol(cmd, tgt->ltd_exp, len,
+						      lk, uarg);
+				}
 				return rc;
 			}
 			/* else: transient error.
@@ -877,6 +898,7 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 {
 	struct obd_device    *obddev = class_exp2obd(exp);
 	struct lmv_obd       *lmv = &obddev->u.lmv;
+	struct lmv_tgt_desc *tgt = NULL;
 	int		   i = 0;
 	int		   rc = 0;
 	int		   set = 0;
@@ -896,10 +918,11 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 		if (index >= count)
 			return -ENODEV;
 
-		if (!lmv->tgts[index] || lmv->tgts[index]->ltd_active == 0)
+		tgt = lmv->tgts[index];
+		if (!tgt || !tgt->ltd_active)
 			return -ENODATA;
 
-		mdc_obd = class_exp2obd(lmv->tgts[index]->ltd_exp);
+		mdc_obd = class_exp2obd(tgt->ltd_exp);
 		if (!mdc_obd)
 			return -EINVAL;
 
@@ -909,7 +932,7 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 				     (int)sizeof(struct obd_uuid))))
 			return -EFAULT;
 
-		rc = obd_statfs(NULL, lmv->tgts[index]->ltd_exp, &stat_buf,
+		rc = obd_statfs(NULL, tgt->ltd_exp, &stat_buf,
 				cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
 				0);
 		if (rc)
@@ -922,11 +945,10 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 	}
 	case OBD_IOC_QUOTACTL: {
 		struct if_quotactl *qctl = karg;
-		struct lmv_tgt_desc *tgt = NULL;
 		struct obd_quotactl *oqctl;
 
 		if (qctl->qc_valid == QC_MDTIDX) {
-			if (qctl->qc_idx < 0 || count <= qctl->qc_idx)
+			if (count <= qctl->qc_idx)
 				return -EINVAL;
 
 			tgt = lmv->tgts[qctl->qc_idx];
@@ -975,18 +997,18 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 		if (icc->icc_mdtindex >= count)
 			return -ENODEV;
 
-		if (!lmv->tgts[icc->icc_mdtindex] ||
-		    !lmv->tgts[icc->icc_mdtindex]->ltd_exp ||
-		    lmv->tgts[icc->icc_mdtindex]->ltd_active == 0)
+		tgt = lmv->tgts[icc->icc_mdtindex];
+		if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
 			return -ENODEV;
-		rc = obd_iocontrol(cmd, lmv->tgts[icc->icc_mdtindex]->ltd_exp,
-				   sizeof(*icc), icc, NULL);
+		rc = obd_iocontrol(cmd, tgt->ltd_exp, sizeof(*icc), icc, NULL);
 		break;
 	}
 	case LL_IOC_GET_CONNECT_FLAGS: {
-		if (!lmv->tgts[0])
+		tgt = lmv->tgts[0];
+
+		if (!tgt || !tgt->ltd_exp)
 			return -ENODATA;
-		rc = obd_iocontrol(cmd, lmv->tgts[0]->ltd_exp, len, karg, uarg);
+		rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg);
 		break;
 	}
 	case OBD_IOC_FID2PATH: {
@@ -997,7 +1019,6 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 	case LL_IOC_HSM_STATE_SET:
 	case LL_IOC_HSM_ACTION: {
 		struct md_op_data	*op_data = karg;
-		struct lmv_tgt_desc	*tgt;
 
 		tgt = lmv_find_target(lmv, &op_data->op_fid1);
 		if (IS_ERR(tgt))
@@ -1011,7 +1032,6 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 	}
 	case LL_IOC_HSM_PROGRESS: {
 		const struct hsm_progress_kernel *hpk = karg;
-		struct lmv_tgt_desc	*tgt;
 
 		tgt = lmv_find_target(lmv, &hpk->hpk_fid);
 		if (IS_ERR(tgt))
@@ -1021,7 +1041,6 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 	}
 	case LL_IOC_HSM_REQUEST: {
 		struct hsm_user_request *hur = karg;
-		struct lmv_tgt_desc	*tgt;
 		unsigned int reqcount = hur->hur_request.hr_itemcount;
 
 		if (reqcount == 0)
@@ -1044,7 +1063,11 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 				int			rc1;
 				struct hsm_user_request *req;
 
-				nr = lmv_hsm_req_count(lmv, hur, lmv->tgts[i]);
+				tgt = lmv->tgts[i];
+				if (!tgt || !tgt->ltd_exp)
+					continue;
+
+				nr = lmv_hsm_req_count(lmv, hur, tgt);
 				if (nr == 0) /* nothing for this MDS */
 					continue;
 
@@ -1056,10 +1079,10 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 				if (!req)
 					return -ENOMEM;
 
-				lmv_hsm_req_build(lmv, hur, lmv->tgts[i], req);
+				lmv_hsm_req_build(lmv, hur, tgt, req);
 
-				rc1 = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp,
-						    reqlen, req, uarg);
+				rc1 = obd_iocontrol(cmd, tgt->ltd_exp, reqlen,
+						    req, uarg);
 				if (rc1 != 0 && rc == 0)
 					rc = rc1;
 				kvfree(req);
@@ -1103,27 +1126,27 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 			struct obd_device *mdc_obd;
 			int err;
 
-			if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp)
+			tgt = lmv->tgts[i];
+			if (!tgt || !tgt->ltd_exp)
 				continue;
 			/* ll_umount_begin() sets force flag but for lmv, not
 			 * mdc. Let's pass it through
 			 */
-			mdc_obd = class_exp2obd(lmv->tgts[i]->ltd_exp);
+			mdc_obd = class_exp2obd(tgt->ltd_exp);
 			mdc_obd->obd_force = obddev->obd_force;
-			err = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp, len,
-					    karg, uarg);
+			err = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg);
 			if (err == -ENODATA && cmd == OBD_IOC_POLL_QUOTACHECK) {
 				return err;
 			} else if (err) {
-				if (lmv->tgts[i]->ltd_active) {
+				if (tgt->ltd_active) {
 					CERROR("error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n",
-					       lmv->tgts[i]->ltd_uuid.uuid,
-					       i, cmd, err);
+					       tgt->ltd_uuid.uuid, i, cmd, err);
 					if (!rc)
 						rc = err;
 				}
-			} else
+			} else {
 				set = 1;
+			}
 		}
 		if (!set && !rc)
 			rc = -EIO;
@@ -1269,7 +1292,7 @@ static int lmv_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
 	lmv->lmv_placement = PLACEMENT_CHAR_POLICY;
 
 	spin_lock_init(&lmv->lmv_lock);
-	mutex_init(&lmv->init_mutex);
+	mutex_init(&lmv->lmv_init_mutex);
 
 	lprocfs_lmv_init_vars(&lvars);
 
@@ -1656,7 +1679,7 @@ lmv_enqueue_remote(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
 		   struct lustre_handle *lockh, void *lmm, int lmmsize,
 		   __u64 extra_lock_flags)
 {
-	struct ptlrpc_request      *req = it->d.lustre.it_data;
+	struct ptlrpc_request      *req = it->it_request;
 	struct obd_device	  *obd = exp->exp_obd;
 	struct lmv_obd	     *lmv = &obd->u.lmv;
 	struct lustre_handle	plock;
@@ -1678,11 +1701,11 @@ lmv_enqueue_remote(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
 	/*
 	 * We got LOOKUP lock, but we really need attrs.
 	 */
-	pmode = it->d.lustre.it_lock_mode;
+	pmode = it->it_lock_mode;
 	LASSERT(pmode != 0);
 	memcpy(&plock, lockh, sizeof(plock));
-	it->d.lustre.it_lock_mode = 0;
-	it->d.lustre.it_data = NULL;
+	it->it_lock_mode = 0;
+	it->it_request = NULL;
 	fid1 = body->fid1;
 
 	ptlrpc_req_finished(req);
@@ -2071,7 +2094,7 @@ static void lmv_adjust_dirpages(struct page **pages, int ncfspgs, int nlupgs)
 			dp = (struct lu_dirpage *)((char *)dp + LU_PAGE_SIZE);
 
 			/* Check if we've reached the end of the CFS_PAGE. */
-			if (!((unsigned long)dp & ~CFS_PAGE_MASK))
+			if (!((unsigned long)dp & ~PAGE_MASK))
 				break;
 
 			/* Save the hash and flags of this lu_dirpage. */
@@ -2268,7 +2291,6 @@ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp,
 
 	lmv = &obd->u.lmv;
 	if (keylen >= strlen("remote_flag") && !strcmp(key, "remote_flag")) {
-		struct lmv_tgt_desc *tgt;
 		int i;
 
 		rc = lmv_check_connect(obd);
@@ -2277,7 +2299,8 @@ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp,
 
 		LASSERT(*vallen == sizeof(__u32));
 		for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
-			tgt = lmv->tgts[i];
+			struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
 			/*
 			 * All tgts should be connected when this gets called.
 			 */
@@ -2466,12 +2489,13 @@ static int lmv_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
 	LASSERT(fid);
 
 	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
-		if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp ||
-		    lmv->tgts[i]->ltd_active == 0)
+		struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
+		if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
 			continue;
 
-		err = md_cancel_unused(lmv->tgts[i]->ltd_exp, fid,
-				       policy, mode, flags, opaque);
+		err = md_cancel_unused(tgt->ltd_exp, fid, policy, mode, flags,
+				       opaque);
 		if (!rc)
 			rc = err;
 	}
@@ -2482,9 +2506,13 @@ static int lmv_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data,
 			     __u64 *bits)
 {
 	struct lmv_obd	  *lmv = &exp->exp_obd->u.lmv;
+	struct lmv_tgt_desc *tgt = lmv->tgts[0];
 	int		      rc;
 
-	rc =  md_set_lock_data(lmv->tgts[0]->ltd_exp, lockh, data, bits);
+	if (!tgt || !tgt->ltd_exp)
+		return -EINVAL;
+
+	rc = md_set_lock_data(tgt->ltd_exp, lockh, data, bits);
 	return rc;
 }
 
@@ -2509,12 +2537,13 @@ static enum ldlm_mode lmv_lock_match(struct obd_export *exp, __u64 flags,
 	 * one fid was created in.
 	 */
 	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
-		if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp ||
-		    lmv->tgts[i]->ltd_active == 0)
+		struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
+		if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
 			continue;
 
-		rc = md_lock_match(lmv->tgts[i]->ltd_exp, flags, fid,
-				   type, policy, mode, lockh);
+		rc = md_lock_match(tgt->ltd_exp, flags, fid, type, policy, mode,
+				   lockh);
 		if (rc)
 			return rc;
 	}
@@ -2529,18 +2558,24 @@ static int lmv_get_lustre_md(struct obd_export *exp,
 			     struct lustre_md *md)
 {
 	struct lmv_obd	  *lmv = &exp->exp_obd->u.lmv;
+	struct lmv_tgt_desc *tgt = lmv->tgts[0];
 
-	return md_get_lustre_md(lmv->tgts[0]->ltd_exp, req, dt_exp, md_exp, md);
+	if (!tgt || !tgt->ltd_exp)
+		return -EINVAL;
+	return md_get_lustre_md(tgt->ltd_exp, req, dt_exp, md_exp, md);
 }
 
 static int lmv_free_lustre_md(struct obd_export *exp, struct lustre_md *md)
 {
 	struct obd_device       *obd = exp->exp_obd;
 	struct lmv_obd	  *lmv = &obd->u.lmv;
+	struct lmv_tgt_desc *tgt = lmv->tgts[0];
 
 	if (md->mea)
 		obd_free_memmd(exp, (void *)&md->mea);
-	return md_free_lustre_md(lmv->tgts[0]->ltd_exp, md);
+	if (!tgt || !tgt->ltd_exp)
+		return -EINVAL;
+	return md_free_lustre_md(tgt->ltd_exp, md);
 }
 
 static int lmv_set_open_replay_data(struct obd_export *exp,
@@ -2572,27 +2607,6 @@ static int lmv_clear_open_replay_data(struct obd_export *exp,
 	return md_clear_open_replay_data(tgt->ltd_exp, och);
 }
 
-static int lmv_get_remote_perm(struct obd_export *exp,
-			       const struct lu_fid *fid,
-			       __u32 suppgid, struct ptlrpc_request **request)
-{
-	struct obd_device       *obd = exp->exp_obd;
-	struct lmv_obd	  *lmv = &obd->u.lmv;
-	struct lmv_tgt_desc     *tgt;
-	int		      rc;
-
-	rc = lmv_check_connect(obd);
-	if (rc)
-		return rc;
-
-	tgt = lmv_find_target(lmv, fid);
-	if (IS_ERR(tgt))
-		return PTR_ERR(tgt);
-
-	rc = md_get_remote_perm(tgt->ltd_exp, fid, suppgid, request);
-	return rc;
-}
-
 static int lmv_intent_getattr_async(struct obd_export *exp,
 				    struct md_enqueue_info *minfo,
 				    struct ldlm_enqueue_info *einfo)
@@ -2647,9 +2661,10 @@ static int lmv_quotactl(struct obd_device *unused, struct obd_export *exp,
 	struct lmv_obd      *lmv = &obd->u.lmv;
 	struct lmv_tgt_desc *tgt = lmv->tgts[0];
 	int		  rc = 0, i;
-	__u64		curspace, curinodes;
+	__u64 curspace = 0, curinodes = 0;
 
-	if (!lmv->desc.ld_tgt_count || !tgt->ltd_active) {
+	if (!tgt || !tgt->ltd_exp || !tgt->ltd_active ||
+	    !lmv->desc.ld_tgt_count) {
 		CERROR("master lmv inactive\n");
 		return -EIO;
 	}
@@ -2659,18 +2674,13 @@ static int lmv_quotactl(struct obd_device *unused, struct obd_export *exp,
 		return rc;
 	}
 
-	curspace = curinodes = 0;
 	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
 		int err;
 
 		tgt = lmv->tgts[i];
 
-		if (!tgt || !tgt->ltd_exp || tgt->ltd_active == 0)
-			continue;
-		if (!tgt->ltd_active) {
-			CDEBUG(D_HA, "mdt %d is inactive.\n", i);
+		if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
 			continue;
-		}
 
 		err = obd_quotactl(tgt->ltd_exp, oqctl);
 		if (err) {
@@ -2760,7 +2770,6 @@ static struct md_ops lmv_md_ops = {
 	.free_lustre_md		= lmv_free_lustre_md,
 	.set_open_replay_data	= lmv_set_open_replay_data,
 	.clear_open_replay_data	= lmv_clear_open_replay_data,
-	.get_remote_perm	= lmv_get_remote_perm,
 	.intent_getattr_async	= lmv_intent_getattr_async,
 	.revalidate_lock	= lmv_revalidate_lock
 };
diff --git a/drivers/staging/lustre/lustre/lmv/lproc_lmv.c b/drivers/staging/lustre/lustre/lmv/lproc_lmv.c
index b39e364a29ab..c29c361eb0cc 100644
--- a/drivers/staging/lustre/lustre/lmv/lproc_lmv.c
+++ b/drivers/staging/lustre/lustre/lmv/lproc_lmv.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
index 7dd3162b51e9..9740568d9521 100644
--- a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
+++ b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -73,19 +69,6 @@
  *     - top-page keeps a reference to its sub-page, and destroys it when it
  *       is destroyed.
  *
- *     - sub-lock keep a reference to its top-locks. Top-lock keeps a
- *       reference (and a hold, see cl_lock_hold()) on its sub-locks when it
- *       actively using them (that is, in cl_lock_state::CLS_QUEUING,
- *       cl_lock_state::CLS_ENQUEUED, cl_lock_state::CLS_HELD states). When
- *       moving into cl_lock_state::CLS_CACHED state, top-lock releases a
- *       hold. From this moment top-lock has only a 'weak' reference to its
- *       sub-locks. This reference is protected by top-lock
- *       cl_lock::cll_guard, and will be automatically cleared by the sub-lock
- *       when the latter is destroyed. When a sub-lock is canceled, a
- *       reference to it is removed from the top-lock array, and top-lock is
- *       moved into CLS_NEW state. It is guaranteed that all sub-locks exist
- *       while their top-lock is in CLS_HELD or CLS_CACHED states.
- *
  *     - IO's are not reference counted.
  *
  * To implement a connection between top and sub entities, lov layer is split
@@ -281,24 +264,17 @@ struct lov_object {
 };
 
 /**
- * Flags that top-lock can set on each of its sub-locks.
- */
-enum lov_sub_flags {
-	/** Top-lock acquired a hold (cl_lock_hold()) on a sub-lock. */
-	LSF_HELD = 1 << 0
-};
-
-/**
  * State lov_lock keeps for each sub-lock.
  */
 struct lov_lock_sub {
 	/** sub-lock itself */
-	struct lovsub_lock  *sub_lock;
-	/** An array of per-sub-lock flags, taken from enum lov_sub_flags */
-	unsigned	     sub_flags;
+	struct cl_lock		sub_lock;
+	/** Set if the sublock has ever been enqueued, meaning it may
+	 * hold resources of underlying layers
+	 */
+	unsigned int		sub_is_enqueued:1,
+				sub_initialized:1;
 	int		  sub_stripe;
-	struct cl_lock_descr sub_descr;
-	struct cl_lock_descr sub_got;
 };
 
 /**
@@ -308,59 +284,8 @@ struct lov_lock {
 	struct cl_lock_slice   lls_cl;
 	/** Number of sub-locks in this lock */
 	int		    lls_nr;
-	/**
-	 * Number of existing sub-locks.
-	 */
-	unsigned	       lls_nr_filled;
-	/**
-	 * Set when sub-lock was canceled, while top-lock was being
-	 * used, or unused.
-	 */
-	unsigned int	       lls_cancel_race:1;
-	/**
-	 * An array of sub-locks
-	 *
-	 * There are two issues with managing sub-locks:
-	 *
-	 *     - sub-locks are concurrently canceled, and
-	 *
-	 *     - sub-locks are shared with other top-locks.
-	 *
-	 * To manage cancellation, top-lock acquires a hold on a sublock
-	 * (lov_sublock_adopt()) when the latter is inserted into
-	 * lov_lock::lls_sub[]. This hold is released (lov_sublock_release())
-	 * when top-lock is going into CLS_CACHED state or destroyed. Hold
-	 * prevents sub-lock from cancellation.
-	 *
-	 * Sub-lock sharing means, among other things, that top-lock that is
-	 * in the process of creation (i.e., not yet inserted into lock list)
-	 * is already accessible to other threads once at least one of its
-	 * sub-locks is created, see lov_lock_sub_init().
-	 *
-	 * Sub-lock can be in one of the following states:
-	 *
-	 *     - doesn't exist, lov_lock::lls_sub[]::sub_lock == NULL. Such
-	 *       sub-lock was either never created (top-lock is in CLS_NEW
-	 *       state), or it was created, then canceled, then destroyed
-	 *       (lov_lock_unlink() cleared sub-lock pointer in the top-lock).
-	 *
-	 *     - sub-lock exists and is on
-	 *       hold. (lov_lock::lls_sub[]::sub_flags & LSF_HELD). This is a
-	 *       normal state of a sub-lock in CLS_HELD and CLS_CACHED states
-	 *       of a top-lock.
-	 *
-	 *     - sub-lock exists, but is not held by the top-lock. This
-	 *       happens after top-lock released a hold on sub-locks before
-	 *       going into cache (lov_lock_unuse()).
-	 *
-	 * \todo To support wide-striping, array has to be replaced with a set
-	 * of queues to avoid scanning.
-	 */
-	struct lov_lock_sub   *lls_sub;
-	/**
-	 * Original description with which lock was enqueued.
-	 */
-	struct cl_lock_descr   lls_orig;
+	/** sublock array */
+	struct lov_lock_sub     lls_sub[0];
 };
 
 struct lov_page {
@@ -444,8 +369,9 @@ struct lov_thread_info {
 	struct cl_lock_descr    lti_ldescr;
 	struct ost_lvb	  lti_lvb;
 	struct cl_2queue	lti_cl2q;
-	struct cl_lock_closure  lti_closure;
+	struct cl_page_list     lti_plist;
 	wait_queue_t	  lti_waiter;
+	struct cl_attr          lti_attr;
 };
 
 /**
@@ -611,14 +537,13 @@ int lov_sublock_modify(const struct lu_env *env, struct lov_lock *lov,
 		       const struct cl_lock_descr *d, int idx);
 
 int lov_page_init(const struct lu_env *env, struct cl_object *ob,
-		  struct cl_page *page, struct page *vmpage);
+		  struct cl_page *page, pgoff_t index);
 int lovsub_page_init(const struct lu_env *env, struct cl_object *ob,
-		     struct cl_page *page, struct page *vmpage);
-
+		     struct cl_page *page, pgoff_t index);
 int lov_page_init_empty(const struct lu_env *env, struct cl_object *obj,
-			struct cl_page *page, struct page *vmpage);
+			struct cl_page *page, pgoff_t index);
 int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj,
-			struct cl_page *page, struct page *vmpage);
+			struct cl_page *page, pgoff_t index);
 struct lu_object *lov_object_alloc(const struct lu_env *env,
 				   const struct lu_object_header *hdr,
 				   struct lu_device *dev);
@@ -631,6 +556,7 @@ struct lov_lock_link *lov_lock_link_find(const struct lu_env *env,
 					 struct lovsub_lock *sub);
 struct lov_io_sub *lov_page_subio(const struct lu_env *env, struct lov_io *lio,
 				  const struct cl_page_slice *slice);
+int lov_page_stripe(const struct cl_page *page);
 
 #define lov_foreach_target(lov, var)		    \
 	for (var = 0; var < lov_targets_nr(lov); ++var)
@@ -789,11 +715,6 @@ static inline struct lovsub_req *cl2lovsub_req(const struct cl_req_slice *slice)
 	return container_of0(slice, struct lovsub_req, lsrq_cl);
 }
 
-static inline struct cl_page *lov_sub_page(const struct cl_page_slice *slice)
-{
-	return slice->cpl_page->cp_child;
-}
-
 static inline struct lov_io *cl2lov_io(const struct lu_env *env,
 				       const struct cl_io_slice *ios)
 {
diff --git a/drivers/staging/lustre/lustre/lov/lov_dev.c b/drivers/staging/lustre/lustre/lov/lov_dev.c
index 532ef87dfb44..b1f260d43bc7 100644
--- a/drivers/staging/lustre/lustre/lov/lov_dev.c
+++ b/drivers/staging/lustre/lustre/lov/lov_dev.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -143,9 +139,7 @@ static void *lov_key_init(const struct lu_context *ctx,
 	struct lov_thread_info *info;
 
 	info = kmem_cache_zalloc(lov_thread_kmem, GFP_NOFS);
-	if (info)
-		INIT_LIST_HEAD(&info->lti_closure.clc_list);
-	else
+	if (!info)
 		info = ERR_PTR(-ENOMEM);
 	return info;
 }
@@ -155,7 +149,6 @@ static void lov_key_fini(const struct lu_context *ctx,
 {
 	struct lov_thread_info *info = data;
 
-	LINVRNT(list_empty(&info->lti_closure.clc_list));
 	kmem_cache_free(lov_thread_kmem, info);
 }
 
@@ -265,8 +258,9 @@ static int lov_req_init(const struct lu_env *env, struct cl_device *dev,
 	if (lr) {
 		cl_req_slice_add(req, &lr->lr_cl, dev, &lov_req_ops);
 		result = 0;
-	} else
+	} else {
 		result = -ENOMEM;
+	}
 	return result;
 }
 
@@ -335,14 +329,15 @@ static struct lov_device_emerg **lov_emerg_alloc(int nr)
 			cl_page_list_init(&em->emrg_page_list);
 			em->emrg_env = cl_env_alloc(&em->emrg_refcheck,
 						    LCT_REMEMBER | LCT_NOREF);
-			if (!IS_ERR(em->emrg_env))
+			if (!IS_ERR(em->emrg_env)) {
 				em->emrg_env->le_ctx.lc_cookie = 0x2;
-			else {
+			} else {
 				result = PTR_ERR(em->emrg_env);
 				em->emrg_env = NULL;
 			}
-		} else
+		} else {
 			result = -ENOMEM;
+		}
 	}
 	if (result != 0) {
 		lov_emerg_free(emerg, nr);
diff --git a/drivers/staging/lustre/lustre/lov/lov_ea.c b/drivers/staging/lustre/lustre/lov/lov_ea.c
index b6529401c713..5053dead17bb 100644
--- a/drivers/staging/lustre/lustre/lov/lov_ea.c
+++ b/drivers/staging/lustre/lustre/lov/lov_ea.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -48,11 +44,6 @@
 
 #include "lov_internal.h"
 
-struct lovea_unpack_args {
-	struct lov_stripe_md *lsm;
-	int		   cursor;
-};
-
 static int lsm_lmm_verify_common(struct lov_mds_md *lmm, int lmm_bytes,
 				 __u16 stripe_count)
 {
diff --git a/drivers/staging/lustre/lustre/lov/lov_internal.h b/drivers/staging/lustre/lustre/lov/lov_internal.h
index 590f9326af37..12bd511e8988 100644
--- a/drivers/staging/lustre/lustre/lov/lov_internal.h
+++ b/drivers/staging/lustre/lustre/lov/lov_internal.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -72,6 +68,21 @@
 })
 #endif
 
+#define pool_tgt_size(p)	((p)->pool_obds.op_size)
+#define pool_tgt_count(p)	((p)->pool_obds.op_count)
+#define pool_tgt_array(p)	((p)->pool_obds.op_array)
+#define pool_tgt_rw_sem(p)	((p)->pool_obds.op_rw_sem)
+
+struct pool_desc {
+	char			 pool_name[LOV_MAXPOOLNAME + 1];
+	struct ost_pool		 pool_obds;
+	atomic_t		 pool_refcount;
+	struct hlist_node	 pool_hash;		/* access by poolname */
+	struct list_head	 pool_list;		/* serial access */
+	struct dentry		*pool_debugfs_entry;	/* file in debugfs */
+	struct obd_device	*pool_lobd;		/* owner */
+};
+
 struct lov_request {
 	struct obd_info	  rq_oi;
 	struct lov_request_set  *rq_rqset;
@@ -88,7 +99,6 @@ struct lov_request {
 };
 
 struct lov_request_set {
-	struct ldlm_enqueue_info	*set_ei;
 	struct obd_info			*set_oi;
 	atomic_t			set_refcount;
 	struct obd_export		*set_exp;
@@ -102,10 +112,8 @@ struct lov_request_set {
 	atomic_t			set_finish_checked;
 	struct llog_cookie		*set_cookies;
 	int				set_cookie_sent;
-	struct obd_trans_info		*set_oti;
 	struct list_head			set_list;
 	wait_queue_head_t			set_waitq;
-	spinlock_t			set_lock;
 };
 
 extern struct kmem_cache *lov_oinfo_slab;
@@ -114,12 +122,6 @@ extern struct lu_kmem_descr lov_caches[];
 
 void lov_finish_set(struct lov_request_set *set);
 
-static inline void lov_get_reqset(struct lov_request_set *set)
-{
-	LASSERT(atomic_read(&set->set_refcount) > 0);
-	atomic_inc(&set->set_refcount);
-}
-
 static inline void lov_put_reqset(struct lov_request_set *set)
 {
 	if (atomic_dec_and_test(&set->set_refcount))
@@ -146,10 +148,8 @@ int lov_stripe_intersects(struct lov_stripe_md *lsm, int stripeno,
 			  u64 start, u64 end,
 			  u64 *obd_start, u64 *obd_end);
 int lov_stripe_number(struct lov_stripe_md *lsm, u64 lov_off);
-
-/* lov_qos.c */
-#define LOV_USES_ASSIGNED_STRIPE	0
-#define LOV_USES_DEFAULT_STRIPE	 1
+pgoff_t lov_stripe_pgoff(struct lov_stripe_md *lsm, pgoff_t stripe_index,
+			 int stripe);
 
 /* lov_request.c */
 int lov_update_common_set(struct lov_request_set *set,
@@ -176,6 +176,8 @@ int lov_fini_statfs_set(struct lov_request_set *set);
 int lov_statfs_interpret(struct ptlrpc_request_set *rqset, void *data, int rc);
 
 /* lov_obd.c */
+void lov_stripe_lock(struct lov_stripe_md *md);
+void lov_stripe_unlock(struct lov_stripe_md *md);
 void lov_fix_desc(struct lov_desc *desc);
 void lov_fix_desc_stripe_size(__u64 *val);
 void lov_fix_desc_stripe_count(__u32 *val);
@@ -231,8 +233,6 @@ int lov_pool_new(struct obd_device *obd, char *poolname);
 int lov_pool_del(struct obd_device *obd, char *poolname);
 int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname);
 int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname);
-struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname);
-int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool);
 void lov_pool_putref(struct pool_desc *pool);
 
 static inline struct lov_stripe_md *lsm_addref(struct lov_stripe_md *lsm)
diff --git a/drivers/staging/lustre/lustre/lov/lov_io.c b/drivers/staging/lustre/lustre/lov/lov_io.c
index 4296aacd84fc..84032a510254 100644
--- a/drivers/staging/lustre/lustre/lov/lov_io.c
+++ b/drivers/staging/lustre/lustre/lov/lov_io.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -225,8 +221,9 @@ struct lov_io_sub *lov_sub_get(const struct lu_env *env,
 	if (!sub->sub_io_initialized) {
 		sub->sub_stripe = stripe;
 		rc = lov_io_sub_init(env, lio, sub);
-	} else
+	} else {
 		rc = 0;
+	}
 	if (rc == 0)
 		lov_sub_enter(sub);
 	else
@@ -245,13 +242,15 @@ void lov_sub_put(struct lov_io_sub *sub)
  *
  */
 
-static int lov_page_stripe(const struct cl_page *page)
+int lov_page_stripe(const struct cl_page *page)
 {
 	struct lovsub_object *subobj;
+	const struct cl_page_slice *slice;
 
-	subobj = lu2lovsub(
-		lu_object_locate(page->cp_child->cp_obj->co_lu.lo_header,
-				 &lovsub_device_type));
+	slice = cl_page_at(page, &lovsub_device_type);
+	LASSERT(slice->cpl_obj);
+
+	subobj = cl2lovsub(slice->cpl_obj);
 	return subobj->lso_index;
 }
 
@@ -274,10 +273,11 @@ struct lov_io_sub *lov_page_subio(const struct lu_env *env, struct lov_io *lio,
 static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio,
 			     struct cl_io *io)
 {
-	struct lov_stripe_md *lsm = lio->lis_object->lo_lsm;
+	struct lov_stripe_md *lsm;
 	int result;
 
 	LASSERT(lio->lis_object);
+	lsm = lio->lis_object->lo_lsm;
 
 	/*
 	 * Need to be optimized, we can't afford to allocate a piece of memory
@@ -292,8 +292,9 @@ static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio,
 		lio->lis_single_subio_index = -1;
 		lio->lis_active_subios = 0;
 		result = 0;
-	} else
+	} else {
 		result = -ENOMEM;
+	}
 	return result;
 }
 
@@ -411,8 +412,9 @@ static int lov_io_iter_init(const struct lu_env *env,
 			lov_sub_put(sub);
 			CDEBUG(D_VFSTRACE, "shrink: %d [%llu, %llu)\n",
 			       stripe, start, end);
-		} else
+		} else {
 			rc = PTR_ERR(sub);
+		}
 
 		if (!rc)
 			list_add_tail(&sub->sub_linkage, &lio->lis_active);
@@ -436,7 +438,6 @@ static int lov_io_rw_iter_init(const struct lu_env *env,
 
 	/* fast path for common case. */
 	if (lio->lis_nr_subios != 1 && !cl_io_is_append(io)) {
-
 		lov_do_div64(start, ssize);
 		next = (start + 1) * ssize;
 		if (next <= start * ssize)
@@ -543,13 +544,6 @@ static void lov_io_unlock(const struct lu_env *env,
 	LASSERT(rc == 0);
 }
 
-static struct cl_page_list *lov_io_submit_qin(struct lov_device *ld,
-					      struct cl_page_list *qin,
-					      int idx, int alloc)
-{
-	return alloc ? &qin[idx] : &ld->ld_emrg[idx]->emrg_page_list;
-}
-
 /**
  * lov implementation of cl_operations::cio_submit() method. It takes a list
  * of pages in \a queue, splits it into per-stripe sub-lists, invokes
@@ -569,25 +563,17 @@ static int lov_io_submit(const struct lu_env *env,
 			 const struct cl_io_slice *ios,
 			 enum cl_req_type crt, struct cl_2queue *queue)
 {
-	struct lov_io	  *lio = cl2lov_io(env, ios);
-	struct lov_object      *obj = lio->lis_object;
-	struct lov_device       *ld = lu2lov_dev(lov2cl(obj)->co_lu.lo_dev);
-	struct cl_page_list    *qin = &queue->c2_qin;
-	struct cl_2queue      *cl2q = &lov_env_info(env)->lti_cl2q;
-	struct cl_page_list *stripes_qin = NULL;
+	struct cl_page_list *qin = &queue->c2_qin;
+	struct lov_io *lio = cl2lov_io(env, ios);
+	struct lov_io_sub *sub;
+	struct cl_page_list *plist = &lov_env_info(env)->lti_plist;
 	struct cl_page *page;
-	struct cl_page *tmp;
 	int stripe;
 
-#define QIN(stripe) lov_io_submit_qin(ld, stripes_qin, stripe, alloc)
-
 	int rc = 0;
-	int alloc =
-		!(current->flags & PF_MEMALLOC);
 
 	if (lio->lis_active_subios == 1) {
 		int idx = lio->lis_single_subio_index;
-		struct lov_io_sub *sub;
 
 		LASSERT(idx < lio->lis_nr_subios);
 		sub = lov_sub_get(env, lio, idx);
@@ -600,119 +586,120 @@ static int lov_io_submit(const struct lu_env *env,
 	}
 
 	LASSERT(lio->lis_subs);
-	if (alloc) {
-		stripes_qin =
-			libcfs_kvzalloc(sizeof(*stripes_qin) *
-					lio->lis_nr_subios,
-					GFP_NOFS);
-		if (!stripes_qin)
-			return -ENOMEM;
-
-		for (stripe = 0; stripe < lio->lis_nr_subios; stripe++)
-			cl_page_list_init(&stripes_qin[stripe]);
-	} else {
-		/*
-		 * If we get here, it means pageout & swap doesn't help.
-		 * In order to not make things worse, even don't try to
-		 * allocate the memory with __GFP_NOWARN. -jay
-		 */
-		mutex_lock(&ld->ld_mutex);
-		lio->lis_mem_frozen = 1;
-	}
 
-	cl_2queue_init(cl2q);
-	cl_page_list_for_each_safe(page, tmp, qin) {
-		stripe = lov_page_stripe(page);
-		cl_page_list_move(QIN(stripe), qin, page);
-	}
+	cl_page_list_init(plist);
+	while (qin->pl_nr > 0) {
+		struct cl_2queue *cl2q = &lov_env_info(env)->lti_cl2q;
 
-	for (stripe = 0; stripe < lio->lis_nr_subios; stripe++) {
-		struct lov_io_sub   *sub;
-		struct cl_page_list *sub_qin = QIN(stripe);
+		cl_2queue_init(cl2q);
 
-		if (list_empty(&sub_qin->pl_pages))
-			continue;
+		page = cl_page_list_first(qin);
+		cl_page_list_move(&cl2q->c2_qin, qin, page);
+
+		stripe = lov_page_stripe(page);
+		while (qin->pl_nr > 0) {
+			page = cl_page_list_first(qin);
+			if (stripe != lov_page_stripe(page))
+				break;
+
+			cl_page_list_move(&cl2q->c2_qin, qin, page);
+		}
 
-		cl_page_list_splice(sub_qin, &cl2q->c2_qin);
 		sub = lov_sub_get(env, lio, stripe);
 		if (!IS_ERR(sub)) {
 			rc = cl_io_submit_rw(sub->sub_env, sub->sub_io,
 					     crt, cl2q);
 			lov_sub_put(sub);
-		} else
+		} else {
 			rc = PTR_ERR(sub);
-		cl_page_list_splice(&cl2q->c2_qin,  &queue->c2_qin);
+		}
+
+		cl_page_list_splice(&cl2q->c2_qin, plist);
 		cl_page_list_splice(&cl2q->c2_qout, &queue->c2_qout);
+		cl_2queue_fini(env, cl2q);
+
 		if (rc != 0)
 			break;
 	}
 
-	for (stripe = 0; stripe < lio->lis_nr_subios; stripe++) {
-		struct cl_page_list *sub_qin = QIN(stripe);
+	cl_page_list_splice(plist, qin);
+	cl_page_list_fini(env, plist);
 
-		if (list_empty(&sub_qin->pl_pages))
-			continue;
+	return rc;
+}
+
+static int lov_io_commit_async(const struct lu_env *env,
+			       const struct cl_io_slice *ios,
+			       struct cl_page_list *queue, int from, int to,
+			       cl_commit_cbt cb)
+{
+	struct cl_page_list *plist = &lov_env_info(env)->lti_plist;
+	struct lov_io *lio = cl2lov_io(env, ios);
+	struct lov_io_sub *sub;
+	struct cl_page *page;
+	int rc = 0;
 
-		cl_page_list_splice(sub_qin, qin);
+	if (lio->lis_active_subios == 1) {
+		int idx = lio->lis_single_subio_index;
+
+		LASSERT(idx < lio->lis_nr_subios);
+		sub = lov_sub_get(env, lio, idx);
+		LASSERT(!IS_ERR(sub));
+		LASSERT(sub->sub_io == &lio->lis_single_subio);
+		rc = cl_io_commit_async(sub->sub_env, sub->sub_io, queue,
+					from, to, cb);
+		lov_sub_put(sub);
+		return rc;
 	}
 
-	if (alloc) {
-		kvfree(stripes_qin);
-	} else {
-		int i;
+	LASSERT(lio->lis_subs);
 
-		for (i = 0; i < lio->lis_nr_subios; i++) {
-			struct cl_io *cio = lio->lis_subs[i].sub_io;
+	cl_page_list_init(plist);
+	while (queue->pl_nr > 0) {
+		int stripe_to = to;
+		int stripe;
 
-			if (cio && cio == &ld->ld_emrg[i]->emrg_subio)
-				lov_io_sub_fini(env, lio, &lio->lis_subs[i]);
+		LASSERT(plist->pl_nr == 0);
+		page = cl_page_list_first(queue);
+		cl_page_list_move(plist, queue, page);
+
+		stripe = lov_page_stripe(page);
+		while (queue->pl_nr > 0) {
+			page = cl_page_list_first(queue);
+			if (stripe != lov_page_stripe(page))
+				break;
+
+			cl_page_list_move(plist, queue, page);
 		}
-		lio->lis_mem_frozen = 0;
-		mutex_unlock(&ld->ld_mutex);
-	}
 
-	return rc;
-#undef QIN
-}
+		if (queue->pl_nr > 0) /* still has more pages */
+			stripe_to = PAGE_SIZE;
 
-static int lov_io_prepare_write(const struct lu_env *env,
-				const struct cl_io_slice *ios,
-				const struct cl_page_slice *slice,
-				unsigned from, unsigned to)
-{
-	struct lov_io     *lio      = cl2lov_io(env, ios);
-	struct cl_page    *sub_page = lov_sub_page(slice);
-	struct lov_io_sub *sub;
-	int result;
+		sub = lov_sub_get(env, lio, stripe);
+		if (!IS_ERR(sub)) {
+			rc = cl_io_commit_async(sub->sub_env, sub->sub_io,
+						plist, from, stripe_to, cb);
+			lov_sub_put(sub);
+		} else {
+			rc = PTR_ERR(sub);
+			break;
+		}
 
-	sub = lov_page_subio(env, lio, slice);
-	if (!IS_ERR(sub)) {
-		result = cl_io_prepare_write(sub->sub_env, sub->sub_io,
-					     sub_page, from, to);
-		lov_sub_put(sub);
-	} else
-		result = PTR_ERR(sub);
-	return result;
-}
+		if (plist->pl_nr > 0) /* short write */
+			break;
 
-static int lov_io_commit_write(const struct lu_env *env,
-			       const struct cl_io_slice *ios,
-			       const struct cl_page_slice *slice,
-			       unsigned from, unsigned to)
-{
-	struct lov_io     *lio      = cl2lov_io(env, ios);
-	struct cl_page    *sub_page = lov_sub_page(slice);
-	struct lov_io_sub *sub;
-	int result;
+		from = 0;
+	}
 
-	sub = lov_page_subio(env, lio, slice);
-	if (!IS_ERR(sub)) {
-		result = cl_io_commit_write(sub->sub_env, sub->sub_io,
-					    sub_page, from, to);
-		lov_sub_put(sub);
-	} else
-		result = PTR_ERR(sub);
-	return result;
+	/* for error case, add the page back into the qin list */
+	LASSERT(ergo(rc == 0, plist->pl_nr == 0));
+	while (plist->pl_nr > 0) {
+		/* error occurred, add the uncommitted pages back into queue */
+		page = cl_page_list_last(plist);
+		cl_page_list_move_head(queue, plist, page);
+	}
+
+	return rc;
 }
 
 static int lov_io_fault_start(const struct lu_env *env,
@@ -803,16 +790,8 @@ static const struct cl_io_operations lov_io_ops = {
 			.cio_fini   = lov_io_fini
 		}
 	},
-	.req_op = {
-		 [CRT_READ] = {
-			 .cio_submit    = lov_io_submit
-		 },
-		 [CRT_WRITE] = {
-			 .cio_submit    = lov_io_submit
-		 }
-	 },
-	.cio_prepare_write = lov_io_prepare_write,
-	.cio_commit_write  = lov_io_commit_write
+	.cio_submit                    = lov_io_submit,
+	.cio_commit_async              = lov_io_commit_async,
 };
 
 /*****************************************************************************
@@ -880,15 +859,8 @@ static const struct cl_io_operations lov_empty_io_ops = {
 			.cio_fini   = lov_empty_io_fini
 		}
 	},
-	.req_op = {
-		 [CRT_READ] = {
-			 .cio_submit    = LOV_EMPTY_IMPOSSIBLE
-		 },
-		 [CRT_WRITE] = {
-			 .cio_submit    = LOV_EMPTY_IMPOSSIBLE
-		 }
-	 },
-	.cio_commit_write = LOV_EMPTY_IMPOSSIBLE
+	.cio_submit                    = LOV_EMPTY_IMPOSSIBLE,
+	.cio_commit_async              = LOV_EMPTY_IMPOSSIBLE
 };
 
 int lov_io_init_raid0(const struct lu_env *env, struct cl_object *obj,
@@ -943,7 +915,7 @@ int lov_io_init_empty(const struct lu_env *env, struct cl_object *obj,
 	}
 
 	io->ci_result = result < 0 ? result : 0;
-	return result != 0;
+	return result;
 }
 
 int lov_io_init_released(const struct lu_env *env, struct cl_object *obj,
@@ -986,7 +958,7 @@ int lov_io_init_released(const struct lu_env *env, struct cl_object *obj,
 	}
 
 	io->ci_result = result < 0 ? result : 0;
-	return result != 0;
+	return result;
 }
 
 /** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lov_lock.c b/drivers/staging/lustre/lustre/lov/lov_lock.c
index ae854bc25dbe..f3a0583f28f5 100644
--- a/drivers/staging/lustre/lustre/lov/lov_lock.c
+++ b/drivers/staging/lustre/lustre/lov/lov_lock.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -46,11 +42,6 @@
  *  @{
  */
 
-static struct cl_lock_closure *lov_closure_get(const struct lu_env *env,
-					       struct cl_lock *parent);
-
-static int lov_lock_unuse(const struct lu_env *env,
-			  const struct cl_lock_slice *slice);
 /*****************************************************************************
  *
  * Lov lock operations.
@@ -58,7 +49,7 @@ static int lov_lock_unuse(const struct lu_env *env,
  */
 
 static struct lov_sublock_env *lov_sublock_env_get(const struct lu_env *env,
-						   struct cl_lock *parent,
+						   const struct cl_lock *parent,
 						   struct lov_lock_sub *lls)
 {
 	struct lov_sublock_env *subenv;
@@ -100,185 +91,26 @@ static void lov_sublock_env_put(struct lov_sublock_env *subenv)
 		lov_sub_put(subenv->lse_sub);
 }
 
-static void lov_sublock_adopt(const struct lu_env *env, struct lov_lock *lck,
-			      struct cl_lock *sublock, int idx,
-			      struct lov_lock_link *link)
+static int lov_sublock_init(const struct lu_env *env,
+			    const struct cl_lock *parent,
+			    struct lov_lock_sub *lls)
 {
-	struct lovsub_lock *lsl;
-	struct cl_lock     *parent = lck->lls_cl.cls_lock;
-	int		 rc;
-
-	LASSERT(cl_lock_is_mutexed(parent));
-	LASSERT(cl_lock_is_mutexed(sublock));
-
-	lsl = cl2sub_lock(sublock);
-	/*
-	 * check that sub-lock doesn't have lock link to this top-lock.
-	 */
-	LASSERT(!lov_lock_link_find(env, lck, lsl));
-	LASSERT(idx < lck->lls_nr);
-
-	lck->lls_sub[idx].sub_lock = lsl;
-	lck->lls_nr_filled++;
-	LASSERT(lck->lls_nr_filled <= lck->lls_nr);
-	list_add_tail(&link->lll_list, &lsl->lss_parents);
-	link->lll_idx = idx;
-	link->lll_super = lck;
-	cl_lock_get(parent);
-	lu_ref_add(&parent->cll_reference, "lov-child", sublock);
-	lck->lls_sub[idx].sub_flags |= LSF_HELD;
-	cl_lock_user_add(env, sublock);
-
-	rc = lov_sublock_modify(env, lck, lsl, &sublock->cll_descr, idx);
-	LASSERT(rc == 0); /* there is no way this can fail, currently */
-}
-
-static struct cl_lock *lov_sublock_alloc(const struct lu_env *env,
-					 const struct cl_io *io,
-					 struct lov_lock *lck,
-					 int idx, struct lov_lock_link **out)
-{
-	struct cl_lock       *sublock;
-	struct cl_lock       *parent;
-	struct lov_lock_link *link;
-
-	LASSERT(idx < lck->lls_nr);
-
-	link = kmem_cache_zalloc(lov_lock_link_kmem, GFP_NOFS);
-	if (link) {
-		struct lov_sublock_env *subenv;
-		struct lov_lock_sub  *lls;
-		struct cl_lock_descr *descr;
-
-		parent = lck->lls_cl.cls_lock;
-		lls    = &lck->lls_sub[idx];
-		descr  = &lls->sub_got;
-
-		subenv = lov_sublock_env_get(env, parent, lls);
-		if (!IS_ERR(subenv)) {
-			/* CAVEAT: Don't try to add a field in lov_lock_sub
-			 * to remember the subio. This is because lock is able
-			 * to be cached, but this is not true for IO. This
-			 * further means a sublock might be referenced in
-			 * different io context. -jay
-			 */
-
-			sublock = cl_lock_hold(subenv->lse_env, subenv->lse_io,
-					       descr, "lov-parent", parent);
-			lov_sublock_env_put(subenv);
-		} else {
-			/* error occurs. */
-			sublock = (void *)subenv;
-		}
-
-		if (!IS_ERR(sublock))
-			*out = link;
-		else
-			kmem_cache_free(lov_lock_link_kmem, link);
-	} else
-		sublock = ERR_PTR(-ENOMEM);
-	return sublock;
-}
-
-static void lov_sublock_unlock(const struct lu_env *env,
-			       struct lovsub_lock *lsl,
-			       struct cl_lock_closure *closure,
-			       struct lov_sublock_env *subenv)
-{
-	lov_sublock_env_put(subenv);
-	lsl->lss_active = NULL;
-	cl_lock_disclosure(env, closure);
-}
-
-static int lov_sublock_lock(const struct lu_env *env,
-			    struct lov_lock *lck,
-			    struct lov_lock_sub *lls,
-			    struct cl_lock_closure *closure,
-			    struct lov_sublock_env **lsep)
-{
-	struct lovsub_lock *sublock;
-	struct cl_lock     *child;
-	int		 result = 0;
-
-	LASSERT(list_empty(&closure->clc_list));
-
-	sublock = lls->sub_lock;
-	child = sublock->lss_cl.cls_lock;
-	result = cl_lock_closure_build(env, child, closure);
-	if (result == 0) {
-		struct cl_lock *parent = closure->clc_origin;
-
-		LASSERT(cl_lock_is_mutexed(child));
-		sublock->lss_active = parent;
-
-		if (unlikely((child->cll_state == CLS_FREEING) ||
-			     (child->cll_flags & CLF_CANCELLED))) {
-			struct lov_lock_link *link;
-			/*
-			 * we could race with lock deletion which temporarily
-			 * put the lock in freeing state, bug 19080.
-			 */
-			LASSERT(!(lls->sub_flags & LSF_HELD));
-
-			link = lov_lock_link_find(env, lck, sublock);
-			LASSERT(link);
-			lov_lock_unlink(env, link, sublock);
-			lov_sublock_unlock(env, sublock, closure, NULL);
-			lck->lls_cancel_race = 1;
-			result = CLO_REPEAT;
-		} else if (lsep) {
-			struct lov_sublock_env *subenv;
+	struct lov_sublock_env *subenv;
+	int result;
 
-			subenv = lov_sublock_env_get(env, parent, lls);
-			if (IS_ERR(subenv)) {
-				lov_sublock_unlock(env, sublock,
-						   closure, NULL);
-				result = PTR_ERR(subenv);
-			} else {
-				*lsep = subenv;
-			}
-		}
+	subenv = lov_sublock_env_get(env, parent, lls);
+	if (!IS_ERR(subenv)) {
+		result = cl_lock_init(subenv->lse_env, &lls->sub_lock,
+				      subenv->lse_io);
+		lov_sublock_env_put(subenv);
+	} else {
+		/* error occurs. */
+		result = PTR_ERR(subenv);
 	}
 	return result;
 }
 
 /**
- * Updates the result of a top-lock operation from a result of sub-lock
- * sub-operations. Top-operations like lov_lock_{enqueue,use,unuse}() iterate
- * over sub-locks and lov_subresult() is used to calculate return value of a
- * top-operation. To this end, possible return values of sub-operations are
- * ordered as
- *
- *     - 0		  success
- *     - CLO_WAIT	   wait for event
- *     - CLO_REPEAT	 repeat top-operation
- *     - -ne		fundamental error
- *
- * Top-level return code can only go down through this list. CLO_REPEAT
- * overwrites CLO_WAIT, because lock mutex was released and sleeping condition
- * has to be rechecked by the upper layer.
- */
-static int lov_subresult(int result, int rc)
-{
-	int result_rank;
-	int rc_rank;
-
-	LASSERTF(result <= 0 || result == CLO_REPEAT || result == CLO_WAIT,
-		 "result = %d\n", result);
-	LASSERTF(rc <= 0 || rc == CLO_REPEAT || rc == CLO_WAIT,
-		 "rc = %d\n", rc);
-	CLASSERT(CLO_WAIT < CLO_REPEAT);
-
-	/* calculate ranks in the ordering above */
-	result_rank = result < 0 ? 1 + CLO_REPEAT : result;
-	rc_rank = rc < 0 ? 1 + CLO_REPEAT : rc;
-
-	if (result_rank < rc_rank)
-		result = rc;
-	return result;
-}
-
-/**
  * Creates sub-locks for a given lov_lock for the first time.
  *
  * Goes through all sub-objects of top-object, and creates sub-locks on every
@@ -286,8 +118,9 @@ static int lov_subresult(int result, int rc)
  * fact that top-lock (that is being created) can be accessed concurrently
  * through already created sub-locks (possibly shared with other top-locks).
  */
-static int lov_lock_sub_init(const struct lu_env *env,
-			     struct lov_lock *lck, const struct cl_io *io)
+static struct lov_lock *lov_lock_sub_init(const struct lu_env *env,
+					  const struct cl_object *obj,
+					  struct cl_lock *lock)
 {
 	int result = 0;
 	int i;
@@ -297,241 +130,86 @@ static int lov_lock_sub_init(const struct lu_env *env,
 	u64 file_start;
 	u64 file_end;
 
-	struct lov_object       *loo    = cl2lov(lck->lls_cl.cls_obj);
+	struct lov_object       *loo    = cl2lov(obj);
 	struct lov_layout_raid0 *r0     = lov_r0(loo);
-	struct cl_lock	  *parent = lck->lls_cl.cls_lock;
+	struct lov_lock		*lovlck;
 
-	lck->lls_orig = parent->cll_descr;
-	file_start = cl_offset(lov2cl(loo), parent->cll_descr.cld_start);
-	file_end   = cl_offset(lov2cl(loo), parent->cll_descr.cld_end + 1) - 1;
+	file_start = cl_offset(lov2cl(loo), lock->cll_descr.cld_start);
+	file_end   = cl_offset(lov2cl(loo), lock->cll_descr.cld_end + 1) - 1;
 
 	for (i = 0, nr = 0; i < r0->lo_nr; i++) {
 		/*
 		 * XXX for wide striping smarter algorithm is desirable,
 		 * breaking out of the loop, early.
 		 */
-		if (likely(r0->lo_sub[i]) &&
+		if (likely(r0->lo_sub[i]) && /* spare layout */
 		    lov_stripe_intersects(loo->lo_lsm, i,
 					  file_start, file_end, &start, &end))
 			nr++;
 	}
 	LASSERT(nr > 0);
-	lck->lls_sub = libcfs_kvzalloc(nr * sizeof(lck->lls_sub[0]), GFP_NOFS);
-	if (!lck->lls_sub)
-		return -ENOMEM;
+	lovlck = libcfs_kvzalloc(offsetof(struct lov_lock, lls_sub[nr]),
+				 GFP_NOFS);
+	if (!lovlck)
+		return ERR_PTR(-ENOMEM);
 
-	lck->lls_nr = nr;
-	/*
-	 * First, fill in sub-lock descriptions in
-	 * lck->lls_sub[].sub_descr. They are used by lov_sublock_alloc()
-	 * (called below in this function, and by lov_lock_enqueue()) to
-	 * create sub-locks. At this moment, no other thread can access
-	 * top-lock.
-	 */
+	lovlck->lls_nr = nr;
 	for (i = 0, nr = 0; i < r0->lo_nr; ++i) {
 		if (likely(r0->lo_sub[i]) &&
 		    lov_stripe_intersects(loo->lo_lsm, i,
 					  file_start, file_end, &start, &end)) {
+			struct lov_lock_sub *lls = &lovlck->lls_sub[nr];
 			struct cl_lock_descr *descr;
 
-			descr = &lck->lls_sub[nr].sub_descr;
+			descr = &lls->sub_lock.cll_descr;
 
 			LASSERT(!descr->cld_obj);
 			descr->cld_obj   = lovsub2cl(r0->lo_sub[i]);
 			descr->cld_start = cl_index(descr->cld_obj, start);
 			descr->cld_end   = cl_index(descr->cld_obj, end);
-			descr->cld_mode  = parent->cll_descr.cld_mode;
-			descr->cld_gid   = parent->cll_descr.cld_gid;
-			descr->cld_enq_flags   = parent->cll_descr.cld_enq_flags;
-			/* XXX has no effect */
-			lck->lls_sub[nr].sub_got = *descr;
-			lck->lls_sub[nr].sub_stripe = i;
+			descr->cld_mode  = lock->cll_descr.cld_mode;
+			descr->cld_gid   = lock->cll_descr.cld_gid;
+			descr->cld_enq_flags = lock->cll_descr.cld_enq_flags;
+			lls->sub_stripe = i;
+
+			/* initialize sub lock */
+			result = lov_sublock_init(env, lock, lls);
+			if (result < 0)
+				break;
+
+			lls->sub_initialized = 1;
 			nr++;
 		}
 	}
-	LASSERT(nr == lck->lls_nr);
-
-	/*
-	 * Some sub-locks can be missing at this point. This is not a problem,
-	 * because enqueue will create them anyway. Main duty of this function
-	 * is to fill in sub-lock descriptions in a race free manner.
-	 */
-	return result;
-}
+	LASSERT(ergo(result == 0, nr == lovlck->lls_nr));
 
-static int lov_sublock_release(const struct lu_env *env, struct lov_lock *lck,
-			       int i, int deluser, int rc)
-{
-	struct cl_lock *parent = lck->lls_cl.cls_lock;
-
-	LASSERT(cl_lock_is_mutexed(parent));
-
-	if (lck->lls_sub[i].sub_flags & LSF_HELD) {
-		struct cl_lock    *sublock;
-		int dying;
-
-		sublock = lck->lls_sub[i].sub_lock->lss_cl.cls_lock;
-		LASSERT(cl_lock_is_mutexed(sublock));
+	if (result != 0) {
+		for (i = 0; i < nr; ++i) {
+			if (!lovlck->lls_sub[i].sub_initialized)
+				break;
 
-		lck->lls_sub[i].sub_flags &= ~LSF_HELD;
-		if (deluser)
-			cl_lock_user_del(env, sublock);
-		/*
-		 * If the last hold is released, and cancellation is pending
-		 * for a sub-lock, release parent mutex, to avoid keeping it
-		 * while sub-lock is being paged out.
-		 */
-		dying = (sublock->cll_descr.cld_mode == CLM_PHANTOM ||
-			 sublock->cll_descr.cld_mode == CLM_GROUP ||
-			 (sublock->cll_flags & (CLF_CANCELPEND|CLF_DOOMED))) &&
-			sublock->cll_holds == 1;
-		if (dying)
-			cl_lock_mutex_put(env, parent);
-		cl_lock_unhold(env, sublock, "lov-parent", parent);
-		if (dying) {
-			cl_lock_mutex_get(env, parent);
-			rc = lov_subresult(rc, CLO_REPEAT);
+			cl_lock_fini(env, &lovlck->lls_sub[i].sub_lock);
 		}
-		/*
-		 * From now on lck->lls_sub[i].sub_lock is a "weak" pointer,
-		 * not backed by a reference on a
-		 * sub-lock. lovsub_lock_delete() will clear
-		 * lck->lls_sub[i].sub_lock under semaphores, just before
-		 * sub-lock is destroyed.
-		 */
+		kvfree(lovlck);
+		lovlck = ERR_PTR(result);
 	}
-	return rc;
-}
-
-static void lov_sublock_hold(const struct lu_env *env, struct lov_lock *lck,
-			     int i)
-{
-	struct cl_lock *parent = lck->lls_cl.cls_lock;
-
-	LASSERT(cl_lock_is_mutexed(parent));
-
-	if (!(lck->lls_sub[i].sub_flags & LSF_HELD)) {
-		struct cl_lock *sublock;
-
-		sublock = lck->lls_sub[i].sub_lock->lss_cl.cls_lock;
-		LASSERT(cl_lock_is_mutexed(sublock));
-		LASSERT(sublock->cll_state != CLS_FREEING);
 
-		lck->lls_sub[i].sub_flags |= LSF_HELD;
-
-		cl_lock_get_trust(sublock);
-		cl_lock_hold_add(env, sublock, "lov-parent", parent);
-		cl_lock_user_add(env, sublock);
-		cl_lock_put(env, sublock);
-	}
+	return lovlck;
 }
 
 static void lov_lock_fini(const struct lu_env *env,
 			  struct cl_lock_slice *slice)
 {
-	struct lov_lock *lck;
+	struct lov_lock *lovlck;
 	int i;
 
-	lck = cl2lov_lock(slice);
-	LASSERT(lck->lls_nr_filled == 0);
-	if (lck->lls_sub) {
-		for (i = 0; i < lck->lls_nr; ++i)
-			/*
-			 * No sub-locks exists at this point, as sub-lock has
-			 * a reference on its parent.
-			 */
-			LASSERT(!lck->lls_sub[i].sub_lock);
-		kvfree(lck->lls_sub);
+	lovlck = cl2lov_lock(slice);
+	for (i = 0; i < lovlck->lls_nr; ++i) {
+		LASSERT(!lovlck->lls_sub[i].sub_is_enqueued);
+		if (lovlck->lls_sub[i].sub_initialized)
+			cl_lock_fini(env, &lovlck->lls_sub[i].sub_lock);
 	}
-	kmem_cache_free(lov_lock_kmem, lck);
-}
-
-static int lov_lock_enqueue_wait(const struct lu_env *env,
-				 struct lov_lock *lck,
-				 struct cl_lock *sublock)
-{
-	struct cl_lock *lock = lck->lls_cl.cls_lock;
-	int	     result;
-
-	LASSERT(cl_lock_is_mutexed(lock));
-
-	cl_lock_mutex_put(env, lock);
-	result = cl_lock_enqueue_wait(env, sublock, 0);
-	cl_lock_mutex_get(env, lock);
-	return result ?: CLO_REPEAT;
-}
-
-/**
- * Tries to advance a state machine of a given sub-lock toward enqueuing of
- * the top-lock.
- *
- * \retval 0 if state-transition can proceed
- * \retval -ve otherwise.
- */
-static int lov_lock_enqueue_one(const struct lu_env *env, struct lov_lock *lck,
-				struct cl_lock *sublock,
-				struct cl_io *io, __u32 enqflags, int last)
-{
-	int result;
-
-	/* first, try to enqueue a sub-lock ... */
-	result = cl_enqueue_try(env, sublock, io, enqflags);
-	if ((sublock->cll_state == CLS_ENQUEUED) && !(enqflags & CEF_AGL)) {
-		/* if it is enqueued, try to `wait' on it---maybe it's already
-		 * granted
-		 */
-		result = cl_wait_try(env, sublock);
-		if (result == CLO_REENQUEUED)
-			result = CLO_WAIT;
-	}
-	/*
-	 * If CEF_ASYNC flag is set, then all sub-locks can be enqueued in
-	 * parallel, otherwise---enqueue has to wait until sub-lock is granted
-	 * before proceeding to the next one.
-	 */
-	if ((result == CLO_WAIT) && (sublock->cll_state <= CLS_HELD) &&
-	    (enqflags & CEF_ASYNC) && (!last || (enqflags & CEF_AGL)))
-		result = 0;
-	return result;
-}
-
-/**
- * Helper function for lov_lock_enqueue() that creates missing sub-lock.
- */
-static int lov_sublock_fill(const struct lu_env *env, struct cl_lock *parent,
-			    struct cl_io *io, struct lov_lock *lck, int idx)
-{
-	struct lov_lock_link *link = NULL;
-	struct cl_lock       *sublock;
-	int		   result;
-
-	LASSERT(parent->cll_depth == 1);
-	cl_lock_mutex_put(env, parent);
-	sublock = lov_sublock_alloc(env, io, lck, idx, &link);
-	if (!IS_ERR(sublock))
-		cl_lock_mutex_get(env, sublock);
-	cl_lock_mutex_get(env, parent);
-
-	if (!IS_ERR(sublock)) {
-		cl_lock_get_trust(sublock);
-		if (parent->cll_state == CLS_QUEUING &&
-		    !lck->lls_sub[idx].sub_lock) {
-			lov_sublock_adopt(env, lck, sublock, idx, link);
-		} else {
-			kmem_cache_free(lov_lock_link_kmem, link);
-			/* other thread allocated sub-lock, or enqueue is no
-			 * longer going on
-			 */
-			cl_lock_mutex_put(env, parent);
-			cl_lock_unhold(env, sublock, "lov-parent", parent);
-			cl_lock_mutex_get(env, parent);
-		}
-		cl_lock_mutex_put(env, sublock);
-		cl_lock_put(env, sublock);
-		result = CLO_REPEAT;
-	} else
-		result = PTR_ERR(sublock);
-	return result;
+	kvfree(lovlck);
 }
 
 /**
@@ -543,529 +221,59 @@ static int lov_sublock_fill(const struct lu_env *env, struct cl_lock *parent,
  */
 static int lov_lock_enqueue(const struct lu_env *env,
 			    const struct cl_lock_slice *slice,
-			    struct cl_io *io, __u32 enqflags)
+			    struct cl_io *io, struct cl_sync_io *anchor)
 {
-	struct cl_lock	 *lock    = slice->cls_lock;
-	struct lov_lock	*lck     = cl2lov_lock(slice);
-	struct cl_lock_closure *closure = lov_closure_get(env, lock);
+	struct cl_lock *lock = slice->cls_lock;
+	struct lov_lock *lovlck = cl2lov_lock(slice);
 	int i;
-	int result;
-	enum cl_lock_state minstate;
+	int rc = 0;
 
-	for (result = 0, minstate = CLS_FREEING, i = 0; i < lck->lls_nr; ++i) {
-		int rc;
-		struct lovsub_lock     *sub;
-		struct lov_lock_sub    *lls;
-		struct cl_lock	 *sublock;
+	for (i = 0; i < lovlck->lls_nr; ++i) {
+		struct lov_lock_sub  *lls = &lovlck->lls_sub[i];
 		struct lov_sublock_env *subenv;
 
-		if (lock->cll_state != CLS_QUEUING) {
-			/*
-			 * Lock might have left QUEUING state if previous
-			 * iteration released its mutex. Stop enqueing in this
-			 * case and let the upper layer to decide what to do.
-			 */
-			LASSERT(i > 0 && result != 0);
-			break;
-		}
-
-		lls = &lck->lls_sub[i];
-		sub = lls->sub_lock;
-		/*
-		 * Sub-lock might have been canceled, while top-lock was
-		 * cached.
-		 */
-		if (!sub) {
-			result = lov_sublock_fill(env, lock, io, lck, i);
-			/* lov_sublock_fill() released @lock mutex,
-			 * restart.
-			 */
+		subenv = lov_sublock_env_get(env, lock, lls);
+		if (IS_ERR(subenv)) {
+			rc = PTR_ERR(subenv);
 			break;
 		}
-		sublock = sub->lss_cl.cls_lock;
-		rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
-		if (rc == 0) {
-			lov_sublock_hold(env, lck, i);
-			rc = lov_lock_enqueue_one(subenv->lse_env, lck, sublock,
-						  subenv->lse_io, enqflags,
-						  i == lck->lls_nr - 1);
-			minstate = min(minstate, sublock->cll_state);
-			if (rc == CLO_WAIT) {
-				switch (sublock->cll_state) {
-				case CLS_QUEUING:
-					/* take recursive mutex, the lock is
-					 * released in lov_lock_enqueue_wait.
-					 */
-					cl_lock_mutex_get(env, sublock);
-					lov_sublock_unlock(env, sub, closure,
-							   subenv);
-					rc = lov_lock_enqueue_wait(env, lck,
-								   sublock);
-					break;
-				case CLS_CACHED:
-					cl_lock_get(sublock);
-					/* take recursive mutex of sublock */
-					cl_lock_mutex_get(env, sublock);
-					/* need to release all locks in closure
-					 * otherwise it may deadlock. LU-2683.
-					 */
-					lov_sublock_unlock(env, sub, closure,
-							   subenv);
-					/* sublock and parent are held. */
-					rc = lov_sublock_release(env, lck, i,
-								 1, rc);
-					cl_lock_mutex_put(env, sublock);
-					cl_lock_put(env, sublock);
-					break;
-				default:
-					lov_sublock_unlock(env, sub, closure,
-							   subenv);
-					break;
-				}
-			} else {
-				LASSERT(!sublock->cll_conflict);
-				lov_sublock_unlock(env, sub, closure, subenv);
-			}
-		}
-		result = lov_subresult(result, rc);
-		if (result != 0)
+		rc = cl_lock_enqueue(subenv->lse_env, subenv->lse_io,
+				     &lls->sub_lock, anchor);
+		lov_sublock_env_put(subenv);
+		if (rc != 0)
 			break;
-	}
-	cl_lock_closure_fini(closure);
-	return result ?: minstate >= CLS_ENQUEUED ? 0 : CLO_WAIT;
-}
-
-static int lov_lock_unuse(const struct lu_env *env,
-			  const struct cl_lock_slice *slice)
-{
-	struct lov_lock	*lck     = cl2lov_lock(slice);
-	struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
-	int i;
-	int result;
-
-	for (result = 0, i = 0; i < lck->lls_nr; ++i) {
-		int rc;
-		struct lovsub_lock     *sub;
-		struct cl_lock	 *sublock;
-		struct lov_lock_sub    *lls;
-		struct lov_sublock_env *subenv;
 
-		/* top-lock state cannot change concurrently, because single
-		 * thread (one that released the last hold) carries unlocking
-		 * to the completion.
-		 */
-		LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT);
-		lls = &lck->lls_sub[i];
-		sub = lls->sub_lock;
-		if (!sub)
-			continue;
-
-		sublock = sub->lss_cl.cls_lock;
-		rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
-		if (rc == 0) {
-			if (lls->sub_flags & LSF_HELD) {
-				LASSERT(sublock->cll_state == CLS_HELD ||
-					sublock->cll_state == CLS_ENQUEUED);
-				rc = cl_unuse_try(subenv->lse_env, sublock);
-				rc = lov_sublock_release(env, lck, i, 0, rc);
-			}
-			lov_sublock_unlock(env, sub, closure, subenv);
-		}
-		result = lov_subresult(result, rc);
+		lls->sub_is_enqueued = 1;
 	}
-
-	if (result == 0 && lck->lls_cancel_race) {
-		lck->lls_cancel_race = 0;
-		result = -ESTALE;
-	}
-	cl_lock_closure_fini(closure);
-	return result;
+	return rc;
 }
 
 static void lov_lock_cancel(const struct lu_env *env,
 			    const struct cl_lock_slice *slice)
 {
-	struct lov_lock	*lck     = cl2lov_lock(slice);
-	struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
+	struct cl_lock *lock = slice->cls_lock;
+	struct lov_lock *lovlck = cl2lov_lock(slice);
 	int i;
-	int result;
 
-	for (result = 0, i = 0; i < lck->lls_nr; ++i) {
-		int rc;
-		struct lovsub_lock     *sub;
-		struct cl_lock	 *sublock;
-		struct lov_lock_sub    *lls;
+	for (i = 0; i < lovlck->lls_nr; ++i) {
+		struct lov_lock_sub *lls = &lovlck->lls_sub[i];
+		struct cl_lock *sublock = &lls->sub_lock;
 		struct lov_sublock_env *subenv;
 
-		/* top-lock state cannot change concurrently, because single
-		 * thread (one that released the last hold) carries unlocking
-		 * to the completion.
-		 */
-		lls = &lck->lls_sub[i];
-		sub = lls->sub_lock;
-		if (!sub)
-			continue;
-
-		sublock = sub->lss_cl.cls_lock;
-		rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
-		if (rc == 0) {
-			if (!(lls->sub_flags & LSF_HELD)) {
-				lov_sublock_unlock(env, sub, closure, subenv);
-				continue;
-			}
-
-			switch (sublock->cll_state) {
-			case CLS_HELD:
-				rc = cl_unuse_try(subenv->lse_env, sublock);
-				lov_sublock_release(env, lck, i, 0, 0);
-				break;
-			default:
-				lov_sublock_release(env, lck, i, 1, 0);
-				break;
-			}
-			lov_sublock_unlock(env, sub, closure, subenv);
-		}
-
-		if (rc == CLO_REPEAT) {
-			--i;
-			continue;
-		}
-
-		result = lov_subresult(result, rc);
-	}
-
-	if (result)
-		CL_LOCK_DEBUG(D_ERROR, env, slice->cls_lock,
-			      "lov_lock_cancel fails with %d.\n", result);
-
-	cl_lock_closure_fini(closure);
-}
-
-static int lov_lock_wait(const struct lu_env *env,
-			 const struct cl_lock_slice *slice)
-{
-	struct lov_lock	*lck     = cl2lov_lock(slice);
-	struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
-	enum cl_lock_state      minstate;
-	int		     reenqueued;
-	int		     result;
-	int		     i;
-
-again:
-	for (result = 0, minstate = CLS_FREEING, i = 0, reenqueued = 0;
-	     i < lck->lls_nr; ++i) {
-		int rc;
-		struct lovsub_lock     *sub;
-		struct cl_lock	 *sublock;
-		struct lov_lock_sub    *lls;
-		struct lov_sublock_env *subenv;
-
-		lls = &lck->lls_sub[i];
-		sub = lls->sub_lock;
-		sublock = sub->lss_cl.cls_lock;
-		rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
-		if (rc == 0) {
-			LASSERT(sublock->cll_state >= CLS_ENQUEUED);
-			if (sublock->cll_state < CLS_HELD)
-				rc = cl_wait_try(env, sublock);
-
-			minstate = min(minstate, sublock->cll_state);
-			lov_sublock_unlock(env, sub, closure, subenv);
-		}
-		if (rc == CLO_REENQUEUED) {
-			reenqueued++;
-			rc = 0;
-		}
-		result = lov_subresult(result, rc);
-		if (result != 0)
-			break;
-	}
-	/* Each sublock only can be reenqueued once, so will not loop
-	 * forever.
-	 */
-	if (result == 0 && reenqueued != 0)
-		goto again;
-	cl_lock_closure_fini(closure);
-	return result ?: minstate >= CLS_HELD ? 0 : CLO_WAIT;
-}
-
-static int lov_lock_use(const struct lu_env *env,
-			const struct cl_lock_slice *slice)
-{
-	struct lov_lock	*lck     = cl2lov_lock(slice);
-	struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
-	int		     result;
-	int		     i;
-
-	LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT);
-
-	for (result = 0, i = 0; i < lck->lls_nr; ++i) {
-		int rc;
-		struct lovsub_lock     *sub;
-		struct cl_lock	 *sublock;
-		struct lov_lock_sub    *lls;
-		struct lov_sublock_env *subenv;
-
-		LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT);
-
-		lls = &lck->lls_sub[i];
-		sub = lls->sub_lock;
-		if (!sub) {
-			/*
-			 * Sub-lock might have been canceled, while top-lock was
-			 * cached.
-			 */
-			result = -ESTALE;
-			break;
-		}
-
-		sublock = sub->lss_cl.cls_lock;
-		rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
-		if (rc == 0) {
-			LASSERT(sublock->cll_state != CLS_FREEING);
-			lov_sublock_hold(env, lck, i);
-			if (sublock->cll_state == CLS_CACHED) {
-				rc = cl_use_try(subenv->lse_env, sublock, 0);
-				if (rc != 0)
-					rc = lov_sublock_release(env, lck,
-								 i, 1, rc);
-			} else if (sublock->cll_state == CLS_NEW) {
-				/* Sub-lock might have been canceled, while
-				 * top-lock was cached.
-				 */
-				result = -ESTALE;
-				lov_sublock_release(env, lck, i, 1, result);
-			}
-			lov_sublock_unlock(env, sub, closure, subenv);
-		}
-		result = lov_subresult(result, rc);
-		if (result != 0)
-			break;
-	}
-
-	if (lck->lls_cancel_race) {
-		/*
-		 * If there is unlocking happened at the same time, then
-		 * sublock_lock state should be FREEING, and lov_sublock_lock
-		 * should return CLO_REPEAT. In this case, it should return
-		 * ESTALE, and up layer should reset the lock state to be NEW.
-		 */
-		lck->lls_cancel_race = 0;
-		LASSERT(result != 0);
-		result = -ESTALE;
-	}
-	cl_lock_closure_fini(closure);
-	return result;
-}
-
-/**
- * Check if the extent region \a descr is covered by \a child against the
- * specific \a stripe.
- */
-static int lov_lock_stripe_is_matching(const struct lu_env *env,
-				       struct lov_object *lov, int stripe,
-				       const struct cl_lock_descr *child,
-				       const struct cl_lock_descr *descr)
-{
-	struct lov_stripe_md *lsm = lov->lo_lsm;
-	u64 start;
-	u64 end;
-	int result;
-
-	if (lov_r0(lov)->lo_nr == 1)
-		return cl_lock_ext_match(child, descr);
-
-	/*
-	 * For a multi-stripes object:
-	 * - make sure the descr only covers child's stripe, and
-	 * - check if extent is matching.
-	 */
-	start = cl_offset(&lov->lo_cl, descr->cld_start);
-	end   = cl_offset(&lov->lo_cl, descr->cld_end + 1) - 1;
-	result = 0;
-	/* glimpse should work on the object with LOV EA hole. */
-	if (end - start <= lsm->lsm_stripe_size) {
-		int idx;
-
-		idx = lov_stripe_number(lsm, start);
-		if (idx == stripe ||
-		    unlikely(!lov_r0(lov)->lo_sub[idx])) {
-			idx = lov_stripe_number(lsm, end);
-			if (idx == stripe ||
-			    unlikely(!lov_r0(lov)->lo_sub[idx]))
-				result = 1;
-		}
-	}
-
-	if (result != 0) {
-		struct cl_lock_descr *subd = &lov_env_info(env)->lti_ldescr;
-		u64 sub_start;
-		u64 sub_end;
-
-		subd->cld_obj  = NULL;   /* don't need sub object at all */
-		subd->cld_mode = descr->cld_mode;
-		subd->cld_gid  = descr->cld_gid;
-		result = lov_stripe_intersects(lsm, stripe, start, end,
-					       &sub_start, &sub_end);
-		LASSERT(result);
-		subd->cld_start = cl_index(child->cld_obj, sub_start);
-		subd->cld_end   = cl_index(child->cld_obj, sub_end);
-		result = cl_lock_ext_match(child, subd);
-	}
-	return result;
-}
-
-/**
- * An implementation of cl_lock_operations::clo_fits_into() method.
- *
- * Checks whether a lock (given by \a slice) is suitable for \a
- * io. Multi-stripe locks can be used only for "quick" io, like truncate, or
- * O_APPEND write.
- *
- * \see ccc_lock_fits_into().
- */
-static int lov_lock_fits_into(const struct lu_env *env,
-			      const struct cl_lock_slice *slice,
-			      const struct cl_lock_descr *need,
-			      const struct cl_io *io)
-{
-	struct lov_lock   *lov = cl2lov_lock(slice);
-	struct lov_object *obj = cl2lov(slice->cls_obj);
-	int result;
-
-	LASSERT(cl_object_same(need->cld_obj, slice->cls_obj));
-	LASSERT(lov->lls_nr > 0);
-
-	/* for top lock, it's necessary to match enq flags otherwise it will
-	 * run into problem if a sublock is missing and reenqueue.
-	 */
-	if (need->cld_enq_flags != lov->lls_orig.cld_enq_flags)
-		return 0;
-
-	if (need->cld_mode == CLM_GROUP)
-		/*
-		 * always allow to match group lock.
-		 */
-		result = cl_lock_ext_match(&lov->lls_orig, need);
-	else if (lov->lls_nr == 1) {
-		struct cl_lock_descr *got = &lov->lls_sub[0].sub_got;
-
-		result = lov_lock_stripe_is_matching(env,
-						     cl2lov(slice->cls_obj),
-						     lov->lls_sub[0].sub_stripe,
-						     got, need);
-	} else if (io->ci_type != CIT_SETATTR && io->ci_type != CIT_MISC &&
-		   !cl_io_is_append(io) && need->cld_mode != CLM_PHANTOM)
-		/*
-		 * Multi-stripe locks are only suitable for `quick' IO and for
-		 * glimpse.
-		 */
-		result = 0;
-	else
-		/*
-		 * Most general case: multi-stripe existing lock, and
-		 * (potentially) multi-stripe @need lock. Check that @need is
-		 * covered by @lov's sub-locks.
-		 *
-		 * For now, ignore lock expansions made by the server, and
-		 * match against original lock extent.
-		 */
-		result = cl_lock_ext_match(&lov->lls_orig, need);
-	CDEBUG(D_DLMTRACE, DDESCR"/"DDESCR" %d %d/%d: %d\n",
-	       PDESCR(&lov->lls_orig), PDESCR(&lov->lls_sub[0].sub_got),
-	       lov->lls_sub[0].sub_stripe, lov->lls_nr, lov_r0(obj)->lo_nr,
-	       result);
-	return result;
-}
-
-void lov_lock_unlink(const struct lu_env *env,
-		     struct lov_lock_link *link, struct lovsub_lock *sub)
-{
-	struct lov_lock *lck    = link->lll_super;
-	struct cl_lock  *parent = lck->lls_cl.cls_lock;
-
-	LASSERT(cl_lock_is_mutexed(parent));
-	LASSERT(cl_lock_is_mutexed(sub->lss_cl.cls_lock));
-
-	list_del_init(&link->lll_list);
-	LASSERT(lck->lls_sub[link->lll_idx].sub_lock == sub);
-	/* yank this sub-lock from parent's array */
-	lck->lls_sub[link->lll_idx].sub_lock = NULL;
-	LASSERT(lck->lls_nr_filled > 0);
-	lck->lls_nr_filled--;
-	lu_ref_del(&parent->cll_reference, "lov-child", sub->lss_cl.cls_lock);
-	cl_lock_put(env, parent);
-	kmem_cache_free(lov_lock_link_kmem, link);
-}
-
-struct lov_lock_link *lov_lock_link_find(const struct lu_env *env,
-					 struct lov_lock *lck,
-					 struct lovsub_lock *sub)
-{
-	struct lov_lock_link *scan;
-
-	LASSERT(cl_lock_is_mutexed(sub->lss_cl.cls_lock));
-
-	list_for_each_entry(scan, &sub->lss_parents, lll_list) {
-		if (scan->lll_super == lck)
-			return scan;
-	}
-	return NULL;
-}
-
-/**
- * An implementation of cl_lock_operations::clo_delete() method. This is
- * invoked for "top-to-bottom" delete, when lock destruction starts from the
- * top-lock, e.g., as a result of inode destruction.
- *
- * Unlinks top-lock from all its sub-locks. Sub-locks are not deleted there:
- * this is done separately elsewhere:
- *
- *     - for inode destruction, lov_object_delete() calls cl_object_kill() for
- *       each sub-object, purging its locks;
- *
- *     - in other cases (e.g., a fatal error with a top-lock) sub-locks are
- *       left in the cache.
- */
-static void lov_lock_delete(const struct lu_env *env,
-			    const struct cl_lock_slice *slice)
-{
-	struct lov_lock	*lck     = cl2lov_lock(slice);
-	struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
-	struct lov_lock_link   *link;
-	int		     rc;
-	int		     i;
-
-	LASSERT(slice->cls_lock->cll_state == CLS_FREEING);
-
-	for (i = 0; i < lck->lls_nr; ++i) {
-		struct lov_lock_sub *lls = &lck->lls_sub[i];
-		struct lovsub_lock  *lsl = lls->sub_lock;
-
-		if (!lsl) /* already removed */
+		if (!lls->sub_is_enqueued)
 			continue;
 
-		rc = lov_sublock_lock(env, lck, lls, closure, NULL);
-		if (rc == CLO_REPEAT) {
-			--i;
-			continue;
+		lls->sub_is_enqueued = 0;
+		subenv = lov_sublock_env_get(env, lock, lls);
+		if (!IS_ERR(subenv)) {
+			cl_lock_cancel(subenv->lse_env, sublock);
+			lov_sublock_env_put(subenv);
+		} else {
+			CL_LOCK_DEBUG(D_ERROR, env, slice->cls_lock,
+				      "lov_lock_cancel fails with %ld.\n",
+				      PTR_ERR(subenv));
 		}
-
-		LASSERT(rc == 0);
-		LASSERT(lsl->lss_cl.cls_lock->cll_state < CLS_FREEING);
-
-		if (lls->sub_flags & LSF_HELD)
-			lov_sublock_release(env, lck, i, 1, 0);
-
-		link = lov_lock_link_find(env, lck, lsl);
-		LASSERT(link);
-		lov_lock_unlink(env, link, lsl);
-		LASSERT(!lck->lls_sub[i].sub_lock);
-
-		lov_sublock_unlock(env, lsl, closure, NULL);
 	}
-
-	cl_lock_closure_fini(closure);
 }
 
 static int lov_lock_print(const struct lu_env *env, void *cookie,
@@ -1079,12 +287,8 @@ static int lov_lock_print(const struct lu_env *env, void *cookie,
 		struct lov_lock_sub *sub;
 
 		sub = &lck->lls_sub[i];
-		(*p)(env, cookie, "    %d %x: ", i, sub->sub_flags);
-		if (sub->sub_lock)
-			cl_lock_print(env, cookie, p,
-				      sub->sub_lock->lss_cl.cls_lock);
-		else
-			(*p)(env, cookie, "---\n");
+		(*p)(env, cookie, "    %d %x: ", i, sub->sub_is_enqueued);
+		cl_lock_print(env, cookie, p, &sub->sub_lock);
 	}
 	return 0;
 }
@@ -1092,12 +296,7 @@ static int lov_lock_print(const struct lu_env *env, void *cookie,
 static const struct cl_lock_operations lov_lock_ops = {
 	.clo_fini      = lov_lock_fini,
 	.clo_enqueue   = lov_lock_enqueue,
-	.clo_wait      = lov_lock_wait,
-	.clo_use       = lov_lock_use,
-	.clo_unuse     = lov_lock_unuse,
 	.clo_cancel    = lov_lock_cancel,
-	.clo_fits_into = lov_lock_fits_into,
-	.clo_delete    = lov_lock_delete,
 	.clo_print     = lov_lock_print
 };
 
@@ -1105,14 +304,13 @@ int lov_lock_init_raid0(const struct lu_env *env, struct cl_object *obj,
 			struct cl_lock *lock, const struct cl_io *io)
 {
 	struct lov_lock *lck;
-	int result;
+	int result = 0;
 
-	lck = kmem_cache_zalloc(lov_lock_kmem, GFP_NOFS);
-	if (lck) {
+	lck = lov_lock_sub_init(env, obj, lock);
+	if (!IS_ERR(lck))
 		cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_lock_ops);
-		result = lov_lock_sub_init(env, lck, io);
-	} else
-		result = -ENOMEM;
+	else
+		result = PTR_ERR(lck);
 	return result;
 }
 
@@ -1147,21 +345,9 @@ int lov_lock_init_empty(const struct lu_env *env, struct cl_object *obj,
 	lck = kmem_cache_zalloc(lov_lock_kmem, GFP_NOFS);
 	if (lck) {
 		cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_empty_lock_ops);
-		lck->lls_orig = lock->cll_descr;
 		result = 0;
 	}
 	return result;
 }
 
-static struct cl_lock_closure *lov_closure_get(const struct lu_env *env,
-					       struct cl_lock *parent)
-{
-	struct cl_lock_closure *closure;
-
-	closure = &lov_env_info(env)->lti_closure;
-	LASSERT(list_empty(&closure->clc_list));
-	cl_lock_closure_init(env, closure, parent, 1);
-	return closure;
-}
-
 /** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lov_merge.c b/drivers/staging/lustre/lustre/lov/lov_merge.c
index 029cd4d62796..b9c90865fdfc 100644
--- a/drivers/staging/lustre/lustre/lov/lov_merge.c
+++ b/drivers/staging/lustre/lustre/lov/lov_merge.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -154,6 +150,7 @@ void lov_merge_attrs(struct obdo *tgt, struct obdo *src, u64 valid,
 	valid &= src->o_valid;
 
 	if (*set) {
+		tgt->o_valid &= valid;
 		if (valid & OBD_MD_FLSIZE) {
 			/* this handles sparse files properly */
 			u64 lov_size;
@@ -172,12 +169,22 @@ void lov_merge_attrs(struct obdo *tgt, struct obdo *src, u64 valid,
 			tgt->o_mtime = src->o_mtime;
 		if (valid & OBD_MD_FLDATAVERSION)
 			tgt->o_data_version += src->o_data_version;
+
+		/* handle flags */
+		if (valid & OBD_MD_FLFLAGS)
+			tgt->o_flags &= src->o_flags;
+		else
+			tgt->o_flags = 0;
 	} else {
 		memcpy(tgt, src, sizeof(*tgt));
 		tgt->o_oi = lsm->lsm_oi;
+		tgt->o_valid = valid;
 		if (valid & OBD_MD_FLSIZE)
 			tgt->o_size = lov_stripe_size(lsm, src->o_size,
 						      stripeno);
+		tgt->o_flags = 0;
+		if (valid & OBD_MD_FLFLAGS)
+			tgt->o_flags = src->o_flags;
 	}
 
 	/* data_version needs to be valid on all stripes to be correct! */
diff --git a/drivers/staging/lustre/lustre/lov/lov_obd.c b/drivers/staging/lustre/lustre/lov/lov_obd.c
index 5daa7faf4dda..9b92d5522edb 100644
--- a/drivers/staging/lustre/lustre/lov/lov_obd.c
+++ b/drivers/staging/lustre/lustre/lov/lov_obd.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -54,7 +50,6 @@
 #include "../include/lprocfs_status.h"
 #include "../include/lustre_param.h"
 #include "../include/cl_object.h"
-#include "../include/lclient.h"		/* for cl_client_lru */
 #include "../include/lustre/ll_fiemap.h"
 #include "../include/lustre_fid.h"
 
@@ -124,7 +119,6 @@ static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid,
 static int lov_notify(struct obd_device *obd, struct obd_device *watched,
 		      enum obd_notify_event ev, void *data);
 
-#define MAX_STRING_SIZE 128
 int lov_connect_obd(struct obd_device *obd, __u32 index, int activate,
 		    struct obd_connect_data *data)
 {
@@ -898,6 +892,12 @@ static int lov_cleanup(struct obd_device *obd)
 		kfree(lov->lov_tgts);
 		lov->lov_tgt_size = 0;
 	}
+
+	if (lov->lov_cache) {
+		cl_cache_decref(lov->lov_cache);
+		lov->lov_cache = NULL;
+	}
+
 	return 0;
 }
 
@@ -965,7 +965,6 @@ int lov_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg,
 		CERROR("Unknown command: %d\n", lcfg->lcfg_command);
 		rc = -EINVAL;
 		goto out;
-
 	}
 	}
 out:
@@ -1734,6 +1733,27 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key,
 	unsigned int buffer_size = FIEMAP_BUFFER_SIZE;
 
 	if (!lsm_has_objects(lsm)) {
+		if (lsm && lsm_is_released(lsm) && (fm_key->fiemap.fm_start <
+		    fm_key->oa.o_size)) {
+			/*
+			 * released file, return a minimal FIEMAP if
+			 * request fits in file-size.
+			 */
+			fiemap->fm_mapped_extents = 1;
+			fiemap->fm_extents[0].fe_logical =
+					fm_key->fiemap.fm_start;
+			if (fm_key->fiemap.fm_start + fm_key->fiemap.fm_length <
+			    fm_key->oa.o_size) {
+				fiemap->fm_extents[0].fe_length =
+					fm_key->fiemap.fm_length;
+			} else {
+				fiemap->fm_extents[0].fe_length =
+					fm_key->oa.o_size - fm_key->fiemap.fm_start;
+				fiemap->fm_extents[0].fe_flags |=
+						(FIEMAP_EXTENT_UNKNOWN |
+						 FIEMAP_EXTENT_LAST);
+			}
+		}
 		rc = 0;
 		goto out;
 	}
@@ -1754,7 +1774,8 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key,
 	fm_start = fiemap->fm_start;
 	fm_length = fiemap->fm_length;
 	/* Calculate start stripe, last stripe and length of mapping */
-	actual_start_stripe = start_stripe = lov_stripe_number(lsm, fm_start);
+	start_stripe = lov_stripe_number(lsm, fm_start);
+	actual_start_stripe = start_stripe;
 	fm_end = (fm_length == ~0ULL ? fm_key->oa.o_size :
 						fm_start + fm_length - 1);
 	/* If fm_length != ~0ULL but fm_start+fm_length-1 exceeds file size */
@@ -2077,11 +2098,9 @@ static int lov_set_info_async(const struct lu_env *env, struct obd_export *exp,
 	u32 count;
 	int i, rc = 0, err;
 	struct lov_tgt_desc *tgt;
-	unsigned incr, check_uuid,
-		 do_inactive, no_set;
-	unsigned next_id = 0,  mds_con = 0;
+	unsigned int incr = 0, check_uuid = 0, do_inactive = 0, no_set = 0;
+	unsigned int next_id = 0, mds_con = 0;
 
-	incr = check_uuid = do_inactive = no_set = 0;
 	if (!set) {
 		no_set = 1;
 		set = ptlrpc_prep_set();
@@ -2108,6 +2127,7 @@ static int lov_set_info_async(const struct lu_env *env, struct obd_export *exp,
 		LASSERT(!lov->lov_cache);
 		lov->lov_cache = val;
 		do_inactive = 1;
+		cl_cache_incref(lov->lov_cache);
 	}
 
 	for (i = 0; i < count; i++, val = (char *)val + incr) {
@@ -2173,7 +2193,6 @@ void lov_stripe_lock(struct lov_stripe_md *md)
 	LASSERT(md->lsm_lock_owner == 0);
 	md->lsm_lock_owner = current_pid();
 }
-EXPORT_SYMBOL(lov_stripe_lock);
 
 void lov_stripe_unlock(struct lov_stripe_md *md)
 		__releases(&md->lsm_lock)
@@ -2182,7 +2201,6 @@ void lov_stripe_unlock(struct lov_stripe_md *md)
 	md->lsm_lock_owner = 0;
 	spin_unlock(&md->lsm_lock);
 }
-EXPORT_SYMBOL(lov_stripe_unlock);
 
 static int lov_quotactl(struct obd_device *obd, struct obd_export *exp,
 			struct obd_quotactl *oqctl)
diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c
index 1f8ed95a6d89..f9621b0fd469 100644
--- a/drivers/staging/lustre/lustre/lov/lov_object.c
+++ b/drivers/staging/lustre/lustre/lov/lov_object.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -67,7 +63,7 @@ struct lov_layout_operations {
 	int  (*llo_print)(const struct lu_env *env, void *cookie,
 			  lu_printer_t p, const struct lu_object *o);
 	int  (*llo_page_init)(const struct lu_env *env, struct cl_object *obj,
-			      struct cl_page *page, struct page *vmpage);
+			      struct cl_page *page, pgoff_t index);
 	int  (*llo_lock_init)(const struct lu_env *env,
 			      struct cl_object *obj, struct cl_lock *lock,
 			      const struct cl_io *io);
@@ -185,14 +181,26 @@ static int lov_init_sub(const struct lu_env *env, struct lov_object *lov,
 		}
 
 		LU_OBJECT_DEBUG(mask, env, &stripe->co_lu,
-				"stripe %d is already owned.\n", idx);
-		LU_OBJECT_DEBUG(mask, env, old_obj, "owned.\n");
+				"stripe %d is already owned.", idx);
+		LU_OBJECT_DEBUG(mask, env, old_obj, "owned.");
 		LU_OBJECT_HEADER(mask, env, lov2lu(lov), "try to own.\n");
 		cl_object_put(env, stripe);
 	}
 	return result;
 }
 
+static int lov_page_slice_fixup(struct lov_object *lov,
+				struct cl_object *stripe)
+{
+	struct cl_object_header *hdr = cl_object_header(&lov->lo_cl);
+	struct cl_object *o;
+
+	cl_object_for_each(o, stripe)
+		o->co_slice_off += hdr->coh_page_bufsize;
+
+	return cl_object_header(stripe)->coh_page_bufsize;
+}
+
 static int lov_init_raid0(const struct lu_env *env,
 			  struct lov_device *dev, struct lov_object *lov,
 			  const struct cl_object_conf *conf,
@@ -222,6 +230,8 @@ static int lov_init_raid0(const struct lu_env *env,
 	r0->lo_sub = libcfs_kvzalloc(r0->lo_nr * sizeof(r0->lo_sub[0]),
 				     GFP_NOFS);
 	if (r0->lo_sub) {
+		int psz = 0;
+
 		result = 0;
 		subconf->coc_inode = conf->coc_inode;
 		spin_lock_init(&r0->lo_sub_lock);
@@ -254,13 +264,24 @@ static int lov_init_raid0(const struct lu_env *env,
 				if (result == -EAGAIN) { /* try again */
 					--i;
 					result = 0;
+					continue;
 				}
 			} else {
 				result = PTR_ERR(stripe);
 			}
+
+			if (result == 0) {
+				int sz = lov_page_slice_fixup(lov, stripe);
+
+				LASSERT(ergo(psz > 0, psz == sz));
+				psz = sz;
+			}
 		}
-	} else
+		if (result == 0)
+			cl_object_header(&lov->lo_cl)->coh_page_bufsize += psz;
+	} else {
 		result = -ENOMEM;
+	}
 out:
 	return result;
 }
@@ -286,8 +307,6 @@ static int lov_delete_empty(const struct lu_env *env, struct lov_object *lov,
 	LASSERT(lov->lo_type == LLT_EMPTY || lov->lo_type == LLT_RELEASED);
 
 	lov_layout_wait(env, lov);
-
-	cl_object_prune(env, &lov->lo_cl);
 	return 0;
 }
 
@@ -355,7 +374,7 @@ static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov,
 			struct lovsub_object *los = r0->lo_sub[i];
 
 			if (los) {
-				cl_locks_prune(env, &los->lso_cl, 1);
+				cl_object_prune(env, &los->lso_cl);
 				/*
 				 * If top-level object is to be evicted from
 				 * the cache, so are its sub-objects.
@@ -364,7 +383,6 @@ static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov,
 			}
 		}
 	}
-	cl_object_prune(env, &lov->lo_cl);
 	return 0;
 }
 
@@ -666,7 +684,6 @@ static int lov_layout_change(const struct lu_env *unused,
 	const struct lov_layout_operations *old_ops;
 	const struct lov_layout_operations *new_ops;
 
-	struct cl_object_header *hdr = cl_object_header(&lov->lo_cl);
 	void *cookie;
 	struct lu_env *env;
 	int refcheck;
@@ -691,13 +708,15 @@ static int lov_layout_change(const struct lu_env *unused,
 	old_ops = &lov_dispatch[lov->lo_type];
 	new_ops = &lov_dispatch[llt];
 
+	result = cl_object_prune(env, &lov->lo_cl);
+	if (result != 0)
+		goto out;
+
 	result = old_ops->llo_delete(env, lov, &lov->u);
 	if (result == 0) {
 		old_ops->llo_fini(env, lov, &lov->u);
 
 		LASSERT(atomic_read(&lov->lo_active_ios) == 0);
-		LASSERT(!hdr->coh_tree.rnode);
-		LASSERT(hdr->coh_pages == 0);
 
 		lov->lo_type = LLT_EMPTY;
 		result = new_ops->llo_init(env,
@@ -713,6 +732,7 @@ static int lov_layout_change(const struct lu_env *unused,
 		}
 	}
 
+out:
 	cl_env_put(env, &refcheck);
 	cl_env_reexit(cookie);
 	return result;
@@ -793,7 +813,8 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj,
 		goto out;
 	}
 
-	lov->lo_layout_invalid = lov_layout_change(env, lov, conf);
+	result = lov_layout_change(env, lov, conf);
+	lov->lo_layout_invalid = result != 0;
 
 out:
 	lov_conf_unlock(lov);
@@ -825,10 +846,10 @@ static int lov_object_print(const struct lu_env *env, void *cookie,
 }
 
 int lov_page_init(const struct lu_env *env, struct cl_object *obj,
-		  struct cl_page *page, struct page *vmpage)
+		  struct cl_page *page, pgoff_t index)
 {
-	return LOV_2DISPATCH_NOLOCK(cl2lov(obj),
-				    llo_page_init, env, obj, page, vmpage);
+	return LOV_2DISPATCH_NOLOCK(cl2lov(obj), llo_page_init, env, obj, page,
+				    index);
 }
 
 /**
@@ -911,8 +932,9 @@ struct lu_object *lov_object_alloc(const struct lu_env *env,
 		 * for object with different layouts.
 		 */
 		obj->lo_ops = &lov_lu_obj_ops;
-	} else
+	} else {
 		obj = NULL;
+	}
 	return obj;
 }
 
diff --git a/drivers/staging/lustre/lustre/lov/lov_offset.c b/drivers/staging/lustre/lustre/lov/lov_offset.c
index ae83eb0f6f36..ecca74fbff00 100644
--- a/drivers/staging/lustre/lustre/lov/lov_offset.c
+++ b/drivers/staging/lustre/lustre/lov/lov_offset.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -66,6 +62,18 @@ u64 lov_stripe_size(struct lov_stripe_md *lsm, u64 ost_size, int stripeno)
 	return lov_size;
 }
 
+/**
+ * Compute file level page index by stripe level page offset
+ */
+pgoff_t lov_stripe_pgoff(struct lov_stripe_md *lsm, pgoff_t stripe_index,
+			 int stripe)
+{
+	loff_t offset;
+
+	offset = lov_stripe_size(lsm, (stripe_index << PAGE_SHIFT) + 1, stripe);
+	return offset >> PAGE_SHIFT;
+}
+
 /* we have an offset in file backed by an lov and want to find out where
  * that offset lands in our given stripe of the file.  for the easy
  * case where the offset is within the stripe, we just have to scale the
diff --git a/drivers/staging/lustre/lustre/lov/lov_pack.c b/drivers/staging/lustre/lustre/lov/lov_pack.c
index 3925633a99ec..869ef41b13ca 100644
--- a/drivers/staging/lustre/lustre/lov/lov_pack.c
+++ b/drivers/staging/lustre/lustre/lov/lov_pack.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -136,7 +132,6 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
 		CERROR("bad mem LOV MAGIC: 0x%08X != 0x%08X nor 0x%08X\n",
 		       lmm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3);
 		return -EINVAL;
-
 	}
 
 	if (lsm) {
@@ -444,8 +439,7 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
 	if (lum.lmm_magic == LOV_USER_MAGIC) {
 		/* User request for v1, we need skip lmm_pool_name */
 		if (lmmk->lmm_magic == LOV_MAGIC_V3) {
-			memmove((char *)(&lmmk->lmm_stripe_count) +
-				sizeof(lmmk->lmm_stripe_count),
+			memmove(((struct lov_mds_md_v1 *)lmmk)->lmm_objects,
 				((struct lov_mds_md_v3 *)lmmk)->lmm_objects,
 				lmmk->lmm_stripe_count *
 				sizeof(struct lov_ost_data_v1));
@@ -457,9 +451,9 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
 	}
 
 	/* User wasn't expecting this many OST entries */
-	if (lum.lmm_stripe_count == 0)
+	if (lum.lmm_stripe_count == 0) {
 		lmm_size = lum_size;
-	else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) {
+	} else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) {
 		rc = -EOVERFLOW;
 		goto out_set;
 	}
diff --git a/drivers/staging/lustre/lustre/lov/lov_page.c b/drivers/staging/lustre/lustre/lov/lov_page.c
index fdcaf8047ad8..c17026f14896 100644
--- a/drivers/staging/lustre/lustre/lov/lov_page.c
+++ b/drivers/staging/lustre/lustre/lov/lov_page.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -36,6 +32,7 @@
  * Implementation of cl_page for LOV layer.
  *
  *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  */
 
 #define DEBUG_SUBSYSTEM S_LOV
@@ -52,116 +49,66 @@
  *
  */
 
-static int lov_page_invariant(const struct cl_page_slice *slice)
+/**
+ * Adjust the stripe index by layout of raid0. @max_index is the maximum
+ * page index covered by an underlying DLM lock.
+ * This function converts max_index from stripe level to file level, and make
+ * sure it's not beyond one stripe.
+ */
+static int lov_raid0_page_is_under_lock(const struct lu_env *env,
+					const struct cl_page_slice *slice,
+					struct cl_io *unused,
+					pgoff_t *max_index)
 {
-	const struct cl_page  *page = slice->cpl_page;
-	const struct cl_page  *sub  = lov_sub_page(slice);
+	struct lov_object *loo = cl2lov(slice->cpl_obj);
+	struct lov_layout_raid0 *r0 = lov_r0(loo);
+	pgoff_t index = *max_index;
+	unsigned int pps; /* pages per stripe */
 
-	return ergo(sub,
-		    page->cp_child == sub &&
-		    sub->cp_parent == page &&
-		    page->cp_state == sub->cp_state);
-}
+	CDEBUG(D_READA, "*max_index = %lu, nr = %d\n", index, r0->lo_nr);
+	if (index == 0) /* the page is not covered by any lock */
+		return 0;
 
-static void lov_page_fini(const struct lu_env *env,
-			  struct cl_page_slice *slice)
-{
-	struct cl_page  *sub = lov_sub_page(slice);
+	if (r0->lo_nr == 1) /* single stripe file */
+		return 0;
 
-	LINVRNT(lov_page_invariant(slice));
-
-	if (sub) {
-		LASSERT(sub->cp_state == CPS_FREEING);
-		lu_ref_del(&sub->cp_reference, "lov", sub->cp_parent);
-		sub->cp_parent = NULL;
-		slice->cpl_page->cp_child = NULL;
-		cl_page_put(env, sub);
+	/* max_index is stripe level, convert it into file level */
+	if (index != CL_PAGE_EOF) {
+		int stripeno = lov_page_stripe(slice->cpl_page);
+		*max_index = lov_stripe_pgoff(loo->lo_lsm, index, stripeno);
 	}
-}
-
-static int lov_page_own(const struct lu_env *env,
-			const struct cl_page_slice *slice, struct cl_io *io,
-			int nonblock)
-{
-	struct lov_io     *lio = lov_env_io(env);
-	struct lov_io_sub *sub;
 
-	LINVRNT(lov_page_invariant(slice));
-	LINVRNT(!cl2lov_page(slice)->lps_invalid);
+	/* calculate the end of current stripe */
+	pps = loo->lo_lsm->lsm_stripe_size >> PAGE_SHIFT;
+	index = ((slice->cpl_index + pps) & ~(pps - 1)) - 1;
 
-	sub = lov_page_subio(env, lio, slice);
-	if (!IS_ERR(sub)) {
-		lov_sub_page(slice)->cp_owner = sub->sub_io;
-		lov_sub_put(sub);
-	} else
-		LBUG(); /* Arrgh */
+	/* never exceed the end of the stripe */
+	*max_index = min_t(pgoff_t, *max_index, index);
 	return 0;
 }
 
-static void lov_page_assume(const struct lu_env *env,
-			    const struct cl_page_slice *slice, struct cl_io *io)
-{
-	lov_page_own(env, slice, io, 0);
-}
-
-static int lov_page_cache_add(const struct lu_env *env,
-			      const struct cl_page_slice *slice,
-			      struct cl_io *io)
-{
-	struct lov_io     *lio = lov_env_io(env);
-	struct lov_io_sub *sub;
-	int rc = 0;
-
-	LINVRNT(lov_page_invariant(slice));
-	LINVRNT(!cl2lov_page(slice)->lps_invalid);
-
-	sub = lov_page_subio(env, lio, slice);
-	if (!IS_ERR(sub)) {
-		rc = cl_page_cache_add(sub->sub_env, sub->sub_io,
-				       slice->cpl_page->cp_child, CRT_WRITE);
-		lov_sub_put(sub);
-	} else {
-		rc = PTR_ERR(sub);
-		CL_PAGE_DEBUG(D_ERROR, env, slice->cpl_page, "rc = %d\n", rc);
-	}
-	return rc;
-}
-
-static int lov_page_print(const struct lu_env *env,
-			  const struct cl_page_slice *slice,
-			  void *cookie, lu_printer_t printer)
+static int lov_raid0_page_print(const struct lu_env *env,
+				const struct cl_page_slice *slice,
+				void *cookie, lu_printer_t printer)
 {
 	struct lov_page *lp = cl2lov_page(slice);
 
-	return (*printer)(env, cookie, LUSTRE_LOV_NAME"-page@%p\n", lp);
+	return (*printer)(env, cookie, LUSTRE_LOV_NAME "-page@%p, raid0\n", lp);
 }
 
-static const struct cl_page_operations lov_page_ops = {
-	.cpo_fini   = lov_page_fini,
-	.cpo_own    = lov_page_own,
-	.cpo_assume = lov_page_assume,
-	.io = {
-		[CRT_WRITE] = {
-			.cpo_cache_add = lov_page_cache_add
-		}
-	},
-	.cpo_print  = lov_page_print
+static const struct cl_page_operations lov_raid0_page_ops = {
+	.cpo_is_under_lock = lov_raid0_page_is_under_lock,
+	.cpo_print  = lov_raid0_page_print
 };
 
-static void lov_empty_page_fini(const struct lu_env *env,
-				struct cl_page_slice *slice)
-{
-	LASSERT(!slice->cpl_page->cp_child);
-}
-
 int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj,
-			struct cl_page *page, struct page *vmpage)
+			struct cl_page *page, pgoff_t index)
 {
 	struct lov_object *loo = cl2lov(obj);
 	struct lov_layout_raid0 *r0 = lov_r0(loo);
 	struct lov_io     *lio = lov_env_io(env);
-	struct cl_page    *subpage;
 	struct cl_object  *subobj;
+	struct cl_object  *o;
 	struct lov_io_sub *sub;
 	struct lov_page   *lpg = cl_object_page_slice(obj, page);
 	loff_t	     offset;
@@ -169,59 +116,57 @@ int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj,
 	int		stripe;
 	int		rc;
 
-	offset = cl_offset(obj, page->cp_index);
+	offset = cl_offset(obj, index);
 	stripe = lov_stripe_number(loo->lo_lsm, offset);
 	LASSERT(stripe < r0->lo_nr);
 	rc = lov_stripe_offset(loo->lo_lsm, offset, stripe, &suboff);
 	LASSERT(rc == 0);
 
-	lpg->lps_invalid = 1;
-	cl_page_slice_add(page, &lpg->lps_cl, obj, &lov_page_ops);
+	cl_page_slice_add(page, &lpg->lps_cl, obj, index, &lov_raid0_page_ops);
 
 	sub = lov_sub_get(env, lio, stripe);
-	if (IS_ERR(sub)) {
-		rc = PTR_ERR(sub);
-		goto out;
-	}
+	if (IS_ERR(sub))
+		return PTR_ERR(sub);
 
 	subobj = lovsub2cl(r0->lo_sub[stripe]);
-	subpage = cl_page_find_sub(sub->sub_env, subobj,
-				   cl_index(subobj, suboff), vmpage, page);
-	lov_sub_put(sub);
-	if (IS_ERR(subpage)) {
-		rc = PTR_ERR(subpage);
-		goto out;
-	}
-
-	if (likely(subpage->cp_parent == page)) {
-		lu_ref_add(&subpage->cp_reference, "lov", page);
-		lpg->lps_invalid = 0;
-		rc = 0;
-	} else {
-		CL_PAGE_DEBUG(D_ERROR, env, page, "parent page\n");
-		CL_PAGE_DEBUG(D_ERROR, env, subpage, "child page\n");
-		LASSERT(0);
+	list_for_each_entry(o, &subobj->co_lu.lo_header->loh_layers,
+			    co_lu.lo_linkage) {
+		if (o->co_ops->coo_page_init) {
+			rc = o->co_ops->coo_page_init(sub->sub_env, o, page,
+						      cl_index(subobj, suboff));
+			if (rc != 0)
+				break;
+		}
 	}
+	lov_sub_put(sub);
 
-out:
 	return rc;
 }
 
+static int lov_empty_page_print(const struct lu_env *env,
+				const struct cl_page_slice *slice,
+				void *cookie, lu_printer_t printer)
+{
+	struct lov_page *lp = cl2lov_page(slice);
+
+	return (*printer)(env, cookie, LUSTRE_LOV_NAME "-page@%p, empty.\n",
+			  lp);
+}
+
 static const struct cl_page_operations lov_empty_page_ops = {
-	.cpo_fini   = lov_empty_page_fini,
-	.cpo_print  = lov_page_print
+	.cpo_print = lov_empty_page_print
 };
 
 int lov_page_init_empty(const struct lu_env *env, struct cl_object *obj,
-			struct cl_page *page, struct page *vmpage)
+			struct cl_page *page, pgoff_t index)
 {
 	struct lov_page *lpg = cl_object_page_slice(obj, page);
 	void *addr;
 
-	cl_page_slice_add(page, &lpg->lps_cl, obj, &lov_empty_page_ops);
-	addr = kmap(vmpage);
+	cl_page_slice_add(page, &lpg->lps_cl, obj, index, &lov_empty_page_ops);
+	addr = kmap(page->cp_vmpage);
 	memset(addr, 0, cl_page_size(obj));
-	kunmap(vmpage);
+	kunmap(page->cp_vmpage);
 	cl_page_export(env, page, 1);
 	return 0;
 }
diff --git a/drivers/staging/lustre/lustre/lov/lov_pool.c b/drivers/staging/lustre/lustre/lov/lov_pool.c
index 9ae1d6f42d6e..4c2d21729589 100644
--- a/drivers/staging/lustre/lustre/lov/lov_pool.c
+++ b/drivers/staging/lustre/lustre/lov/lov_pool.c
@@ -14,12 +14,8 @@
  * in the LICENSE file that accompanied this code).
  *
  * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see [sun.com URL with a
- * copy of GPLv2].
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * version 2 along with this program; If not, see
+ * http://http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -65,7 +61,6 @@ void lov_pool_putref(struct pool_desc *pool)
 		LASSERT(hlist_unhashed(&pool->pool_hash));
 		LASSERT(list_empty(&pool->pool_list));
 		LASSERT(!pool->pool_debugfs_entry);
-		lov_ost_pool_free(&(pool->pool_rr.lqr_pool));
 		lov_ost_pool_free(&(pool->pool_obds));
 		kfree(pool);
 	}
@@ -424,11 +419,6 @@ int lov_pool_new(struct obd_device *obd, char *poolname)
 	if (rc)
 		goto out_err;
 
-	memset(&(new_pool->pool_rr), 0, sizeof(struct lov_qos_rr));
-	rc = lov_ost_pool_init(&new_pool->pool_rr.lqr_pool, 0);
-	if (rc)
-		goto out_free_pool_obds;
-
 	INIT_HLIST_NODE(&new_pool->pool_hash);
 
 	/* get ref for debugfs file */
@@ -469,13 +459,10 @@ out_err:
 	list_del_init(&new_pool->pool_list);
 	lov->lov_pool_count--;
 	spin_unlock(&obd->obd_dev_lock);
-
 	ldebugfs_remove(&new_pool->pool_debugfs_entry);
-
-	lov_ost_pool_free(&new_pool->pool_rr.lqr_pool);
-out_free_pool_obds:
 	lov_ost_pool_free(&new_pool->pool_obds);
 	kfree(new_pool);
+
 	return rc;
 }
 
@@ -543,8 +530,6 @@ int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
 	if (rc)
 		goto out;
 
-	pool->pool_rr.lqr_dirty = 1;
-
 	CDEBUG(D_CONFIG, "Added %s to "LOV_POOLNAMEF" as member %d\n",
 	       ostname, poolname,  pool_tgt_count(pool));
 
@@ -589,8 +574,6 @@ int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
 
 	lov_ost_pool_remove(&pool->pool_obds, lov_idx);
 
-	pool->pool_rr.lqr_dirty = 1;
-
 	CDEBUG(D_CONFIG, "%s removed from "LOV_POOLNAMEF"\n", ostname,
 	       poolname);
 
@@ -599,50 +582,3 @@ out:
 	lov_pool_putref(pool);
 	return rc;
 }
-
-int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool)
-{
-	int i, rc;
-
-	/* caller may no have a ref on pool if it got the pool
-	 * without calling lov_find_pool() (e.g. go through the lov pool
-	 * list)
-	 */
-	lov_pool_getref(pool);
-
-	down_read(&pool_tgt_rw_sem(pool));
-
-	for (i = 0; i < pool_tgt_count(pool); i++) {
-		if (pool_tgt_array(pool)[i] == idx) {
-			rc = 0;
-			goto out;
-		}
-	}
-	rc = -ENOENT;
-out:
-	up_read(&pool_tgt_rw_sem(pool));
-
-	lov_pool_putref(pool);
-	return rc;
-}
-
-struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname)
-{
-	struct pool_desc *pool;
-
-	pool = NULL;
-	if (poolname[0] != '\0') {
-		pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
-		if (!pool)
-			CWARN("Request for an unknown pool ("LOV_POOLNAMEF")\n",
-			      poolname);
-		if (pool && (pool_tgt_count(pool) == 0)) {
-			CWARN("Request for an empty pool ("LOV_POOLNAMEF")\n",
-			      poolname);
-			/* pool is ignored, so we remove ref on it */
-			lov_pool_putref(pool);
-			pool = NULL;
-		}
-	}
-	return pool;
-}
diff --git a/drivers/staging/lustre/lustre/lov/lov_request.c b/drivers/staging/lustre/lustre/lov/lov_request.c
index 7178a02d6267..4099b51f826e 100644
--- a/drivers/staging/lustre/lustre/lov/lov_request.c
+++ b/drivers/staging/lustre/lustre/lov/lov_request.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -52,7 +48,6 @@ static void lov_init_set(struct lov_request_set *set)
 	INIT_LIST_HEAD(&set->set_list);
 	atomic_set(&set->set_refcount, 1);
 	init_waitqueue_head(&set->set_waitq);
-	spin_lock_init(&set->set_lock);
 }
 
 void lov_finish_set(struct lov_request_set *set)
@@ -235,7 +230,6 @@ out:
 	if (tmp_oa)
 		kmem_cache_free(obdo_cachep, tmp_oa);
 	return rc;
-
 }
 
 int lov_fini_getattr_set(struct lov_request_set *set)
@@ -363,7 +357,6 @@ int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo,
 	set->set_oi = oinfo;
 	set->set_oi->oi_md = lsm;
 	set->set_oi->oi_oa = src_oa;
-	set->set_oti = oti;
 	if (oti && src_oa->o_valid & OBD_MD_FLCOOKIE)
 		set->set_cookies = oti->oti_logcookies;
 
@@ -480,7 +473,6 @@ int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo,
 	lov_init_set(set);
 
 	set->set_exp = exp;
-	set->set_oti = oti;
 	set->set_oi = oinfo;
 	if (oti && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
 		set->set_cookies = oti->oti_logcookies;
@@ -716,12 +708,15 @@ int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
 		struct lov_request *req;
 
 		if (!lov->lov_tgts[i] ||
-		    (!lov_check_and_wait_active(lov, i) &&
-		     (oinfo->oi_flags & OBD_STATFS_NODELAY))) {
+		    (oinfo->oi_flags & OBD_STATFS_NODELAY &&
+		     !lov->lov_tgts[i]->ltd_active)) {
 			CDEBUG(D_HA, "lov idx %d inactive\n", i);
 			continue;
 		}
 
+		if (!lov->lov_tgts[i]->ltd_active)
+			lov_check_and_wait_active(lov, i);
+
 		/* skip targets that have been explicitly disabled by the
 		 * administrator
 		 */
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_dev.c b/drivers/staging/lustre/lustre/lov/lovsub_dev.c
index c335c020f4f4..b519a1940e1e 100644
--- a/drivers/staging/lustre/lustre/lov/lovsub_dev.c
+++ b/drivers/staging/lustre/lustre/lov/lovsub_dev.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -151,8 +147,9 @@ static int lovsub_req_init(const struct lu_env *env, struct cl_device *dev,
 	if (lsr) {
 		cl_req_slice_add(req, &lsr->lsrq_cl, dev, &lovsub_req_ops);
 		result = 0;
-	} else
+	} else {
 		result = -ENOMEM;
+	}
 	return result;
 }
 
@@ -182,10 +179,12 @@ static struct lu_device *lovsub_device_alloc(const struct lu_env *env,
 			d = lovsub2lu_dev(lsd);
 			d->ld_ops	 = &lovsub_lu_ops;
 			lsd->acid_cl.cd_ops = &lovsub_cl_ops;
-		} else
+		} else {
 			d = ERR_PTR(result);
-	} else
+		}
+	} else {
 		d = ERR_PTR(-ENOMEM);
+	}
 	return d;
 }
 
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_io.c b/drivers/staging/lustre/lustre/lov/lovsub_io.c
index 783ec687a4e7..6a9820218a3e 100644
--- a/drivers/staging/lustre/lustre/lov/lovsub_io.c
+++ b/drivers/staging/lustre/lustre/lov/lovsub_io.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_lock.c b/drivers/staging/lustre/lustre/lov/lovsub_lock.c
index 3bb0c9068a90..38f9b735c241 100644
--- a/drivers/staging/lustre/lustre/lov/lovsub_lock.c
+++ b/drivers/staging/lustre/lustre/lov/lovsub_lock.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -62,391 +58,8 @@ static void lovsub_lock_fini(const struct lu_env *env,
 	kmem_cache_free(lovsub_lock_kmem, lsl);
 }
 
-static void lovsub_parent_lock(const struct lu_env *env, struct lov_lock *lov)
-{
-	struct cl_lock *parent;
-
-	parent = lov->lls_cl.cls_lock;
-	cl_lock_get(parent);
-	lu_ref_add(&parent->cll_reference, "lovsub-parent", current);
-	cl_lock_mutex_get(env, parent);
-}
-
-static void lovsub_parent_unlock(const struct lu_env *env, struct lov_lock *lov)
-{
-	struct cl_lock *parent;
-
-	parent = lov->lls_cl.cls_lock;
-	cl_lock_mutex_put(env, lov->lls_cl.cls_lock);
-	lu_ref_del(&parent->cll_reference, "lovsub-parent", current);
-	cl_lock_put(env, parent);
-}
-
-/**
- * Implements cl_lock_operations::clo_state() method for lovsub layer, which
- * method is called whenever sub-lock state changes. Propagates state change
- * to the top-locks.
- */
-static void lovsub_lock_state(const struct lu_env *env,
-			      const struct cl_lock_slice *slice,
-			      enum cl_lock_state state)
-{
-	struct lovsub_lock   *sub = cl2lovsub_lock(slice);
-	struct lov_lock_link *scan;
-
-	LASSERT(cl_lock_is_mutexed(slice->cls_lock));
-
-	list_for_each_entry(scan, &sub->lss_parents, lll_list) {
-		struct lov_lock *lov    = scan->lll_super;
-		struct cl_lock  *parent = lov->lls_cl.cls_lock;
-
-		if (sub->lss_active != parent) {
-			lovsub_parent_lock(env, lov);
-			cl_lock_signal(env, parent);
-			lovsub_parent_unlock(env, lov);
-		}
-	}
-}
-
-/**
- * Implementation of cl_lock_operation::clo_weigh() estimating lock weight by
- * asking parent lock.
- */
-static unsigned long lovsub_lock_weigh(const struct lu_env *env,
-				       const struct cl_lock_slice *slice)
-{
-	struct lovsub_lock *lock = cl2lovsub_lock(slice);
-	struct lov_lock    *lov;
-	unsigned long       dumbbell;
-
-	LASSERT(cl_lock_is_mutexed(slice->cls_lock));
-
-	if (!list_empty(&lock->lss_parents)) {
-		/*
-		 * It is not clear whether all parents have to be asked and
-		 * their estimations summed, or it is enough to ask one. For
-		 * the current usages, one is always enough.
-		 */
-		lov = container_of(lock->lss_parents.next,
-				   struct lov_lock_link, lll_list)->lll_super;
-
-		lovsub_parent_lock(env, lov);
-		dumbbell = cl_lock_weigh(env, lov->lls_cl.cls_lock);
-		lovsub_parent_unlock(env, lov);
-	} else
-		dumbbell = 0;
-
-	return dumbbell;
-}
-
-/**
- * Maps start/end offsets within a stripe, to offsets within a file.
- */
-static void lovsub_lock_descr_map(const struct cl_lock_descr *in,
-				  struct lov_object *lov,
-				  int stripe, struct cl_lock_descr *out)
-{
-	pgoff_t size; /* stripe size in pages */
-	pgoff_t skip; /* how many pages in every stripe are occupied by
-		       * "other" stripes
-		       */
-	pgoff_t start;
-	pgoff_t end;
-
-	start = in->cld_start;
-	end   = in->cld_end;
-
-	if (lov->lo_lsm->lsm_stripe_count > 1) {
-		size = cl_index(lov2cl(lov), lov->lo_lsm->lsm_stripe_size);
-		skip = (lov->lo_lsm->lsm_stripe_count - 1) * size;
-
-		/* XXX overflow check here? */
-		start += start/size * skip + stripe * size;
-
-		if (end != CL_PAGE_EOF) {
-			end += end/size * skip + stripe * size;
-			/*
-			 * And check for overflow...
-			 */
-			if (end < in->cld_end)
-				end = CL_PAGE_EOF;
-		}
-	}
-	out->cld_start = start;
-	out->cld_end   = end;
-}
-
-/**
- * Adjusts parent lock extent when a sub-lock is attached to a parent. This is
- * called in two ways:
- *
- *     - as part of receive call-back, when server returns granted extent to
- *       the client, and
- *
- *     - when top-lock finds existing sub-lock in the cache.
- *
- * Note, that lock mode is not propagated to the parent: i.e., if CLM_READ
- * top-lock matches CLM_WRITE sub-lock, top-lock is still CLM_READ.
- */
-int lov_sublock_modify(const struct lu_env *env, struct lov_lock *lov,
-		       struct lovsub_lock *sublock,
-		       const struct cl_lock_descr *d, int idx)
-{
-	struct cl_lock       *parent;
-	struct lovsub_object *subobj;
-	struct cl_lock_descr *pd;
-	struct cl_lock_descr *parent_descr;
-	int		   result;
-
-	parent       = lov->lls_cl.cls_lock;
-	parent_descr = &parent->cll_descr;
-	LASSERT(cl_lock_mode_match(d->cld_mode, parent_descr->cld_mode));
-
-	subobj = cl2lovsub(sublock->lss_cl.cls_obj);
-	pd     = &lov_env_info(env)->lti_ldescr;
-
-	pd->cld_obj  = parent_descr->cld_obj;
-	pd->cld_mode = parent_descr->cld_mode;
-	pd->cld_gid  = parent_descr->cld_gid;
-	lovsub_lock_descr_map(d, subobj->lso_super, subobj->lso_index, pd);
-	lov->lls_sub[idx].sub_got = *d;
-	/*
-	 * Notify top-lock about modification, if lock description changes
-	 * materially.
-	 */
-	if (!cl_lock_ext_match(parent_descr, pd))
-		result = cl_lock_modify(env, parent, pd);
-	else
-		result = 0;
-	return result;
-}
-
-static int lovsub_lock_modify(const struct lu_env *env,
-			      const struct cl_lock_slice *s,
-			      const struct cl_lock_descr *d)
-{
-	struct lovsub_lock   *lock   = cl2lovsub_lock(s);
-	struct lov_lock_link *scan;
-	struct lov_lock      *lov;
-	int result		   = 0;
-
-	LASSERT(cl_lock_mode_match(d->cld_mode,
-				   s->cls_lock->cll_descr.cld_mode));
-	list_for_each_entry(scan, &lock->lss_parents, lll_list) {
-		int rc;
-
-		lov = scan->lll_super;
-		lovsub_parent_lock(env, lov);
-		rc = lov_sublock_modify(env, lov, lock, d, scan->lll_idx);
-		lovsub_parent_unlock(env, lov);
-		result = result ?: rc;
-	}
-	return result;
-}
-
-static int lovsub_lock_closure(const struct lu_env *env,
-			       const struct cl_lock_slice *slice,
-			       struct cl_lock_closure *closure)
-{
-	struct lovsub_lock   *sub;
-	struct cl_lock       *parent;
-	struct lov_lock_link *scan;
-	int		   result;
-
-	LASSERT(cl_lock_is_mutexed(slice->cls_lock));
-
-	sub    = cl2lovsub_lock(slice);
-	result = 0;
-
-	list_for_each_entry(scan, &sub->lss_parents, lll_list) {
-		parent = scan->lll_super->lls_cl.cls_lock;
-		result = cl_lock_closure_build(env, parent, closure);
-		if (result != 0)
-			break;
-	}
-	return result;
-}
-
-/**
- * A helper function for lovsub_lock_delete() that deals with a given parent
- * top-lock.
- */
-static int lovsub_lock_delete_one(const struct lu_env *env,
-				  struct cl_lock *child, struct lov_lock *lov)
-{
-	struct cl_lock *parent;
-	int	     result;
-
-	parent = lov->lls_cl.cls_lock;
-	if (parent->cll_error)
-		return 0;
-
-	result = 0;
-	switch (parent->cll_state) {
-	case CLS_ENQUEUED:
-		/* See LU-1355 for the case that a glimpse lock is
-		 * interrupted by signal
-		 */
-		LASSERT(parent->cll_flags & CLF_CANCELLED);
-		break;
-	case CLS_QUEUING:
-	case CLS_FREEING:
-		cl_lock_signal(env, parent);
-		break;
-	case CLS_INTRANSIT:
-		/*
-		 * Here lies a problem: a sub-lock is canceled while top-lock
-		 * is being unlocked. Top-lock cannot be moved into CLS_NEW
-		 * state, because unlocking has to succeed eventually by
-		 * placing lock into CLS_CACHED (or failing it), see
-		 * cl_unuse_try(). Nor can top-lock be left in CLS_CACHED
-		 * state, because lov maintains an invariant that all
-		 * sub-locks exist in CLS_CACHED (this allows cached top-lock
-		 * to be reused immediately). Nor can we wait for top-lock
-		 * state to change, because this can be synchronous to the
-		 * current thread.
-		 *
-		 * We know for sure that lov_lock_unuse() will be called at
-		 * least one more time to finish un-using, so leave a mark on
-		 * the top-lock, that will be seen by the next call to
-		 * lov_lock_unuse().
-		 */
-		if (cl_lock_is_intransit(parent))
-			lov->lls_cancel_race = 1;
-		break;
-	case CLS_CACHED:
-		/*
-		 * if a sub-lock is canceled move its top-lock into CLS_NEW
-		 * state to preserve an invariant that a top-lock in
-		 * CLS_CACHED is immediately ready for re-use (i.e., has all
-		 * sub-locks), and so that next attempt to re-use the top-lock
-		 * enqueues missing sub-lock.
-		 */
-		cl_lock_state_set(env, parent, CLS_NEW);
-		/* fall through */
-	case CLS_NEW:
-		/*
-		 * if last sub-lock is canceled, destroy the top-lock (which
-		 * is now `empty') proactively.
-		 */
-		if (lov->lls_nr_filled == 0) {
-			/* ... but unfortunately, this cannot be done easily,
-			 * as cancellation of a top-lock might acquire mutices
-			 * of its other sub-locks, violating lock ordering,
-			 * see cl_lock_{cancel,delete}() preconditions.
-			 *
-			 * To work around this, the mutex of this sub-lock is
-			 * released, top-lock is destroyed, and sub-lock mutex
-			 * acquired again. The list of parents has to be
-			 * re-scanned from the beginning after this.
-			 *
-			 * Only do this if no mutices other than on @child and
-			 * @parent are held by the current thread.
-			 *
-			 * TODO: The lock modal here is too complex, because
-			 * the lock may be canceled and deleted by voluntarily:
-			 *    cl_lock_request
-			 *      -> osc_lock_enqueue_wait
-			 *	-> osc_lock_cancel_wait
-			 *	  -> cl_lock_delete
-			 *	    -> lovsub_lock_delete
-			 *	      -> cl_lock_cancel/delete
-			 *		-> ...
-			 *
-			 * The better choice is to spawn a kernel thread for
-			 * this purpose. -jay
-			 */
-			if (cl_lock_nr_mutexed(env) == 2) {
-				cl_lock_mutex_put(env, child);
-				cl_lock_cancel(env, parent);
-				cl_lock_delete(env, parent);
-				result = 1;
-			}
-		}
-		break;
-	case CLS_HELD:
-		CL_LOCK_DEBUG(D_ERROR, env, parent, "Delete CLS_HELD lock\n");
-	default:
-		CERROR("Impossible state: %d\n", parent->cll_state);
-		LBUG();
-		break;
-	}
-
-	return result;
-}
-
-/**
- * An implementation of cl_lock_operations::clo_delete() method. This is
- * invoked in "bottom-to-top" delete, when lock destruction starts from the
- * sub-lock (e.g, as a result of ldlm lock LRU policy).
- */
-static void lovsub_lock_delete(const struct lu_env *env,
-			       const struct cl_lock_slice *slice)
-{
-	struct cl_lock     *child = slice->cls_lock;
-	struct lovsub_lock *sub   = cl2lovsub_lock(slice);
-	int restart;
-
-	LASSERT(cl_lock_is_mutexed(child));
-
-	/*
-	 * Destruction of a sub-lock might take multiple iterations, because
-	 * when the last sub-lock of a given top-lock is deleted, top-lock is
-	 * canceled proactively, and this requires to release sub-lock
-	 * mutex. Once sub-lock mutex has been released, list of its parents
-	 * has to be re-scanned from the beginning.
-	 */
-	do {
-		struct lov_lock      *lov;
-		struct lov_lock_link *scan;
-		struct lov_lock_link *temp;
-		struct lov_lock_sub  *subdata;
-
-		restart = 0;
-		list_for_each_entry_safe(scan, temp,
-					 &sub->lss_parents, lll_list) {
-			lov     = scan->lll_super;
-			subdata = &lov->lls_sub[scan->lll_idx];
-			lovsub_parent_lock(env, lov);
-			subdata->sub_got = subdata->sub_descr;
-			lov_lock_unlink(env, scan, sub);
-			restart = lovsub_lock_delete_one(env, child, lov);
-			lovsub_parent_unlock(env, lov);
-
-			if (restart) {
-				cl_lock_mutex_get(env, child);
-				break;
-			}
-	       }
-	} while (restart);
-}
-
-static int lovsub_lock_print(const struct lu_env *env, void *cookie,
-			     lu_printer_t p, const struct cl_lock_slice *slice)
-{
-	struct lovsub_lock   *sub = cl2lovsub_lock(slice);
-	struct lov_lock      *lov;
-	struct lov_lock_link *scan;
-
-	list_for_each_entry(scan, &sub->lss_parents, lll_list) {
-		lov = scan->lll_super;
-		(*p)(env, cookie, "[%d %p ", scan->lll_idx, lov);
-		if (lov)
-			cl_lock_descr_print(env, cookie, p,
-					    &lov->lls_cl.cls_lock->cll_descr);
-		(*p)(env, cookie, "] ");
-	}
-	return 0;
-}
-
 static const struct cl_lock_operations lovsub_lock_ops = {
 	.clo_fini    = lovsub_lock_fini,
-	.clo_state   = lovsub_lock_state,
-	.clo_delete  = lovsub_lock_delete,
-	.clo_modify  = lovsub_lock_modify,
-	.clo_closure = lovsub_lock_closure,
-	.clo_weigh   = lovsub_lock_weigh,
-	.clo_print   = lovsub_lock_print
 };
 
 int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj,
@@ -460,8 +73,9 @@ int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj,
 		INIT_LIST_HEAD(&lsk->lss_parents);
 		cl_lock_slice_add(lock, &lsk->lss_cl, obj, &lovsub_lock_ops);
 		result = 0;
-	} else
+	} else {
 		result = -ENOMEM;
+	}
 	return result;
 }
 
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_object.c b/drivers/staging/lustre/lustre/lov/lovsub_object.c
index 6c5430d938d0..fb2f2660b3e9 100644
--- a/drivers/staging/lustre/lustre/lov/lovsub_object.c
+++ b/drivers/staging/lustre/lustre/lov/lovsub_object.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -67,10 +63,10 @@ int lovsub_object_init(const struct lu_env *env, struct lu_object *obj,
 		lu_object_add(obj, below);
 		cl_object_page_init(lu2cl(obj), sizeof(struct lovsub_page));
 		result = 0;
-	} else
+	} else {
 		result = -ENOMEM;
+	}
 	return result;
-
 }
 
 static void lovsub_object_free(const struct lu_env *env, struct lu_object *obj)
@@ -154,8 +150,9 @@ struct lu_object *lovsub_object_alloc(const struct lu_env *env,
 		lu_object_add_top(&hdr->coh_lu, obj);
 		los->lso_cl.co_ops = &lovsub_ops;
 		obj->lo_ops = &lovsub_lu_obj_ops;
-	} else
+	} else {
 		obj = NULL;
+	}
 	return obj;
 }
 
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_page.c b/drivers/staging/lustre/lustre/lov/lovsub_page.c
index 2d945532b78e..b2e68c3e820d 100644
--- a/drivers/staging/lustre/lustre/lov/lovsub_page.c
+++ b/drivers/staging/lustre/lustre/lov/lovsub_page.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -60,11 +56,11 @@ static const struct cl_page_operations lovsub_page_ops = {
 };
 
 int lovsub_page_init(const struct lu_env *env, struct cl_object *obj,
-		     struct cl_page *page, struct page *unused)
+		     struct cl_page *page, pgoff_t index)
 {
 	struct lovsub_page *lsb = cl_object_page_slice(obj, page);
 
-	cl_page_slice_add(page, &lsb->lsb_cl, obj, &lovsub_page_ops);
+	cl_page_slice_add(page, &lsb->lsb_cl, obj, index, &lovsub_page_ops);
 	return 0;
 }
 
diff --git a/drivers/staging/lustre/lustre/lov/lproc_lov.c b/drivers/staging/lustre/lustre/lov/lproc_lov.c
index 0dcb6b6a7782..eb6d30d34e3a 100644
--- a/drivers/staging/lustre/lustre/lov/lproc_lov.c
+++ b/drivers/staging/lustre/lustre/lov/lproc_lov.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
index 38f267a60f59..98d15fb247bc 100644
--- a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
+++ b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -49,9 +45,9 @@ static ssize_t max_rpcs_in_flight_show(struct kobject *kobj,
 					      obd_kobj);
 	struct client_obd *cli = &dev->u.cli;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	len = sprintf(buf, "%u\n", cli->cl_max_rpcs_in_flight);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return len;
 }
@@ -74,9 +70,9 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj,
 	if (val < 1 || val > MDC_MAX_RIF_MAX)
 		return -ERANGE;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	cli->cl_max_rpcs_in_flight = val;
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return count;
 }
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_internal.h b/drivers/staging/lustre/lustre/mdc/mdc_internal.h
index c5519aeb0d8a..58f2841cabe4 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_internal.h
+++ b/drivers/staging/lustre/lustre/mdc/mdc_internal.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_lib.c b/drivers/staging/lustre/lustre/mdc/mdc_lib.c
index b3bfdcb73670..143bd7628572 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_lib.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_lib.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -279,8 +275,7 @@ static void mdc_setattr_pack_rec(struct mdt_rec_setattr *rec,
 	rec->sa_atime  = LTIME_S(op_data->op_attr.ia_atime);
 	rec->sa_mtime  = LTIME_S(op_data->op_attr.ia_mtime);
 	rec->sa_ctime  = LTIME_S(op_data->op_attr.ia_ctime);
-	rec->sa_attr_flags =
-			((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags;
+	rec->sa_attr_flags = op_data->op_attr_flags;
 	if ((op_data->op_attr.ia_valid & ATTR_GID) &&
 	    in_group_p(op_data->op_attr.ia_gid))
 		rec->sa_suppgid =
@@ -439,7 +434,6 @@ void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, int flags,
 		char *tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
 
 		LOGL0(op_data->op_name, op_data->op_namelen, tmp);
-
 	}
 }
 
@@ -455,7 +449,7 @@ static void mdc_hsm_release_pack(struct ptlrpc_request *req,
 		lock = ldlm_handle2lock(&op_data->op_lease_handle);
 		if (lock) {
 			data->cd_handle = lock->l_remote_handle;
-			ldlm_lock_put(lock);
+			LDLM_LOCK_PUT(lock);
 		}
 		ldlm_cli_cancel(&op_data->op_lease_handle, LCF_LOCAL);
 
@@ -473,6 +467,18 @@ void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
 	rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
 
 	mdc_setattr_pack_rec(rec, op_data);
+	/*
+	 * The client will zero out local timestamps when losing the IBITS lock
+	 * so any new RPC timestamps will update the client inode's timestamps.
+	 * There was a defect on the server side which allowed the atime to be
+	 * overwritten by a zeroed-out atime packed into the close RPC.
+	 *
+	 * Proactively clear the MDS_ATTR_ATIME flag in the RPC in this case
+	 * to avoid zeroing the atime on old unpatched servers.  See LU-8041.
+	 */
+	if (rec->sa_atime == 0)
+		rec->sa_valid &= ~MDS_ATTR_ATIME;
+
 	mdc_ioepoch_pack(epoch, op_data);
 	mdc_hsm_release_pack(req, op_data);
 }
@@ -481,9 +487,9 @@ static int mdc_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw)
 {
 	int rc;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	rc = list_empty(&mcw->mcw_entry);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 	return rc;
 };
 
@@ -497,23 +503,23 @@ int mdc_enter_request(struct client_obd *cli)
 	struct mdc_cache_waiter mcw;
 	struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
 		list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters);
 		init_waitqueue_head(&mcw.mcw_waitq);
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 		rc = l_wait_event(mcw.mcw_waitq, mdc_req_avail(cli, &mcw),
 				  &lwi);
 		if (rc) {
-			client_obd_list_lock(&cli->cl_loi_list_lock);
+			spin_lock(&cli->cl_loi_list_lock);
 			if (list_empty(&mcw.mcw_entry))
 				cli->cl_r_in_flight--;
 			list_del_init(&mcw.mcw_entry);
-			client_obd_list_unlock(&cli->cl_loi_list_lock);
+			spin_unlock(&cli->cl_loi_list_lock);
 		}
 	} else {
 		cli->cl_r_in_flight++;
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 	}
 	return rc;
 }
@@ -523,7 +529,7 @@ void mdc_exit_request(struct client_obd *cli)
 	struct list_head *l, *tmp;
 	struct mdc_cache_waiter *mcw;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	cli->cl_r_in_flight--;
 	list_for_each_safe(l, tmp, &cli->cl_cache_waiters) {
 		if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
@@ -538,5 +544,5 @@ void mdc_exit_request(struct client_obd *cli)
 	}
 	/* Empty waiting list? Decrease reqs in-flight number */
 
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 }
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
index 958a164f620d..f48b58423307 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -54,61 +50,43 @@ struct mdc_getattr_args {
 	struct ldlm_enqueue_info    *ga_einfo;
 };
 
-int it_disposition(struct lookup_intent *it, int flag)
-{
-	return it->d.lustre.it_disposition & flag;
-}
-EXPORT_SYMBOL(it_disposition);
-
-void it_set_disposition(struct lookup_intent *it, int flag)
-{
-	it->d.lustre.it_disposition |= flag;
-}
-EXPORT_SYMBOL(it_set_disposition);
-
-void it_clear_disposition(struct lookup_intent *it, int flag)
-{
-	it->d.lustre.it_disposition &= ~flag;
-}
-EXPORT_SYMBOL(it_clear_disposition);
-
 int it_open_error(int phase, struct lookup_intent *it)
 {
 	if (it_disposition(it, DISP_OPEN_LEASE)) {
 		if (phase >= DISP_OPEN_LEASE)
-			return it->d.lustre.it_status;
+			return it->it_status;
 		else
 			return 0;
 	}
 	if (it_disposition(it, DISP_OPEN_OPEN)) {
 		if (phase >= DISP_OPEN_OPEN)
-			return it->d.lustre.it_status;
+			return it->it_status;
 		else
 			return 0;
 	}
 
 	if (it_disposition(it, DISP_OPEN_CREATE)) {
 		if (phase >= DISP_OPEN_CREATE)
-			return it->d.lustre.it_status;
+			return it->it_status;
 		else
 			return 0;
 	}
 
 	if (it_disposition(it, DISP_LOOKUP_EXECD)) {
 		if (phase >= DISP_LOOKUP_EXECD)
-			return it->d.lustre.it_status;
+			return it->it_status;
 		else
 			return 0;
 	}
 
 	if (it_disposition(it, DISP_IT_EXECD)) {
 		if (phase >= DISP_IT_EXECD)
-			return it->d.lustre.it_status;
+			return it->it_status;
 		else
 			return 0;
 	}
-	CERROR("it disp: %X, status: %d\n", it->d.lustre.it_disposition,
-	       it->d.lustre.it_status);
+	CERROR("it disp: %X, status: %d\n", it->it_disposition,
+	       it->it_status);
 	LBUG();
 	return 0;
 }
@@ -347,10 +325,6 @@ static struct ptlrpc_request *mdc_intent_open_pack(struct obd_export *exp,
 	mdc_open_pack(req, op_data, it->it_create_mode, 0, it->it_flags, lmm,
 		      lmmsize);
 
-	/* for remote client, fetch remote perm for current user */
-	if (client_is_remote(exp))
-		req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
-				     sizeof(struct mdt_remote_perm));
 	ptlrpc_request_set_replen(req);
 	return req;
 }
@@ -444,9 +418,7 @@ static struct ptlrpc_request *mdc_intent_getattr_pack(struct obd_export *exp,
 	struct obd_device     *obddev = class_exp2obd(exp);
 	u64		       valid = OBD_MD_FLGETATTR | OBD_MD_FLEASIZE |
 				       OBD_MD_FLMODEASIZE | OBD_MD_FLDIREA |
-				       OBD_MD_MEA |
-				       (client_is_remote(exp) ?
-					       OBD_MD_FLRMTPERM : OBD_MD_FLACL);
+				       OBD_MD_MEA | OBD_MD_FLACL;
 	struct ldlm_intent    *lit;
 	int		    rc;
 	int		    easize;
@@ -478,9 +450,6 @@ static struct ptlrpc_request *mdc_intent_getattr_pack(struct obd_export *exp,
 	mdc_getattr_pack(req, valid, it->it_flags, op_data, easize);
 
 	req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, easize);
-	if (client_is_remote(exp))
-		req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
-				     sizeof(struct mdt_remote_perm));
 	ptlrpc_request_set_replen(req);
 	return req;
 }
@@ -555,7 +524,6 @@ static int mdc_finish_enqueue(struct obd_export *exp,
 	struct req_capsule  *pill = &req->rq_pill;
 	struct ldlm_request *lockreq;
 	struct ldlm_reply   *lockrep;
-	struct lustre_intent_data *intent = &it->d.lustre;
 	struct ldlm_lock    *lock;
 	void		*lvb_data = NULL;
 	int		  lvb_len = 0;
@@ -589,17 +557,17 @@ static int mdc_finish_enqueue(struct obd_export *exp,
 
 	lockrep = req_capsule_server_get(pill, &RMF_DLM_REP);
 
-	intent->it_disposition = (int)lockrep->lock_policy_res1;
-	intent->it_status = (int)lockrep->lock_policy_res2;
-	intent->it_lock_mode = einfo->ei_mode;
-	intent->it_lock_handle = lockh->cookie;
-	intent->it_data = req;
+	it->it_disposition = (int)lockrep->lock_policy_res1;
+	it->it_status = (int)lockrep->lock_policy_res2;
+	it->it_lock_mode = einfo->ei_mode;
+	it->it_lock_handle = lockh->cookie;
+	it->it_request = req;
 
 	/* Technically speaking rq_transno must already be zero if
 	 * it_status is in error, so the check is a bit redundant
 	 */
-	if ((!req->rq_transno || intent->it_status < 0) && req->rq_replay)
-		mdc_clear_replay_flag(req, intent->it_status);
+	if ((!req->rq_transno || it->it_status < 0) && req->rq_replay)
+		mdc_clear_replay_flag(req, it->it_status);
 
 	/* If we're doing an IT_OPEN which did not result in an actual
 	 * successful open, then we need to remove the bit which saves
@@ -610,11 +578,11 @@ static int mdc_finish_enqueue(struct obd_export *exp,
 	 * (bug 3440)
 	 */
 	if (it->it_op & IT_OPEN && req->rq_replay &&
-	    (!it_disposition(it, DISP_OPEN_OPEN) || intent->it_status != 0))
-		mdc_clear_replay_flag(req, intent->it_status);
+	    (!it_disposition(it, DISP_OPEN_OPEN) || it->it_status != 0))
+		mdc_clear_replay_flag(req, it->it_status);
 
 	DEBUG_REQ(D_RPCTRACE, req, "op: %d disposition: %x, status: %d",
-		  it->it_op, intent->it_disposition, intent->it_status);
+		  it->it_op, it->it_disposition, it->it_status);
 
 	/* We know what to expect, so we do any byte flipping required here */
 	if (it->it_op & (IT_OPEN | IT_UNLINK | IT_LOOKUP | IT_GETATTR)) {
@@ -687,16 +655,6 @@ static int mdc_finish_enqueue(struct obd_export *exp,
 					memcpy(lmm, eadata, body->eadatasize);
 			}
 		}
-
-		if (body->valid & OBD_MD_FLRMTPERM) {
-			struct mdt_remote_perm *perm;
-
-			LASSERT(client_is_remote(exp));
-			perm = req_capsule_server_swab_get(pill, &RMF_ACL,
-						lustre_swab_mdt_remote_perm);
-			if (!perm)
-				return -EPROTO;
-		}
 	} else if (it->it_op & IT_LAYOUT) {
 		/* maybe the lock was granted right away and layout
 		 * is packed into RMF_DLM_LVB of req
@@ -715,7 +673,7 @@ static int mdc_finish_enqueue(struct obd_export *exp,
 	if (lock && ldlm_has_layout(lock) && lvb_data) {
 		void *lmm;
 
-		LDLM_DEBUG(lock, "layout lock returned by: %s, lvb_len: %d\n",
+		LDLM_DEBUG(lock, "layout lock returned by: %s, lvb_len: %d",
 			   ldlm_it2str(it->it_op), lvb_len);
 
 		lmm = libcfs_kvzalloc(lvb_len, GFP_NOFS);
@@ -869,7 +827,9 @@ resend:
 		 * (explicits or automatically generated by Kernel to clean
 		 * current FLocks upon exit) that can't be trashed
 		 */
-		if ((rc == -EINTR) || (rc == -ETIMEDOUT))
+		if (((rc == -EINTR) || (rc == -ETIMEDOUT)) &&
+		    (einfo->ei_type == LDLM_FLOCK) &&
+		    (einfo->ei_mode == LCK_NL))
 			goto resend;
 		return rc;
 	}
@@ -921,9 +881,9 @@ resend:
 		}
 		ptlrpc_req_finished(req);
 
-		it->d.lustre.it_lock_handle = 0;
-		it->d.lustre.it_lock_mode = 0;
-		it->d.lustre.it_data = NULL;
+		it->it_lock_handle = 0;
+		it->it_lock_mode = 0;
+		it->it_request = NULL;
 	}
 
 	return rc;
@@ -947,8 +907,8 @@ static int mdc_finish_intent_lock(struct obd_export *exp,
 		/* The server failed before it even started executing the
 		 * intent, i.e. because it couldn't unpack the request.
 		 */
-		LASSERT(it->d.lustre.it_status != 0);
-		return it->d.lustre.it_status;
+		LASSERT(it->it_status != 0);
+		return it->it_status;
 	}
 	rc = it_open_error(DISP_IT_EXECD, it);
 	if (rc)
@@ -963,7 +923,6 @@ static int mdc_finish_intent_lock(struct obd_export *exp,
 	if (fid_is_sane(&op_data->op_fid2) &&
 	    it->it_create_mode & M_CHECK_STALE &&
 	    it->it_op != IT_GETATTR) {
-
 		/* Also: did we find the same inode? */
 		/* sever can return one of two fids:
 		 * op_fid2 - new allocated fid - if file is created.
@@ -1032,15 +991,15 @@ static int mdc_finish_intent_lock(struct obd_export *exp,
 				    LDLM_IBITS, &policy, LCK_NL,
 				    &old_lock, 0)) {
 			ldlm_lock_decref_and_cancel(lockh,
-						    it->d.lustre.it_lock_mode);
+						    it->it_lock_mode);
 			memcpy(lockh, &old_lock, sizeof(old_lock));
-			it->d.lustre.it_lock_handle = lockh->cookie;
+			it->it_lock_handle = lockh->cookie;
 		}
 	}
 	CDEBUG(D_DENTRY,
 	       "D_IT dentry %.*s intent: %s status %d disp %x rc %d\n",
 	       op_data->op_namelen, op_data->op_name, ldlm_it2str(it->it_op),
-	       it->d.lustre.it_status, it->d.lustre.it_disposition, rc);
+	       it->it_status, it->it_disposition, rc);
 	return rc;
 }
 
@@ -1056,8 +1015,8 @@ int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
 	ldlm_policy_data_t policy;
 	enum ldlm_mode mode;
 
-	if (it->d.lustre.it_lock_handle) {
-		lockh.cookie = it->d.lustre.it_lock_handle;
+	if (it->it_lock_handle) {
+		lockh.cookie = it->it_lock_handle;
 		mode = ldlm_revalidate_lock_handle(&lockh, bits);
 	} else {
 		fid_build_reg_res_name(fid, &res_id);
@@ -1098,11 +1057,11 @@ int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
 	}
 
 	if (mode) {
-		it->d.lustre.it_lock_handle = lockh.cookie;
-		it->d.lustre.it_lock_mode = mode;
+		it->it_lock_handle = lockh.cookie;
+		it->it_lock_mode = mode;
 	} else {
-		it->d.lustre.it_lock_handle = 0;
-		it->d.lustre.it_lock_mode = 0;
+		it->it_lock_handle = 0;
+		it->it_lock_mode = 0;
 	}
 
 	return !!mode;
@@ -1124,15 +1083,15 @@ int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
  * ll_create/ll_open gets called.
  *
  * The server will return to us, in it_disposition, an indication of
- * exactly what d.lustre.it_status refers to.
+ * exactly what it_status refers to.
  *
- * If DISP_OPEN_OPEN is set, then d.lustre.it_status refers to the open() call,
+ * If DISP_OPEN_OPEN is set, then it_status refers to the open() call,
  * otherwise if DISP_OPEN_CREATE is set, then it status is the
  * creation failure mode.  In either case, one of DISP_LOOKUP_NEG or
  * DISP_LOOKUP_POS will be set, indicating whether the child lookup
  * was successful.
  *
- * Else, if DISP_LOOKUP_EXECD then d.lustre.it_status is the rc of the
+ * Else, if DISP_LOOKUP_EXECD then it_status is the rc of the
  * child lookup.
  */
 int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
@@ -1165,7 +1124,7 @@ int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
 		 * be called in revalidate_it if we already have a lock, let's
 		 * verify that.
 		 */
-		it->d.lustre.it_lock_handle = 0;
+		it->it_lock_handle = 0;
 		rc = mdc_revalidate_lock(exp, it, &op_data->op_fid2, NULL);
 		/* Only return failure if it was not GETATTR by cfid
 		 * (from inode_revalidate)
@@ -1187,7 +1146,7 @@ int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
 	if (rc < 0)
 		return rc;
 
-	*reqp = it->d.lustre.it_data;
+	*reqp = it->it_request;
 	rc = mdc_finish_intent_lock(exp, *reqp, op_data, it, &lockh);
 	return rc;
 }
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_reint.c b/drivers/staging/lustre/lustre/mdc/mdc_reint.c
index 4ef3db147f87..5dba2c813857 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_reint.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_reint.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -234,7 +230,7 @@ rebuild:
 						MDS_INODELOCK_UPDATE);
 
 	req = ptlrpc_request_alloc(class_exp2cliimp(exp),
-				   &RQF_MDS_REINT_CREATE_RMT_ACL);
+				   &RQF_MDS_REINT_CREATE_ACL);
 	if (!req) {
 		ldlm_lock_list_put(&cancels, l_bl_ast, count);
 		return -ENOMEM;
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c
index b91d3ff18b02..542801f04b0d 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_request.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -142,25 +138,14 @@ static int mdc_getattr_common(struct obd_export *exp,
 
 	CDEBUG(D_NET, "mode: %o\n", body->mode);
 
+	mdc_update_max_ea_from_body(exp, body);
 	if (body->eadatasize != 0) {
-		mdc_update_max_ea_from_body(exp, body);
-
 		eadata = req_capsule_server_sized_get(pill, &RMF_MDT_MD,
 						      body->eadatasize);
 		if (!eadata)
 			return -EPROTO;
 	}
 
-	if (body->valid & OBD_MD_FLRMTPERM) {
-		struct mdt_remote_perm *perm;
-
-		LASSERT(client_is_remote(exp));
-		perm = req_capsule_server_swab_get(pill, &RMF_ACL,
-						lustre_swab_mdt_remote_perm);
-		if (!perm)
-			return -EPROTO;
-	}
-
 	return 0;
 }
 
@@ -191,11 +176,6 @@ static int mdc_getattr(struct obd_export *exp, struct md_op_data *op_data,
 
 	req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
 			     op_data->op_mode);
-	if (op_data->op_valid & OBD_MD_FLRMTPERM) {
-		LASSERT(client_is_remote(exp));
-		req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
-				     sizeof(struct mdt_remote_perm));
-	}
 	ptlrpc_request_set_replen(req);
 
 	rc = mdc_getattr_common(exp, req);
@@ -435,7 +415,7 @@ static int mdc_unpack_acl(struct ptlrpc_request *req, struct lustre_md *md)
 		return rc;
 	}
 
-	rc = posix_acl_valid(acl);
+	rc = posix_acl_valid(&init_user_ns, acl);
 	if (rc) {
 		CERROR("validate acl: %d\n", rc);
 		posix_acl_release(acl);
@@ -540,16 +520,7 @@ static int mdc_get_lustre_md(struct obd_export *exp,
 	}
 	rc = 0;
 
-	if (md->body->valid & OBD_MD_FLRMTPERM) {
-		/* remote permission */
-		LASSERT(client_is_remote(exp));
-		md->remote_perm = req_capsule_server_swab_get(pill, &RMF_ACL,
-						lustre_swab_mdt_remote_perm);
-		if (!md->remote_perm) {
-			rc = -EPROTO;
-			goto out;
-		}
-	} else if (md->body->valid & OBD_MD_FLACL) {
+	if (md->body->valid & OBD_MD_FLACL) {
 		/* for ACL, it's possible that FLACL is set but aclsize is zero.
 		 * only when aclsize != 0 there's an actual segment for ACL
 		 * in reply buffer.
@@ -666,7 +637,7 @@ int mdc_set_open_replay_data(struct obd_export *exp,
 	struct md_open_data   *mod;
 	struct mdt_rec_create *rec;
 	struct mdt_body       *body;
-	struct ptlrpc_request *open_req = it->d.lustre.it_data;
+	struct ptlrpc_request *open_req = it->it_request;
 	struct obd_import     *imp = open_req->rq_import;
 
 	if (!open_req->rq_replay)
@@ -1169,7 +1140,7 @@ static int mdc_ioc_hsm_progress(struct obd_export *exp,
 		goto out;
 	}
 
-	mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0);
+	mdc_pack_body(req, NULL, 0, 0, -1, 0);
 
 	/* Copy hsm_progress struct */
 	req_hpk = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_PROGRESS);
@@ -1203,7 +1174,7 @@ static int mdc_ioc_hsm_ct_register(struct obd_import *imp, __u32 archives)
 		goto out;
 	}
 
-	mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0);
+	mdc_pack_body(req, NULL, 0, 0, -1, 0);
 
 	/* Copy hsm_progress struct */
 	archive_mask = req_capsule_client_get(&req->rq_pill,
@@ -1242,7 +1213,7 @@ static int mdc_ioc_hsm_current_action(struct obd_export *exp,
 		return rc;
 	}
 
-	mdc_pack_body(req, &op_data->op_fid1, OBD_MD_FLRMTPERM, 0,
+	mdc_pack_body(req, &op_data->op_fid1, 0, 0,
 		      op_data->op_suppgids[0], 0);
 
 	ptlrpc_request_set_replen(req);
@@ -1278,7 +1249,7 @@ static int mdc_ioc_hsm_ct_unregister(struct obd_import *imp)
 		goto out;
 	}
 
-	mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0);
+	mdc_pack_body(req, NULL, 0, 0, -1, 0);
 
 	ptlrpc_request_set_replen(req);
 
@@ -1307,7 +1278,7 @@ static int mdc_ioc_hsm_state_get(struct obd_export *exp,
 		return rc;
 	}
 
-	mdc_pack_body(req, &op_data->op_fid1, OBD_MD_FLRMTPERM, 0,
+	mdc_pack_body(req, &op_data->op_fid1, 0, 0,
 		      op_data->op_suppgids[0], 0);
 
 	ptlrpc_request_set_replen(req);
@@ -1348,7 +1319,7 @@ static int mdc_ioc_hsm_state_set(struct obd_export *exp,
 		return rc;
 	}
 
-	mdc_pack_body(req, &op_data->op_fid1, OBD_MD_FLRMTPERM, 0,
+	mdc_pack_body(req, &op_data->op_fid1, 0, 0,
 		      op_data->op_suppgids[0], 0);
 
 	/* Copy states */
@@ -1395,7 +1366,7 @@ static int mdc_ioc_hsm_request(struct obd_export *exp,
 		return rc;
 	}
 
-	mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0);
+	mdc_pack_body(req, NULL, 0, 0, -1, 0);
 
 	/* Copy hsm_request struct */
 	req_hr = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_REQUEST);
@@ -1808,7 +1779,7 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
 	case IOC_OBD_STATFS: {
 		struct obd_statfs stat_buf = {0};
 
-		if (*((__u32 *) data->ioc_inlbuf2) != 0) {
+		if (*((__u32 *)data->ioc_inlbuf2) != 0) {
 			rc = -ENODEV;
 			goto out;
 		}
@@ -1952,7 +1923,7 @@ static void lustre_swab_hal(struct hsm_action_list *h)
 	__swab32s(&h->hal_count);
 	__swab32s(&h->hal_archive_id);
 	__swab64s(&h->hal_flags);
-	hai = hai_zero(h);
+	hai = hai_first(h);
 	for (i = 0; i < h->hal_count; i++, hai = hai_next(hai))
 		lustre_swab_hai(hai);
 }
@@ -2002,7 +1973,7 @@ static int mdc_hsm_copytool_send(int len, void *val)
 
 	if (len < sizeof(*lh) + sizeof(*hal)) {
 		CERROR("Short HSM message %d < %d\n", len,
-		       (int) (sizeof(*lh) + sizeof(*hal)));
+		       (int)(sizeof(*lh) + sizeof(*hal)));
 		return -EPROTO;
 	}
 	if (lh->kuc_magic == __swab16(KUC_MAGIC)) {
@@ -2249,7 +2220,7 @@ static struct obd_uuid *mdc_get_uuid(struct obd_export *exp)
  * recovery, non zero value will be return if the lock can be canceled,
  * or zero returned for not
  */
-static int mdc_cancel_for_recovery(struct ldlm_lock *lock)
+static int mdc_cancel_weight(struct ldlm_lock *lock)
 {
 	if (lock->l_resource->lr_type != LDLM_IBITS)
 		return 0;
@@ -2314,12 +2285,14 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
 		return -ENOMEM;
 	mdc_init_rpc_lock(cli->cl_rpc_lock);
 
-	ptlrpcd_addref();
+	rc = ptlrpcd_addref();
+	if (rc < 0)
+		goto err_rpc_lock;
 
 	cli->cl_close_lock = kzalloc(sizeof(*cli->cl_close_lock), GFP_NOFS);
 	if (!cli->cl_close_lock) {
 		rc = -ENOMEM;
-		goto err_rpc_lock;
+		goto err_ptlrpcd_decref;
 	}
 	mdc_init_rpc_lock(cli->cl_close_lock);
 
@@ -2331,7 +2304,7 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
 	sptlrpc_lprocfs_cliobd_attach(obd);
 	ptlrpc_lprocfs_register_obd(obd);
 
-	ns_register_cancel(obd->obd_namespace, mdc_cancel_for_recovery);
+	ns_register_cancel(obd->obd_namespace, mdc_cancel_weight);
 
 	obd->obd_namespace->ns_lvbo = &inode_lvbo;
 
@@ -2345,9 +2318,10 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
 
 err_close_lock:
 	kfree(cli->cl_close_lock);
+err_ptlrpcd_decref:
+	ptlrpcd_decref();
 err_rpc_lock:
 	kfree(cli->cl_rpc_lock);
-	ptlrpcd_decref();
 	return rc;
 }
 
@@ -2430,41 +2404,6 @@ static int mdc_process_config(struct obd_device *obd, u32 len, void *buf)
 	return rc;
 }
 
-/* get remote permission for current user on fid */
-static int mdc_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid,
-			       __u32 suppgid, struct ptlrpc_request **request)
-{
-	struct ptlrpc_request  *req;
-	int		    rc;
-
-	LASSERT(client_is_remote(exp));
-
-	*request = NULL;
-	req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_GETATTR);
-	if (!req)
-		return -ENOMEM;
-
-	rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_GETATTR);
-	if (rc) {
-		ptlrpc_request_free(req);
-		return rc;
-	}
-
-	mdc_pack_body(req, fid, OBD_MD_FLRMTPERM, 0, suppgid, 0);
-
-	req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
-			     sizeof(struct mdt_remote_perm));
-
-	ptlrpc_request_set_replen(req);
-
-	rc = ptlrpc_queue_wait(req);
-	if (rc)
-		ptlrpc_req_finished(req);
-	else
-		*request = req;
-	return rc;
-}
-
 static struct obd_ops mdc_obd_ops = {
 	.owner          = THIS_MODULE,
 	.setup          = mdc_setup,
@@ -2516,7 +2455,6 @@ static struct md_ops mdc_md_ops = {
 	.free_lustre_md		= mdc_free_lustre_md,
 	.set_open_replay_data	= mdc_set_open_replay_data,
 	.clear_open_replay_data	= mdc_clear_open_replay_data,
-	.get_remote_perm	= mdc_get_remote_perm,
 	.intent_getattr_async	= mdc_intent_getattr_async,
 	.revalidate_lock	= mdc_revalidate_lock
 };
diff --git a/drivers/staging/lustre/lustre/mgc/lproc_mgc.c b/drivers/staging/lustre/lustre/mgc/lproc_mgc.c
index 8d5bc5a751a4..0735220b2a18 100644
--- a/drivers/staging/lustre/lustre/mgc/lproc_mgc.c
+++ b/drivers/staging/lustre/lustre/mgc/lproc_mgc.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/mgc/mgc_internal.h b/drivers/staging/lustre/lustre/mgc/mgc_internal.h
index 82fb8f46e037..f146f7521c92 100644
--- a/drivers/staging/lustre/lustre/mgc/mgc_internal.h
+++ b/drivers/staging/lustre/lustre/mgc/mgc_internal.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c
index 3924b095bfb0..9d0bd4745865 100644
--- a/drivers/staging/lustre/lustre/mgc/mgc_request.c
+++ b/drivers/staging/lustre/lustre/mgc/mgc_request.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -500,10 +496,16 @@ static void do_requeue(struct config_llog_data *cld)
 	 * export which is being disconnected. Take the client
 	 * semaphore to make the check non-racy.
 	 */
-	down_read(&cld->cld_mgcexp->exp_obd->u.cli.cl_sem);
+	down_read_nested(&cld->cld_mgcexp->exp_obd->u.cli.cl_sem,
+			 OBD_CLI_SEM_MGC);
+
 	if (cld->cld_mgcexp->exp_obd->u.cli.cl_conn_count != 0) {
+		int rc;
+
 		CDEBUG(D_MGC, "updating log %s\n", cld->cld_logname);
-		mgc_process_log(cld->cld_mgcexp->exp_obd, cld);
+		rc = mgc_process_log(cld->cld_mgcexp->exp_obd, cld);
+		if (rc && rc != -ENOENT)
+			CERROR("failed processing log: %d\n", rc);
 	} else {
 		CDEBUG(D_MGC, "disconnecting, won't update log %s\n",
 		       cld->cld_logname);
@@ -734,7 +736,9 @@ static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
 	struct task_struct *task;
 	int rc;
 
-	ptlrpcd_addref();
+	rc = ptlrpcd_addref();
+	if (rc < 0)
+		goto err_noref;
 
 	rc = client_obd_setup(obd, lcfg);
 	if (rc)
@@ -773,6 +777,7 @@ err_cleanup:
 	client_obd_cleanup(obd);
 err_decref:
 	ptlrpcd_decref();
+err_noref:
 	return rc;
 }
 
@@ -1027,7 +1032,7 @@ static int mgc_set_info_async(const struct lu_env *env, struct obd_export *exp,
 		rc = sptlrpc_parse_flavor(val, &flvr);
 		if (rc) {
 			CERROR("invalid sptlrpc flavor %s to MGS\n",
-			       (char *) val);
+			       (char *)val);
 			return rc;
 		}
 
@@ -1043,7 +1048,7 @@ static int mgc_set_info_async(const struct lu_env *env, struct obd_export *exp,
 			sptlrpc_flavor2name(&cli->cl_flvr_mgc,
 					    str, sizeof(str));
 			LCONSOLE_ERROR("asking sptlrpc flavor %s to MGS but currently %s is in use\n",
-				       (char *) val, str);
+				       (char *)val, str);
 			rc = -EPERM;
 		}
 		return rc;
@@ -1720,7 +1725,6 @@ static int mgc_process_config(struct obd_device *obd, u32 len, void *buf)
 		CERROR("Unknown command: %d\n", lcfg->lcfg_command);
 		rc = -EINVAL;
 		goto out;
-
 	}
 	}
 out:
diff --git a/drivers/staging/lustre/lustre/obdclass/Makefile b/drivers/staging/lustre/lustre/obdclass/Makefile
index c404eb3864ff..df7e47f35a66 100644
--- a/drivers/staging/lustre/lustre/obdclass/Makefile
+++ b/drivers/staging/lustre/lustre/obdclass/Makefile
@@ -5,5 +5,4 @@ obdclass-y := linux/linux-module.o linux/linux-obdo.o linux/linux-sysctl.o \
 	      genops.o uuid.o lprocfs_status.o lprocfs_counters.o \
 	      lustre_handles.o lustre_peer.o statfs_pack.o \
 	      obdo.o obd_config.o obd_mount.o lu_object.o lu_ref.o \
-	      cl_object.o cl_page.o cl_lock.o cl_io.o \
-	      acl.o kernelcomm.o
+	      cl_object.o cl_page.o cl_lock.o cl_io.o kernelcomm.o
diff --git a/drivers/staging/lustre/lustre/obdclass/acl.c b/drivers/staging/lustre/lustre/obdclass/acl.c
deleted file mode 100644
index 0e02ae97b7ed..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/acl.c
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdclass/acl.c
- *
- * Lustre Access Control List.
- *
- * Author: Fan Yong <fanyong@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_SEC
-#include "../include/lu_object.h"
-#include "../include/lustre_acl.h"
-#include "../include/lustre_eacl.h"
-#include "../include/obd_support.h"
-
-#ifdef CONFIG_FS_POSIX_ACL
-
-#define CFS_ACL_XATTR_VERSION POSIX_ACL_XATTR_VERSION
-
-enum {
-	ES_UNK  = 0,    /* unknown stat */
-	ES_UNC  = 1,    /* ACL entry is not changed */
-	ES_MOD  = 2,    /* ACL entry is modified */
-	ES_ADD  = 3,    /* ACL entry is added */
-	ES_DEL  = 4     /* ACL entry is deleted */
-};
-
-static inline void lustre_ext_acl_le_to_cpu(ext_acl_xattr_entry *d,
-					    ext_acl_xattr_entry *s)
-{
-	d->e_tag	= le16_to_cpu(s->e_tag);
-	d->e_perm       = le16_to_cpu(s->e_perm);
-	d->e_id	 = le32_to_cpu(s->e_id);
-	d->e_stat       = le32_to_cpu(s->e_stat);
-}
-
-static inline void lustre_ext_acl_cpu_to_le(ext_acl_xattr_entry *d,
-					    ext_acl_xattr_entry *s)
-{
-	d->e_tag	= cpu_to_le16(s->e_tag);
-	d->e_perm       = cpu_to_le16(s->e_perm);
-	d->e_id	 = cpu_to_le32(s->e_id);
-	d->e_stat       = cpu_to_le32(s->e_stat);
-}
-
-static inline void lustre_posix_acl_le_to_cpu(posix_acl_xattr_entry *d,
-					      posix_acl_xattr_entry *s)
-{
-	d->e_tag	= le16_to_cpu(s->e_tag);
-	d->e_perm       = le16_to_cpu(s->e_perm);
-	d->e_id	 = le32_to_cpu(s->e_id);
-}
-
-static inline void lustre_posix_acl_cpu_to_le(posix_acl_xattr_entry *d,
-					      posix_acl_xattr_entry *s)
-{
-	d->e_tag	= cpu_to_le16(s->e_tag);
-	d->e_perm       = cpu_to_le16(s->e_perm);
-	d->e_id	 = cpu_to_le32(s->e_id);
-}
-
-/* if "new_count == 0", then "new = {a_version, NULL}", NOT NULL. */
-static int lustre_posix_acl_xattr_reduce_space(posix_acl_xattr_header **header,
-					       int old_count, int new_count)
-{
-	int old_size = CFS_ACL_XATTR_SIZE(old_count, posix_acl_xattr);
-	int new_size = CFS_ACL_XATTR_SIZE(new_count, posix_acl_xattr);
-	posix_acl_xattr_header *new;
-
-	if (unlikely(old_count <= new_count))
-		return old_size;
-
-	new = kmemdup(*header, new_size, GFP_NOFS);
-	if (unlikely(!new))
-		return -ENOMEM;
-
-	kfree(*header);
-	*header = new;
-	return new_size;
-}
-
-/* if "new_count == 0", then "new = {0, NULL}", NOT NULL. */
-static int lustre_ext_acl_xattr_reduce_space(ext_acl_xattr_header **header,
-					     int old_count)
-{
-	int ext_count = le32_to_cpu((*header)->a_count);
-	int ext_size = CFS_ACL_XATTR_SIZE(ext_count, ext_acl_xattr);
-	ext_acl_xattr_header *new;
-
-	if (unlikely(old_count <= ext_count))
-		return 0;
-
-	new = kmemdup(*header, ext_size, GFP_NOFS);
-	if (unlikely(!new))
-		return -ENOMEM;
-
-	kfree(*header);
-	*header = new;
-	return 0;
-}
-
-/*
- * Generate new extended ACL based on the posix ACL.
- */
-ext_acl_xattr_header *
-lustre_posix_acl_xattr_2ext(posix_acl_xattr_header *header, int size)
-{
-	int count, i, esize;
-	ext_acl_xattr_header *new;
-
-	if (unlikely(size < 0))
-		return ERR_PTR(-EINVAL);
-	else if (!size)
-		count = 0;
-	else
-		count = CFS_ACL_XATTR_COUNT(size, posix_acl_xattr);
-	esize = CFS_ACL_XATTR_SIZE(count, ext_acl_xattr);
-	new = kzalloc(esize, GFP_NOFS);
-	if (unlikely(!new))
-		return ERR_PTR(-ENOMEM);
-
-	new->a_count = cpu_to_le32(count);
-	for (i = 0; i < count; i++) {
-		new->a_entries[i].e_tag  = header->a_entries[i].e_tag;
-		new->a_entries[i].e_perm = header->a_entries[i].e_perm;
-		new->a_entries[i].e_id   = header->a_entries[i].e_id;
-		new->a_entries[i].e_stat = cpu_to_le32(ES_UNK);
-	}
-
-	return new;
-}
-EXPORT_SYMBOL(lustre_posix_acl_xattr_2ext);
-
-/*
- * Filter out the "nobody" entries in the posix ACL.
- */
-int lustre_posix_acl_xattr_filter(posix_acl_xattr_header *header, size_t size,
-				  posix_acl_xattr_header **out)
-{
-	int count, i, j, rc = 0;
-	__u32 id;
-	posix_acl_xattr_header *new;
-
-	if (!size)
-		return 0;
-	if (size < sizeof(*new))
-		return -EINVAL;
-
-	new = kzalloc(size, GFP_NOFS);
-	if (unlikely(!new))
-		return -ENOMEM;
-
-	new->a_version = cpu_to_le32(CFS_ACL_XATTR_VERSION);
-	count = CFS_ACL_XATTR_COUNT(size, posix_acl_xattr);
-	for (i = 0, j = 0; i < count; i++) {
-		id = le32_to_cpu(header->a_entries[i].e_id);
-		switch (le16_to_cpu(header->a_entries[i].e_tag)) {
-		case ACL_USER_OBJ:
-		case ACL_GROUP_OBJ:
-		case ACL_MASK:
-		case ACL_OTHER:
-			if (id != ACL_UNDEFINED_ID) {
-				rc = -EIO;
-				goto _out;
-			}
-
-			memcpy(&new->a_entries[j++], &header->a_entries[i],
-			       sizeof(posix_acl_xattr_entry));
-			break;
-		case ACL_USER:
-			if (id != NOBODY_UID)
-				memcpy(&new->a_entries[j++],
-				       &header->a_entries[i],
-				       sizeof(posix_acl_xattr_entry));
-			break;
-		case ACL_GROUP:
-			if (id != NOBODY_GID)
-				memcpy(&new->a_entries[j++],
-				       &header->a_entries[i],
-				       sizeof(posix_acl_xattr_entry));
-			break;
-		default:
-			rc = -EIO;
-			goto _out;
-		}
-	}
-
-	/* free unused space. */
-	rc = lustre_posix_acl_xattr_reduce_space(&new, count, j);
-	if (rc >= 0) {
-		size = rc;
-		*out = new;
-		rc = 0;
-	}
-
-_out:
-	if (rc) {
-		kfree(new);
-		size = rc;
-	}
-	return size;
-}
-EXPORT_SYMBOL(lustre_posix_acl_xattr_filter);
-
-/*
- * Release the extended ACL space.
- */
-void lustre_ext_acl_xattr_free(ext_acl_xattr_header *header)
-{
-	kfree(header);
-}
-EXPORT_SYMBOL(lustre_ext_acl_xattr_free);
-
-static ext_acl_xattr_entry *
-lustre_ext_acl_xattr_search(ext_acl_xattr_header *header,
-			    posix_acl_xattr_entry *entry, int *pos)
-{
-	int once, start, end, i, j, count = le32_to_cpu(header->a_count);
-
-	once = 0;
-	start = *pos;
-	end = count;
-
-again:
-	for (i = start; i < end; i++) {
-		if (header->a_entries[i].e_tag == entry->e_tag &&
-		    header->a_entries[i].e_id == entry->e_id) {
-			j = i;
-			if (++i >= count)
-				i = 0;
-			*pos = i;
-			return &header->a_entries[j];
-		}
-	}
-
-	if (!once) {
-		once = 1;
-		start = 0;
-		end = *pos;
-		goto again;
-	}
-
-	return NULL;
-}
-
-/*
- * Merge the posix ACL and the extended ACL into new extended ACL.
- */
-ext_acl_xattr_header *
-lustre_acl_xattr_merge2ext(posix_acl_xattr_header *posix_header, int size,
-			   ext_acl_xattr_header *ext_header)
-{
-	int ori_ext_count, posix_count, ext_count, ext_size;
-	int i, j, pos = 0, rc = 0;
-	posix_acl_xattr_entry pae;
-	ext_acl_xattr_header *new;
-	ext_acl_xattr_entry *ee, eae;
-
-	if (unlikely(size < 0))
-		return ERR_PTR(-EINVAL);
-	else if (!size)
-		posix_count = 0;
-	else
-		posix_count = CFS_ACL_XATTR_COUNT(size, posix_acl_xattr);
-	ori_ext_count = le32_to_cpu(ext_header->a_count);
-	ext_count = posix_count + ori_ext_count;
-	ext_size = CFS_ACL_XATTR_SIZE(ext_count, ext_acl_xattr);
-
-	new = kzalloc(ext_size, GFP_NOFS);
-	if (unlikely(!new))
-		return ERR_PTR(-ENOMEM);
-
-	for (i = 0, j = 0; i < posix_count; i++) {
-		lustre_posix_acl_le_to_cpu(&pae, &posix_header->a_entries[i]);
-		switch (pae.e_tag) {
-		case ACL_USER_OBJ:
-		case ACL_GROUP_OBJ:
-		case ACL_MASK:
-		case ACL_OTHER:
-			if (pae.e_id != ACL_UNDEFINED_ID) {
-				rc = -EIO;
-				goto out;
-		}
-		case ACL_USER:
-			/* ignore "nobody" entry. */
-			if (pae.e_id == NOBODY_UID)
-				break;
-
-			new->a_entries[j].e_tag =
-					posix_header->a_entries[i].e_tag;
-			new->a_entries[j].e_perm =
-					posix_header->a_entries[i].e_perm;
-			new->a_entries[j].e_id =
-					posix_header->a_entries[i].e_id;
-			ee = lustre_ext_acl_xattr_search(ext_header,
-					&posix_header->a_entries[i], &pos);
-			if (ee) {
-				if (posix_header->a_entries[i].e_perm !=
-								ee->e_perm)
-					/* entry modified. */
-					ee->e_stat =
-					new->a_entries[j++].e_stat =
-							cpu_to_le32(ES_MOD);
-				else
-					/* entry unchanged. */
-					ee->e_stat =
-					new->a_entries[j++].e_stat =
-							cpu_to_le32(ES_UNC);
-			} else {
-				/* new entry. */
-				new->a_entries[j++].e_stat =
-							cpu_to_le32(ES_ADD);
-			}
-			break;
-		case ACL_GROUP:
-			/* ignore "nobody" entry. */
-			if (pae.e_id == NOBODY_GID)
-				break;
-			new->a_entries[j].e_tag =
-					posix_header->a_entries[i].e_tag;
-			new->a_entries[j].e_perm =
-					posix_header->a_entries[i].e_perm;
-			new->a_entries[j].e_id =
-					posix_header->a_entries[i].e_id;
-			ee = lustre_ext_acl_xattr_search(ext_header,
-					&posix_header->a_entries[i], &pos);
-			if (ee) {
-				if (posix_header->a_entries[i].e_perm !=
-								ee->e_perm)
-					/* entry modified. */
-					ee->e_stat =
-					new->a_entries[j++].e_stat =
-							cpu_to_le32(ES_MOD);
-				else
-					/* entry unchanged. */
-					ee->e_stat =
-					new->a_entries[j++].e_stat =
-							cpu_to_le32(ES_UNC);
-			} else {
-				/* new entry. */
-				new->a_entries[j++].e_stat =
-							cpu_to_le32(ES_ADD);
-			}
-			break;
-		default:
-			rc = -EIO;
-			goto out;
-		}
-	}
-
-	/* process deleted entries. */
-	for (i = 0; i < ori_ext_count; i++) {
-		lustre_ext_acl_le_to_cpu(&eae, &ext_header->a_entries[i]);
-		if (eae.e_stat == ES_UNK) {
-			/* ignore "nobody" entry. */
-			if ((eae.e_tag == ACL_USER && eae.e_id == NOBODY_UID) ||
-			    (eae.e_tag == ACL_GROUP && eae.e_id == NOBODY_GID))
-				continue;
-
-			new->a_entries[j].e_tag =
-						ext_header->a_entries[i].e_tag;
-			new->a_entries[j].e_perm =
-						ext_header->a_entries[i].e_perm;
-			new->a_entries[j].e_id = ext_header->a_entries[i].e_id;
-			new->a_entries[j++].e_stat = cpu_to_le32(ES_DEL);
-		}
-	}
-
-	new->a_count = cpu_to_le32(j);
-	/* free unused space. */
-	rc = lustre_ext_acl_xattr_reduce_space(&new, ext_count);
-
-out:
-	if (rc) {
-		kfree(new);
-		new = ERR_PTR(rc);
-	}
-	return new;
-}
-EXPORT_SYMBOL(lustre_acl_xattr_merge2ext);
-
-#endif
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_internal.h b/drivers/staging/lustre/lustre/obdclass/cl_internal.h
index 7eb0ad7b3644..e866754a42d5 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_internal.h
+++ b/drivers/staging/lustre/lustre/obdclass/cl_internal.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_io.c b/drivers/staging/lustre/lustre/obdclass/cl_io.c
index f5128b4f176f..e72f1fc00a13 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_io.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_io.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -36,6 +32,7 @@
  * Client IO.
  *
  *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  */
 
 #define DEBUG_SUBSYSTEM S_CLASS
@@ -132,6 +129,7 @@ void cl_io_fini(const struct lu_env *env, struct cl_io *io)
 	case CIT_WRITE:
 		break;
 	case CIT_FAULT:
+		break;
 	case CIT_FSYNC:
 		LASSERT(!io->ci_need_restart);
 		break;
@@ -159,7 +157,6 @@ static int cl_io_init0(const struct lu_env *env, struct cl_io *io,
 
 	io->ci_type = iot;
 	INIT_LIST_HEAD(&io->ci_lockset.cls_todo);
-	INIT_LIST_HEAD(&io->ci_lockset.cls_curr);
 	INIT_LIST_HEAD(&io->ci_lockset.cls_done);
 	INIT_LIST_HEAD(&io->ci_layers);
 
@@ -241,37 +238,7 @@ static int cl_lock_descr_sort(const struct cl_lock_descr *d0,
 			      const struct cl_lock_descr *d1)
 {
 	return lu_fid_cmp(lu_object_fid(&d0->cld_obj->co_lu),
-			  lu_object_fid(&d1->cld_obj->co_lu)) ?:
-		__diff_normalize(d0->cld_start, d1->cld_start);
-}
-
-static int cl_lock_descr_cmp(const struct cl_lock_descr *d0,
-			     const struct cl_lock_descr *d1)
-{
-	int ret;
-
-	ret = lu_fid_cmp(lu_object_fid(&d0->cld_obj->co_lu),
-			 lu_object_fid(&d1->cld_obj->co_lu));
-	if (ret)
-		return ret;
-	if (d0->cld_end < d1->cld_start)
-		return -1;
-	if (d0->cld_start > d0->cld_end)
-		return 1;
-	return 0;
-}
-
-static void cl_lock_descr_merge(struct cl_lock_descr *d0,
-				const struct cl_lock_descr *d1)
-{
-	d0->cld_start = min(d0->cld_start, d1->cld_start);
-	d0->cld_end = max(d0->cld_end, d1->cld_end);
-
-	if (d1->cld_mode == CLM_WRITE && d0->cld_mode != CLM_WRITE)
-		d0->cld_mode = CLM_WRITE;
-
-	if (d1->cld_mode == CLM_GROUP && d0->cld_mode != CLM_GROUP)
-		d0->cld_mode = CLM_GROUP;
+			  lu_object_fid(&d1->cld_obj->co_lu));
 }
 
 /*
@@ -320,33 +287,35 @@ static void cl_io_locks_sort(struct cl_io *io)
 	} while (!done);
 }
 
-/**
- * Check whether \a queue contains locks matching \a need.
- *
- * \retval +ve there is a matching lock in the \a queue
- * \retval   0 there are no matching locks in the \a queue
- */
-int cl_queue_match(const struct list_head *queue,
-		   const struct cl_lock_descr *need)
+static void cl_lock_descr_merge(struct cl_lock_descr *d0,
+				const struct cl_lock_descr *d1)
 {
-	struct cl_io_lock_link *scan;
+	d0->cld_start = min(d0->cld_start, d1->cld_start);
+	d0->cld_end = max(d0->cld_end, d1->cld_end);
 
-	list_for_each_entry(scan, queue, cill_linkage) {
-		if (cl_lock_descr_match(&scan->cill_descr, need))
-			return 1;
-	}
-	return 0;
+	if (d1->cld_mode == CLM_WRITE && d0->cld_mode != CLM_WRITE)
+		d0->cld_mode = CLM_WRITE;
+
+	if (d1->cld_mode == CLM_GROUP && d0->cld_mode != CLM_GROUP)
+		d0->cld_mode = CLM_GROUP;
 }
-EXPORT_SYMBOL(cl_queue_match);
 
-static int cl_queue_merge(const struct list_head *queue,
-			  const struct cl_lock_descr *need)
+static int cl_lockset_merge(const struct cl_lockset *set,
+			    const struct cl_lock_descr *need)
 {
 	struct cl_io_lock_link *scan;
 
-	list_for_each_entry(scan, queue, cill_linkage) {
-		if (cl_lock_descr_cmp(&scan->cill_descr, need))
+	list_for_each_entry(scan, &set->cls_todo, cill_linkage) {
+		if (!cl_object_same(scan->cill_descr.cld_obj, need->cld_obj))
 			continue;
+
+		/* Merge locks for the same object because ldlm lock server
+		 * may expand the lock extent, otherwise there is a deadlock
+		 * case if two conflicted locks are queueud for the same object
+		 * and lock server expands one lock to overlap the another.
+		 * The side effect is that it can generate a multi-stripe lock
+		 * that may cause casacading problem
+		 */
 		cl_lock_descr_merge(&scan->cill_descr, need);
 		CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n",
 		       scan->cill_descr.cld_mode, scan->cill_descr.cld_start,
@@ -356,87 +325,20 @@ static int cl_queue_merge(const struct list_head *queue,
 	return 0;
 }
 
-static int cl_lockset_match(const struct cl_lockset *set,
-			    const struct cl_lock_descr *need)
-{
-	return cl_queue_match(&set->cls_curr, need) ||
-	       cl_queue_match(&set->cls_done, need);
-}
-
-static int cl_lockset_merge(const struct cl_lockset *set,
-			    const struct cl_lock_descr *need)
-{
-	return cl_queue_merge(&set->cls_todo, need) ||
-	       cl_lockset_match(set, need);
-}
-
-static int cl_lockset_lock_one(const struct lu_env *env,
-			       struct cl_io *io, struct cl_lockset *set,
-			       struct cl_io_lock_link *link)
-{
-	struct cl_lock *lock;
-	int	     result;
-
-	lock = cl_lock_request(env, io, &link->cill_descr, "io", io);
-
-	if (!IS_ERR(lock)) {
-		link->cill_lock = lock;
-		list_move(&link->cill_linkage, &set->cls_curr);
-		if (!(link->cill_descr.cld_enq_flags & CEF_ASYNC)) {
-			result = cl_wait(env, lock);
-			if (result == 0)
-				list_move(&link->cill_linkage, &set->cls_done);
-		} else
-			result = 0;
-	} else
-		result = PTR_ERR(lock);
-	return result;
-}
-
-static void cl_lock_link_fini(const struct lu_env *env, struct cl_io *io,
-			      struct cl_io_lock_link *link)
-{
-	struct cl_lock *lock = link->cill_lock;
-
-	list_del_init(&link->cill_linkage);
-	if (lock) {
-		cl_lock_release(env, lock, "io", io);
-		link->cill_lock = NULL;
-	}
-	if (link->cill_fini)
-		link->cill_fini(env, link);
-}
-
 static int cl_lockset_lock(const struct lu_env *env, struct cl_io *io,
 			   struct cl_lockset *set)
 {
 	struct cl_io_lock_link *link;
 	struct cl_io_lock_link *temp;
-	struct cl_lock	 *lock;
 	int result;
 
 	result = 0;
 	list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) {
-		if (!cl_lockset_match(set, &link->cill_descr)) {
-			/* XXX some locking to guarantee that locks aren't
-			 * expanded in between.
-			 */
-			result = cl_lockset_lock_one(env, io, set, link);
-			if (result != 0)
-				break;
-		} else
-			cl_lock_link_fini(env, io, link);
-	}
-	if (result == 0) {
-		list_for_each_entry_safe(link, temp,
-					 &set->cls_curr, cill_linkage) {
-			lock = link->cill_lock;
-			result = cl_wait(env, lock);
-			if (result == 0)
-				list_move(&link->cill_linkage, &set->cls_done);
-			else
-				break;
-		}
+		result = cl_lock_request(env, io, &link->cill_lock);
+		if (result < 0)
+			break;
+
+		list_move(&link->cill_linkage, &set->cls_done);
 	}
 	return result;
 }
@@ -492,16 +394,19 @@ void cl_io_unlock(const struct lu_env *env, struct cl_io *io)
 
 	set = &io->ci_lockset;
 
-	list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage)
-		cl_lock_link_fini(env, io, link);
-
-	list_for_each_entry_safe(link, temp, &set->cls_curr, cill_linkage)
-		cl_lock_link_fini(env, io, link);
+	list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) {
+		list_del_init(&link->cill_linkage);
+		if (link->cill_fini)
+			link->cill_fini(env, link);
+	}
 
 	list_for_each_entry_safe(link, temp, &set->cls_done, cill_linkage) {
-		cl_unuse(env, link->cill_lock);
-		cl_lock_link_fini(env, io, link);
+		list_del_init(&link->cill_linkage);
+		cl_lock_release(env, &link->cill_lock);
+		if (link->cill_fini)
+			link->cill_fini(env, link);
 	}
+
 	cl_io_for_each_reverse(scan, io) {
 		if (scan->cis_iop->op[io->ci_type].cio_unlock)
 			scan->cis_iop->op[io->ci_type].cio_unlock(env, scan);
@@ -595,9 +500,9 @@ int cl_io_lock_add(const struct lu_env *env, struct cl_io *io,
 {
 	int result;
 
-	if (cl_lockset_merge(&io->ci_lockset, &link->cill_descr))
+	if (cl_lockset_merge(&io->ci_lockset, &link->cill_descr)) {
 		result = 1;
-	else {
+	} else {
 		list_add(&link->cill_linkage, &io->ci_lockset.cls_todo);
 		result = 0;
 	}
@@ -627,8 +532,9 @@ int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io,
 		result = cl_io_lock_add(env, io, link);
 		if (result) /* lock match */
 			link->cill_fini(env, link);
-	} else
+	} else {
 		result = -ENOMEM;
+	}
 
 	return result;
 }
@@ -692,42 +598,6 @@ cl_io_slice_page(const struct cl_io_slice *ios, struct cl_page *page)
 }
 
 /**
- * True iff \a page is within \a io range.
- */
-static int cl_page_in_io(const struct cl_page *page, const struct cl_io *io)
-{
-	int     result = 1;
-	loff_t  start;
-	loff_t  end;
-	pgoff_t idx;
-
-	idx = page->cp_index;
-	switch (io->ci_type) {
-	case CIT_READ:
-	case CIT_WRITE:
-		/*
-		 * check that [start, end) and [pos, pos + count) extents
-		 * overlap.
-		 */
-		if (!cl_io_is_append(io)) {
-			const struct cl_io_rw_common *crw = &(io->u.ci_rw);
-
-			start = cl_offset(page->cp_obj, idx);
-			end   = cl_offset(page->cp_obj, idx + 1);
-			result = crw->crw_pos < end &&
-				 start < crw->crw_pos + crw->crw_count;
-		}
-		break;
-	case CIT_FAULT:
-		result = io->u.ci_fault.ft_index == idx;
-		break;
-	default:
-		LBUG();
-	}
-	return result;
-}
-
-/**
  * Called by read io, when page has to be read from the server.
  *
  * \see cl_io_operations::cio_read_page()
@@ -742,7 +612,6 @@ int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
 	LINVRNT(io->ci_type == CIT_READ || io->ci_type == CIT_FAULT);
 	LINVRNT(cl_page_is_owned(page, io));
 	LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED);
-	LINVRNT(cl_page_in_io(page, io));
 	LINVRNT(cl_io_invariant(io));
 
 	queue = &io->ci_queue;
@@ -769,7 +638,7 @@ int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
 				break;
 		}
 	}
-	if (result == 0)
+	if (result == 0 && queue->c2_qin.pl_nr > 0)
 		result = cl_io_submit_rw(env, io, CRT_READ, queue);
 	/*
 	 * Unlock unsent pages in case of error.
@@ -781,77 +650,29 @@ int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
 EXPORT_SYMBOL(cl_io_read_page);
 
 /**
- * Called by write io to prepare page to receive data from user buffer.
- *
- * \see cl_io_operations::cio_prepare_write()
- */
-int cl_io_prepare_write(const struct lu_env *env, struct cl_io *io,
-			struct cl_page *page, unsigned from, unsigned to)
-{
-	const struct cl_io_slice *scan;
-	int result = 0;
-
-	LINVRNT(io->ci_type == CIT_WRITE);
-	LINVRNT(cl_page_is_owned(page, io));
-	LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED);
-	LINVRNT(cl_io_invariant(io));
-	LASSERT(cl_page_in_io(page, io));
-
-	cl_io_for_each_reverse(scan, io) {
-		if (scan->cis_iop->cio_prepare_write) {
-			const struct cl_page_slice *slice;
-
-			slice = cl_io_slice_page(scan, page);
-			result = scan->cis_iop->cio_prepare_write(env, scan,
-								  slice,
-								  from, to);
-			if (result != 0)
-				break;
-		}
-	}
-	return result;
-}
-EXPORT_SYMBOL(cl_io_prepare_write);
-
-/**
- * Called by write io after user data were copied into a page.
+ * Commit a list of contiguous pages into writeback cache.
  *
- * \see cl_io_operations::cio_commit_write()
+ * \returns 0 if all pages committed, or errcode if error occurred.
+ * \see cl_io_operations::cio_commit_async()
  */
-int cl_io_commit_write(const struct lu_env *env, struct cl_io *io,
-		       struct cl_page *page, unsigned from, unsigned to)
+int cl_io_commit_async(const struct lu_env *env, struct cl_io *io,
+		       struct cl_page_list *queue, int from, int to,
+		       cl_commit_cbt cb)
 {
 	const struct cl_io_slice *scan;
 	int result = 0;
 
-	LINVRNT(io->ci_type == CIT_WRITE);
-	LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED);
-	LINVRNT(cl_io_invariant(io));
-	/*
-	 * XXX Uh... not nice. Top level cl_io_commit_write() call (vvp->lov)
-	 * already called cl_page_cache_add(), moving page into CPS_CACHED
-	 * state. Better (and more general) way of dealing with such situation
-	 * is needed.
-	 */
-	LASSERT(cl_page_is_owned(page, io) || page->cp_parent);
-	LASSERT(cl_page_in_io(page, io));
-
 	cl_io_for_each(scan, io) {
-		if (scan->cis_iop->cio_commit_write) {
-			const struct cl_page_slice *slice;
-
-			slice = cl_io_slice_page(scan, page);
-			result = scan->cis_iop->cio_commit_write(env, scan,
-								 slice,
-								 from, to);
-			if (result != 0)
-				break;
-		}
+		if (!scan->cis_iop->cio_commit_async)
+			continue;
+		result = scan->cis_iop->cio_commit_async(env, scan, queue,
+							 from, to, cb);
+		if (result != 0)
+			break;
 	}
-	LINVRNT(result <= 0);
 	return result;
 }
-EXPORT_SYMBOL(cl_io_commit_write);
+EXPORT_SYMBOL(cl_io_commit_async);
 
 /**
  * Submits a list of pages for immediate io.
@@ -869,13 +690,10 @@ int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
 	const struct cl_io_slice *scan;
 	int result = 0;
 
-	LINVRNT(crt < ARRAY_SIZE(scan->cis_iop->req_op));
-
 	cl_io_for_each(scan, io) {
-		if (!scan->cis_iop->req_op[crt].cio_submit)
+		if (!scan->cis_iop->cio_submit)
 			continue;
-		result = scan->cis_iop->req_op[crt].cio_submit(env, scan, crt,
-							       queue);
+		result = scan->cis_iop->cio_submit(env, scan, crt, queue);
 		if (result != 0)
 			break;
 	}
@@ -887,6 +705,9 @@ int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
 }
 EXPORT_SYMBOL(cl_io_submit_rw);
 
+static void cl_page_list_assume(const struct lu_env *env,
+				struct cl_io *io, struct cl_page_list *plist);
+
 /**
  * Submit a sync_io and wait for the IO to be finished, or error happens.
  * If \a timeout is zero, it means to wait for the IO unconditionally.
@@ -904,7 +725,7 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
 		pg->cp_sync_io = anchor;
 	}
 
-	cl_sync_io_init(anchor, queue->c2_qin.pl_nr);
+	cl_sync_io_init(anchor, queue->c2_qin.pl_nr, &cl_sync_io_end);
 	rc = cl_io_submit_rw(env, io, iot, queue);
 	if (rc == 0) {
 		/*
@@ -915,12 +736,12 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
 		 */
 		cl_page_list_for_each(pg, &queue->c2_qin) {
 			pg->cp_sync_io = NULL;
-			cl_sync_io_note(anchor, 1);
+			cl_sync_io_note(env, anchor, 1);
 		}
 
 		/* wait for the IO to be finished. */
-		rc = cl_sync_io_wait(env, io, &queue->c2_qout,
-				     anchor, timeout);
+		rc = cl_sync_io_wait(env, anchor, timeout);
+		cl_page_list_assume(env, io, &queue->c2_qout);
 	} else {
 		LASSERT(list_empty(&queue->c2_qout.pl_pages));
 		cl_page_list_for_each(pg, &queue->c2_qin)
@@ -931,26 +752,6 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
 EXPORT_SYMBOL(cl_io_submit_sync);
 
 /**
- * Cancel an IO which has been submitted by cl_io_submit_rw.
- */
-static int cl_io_cancel(const struct lu_env *env, struct cl_io *io,
-			struct cl_page_list *queue)
-{
-	struct cl_page *page;
-	int result = 0;
-
-	CERROR("Canceling ongoing page transmission\n");
-	cl_page_list_for_each(page, queue) {
-		int rc;
-
-		LINVRNT(cl_page_in_io(page, io));
-		rc = cl_page_cancel(env, page);
-		result = result ?: rc;
-	}
-	return result;
-}
-
-/**
  * Main io loop.
  *
  * Pumps io through iterations calling
@@ -1072,8 +873,8 @@ EXPORT_SYMBOL(cl_page_list_add);
 /**
  * Removes a page from a page list.
  */
-static void cl_page_list_del(const struct lu_env *env,
-			     struct cl_page_list *plist, struct cl_page *page)
+void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist,
+		      struct cl_page *page)
 {
 	LASSERT(plist->pl_nr > 0);
 	LINVRNT(plist->pl_owner == current);
@@ -1086,6 +887,7 @@ static void cl_page_list_del(const struct lu_env *env,
 	lu_ref_del_at(&page->cp_reference, &page->cp_queue_ref, "queue", plist);
 	cl_page_put(env, page);
 }
+EXPORT_SYMBOL(cl_page_list_del);
 
 /**
  * Moves a page from one page list to another.
@@ -1106,6 +908,24 @@ void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src,
 EXPORT_SYMBOL(cl_page_list_move);
 
 /**
+ * Moves a page from one page list to the head of another list.
+ */
+void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src,
+			    struct cl_page *page)
+{
+	LASSERT(src->pl_nr > 0);
+	LINVRNT(dst->pl_owner == current);
+	LINVRNT(src->pl_owner == current);
+
+	list_move(&page->cp_batch, &dst->pl_pages);
+	--src->pl_nr;
+	++dst->pl_nr;
+	lu_ref_set_at(&page->cp_reference, &page->cp_queue_ref, "queue",
+		      src, dst);
+}
+EXPORT_SYMBOL(cl_page_list_move_head);
+
+/**
  * splice the cl_page_list, just as list head does
  */
 void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head)
@@ -1162,8 +982,7 @@ EXPORT_SYMBOL(cl_page_list_disown);
 /**
  * Releases pages from queue.
  */
-static void cl_page_list_fini(const struct lu_env *env,
-			      struct cl_page_list *plist)
+void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist)
 {
 	struct cl_page *page;
 	struct cl_page *temp;
@@ -1174,6 +993,7 @@ static void cl_page_list_fini(const struct lu_env *env,
 		cl_page_list_del(env, plist, page);
 	LASSERT(plist->pl_nr == 0);
 }
+EXPORT_SYMBOL(cl_page_list_fini);
 
 /**
  * Assumes all pages in a queue.
@@ -1260,7 +1080,7 @@ EXPORT_SYMBOL(cl_2queue_init_page);
 /**
  * Returns top-level io.
  *
- * \see cl_object_top(), cl_page_top().
+ * \see cl_object_top()
  */
 struct cl_io *cl_io_top(struct cl_io *io)
 {
@@ -1323,19 +1143,14 @@ static int cl_req_init(const struct lu_env *env, struct cl_req *req,
 	int result;
 
 	result = 0;
-	page = cl_page_top(page);
-	do {
-		list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
-			dev = lu2cl_dev(slice->cpl_obj->co_lu.lo_dev);
-			if (dev->cd_ops->cdo_req_init) {
-				result = dev->cd_ops->cdo_req_init(env,
-								   dev, req);
-				if (result != 0)
-					break;
-			}
+	list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
+		dev = lu2cl_dev(slice->cpl_obj->co_lu.lo_dev);
+		if (dev->cd_ops->cdo_req_init) {
+			result = dev->cd_ops->cdo_req_init(env, dev, req);
+			if (result != 0)
+				break;
 		}
-		page = page->cp_child;
-	} while (page && result == 0);
+	}
 	return result;
 }
 
@@ -1384,14 +1199,16 @@ struct cl_req *cl_req_alloc(const struct lu_env *env, struct cl_page *page,
 		if (req->crq_o) {
 			req->crq_nrobjs = nr_objects;
 			result = cl_req_init(env, req, page);
-		} else
+		} else {
 			result = -ENOMEM;
+		}
 		if (result != 0) {
 			cl_req_completion(env, req, result);
 			req = ERR_PTR(result);
 		}
-	} else
+	} else {
 		req = ERR_PTR(-ENOMEM);
+	}
 	return req;
 }
 EXPORT_SYMBOL(cl_req_alloc);
@@ -1406,8 +1223,6 @@ void cl_req_page_add(const struct lu_env *env,
 	struct cl_req_obj *rqo;
 	int i;
 
-	page = cl_page_top(page);
-
 	LASSERT(list_empty(&page->cp_flight));
 	LASSERT(!page->cp_req);
 
@@ -1438,8 +1253,6 @@ void cl_req_page_done(const struct lu_env *env, struct cl_page *page)
 {
 	struct cl_req *req = page->cp_req;
 
-	page = cl_page_top(page);
-
 	LASSERT(!list_empty(&page->cp_flight));
 	LASSERT(req->crq_nrpages > 0);
 
@@ -1511,25 +1324,39 @@ void cl_req_attr_set(const struct lu_env *env, struct cl_req *req,
 }
 EXPORT_SYMBOL(cl_req_attr_set);
 
+/* cl_sync_io_callback assumes the caller must call cl_sync_io_wait() to
+ * wait for the IO to finish.
+ */
+void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor)
+{
+	wake_up_all(&anchor->csi_waitq);
+
+	/* it's safe to nuke or reuse anchor now */
+	atomic_set(&anchor->csi_barrier, 0);
+}
+EXPORT_SYMBOL(cl_sync_io_end);
 
 /**
- * Initialize synchronous io wait anchor, for transfer of \a nrpages pages.
+ * Initialize synchronous io wait anchor
  */
-void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages)
+void cl_sync_io_init(struct cl_sync_io *anchor, int nr,
+		     void (*end)(const struct lu_env *, struct cl_sync_io *))
 {
+	memset(anchor, 0, sizeof(*anchor));
 	init_waitqueue_head(&anchor->csi_waitq);
-	atomic_set(&anchor->csi_sync_nr, nrpages);
-	atomic_set(&anchor->csi_barrier, nrpages > 0);
+	atomic_set(&anchor->csi_sync_nr, nr);
+	atomic_set(&anchor->csi_barrier, nr > 0);
 	anchor->csi_sync_rc = 0;
+	anchor->csi_end_io = end;
+	LASSERT(end);
 }
 EXPORT_SYMBOL(cl_sync_io_init);
 
 /**
- * Wait until all transfer completes. Transfer completion routine has to call
- * cl_sync_io_note() for every page.
+ * Wait until all IO completes. Transfer completion routine has to call
+ * cl_sync_io_note() for every entity.
  */
-int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io,
-		    struct cl_page_list *queue, struct cl_sync_io *anchor,
+int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor,
 		    long timeout)
 {
 	struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(timeout),
@@ -1542,11 +1369,9 @@ int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io,
 			  atomic_read(&anchor->csi_sync_nr) == 0,
 			  &lwi);
 	if (rc < 0) {
-		CERROR("SYNC IO failed with error: %d, try to cancel %d remaining pages\n",
+		CERROR("IO failed: %d, still wait for %d remaining entries\n",
 		       rc, atomic_read(&anchor->csi_sync_nr));
 
-		(void)cl_io_cancel(env, io, queue);
-
 		lwi = (struct l_wait_info) { 0 };
 		(void)l_wait_event(anchor->csi_waitq,
 				   atomic_read(&anchor->csi_sync_nr) == 0,
@@ -1555,14 +1380,12 @@ int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io,
 		rc = anchor->csi_sync_rc;
 	}
 	LASSERT(atomic_read(&anchor->csi_sync_nr) == 0);
-	cl_page_list_assume(env, io, queue);
 
 	/* wait until cl_sync_io_note() has done wakeup */
 	while (unlikely(atomic_read(&anchor->csi_barrier) != 0)) {
 		cpu_relax();
 	}
 
-	POISON(anchor, 0x5a, sizeof(*anchor));
 	return rc;
 }
 EXPORT_SYMBOL(cl_sync_io_wait);
@@ -1570,7 +1393,8 @@ EXPORT_SYMBOL(cl_sync_io_wait);
 /**
  * Indicate that transfer of a single page completed.
  */
-void cl_sync_io_note(struct cl_sync_io *anchor, int ioret)
+void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor,
+		     int ioret)
 {
 	if (anchor->csi_sync_rc == 0 && ioret < 0)
 		anchor->csi_sync_rc = ioret;
@@ -1581,9 +1405,9 @@ void cl_sync_io_note(struct cl_sync_io *anchor, int ioret)
 	 */
 	LASSERT(atomic_read(&anchor->csi_sync_nr) > 0);
 	if (atomic_dec_and_test(&anchor->csi_sync_nr)) {
-		wake_up_all(&anchor->csi_waitq);
-		/* it's safe to nuke or reuse anchor now */
-		atomic_set(&anchor->csi_barrier, 0);
+		LASSERT(anchor->csi_end_io);
+		anchor->csi_end_io(env, anchor);
+		/* Can't access anchor any more */
 	}
 }
 EXPORT_SYMBOL(cl_sync_io_note);
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_lock.c b/drivers/staging/lustre/lustre/obdclass/cl_lock.c
index aec644eb4db9..9d7b5939b0fd 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_lock.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_lock.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -36,6 +32,7 @@
  * Client Extent Lock.
  *
  *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  */
 
 #define DEBUG_SUBSYSTEM S_CLASS
@@ -47,138 +44,18 @@
 #include "../include/cl_object.h"
 #include "cl_internal.h"
 
-/** Lock class of cl_lock::cll_guard */
-static struct lock_class_key cl_lock_guard_class;
-static struct kmem_cache *cl_lock_kmem;
-
-static struct lu_kmem_descr cl_lock_caches[] = {
-	{
-		.ckd_cache = &cl_lock_kmem,
-		.ckd_name  = "cl_lock_kmem",
-		.ckd_size  = sizeof (struct cl_lock)
-	},
-	{
-		.ckd_cache = NULL
-	}
-};
-
-#define CS_LOCK_INC(o, item)
-#define CS_LOCK_DEC(o, item)
-#define CS_LOCKSTATE_INC(o, state)
-#define CS_LOCKSTATE_DEC(o, state)
-
-/**
- * Basic lock invariant that is maintained at all times. Caller either has a
- * reference to \a lock, or somehow assures that \a lock cannot be freed.
- *
- * \see cl_lock_invariant()
- */
-static int cl_lock_invariant_trusted(const struct lu_env *env,
-				     const struct cl_lock *lock)
-{
-	return  ergo(lock->cll_state == CLS_FREEING, lock->cll_holds == 0) &&
-		atomic_read(&lock->cll_ref) >= lock->cll_holds &&
-		lock->cll_holds >= lock->cll_users &&
-		lock->cll_holds >= 0 &&
-		lock->cll_users >= 0 &&
-		lock->cll_depth >= 0;
-}
-
-/**
- * Stronger lock invariant, checking that caller has a reference on a lock.
- *
- * \see cl_lock_invariant_trusted()
- */
-static int cl_lock_invariant(const struct lu_env *env,
-			     const struct cl_lock *lock)
-{
-	int result;
-
-	result = atomic_read(&lock->cll_ref) > 0 &&
-		cl_lock_invariant_trusted(env, lock);
-	if (!result && env)
-		CL_LOCK_DEBUG(D_ERROR, env, lock, "invariant broken\n");
-	return result;
-}
-
-/**
- * Returns lock "nesting": 0 for a top-lock and 1 for a sub-lock.
- */
-static enum clt_nesting_level cl_lock_nesting(const struct cl_lock *lock)
-{
-	return cl_object_header(lock->cll_descr.cld_obj)->coh_nesting;
-}
-
-/**
- * Returns a set of counters for this lock, depending on a lock nesting.
- */
-static struct cl_thread_counters *cl_lock_counters(const struct lu_env *env,
-						   const struct cl_lock *lock)
-{
-	struct cl_thread_info *info;
-	enum clt_nesting_level nesting;
-
-	info = cl_env_info(env);
-	nesting = cl_lock_nesting(lock);
-	LASSERT(nesting < ARRAY_SIZE(info->clt_counters));
-	return &info->clt_counters[nesting];
-}
-
 static void cl_lock_trace0(int level, const struct lu_env *env,
 			   const char *prefix, const struct cl_lock *lock,
 			   const char *func, const int line)
 {
 	struct cl_object_header *h = cl_object_header(lock->cll_descr.cld_obj);
 
-	CDEBUG(level, "%s: %p@(%d %p %d %d %d %d %d %lx)(%p/%d/%d) at %s():%d\n",
-	       prefix, lock, atomic_read(&lock->cll_ref),
-	       lock->cll_guarder, lock->cll_depth,
-	       lock->cll_state, lock->cll_error, lock->cll_holds,
-	       lock->cll_users, lock->cll_flags,
-	       env, h->coh_nesting, cl_lock_nr_mutexed(env),
-	       func, line);
+	CDEBUG(level, "%s: %p (%p/%d) at %s():%d\n",
+	       prefix, lock, env, h->coh_nesting, func, line);
 }
-
-#define cl_lock_trace(level, env, prefix, lock)			 \
+#define cl_lock_trace(level, env, prefix, lock)				\
 	cl_lock_trace0(level, env, prefix, lock, __func__, __LINE__)
 
-#define RETIP ((unsigned long)__builtin_return_address(0))
-
-#ifdef CONFIG_LOCKDEP
-static struct lock_class_key cl_lock_key;
-
-static void cl_lock_lockdep_init(struct cl_lock *lock)
-{
-	lockdep_set_class_and_name(lock, &cl_lock_key, "EXT");
-}
-
-static void cl_lock_lockdep_acquire(const struct lu_env *env,
-				    struct cl_lock *lock, __u32 enqflags)
-{
-	cl_lock_counters(env, lock)->ctc_nr_locks_acquired++;
-	lock_map_acquire(&lock->dep_map);
-}
-
-static void cl_lock_lockdep_release(const struct lu_env *env,
-				    struct cl_lock *lock)
-{
-	cl_lock_counters(env, lock)->ctc_nr_locks_acquired--;
-	lock_release(&lock->dep_map, 0, RETIP);
-}
-
-#else /* !CONFIG_LOCKDEP */
-
-static void cl_lock_lockdep_init(struct cl_lock *lock)
-{}
-static void cl_lock_lockdep_acquire(const struct lu_env *env,
-				    struct cl_lock *lock, __u32 enqflags)
-{}
-static void cl_lock_lockdep_release(const struct lu_env *env,
-				    struct cl_lock *lock)
-{}
-
-#endif /* !CONFIG_LOCKDEP */
-
 /**
  * Adds lock slice to the compound lock.
  *
@@ -199,62 +76,10 @@ void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
 }
 EXPORT_SYMBOL(cl_lock_slice_add);
 
-/**
- * Returns true iff a lock with the mode \a has provides at least the same
- * guarantees as a lock with the mode \a need.
- */
-int cl_lock_mode_match(enum cl_lock_mode has, enum cl_lock_mode need)
-{
-	LINVRNT(need == CLM_READ || need == CLM_WRITE ||
-		need == CLM_PHANTOM || need == CLM_GROUP);
-	LINVRNT(has == CLM_READ || has == CLM_WRITE ||
-		has == CLM_PHANTOM || has == CLM_GROUP);
-	CLASSERT(CLM_PHANTOM < CLM_READ);
-	CLASSERT(CLM_READ < CLM_WRITE);
-	CLASSERT(CLM_WRITE < CLM_GROUP);
-
-	if (has != CLM_GROUP)
-		return need <= has;
-	else
-		return need == has;
-}
-EXPORT_SYMBOL(cl_lock_mode_match);
-
-/**
- * Returns true iff extent portions of lock descriptions match.
- */
-int cl_lock_ext_match(const struct cl_lock_descr *has,
-		      const struct cl_lock_descr *need)
+void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock)
 {
-	return
-		has->cld_start <= need->cld_start &&
-		has->cld_end >= need->cld_end &&
-		cl_lock_mode_match(has->cld_mode, need->cld_mode) &&
-		(has->cld_mode != CLM_GROUP || has->cld_gid == need->cld_gid);
-}
-EXPORT_SYMBOL(cl_lock_ext_match);
+	cl_lock_trace(D_DLMTRACE, env, "destroy lock", lock);
 
-/**
- * Returns true iff a lock with the description \a has provides at least the
- * same guarantees as a lock with the description \a need.
- */
-int cl_lock_descr_match(const struct cl_lock_descr *has,
-			const struct cl_lock_descr *need)
-{
-	return
-		cl_object_same(has->cld_obj, need->cld_obj) &&
-		cl_lock_ext_match(has, need);
-}
-EXPORT_SYMBOL(cl_lock_descr_match);
-
-static void cl_lock_free(const struct lu_env *env, struct cl_lock *lock)
-{
-	struct cl_object *obj = lock->cll_descr.cld_obj;
-
-	LINVRNT(!cl_lock_is_mutexed(lock));
-
-	cl_lock_trace(D_DLMTRACE, env, "free lock", lock);
-	might_sleep();
 	while (!list_empty(&lock->cll_layers)) {
 		struct cl_lock_slice *slice;
 
@@ -263,350 +88,36 @@ static void cl_lock_free(const struct lu_env *env, struct cl_lock *lock)
 		list_del_init(lock->cll_layers.next);
 		slice->cls_ops->clo_fini(env, slice);
 	}
-	CS_LOCK_DEC(obj, total);
-	CS_LOCKSTATE_DEC(obj, lock->cll_state);
-	lu_object_ref_del_at(&obj->co_lu, &lock->cll_obj_ref, "cl_lock", lock);
-	cl_object_put(env, obj);
-	lu_ref_fini(&lock->cll_reference);
-	lu_ref_fini(&lock->cll_holders);
-	mutex_destroy(&lock->cll_guard);
-	kmem_cache_free(cl_lock_kmem, lock);
-}
-
-/**
- * Releases a reference on a lock.
- *
- * When last reference is released, lock is returned to the cache, unless it
- * is in cl_lock_state::CLS_FREEING state, in which case it is destroyed
- * immediately.
- *
- * \see cl_object_put(), cl_page_put()
- */
-void cl_lock_put(const struct lu_env *env, struct cl_lock *lock)
-{
-	struct cl_object	*obj;
-
-	LINVRNT(cl_lock_invariant(env, lock));
-	obj = lock->cll_descr.cld_obj;
-	LINVRNT(obj);
-
-	CDEBUG(D_TRACE, "releasing reference: %d %p %lu\n",
-	       atomic_read(&lock->cll_ref), lock, RETIP);
-
-	if (atomic_dec_and_test(&lock->cll_ref)) {
-		if (lock->cll_state == CLS_FREEING) {
-			LASSERT(list_empty(&lock->cll_linkage));
-			cl_lock_free(env, lock);
-		}
-		CS_LOCK_DEC(obj, busy);
-	}
-}
-EXPORT_SYMBOL(cl_lock_put);
-
-/**
- * Acquires an additional reference to a lock.
- *
- * This can be called only by caller already possessing a reference to \a
- * lock.
- *
- * \see cl_object_get(), cl_page_get()
- */
-void cl_lock_get(struct cl_lock *lock)
-{
-	LINVRNT(cl_lock_invariant(NULL, lock));
-	CDEBUG(D_TRACE, "acquiring reference: %d %p %lu\n",
-	       atomic_read(&lock->cll_ref), lock, RETIP);
-	atomic_inc(&lock->cll_ref);
-}
-EXPORT_SYMBOL(cl_lock_get);
-
-/**
- * Acquires a reference to a lock.
- *
- * This is much like cl_lock_get(), except that this function can be used to
- * acquire initial reference to the cached lock. Caller has to deal with all
- * possible races. Use with care!
- *
- * \see cl_page_get_trust()
- */
-void cl_lock_get_trust(struct cl_lock *lock)
-{
-	CDEBUG(D_TRACE, "acquiring trusted reference: %d %p %lu\n",
-	       atomic_read(&lock->cll_ref), lock, RETIP);
-	if (atomic_inc_return(&lock->cll_ref) == 1)
-		CS_LOCK_INC(lock->cll_descr.cld_obj, busy);
-}
-EXPORT_SYMBOL(cl_lock_get_trust);
-
-/**
- * Helper function destroying the lock that wasn't completely initialized.
- *
- * Other threads can acquire references to the top-lock through its
- * sub-locks. Hence, it cannot be cl_lock_free()-ed immediately.
- */
-static void cl_lock_finish(const struct lu_env *env, struct cl_lock *lock)
-{
-	cl_lock_mutex_get(env, lock);
-	cl_lock_cancel(env, lock);
-	cl_lock_delete(env, lock);
-	cl_lock_mutex_put(env, lock);
-	cl_lock_put(env, lock);
-}
-
-static struct cl_lock *cl_lock_alloc(const struct lu_env *env,
-				     struct cl_object *obj,
-				     const struct cl_io *io,
-				     const struct cl_lock_descr *descr)
-{
-	struct cl_lock	  *lock;
-	struct lu_object_header *head;
-
-	lock = kmem_cache_zalloc(cl_lock_kmem, GFP_NOFS);
-	if (lock) {
-		atomic_set(&lock->cll_ref, 1);
-		lock->cll_descr = *descr;
-		lock->cll_state = CLS_NEW;
-		cl_object_get(obj);
-		lu_object_ref_add_at(&obj->co_lu, &lock->cll_obj_ref, "cl_lock",
-				     lock);
-		INIT_LIST_HEAD(&lock->cll_layers);
-		INIT_LIST_HEAD(&lock->cll_linkage);
-		INIT_LIST_HEAD(&lock->cll_inclosure);
-		lu_ref_init(&lock->cll_reference);
-		lu_ref_init(&lock->cll_holders);
-		mutex_init(&lock->cll_guard);
-		lockdep_set_class(&lock->cll_guard, &cl_lock_guard_class);
-		init_waitqueue_head(&lock->cll_wq);
-		head = obj->co_lu.lo_header;
-		CS_LOCKSTATE_INC(obj, CLS_NEW);
-		CS_LOCK_INC(obj, total);
-		CS_LOCK_INC(obj, create);
-		cl_lock_lockdep_init(lock);
-		list_for_each_entry(obj, &head->loh_layers, co_lu.lo_linkage) {
-			int err;
-
-			err = obj->co_ops->coo_lock_init(env, obj, lock, io);
-			if (err != 0) {
-				cl_lock_finish(env, lock);
-				lock = ERR_PTR(err);
-				break;
-			}
-		}
-	} else
-		lock = ERR_PTR(-ENOMEM);
-	return lock;
-}
-
-/**
- * Transfer the lock into INTRANSIT state and return the original state.
- *
- * \pre  state: CLS_CACHED, CLS_HELD or CLS_ENQUEUED
- * \post state: CLS_INTRANSIT
- * \see CLS_INTRANSIT
- */
-static enum cl_lock_state cl_lock_intransit(const struct lu_env *env,
-					    struct cl_lock *lock)
-{
-	enum cl_lock_state state = lock->cll_state;
-
-	LASSERT(cl_lock_is_mutexed(lock));
-	LASSERT(state != CLS_INTRANSIT);
-	LASSERTF(state >= CLS_ENQUEUED && state <= CLS_CACHED,
-		 "Malformed lock state %d.\n", state);
-
-	cl_lock_state_set(env, lock, CLS_INTRANSIT);
-	lock->cll_intransit_owner = current;
-	cl_lock_hold_add(env, lock, "intransit", current);
-	return state;
-}
-
-/**
- *  Exit the intransit state and restore the lock state to the original state
- */
-static void cl_lock_extransit(const struct lu_env *env, struct cl_lock *lock,
-			      enum cl_lock_state state)
-{
-	LASSERT(cl_lock_is_mutexed(lock));
-	LASSERT(lock->cll_state == CLS_INTRANSIT);
-	LASSERT(state != CLS_INTRANSIT);
-	LASSERT(lock->cll_intransit_owner == current);
-
-	lock->cll_intransit_owner = NULL;
-	cl_lock_state_set(env, lock, state);
-	cl_lock_unhold(env, lock, "intransit", current);
-}
-
-/**
- * Checking whether the lock is intransit state
- */
-int cl_lock_is_intransit(struct cl_lock *lock)
-{
-	LASSERT(cl_lock_is_mutexed(lock));
-	return lock->cll_state == CLS_INTRANSIT &&
-	       lock->cll_intransit_owner != current;
-}
-EXPORT_SYMBOL(cl_lock_is_intransit);
-/**
- * Returns true iff lock is "suitable" for given io. E.g., locks acquired by
- * truncate and O_APPEND cannot be reused for read/non-append-write, as they
- * cover multiple stripes and can trigger cascading timeouts.
- */
-static int cl_lock_fits_into(const struct lu_env *env,
-			     const struct cl_lock *lock,
-			     const struct cl_lock_descr *need,
-			     const struct cl_io *io)
-{
-	const struct cl_lock_slice *slice;
-
-	LINVRNT(cl_lock_invariant_trusted(env, lock));
-	list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
-		if (slice->cls_ops->clo_fits_into &&
-		    !slice->cls_ops->clo_fits_into(env, slice, need, io))
-			return 0;
-	}
-	return 1;
+	POISON(lock, 0x5a, sizeof(*lock));
 }
+EXPORT_SYMBOL(cl_lock_fini);
 
-static struct cl_lock *cl_lock_lookup(const struct lu_env *env,
-				      struct cl_object *obj,
-				      const struct cl_io *io,
-				      const struct cl_lock_descr *need)
+int cl_lock_init(const struct lu_env *env, struct cl_lock *lock,
+		 const struct cl_io *io)
 {
-	struct cl_lock	  *lock;
-	struct cl_object_header *head;
-
-	head = cl_object_header(obj);
-	assert_spin_locked(&head->coh_lock_guard);
-	CS_LOCK_INC(obj, lookup);
-	list_for_each_entry(lock, &head->coh_locks, cll_linkage) {
-		int matched;
-
-		matched = cl_lock_ext_match(&lock->cll_descr, need) &&
-			  lock->cll_state < CLS_FREEING &&
-			  lock->cll_error == 0 &&
-			  !(lock->cll_flags & CLF_CANCELLED) &&
-			  cl_lock_fits_into(env, lock, need, io);
-		CDEBUG(D_DLMTRACE, "has: "DDESCR"(%d) need: "DDESCR": %d\n",
-		       PDESCR(&lock->cll_descr), lock->cll_state, PDESCR(need),
-		       matched);
-		if (matched) {
-			cl_lock_get_trust(lock);
-			CS_LOCK_INC(obj, hit);
-			return lock;
-		}
-	}
-	return NULL;
-}
-
-/**
- * Returns a lock matching description \a need.
- *
- * This is the main entry point into the cl_lock caching interface. First, a
- * cache (implemented as a per-object linked list) is consulted. If lock is
- * found there, it is returned immediately. Otherwise new lock is allocated
- * and returned. In any case, additional reference to lock is acquired.
- *
- * \see cl_object_find(), cl_page_find()
- */
-static struct cl_lock *cl_lock_find(const struct lu_env *env,
-				    const struct cl_io *io,
-				    const struct cl_lock_descr *need)
-{
-	struct cl_object_header *head;
-	struct cl_object	*obj;
-	struct cl_lock	  *lock;
-
-	obj  = need->cld_obj;
-	head = cl_object_header(obj);
-
-	spin_lock(&head->coh_lock_guard);
-	lock = cl_lock_lookup(env, obj, io, need);
-	spin_unlock(&head->coh_lock_guard);
-
-	if (!lock) {
-		lock = cl_lock_alloc(env, obj, io, need);
-		if (!IS_ERR(lock)) {
-			struct cl_lock *ghost;
-
-			spin_lock(&head->coh_lock_guard);
-			ghost = cl_lock_lookup(env, obj, io, need);
-			if (!ghost) {
-				cl_lock_get_trust(lock);
-				list_add_tail(&lock->cll_linkage,
-					      &head->coh_locks);
-				spin_unlock(&head->coh_lock_guard);
-				CS_LOCK_INC(obj, busy);
-			} else {
-				spin_unlock(&head->coh_lock_guard);
-				/*
-				 * Other threads can acquire references to the
-				 * top-lock through its sub-locks. Hence, it
-				 * cannot be cl_lock_free()-ed immediately.
-				 */
-				cl_lock_finish(env, lock);
-				lock = ghost;
-			}
-		}
-	}
-	return lock;
-}
-
-/**
- * Returns existing lock matching given description. This is similar to
- * cl_lock_find() except that no new lock is created, and returned lock is
- * guaranteed to be in enum cl_lock_state::CLS_HELD state.
- */
-struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io,
-			     const struct cl_lock_descr *need,
-			     const char *scope, const void *source)
-{
-	struct cl_object_header *head;
-	struct cl_object	*obj;
-	struct cl_lock	  *lock;
-
-	obj  = need->cld_obj;
-	head = cl_object_header(obj);
+	struct cl_object *obj = lock->cll_descr.cld_obj;
+	struct cl_object *scan;
+	int result = 0;
 
-	do {
-		spin_lock(&head->coh_lock_guard);
-		lock = cl_lock_lookup(env, obj, io, need);
-		spin_unlock(&head->coh_lock_guard);
-		if (!lock)
-			return NULL;
+	/* Make sure cl_lock::cll_descr is initialized. */
+	LASSERT(obj);
 
-		cl_lock_mutex_get(env, lock);
-		if (lock->cll_state == CLS_INTRANSIT)
-			/* Don't care return value. */
-			cl_lock_state_wait(env, lock);
-		if (lock->cll_state == CLS_FREEING) {
-			cl_lock_mutex_put(env, lock);
-			cl_lock_put(env, lock);
-			lock = NULL;
+	INIT_LIST_HEAD(&lock->cll_layers);
+	list_for_each_entry(scan, &obj->co_lu.lo_header->loh_layers,
+			    co_lu.lo_linkage) {
+		result = scan->co_ops->coo_lock_init(env, scan, lock, io);
+		if (result != 0) {
+			cl_lock_fini(env, lock);
+			break;
 		}
-	} while (!lock);
-
-	cl_lock_hold_add(env, lock, scope, source);
-	cl_lock_user_add(env, lock);
-	if (lock->cll_state == CLS_CACHED)
-		cl_use_try(env, lock, 1);
-	if (lock->cll_state == CLS_HELD) {
-		cl_lock_mutex_put(env, lock);
-		cl_lock_lockdep_acquire(env, lock, 0);
-		cl_lock_put(env, lock);
-	} else {
-		cl_unuse_try(env, lock);
-		cl_lock_unhold(env, lock, scope, source);
-		cl_lock_mutex_put(env, lock);
-		cl_lock_put(env, lock);
-		lock = NULL;
 	}
 
-	return lock;
+	return result;
 }
-EXPORT_SYMBOL(cl_lock_peek);
+EXPORT_SYMBOL(cl_lock_init);
 
 /**
- * Returns a slice within a lock, corresponding to the given layer in the
+ * Returns a slice with a lock, corresponding to the given layer in the
  * device stack.
  *
  * \see cl_page_at()
@@ -616,8 +127,6 @@ const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
 {
 	const struct cl_lock_slice *slice;
 
-	LINVRNT(cl_lock_invariant_trusted(NULL, lock));
-
 	list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
 		if (slice->cls_obj->co_lu.lo_dev->ld_type == dtype)
 			return slice;
@@ -626,1537 +135,96 @@ const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
 }
 EXPORT_SYMBOL(cl_lock_at);
 
-static void cl_lock_mutex_tail(const struct lu_env *env, struct cl_lock *lock)
-{
-	struct cl_thread_counters *counters;
-
-	counters = cl_lock_counters(env, lock);
-	lock->cll_depth++;
-	counters->ctc_nr_locks_locked++;
-	lu_ref_add(&counters->ctc_locks_locked, "cll_guard", lock);
-	cl_lock_trace(D_TRACE, env, "got mutex", lock);
-}
-
-/**
- * Locks cl_lock object.
- *
- * This is used to manipulate cl_lock fields, and to serialize state
- * transitions in the lock state machine.
- *
- * \post cl_lock_is_mutexed(lock)
- *
- * \see cl_lock_mutex_put()
- */
-void cl_lock_mutex_get(const struct lu_env *env, struct cl_lock *lock)
-{
-	LINVRNT(cl_lock_invariant(env, lock));
-
-	if (lock->cll_guarder == current) {
-		LINVRNT(cl_lock_is_mutexed(lock));
-		LINVRNT(lock->cll_depth > 0);
-	} else {
-		struct cl_object_header *hdr;
-		struct cl_thread_info   *info;
-		int i;
-
-		LINVRNT(lock->cll_guarder != current);
-		hdr = cl_object_header(lock->cll_descr.cld_obj);
-		/*
-		 * Check that mutices are taken in the bottom-to-top order.
-		 */
-		info = cl_env_info(env);
-		for (i = 0; i < hdr->coh_nesting; ++i)
-			LASSERT(info->clt_counters[i].ctc_nr_locks_locked == 0);
-		mutex_lock_nested(&lock->cll_guard, hdr->coh_nesting);
-		lock->cll_guarder = current;
-		LINVRNT(lock->cll_depth == 0);
-	}
-	cl_lock_mutex_tail(env, lock);
-}
-EXPORT_SYMBOL(cl_lock_mutex_get);
-
-/**
- * Try-locks cl_lock object.
- *
- * \retval 0 \a lock was successfully locked
- *
- * \retval -EBUSY \a lock cannot be locked right now
- *
- * \post ergo(result == 0, cl_lock_is_mutexed(lock))
- *
- * \see cl_lock_mutex_get()
- */
-static int cl_lock_mutex_try(const struct lu_env *env, struct cl_lock *lock)
-{
-	int result;
-
-	LINVRNT(cl_lock_invariant_trusted(env, lock));
-
-	result = 0;
-	if (lock->cll_guarder == current) {
-		LINVRNT(lock->cll_depth > 0);
-		cl_lock_mutex_tail(env, lock);
-	} else if (mutex_trylock(&lock->cll_guard)) {
-		LINVRNT(lock->cll_depth == 0);
-		lock->cll_guarder = current;
-		cl_lock_mutex_tail(env, lock);
-	} else
-		result = -EBUSY;
-	return result;
-}
-
-/**
- {* Unlocks cl_lock object.
- *
- * \pre cl_lock_is_mutexed(lock)
- *
- * \see cl_lock_mutex_get()
- */
-void cl_lock_mutex_put(const struct lu_env *env, struct cl_lock *lock)
-{
-	struct cl_thread_counters *counters;
-
-	LINVRNT(cl_lock_invariant(env, lock));
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(lock->cll_guarder == current);
-	LINVRNT(lock->cll_depth > 0);
-
-	counters = cl_lock_counters(env, lock);
-	LINVRNT(counters->ctc_nr_locks_locked > 0);
-
-	cl_lock_trace(D_TRACE, env, "put mutex", lock);
-	lu_ref_del(&counters->ctc_locks_locked, "cll_guard", lock);
-	counters->ctc_nr_locks_locked--;
-	if (--lock->cll_depth == 0) {
-		lock->cll_guarder = NULL;
-		mutex_unlock(&lock->cll_guard);
-	}
-}
-EXPORT_SYMBOL(cl_lock_mutex_put);
-
-/**
- * Returns true iff lock's mutex is owned by the current thread.
- */
-int cl_lock_is_mutexed(struct cl_lock *lock)
-{
-	return lock->cll_guarder == current;
-}
-EXPORT_SYMBOL(cl_lock_is_mutexed);
-
-/**
- * Returns number of cl_lock mutices held by the current thread (environment).
- */
-int cl_lock_nr_mutexed(const struct lu_env *env)
-{
-	struct cl_thread_info *info;
-	int i;
-	int locked;
-
-	/*
-	 * NOTE: if summation across all nesting levels (currently 2) proves
-	 *       too expensive, a summary counter can be added to
-	 *       struct cl_thread_info.
-	 */
-	info = cl_env_info(env);
-	for (i = 0, locked = 0; i < ARRAY_SIZE(info->clt_counters); ++i)
-		locked += info->clt_counters[i].ctc_nr_locks_locked;
-	return locked;
-}
-EXPORT_SYMBOL(cl_lock_nr_mutexed);
-
-static void cl_lock_cancel0(const struct lu_env *env, struct cl_lock *lock)
-{
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-	if (!(lock->cll_flags & CLF_CANCELLED)) {
-		const struct cl_lock_slice *slice;
-
-		lock->cll_flags |= CLF_CANCELLED;
-		list_for_each_entry_reverse(slice, &lock->cll_layers,
-					    cls_linkage) {
-			if (slice->cls_ops->clo_cancel)
-				slice->cls_ops->clo_cancel(env, slice);
-		}
-	}
-}
-
-static void cl_lock_delete0(const struct lu_env *env, struct cl_lock *lock)
-{
-	struct cl_object_header    *head;
-	const struct cl_lock_slice *slice;
-
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-
-	if (lock->cll_state < CLS_FREEING) {
-		bool in_cache;
-
-		LASSERT(lock->cll_state != CLS_INTRANSIT);
-		cl_lock_state_set(env, lock, CLS_FREEING);
-
-		head = cl_object_header(lock->cll_descr.cld_obj);
-
-		spin_lock(&head->coh_lock_guard);
-		in_cache = !list_empty(&lock->cll_linkage);
-		if (in_cache)
-			list_del_init(&lock->cll_linkage);
-		spin_unlock(&head->coh_lock_guard);
-
-		if (in_cache) /* coh_locks cache holds a refcount. */
-			cl_lock_put(env, lock);
-
-		/*
-		 * From now on, no new references to this lock can be acquired
-		 * by cl_lock_lookup().
-		 */
-		list_for_each_entry_reverse(slice, &lock->cll_layers,
-					    cls_linkage) {
-			if (slice->cls_ops->clo_delete)
-				slice->cls_ops->clo_delete(env, slice);
-		}
-		/*
-		 * From now on, no new references to this lock can be acquired
-		 * by layer-specific means (like a pointer from struct
-		 * ldlm_lock in osc, or a pointer from top-lock to sub-lock in
-		 * lov).
-		 *
-		 * Lock will be finally freed in cl_lock_put() when last of
-		 * existing references goes away.
-		 */
-	}
-}
-
-/**
- * Mod(ifie)s cl_lock::cll_holds counter for a given lock. Also, for a
- * top-lock (nesting == 0) accounts for this modification in the per-thread
- * debugging counters. Sub-lock holds can be released by a thread different
- * from one that acquired it.
- */
-static void cl_lock_hold_mod(const struct lu_env *env, struct cl_lock *lock,
-			     int delta)
-{
-	struct cl_thread_counters *counters;
-	enum clt_nesting_level     nesting;
-
-	lock->cll_holds += delta;
-	nesting = cl_lock_nesting(lock);
-	if (nesting == CNL_TOP) {
-		counters = &cl_env_info(env)->clt_counters[CNL_TOP];
-		counters->ctc_nr_held += delta;
-		LASSERT(counters->ctc_nr_held >= 0);
-	}
-}
-
-/**
- * Mod(ifie)s cl_lock::cll_users counter for a given lock. See
- * cl_lock_hold_mod() for the explanation of the debugging code.
- */
-static void cl_lock_used_mod(const struct lu_env *env, struct cl_lock *lock,
-			     int delta)
-{
-	struct cl_thread_counters *counters;
-	enum clt_nesting_level     nesting;
-
-	lock->cll_users += delta;
-	nesting = cl_lock_nesting(lock);
-	if (nesting == CNL_TOP) {
-		counters = &cl_env_info(env)->clt_counters[CNL_TOP];
-		counters->ctc_nr_used += delta;
-		LASSERT(counters->ctc_nr_used >= 0);
-	}
-}
-
-void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock,
-			  const char *scope, const void *source)
-{
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-	LASSERT(lock->cll_holds > 0);
-
-	cl_lock_trace(D_DLMTRACE, env, "hold release lock", lock);
-	lu_ref_del(&lock->cll_holders, scope, source);
-	cl_lock_hold_mod(env, lock, -1);
-	if (lock->cll_holds == 0) {
-		CL_LOCK_ASSERT(lock->cll_state != CLS_HELD, env, lock);
-		if (lock->cll_descr.cld_mode == CLM_PHANTOM ||
-		    lock->cll_descr.cld_mode == CLM_GROUP ||
-		    lock->cll_state != CLS_CACHED)
-			/*
-			 * If lock is still phantom or grouplock when user is
-			 * done with it---destroy the lock.
-			 */
-			lock->cll_flags |= CLF_CANCELPEND|CLF_DOOMED;
-		if (lock->cll_flags & CLF_CANCELPEND) {
-			lock->cll_flags &= ~CLF_CANCELPEND;
-			cl_lock_cancel0(env, lock);
-		}
-		if (lock->cll_flags & CLF_DOOMED) {
-			/* no longer doomed: it's dead... Jim. */
-			lock->cll_flags &= ~CLF_DOOMED;
-			cl_lock_delete0(env, lock);
-		}
-	}
-}
-EXPORT_SYMBOL(cl_lock_hold_release);
-
-/**
- * Waits until lock state is changed.
- *
- * This function is called with cl_lock mutex locked, atomically releases
- * mutex and goes to sleep, waiting for a lock state change (signaled by
- * cl_lock_signal()), and re-acquires the mutex before return.
- *
- * This function is used to wait until lock state machine makes some progress
- * and to emulate synchronous operations on top of asynchronous lock
- * interface.
- *
- * \retval -EINTR wait was interrupted
- *
- * \retval 0 wait wasn't interrupted
- *
- * \pre cl_lock_is_mutexed(lock)
- *
- * \see cl_lock_signal()
- */
-int cl_lock_state_wait(const struct lu_env *env, struct cl_lock *lock)
-{
-	wait_queue_t waiter;
-	sigset_t blocked;
-	int result;
-
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-	LASSERT(lock->cll_depth == 1);
-	LASSERT(lock->cll_state != CLS_FREEING); /* too late to wait */
-
-	cl_lock_trace(D_DLMTRACE, env, "state wait lock", lock);
-	result = lock->cll_error;
-	if (result == 0) {
-		/* To avoid being interrupted by the 'non-fatal' signals
-		 * (SIGCHLD, for instance), we'd block them temporarily.
-		 * LU-305
-		 */
-		blocked = cfs_block_sigsinv(LUSTRE_FATAL_SIGS);
-
-		init_waitqueue_entry(&waiter, current);
-		add_wait_queue(&lock->cll_wq, &waiter);
-		set_current_state(TASK_INTERRUPTIBLE);
-		cl_lock_mutex_put(env, lock);
-
-		LASSERT(cl_lock_nr_mutexed(env) == 0);
-
-		/* Returning ERESTARTSYS instead of EINTR so syscalls
-		 * can be restarted if signals are pending here
-		 */
-		result = -ERESTARTSYS;
-		if (likely(!OBD_FAIL_CHECK(OBD_FAIL_LOCK_STATE_WAIT_INTR))) {
-			schedule();
-			if (!cfs_signal_pending())
-				result = 0;
-		}
-
-		cl_lock_mutex_get(env, lock);
-		set_current_state(TASK_RUNNING);
-		remove_wait_queue(&lock->cll_wq, &waiter);
-
-		/* Restore old blocked signals */
-		cfs_restore_sigs(blocked);
-	}
-	return result;
-}
-EXPORT_SYMBOL(cl_lock_state_wait);
-
-static void cl_lock_state_signal(const struct lu_env *env, struct cl_lock *lock,
-				 enum cl_lock_state state)
-{
-	const struct cl_lock_slice *slice;
-
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-
-	list_for_each_entry(slice, &lock->cll_layers, cls_linkage)
-		if (slice->cls_ops->clo_state)
-			slice->cls_ops->clo_state(env, slice, state);
-	wake_up_all(&lock->cll_wq);
-}
-
-/**
- * Notifies waiters that lock state changed.
- *
- * Wakes up all waiters sleeping in cl_lock_state_wait(), also notifies all
- * layers about state change by calling cl_lock_operations::clo_state()
- * top-to-bottom.
- */
-void cl_lock_signal(const struct lu_env *env, struct cl_lock *lock)
-{
-	cl_lock_trace(D_DLMTRACE, env, "state signal lock", lock);
-	cl_lock_state_signal(env, lock, lock->cll_state);
-}
-EXPORT_SYMBOL(cl_lock_signal);
-
-/**
- * Changes lock state.
- *
- * This function is invoked to notify layers that lock state changed, possible
- * as a result of an asynchronous event such as call-back reception.
- *
- * \post lock->cll_state == state
- *
- * \see cl_lock_operations::clo_state()
- */
-void cl_lock_state_set(const struct lu_env *env, struct cl_lock *lock,
-		       enum cl_lock_state state)
-{
-	LASSERT(lock->cll_state <= state ||
-		(lock->cll_state == CLS_CACHED &&
-		 (state == CLS_HELD || /* lock found in cache */
-		  state == CLS_NEW  ||   /* sub-lock canceled */
-		  state == CLS_INTRANSIT)) ||
-		/* lock is in transit state */
-		lock->cll_state == CLS_INTRANSIT);
-
-	if (lock->cll_state != state) {
-		CS_LOCKSTATE_DEC(lock->cll_descr.cld_obj, lock->cll_state);
-		CS_LOCKSTATE_INC(lock->cll_descr.cld_obj, state);
-
-		cl_lock_state_signal(env, lock, state);
-		lock->cll_state = state;
-	}
-}
-EXPORT_SYMBOL(cl_lock_state_set);
-
-static int cl_unuse_try_internal(const struct lu_env *env, struct cl_lock *lock)
-{
-	const struct cl_lock_slice *slice;
-	int result;
-
-	do {
-		result = 0;
-
-		LINVRNT(cl_lock_is_mutexed(lock));
-		LINVRNT(cl_lock_invariant(env, lock));
-		LASSERT(lock->cll_state == CLS_INTRANSIT);
-
-		result = -ENOSYS;
-		list_for_each_entry_reverse(slice, &lock->cll_layers,
-					    cls_linkage) {
-			if (slice->cls_ops->clo_unuse) {
-				result = slice->cls_ops->clo_unuse(env, slice);
-				if (result != 0)
-					break;
-			}
-		}
-		LASSERT(result != -ENOSYS);
-	} while (result == CLO_REPEAT);
-
-	return result;
-}
-
-/**
- * Yanks lock from the cache (cl_lock_state::CLS_CACHED state) by calling
- * cl_lock_operations::clo_use() top-to-bottom to notify layers.
- * @atomic = 1, it must unuse the lock to recovery the lock to keep the
- *  use process atomic
- */
-int cl_use_try(const struct lu_env *env, struct cl_lock *lock, int atomic)
+void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock)
 {
 	const struct cl_lock_slice *slice;
-	int result;
-	enum cl_lock_state state;
-
-	cl_lock_trace(D_DLMTRACE, env, "use lock", lock);
-
-	LASSERT(lock->cll_state == CLS_CACHED);
-	if (lock->cll_error)
-		return lock->cll_error;
-
-	result = -ENOSYS;
-	state = cl_lock_intransit(env, lock);
-	list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
-		if (slice->cls_ops->clo_use) {
-			result = slice->cls_ops->clo_use(env, slice);
-			if (result != 0)
-				break;
-		}
-	}
-	LASSERT(result != -ENOSYS);
-
-	LASSERTF(lock->cll_state == CLS_INTRANSIT, "Wrong state %d.\n",
-		 lock->cll_state);
-
-	if (result == 0) {
-		state = CLS_HELD;
-	} else {
-		if (result == -ESTALE) {
-			/*
-			 * ESTALE means sublock being cancelled
-			 * at this time, and set lock state to
-			 * be NEW here and ask the caller to repeat.
-			 */
-			state = CLS_NEW;
-			result = CLO_REPEAT;
-		}
-
-		/* @atomic means back-off-on-failure. */
-		if (atomic) {
-			int rc;
-
-			rc = cl_unuse_try_internal(env, lock);
-			/* Vet the results. */
-			if (rc < 0 && result > 0)
-				result = rc;
-		}
 
+	cl_lock_trace(D_DLMTRACE, env, "cancel lock", lock);
+	list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) {
+		if (slice->cls_ops->clo_cancel)
+			slice->cls_ops->clo_cancel(env, slice);
 	}
-	cl_lock_extransit(env, lock, state);
-	return result;
 }
-EXPORT_SYMBOL(cl_use_try);
+EXPORT_SYMBOL(cl_lock_cancel);
 
 /**
- * Helper for cl_enqueue_try() that calls ->clo_enqueue() across all layers
- * top-to-bottom.
+ * Enqueue a lock.
+ * \param anchor: if we need to wait for resources before getting the lock,
+ *		  use @anchor for the purpose.
+ * \retval 0  enqueue successfully
+ * \retval <0 error code
  */
-static int cl_enqueue_kick(const struct lu_env *env,
-			   struct cl_lock *lock,
-			   struct cl_io *io, __u32 flags)
+int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io,
+		    struct cl_lock *lock, struct cl_sync_io *anchor)
 {
-	int result;
 	const struct cl_lock_slice *slice;
+	int rc = -ENOSYS;
 
-	result = -ENOSYS;
 	list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
-		if (slice->cls_ops->clo_enqueue) {
-			result = slice->cls_ops->clo_enqueue(env,
-							     slice, io, flags);
-			if (result != 0)
-				break;
-		}
-	}
-	LASSERT(result != -ENOSYS);
-	return result;
-}
-
-/**
- * Tries to enqueue a lock.
- *
- * This function is called repeatedly by cl_enqueue() until either lock is
- * enqueued, or error occurs. This function does not block waiting for
- * networking communication to complete.
- *
- * \post ergo(result == 0, lock->cll_state == CLS_ENQUEUED ||
- *			 lock->cll_state == CLS_HELD)
- *
- * \see cl_enqueue() cl_lock_operations::clo_enqueue()
- * \see cl_lock_state::CLS_ENQUEUED
- */
-int cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock,
-		   struct cl_io *io, __u32 flags)
-{
-	int result;
-
-	cl_lock_trace(D_DLMTRACE, env, "enqueue lock", lock);
-	do {
-		LINVRNT(cl_lock_is_mutexed(lock));
-
-		result = lock->cll_error;
-		if (result != 0)
-			break;
-
-		switch (lock->cll_state) {
-		case CLS_NEW:
-			cl_lock_state_set(env, lock, CLS_QUEUING);
-			/* fall-through */
-		case CLS_QUEUING:
-			/* kick layers. */
-			result = cl_enqueue_kick(env, lock, io, flags);
-			/* For AGL case, the cl_lock::cll_state may
-			 * become CLS_HELD already.
-			 */
-			if (result == 0 && lock->cll_state == CLS_QUEUING)
-				cl_lock_state_set(env, lock, CLS_ENQUEUED);
-			break;
-		case CLS_INTRANSIT:
-			LASSERT(cl_lock_is_intransit(lock));
-			result = CLO_WAIT;
-			break;
-		case CLS_CACHED:
-			/* yank lock from the cache. */
-			result = cl_use_try(env, lock, 0);
-			break;
-		case CLS_ENQUEUED:
-		case CLS_HELD:
-			result = 0;
-			break;
-		default:
-		case CLS_FREEING:
-			/*
-			 * impossible, only held locks with increased
-			 * ->cll_holds can be enqueued, and they cannot be
-			 * freed.
-			 */
-			LBUG();
-		}
-	} while (result == CLO_REPEAT);
-	return result;
-}
-EXPORT_SYMBOL(cl_enqueue_try);
-
-/**
- * Cancel the conflicting lock found during previous enqueue.
- *
- * \retval 0 conflicting lock has been canceled.
- * \retval -ve error code.
- */
-int cl_lock_enqueue_wait(const struct lu_env *env,
-			 struct cl_lock *lock,
-			 int keep_mutex)
-{
-	struct cl_lock  *conflict;
-	int	      rc = 0;
-
-	LASSERT(cl_lock_is_mutexed(lock));
-	LASSERT(lock->cll_state == CLS_QUEUING);
-	LASSERT(lock->cll_conflict);
+		if (!slice->cls_ops->clo_enqueue)
+			continue;
 
-	conflict = lock->cll_conflict;
-	lock->cll_conflict = NULL;
-
-	cl_lock_mutex_put(env, lock);
-	LASSERT(cl_lock_nr_mutexed(env) == 0);
-
-	cl_lock_mutex_get(env, conflict);
-	cl_lock_trace(D_DLMTRACE, env, "enqueue wait", conflict);
-	cl_lock_cancel(env, conflict);
-	cl_lock_delete(env, conflict);
-
-	while (conflict->cll_state != CLS_FREEING) {
-		rc = cl_lock_state_wait(env, conflict);
+		rc = slice->cls_ops->clo_enqueue(env, slice, io, anchor);
 		if (rc != 0)
 			break;
-	}
-	cl_lock_mutex_put(env, conflict);
-	lu_ref_del(&conflict->cll_reference, "cancel-wait", lock);
-	cl_lock_put(env, conflict);
-
-	if (keep_mutex)
-		cl_lock_mutex_get(env, lock);
-
-	LASSERT(rc <= 0);
-	return rc;
-}
-EXPORT_SYMBOL(cl_lock_enqueue_wait);
-
-static int cl_enqueue_locked(const struct lu_env *env, struct cl_lock *lock,
-			     struct cl_io *io, __u32 enqflags)
-{
-	int result;
-
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-	LASSERT(lock->cll_holds > 0);
-
-	cl_lock_user_add(env, lock);
-	do {
-		result = cl_enqueue_try(env, lock, io, enqflags);
-		if (result == CLO_WAIT) {
-			if (lock->cll_conflict)
-				result = cl_lock_enqueue_wait(env, lock, 1);
-			else
-				result = cl_lock_state_wait(env, lock);
-			if (result == 0)
-				continue;
-		}
-		break;
-	} while (1);
-	if (result != 0)
-		cl_unuse_try(env, lock);
-	LASSERT(ergo(result == 0 && !(enqflags & CEF_AGL),
-		     lock->cll_state == CLS_ENQUEUED ||
-		     lock->cll_state == CLS_HELD));
-	return result;
-}
-
-/**
- * Tries to unlock a lock.
- *
- * This function is called to release underlying resource:
- * 1. for top lock, the resource is sublocks it held;
- * 2. for sublock, the resource is the reference to dlmlock.
- *
- * cl_unuse_try is a one-shot operation, so it must NOT return CLO_WAIT.
- *
- * \see cl_unuse() cl_lock_operations::clo_unuse()
- * \see cl_lock_state::CLS_CACHED
- */
-int cl_unuse_try(const struct lu_env *env, struct cl_lock *lock)
-{
-	int			 result;
-	enum cl_lock_state	  state = CLS_NEW;
-
-	cl_lock_trace(D_DLMTRACE, env, "unuse lock", lock);
-
-	if (lock->cll_users > 1) {
-		cl_lock_user_del(env, lock);
-		return 0;
-	}
-
-	/* Only if the lock is in CLS_HELD or CLS_ENQUEUED state, it can hold
-	 * underlying resources.
-	 */
-	if (!(lock->cll_state == CLS_HELD || lock->cll_state == CLS_ENQUEUED)) {
-		cl_lock_user_del(env, lock);
-		return 0;
-	}
-
-	/*
-	 * New lock users (->cll_users) are not protecting unlocking
-	 * from proceeding. From this point, lock eventually reaches
-	 * CLS_CACHED, is reinitialized to CLS_NEW or fails into
-	 * CLS_FREEING.
-	 */
-	state = cl_lock_intransit(env, lock);
-
-	result = cl_unuse_try_internal(env, lock);
-	LASSERT(lock->cll_state == CLS_INTRANSIT);
-	LASSERT(result != CLO_WAIT);
-	cl_lock_user_del(env, lock);
-	if (result == 0 || result == -ESTALE) {
-		/*
-		 * Return lock back to the cache. This is the only
-		 * place where lock is moved into CLS_CACHED state.
-		 *
-		 * If one of ->clo_unuse() methods returned -ESTALE, lock
-		 * cannot be placed into cache and has to be
-		 * re-initialized. This happens e.g., when a sub-lock was
-		 * canceled while unlocking was in progress.
-		 */
-		if (state == CLS_HELD && result == 0)
-			state = CLS_CACHED;
-		else
-			state = CLS_NEW;
-		cl_lock_extransit(env, lock, state);
-
-		/*
-		 * Hide -ESTALE error.
-		 * If the lock is a glimpse lock, and it has multiple
-		 * stripes. Assuming that one of its sublock returned -ENAVAIL,
-		 * and other sublocks are matched write locks. In this case,
-		 * we can't set this lock to error because otherwise some of
-		 * its sublocks may not be canceled. This causes some dirty
-		 * pages won't be written to OSTs. -jay
-		 */
-		result = 0;
-	} else {
-		CERROR("result = %d, this is unlikely!\n", result);
-		state = CLS_NEW;
-		cl_lock_extransit(env, lock, state);
-	}
-	return result ?: lock->cll_error;
-}
-EXPORT_SYMBOL(cl_unuse_try);
-
-static void cl_unuse_locked(const struct lu_env *env, struct cl_lock *lock)
-{
-	int result;
-
-	result = cl_unuse_try(env, lock);
-	if (result)
-		CL_LOCK_DEBUG(D_ERROR, env, lock, "unuse return %d\n", result);
-}
-
-/**
- * Unlocks a lock.
- */
-void cl_unuse(const struct lu_env *env, struct cl_lock *lock)
-{
-	cl_lock_mutex_get(env, lock);
-	cl_unuse_locked(env, lock);
-	cl_lock_mutex_put(env, lock);
-	cl_lock_lockdep_release(env, lock);
-}
-EXPORT_SYMBOL(cl_unuse);
-
-/**
- * Tries to wait for a lock.
- *
- * This function is called repeatedly by cl_wait() until either lock is
- * granted, or error occurs. This function does not block waiting for network
- * communication to complete.
- *
- * \see cl_wait() cl_lock_operations::clo_wait()
- * \see cl_lock_state::CLS_HELD
- */
-int cl_wait_try(const struct lu_env *env, struct cl_lock *lock)
-{
-	const struct cl_lock_slice *slice;
-	int			 result;
-
-	cl_lock_trace(D_DLMTRACE, env, "wait lock try", lock);
-	do {
-		LINVRNT(cl_lock_is_mutexed(lock));
-		LINVRNT(cl_lock_invariant(env, lock));
-		LASSERTF(lock->cll_state == CLS_QUEUING ||
-			 lock->cll_state == CLS_ENQUEUED ||
-			 lock->cll_state == CLS_HELD ||
-			 lock->cll_state == CLS_INTRANSIT,
-			 "lock state: %d\n", lock->cll_state);
-		LASSERT(lock->cll_users > 0);
-		LASSERT(lock->cll_holds > 0);
-
-		result = lock->cll_error;
-		if (result != 0)
-			break;
-
-		if (cl_lock_is_intransit(lock)) {
-			result = CLO_WAIT;
-			break;
-		}
-
-		if (lock->cll_state == CLS_HELD)
-			/* nothing to do */
-			break;
-
-		result = -ENOSYS;
-		list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
-			if (slice->cls_ops->clo_wait) {
-				result = slice->cls_ops->clo_wait(env, slice);
-				if (result != 0)
-					break;
-			}
-		}
-		LASSERT(result != -ENOSYS);
-		if (result == 0) {
-			LASSERT(lock->cll_state != CLS_INTRANSIT);
-			cl_lock_state_set(env, lock, CLS_HELD);
-		}
-	} while (result == CLO_REPEAT);
-	return result;
-}
-EXPORT_SYMBOL(cl_wait_try);
-
-/**
- * Waits until enqueued lock is granted.
- *
- * \pre current thread or io owns a hold on the lock
- * \pre ergo(result == 0, lock->cll_state == CLS_ENQUEUED ||
- *			lock->cll_state == CLS_HELD)
- *
- * \post ergo(result == 0, lock->cll_state == CLS_HELD)
- */
-int cl_wait(const struct lu_env *env, struct cl_lock *lock)
-{
-	int result;
-
-	cl_lock_mutex_get(env, lock);
-
-	LINVRNT(cl_lock_invariant(env, lock));
-	LASSERTF(lock->cll_state == CLS_ENQUEUED || lock->cll_state == CLS_HELD,
-		 "Wrong state %d\n", lock->cll_state);
-	LASSERT(lock->cll_holds > 0);
-
-	do {
-		result = cl_wait_try(env, lock);
-		if (result == CLO_WAIT) {
-			result = cl_lock_state_wait(env, lock);
-			if (result == 0)
-				continue;
-		}
-		break;
-	} while (1);
-	if (result < 0) {
-		cl_unuse_try(env, lock);
-		cl_lock_lockdep_release(env, lock);
-	}
-	cl_lock_trace(D_DLMTRACE, env, "wait lock", lock);
-	cl_lock_mutex_put(env, lock);
-	LASSERT(ergo(result == 0, lock->cll_state == CLS_HELD));
-	return result;
-}
-EXPORT_SYMBOL(cl_wait);
-
-/**
- * Executes cl_lock_operations::clo_weigh(), and sums results to estimate lock
- * value.
- */
-unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock)
-{
-	const struct cl_lock_slice *slice;
-	unsigned long pound;
-	unsigned long ounce;
-
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-
-	pound = 0;
-	list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) {
-		if (slice->cls_ops->clo_weigh) {
-			ounce = slice->cls_ops->clo_weigh(env, slice);
-			pound += ounce;
-			if (pound < ounce) /* over-weight^Wflow */
-				pound = ~0UL;
-		}
-	}
-	return pound;
-}
-EXPORT_SYMBOL(cl_lock_weigh);
-
-/**
- * Notifies layers that lock description changed.
- *
- * The server can grant client a lock different from one that was requested
- * (e.g., larger in extent). This method is called when actually granted lock
- * description becomes known to let layers to accommodate for changed lock
- * description.
- *
- * \see cl_lock_operations::clo_modify()
- */
-int cl_lock_modify(const struct lu_env *env, struct cl_lock *lock,
-		   const struct cl_lock_descr *desc)
-{
-	const struct cl_lock_slice *slice;
-	struct cl_object	   *obj = lock->cll_descr.cld_obj;
-	struct cl_object_header    *hdr = cl_object_header(obj);
-	int result;
-
-	cl_lock_trace(D_DLMTRACE, env, "modify lock", lock);
-	/* don't allow object to change */
-	LASSERT(obj == desc->cld_obj);
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-
-	list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) {
-		if (slice->cls_ops->clo_modify) {
-			result = slice->cls_ops->clo_modify(env, slice, desc);
-			if (result != 0)
-				return result;
 		}
-	}
-	CL_LOCK_DEBUG(D_DLMTRACE, env, lock, " -> "DDESCR"@"DFID"\n",
-		      PDESCR(desc), PFID(lu_object_fid(&desc->cld_obj->co_lu)));
-	/*
-	 * Just replace description in place. Nothing more is needed for
-	 * now. If locks were indexed according to their extent and/or mode,
-	 * that index would have to be updated here.
-	 */
-	spin_lock(&hdr->coh_lock_guard);
-	lock->cll_descr = *desc;
-	spin_unlock(&hdr->coh_lock_guard);
-	return 0;
-}
-EXPORT_SYMBOL(cl_lock_modify);
-
-/**
- * Initializes lock closure with a given origin.
- *
- * \see cl_lock_closure
- */
-void cl_lock_closure_init(const struct lu_env *env,
-			  struct cl_lock_closure *closure,
-			  struct cl_lock *origin, int wait)
-{
-	LINVRNT(cl_lock_is_mutexed(origin));
-	LINVRNT(cl_lock_invariant(env, origin));
-
-	INIT_LIST_HEAD(&closure->clc_list);
-	closure->clc_origin = origin;
-	closure->clc_wait   = wait;
-	closure->clc_nr     = 0;
-}
-EXPORT_SYMBOL(cl_lock_closure_init);
-
-/**
- * Builds a closure of \a lock.
- *
- * Building of a closure consists of adding initial lock (\a lock) into it,
- * and calling cl_lock_operations::clo_closure() methods of \a lock. These
- * methods might call cl_lock_closure_build() recursively again, adding more
- * locks to the closure, etc.
- *
- * \see cl_lock_closure
- */
-int cl_lock_closure_build(const struct lu_env *env, struct cl_lock *lock,
-			  struct cl_lock_closure *closure)
-{
-	const struct cl_lock_slice *slice;
-	int result;
-
-	LINVRNT(cl_lock_is_mutexed(closure->clc_origin));
-	LINVRNT(cl_lock_invariant(env, closure->clc_origin));
-
-	result = cl_lock_enclosure(env, lock, closure);
-	if (result == 0) {
-		list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
-			if (slice->cls_ops->clo_closure) {
-				result = slice->cls_ops->clo_closure(env, slice,
-								     closure);
-				if (result != 0)
-					break;
-			}
-		}
-	}
-	if (result != 0)
-		cl_lock_disclosure(env, closure);
-	return result;
-}
-EXPORT_SYMBOL(cl_lock_closure_build);
-
-/**
- * Adds new lock to a closure.
- *
- * Try-locks \a lock and if succeeded, adds it to the closure (never more than
- * once). If try-lock failed, returns CLO_REPEAT, after optionally waiting
- * until next try-lock is likely to succeed.
- */
-int cl_lock_enclosure(const struct lu_env *env, struct cl_lock *lock,
-		      struct cl_lock_closure *closure)
-{
-	int result = 0;
-
-	cl_lock_trace(D_DLMTRACE, env, "enclosure lock", lock);
-	if (!cl_lock_mutex_try(env, lock)) {
-		/*
-		 * If lock->cll_inclosure is not empty, lock is already in
-		 * this closure.
-		 */
-		if (list_empty(&lock->cll_inclosure)) {
-			cl_lock_get_trust(lock);
-			lu_ref_add(&lock->cll_reference, "closure", closure);
-			list_add(&lock->cll_inclosure, &closure->clc_list);
-			closure->clc_nr++;
-		} else
-			cl_lock_mutex_put(env, lock);
-		result = 0;
-	} else {
-		cl_lock_disclosure(env, closure);
-		if (closure->clc_wait) {
-			cl_lock_get_trust(lock);
-			lu_ref_add(&lock->cll_reference, "closure-w", closure);
-			cl_lock_mutex_put(env, closure->clc_origin);
-
-			LASSERT(cl_lock_nr_mutexed(env) == 0);
-			cl_lock_mutex_get(env, lock);
-			cl_lock_mutex_put(env, lock);
-
-			cl_lock_mutex_get(env, closure->clc_origin);
-			lu_ref_del(&lock->cll_reference, "closure-w", closure);
-			cl_lock_put(env, lock);
-		}
-		result = CLO_REPEAT;
-	}
-	return result;
-}
-EXPORT_SYMBOL(cl_lock_enclosure);
-
-/** Releases mutices of enclosed locks. */
-void cl_lock_disclosure(const struct lu_env *env,
-			struct cl_lock_closure *closure)
-{
-	struct cl_lock *scan;
-	struct cl_lock *temp;
-
-	cl_lock_trace(D_DLMTRACE, env, "disclosure lock", closure->clc_origin);
-	list_for_each_entry_safe(scan, temp, &closure->clc_list,
-				 cll_inclosure) {
-		list_del_init(&scan->cll_inclosure);
-		cl_lock_mutex_put(env, scan);
-		lu_ref_del(&scan->cll_reference, "closure", closure);
-		cl_lock_put(env, scan);
-		closure->clc_nr--;
-	}
-	LASSERT(closure->clc_nr == 0);
-}
-EXPORT_SYMBOL(cl_lock_disclosure);
-
-/** Finalizes a closure. */
-void cl_lock_closure_fini(struct cl_lock_closure *closure)
-{
-	LASSERT(closure->clc_nr == 0);
-	LASSERT(list_empty(&closure->clc_list));
-}
-EXPORT_SYMBOL(cl_lock_closure_fini);
-
-/**
- * Destroys this lock. Notifies layers (bottom-to-top) that lock is being
- * destroyed, then destroy the lock. If there are holds on the lock, postpone
- * destruction until all holds are released. This is called when a decision is
- * made to destroy the lock in the future. E.g., when a blocking AST is
- * received on it, or fatal communication error happens.
- *
- * Caller must have a reference on this lock to prevent a situation, when
- * deleted lock lingers in memory for indefinite time, because nobody calls
- * cl_lock_put() to finish it.
- *
- * \pre atomic_read(&lock->cll_ref) > 0
- * \pre ergo(cl_lock_nesting(lock) == CNL_TOP,
- *	   cl_lock_nr_mutexed(env) == 1)
- *      [i.e., if a top-lock is deleted, mutices of no other locks can be
- *      held, as deletion of sub-locks might require releasing a top-lock
- *      mutex]
- *
- * \see cl_lock_operations::clo_delete()
- * \see cl_lock::cll_holds
- */
-void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock)
-{
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-	LASSERT(ergo(cl_lock_nesting(lock) == CNL_TOP,
-		     cl_lock_nr_mutexed(env) == 1));
-
-	cl_lock_trace(D_DLMTRACE, env, "delete lock", lock);
-	if (lock->cll_holds == 0)
-		cl_lock_delete0(env, lock);
-	else
-		lock->cll_flags |= CLF_DOOMED;
-}
-EXPORT_SYMBOL(cl_lock_delete);
-
-/**
- * Mark lock as irrecoverably failed, and mark it for destruction. This
- * happens when, e.g., server fails to grant a lock to us, or networking
- * time-out happens.
- *
- * \pre atomic_read(&lock->cll_ref) > 0
- *
- * \see clo_lock_delete()
- * \see cl_lock::cll_holds
- */
-void cl_lock_error(const struct lu_env *env, struct cl_lock *lock, int error)
-{
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-
-	if (lock->cll_error == 0 && error != 0) {
-		cl_lock_trace(D_DLMTRACE, env, "set lock error", lock);
-		lock->cll_error = error;
-		cl_lock_signal(env, lock);
-		cl_lock_cancel(env, lock);
-		cl_lock_delete(env, lock);
-	}
-}
-EXPORT_SYMBOL(cl_lock_error);
-
-/**
- * Cancels this lock. Notifies layers
- * (bottom-to-top) that lock is being cancelled, then destroy the lock. If
- * there are holds on the lock, postpone cancellation until
- * all holds are released.
- *
- * Cancellation notification is delivered to layers at most once.
- *
- * \see cl_lock_operations::clo_cancel()
- * \see cl_lock::cll_holds
- */
-void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock)
-{
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-
-	cl_lock_trace(D_DLMTRACE, env, "cancel lock", lock);
-	if (lock->cll_holds == 0)
-		cl_lock_cancel0(env, lock);
-	else
-		lock->cll_flags |= CLF_CANCELPEND;
-}
-EXPORT_SYMBOL(cl_lock_cancel);
-
-/**
- * Finds an existing lock covering given index and optionally different from a
- * given \a except lock.
- */
-struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env,
-				 struct cl_object *obj, pgoff_t index,
-				 struct cl_lock *except,
-				 int pending, int canceld)
-{
-	struct cl_object_header *head;
-	struct cl_lock	  *scan;
-	struct cl_lock	  *lock;
-	struct cl_lock_descr    *need;
-
-	head = cl_object_header(obj);
-	need = &cl_env_info(env)->clt_descr;
-	lock = NULL;
-
-	need->cld_mode = CLM_READ; /* CLM_READ matches both READ & WRITE, but
-				    * not PHANTOM
-				    */
-	need->cld_start = need->cld_end = index;
-	need->cld_enq_flags = 0;
-
-	spin_lock(&head->coh_lock_guard);
-	/* It is fine to match any group lock since there could be only one
-	 * with a uniq gid and it conflicts with all other lock modes too
-	 */
-	list_for_each_entry(scan, &head->coh_locks, cll_linkage) {
-		if (scan != except &&
-		    (scan->cll_descr.cld_mode == CLM_GROUP ||
-		    cl_lock_ext_match(&scan->cll_descr, need)) &&
-		    scan->cll_state >= CLS_HELD &&
-		    scan->cll_state < CLS_FREEING &&
-		    /*
-		     * This check is racy as the lock can be canceled right
-		     * after it is done, but this is fine, because page exists
-		     * already.
-		     */
-		    (canceld || !(scan->cll_flags & CLF_CANCELLED)) &&
-		    (pending || !(scan->cll_flags & CLF_CANCELPEND))) {
-			/* Don't increase cs_hit here since this
-			 * is just a helper function.
-			 */
-			cl_lock_get_trust(scan);
-			lock = scan;
-			break;
-		}
-	}
-	spin_unlock(&head->coh_lock_guard);
-	return lock;
-}
-EXPORT_SYMBOL(cl_lock_at_pgoff);
-
-/**
- * Calculate the page offset at the layer of @lock.
- * At the time of this writing, @page is top page and @lock is sub lock.
- */
-static pgoff_t pgoff_at_lock(struct cl_page *page, struct cl_lock *lock)
-{
-	struct lu_device_type *dtype;
-	const struct cl_page_slice *slice;
-
-	dtype = lock->cll_descr.cld_obj->co_lu.lo_dev->ld_type;
-	slice = cl_page_at(page, dtype);
-	return slice->cpl_page->cp_index;
+	return rc;
 }
+EXPORT_SYMBOL(cl_lock_enqueue);
 
 /**
- * Check if page @page is covered by an extra lock or discard it.
+ * Main high-level entry point of cl_lock interface that finds existing or
+ * enqueues new lock matching given description.
  */
-static int check_and_discard_cb(const struct lu_env *env, struct cl_io *io,
-				struct cl_page *page, void *cbdata)
+int cl_lock_request(const struct lu_env *env, struct cl_io *io,
+		    struct cl_lock *lock)
 {
-	struct cl_thread_info *info = cl_env_info(env);
-	struct cl_lock *lock = cbdata;
-	pgoff_t index = pgoff_at_lock(page, lock);
+	struct cl_sync_io *anchor = NULL;
+	__u32 enq_flags = lock->cll_descr.cld_enq_flags;
+	int rc;
 
-	if (index >= info->clt_fn_index) {
-		struct cl_lock *tmp;
+	rc = cl_lock_init(env, lock, io);
+	if (rc < 0)
+		return rc;
 
-		/* refresh non-overlapped index */
-		tmp = cl_lock_at_pgoff(env, lock->cll_descr.cld_obj, index,
-				       lock, 1, 0);
-		if (tmp) {
-			/* Cache the first-non-overlapped index so as to skip
-			 * all pages within [index, clt_fn_index). This
-			 * is safe because if tmp lock is canceled, it will
-			 * discard these pages.
-			 */
-			info->clt_fn_index = tmp->cll_descr.cld_end + 1;
-			if (tmp->cll_descr.cld_end == CL_PAGE_EOF)
-				info->clt_fn_index = CL_PAGE_EOF;
-			cl_lock_put(env, tmp);
-		} else if (cl_page_own(env, io, page) == 0) {
-			/* discard the page */
-			cl_page_unmap(env, io, page);
-			cl_page_discard(env, io, page);
-			cl_page_disown(env, io, page);
-		} else {
-			LASSERT(page->cp_state == CPS_FREEING);
-		}
+	if ((enq_flags & CEF_ASYNC) && !(enq_flags & CEF_AGL)) {
+		anchor = &cl_env_info(env)->clt_anchor;
+		cl_sync_io_init(anchor, 1, cl_sync_io_end);
 	}
 
-	info->clt_next_index = index + 1;
-	return CLP_GANG_OKAY;
-}
+	rc = cl_lock_enqueue(env, io, lock, anchor);
 
-static int discard_cb(const struct lu_env *env, struct cl_io *io,
-		      struct cl_page *page, void *cbdata)
-{
-	struct cl_thread_info *info = cl_env_info(env);
-	struct cl_lock *lock   = cbdata;
+	if (anchor) {
+		int rc2;
 
-	LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE);
-	KLASSERT(ergo(page->cp_type == CPT_CACHEABLE,
-		      !PageWriteback(cl_page_vmpage(env, page))));
-	KLASSERT(ergo(page->cp_type == CPT_CACHEABLE,
-		      !PageDirty(cl_page_vmpage(env, page))));
-
-	info->clt_next_index = pgoff_at_lock(page, lock) + 1;
-	if (cl_page_own(env, io, page) == 0) {
-		/* discard the page */
-		cl_page_unmap(env, io, page);
-		cl_page_discard(env, io, page);
-		cl_page_disown(env, io, page);
-	} else {
-		LASSERT(page->cp_state == CPS_FREEING);
+		/* drop the reference count held at initialization time */
+		cl_sync_io_note(env, anchor, 0);
+		rc2 = cl_sync_io_wait(env, anchor, 0);
+		if (rc2 < 0 && rc == 0)
+			rc = rc2;
 	}
 
-	return CLP_GANG_OKAY;
-}
+	if (rc < 0)
+		cl_lock_release(env, lock);
 
-/**
- * Discard pages protected by the given lock. This function traverses radix
- * tree to find all covering pages and discard them. If a page is being covered
- * by other locks, it should remain in cache.
- *
- * If error happens on any step, the process continues anyway (the reasoning
- * behind this being that lock cancellation cannot be delayed indefinitely).
- */
-int cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock)
-{
-	struct cl_thread_info *info  = cl_env_info(env);
-	struct cl_io	  *io    = &info->clt_io;
-	struct cl_lock_descr  *descr = &lock->cll_descr;
-	cl_page_gang_cb_t      cb;
-	int res;
-	int result;
-
-	LINVRNT(cl_lock_invariant(env, lock));
-
-	io->ci_obj = cl_object_top(descr->cld_obj);
-	io->ci_ignore_layout = 1;
-	result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
-	if (result != 0)
-		goto out;
-
-	cb = descr->cld_mode == CLM_READ ? check_and_discard_cb : discard_cb;
-	info->clt_fn_index = info->clt_next_index = descr->cld_start;
-	do {
-		res = cl_page_gang_lookup(env, descr->cld_obj, io,
-					  info->clt_next_index, descr->cld_end,
-					  cb, (void *)lock);
-		if (info->clt_next_index > descr->cld_end)
-			break;
-
-		if (res == CLP_GANG_RESCHED)
-			cond_resched();
-	} while (res != CLP_GANG_OKAY);
-out:
-	cl_io_fini(env, io);
-	return result;
-}
-EXPORT_SYMBOL(cl_lock_discard_pages);
-
-/**
- * Eliminate all locks for a given object.
- *
- * Caller has to guarantee that no lock is in active use.
- *
- * \param cancel when this is set, cl_locks_prune() cancels locks before
- *	       destroying.
- */
-void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int cancel)
-{
-	struct cl_object_header *head;
-	struct cl_lock	  *lock;
-
-	head = cl_object_header(obj);
-	/*
-	 * If locks are destroyed without cancellation, all pages must be
-	 * already destroyed (as otherwise they will be left unprotected).
-	 */
-	LASSERT(ergo(!cancel,
-		     !head->coh_tree.rnode && head->coh_pages == 0));
-
-	spin_lock(&head->coh_lock_guard);
-	while (!list_empty(&head->coh_locks)) {
-		lock = container_of(head->coh_locks.next,
-				    struct cl_lock, cll_linkage);
-		cl_lock_get_trust(lock);
-		spin_unlock(&head->coh_lock_guard);
-		lu_ref_add(&lock->cll_reference, "prune", current);
-
-again:
-		cl_lock_mutex_get(env, lock);
-		if (lock->cll_state < CLS_FREEING) {
-			LASSERT(lock->cll_users <= 1);
-			if (unlikely(lock->cll_users == 1)) {
-				struct l_wait_info lwi = { 0 };
-
-				cl_lock_mutex_put(env, lock);
-				l_wait_event(lock->cll_wq,
-					     lock->cll_users == 0,
-					     &lwi);
-				goto again;
-			}
-
-			if (cancel)
-				cl_lock_cancel(env, lock);
-			cl_lock_delete(env, lock);
-		}
-		cl_lock_mutex_put(env, lock);
-		lu_ref_del(&lock->cll_reference, "prune", current);
-		cl_lock_put(env, lock);
-		spin_lock(&head->coh_lock_guard);
-	}
-	spin_unlock(&head->coh_lock_guard);
-}
-EXPORT_SYMBOL(cl_locks_prune);
-
-static struct cl_lock *cl_lock_hold_mutex(const struct lu_env *env,
-					  const struct cl_io *io,
-					  const struct cl_lock_descr *need,
-					  const char *scope, const void *source)
-{
-	struct cl_lock *lock;
-
-	while (1) {
-		lock = cl_lock_find(env, io, need);
-		if (IS_ERR(lock))
-			break;
-		cl_lock_mutex_get(env, lock);
-		if (lock->cll_state < CLS_FREEING &&
-		    !(lock->cll_flags & CLF_CANCELLED)) {
-			cl_lock_hold_mod(env, lock, 1);
-			lu_ref_add(&lock->cll_holders, scope, source);
-			lu_ref_add(&lock->cll_reference, scope, source);
-			break;
-		}
-		cl_lock_mutex_put(env, lock);
-		cl_lock_put(env, lock);
-	}
-	return lock;
-}
-
-/**
- * Returns a lock matching \a need description with a reference and a hold on
- * it.
- *
- * This is much like cl_lock_find(), except that cl_lock_hold() additionally
- * guarantees that lock is not in the CLS_FREEING state on return.
- */
-struct cl_lock *cl_lock_hold(const struct lu_env *env, const struct cl_io *io,
-			     const struct cl_lock_descr *need,
-			     const char *scope, const void *source)
-{
-	struct cl_lock *lock;
-
-	lock = cl_lock_hold_mutex(env, io, need, scope, source);
-	if (!IS_ERR(lock))
-		cl_lock_mutex_put(env, lock);
-	return lock;
-}
-EXPORT_SYMBOL(cl_lock_hold);
-
-/**
- * Main high-level entry point of cl_lock interface that finds existing or
- * enqueues new lock matching given description.
- */
-struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io,
-				const struct cl_lock_descr *need,
-				const char *scope, const void *source)
-{
-	struct cl_lock       *lock;
-	int		   rc;
-	__u32		 enqflags = need->cld_enq_flags;
-
-	do {
-		lock = cl_lock_hold_mutex(env, io, need, scope, source);
-		if (IS_ERR(lock))
-			break;
-
-		rc = cl_enqueue_locked(env, lock, io, enqflags);
-		if (rc == 0) {
-			if (cl_lock_fits_into(env, lock, need, io)) {
-				if (!(enqflags & CEF_AGL)) {
-					cl_lock_mutex_put(env, lock);
-					cl_lock_lockdep_acquire(env, lock,
-								enqflags);
-					break;
-				}
-				rc = 1;
-			}
-			cl_unuse_locked(env, lock);
-		}
-		cl_lock_trace(D_DLMTRACE, env,
-			      rc <= 0 ? "enqueue failed" : "agl succeed", lock);
-		cl_lock_hold_release(env, lock, scope, source);
-		cl_lock_mutex_put(env, lock);
-		lu_ref_del(&lock->cll_reference, scope, source);
-		cl_lock_put(env, lock);
-		if (rc > 0) {
-			LASSERT(enqflags & CEF_AGL);
-			lock = NULL;
-		} else if (rc != 0) {
-			lock = ERR_PTR(rc);
-		}
-	} while (rc == 0);
-	return lock;
+	return rc;
 }
 EXPORT_SYMBOL(cl_lock_request);
 
 /**
- * Adds a hold to a known lock.
- */
-void cl_lock_hold_add(const struct lu_env *env, struct cl_lock *lock,
-		      const char *scope, const void *source)
-{
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-	LASSERT(lock->cll_state != CLS_FREEING);
-
-	cl_lock_hold_mod(env, lock, 1);
-	cl_lock_get(lock);
-	lu_ref_add(&lock->cll_holders, scope, source);
-	lu_ref_add(&lock->cll_reference, scope, source);
-}
-EXPORT_SYMBOL(cl_lock_hold_add);
-
-/**
- * Releases a hold and a reference on a lock, on which caller acquired a
- * mutex.
- */
-void cl_lock_unhold(const struct lu_env *env, struct cl_lock *lock,
-		    const char *scope, const void *source)
-{
-	LINVRNT(cl_lock_invariant(env, lock));
-	cl_lock_hold_release(env, lock, scope, source);
-	lu_ref_del(&lock->cll_reference, scope, source);
-	cl_lock_put(env, lock);
-}
-EXPORT_SYMBOL(cl_lock_unhold);
-
-/**
  * Releases a hold and a reference on a lock, obtained by cl_lock_hold().
  */
-void cl_lock_release(const struct lu_env *env, struct cl_lock *lock,
-		     const char *scope, const void *source)
+void cl_lock_release(const struct lu_env *env, struct cl_lock *lock)
 {
-	LINVRNT(cl_lock_invariant(env, lock));
 	cl_lock_trace(D_DLMTRACE, env, "release lock", lock);
-	cl_lock_mutex_get(env, lock);
-	cl_lock_hold_release(env, lock, scope, source);
-	cl_lock_mutex_put(env, lock);
-	lu_ref_del(&lock->cll_reference, scope, source);
-	cl_lock_put(env, lock);
+	cl_lock_cancel(env, lock);
+	cl_lock_fini(env, lock);
 }
 EXPORT_SYMBOL(cl_lock_release);
 
-void cl_lock_user_add(const struct lu_env *env, struct cl_lock *lock)
-{
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-
-	cl_lock_used_mod(env, lock, 1);
-}
-EXPORT_SYMBOL(cl_lock_user_add);
-
-void cl_lock_user_del(const struct lu_env *env, struct cl_lock *lock)
-{
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-	LASSERT(lock->cll_users > 0);
-
-	cl_lock_used_mod(env, lock, -1);
-	if (lock->cll_users == 0)
-		wake_up_all(&lock->cll_wq);
-}
-EXPORT_SYMBOL(cl_lock_user_del);
-
 const char *cl_lock_mode_name(const enum cl_lock_mode mode)
 {
 	static const char *names[] = {
-		[CLM_PHANTOM] = "P",
 		[CLM_READ]    = "R",
 		[CLM_WRITE]   = "W",
 		[CLM_GROUP]   = "G"
@@ -2189,10 +257,8 @@ void cl_lock_print(const struct lu_env *env, void *cookie,
 		   lu_printer_t printer, const struct cl_lock *lock)
 {
 	const struct cl_lock_slice *slice;
-	(*printer)(env, cookie, "lock@%p[%d %d %d %d %d %08lx] ",
-		   lock, atomic_read(&lock->cll_ref),
-		   lock->cll_state, lock->cll_error, lock->cll_holds,
-		   lock->cll_users, lock->cll_flags);
+
+	(*printer)(env, cookie, "lock@%p", lock);
 	cl_lock_descr_print(env, cookie, printer, &lock->cll_descr);
 	(*printer)(env, cookie, " {\n");
 
@@ -2207,13 +273,3 @@ void cl_lock_print(const struct lu_env *env, void *cookie,
 	(*printer)(env, cookie, "} lock@%p\n", lock);
 }
 EXPORT_SYMBOL(cl_lock_print);
-
-int cl_lock_init(void)
-{
-	return lu_kmem_init(cl_lock_caches);
-}
-
-void cl_lock_fini(void)
-{
-	lu_kmem_fini(cl_lock_caches);
-}
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_object.c b/drivers/staging/lustre/lustre/obdclass/cl_object.c
index 43e299d4d416..91a5806d0239 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_object.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -36,6 +32,7 @@
  * Client Lustre Object.
  *
  *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  */
 
 /*
@@ -43,8 +40,6 @@
  *
  *  i_mutex
  *      PG_locked
- *	  ->coh_page_guard
- *	  ->coh_lock_guard
  *	  ->coh_attr_guard
  *	  ->ls_guard
  */
@@ -63,10 +58,6 @@
 
 static struct kmem_cache *cl_env_kmem;
 
-/** Lock class of cl_object_header::coh_page_guard */
-static struct lock_class_key cl_page_guard_class;
-/** Lock class of cl_object_header::coh_lock_guard */
-static struct lock_class_key cl_lock_guard_class;
 /** Lock class of cl_object_header::coh_attr_guard */
 static struct lock_class_key cl_attr_guard_class;
 
@@ -81,17 +72,9 @@ int cl_object_header_init(struct cl_object_header *h)
 
 	result = lu_object_header_init(&h->coh_lu);
 	if (result == 0) {
-		spin_lock_init(&h->coh_page_guard);
-		spin_lock_init(&h->coh_lock_guard);
 		spin_lock_init(&h->coh_attr_guard);
-		lockdep_set_class(&h->coh_page_guard, &cl_page_guard_class);
-		lockdep_set_class(&h->coh_lock_guard, &cl_lock_guard_class);
 		lockdep_set_class(&h->coh_attr_guard, &cl_attr_guard_class);
-		h->coh_pages = 0;
-		/* XXX hard coded GFP_* mask. */
-		INIT_RADIX_TREE(&h->coh_tree, GFP_ATOMIC);
-		INIT_LIST_HEAD(&h->coh_locks);
-		h->coh_page_bufsize = ALIGN(sizeof(struct cl_page), 8);
+		h->coh_page_bufsize = 0;
 	}
 	return result;
 }
@@ -145,7 +128,7 @@ EXPORT_SYMBOL(cl_object_get);
 /**
  * Returns the top-object for a given \a o.
  *
- * \see cl_page_top(), cl_io_top()
+ * \see cl_io_top()
  */
 struct cl_object *cl_object_top(struct cl_object *o)
 {
@@ -315,6 +298,29 @@ int cl_conf_set(const struct lu_env *env, struct cl_object *obj,
 EXPORT_SYMBOL(cl_conf_set);
 
 /**
+ * Prunes caches of pages and locks for this object.
+ */
+int cl_object_prune(const struct lu_env *env, struct cl_object *obj)
+{
+	struct lu_object_header *top;
+	struct cl_object *o;
+	int result;
+
+	top = obj->co_lu.lo_header;
+	result = 0;
+	list_for_each_entry(o, &top->loh_layers, co_lu.lo_linkage) {
+		if (o->co_ops->coo_prune) {
+			result = o->co_ops->coo_prune(env, o);
+			if (result != 0)
+				break;
+		}
+	}
+
+	return result;
+}
+EXPORT_SYMBOL(cl_object_prune);
+
+/**
  * Helper function removing all object locks, and marking object for
  * deletion. All object pages must have been deleted at this point.
  *
@@ -323,34 +329,12 @@ EXPORT_SYMBOL(cl_conf_set);
  */
 void cl_object_kill(const struct lu_env *env, struct cl_object *obj)
 {
-	struct cl_object_header *hdr;
-
-	hdr = cl_object_header(obj);
-	LASSERT(!hdr->coh_tree.rnode);
-	LASSERT(hdr->coh_pages == 0);
+	struct cl_object_header *hdr = cl_object_header(obj);
 
 	set_bit(LU_OBJECT_HEARD_BANSHEE, &hdr->coh_lu.loh_flags);
-	/*
-	 * Destroy all locks. Object destruction (including cl_inode_fini())
-	 * cannot cancel the locks, because in the case of a local client,
-	 * where client and server share the same thread running
-	 * prune_icache(), this can dead-lock with ldlm_cancel_handler()
-	 * waiting on __wait_on_freeing_inode().
-	 */
-	cl_locks_prune(env, obj, 0);
 }
 EXPORT_SYMBOL(cl_object_kill);
 
-/**
- * Prunes caches of pages and locks for this object.
- */
-void cl_object_prune(const struct lu_env *env, struct cl_object *obj)
-{
-	cl_pages_prune(env, obj);
-	cl_locks_prune(env, obj, 1);
-}
-EXPORT_SYMBOL(cl_object_prune);
-
 void cache_stats_init(struct cache_stats *cs, const char *name)
 {
 	int i;
@@ -383,6 +367,8 @@ static int cache_stats_print(const struct cache_stats *cs,
 	return 0;
 }
 
+static void cl_env_percpu_refill(void);
+
 /**
  * Initialize client site.
  *
@@ -397,11 +383,9 @@ int cl_site_init(struct cl_site *s, struct cl_device *d)
 	result = lu_site_init(&s->cs_lu, &d->cd_lu_dev);
 	if (result == 0) {
 		cache_stats_init(&s->cs_pages, "pages");
-		cache_stats_init(&s->cs_locks, "locks");
 		for (i = 0; i < ARRAY_SIZE(s->cs_pages_state); ++i)
 			atomic_set(&s->cs_pages_state[0], 0);
-		for (i = 0; i < ARRAY_SIZE(s->cs_locks_state); ++i)
-			atomic_set(&s->cs_locks_state[i], 0);
+		cl_env_percpu_refill();
 	}
 	return result;
 }
@@ -435,15 +419,6 @@ int cl_site_stats_print(const struct cl_site *site, struct seq_file *m)
 		[CPS_PAGEIN]  = "r",
 		[CPS_FREEING] = "f"
 	};
-	static const char *lstate[] = {
-		[CLS_NEW]       = "n",
-		[CLS_QUEUING]   = "q",
-		[CLS_ENQUEUED]  = "e",
-		[CLS_HELD]      = "h",
-		[CLS_INTRANSIT] = "t",
-		[CLS_CACHED]    = "c",
-		[CLS_FREEING]   = "f"
-	};
 /*
        lookup    hit  total   busy create
 pages: ...... ...... ...... ...... ...... [...... ...... ...... ......]
@@ -457,12 +432,6 @@ locks: ...... ...... ...... ...... ...... [...... ...... ...... ...... ......]
 		seq_printf(m, "%s: %u ", pstate[i],
 			   atomic_read(&site->cs_pages_state[i]));
 	seq_printf(m, "]\n");
-	cache_stats_print(&site->cs_locks, m, 0);
-	seq_printf(m, " [");
-	for (i = 0; i < ARRAY_SIZE(site->cs_locks_state); ++i)
-		seq_printf(m, "%s: %u ", lstate[i],
-			   atomic_read(&site->cs_locks_state[i]));
-	seq_printf(m, "]\n");
 	cache_stats_print(&cl_env_stats, m, 0);
 	seq_printf(m, "\n");
 	return 0;
@@ -492,6 +461,13 @@ EXPORT_SYMBOL(cl_site_stats_print);
  * bz20044, bz22683.
  */
 
+static LIST_HEAD(cl_envs);
+static unsigned int cl_envs_cached_nr;
+static unsigned int cl_envs_cached_max = 128; /* XXX: prototype: arbitrary limit
+					       * for now.
+					       */
+static DEFINE_SPINLOCK(cl_envs_guard);
+
 struct cl_env {
 	void	     *ce_magic;
 	struct lu_env     ce_lu;
@@ -597,7 +573,7 @@ static inline struct cl_env *cl_env_fetch(void)
 {
 	struct cl_env *cle;
 
-	cle = cfs_hash_lookup(cl_env_hash, (void *) (long) current->pid);
+	cle = cfs_hash_lookup(cl_env_hash, (void *)(long)current->pid);
 	LASSERT(ergo(cle, cle->ce_magic == &cl_env_init0));
 	return cle;
 }
@@ -608,7 +584,7 @@ static inline void cl_env_attach(struct cl_env *cle)
 		int rc;
 
 		LASSERT(!cle->ce_owner);
-		cle->ce_owner = (void *) (long) current->pid;
+		cle->ce_owner = (void *)(long)current->pid;
 		rc = cfs_hash_add_unique(cl_env_hash, cle->ce_owner,
 					 &cle->ce_node);
 		LASSERT(rc == 0);
@@ -619,7 +595,7 @@ static inline void cl_env_do_detach(struct cl_env *cle)
 {
 	void *cookie;
 
-	LASSERT(cle->ce_owner == (void *) (long) current->pid);
+	LASSERT(cle->ce_owner == (void *)(long)current->pid);
 	cookie = cfs_hash_del(cl_env_hash, cle->ce_owner,
 			      &cle->ce_node);
 	LASSERT(cookie == cle);
@@ -674,8 +650,9 @@ static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug)
 				lu_context_enter(&cle->ce_ses);
 				env->le_ses = &cle->ce_ses;
 				cl_env_init0(cle, debug);
-			} else
+			} else {
 				lu_env_fini(env);
+			}
 		}
 		if (rc != 0) {
 			kmem_cache_free(cl_env_kmem, cle);
@@ -684,8 +661,9 @@ static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug)
 			CL_ENV_INC(create);
 			CL_ENV_INC(total);
 		}
-	} else
+	} else {
 		env = ERR_PTR(-ENOMEM);
+	}
 	return env;
 }
 
@@ -697,6 +675,39 @@ static void cl_env_fini(struct cl_env *cle)
 	kmem_cache_free(cl_env_kmem, cle);
 }
 
+static struct lu_env *cl_env_obtain(void *debug)
+{
+	struct cl_env *cle;
+	struct lu_env *env;
+
+	spin_lock(&cl_envs_guard);
+	LASSERT(equi(cl_envs_cached_nr == 0, list_empty(&cl_envs)));
+	if (cl_envs_cached_nr > 0) {
+		int rc;
+
+		cle = container_of(cl_envs.next, struct cl_env, ce_linkage);
+		list_del_init(&cle->ce_linkage);
+		cl_envs_cached_nr--;
+		spin_unlock(&cl_envs_guard);
+
+		env = &cle->ce_lu;
+		rc = lu_env_refill(env);
+		if (rc == 0) {
+			cl_env_init0(cle, debug);
+			lu_context_enter(&env->le_ctx);
+			lu_context_enter(&cle->ce_ses);
+		} else {
+			cl_env_fini(cle);
+			env = ERR_PTR(rc);
+		}
+	} else {
+		spin_unlock(&cl_envs_guard);
+		env = cl_env_new(lu_context_tags_default,
+				 lu_session_tags_default, debug);
+	}
+	return env;
+}
+
 static inline struct cl_env *cl_env_container(struct lu_env *env)
 {
 	return container_of(env, struct cl_env, ce_lu);
@@ -727,6 +738,8 @@ static struct lu_env *cl_env_peek(int *refcheck)
  * Returns lu_env: if there already is an environment associated with the
  * current thread, it is returned, otherwise, new environment is allocated.
  *
+ * Allocations are amortized through the global cache of environments.
+ *
  * \param refcheck pointer to a counter used to detect environment leaks. In
  * the usual case cl_env_get() and cl_env_put() are called in the same lexical
  * scope and pointer to the same integer is passed as \a refcheck. This is
@@ -740,10 +753,7 @@ struct lu_env *cl_env_get(int *refcheck)
 
 	env = cl_env_peek(refcheck);
 	if (!env) {
-		env = cl_env_new(lu_context_tags_default,
-				 lu_session_tags_default,
-				 __builtin_return_address(0));
-
+		env = cl_env_obtain(__builtin_return_address(0));
 		if (!IS_ERR(env)) {
 			struct cl_env *cle;
 
@@ -787,6 +797,32 @@ static void cl_env_exit(struct cl_env *cle)
 }
 
 /**
+ * Finalizes and frees a given number of cached environments. This is done to
+ * (1) free some memory (not currently hooked into VM), or (2) release
+ * references to modules.
+ */
+unsigned int cl_env_cache_purge(unsigned int nr)
+{
+	struct cl_env *cle;
+
+	spin_lock(&cl_envs_guard);
+	for (; !list_empty(&cl_envs) && nr > 0; --nr) {
+		cle = container_of(cl_envs.next, struct cl_env, ce_linkage);
+		list_del_init(&cle->ce_linkage);
+		LASSERT(cl_envs_cached_nr > 0);
+		cl_envs_cached_nr--;
+		spin_unlock(&cl_envs_guard);
+
+		cl_env_fini(cle);
+		spin_lock(&cl_envs_guard);
+	}
+	LASSERT(equi(cl_envs_cached_nr == 0, list_empty(&cl_envs)));
+	spin_unlock(&cl_envs_guard);
+	return nr;
+}
+EXPORT_SYMBOL(cl_env_cache_purge);
+
+/**
  * Release an environment.
  *
  * Decrement \a env reference counter. When counter drops to 0, nothing in
@@ -808,7 +844,22 @@ void cl_env_put(struct lu_env *env, int *refcheck)
 		cl_env_detach(cle);
 		cle->ce_debug = NULL;
 		cl_env_exit(cle);
-		cl_env_fini(cle);
+		/*
+		 * Don't bother to take a lock here.
+		 *
+		 * Return environment to the cache only when it was allocated
+		 * with the standard tags.
+		 */
+		if (cl_envs_cached_nr < cl_envs_cached_max &&
+		    (env->le_ctx.lc_tags & ~LCT_HAS_EXIT) == LCT_CL_THREAD &&
+		    (env->le_ses->lc_tags & ~LCT_HAS_EXIT) == LCT_SESSION) {
+			spin_lock(&cl_envs_guard);
+			list_add(&cle->ce_linkage, &cl_envs);
+			cl_envs_cached_nr++;
+			spin_unlock(&cl_envs_guard);
+		} else {
+			cl_env_fini(cle);
+		}
 	}
 }
 EXPORT_SYMBOL(cl_env_put);
@@ -914,6 +965,104 @@ void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb)
 }
 EXPORT_SYMBOL(cl_lvb2attr);
 
+static struct cl_env cl_env_percpu[NR_CPUS];
+
+static int cl_env_percpu_init(void)
+{
+	struct cl_env *cle;
+	int tags = LCT_REMEMBER | LCT_NOREF;
+	int i, j;
+	int rc = 0;
+
+	for_each_possible_cpu(i) {
+		struct lu_env *env;
+
+		cle = &cl_env_percpu[i];
+		env = &cle->ce_lu;
+
+		INIT_LIST_HEAD(&cle->ce_linkage);
+		cle->ce_magic = &cl_env_init0;
+		rc = lu_env_init(env, LCT_CL_THREAD | tags);
+		if (rc == 0) {
+			rc = lu_context_init(&cle->ce_ses, LCT_SESSION | tags);
+			if (rc == 0) {
+				lu_context_enter(&cle->ce_ses);
+				env->le_ses = &cle->ce_ses;
+			} else {
+				lu_env_fini(env);
+			}
+		}
+		if (rc != 0)
+			break;
+	}
+	if (rc != 0) {
+		/* Indices 0 to i (excluding i) were correctly initialized,
+		 * thus we must uninitialize up to i, the rest are undefined.
+		 */
+		for (j = 0; j < i; j++) {
+			cle = &cl_env_percpu[i];
+			lu_context_exit(&cle->ce_ses);
+			lu_context_fini(&cle->ce_ses);
+			lu_env_fini(&cle->ce_lu);
+		}
+	}
+
+	return rc;
+}
+
+static void cl_env_percpu_fini(void)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		struct cl_env *cle = &cl_env_percpu[i];
+
+		lu_context_exit(&cle->ce_ses);
+		lu_context_fini(&cle->ce_ses);
+		lu_env_fini(&cle->ce_lu);
+	}
+}
+
+static void cl_env_percpu_refill(void)
+{
+	int i;
+
+	for_each_possible_cpu(i)
+		lu_env_refill(&cl_env_percpu[i].ce_lu);
+}
+
+void cl_env_percpu_put(struct lu_env *env)
+{
+	struct cl_env *cle;
+	int cpu;
+
+	cpu = smp_processor_id();
+	cle = cl_env_container(env);
+	LASSERT(cle == &cl_env_percpu[cpu]);
+
+	cle->ce_ref--;
+	LASSERT(cle->ce_ref == 0);
+
+	CL_ENV_DEC(busy);
+	cl_env_detach(cle);
+	cle->ce_debug = NULL;
+
+	put_cpu();
+}
+EXPORT_SYMBOL(cl_env_percpu_put);
+
+struct lu_env *cl_env_percpu_get(void)
+{
+	struct cl_env *cle;
+
+	cle = &cl_env_percpu[get_cpu()];
+	cl_env_init0(cle, __builtin_return_address(0));
+
+	cl_env_attach(cle);
+	return &cle->ce_lu;
+}
+EXPORT_SYMBOL(cl_env_percpu_get);
+
 /*****************************************************************************
  *
  * Temporary prototype thing: mirror obd-devices into cl devices.
@@ -944,8 +1093,9 @@ struct cl_device *cl_type_setup(const struct lu_env *env, struct lu_site *site,
 			CERROR("can't init device '%s', %d\n", typename, rc);
 			d = ERR_PTR(rc);
 		}
-	} else
+	} else {
 		CERROR("Cannot allocate device: '%s'\n", typename);
+	}
 	return lu2cl_dev(d);
 }
 EXPORT_SYMBOL(cl_type_setup);
@@ -959,12 +1109,6 @@ void cl_stack_fini(const struct lu_env *env, struct cl_device *cl)
 }
 EXPORT_SYMBOL(cl_stack_fini);
 
-int  cl_lock_init(void);
-void cl_lock_fini(void);
-
-int  cl_page_init(void);
-void cl_page_fini(void);
-
 static struct lu_context_key cl_key;
 
 struct cl_thread_info *cl_env_info(const struct lu_env *env)
@@ -1059,17 +1203,13 @@ int cl_global_init(void)
 	if (result)
 		goto out_kmem;
 
-	result = cl_lock_init();
+	result = cl_env_percpu_init();
 	if (result)
+		/* no cl_env_percpu_fini on error */
 		goto out_context;
 
-	result = cl_page_init();
-	if (result)
-		goto out_lock;
-
 	return 0;
-out_lock:
-	cl_lock_fini();
+
 out_context:
 	lu_context_key_degister(&cl_key);
 out_kmem:
@@ -1084,8 +1224,7 @@ out_store:
  */
 void cl_global_fini(void)
 {
-	cl_lock_fini();
-	cl_page_fini();
+	cl_env_percpu_fini();
 	lu_context_key_degister(&cl_key);
 	lu_kmem_fini(cl_object_caches);
 	cl_env_store_fini();
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_page.c b/drivers/staging/lustre/lustre/obdclass/cl_page.c
index 394580016638..db2dc6b39073 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_page.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_page.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -36,6 +32,7 @@
  * Client Lustre Page.
  *
  *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  */
 
 #define DEBUG_SUBSYSTEM S_CLASS
@@ -48,8 +45,7 @@
 #include "../include/cl_object.h"
 #include "cl_internal.h"
 
-static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
-			    int radix);
+static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg);
 
 # define PASSERT(env, page, expr)					   \
 	do {								   \
@@ -63,24 +59,11 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
 	((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
 
 /**
- * Internal version of cl_page_top, it should be called if the page is
- * known to be not freed, says with page referenced, or radix tree lock held,
- * or page owned.
- */
-static struct cl_page *cl_page_top_trusted(struct cl_page *page)
-{
-	while (page->cp_parent)
-		page = page->cp_parent;
-	return page;
-}
-
-/**
  * Internal version of cl_page_get().
  *
  * This function can be used to obtain initial reference to previously
  * unreferenced cached object. It can be called only if concurrent page
- * reclamation is somehow prevented, e.g., by locking page radix-tree
- * (cl_object_header::hdr->coh_page_guard), or by keeping a lock on a VM page,
+ * reclamation is somehow prevented, e.g., by keeping a lock on a VM page,
  * associated with \a page.
  *
  * Use with care! Not exported.
@@ -103,142 +86,12 @@ cl_page_at_trusted(const struct cl_page *page,
 {
 	const struct cl_page_slice *slice;
 
-	page = cl_page_top_trusted((struct cl_page *)page);
-	do {
-		list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
-			if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
-				return slice;
-		}
-		page = page->cp_child;
-	} while (page);
-	return NULL;
-}
-
-/**
- * Returns a page with given index in the given object, or NULL if no page is
- * found. Acquires a reference on \a page.
- *
- * Locking: called under cl_object_header::coh_page_guard spin-lock.
- */
-struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index)
-{
-	struct cl_page *page;
-
-	assert_spin_locked(&hdr->coh_page_guard);
-
-	page = radix_tree_lookup(&hdr->coh_tree, index);
-	if (page)
-		cl_page_get_trust(page);
-	return page;
-}
-EXPORT_SYMBOL(cl_page_lookup);
-
-/**
- * Returns a list of pages by a given [start, end] of \a obj.
- *
- * \param resched If not NULL, then we give up before hogging CPU for too
- * long and set *resched = 1, in that case caller should implement a retry
- * logic.
- *
- * Gang tree lookup (radix_tree_gang_lookup()) optimization is absolutely
- * crucial in the face of [offset, EOF] locks.
- *
- * Return at least one page in @queue unless there is no covered page.
- */
-int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj,
-			struct cl_io *io, pgoff_t start, pgoff_t end,
-			cl_page_gang_cb_t cb, void *cbdata)
-{
-	struct cl_object_header *hdr;
-	struct cl_page	  *page;
-	struct cl_page	 **pvec;
-	const struct cl_page_slice  *slice;
-	const struct lu_device_type *dtype;
-	pgoff_t		  idx;
-	unsigned int	     nr;
-	unsigned int	     i;
-	unsigned int	     j;
-	int		      res = CLP_GANG_OKAY;
-	int		      tree_lock = 1;
-
-	idx = start;
-	hdr = cl_object_header(obj);
-	pvec = cl_env_info(env)->clt_pvec;
-	dtype = cl_object_top(obj)->co_lu.lo_dev->ld_type;
-	spin_lock(&hdr->coh_page_guard);
-	while ((nr = radix_tree_gang_lookup(&hdr->coh_tree, (void **)pvec,
-					    idx, CLT_PVEC_SIZE)) > 0) {
-		int end_of_region = 0;
-
-		idx = pvec[nr - 1]->cp_index + 1;
-		for (i = 0, j = 0; i < nr; ++i) {
-			page = pvec[i];
-			pvec[i] = NULL;
-
-			LASSERT(page->cp_type == CPT_CACHEABLE);
-			if (page->cp_index > end) {
-				end_of_region = 1;
-				break;
-			}
-			if (page->cp_state == CPS_FREEING)
-				continue;
-
-			slice = cl_page_at_trusted(page, dtype);
-			/*
-			 * Pages for lsm-less file has no underneath sub-page
-			 * for osc, in case of ...
-			 */
-			PASSERT(env, page, slice);
-
-			page = slice->cpl_page;
-			/*
-			 * Can safely call cl_page_get_trust() under
-			 * radix-tree spin-lock.
-			 *
-			 * XXX not true, because @page is from object another
-			 * than @hdr and protected by different tree lock.
-			 */
-			cl_page_get_trust(page);
-			lu_ref_add_atomic(&page->cp_reference,
-					  "gang_lookup", current);
-			pvec[j++] = page;
-		}
-
-		/*
-		 * Here a delicate locking dance is performed. Current thread
-		 * holds a reference to a page, but has to own it before it
-		 * can be placed into queue. Owning implies waiting, so
-		 * radix-tree lock is to be released. After a wait one has to
-		 * check that pages weren't truncated (cl_page_own() returns
-		 * error in the latter case).
-		 */
-		spin_unlock(&hdr->coh_page_guard);
-		tree_lock = 0;
-
-		for (i = 0; i < j; ++i) {
-			page = pvec[i];
-			if (res == CLP_GANG_OKAY)
-				res = (*cb)(env, io, page, cbdata);
-			lu_ref_del(&page->cp_reference,
-				   "gang_lookup", current);
-			cl_page_put(env, page);
-		}
-		if (nr < CLT_PVEC_SIZE || end_of_region)
-			break;
-
-		if (res == CLP_GANG_OKAY && need_resched())
-			res = CLP_GANG_RESCHED;
-		if (res != CLP_GANG_OKAY)
-			break;
-
-		spin_lock(&hdr->coh_page_guard);
-		tree_lock = 1;
+	list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
+		if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
+			return slice;
 	}
-	if (tree_lock)
-		spin_unlock(&hdr->coh_page_guard);
-	return res;
+	return NULL;
 }
-EXPORT_SYMBOL(cl_page_gang_lookup);
 
 static void cl_page_free(const struct lu_env *env, struct cl_page *page)
 {
@@ -247,17 +100,16 @@ static void cl_page_free(const struct lu_env *env, struct cl_page *page)
 	PASSERT(env, page, list_empty(&page->cp_batch));
 	PASSERT(env, page, !page->cp_owner);
 	PASSERT(env, page, !page->cp_req);
-	PASSERT(env, page, !page->cp_parent);
 	PASSERT(env, page, page->cp_state == CPS_FREEING);
 
-	might_sleep();
 	while (!list_empty(&page->cp_layers)) {
 		struct cl_page_slice *slice;
 
 		slice = list_entry(page->cp_layers.next,
 				   struct cl_page_slice, cpl_linkage);
 		list_del_init(page->cp_layers.next);
-		slice->cpl_ops->cpo_fini(env, slice);
+		if (unlikely(slice->cpl_ops->cpo_fini))
+			slice->cpl_ops->cpo_fini(env, slice);
 	}
 	lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page);
 	cl_object_put(env, obj);
@@ -276,10 +128,10 @@ static inline void cl_page_state_set_trust(struct cl_page *page,
 	*(enum cl_page_state *)&page->cp_state = state;
 }
 
-static struct cl_page *cl_page_alloc(const struct lu_env *env,
-				     struct cl_object *o, pgoff_t ind,
-				     struct page *vmpage,
-				     enum cl_page_type type)
+struct cl_page *cl_page_alloc(const struct lu_env *env,
+			      struct cl_object *o, pgoff_t ind,
+			      struct page *vmpage,
+			      enum cl_page_type type)
 {
 	struct cl_page	  *page;
 	struct lu_object_header *head;
@@ -289,13 +141,11 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env,
 		int result = 0;
 
 		atomic_set(&page->cp_ref, 1);
-		if (type == CPT_CACHEABLE) /* for radix tree */
-			atomic_inc(&page->cp_ref);
 		page->cp_obj = o;
 		cl_object_get(o);
 		lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page",
 				     page);
-		page->cp_index = ind;
+		page->cp_vmpage = vmpage;
 		cl_page_state_set_trust(page, CPS_CACHED);
 		page->cp_type = type;
 		INIT_LIST_HEAD(&page->cp_layers);
@@ -306,10 +156,10 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env,
 		head = o->co_lu.lo_header;
 		list_for_each_entry(o, &head->loh_layers, co_lu.lo_linkage) {
 			if (o->co_ops->coo_page_init) {
-				result = o->co_ops->coo_page_init(env, o,
-								  page, vmpage);
+				result = o->co_ops->coo_page_init(env, o, page,
+								  ind);
 				if (result != 0) {
-					cl_page_delete0(env, page, 0);
+					cl_page_delete0(env, page);
 					cl_page_free(env, page);
 					page = ERR_PTR(result);
 					break;
@@ -321,6 +171,7 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env,
 	}
 	return page;
 }
+EXPORT_SYMBOL(cl_page_alloc);
 
 /**
  * Returns a cl_page with index \a idx at the object \a o, and associated with
@@ -333,16 +184,13 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env,
  *
  * \see cl_object_find(), cl_lock_find()
  */
-static struct cl_page *cl_page_find0(const struct lu_env *env,
-				     struct cl_object *o,
-				     pgoff_t idx, struct page *vmpage,
-				     enum cl_page_type type,
-				     struct cl_page *parent)
+struct cl_page *cl_page_find(const struct lu_env *env,
+			     struct cl_object *o,
+			     pgoff_t idx, struct page *vmpage,
+			     enum cl_page_type type)
 {
 	struct cl_page	  *page = NULL;
-	struct cl_page	  *ghost = NULL;
 	struct cl_object_header *hdr;
-	int err;
 
 	LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
 	might_sleep();
@@ -368,120 +216,25 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
 		 *       reference on it.
 		 */
 		page = cl_vmpage_page(vmpage, o);
-		PINVRNT(env, page,
-			ergo(page,
-			     cl_page_vmpage(env, page) == vmpage &&
-			     (void *)radix_tree_lookup(&hdr->coh_tree,
-						       idx) == page));
-	}
 
-	if (page)
-		return page;
+		if (page)
+			return page;
+	}
 
 	/* allocate and initialize cl_page */
 	page = cl_page_alloc(env, o, idx, vmpage, type);
-	if (IS_ERR(page))
-		return page;
-
-	if (type == CPT_TRANSIENT) {
-		if (parent) {
-			LASSERT(!page->cp_parent);
-			page->cp_parent = parent;
-			parent->cp_child = page;
-		}
-		return page;
-	}
-
-	/*
-	 * XXX optimization: use radix_tree_preload() here, and change tree
-	 * gfp mask to GFP_KERNEL in cl_object_header_init().
-	 */
-	spin_lock(&hdr->coh_page_guard);
-	err = radix_tree_insert(&hdr->coh_tree, idx, page);
-	if (err != 0) {
-		ghost = page;
-		/*
-		 * Noted by Jay: a lock on \a vmpage protects cl_page_find()
-		 * from this race, but
-		 *
-		 *     0. it's better to have cl_page interface "locally
-		 *     consistent" so that its correctness can be reasoned
-		 *     about without appealing to the (obscure world of) VM
-		 *     locking.
-		 *
-		 *     1. handling this race allows ->coh_tree to remain
-		 *     consistent even when VM locking is somehow busted,
-		 *     which is very useful during diagnosing and debugging.
-		 */
-		page = ERR_PTR(err);
-		CL_PAGE_DEBUG(D_ERROR, env, ghost,
-			      "fail to insert into radix tree: %d\n", err);
-	} else {
-		if (parent) {
-			LASSERT(!page->cp_parent);
-			page->cp_parent = parent;
-			parent->cp_child = page;
-		}
-		hdr->coh_pages++;
-	}
-	spin_unlock(&hdr->coh_page_guard);
-
-	if (unlikely(ghost)) {
-		cl_page_delete0(env, ghost, 0);
-		cl_page_free(env, ghost);
-	}
 	return page;
 }
-
-struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *o,
-			     pgoff_t idx, struct page *vmpage,
-			     enum cl_page_type type)
-{
-	return cl_page_find0(env, o, idx, vmpage, type, NULL);
-}
 EXPORT_SYMBOL(cl_page_find);
 
-struct cl_page *cl_page_find_sub(const struct lu_env *env, struct cl_object *o,
-				 pgoff_t idx, struct page *vmpage,
-				 struct cl_page *parent)
-{
-	return cl_page_find0(env, o, idx, vmpage, parent->cp_type, parent);
-}
-EXPORT_SYMBOL(cl_page_find_sub);
-
 static inline int cl_page_invariant(const struct cl_page *pg)
 {
-	struct cl_object_header *header;
-	struct cl_page	  *parent;
-	struct cl_page	  *child;
-	struct cl_io	    *owner;
-
 	/*
 	 * Page invariant is protected by a VM lock.
 	 */
 	LINVRNT(cl_page_is_vmlocked(NULL, pg));
 
-	header = cl_object_header(pg->cp_obj);
-	parent = pg->cp_parent;
-	child  = pg->cp_child;
-	owner  = pg->cp_owner;
-
-	return cl_page_in_use(pg) &&
-		ergo(parent, parent->cp_child == pg) &&
-		ergo(child, child->cp_parent == pg) &&
-		ergo(child, pg->cp_obj != child->cp_obj) &&
-		ergo(parent, pg->cp_obj != parent->cp_obj) &&
-		ergo(owner && parent,
-		     parent->cp_owner == pg->cp_owner->ci_parent) &&
-		ergo(owner && child, child->cp_owner->ci_parent == owner) &&
-		/*
-		 * Either page is early in initialization (has neither child
-		 * nor parent yet), or it is in the object radix tree.
-		 */
-		ergo(pg->cp_state < CPS_FREEING && pg->cp_type == CPT_CACHEABLE,
-		     (void *)radix_tree_lookup(&header->coh_tree,
-					       pg->cp_index) == pg ||
-		     (!child && !parent));
+	return cl_page_in_use_noref(pg);
 }
 
 static void cl_page_state_set0(const struct lu_env *env,
@@ -534,13 +287,9 @@ static void cl_page_state_set0(const struct lu_env *env,
 	old = page->cp_state;
 	PASSERT(env, page, allowed_transitions[old][state]);
 	CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state);
-	for (; page; page = page->cp_child) {
-		PASSERT(env, page, page->cp_state == old);
-		PASSERT(env, page,
-			equi(state == CPS_OWNED, page->cp_owner));
-
-		cl_page_state_set_trust(page, state);
-	}
+	PASSERT(env, page, page->cp_state == old);
+	PASSERT(env, page, equi(state == CPS_OWNED, page->cp_owner));
+	cl_page_state_set_trust(page, state);
 }
 
 static void cl_page_state_set(const struct lu_env *env,
@@ -574,8 +323,6 @@ EXPORT_SYMBOL(cl_page_get);
  */
 void cl_page_put(const struct lu_env *env, struct cl_page *page)
 {
-	PASSERT(env, page, atomic_read(&page->cp_ref) > !!page->cp_parent);
-
 	CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
 		       atomic_read(&page->cp_ref));
 
@@ -595,34 +342,10 @@ void cl_page_put(const struct lu_env *env, struct cl_page *page)
 EXPORT_SYMBOL(cl_page_put);
 
 /**
- * Returns a VM page associated with a given cl_page.
- */
-struct page *cl_page_vmpage(const struct lu_env *env, struct cl_page *page)
-{
-	const struct cl_page_slice *slice;
-
-	/*
-	 * Find uppermost layer with ->cpo_vmpage() method, and return its
-	 * result.
-	 */
-	page = cl_page_top(page);
-	do {
-		list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
-			if (slice->cpl_ops->cpo_vmpage)
-				return slice->cpl_ops->cpo_vmpage(env, slice);
-		}
-		page = page->cp_child;
-	} while (page);
-	LBUG(); /* ->cpo_vmpage() has to be defined somewhere in the stack */
-}
-EXPORT_SYMBOL(cl_page_vmpage);
-
-/**
  * Returns a cl_page associated with a VM page, and given cl_object.
  */
 struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
 {
-	struct cl_page *top;
 	struct cl_page *page;
 
 	KLASSERT(PageLocked(vmpage));
@@ -633,36 +356,15 @@ struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
 	 *       bottom-to-top pass.
 	 */
 
-	/*
-	 * This loop assumes that ->private points to the top-most page. This
-	 * can be rectified easily.
-	 */
-	top = (struct cl_page *)vmpage->private;
-	if (!top)
-		return NULL;
-
-	for (page = top; page; page = page->cp_child) {
-		if (cl_object_same(page->cp_obj, obj)) {
-			cl_page_get_trust(page);
-			break;
-		}
+	page = (struct cl_page *)vmpage->private;
+	if (page) {
+		cl_page_get_trust(page);
+		LASSERT(page->cp_type == CPT_CACHEABLE);
 	}
-	LASSERT(ergo(page, page->cp_type == CPT_CACHEABLE));
 	return page;
 }
 EXPORT_SYMBOL(cl_vmpage_page);
 
-/**
- * Returns the top-page for a given page.
- *
- * \see cl_object_top(), cl_io_top()
- */
-struct cl_page *cl_page_top(struct cl_page *page)
-{
-	return cl_page_top_trusted(page);
-}
-EXPORT_SYMBOL(cl_page_top);
-
 const struct cl_page_slice *cl_page_at(const struct cl_page *page,
 				       const struct lu_device_type *dtype)
 {
@@ -682,26 +384,43 @@ EXPORT_SYMBOL(cl_page_at);
 	int		       (*__method)_proto;		    \
 									\
 	__result = 0;						   \
-	__page = cl_page_top(__page);				   \
-	do {							    \
-		list_for_each_entry(__scan, &__page->cp_layers,     \
-					cpl_linkage) {		  \
-			__method = *(void **)((char *)__scan->cpl_ops + \
-					      __op);		    \
-			if (__method) {					\
-				__result = (*__method)(__env, __scan,   \
-						       ## __VA_ARGS__); \
-				if (__result != 0)		      \
-					break;			  \
-			}					       \
-		}						       \
-		__page = __page->cp_child;			      \
-	} while (__page && __result == 0);			      \
+	list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {  \
+		__method = *(void **)((char *)__scan->cpl_ops +  __op); \
+		if (__method) {						\
+			__result = (*__method)(__env, __scan, ## __VA_ARGS__); \
+			if (__result != 0)				\
+				break;					\
+		}							\
+	}								\
 	if (__result > 0)					       \
 		__result = 0;					   \
 	__result;						       \
 })
 
+#define CL_PAGE_INVOKE_REVERSE(_env, _page, _op, _proto, ...)		\
+({									\
+	const struct lu_env        *__env  = (_env);			\
+	struct cl_page             *__page = (_page);			\
+	const struct cl_page_slice *__scan;				\
+	int                         __result;				\
+	ptrdiff_t                   __op   = (_op);			\
+	int                       (*__method)_proto;			\
+									\
+	__result = 0;							\
+	list_for_each_entry_reverse(__scan, &__page->cp_layers,		\
+					cpl_linkage) {			\
+		__method = *(void **)((char *)__scan->cpl_ops +  __op);	\
+		if (__method) {						\
+			__result = (*__method)(__env, __scan, ## __VA_ARGS__); \
+			if (__result != 0)				\
+				break;					\
+		}							\
+	}								\
+	if (__result > 0)						\
+		__result = 0;						\
+	__result;							\
+})
+
 #define CL_PAGE_INVOID(_env, _page, _op, _proto, ...)		   \
 do {								    \
 	const struct lu_env	*__env  = (_env);		    \
@@ -710,18 +429,11 @@ do {								    \
 	ptrdiff_t		   __op   = (_op);		     \
 	void		      (*__method)_proto;		    \
 									\
-	__page = cl_page_top(__page);				   \
-	do {							    \
-		list_for_each_entry(__scan, &__page->cp_layers,     \
-					cpl_linkage) {		  \
-			__method = *(void **)((char *)__scan->cpl_ops + \
-					      __op);		    \
-			if (__method)				   \
-				(*__method)(__env, __scan,	      \
-					    ## __VA_ARGS__);	    \
-		}						       \
-		__page = __page->cp_child;			      \
-	} while (__page);					       \
+	list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {	\
+		__method = *(void **)((char *)__scan->cpl_ops + __op);	\
+		if (__method)						\
+			(*__method)(__env, __scan, ## __VA_ARGS__);	\
+	}								\
 } while (0)
 
 #define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...)	       \
@@ -732,20 +444,11 @@ do {									\
 	ptrdiff_t		   __op   = (_op);			 \
 	void		      (*__method)_proto;			\
 									    \
-	/* get to the bottom page. */				       \
-	while (__page->cp_child)					    \
-		__page = __page->cp_child;				  \
-	do {								\
-		list_for_each_entry_reverse(__scan, &__page->cp_layers, \
-						cpl_linkage) {	      \
-			__method = *(void **)((char *)__scan->cpl_ops +     \
-					      __op);			\
-			if (__method)				       \
-				(*__method)(__env, __scan,		  \
-					    ## __VA_ARGS__);		\
-		}							   \
-		__page = __page->cp_parent;				 \
-	} while (__page);						   \
+	list_for_each_entry_reverse(__scan, &__page->cp_layers, cpl_linkage) { \
+		__method = *(void **)((char *)__scan->cpl_ops + __op);	\
+		if (__method)						\
+			(*__method)(__env, __scan, ## __VA_ARGS__);	\
+	}								\
 } while (0)
 
 static int cl_page_invoke(const struct lu_env *env,
@@ -771,20 +474,17 @@ static void cl_page_invoid(const struct lu_env *env,
 
 static void cl_page_owner_clear(struct cl_page *page)
 {
-	for (page = cl_page_top(page); page; page = page->cp_child) {
-		if (page->cp_owner) {
-			LASSERT(page->cp_owner->ci_owned_nr > 0);
-			page->cp_owner->ci_owned_nr--;
-			page->cp_owner = NULL;
-			page->cp_task = NULL;
-		}
+	if (page->cp_owner) {
+		LASSERT(page->cp_owner->ci_owned_nr > 0);
+		page->cp_owner->ci_owned_nr--;
+		page->cp_owner = NULL;
+		page->cp_task = NULL;
 	}
 }
 
 static void cl_page_owner_set(struct cl_page *page)
 {
-	for (page = cl_page_top(page); page; page = page->cp_child)
-		page->cp_owner->ci_owned_nr++;
+	page->cp_owner->ci_owned_nr++;
 }
 
 void cl_page_disown0(const struct lu_env *env,
@@ -794,7 +494,7 @@ void cl_page_disown0(const struct lu_env *env,
 
 	state = pg->cp_state;
 	PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING);
-	PINVRNT(env, pg, cl_page_invariant(pg));
+	PINVRNT(env, pg, cl_page_invariant(pg) || state == CPS_FREEING);
 	cl_page_owner_clear(pg);
 
 	if (state == CPS_OWNED)
@@ -815,8 +515,9 @@ void cl_page_disown0(const struct lu_env *env,
  */
 int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io)
 {
+	struct cl_io *top = cl_io_top((struct cl_io *)io);
 	LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj));
-	return pg->cp_state == CPS_OWNED && pg->cp_owner == io;
+	return pg->cp_state == CPS_OWNED && pg->cp_owner == top;
 }
 EXPORT_SYMBOL(cl_page_is_owned);
 
@@ -847,7 +548,6 @@ static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
 
 	PINVRNT(env, pg, !cl_page_is_owned(pg, io));
 
-	pg = cl_page_top(pg);
 	io = cl_io_top(io);
 
 	if (pg->cp_state == CPS_FREEING) {
@@ -861,7 +561,7 @@ static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
 		if (result == 0) {
 			PASSERT(env, pg, !pg->cp_owner);
 			PASSERT(env, pg, !pg->cp_req);
-			pg->cp_owner = io;
+			pg->cp_owner = cl_io_top(io);
 			pg->cp_task  = current;
 			cl_page_owner_set(pg);
 			if (pg->cp_state != CPS_FREEING) {
@@ -914,12 +614,11 @@ void cl_page_assume(const struct lu_env *env,
 {
 	PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj));
 
-	pg = cl_page_top(pg);
 	io = cl_io_top(io);
 
 	cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
 	PASSERT(env, pg, !pg->cp_owner);
-	pg->cp_owner = io;
+	pg->cp_owner = cl_io_top(io);
 	pg->cp_task = current;
 	cl_page_owner_set(pg);
 	cl_page_state_set(env, pg, CPS_OWNED);
@@ -943,7 +642,6 @@ void cl_page_unassume(const struct lu_env *env,
 	PINVRNT(env, pg, cl_page_is_owned(pg, io));
 	PINVRNT(env, pg, cl_page_invariant(pg));
 
-	pg = cl_page_top(pg);
 	io = cl_io_top(io);
 	cl_page_owner_clear(pg);
 	cl_page_state_set(env, pg, CPS_CACHED);
@@ -968,9 +666,9 @@ EXPORT_SYMBOL(cl_page_unassume);
 void cl_page_disown(const struct lu_env *env,
 		    struct cl_io *io, struct cl_page *pg)
 {
-	PINVRNT(env, pg, cl_page_is_owned(pg, io));
+	PINVRNT(env, pg, cl_page_is_owned(pg, io) ||
+		pg->cp_state == CPS_FREEING);
 
-	pg = cl_page_top(pg);
 	io = cl_io_top(io);
 	cl_page_disown0(env, io, pg);
 }
@@ -1001,12 +699,8 @@ EXPORT_SYMBOL(cl_page_discard);
  * pages, e.g,. in a error handling cl_page_find()->cl_page_delete0()
  * path. Doesn't check page invariant.
  */
-static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
-			    int radix)
+static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg)
 {
-	struct cl_page *tmp = pg;
-
-	PASSERT(env, pg, pg == cl_page_top(pg));
 	PASSERT(env, pg, pg->cp_state != CPS_FREEING);
 
 	/*
@@ -1014,41 +708,11 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
 	 */
 	cl_page_owner_clear(pg);
 
-	/*
-	 * unexport the page firstly before freeing it so that
-	 * the page content is considered to be invalid.
-	 * We have to do this because a CPS_FREEING cl_page may
-	 * be NOT under the protection of a cl_lock.
-	 * Afterwards, if this page is found by other threads, then this
-	 * page will be forced to reread.
-	 */
-	cl_page_export(env, pg, 0);
 	cl_page_state_set0(env, pg, CPS_FREEING);
 
-	CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_delete),
-		       (const struct lu_env *, const struct cl_page_slice *));
-
-	if (tmp->cp_type == CPT_CACHEABLE) {
-		if (!radix)
-			/* !radix means that @pg is not yet in the radix tree,
-			 * skip removing it.
-			 */
-			tmp = pg->cp_child;
-		for (; tmp; tmp = tmp->cp_child) {
-			void		    *value;
-			struct cl_object_header *hdr;
-
-			hdr = cl_object_header(tmp->cp_obj);
-			spin_lock(&hdr->coh_page_guard);
-			value = radix_tree_delete(&hdr->coh_tree,
-						  tmp->cp_index);
-			PASSERT(env, tmp, value == tmp);
-			PASSERT(env, tmp, hdr->coh_pages > 0);
-			hdr->coh_pages--;
-			spin_unlock(&hdr->coh_page_guard);
-			cl_page_put(env, tmp);
-		}
-	}
+	CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_delete),
+			       (const struct lu_env *,
+				const struct cl_page_slice *));
 }
 
 /**
@@ -1070,7 +734,6 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
  * Once page reaches cl_page_state::CPS_FREEING, all remaining references will
  * drain after some time, at which point page will be recycled.
  *
- * \pre  pg == cl_page_top(pg)
  * \pre  VM page is locked
  * \post pg->cp_state == CPS_FREEING
  *
@@ -1079,30 +742,11 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
 void cl_page_delete(const struct lu_env *env, struct cl_page *pg)
 {
 	PINVRNT(env, pg, cl_page_invariant(pg));
-	cl_page_delete0(env, pg, 1);
+	cl_page_delete0(env, pg);
 }
 EXPORT_SYMBOL(cl_page_delete);
 
 /**
- * Unmaps page from user virtual memory.
- *
- * Calls cl_page_operations::cpo_unmap() through all layers top-to-bottom. The
- * layer responsible for VM interaction has to unmap page from user space
- * virtual memory.
- *
- * \see cl_page_operations::cpo_unmap()
- */
-int cl_page_unmap(const struct lu_env *env,
-		  struct cl_io *io, struct cl_page *pg)
-{
-	PINVRNT(env, pg, cl_page_is_owned(pg, io));
-	PINVRNT(env, pg, cl_page_invariant(pg));
-
-	return cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_unmap));
-}
-EXPORT_SYMBOL(cl_page_unmap);
-
-/**
  * Marks page up-to-date.
  *
  * Call cl_page_operations::cpo_export() through all layers top-to-bottom. The
@@ -1129,7 +773,6 @@ int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg)
 	int result;
 	const struct cl_page_slice *slice;
 
-	pg = cl_page_top_trusted((struct cl_page *)pg);
 	slice = container_of(pg->cp_layers.next,
 			     const struct cl_page_slice, cpl_linkage);
 	PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked);
@@ -1241,7 +884,7 @@ void cl_page_completion(const struct lu_env *env,
 	cl_page_put(env, pg);
 
 	if (anchor)
-		cl_sync_io_note(anchor, ioret);
+		cl_sync_io_note(env, anchor, ioret);
 }
 EXPORT_SYMBOL(cl_page_completion);
 
@@ -1276,44 +919,6 @@ int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
 EXPORT_SYMBOL(cl_page_make_ready);
 
 /**
- * Notify layers that high level io decided to place this page into a cache
- * for future transfer.
- *
- * The layer implementing transfer engine (osc) has to register this page in
- * its queues.
- *
- * \pre  cl_page_is_owned(pg, io)
- * \post cl_page_is_owned(pg, io)
- *
- * \see cl_page_operations::cpo_cache_add()
- */
-int cl_page_cache_add(const struct lu_env *env, struct cl_io *io,
-		      struct cl_page *pg, enum cl_req_type crt)
-{
-	const struct cl_page_slice *scan;
-	int result = 0;
-
-	PINVRNT(env, pg, crt < CRT_NR);
-	PINVRNT(env, pg, cl_page_is_owned(pg, io));
-	PINVRNT(env, pg, cl_page_invariant(pg));
-
-	if (crt >= CRT_NR)
-		return -EINVAL;
-
-	list_for_each_entry(scan, &pg->cp_layers, cpl_linkage) {
-		if (!scan->cpl_ops->io[crt].cpo_cache_add)
-			continue;
-
-		result = scan->cpl_ops->io[crt].cpo_cache_add(env, scan, io);
-		if (result != 0)
-			break;
-	}
-	CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
-	return result;
-}
-EXPORT_SYMBOL(cl_page_cache_add);
-
-/**
  * Called if a pge is being written back by kernel's intention.
  *
  * \pre  cl_page_is_owned(pg, io)
@@ -1344,68 +949,21 @@ EXPORT_SYMBOL(cl_page_flush);
  * \see cl_page_operations::cpo_is_under_lock()
  */
 int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
-			  struct cl_page *page)
+			  struct cl_page *page, pgoff_t *max_index)
 {
 	int rc;
 
 	PINVRNT(env, page, cl_page_invariant(page));
 
-	rc = CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_is_under_lock),
-			    (const struct lu_env *,
-			     const struct cl_page_slice *, struct cl_io *),
-			    io);
-	PASSERT(env, page, rc != 0);
+	rc = CL_PAGE_INVOKE_REVERSE(env, page, CL_PAGE_OP(cpo_is_under_lock),
+				    (const struct lu_env *,
+				     const struct cl_page_slice *,
+				      struct cl_io *, pgoff_t *),
+				    io, max_index);
 	return rc;
 }
 EXPORT_SYMBOL(cl_page_is_under_lock);
 
-static int page_prune_cb(const struct lu_env *env, struct cl_io *io,
-			 struct cl_page *page, void *cbdata)
-{
-	cl_page_own(env, io, page);
-	cl_page_unmap(env, io, page);
-	cl_page_discard(env, io, page);
-	cl_page_disown(env, io, page);
-	return CLP_GANG_OKAY;
-}
-
-/**
- * Purges all cached pages belonging to the object \a obj.
- */
-int cl_pages_prune(const struct lu_env *env, struct cl_object *clobj)
-{
-	struct cl_thread_info   *info;
-	struct cl_object	*obj = cl_object_top(clobj);
-	struct cl_io	    *io;
-	int		      result;
-
-	info  = cl_env_info(env);
-	io    = &info->clt_io;
-
-	/*
-	 * initialize the io. This is ugly since we never do IO in this
-	 * function, we just make cl_page_list functions happy. -jay
-	 */
-	io->ci_obj = obj;
-	io->ci_ignore_layout = 1;
-	result = cl_io_init(env, io, CIT_MISC, obj);
-	if (result != 0) {
-		cl_io_fini(env, io);
-		return io->ci_result;
-	}
-
-	do {
-		result = cl_page_gang_lookup(env, obj, io, 0, CL_PAGE_EOF,
-					     page_prune_cb, NULL);
-		if (result == CLP_GANG_RESCHED)
-			cond_resched();
-	} while (result != CLP_GANG_OKAY);
-
-	cl_io_fini(env, io);
-	return result;
-}
-EXPORT_SYMBOL(cl_pages_prune);
-
 /**
  * Tells transfer engine that only part of a page is to be transmitted.
  *
@@ -1431,9 +989,8 @@ void cl_page_header_print(const struct lu_env *env, void *cookie,
 			  lu_printer_t printer, const struct cl_page *pg)
 {
 	(*printer)(env, cookie,
-		   "page@%p[%d %p:%lu ^%p_%p %d %d %d %p %p %#x]\n",
+		   "page@%p[%d %p %d %d %d %p %p %#x]\n",
 		   pg, atomic_read(&pg->cp_ref), pg->cp_obj,
-		   pg->cp_index, pg->cp_parent, pg->cp_child,
 		   pg->cp_state, pg->cp_error, pg->cp_type,
 		   pg->cp_owner, pg->cp_req, pg->cp_flags);
 }
@@ -1445,11 +1002,7 @@ EXPORT_SYMBOL(cl_page_header_print);
 void cl_page_print(const struct lu_env *env, void *cookie,
 		   lu_printer_t printer, const struct cl_page *pg)
 {
-	struct cl_page *scan;
-
-	for (scan = cl_page_top((struct cl_page *)pg); scan;
-	     scan = scan->cp_child)
-		cl_page_header_print(env, cookie, printer, scan);
+	cl_page_header_print(env, cookie, printer, pg);
 	CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print),
 		       (const struct lu_env *env,
 			const struct cl_page_slice *slice,
@@ -1509,21 +1062,59 @@ EXPORT_SYMBOL(cl_page_size);
  * \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add()
  */
 void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
-		       struct cl_object *obj,
+		       struct cl_object *obj, pgoff_t index,
 		       const struct cl_page_operations *ops)
 {
 	list_add_tail(&slice->cpl_linkage, &page->cp_layers);
 	slice->cpl_obj  = obj;
+	slice->cpl_index = index;
 	slice->cpl_ops  = ops;
 	slice->cpl_page = page;
 }
 EXPORT_SYMBOL(cl_page_slice_add);
 
-int  cl_page_init(void)
+/**
+ * Allocate and initialize cl_cache, called by ll_init_sbi().
+ */
+struct cl_client_cache *cl_cache_init(unsigned long lru_page_max)
 {
-	return 0;
+	struct cl_client_cache	*cache = NULL;
+
+	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+	if (!cache)
+		return NULL;
+
+	/* Initialize cache data */
+	atomic_set(&cache->ccc_users, 1);
+	cache->ccc_lru_max = lru_page_max;
+	atomic_set(&cache->ccc_lru_left, lru_page_max);
+	spin_lock_init(&cache->ccc_lru_lock);
+	INIT_LIST_HEAD(&cache->ccc_lru);
+
+	atomic_set(&cache->ccc_unstable_nr, 0);
+	init_waitqueue_head(&cache->ccc_unstable_waitq);
+
+	return cache;
+}
+EXPORT_SYMBOL(cl_cache_init);
+
+/**
+ * Increase cl_cache refcount
+ */
+void cl_cache_incref(struct cl_client_cache *cache)
+{
+	atomic_inc(&cache->ccc_users);
 }
+EXPORT_SYMBOL(cl_cache_incref);
 
-void cl_page_fini(void)
+/**
+ * Decrease cl_cache refcount and free the cache if refcount=0.
+ * Since llite, lov and osc all hold cl_cache refcount,
+ * the free will not cause race. (LU-6173)
+ */
+void cl_cache_decref(struct cl_client_cache *cache)
 {
+	if (atomic_dec_and_test(&cache->ccc_users))
+		kfree(cache);
 }
+EXPORT_SYMBOL(cl_cache_decref);
diff --git a/drivers/staging/lustre/lustre/obdclass/class_obd.c b/drivers/staging/lustre/lustre/obdclass/class_obd.c
index c2cf015962dd..d9d2a1952b8b 100644
--- a/drivers/staging/lustre/lustre/obdclass/class_obd.c
+++ b/drivers/staging/lustre/lustre/obdclass/class_obd.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -60,6 +56,8 @@ unsigned int obd_dump_on_eviction;
 EXPORT_SYMBOL(obd_dump_on_eviction);
 unsigned int obd_max_dirty_pages = 256;
 EXPORT_SYMBOL(obd_max_dirty_pages);
+atomic_t obd_unstable_pages;
+EXPORT_SYMBOL(obd_unstable_pages);
 atomic_t obd_dirty_pages;
 EXPORT_SYMBOL(obd_dirty_pages);
 unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT;   /* seconds */
@@ -335,7 +333,6 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
 		err = 0;
 		goto out;
 	}
-
 	}
 
 	if (data->ioc_dev == OBD_DEV_BY_DEVNAME) {
@@ -461,7 +458,7 @@ static int obd_init_checks(void)
 		CWARN("LPD64 wrong length! strlen(%s)=%d != 2\n", buf, len);
 		ret = -EINVAL;
 	}
-	if ((u64val & ~CFS_PAGE_MASK) >= PAGE_SIZE) {
+	if ((u64val & ~PAGE_MASK) >= PAGE_SIZE) {
 		CWARN("mask failed: u64val %llu >= %llu\n", u64val,
 		      (__u64)PAGE_SIZE);
 		ret = -EINVAL;
diff --git a/drivers/staging/lustre/lustre/obdclass/debug.c b/drivers/staging/lustre/lustre/obdclass/debug.c
index 43a7f7a79b35..8acf67239fa8 100644
--- a/drivers/staging/lustre/lustre/obdclass/debug.c
+++ b/drivers/staging/lustre/lustre/obdclass/debug.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -68,8 +64,8 @@ int block_debug_check(char *who, void *addr, int end, __u64 off, __u64 id)
 
 	LASSERT(addr);
 
-	ne_off = le64_to_cpu (off);
-	id = le64_to_cpu (id);
+	ne_off = le64_to_cpu(off);
+	id = le64_to_cpu(id);
 	if (memcmp(addr, (char *)&ne_off, LPDS)) {
 		CDEBUG(D_ERROR, "%s: id %#llx offset %llu off: %#llx != %#llx\n",
 		       who, id, off, *(__u64 *)addr, ne_off);
diff --git a/drivers/staging/lustre/lustre/obdclass/genops.c b/drivers/staging/lustre/lustre/obdclass/genops.c
index cf97b8f06764..99c2da632b51 100644
--- a/drivers/staging/lustre/lustre/obdclass/genops.c
+++ b/drivers/staging/lustre/lustre/obdclass/genops.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -604,7 +600,6 @@ int obd_init_caches(void)
  out:
 	obd_cleanup_caches();
 	return -ENOMEM;
-
 }
 
 /* map connection to client */
diff --git a/drivers/staging/lustre/lustre/obdclass/kernelcomm.c b/drivers/staging/lustre/lustre/obdclass/kernelcomm.c
index 8405eccdac19..a0f65c470f4d 100644
--- a/drivers/staging/lustre/lustre/obdclass/kernelcomm.c
+++ b/drivers/staging/lustre/lustre/obdclass/kernelcomm.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
index 8eddf206f1ed..33342bfcc90e 100644
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -158,9 +154,7 @@ int obd_ioctl_popdata(void __user *arg, void *data, int len)
 {
 	int err;
 
-	err = copy_to_user(arg, data, len);
-	if (err)
-		err = -EFAULT;
+	err = copy_to_user(arg, data, len) ? -EFAULT : 0;
 	return err;
 }
 EXPORT_SYMBOL(obd_ioctl_popdata);
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c
index b41b65e2f021..c6cc6a7666e3 100644
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
index e6bf414a4444..8f70dd2686f9 100644
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/obdclass/llog.c b/drivers/staging/lustre/lustre/obdclass/llog.c
index 992573eae1b1..1784ca063428 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -123,8 +119,10 @@ static int llog_read_header(const struct lu_env *env,
 		handle->lgh_last_idx = 0; /* header is record with index 0 */
 		llh->llh_count = 1;	 /* for the header record */
 		llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC;
-		llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE;
-		llh->llh_hdr.lrh_index = llh->llh_tail.lrt_index = 0;
+		llh->llh_hdr.lrh_len = LLOG_CHUNK_SIZE;
+		llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE;
+		llh->llh_hdr.lrh_index = 0;
+		llh->llh_tail.lrt_index = 0;
 		llh->llh_timestamp = ktime_get_real_seconds();
 		if (uuid)
 			memcpy(&llh->llh_tgtuuid, uuid,
@@ -265,7 +263,6 @@ repeat:
 		for (rec = (struct llog_rec_hdr *)buf;
 		     (char *)rec < buf + LLOG_CHUNK_SIZE;
 		     rec = (struct llog_rec_hdr *)((char *)rec + rec->lrh_len)) {
-
 			CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n",
 			       rec, rec->lrh_type);
 
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_cat.c b/drivers/staging/lustre/lustre/obdclass/llog_cat.c
index c27d4ec1df9e..a82a2950295a 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog_cat.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog_cat.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_internal.h b/drivers/staging/lustre/lustre/obdclass/llog_internal.h
index 7fb48dda355e..f7949525d952 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog_internal.h
+++ b/drivers/staging/lustre/lustre/obdclass/llog_internal.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_obd.c b/drivers/staging/lustre/lustre/obdclass/llog_obd.c
index 826623f528da..6ace7e097859 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog_obd.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog_obd.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_swab.c b/drivers/staging/lustre/lustre/obdclass/llog_swab.c
index 967ba2e1bfcb..f7b9b190350c 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog_swab.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog_swab.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
index d93f42fee420..279b625f1afe 100644
--- a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
+++ b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -49,7 +45,7 @@
 static const char * const obd_connect_names[] = {
 	"read_only",
 	"lov_index",
-	"unused",
+	"connect_from_mds",
 	"write_grant",
 	"server_lock",
 	"version",
@@ -122,6 +118,56 @@ int obd_connect_flags2str(char *page, int count, __u64 flags, char *sep)
 }
 EXPORT_SYMBOL(obd_connect_flags2str);
 
+static void obd_connect_data_seqprint(struct seq_file *m,
+				      struct obd_connect_data *ocd)
+{
+	int flags;
+
+	LASSERT(ocd);
+	flags = ocd->ocd_connect_flags;
+
+	seq_printf(m, "    connect_data:\n"
+		   "       flags: %llx\n"
+		   "       instance: %u\n",
+		   ocd->ocd_connect_flags,
+		   ocd->ocd_instance);
+	if (flags & OBD_CONNECT_VERSION)
+		seq_printf(m, "       target_version: %u.%u.%u.%u\n",
+			   OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
+			   OBD_OCD_VERSION_MINOR(ocd->ocd_version),
+			   OBD_OCD_VERSION_PATCH(ocd->ocd_version),
+			   OBD_OCD_VERSION_FIX(ocd->ocd_version));
+	if (flags & OBD_CONNECT_MDS)
+		seq_printf(m, "       mdt_index: %d\n", ocd->ocd_group);
+	if (flags & OBD_CONNECT_GRANT)
+		seq_printf(m, "       initial_grant: %d\n", ocd->ocd_grant);
+	if (flags & OBD_CONNECT_INDEX)
+		seq_printf(m, "       target_index: %u\n", ocd->ocd_index);
+	if (flags & OBD_CONNECT_BRW_SIZE)
+		seq_printf(m, "       max_brw_size: %d\n", ocd->ocd_brw_size);
+	if (flags & OBD_CONNECT_IBITS)
+		seq_printf(m, "       ibits_known: %llx\n",
+			   ocd->ocd_ibits_known);
+	if (flags & OBD_CONNECT_GRANT_PARAM)
+		seq_printf(m, "       grant_block_size: %d\n"
+			   "       grant_inode_size: %d\n"
+			   "       grant_extent_overhead: %d\n",
+			   ocd->ocd_blocksize,
+			   ocd->ocd_inodespace,
+			   ocd->ocd_grant_extent);
+	if (flags & OBD_CONNECT_TRANSNO)
+		seq_printf(m, "       first_transno: %llx\n",
+			   ocd->ocd_transno);
+	if (flags & OBD_CONNECT_CKSUM)
+		seq_printf(m, "       cksum_types: %#x\n",
+			   ocd->ocd_cksum_types);
+	if (flags & OBD_CONNECT_MAX_EASIZE)
+		seq_printf(m, "       max_easize: %d\n", ocd->ocd_max_easize);
+	if (flags & OBD_CONNECT_MAXBYTES)
+		seq_printf(m, "       max_object_bytes: %llx\n",
+			   ocd->ocd_maxbytes);
+}
+
 int lprocfs_read_frac_helper(char *buffer, unsigned long count, long val,
 			     int mult)
 {
@@ -624,6 +670,7 @@ int lprocfs_rd_import(struct seq_file *m, void *data)
 	struct obd_device		*obd	= data;
 	struct obd_import		*imp;
 	struct obd_import_conn		*conn;
+	struct obd_connect_data *ocd;
 	int				j;
 	int				k;
 	int				rw	= 0;
@@ -635,9 +682,9 @@ int lprocfs_rd_import(struct seq_file *m, void *data)
 		return rc;
 
 	imp = obd->u.cli.cl_import;
+	ocd = &imp->imp_connect_data;
 
-	seq_printf(m,
-		   "import:\n"
+	seq_printf(m, "import:\n"
 		   "    name: %s\n"
 		   "    target: %s\n"
 		   "    state: %s\n"
@@ -649,9 +696,9 @@ int lprocfs_rd_import(struct seq_file *m, void *data)
 		   imp->imp_connect_data.ocd_instance);
 	obd_connect_seq_flags2str(m, imp->imp_connect_data.ocd_connect_flags,
 				  ", ");
-	seq_printf(m,
-		   " ]\n"
-		   "    import_flags: [ ");
+	seq_printf(m, " ]\n");
+	obd_connect_data_seqprint(m, ocd);
+	seq_printf(m, "    import_flags: [ ");
 	obd_import_flags2str(imp, m);
 
 	seq_printf(m,
@@ -694,8 +741,9 @@ int lprocfs_rd_import(struct seq_file *m, void *data)
 
 		do_div(sum, ret.lc_count);
 		ret.lc_sum = sum;
-	} else
+	} else {
 		ret.lc_sum = 0;
+	}
 	seq_printf(m,
 		   "    rpcs:\n"
 		   "       inflight: %u\n"
@@ -1471,10 +1519,10 @@ EXPORT_SYMBOL(lprocfs_oh_tally);
 
 void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value)
 {
-	unsigned int val;
+	unsigned int val = 0;
 
-	for (val = 0; ((1 << val) < value) && (val <= OBD_HIST_MAX); val++)
-		;
+	if (likely(value != 0))
+		val = min(fls(value - 1), OBD_HIST_MAX);
 
 	lprocfs_oh_tally(oh, val);
 }
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index 978568ada8e9..9b03059f34d6 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -55,6 +51,7 @@
 #include "../include/lustre_disk.h"
 #include "../include/lustre_fid.h"
 #include "../include/lu_object.h"
+#include "../include/cl_object.h"
 #include "../include/lu_ref.h"
 #include <linux/list.h>
 
@@ -103,7 +100,6 @@ void lu_object_put(const struct lu_env *env, struct lu_object *o)
 
 	if (!cfs_hash_bd_dec_and_lock(site->ls_obj_hash, &bd, &top->loh_ref)) {
 		if (lu_object_is_dying(top)) {
-
 			/*
 			 * somebody may be waiting for this, currently only
 			 * used for cl_object, see cl_object_put_last().
@@ -357,7 +353,6 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
 
 			if (count > 0 && --count == 0)
 				break;
-
 		}
 		cfs_hash_bd_unlock(s->ls_obj_hash, &bd, 1);
 		cond_resched();
@@ -715,8 +710,9 @@ struct lu_object *lu_object_find_slice(const struct lu_env *env,
 		obj = lu_object_locate(top->lo_header, dev->ld_type);
 		if (!obj)
 			lu_object_put(env, top);
-	} else
+	} else {
 		obj = top;
+	}
 	return obj;
 }
 EXPORT_SYMBOL(lu_object_find_slice);
@@ -935,7 +931,7 @@ static void lu_dev_add_linkage(struct lu_site *s, struct lu_device *d)
  * Initialize site \a s, with \a d as the top level device.
  */
 #define LU_SITE_BITS_MIN    12
-#define LU_SITE_BITS_MAX    24
+#define LU_SITE_BITS_MAX    19
 /**
  * total 256 buckets, we don't want too many buckets because:
  * - consume too much memory
@@ -1468,6 +1464,7 @@ void lu_context_key_quiesce(struct lu_context_key *key)
 		/*
 		 * XXX layering violation.
 		 */
+		cl_env_cache_purge(~0);
 		key->lct_tags |= LCT_QUIESCENT;
 		/*
 		 * XXX memory barrier has to go here.
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_ref.c b/drivers/staging/lustre/lustre/obdclass/lu_ref.c
index 993697b660f6..e9f6040d19eb 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_ref.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_ref.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_handles.c b/drivers/staging/lustre/lustre/obdclass/lustre_handles.c
index 403ceea06186..082f530c527c 100644
--- a/drivers/staging/lustre/lustre/obdclass/lustre_handles.c
+++ b/drivers/staging/lustre/lustre/obdclass/lustre_handles.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
index 5f812460b3ea..5974a9bf77c0 100644
--- a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
+++ b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -163,8 +159,9 @@ int class_del_uuid(const char *uuid)
 				break;
 			}
 		}
-	} else
+	} else {
 		list_splice_init(&g_uuid_list, &deathrow);
+	}
 	spin_unlock(&g_uuid_lock);
 
 	if (uuid && list_empty(&deathrow)) {
diff --git a/drivers/staging/lustre/lustre/obdclass/obd_config.c b/drivers/staging/lustre/lustre/obdclass/obd_config.c
index 5395e994deab..0eab1236501b 100644
--- a/drivers/staging/lustre/lustre/obdclass/obd_config.c
+++ b/drivers/staging/lustre/lustre/obdclass/obd_config.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -606,7 +602,7 @@ static int class_del_conn(struct obd_device *obd, struct lustre_cfg *lcfg)
 	return rc;
 }
 
-LIST_HEAD(lustre_profile_list);
+static LIST_HEAD(lustre_profile_list);
 
 struct lustre_profile *class_get_profile(const char *prof)
 {
@@ -961,7 +957,6 @@ int class_process_config(struct lustre_cfg *lcfg)
 	default: {
 		err = obd_process_config(obd, sizeof(*lcfg), lcfg);
 		goto out;
-
 	}
 	}
 out:
@@ -1001,7 +996,13 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
 	for (i = 1; i < lcfg->lcfg_bufcount; i++) {
 		key = lustre_cfg_buf(lcfg, i);
 		/* Strip off prefix */
-		class_match_param(key, prefix, &key);
+		if (class_match_param(key, prefix, &key)) {
+			/*
+			 * If the prefix doesn't match, return error so we
+			 * can pass it down the stack
+			 */
+			return -ENOSYS;
+		}
 		sval = strchr(key, '=');
 		if (!sval || (*(sval + 1) == 0)) {
 			CERROR("Can't parse param %s (missing '=')\n", key);
@@ -1016,8 +1017,8 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
 		/* Search proc entries */
 		while (lvars[j].name) {
 			var = &lvars[j];
-			if (!class_match_param(key, var->name, NULL)
-			    && keylen == strlen(var->name)) {
+			if (!class_match_param(key, var->name, NULL) &&
+			    keylen == strlen(var->name)) {
 				matched++;
 				rc = -EROFS;
 				if (var->fops && var->fops->write) {
@@ -1034,18 +1035,14 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
 			j++;
 		}
 		if (!matched) {
-			/* If the prefix doesn't match, return error so we
-			 * can pass it down the stack
-			 */
-			if (strnchr(key, keylen, '.'))
-				return -ENOSYS;
-			CERROR("%s: unknown param %s\n",
+			CERROR("%.*s: %s unknown param %s\n",
+			       (int)strlen(prefix) - 1, prefix,
 			       (char *)lustre_cfg_string(lcfg, 0), key);
 			/* rc = -EINVAL;	continue parsing other params */
 			skip++;
 		} else if (rc < 0) {
-			CERROR("writing proc entry %s err %d\n",
-			       var->name, rc);
+			CERROR("%s: error writing proc entry '%s': rc = %d\n",
+			       prefix, var->name, rc);
 			rc = 0;
 		} else {
 			CDEBUG(D_CONFIG, "%s.%.*s: Set parameter %.*s=%s\n",
@@ -1076,7 +1073,7 @@ int class_config_llog_handler(const struct lu_env *env,
 {
 	struct config_llog_instance *clli = data;
 	int cfg_len = rec->lrh_len;
-	char *cfg_buf = (char *) (rec + 1);
+	char *cfg_buf = (char *)(rec + 1);
 	int rc = 0;
 
 	switch (rec->lrh_type) {
@@ -1350,6 +1347,7 @@ static int class_config_parse_rec(struct llog_rec_hdr *rec, char *buf,
 					lustre_cfg_string(lcfg, i));
 		}
 	}
+	ptr += snprintf(ptr, end - ptr, "\n");
 	/* return consumed bytes */
 	rc = ptr - buf;
 	return rc;
@@ -1368,7 +1366,7 @@ int class_config_dump_handler(const struct lu_env *env,
 
 	if (rec->lrh_type == OBD_CFG_REC) {
 		class_config_parse_rec(rec, outstr, 256);
-		LCONSOLE(D_WARNING, "   %s\n", outstr);
+		LCONSOLE(D_WARNING, "   %s", outstr);
 	} else {
 		LCONSOLE(D_WARNING, "unhandled lrh_type: %#x\n", rec->lrh_type);
 		rc = -EINVAL;
diff --git a/drivers/staging/lustre/lustre/obdclass/obd_mount.c b/drivers/staging/lustre/lustre/obdclass/obd_mount.c
index d3e28a389ac1..aa84a50e9904 100644
--- a/drivers/staging/lustre/lustre/obdclass/obd_mount.c
+++ b/drivers/staging/lustre/lustre/obdclass/obd_mount.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -102,7 +98,7 @@ int lustre_process_log(struct super_block *sb, char *logname,
 		LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s' failed from the MGS (%d).  Make sure this client and the MGS are running compatible versions of Lustre.\n",
 				   mgc->obd_name, logname, rc);
 
-	if (rc)
+	else if (rc)
 		LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' failed (%d). This may be the result of communication errors between this node and the MGS, a bad configuration, or other errors. See the syslog for more information.\n",
 				   mgc->obd_name, logname,
 				   rc);
@@ -192,7 +188,7 @@ static int lustre_start_simple(char *obdname, char *type, char *uuid,
 	return rc;
 }
 
-DEFINE_MUTEX(mgc_start_lock);
+static DEFINE_MUTEX(mgc_start_lock);
 
 /** Set up a mgc obd to process startup logs
  *
@@ -307,7 +303,8 @@ int lustre_start_mgc(struct super_block *sb)
 	while (class_parse_nid(ptr, &nid, &ptr) == 0) {
 		rc = do_lcfg(mgcname, nid,
 			     LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
-		i++;
+		if (!rc)
+			i++;
 		/* Stop at the first failover nid */
 		if (*ptr == ':')
 			break;
@@ -345,16 +342,18 @@ int lustre_start_mgc(struct super_block *sb)
 		sprintf(niduuid, "%s_%x", mgcname, i);
 		j = 0;
 		while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
-			j++;
-			rc = do_lcfg(mgcname, nid,
-				     LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
+			rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, niduuid,
+				     NULL, NULL, NULL);
+			if (!rc)
+				++j;
 			if (*ptr == ':')
 				break;
 		}
 		if (j > 0) {
 			rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
 				     niduuid, NULL, NULL, NULL);
-			i++;
+			if (!rc)
+				i++;
 		} else {
 			/* at ":/fsname" */
 			break;
diff --git a/drivers/staging/lustre/lustre/obdclass/obdo.c b/drivers/staging/lustre/lustre/obdclass/obdo.c
index e6436cb4ac62..8583a4a8c206 100644
--- a/drivers/staging/lustre/lustre/obdclass/obdo.c
+++ b/drivers/staging/lustre/lustre/obdclass/obdo.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -185,8 +181,7 @@ void md_from_obdo(struct md_op_data *op_data, struct obdo *oa, u32 valid)
 		op_data->op_attr.ia_valid |= ATTR_BLOCKS;
 	}
 	if (valid & OBD_MD_FLFLAGS) {
-		((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
-			oa->o_flags;
+		op_data->op_attr_flags = oa->o_flags;
 		op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
 	}
 }
diff --git a/drivers/staging/lustre/lustre/obdclass/statfs_pack.c b/drivers/staging/lustre/lustre/obdclass/statfs_pack.c
index fb4e3ae845e0..4bad1fa27d40 100644
--- a/drivers/staging/lustre/lustre/obdclass/statfs_pack.c
+++ b/drivers/staging/lustre/lustre/obdclass/statfs_pack.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/obdclass/uuid.c b/drivers/staging/lustre/lustre/obdclass/uuid.c
index b0b0157a6334..abd9b1ae72cd 100644
--- a/drivers/staging/lustre/lustre/obdclass/uuid.c
+++ b/drivers/staging/lustre/lustre/obdclass/uuid.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c
index 1e83669c204d..5b29c4a44fe5 100644
--- a/drivers/staging/lustre/lustre/obdecho/echo_client.c
+++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -81,7 +77,6 @@ struct echo_object_conf {
 struct echo_page {
 	struct cl_page_slice   ep_cl;
 	struct mutex		ep_lock;
-	struct page	    *ep_vmpage;
 };
 
 struct echo_lock {
@@ -164,15 +159,13 @@ static int cl_echo_object_put(struct echo_object *eco);
 static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
 			      struct page **pages, int npages, int async);
 
-static struct echo_thread_info *echo_env_info(const struct lu_env *env);
-
 struct echo_thread_info {
 	struct echo_object_conf eti_conf;
 	struct lustre_md	eti_md;
 
 	struct cl_2queue	eti_queue;
 	struct cl_io	    eti_io;
-	struct cl_lock_descr    eti_descr;
+	struct cl_lock          eti_lock;
 	struct lu_fid	   eti_fid;
 	struct lu_fid		eti_fid2;
 };
@@ -219,12 +212,6 @@ static struct lu_kmem_descr echo_caches[] = {
  *
  * @{
  */
-static struct page *echo_page_vmpage(const struct lu_env *env,
-				     const struct cl_page_slice *slice)
-{
-	return cl2echo_page(slice)->ep_vmpage;
-}
-
 static int echo_page_own(const struct lu_env *env,
 			 const struct cl_page_slice *slice,
 			 struct cl_io *io, int nonblock)
@@ -273,12 +260,10 @@ static void echo_page_completion(const struct lu_env *env,
 static void echo_page_fini(const struct lu_env *env,
 			   struct cl_page_slice *slice)
 {
-	struct echo_page *ep    = cl2echo_page(slice);
 	struct echo_object *eco = cl2echo_obj(slice->cpl_obj);
-	struct page *vmpage      = ep->ep_vmpage;
 
 	atomic_dec(&eco->eo_npages);
-	put_page(vmpage);
+	put_page(slice->cpl_page->cp_vmpage);
 }
 
 static int echo_page_prep(const struct lu_env *env,
@@ -295,7 +280,8 @@ static int echo_page_print(const struct lu_env *env,
 	struct echo_page *ep = cl2echo_page(slice);
 
 	(*printer)(env, cookie, LUSTRE_ECHO_CLIENT_NAME"-page@%p %d vm@%p\n",
-		   ep, mutex_is_locked(&ep->ep_lock), ep->ep_vmpage);
+		   ep, mutex_is_locked(&ep->ep_lock),
+		   slice->cpl_page->cp_vmpage);
 	return 0;
 }
 
@@ -303,7 +289,6 @@ static const struct cl_page_operations echo_page_ops = {
 	.cpo_own	   = echo_page_own,
 	.cpo_disown	= echo_page_disown,
 	.cpo_discard       = echo_page_discard,
-	.cpo_vmpage	= echo_page_vmpage,
 	.cpo_fini	  = echo_page_fini,
 	.cpo_print	 = echo_page_print,
 	.cpo_is_vmlocked   = echo_page_is_vmlocked,
@@ -336,26 +321,8 @@ static void echo_lock_fini(const struct lu_env *env,
 	kmem_cache_free(echo_lock_kmem, ecl);
 }
 
-static void echo_lock_delete(const struct lu_env *env,
-			     const struct cl_lock_slice *slice)
-{
-	struct echo_lock *ecl      = cl2echo_lock(slice);
-
-	LASSERT(list_empty(&ecl->el_chain));
-}
-
-static int echo_lock_fits_into(const struct lu_env *env,
-			       const struct cl_lock_slice *slice,
-			       const struct cl_lock_descr *need,
-			       const struct cl_io *unused)
-{
-	return 1;
-}
-
 static struct cl_lock_operations echo_lock_ops = {
 	.clo_fini      = echo_lock_fini,
-	.clo_delete    = echo_lock_delete,
-	.clo_fits_into = echo_lock_fits_into
 };
 
 /** @} echo_lock */
@@ -367,15 +334,14 @@ static struct cl_lock_operations echo_lock_ops = {
  * @{
  */
 static int echo_page_init(const struct lu_env *env, struct cl_object *obj,
-			  struct cl_page *page, struct page *vmpage)
+			  struct cl_page *page, pgoff_t index)
 {
 	struct echo_page *ep = cl_object_page_slice(obj, page);
 	struct echo_object *eco = cl2echo_obj(obj);
 
-	ep->ep_vmpage = vmpage;
-	get_page(vmpage);
+	get_page(page->cp_vmpage);
 	mutex_init(&ep->ep_lock);
-	cl_page_slice_add(page, &ep->ep_cl, obj, &echo_page_ops);
+	cl_page_slice_add(page, &ep->ep_cl, obj, index, &echo_page_ops);
 	atomic_inc(&eco->eo_npages);
 	return 0;
 }
@@ -568,6 +534,8 @@ static struct lu_object *echo_object_alloc(const struct lu_env *env,
 
 		obj = &echo_obj2cl(eco)->co_lu;
 		cl_object_header_init(hdr);
+		hdr->coh_page_bufsize = cfs_size_round(sizeof(struct cl_page));
+
 		lu_object_init(obj, &hdr->coh_lu, dev);
 		lu_object_add_top(&hdr->coh_lu, obj);
 
@@ -694,8 +662,7 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
 	struct obd_device  *obd = NULL; /* to keep compiler happy */
 	struct obd_device  *tgt;
 	const char *tgt_type_name;
-	int rc;
-	int cleanup = 0;
+	int rc, err;
 
 	ed = kzalloc(sizeof(*ed), GFP_NOFS);
 	if (!ed) {
@@ -703,16 +670,14 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
 		goto out;
 	}
 
-	cleanup = 1;
 	cd = &ed->ed_cl;
 	rc = cl_device_init(cd, t);
 	if (rc)
-		goto out;
+		goto out_free;
 
 	cd->cd_lu_dev.ld_ops = &echo_device_lu_ops;
 	cd->cd_ops = &echo_device_cl_ops;
 
-	cleanup = 2;
 	obd = class_name2obd(lustre_cfg_string(cfg, 0));
 	LASSERT(obd);
 	LASSERT(env);
@@ -722,28 +687,25 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
 		CERROR("Can not find tgt device %s\n",
 		       lustre_cfg_string(cfg, 1));
 		rc = -ENODEV;
-		goto out;
+		goto out_device_fini;
 	}
 
 	next = tgt->obd_lu_dev;
 	if (!strcmp(tgt->obd_type->typ_name, LUSTRE_MDT_NAME)) {
 		CERROR("echo MDT client must be run on server\n");
 		rc = -EOPNOTSUPP;
-		goto out;
+		goto out_device_fini;
 	}
 
 	rc = echo_site_init(env, ed);
 	if (rc)
-		goto out;
-
-	cleanup = 3;
+		goto out_device_fini;
 
 	rc = echo_client_setup(env, obd, cfg);
 	if (rc)
-		goto out;
+		goto out_site_fini;
 
 	ed->ed_ec = &obd->u.echo_client;
-	cleanup = 4;
 
 	/* if echo client is to be stacked upon ost device, the next is
 	 * NULL since ost is not a clio device so far
@@ -755,7 +717,7 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
 	if (next) {
 		if (next->ld_site) {
 			rc = -EBUSY;
-			goto out;
+			goto out_cleanup;
 		}
 
 		next->ld_site = &ed->ed_site->cs_lu;
@@ -763,7 +725,7 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
 						next->ld_type->ldt_name,
 							      NULL);
 		if (rc)
-			goto out;
+			goto out_cleanup;
 
 	} else {
 		LASSERT(strcmp(tgt_type_name, LUSTRE_OST_NAME) == 0);
@@ -771,27 +733,19 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
 
 	ed->ed_next = next;
 	return &cd->cd_lu_dev;
-out:
-	switch (cleanup) {
-	case 4: {
-		int rc2;
-
-		rc2 = echo_client_cleanup(obd);
-		if (rc2)
-			CERROR("Cleanup obd device %s error(%d)\n",
-			       obd->obd_name, rc2);
-	}
 
-	case 3:
-		echo_site_fini(env, ed);
-	case 2:
-		cl_device_fini(&ed->ed_cl);
-	case 1:
-		kfree(ed);
-	case 0:
-	default:
-		break;
-	}
+out_cleanup:
+	err = echo_client_cleanup(obd);
+	if (err)
+		CERROR("Cleanup obd device %s error(%d)\n",
+		       obd->obd_name, err);
+out_site_fini:
+	echo_site_fini(env, ed);
+out_device_fini:
+	cl_device_fini(&ed->ed_cl);
+out_free:
+	kfree(ed);
+out:
 	return ERR_PTR(rc);
 }
 
@@ -819,16 +773,7 @@ static void echo_lock_release(const struct lu_env *env,
 {
 	struct cl_lock *clk = echo_lock2cl(ecl);
 
-	cl_lock_get(clk);
-	cl_unuse(env, clk);
-	cl_lock_release(env, clk, "ec enqueue", ecl->el_object);
-	if (!still_used) {
-		cl_lock_mutex_get(env, clk);
-		cl_lock_cancel(env, clk);
-		cl_lock_delete(env, clk);
-		cl_lock_mutex_put(env, clk);
-	}
-	cl_lock_put(env, clk);
+	cl_lock_release(env, clk);
 }
 
 static struct lu_device *echo_device_free(const struct lu_env *env,
@@ -1022,9 +967,11 @@ static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco,
 
 	info = echo_env_info(env);
 	io = &info->eti_io;
-	descr = &info->eti_descr;
+	lck = &info->eti_lock;
 	obj = echo_obj2cl(eco);
 
+	memset(lck, 0, sizeof(*lck));
+	descr = &lck->cll_descr;
 	descr->cld_obj   = obj;
 	descr->cld_start = cl_index(obj, start);
 	descr->cld_end   = cl_index(obj, end);
@@ -1032,25 +979,20 @@ static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco,
 	descr->cld_enq_flags = enqflags;
 	io->ci_obj = obj;
 
-	lck = cl_lock_request(env, io, descr, "ec enqueue", eco);
-	if (lck) {
+	rc = cl_lock_request(env, io, lck);
+	if (rc == 0) {
 		struct echo_client_obd *ec = eco->eo_dev->ed_ec;
 		struct echo_lock *el;
 
-		rc = cl_wait(env, lck);
-		if (rc == 0) {
-			el = cl2echo_lock(cl_lock_at(lck, &echo_device_type));
-			spin_lock(&ec->ec_lock);
-			if (list_empty(&el->el_chain)) {
-				list_add(&el->el_chain, &ec->ec_locks);
-				el->el_cookie = ++ec->ec_unique;
-			}
-			atomic_inc(&el->el_refcount);
-			*cookie = el->el_cookie;
-			spin_unlock(&ec->ec_lock);
-		} else {
-			cl_lock_release(env, lck, "ec enqueue", current);
+		el = cl2echo_lock(cl_lock_at(lck, &echo_device_type));
+		spin_lock(&ec->ec_lock);
+		if (list_empty(&el->el_chain)) {
+			list_add(&el->el_chain, &ec->ec_locks);
+			el->el_cookie = ++ec->ec_unique;
 		}
+		atomic_inc(&el->el_refcount);
+		*cookie = el->el_cookie;
+		spin_unlock(&ec->ec_lock);
 	}
 	return rc;
 }
@@ -1085,22 +1027,17 @@ static int cl_echo_cancel0(struct lu_env *env, struct echo_device *ed,
 	return 0;
 }
 
-static int cl_echo_async_brw(const struct lu_env *env, struct cl_io *io,
-			     enum cl_req_type unused, struct cl_2queue *queue)
+static void echo_commit_callback(const struct lu_env *env, struct cl_io *io,
+				 struct cl_page *page)
 {
-	struct cl_page *clp;
-	struct cl_page *temp;
-	int result = 0;
+	struct echo_thread_info *info;
+	struct cl_2queue *queue;
 
-	cl_page_list_for_each_safe(clp, temp, &queue->c2_qin) {
-		int rc;
+	info = echo_env_info(env);
+	LASSERT(io == &info->eti_io);
 
-		rc = cl_page_cache_add(env, io, clp, CRT_WRITE);
-		if (rc == 0)
-			continue;
-		result = result ?: rc;
-	}
-	return result;
+	queue = &info->eti_queue;
+	cl_page_list_add(&queue->c2_qout, page);
 }
 
 static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
@@ -1119,7 +1056,7 @@ static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
 	int rc;
 	int i;
 
-	LASSERT((offset & ~CFS_PAGE_MASK) == 0);
+	LASSERT((offset & ~PAGE_MASK) == 0);
 	LASSERT(ed->ed_next);
 	env = cl_env_get(&refcheck);
 	if (IS_ERR(env))
@@ -1179,7 +1116,9 @@ static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
 
 		async = async && (typ == CRT_WRITE);
 		if (async)
-			rc = cl_echo_async_brw(env, io, typ, queue);
+			rc = cl_io_commit_async(env, io, &queue->c2_qin,
+						0, PAGE_SIZE,
+						echo_commit_callback);
 		else
 			rc = cl_io_submit_sync(env, io, typ, queue, 0);
 		CDEBUG(D_INFO, "echo_client %s write returns %d\n",
@@ -1387,7 +1326,7 @@ static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa,
 	LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ);
 
 	if (count <= 0 ||
-	    (count & (~CFS_PAGE_MASK)) != 0)
+	    (count & (~PAGE_MASK)) != 0)
 		return -EINVAL;
 
 	/* XXX think again with misaligned I/O */
@@ -1409,7 +1348,6 @@ static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa,
 	for (i = 0, pgp = pga, off = offset;
 	     i < npages;
 	     i++, pgp++, off += PAGE_SIZE) {
-
 		LASSERT(!pgp->pg);      /* for cleanup */
 
 		rc = -ENOMEM;
@@ -1470,7 +1408,7 @@ static int echo_client_prep_commit(const struct lu_env *env,
 	u64 npages, tot_pages;
 	int i, ret = 0, brw_flags = 0;
 
-	if (count <= 0 || (count & (~CFS_PAGE_MASK)) != 0)
+	if (count <= 0 || (count & (~PAGE_MASK)) != 0)
 		return -EINVAL;
 
 	npages = batch >> PAGE_SHIFT;
@@ -1886,7 +1824,6 @@ static int __init obdecho_init(void)
 static void /*__exit*/ obdecho_exit(void)
 {
 	echo_client_exit();
-
 }
 
 MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
diff --git a/drivers/staging/lustre/lustre/osc/lproc_osc.c b/drivers/staging/lustre/lustre/osc/lproc_osc.c
index a3358c39b2f1..7e83d395b998 100644
--- a/drivers/staging/lustre/lustre/osc/lproc_osc.c
+++ b/drivers/staging/lustre/lustre/osc/lproc_osc.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -121,9 +117,9 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj,
 		atomic_add(added, &osc_pool_req_count);
 	}
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	cli->cl_max_rpcs_in_flight = val;
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return count;
 }
@@ -139,9 +135,9 @@ static ssize_t max_dirty_mb_show(struct kobject *kobj,
 	long val;
 	int mult;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	val = cli->cl_dirty_max;
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	mult = 1 << 20;
 	return lprocfs_read_frac_helper(buf, PAGE_SIZE, val, mult);
@@ -169,10 +165,10 @@ static ssize_t max_dirty_mb_store(struct kobject *kobj,
 	    pages_number > totalram_pages / 4) /* 1/4 of RAM */
 		return -ERANGE;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	cli->cl_dirty_max = (u32)(pages_number << PAGE_SHIFT);
 	osc_wake_cache_waiters(cli);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return count;
 }
@@ -222,8 +218,16 @@ static ssize_t osc_cached_mb_seq_write(struct file *file,
 		return -ERANGE;
 
 	rc = atomic_read(&cli->cl_lru_in_list) - pages_number;
-	if (rc > 0)
-		(void)osc_lru_shrink(cli, rc);
+	if (rc > 0) {
+		struct lu_env *env;
+		int refcheck;
+
+		env = cl_env_get(&refcheck);
+		if (!IS_ERR(env)) {
+			(void)osc_lru_shrink(env, cli, rc, true);
+			cl_env_put(env, &refcheck);
+		}
+	}
 
 	return count;
 }
@@ -239,9 +243,9 @@ static ssize_t cur_dirty_bytes_show(struct kobject *kobj,
 	struct client_obd *cli = &dev->u.cli;
 	int len;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	len = sprintf(buf, "%lu\n", cli->cl_dirty);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return len;
 }
@@ -256,9 +260,9 @@ static ssize_t cur_grant_bytes_show(struct kobject *kobj,
 	struct client_obd *cli = &dev->u.cli;
 	int len;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	len = sprintf(buf, "%lu\n", cli->cl_avail_grant);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return len;
 }
@@ -279,12 +283,12 @@ static ssize_t cur_grant_bytes_store(struct kobject *kobj,
 		return rc;
 
 	/* this is only for shrinking grant */
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	if (val >= cli->cl_avail_grant) {
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 		return -EINVAL;
 	}
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	if (cli->cl_import->imp_state == LUSTRE_IMP_FULL)
 		rc = osc_shrink_grant_to_target(cli, val);
@@ -303,9 +307,9 @@ static ssize_t cur_lost_grant_bytes_show(struct kobject *kobj,
 	struct client_obd *cli = &dev->u.cli;
 	int len;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	len = sprintf(buf, "%lu\n", cli->cl_lost_grant);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return len;
 }
@@ -577,14 +581,31 @@ static ssize_t max_pages_per_rpc_store(struct kobject *kobj,
 	if (val == 0 || val > ocd->ocd_brw_size >> PAGE_SHIFT) {
 		return -ERANGE;
 	}
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	cli->cl_max_pages_per_rpc = val;
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return count;
 }
 LUSTRE_RW_ATTR(max_pages_per_rpc);
 
+static ssize_t unstable_stats_show(struct kobject *kobj,
+				   struct attribute *attr,
+				   char *buf)
+{
+	struct obd_device *dev = container_of(kobj, struct obd_device,
+					      obd_kobj);
+	struct client_obd *cli = &dev->u.cli;
+	int pages, mb;
+
+	pages = atomic_read(&cli->cl_unstable_count);
+	mb = (pages * PAGE_SIZE) >> 20;
+
+	return sprintf(buf, "unstable_pages: %8d\n"
+		       "unstable_mb:    %8d\n", pages, mb);
+}
+LUSTRE_RO_ATTR(unstable_stats);
+
 LPROC_SEQ_FOPS_RO_TYPE(osc, connect_flags);
 LPROC_SEQ_FOPS_RO_TYPE(osc, server_uuid);
 LPROC_SEQ_FOPS_RO_TYPE(osc, conn_uuid);
@@ -623,7 +644,7 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v)
 
 	ktime_get_real_ts64(&now);
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 
 	seq_printf(seq, "snapshot_time:	 %llu.%9lu (secs.usecs)\n",
 		   (s64)now.tv_sec, (unsigned long)now.tv_nsec);
@@ -707,7 +728,7 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v)
 			break;
 	}
 
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return 0;
 }
@@ -794,6 +815,7 @@ static struct attribute *osc_attrs[] = {
 	&lustre_attr_max_pages_per_rpc.attr,
 	&lustre_attr_max_rpcs_in_flight.attr,
 	&lustre_attr_resend_count.attr,
+	&lustre_attr_unstable_stats.attr,
 	NULL,
 };
 
diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c
index 5f25bf83dcfc..d011135802d5 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cache.c
+++ b/drivers/staging/lustre/lustre/osc/osc_cache.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -76,6 +72,8 @@ static inline char *ext_flags(struct osc_extent *ext, char *flags)
 	*buf++ = ext->oe_rw ? 'r' : 'w';
 	if (ext->oe_intree)
 		*buf++ = 'i';
+	if (ext->oe_sync)
+		*buf++ = 'S';
 	if (ext->oe_srvlock)
 		*buf++ = 's';
 	if (ext->oe_hp)
@@ -121,9 +119,13 @@ static const char *oes_strings[] = {
 		__ext->oe_grants, __ext->oe_nr_pages,			      \
 		list_empty_marker(&__ext->oe_pages),			      \
 		waitqueue_active(&__ext->oe_waitq) ? '+' : '-',		      \
-		__ext->oe_osclock, __ext->oe_mppr, __ext->oe_owner,	      \
+		__ext->oe_dlmlock, __ext->oe_mppr, __ext->oe_owner,	      \
 		/* ----- part 4 ----- */				      \
 		## __VA_ARGS__);					      \
+	if (lvl == D_ERROR && __ext->oe_dlmlock)			      \
+		LDLM_ERROR(__ext->oe_dlmlock, "extent: %p", __ext);	      \
+	else								      \
+		LDLM_DEBUG(__ext->oe_dlmlock, "extent: %p", __ext);	      \
 } while (0)
 
 #undef EASSERTF
@@ -240,20 +242,25 @@ static int osc_extent_sanity_check0(struct osc_extent *ext,
 		goto out;
 	}
 
-	if (!ext->oe_osclock && ext->oe_grants > 0) {
+	if (ext->oe_sync && ext->oe_grants > 0) {
 		rc = 90;
 		goto out;
 	}
 
-	if (ext->oe_osclock) {
-		struct cl_lock_descr *descr;
+	if (ext->oe_dlmlock) {
+		struct ldlm_extent *extent;
 
-		descr = &ext->oe_osclock->cll_descr;
-		if (!(descr->cld_start <= ext->oe_start &&
-		      descr->cld_end >= ext->oe_max_end)) {
+		extent = &ext->oe_dlmlock->l_policy_data.l_extent;
+		if (!(extent->start <= cl_offset(osc2cl(obj), ext->oe_start) &&
+		      extent->end >= cl_offset(osc2cl(obj), ext->oe_max_end))) {
 			rc = 100;
 			goto out;
 		}
+
+		if (!(ext->oe_dlmlock->l_granted_mode & (LCK_PW | LCK_GROUP))) {
+			rc = 102;
+			goto out;
+		}
 	}
 
 	if (ext->oe_nr_pages > ext->oe_mppr) {
@@ -276,7 +283,7 @@ static int osc_extent_sanity_check0(struct osc_extent *ext,
 
 	page_count = 0;
 	list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
-		pgoff_t index = oap2cl_page(oap)->cp_index;
+		pgoff_t index = osc_index(oap2osc(oap));
 		++page_count;
 		if (index > ext->oe_end || index < ext->oe_start) {
 			rc = 110;
@@ -359,7 +366,7 @@ static struct osc_extent *osc_extent_alloc(struct osc_object *obj)
 	ext->oe_state = OES_INV;
 	INIT_LIST_HEAD(&ext->oe_pages);
 	init_waitqueue_head(&ext->oe_waitq);
-	ext->oe_osclock = NULL;
+	ext->oe_dlmlock = NULL;
 
 	return ext;
 }
@@ -385,9 +392,11 @@ static void osc_extent_put(const struct lu_env *env, struct osc_extent *ext)
 		LASSERT(ext->oe_state == OES_INV);
 		LASSERT(!ext->oe_intree);
 
-		if (ext->oe_osclock) {
-			cl_lock_put(env, ext->oe_osclock);
-			ext->oe_osclock = NULL;
+		if (ext->oe_dlmlock) {
+			lu_ref_add(&ext->oe_dlmlock->l_reference,
+				   "osc_extent", ext);
+			LDLM_LOCK_PUT(ext->oe_dlmlock);
+			ext->oe_dlmlock = NULL;
 		}
 		osc_extent_free(ext);
 	}
@@ -543,7 +552,7 @@ static int osc_extent_merge(const struct lu_env *env, struct osc_extent *cur,
 	if (cur->oe_max_end != victim->oe_max_end)
 		return -ERANGE;
 
-	LASSERT(cur->oe_osclock == victim->oe_osclock);
+	LASSERT(cur->oe_dlmlock == victim->oe_dlmlock);
 	ppc_bits = osc_cli(obj)->cl_chunkbits - PAGE_SHIFT;
 	chunk_start = cur->oe_start >> ppc_bits;
 	chunk_end = cur->oe_end >> ppc_bits;
@@ -624,10 +633,10 @@ static inline int overlapped(struct osc_extent *ex1, struct osc_extent *ex2)
 static struct osc_extent *osc_extent_find(const struct lu_env *env,
 					  struct osc_object *obj, pgoff_t index,
 					  int *grants)
-
 {
 	struct client_obd *cli = osc_cli(obj);
-	struct cl_lock *lock;
+	struct osc_lock   *olck;
+	struct cl_lock_descr *descr;
 	struct osc_extent *cur;
 	struct osc_extent *ext;
 	struct osc_extent *conflict = NULL;
@@ -644,8 +653,12 @@ static struct osc_extent *osc_extent_find(const struct lu_env *env,
 	if (!cur)
 		return ERR_PTR(-ENOMEM);
 
-	lock = cl_lock_at_pgoff(env, osc2cl(obj), index, NULL, 1, 0);
-	LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE);
+	olck = osc_env_io(env)->oi_write_osclock;
+	LASSERTF(olck, "page %lu is not covered by lock\n", index);
+	LASSERT(olck->ols_state == OLS_GRANTED);
+
+	descr = &olck->ols_cl.cls_lock->cll_descr;
+	LASSERT(descr->cld_mode >= CLM_WRITE);
 
 	LASSERT(cli->cl_chunkbits >= PAGE_SHIFT);
 	ppc_bits = cli->cl_chunkbits - PAGE_SHIFT;
@@ -657,19 +670,23 @@ static struct osc_extent *osc_extent_find(const struct lu_env *env,
 	max_pages = cli->cl_max_pages_per_rpc;
 	LASSERT((max_pages & ~chunk_mask) == 0);
 	max_end = index - (index % max_pages) + max_pages - 1;
-	max_end = min_t(pgoff_t, max_end, lock->cll_descr.cld_end);
+	max_end = min_t(pgoff_t, max_end, descr->cld_end);
 
 	/* initialize new extent by parameters so far */
 	cur->oe_max_end = max_end;
 	cur->oe_start = index & chunk_mask;
 	cur->oe_end = ((index + ~chunk_mask + 1) & chunk_mask) - 1;
-	if (cur->oe_start < lock->cll_descr.cld_start)
-		cur->oe_start = lock->cll_descr.cld_start;
+	if (cur->oe_start < descr->cld_start)
+		cur->oe_start = descr->cld_start;
 	if (cur->oe_end > max_end)
 		cur->oe_end = max_end;
-	cur->oe_osclock = lock;
 	cur->oe_grants = 0;
 	cur->oe_mppr = max_pages;
+	if (olck->ols_dlmlock) {
+		LASSERT(olck->ols_hold);
+		cur->oe_dlmlock = LDLM_LOCK_GET(olck->ols_dlmlock);
+		lu_ref_add(&olck->ols_dlmlock->l_reference, "osc_extent", cur);
+	}
 
 	/* grants has been allocated by caller */
 	LASSERTF(*grants >= chunksize + cli->cl_extent_tax,
@@ -691,7 +708,7 @@ restart:
 			break;
 
 		/* if covering by different locks, no chance to match */
-		if (lock != ext->oe_osclock) {
+		if (olck->ols_dlmlock != ext->oe_dlmlock) {
 			EASSERTF(!overlapped(ext, cur), ext,
 				 EXTSTR"\n", EXTPARA(cur));
 
@@ -795,7 +812,7 @@ restart:
 	if (found) {
 		LASSERT(!conflict);
 		if (!IS_ERR(found)) {
-			LASSERT(found->oe_osclock == cur->oe_osclock);
+			LASSERT(found->oe_dlmlock == cur->oe_dlmlock);
 			OSC_EXTENT_DUMP(D_CACHE, found,
 					"found caching ext for %lu.\n", index);
 		}
@@ -810,7 +827,7 @@ restart:
 		found = osc_extent_hold(cur);
 		osc_extent_insert(obj, cur);
 		OSC_EXTENT_DUMP(D_CACHE, cur, "add into tree %lu/%lu.\n",
-				index, lock->cll_descr.cld_end);
+				index, descr->cld_end);
 	}
 	osc_object_unlock(obj);
 
@@ -856,6 +873,8 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
 
 	ext->oe_rc = rc ?: ext->oe_nr_pages;
 	EASSERT(ergo(rc == 0, ext->oe_state == OES_RPC), ext);
+
+	osc_lru_add_batch(cli, &ext->oe_pages);
 	list_for_each_entry_safe(oap, tmp, &ext->oe_pages, oap_pending_item) {
 		list_del_init(&oap->oap_rpc_item);
 		list_del_init(&oap->oap_pending_item);
@@ -877,10 +896,9 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
 		 * span a whole chunk on the OST side, or our accounting goes
 		 * wrong.  Should match the code in filter_grant_check.
 		 */
-		int offset = oap->oap_page_off & ~CFS_PAGE_MASK;
-		int count = oap->oap_count + (offset & (blocksize - 1));
-		int end = (offset + oap->oap_count) & (blocksize - 1);
-
+		int offset = last_off & ~PAGE_MASK;
+		int count = last_count + (offset & (blocksize - 1));
+		int end = (offset + last_count) & (blocksize - 1);
 		if (end)
 			count += blocksize - end;
 
@@ -943,7 +961,7 @@ static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext,
 			"%s: wait ext to %d timedout, recovery in progress?\n",
 			osc_export(obj)->exp_obd->obd_name, state);
 
-		lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+		lwi = LWI_INTR(NULL, NULL);
 		rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state),
 				  &lwi);
 	}
@@ -990,19 +1008,19 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index,
 
 	/* discard all pages with index greater then trunc_index */
 	list_for_each_entry_safe(oap, tmp, &ext->oe_pages, oap_pending_item) {
-		struct cl_page *sub = oap2cl_page(oap);
-		struct cl_page *page = cl_page_top(sub);
+		pgoff_t index = osc_index(oap2osc(oap));
+		struct cl_page *page = oap2cl_page(oap);
 
 		LASSERT(list_empty(&oap->oap_rpc_item));
 
 		/* only discard the pages with their index greater than
 		 * trunc_index, and ...
 		 */
-		if (sub->cp_index < trunc_index ||
-		    (sub->cp_index == trunc_index && partial)) {
+		if (index < trunc_index ||
+		    (index == trunc_index && partial)) {
 			/* accounting how many pages remaining in the chunk
 			 * so that we can calculate grants correctly. */
-			if (sub->cp_index >> ppc_bits == trunc_chunk)
+			if (index >> ppc_bits == trunc_chunk)
 				++pages_in_chunk;
 			continue;
 		}
@@ -1013,7 +1031,6 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index,
 		lu_ref_add(&page->cp_reference, "truncate", current);
 
 		if (cl_page_own(env, io, page) == 0) {
-			cl_page_unmap(env, io, page);
 			cl_page_discard(env, io, page);
 			cl_page_disown(env, io, page);
 		} else {
@@ -1126,7 +1143,9 @@ static int osc_extent_make_ready(const struct lu_env *env,
 		last->oap_count = osc_refresh_count(env, last, OBD_BRW_WRITE);
 		LASSERT(last->oap_count > 0);
 		LASSERT(last->oap_page_off + last->oap_count <= PAGE_SIZE);
+		spin_lock(&last->oap_lock);
 		last->oap_async_flags |= ASYNC_COUNT_STABLE;
+		spin_unlock(&last->oap_lock);
 	}
 
 	/* for the rest of pages, we don't need to call osf_refresh_count()
@@ -1135,7 +1154,9 @@ static int osc_extent_make_ready(const struct lu_env *env,
 	list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
 		if (!(oap->oap_async_flags & ASYNC_COUNT_STABLE)) {
 			oap->oap_count = PAGE_SIZE - oap->oap_page_off;
+			spin_lock(&last->oap_lock);
 			oap->oap_async_flags |= ASYNC_COUNT_STABLE;
+			spin_unlock(&last->oap_lock);
 		}
 	}
 
@@ -1256,7 +1277,7 @@ static int osc_make_ready(const struct lu_env *env, struct osc_async_page *oap,
 			  int cmd)
 {
 	struct osc_page *opg = oap2osc_page(oap);
-	struct cl_page *page = cl_page_top(oap2cl_page(oap));
+	struct cl_page  *page = oap2cl_page(oap);
 	int result;
 
 	LASSERT(cmd == OBD_BRW_WRITE); /* no cached reads */
@@ -1271,7 +1292,7 @@ static int osc_refresh_count(const struct lu_env *env,
 			     struct osc_async_page *oap, int cmd)
 {
 	struct osc_page *opg = oap2osc_page(oap);
-	struct cl_page *page = oap2cl_page(oap);
+	pgoff_t index = osc_index(oap2osc(oap));
 	struct cl_object *obj;
 	struct cl_attr *attr = &osc_env_info(env)->oti_attr;
 
@@ -1288,10 +1309,10 @@ static int osc_refresh_count(const struct lu_env *env,
 	if (result < 0)
 		return result;
 	kms = attr->cat_kms;
-	if (cl_offset(obj, page->cp_index) >= kms)
+	if (cl_offset(obj, index) >= kms)
 		/* catch race with truncate */
 		return 0;
-	else if (cl_offset(obj, page->cp_index + 1) > kms)
+	else if (cl_offset(obj, index + 1) > kms)
 		/* catch sub-page write at end of file */
 		return kms % PAGE_SIZE;
 	else
@@ -1302,14 +1323,16 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
 			  int cmd, int rc)
 {
 	struct osc_page *opg = oap2osc_page(oap);
-	struct cl_page *page = cl_page_top(oap2cl_page(oap));
+	struct cl_page    *page = oap2cl_page(oap);
 	struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
 	enum cl_req_type crt;
 	int srvlock;
 
 	cmd &= ~OBD_BRW_NOQUOTA;
-	LASSERT(equi(page->cp_state == CPS_PAGEIN,  cmd == OBD_BRW_READ));
-	LASSERT(equi(page->cp_state == CPS_PAGEOUT, cmd == OBD_BRW_WRITE));
+	LASSERTF(equi(page->cp_state == CPS_PAGEIN, cmd == OBD_BRW_READ),
+		 "cp_state:%u, cmd:%d\n", page->cp_state, cmd);
+	LASSERTF(equi(page->cp_state == CPS_PAGEOUT, cmd == OBD_BRW_WRITE),
+		 "cp_state:%u, cmd:%d\n", page->cp_state, cmd);
 	LASSERT(opg->ops_transfer_pinned);
 
 	/*
@@ -1358,22 +1381,28 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
 	return 0;
 }
 
-#define OSC_DUMP_GRANT(cli, fmt, args...) do {				      \
+#define OSC_DUMP_GRANT(lvl, cli, fmt, args...) do {			      \
 	struct client_obd *__tmp = (cli);				      \
-	CDEBUG(D_CACHE, "%s: { dirty: %ld/%ld dirty_pages: %d/%d "	      \
-	       "dropped: %ld avail: %ld, reserved: %ld, flight: %d } " fmt,   \
+	CDEBUG(lvl, "%s: grant { dirty: %ld/%ld dirty_pages: %d/%d "	      \
+	       "unstable_pages: %d/%d dropped: %ld avail: %ld, "	      \
+	       "reserved: %ld, flight: %d } lru {in list: %d, "		      \
+	       "left: %d, waiters: %d }" fmt,                                 \
 	       __tmp->cl_import->imp_obd->obd_name,			      \
 	       __tmp->cl_dirty, __tmp->cl_dirty_max,			      \
 	       atomic_read(&obd_dirty_pages), obd_max_dirty_pages,	      \
+	       atomic_read(&obd_unstable_pages), obd_max_dirty_pages,	      \
 	       __tmp->cl_lost_grant, __tmp->cl_avail_grant,		      \
-	       __tmp->cl_reserved_grant, __tmp->cl_w_in_flight, ##args);      \
+	       __tmp->cl_reserved_grant, __tmp->cl_w_in_flight,		      \
+	       atomic_read(&__tmp->cl_lru_in_list),			      \
+	       atomic_read(&__tmp->cl_lru_busy),			      \
+	       atomic_read(&__tmp->cl_lru_shrinkers), ##args);		      \
 } while (0)
 
 /* caller must hold loi_list_lock */
 static void osc_consume_write_grant(struct client_obd *cli,
 				    struct brw_page *pga)
 {
-	assert_spin_locked(&cli->cl_loi_list_lock.lock);
+	assert_spin_locked(&cli->cl_loi_list_lock);
 	LASSERT(!(pga->flag & OBD_BRW_FROM_GRANT));
 	atomic_inc(&obd_dirty_pages);
 	cli->cl_dirty += PAGE_SIZE;
@@ -1389,7 +1418,7 @@ static void osc_consume_write_grant(struct client_obd *cli,
 static void osc_release_write_grant(struct client_obd *cli,
 				    struct brw_page *pga)
 {
-	assert_spin_locked(&cli->cl_loi_list_lock.lock);
+	assert_spin_locked(&cli->cl_loi_list_lock);
 	if (!(pga->flag & OBD_BRW_FROM_GRANT)) {
 		return;
 	}
@@ -1408,7 +1437,7 @@ static void osc_release_write_grant(struct client_obd *cli,
  * To avoid sleeping with object lock held, it's good for us allocate enough
  * grants before entering into critical section.
  *
- * client_obd_list_lock held by caller
+ * spin_lock held by caller
  */
 static int osc_reserve_grant(struct client_obd *cli, unsigned int bytes)
 {
@@ -1442,11 +1471,11 @@ static void __osc_unreserve_grant(struct client_obd *cli,
 static void osc_unreserve_grant(struct client_obd *cli,
 				unsigned int reserved, unsigned int unused)
 {
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	__osc_unreserve_grant(cli, reserved, unused);
 	if (unused > 0)
 		osc_wake_cache_waiters(cli);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 }
 
 /**
@@ -1467,7 +1496,7 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
 {
 	int grant = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	atomic_sub(nr_pages, &obd_dirty_pages);
 	cli->cl_dirty -= nr_pages << PAGE_SHIFT;
 	cli->cl_lost_grant += lost_grant;
@@ -1479,7 +1508,7 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
 		cli->cl_avail_grant += grant;
 	}
 	osc_wake_cache_waiters(cli);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 	CDEBUG(D_CACHE, "lost %u grant: %lu avail: %lu dirty: %lu\n",
 	       lost_grant, cli->cl_lost_grant,
 	       cli->cl_avail_grant, cli->cl_dirty);
@@ -1491,9 +1520,9 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
  */
 static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap)
 {
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	osc_release_write_grant(cli, &oap->oap_brw_page);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 }
 
 /**
@@ -1506,14 +1535,15 @@ static int osc_enter_cache_try(struct client_obd *cli,
 {
 	int rc;
 
-	OSC_DUMP_GRANT(cli, "need:%d.\n", bytes);
+	OSC_DUMP_GRANT(D_CACHE, cli, "need:%d.\n", bytes);
 
 	rc = osc_reserve_grant(cli, bytes);
 	if (rc < 0)
 		return 0;
 
 	if (cli->cl_dirty + PAGE_SIZE <= cli->cl_dirty_max &&
-	    atomic_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) {
+	    atomic_read(&obd_unstable_pages) + 1 +
+	    atomic_read(&obd_dirty_pages) <= obd_max_dirty_pages) {
 		osc_consume_write_grant(cli, &oap->oap_brw_page);
 		if (transient) {
 			cli->cl_dirty_transit += PAGE_SIZE;
@@ -1532,9 +1562,9 @@ static int ocw_granted(struct client_obd *cli, struct osc_cache_waiter *ocw)
 {
 	int rc;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	rc = list_empty(&ocw->ocw_entry);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 	return rc;
 }
 
@@ -1551,12 +1581,13 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
 	struct osc_object *osc = oap->oap_obj;
 	struct lov_oinfo *loi = osc->oo_oinfo;
 	struct osc_cache_waiter ocw;
-	struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+	struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(600), NULL,
+						  LWI_ON_SIGNAL_NOOP, NULL);
 	int rc = -EDQUOT;
 
-	OSC_DUMP_GRANT(cli, "need:%d.\n", bytes);
+	OSC_DUMP_GRANT(D_CACHE, cli, "need:%d.\n", bytes);
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 
 	/* force the caller to try sync io.  this can jump the list
 	 * of queued writes and create a discontiguous rpc stream
@@ -1587,7 +1618,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
 	while (cli->cl_dirty > 0 || cli->cl_w_in_flight > 0) {
 		list_add_tail(&ocw.ocw_entry, &cli->cl_cache_waiters);
 		ocw.ocw_rc = 0;
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 
 		osc_io_unplug_async(env, cli, NULL);
 
@@ -1596,10 +1627,17 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
 
 		rc = l_wait_event(ocw.ocw_waitq, ocw_granted(cli, &ocw), &lwi);
 
-		client_obd_list_lock(&cli->cl_loi_list_lock);
+		spin_lock(&cli->cl_loi_list_lock);
 
-		/* l_wait_event is interrupted by signal */
+		/* l_wait_event is interrupted by signal, or timed out */
 		if (rc < 0) {
+			if (rc == -ETIMEDOUT) {
+				OSC_DUMP_GRANT(D_ERROR, cli,
+					       "try to reserve %d.\n", bytes);
+				osc_extent_tree_dump(D_ERROR, osc);
+				rc = -EDQUOT;
+			}
+
 			list_del_init(&ocw.ocw_entry);
 			goto out;
 		}
@@ -1615,8 +1653,8 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
 		}
 	}
 out:
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
-	OSC_DUMP_GRANT(cli, "returned %d.\n", rc);
+	spin_unlock(&cli->cl_loi_list_lock);
+	OSC_DUMP_GRANT(D_CACHE, cli, "returned %d.\n", rc);
 	return rc;
 }
 
@@ -1633,8 +1671,8 @@ void osc_wake_cache_waiters(struct client_obd *cli)
 		ocw->ocw_rc = -EDQUOT;
 		/* we can't dirty more */
 		if ((cli->cl_dirty + PAGE_SIZE > cli->cl_dirty_max) ||
-		    (atomic_read(&obd_dirty_pages) + 1 >
-		     obd_max_dirty_pages)) {
+		    (atomic_read(&obd_unstable_pages) + 1 +
+		     atomic_read(&obd_dirty_pages) > obd_max_dirty_pages)) {
 			CDEBUG(D_CACHE, "no dirty room: dirty: %ld osc max %ld, sys max %d\n",
 			       cli->cl_dirty,
 			       cli->cl_dirty_max, obd_max_dirty_pages);
@@ -1776,9 +1814,9 @@ static int osc_list_maint(struct client_obd *cli, struct osc_object *osc)
 {
 	int is_ready;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	is_ready = __osc_list_maint(cli, osc);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return is_ready;
 }
@@ -1799,13 +1837,103 @@ static void osc_process_ar(struct osc_async_rc *ar, __u64 xid,
 		ar->ar_force_sync = 1;
 		ar->ar_min_xid = ptlrpc_sample_next_xid();
 		return;
-
 	}
 
 	if (ar->ar_force_sync && (xid >= ar->ar_min_xid))
 		ar->ar_force_sync = 0;
 }
 
+/**
+ * Performs "unstable" page accounting. This function balances the
+ * increment operations performed in osc_inc_unstable_pages. It is
+ * registered as the RPC request callback, and is executed when the
+ * bulk RPC is committed on the server. Thus at this point, the pages
+ * involved in the bulk transfer are no longer considered unstable.
+ */
+void osc_dec_unstable_pages(struct ptlrpc_request *req)
+{
+	struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
+	int page_count = desc->bd_iov_count;
+	int i;
+
+	/* No unstable page tracking */
+	if (!cli->cl_cache)
+		return;
+
+	LASSERT(page_count >= 0);
+
+	for (i = 0; i < page_count; i++)
+		dec_node_page_state(desc->bd_iov[i].kiov_page,
+							NR_UNSTABLE_NFS);
+
+	atomic_sub(page_count, &cli->cl_cache->ccc_unstable_nr);
+	LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
+
+	atomic_sub(page_count, &cli->cl_unstable_count);
+	LASSERT(atomic_read(&cli->cl_unstable_count) >= 0);
+
+	atomic_sub(page_count, &obd_unstable_pages);
+	LASSERT(atomic_read(&obd_unstable_pages) >= 0);
+
+	spin_lock(&req->rq_lock);
+	req->rq_committed = 1;
+	req->rq_unstable  = 0;
+	spin_unlock(&req->rq_lock);
+
+	wake_up_all(&cli->cl_cache->ccc_unstable_waitq);
+}
+
+/* "unstable" page accounting. See: osc_dec_unstable_pages. */
+void osc_inc_unstable_pages(struct ptlrpc_request *req)
+{
+	struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
+	long page_count = desc->bd_iov_count;
+	int i;
+
+	/* No unstable page tracking */
+	if (!cli->cl_cache)
+		return;
+
+	LASSERT(page_count >= 0);
+
+	for (i = 0; i < page_count; i++)
+		inc_node_page_state(desc->bd_iov[i].kiov_page,
+							NR_UNSTABLE_NFS);
+
+	LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
+	atomic_add(page_count, &cli->cl_cache->ccc_unstable_nr);
+
+	LASSERT(atomic_read(&cli->cl_unstable_count) >= 0);
+	atomic_add(page_count, &cli->cl_unstable_count);
+
+	LASSERT(atomic_read(&obd_unstable_pages) >= 0);
+	atomic_add(page_count, &obd_unstable_pages);
+
+	spin_lock(&req->rq_lock);
+
+	/*
+	 * If the request has already been committed (i.e. brw_commit
+	 * called via rq_commit_cb), we need to undo the unstable page
+	 * increments we just performed because rq_commit_cb wont be
+	 * called again. Otherwise, just set the commit callback so the
+	 * unstable page accounting is properly updated when the request
+	 * is committed
+	 */
+	if (req->rq_committed) {
+		/* Drop lock before calling osc_dec_unstable_pages */
+		spin_unlock(&req->rq_lock);
+		osc_dec_unstable_pages(req);
+		spin_lock(&req->rq_lock);
+	} else {
+		req->rq_unstable = 1;
+		req->rq_commit_cb = osc_dec_unstable_pages;
+	}
+
+	spin_unlock(&req->rq_lock);
+}
+
 /* this must be called holding the loi list lock to give coverage to exit_cache,
  * async_flag maintenance, and oap_request
  */
@@ -1817,6 +1945,9 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
 	__u64 xid = 0;
 
 	if (oap->oap_request) {
+		if (!rc)
+			osc_inc_unstable_pages(oap->oap_request);
+
 		xid = ptlrpc_req_xid(oap->oap_request);
 		ptlrpc_req_finished(oap->oap_request);
 		oap->oap_request = NULL;
@@ -1829,10 +1960,10 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
 	oap->oap_interrupted = 0;
 
 	if (oap->oap_cmd & OBD_BRW_WRITE && xid > 0) {
-		client_obd_list_lock(&cli->cl_loi_list_lock);
+		spin_lock(&cli->cl_loi_list_lock);
 		osc_process_ar(&cli->cl_ar, xid, rc);
 		osc_process_ar(&loi->loi_ar, xid, rc);
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 	}
 
 	rc = osc_completion(env, oap, oap->oap_cmd, rc);
@@ -2133,9 +2264,8 @@ static void osc_check_rpcs(const struct lu_env *env, struct client_obd *cli)
 		}
 
 		cl_object_get(obj);
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
-		lu_object_ref_add_at(&obj->co_lu, &link, "check",
-				     current);
+		spin_unlock(&cli->cl_loi_list_lock);
+		lu_object_ref_add_at(&obj->co_lu, &link, "check", current);
 
 		/* attempt some read/write balancing by alternating between
 		 * reads and writes in an object.  The makes_rpc checks here
@@ -2178,11 +2308,10 @@ static void osc_check_rpcs(const struct lu_env *env, struct client_obd *cli)
 		osc_object_unlock(osc);
 
 		osc_list_maint(cli, osc);
-		lu_object_ref_del_at(&obj->co_lu, &link, "check",
-				     current);
+		lu_object_ref_del_at(&obj->co_lu, &link, "check", current);
 		cl_object_put(env, obj);
 
-		client_obd_list_lock(&cli->cl_loi_list_lock);
+		spin_lock(&cli->cl_loi_list_lock);
 	}
 }
 
@@ -2199,9 +2328,9 @@ static int osc_io_unplug0(const struct lu_env *env, struct client_obd *cli,
 		 * potential stack overrun problem. LU-2859
 		 */
 		atomic_inc(&cli->cl_lru_shrinkers);
-		client_obd_list_lock(&cli->cl_loi_list_lock);
+		spin_lock(&cli->cl_loi_list_lock);
 		osc_check_rpcs(env, cli);
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 		atomic_dec(&cli->cl_lru_shrinkers);
 	} else {
 		CDEBUG(D_CACHE, "Queue writeback work for client %p.\n", cli);
@@ -2238,9 +2367,9 @@ int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
 
 	oap->oap_page = page;
 	oap->oap_obj_off = offset;
-	LASSERT(!(offset & ~CFS_PAGE_MASK));
+	LASSERT(!(offset & ~PAGE_MASK));
 
-	if (!client_is_remote(exp) && capable(CFS_CAP_SYS_RESOURCE))
+	if (capable(CFS_CAP_SYS_RESOURCE))
 		oap->oap_brw_flags = OBD_BRW_NOQUOTA;
 
 	INIT_LIST_HEAD(&oap->oap_pending_item);
@@ -2279,8 +2408,7 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
 
 	/* Set the OBD_BRW_SRVLOCK before the page is queued. */
 	brw_flags |= ops->ops_srvlock ? OBD_BRW_SRVLOCK : 0;
-	if (!client_is_remote(osc_export(osc)) &&
-	    capable(CFS_CAP_SYS_RESOURCE)) {
+	if (capable(CFS_CAP_SYS_RESOURCE)) {
 		brw_flags |= OBD_BRW_NOQUOTA;
 		cmd |= OBD_BRW_NOQUOTA;
 	}
@@ -2306,16 +2434,23 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
 			return rc;
 	}
 
+	if (osc_over_unstable_soft_limit(cli))
+		brw_flags |= OBD_BRW_SOFT_SYNC;
+
 	oap->oap_cmd = cmd;
 	oap->oap_page_off = ops->ops_from;
 	oap->oap_count = ops->ops_to - ops->ops_from;
+	/*
+	 * No need to hold a lock here,
+	 * since this page is not in any list yet.
+	 */
 	oap->oap_async_flags = 0;
 	oap->oap_brw_flags = brw_flags;
 
 	OSC_IO_DEBUG(osc, "oap %p page %p added for cmd %d\n",
 		     oap, oap->oap_page, oap->oap_cmd & OBD_BRW_RWMASK);
 
-	index = oap2cl_page(oap)->cp_index;
+	index = osc_index(oap2osc(oap));
 
 	/* Add this page into extent by the following steps:
 	 * 1. if there exists an active extent for this IO, mostly this page
@@ -2334,9 +2469,9 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
 			grants = 0;
 
 		/* it doesn't need any grant to dirty this page */
-		client_obd_list_lock(&cli->cl_loi_list_lock);
+		spin_lock(&cli->cl_loi_list_lock);
 		rc = osc_enter_cache_try(cli, oap, grants, 0);
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 		if (rc == 0) { /* try failed */
 			grants = 0;
 			need_release = 1;
@@ -2427,21 +2562,21 @@ int osc_teardown_async_page(const struct lu_env *env,
 	LASSERT(oap->oap_magic == OAP_MAGIC);
 
 	CDEBUG(D_INFO, "teardown oap %p page %p at index %lu.\n",
-	       oap, ops, oap2cl_page(oap)->cp_index);
+	       oap, ops, osc_index(oap2osc(oap)));
 
 	osc_object_lock(obj);
 	if (!list_empty(&oap->oap_rpc_item)) {
 		CDEBUG(D_CACHE, "oap %p is not in cache.\n", oap);
 		rc = -EBUSY;
 	} else if (!list_empty(&oap->oap_pending_item)) {
-		ext = osc_extent_lookup(obj, oap2cl_page(oap)->cp_index);
+		ext = osc_extent_lookup(obj, osc_index(oap2osc(oap)));
 		/* only truncated pages are allowed to be taken out.
 		 * See osc_extent_truncate() and osc_cache_truncate_start()
 		 * for details.
 		 */
 		if (ext && ext->oe_state != OES_TRUNC) {
 			OSC_EXTENT_DUMP(D_ERROR, ext, "trunc at %lu.\n",
-					oap2cl_page(oap)->cp_index);
+					osc_index(oap2osc(oap)));
 			rc = -EBUSY;
 		}
 	}
@@ -2464,7 +2599,7 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
 	struct osc_extent *ext = NULL;
 	struct osc_object *obj = cl2osc(ops->ops_cl.cpl_obj);
 	struct cl_page *cp = ops->ops_cl.cpl_page;
-	pgoff_t	index = cp->cp_index;
+	pgoff_t            index = osc_index(ops);
 	struct osc_async_page *oap = &ops->ops_oap;
 	bool unplug = false;
 	int rc = 0;
@@ -2479,8 +2614,7 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
 	switch (ext->oe_state) {
 	case OES_RPC:
 	case OES_LOCK_DONE:
-		CL_PAGE_DEBUG(D_ERROR, env, cl_page_top(cp),
-			      "flush an in-rpc page?\n");
+		CL_PAGE_DEBUG(D_ERROR, env, cp, "flush an in-rpc page?\n");
 		LASSERT(0);
 		break;
 	case OES_LOCKING:
@@ -2506,7 +2640,7 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
 		break;
 	}
 
-	rc = cl_page_prep(env, io, cl_page_top(cp), CRT_WRITE);
+	rc = cl_page_prep(env, io, cp, CRT_WRITE);
 	if (rc)
 		goto out;
 
@@ -2550,7 +2684,7 @@ int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops)
 	struct osc_extent *ext;
 	struct osc_extent *found = NULL;
 	struct list_head *plist;
-	pgoff_t index = oap2cl_page(oap)->cp_index;
+	pgoff_t index = osc_index(ops);
 	int rc = -EBUSY;
 	int cmd;
 
@@ -2613,12 +2747,12 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
 	pgoff_t end = 0;
 
 	list_for_each_entry(oap, list, oap_pending_item) {
-		struct cl_page *cp = oap2cl_page(oap);
+		pgoff_t index = osc_index(oap2osc(oap));
 
-		if (cp->cp_index > end)
-			end = cp->cp_index;
-		if (cp->cp_index < start)
-			start = cp->cp_index;
+		if (index > end)
+			end = index;
+		if (index < start)
+			start = index;
 		++page_count;
 		mppr <<= (page_count > mppr);
 	}
@@ -2633,9 +2767,11 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
 	}
 
 	ext->oe_rw = !!(cmd & OBD_BRW_READ);
+	ext->oe_sync = 1;
 	ext->oe_urgent = 1;
 	ext->oe_start = start;
-	ext->oe_end = ext->oe_max_end = end;
+	ext->oe_end = end;
+	ext->oe_max_end = end;
 	ext->oe_obj = obj;
 	ext->oe_srvlock = !!(brw_flags & OBD_BRW_SRVLOCK);
 	ext->oe_nr_pages = page_count;
@@ -2988,7 +3124,201 @@ int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj,
 			result = rc;
 	}
 
-	OSC_IO_DEBUG(obj, "cache page out.\n");
+	OSC_IO_DEBUG(obj, "pageout [%lu, %lu], %d.\n", start, end, result);
+	return result;
+}
+
+/**
+ * Returns a list of pages by a given [start, end] of \a obj.
+ *
+ * \param resched If not NULL, then we give up before hogging CPU for too
+ * long and set *resched = 1, in that case caller should implement a retry
+ * logic.
+ *
+ * Gang tree lookup (radix_tree_gang_lookup()) optimization is absolutely
+ * crucial in the face of [offset, EOF] locks.
+ *
+ * Return at least one page in @queue unless there is no covered page.
+ */
+int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io,
+			 struct osc_object *osc, pgoff_t start, pgoff_t end,
+			 osc_page_gang_cbt cb, void *cbdata)
+{
+	struct osc_page *ops;
+	void            **pvec;
+	pgoff_t         idx;
+	unsigned int    nr;
+	unsigned int    i;
+	unsigned int    j;
+	int             res = CLP_GANG_OKAY;
+	bool            tree_lock = true;
+
+	idx = start;
+	pvec = osc_env_info(env)->oti_pvec;
+	spin_lock(&osc->oo_tree_lock);
+	while ((nr = radix_tree_gang_lookup(&osc->oo_tree, pvec,
+					    idx, OTI_PVEC_SIZE)) > 0) {
+		struct cl_page *page;
+		bool end_of_region = false;
+
+		for (i = 0, j = 0; i < nr; ++i) {
+			ops = pvec[i];
+			pvec[i] = NULL;
+
+			idx = osc_index(ops);
+			if (idx > end) {
+				end_of_region = true;
+				break;
+			}
+
+			page = ops->ops_cl.cpl_page;
+			LASSERT(page->cp_type == CPT_CACHEABLE);
+			if (page->cp_state == CPS_FREEING)
+				continue;
+
+			cl_page_get(page);
+			lu_ref_add_atomic(&page->cp_reference,
+					  "gang_lookup", current);
+			pvec[j++] = ops;
+		}
+		++idx;
+
+		/*
+		 * Here a delicate locking dance is performed. Current thread
+		 * holds a reference to a page, but has to own it before it
+		 * can be placed into queue. Owning implies waiting, so
+		 * radix-tree lock is to be released. After a wait one has to
+		 * check that pages weren't truncated (cl_page_own() returns
+		 * error in the latter case).
+		 */
+		spin_unlock(&osc->oo_tree_lock);
+		tree_lock = false;
+
+		for (i = 0; i < j; ++i) {
+			ops = pvec[i];
+			if (res == CLP_GANG_OKAY)
+				res = (*cb)(env, io, ops, cbdata);
+
+			page = ops->ops_cl.cpl_page;
+			lu_ref_del(&page->cp_reference, "gang_lookup", current);
+			cl_page_put(env, page);
+		}
+		if (nr < OTI_PVEC_SIZE || end_of_region)
+			break;
+
+		if (res == CLP_GANG_OKAY && need_resched())
+			res = CLP_GANG_RESCHED;
+		if (res != CLP_GANG_OKAY)
+			break;
+
+		spin_lock(&osc->oo_tree_lock);
+		tree_lock = true;
+	}
+	if (tree_lock)
+		spin_unlock(&osc->oo_tree_lock);
+	return res;
+}
+
+/**
+ * Check if page @page is covered by an extra lock or discard it.
+ */
+static int check_and_discard_cb(const struct lu_env *env, struct cl_io *io,
+				struct osc_page *ops, void *cbdata)
+{
+	struct osc_thread_info *info = osc_env_info(env);
+	struct osc_object *osc = cbdata;
+	pgoff_t index;
+
+	index = osc_index(ops);
+	if (index >= info->oti_fn_index) {
+		struct ldlm_lock *tmp;
+		struct cl_page *page = ops->ops_cl.cpl_page;
+
+		/* refresh non-overlapped index */
+		tmp = osc_dlmlock_at_pgoff(env, osc, index, 0, 0);
+		if (tmp) {
+			__u64 end = tmp->l_policy_data.l_extent.end;
+			/* Cache the first-non-overlapped index so as to skip
+			 * all pages within [index, oti_fn_index). This is safe
+			 * because if tmp lock is canceled, it will discard
+			 * these pages.
+			 */
+			info->oti_fn_index = cl_index(osc2cl(osc), end + 1);
+			if (end == OBD_OBJECT_EOF)
+				info->oti_fn_index = CL_PAGE_EOF;
+			LDLM_LOCK_PUT(tmp);
+		} else if (cl_page_own(env, io, page) == 0) {
+			/* discard the page */
+			cl_page_discard(env, io, page);
+			cl_page_disown(env, io, page);
+		} else {
+			LASSERT(page->cp_state == CPS_FREEING);
+		}
+	}
+
+	info->oti_next_index = index + 1;
+	return CLP_GANG_OKAY;
+}
+
+static int discard_cb(const struct lu_env *env, struct cl_io *io,
+		      struct osc_page *ops, void *cbdata)
+{
+	struct osc_thread_info *info = osc_env_info(env);
+	struct cl_page *page = ops->ops_cl.cpl_page;
+
+	/* page is top page. */
+	info->oti_next_index = osc_index(ops) + 1;
+	if (cl_page_own(env, io, page) == 0) {
+		KLASSERT(ergo(page->cp_type == CPT_CACHEABLE,
+			      !PageDirty(cl_page_vmpage(page))));
+
+		/* discard the page */
+		cl_page_discard(env, io, page);
+		cl_page_disown(env, io, page);
+	} else {
+		LASSERT(page->cp_state == CPS_FREEING);
+	}
+
+	return CLP_GANG_OKAY;
+}
+
+/**
+ * Discard pages protected by the given lock. This function traverses radix
+ * tree to find all covering pages and discard them. If a page is being covered
+ * by other locks, it should remain in cache.
+ *
+ * If error happens on any step, the process continues anyway (the reasoning
+ * behind this being that lock cancellation cannot be delayed indefinitely).
+ */
+int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc,
+			   pgoff_t start, pgoff_t end, enum cl_lock_mode mode)
+{
+	struct osc_thread_info *info = osc_env_info(env);
+	struct cl_io *io = &info->oti_io;
+	osc_page_gang_cbt cb;
+	int res;
+	int result;
+
+	io->ci_obj = cl_object_top(osc2cl(osc));
+	io->ci_ignore_layout = 1;
+	result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
+	if (result != 0)
+		goto out;
+
+	cb = mode == CLM_READ ? check_and_discard_cb : discard_cb;
+	info->oti_fn_index = start;
+	info->oti_next_index = start;
+	do {
+		res = osc_page_gang_lookup(env, io, osc,
+					   info->oti_next_index, end, cb, osc);
+		if (info->oti_next_index > end)
+			break;
+
+		if (res == CLP_GANG_RESCHED)
+			cond_resched();
+	} while (res != CLP_GANG_OKAY);
+out:
+	cl_io_fini(env, io);
 	return result;
 }
 
diff --git a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
index d55d04d0428b..c8c3f1ca77be 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
+++ b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -51,7 +47,6 @@
 #include "../include/obd.h"
 /* osc_build_res_name() */
 #include "../include/cl_object.h"
-#include "../include/lclient.h"
 #include "osc_internal.h"
 
 /** \defgroup osc osc
@@ -67,7 +62,10 @@ struct osc_io {
 	/** super class */
 	struct cl_io_slice oi_cl;
 	/** true if this io is lockless. */
-	int		oi_lockless;
+	unsigned int		oi_lockless;
+	/** how many LRU pages are reserved for this IO */
+	int oi_lru_reserved;
+
 	/** active extents, we know how many bytes is going to be written,
 	 * so having an active extent will prevent it from being fragmented
 	 */
@@ -77,6 +75,8 @@ struct osc_io {
 	 */
 	struct osc_extent *oi_trunc;
 
+	/** write osc_lock for this IO, used by osc_extent_find(). */
+	struct osc_lock   *oi_write_osclock;
 	struct obd_info    oi_info;
 	struct obdo	oi_oa;
 	struct osc_async_cbargs {
@@ -100,7 +100,7 @@ struct osc_session {
 	struct osc_io       os_io;
 };
 
-#define OTI_PVEC_SIZE 64
+#define OTI_PVEC_SIZE 256
 struct osc_thread_info {
 	struct ldlm_res_id      oti_resname;
 	ldlm_policy_data_t      oti_policy;
@@ -109,7 +109,13 @@ struct osc_thread_info {
 	struct lustre_handle    oti_handle;
 	struct cl_page_list     oti_plist;
 	struct cl_io		oti_io;
-	struct cl_page	       *oti_pvec[OTI_PVEC_SIZE];
+	void			*oti_pvec[OTI_PVEC_SIZE];
+	/**
+	 * Fields used by cl_lock_discard_pages().
+	 */
+	pgoff_t			oti_next_index;
+	pgoff_t			oti_fn_index; /* first non-overlapped index */
+	struct cl_sync_io	oti_anchor;
 };
 
 struct osc_object {
@@ -125,7 +131,7 @@ struct osc_object {
 	 */
 	struct list_head	 oo_inflight[CRT_NR];
 	/**
-	 * Lock, protecting ccc_object::cob_inflight, because a seat-belt is
+	 * Lock, protecting osc_page::ops_inflight, because a seat-belt is
 	 * locked during take-off and landing.
 	 */
 	spinlock_t	   oo_seatbelt;
@@ -159,6 +165,17 @@ struct osc_object {
 	 * oo_{read|write}_pages soon.
 	 */
 	spinlock_t	    oo_lock;
+
+	/**
+	 * Radix tree for caching pages
+	 */
+	struct radix_tree_root	oo_tree;
+	spinlock_t		oo_tree_lock;
+	unsigned long		oo_npages;
+
+	/* Protect osc_lock this osc_object has */
+	spinlock_t		oo_ol_spin;
+	struct list_head	oo_ol_list;
 };
 
 static inline void osc_object_lock(struct osc_object *obj)
@@ -198,8 +215,6 @@ enum osc_lock_state {
 	OLS_ENQUEUED,
 	OLS_UPCALL_RECEIVED,
 	OLS_GRANTED,
-	OLS_RELEASED,
-	OLS_BLOCKED,
 	OLS_CANCELLED
 };
 
@@ -208,10 +223,8 @@ enum osc_lock_state {
  *
  * Interaction with DLM.
  *
- * CLIO enqueues all DLM locks through ptlrpcd (that is, in "async" mode).
- *
  * Once receive upcall is invoked, osc_lock remembers a handle of DLM lock in
- * osc_lock::ols_handle and a pointer to that lock in osc_lock::ols_lock.
+ * osc_lock::ols_handle and a pointer to that lock in osc_lock::ols_dlmlock.
  *
  * This pointer is protected through a reference, acquired by
  * osc_lock_upcall0(). Also, an additional reference is acquired by
@@ -249,26 +262,27 @@ enum osc_lock_state {
  */
 struct osc_lock {
 	struct cl_lock_slice     ols_cl;
+	/** Internal lock to protect states, etc. */
+	spinlock_t		ols_lock;
+	/** Owner sleeps on this channel for state change */
+	struct cl_sync_io	*ols_owner;
+	/** waiting list for this lock to be cancelled */
+	struct list_head	ols_waiting_list;
+	/** wait entry of ols_waiting_list */
+	struct list_head	ols_wait_entry;
+	/** list entry for osc_object::oo_ol_list */
+	struct list_head	ols_nextlock_oscobj;
+
 	/** underlying DLM lock */
-	struct ldlm_lock	*ols_lock;
-	/** lock value block */
-	struct ost_lvb	   ols_lvb;
+	struct ldlm_lock	*ols_dlmlock;
 	/** DLM flags with which osc_lock::ols_lock was enqueued */
 	__u64		    ols_flags;
 	/** osc_lock::ols_lock handle */
 	struct lustre_handle     ols_handle;
 	struct ldlm_enqueue_info ols_einfo;
 	enum osc_lock_state      ols_state;
-
-	/**
-	 * How many pages are using this lock for io, currently only used by
-	 * read-ahead. If non-zero, the underlying dlm lock won't be cancelled
-	 * during recovery to avoid deadlock. see bz16774.
-	 *
-	 * \see osc_page::ops_lock
-	 * \see osc_page_addref_lock(), osc_page_putref_lock()
-	 */
-	atomic_t	     ols_pageref;
+	/** lock value block */
+	struct ost_lvb		ols_lvb;
 
 	/**
 	 * true, if ldlm_lock_addref() was called against
@@ -299,16 +313,6 @@ struct osc_lock {
 	 */
 				 ols_locklessable:1,
 	/**
-	 * set by osc_lock_use() to wait until blocking AST enters into
-	 * osc_ldlm_blocking_ast0(), so that cl_lock mutex can be used for
-	 * further synchronization.
-	 */
-				 ols_ast_wait:1,
-	/**
-	 * If the data of this lock has been flushed to server side.
-	 */
-				 ols_flush:1,
-	/**
 	 * if set, the osc_lock is a glimpse lock. For glimpse locks, we treat
 	 * the EVAVAIL error as tolerable, this will make upper logic happy
 	 * to wait all glimpse locks to each OSTs to be completed.
@@ -321,15 +325,6 @@ struct osc_lock {
 	 * For async glimpse lock.
 	 */
 				 ols_agl:1;
-	/**
-	 * IO that owns this lock. This field is used for a dead-lock
-	 * avoidance by osc_lock_enqueue_wait().
-	 *
-	 * XXX: unfortunately, the owner of a osc_lock is not unique,
-	 * the lock may have multiple users, if the lock is granted and
-	 * then matched.
-	 */
-	struct osc_io	   *ols_owner;
 };
 
 /**
@@ -357,11 +352,6 @@ struct osc_page {
 	 */
 	unsigned	      ops_transfer_pinned:1,
 	/**
-	 * True for a `temporary page' created by read-ahead code, probably
-	 * outside of any DLM lock.
-	 */
-			      ops_temp:1,
-	/**
 	 * in LRU?
 	 */
 			      ops_in_lru:1,
@@ -369,18 +359,15 @@ struct osc_page {
 	 * Set if the page must be transferred with OBD_BRW_SRVLOCK.
 	 */
 			      ops_srvlock:1;
-	union {
-		/**
-		 * lru page list. ops_inflight and ops_lru are exclusive so
-		 * that they can share the same data.
-		 */
-		struct list_head	      ops_lru;
-		/**
-		 * Linkage into a per-osc_object list of pages in flight. For
-		 * debugging.
-		 */
-		struct list_head	    ops_inflight;
-	};
+	/**
+	 * lru page list. See osc_lru_{del|use}() in osc_page.c for usage.
+	 */
+	struct list_head	      ops_lru;
+	/**
+	 * Linkage into a per-osc_object list of pages in flight. For
+	 * debugging.
+	 */
+	struct list_head	    ops_inflight;
 	/**
 	 * Thread that submitted this page for transfer. For debugging.
 	 */
@@ -389,16 +376,6 @@ struct osc_page {
 	 * Submit time - the time when the page is starting RPC. For debugging.
 	 */
 	unsigned long	    ops_submit_time;
-
-	/**
-	 * A lock of which we hold a reference covers this page. Only used by
-	 * read-ahead: for a readahead page, we hold it's covering lock to
-	 * prevent it from being canceled during recovery.
-	 *
-	 * \see osc_lock::ols_pageref
-	 * \see osc_page_addref_lock(), osc_page_putref_lock().
-	 */
-	struct cl_lock       *ops_lock;
 };
 
 extern struct kmem_cache *osc_lock_kmem;
@@ -417,21 +394,22 @@ extern struct lu_context_key osc_session_key;
 int osc_lock_init(const struct lu_env *env,
 		  struct cl_object *obj, struct cl_lock *lock,
 		  const struct cl_io *io);
-int osc_io_init  (const struct lu_env *env,
-		  struct cl_object *obj, struct cl_io *io);
-int osc_req_init (const struct lu_env *env, struct cl_device *dev,
-		  struct cl_req *req);
+int osc_io_init(const struct lu_env *env,
+		struct cl_object *obj, struct cl_io *io);
+int osc_req_init(const struct lu_env *env, struct cl_device *dev,
+		 struct cl_req *req);
 struct lu_object *osc_object_alloc(const struct lu_env *env,
 				   const struct lu_object_header *hdr,
 				   struct lu_device *dev);
 int osc_page_init(const struct lu_env *env, struct cl_object *obj,
-		  struct cl_page *page, struct page *vmpage);
+		  struct cl_page *page, pgoff_t ind);
 
-void osc_index2policy  (ldlm_policy_data_t *policy, const struct cl_object *obj,
-			pgoff_t start, pgoff_t end);
-int  osc_lvb_print     (const struct lu_env *env, void *cookie,
-			lu_printer_t p, const struct ost_lvb *lvb);
+void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj,
+		      pgoff_t start, pgoff_t end);
+int osc_lvb_print(const struct lu_env *env, void *cookie,
+		  lu_printer_t p, const struct ost_lvb *lvb);
 
+void osc_lru_add_batch(struct client_obd *cli, struct list_head *list);
 void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
 		     enum cl_req_type crt, int brw_flags);
 int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops);
@@ -441,6 +419,8 @@ int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
 			struct page *page, loff_t offset);
 int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
 		       struct osc_page *ops);
+int osc_page_cache_add(const struct lu_env *env,
+		       const struct cl_page_slice *slice, struct cl_io *io);
 int osc_teardown_async_page(const struct lu_env *env, struct osc_object *obj,
 			    struct osc_page *ops);
 int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
@@ -457,12 +437,13 @@ int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj,
 			 pgoff_t start, pgoff_t end);
 void osc_io_unplug(const struct lu_env *env, struct client_obd *cli,
 		   struct osc_object *osc);
+int lru_queue_work(const struct lu_env *env, void *data);
 
-void osc_object_set_contended  (struct osc_object *obj);
+void osc_object_set_contended(struct osc_object *obj);
 void osc_object_clear_contended(struct osc_object *obj);
-int  osc_object_is_contended   (struct osc_object *obj);
+int  osc_object_is_contended(struct osc_object *obj);
 
-int  osc_lock_is_lockless      (const struct osc_lock *olck);
+int  osc_lock_is_lockless(const struct osc_lock *olck);
 
 /*****************************************************************************
  *
@@ -558,6 +539,11 @@ static inline struct osc_page *oap2osc(struct osc_async_page *oap)
 	return container_of0(oap, struct osc_page, ops_oap);
 }
 
+static inline pgoff_t osc_index(struct osc_page *opg)
+{
+	return opg->ops_cl.cpl_index;
+}
+
 static inline struct cl_page *oap2cl_page(struct osc_async_page *oap)
 {
 	return oap2osc(oap)->ops_cl.cpl_page;
@@ -608,7 +594,7 @@ enum osc_extent_state {
  *
  * LOCKING ORDER
  * =============
- * page lock -> client_obd_list_lock -> object lock(osc_object::oo_lock)
+ * page lock -> cl_loi_list_lock -> object lock(osc_object::oo_lock)
  */
 struct osc_extent {
 	/** red-black tree node */
@@ -627,6 +613,8 @@ struct osc_extent {
 	unsigned int       oe_intree:1,
 	/** 0 is write, 1 is read */
 			   oe_rw:1,
+	/** sync extent, queued by osc_queue_sync_pages() */
+				oe_sync:1,
 			   oe_srvlock:1,
 			   oe_memalloc:1,
 	/** an ACTIVE extent is going to be truncated, so when this extent
@@ -675,7 +663,7 @@ struct osc_extent {
 	 */
 	wait_queue_head_t	oe_waitq;
 	/** lock covering this extent */
-	struct cl_lock    *oe_osclock;
+	struct ldlm_lock	*oe_dlmlock;
 	/** terminator of this extent. Must be true if this extent is in IO. */
 	struct task_struct	*oe_owner;
 	/** return value of writeback. If somebody is waiting for this extent,
@@ -690,6 +678,14 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
 		      int sent, int rc);
 void osc_extent_release(const struct lu_env *env, struct osc_extent *ext);
 
+int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc,
+			   pgoff_t start, pgoff_t end, enum cl_lock_mode mode);
+
+typedef int (*osc_page_gang_cbt)(const struct lu_env *, struct cl_io *,
+				 struct osc_page *, void *);
+int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io,
+			 struct osc_object *osc, pgoff_t start, pgoff_t end,
+			 osc_page_gang_cbt cb, void *cbdata);
 /** @} osc */
 
 #endif /* OSC_CL_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/osc/osc_dev.c b/drivers/staging/lustre/lustre/osc/osc_dev.c
index d4fe507f165f..83d30c135ba4 100644
--- a/drivers/staging/lustre/lustre/osc/osc_dev.c
+++ b/drivers/staging/lustre/lustre/osc/osc_dev.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/osc/osc_internal.h b/drivers/staging/lustre/lustre/osc/osc_internal.h
index ea695c2099ee..7a27f0961955 100644
--- a/drivers/staging/lustre/lustre/osc/osc_internal.h
+++ b/drivers/staging/lustre/lustre/osc/osc_internal.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -83,6 +79,12 @@ struct osc_async_page {
 #define oap_count       oap_brw_page.count
 #define oap_brw_flags   oap_brw_page.flag
 
+static inline struct osc_async_page *brw_page2oap(struct brw_page *pga)
+{
+	return (struct osc_async_page *)container_of(pga, struct osc_async_page,
+						     oap_brw_page);
+}
+
 struct osc_cache_waiter {
 	struct list_head	      ocw_entry;
 	wait_queue_head_t	     ocw_waitq;
@@ -102,12 +104,14 @@ void osc_update_next_shrink(struct client_obd *cli);
 
 extern struct ptlrpc_request_set *PTLRPCD_SET;
 
+typedef int (*osc_enqueue_upcall_f)(void *cookie, struct lustre_handle *lockh,
+				    int rc);
+
 int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
 		     __u64 *flags, ldlm_policy_data_t *policy,
 		     struct ost_lvb *lvb, int kms_valid,
-		     obd_enqueue_update_f upcall,
+		     osc_enqueue_upcall_f upcall,
 		     void *cookie, struct ldlm_enqueue_info *einfo,
-		     struct lustre_handle *lockh,
 		     struct ptlrpc_request_set *rqset, int async, int agl);
 int osc_cancel_base(struct lustre_handle *lockh, __u32 mode);
 
@@ -130,9 +134,11 @@ int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
 int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *cfg);
 int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 		  struct list_head *ext_list, int cmd);
-int osc_lru_shrink(struct client_obd *cli, int target);
+int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
+		   int target, bool force);
+int osc_lru_reclaim(struct client_obd *cli);
 
-extern spinlock_t osc_ast_guard;
+unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock);
 
 int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg);
 
@@ -173,8 +179,6 @@ static inline struct osc_device *obd2osc_dev(const struct obd_device *d)
 	return container_of0(d->obd_lu_dev, struct osc_device, od_cl.cd_lu_dev);
 }
 
-int osc_dlm_lock_pageref(struct ldlm_lock *dlm);
-
 extern struct kmem_cache *osc_quota_kmem;
 struct osc_quota_info {
 	/** linkage for quota hash table */
@@ -192,5 +196,12 @@ int osc_quotactl(struct obd_device *unused, struct obd_export *exp,
 int osc_quotacheck(struct obd_device *unused, struct obd_export *exp,
 		   struct obd_quotactl *oqctl);
 int osc_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk);
+void osc_inc_unstable_pages(struct ptlrpc_request *req);
+void osc_dec_unstable_pages(struct ptlrpc_request *req);
+int  osc_over_unstable_soft_limit(struct client_obd *cli);
+
+struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
+				       struct osc_object *obj, pgoff_t index,
+				       int pending, int canceling);
 
 #endif /* OSC_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/osc/osc_io.c b/drivers/staging/lustre/lustre/osc/osc_io.c
index 6bd0a45d8b06..6e3dcd38913f 100644
--- a/drivers/staging/lustre/lustre/osc/osc_io.c
+++ b/drivers/staging/lustre/lustre/osc/osc_io.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -68,11 +64,15 @@ static struct osc_io *cl2osc_io(const struct lu_env *env,
 	return oio;
 }
 
-static struct osc_page *osc_cl_page_osc(struct cl_page *page)
+static struct osc_page *osc_cl_page_osc(struct cl_page *page,
+					struct osc_object *osc)
 {
 	const struct cl_page_slice *slice;
 
-	slice = cl_page_at(page, &osc_device_type);
+	if (osc)
+		slice = cl_object_page_slice(&osc->oo_cl, page);
+	else
+		slice = cl_page_at(page, &osc_device_type);
 	LASSERT(slice);
 
 	return cl2osc_page(slice);
@@ -137,7 +137,7 @@ static int osc_io_submit(const struct lu_env *env,
 		io = page->cp_owner;
 		LASSERT(io);
 
-		opg = osc_cl_page_osc(page);
+		opg = osc_cl_page_osc(page, osc);
 		oap = &opg->ops_oap;
 		LASSERT(osc == oap->oap_obj);
 
@@ -164,8 +164,10 @@ static int osc_io_submit(const struct lu_env *env,
 		}
 
 		cl_page_list_move(qout, qin, page);
+		spin_lock(&oap->oap_lock);
 		oap->oap_async_flags = ASYNC_URGENT|ASYNC_READY;
 		oap->oap_async_flags |= ASYNC_COUNT_STABLE;
+		spin_unlock(&oap->oap_lock);
 
 		osc_page_submit(env, opg, crt, brw_flags);
 		list_add_tail(&oap->oap_pending_item, &list);
@@ -185,6 +187,13 @@ static int osc_io_submit(const struct lu_env *env,
 	return qout->pl_nr > 0 ? 0 : result;
 }
 
+/**
+ * This is called when a page is accessed within file in a way that creates
+ * new page, if one were missing (i.e., if there were a hole at that place in
+ * the file, or accessed page is beyond the current file size).
+ *
+ * Expand stripe KMS if necessary.
+ */
 static void osc_page_touch_at(const struct lu_env *env,
 			      struct cl_object *obj, pgoff_t idx, unsigned to)
 {
@@ -208,7 +217,9 @@ static void osc_page_touch_at(const struct lu_env *env,
 	       kms > loi->loi_kms ? "" : "not ", loi->loi_kms, kms,
 	       loi->loi_lvb.lvb_size);
 
-	valid = 0;
+	attr->cat_ctime = LTIME_S(CURRENT_TIME);
+	attr->cat_mtime = attr->cat_ctime;
+	valid = CAT_MTIME | CAT_CTIME;
 	if (kms > loi->loi_kms) {
 		attr->cat_kms = kms;
 		valid |= CAT_KMS;
@@ -221,91 +232,128 @@ static void osc_page_touch_at(const struct lu_env *env,
 	cl_object_attr_unlock(obj);
 }
 
-/**
- * This is called when a page is accessed within file in a way that creates
- * new page, if one were missing (i.e., if there were a hole at that place in
- * the file, or accessed page is beyond the current file size). Examples:
- * ->commit_write() and ->nopage() methods.
- *
- * Expand stripe KMS if necessary.
- */
-static void osc_page_touch(const struct lu_env *env,
-			   struct osc_page *opage, unsigned to)
-{
-	struct cl_page *page = opage->ops_cl.cpl_page;
-	struct cl_object *obj = opage->ops_cl.cpl_obj;
-
-	osc_page_touch_at(env, obj, page->cp_index, to);
-}
-
-/**
- * Implements cl_io_operations::cio_prepare_write() method for osc layer.
- *
- * \retval -EIO transfer initiated against this osc will most likely fail
- * \retval 0    transfer initiated against this osc will most likely succeed.
- *
- * The reason for this check is to immediately return an error to the caller
- * in the case of a deactivated import. Note, that import can be deactivated
- * later, while pages, dirtied by this IO, are still in the cache, but this is
- * irrelevant, because that would still return an error to the application (if
- * it does fsync), but many applications don't do fsync because of performance
- * issues, and we wanted to return an -EIO at write time to notify the
- * application.
- */
-static int osc_io_prepare_write(const struct lu_env *env,
-				const struct cl_io_slice *ios,
-				const struct cl_page_slice *slice,
-				unsigned from, unsigned to)
+static int osc_io_commit_async(const struct lu_env *env,
+			       const struct cl_io_slice *ios,
+			       struct cl_page_list *qin, int from, int to,
+			       cl_commit_cbt cb)
 {
-	struct osc_device *dev = lu2osc_dev(slice->cpl_obj->co_lu.lo_dev);
-	struct obd_import *imp = class_exp2cliimp(dev->od_exp);
+	struct cl_io *io = ios->cis_io;
 	struct osc_io *oio = cl2osc_io(env, ios);
+	struct osc_object *osc = cl2osc(ios->cis_obj);
+	struct cl_page *page;
+	struct cl_page *last_page;
+	struct osc_page *opg;
 	int result = 0;
 
-	/*
-	 * This implements OBD_BRW_CHECK logic from old client.
-	 */
+	LASSERT(qin->pl_nr > 0);
+
+	/* Handle partial page cases */
+	last_page = cl_page_list_last(qin);
+	if (oio->oi_lockless) {
+		page = cl_page_list_first(qin);
+		if (page == last_page) {
+			cl_page_clip(env, page, from, to);
+		} else {
+			if (from != 0)
+				cl_page_clip(env, page, from, PAGE_SIZE);
+			if (to != PAGE_SIZE)
+				cl_page_clip(env, last_page, 0, to);
+		}
+	}
+
+	while (qin->pl_nr > 0) {
+		struct osc_async_page *oap;
 
-	if (!imp || imp->imp_invalid)
-		result = -EIO;
-	if (result == 0 && oio->oi_lockless)
-		/* this page contains `invalid' data, but who cares?
-		 * nobody can access the invalid data.
-		 * in osc_io_commit_write(), we're going to write exact
-		 * [from, to) bytes of this page to OST. -jay
+		page = cl_page_list_first(qin);
+		opg = osc_cl_page_osc(page, osc);
+		oap = &opg->ops_oap;
+
+		if (!list_empty(&oap->oap_rpc_item)) {
+			CDEBUG(D_CACHE, "Busy oap %p page %p for submit.\n",
+			       oap, opg);
+			result = -EBUSY;
+			break;
+		}
+
+		/* The page may be already in dirty cache. */
+		if (list_empty(&oap->oap_pending_item)) {
+			result = osc_page_cache_add(env, &opg->ops_cl, io);
+			if (result != 0)
+				break;
+		}
+
+		osc_page_touch_at(env, osc2cl(osc), osc_index(opg),
+				  page == last_page ? to : PAGE_SIZE);
+
+		cl_page_list_del(env, qin, page);
+
+		(*cb)(env, io, page);
+		/* Can't access page any more. Page can be in transfer and
+		 * complete at any time.
 		 */
-		cl_page_export(env, slice->cpl_page, 1);
+	}
 
+	/* for sync write, kernel will wait for this page to be flushed before
+	 * osc_io_end() is called, so release it earlier.
+	 * for mkwrite(), it's known there is no further pages.
+	 */
+	if (cl_io_is_sync_write(io) && oio->oi_active) {
+		osc_extent_release(env, oio->oi_active);
+		oio->oi_active = NULL;
+	}
+
+	CDEBUG(D_INFO, "%d %d\n", qin->pl_nr, result);
 	return result;
 }
 
-static int osc_io_commit_write(const struct lu_env *env,
-			       const struct cl_io_slice *ios,
-			       const struct cl_page_slice *slice,
-			       unsigned from, unsigned to)
+static int osc_io_rw_iter_init(const struct lu_env *env,
+			       const struct cl_io_slice *ios)
 {
-	struct osc_io *oio = cl2osc_io(env, ios);
-	struct osc_page *opg = cl2osc_page(slice);
-	struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
-	struct osc_async_page *oap = &opg->ops_oap;
+	struct cl_io *io = ios->cis_io;
+	struct osc_io *oio = osc_env_io(env);
+	struct osc_object *osc = cl2osc(ios->cis_obj);
+	struct client_obd *cli = osc_cli(osc);
+	unsigned long c;
+	unsigned int npages;
+	unsigned int max_pages;
+
+	if (cl_io_is_append(io))
+		return 0;
+
+	npages = io->u.ci_rw.crw_count >> PAGE_SHIFT;
+	if (io->u.ci_rw.crw_pos & ~PAGE_MASK)
+		++npages;
+
+	max_pages = cli->cl_max_pages_per_rpc * cli->cl_max_rpcs_in_flight;
+	if (npages > max_pages)
+		npages = max_pages;
+
+	c = atomic_read(cli->cl_lru_left);
+	if (c < npages && osc_lru_reclaim(cli) > 0)
+		c = atomic_read(cli->cl_lru_left);
+	while (c >= npages) {
+		if (c == atomic_cmpxchg(cli->cl_lru_left, c, c - npages)) {
+			oio->oi_lru_reserved = npages;
+			break;
+		}
+		c = atomic_read(cli->cl_lru_left);
+	}
 
-	LASSERT(to > 0);
-	/*
-	 * XXX instead of calling osc_page_touch() here and in
-	 * osc_io_fault_start() it might be more logical to introduce
-	 * cl_page_touch() method, that generic cl_io_commit_write() and page
-	 * fault code calls.
-	 */
-	osc_page_touch(env, cl2osc_page(slice), to);
-	if (!client_is_remote(osc_export(obj)) &&
-	    capable(CFS_CAP_SYS_RESOURCE))
-		oap->oap_brw_flags |= OBD_BRW_NOQUOTA;
+	return 0;
+}
 
-	if (oio->oi_lockless)
-		/* see osc_io_prepare_write() for lockless io handling. */
-		cl_page_clip(env, slice->cpl_page, from, to);
+static void osc_io_rw_iter_fini(const struct lu_env *env,
+				const struct cl_io_slice *ios)
+{
+	struct osc_io *oio = osc_env_io(env);
+	struct osc_object *osc = cl2osc(ios->cis_obj);
+	struct client_obd *cli = osc_cli(osc);
 
-	return 0;
+	if (oio->oi_lru_reserved > 0) {
+		atomic_add(oio->oi_lru_reserved, cli->cl_lru_left);
+		oio->oi_lru_reserved = 0;
+	}
+	oio->oi_write_osclock = NULL;
 }
 
 static int osc_io_fault_start(const struct lu_env *env,
@@ -342,31 +390,21 @@ static int osc_async_upcall(void *a, int rc)
  * Checks that there are no pages being written in the extent being truncated.
  */
 static int trunc_check_cb(const struct lu_env *env, struct cl_io *io,
-			  struct cl_page *page, void *cbdata)
+			  struct osc_page *ops, void *cbdata)
 {
-	const struct cl_page_slice *slice;
-	struct osc_page *ops;
+	struct cl_page *page = ops->ops_cl.cpl_page;
 	struct osc_async_page *oap;
 	__u64 start = *(__u64 *)cbdata;
 
-	slice = cl_page_at(page, &osc_device_type);
-	LASSERT(slice);
-	ops = cl2osc_page(slice);
 	oap = &ops->ops_oap;
-
 	if (oap->oap_cmd & OBD_BRW_WRITE &&
 	    !list_empty(&oap->oap_pending_item))
 		CL_PAGE_DEBUG(D_ERROR, env, page, "exists %llu/%s.\n",
 			      start, current->comm);
 
-	{
-		struct page *vmpage = cl_page_vmpage(env, page);
-
-		if (PageLocked(vmpage))
-			CDEBUG(D_CACHE, "page %p index %lu locked for %d.\n",
-			       ops, page->cp_index,
-			       (oap->oap_cmd & OBD_BRW_RWMASK));
-	}
+	if (PageLocked(page->cp_vmpage))
+		CDEBUG(D_CACHE, "page %p index %lu locked for %d.\n",
+		       ops, osc_index(ops), oap->oap_cmd & OBD_BRW_RWMASK);
 
 	return CLP_GANG_OKAY;
 }
@@ -385,8 +423,9 @@ static void osc_trunc_check(const struct lu_env *env, struct cl_io *io,
 	/*
 	 * Complain if there are pages in the truncated region.
 	 */
-	cl_page_gang_lookup(env, clob, io, start + partial, CL_PAGE_EOF,
-			    trunc_check_cb, (void *)&size);
+	osc_page_gang_lookup(env, io, cl2osc(clob),
+			     start + partial, CL_PAGE_EOF,
+			     trunc_check_cb, (void *)&size);
 }
 
 static int osc_io_setattr_start(const struct lu_env *env,
@@ -416,7 +455,8 @@ static int osc_io_setattr_start(const struct lu_env *env,
 			unsigned int cl_valid = 0;
 
 			if (ia_valid & ATTR_SIZE) {
-				attr->cat_size = attr->cat_kms = size;
+				attr->cat_size = size;
+				attr->cat_kms = size;
 				cl_valid = CAT_SIZE | CAT_KMS;
 			}
 			if (ia_valid & ATTR_MTIME_SET) {
@@ -484,7 +524,8 @@ static void osc_io_setattr_end(const struct lu_env *env,
 
 	if (cbargs->opc_rpc_sent) {
 		wait_for_completion(&cbargs->opc_sync);
-		result = io->ci_result = cbargs->opc_rc;
+		result = cbargs->opc_rc;
+		io->ci_result = cbargs->opc_rc;
 	}
 	if (result == 0) {
 		if (oio->oi_lockless) {
@@ -533,7 +574,8 @@ static int osc_io_write_start(const struct lu_env *env,
 
 	OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DELAY_SETTIME, 1);
 	cl_object_attr_lock(obj);
-	attr->cat_mtime = attr->cat_ctime = ktime_get_real_seconds();
+	attr->cat_ctime = ktime_get_real_seconds();
+	attr->cat_mtime = attr->cat_ctime;
 	rc = cl_object_attr_set(env, obj, attr, CAT_MTIME | CAT_CTIME);
 	cl_object_attr_unlock(obj);
 
@@ -650,6 +692,8 @@ static const struct cl_io_operations osc_io_ops = {
 			.cio_fini   = osc_io_fini
 		},
 		[CIT_WRITE] = {
+			.cio_iter_init = osc_io_rw_iter_init,
+			.cio_iter_fini = osc_io_rw_iter_fini,
 			.cio_start  = osc_io_write_start,
 			.cio_end    = osc_io_end,
 			.cio_fini   = osc_io_fini
@@ -672,16 +716,8 @@ static const struct cl_io_operations osc_io_ops = {
 			.cio_fini   = osc_io_fini
 		}
 	},
-	.req_op = {
-		 [CRT_READ] = {
-			 .cio_submit    = osc_io_submit
-		 },
-		 [CRT_WRITE] = {
-			 .cio_submit    = osc_io_submit
-		 }
-	 },
-	.cio_prepare_write = osc_io_prepare_write,
-	.cio_commit_write  = osc_io_commit_write
+	.cio_submit                 = osc_io_submit,
+	.cio_commit_async           = osc_io_commit_async
 };
 
 /*****************************************************************************
@@ -718,8 +754,7 @@ static void osc_req_attr_set(const struct lu_env *env,
 	struct lov_oinfo *oinfo;
 	struct cl_req *clerq;
 	struct cl_page *apage; /* _some_ page in @clerq */
-	struct cl_lock *lock;  /* _some_ lock protecting @apage */
-	struct osc_lock *olck;
+	struct ldlm_lock *lock;  /* _some_ lock protecting @apage */
 	struct osc_page *opg;
 	struct obdo *oa;
 	struct ost_lvb *lvb;
@@ -753,31 +788,32 @@ static void osc_req_attr_set(const struct lu_env *env,
 		LASSERT(!list_empty(&clerq->crq_pages));
 		apage = container_of(clerq->crq_pages.next,
 				     struct cl_page, cp_flight);
-		opg = osc_cl_page_osc(apage);
-		apage = opg->ops_cl.cpl_page; /* now apage is a sub-page */
-		lock = cl_lock_at_page(env, apage->cp_obj, apage, NULL, 1, 1);
-		if (!lock) {
-			struct cl_object_header *head;
-			struct cl_lock *scan;
-
-			head = cl_object_header(apage->cp_obj);
-			list_for_each_entry(scan, &head->coh_locks, cll_linkage)
-				CL_LOCK_DEBUG(D_ERROR, env, scan,
-					      "no cover page!\n");
-			CL_PAGE_DEBUG(D_ERROR, env, apage,
-				      "dump uncover page!\n");
+		opg = osc_cl_page_osc(apage, NULL);
+		lock = osc_dlmlock_at_pgoff(env, cl2osc(obj), osc_index(opg),
+					    1, 1);
+		if (!lock && !opg->ops_srvlock) {
+			struct ldlm_resource *res;
+			struct ldlm_res_id *resname;
+
+			CL_PAGE_DEBUG(D_ERROR, env, apage, "uncovered page!\n");
+
+			resname = &osc_env_info(env)->oti_resname;
+			ostid_build_res_name(&oinfo->loi_oi, resname);
+			res = ldlm_resource_get(
+				osc_export(cl2osc(obj))->exp_obd->obd_namespace,
+				NULL, resname, LDLM_EXTENT, 0);
+			ldlm_resource_dump(D_ERROR, res);
+
 			dump_stack();
 			LBUG();
 		}
 
-		olck = osc_lock_at(lock);
-		LASSERT(ergo(opg->ops_srvlock, !olck->ols_lock));
 		/* check for lockless io. */
-		if (olck->ols_lock) {
-			oa->o_handle = olck->ols_lock->l_remote_handle;
+		if (lock) {
+			oa->o_handle = lock->l_remote_handle;
 			oa->o_valid |= OBD_MD_FLHANDLE;
+			LDLM_LOCK_PUT(lock);
 		}
-		cl_lock_put(env, lock);
 	}
 }
 
@@ -807,8 +843,9 @@ int osc_req_init(const struct lu_env *env, struct cl_device *dev,
 	if (or) {
 		cl_req_slice_add(req, &or->or_cl, dev, &osc_req_ops);
 		result = 0;
-	} else
+	} else {
 		result = -ENOMEM;
+	}
 	return result;
 }
 
diff --git a/drivers/staging/lustre/lustre/osc/osc_lock.c b/drivers/staging/lustre/lustre/osc/osc_lock.c
index 013df9787f3e..717d3ffb6789 100644
--- a/drivers/staging/lustre/lustre/osc/osc_lock.c
+++ b/drivers/staging/lustre/lustre/osc/osc_lock.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -36,6 +32,7 @@
  * Implementation of cl_lock for OSC layer.
  *
  *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  */
 
 #define DEBUG_SUBSYSTEM S_OSC
@@ -50,8 +47,6 @@
  *  @{
  */
 
-#define _PAGEREF_MAGIC  (-10000000)
-
 /*****************************************************************************
  *
  * Type conversions.
@@ -62,7 +57,6 @@ static const struct cl_lock_operations osc_lock_ops;
 static const struct cl_lock_operations osc_lock_lockless_ops;
 static void osc_lock_to_lockless(const struct lu_env *env,
 				 struct osc_lock *ols, int force);
-static int osc_lock_has_pages(struct osc_lock *olck);
 
 int osc_lock_is_lockless(const struct osc_lock *olck)
 {
@@ -90,11 +84,11 @@ static struct ldlm_lock *osc_handle_ptr(struct lustre_handle *handle)
 static int osc_lock_invariant(struct osc_lock *ols)
 {
 	struct ldlm_lock *lock = osc_handle_ptr(&ols->ols_handle);
-	struct ldlm_lock *olock = ols->ols_lock;
+	struct ldlm_lock *olock = ols->ols_dlmlock;
 	int handle_used = lustre_handle_is_used(&ols->ols_handle);
 
 	if (ergo(osc_lock_is_lockless(ols),
-		 ols->ols_locklessable && !ols->ols_lock))
+		 ols->ols_locklessable && !ols->ols_dlmlock))
 		return 1;
 
 	/*
@@ -111,7 +105,7 @@ static int osc_lock_invariant(struct osc_lock *ols)
 		  ergo(!lock, !olock)))
 		return 0;
 	/*
-	 * Check that ->ols_handle and ->ols_lock are consistent, but
+	 * Check that ->ols_handle and ->ols_dlmlock are consistent, but
 	 * take into account that they are set at the different time.
 	 */
 	if (!ergo(ols->ols_state == OLS_CANCELLED,
@@ -122,7 +116,7 @@ static int osc_lock_invariant(struct osc_lock *ols)
 	 * ast.
 	 */
 	if (!ergo(olock && ols->ols_state < OLS_CANCELLED,
-		  ((olock->l_flags & LDLM_FL_DESTROYED) == 0)))
+		  !ldlm_is_destroyed(olock)))
 		return 0;
 
 	if (!ergo(ols->ols_state == OLS_GRANTED,
@@ -138,117 +132,13 @@ static int osc_lock_invariant(struct osc_lock *ols)
  *
  */
 
-/**
- * Breaks a link between osc_lock and dlm_lock.
- */
-static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck)
-{
-	struct ldlm_lock *dlmlock;
-
-	spin_lock(&osc_ast_guard);
-	dlmlock = olck->ols_lock;
-	if (!dlmlock) {
-		spin_unlock(&osc_ast_guard);
-		return;
-	}
-
-	olck->ols_lock = NULL;
-	/* wb(); --- for all who checks (ols->ols_lock != NULL) before
-	 * call to osc_lock_detach()
-	 */
-	dlmlock->l_ast_data = NULL;
-	olck->ols_handle.cookie = 0ULL;
-	spin_unlock(&osc_ast_guard);
-
-	lock_res_and_lock(dlmlock);
-	if (dlmlock->l_granted_mode == dlmlock->l_req_mode) {
-		struct cl_object *obj = olck->ols_cl.cls_obj;
-		struct cl_attr *attr = &osc_env_info(env)->oti_attr;
-		__u64 old_kms;
-
-		cl_object_attr_lock(obj);
-		/* Must get the value under the lock to avoid possible races. */
-		old_kms = cl2osc(obj)->oo_oinfo->loi_kms;
-		/* Update the kms. Need to loop all granted locks.
-		 * Not a problem for the client
-		 */
-		attr->cat_kms = ldlm_extent_shift_kms(dlmlock, old_kms);
-
-		cl_object_attr_set(env, obj, attr, CAT_KMS);
-		cl_object_attr_unlock(obj);
-	}
-	unlock_res_and_lock(dlmlock);
-
-	/* release a reference taken in osc_lock_upcall0(). */
-	LASSERT(olck->ols_has_ref);
-	lu_ref_del(&dlmlock->l_reference, "osc_lock", olck);
-	LDLM_LOCK_RELEASE(dlmlock);
-	olck->ols_has_ref = 0;
-}
-
-static int osc_lock_unhold(struct osc_lock *ols)
-{
-	int result = 0;
-
-	if (ols->ols_hold) {
-		ols->ols_hold = 0;
-		result = osc_cancel_base(&ols->ols_handle,
-					 ols->ols_einfo.ei_mode);
-	}
-	return result;
-}
-
-static int osc_lock_unuse(const struct lu_env *env,
-			  const struct cl_lock_slice *slice)
-{
-	struct osc_lock *ols = cl2osc_lock(slice);
-
-	LINVRNT(osc_lock_invariant(ols));
-
-	switch (ols->ols_state) {
-	case OLS_NEW:
-		LASSERT(!ols->ols_hold);
-		LASSERT(ols->ols_agl);
-		return 0;
-	case OLS_UPCALL_RECEIVED:
-		osc_lock_unhold(ols);
-	case OLS_ENQUEUED:
-		LASSERT(!ols->ols_hold);
-		osc_lock_detach(env, ols);
-		ols->ols_state = OLS_NEW;
-		return 0;
-	case OLS_GRANTED:
-		LASSERT(!ols->ols_glimpse);
-		LASSERT(ols->ols_hold);
-		/*
-		 * Move lock into OLS_RELEASED state before calling
-		 * osc_cancel_base() so that possible synchronous cancellation
-		 * sees that lock is released.
-		 */
-		ols->ols_state = OLS_RELEASED;
-		return osc_lock_unhold(ols);
-	default:
-		CERROR("Impossible state: %d\n", ols->ols_state);
-		LBUG();
-	}
-}
-
 static void osc_lock_fini(const struct lu_env *env,
 			  struct cl_lock_slice *slice)
 {
 	struct osc_lock *ols = cl2osc_lock(slice);
 
 	LINVRNT(osc_lock_invariant(ols));
-	/*
-	 * ->ols_hold can still be true at this point if, for example, a
-	 * thread that requested a lock was killed (and released a reference
-	 * to the lock), before reply from a server was received. In this case
-	 * lock is destroyed immediately after upcall.
-	 */
-	osc_lock_unhold(ols);
-	LASSERT(!ols->ols_lock);
-	LASSERT(atomic_read(&ols->ols_pageref) == 0 ||
-		atomic_read(&ols->ols_pageref) == _PAGEREF_MAGIC);
+	LASSERT(!ols->ols_dlmlock);
 
 	kmem_cache_free(osc_lock_kmem, ols);
 }
@@ -275,55 +165,12 @@ static __u64 osc_enq2ldlm_flags(__u32 enqflags)
 		result |= LDLM_FL_HAS_INTENT;
 	if (enqflags & CEF_DISCARD_DATA)
 		result |= LDLM_FL_AST_DISCARD_DATA;
+	if (enqflags & CEF_PEEK)
+		result |= LDLM_FL_TEST_LOCK;
 	return result;
 }
 
 /**
- * Global spin-lock protecting consistency of ldlm_lock::l_ast_data
- * pointers. Initialized in osc_init().
- */
-spinlock_t osc_ast_guard;
-
-static struct osc_lock *osc_ast_data_get(struct ldlm_lock *dlm_lock)
-{
-	struct osc_lock *olck;
-
-	lock_res_and_lock(dlm_lock);
-	spin_lock(&osc_ast_guard);
-	olck = dlm_lock->l_ast_data;
-	if (olck) {
-		struct cl_lock *lock = olck->ols_cl.cls_lock;
-		/*
-		 * If osc_lock holds a reference on ldlm lock, return it even
-		 * when cl_lock is in CLS_FREEING state. This way
-		 *
-		 *	 osc_ast_data_get(dlmlock) == NULL
-		 *
-		 * guarantees that all osc references on dlmlock were
-		 * released. osc_dlm_blocking_ast0() relies on that.
-		 */
-		if (lock->cll_state < CLS_FREEING || olck->ols_has_ref) {
-			cl_lock_get_trust(lock);
-			lu_ref_add_atomic(&lock->cll_reference,
-					  "ast", current);
-		} else
-			olck = NULL;
-	}
-	spin_unlock(&osc_ast_guard);
-	unlock_res_and_lock(dlm_lock);
-	return olck;
-}
-
-static void osc_ast_data_put(const struct lu_env *env, struct osc_lock *olck)
-{
-	struct cl_lock *lock;
-
-	lock = olck->ols_cl.cls_lock;
-	lu_ref_del(&lock->cll_reference, "ast", current);
-	cl_lock_put(env, lock);
-}
-
-/**
  * Updates object attributes from a lock value block (lvb) received together
  * with the DLM lock reply from the server. Copy of osc_update_enqueue()
  * logic.
@@ -333,35 +180,30 @@ static void osc_ast_data_put(const struct lu_env *env, struct osc_lock *olck)
  *
  * Called under lock and resource spin-locks.
  */
-static void osc_lock_lvb_update(const struct lu_env *env, struct osc_lock *olck,
-				int rc)
+static void osc_lock_lvb_update(const struct lu_env *env,
+				struct osc_object *osc,
+				struct ldlm_lock *dlmlock,
+				struct ost_lvb *lvb)
 {
-	struct ost_lvb *lvb;
-	struct cl_object *obj;
-	struct lov_oinfo *oinfo;
-	struct cl_attr *attr;
+	struct cl_object *obj = osc2cl(osc);
+	struct lov_oinfo *oinfo = osc->oo_oinfo;
+	struct cl_attr *attr = &osc_env_info(env)->oti_attr;
 	unsigned valid;
 
-	if (!(olck->ols_flags & LDLM_FL_LVB_READY))
-		return;
-
-	lvb = &olck->ols_lvb;
-	obj = olck->ols_cl.cls_obj;
-	oinfo = cl2osc(obj)->oo_oinfo;
-	attr = &osc_env_info(env)->oti_attr;
 	valid = CAT_BLOCKS | CAT_ATIME | CAT_CTIME | CAT_MTIME | CAT_SIZE;
+	if (!lvb)
+		lvb = dlmlock->l_lvb_data;
+
 	cl_lvb2attr(attr, lvb);
 
 	cl_object_attr_lock(obj);
-	if (rc == 0) {
-		struct ldlm_lock *dlmlock;
+	if (dlmlock) {
 		__u64 size;
 
-		dlmlock = olck->ols_lock;
-
-		/* re-grab LVB from a dlm lock under DLM spin-locks. */
-		*lvb = *(struct ost_lvb *)dlmlock->l_lvb_data;
+		check_res_locked(dlmlock->l_resource);
+		LASSERT(lvb == dlmlock->l_lvb_data);
 		size = lvb->lvb_size;
+
 		/* Extend KMS up to the end of this lock and no further
 		 * A lock on [x,y] means a KMS of up to y + 1 bytes!
 		 */
@@ -378,102 +220,67 @@ static void osc_lock_lvb_update(const struct lu_env *env, struct osc_lock *olck,
 				   dlmlock->l_policy_data.l_extent.end);
 		}
 		ldlm_lock_allow_match_locked(dlmlock);
-	} else if (rc == -ENAVAIL && olck->ols_glimpse) {
-		CDEBUG(D_INODE, "glimpsed, setting rss=%llu; leaving kms=%llu\n",
-		       lvb->lvb_size, oinfo->loi_kms);
-	} else
-		valid = 0;
-
-	if (valid != 0)
-		cl_object_attr_set(env, obj, attr, valid);
+	}
 
+	cl_object_attr_set(env, obj, attr, valid);
 	cl_object_attr_unlock(obj);
 }
 
-/**
- * Called when a lock is granted, from an upcall (when server returned a
- * granted lock), or from completion AST, when server returned a blocked lock.
- *
- * Called under lock and resource spin-locks, that are released temporarily
- * here.
- */
-static void osc_lock_granted(const struct lu_env *env, struct osc_lock *olck,
-			     struct ldlm_lock *dlmlock, int rc)
+static void osc_lock_granted(const struct lu_env *env, struct osc_lock *oscl,
+			     struct lustre_handle *lockh, bool lvb_update)
 {
-	struct ldlm_extent *ext;
-	struct cl_lock *lock;
-	struct cl_lock_descr *descr;
+	struct ldlm_lock *dlmlock;
 
-	LASSERT(dlmlock->l_granted_mode == dlmlock->l_req_mode);
+	dlmlock = ldlm_handle2lock_long(lockh, 0);
+	LASSERT(dlmlock);
 
-	if (olck->ols_state < OLS_GRANTED) {
-		lock = olck->ols_cl.cls_lock;
-		ext = &dlmlock->l_policy_data.l_extent;
-		descr = &osc_env_info(env)->oti_descr;
-		descr->cld_obj = lock->cll_descr.cld_obj;
+	/* lock reference taken by ldlm_handle2lock_long() is
+	 * owned by osc_lock and released in osc_lock_detach()
+	 */
+	lu_ref_add(&dlmlock->l_reference, "osc_lock", oscl);
+	oscl->ols_has_ref = 1;
 
-		/* XXX check that ->l_granted_mode is valid. */
-		descr->cld_mode = osc_ldlm2cl_lock(dlmlock->l_granted_mode);
-		descr->cld_start = cl_index(descr->cld_obj, ext->start);
-		descr->cld_end = cl_index(descr->cld_obj, ext->end);
-		descr->cld_gid = ext->gid;
-		/*
-		 * tell upper layers the extent of the lock that was actually
-		 * granted
-		 */
-		olck->ols_state = OLS_GRANTED;
-		osc_lock_lvb_update(env, olck, rc);
-
-		/* release DLM spin-locks to allow cl_lock_{modify,signal}()
-		 * to take a semaphore on a parent lock. This is safe, because
-		 * spin-locks are needed to protect consistency of
-		 * dlmlock->l_*_mode and LVB, and we have finished processing
-		 * them.
+	LASSERT(!oscl->ols_dlmlock);
+	oscl->ols_dlmlock = dlmlock;
+
+	/* This may be a matched lock for glimpse request, do not hold
+	 * lock reference in that case.
+	 */
+	if (!oscl->ols_glimpse) {
+		/* hold a refc for non glimpse lock which will
+		 * be released in osc_lock_cancel()
 		 */
-		unlock_res_and_lock(dlmlock);
-		cl_lock_modify(env, lock, descr);
-		cl_lock_signal(env, lock);
-		LINVRNT(osc_lock_invariant(olck));
-		lock_res_and_lock(dlmlock);
+		lustre_handle_copy(&oscl->ols_handle, lockh);
+		ldlm_lock_addref(lockh, oscl->ols_einfo.ei_mode);
+		oscl->ols_hold = 1;
 	}
-}
-
-static void osc_lock_upcall0(const struct lu_env *env, struct osc_lock *olck)
-
-{
-	struct ldlm_lock *dlmlock;
-
-	dlmlock = ldlm_handle2lock_long(&olck->ols_handle, 0);
-	LASSERT(dlmlock);
 
+	/* Lock must have been granted. */
 	lock_res_and_lock(dlmlock);
-	spin_lock(&osc_ast_guard);
-	LASSERT(dlmlock->l_ast_data == olck);
-	LASSERT(!olck->ols_lock);
-	olck->ols_lock = dlmlock;
-	spin_unlock(&osc_ast_guard);
+	if (dlmlock->l_granted_mode == dlmlock->l_req_mode) {
+		struct ldlm_extent *ext = &dlmlock->l_policy_data.l_extent;
+		struct cl_lock_descr *descr = &oscl->ols_cl.cls_lock->cll_descr;
 
-	/*
-	 * Lock might be not yet granted. In this case, completion ast
-	 * (osc_ldlm_completion_ast()) comes later and finishes lock
-	 * granting.
-	 */
-	if (dlmlock->l_granted_mode == dlmlock->l_req_mode)
-		osc_lock_granted(env, olck, dlmlock, 0);
-	unlock_res_and_lock(dlmlock);
+		/* extend the lock extent, otherwise it will have problem when
+		 * we decide whether to grant a lockless lock.
+		 */
+		descr->cld_mode = osc_ldlm2cl_lock(dlmlock->l_granted_mode);
+		descr->cld_start = cl_index(descr->cld_obj, ext->start);
+		descr->cld_end = cl_index(descr->cld_obj, ext->end);
+		descr->cld_gid = ext->gid;
 
-	/*
-	 * osc_enqueue_interpret() decrefs asynchronous locks, counter
-	 * this.
-	 */
-	ldlm_lock_addref(&olck->ols_handle, olck->ols_einfo.ei_mode);
-	olck->ols_hold = 1;
+		/* no lvb update for matched lock */
+		if (lvb_update) {
+			LASSERT(oscl->ols_flags & LDLM_FL_LVB_READY);
+			osc_lock_lvb_update(env, cl2osc(oscl->ols_cl.cls_obj),
+					    dlmlock, NULL);
+		}
+		LINVRNT(osc_lock_invariant(oscl));
+	}
+	unlock_res_and_lock(dlmlock);
 
-	/* lock reference taken by ldlm_handle2lock_long() is owned by
-	 * osc_lock and released in osc_lock_detach()
-	 */
-	lu_ref_add(&dlmlock->l_reference, "osc_lock", olck);
-	olck->ols_has_ref = 1;
+	LASSERT(oscl->ols_state != OLS_GRANTED);
+	oscl->ols_state = OLS_GRANTED;
 }
 
 /**
@@ -481,143 +288,124 @@ static void osc_lock_upcall0(const struct lu_env *env, struct osc_lock *olck)
  * received from a server, or after osc_enqueue_base() matched a local DLM
  * lock.
  */
-static int osc_lock_upcall(void *cookie, int errcode)
+static int osc_lock_upcall(void *cookie, struct lustre_handle *lockh,
+			   int errcode)
 {
-	struct osc_lock	*olck = cookie;
-	struct cl_lock_slice *slice = &olck->ols_cl;
-	struct cl_lock *lock = slice->cls_lock;
+	struct osc_lock *oscl = cookie;
+	struct cl_lock_slice *slice = &oscl->ols_cl;
 	struct lu_env *env;
 	struct cl_env_nest nest;
+	int rc;
 
 	env = cl_env_nested_get(&nest);
-	if (!IS_ERR(env)) {
-		int rc;
+	/* should never happen, similar to osc_ldlm_blocking_ast(). */
+	LASSERT(!IS_ERR(env));
+
+	rc = ldlm_error2errno(errcode);
+	if (oscl->ols_state == OLS_ENQUEUED) {
+		oscl->ols_state = OLS_UPCALL_RECEIVED;
+	} else if (oscl->ols_state == OLS_CANCELLED) {
+		rc = -EIO;
+	} else {
+		CERROR("Impossible state: %d\n", oscl->ols_state);
+		LBUG();
+	}
 
-		cl_lock_mutex_get(env, lock);
+	if (rc == 0)
+		osc_lock_granted(env, oscl, lockh, errcode == ELDLM_OK);
 
-		LASSERT(lock->cll_state >= CLS_QUEUING);
-		if (olck->ols_state == OLS_ENQUEUED) {
-			olck->ols_state = OLS_UPCALL_RECEIVED;
-			rc = ldlm_error2errno(errcode);
-		} else if (olck->ols_state == OLS_CANCELLED) {
-			rc = -EIO;
-		} else {
-			CERROR("Impossible state: %d\n", olck->ols_state);
-			LBUG();
-		}
-		if (rc) {
-			struct ldlm_lock *dlmlock;
-
-			dlmlock = ldlm_handle2lock(&olck->ols_handle);
-			if (dlmlock) {
-				lock_res_and_lock(dlmlock);
-				spin_lock(&osc_ast_guard);
-				LASSERT(!olck->ols_lock);
-				dlmlock->l_ast_data = NULL;
-				olck->ols_handle.cookie = 0ULL;
-				spin_unlock(&osc_ast_guard);
-				ldlm_lock_fail_match_locked(dlmlock);
-				unlock_res_and_lock(dlmlock);
-				LDLM_LOCK_PUT(dlmlock);
-			}
-		} else {
-			if (olck->ols_glimpse)
-				olck->ols_glimpse = 0;
-			osc_lock_upcall0(env, olck);
-		}
+	/* Error handling, some errors are tolerable. */
+	if (oscl->ols_locklessable && rc == -EUSERS) {
+		/* This is a tolerable error, turn this lock into
+		 * lockless lock.
+		 */
+		osc_object_set_contended(cl2osc(slice->cls_obj));
+		LASSERT(slice->cls_ops == &osc_lock_ops);
+
+		/* Change this lock to ldlmlock-less lock. */
+		osc_lock_to_lockless(env, oscl, 1);
+		oscl->ols_state = OLS_GRANTED;
+		rc = 0;
+	} else if (oscl->ols_glimpse && rc == -ENAVAIL) {
+		LASSERT(oscl->ols_flags & LDLM_FL_LVB_READY);
+		osc_lock_lvb_update(env, cl2osc(slice->cls_obj),
+				    NULL, &oscl->ols_lvb);
+		/* Hide the error. */
+		rc = 0;
+	}
 
-		/* Error handling, some errors are tolerable. */
-		if (olck->ols_locklessable && rc == -EUSERS) {
-			/* This is a tolerable error, turn this lock into
-			 * lockless lock.
-			 */
-			osc_object_set_contended(cl2osc(slice->cls_obj));
-			LASSERT(slice->cls_ops == &osc_lock_ops);
+	if (oscl->ols_owner)
+		cl_sync_io_note(env, oscl->ols_owner, rc);
+	cl_env_nested_put(&nest, env);
 
-			/* Change this lock to ldlmlock-less lock. */
-			osc_lock_to_lockless(env, olck, 1);
-			olck->ols_state = OLS_GRANTED;
-			rc = 0;
-		} else if (olck->ols_glimpse && rc == -ENAVAIL) {
-			osc_lock_lvb_update(env, olck, rc);
-			cl_lock_delete(env, lock);
-			/* Hide the error. */
-			rc = 0;
-		}
-
-		if (rc == 0) {
-			/* For AGL case, the RPC sponsor may exits the cl_lock
-			*  processing without wait() called before related OSC
-			*  lock upcall(). So update the lock status according
-			*  to the enqueue result inside AGL upcall().
-			*/
-			if (olck->ols_agl) {
-				lock->cll_flags |= CLF_FROM_UPCALL;
-				cl_wait_try(env, lock);
-				lock->cll_flags &= ~CLF_FROM_UPCALL;
-				if (!olck->ols_glimpse)
-					olck->ols_agl = 0;
-			}
-			cl_lock_signal(env, lock);
-			/* del user for lock upcall cookie */
-			cl_unuse_try(env, lock);
-		} else {
-			/* del user for lock upcall cookie */
-			cl_lock_user_del(env, lock);
-			cl_lock_error(env, lock, rc);
-		}
+	return rc;
+}
 
-		/* release cookie reference, acquired by osc_lock_enqueue() */
-		cl_lock_hold_release(env, lock, "upcall", lock);
-		cl_lock_mutex_put(env, lock);
+static int osc_lock_upcall_agl(void *cookie, struct lustre_handle *lockh,
+			       int errcode)
+{
+	struct osc_object *osc = cookie;
+	struct ldlm_lock *dlmlock;
+	struct lu_env *env;
+	struct cl_env_nest nest;
 
-		lu_ref_del(&lock->cll_reference, "upcall", lock);
-		/* This maybe the last reference, so must be called after
-		 * cl_lock_mutex_put().
-		 */
-		cl_lock_put(env, lock);
+	env = cl_env_nested_get(&nest);
+	LASSERT(!IS_ERR(env));
 
-		cl_env_nested_put(&nest, env);
-	} else {
-		/* should never happen, similar to osc_ldlm_blocking_ast(). */
-		LBUG();
+	if (errcode == ELDLM_LOCK_MATCHED) {
+		errcode = ELDLM_OK;
+		goto out;
 	}
-	return errcode;
+
+	if (errcode != ELDLM_OK)
+		goto out;
+
+	dlmlock = ldlm_handle2lock(lockh);
+	LASSERT(dlmlock);
+
+	lock_res_and_lock(dlmlock);
+	LASSERT(dlmlock->l_granted_mode == dlmlock->l_req_mode);
+
+	/* there is no osc_lock associated with AGL lock */
+	osc_lock_lvb_update(env, osc, dlmlock, NULL);
+
+	unlock_res_and_lock(dlmlock);
+	LDLM_LOCK_PUT(dlmlock);
+
+out:
+	cl_object_put(env, osc2cl(osc));
+	cl_env_nested_put(&nest, env);
+	return ldlm_error2errno(errcode);
 }
 
-/**
- * Core of osc_dlm_blocking_ast() logic.
- */
-static void osc_lock_blocking(const struct lu_env *env,
-			      struct ldlm_lock *dlmlock,
-			      struct osc_lock *olck, int blocking)
+static int osc_lock_flush(struct osc_object *obj, pgoff_t start, pgoff_t end,
+			  enum cl_lock_mode mode, int discard)
 {
-	struct cl_lock *lock = olck->ols_cl.cls_lock;
+	struct lu_env *env;
+	struct cl_env_nest nest;
+	int rc = 0;
+	int rc2 = 0;
 
-	LASSERT(olck->ols_lock == dlmlock);
-	CLASSERT(OLS_BLOCKED < OLS_CANCELLED);
-	LASSERT(!osc_lock_is_lockless(olck));
+	env = cl_env_nested_get(&nest);
+	if (IS_ERR(env))
+		return PTR_ERR(env);
+
+	if (mode == CLM_WRITE) {
+		rc = osc_cache_writeback_range(env, obj, start, end, 1,
+					       discard);
+		CDEBUG(D_CACHE, "object %p: [%lu -> %lu] %d pages were %s.\n",
+		       obj, start, end, rc,
+		       discard ? "discarded" : "written back");
+		if (rc > 0)
+			rc = 0;
+	}
 
-	/*
-	 * Lock might be still addref-ed here, if e.g., blocking ast
-	 * is sent for a failed lock.
-	 */
-	osc_lock_unhold(olck);
+	rc2 = osc_lock_discard_pages(env, obj, start, end, mode);
+	if (rc == 0 && rc2 < 0)
+		rc = rc2;
 
-	if (blocking && olck->ols_state < OLS_BLOCKED)
-		/*
-		 * Move osc_lock into OLS_BLOCKED before canceling the lock,
-		 * because it recursively re-enters osc_lock_blocking(), with
-		 * the state set to OLS_CANCELLED.
-		 */
-		olck->ols_state = OLS_BLOCKED;
-	/*
-	 * cancel and destroy lock at least once no matter how blocking ast is
-	 * entered (see comment above osc_ldlm_blocking_ast() for use
-	 * cases). cl_lock_cancel() and cl_lock_delete() are idempotent.
-	 */
-	cl_lock_cancel(env, lock);
-	cl_lock_delete(env, lock);
+	cl_env_nested_put(&nest, env);
+	return rc;
 }
 
 /**
@@ -628,65 +416,63 @@ static int osc_dlm_blocking_ast0(const struct lu_env *env,
 				 struct ldlm_lock *dlmlock,
 				 void *data, int flag)
 {
-	struct osc_lock *olck;
-	struct cl_lock *lock;
-	int result;
-	int cancel;
-
-	LASSERT(flag == LDLM_CB_BLOCKING || flag == LDLM_CB_CANCELING);
-
-	cancel = 0;
-	olck = osc_ast_data_get(dlmlock);
-	if (olck) {
-		lock = olck->ols_cl.cls_lock;
-		cl_lock_mutex_get(env, lock);
-		LINVRNT(osc_lock_invariant(olck));
-		if (olck->ols_ast_wait) {
-			/* wake up osc_lock_use() */
-			cl_lock_signal(env, lock);
-			olck->ols_ast_wait = 0;
-		}
-		/*
-		 * Lock might have been canceled while this thread was
-		 * sleeping for lock mutex, but olck is pinned in memory.
-		 */
-		if (olck == dlmlock->l_ast_data) {
-			/*
-			 * NOTE: DLM sends blocking AST's for failed locks
-			 *       (that are still in pre-OLS_GRANTED state)
-			 *       too, and they have to be canceled otherwise
-			 *       DLM lock is never destroyed and stuck in
-			 *       the memory.
-			 *
-			 *       Alternatively, ldlm_cli_cancel() can be
-			 *       called here directly for osc_locks with
-			 *       ols_state < OLS_GRANTED to maintain an
-			 *       invariant that ->clo_cancel() is only called
-			 *       for locks that were granted.
-			 */
-			LASSERT(data == olck);
-			osc_lock_blocking(env, dlmlock,
-					  olck, flag == LDLM_CB_BLOCKING);
-		} else
-			cancel = 1;
-		cl_lock_mutex_put(env, lock);
-		osc_ast_data_put(env, olck);
-	} else
-		/*
-		 * DLM lock exists, but there is no cl_lock attached to it.
-		 * This is a `normal' race. cl_object and its cl_lock's can be
-		 * removed by memory pressure, together with all pages.
+	struct cl_object *obj = NULL;
+	int result = 0;
+	int discard;
+	enum cl_lock_mode mode = CLM_READ;
+
+	LASSERT(flag == LDLM_CB_CANCELING);
+
+	lock_res_and_lock(dlmlock);
+	if (dlmlock->l_granted_mode != dlmlock->l_req_mode) {
+		dlmlock->l_ast_data = NULL;
+		unlock_res_and_lock(dlmlock);
+		return 0;
+	}
+
+	discard = ldlm_is_discard_data(dlmlock);
+	if (dlmlock->l_granted_mode & (LCK_PW | LCK_GROUP))
+		mode = CLM_WRITE;
+
+	if (dlmlock->l_ast_data) {
+		obj = osc2cl(dlmlock->l_ast_data);
+		dlmlock->l_ast_data = NULL;
+
+		cl_object_get(obj);
+	}
+
+	unlock_res_and_lock(dlmlock);
+
+	/* if l_ast_data is NULL, the dlmlock was enqueued by AGL or
+	 * the object has been destroyed.
+	 */
+	if (obj) {
+		struct ldlm_extent *extent = &dlmlock->l_policy_data.l_extent;
+		struct cl_attr *attr = &osc_env_info(env)->oti_attr;
+		__u64 old_kms;
+
+		/* Destroy pages covered by the extent of the DLM lock */
+		result = osc_lock_flush(cl2osc(obj),
+					cl_index(obj, extent->start),
+					cl_index(obj, extent->end),
+					mode, discard);
+
+		/* losing a lock, update kms */
+		lock_res_and_lock(dlmlock);
+		cl_object_attr_lock(obj);
+		/* Must get the value under the lock to avoid race. */
+		old_kms = cl2osc(obj)->oo_oinfo->loi_kms;
+		/* Update the kms. Need to loop all granted locks.
+		 * Not a problem for the client
 		 */
-		cancel = (flag == LDLM_CB_BLOCKING);
+		attr->cat_kms = ldlm_extent_shift_kms(dlmlock, old_kms);
 
-	if (cancel) {
-		struct lustre_handle *lockh;
+		cl_object_attr_set(env, obj, attr, CAT_KMS);
+		cl_object_attr_unlock(obj);
+		unlock_res_and_lock(dlmlock);
 
-		lockh = &osc_env_info(env)->oti_handle;
-		ldlm_lock2handle(dlmlock, lockh);
-		result = ldlm_cli_cancel(lockh, LCF_ASYNC);
-	} else
-		result = 0;
+		cl_object_put(env, obj);
+	}
 	return result;
 }
 
@@ -736,107 +522,52 @@ static int osc_ldlm_blocking_ast(struct ldlm_lock *dlmlock,
 				 struct ldlm_lock_desc *new, void *data,
 				 int flag)
 {
-	struct lu_env *env;
-	struct cl_env_nest nest;
-	int result;
+	int result = 0;
 
-	/*
-	 * This can be called in the context of outer IO, e.g.,
-	 *
-	 *     cl_enqueue()->...
-	 *       ->osc_enqueue_base()->...
-	 *	 ->ldlm_prep_elc_req()->...
-	 *	   ->ldlm_cancel_callback()->...
-	 *	     ->osc_ldlm_blocking_ast()
-	 *
-	 * new environment has to be created to not corrupt outer context.
-	 */
-	env = cl_env_nested_get(&nest);
-	if (!IS_ERR(env)) {
-		result = osc_dlm_blocking_ast0(env, dlmlock, data, flag);
-		cl_env_nested_put(&nest, env);
-	} else {
-		result = PTR_ERR(env);
-		/*
-		 * XXX This should never happen, as cl_lock is
-		 * stuck. Pre-allocated environment a la vvp_inode_fini_env
-		 * should be used.
-		 */
-		LBUG();
-	}
-	if (result != 0) {
+	switch (flag) {
+	case LDLM_CB_BLOCKING: {
+		struct lustre_handle lockh;
+
+		ldlm_lock2handle(dlmlock, &lockh);
+		result = ldlm_cli_cancel(&lockh, LCF_ASYNC);
 		if (result == -ENODATA)
 			result = 0;
-		else
-			CERROR("BAST failed: %d\n", result);
+		break;
 	}
-	return result;
-}
+	case LDLM_CB_CANCELING: {
+		struct lu_env *env;
+		struct cl_env_nest nest;
 
-static int osc_ldlm_completion_ast(struct ldlm_lock *dlmlock,
-				   __u64 flags, void *data)
-{
-	struct cl_env_nest nest;
-	struct lu_env *env;
-	struct osc_lock *olck;
-	struct cl_lock *lock;
-	int result;
-	int dlmrc;
-
-	/* first, do dlm part of the work */
-	dlmrc = ldlm_completion_ast_async(dlmlock, flags, data);
-	/* then, notify cl_lock */
-	env = cl_env_nested_get(&nest);
-	if (!IS_ERR(env)) {
-		olck = osc_ast_data_get(dlmlock);
-		if (olck) {
-			lock = olck->ols_cl.cls_lock;
-			cl_lock_mutex_get(env, lock);
-			/*
-			 * ldlm_handle_cp_callback() copied LVB from request
-			 * to lock->l_lvb_data, store it in osc_lock.
-			 */
-			LASSERT(dlmlock->l_lvb_data);
-			lock_res_and_lock(dlmlock);
-			olck->ols_lvb = *(struct ost_lvb *)dlmlock->l_lvb_data;
-			if (!olck->ols_lock) {
-				/*
-				 * upcall (osc_lock_upcall()) hasn't yet been
-				 * called. Do nothing now, upcall will bind
-				 * olck to dlmlock and signal the waiters.
-				 *
-				 * This maintains an invariant that osc_lock
-				 * and ldlm_lock are always bound when
-				 * osc_lock is in OLS_GRANTED state.
-				 */
-			} else if (dlmlock->l_granted_mode ==
-				   dlmlock->l_req_mode) {
-				osc_lock_granted(env, olck, dlmlock, dlmrc);
-			}
-			unlock_res_and_lock(dlmlock);
+		/*
+		 * This can be called in the context of outer IO, e.g.,
+		 *
+		 *     osc_enqueue_base()->...
+		 *	 ->ldlm_prep_elc_req()->...
+		 *	   ->ldlm_cancel_callback()->...
+		 *	     ->osc_ldlm_blocking_ast()
+		 *
+		 * new environment has to be created to not corrupt outer
+		 * context.
+		 */
+		env = cl_env_nested_get(&nest);
+		if (IS_ERR(env)) {
+			result = PTR_ERR(env);
+			break;
+		}
 
-			if (dlmrc != 0) {
-				CL_LOCK_DEBUG(D_ERROR, env, lock,
-					      "dlmlock returned %d\n", dlmrc);
-				cl_lock_error(env, lock, dlmrc);
-			}
-			cl_lock_mutex_put(env, lock);
-			osc_ast_data_put(env, olck);
-			result = 0;
-		} else
-			result = -ELDLM_NO_LOCK_DATA;
+		result = osc_dlm_blocking_ast0(env, dlmlock, data, flag);
 		cl_env_nested_put(&nest, env);
-	} else
-		result = PTR_ERR(env);
-	return dlmrc ?: result;
+		break;
+		}
+	default:
+		LBUG();
+	}
+	return result;
 }
 
 static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
 {
 	struct ptlrpc_request *req = data;
-	struct osc_lock	*olck;
-	struct cl_lock *lock;
-	struct cl_object *obj;
 	struct cl_env_nest nest;
 	struct lu_env *env;
 	struct ost_lvb *lvb;
@@ -847,14 +578,16 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
 
 	env = cl_env_nested_get(&nest);
 	if (!IS_ERR(env)) {
-		/* osc_ast_data_get() has to go after environment is
-		 * allocated, because osc_ast_data() acquires a
-		 * reference to a lock, and it can only be released in
-		 * environment.
-		 */
-		olck = osc_ast_data_get(dlmlock);
-		if (olck) {
-			lock = olck->ols_cl.cls_lock;
+		struct cl_object *obj = NULL;
+
+		lock_res_and_lock(dlmlock);
+		if (dlmlock->l_ast_data) {
+			obj = osc2cl(dlmlock->l_ast_data);
+			cl_object_get(obj);
+		}
+		unlock_res_and_lock(dlmlock);
+
+		if (obj) {
 			/* Do not grab the mutex of cl_lock for glimpse.
 			 * See LU-1274 for details.
 			 * BTW, it's okay for cl_lock to be cancelled during
@@ -869,7 +602,6 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
 			result = req_capsule_server_pack(cap);
 			if (result == 0) {
 				lvb = req_capsule_server_get(cap, &RMF_DLM_LVB);
-				obj = lock->cll_descr.cld_obj;
 				result = cl_object_glimpse(env, obj, lvb);
 			}
 			if (!exp_connect_lvb_type(req->rq_export))
@@ -877,7 +609,7 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
 						   &RMF_DLM_LVB,
 						   sizeof(struct ost_lvb_v1),
 						   RCL_SERVER);
-			osc_ast_data_put(env, olck);
+			cl_object_put(env, obj);
 		} else {
 			/*
 			 * These errors are normal races, so we don't want to
@@ -888,44 +620,123 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
 			result = -ELDLM_NO_LOCK_DATA;
 		}
 		cl_env_nested_put(&nest, env);
-	} else
+	} else {
 		result = PTR_ERR(env);
+	}
 	req->rq_status = result;
 	return result;
 }
 
-static unsigned long osc_lock_weigh(const struct lu_env *env,
-				    const struct cl_lock_slice *slice)
+static int weigh_cb(const struct lu_env *env, struct cl_io *io,
+		    struct osc_page *ops, void *cbdata)
 {
-	/*
-	 * don't need to grab coh_page_guard since we don't care the exact #
-	 * of pages..
-	 */
-	return cl_object_header(slice->cls_obj)->coh_pages;
+	struct cl_page *page = ops->ops_cl.cpl_page;
+
+	if (cl_page_is_vmlocked(env, page) ||
+	    PageDirty(page->cp_vmpage) || PageWriteback(page->cp_vmpage)
+	   )
+		return CLP_GANG_ABORT;
+
+	*(pgoff_t *)cbdata = osc_index(ops) + 1;
+	return CLP_GANG_OKAY;
 }
 
-static void osc_lock_build_einfo(const struct lu_env *env,
-				 const struct cl_lock *clock,
-				 struct osc_lock *lock,
-				 struct ldlm_enqueue_info *einfo)
+static unsigned long osc_lock_weight(const struct lu_env *env,
+				     struct osc_object *oscobj,
+				     struct ldlm_extent *extent)
+{
+	struct cl_io *io = &osc_env_info(env)->oti_io;
+	struct cl_object *obj = cl_object_top(&oscobj->oo_cl);
+	pgoff_t page_index;
+	int result;
+
+	io->ci_obj = obj;
+	io->ci_ignore_layout = 1;
+	result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
+	if (result != 0)
+		return result;
+
+	page_index = cl_index(obj, extent->start);
+	do {
+		result = osc_page_gang_lookup(env, io, oscobj,
+					      page_index,
+					      cl_index(obj, extent->end),
+					      weigh_cb, (void *)&page_index);
+		if (result == CLP_GANG_ABORT)
+			break;
+		if (result == CLP_GANG_RESCHED)
+			cond_resched();
+	} while (result != CLP_GANG_OKAY);
+	cl_io_fini(env, io);
+
+	return result == CLP_GANG_ABORT ? 1 : 0;
+}
+
+/**
+ * Get the weight of dlm lock for early cancellation.
+ */
+unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock)
 {
-	enum cl_lock_mode mode;
+	struct cl_env_nest       nest;
+	struct lu_env           *env;
+	struct osc_object	*obj;
+	struct osc_lock		*oscl;
+	unsigned long            weight;
+	bool			 found = false;
+
+	might_sleep();
+	/*
+	 * osc_ldlm_weigh_ast has a complex context since it might be called
+	 * because of lock canceling, or from user's input. We have to make
+	 * a new environment for it. Probably it is implementation safe to use
+	 * the upper context because cl_lock_put don't modify environment
+	 * variables. But just in case ..
+	 */
+	env = cl_env_nested_get(&nest);
+	if (IS_ERR(env))
+		/* Mostly because lack of memory, do not eliminate this lock */
+		return 1;
 
-	mode = clock->cll_descr.cld_mode;
-	if (mode == CLM_PHANTOM)
+	LASSERT(dlmlock->l_resource->lr_type == LDLM_EXTENT);
+	obj = dlmlock->l_ast_data;
+	if (!obj) {
+		weight = 1;
+		goto out;
+	}
+
+	spin_lock(&obj->oo_ol_spin);
+	list_for_each_entry(oscl, &obj->oo_ol_list, ols_nextlock_oscobj) {
+		if (oscl->ols_dlmlock && oscl->ols_dlmlock != dlmlock)
+			continue;
+		found = true;
+	}
+	spin_unlock(&obj->oo_ol_spin);
+	if (found) {
 		/*
-		 * For now, enqueue all glimpse locks in read mode. In the
-		 * future, client might choose to enqueue LCK_PW lock for
-		 * glimpse on a file opened for write.
+		 * If the lock is being used by an IO, definitely not cancel it.
 		 */
-		mode = CLM_READ;
+		weight = 1;
+		goto out;
+	}
+
+	weight = osc_lock_weight(env, obj, &dlmlock->l_policy_data.l_extent);
+
+out:
+	cl_env_nested_put(&nest, env);
+	return weight;
+}
 
+static void osc_lock_build_einfo(const struct lu_env *env,
+				 const struct cl_lock *lock,
+				 struct osc_object *osc,
+				 struct ldlm_enqueue_info *einfo)
+{
 	einfo->ei_type = LDLM_EXTENT;
-	einfo->ei_mode = osc_cl_lock2ldlm(mode);
+	einfo->ei_mode   = osc_cl_lock2ldlm(lock->cll_descr.cld_mode);
 	einfo->ei_cb_bl = osc_ldlm_blocking_ast;
-	einfo->ei_cb_cp = osc_ldlm_completion_ast;
+	einfo->ei_cb_cp  = ldlm_completion_ast;
 	einfo->ei_cb_gl = osc_ldlm_glimpse_ast;
-	einfo->ei_cbdata = lock; /* value to be put into ->l_ast_data */
+	einfo->ei_cbdata = osc; /* value to be put into ->l_ast_data */
 }
 
 /**
@@ -981,113 +792,100 @@ static void osc_lock_to_lockless(const struct lu_env *env,
 	LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols)));
 }
 
-static int osc_lock_compatible(const struct osc_lock *qing,
-			       const struct osc_lock *qed)
+static bool osc_lock_compatible(const struct osc_lock *qing,
+				const struct osc_lock *qed)
 {
-	enum cl_lock_mode qing_mode;
-	enum cl_lock_mode qed_mode;
+	struct cl_lock_descr *qed_descr = &qed->ols_cl.cls_lock->cll_descr;
+	struct cl_lock_descr *qing_descr = &qing->ols_cl.cls_lock->cll_descr;
 
-	qing_mode = qing->ols_cl.cls_lock->cll_descr.cld_mode;
-	if (qed->ols_glimpse &&
-	    (qed->ols_state >= OLS_UPCALL_RECEIVED || qing_mode == CLM_READ))
-		return 1;
+	if (qed->ols_glimpse)
+		return true;
+
+	if (qing_descr->cld_mode == CLM_READ && qed_descr->cld_mode == CLM_READ)
+		return true;
+
+	if (qed->ols_state < OLS_GRANTED)
+		return true;
+
+	if (qed_descr->cld_mode  >= qing_descr->cld_mode &&
+	    qed_descr->cld_start <= qing_descr->cld_start &&
+	    qed_descr->cld_end   >= qing_descr->cld_end)
+		return true;
 
-	qed_mode = qed->ols_cl.cls_lock->cll_descr.cld_mode;
-	return ((qing_mode == CLM_READ) && (qed_mode == CLM_READ));
+	return false;
 }
 
-/**
- * Cancel all conflicting locks and wait for them to be destroyed.
- *
- * This function is used for two purposes:
- *
- *     - early cancel all conflicting locks before starting IO, and
- *
- *     - guarantee that pages added to the page cache by lockless IO are never
- *       covered by locks other than lockless IO lock, and, hence, are not
- *       visible to other threads.
- */
-static int osc_lock_enqueue_wait(const struct lu_env *env,
-				 const struct osc_lock *olck)
+static void osc_lock_wake_waiters(const struct lu_env *env,
+				  struct osc_object *osc,
+				  struct osc_lock *oscl)
 {
-	struct cl_lock *lock = olck->ols_cl.cls_lock;
-	struct cl_lock_descr *descr = &lock->cll_descr;
-	struct cl_object_header *hdr = cl_object_header(descr->cld_obj);
-	struct cl_lock *scan;
-	struct cl_lock *conflict = NULL;
-	int lockless = osc_lock_is_lockless(olck);
-	int rc = 0;
+	spin_lock(&osc->oo_ol_spin);
+	list_del_init(&oscl->ols_nextlock_oscobj);
+	spin_unlock(&osc->oo_ol_spin);
 
-	LASSERT(cl_lock_is_mutexed(lock));
+	spin_lock(&oscl->ols_lock);
+	while (!list_empty(&oscl->ols_waiting_list)) {
+		struct osc_lock *scan;
 
-	/* make it enqueue anyway for glimpse lock, because we actually
-	 * don't need to cancel any conflicting locks.
-	 */
-	if (olck->ols_glimpse)
-		return 0;
+		scan = list_entry(oscl->ols_waiting_list.next, struct osc_lock,
+				  ols_wait_entry);
+		list_del_init(&scan->ols_wait_entry);
 
-	spin_lock(&hdr->coh_lock_guard);
-	list_for_each_entry(scan, &hdr->coh_locks, cll_linkage) {
-		struct cl_lock_descr *cld = &scan->cll_descr;
-		const struct osc_lock *scan_ols;
+		cl_sync_io_note(env, scan->ols_owner, 0);
+	}
+	spin_unlock(&oscl->ols_lock);
+}
+
+static void osc_lock_enqueue_wait(const struct lu_env *env,
+				  struct osc_object *obj,
+				  struct osc_lock *oscl)
+{
+	struct osc_lock *tmp_oscl;
+	struct cl_lock_descr *need = &oscl->ols_cl.cls_lock->cll_descr;
+	struct cl_sync_io *waiter = &osc_env_info(env)->oti_anchor;
 
-		if (scan == lock)
+	spin_lock(&obj->oo_ol_spin);
+	list_add_tail(&oscl->ols_nextlock_oscobj, &obj->oo_ol_list);
+
+restart:
+	list_for_each_entry(tmp_oscl, &obj->oo_ol_list,
+			    ols_nextlock_oscobj) {
+		struct cl_lock_descr *descr;
+
+		if (tmp_oscl == oscl)
 			break;
 
-		if (scan->cll_state < CLS_QUEUING ||
-		    scan->cll_state == CLS_FREEING ||
-		    cld->cld_start > descr->cld_end ||
-		    cld->cld_end < descr->cld_start)
+		descr = &tmp_oscl->ols_cl.cls_lock->cll_descr;
+		if (descr->cld_start > need->cld_end ||
+		    descr->cld_end   < need->cld_start)
 			continue;
 
-		/* overlapped and living locks. */
+		/* We're not supposed to give up group lock */
+		if (descr->cld_mode == CLM_GROUP)
+			break;
 
-		/* We're not supposed to give up group lock. */
-		if (scan->cll_descr.cld_mode == CLM_GROUP) {
-			LASSERT(descr->cld_mode != CLM_GROUP ||
-				descr->cld_gid != scan->cll_descr.cld_gid);
+		if (!osc_lock_is_lockless(oscl) &&
+		    osc_lock_compatible(oscl, tmp_oscl))
 			continue;
-		}
 
-		scan_ols = osc_lock_at(scan);
+		/* wait for conflicting lock to be canceled */
+		cl_sync_io_init(waiter, 1, cl_sync_io_end);
+		oscl->ols_owner = waiter;
 
-		/* We need to cancel the compatible locks if we're enqueuing
-		 * a lockless lock, for example:
-		 * imagine that client has PR lock on [0, 1000], and thread T0
-		 * is doing lockless IO in [500, 1500] region. Concurrent
-		 * thread T1 can see lockless data in [500, 1000], which is
-		 * wrong, because these data are possibly stale.
-		 */
-		if (!lockless && osc_lock_compatible(olck, scan_ols))
-			continue;
+		spin_lock(&tmp_oscl->ols_lock);
+		/* add oscl into tmp's ols_waiting list */
+		list_add_tail(&oscl->ols_wait_entry,
+			      &tmp_oscl->ols_waiting_list);
+		spin_unlock(&tmp_oscl->ols_lock);
 
-		cl_lock_get_trust(scan);
-		conflict = scan;
-		break;
-	}
-	spin_unlock(&hdr->coh_lock_guard);
+		spin_unlock(&obj->oo_ol_spin);
+		(void)cl_sync_io_wait(env, waiter, 0);
 
-	if (conflict) {
-		if (lock->cll_descr.cld_mode == CLM_GROUP) {
-			/* we want a group lock but a previous lock request
-			 * conflicts, we do not wait but return 0 so the
-			 * request is send to the server
-			 */
-			CDEBUG(D_DLMTRACE, "group lock %p is conflicted with %p, no wait, send to server\n",
-			       lock, conflict);
-			cl_lock_put(env, conflict);
-			rc = 0;
-		} else {
-			CDEBUG(D_DLMTRACE, "lock %p is conflicted with %p, will wait\n",
-			       lock, conflict);
-			LASSERT(!lock->cll_conflict);
-			lu_ref_add(&conflict->cll_reference, "cancel-wait",
-				   lock);
-			lock->cll_conflict = conflict;
-			rc = CLO_WAIT;
-		}
+		spin_lock(&obj->oo_ol_spin);
+		oscl->ols_owner = NULL;
+		goto restart;
 	}
-	return rc;
+	spin_unlock(&obj->oo_ol_spin);
 }
 
 /**
@@ -1106,188 +904,122 @@ static int osc_lock_enqueue_wait(const struct lu_env *env,
  */
 static int osc_lock_enqueue(const struct lu_env *env,
 			    const struct cl_lock_slice *slice,
-			    struct cl_io *unused, __u32 enqflags)
+			    struct cl_io *unused, struct cl_sync_io *anchor)
 {
-	struct osc_lock *ols = cl2osc_lock(slice);
-	struct cl_lock *lock = ols->ols_cl.cls_lock;
+	struct osc_thread_info *info = osc_env_info(env);
+	struct osc_io *oio = osc_env_io(env);
+	struct osc_object *osc = cl2osc(slice->cls_obj);
+	struct osc_lock *oscl = cl2osc_lock(slice);
+	struct cl_lock *lock = slice->cls_lock;
+	struct ldlm_res_id *resname = &info->oti_resname;
+	ldlm_policy_data_t *policy = &info->oti_policy;
+	osc_enqueue_upcall_f upcall = osc_lock_upcall;
+	void *cookie = oscl;
+	bool async = false;
 	int result;
 
-	LASSERT(cl_lock_is_mutexed(lock));
-	LASSERTF(ols->ols_state == OLS_NEW,
-		 "Impossible state: %d\n", ols->ols_state);
-
-	LASSERTF(ergo(ols->ols_glimpse, lock->cll_descr.cld_mode <= CLM_READ),
-		 "lock = %p, ols = %p\n", lock, ols);
+	LASSERTF(ergo(oscl->ols_glimpse, lock->cll_descr.cld_mode <= CLM_READ),
+		 "lock = %p, ols = %p\n", lock, oscl);
 
-	result = osc_lock_enqueue_wait(env, ols);
-	if (result == 0) {
-		if (!osc_lock_is_lockless(ols)) {
-			struct osc_object *obj = cl2osc(slice->cls_obj);
-			struct osc_thread_info *info = osc_env_info(env);
-			struct ldlm_res_id *resname = &info->oti_resname;
-			ldlm_policy_data_t *policy = &info->oti_policy;
-			struct ldlm_enqueue_info *einfo = &ols->ols_einfo;
+	if (oscl->ols_state == OLS_GRANTED)
+		return 0;
 
-			/* lock will be passed as upcall cookie,
-			 * hold ref to prevent to be released.
-			 */
-			cl_lock_hold_add(env, lock, "upcall", lock);
-			/* a user for lock also */
-			cl_lock_user_add(env, lock);
-			ols->ols_state = OLS_ENQUEUED;
+	if (oscl->ols_flags & LDLM_FL_TEST_LOCK)
+		goto enqueue_base;
 
-			/*
-			 * XXX: this is possible blocking point as
-			 * ldlm_lock_match(LDLM_FL_LVB_READY) waits for
-			 * LDLM_CP_CALLBACK.
-			 */
-			ostid_build_res_name(&obj->oo_oinfo->loi_oi, resname);
-			osc_lock_build_policy(env, lock, policy);
-			result = osc_enqueue_base(osc_export(obj), resname,
-						  &ols->ols_flags, policy,
-						  &ols->ols_lvb,
-						  obj->oo_oinfo->loi_kms_valid,
-						  osc_lock_upcall,
-						  ols, einfo, &ols->ols_handle,
-						  PTLRPCD_SET, 1, ols->ols_agl);
-			if (result != 0) {
-				cl_lock_user_del(env, lock);
-				cl_lock_unhold(env, lock, "upcall", lock);
-				if (unlikely(result == -ECANCELED)) {
-					ols->ols_state = OLS_NEW;
-					result = 0;
-				}
-			}
-		} else {
-			ols->ols_state = OLS_GRANTED;
-			ols->ols_owner = osc_env_io(env);
-		}
+	if (oscl->ols_glimpse) {
+		LASSERT(equi(oscl->ols_agl, !anchor));
+		async = true;
+		goto enqueue_base;
 	}
-	LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols)));
-	return result;
-}
 
-static int osc_lock_wait(const struct lu_env *env,
-			 const struct cl_lock_slice *slice)
-{
-	struct osc_lock *olck = cl2osc_lock(slice);
-	struct cl_lock *lock = olck->ols_cl.cls_lock;
-
-	LINVRNT(osc_lock_invariant(olck));
-
-	if (olck->ols_glimpse && olck->ols_state >= OLS_UPCALL_RECEIVED) {
-		if (olck->ols_flags & LDLM_FL_LVB_READY) {
-			return 0;
-		} else if (olck->ols_agl) {
-			if (lock->cll_flags & CLF_FROM_UPCALL)
-				/* It is from enqueue RPC reply upcall for
-				 * updating state. Do not re-enqueue.
-				 */
-				return -ENAVAIL;
-			olck->ols_state = OLS_NEW;
-		} else {
-			LASSERT(lock->cll_error);
-			return lock->cll_error;
-		}
+	osc_lock_enqueue_wait(env, osc, oscl);
+
+	/* we can grant lockless lock right after all conflicting locks
+	 * are canceled.
+	 */
+	if (osc_lock_is_lockless(oscl)) {
+		oscl->ols_state = OLS_GRANTED;
+		oio->oi_lockless = 1;
+		return 0;
 	}
 
-	if (olck->ols_state == OLS_NEW) {
-		int rc;
-
-		LASSERT(olck->ols_agl);
-		olck->ols_agl = 0;
-		olck->ols_flags &= ~LDLM_FL_BLOCK_NOWAIT;
-		rc = osc_lock_enqueue(env, slice, NULL, CEF_ASYNC | CEF_MUST);
-		if (rc != 0)
-			return rc;
-		else
-			return CLO_REENQUEUED;
+enqueue_base:
+	oscl->ols_state = OLS_ENQUEUED;
+	if (anchor) {
+		atomic_inc(&anchor->csi_sync_nr);
+		oscl->ols_owner = anchor;
 	}
 
-	LASSERT(equi(olck->ols_state >= OLS_UPCALL_RECEIVED &&
-		     lock->cll_error == 0, olck->ols_lock));
+	/**
+	 * DLM lock's ast data must be osc_object;
+	 * if glimpse or AGL lock, async of osc_enqueue_base() must be true,
+	 * DLM's enqueue callback set to osc_lock_upcall() with cookie as
+	 * osc_lock.
+	 */
+	ostid_build_res_name(&osc->oo_oinfo->loi_oi, resname);
+	osc_lock_build_einfo(env, lock, osc, &oscl->ols_einfo);
+	osc_lock_build_policy(env, lock, policy);
+	if (oscl->ols_agl) {
+		oscl->ols_einfo.ei_cbdata = NULL;
+		/* hold a reference for callback */
+		cl_object_get(osc2cl(osc));
+		upcall = osc_lock_upcall_agl;
+		cookie = osc;
+	}
+	result = osc_enqueue_base(osc_export(osc), resname, &oscl->ols_flags,
+				  policy, &oscl->ols_lvb,
+				  osc->oo_oinfo->loi_kms_valid,
+				  upcall, cookie,
+				  &oscl->ols_einfo, PTLRPCD_SET, async,
+				  oscl->ols_agl);
+	if (result != 0) {
+		oscl->ols_state = OLS_CANCELLED;
+		osc_lock_wake_waiters(env, osc, oscl);
 
-	return lock->cll_error ?: olck->ols_state >= OLS_GRANTED ? 0 : CLO_WAIT;
+		/* hide error for AGL lock. */
+		if (oscl->ols_agl) {
+			cl_object_put(env, osc2cl(osc));
+			result = 0;
+		}
+		if (anchor)
+			cl_sync_io_note(env, anchor, result);
+	} else {
+		if (osc_lock_is_lockless(oscl)) {
+			oio->oi_lockless = 1;
+		} else if (!async) {
+			LASSERT(oscl->ols_state == OLS_GRANTED);
+			LASSERT(oscl->ols_hold);
+			LASSERT(oscl->ols_dlmlock);
+		}
+	}
+	return result;
 }
 
 /**
- * An implementation of cl_lock_operations::clo_use() method that pins cached
- * lock.
+ * Breaks a link between osc_lock and dlm_lock.
  */
-static int osc_lock_use(const struct lu_env *env,
-			const struct cl_lock_slice *slice)
+static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck)
 {
-	struct osc_lock *olck = cl2osc_lock(slice);
-	int rc;
-
-	LASSERT(!olck->ols_hold);
+	struct ldlm_lock *dlmlock;
 
-	/*
-	 * Atomically check for LDLM_FL_CBPENDING and addref a lock if this
-	 * flag is not set. This protects us from a concurrent blocking ast.
-	 */
-	rc = ldlm_lock_addref_try(&olck->ols_handle, olck->ols_einfo.ei_mode);
-	if (rc == 0) {
-		olck->ols_hold = 1;
-		olck->ols_state = OLS_GRANTED;
-	} else {
-		struct cl_lock *lock;
+	dlmlock = olck->ols_dlmlock;
+	if (!dlmlock)
+		return;
 
-		/*
-		 * Lock is being cancelled somewhere within
-		 * ldlm_handle_bl_callback(): LDLM_FL_CBPENDING is already
-		 * set, but osc_ldlm_blocking_ast() hasn't yet acquired
-		 * cl_lock mutex.
-		 */
-		lock = slice->cls_lock;
-		LASSERT(lock->cll_state == CLS_INTRANSIT);
-		LASSERT(lock->cll_users > 0);
-		/* set a flag for osc_dlm_blocking_ast0() to signal the
-		 * lock.
-		 */
-		olck->ols_ast_wait = 1;
-		rc = CLO_WAIT;
+	if (olck->ols_hold) {
+		olck->ols_hold = 0;
+		osc_cancel_base(&olck->ols_handle, olck->ols_einfo.ei_mode);
+		olck->ols_handle.cookie = 0ULL;
 	}
-	return rc;
-}
 
-static int osc_lock_flush(struct osc_lock *ols, int discard)
-{
-	struct cl_lock *lock = ols->ols_cl.cls_lock;
-	struct cl_env_nest nest;
-	struct lu_env *env;
-	int result = 0;
-
-	env = cl_env_nested_get(&nest);
-	if (!IS_ERR(env)) {
-		struct osc_object *obj = cl2osc(ols->ols_cl.cls_obj);
-		struct cl_lock_descr *descr = &lock->cll_descr;
-		int rc = 0;
-
-		if (descr->cld_mode >= CLM_WRITE) {
-			result = osc_cache_writeback_range(env, obj,
-							   descr->cld_start,
-							   descr->cld_end,
-							   1, discard);
-			LDLM_DEBUG(ols->ols_lock,
-				   "lock %p: %d pages were %s.\n", lock, result,
-				   discard ? "discarded" : "written");
-			if (result > 0)
-				result = 0;
-		}
+	olck->ols_dlmlock = NULL;
 
-		rc = cl_lock_discard_pages(env, lock);
-		if (result == 0 && rc < 0)
-			result = rc;
-
-		cl_env_nested_put(&nest, env);
-	} else
-		result = PTR_ERR(env);
-	if (result == 0) {
-		ols->ols_flush = 1;
-		LINVRNT(!osc_lock_has_pages(ols));
-	}
-	return result;
+	/* release a reference taken in osc_lock_upcall(). */
+	LASSERT(olck->ols_has_ref);
+	lu_ref_del(&dlmlock->l_reference, "osc_lock", olck);
+	LDLM_LOCK_RELEASE(dlmlock);
+	olck->ols_has_ref = 0;
 }
 
 /**
@@ -1307,96 +1039,16 @@ static int osc_lock_flush(struct osc_lock *ols, int discard)
 static void osc_lock_cancel(const struct lu_env *env,
 			    const struct cl_lock_slice *slice)
 {
-	struct cl_lock *lock = slice->cls_lock;
-	struct osc_lock *olck = cl2osc_lock(slice);
-	struct ldlm_lock *dlmlock = olck->ols_lock;
-	int result = 0;
-	int discard;
-
-	LASSERT(cl_lock_is_mutexed(lock));
-	LINVRNT(osc_lock_invariant(olck));
-
-	if (dlmlock) {
-		int do_cancel;
-
-		discard = !!(dlmlock->l_flags & LDLM_FL_DISCARD_DATA);
-		if (olck->ols_state >= OLS_GRANTED)
-			result = osc_lock_flush(olck, discard);
-		osc_lock_unhold(olck);
-
-		lock_res_and_lock(dlmlock);
-		/* Now that we're the only user of dlm read/write reference,
-		 * mostly the ->l_readers + ->l_writers should be zero.
-		 * However, there is a corner case.
-		 * See bug 18829 for details.
-		 */
-		do_cancel = (dlmlock->l_readers == 0 &&
-			     dlmlock->l_writers == 0);
-		dlmlock->l_flags |= LDLM_FL_CBPENDING;
-		unlock_res_and_lock(dlmlock);
-		if (do_cancel)
-			result = ldlm_cli_cancel(&olck->ols_handle, LCF_ASYNC);
-		if (result < 0)
-			CL_LOCK_DEBUG(D_ERROR, env, lock,
-				      "lock %p cancel failure with error(%d)\n",
-				      lock, result);
-	}
-	olck->ols_state = OLS_CANCELLED;
-	olck->ols_flags &= ~LDLM_FL_LVB_READY;
-	osc_lock_detach(env, olck);
-}
-
-static int osc_lock_has_pages(struct osc_lock *olck)
-{
-	return 0;
-}
-
-static void osc_lock_delete(const struct lu_env *env,
-			    const struct cl_lock_slice *slice)
-{
-	struct osc_lock *olck;
+	struct osc_object *obj  = cl2osc(slice->cls_obj);
+	struct osc_lock *oscl = cl2osc_lock(slice);
 
-	olck = cl2osc_lock(slice);
-	if (olck->ols_glimpse) {
-		LASSERT(!olck->ols_hold);
-		LASSERT(!olck->ols_lock);
-		return;
-	}
+	LINVRNT(osc_lock_invariant(oscl));
 
-	LINVRNT(osc_lock_invariant(olck));
-	LINVRNT(!osc_lock_has_pages(olck));
+	osc_lock_detach(env, oscl);
+	oscl->ols_state = OLS_CANCELLED;
+	oscl->ols_flags &= ~LDLM_FL_LVB_READY;
 
-	osc_lock_unhold(olck);
-	osc_lock_detach(env, olck);
-}
-
-/**
- * Implements cl_lock_operations::clo_state() method for osc layer.
- *
- * Maintains osc_lock::ols_owner field.
- *
- * This assumes that lock always enters CLS_HELD (from some other state) in
- * the same IO context as one that requested the lock. This should not be a
- * problem, because context is by definition shared by all activity pertaining
- * to the same high-level IO.
- */
-static void osc_lock_state(const struct lu_env *env,
-			   const struct cl_lock_slice *slice,
-			   enum cl_lock_state state)
-{
-	struct osc_lock *lock = cl2osc_lock(slice);
-
-	/*
-	 * XXX multiple io contexts can use the lock at the same time.
-	 */
-	LINVRNT(osc_lock_invariant(lock));
-	if (state == CLS_HELD && slice->cls_lock->cll_state != CLS_HELD) {
-		struct osc_io *oio = osc_env_io(env);
-
-		LASSERT(!lock->ols_owner);
-		lock->ols_owner = oio;
-	} else if (state != CLS_HELD)
-		lock->ols_owner = NULL;
+	osc_lock_wake_waiters(env, obj, oscl);
 }
 
 static int osc_lock_print(const struct lu_env *env, void *cookie,
@@ -1404,221 +1056,162 @@ static int osc_lock_print(const struct lu_env *env, void *cookie,
 {
 	struct osc_lock *lock = cl2osc_lock(slice);
 
-	/*
-	 * XXX print ldlm lock and einfo properly.
-	 */
 	(*p)(env, cookie, "%p %#16llx %#llx %d %p ",
-	     lock->ols_lock, lock->ols_flags, lock->ols_handle.cookie,
+	     lock->ols_dlmlock, lock->ols_flags, lock->ols_handle.cookie,
 	     lock->ols_state, lock->ols_owner);
 	osc_lvb_print(env, cookie, p, &lock->ols_lvb);
 	return 0;
 }
 
-static int osc_lock_fits_into(const struct lu_env *env,
-			      const struct cl_lock_slice *slice,
-			      const struct cl_lock_descr *need,
-			      const struct cl_io *io)
-{
-	struct osc_lock *ols = cl2osc_lock(slice);
-
-	if (need->cld_enq_flags & CEF_NEVER)
-		return 0;
-
-	if (ols->ols_state >= OLS_CANCELLED)
-		return 0;
-
-	if (need->cld_mode == CLM_PHANTOM) {
-		if (ols->ols_agl)
-			return !(ols->ols_state > OLS_RELEASED);
-
-		/*
-		 * Note: the QUEUED lock can't be matched here, otherwise
-		 * it might cause the deadlocks.
-		 * In read_process,
-		 * P1: enqueued read lock, create sublock1
-		 * P2: enqueued write lock, create sublock2(conflicted
-		 *     with sublock1).
-		 * P1: Grant read lock.
-		 * P1: enqueued glimpse lock(with holding sublock1_read),
-		 *     matched with sublock2, waiting sublock2 to be granted.
-		 *     But sublock2 can not be granted, because P1
-		 *     will not release sublock1. Bang!
-		 */
-		if (ols->ols_state < OLS_GRANTED ||
-		    ols->ols_state > OLS_RELEASED)
-			return 0;
-	} else if (need->cld_enq_flags & CEF_MUST) {
-		/*
-		 * If the lock hasn't ever enqueued, it can't be matched
-		 * because enqueue process brings in many information
-		 * which can be used to determine things such as lockless,
-		 * CEF_MUST, etc.
-		 */
-		if (ols->ols_state < OLS_UPCALL_RECEIVED &&
-		    ols->ols_locklessable)
-			return 0;
-	}
-	return 1;
-}
-
 static const struct cl_lock_operations osc_lock_ops = {
 	.clo_fini    = osc_lock_fini,
 	.clo_enqueue = osc_lock_enqueue,
-	.clo_wait    = osc_lock_wait,
-	.clo_unuse   = osc_lock_unuse,
-	.clo_use     = osc_lock_use,
-	.clo_delete  = osc_lock_delete,
-	.clo_state   = osc_lock_state,
 	.clo_cancel  = osc_lock_cancel,
-	.clo_weigh   = osc_lock_weigh,
 	.clo_print   = osc_lock_print,
-	.clo_fits_into = osc_lock_fits_into,
 };
 
-static int osc_lock_lockless_unuse(const struct lu_env *env,
-				   const struct cl_lock_slice *slice)
-{
-	struct osc_lock *ols = cl2osc_lock(slice);
-	struct cl_lock *lock = slice->cls_lock;
-
-	LASSERT(ols->ols_state == OLS_GRANTED);
-	LINVRNT(osc_lock_invariant(ols));
-
-	cl_lock_cancel(env, lock);
-	cl_lock_delete(env, lock);
-	return 0;
-}
-
 static void osc_lock_lockless_cancel(const struct lu_env *env,
 				     const struct cl_lock_slice *slice)
 {
 	struct osc_lock *ols = cl2osc_lock(slice);
+	struct osc_object *osc = cl2osc(slice->cls_obj);
+	struct cl_lock_descr *descr = &slice->cls_lock->cll_descr;
 	int result;
 
-	result = osc_lock_flush(ols, 0);
+	LASSERT(!ols->ols_dlmlock);
+	result = osc_lock_flush(osc, descr->cld_start, descr->cld_end,
+				descr->cld_mode, 0);
 	if (result)
 		CERROR("Pages for lockless lock %p were not purged(%d)\n",
 		       ols, result);
-	ols->ols_state = OLS_CANCELLED;
-}
-
-static int osc_lock_lockless_wait(const struct lu_env *env,
-				  const struct cl_lock_slice *slice)
-{
-	struct osc_lock *olck = cl2osc_lock(slice);
-	struct cl_lock *lock = olck->ols_cl.cls_lock;
 
-	LINVRNT(osc_lock_invariant(olck));
-	LASSERT(olck->ols_state >= OLS_UPCALL_RECEIVED);
-
-	return lock->cll_error;
+	osc_lock_wake_waiters(env, osc, ols);
 }
 
-static void osc_lock_lockless_state(const struct lu_env *env,
-				    const struct cl_lock_slice *slice,
-				    enum cl_lock_state state)
-{
-	struct osc_lock *lock = cl2osc_lock(slice);
+static const struct cl_lock_operations osc_lock_lockless_ops = {
+	.clo_fini	= osc_lock_fini,
+	.clo_enqueue	= osc_lock_enqueue,
+	.clo_cancel	= osc_lock_lockless_cancel,
+	.clo_print	= osc_lock_print
+};
 
-	LINVRNT(osc_lock_invariant(lock));
-	if (state == CLS_HELD) {
-		struct osc_io *oio = osc_env_io(env);
+static void osc_lock_set_writer(const struct lu_env *env,
+				const struct cl_io *io,
+				struct cl_object *obj, struct osc_lock *oscl)
+{
+	struct cl_lock_descr *descr = &oscl->ols_cl.cls_lock->cll_descr;
+	pgoff_t io_start;
+	pgoff_t io_end;
 
-		LASSERT(ergo(lock->ols_owner, lock->ols_owner == oio));
-		lock->ols_owner = oio;
+	if (!cl_object_same(io->ci_obj, obj))
+		return;
 
-		/* set the io to be lockless if this lock is for io's
-		 * host object
-		 */
-		if (cl_object_same(oio->oi_cl.cis_obj, slice->cls_obj))
-			oio->oi_lockless = 1;
+	if (likely(io->ci_type == CIT_WRITE)) {
+		io_start = cl_index(obj, io->u.ci_rw.crw_pos);
+		io_end = cl_index(obj, io->u.ci_rw.crw_pos +
+				  io->u.ci_rw.crw_count - 1);
+		if (cl_io_is_append(io)) {
+			io_start = 0;
+			io_end = CL_PAGE_EOF;
+		}
+	} else {
+		LASSERT(cl_io_is_mkwrite(io));
+		io_start = io->u.ci_fault.ft_index;
+		io_end = io->u.ci_fault.ft_index;
 	}
-}
 
-static int osc_lock_lockless_fits_into(const struct lu_env *env,
-				       const struct cl_lock_slice *slice,
-				       const struct cl_lock_descr *need,
-				       const struct cl_io *io)
-{
-	struct osc_lock *lock = cl2osc_lock(slice);
-
-	if (!(need->cld_enq_flags & CEF_NEVER))
-		return 0;
+	if (descr->cld_mode >= CLM_WRITE &&
+	    descr->cld_start <= io_start && descr->cld_end >= io_end) {
+		struct osc_io *oio = osc_env_io(env);
 
-	/* lockless lock should only be used by its owning io. b22147 */
-	return (lock->ols_owner == osc_env_io(env));
+		/* There must be only one lock to match the write region */
+		LASSERT(!oio->oi_write_osclock);
+		oio->oi_write_osclock = oscl;
+	}
 }
 
-static const struct cl_lock_operations osc_lock_lockless_ops = {
-	.clo_fini      = osc_lock_fini,
-	.clo_enqueue   = osc_lock_enqueue,
-	.clo_wait      = osc_lock_lockless_wait,
-	.clo_unuse     = osc_lock_lockless_unuse,
-	.clo_state     = osc_lock_lockless_state,
-	.clo_fits_into = osc_lock_lockless_fits_into,
-	.clo_cancel    = osc_lock_lockless_cancel,
-	.clo_print     = osc_lock_print
-};
-
 int osc_lock_init(const struct lu_env *env,
 		  struct cl_object *obj, struct cl_lock *lock,
-		  const struct cl_io *unused)
+		  const struct cl_io *io)
 {
-	struct osc_lock *clk;
-	int result;
-
-	clk = kmem_cache_zalloc(osc_lock_kmem, GFP_NOFS);
-	if (clk) {
-		__u32 enqflags = lock->cll_descr.cld_enq_flags;
+	struct osc_lock *oscl;
+	__u32 enqflags = lock->cll_descr.cld_enq_flags;
+
+	oscl = kmem_cache_zalloc(osc_lock_kmem, GFP_NOFS);
+	if (!oscl)
+		return -ENOMEM;
+
+	oscl->ols_state = OLS_NEW;
+	spin_lock_init(&oscl->ols_lock);
+	INIT_LIST_HEAD(&oscl->ols_waiting_list);
+	INIT_LIST_HEAD(&oscl->ols_wait_entry);
+	INIT_LIST_HEAD(&oscl->ols_nextlock_oscobj);
+
+	oscl->ols_flags = osc_enq2ldlm_flags(enqflags);
+	oscl->ols_agl = !!(enqflags & CEF_AGL);
+	if (oscl->ols_agl)
+		oscl->ols_flags |= LDLM_FL_BLOCK_NOWAIT;
+	if (oscl->ols_flags & LDLM_FL_HAS_INTENT) {
+		oscl->ols_flags |= LDLM_FL_BLOCK_GRANTED;
+		oscl->ols_glimpse = 1;
+	}
 
-		osc_lock_build_einfo(env, lock, clk, &clk->ols_einfo);
-		atomic_set(&clk->ols_pageref, 0);
-		clk->ols_state = OLS_NEW;
+	cl_lock_slice_add(lock, &oscl->ols_cl, obj, &osc_lock_ops);
 
-		clk->ols_flags = osc_enq2ldlm_flags(enqflags);
-		clk->ols_agl = !!(enqflags & CEF_AGL);
-		if (clk->ols_agl)
-			clk->ols_flags |= LDLM_FL_BLOCK_NOWAIT;
-		if (clk->ols_flags & LDLM_FL_HAS_INTENT)
-			clk->ols_glimpse = 1;
+	if (!(enqflags & CEF_MUST))
+		/* try to convert this lock to a lockless lock */
+		osc_lock_to_lockless(env, oscl, (enqflags & CEF_NEVER));
+	if (oscl->ols_locklessable && !(enqflags & CEF_DISCARD_DATA))
+		oscl->ols_flags |= LDLM_FL_DENY_ON_CONTENTION;
 
-		cl_lock_slice_add(lock, &clk->ols_cl, obj, &osc_lock_ops);
+	if (io->ci_type == CIT_WRITE || cl_io_is_mkwrite(io))
+		osc_lock_set_writer(env, io, obj, oscl);
 
-		if (!(enqflags & CEF_MUST))
-			/* try to convert this lock to a lockless lock */
-			osc_lock_to_lockless(env, clk, (enqflags & CEF_NEVER));
-		if (clk->ols_locklessable && !(enqflags & CEF_DISCARD_DATA))
-			clk->ols_flags |= LDLM_FL_DENY_ON_CONTENTION;
 
-		LDLM_DEBUG_NOLOCK("lock %p, osc lock %p, flags %llx",
-				  lock, clk, clk->ols_flags);
+	LDLM_DEBUG_NOLOCK("lock %p, osc lock %p, flags %llx",
+			  lock, oscl, oscl->ols_flags);
 
-		result = 0;
-	} else
-		result = -ENOMEM;
-	return result;
+	return 0;
 }
 
-int osc_dlm_lock_pageref(struct ldlm_lock *dlm)
+/**
+ * Finds an existing lock covering given index and optionally different from a
+ * given \a except lock.
+ */
+struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
+				       struct osc_object *obj, pgoff_t index,
+				       int pending, int canceling)
 {
-	struct osc_lock *olock;
-	int rc = 0;
-
-	spin_lock(&osc_ast_guard);
-	olock = dlm->l_ast_data;
+	struct osc_thread_info *info = osc_env_info(env);
+	struct ldlm_res_id *resname = &info->oti_resname;
+	ldlm_policy_data_t *policy  = &info->oti_policy;
+	struct lustre_handle lockh;
+	struct ldlm_lock *lock = NULL;
+	enum ldlm_mode mode;
+	__u64 flags;
+
+	ostid_build_res_name(&obj->oo_oinfo->loi_oi, resname);
+	osc_index2policy(policy, osc2cl(obj), index, index);
+	policy->l_extent.gid = LDLM_GID_ANY;
+
+	flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
+	if (pending)
+		flags |= LDLM_FL_CBPENDING;
 	/*
-	 * there's a very rare race with osc_page_addref_lock(), but that
-	 * doesn't matter because in the worst case we don't cancel a lock
-	 * which we actually can, that's no harm.
+	 * It is fine to match any group lock since there could be only one
+	 * with a uniq gid and it conflicts with all other lock modes too
 	 */
-	if (olock &&
-	    atomic_add_return(_PAGEREF_MAGIC,
-			      &olock->ols_pageref) != _PAGEREF_MAGIC) {
-		atomic_sub(_PAGEREF_MAGIC, &olock->ols_pageref);
-		rc = 1;
+again:
+	mode = ldlm_lock_match(osc_export(obj)->exp_obd->obd_namespace,
+			       flags, resname, LDLM_EXTENT, policy,
+			       LCK_PR | LCK_PW | LCK_GROUP, &lockh, canceling);
+	if (mode != 0) {
+		lock = ldlm_handle2lock(&lockh);
+		/* RACE: the lock is cancelled so let's try again */
+		if (unlikely(!lock))
+			goto again;
 	}
-	spin_unlock(&osc_ast_guard);
-	return rc;
+	return lock;
 }
 
 /** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_object.c b/drivers/staging/lustre/lustre/osc/osc_object.c
index 9d474fcdd9a7..d211d1905e83 100644
--- a/drivers/staging/lustre/lustre/osc/osc_object.c
+++ b/drivers/staging/lustre/lustre/osc/osc_object.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -36,6 +32,7 @@
  * Implementation of cl_object for OSC layer.
  *
  *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  */
 
 #define DEBUG_SUBSYSTEM S_OSC
@@ -94,6 +91,9 @@ static int osc_object_init(const struct lu_env *env, struct lu_object *obj,
 	atomic_set(&osc->oo_nr_reads, 0);
 	atomic_set(&osc->oo_nr_writes, 0);
 	spin_lock_init(&osc->oo_lock);
+	spin_lock_init(&osc->oo_tree_lock);
+	spin_lock_init(&osc->oo_ol_spin);
+	INIT_LIST_HEAD(&osc->oo_ol_list);
 
 	cl_object_page_init(lu2cl(obj), sizeof(struct osc_page));
 
@@ -120,6 +120,7 @@ static void osc_object_free(const struct lu_env *env, struct lu_object *obj)
 	LASSERT(list_empty(&osc->oo_reading_exts));
 	LASSERT(atomic_read(&osc->oo_nr_reads) == 0);
 	LASSERT(atomic_read(&osc->oo_nr_writes) == 0);
+	LASSERT(list_empty(&osc->oo_ol_list));
 
 	lu_object_fini(obj);
 	kmem_cache_free(osc_object_kmem, osc);
@@ -192,6 +193,32 @@ static int osc_object_glimpse(const struct lu_env *env,
 	return 0;
 }
 
+static int osc_object_ast_clear(struct ldlm_lock *lock, void *data)
+{
+	LASSERT(lock->l_granted_mode == lock->l_req_mode);
+	if (lock->l_ast_data == data)
+		lock->l_ast_data = NULL;
+	return LDLM_ITER_CONTINUE;
+}
+
+static int osc_object_prune(const struct lu_env *env, struct cl_object *obj)
+{
+	struct osc_object       *osc = cl2osc(obj);
+	struct ldlm_res_id      *resname = &osc_env_info(env)->oti_resname;
+
+	LASSERTF(osc->oo_npages == 0,
+		 DFID "still have %lu pages, obj: %p, osc: %p\n",
+		 PFID(lu_object_fid(&obj->co_lu)), osc->oo_npages, obj, osc);
+
+	/* DLM locks don't hold a reference of osc_object so we have to
+	 * clear it before the object is being destroyed.
+	 */
+	ostid_build_res_name(&osc->oo_oinfo->loi_oi, resname);
+	ldlm_resource_iterate(osc_export(osc)->exp_obd->obd_namespace, resname,
+			      osc_object_ast_clear, osc);
+	return 0;
+}
+
 void osc_object_set_contended(struct osc_object *obj)
 {
 	obj->oo_contention_time = cfs_time_current();
@@ -236,12 +263,12 @@ static const struct cl_object_operations osc_ops = {
 	.coo_io_init   = osc_io_init,
 	.coo_attr_get  = osc_attr_get,
 	.coo_attr_set  = osc_attr_set,
-	.coo_glimpse   = osc_object_glimpse
+	.coo_glimpse   = osc_object_glimpse,
+	.coo_prune     = osc_object_prune
 };
 
 static const struct lu_object_operations osc_lu_obj_ops = {
 	.loo_object_init      = osc_object_init,
-	.loo_object_delete    = NULL,
 	.loo_object_release   = NULL,
 	.loo_object_free      = osc_object_free,
 	.loo_object_print     = osc_object_print,
@@ -261,8 +288,9 @@ struct lu_object *osc_object_alloc(const struct lu_env *env,
 		lu_object_init(obj, NULL, dev);
 		osc->oo_cl.co_ops = &osc_ops;
 		obj->lo_ops = &osc_lu_obj_ops;
-	} else
+	} else {
 		obj = NULL;
+	}
 	return obj;
 }
 
diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c
index ce9ddd515f64..355f496a2093 100644
--- a/drivers/staging/lustre/lustre/osc/osc_page.c
+++ b/drivers/staging/lustre/lustre/osc/osc_page.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -36,14 +32,15 @@
  * Implementation of cl_page for OSC layer.
  *
  *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  */
 
 #define DEBUG_SUBSYSTEM S_OSC
 
 #include "osc_cl_internal.h"
 
-static void osc_lru_del(struct client_obd *cli, struct osc_page *opg, bool del);
-static void osc_lru_add(struct client_obd *cli, struct osc_page *opg);
+static void osc_lru_del(struct client_obd *cli, struct osc_page *opg);
+static void osc_lru_use(struct client_obd *cli, struct osc_page *opg);
 static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
 			   struct osc_page *opg);
 
@@ -51,30 +48,14 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
  *  @{
  */
 
-static int osc_page_protected(const struct lu_env *env,
-			      const struct osc_page *opg,
-			      enum cl_lock_mode mode, int unref)
-{
-	return 1;
-}
-
 /*****************************************************************************
  *
  * Page operations.
  *
  */
-static void osc_page_fini(const struct lu_env *env,
-			  struct cl_page_slice *slice)
-{
-	struct osc_page *opg = cl2osc_page(slice);
-
-	CDEBUG(D_TRACE, "%p\n", opg);
-	LASSERT(!opg->ops_lock);
-}
-
 static void osc_page_transfer_get(struct osc_page *opg, const char *label)
 {
-	struct cl_page *page = cl_page_top(opg->ops_cl.cpl_page);
+	struct cl_page *page = opg->ops_cl.cpl_page;
 
 	LASSERT(!opg->ops_transfer_pinned);
 	cl_page_get(page);
@@ -85,11 +66,11 @@ static void osc_page_transfer_get(struct osc_page *opg, const char *label)
 static void osc_page_transfer_put(const struct lu_env *env,
 				  struct osc_page *opg)
 {
-	struct cl_page *page = cl_page_top(opg->ops_cl.cpl_page);
+	struct cl_page *page = opg->ops_cl.cpl_page;
 
 	if (opg->ops_transfer_pinned) {
-		lu_ref_del(&page->cp_reference, "transfer", page);
 		opg->ops_transfer_pinned = 0;
+		lu_ref_del(&page->cp_reference, "transfer", page);
 		cl_page_put(env, page);
 	}
 }
@@ -104,10 +85,7 @@ static void osc_page_transfer_add(const struct lu_env *env,
 {
 	struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
 
-	/* ops_lru and ops_inflight share the same field, so take it from LRU
-	 * first and then use it as inflight.
-	 */
-	osc_lru_del(osc_cli(obj), opg, false);
+	osc_lru_use(osc_cli(obj), opg);
 
 	spin_lock(&obj->oo_seatbelt);
 	list_add(&opg->ops_inflight, &obj->oo_inflight[crt]);
@@ -115,16 +93,12 @@ static void osc_page_transfer_add(const struct lu_env *env,
 	spin_unlock(&obj->oo_seatbelt);
 }
 
-static int osc_page_cache_add(const struct lu_env *env,
-			      const struct cl_page_slice *slice,
-			      struct cl_io *io)
+int osc_page_cache_add(const struct lu_env *env,
+		       const struct cl_page_slice *slice, struct cl_io *io)
 {
-	struct osc_io *oio = osc_env_io(env);
 	struct osc_page *opg = cl2osc_page(slice);
 	int result;
 
-	LINVRNT(osc_page_protected(env, opg, CLM_WRITE, 0));
-
 	osc_page_transfer_get(opg, "transfer\0cache");
 	result = osc_queue_async_io(env, io, opg);
 	if (result != 0)
@@ -132,17 +106,6 @@ static int osc_page_cache_add(const struct lu_env *env,
 	else
 		osc_page_transfer_add(env, opg, CRT_WRITE);
 
-	/* for sync write, kernel will wait for this page to be flushed before
-	 * osc_io_end() is called, so release it earlier.
-	 * for mkwrite(), it's known there is no further pages.
-	 */
-	if (cl_io_is_sync_write(io) || cl_io_is_mkwrite(io)) {
-		if (oio->oi_active) {
-			osc_extent_release(env, oio->oi_active);
-			oio->oi_active = NULL;
-		}
-	}
-
 	return result;
 }
 
@@ -154,102 +117,25 @@ void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj,
 	policy->l_extent.end = cl_offset(obj, end + 1) - 1;
 }
 
-static int osc_page_addref_lock(const struct lu_env *env,
-				struct osc_page *opg,
-				struct cl_lock *lock)
-{
-	struct osc_lock *olock;
-	int rc;
-
-	LASSERT(!opg->ops_lock);
-
-	olock = osc_lock_at(lock);
-	if (atomic_inc_return(&olock->ols_pageref) <= 0) {
-		atomic_dec(&olock->ols_pageref);
-		rc = -ENODATA;
-	} else {
-		cl_lock_get(lock);
-		opg->ops_lock = lock;
-		rc = 0;
-	}
-	return rc;
-}
-
-static void osc_page_putref_lock(const struct lu_env *env,
-				 struct osc_page *opg)
-{
-	struct cl_lock *lock = opg->ops_lock;
-	struct osc_lock *olock;
-
-	LASSERT(lock);
-	olock = osc_lock_at(lock);
-
-	atomic_dec(&olock->ols_pageref);
-	opg->ops_lock = NULL;
-
-	cl_lock_put(env, lock);
-}
-
 static int osc_page_is_under_lock(const struct lu_env *env,
 				  const struct cl_page_slice *slice,
-				  struct cl_io *unused)
+				  struct cl_io *unused, pgoff_t *max_index)
 {
-	struct cl_lock *lock;
+	struct osc_page *opg = cl2osc_page(slice);
+	struct ldlm_lock *dlmlock;
 	int result = -ENODATA;
 
-	lock = cl_lock_at_page(env, slice->cpl_obj, slice->cpl_page,
-			       NULL, 1, 0);
-	if (lock) {
-		if (osc_page_addref_lock(env, cl2osc_page(slice), lock) == 0)
-			result = -EBUSY;
-		cl_lock_put(env, lock);
+	dlmlock = osc_dlmlock_at_pgoff(env, cl2osc(slice->cpl_obj),
+				       osc_index(opg), 1, 0);
+	if (dlmlock) {
+		*max_index = cl_index(slice->cpl_obj,
+				      dlmlock->l_policy_data.l_extent.end);
+		LDLM_LOCK_PUT(dlmlock);
+		result = 0;
 	}
 	return result;
 }
 
-static void osc_page_disown(const struct lu_env *env,
-			    const struct cl_page_slice *slice,
-			    struct cl_io *io)
-{
-	struct osc_page *opg = cl2osc_page(slice);
-
-	if (unlikely(opg->ops_lock))
-		osc_page_putref_lock(env, opg);
-}
-
-static void osc_page_completion_read(const struct lu_env *env,
-				     const struct cl_page_slice *slice,
-				     int ioret)
-{
-	struct osc_page *opg = cl2osc_page(slice);
-	struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
-
-	if (likely(opg->ops_lock))
-		osc_page_putref_lock(env, opg);
-	osc_lru_add(osc_cli(obj), opg);
-}
-
-static void osc_page_completion_write(const struct lu_env *env,
-				      const struct cl_page_slice *slice,
-				      int ioret)
-{
-	struct osc_page *opg = cl2osc_page(slice);
-	struct osc_object *obj = cl2osc(slice->cpl_obj);
-
-	osc_lru_add(osc_cli(obj), opg);
-}
-
-static int osc_page_fail(const struct lu_env *env,
-			 const struct cl_page_slice *slice,
-			 struct cl_io *unused)
-{
-	/*
-	 * Cached read?
-	 */
-	LBUG();
-	return 0;
-}
-
 static const char *osc_list(struct list_head *head)
 {
 	return list_empty(head) ? "-" : "+";
@@ -272,8 +158,8 @@ static int osc_page_print(const struct lu_env *env,
 	struct osc_object *obj = cl2osc(slice->cpl_obj);
 	struct client_obd *cli = &osc_export(obj)->exp_obd->u.cli;
 
-	return (*printer)(env, cookie, LUSTRE_OSC_NAME "-page@%p: 1< %#x %d %u %s %s > 2< %llu %u %u %#x %#x | %p %p %p > 3< %s %p %d %lu %d > 4< %d %d %d %lu %s | %s %s %s %s > 5< %s %s %s %s | %d %s | %d %s %s>\n",
-			  opg,
+	return (*printer)(env, cookie, LUSTRE_OSC_NAME "-page@%p %lu: 1< %#x %d %u %s %s > 2< %llu %u %u %#x %#x | %p %p %p > 3< %s %p %d %lu %d > 4< %d %d %d %lu %s | %s %s %s %s > 5< %s %s %s %s | %d %s | %d %s %s>\n",
+			  opg, osc_index(opg),
 			  /* 1 */
 			  oap->oap_magic, oap->oap_cmd,
 			  oap->oap_interrupted,
@@ -315,13 +201,11 @@ static void osc_page_delete(const struct lu_env *env,
 	struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
 	int rc;
 
-	LINVRNT(opg->ops_temp || osc_page_protected(env, opg, CLM_READ, 1));
-
 	CDEBUG(D_TRACE, "%p\n", opg);
 	osc_page_transfer_put(env, opg);
 	rc = osc_teardown_async_page(env, obj, opg);
 	if (rc) {
-		CL_PAGE_DEBUG(D_ERROR, env, cl_page_top(slice->cpl_page),
+		CL_PAGE_DEBUG(D_ERROR, env, slice->cpl_page,
 			      "Trying to teardown failed: %d\n", rc);
 		LASSERT(0);
 	}
@@ -334,7 +218,19 @@ static void osc_page_delete(const struct lu_env *env,
 	}
 	spin_unlock(&obj->oo_seatbelt);
 
-	osc_lru_del(osc_cli(obj), opg, true);
+	osc_lru_del(osc_cli(obj), opg);
+
+	if (slice->cpl_page->cp_type == CPT_CACHEABLE) {
+		void *value;
+
+		spin_lock(&obj->oo_tree_lock);
+		value = radix_tree_delete(&obj->oo_tree, osc_index(opg));
+		if (value)
+			--obj->oo_npages;
+		spin_unlock(&obj->oo_tree_lock);
+
+		LASSERT(ergo(value, value == opg));
+	}
 }
 
 static void osc_page_clip(const struct lu_env *env,
@@ -343,8 +239,6 @@ static void osc_page_clip(const struct lu_env *env,
 	struct osc_page *opg = cl2osc_page(slice);
 	struct osc_async_page *oap = &opg->ops_oap;
 
-	LINVRNT(osc_page_protected(env, opg, CLM_READ, 0));
-
 	opg->ops_from = from;
 	opg->ops_to = to;
 	spin_lock(&oap->oap_lock);
@@ -358,8 +252,6 @@ static int osc_page_cancel(const struct lu_env *env,
 	struct osc_page *opg = cl2osc_page(slice);
 	int rc = 0;
 
-	LINVRNT(osc_page_protected(env, opg, CLM_READ, 0));
-
 	/* Check if the transferring against this page
 	 * is completed, or not even queued.
 	 */
@@ -382,28 +274,16 @@ static int osc_page_flush(const struct lu_env *env,
 }
 
 static const struct cl_page_operations osc_page_ops = {
-	.cpo_fini	  = osc_page_fini,
 	.cpo_print	 = osc_page_print,
 	.cpo_delete	= osc_page_delete,
 	.cpo_is_under_lock = osc_page_is_under_lock,
-	.cpo_disown	= osc_page_disown,
-	.io = {
-		[CRT_READ] = {
-			.cpo_cache_add  = osc_page_fail,
-			.cpo_completion = osc_page_completion_read
-		},
-		[CRT_WRITE] = {
-			.cpo_cache_add  = osc_page_cache_add,
-			.cpo_completion = osc_page_completion_write
-		}
-	},
 	.cpo_clip	   = osc_page_clip,
 	.cpo_cancel	 = osc_page_cancel,
 	.cpo_flush	  = osc_page_flush
 };
 
 int osc_page_init(const struct lu_env *env, struct cl_object *obj,
-		  struct cl_page *page, struct page *vmpage)
+		  struct cl_page *page, pgoff_t index)
 {
 	struct osc_object *osc = cl2osc(obj);
 	struct osc_page *opg = cl_object_page_slice(obj, page);
@@ -412,18 +292,15 @@ int osc_page_init(const struct lu_env *env, struct cl_object *obj,
 	opg->ops_from = 0;
 	opg->ops_to = PAGE_SIZE;
 
-	result = osc_prep_async_page(osc, opg, vmpage,
-				     cl_offset(obj, page->cp_index));
+	result = osc_prep_async_page(osc, opg, page->cp_vmpage,
+				     cl_offset(obj, index));
 	if (result == 0) {
 		struct osc_io *oio = osc_env_io(env);
 
 		opg->ops_srvlock = osc_io_srvlock(oio);
-		cl_page_slice_add(page, &opg->ops_cl, obj, &osc_page_ops);
+		cl_page_slice_add(page, &opg->ops_cl, obj, index,
+				  &osc_page_ops);
 	}
-	/*
-	 * Cannot assert osc_page_protected() here as read-ahead
-	 * creates temporary pages outside of a lock.
-	 */
 	/* ops_inflight and ops_lru are the same field, but it doesn't
 	 * hurt to initialize it twice :-)
 	 */
@@ -431,12 +308,47 @@ int osc_page_init(const struct lu_env *env, struct cl_object *obj,
 	INIT_LIST_HEAD(&opg->ops_lru);
 
 	/* reserve an LRU space for this page */
-	if (page->cp_type == CPT_CACHEABLE && result == 0)
+	if (page->cp_type == CPT_CACHEABLE && result == 0) {
 		result = osc_lru_reserve(env, osc, opg);
+		if (result == 0) {
+			spin_lock(&osc->oo_tree_lock);
+			result = radix_tree_insert(&osc->oo_tree, index, opg);
+			if (result == 0)
+				++osc->oo_npages;
+			spin_unlock(&osc->oo_tree_lock);
+			LASSERT(result == 0);
+		}
+	}
 
 	return result;
 }
 
+int osc_over_unstable_soft_limit(struct client_obd *cli)
+{
+	long obd_upages, obd_dpages, osc_upages;
+
+	/* Can't check cli->cl_unstable_count, therefore, no soft limit */
+	if (!cli)
+		return 0;
+
+	obd_upages = atomic_read(&obd_unstable_pages);
+	obd_dpages = atomic_read(&obd_dirty_pages);
+
+	osc_upages = atomic_read(&cli->cl_unstable_count);
+
+	/*
+	 * obd_max_dirty_pages is the max number of (dirty + unstable)
+	 * pages allowed at any given time. To simulate an unstable page
+	 * only limit, we subtract the current number of dirty pages
+	 * from this max. This difference is roughly the amount of pages
+	 * currently available for unstable pages. Thus, the soft limit
+	 * is half of that difference. Check osc_upages to ensure we don't
+	 * set SOFT_SYNC for OSCs without any outstanding unstable pages.
+	 */
+	return osc_upages &&
+	       obd_upages >= (obd_max_dirty_pages - obd_dpages) / 2;
+}
+
 /**
  * Helper function called by osc_io_submit() for every page in an immediate
  * transfer (i.e., transferred synchronously).
@@ -445,10 +357,6 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
 		     enum cl_req_type crt, int brw_flags)
 {
 	struct osc_async_page *oap = &opg->ops_oap;
-	struct osc_object *obj = oap->oap_obj;
-
-	LINVRNT(osc_page_protected(env, opg,
-				   crt == CRT_WRITE ? CLM_WRITE : CLM_READ, 1));
 
 	LASSERTF(oap->oap_magic == OAP_MAGIC, "Bad oap magic: oap %p, magic 0x%x\n",
 		 oap, oap->oap_magic);
@@ -460,8 +368,10 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
 	oap->oap_count = opg->ops_to - opg->ops_from;
 	oap->oap_brw_flags = brw_flags | OBD_BRW_SYNC;
 
-	if (!client_is_remote(osc_export(obj)) &&
-	    capable(CFS_CAP_SYS_RESOURCE)) {
+	if (osc_over_unstable_soft_limit(oap->oap_cli))
+		oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC;
+
+	if (capable(CFS_CAP_SYS_RESOURCE)) {
 		oap->oap_brw_flags |= OBD_BRW_NOQUOTA;
 		oap->oap_cmd |= OBD_BRW_NOQUOTA;
 	}
@@ -483,13 +393,12 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
  */
 
 static DECLARE_WAIT_QUEUE_HEAD(osc_lru_waitq);
-static atomic_t osc_lru_waiters = ATOMIC_INIT(0);
 /* LRU pages are freed in batch mode. OSC should at least free this
  * number of pages to avoid running out of LRU budget, and..
  */
 static const int lru_shrink_min = 2 << (20 - PAGE_SHIFT);  /* 2M */
 /* free this number at most otherwise it will take too long time to finish. */
-static const int lru_shrink_max = 32 << (20 - PAGE_SHIFT); /* 32M */
+static const int lru_shrink_max = 8 << (20 - PAGE_SHIFT); /* 8M */
 
 /* Check if we can free LRU slots from this OSC. If there exists LRU waiters,
  * we should free slots aggressively. In this way, slots are freed in a steady
@@ -500,65 +409,142 @@ static const int lru_shrink_max = 32 << (20 - PAGE_SHIFT); /* 32M */
 static int osc_cache_too_much(struct client_obd *cli)
 {
 	struct cl_client_cache *cache = cli->cl_cache;
-	int pages = atomic_read(&cli->cl_lru_in_list) >> 1;
+	int pages = atomic_read(&cli->cl_lru_in_list);
+	unsigned long budget;
 
-	if (atomic_read(&osc_lru_waiters) > 0 &&
-	    atomic_read(cli->cl_lru_left) < lru_shrink_max)
-		/* drop lru pages aggressively */
-		return min(pages, lru_shrink_max);
+	budget = cache->ccc_lru_max / (atomic_read(&cache->ccc_users) - 2);
 
 	/* if it's going to run out LRU slots, we should free some, but not
 	 * too much to maintain fairness among OSCs.
 	 */
 	if (atomic_read(cli->cl_lru_left) < cache->ccc_lru_max >> 4) {
-		unsigned long tmp;
+		if (pages >= budget)
+			return lru_shrink_max;
+		else if (pages >= budget / 2)
+			return lru_shrink_min;
+	} else if (pages >= budget * 2) {
+		return lru_shrink_min;
+	}
+	return 0;
+}
 
-		tmp = cache->ccc_lru_max / atomic_read(&cache->ccc_users);
-		if (pages > tmp)
-			return min(pages, lru_shrink_max);
+int lru_queue_work(const struct lu_env *env, void *data)
+{
+	struct client_obd *cli = data;
 
-		return pages > lru_shrink_min ? lru_shrink_min : 0;
-	}
+	CDEBUG(D_CACHE, "Run LRU work for client obd %p.\n", cli);
+
+	if (osc_cache_too_much(cli))
+		osc_lru_shrink(env, cli, lru_shrink_max, true);
 
 	return 0;
 }
 
-/* Return how many pages are not discarded in @pvec. */
-static int discard_pagevec(const struct lu_env *env, struct cl_io *io,
-			   struct cl_page **pvec, int max_index)
+void osc_lru_add_batch(struct client_obd *cli, struct list_head *plist)
+{
+	LIST_HEAD(lru);
+	struct osc_async_page *oap;
+	int npages = 0;
+
+	list_for_each_entry(oap, plist, oap_pending_item) {
+		struct osc_page *opg = oap2osc_page(oap);
+
+		if (!opg->ops_in_lru)
+			continue;
+
+		++npages;
+		LASSERT(list_empty(&opg->ops_lru));
+		list_add(&opg->ops_lru, &lru);
+	}
+
+	if (npages > 0) {
+		spin_lock(&cli->cl_lru_list_lock);
+		list_splice_tail(&lru, &cli->cl_lru_list);
+		atomic_sub(npages, &cli->cl_lru_busy);
+		atomic_add(npages, &cli->cl_lru_in_list);
+		spin_unlock(&cli->cl_lru_list_lock);
+
+		/* XXX: May set force to be true for better performance */
+		if (osc_cache_too_much(cli))
+			(void)ptlrpcd_queue_work(cli->cl_lru_work);
+	}
+}
+
+static void __osc_lru_del(struct client_obd *cli, struct osc_page *opg)
+{
+	LASSERT(atomic_read(&cli->cl_lru_in_list) > 0);
+	list_del_init(&opg->ops_lru);
+	atomic_dec(&cli->cl_lru_in_list);
+}
+
+/**
+ * Page is being destroyed. The page may be not in LRU list, if the transfer
+ * has never finished(error occurred).
+ */
+static void osc_lru_del(struct client_obd *cli, struct osc_page *opg)
+{
+	if (opg->ops_in_lru) {
+		spin_lock(&cli->cl_lru_list_lock);
+		if (!list_empty(&opg->ops_lru)) {
+			__osc_lru_del(cli, opg);
+		} else {
+			LASSERT(atomic_read(&cli->cl_lru_busy) > 0);
+			atomic_dec(&cli->cl_lru_busy);
+		}
+		spin_unlock(&cli->cl_lru_list_lock);
+
+		atomic_inc(cli->cl_lru_left);
+		/* this is a great place to release more LRU pages if
+		 * this osc occupies too many LRU pages and kernel is
+		 * stealing one of them.
+		 */
+		if (!memory_pressure_get())
+			(void)ptlrpcd_queue_work(cli->cl_lru_work);
+		wake_up(&osc_lru_waitq);
+	} else {
+		LASSERT(list_empty(&opg->ops_lru));
+	}
+}
+
+/**
+ * Delete page from LRUlist for redirty.
+ */
+static void osc_lru_use(struct client_obd *cli, struct osc_page *opg)
+{
+	/* If page is being transferred for the first time,
+	 * ops_lru should be empty
+	 */
+	if (opg->ops_in_lru && !list_empty(&opg->ops_lru)) {
+		spin_lock(&cli->cl_lru_list_lock);
+		__osc_lru_del(cli, opg);
+		spin_unlock(&cli->cl_lru_list_lock);
+		atomic_inc(&cli->cl_lru_busy);
+	}
+}
+
+static void discard_pagevec(const struct lu_env *env, struct cl_io *io,
+			    struct cl_page **pvec, int max_index)
 {
-	int count;
 	int i;
 
-	for (count = 0, i = 0; i < max_index; i++) {
+	for (i = 0; i < max_index; i++) {
 		struct cl_page *page = pvec[i];
 
-		if (cl_page_own_try(env, io, page) == 0) {
-			/* free LRU page only if nobody is using it.
-			 * This check is necessary to avoid freeing the pages
-			 * having already been removed from LRU and pinned
-			 * for IO.
-			 */
-			if (!cl_page_in_use(page)) {
-				cl_page_unmap(env, io, page);
-				cl_page_discard(env, io, page);
-				++count;
-			}
-			cl_page_disown(env, io, page);
-		}
+		LASSERT(cl_page_is_owned(page, io));
+		cl_page_discard(env, io, page);
+		cl_page_disown(env, io, page);
 		cl_page_put(env, page);
+
 		pvec[i] = NULL;
 	}
-	return max_index - count;
 }
 
 /**
  * Drop @target of pages from LRU at most.
  */
-int osc_lru_shrink(struct client_obd *cli, int target)
+int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
+		   int target, bool force)
 {
-	struct cl_env_nest nest;
-	struct lu_env *env;
 	struct cl_io *io;
 	struct cl_object *clobj = NULL;
 	struct cl_page **pvec;
@@ -573,23 +559,31 @@ int osc_lru_shrink(struct client_obd *cli, int target)
 	if (atomic_read(&cli->cl_lru_in_list) == 0 || target <= 0)
 		return 0;
 
-	env = cl_env_nested_get(&nest);
-	if (IS_ERR(env))
-		return PTR_ERR(env);
+	if (!force) {
+		if (atomic_read(&cli->cl_lru_shrinkers) > 0)
+			return -EBUSY;
+
+		if (atomic_inc_return(&cli->cl_lru_shrinkers) > 1) {
+			atomic_dec(&cli->cl_lru_shrinkers);
+			return -EBUSY;
+		}
+	} else {
+		atomic_inc(&cli->cl_lru_shrinkers);
+	}
 
-	pvec = osc_env_info(env)->oti_pvec;
+	pvec = (struct cl_page **)osc_env_info(env)->oti_pvec;
 	io = &osc_env_info(env)->oti_io;
 
-	client_obd_list_lock(&cli->cl_lru_list_lock);
-	atomic_inc(&cli->cl_lru_shrinkers);
+	spin_lock(&cli->cl_lru_list_lock);
 	maxscan = min(target << 1, atomic_read(&cli->cl_lru_in_list));
 	list_for_each_entry_safe(opg, temp, &cli->cl_lru_list, ops_lru) {
 		struct cl_page *page;
+		bool will_free = false;
 
 		if (--maxscan < 0)
 			break;
 
-		page = cl_page_top(opg->ops_cl.cpl_page);
+		page = opg->ops_cl.cpl_page;
 		if (cl_page_in_use_noref(page)) {
 			list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
 			continue;
@@ -600,10 +594,10 @@ int osc_lru_shrink(struct client_obd *cli, int target)
 			struct cl_object *tmp = page->cp_obj;
 
 			cl_object_get(tmp);
-			client_obd_list_unlock(&cli->cl_lru_list_lock);
+			spin_unlock(&cli->cl_lru_list_lock);
 
 			if (clobj) {
-				count -= discard_pagevec(env, io, pvec, index);
+				discard_pagevec(env, io, pvec, index);
 				index = 0;
 
 				cl_io_fini(env, io);
@@ -616,7 +610,7 @@ int osc_lru_shrink(struct client_obd *cli, int target)
 			io->ci_ignore_layout = 1;
 			rc = cl_io_init(env, io, CIT_MISC, clobj);
 
-			client_obd_list_lock(&cli->cl_lru_list_lock);
+			spin_lock(&cli->cl_lru_list_lock);
 
 			if (rc != 0)
 				break;
@@ -625,98 +619,54 @@ int osc_lru_shrink(struct client_obd *cli, int target)
 			continue;
 		}
 
-		/* move this page to the end of list as it will be discarded
-		 * soon. The page will be finally removed from LRU list in
-		 * osc_page_delete().
-		 */
-		list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
+		if (cl_page_own_try(env, io, page) == 0) {
+			if (!cl_page_in_use_noref(page)) {
+				/* remove it from lru list earlier to avoid
+				 * lock contention
+				 */
+				__osc_lru_del(cli, opg);
+				opg->ops_in_lru = 0; /* will be discarded */
+
+				cl_page_get(page);
+				will_free = true;
+			} else {
+				cl_page_disown(env, io, page);
+			}
+		}
 
-		/* it's okay to grab a refcount here w/o holding lock because
-		 * it has to grab cl_lru_list_lock to delete the page.
-		 */
-		cl_page_get(page);
-		pvec[index++] = page;
-		if (++count >= target)
-			break;
+		if (!will_free) {
+			list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
+			continue;
+		}
 
+		/* Don't discard and free the page with cl_lru_list held */
+		pvec[index++] = page;
 		if (unlikely(index == OTI_PVEC_SIZE)) {
-			client_obd_list_unlock(&cli->cl_lru_list_lock);
-			count -= discard_pagevec(env, io, pvec, index);
+			spin_unlock(&cli->cl_lru_list_lock);
+			discard_pagevec(env, io, pvec, index);
 			index = 0;
 
-			client_obd_list_lock(&cli->cl_lru_list_lock);
+			spin_lock(&cli->cl_lru_list_lock);
 		}
+
+		if (++count >= target)
+			break;
 	}
-	client_obd_list_unlock(&cli->cl_lru_list_lock);
+	spin_unlock(&cli->cl_lru_list_lock);
 
 	if (clobj) {
-		count -= discard_pagevec(env, io, pvec, index);
+		discard_pagevec(env, io, pvec, index);
 
 		cl_io_fini(env, io);
 		cl_object_put(env, clobj);
 	}
-	cl_env_nested_put(&nest, env);
 
 	atomic_dec(&cli->cl_lru_shrinkers);
-	return count > 0 ? count : rc;
-}
-
-static void osc_lru_add(struct client_obd *cli, struct osc_page *opg)
-{
-	bool wakeup = false;
-
-	if (!opg->ops_in_lru)
-		return;
-
-	atomic_dec(&cli->cl_lru_busy);
-	client_obd_list_lock(&cli->cl_lru_list_lock);
-	if (list_empty(&opg->ops_lru)) {
-		list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
-		atomic_inc_return(&cli->cl_lru_in_list);
-		wakeup = atomic_read(&osc_lru_waiters) > 0;
-	}
-	client_obd_list_unlock(&cli->cl_lru_list_lock);
-
-	if (wakeup) {
-		osc_lru_shrink(cli, osc_cache_too_much(cli));
+	if (count > 0) {
+		atomic_add(count, cli->cl_lru_left);
 		wake_up_all(&osc_lru_waitq);
 	}
-}
-
-/* delete page from LRUlist. The page can be deleted from LRUlist for two
- * reasons: redirtied or deleted from page cache.
- */
-static void osc_lru_del(struct client_obd *cli, struct osc_page *opg, bool del)
-{
-	if (opg->ops_in_lru) {
-		client_obd_list_lock(&cli->cl_lru_list_lock);
-		if (!list_empty(&opg->ops_lru)) {
-			LASSERT(atomic_read(&cli->cl_lru_in_list) > 0);
-			list_del_init(&opg->ops_lru);
-			atomic_dec(&cli->cl_lru_in_list);
-			if (!del)
-				atomic_inc(&cli->cl_lru_busy);
-		} else if (del) {
-			LASSERT(atomic_read(&cli->cl_lru_busy) > 0);
-			atomic_dec(&cli->cl_lru_busy);
-		}
-		client_obd_list_unlock(&cli->cl_lru_list_lock);
-		if (del) {
-			atomic_inc(cli->cl_lru_left);
-			/* this is a great place to release more LRU pages if
-			 * this osc occupies too many LRU pages and kernel is
-			 * stealing one of them.
-			 * cl_lru_shrinkers is to avoid recursive call in case
-			 * we're already in the context of osc_lru_shrink().
-			 */
-			if (atomic_read(&cli->cl_lru_shrinkers) == 0 &&
-			    !memory_pressure_get())
-				osc_lru_shrink(cli, osc_cache_too_much(cli));
-			wake_up(&osc_lru_waitq);
-		}
-	} else {
-		LASSERT(list_empty(&opg->ops_lru));
-	}
+	return count > 0 ? count : rc;
 }
 
 static inline int max_to_shrink(struct client_obd *cli)
@@ -724,19 +674,28 @@ static inline int max_to_shrink(struct client_obd *cli)
 	return min(atomic_read(&cli->cl_lru_in_list) >> 1, lru_shrink_max);
 }
 
-static int osc_lru_reclaim(struct client_obd *cli)
+int osc_lru_reclaim(struct client_obd *cli)
 {
+	struct cl_env_nest nest;
+	struct lu_env *env;
 	struct cl_client_cache *cache = cli->cl_cache;
 	int max_scans;
-	int rc;
+	int rc = 0;
 
 	LASSERT(cache);
 
-	rc = osc_lru_shrink(cli, lru_shrink_min);
+	env = cl_env_nested_get(&nest);
+	if (IS_ERR(env))
+		return 0;
+
+	rc = osc_lru_shrink(env, cli, osc_cache_too_much(cli), false);
 	if (rc != 0) {
+		if (rc == -EBUSY)
+			rc = 0;
+
 		CDEBUG(D_CACHE, "%s: Free %d pages from own LRU: %p.\n",
 		       cli->cl_import->imp_obd->obd_name, rc, cli);
-		return rc;
+		goto out;
 	}
 
 	CDEBUG(D_CACHE, "%s: cli %p no free slots, pages: %d, busy: %d.\n",
@@ -753,7 +712,7 @@ static int osc_lru_reclaim(struct client_obd *cli)
 	cache->ccc_lru_shrinkers++;
 	list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru);
 
-	max_scans = atomic_read(&cache->ccc_users);
+	max_scans = atomic_read(&cache->ccc_users) - 2;
 	while (--max_scans > 0 && !list_empty(&cache->ccc_lru)) {
 		cli = list_entry(cache->ccc_lru.next, struct client_obd,
 				 cl_lru_osc);
@@ -764,10 +723,11 @@ static int osc_lru_reclaim(struct client_obd *cli)
 		       atomic_read(&cli->cl_lru_busy));
 
 		list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru);
-		if (atomic_read(&cli->cl_lru_in_list) > 0) {
+		if (osc_cache_too_much(cli) > 0) {
 			spin_unlock(&cache->ccc_lru_lock);
 
-			rc = osc_lru_shrink(cli, max_to_shrink(cli));
+			rc = osc_lru_shrink(env, cli, osc_cache_too_much(cli),
+					    true);
 			spin_lock(&cache->ccc_lru_lock);
 			if (rc != 0)
 				break;
@@ -775,6 +735,8 @@ static int osc_lru_reclaim(struct client_obd *cli)
 	}
 	spin_unlock(&cache->ccc_lru_lock);
 
+out:
+	cl_env_nested_put(&nest, env);
 	CDEBUG(D_CACHE, "%s: cli %p freed %d pages.\n",
 	       cli->cl_import->imp_obd->obd_name, cli, rc);
 	return rc;
@@ -784,16 +746,20 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
 			   struct osc_page *opg)
 {
 	struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+	struct osc_io *oio = osc_env_io(env);
 	struct client_obd *cli = osc_cli(obj);
 	int rc = 0;
 
 	if (!cli->cl_cache) /* shall not be in LRU */
 		return 0;
 
+	if (oio->oi_lru_reserved > 0) {
+		--oio->oi_lru_reserved;
+		goto out;
+	}
+
 	LASSERT(atomic_read(cli->cl_lru_left) >= 0);
 	while (!atomic_add_unless(cli->cl_lru_left, -1, 0)) {
-		int gen;
-
 		/* run out of LRU spaces, try to drop some by itself */
 		rc = osc_lru_reclaim(cli);
 		if (rc < 0)
@@ -803,23 +769,15 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
 
 		cond_resched();
 
-		/* slowest case, all of caching pages are busy, notifying
-		 * other OSCs that we're lack of LRU slots.
-		 */
-		atomic_inc(&osc_lru_waiters);
-
-		gen = atomic_read(&cli->cl_lru_in_list);
 		rc = l_wait_event(osc_lru_waitq,
-				  atomic_read(cli->cl_lru_left) > 0 ||
-				  (atomic_read(&cli->cl_lru_in_list) > 0 &&
-				   gen != atomic_read(&cli->cl_lru_in_list)),
+				  atomic_read(cli->cl_lru_left) > 0,
 				  &lwi);
 
-		atomic_dec(&osc_lru_waiters);
 		if (rc < 0)
 			break;
 	}
 
+out:
 	if (rc >= 0) {
 		atomic_inc(&cli->cl_lru_busy);
 		opg->ops_in_lru = 1;
diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c
index 30526ebcad04..536b868ff776 100644
--- a/drivers/staging/lustre/lustre/osc/osc_request.c
+++ b/drivers/staging/lustre/lustre/osc/osc_request.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -92,12 +88,13 @@ struct osc_fsync_args {
 
 struct osc_enqueue_args {
 	struct obd_export	*oa_exp;
+	enum ldlm_type		oa_type;
+	enum ldlm_mode		oa_mode;
 	__u64		    *oa_flags;
-	obd_enqueue_update_f      oa_upcall;
+	osc_enqueue_upcall_f	oa_upcall;
 	void		     *oa_cookie;
 	struct ost_lvb	   *oa_lvb;
-	struct lustre_handle     *oa_lockh;
-	struct ldlm_enqueue_info *oa_ei;
+	struct lustre_handle	oa_lockh;
 	unsigned int	      oa_agl:1;
 };
 
@@ -473,7 +470,8 @@ static int osc_real_create(struct obd_export *exp, struct obdo *oa,
 		DEBUG_REQ(D_HA, req,
 			  "delorphan from OST integration");
 		/* Don't resend the delorphan req */
-		req->rq_no_resend = req->rq_no_delay = 1;
+		req->rq_no_resend = 1;
+		req->rq_no_delay = 1;
 	}
 
 	rc = ptlrpc_queue_wait(req);
@@ -801,21 +799,24 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
 	LASSERT(!(oa->o_valid & bits));
 
 	oa->o_valid |= bits;
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	oa->o_dirty = cli->cl_dirty;
 	if (unlikely(cli->cl_dirty - cli->cl_dirty_transit >
 		     cli->cl_dirty_max)) {
 		CERROR("dirty %lu - %lu > dirty_max %lu\n",
 		       cli->cl_dirty, cli->cl_dirty_transit, cli->cl_dirty_max);
 		oa->o_undirty = 0;
-	} else if (unlikely(atomic_read(&obd_dirty_pages) -
+	} else if (unlikely(atomic_read(&obd_unstable_pages) +
+			    atomic_read(&obd_dirty_pages) -
 			    atomic_read(&obd_dirty_transit_pages) >
 			    (long)(obd_max_dirty_pages + 1))) {
 		/* The atomic_read() allowing the atomic_inc() are
 		 * not covered by a lock thus they may safely race and trip
 		 * this CERROR() unless we add in a small fudge factor (+1).
 		 */
-		CERROR("dirty %d - %d > system dirty_max %d\n",
+		CERROR("%s: dirty %d + %d - %d > system dirty_max %d\n",
+		       cli->cl_import->imp_obd->obd_name,
+		       atomic_read(&obd_unstable_pages),
 		       atomic_read(&obd_dirty_pages),
 		       atomic_read(&obd_dirty_transit_pages),
 		       obd_max_dirty_pages);
@@ -833,10 +834,9 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
 	oa->o_grant = cli->cl_avail_grant + cli->cl_reserved_grant;
 	oa->o_dropped = cli->cl_lost_grant;
 	cli->cl_lost_grant = 0;
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 	CDEBUG(D_CACHE, "dirty: %llu undirty: %u dropped %u grant: %llu\n",
 	       oa->o_dirty, oa->o_undirty, oa->o_dropped, oa->o_grant);
-
 }
 
 void osc_update_next_shrink(struct client_obd *cli)
@@ -849,9 +849,9 @@ void osc_update_next_shrink(struct client_obd *cli)
 
 static void __osc_update_grant(struct client_obd *cli, u64 grant)
 {
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	cli->cl_avail_grant += grant;
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 }
 
 static void osc_update_grant(struct client_obd *cli, struct ost_body *body)
@@ -889,10 +889,10 @@ out:
 
 static void osc_shrink_grant_local(struct client_obd *cli, struct obdo *oa)
 {
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	oa->o_grant = cli->cl_avail_grant / 4;
 	cli->cl_avail_grant -= oa->o_grant;
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 	if (!(oa->o_valid & OBD_MD_FLFLAGS)) {
 		oa->o_valid |= OBD_MD_FLFLAGS;
 		oa->o_flags = 0;
@@ -911,10 +911,10 @@ static int osc_shrink_grant(struct client_obd *cli)
 	__u64 target_bytes = (cli->cl_max_rpcs_in_flight + 1) *
 			     (cli->cl_max_pages_per_rpc << PAGE_SHIFT);
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	if (cli->cl_avail_grant <= target_bytes)
 		target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT;
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return osc_shrink_grant_to_target(cli, target_bytes);
 }
@@ -924,7 +924,7 @@ int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes)
 	int rc = 0;
 	struct ost_body	*body;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	/* Don't shrink if we are already above or below the desired limit
 	 * We don't want to shrink below a single RPC, as that will negatively
 	 * impact block allocation and long-term performance.
@@ -933,10 +933,10 @@ int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes)
 		target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT;
 
 	if (target_bytes >= cli->cl_avail_grant) {
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 		return 0;
 	}
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	body = kzalloc(sizeof(*body), GFP_NOFS);
 	if (!body)
@@ -944,10 +944,10 @@ int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes)
 
 	osc_announce_cached(cli, &body->oa, 0);
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	body->oa.o_grant = cli->cl_avail_grant - target_bytes;
 	cli->cl_avail_grant = target_bytes;
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 	if (!(body->oa.o_valid & OBD_MD_FLFLAGS)) {
 		body->oa.o_valid |= OBD_MD_FLFLAGS;
 		body->oa.o_flags = 0;
@@ -1035,7 +1035,7 @@ static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd)
 	 * race is tolerable here: if we're evicted, but imp_state already
 	 * left EVICTED state, then cl_dirty must be 0 already.
 	 */
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	if (cli->cl_import->imp_state == LUSTRE_IMP_EVICTED)
 		cli->cl_avail_grant = ocd->ocd_grant;
 	else
@@ -1053,7 +1053,7 @@ static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd)
 
 	/* determine the appropriate chunk size used by osc_extent. */
 	cli->cl_chunkbits = max_t(int, PAGE_SHIFT, ocd->ocd_blocksize);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	CDEBUG(D_CACHE, "%s, setting cl_avail_grant: %ld cl_lost_grant: %ld chunk bits: %d\n",
 	       cli->cl_import->imp_obd->obd_name,
@@ -1082,7 +1082,7 @@ static void handle_short_read(int nob_read, u32 page_count,
 		if (pga[i]->count > nob_read) {
 			/* EOF inside this page */
 			ptr = kmap(pga[i]->pg) +
-				(pga[i]->off & ~CFS_PAGE_MASK);
+				(pga[i]->off & ~PAGE_MASK);
 			memset(ptr + nob_read, 0, pga[i]->count - nob_read);
 			kunmap(pga[i]->pg);
 			page_count--;
@@ -1097,7 +1097,7 @@ static void handle_short_read(int nob_read, u32 page_count,
 
 	/* zero remaining pages */
 	while (page_count-- > 0) {
-		ptr = kmap(pga[i]->pg) + (pga[i]->off & ~CFS_PAGE_MASK);
+		ptr = kmap(pga[i]->pg) + (pga[i]->off & ~PAGE_MASK);
 		memset(ptr, 0, pga[i]->count);
 		kunmap(pga[i]->pg);
 		i++;
@@ -1144,7 +1144,8 @@ static inline int can_merge_pages(struct brw_page *p1, struct brw_page *p2)
 {
 	if (p1->flag != p2->flag) {
 		unsigned mask = ~(OBD_BRW_FROM_GRANT | OBD_BRW_NOCACHE |
-				  OBD_BRW_SYNC | OBD_BRW_ASYNC|OBD_BRW_NOQUOTA);
+				  OBD_BRW_SYNC | OBD_BRW_ASYNC |
+				  OBD_BRW_NOQUOTA | OBD_BRW_SOFT_SYNC);
 
 		/* warn if we try to combine flags that we don't know to be
 		 * safe to combine
@@ -1188,32 +1189,29 @@ static u32 osc_checksum_bulk(int nob, u32 pg_count,
 		if (i == 0 && opc == OST_READ &&
 		    OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE)) {
 			unsigned char *ptr = kmap(pga[i]->pg);
-			int off = pga[i]->off & ~CFS_PAGE_MASK;
+			int off = pga[i]->off & ~PAGE_MASK;
 
 			memcpy(ptr + off, "bad1", min(4, nob));
 			kunmap(pga[i]->pg);
 		}
 		cfs_crypto_hash_update_page(hdesc, pga[i]->pg,
-					    pga[i]->off & ~CFS_PAGE_MASK,
+					    pga[i]->off & ~PAGE_MASK,
 				  count);
 		CDEBUG(D_PAGE,
 		       "page %p map %p index %lu flags %lx count %u priv %0lx: off %d\n",
 		       pga[i]->pg, pga[i]->pg->mapping, pga[i]->pg->index,
 		       (long)pga[i]->pg->flags, page_count(pga[i]->pg),
 		       page_private(pga[i]->pg),
-		       (int)(pga[i]->off & ~CFS_PAGE_MASK));
+		       (int)(pga[i]->off & ~PAGE_MASK));
 
 		nob -= pga[i]->count;
 		pg_count--;
 		i++;
 	}
 
-	bufsize = 4;
+	bufsize = sizeof(cksum);
 	err = cfs_crypto_hash_final(hdesc, (unsigned char *)&cksum, &bufsize);
 
-	if (err)
-		cfs_crypto_hash_final(hdesc, NULL, NULL);
-
 	/* For sending we only compute the wrong checksum instead
 	 * of corrupting the data so it is still correct on a redo
 	 */
@@ -1312,7 +1310,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
 	pg_prev = pga[0];
 	for (requested_nob = i = 0; i < page_count; i++, niobuf++) {
 		struct brw_page *pg = pga[i];
-		int poff = pg->off & ~CFS_PAGE_MASK;
+		int poff = pg->off & ~PAGE_MASK;
 
 		LASSERT(pg->count > 0);
 		/* make sure there is no gap in the middle of page array */
@@ -1658,6 +1656,7 @@ static int osc_brw_redo_request(struct ptlrpc_request *request,
 	aa->aa_resends++;
 	new_req->rq_interpret_reply = request->rq_interpret_reply;
 	new_req->rq_async_args = request->rq_async_args;
+	new_req->rq_commit_cb = request->rq_commit_cb;
 	/* cap resend delay to the current request timeout, this is similar to
 	 * what ptlrpc does (see after_reply())
 	 */
@@ -1737,7 +1736,6 @@ static int brw_interpret(const struct lu_env *env,
 	struct osc_brw_async_args *aa = data;
 	struct osc_extent *ext;
 	struct osc_extent *tmp;
-	struct cl_object *obj = NULL;
 	struct client_obd *cli = aa->aa_cli;
 
 	rc = osc_brw_fini_request(req, rc);
@@ -1766,24 +1764,17 @@ static int brw_interpret(const struct lu_env *env,
 			rc = -EIO;
 	}
 
-	list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) {
-		if (!obj && rc == 0) {
-			obj = osc2cl(ext->oe_obj);
-			cl_object_get(obj);
-		}
-
-		list_del_init(&ext->oe_link);
-		osc_extent_finish(env, ext, 1, rc);
-	}
-	LASSERT(list_empty(&aa->aa_exts));
-	LASSERT(list_empty(&aa->aa_oaps));
-
-	if (obj) {
+	if (rc == 0) {
 		struct obdo *oa = aa->aa_oa;
 		struct cl_attr *attr  = &osc_env_info(env)->oti_attr;
 		unsigned long valid = 0;
+		struct cl_object *obj;
+		struct osc_async_page *last;
 
-		LASSERT(rc == 0);
+		last = brw_page2oap(aa->aa_ppga[aa->aa_page_count - 1]);
+		obj = osc2cl(last->oap_obj);
+
+		cl_object_attr_lock(obj);
 		if (oa->o_valid & OBD_MD_FLBLOCKS) {
 			attr->cat_blocks = oa->o_blocks;
 			valid |= CAT_BLOCKS;
@@ -1800,21 +1791,45 @@ static int brw_interpret(const struct lu_env *env,
 			attr->cat_ctime = oa->o_ctime;
 			valid |= CAT_CTIME;
 		}
-		if (valid != 0) {
-			cl_object_attr_lock(obj);
-			cl_object_attr_set(env, obj, attr, valid);
-			cl_object_attr_unlock(obj);
+
+		if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE) {
+			struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo;
+			loff_t last_off = last->oap_count + last->oap_obj_off;
+
+			/* Change file size if this is an out of quota or
+			 * direct IO write and it extends the file size
+			 */
+			if (loi->loi_lvb.lvb_size < last_off) {
+				attr->cat_size = last_off;
+				valid |= CAT_SIZE;
+			}
+			/* Extend KMS if it's not a lockless write */
+			if (loi->loi_kms < last_off &&
+			    oap2osc_page(last)->ops_srvlock == 0) {
+				attr->cat_kms = last_off;
+				valid |= CAT_KMS;
+			}
 		}
-		cl_object_put(env, obj);
+
+		if (valid != 0)
+			cl_object_attr_set(env, obj, attr, valid);
+		cl_object_attr_unlock(obj);
 	}
 	kmem_cache_free(obdo_cachep, aa->aa_oa);
 
+	list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) {
+		list_del_init(&ext->oe_link);
+		osc_extent_finish(env, ext, 1, rc);
+	}
+	LASSERT(list_empty(&aa->aa_exts));
+	LASSERT(list_empty(&aa->aa_oaps));
+
 	cl_req_completion(env, aa->aa_clerq, rc < 0 ? rc :
 			  req->rq_bulk->bd_nob_transferred);
 	osc_release_ppga(aa->aa_ppga, aa->aa_page_count);
 	ptlrpc_lprocfs_brw(req, req->rq_bulk->bd_nob_transferred);
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	/* We need to decrement before osc_ap_completion->osc_wake_cache_waiters
 	 * is called so we know whether to go to sync BRWs or wait for more
 	 * RPCs to complete
@@ -1824,12 +1839,31 @@ static int brw_interpret(const struct lu_env *env,
 	else
 		cli->cl_r_in_flight--;
 	osc_wake_cache_waiters(cli);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	osc_io_unplug(env, cli, NULL);
 	return rc;
 }
 
+static void brw_commit(struct ptlrpc_request *req)
+{
+	spin_lock(&req->rq_lock);
+	/*
+	 * If osc_inc_unstable_pages (via osc_extent_finish) races with
+	 * this called via the rq_commit_cb, I need to ensure
+	 * osc_dec_unstable_pages is still called. Otherwise unstable
+	 * pages may be leaked.
+	 */
+	if (req->rq_unstable) {
+		spin_unlock(&req->rq_lock);
+		osc_dec_unstable_pages(req);
+		spin_lock(&req->rq_lock);
+	} else {
+		req->rq_committed = 1;
+	}
+	spin_unlock(&req->rq_lock);
+}
+
 /**
  * Build an RPC by the list of extent @ext_list. The caller must ensure
  * that the total pages in this list are NOT over max pages per RPC.
@@ -1920,7 +1954,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 		pga[i] = &oap->oap_brw_page;
 		pga[i]->off = oap->oap_obj_off + oap->oap_page_off;
 		CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n",
-		       pga[i]->pg, page_index(oap->oap_page), oap,
+		       pga[i]->pg, oap->oap_page->index, oap,
 		       pga[i]->flag);
 		i++;
 		cl_req_page_add(env, clerq, page);
@@ -1949,6 +1983,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 		goto out;
 	}
 
+	req->rq_commit_cb = brw_commit;
 	req->rq_interpret_reply = brw_interpret;
 
 	if (mem_tight != 0)
@@ -1992,7 +2027,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 	if (tmp)
 		tmp->oap_request = ptlrpc_request_addref(req);
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	starting_offset >>= PAGE_SHIFT;
 	if (cmd == OBD_BRW_READ) {
 		cli->cl_r_in_flight++;
@@ -2007,7 +2042,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 		lprocfs_oh_tally_log2(&cli->cl_write_offset_hist,
 				      starting_offset + 1);
 	}
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	DEBUG_REQ(D_INODE, req, "%d pages, aa %p. now %dr/%dw in flight",
 		  page_count, aa, cli->cl_r_in_flight,
@@ -2055,14 +2090,12 @@ static int osc_set_lock_data_with_check(struct ldlm_lock *lock,
 	LASSERT(lock->l_glimpse_ast == einfo->ei_cb_gl);
 
 	lock_res_and_lock(lock);
-	spin_lock(&osc_ast_guard);
 
 	if (!lock->l_ast_data)
 		lock->l_ast_data = data;
 	if (lock->l_ast_data == data)
 		set = 1;
 
-	spin_unlock(&osc_ast_guard);
 	unlock_res_and_lock(lock);
 
 	return set;
@@ -2104,36 +2137,38 @@ static int osc_find_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm,
 	return rc;
 }
 
-static int osc_enqueue_fini(struct ptlrpc_request *req, struct ost_lvb *lvb,
-			    obd_enqueue_update_f upcall, void *cookie,
-			    __u64 *flags, int agl, int rc)
+static int osc_enqueue_fini(struct ptlrpc_request *req,
+			    osc_enqueue_upcall_f upcall, void *cookie,
+			    struct lustre_handle *lockh, enum ldlm_mode mode,
+			    __u64 *flags, int agl, int errcode)
 {
-	int intent = *flags & LDLM_FL_HAS_INTENT;
-
-	if (intent) {
-		/* The request was created before ldlm_cli_enqueue call. */
-		if (rc == ELDLM_LOCK_ABORTED) {
-			struct ldlm_reply *rep;
+	bool intent = *flags & LDLM_FL_HAS_INTENT;
+	int rc;
 
-			rep = req_capsule_server_get(&req->rq_pill,
-						     &RMF_DLM_REP);
+	/* The request was created before ldlm_cli_enqueue call. */
+	if (intent && errcode == ELDLM_LOCK_ABORTED) {
+		struct ldlm_reply *rep;
 
-			rep->lock_policy_res1 =
-				ptlrpc_status_ntoh(rep->lock_policy_res1);
-			if (rep->lock_policy_res1)
-				rc = rep->lock_policy_res1;
-		}
-	}
+		rep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
 
-	if ((intent != 0 && rc == ELDLM_LOCK_ABORTED && agl == 0) ||
-	    (rc == 0)) {
+		rep->lock_policy_res1 =
+			ptlrpc_status_ntoh(rep->lock_policy_res1);
+		if (rep->lock_policy_res1)
+			errcode = rep->lock_policy_res1;
+		if (!agl)
+			*flags |= LDLM_FL_LVB_READY;
+	} else if (errcode == ELDLM_OK) {
 		*flags |= LDLM_FL_LVB_READY;
-		CDEBUG(D_INODE, "got kms %llu blocks %llu mtime %llu\n",
-		       lvb->lvb_size, lvb->lvb_blocks, lvb->lvb_mtime);
 	}
 
 	/* Call the update callback. */
-	rc = (*upcall)(cookie, rc);
+	rc = (*upcall)(cookie, lockh, errcode);
+	/* release the reference taken in ldlm_cli_enqueue() */
+	if (errcode == ELDLM_LOCK_MATCHED)
+		errcode = ELDLM_OK;
+	if (errcode == ELDLM_OK && lustre_handle_is_used(lockh))
+		ldlm_lock_decref(lockh, mode);
+
 	return rc;
 }
 
@@ -2142,62 +2177,50 @@ static int osc_enqueue_interpret(const struct lu_env *env,
 				 struct osc_enqueue_args *aa, int rc)
 {
 	struct ldlm_lock *lock;
-	struct lustre_handle handle;
-	__u32 mode;
-	struct ost_lvb *lvb;
-	__u32 lvb_len;
-	__u64 *flags = aa->oa_flags;
-
-	/* Make a local copy of a lock handle and a mode, because aa->oa_*
-	 * might be freed anytime after lock upcall has been called.
-	 */
-	lustre_handle_copy(&handle, aa->oa_lockh);
-	mode = aa->oa_ei->ei_mode;
+	struct lustre_handle *lockh = &aa->oa_lockh;
+	enum ldlm_mode mode = aa->oa_mode;
+	struct ost_lvb *lvb = aa->oa_lvb;
+	__u32 lvb_len = sizeof(*lvb);
+	__u64 flags = 0;
+
 
 	/* ldlm_cli_enqueue is holding a reference on the lock, so it must
 	 * be valid.
 	 */
-	lock = ldlm_handle2lock(&handle);
+	lock = ldlm_handle2lock(lockh);
+	LASSERTF(lock, "lockh %llx, req %p, aa %p - client evicted?\n",
+		 lockh->cookie, req, aa);
 
 	/* Take an additional reference so that a blocking AST that
 	 * ldlm_cli_enqueue_fini() might post for a failed lock, is guaranteed
 	 * to arrive after an upcall has been executed by
 	 * osc_enqueue_fini().
 	 */
-	ldlm_lock_addref(&handle, mode);
+	ldlm_lock_addref(lockh, mode);
+
+	/* Let cl_lock_state_wait fail with -ERESTARTSYS to unuse sublocks. */
+	OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_HANG, 2);
 
 	/* Let CP AST to grant the lock first. */
 	OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 1);
 
-	if (aa->oa_agl && rc == ELDLM_LOCK_ABORTED) {
-		lvb = NULL;
-		lvb_len = 0;
-	} else {
-		lvb = aa->oa_lvb;
-		lvb_len = sizeof(*aa->oa_lvb);
+	if (aa->oa_agl) {
+		LASSERT(!aa->oa_lvb);
+		LASSERT(!aa->oa_flags);
+		aa->oa_flags = &flags;
 	}
 
 	/* Complete obtaining the lock procedure. */
-	rc = ldlm_cli_enqueue_fini(aa->oa_exp, req, aa->oa_ei->ei_type, 1,
-				   mode, flags, lvb, lvb_len, &handle, rc);
+	rc = ldlm_cli_enqueue_fini(aa->oa_exp, req, aa->oa_type, 1,
+				   aa->oa_mode, aa->oa_flags, lvb, lvb_len,
+				   lockh, rc);
 	/* Complete osc stuff. */
-	rc = osc_enqueue_fini(req, aa->oa_lvb, aa->oa_upcall, aa->oa_cookie,
-			      flags, aa->oa_agl, rc);
+	rc = osc_enqueue_fini(req, aa->oa_upcall, aa->oa_cookie, lockh, mode,
+			      aa->oa_flags, aa->oa_agl, rc);
 
 	OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_CANCEL_RACE, 10);
 
-	/* Release the lock for async request. */
-	if (lustre_handle_is_used(&handle) && rc == ELDLM_OK)
-		/*
-		 * Releases a reference taken by ldlm_cli_enqueue(), if it is
-		 * not already released by
-		 * ldlm_cli_enqueue_fini()->failed_lock_cleanup()
-		 */
-		ldlm_lock_decref(&handle, mode);
-
-	LASSERTF(lock, "lockh %p, req %p, aa %p - client evicted?\n",
-		 aa->oa_lockh, req, aa);
-	ldlm_lock_decref(&handle, mode);
+	ldlm_lock_decref(lockh, mode);
 	LDLM_LOCK_PUT(lock);
 	return rc;
 }
@@ -2209,29 +2232,29 @@ struct ptlrpc_request_set *PTLRPCD_SET = (void *)1;
  * other synchronous requests, however keeping some locks and trying to obtain
  * others may take a considerable amount of time in a case of ost failure; and
  * when other sync requests do not get released lock from a client, the client
- * is excluded from the cluster -- such scenarious make the life difficult, so
+ * is evicted from the cluster -- such scenaries make the life difficult, so
  * release locks just after they are obtained.
  */
 int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
 		     __u64 *flags, ldlm_policy_data_t *policy,
 		     struct ost_lvb *lvb, int kms_valid,
-		     obd_enqueue_update_f upcall, void *cookie,
+		     osc_enqueue_upcall_f upcall, void *cookie,
 		     struct ldlm_enqueue_info *einfo,
-		     struct lustre_handle *lockh,
 		     struct ptlrpc_request_set *rqset, int async, int agl)
 {
 	struct obd_device *obd = exp->exp_obd;
+	struct lustre_handle lockh = { 0 };
 	struct ptlrpc_request *req = NULL;
 	int intent = *flags & LDLM_FL_HAS_INTENT;
-	__u64 match_lvb = (agl != 0 ? 0 : LDLM_FL_LVB_READY);
+	__u64 match_flags = *flags;
 	enum ldlm_mode mode;
 	int rc;
 
 	/* Filesystem lock extents are extended to page boundaries so that
 	 * dealing with the page cache is a little smoother.
 	 */
-	policy->l_extent.start -= policy->l_extent.start & ~CFS_PAGE_MASK;
-	policy->l_extent.end |= ~CFS_PAGE_MASK;
+	policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK;
+	policy->l_extent.end |= ~PAGE_MASK;
 
 	/*
 	 * kms is not valid when either object is completely fresh (so that no
@@ -2258,65 +2281,51 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
 	mode = einfo->ei_mode;
 	if (einfo->ei_mode == LCK_PR)
 		mode |= LCK_PW;
-	mode = ldlm_lock_match(obd->obd_namespace, *flags | match_lvb, res_id,
-			       einfo->ei_type, policy, mode, lockh, 0);
+	if (agl == 0)
+		match_flags |= LDLM_FL_LVB_READY;
+	if (intent != 0)
+		match_flags |= LDLM_FL_BLOCK_GRANTED;
+	mode = ldlm_lock_match(obd->obd_namespace, match_flags, res_id,
+			       einfo->ei_type, policy, mode, &lockh, 0);
 	if (mode) {
-		struct ldlm_lock *matched = ldlm_handle2lock(lockh);
+		struct ldlm_lock *matched;
 
-		if ((agl != 0) && !(matched->l_flags & LDLM_FL_LVB_READY)) {
-			/* For AGL, if enqueue RPC is sent but the lock is not
-			 * granted, then skip to process this strpe.
-			 * Return -ECANCELED to tell the caller.
+		if (*flags & LDLM_FL_TEST_LOCK)
+			return ELDLM_OK;
+
+		matched = ldlm_handle2lock(&lockh);
+		if (agl) {
+			/* AGL enqueues DLM locks speculatively. Therefore if
+			 * it already exists a DLM lock, it wll just inform the
+			 * caller to cancel the AGL process for this stripe.
 			 */
-			ldlm_lock_decref(lockh, mode);
+			ldlm_lock_decref(&lockh, mode);
 			LDLM_LOCK_PUT(matched);
 			return -ECANCELED;
-		}
-
-		if (osc_set_lock_data_with_check(matched, einfo)) {
+		} else if (osc_set_lock_data_with_check(matched, einfo)) {
 			*flags |= LDLM_FL_LVB_READY;
-			/* addref the lock only if not async requests and PW
-			 * lock is matched whereas we asked for PR.
-			 */
-			if (!rqset && einfo->ei_mode != mode)
-				ldlm_lock_addref(lockh, LCK_PR);
-			if (intent) {
-				/* I would like to be able to ASSERT here that
-				 * rss <= kms, but I can't, for reasons which
-				 * are explained in lov_enqueue()
-				 */
-			}
-
-			/* We already have a lock, and it's referenced.
-			 *
-			 * At this point, the cl_lock::cll_state is CLS_QUEUING,
-			 * AGL upcall may change it to CLS_HELD directly.
-			 */
-			(*upcall)(cookie, ELDLM_OK);
+			/* We already have a lock, and it's referenced. */
+			(*upcall)(cookie, &lockh, ELDLM_LOCK_MATCHED);
 
-			if (einfo->ei_mode != mode)
-				ldlm_lock_decref(lockh, LCK_PW);
-			else if (rqset)
-				/* For async requests, decref the lock. */
-				ldlm_lock_decref(lockh, einfo->ei_mode);
+			ldlm_lock_decref(&lockh, mode);
 			LDLM_LOCK_PUT(matched);
 			return ELDLM_OK;
+		} else {
+			ldlm_lock_decref(&lockh, mode);
+			LDLM_LOCK_PUT(matched);
 		}
-
-		ldlm_lock_decref(lockh, mode);
-		LDLM_LOCK_PUT(matched);
 	}
 
- no_match:
+no_match:
+	if (*flags & LDLM_FL_TEST_LOCK)
+		return -ENOLCK;
 	if (intent) {
-		LIST_HEAD(cancels);
-
 		req = ptlrpc_request_alloc(class_exp2cliimp(exp),
 					   &RQF_LDLM_ENQUEUE_LVB);
 		if (!req)
 			return -ENOMEM;
 
-		rc = ldlm_prep_enqueue_req(exp, req, &cancels, 0);
+		rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
 		if (rc) {
 			ptlrpc_request_free(req);
 			return rc;
@@ -2331,21 +2340,31 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
 	*flags &= ~LDLM_FL_BLOCK_GRANTED;
 
 	rc = ldlm_cli_enqueue(exp, &req, einfo, res_id, policy, flags, lvb,
-			      sizeof(*lvb), LVB_T_OST, lockh, async);
-	if (rqset) {
+			      sizeof(*lvb), LVB_T_OST, &lockh, async);
+	if (async) {
 		if (!rc) {
 			struct osc_enqueue_args *aa;
 
-			CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args));
+			CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
 			aa = ptlrpc_req_async_args(req);
-			aa->oa_ei = einfo;
 			aa->oa_exp = exp;
-			aa->oa_flags  = flags;
+			aa->oa_mode = einfo->ei_mode;
+			aa->oa_type = einfo->ei_type;
+			lustre_handle_copy(&aa->oa_lockh, &lockh);
 			aa->oa_upcall = upcall;
 			aa->oa_cookie = cookie;
-			aa->oa_lvb    = lvb;
-			aa->oa_lockh  = lockh;
 			aa->oa_agl    = !!agl;
+			if (!agl) {
+				aa->oa_flags = flags;
+				aa->oa_lvb = lvb;
+			} else {
+				/* AGL is essentially to enqueue an DLM lock
+				* in advance, so we don't care about the
+				* result of AGL enqueue.
+				*/
+				aa->oa_lvb = NULL;
+				aa->oa_flags = NULL;
+			}
 
 			req->rq_interpret_reply =
 				(ptlrpc_interpterer_t)osc_enqueue_interpret;
@@ -2359,7 +2378,8 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
 		return rc;
 	}
 
-	rc = osc_enqueue_fini(req, lvb, upcall, cookie, flags, agl, rc);
+	rc = osc_enqueue_fini(req, upcall, cookie, &lockh, einfo->ei_mode,
+			      flags, agl, rc);
 	if (intent)
 		ptlrpc_req_finished(req);
 
@@ -2381,8 +2401,8 @@ int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
 	/* Filesystem lock extents are extended to page boundaries so that
 	 * dealing with the page cache is a little smoother
 	 */
-	policy->l_extent.start -= policy->l_extent.start & ~CFS_PAGE_MASK;
-	policy->l_extent.end |= ~CFS_PAGE_MASK;
+	policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK;
+	policy->l_extent.end |= ~PAGE_MASK;
 
 	/* Next, search for already existing extent locks that will cover us */
 	/* If we're trying to read, we also search for an existing PW lock.  The
@@ -2493,7 +2513,7 @@ static int osc_statfs_async(struct obd_export *exp,
 	}
 
 	req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_statfs_interpret;
-	CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args));
+	CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
 	aa = ptlrpc_req_async_args(req);
 	aa->aa_oi = oinfo;
 
@@ -2756,7 +2776,8 @@ static int osc_get_info(const struct lu_env *env, struct obd_export *exp,
 		tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY);
 		memcpy(tmp, key, keylen);
 
-		req->rq_no_delay = req->rq_no_resend = 1;
+		req->rq_no_delay = 1;
+		req->rq_no_resend = 1;
 		ptlrpc_request_set_replen(req);
 		rc = ptlrpc_queue_wait(req);
 		if (rc)
@@ -2787,7 +2808,7 @@ out:
 			goto skip_locking;
 
 		policy.l_extent.start = fm_key->fiemap.fm_start &
-						CFS_PAGE_MASK;
+						PAGE_MASK;
 
 		if (OBD_OBJECT_EOF - fm_key->fiemap.fm_length <=
 		    fm_key->fiemap.fm_start + PAGE_SIZE - 1)
@@ -2795,7 +2816,7 @@ out:
 		else
 			policy.l_extent.end = (fm_key->fiemap.fm_start +
 				fm_key->fiemap.fm_length +
-				PAGE_SIZE - 1) & CFS_PAGE_MASK;
+				PAGE_SIZE - 1) & PAGE_MASK;
 
 		ostid_build_res_name(&fm_key->oa.o_oi, &res_id);
 		mode = ldlm_lock_match(exp->exp_obd->obd_namespace,
@@ -2896,7 +2917,7 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
 
 		LASSERT(!cli->cl_cache); /* only once */
 		cli->cl_cache = val;
-		atomic_inc(&cli->cl_cache->ccc_users);
+		cl_cache_incref(cli->cl_cache);
 		cli->cl_lru_left = &cli->cl_cache->ccc_lru_left;
 
 		/* add this osc into entity list */
@@ -2913,7 +2934,7 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
 		int nr = atomic_read(&cli->cl_lru_in_list) >> 1;
 		int target = *(int *)val;
 
-		nr = osc_lru_shrink(cli, min(nr, target));
+		nr = osc_lru_shrink(env, cli, min(nr, target), true);
 		*(int *)val -= nr;
 		return 0;
 	}
@@ -2992,12 +3013,12 @@ static int osc_reconnect(const struct lu_env *env,
 	if (data && (data->ocd_connect_flags & OBD_CONNECT_GRANT)) {
 		long lost_grant;
 
-		client_obd_list_lock(&cli->cl_loi_list_lock);
+		spin_lock(&cli->cl_loi_list_lock);
 		data->ocd_grant = (cli->cl_avail_grant + cli->cl_dirty) ?:
 				2 * cli_brw_size(obd);
 		lost_grant = cli->cl_lost_grant;
 		cli->cl_lost_grant = 0;
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 
 		CDEBUG(D_RPCTRACE, "ocd_connect_flags: %#llx ocd_version: %d ocd_grant: %d, lost: %ld.\n",
 		       data->ocd_connect_flags,
@@ -3047,10 +3068,10 @@ static int osc_import_event(struct obd_device *obd,
 	switch (event) {
 	case IMP_EVENT_DISCON: {
 		cli = &obd->u.cli;
-		client_obd_list_lock(&cli->cl_loi_list_lock);
+		spin_lock(&cli->cl_loi_list_lock);
 		cli->cl_avail_grant = 0;
 		cli->cl_lost_grant = 0;
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 		break;
 	}
 	case IMP_EVENT_INACTIVE: {
@@ -3073,8 +3094,9 @@ static int osc_import_event(struct obd_device *obd,
 
 			ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
 			cl_env_put(env, &refcheck);
-		} else
+		} else {
 			rc = PTR_ERR(env);
+		}
 		break;
 	}
 	case IMP_EVENT_ACTIVE: {
@@ -3116,20 +3138,14 @@ static int osc_import_event(struct obd_device *obd,
  * \retval zero the lock can't be canceled
  * \retval other ok to cancel
  */
-static int osc_cancel_for_recovery(struct ldlm_lock *lock)
+static int osc_cancel_weight(struct ldlm_lock *lock)
 {
-	check_res_locked(lock->l_resource);
-
 	/*
-	 * Cancel all unused extent lock in granted mode LCK_PR or LCK_CR.
-	 *
-	 * XXX as a future improvement, we can also cancel unused write lock
-	 * if it doesn't have dirty data and active mmaps.
+	 * Cancel all unused and granted extent lock.
 	 */
 	if (lock->l_resource->lr_type == LDLM_EXTENT &&
-	    (lock->l_granted_mode == LCK_PR ||
-	     lock->l_granted_mode == LCK_CR) &&
-	    (osc_dlm_lock_pageref(lock) == 0))
+	    lock->l_granted_mode == lock->l_req_mode &&
+	    osc_ldlm_weigh_ast(lock) == 0)
 		return 1;
 
 	return 0;
@@ -3170,6 +3186,14 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
 	}
 	cli->cl_writeback_work = handler;
 
+	handler = ptlrpcd_alloc_work(cli->cl_import, lru_queue_work, cli);
+	if (IS_ERR(handler)) {
+		rc = PTR_ERR(handler);
+		goto out_ptlrpcd_work;
+	}
+
+	cli->cl_lru_work = handler;
+
 	rc = osc_quota_setup(obd);
 	if (rc)
 		goto out_ptlrpcd_work;
@@ -3198,11 +3222,18 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
 	}
 
 	INIT_LIST_HEAD(&cli->cl_grant_shrink_list);
-	ns_register_cancel(obd->obd_namespace, osc_cancel_for_recovery);
+	ns_register_cancel(obd->obd_namespace, osc_cancel_weight);
 	return rc;
 
 out_ptlrpcd_work:
-	ptlrpcd_destroy_work(handler);
+	if (cli->cl_writeback_work) {
+		ptlrpcd_destroy_work(cli->cl_writeback_work);
+		cli->cl_writeback_work = NULL;
+	}
+	if (cli->cl_lru_work) {
+		ptlrpcd_destroy_work(cli->cl_lru_work);
+		cli->cl_lru_work = NULL;
+	}
 out_client_setup:
 	client_obd_cleanup(obd);
 out_ptlrpcd:
@@ -3241,6 +3272,10 @@ static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
 			ptlrpcd_destroy_work(cli->cl_writeback_work);
 			cli->cl_writeback_work = NULL;
 		}
+		if (cli->cl_lru_work) {
+			ptlrpcd_destroy_work(cli->cl_lru_work);
+			cli->cl_lru_work = NULL;
+		}
 		obd_cleanup_client_import(obd);
 		ptlrpc_lprocfs_unregister_obd(obd);
 		lprocfs_obd_cleanup(obd);
@@ -3262,7 +3297,7 @@ static int osc_cleanup(struct obd_device *obd)
 		list_del_init(&cli->cl_lru_osc);
 		spin_unlock(&cli->cl_cache->ccc_lru_lock);
 		cli->cl_lru_left = NULL;
-		atomic_dec(&cli->cl_cache->ccc_users);
+		cl_cache_decref(cli->cl_cache);
 		cli->cl_cache = NULL;
 	}
 
@@ -3330,7 +3365,6 @@ static struct obd_ops osc_obd_ops = {
 };
 
 extern struct lu_kmem_descr osc_caches[];
-extern spinlock_t osc_ast_guard;
 extern struct lock_class_key osc_ast_guard_class;
 
 static int __init osc_init(void)
@@ -3357,9 +3391,6 @@ static int __init osc_init(void)
 	if (rc)
 		goto out_kmem;
 
-	spin_lock_init(&osc_ast_guard);
-	lockdep_set_class(&osc_ast_guard, &osc_ast_guard_class);
-
 	/* This is obviously too much memory, only prevent overflow here */
 	if (osc_reqpool_mem_max >= 1 << 12 || osc_reqpool_mem_max == 0) {
 		rc = -EINVAL;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c
index cf3ac8eee9ee..d4463d7c81d2 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/client.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/client.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -587,35 +583,36 @@ static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request)
 	spin_unlock(&pool->prp_lock);
 }
 
-static int __ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
-				      __u32 version, int opcode,
-				      int count, __u32 *lengths, char **bufs,
-				      struct ptlrpc_cli_ctx *ctx)
+int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
+			     __u32 version, int opcode, char **bufs,
+			     struct ptlrpc_cli_ctx *ctx)
 {
-	struct obd_import *imp = request->rq_import;
+	int count;
+	struct obd_import *imp;
+	__u32 *lengths;
 	int rc;
 
-	if (unlikely(ctx))
+	count = req_capsule_filled_sizes(&request->rq_pill, RCL_CLIENT);
+	imp = request->rq_import;
+	lengths = request->rq_pill.rc_area[RCL_CLIENT];
+
+	if (unlikely(ctx)) {
 		request->rq_cli_ctx = sptlrpc_cli_ctx_get(ctx);
-	else {
+	} else {
 		rc = sptlrpc_req_get_ctx(request);
 		if (rc)
 			goto out_free;
 	}
-
 	sptlrpc_req_set_flavor(request, opcode);
 
 	rc = lustre_pack_request(request, imp->imp_msg_magic, count,
 				 lengths, bufs);
-	if (rc) {
-		LASSERT(!request->rq_pool);
+	if (rc)
 		goto out_ctx;
-	}
 
 	lustre_msg_add_version(request->rq_reqmsg, version);
 	request->rq_send_state = LUSTRE_IMP_FULL;
 	request->rq_type = PTL_RPC_MSG_REQUEST;
-	request->rq_export = NULL;
 
 	request->rq_req_cbid.cbid_fn = request_out_callback;
 	request->rq_req_cbid.cbid_arg = request;
@@ -624,6 +621,8 @@ static int __ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
 	request->rq_reply_cbid.cbid_arg = request;
 
 	request->rq_reply_deadline = 0;
+	request->rq_bulk_deadline = 0;
+	request->rq_req_deadline = 0;
 	request->rq_phase = RQ_PHASE_NEW;
 	request->rq_next_phase = RQ_PHASE_UNDEFINED;
 
@@ -632,40 +631,49 @@ static int __ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
 
 	ptlrpc_at_set_req_timeout(request);
 
-	spin_lock_init(&request->rq_lock);
-	INIT_LIST_HEAD(&request->rq_list);
-	INIT_LIST_HEAD(&request->rq_timed_list);
-	INIT_LIST_HEAD(&request->rq_replay_list);
-	INIT_LIST_HEAD(&request->rq_ctx_chain);
-	INIT_LIST_HEAD(&request->rq_set_chain);
-	INIT_LIST_HEAD(&request->rq_history_list);
-	INIT_LIST_HEAD(&request->rq_exp_list);
-	init_waitqueue_head(&request->rq_reply_waitq);
-	init_waitqueue_head(&request->rq_set_waitq);
 	request->rq_xid = ptlrpc_next_xid();
-	atomic_set(&request->rq_refcount, 1);
-
 	lustre_msg_set_opc(request->rq_reqmsg, opcode);
 
+	/* Let's setup deadline for req/reply/bulk unlink for opcode. */
+	if (cfs_fail_val == opcode) {
+		time_t *fail_t = NULL, *fail2_t = NULL;
+
+		if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK)) {
+			fail_t = &request->rq_bulk_deadline;
+		} else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) {
+			fail_t = &request->rq_reply_deadline;
+		} else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK)) {
+			fail_t = &request->rq_req_deadline;
+		} else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK)) {
+			fail_t = &request->rq_reply_deadline;
+			fail2_t = &request->rq_bulk_deadline;
+		}
+
+		if (fail_t) {
+			*fail_t = ktime_get_real_seconds() + LONG_UNLINK;
+
+			if (fail2_t)
+				*fail2_t = ktime_get_real_seconds() +
+						 LONG_UNLINK;
+
+			/* The RPC is infected, let the test change the
+			 * fail_loc
+			 */
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule_timeout(cfs_time_seconds(2));
+			set_current_state(TASK_RUNNING);
+		}
+	}
+
 	return 0;
+
 out_ctx:
+	LASSERT(!request->rq_pool);
 	sptlrpc_cli_ctx_put(request->rq_cli_ctx, 1);
 out_free:
 	class_import_put(imp);
 	return rc;
 }
-
-int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
-			     __u32 version, int opcode, char **bufs,
-			     struct ptlrpc_cli_ctx *ctx)
-{
-	int count;
-
-	count = req_capsule_filled_sizes(&request->rq_pill, RCL_CLIENT);
-	return __ptlrpc_request_bufs_pack(request, version, opcode, count,
-					  request->rq_pill.rc_area[RCL_CLIENT],
-					  bufs, ctx);
-}
 EXPORT_SYMBOL(ptlrpc_request_bufs_pack);
 
 /**
@@ -722,7 +730,9 @@ struct ptlrpc_request *__ptlrpc_request_alloc(struct obd_import *imp,
 		request = ptlrpc_prep_req_from_pool(pool);
 
 	if (request) {
-		LASSERTF((unsigned long)imp > 0x1000, "%p\n", imp);
+		ptlrpc_cli_req_init(request);
+
+		LASSERTF((unsigned long)imp > 0x1000, "%p", imp);
 		LASSERT(imp != LP_POISON);
 		LASSERTF((unsigned long)imp->imp_client > 0x1000, "%p\n",
 			 imp->imp_client);
@@ -1082,7 +1092,6 @@ static int ptlrpc_console_allow(struct ptlrpc_request *req)
 	 */
 	if ((lustre_handle_is_used(&req->rq_import->imp_remote_handle)) &&
 	    (opc == OST_CONNECT || opc == MDS_CONNECT || opc == MGS_CONNECT)) {
-
 		/* Suppress timed out reconnect requests */
 		if (req->rq_timedout)
 			return 0;
@@ -1164,9 +1173,9 @@ static int after_reply(struct ptlrpc_request *req)
 
 	LASSERT(obd);
 	/* repbuf must be unlinked */
-	LASSERT(!req->rq_receiving_reply && !req->rq_reply_unlink);
+	LASSERT(!req->rq_receiving_reply && req->rq_reply_unlinked);
 
-	if (req->rq_reply_truncate) {
+	if (req->rq_reply_truncated) {
 		if (ptlrpc_no_resend(req)) {
 			DEBUG_REQ(D_ERROR, req, "reply buffer overflow, expected: %d, actual size: %d",
 				  req->rq_nob_received, req->rq_repbuf_len);
@@ -1240,8 +1249,9 @@ static int after_reply(struct ptlrpc_request *req)
 	}
 
 	ktime_get_real_ts64(&work_start);
-	timediff = (work_start.tv_sec - req->rq_arrival_time.tv_sec) * USEC_PER_SEC +
-		   (work_start.tv_nsec - req->rq_arrival_time.tv_nsec) / NSEC_PER_USEC;
+	timediff = (work_start.tv_sec - req->rq_sent_tv.tv_sec) * USEC_PER_SEC +
+		   (work_start.tv_nsec - req->rq_sent_tv.tv_nsec) /
+								 NSEC_PER_USEC;
 	if (obd->obd_svc_stats) {
 		lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR,
 				    timediff);
@@ -1504,16 +1514,28 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
 		if (!(req->rq_phase == RQ_PHASE_RPC ||
 		      req->rq_phase == RQ_PHASE_BULK ||
 		      req->rq_phase == RQ_PHASE_INTERPRET ||
-		      req->rq_phase == RQ_PHASE_UNREGISTERING ||
+		      req->rq_phase == RQ_PHASE_UNREG_RPC ||
+		      req->rq_phase == RQ_PHASE_UNREG_BULK ||
 		      req->rq_phase == RQ_PHASE_COMPLETE)) {
 			DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase);
 			LBUG();
 		}
 
-		if (req->rq_phase == RQ_PHASE_UNREGISTERING) {
+		if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
+		    req->rq_phase == RQ_PHASE_UNREG_BULK) {
 			LASSERT(req->rq_next_phase != req->rq_phase);
 			LASSERT(req->rq_next_phase != RQ_PHASE_UNDEFINED);
 
+			if (req->rq_req_deadline &&
+			    !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK))
+				req->rq_req_deadline = 0;
+			if (req->rq_reply_deadline &&
+			    !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK))
+				req->rq_reply_deadline = 0;
+			if (req->rq_bulk_deadline &&
+			    !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK))
+				req->rq_bulk_deadline = 0;
+
 			/*
 			 * Skip processing until reply is unlinked. We
 			 * can't return to pool before that and we can't
@@ -1521,7 +1543,10 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
 			 * sure that all rdma transfers finished and will
 			 * not corrupt any data.
 			 */
-			if (ptlrpc_client_recv_or_unlink(req) ||
+			if (req->rq_phase == RQ_PHASE_UNREG_RPC &&
+			    ptlrpc_client_recv_or_unlink(req))
+				continue;
+			if (req->rq_phase == RQ_PHASE_UNREG_BULK &&
 			    ptlrpc_client_bulk_active(req))
 				continue;
 
@@ -1999,7 +2024,7 @@ void ptlrpc_interrupted_set(void *data)
 			list_entry(tmp, struct ptlrpc_request, rq_set_chain);
 
 		if (req->rq_phase != RQ_PHASE_RPC &&
-		    req->rq_phase != RQ_PHASE_UNREGISTERING)
+		    req->rq_phase != RQ_PHASE_UNREG_RPC)
 			continue;
 
 		ptlrpc_mark_interrupted(req);
@@ -2087,7 +2112,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
 		CDEBUG(D_RPCTRACE, "set %p going to sleep for %d seconds\n",
 		       set, timeout);
 
-		if (timeout == 0 && !cfs_signal_pending())
+		if (timeout == 0 && !signal_pending(current))
 			/*
 			 * No requests are in-flight (ether timed out
 			 * or delayed), so we can allow interrupts.
@@ -2114,7 +2139,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
 		 * it being ignored forever
 		 */
 		if (rc == -ETIMEDOUT && !lwi.lwi_allow_intr &&
-		    cfs_signal_pending()) {
+		    signal_pending(current)) {
 			sigset_t blocked_sigs =
 					   cfs_block_sigsinv(LUSTRE_FATAL_SIGS);
 
@@ -2124,7 +2149,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
 			 * important signals since ptlrpc set is not easily
 			 * reentrant from userspace again
 			 */
-			if (cfs_signal_pending())
+			if (signal_pending(current))
 				ptlrpc_interrupted_set(set);
 			cfs_restore_sigs(blocked_sigs);
 		}
@@ -2196,11 +2221,11 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
 {
 	if (!request)
 		return;
+	LASSERT(!request->rq_srv_req);
+	LASSERT(!request->rq_export);
 	LASSERTF(!request->rq_receiving_reply, "req %p\n", request);
-	LASSERTF(!request->rq_rqbd, "req %p\n", request);/* client-side */
 	LASSERTF(list_empty(&request->rq_list), "req %p\n", request);
 	LASSERTF(list_empty(&request->rq_set_chain), "req %p\n", request);
-	LASSERTF(list_empty(&request->rq_exp_list), "req %p\n", request);
 	LASSERTF(!request->rq_replay, "req %p\n", request);
 
 	req_capsule_fini(&request->rq_pill);
@@ -2226,10 +2251,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
 
 	if (request->rq_repbuf)
 		sptlrpc_cli_free_repbuf(request);
-	if (request->rq_export) {
-		class_export_put(request->rq_export);
-		request->rq_export = NULL;
-	}
+
 	if (request->rq_import) {
 		class_import_put(request->rq_import);
 		request->rq_import = NULL;
@@ -2314,8 +2336,9 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
 
 	/* Let's setup deadline for reply unlink. */
 	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-	    async && request->rq_reply_deadline == 0)
-		request->rq_reply_deadline = ktime_get_real_seconds()+LONG_UNLINK;
+	    async && request->rq_reply_deadline == 0 && cfs_fail_val == 0)
+		request->rq_reply_deadline =
+			ktime_get_real_seconds() + LONG_UNLINK;
 
 	/* Nothing left to do. */
 	if (!ptlrpc_client_recv_or_unlink(request))
@@ -2328,7 +2351,7 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
 		return 1;
 
 	/* Move to "Unregistering" phase as reply was not unlinked yet. */
-	ptlrpc_rqphase_move(request, RQ_PHASE_UNREGISTERING);
+	ptlrpc_rqphase_move(request, RQ_PHASE_UNREG_RPC);
 
 	/* Do not wait for unlink to finish. */
 	if (async)
@@ -2360,9 +2383,10 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
 
 		LASSERT(rc == -ETIMEDOUT);
 		DEBUG_REQ(D_WARNING, request,
-			  "Unexpectedly long timeout rvcng=%d unlnk=%d/%d",
+			  "Unexpectedly long timeout receiving_reply=%d req_ulinked=%d reply_unlinked=%d",
 			  request->rq_receiving_reply,
-			  request->rq_req_unlink, request->rq_reply_unlink);
+			  request->rq_req_unlinked,
+			  request->rq_reply_unlinked);
 	}
 	return 0;
 }
@@ -2619,11 +2643,6 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
 }
 EXPORT_SYMBOL(ptlrpc_queue_wait);
 
-struct ptlrpc_replay_async_args {
-	int praa_old_state;
-	int praa_old_status;
-};
-
 /**
  * Callback used for replayed requests reply processing.
  * In case of successful reply calls registered request replay callback.
@@ -2962,7 +2981,6 @@ static void ptlrpcd_add_work_req(struct ptlrpc_request *req)
 	req->rq_timeout		= obd_timeout;
 	req->rq_sent		= ktime_get_real_seconds();
 	req->rq_deadline	= req->rq_sent + req->rq_timeout;
-	req->rq_reply_deadline	= req->rq_deadline;
 	req->rq_phase		= RQ_PHASE_INTERPRET;
 	req->rq_next_phase	= RQ_PHASE_COMPLETE;
 	req->rq_xid		= ptlrpc_next_xid();
@@ -3018,27 +3036,17 @@ void *ptlrpcd_alloc_work(struct obd_import *imp,
 		return ERR_PTR(-ENOMEM);
 	}
 
+	ptlrpc_cli_req_init(req);
+
 	req->rq_send_state = LUSTRE_IMP_FULL;
 	req->rq_type = PTL_RPC_MSG_REQUEST;
 	req->rq_import = class_import_get(imp);
-	req->rq_export = NULL;
 	req->rq_interpret_reply = work_interpreter;
 	/* don't want reply */
-	req->rq_receiving_reply = 0;
-	req->rq_req_unlink = req->rq_reply_unlink = 0;
-	req->rq_no_delay = req->rq_no_resend = 1;
+	req->rq_no_delay = 1;
+	req->rq_no_resend = 1;
 	req->rq_pill.rc_fmt = (void *)&worker_format;
 
-	spin_lock_init(&req->rq_lock);
-	INIT_LIST_HEAD(&req->rq_list);
-	INIT_LIST_HEAD(&req->rq_replay_list);
-	INIT_LIST_HEAD(&req->rq_set_chain);
-	INIT_LIST_HEAD(&req->rq_history_list);
-	INIT_LIST_HEAD(&req->rq_exp_list);
-	init_waitqueue_head(&req->rq_reply_waitq);
-	init_waitqueue_head(&req->rq_set_waitq);
-	atomic_set(&req->rq_refcount, 1);
-
 	CLASSERT(sizeof(*args) <= sizeof(req->rq_async_args));
 	args = ptlrpc_req_async_args(req);
 	args->cb = cb;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/connection.c b/drivers/staging/lustre/lustre/ptlrpc/connection.c
index a14daff3fca0..177a379da9fa 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/connection.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/connection.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/events.c b/drivers/staging/lustre/lustre/ptlrpc/events.c
index 47be21ac9f10..b1ce72511509 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/events.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/events.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -55,28 +51,33 @@ void request_out_callback(lnet_event_t *ev)
 {
 	struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
 	struct ptlrpc_request *req = cbid->cbid_arg;
+	bool wakeup = false;
 
-	LASSERT(ev->type == LNET_EVENT_SEND ||
-		ev->type == LNET_EVENT_UNLINK);
+	LASSERT(ev->type == LNET_EVENT_SEND || ev->type == LNET_EVENT_UNLINK);
 	LASSERT(ev->unlinked);
 
 	DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status);
 
 	sptlrpc_request_out_callback(req);
+
 	spin_lock(&req->rq_lock);
 	req->rq_real_sent = ktime_get_real_seconds();
-	if (ev->unlinked)
-		req->rq_req_unlink = 0;
+	req->rq_req_unlinked = 1;
+	/* reply_in_callback happened before request_out_callback? */
+	if (req->rq_reply_unlinked)
+		wakeup = true;
 
 	if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) {
-
 		/* Failed send: make it seem like the reply timed out, just
 		 * like failing sends in client.c does currently...
 		 */
-
 		req->rq_net_err = 1;
-		ptlrpc_client_wake_req(req);
+		wakeup = true;
 	}
+
+	if (wakeup)
+		ptlrpc_client_wake_req(req);
+
 	spin_unlock(&req->rq_lock);
 
 	ptlrpc_req_finished(req);
@@ -105,7 +106,7 @@ void reply_in_callback(lnet_event_t *ev)
 	req->rq_receiving_reply = 0;
 	req->rq_early = 0;
 	if (ev->unlinked)
-		req->rq_reply_unlink = 0;
+		req->rq_reply_unlinked = 1;
 
 	if (ev->status)
 		goto out_wake;
@@ -119,7 +120,7 @@ void reply_in_callback(lnet_event_t *ev)
 	if (ev->mlength < ev->rlength) {
 		CDEBUG(D_RPCTRACE, "truncate req %p rpc %d - %d+%d\n", req,
 		       req->rq_replen, ev->rlength, ev->offset);
-		req->rq_reply_truncate = 1;
+		req->rq_reply_truncated = 1;
 		req->rq_replied = 1;
 		req->rq_status = -EOVERFLOW;
 		req->rq_nob_received = ev->rlength + ev->offset;
@@ -136,7 +137,8 @@ void reply_in_callback(lnet_event_t *ev)
 
 		req->rq_early_count++; /* number received, client side */
 
-		if (req->rq_replied)   /* already got the real reply */
+		/* already got the real reply or buffers are already unlinked */
+		if (req->rq_replied || req->rq_reply_unlinked == 1)
 			goto out_wake;
 
 		req->rq_early = 1;
@@ -329,6 +331,7 @@ void request_in_callback(lnet_event_t *ev)
 		}
 	}
 
+	ptlrpc_srv_req_init(req);
 	/* NB we ABSOLUTELY RELY on req being zeroed, so pointers are NULL,
 	 * flags are reset and scalars are zero.  We only set the message
 	 * size to non-zero if this was a successful receive.
@@ -342,10 +345,6 @@ void request_in_callback(lnet_event_t *ev)
 	req->rq_self = ev->target.nid;
 	req->rq_rqbd = rqbd;
 	req->rq_phase = RQ_PHASE_NEW;
-	spin_lock_init(&req->rq_lock);
-	INIT_LIST_HEAD(&req->rq_timed_list);
-	INIT_LIST_HEAD(&req->rq_exp_list);
-	atomic_set(&req->rq_refcount, 1);
 	if (ev->type == LNET_EVENT_PUT)
 		CDEBUG(D_INFO, "incoming req@%p x%llu msgsize %u\n",
 		       req, req->rq_xid, ev->mlength);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/import.c b/drivers/staging/lustre/lustre/ptlrpc/import.c
index cd94fed0ffdf..3292e6ea0102 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/import.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/import.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -360,9 +356,8 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
 						  "still on delayed list");
 				}
 
-				CERROR("%s: RPCs in \"%s\" phase found (%d). Network is sluggish? Waiting them to error out.\n",
+				CERROR("%s: Unregistering RPCs found (%d). Network is sluggish? Waiting them to error out.\n",
 				       cli_tgt,
-				       ptlrpc_phase2str(RQ_PHASE_UNREGISTERING),
 				       atomic_read(&imp->
 						   imp_unregistering));
 			}
@@ -698,7 +693,8 @@ int ptlrpc_connect_import(struct obd_import *imp)
 
 	lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_NEXT_VER);
 
-	request->rq_no_resend = request->rq_no_delay = 1;
+	request->rq_no_resend = 1;
+	request->rq_no_delay = 1;
 	request->rq_send_state = LUSTRE_IMP_CONNECTING;
 	/* Allow a slightly larger reply for future growth compatibility */
 	req_capsule_set_size(&request->rq_pill, &RMF_CONNECT_DATA, RCL_SERVER,
@@ -1001,6 +997,7 @@ finish:
 			return 0;
 		}
 	} else {
+		static bool warned;
 
 		spin_lock(&imp->imp_lock);
 		list_del(&imp->imp_conn_current->oic_item);
@@ -1021,7 +1018,7 @@ finish:
 			goto out;
 		}
 
-		if ((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
+		if (!warned && (ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
 		    (ocd->ocd_version > LUSTRE_VERSION_CODE +
 					LUSTRE_VERSION_OFFSET_WARN ||
 		     ocd->ocd_version < LUSTRE_VERSION_CODE -
@@ -1029,10 +1026,8 @@ finish:
 			/* Sigh, some compilers do not like #ifdef in the middle
 			 * of macro arguments
 			 */
-			const char *older = "older. Consider upgrading server or downgrading client"
-				;
-			const char *newer = "newer than client version. Consider upgrading client"
-					    ;
+			const char *older = "older than client. Consider upgrading server";
+			const char *newer = "newer than client. Consider recompiling application";
 
 			LCONSOLE_WARN("Server %s version (%d.%d.%d.%d) is much %s (%s)\n",
 				      obd2cli_tgt(imp->imp_obd),
@@ -1042,6 +1037,7 @@ finish:
 				      OBD_OCD_VERSION_FIX(ocd->ocd_version),
 				      ocd->ocd_version > LUSTRE_VERSION_CODE ?
 				      newer : older, LUSTRE_VERSION_STRING);
+			warned = true;
 		}
 
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
@@ -1370,7 +1366,6 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
 			if (rc)
 				goto out;
 		}
-
 	}
 
 	if (imp->imp_state == LUSTRE_IMP_REPLAY_WAIT) {
@@ -1453,7 +1448,6 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
 				       back_to_sleep, LWI_ON_SIGNAL_NOOP, NULL);
 		rc = l_wait_event(imp->imp_recovery_waitq,
 				  !ptlrpc_import_in_recovery(imp), &lwi);
-
 	}
 
 	spin_lock(&imp->imp_lock);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/layout.c b/drivers/staging/lustre/lustre/ptlrpc/layout.c
index 5b06901e5729..ab5d85174245 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/layout.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/layout.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -160,6 +156,16 @@ static const struct req_msg_field *fld_query_server[] = {
 	&RMF_FLD_MDFLD
 };
 
+static const struct req_msg_field *fld_read_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_FLD_MDFLD
+};
+
+static const struct req_msg_field *fld_read_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_GENERIC_DATA
+};
+
 static const struct req_msg_field *mds_getattr_name_client[] = {
 	&RMF_PTLRPC_BODY,
 	&RMF_MDT_BODY,
@@ -188,7 +194,7 @@ static const struct req_msg_field *mds_reint_create_slave_client[] = {
 	&RMF_DLM_REQ
 };
 
-static const struct req_msg_field *mds_reint_create_rmt_acl_client[] = {
+static const struct req_msg_field *mds_reint_create_acl_client[] = {
 	&RMF_PTLRPC_BODY,
 	&RMF_REC_REINT,
 	&RMF_CAPA1,
@@ -566,7 +572,7 @@ static const struct req_msg_field *ost_get_info_generic_server[] = {
 
 static const struct req_msg_field *ost_get_info_generic_client[] = {
 	&RMF_PTLRPC_BODY,
-	&RMF_SETINFO_KEY
+	&RMF_GETINFO_KEY
 };
 
 static const struct req_msg_field *ost_get_last_id_server[] = {
@@ -574,6 +580,12 @@ static const struct req_msg_field *ost_get_last_id_server[] = {
 	&RMF_OBD_ID
 };
 
+static const struct req_msg_field *ost_get_last_fid_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_GETINFO_KEY,
+	&RMF_FID,
+};
+
 static const struct req_msg_field *ost_get_last_fid_server[] = {
 	&RMF_PTLRPC_BODY,
 	&RMF_FID,
@@ -643,6 +655,7 @@ static struct req_format *req_formats[] = {
 	&RQF_MGS_CONFIG_READ,
 	&RQF_SEQ_QUERY,
 	&RQF_FLD_QUERY,
+	&RQF_FLD_READ,
 	&RQF_MDS_CONNECT,
 	&RQF_MDS_DISCONNECT,
 	&RQF_MDS_GET_INFO,
@@ -662,7 +675,7 @@ static struct req_format *req_formats[] = {
 	&RQF_MDS_DONE_WRITING,
 	&RQF_MDS_REINT,
 	&RQF_MDS_REINT_CREATE,
-	&RQF_MDS_REINT_CREATE_RMT_ACL,
+	&RQF_MDS_REINT_CREATE_ACL,
 	&RQF_MDS_REINT_CREATE_SLAVE,
 	&RQF_MDS_REINT_CREATE_SYM,
 	&RQF_MDS_REINT_OPEN,
@@ -696,7 +709,7 @@ static struct req_format *req_formats[] = {
 	&RQF_OST_BRW_WRITE,
 	&RQF_OST_STATFS,
 	&RQF_OST_SET_GRANT_INFO,
-	&RQF_OST_GET_INFO_GENERIC,
+	&RQF_OST_GET_INFO,
 	&RQF_OST_GET_INFO_LAST_ID,
 	&RQF_OST_GET_INFO_LAST_FID,
 	&RQF_OST_SET_INFO_LAST_FID,
@@ -1162,6 +1175,10 @@ struct req_format RQF_FLD_QUERY =
 	DEFINE_REQ_FMT0("FLD_QUERY", fld_query_client, fld_query_server);
 EXPORT_SYMBOL(RQF_FLD_QUERY);
 
+struct req_format RQF_FLD_READ =
+	DEFINE_REQ_FMT0("FLD_READ", fld_read_client, fld_read_server);
+EXPORT_SYMBOL(RQF_FLD_READ);
+
 struct req_format RQF_LOG_CANCEL =
 	DEFINE_REQ_FMT0("OBD_LOG_CANCEL", log_cancel_client, empty);
 EXPORT_SYMBOL(RQF_LOG_CANCEL);
@@ -1221,10 +1238,10 @@ struct req_format RQF_MDS_REINT_CREATE =
 			mds_reint_create_client, mdt_body_capa);
 EXPORT_SYMBOL(RQF_MDS_REINT_CREATE);
 
-struct req_format RQF_MDS_REINT_CREATE_RMT_ACL =
-	DEFINE_REQ_FMT0("MDS_REINT_CREATE_RMT_ACL",
-			mds_reint_create_rmt_acl_client, mdt_body_capa);
-EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_RMT_ACL);
+struct req_format RQF_MDS_REINT_CREATE_ACL =
+	DEFINE_REQ_FMT0("MDS_REINT_CREATE_ACL",
+			mds_reint_create_acl_client, mdt_body_capa);
+EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_ACL);
 
 struct req_format RQF_MDS_REINT_CREATE_SLAVE =
 	DEFINE_REQ_FMT0("MDS_REINT_CREATE_EA",
@@ -1519,10 +1536,10 @@ struct req_format RQF_OST_SET_GRANT_INFO =
 			ost_body_only);
 EXPORT_SYMBOL(RQF_OST_SET_GRANT_INFO);
 
-struct req_format RQF_OST_GET_INFO_GENERIC =
+struct req_format RQF_OST_GET_INFO =
 	DEFINE_REQ_FMT0("OST_GET_INFO", ost_get_info_generic_client,
 			ost_get_info_generic_server);
-EXPORT_SYMBOL(RQF_OST_GET_INFO_GENERIC);
+EXPORT_SYMBOL(RQF_OST_GET_INFO);
 
 struct req_format RQF_OST_GET_INFO_LAST_ID =
 	DEFINE_REQ_FMT0("OST_GET_INFO_LAST_ID", ost_get_info_generic_client,
@@ -1530,7 +1547,7 @@ struct req_format RQF_OST_GET_INFO_LAST_ID =
 EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_ID);
 
 struct req_format RQF_OST_GET_INFO_LAST_FID =
-	DEFINE_REQ_FMT0("OST_GET_INFO_LAST_FID", obd_set_info_client,
+	DEFINE_REQ_FMT0("OST_GET_INFO_LAST_FID", ost_get_last_fid_client,
 			ost_get_last_fid_server);
 EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_FID);
 
diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_client.c b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c
index a23ac5f9ae96..0f55c01feba8 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/llog_client.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_net.c b/drivers/staging/lustre/lustre/ptlrpc/llog_net.c
index fbccb62213b5..bccdace7e51f 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/llog_net.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/llog_net.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
index c95a91ce26c9..bc93b75744e1 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -131,6 +127,7 @@ static struct ll_rpc_opcode {
 	{ SEC_CTX_INIT_CONT, "sec_ctx_init_cont" },
 	{ SEC_CTX_FINI,     "sec_ctx_fini" },
 	{ FLD_QUERY,	"fld_query" },
+	{ FLD_READ,	"fld_read" },
 };
 
 static struct ll_eopcode {
@@ -679,11 +676,11 @@ static ssize_t ptlrpc_lprocfs_nrs_seq_write(struct file *file,
 	/**
 	 * The second token is either NULL, or an optional [reg|hp] string
 	 */
-	if (strcmp(cmd, "reg") == 0)
+	if (strcmp(cmd, "reg") == 0) {
 		queue = PTLRPC_NRS_QUEUE_REG;
-	else if (strcmp(cmd, "hp") == 0)
+	} else if (strcmp(cmd, "hp") == 0) {
 		queue = PTLRPC_NRS_QUEUE_HP;
-	else {
+	} else {
 		rc = -EINVAL;
 		goto out;
 	}
@@ -693,8 +690,9 @@ default_queue:
 	if (queue == PTLRPC_NRS_QUEUE_HP && !nrs_svc_has_hp(svc)) {
 		rc = -ENODEV;
 		goto out;
-	} else if (queue == PTLRPC_NRS_QUEUE_BOTH && !nrs_svc_has_hp(svc))
+	} else if (queue == PTLRPC_NRS_QUEUE_BOTH && !nrs_svc_has_hp(svc)) {
 		queue = PTLRPC_NRS_QUEUE_REG;
+	}
 
 	/**
 	 * Serialize NRS core lprocfs operations with policy registration/
@@ -870,7 +868,8 @@ ptlrpc_lprocfs_svc_req_history_next(struct seq_file *s,
 
 		if (i > srhi->srhi_idx) { /* reset iterator for a new CPT */
 			srhi->srhi_req = NULL;
-			seq = srhi->srhi_seq = 0;
+			seq = 0;
+			srhi->srhi_seq = 0;
 		} else { /* the next sequence */
 			seq = srhi->srhi_seq + (1 << svc->srv_cpt_bits);
 		}
@@ -1159,7 +1158,6 @@ void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes)
 
 	lprocfs_counter_add(svc_stats, idx, bytes);
 }
-
 EXPORT_SYMBOL(ptlrpc_lprocfs_brw);
 
 void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc)
@@ -1320,6 +1318,5 @@ int lprocfs_wr_pinger_recov(struct file *file, const char __user *buffer,
 	up_read(&obd->u.cli.cl_sem);
 
 	return count;
-
 }
 EXPORT_SYMBOL(lprocfs_wr_pinger_recov);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
index 10b8fe82a342..11ec82545347 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -251,7 +247,7 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
 
 	/* Let's setup deadline for reply unlink. */
 	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
-	    async && req->rq_bulk_deadline == 0)
+	    async && req->rq_bulk_deadline == 0 && cfs_fail_val == 0)
 		req->rq_bulk_deadline = ktime_get_real_seconds() + LONG_UNLINK;
 
 	if (ptlrpc_client_bulk_active(req) == 0)	/* completed or */
@@ -270,7 +266,7 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
 		return 1;				/* never registered */
 
 	/* Move to "Unregistering" phase as bulk was not unlinked yet. */
-	ptlrpc_rqphase_move(req, RQ_PHASE_UNREGISTERING);
+	ptlrpc_rqphase_move(req, RQ_PHASE_UNREG_BULK);
 
 	/* Do not wait for unlink to finish. */
 	if (async)
@@ -581,19 +577,18 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
 	}
 
 	spin_lock(&request->rq_lock);
-	/* If the MD attach succeeds, there _will_ be a reply_in callback */
-	request->rq_receiving_reply = !noreply;
-	request->rq_req_unlink = 1;
 	/* We are responsible for unlinking the reply buffer */
-	request->rq_reply_unlink = !noreply;
+	request->rq_reply_unlinked = noreply;
+	request->rq_receiving_reply = !noreply;
 	/* Clear any flags that may be present from previous sends. */
+	request->rq_req_unlinked = 0;
 	request->rq_replied = 0;
 	request->rq_err = 0;
 	request->rq_timedout = 0;
 	request->rq_net_err = 0;
 	request->rq_resend = 0;
 	request->rq_restart = 0;
-	request->rq_reply_truncate = 0;
+	request->rq_reply_truncated = 0;
 	spin_unlock(&request->rq_lock);
 
 	if (!noreply) {
@@ -608,7 +603,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
 		reply_md.user_ptr = &request->rq_reply_cbid;
 		reply_md.eq_handle = ptlrpc_eq_h;
 
-		/* We must see the unlink callback to unset rq_reply_unlink,
+		/* We must see the unlink callback to set rq_reply_unlinked,
 		 * so we can't auto-unlink
 		 */
 		rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN,
@@ -637,7 +632,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
 
 	OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);
 
-	ktime_get_real_ts64(&request->rq_arrival_time);
+	ktime_get_real_ts64(&request->rq_sent_tv);
 	request->rq_sent = ktime_get_real_seconds();
 	/* We give the server rq_timeout secs to process the req, and
 	 * add the network latency for our local timeout.
@@ -655,9 +650,10 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
 			  connection,
 			  request->rq_request_portal,
 			  request->rq_xid, 0);
-	if (rc == 0)
+	if (likely(rc == 0))
 		goto out;
 
+	request->rq_req_unlinked = 1;
 	ptlrpc_req_finished(request);
 	if (noreply)
 		goto out;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs.c b/drivers/staging/lustre/lustre/ptlrpc/nrs.c
index 710fb806f122..d88faf61e740 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/nrs.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/nrs.c
@@ -769,7 +769,7 @@ static int nrs_policy_register(struct ptlrpc_nrs *nrs,
 	spin_unlock(&nrs->nrs_lock);
 
 	if (rc != 0)
-		(void) nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
+		(void)nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
 
 	return rc;
 }
@@ -975,7 +975,11 @@ static void nrs_svcpt_cleanup_locked(struct ptlrpc_service_part *svcpt)
 	LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
 
 again:
-	nrs = nrs_svcpt2nrs(svcpt, hp);
+	/* scp_nrs_hp could be NULL due to short of memory. */
+	nrs = hp ? svcpt->scp_nrs_hp : &svcpt->scp_nrs_reg;
+	/* check the nrs_svcpt to see if nrs is initialized. */
+	if (!nrs || !nrs->nrs_svcpt)
+		return;
 	nrs->nrs_stopping = 1;
 
 	list_for_each_entry_safe(policy, tmp, &nrs->nrs_policy_list, pol_list) {
@@ -1038,7 +1042,6 @@ static int nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc *desc)
 	LASSERT(mutex_is_locked(&ptlrpc_all_services_mutex));
 
 	list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
-
 		if (!nrs_policy_compatible(svc, desc) ||
 		    unlikely(svc->srv_is_stopping))
 			continue;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
index 492d63fad6f9..b514f18fae50 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -1160,7 +1156,6 @@ __u32 lustre_msg_get_timeout(struct lustre_msg *msg)
 		if (!pb) {
 			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
 			return 0;
-
 		}
 		return pb->pb_timeout;
 	}
@@ -1179,7 +1174,6 @@ __u32 lustre_msg_get_service_time(struct lustre_msg *msg)
 		if (!pb) {
 			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
 			return 0;
-
 		}
 		return pb->pb_service_time;
 	}
@@ -1572,7 +1566,6 @@ static void lustre_swab_obdo(struct obdo *o)
 	CLASSERT(offsetof(typeof(*o), o_padding_4) != 0);
 	CLASSERT(offsetof(typeof(*o), o_padding_5) != 0);
 	CLASSERT(offsetof(typeof(*o), o_padding_6) != 0);
-
 }
 
 void lustre_swab_obd_statfs(struct obd_statfs *os)
@@ -1809,19 +1802,6 @@ void lustre_swab_obd_quotactl(struct obd_quotactl *q)
 }
 EXPORT_SYMBOL(lustre_swab_obd_quotactl);
 
-void lustre_swab_mdt_remote_perm(struct mdt_remote_perm *p)
-{
-	__swab32s(&p->rp_uid);
-	__swab32s(&p->rp_gid);
-	__swab32s(&p->rp_fsuid);
-	__swab32s(&p->rp_fsuid_h);
-	__swab32s(&p->rp_fsgid);
-	__swab32s(&p->rp_fsgid_h);
-	__swab32s(&p->rp_access_perm);
-	__swab32s(&p->rp_padding);
-};
-EXPORT_SYMBOL(lustre_swab_mdt_remote_perm);
-
 void lustre_swab_fid2path(struct getinfo_fid2path *gf)
 {
 	lustre_swab_lu_fid(&gf->gf_fid);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pers.c b/drivers/staging/lustre/lustre/ptlrpc/pers.c
index ec3af109a1d7..6c820e944171 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/pers.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/pers.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pinger.c b/drivers/staging/lustre/lustre/ptlrpc/pinger.c
index 8a869315c258..c0529d808d81 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/pinger.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/pinger.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -57,7 +53,8 @@ ptlrpc_prep_ping(struct obd_import *imp)
 					LUSTRE_OBD_VERSION, OBD_PING);
 	if (req) {
 		ptlrpc_request_set_replen(req);
-		req->rq_no_resend = req->rq_no_delay = 1;
+		req->rq_no_resend = 1;
+		req->rq_no_delay = 1;
 	}
 	return req;
 }
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
index 6ca26c98de1b..a9831fab80f3 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -292,4 +288,47 @@ static inline void ptlrpc_reqset_put(struct ptlrpc_request_set *set)
 	if (atomic_dec_and_test(&set->set_refcount))
 		kfree(set);
 }
+
+/** initialise ptlrpc common fields */
+static inline void ptlrpc_req_comm_init(struct ptlrpc_request *req)
+{
+	spin_lock_init(&req->rq_lock);
+	atomic_set(&req->rq_refcount, 1);
+	INIT_LIST_HEAD(&req->rq_list);
+	INIT_LIST_HEAD(&req->rq_replay_list);
+}
+
+/** initialise client side ptlrpc request */
+static inline void ptlrpc_cli_req_init(struct ptlrpc_request *req)
+{
+	struct ptlrpc_cli_req *cr = &req->rq_cli;
+
+	ptlrpc_req_comm_init(req);
+
+	req->rq_receiving_reply = 0;
+	req->rq_req_unlinked = 1;
+	req->rq_reply_unlinked = 1;
+
+	req->rq_receiving_reply = 0;
+	req->rq_req_unlinked = 1;
+	req->rq_reply_unlinked = 1;
+
+	INIT_LIST_HEAD(&cr->cr_set_chain);
+	INIT_LIST_HEAD(&cr->cr_ctx_chain);
+	init_waitqueue_head(&cr->cr_reply_waitq);
+	init_waitqueue_head(&cr->cr_set_waitq);
+}
+
+/** initialise server side ptlrpc request */
+static inline void ptlrpc_srv_req_init(struct ptlrpc_request *req)
+{
+	struct ptlrpc_srv_req *sr = &req->rq_srv;
+
+	ptlrpc_req_comm_init(req);
+	req->rq_srv_req = 1;
+	INIT_LIST_HEAD(&sr->sr_exp_list);
+	INIT_LIST_HEAD(&sr->sr_timed_list);
+	INIT_LIST_HEAD(&sr->sr_hist_list);
+}
+
 #endif /* PTLRPC_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c
index a8ec0e9d7b2e..a70d5843f30e 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
index db003f5da09e..0a374b6c2f71 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -161,9 +157,9 @@ static int ptlrpcd_users;
 
 void ptlrpcd_wake(struct ptlrpc_request *req)
 {
-	struct ptlrpc_request_set *rq_set = req->rq_set;
+	struct ptlrpc_request_set *set = req->rq_set;
 
-	wake_up(&rq_set->set_waitq);
+	wake_up(&set->set_waitq);
 }
 EXPORT_SYMBOL(ptlrpcd_wake);
 
@@ -387,7 +383,8 @@ static int ptlrpcd(void *arg)
 {
 	struct ptlrpcd_ctl *pc = arg;
 	struct ptlrpc_request_set *set;
-	struct lu_env env = { .le_ses = NULL };
+	struct lu_context ses = { 0 };
+	struct lu_env env = { .le_ses = &ses };
 	int rc = 0;
 	int exit = 0;
 
@@ -416,6 +413,13 @@ static int ptlrpcd(void *arg)
 	 */
 	rc = lu_context_init(&env.le_ctx,
 			     LCT_CL_THREAD|LCT_REMEMBER|LCT_NOREF);
+	if (rc == 0) {
+		rc = lu_context_init(env.le_ses,
+				     LCT_SESSION | LCT_REMEMBER | LCT_NOREF);
+		if (rc != 0)
+			lu_context_fini(&env.le_ctx);
+	}
+
 	if (rc != 0)
 		goto failed;
 
@@ -436,9 +440,10 @@ static int ptlrpcd(void *arg)
 				  ptlrpc_expired_set, set);
 
 		lu_context_enter(&env.le_ctx);
-		l_wait_event(set->set_waitq,
-			     ptlrpcd_check(&env, pc), &lwi);
+		lu_context_enter(env.le_ses);
+		l_wait_event(set->set_waitq, ptlrpcd_check(&env, pc), &lwi);
 		lu_context_exit(&env.le_ctx);
+		lu_context_exit(env.le_ses);
 
 		/*
 		 * Abort inflight rpcs for forced stop case.
@@ -461,6 +466,7 @@ static int ptlrpcd(void *arg)
 	if (!list_empty(&set->set_requests))
 		ptlrpc_set_wait(set);
 	lu_context_fini(&env.le_ctx);
+	lu_context_fini(env.le_ses);
 
 	complete(&pc->pc_finishing);
 
@@ -899,8 +905,11 @@ int ptlrpcd_addref(void)
 	int rc = 0;
 
 	mutex_lock(&ptlrpcd_mutex);
-	if (++ptlrpcd_users == 1)
+	if (++ptlrpcd_users == 1) {
 		rc = ptlrpcd_init();
+		if (rc < 0)
+			ptlrpcd_users--;
+	}
 	mutex_unlock(&ptlrpcd_mutex);
 	return rc;
 }
diff --git a/drivers/staging/lustre/lustre/ptlrpc/recover.c b/drivers/staging/lustre/lustre/ptlrpc/recover.c
index 30d9a164e52d..718b3a8d61c6 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/recover.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/recover.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c
index 187fd1d6898c..dbd819fa6b75 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -867,11 +863,9 @@ int sptlrpc_import_check_ctx(struct obd_import *imp)
 	if (!req)
 		return -ENOMEM;
 
-	spin_lock_init(&req->rq_lock);
+	ptlrpc_cli_req_init(req);
 	atomic_set(&req->rq_refcount, 10000);
-	INIT_LIST_HEAD(&req->rq_ctx_chain);
-	init_waitqueue_head(&req->rq_reply_waitq);
-	init_waitqueue_head(&req->rq_set_waitq);
+
 	req->rq_import = imp;
 	req->rq_flvr = sec->ps_flvr;
 	req->rq_cli_ctx = ctx;
@@ -1051,6 +1045,8 @@ int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req,
 	if (!early_req)
 		return -ENOMEM;
 
+	ptlrpc_cli_req_init(early_req);
+
 	early_size = req->rq_nob_received;
 	early_bufsz = size_roundup_power2(early_size);
 	early_buf = libcfs_kvzalloc(early_bufsz, GFP_NOFS);
@@ -1099,12 +1095,11 @@ int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req,
 	memcpy(early_buf, req->rq_repbuf, early_size);
 	spin_unlock(&req->rq_lock);
 
-	spin_lock_init(&early_req->rq_lock);
 	early_req->rq_cli_ctx = sptlrpc_cli_ctx_get(req->rq_cli_ctx);
 	early_req->rq_flvr = req->rq_flvr;
 	early_req->rq_repbuf = early_buf;
 	early_req->rq_repbuf_len = early_bufsz;
-	early_req->rq_repdata = (struct lustre_msg *) early_buf;
+	early_req->rq_repdata = (struct lustre_msg *)early_buf;
 	early_req->rq_repdata_len = early_size;
 	early_req->rq_early = 1;
 	early_req->rq_reqmsg = req->rq_reqmsg;
@@ -1556,7 +1551,7 @@ void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg,
 	/* move from segment + 1 to end segment */
 	LASSERT(msg->lm_magic == LUSTRE_MSG_MAGIC_V2);
 	oldmsg_size = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
-	movesize = oldmsg_size - ((unsigned long) src - (unsigned long) msg);
+	movesize = oldmsg_size - ((unsigned long)src - (unsigned long)msg);
 	LASSERT(movesize >= 0);
 
 	if (movesize)
@@ -2196,6 +2191,9 @@ int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset)
 
 	pud = lustre_msg_buf(msg, offset, 0);
 
+	if (!pud)
+		return -EINVAL;
+
 	pud->pud_uid = from_kuid(&init_user_ns, current_uid());
 	pud->pud_gid = from_kgid(&init_user_ns, current_gid());
 	pud->pud_fsuid = from_kuid(&init_user_ns, current_fsuid());
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
index d3872b8c9a6e..5f4d79718589 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -41,7 +37,6 @@
 #define DEBUG_SUBSYSTEM S_SEC
 
 #include "../../include/linux/libcfs/libcfs.h"
-#include <linux/crypto.h>
 
 #include "../include/obd.h"
 #include "../include/obd_cksum.h"
@@ -274,7 +269,7 @@ static unsigned long enc_pools_shrink_scan(struct shrinker *s,
 static inline
 int npages_to_npools(unsigned long npages)
 {
-	return (int) ((npages + PAGES_PER_POOL - 1) / PAGES_PER_POOL);
+	return (int)((npages + PAGES_PER_POOL - 1) / PAGES_PER_POOL);
 }
 
 /*
@@ -511,7 +506,6 @@ int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
 {
 	struct cfs_crypto_hash_desc *hdesc;
 	int hashsize;
-	char hashbuf[64];
 	unsigned int bufsize;
 	int i, err;
 
@@ -529,21 +523,23 @@ int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
 
 	for (i = 0; i < desc->bd_iov_count; i++) {
 		cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].kiov_page,
-				  desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK,
+				  desc->bd_iov[i].kiov_offset & ~PAGE_MASK,
 				  desc->bd_iov[i].kiov_len);
 	}
+
 	if (hashsize > buflen) {
+		unsigned char hashbuf[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
+
 		bufsize = sizeof(hashbuf);
-		err = cfs_crypto_hash_final(hdesc, (unsigned char *)hashbuf,
-					    &bufsize);
+		LASSERTF(bufsize >= hashsize, "bufsize = %u < hashsize %u\n",
+			 bufsize, hashsize);
+		err = cfs_crypto_hash_final(hdesc, hashbuf, &bufsize);
 		memcpy(buf, hashbuf, buflen);
 	} else {
 		bufsize = buflen;
 		err = cfs_crypto_hash_final(hdesc, buf, &bufsize);
 	}
 
-	if (err)
-		cfs_crypto_hash_final(hdesc, NULL, NULL);
 	return err;
 }
 EXPORT_SYMBOL(sptlrpc_get_bulk_checksum);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
index a51b18bbfd34..c14035479c5f 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -648,7 +644,7 @@ static int logname2fsname(const char *logname, char *buf, int buflen)
 		return -EINVAL;
 	}
 
-	len = min((int) (ptr - logname), buflen - 1);
+	len = min((int)(ptr - logname), buflen - 1);
 
 	memcpy(buf, logname, len);
 	buf[len] = '\0';
@@ -819,7 +815,7 @@ void sptlrpc_conf_client_adapt(struct obd_device *obd)
 	CDEBUG(D_SEC, "obd %s\n", obd->u.cli.cl_target_uuid.uuid);
 
 	/* serialize with connect/disconnect import */
-	down_read(&obd->u.cli.cl_sem);
+	down_read_nested(&obd->u.cli.cl_sem, OBD_CLI_SEM_MDCOSC);
 
 	imp = obd->u.cli.cl_import;
 	if (imp) {
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
index 9082da06b28a..9b9801ece582 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c
index e610a8ddd223..07273f577969 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_null.c b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c
index 40e5349de38c..70a61e12bb7b 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_null.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -60,7 +56,7 @@ static struct ptlrpc_svc_ctx    null_svc_ctx;
 static inline
 void null_encode_sec_part(struct lustre_msg *msg, enum lustre_sec_part sp)
 {
-	msg->lm_secflvr |= (((__u32) sp) & 0xFF) << 24;
+	msg->lm_secflvr |= (((__u32)sp) & 0xFF) << 24;
 }
 
 static inline
@@ -265,7 +261,8 @@ int null_enlarge_reqbuf(struct ptlrpc_sec *sec,
 		memcpy(newbuf, req->rq_reqbuf, req->rq_reqlen);
 
 		kvfree(req->rq_reqbuf);
-		req->rq_reqbuf = req->rq_reqmsg = newbuf;
+		req->rq_reqbuf = newbuf;
+		req->rq_reqmsg = newbuf;
 		req->rq_reqbuf_len = alloc_size;
 
 		if (req->rq_import)
@@ -329,7 +326,7 @@ int null_alloc_rs(struct ptlrpc_request *req, int msgsize)
 	rs->rs_svc_ctx = req->rq_svc_ctx;
 	atomic_inc(&req->rq_svc_ctx->sc_refcount);
 
-	rs->rs_repbuf = (struct lustre_msg *) (rs + 1);
+	rs->rs_repbuf = (struct lustre_msg *)(rs + 1);
 	rs->rs_repbuf_len = rs_size - sizeof(*rs);
 	rs->rs_msg = rs->rs_repbuf;
 
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
index 6276bf59c3aa..5c4590b0c521 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -162,7 +158,7 @@ static void corrupt_bulk_data(struct ptlrpc_bulk_desc *desc)
 			continue;
 
 		ptr = kmap(desc->bd_iov[i].kiov_page);
-		off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
+		off = desc->bd_iov[i].kiov_offset & ~PAGE_MASK;
 		ptr[off] ^= 0x1;
 		kunmap(desc->bd_iov[i].kiov_page);
 		return;
@@ -298,7 +294,7 @@ int plain_cli_wrap_bulk(struct ptlrpc_cli_ctx *ctx,
 	LASSERT(req->rq_reqbuf->lm_bufcount == PLAIN_PACK_SEGMENTS);
 
 	bsd = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
-	token = (struct plain_bulk_token *) bsd->bsd_data;
+	token = (struct plain_bulk_token *)bsd->bsd_data;
 
 	bsd->bsd_version = 0;
 	bsd->bsd_flags = 0;
@@ -343,7 +339,7 @@ int plain_cli_unwrap_bulk(struct ptlrpc_cli_ctx *ctx,
 	LASSERT(req->rq_repdata->lm_bufcount == PLAIN_PACK_SEGMENTS);
 
 	bsdv = lustre_msg_buf(req->rq_repdata, PLAIN_PACK_BULK_OFF, 0);
-	tokenv = (struct plain_bulk_token *) bsdv->bsd_data;
+	tokenv = (struct plain_bulk_token *)bsdv->bsd_data;
 
 	if (req->rq_bulk_write) {
 		if (bsdv->bsd_flags & BSD_FL_ERR)
@@ -574,8 +570,12 @@ int plain_alloc_reqbuf(struct ptlrpc_sec *sec,
 	lustre_init_msg_v2(req->rq_reqbuf, PLAIN_PACK_SEGMENTS, buflens, NULL);
 	req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, 0);
 
-	if (req->rq_pack_udesc)
-		sptlrpc_pack_user_desc(req->rq_reqbuf, PLAIN_PACK_USER_OFF);
+	if (req->rq_pack_udesc) {
+		int rc = sptlrpc_pack_user_desc(req->rq_reqbuf,
+					      PLAIN_PACK_USER_OFF);
+		if (rc < 0)
+			return rc;
+	}
 
 	return 0;
 }
@@ -811,7 +811,7 @@ int plain_alloc_rs(struct ptlrpc_request *req, int msgsize)
 
 	rs->rs_svc_ctx = req->rq_svc_ctx;
 	atomic_inc(&req->rq_svc_ctx->sc_refcount);
-	rs->rs_repbuf = (struct lustre_msg *) (rs + 1);
+	rs->rs_repbuf = (struct lustre_msg *)(rs + 1);
 	rs->rs_repbuf_len = rs_size - sizeof(*rs);
 
 	lustre_init_msg_v2(rs->rs_repbuf, PLAIN_PACK_SEGMENTS, buflens, NULL);
@@ -891,7 +891,7 @@ int plain_svc_unwrap_bulk(struct ptlrpc_request *req,
 	LASSERT(req->rq_pack_bulk);
 
 	bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
-	tokenr = (struct plain_bulk_token *) bsdr->bsd_data;
+	tokenr = (struct plain_bulk_token *)bsdr->bsd_data;
 	bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0);
 
 	bsdv->bsd_version = 0;
@@ -926,7 +926,7 @@ int plain_svc_wrap_bulk(struct ptlrpc_request *req,
 
 	bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
 	bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0);
-	tokenv = (struct plain_bulk_token *) bsdv->bsd_data;
+	tokenv = (struct plain_bulk_token *)bsdv->bsd_data;
 
 	bsdv->bsd_version = 0;
 	bsdv->bsd_type = SPTLRPC_BULK_DEFAULT;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c b/drivers/staging/lustre/lustre/ptlrpc/service.c
index 1bbd1d39ccf8..4788c4940c2a 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/service.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/service.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -838,6 +834,11 @@ static void ptlrpc_server_finish_request(struct ptlrpc_service_part *svcpt,
 {
 	ptlrpc_server_hpreq_fini(req);
 
+	if (req->rq_session.lc_thread) {
+		lu_context_exit(&req->rq_session);
+		lu_context_fini(&req->rq_session);
+	}
+
 	ptlrpc_server_drop_request(req);
 }
 
@@ -1579,6 +1580,21 @@ ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt,
 	}
 
 	req->rq_svc_thread = thread;
+	if (thread) {
+		/* initialize request session, it is needed for request
+		 * processing by target
+		 */
+		rc = lu_context_init(&req->rq_session,
+				     LCT_SERVER_SESSION | LCT_NOREF);
+		if (rc) {
+			CERROR("%s: failure to initialize session: rc = %d\n",
+			       thread->t_name, rc);
+			goto err_req;
+		}
+		req->rq_session.lc_thread = thread;
+		lu_context_enter(&req->rq_session);
+		req->rq_svc_thread->t_env->le_ses = &req->rq_session;
+	}
 
 	ptlrpc_at_add_timed(req);
 
@@ -1612,7 +1628,6 @@ ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
 	struct timespec64 arrived;
 	unsigned long timediff_usecs;
 	unsigned long arrived_usecs;
-	int rc;
 	int fail_opc = 0;
 
 	request = ptlrpc_server_request_get(svcpt, false);
@@ -1649,21 +1664,6 @@ ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
 				    at_get(&svcpt->scp_at_estimate));
 	}
 
-	rc = lu_context_init(&request->rq_session, LCT_SESSION | LCT_NOREF);
-	if (rc) {
-		CERROR("Failure to initialize session: %d\n", rc);
-		goto out_req;
-	}
-	request->rq_session.lc_thread = thread;
-	request->rq_session.lc_cookie = 0x5;
-	lu_context_enter(&request->rq_session);
-
-	CDEBUG(D_NET, "got req %llu\n", request->rq_xid);
-
-	request->rq_svc_thread = thread;
-	if (thread)
-		request->rq_svc_thread->t_env->le_ses = &request->rq_session;
-
 	if (likely(request->rq_export)) {
 		if (unlikely(ptlrpc_check_req(request)))
 			goto put_conn;
@@ -1695,14 +1695,21 @@ ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
 	if (lustre_msg_get_opc(request->rq_reqmsg) != OBD_PING)
 		CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_PAUSE_REQ, cfs_fail_val);
 
-	rc = svc->srv_ops.so_req_handler(request);
+	CDEBUG(D_NET, "got req %llu\n", request->rq_xid);
+
+	/* re-assign request and sesson thread to the current one */
+	request->rq_svc_thread = thread;
+	if (thread) {
+		LASSERT(request->rq_session.lc_thread);
+		request->rq_session.lc_thread = thread;
+		request->rq_session.lc_cookie = 0x55;
+		thread->t_env->le_ses = &request->rq_session;
+	}
+	svc->srv_ops.so_req_handler(request);
 
 	ptlrpc_rqphase_move(request, RQ_PHASE_COMPLETE);
 
 put_conn:
-	lu_context_exit(&request->rq_session);
-	lu_context_fini(&request->rq_session);
-
 	if (unlikely(ktime_get_real_seconds() > request->rq_deadline)) {
 		DEBUG_REQ(D_WARNING, request,
 			  "Request took longer than estimated (%lld:%llds); "
@@ -1756,7 +1763,6 @@ put_conn:
 			  request->rq_arrival_time.tv_sec);
 	}
 
-out_req:
 	ptlrpc_server_finish_active_request(svcpt, request);
 
 	return 1;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
index 3ffd2d91f274..6cc2b2edf3fc 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -276,7 +272,9 @@ void lustre_assert_wire_constants(void)
 		 (long long)FLD_QUERY);
 	LASSERTF(FLD_FIRST_OPC == 900, "found %lld\n",
 		 (long long)FLD_FIRST_OPC);
-	LASSERTF(FLD_LAST_OPC == 901, "found %lld\n",
+	LASSERTF(FLD_READ == 901, "found %lld\n",
+		 (long long)FLD_READ);
+	LASSERTF(FLD_LAST_OPC == 902, "found %lld\n",
 		 (long long)FLD_LAST_OPC);
 	LASSERTF(SEQ_QUERY == 700, "found %lld\n",
 		 (long long)SEQ_QUERY);
@@ -1069,6 +1067,8 @@ void lustre_assert_wire_constants(void)
 		 OBD_CONNECT_PINGLESS);
 	LASSERTF(OBD_CONNECT_FLOCK_DEAD == 0x8000000000000ULL,
 		 "found 0x%.16llxULL\n", OBD_CONNECT_FLOCK_DEAD);
+	LASSERTF(OBD_CONNECT_OPEN_BY_FID == 0x20000000000000ULL,
+		 "found 0x%.16llxULL\n", OBD_CONNECT_OPEN_BY_FID);
 	LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n",
 		(unsigned)OBD_CKSUM_CRC32);
 	LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n",
@@ -1265,8 +1265,6 @@ void lustre_assert_wire_constants(void)
 		 OBD_MD_FLXATTRRM);
 	LASSERTF(OBD_MD_FLACL == (0x0000008000000000ULL), "found 0x%.16llxULL\n",
 		 OBD_MD_FLACL);
-	LASSERTF(OBD_MD_FLRMTPERM == (0x0000010000000000ULL), "found 0x%.16llxULL\n",
-		 OBD_MD_FLRMTPERM);
 	LASSERTF(OBD_MD_FLMDSCAPA == (0x0000020000000000ULL), "found 0x%.16llxULL\n",
 		 OBD_MD_FLMDSCAPA);
 	LASSERTF(OBD_MD_FLOSSCAPA == (0x0000040000000000ULL), "found 0x%.16llxULL\n",
@@ -1277,14 +1275,6 @@ void lustre_assert_wire_constants(void)
 		 OBD_MD_FLCROSSREF);
 	LASSERTF(OBD_MD_FLGETATTRLOCK == (0x0000200000000000ULL), "found 0x%.16llxULL\n",
 		 OBD_MD_FLGETATTRLOCK);
-	LASSERTF(OBD_MD_FLRMTLSETFACL == (0x0001000000000000ULL), "found 0x%.16llxULL\n",
-		 OBD_MD_FLRMTLSETFACL);
-	LASSERTF(OBD_MD_FLRMTLGETFACL == (0x0002000000000000ULL), "found 0x%.16llxULL\n",
-		 OBD_MD_FLRMTLGETFACL);
-	LASSERTF(OBD_MD_FLRMTRSETFACL == (0x0004000000000000ULL), "found 0x%.16llxULL\n",
-		 OBD_MD_FLRMTRSETFACL);
-	LASSERTF(OBD_MD_FLRMTRGETFACL == (0x0008000000000000ULL), "found 0x%.16llxULL\n",
-		 OBD_MD_FLRMTRGETFACL);
 	LASSERTF(OBD_MD_FLDATAVERSION == (0x0010000000000000ULL), "found 0x%.16llxULL\n",
 		 OBD_MD_FLDATAVERSION);
 	CLASSERT(OBD_FL_INLINEDATA == 0x00000001);
@@ -1639,6 +1629,12 @@ void lustre_assert_wire_constants(void)
 		OBD_BRW_ASYNC);
 	LASSERTF(OBD_BRW_MEMALLOC == 0x800, "found 0x%.8x\n",
 		OBD_BRW_MEMALLOC);
+	LASSERTF(OBD_BRW_OVER_USRQUOTA == 0x1000, "found 0x%.8x\n",
+		 OBD_BRW_OVER_USRQUOTA);
+	LASSERTF(OBD_BRW_OVER_GRPQUOTA == 0x2000, "found 0x%.8x\n",
+		 OBD_BRW_OVER_GRPQUOTA);
+	LASSERTF(OBD_BRW_SOFT_SYNC == 0x4000, "found 0x%.8x\n",
+		 OBD_BRW_SOFT_SYNC);
 
 	/* Checks for struct ost_body */
 	LASSERTF((int)sizeof(struct ost_body) == 208, "found %lld\n",
@@ -1885,44 +1881,6 @@ void lustre_assert_wire_constants(void)
 	LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->padding) == 4, "found %lld\n",
 		 (long long)(int)sizeof(((struct mdt_ioepoch *)0)->padding));
 
-	/* Checks for struct mdt_remote_perm */
-	LASSERTF((int)sizeof(struct mdt_remote_perm) == 32, "found %lld\n",
-		 (long long)(int)sizeof(struct mdt_remote_perm));
-	LASSERTF((int)offsetof(struct mdt_remote_perm, rp_uid) == 0, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_remote_perm, rp_uid));
-	LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_uid) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_uid));
-	LASSERTF((int)offsetof(struct mdt_remote_perm, rp_gid) == 4, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_remote_perm, rp_gid));
-	LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_gid) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_gid));
-	LASSERTF((int)offsetof(struct mdt_remote_perm, rp_fsuid) == 8, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_remote_perm, rp_fsuid));
-	LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_fsuid) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_fsuid));
-	LASSERTF((int)offsetof(struct mdt_remote_perm, rp_fsgid) == 16, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_remote_perm, rp_fsgid));
-	LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_fsgid) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_fsgid));
-	LASSERTF((int)offsetof(struct mdt_remote_perm, rp_access_perm) == 24, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_remote_perm, rp_access_perm));
-	LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_access_perm) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_access_perm));
-	LASSERTF((int)offsetof(struct mdt_remote_perm, rp_padding) == 28, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_remote_perm, rp_padding));
-	LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_padding) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_padding));
-	LASSERTF(CFS_SETUID_PERM == 0x00000001UL, "found 0x%.8xUL\n",
-		(unsigned)CFS_SETUID_PERM);
-	LASSERTF(CFS_SETGID_PERM == 0x00000002UL, "found 0x%.8xUL\n",
-		(unsigned)CFS_SETGID_PERM);
-	LASSERTF(CFS_SETGRP_PERM == 0x00000004UL, "found 0x%.8xUL\n",
-		(unsigned)CFS_SETGRP_PERM);
-	LASSERTF(CFS_RMTACL_PERM == 0x00000008UL, "found 0x%.8xUL\n",
-		(unsigned)CFS_RMTACL_PERM);
-	LASSERTF(CFS_RMTOWN_PERM == 0x00000010UL, "found 0x%.8xUL\n",
-		(unsigned)CFS_RMTOWN_PERM);
-
 	/* Checks for struct mdt_rec_setattr */
 	LASSERTF((int)sizeof(struct mdt_rec_setattr) == 136, "found %lld\n",
 		 (long long)(int)sizeof(struct mdt_rec_setattr));