27 files changed, 1810 insertions, 1900 deletions
diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h
index 89292c93dcd5..dc685610c4c4 100644
--- a/drivers/staging/lustre/lustre/include/cl_object.h
+++ b/drivers/staging/lustre/lustre/include/cl_object.h
@@ -59,10 +59,6 @@
  *		 read/write system call it is associated with the single user
  *		 thread, that issued the system call).
  *
- *   - cl_req      represents a collection of pages for a transfer. cl_req is
- *		 constructed by req-forming engine that tries to saturate
- *		 transport with large and continuous transfers.
- *
  * Terminology
  *
  *     - to avoid confusion high-level I/O operation like read or write system
@@ -103,11 +99,8 @@
 struct inode;
 
 struct cl_device;
-struct cl_device_operations;
 
 struct cl_object;
-struct cl_object_page_operations;
-struct cl_object_lock_operations;
 
 struct cl_page;
 struct cl_page_slice;
@@ -120,27 +113,7 @@ struct cl_page_operations;
 struct cl_io;
 struct cl_io_slice;
 
-struct cl_req;
-struct cl_req_slice;
-
-/**
- * Operations for each data device in the client stack.
- *
- * \see vvp_cl_ops, lov_cl_ops, lovsub_cl_ops, osc_cl_ops
- */
-struct cl_device_operations {
-	/**
-	 * Initialize cl_req. This method is called top-to-bottom on all
-	 * devices in the stack to get them a chance to allocate layer-private
-	 * data, and to attach them to the cl_req by calling
-	 * cl_req_slice_add().
-	 *
-	 * \see osc_req_init(), lov_req_init(), lovsub_req_init()
-	 * \see vvp_req_init()
-	 */
-	int (*cdo_req_init)(const struct lu_env *env, struct cl_device *dev,
-			    struct cl_req *req);
-};
+struct cl_req_attr;
 
 /**
  * Device in the client stack.
@@ -150,8 +123,6 @@ struct cl_device_operations {
 struct cl_device {
 	/** Super-class. */
 	struct lu_device		   cd_lu_dev;
-	/** Per-layer operation vector. */
-	const struct cl_device_operations *cd_ops;
 };
 
 /** \addtogroup cl_object cl_object
@@ -267,7 +238,7 @@ struct cl_object_conf {
 		/**
 		 * Object layout. This is consumed by lov.
 		 */
-		struct lustre_md *coc_md;
+		struct lu_buf	  coc_layout;
 		/**
 		 * Description of particular stripe location in the
 		 * cluster. This is consumed by osc.
@@ -301,6 +272,26 @@ enum {
 	OBJECT_CONF_WAIT = 2
 };
 
+enum {
+	CL_LAYOUT_GEN_NONE	= (u32)-2,	/* layout lock was cancelled */
+	CL_LAYOUT_GEN_EMPTY	= (u32)-1,	/* for empty layout */
+};
+
+struct cl_layout {
+	/** the buffer to return the layout in lov_mds_md format. */
+	struct lu_buf	cl_buf;
+	/** size of layout in lov_mds_md format. */
+	size_t		cl_size;
+	/** Layout generation. */
+	u32		cl_layout_gen;
+	/**
+	 * True if this is a released file.
+	 * Temporarily added for released file truncate in ll_setattr_raw().
+	 * It will be removed later. -Jinshan
+	 */
+	bool		cl_is_released;
+};
+
 /**
  * Operations implemented for each cl object layer.
  *
@@ -400,6 +391,27 @@ struct cl_object_operations {
 	 */
 	int (*coo_getstripe)(const struct lu_env *env, struct cl_object *obj,
 			     struct lov_user_md __user *lum);
+	/**
+	 * Get FIEMAP mapping from the object.
+	 */
+	int (*coo_fiemap)(const struct lu_env *env, struct cl_object *obj,
+			  struct ll_fiemap_info_key *fmkey,
+			  struct fiemap *fiemap, size_t *buflen);
+	/**
+	 * Get layout and generation of the object.
+	 */
+	int (*coo_layout_get)(const struct lu_env *env, struct cl_object *obj,
+			      struct cl_layout *layout);
+	/**
+	 * Get maximum size of the object.
+	 */
+	loff_t (*coo_maxbytes)(struct cl_object *obj);
+	/**
+	 * Set request attributes.
+	 */
+	void (*coo_req_attr_set)(const struct lu_env *env,
+				 struct cl_object *obj,
+				 struct cl_req_attr *attr);
 };
 
 /**
@@ -591,7 +603,7 @@ enum cl_page_state {
 	 *
 	 *     - [cl_page_state::CPS_PAGEOUT] page is dirty, the
 	 *     req-formation engine decides that it wants to include this page
-	 *     into an cl_req being constructed, and yanks it from the cache;
+	 *     into an RPC being constructed, and yanks it from the cache;
 	 *
 	 *     - [cl_page_state::CPS_FREEING] VM callback is executed to
 	 *     evict the page form the memory;
@@ -660,7 +672,7 @@ enum cl_page_state {
 	 * Page is being read in, as a part of a transfer. This is quite
 	 * similar to the cl_page_state::CPS_PAGEOUT state, except that
 	 * read-in is always "immediate"---there is no such thing a sudden
-	 * construction of read cl_req from cached, presumably not up to date,
+	 * construction of read request from cached, presumably not up to date,
 	 * pages.
 	 *
 	 * Underlying VM page is locked for the duration of transfer.
@@ -714,8 +726,6 @@ struct cl_page {
 	struct list_head	 cp_batch;
 	/** List of slices. Immutable after creation. */
 	struct list_head	 cp_layers;
-	/** Linkage of pages within cl_req. */
-	struct list_head         cp_flight;
 	/**
 	 * Page state. This field is const to avoid accidental update, it is
 	 * modified only internally within cl_page.c. Protected by a VM lock.
@@ -732,12 +742,6 @@ struct cl_page {
 	 * by sub-io. Protected by a VM lock.
 	 */
 	struct cl_io	    *cp_owner;
-	/**
-	 * Owning IO request in cl_page_state::CPS_PAGEOUT and
-	 * cl_page_state::CPS_PAGEIN states. This field is maintained only in
-	 * the top-level pages. Protected by a VM lock.
-	 */
-	struct cl_req	   *cp_req;
 	/** List of references to this page, for debugging. */
 	struct lu_ref	    cp_reference;
 	/** Link to an object, for debugging. */
@@ -779,7 +783,6 @@ enum cl_lock_mode {
 
 /**
  * Requested transfer type.
- * \ingroup cl_req
  */
 enum cl_req_type {
 	CRT_READ,
@@ -884,26 +887,6 @@ struct cl_page_operations {
 	/** Destructor. Frees resources and slice itself. */
 	void (*cpo_fini)(const struct lu_env *env,
 			 struct cl_page_slice *slice);
-
-	/**
-	 * Checks whether the page is protected by a cl_lock. This is a
-	 * per-layer method, because certain layers have ways to check for the
-	 * lock much more efficiently than through the generic locks scan, or
-	 * implement locking mechanisms separate from cl_lock, e.g.,
-	 * LL_FILE_GROUP_LOCKED in vvp. If \a pending is true, check for locks
-	 * being canceled, or scheduled for cancellation as soon as the last
-	 * user goes away, too.
-	 *
-	 * \retval    -EBUSY: page is protected by a lock of a given mode;
-	 * \retval  -ENODATA: page is not protected by a lock;
-	 * \retval	 0: this layer cannot decide.
-	 *
-	 * \see cl_page_is_under_lock()
-	 */
-	int (*cpo_is_under_lock)(const struct lu_env *env,
-				 const struct cl_page_slice *slice,
-				 struct cl_io *io, pgoff_t *max);
-
 	/**
 	 * Optional debugging helper. Prints given page slice.
 	 *
@@ -915,8 +898,7 @@ struct cl_page_operations {
 	/**
 	 * \name transfer
 	 *
-	 * Transfer methods. See comment on cl_req for a description of
-	 * transfer formation and life-cycle.
+	 * Transfer methods.
 	 *
 	 * @{
 	 */
@@ -962,7 +944,7 @@ struct cl_page_operations {
 				       int ioret);
 		/**
 		 * Called when cached page is about to be added to the
-		 * cl_req as a part of req formation.
+		 * ptlrpc request as a part of req formation.
 		 *
 		 * \return    0       : proceed with this page;
 		 * \return    -EAGAIN : skip this page;
@@ -1365,7 +1347,6 @@ struct cl_2queue {
  *     (3) sort all locks to avoid dead-locks, and acquire them
  *
  *     (4) process the chunk: call per-page methods
- *	 (cl_io_operations::cio_read_page() for read,
  *	 cl_io_operations::cio_prepare_write(),
  *	 cl_io_operations::cio_commit_write() for write)
  *
@@ -1388,6 +1369,8 @@ enum cl_io_type {
 	CIT_WRITE,
 	/** truncate, utime system calls */
 	CIT_SETATTR,
+	/** get data version */
+	CIT_DATA_VERSION,
 	/**
 	 * page fault handling
 	 */
@@ -1467,6 +1450,31 @@ struct cl_io_slice {
 
 typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *,
 			      struct cl_page *);
+
+struct cl_read_ahead {
+	/*
+	 * Maximum page index the readahead window will end.
+	 * This is determined DLM lock coverage, RPC and stripe boundary.
+	 * cra_end is included.
+	 */
+	pgoff_t cra_end;
+	/*
+	 * Release routine. If readahead holds resources underneath, this
+	 * function should be called to release it.
+	 */
+	void (*cra_release)(const struct lu_env *env, void *cbdata);
+	/* Callback data for cra_release routine */
+	void *cra_cbdata;
+};
+
+static inline void cl_read_ahead_release(const struct lu_env *env,
+					 struct cl_read_ahead *ra)
+{
+	if (ra->cra_release)
+		ra->cra_release(env, ra->cra_cbdata);
+	memset(ra, 0, sizeof(*ra));
+}
+
 /**
  * Per-layer io operations.
  * \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops
@@ -1573,16 +1581,13 @@ struct cl_io_operations {
 				 struct cl_page_list *queue, int from, int to,
 				 cl_commit_cbt cb);
 	/**
-	 * Read missing page.
-	 *
-	 * Called by a top-level cl_io_operations::op[CIT_READ]::cio_start()
-	 * method, when it hits not-up-to-date page in the range. Optional.
+	 * Decide maximum read ahead extent
 	 *
 	 * \pre io->ci_type == CIT_READ
 	 */
-	int (*cio_read_page)(const struct lu_env *env,
-			     const struct cl_io_slice *slice,
-			     const struct cl_page_slice *page);
+	int (*cio_read_ahead)(const struct lu_env *env,
+			      const struct cl_io_slice *slice,
+			      pgoff_t start, struct cl_read_ahead *ra);
 	/**
 	 * Optional debugging helper. Print given io slice.
 	 */
@@ -1765,10 +1770,15 @@ struct cl_io {
 		struct cl_io_rw_common ci_rw;
 		struct cl_setattr_io {
 			struct ost_lvb   sa_attr;
+			unsigned int		 sa_attr_flags;
 			unsigned int     sa_valid;
 			int		sa_stripe_index;
-			struct lu_fid  *sa_parent_fid;
+			const struct lu_fid	*sa_parent_fid;
 		} ci_setattr;
+		struct cl_data_version_io {
+			u64 dv_data_version;
+			int dv_flags;
+		} ci_data_version;
 		struct cl_fault_io {
 			/** page index within file. */
 			pgoff_t	 ft_index;
@@ -1836,179 +1846,20 @@ struct cl_io {
 
 /** @} cl_io */
 
-/** \addtogroup cl_req cl_req
- * @{
- */
-/** \struct cl_req
- * Transfer.
- *
- * There are two possible modes of transfer initiation on the client:
- *
- *     - immediate transfer: this is started when a high level io wants a page
- *       or a collection of pages to be transferred right away. Examples:
- *       read-ahead, synchronous read in the case of non-page aligned write,
- *       page write-out as a part of extent lock cancellation, page write-out
- *       as a part of memory cleansing. Immediate transfer can be both
- *       cl_req_type::CRT_READ and cl_req_type::CRT_WRITE;
- *
- *     - opportunistic transfer (cl_req_type::CRT_WRITE only), that happens
- *       when io wants to transfer a page to the server some time later, when
- *       it can be done efficiently. Example: pages dirtied by the write(2)
- *       path.
- *
- * In any case, transfer takes place in the form of a cl_req, which is a
- * representation for a network RPC.
- *
- * Pages queued for an opportunistic transfer are cached until it is decided
- * that efficient RPC can be composed of them. This decision is made by "a
- * req-formation engine", currently implemented as a part of osc
- * layer. Req-formation depends on many factors: the size of the resulting
- * RPC, whether or not multi-object RPCs are supported by the server,
- * max-rpc-in-flight limitations, size of the dirty cache, etc.
- *
- * For the immediate transfer io submits a cl_page_list, that req-formation
- * engine slices into cl_req's, possibly adding cached pages to some of
- * the resulting req's.
- *
- * Whenever a page from cl_page_list is added to a newly constructed req, its
- * cl_page_operations::cpo_prep() layer methods are called. At that moment,
- * page state is atomically changed from cl_page_state::CPS_OWNED to
- * cl_page_state::CPS_PAGEOUT or cl_page_state::CPS_PAGEIN, cl_page::cp_owner
- * is zeroed, and cl_page::cp_req is set to the
- * req. cl_page_operations::cpo_prep() method at the particular layer might
- * return -EALREADY to indicate that it does not need to submit this page
- * at all. This is possible, for example, if page, submitted for read,
- * became up-to-date in the meantime; and for write, the page don't have
- * dirty bit marked. \see cl_io_submit_rw()
- *
- * Whenever a cached page is added to a newly constructed req, its
- * cl_page_operations::cpo_make_ready() layer methods are called. At that
- * moment, page state is atomically changed from cl_page_state::CPS_CACHED to
- * cl_page_state::CPS_PAGEOUT, and cl_page::cp_req is set to
- * req. cl_page_operations::cpo_make_ready() method at the particular layer
- * might return -EAGAIN to indicate that this page is not eligible for the
- * transfer right now.
- *
- * FUTURE
- *
- * Plan is to divide transfers into "priority bands" (indicated when
- * submitting cl_page_list, and queuing a page for the opportunistic transfer)
- * and allow glueing of cached pages to immediate transfers only within single
- * band. This would make high priority transfers (like lock cancellation or
- * memory pressure induced write-out) really high priority.
- *
- */
-
 /**
  * Per-transfer attributes.
  */
 struct cl_req_attr {
+	enum cl_req_type cra_type;
+	u64		 cra_flags;
+	struct cl_page	*cra_page;
+
 	/** Generic attributes for the server consumption. */
 	struct obdo	*cra_oa;
 	/** Jobid */
 	char		 cra_jobid[LUSTRE_JOBID_SIZE];
 };
 
-/**
- * Transfer request operations definable at every layer.
- *
- * Concurrency: transfer formation engine synchronizes calls to all transfer
- * methods.
- */
-struct cl_req_operations {
-	/**
-	 * Invoked top-to-bottom by cl_req_prep() when transfer formation is
-	 * complete (all pages are added).
-	 *
-	 * \see osc_req_prep()
-	 */
-	int  (*cro_prep)(const struct lu_env *env,
-			 const struct cl_req_slice *slice);
-	/**
-	 * Called top-to-bottom to fill in \a oa fields. This is called twice
-	 * with different flags, see bug 10150 and osc_build_req().
-	 *
-	 * \param obj an object from cl_req which attributes are to be set in
-	 *	    \a oa.
-	 *
-	 * \param oa struct obdo where attributes are placed
-	 *
-	 * \param flags \a oa fields to be filled.
-	 */
-	void (*cro_attr_set)(const struct lu_env *env,
-			     const struct cl_req_slice *slice,
-			     const struct cl_object *obj,
-			     struct cl_req_attr *attr, u64 flags);
-	/**
-	 * Called top-to-bottom from cl_req_completion() to notify layers that
-	 * transfer completed. Has to free all state allocated by
-	 * cl_device_operations::cdo_req_init().
-	 */
-	void (*cro_completion)(const struct lu_env *env,
-			       const struct cl_req_slice *slice, int ioret);
-};
-
-/**
- * A per-object state that (potentially multi-object) transfer request keeps.
- */
-struct cl_req_obj {
-	/** object itself */
-	struct cl_object   *ro_obj;
-	/** reference to cl_req_obj::ro_obj. For debugging. */
-	struct lu_ref_link  ro_obj_ref;
-	/* something else? Number of pages for a given object? */
-};
-
-/**
- * Transfer request.
- *
- * Transfer requests are not reference counted, because IO sub-system owns
- * them exclusively and knows when to free them.
- *
- * Life cycle.
- *
- * cl_req is created by cl_req_alloc() that calls
- * cl_device_operations::cdo_req_init() device methods to allocate per-req
- * state in every layer.
- *
- * Then pages are added (cl_req_page_add()), req keeps track of all objects it
- * contains pages for.
- *
- * Once all pages were collected, cl_page_operations::cpo_prep() method is
- * called top-to-bottom. At that point layers can modify req, let it pass, or
- * deny it completely. This is to support things like SNS that have transfer
- * ordering requirements invisible to the individual req-formation engine.
- *
- * On transfer completion (or transfer timeout, or failure to initiate the
- * transfer of an allocated req), cl_req_operations::cro_completion() method
- * is called, after execution of cl_page_operations::cpo_completion() of all
- * req's pages.
- */
-struct cl_req {
-	enum cl_req_type      crq_type;
-	/** A list of pages being transferred */
-	struct list_head	    crq_pages;
-	/** Number of pages in cl_req::crq_pages */
-	unsigned	      crq_nrpages;
-	/** An array of objects which pages are in ->crq_pages */
-	struct cl_req_obj    *crq_o;
-	/** Number of elements in cl_req::crq_objs[] */
-	unsigned	      crq_nrobjs;
-	struct list_head	    crq_layers;
-};
-
-/**
- * Per-layer state for request.
- */
-struct cl_req_slice {
-	struct cl_req    *crs_req;
-	struct cl_device *crs_dev;
-	struct list_head	crs_linkage;
-	const struct cl_req_operations *crs_ops;
-};
-
-/* @} cl_req */
-
 enum cache_stats_item {
 	/** how many cache lookups were performed */
 	CS_lookup = 0,
@@ -2153,9 +2004,6 @@ void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
 		       const struct cl_lock_operations *ops);
 void cl_io_slice_add(struct cl_io *io, struct cl_io_slice *slice,
 		     struct cl_object *obj, const struct cl_io_operations *ops);
-void cl_req_slice_add(struct cl_req *req, struct cl_req_slice *slice,
-		      struct cl_device *dev,
-		      const struct cl_req_operations *ops);
 /** @} helpers */
 
 /** \defgroup cl_object cl_object
@@ -2183,6 +2031,12 @@ int cl_object_prune(const struct lu_env *env, struct cl_object *obj);
 void cl_object_kill(const struct lu_env *env, struct cl_object *obj);
 int  cl_object_getstripe(const struct lu_env *env, struct cl_object *obj,
 			 struct lov_user_md __user *lum);
+int cl_object_fiemap(const struct lu_env *env, struct cl_object *obj,
+		     struct ll_fiemap_info_key *fmkey, struct fiemap *fiemap,
+		     size_t *buflen);
+int cl_object_layout_get(const struct lu_env *env, struct cl_object *obj,
+			 struct cl_layout *cl);
+loff_t cl_object_maxbytes(struct cl_object *obj);
 
 /**
  * Returns true, iff \a o0 and \a o1 are slices of the same object.
@@ -2302,8 +2156,6 @@ void cl_page_discard(const struct lu_env *env, struct cl_io *io,
 void cl_page_delete(const struct lu_env *env, struct cl_page *pg);
 int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg);
 void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate);
-int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
-			  struct cl_page *page, pgoff_t *max_index);
 loff_t cl_offset(const struct cl_object *obj, pgoff_t idx);
 pgoff_t cl_index(const struct cl_object *obj, loff_t offset);
 size_t cl_page_size(const struct cl_object *obj);
@@ -2414,8 +2266,6 @@ int cl_io_lock_add(const struct lu_env *env, struct cl_io *io,
 		   struct cl_io_lock_link *link);
 int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io,
 			 struct cl_lock_descr *descr);
-int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
-		    struct cl_page *page);
 int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
 		    enum cl_req_type iot, struct cl_2queue *queue);
 int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
@@ -2424,6 +2274,8 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
 int cl_io_commit_async(const struct lu_env *env, struct cl_io *io,
 		       struct cl_page_list *queue, int from, int to,
 		       cl_commit_cbt cb);
+int cl_io_read_ahead(const struct lu_env *env, struct cl_io *io,
+		     pgoff_t start, struct cl_read_ahead *ra);
 int cl_io_is_going(const struct lu_env *env);
 
 /**
@@ -2520,19 +2372,8 @@ void cl_2queue_init_page(struct cl_2queue *queue, struct cl_page *page);
 
 /** @} cl_page_list */
 
-/** \defgroup cl_req cl_req
- * @{
- */
-struct cl_req *cl_req_alloc(const struct lu_env *env, struct cl_page *page,
-			    enum cl_req_type crt, int nr_objects);
-
-void cl_req_page_add(const struct lu_env *env, struct cl_req *req,
-		     struct cl_page *page);
-void cl_req_page_done(const struct lu_env *env, struct cl_page *page);
-int  cl_req_prep(const struct lu_env *env, struct cl_req *req);
-void cl_req_attr_set(const struct lu_env *env, struct cl_req *req,
-		     struct cl_req_attr *attr, u64 flags);
-void cl_req_completion(const struct lu_env *env, struct cl_req *req, int ioret);
+void cl_req_attr_set(const struct lu_env *env, struct cl_object *obj,
+		     struct cl_req_attr *attr);
 
 /** \defgroup cl_sync_io cl_sync_io
  * @{
@@ -2568,8 +2409,6 @@ void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor);
 
 /** @} cl_sync_io */
 
-/** @} cl_req */
-
 /** \defgroup cl_env cl_env
  *
  * lu_env handling for a client.
@@ -2593,35 +2432,13 @@ void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor);
  *     - allocation and destruction of environment is amortized by caching no
  *     longer used environments instead of destroying them;
  *
- *     - there is a notion of "current" environment, attached to the kernel
- *     data structure representing current thread Top-level lustre code
- *     allocates an environment and makes it current, then calls into
- *     non-lustre code, that in turn calls lustre back. Low-level lustre
- *     code thus called can fetch environment created by the top-level code
- *     and reuse it, avoiding additional environment allocation.
- *       Right now, three interfaces can attach the cl_env to running thread:
- *       - cl_env_get
- *       - cl_env_implant
- *       - cl_env_reexit(cl_env_reenter had to be called priorly)
- *
  * \see lu_env, lu_context, lu_context_key
  * @{
  */
 
-struct cl_env_nest {
-	int   cen_refcheck;
-	void *cen_cookie;
-};
-
 struct lu_env *cl_env_get(int *refcheck);
 struct lu_env *cl_env_alloc(int *refcheck, __u32 tags);
-struct lu_env *cl_env_nested_get(struct cl_env_nest *nest);
 void cl_env_put(struct lu_env *env, int *refcheck);
-void cl_env_nested_put(struct cl_env_nest *nest, struct lu_env *env);
-void *cl_env_reenter(void);
-void cl_env_reexit(void *cookie);
-void cl_env_implant(struct lu_env *env, int *refcheck);
-void cl_env_unplant(struct lu_env *env, int *refcheck);
 unsigned int cl_env_cache_purge(unsigned int nr);
 struct lu_env *cl_env_percpu_get(void);
 void cl_env_percpu_put(struct lu_env *env);
diff --git a/drivers/staging/lustre/lustre/include/llog_swab.h b/drivers/staging/lustre/lustre/include/llog_swab.h
new file mode 100644
index 000000000000..fd7ffb154ad1
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/llog_swab.h
@@ -0,0 +1,65 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2014, Intel Corporation.
+ *
+ * Copyright 2015 Cray Inc, all rights reserved.
+ * Author: Ben Evans.
+ *
+ * We assume all nodes are either little-endian or big-endian, and we
+ * always send messages in the sender's native format.  The receiver
+ * detects the message format by checking the 'magic' field of the message
+ * (see lustre_msg_swabbed() below).
+ *
+ * Each type has corresponding 'lustre_swab_xxxtypexxx()' routines
+ * are implemented in ptlrpc/pack_generic.c.  These 'swabbers' convert the
+ * type from "other" endian, in-place in the message buffer.
+ *
+ * A swabber takes a single pointer argument.  The caller must already have
+ * verified that the length of the message buffer >= sizeof (type).
+ *
+ * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine
+ * may be defined that swabs just the variable part, after the caller has
+ * verified that the message buffer is large enough.
+ */
+
+#ifndef _LLOG_SWAB_H_
+#define _LLOG_SWAB_H_
+
+#include "lustre/lustre_idl.h"
+struct lustre_cfg;
+
+void lustre_swab_lu_fid(struct lu_fid *fid);
+void lustre_swab_ost_id(struct ost_id *oid);
+void lustre_swab_llogd_body(struct llogd_body *d);
+void lustre_swab_llog_hdr(struct llog_log_hdr *h);
+void lustre_swab_llogd_conn_body(struct llogd_conn_body *d);
+void lustre_swab_llog_rec(struct llog_rec_hdr *rec);
+void lustre_swab_lu_seq_range(struct lu_seq_range *range);
+void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg);
+void lustre_swab_cfg_marker(struct cfg_marker *marker,
+			    int swab, int size);
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lprocfs_status.h b/drivers/staging/lustre/lustre/include/lprocfs_status.h
index cc0713ef8ae5..62753dae0bfa 100644
--- a/drivers/staging/lustre/lustre/include/lprocfs_status.h
+++ b/drivers/staging/lustre/lustre/include/lprocfs_status.h
@@ -43,6 +43,8 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
+#include "../../include/linux/libcfs/libcfs.h"
+#include "lustre_cfg.h"
 #include "lustre/lustre_idl.h"
 
 struct lprocfs_vars {
@@ -540,7 +542,8 @@ lprocfs_alloc_stats(unsigned int num, enum lprocfs_stats_flags flags);
 void lprocfs_clear_stats(struct lprocfs_stats *stats);
 void lprocfs_free_stats(struct lprocfs_stats **stats);
 void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
-			  unsigned conf, const char *name, const char *units);
+			  unsigned int conf, const char *name,
+			  const char *units);
 struct obd_export;
 int lprocfs_exp_cleanup(struct obd_export *exp);
 struct dentry *ldebugfs_add_simple(struct dentry *root,
@@ -701,9 +704,9 @@ static struct lustre_attr lustre_attr_##name = __ATTR(name, mode, show, store)
 extern const struct sysfs_ops lustre_sysfs_ops;
 
 struct root_squash_info;
-int lprocfs_wr_root_squash(const char *buffer, unsigned long count,
+int lprocfs_wr_root_squash(const char __user *buffer, unsigned long count,
 			   struct root_squash_info *squash, char *name);
-int lprocfs_wr_nosquash_nids(const char *buffer, unsigned long count,
+int lprocfs_wr_nosquash_nids(const char __user *buffer, unsigned long count,
 			     struct root_squash_info *squash, char *name);
 
 /* all quota proc functions */
diff --git a/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h
index c2340d643e84..b8ad5559a3b9 100644
--- a/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h
+++ b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h
@@ -41,79 +41,24 @@
 #ifndef _LUSTRE_FIEMAP_H
 #define _LUSTRE_FIEMAP_H
 
-struct ll_fiemap_extent {
-	__u64 fe_logical;  /* logical offset in bytes for the start of
-			    * the extent from the beginning of the file
-			    */
-	__u64 fe_physical; /* physical offset in bytes for the start
-			    * of the extent from the beginning of the disk
-			    */
-	__u64 fe_length;   /* length in bytes for this extent */
-	__u64 fe_reserved64[2];
-	__u32 fe_flags;    /* FIEMAP_EXTENT_* flags for this extent */
-	__u32 fe_device;   /* device number for this extent */
-	__u32 fe_reserved[2];
-};
-
-struct ll_user_fiemap {
-	__u64 fm_start;  /* logical offset (inclusive) at
-			  * which to start mapping (in)
-			  */
-	__u64 fm_length; /* logical length of mapping which
-			  * userspace wants (in)
-			  */
-	__u32 fm_flags;  /* FIEMAP_FLAG_* flags for request (in/out) */
-	__u32 fm_mapped_extents;/* number of extents that were mapped (out) */
-	__u32 fm_extent_count;  /* size of fm_extents array (in) */
-	__u32 fm_reserved;
-	struct ll_fiemap_extent fm_extents[0]; /* array of mapped extents (out) */
-};
-
-#define FIEMAP_MAX_OFFSET      (~0ULL)
+#ifndef __KERNEL__
+#include <stddef.h>
+#include <fiemap.h>
+#endif
 
-#define FIEMAP_FLAG_SYNC		0x00000001 /* sync file data before
-						    * map
-						    */
-#define FIEMAP_FLAG_XATTR		0x00000002 /* map extended attribute
-						    * tree
-						    */
-#define FIEMAP_EXTENT_LAST		0x00000001 /* Last extent in file. */
-#define FIEMAP_EXTENT_UNKNOWN		0x00000002 /* Data location unknown. */
-#define FIEMAP_EXTENT_DELALLOC		0x00000004 /* Location still pending.
-						    * Sets EXTENT_UNKNOWN.
-						    */
-#define FIEMAP_EXTENT_ENCODED		0x00000008 /* Data can not be read
-						    * while fs is unmounted
-						    */
-#define FIEMAP_EXTENT_DATA_ENCRYPTED	0x00000080 /* Data is encrypted by fs.
-						    * Sets EXTENT_NO_DIRECT.
-						    */
-#define FIEMAP_EXTENT_NOT_ALIGNED       0x00000100 /* Extent offsets may not be
-						    * block aligned.
-						    */
-#define FIEMAP_EXTENT_DATA_INLINE       0x00000200 /* Data mixed with metadata.
-						    * Sets EXTENT_NOT_ALIGNED.*/
-#define FIEMAP_EXTENT_DATA_TAIL		0x00000400 /* Multiple files in block.
-						    * Sets EXTENT_NOT_ALIGNED.
-						    */
-#define FIEMAP_EXTENT_UNWRITTEN		0x00000800 /* Space allocated, but
-						    * no data (i.e. zero).
-						    */
-#define FIEMAP_EXTENT_MERGED		0x00001000 /* File does not natively
-						    * support extents. Result
-						    * merged for efficiency.
-						    */
+/* XXX: We use fiemap_extent::fe_reserved[0] */
+#define fe_device	fe_reserved[0]
 
 static inline size_t fiemap_count_to_size(size_t extent_count)
 {
-	return (sizeof(struct ll_user_fiemap) + extent_count *
-					       sizeof(struct ll_fiemap_extent));
+	return sizeof(struct fiemap) + extent_count *
+				       sizeof(struct fiemap_extent);
 }
 
 static inline unsigned fiemap_size_to_count(size_t array_size)
 {
-	return ((array_size - sizeof(struct ll_user_fiemap)) /
-					       sizeof(struct ll_fiemap_extent));
+	return (array_size - sizeof(struct fiemap)) /
+		sizeof(struct fiemap_extent);
 }
 
 #define FIEMAP_FLAG_DEVICE_ORDER 0x40000000 /* return device ordered mapping */
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
index 72eaee95c6b8..65ce503ad595 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
@@ -48,8 +48,7 @@
  * that the Lustre wire protocol is not influenced by external dependencies.
  *
  * The only other acceptable items in this file are VERY SIMPLE accessor
- * functions to avoid callers grubbing inside the structures, and the
- * prototypes of the swabber functions for each struct.  Nothing that
+ * functions to avoid callers grubbing inside the structures. Nothing that
  * depends on external functions or definitions should be in here.
  *
  * Structs must be properly aligned to put 64-bit values on an 8-byte
@@ -64,23 +63,6 @@
  * in the code to ensure that new/old clients that see this larger struct
  * do not fail, otherwise you need to implement protocol compatibility).
  *
- * We assume all nodes are either little-endian or big-endian, and we
- * always send messages in the sender's native format.  The receiver
- * detects the message format by checking the 'magic' field of the message
- * (see lustre_msg_swabbed() below).
- *
- * Each wire type has corresponding 'lustre_swab_xxxtypexxx()' routines,
- * implemented either here, inline (trivial implementations) or in
- * ptlrpc/pack_generic.c.  These 'swabbers' convert the type from "other"
- * endian, in-place in the message buffer.
- *
- * A swabber takes a single pointer argument.  The caller must already have
- * verified that the length of the message buffer >= sizeof (type).
- *
- * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine
- * may be defined that swabs just the variable part, after the caller has
- * verified that the message buffer is large enough.
- *
  * @{
  */
 
@@ -192,113 +174,6 @@ struct lu_seq_range_array {
 
 #define LU_SEQ_RANGE_MASK	0x3
 
-static inline unsigned fld_range_type(const struct lu_seq_range *range)
-{
-	return range->lsr_flags & LU_SEQ_RANGE_MASK;
-}
-
-static inline bool fld_range_is_ost(const struct lu_seq_range *range)
-{
-	return fld_range_type(range) == LU_SEQ_RANGE_OST;
-}
-
-static inline bool fld_range_is_mdt(const struct lu_seq_range *range)
-{
-	return fld_range_type(range) == LU_SEQ_RANGE_MDT;
-}
-
-/**
- * This all range is only being used when fld client sends fld query request,
- * but it does not know whether the seq is MDT or OST, so it will send req
- * with ALL type, which means either seq type gotten from lookup can be
- * expected.
- */
-static inline unsigned fld_range_is_any(const struct lu_seq_range *range)
-{
-	return fld_range_type(range) == LU_SEQ_RANGE_ANY;
-}
-
-static inline void fld_range_set_type(struct lu_seq_range *range,
-				      unsigned flags)
-{
-	range->lsr_flags |= flags;
-}
-
-static inline void fld_range_set_mdt(struct lu_seq_range *range)
-{
-	fld_range_set_type(range, LU_SEQ_RANGE_MDT);
-}
-
-static inline void fld_range_set_ost(struct lu_seq_range *range)
-{
-	fld_range_set_type(range, LU_SEQ_RANGE_OST);
-}
-
-static inline void fld_range_set_any(struct lu_seq_range *range)
-{
-	fld_range_set_type(range, LU_SEQ_RANGE_ANY);
-}
-
-/**
- * returns  width of given range \a r
- */
-
-static inline __u64 range_space(const struct lu_seq_range *range)
-{
-	return range->lsr_end - range->lsr_start;
-}
-
-/**
- * initialize range to zero
- */
-
-static inline void range_init(struct lu_seq_range *range)
-{
-	memset(range, 0, sizeof(*range));
-}
-
-/**
- * check if given seq id \a s is within given range \a r
- */
-
-static inline bool range_within(const struct lu_seq_range *range,
-				__u64 s)
-{
-	return s >= range->lsr_start && s < range->lsr_end;
-}
-
-static inline bool range_is_sane(const struct lu_seq_range *range)
-{
-	return (range->lsr_end >= range->lsr_start);
-}
-
-static inline bool range_is_zero(const struct lu_seq_range *range)
-{
-	return (range->lsr_start == 0 && range->lsr_end == 0);
-}
-
-static inline bool range_is_exhausted(const struct lu_seq_range *range)
-
-{
-	return range_space(range) == 0;
-}
-
-/* return 0 if two range have the same location */
-static inline int range_compare_loc(const struct lu_seq_range *r1,
-				    const struct lu_seq_range *r2)
-{
-	return r1->lsr_index != r2->lsr_index ||
-	       r1->lsr_flags != r2->lsr_flags;
-}
-
-#define DRANGE "[%#16.16Lx-%#16.16Lx):%x:%s"
-
-#define PRANGE(range)		\
-	(range)->lsr_start,	\
-	(range)->lsr_end,	\
-	(range)->lsr_index,	\
-	fld_range_is_mdt(range) ? "mdt" : "ost"
-
 /** \defgroup lu_fid lu_fid
  * @{
  */
@@ -310,7 +185,7 @@ static inline int range_compare_loc(const struct lu_seq_range *r1,
  */
 enum lma_compat {
 	LMAC_HSM	= 0x00000001,
-	LMAC_SOM	= 0x00000002,
+/*	LMAC_SOM	= 0x00000002, obsolete since 2.8.0 */
 	LMAC_NOT_IN_OI	= 0x00000004, /* the object does NOT need OI mapping */
 	LMAC_FID_ON_OST = 0x00000008, /* For OST-object, its OI mapping is
 				       * under /O/<seq>/d<x>.
@@ -644,13 +519,14 @@ static inline void ostid_set_id(struct ost_id *oi, __u64 oid)
 {
 	if (fid_seq_is_mdt0(oi->oi.oi_seq)) {
 		if (oid >= IDIF_MAX_OID) {
-			CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi));
+			CERROR("Too large OID %#llx to set MDT0 " DOSTID "\n",
+			       oid, POSTID(oi));
 			return;
 		}
 		oi->oi.oi_id = oid;
 	} else if (fid_is_idif(&oi->oi_fid)) {
 		if (oid >= IDIF_MAX_OID) {
-			CERROR("Bad %llu to set "DOSTID"\n",
+			CERROR("Too large OID %#llx to set IDIF " DOSTID "\n",
 			       oid, POSTID(oi));
 			return;
 		}
@@ -676,7 +552,7 @@ static inline int fid_set_id(struct lu_fid *fid, __u64 oid)
 
 	if (fid_is_idif(fid)) {
 		if (oid >= IDIF_MAX_OID) {
-			CERROR("Too large OID %#llx to set IDIF "DFID"\n",
+			CERROR("Too large OID %#llx to set IDIF " DFID "\n",
 			       (unsigned long long)oid, PFID(fid));
 			return -EBADF;
 		}
@@ -685,7 +561,7 @@ static inline int fid_set_id(struct lu_fid *fid, __u64 oid)
 		fid->f_ver = oid >> 48;
 	} else {
 		if (oid >= OBIF_MAX_OID) {
-			CERROR("Too large OID %#llx to set REG "DFID"\n",
+			CERROR("Too large OID %#llx to set REG " DFID "\n",
 			       (unsigned long long)oid, PFID(fid));
 			return -EBADF;
 		}
@@ -785,8 +661,6 @@ static inline ino_t lu_igif_ino(const struct lu_fid *fid)
 	return fid_seq(fid);
 }
 
-void lustre_swab_ost_id(struct ost_id *oid);
-
 /**
  * Get inode generation from a igif.
  * \param fid a igif to get inode generation from.
@@ -847,9 +721,6 @@ static inline bool fid_is_sane(const struct lu_fid *fid)
 		fid_seq_is_rsvd(fid_seq(fid)));
 }
 
-void lustre_swab_lu_fid(struct lu_fid *fid);
-void lustre_swab_lu_seq_range(struct lu_seq_range *range);
-
 static inline bool lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1)
 {
 	return memcmp(f0, f1, sizeof(*f0)) == 0;
@@ -1099,8 +970,10 @@ struct ptlrpc_body_v3 {
 	__u32 pb_version;
 	__u32 pb_opc;
 	__u32 pb_status;
-	__u64 pb_last_xid;
-	__u64 pb_last_seen;
+	__u64 pb_last_xid; /* highest replied XID without lower unreplied XID */
+	__u16 pb_tag;      /* virtual slot idx for multiple modifying RPCs */
+	__u16 pb_padding0;
+	__u32 pb_padding1;
 	__u64 pb_last_committed;
 	__u64 pb_transno;
 	__u32 pb_flags;
@@ -1112,8 +985,11 @@ struct ptlrpc_body_v3 {
 	__u64 pb_slv;
 	/* VBR: pre-versions */
 	__u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
+	__u64 pb_mbits; /**< match bits for bulk request */
 	/* padding for future needs */
-	__u64 pb_padding[4];
+	__u64 pb_padding64_0;
+	__u64 pb_padding64_1;
+	__u64 pb_padding64_2;
 	char  pb_jobid[LUSTRE_JOBID_SIZE];
 };
 
@@ -1125,8 +1001,10 @@ struct ptlrpc_body_v2 {
 	__u32 pb_version;
 	__u32 pb_opc;
 	__u32 pb_status;
-	__u64 pb_last_xid;
-	__u64 pb_last_seen;
+	__u64 pb_last_xid; /* highest replied XID without lower unreplied XID */
+	__u16 pb_tag;      /* virtual slot idx for multiple modifying RPCs */
+	__u16 pb_padding0;
+	__u32 pb_padding1;
 	__u64 pb_last_committed;
 	__u64 pb_transno;
 	__u32 pb_flags;
@@ -1140,12 +1018,13 @@ struct ptlrpc_body_v2 {
 	__u64 pb_slv;
 	/* VBR: pre-versions */
 	__u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
+	__u64 pb_mbits; /**< unused in V2 */
 	/* padding for future needs */
-	__u64 pb_padding[4];
+	__u64 pb_padding64_0;
+	__u64 pb_padding64_1;
+	__u64 pb_padding64_2;
 };
 
-void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
-
 /* message body offset for lustre_msg_v2 */
 /* ptlrpc body offset in all request/reply messages */
 #define MSG_PTLRPC_BODY_OFF	     0
@@ -1282,7 +1161,16 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
 							 */
 #define OBD_CONNECT_LFSCK	0x40000000000000ULL/* support online LFSCK */
 #define OBD_CONNECT_UNLINK_CLOSE 0x100000000000000ULL/* close file in unlink */
+#define OBD_CONNECT_MULTIMODRPCS 0x200000000000000ULL /* support multiple modify
+						       *  RPCs in parallel
+						       */
 #define OBD_CONNECT_DIR_STRIPE	 0x400000000000000ULL/* striped DNE dir */
+#define OBD_CONNECT_SUBTREE	 0x800000000000000ULL /* fileset mount */
+#define OBD_CONNECT_LOCK_AHEAD	 0x1000000000000000ULL /* lock ahead */
+/** bulk matchbits is sent within ptlrpc_body */
+#define OBD_CONNECT_BULK_MBITS	 0x2000000000000000ULL
+#define OBD_CONNECT_OBDOPACK	 0x4000000000000000ULL /* compact OUT obdo */
+#define OBD_CONNECT_FLAGS2	 0x8000000000000000ULL /* second flags word */
 
 /* XXX README XXX:
  * Please DO NOT add flag values here before first ensuring that this same
@@ -1313,25 +1201,6 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
  * If we eventually have separate connect data for different types, which we
  * almost certainly will, then perhaps we stick a union in here.
  */
-struct obd_connect_data_v1 {
-	__u64 ocd_connect_flags; /* OBD_CONNECT_* per above */
-	__u32 ocd_version;	 /* lustre release version number */
-	__u32 ocd_grant;	 /* initial cache grant amount (bytes) */
-	__u32 ocd_index;	 /* LOV index to connect to */
-	__u32 ocd_brw_size;	 /* Maximum BRW size in bytes, must be 2^n */
-	__u64 ocd_ibits_known;   /* inode bits this client understands */
-	__u8  ocd_blocksize;     /* log2 of the backend filesystem blocksize */
-	__u8  ocd_inodespace;    /* log2 of the per-inode space consumption */
-	__u16 ocd_grant_extent;  /* per-extent grant overhead, in 1K blocks */
-	__u32 ocd_unused;	/* also fix lustre_swab_connect */
-	__u64 ocd_transno;       /* first transno from client to be replayed */
-	__u32 ocd_group;	 /* MDS group on OST */
-	__u32 ocd_cksum_types;   /* supported checksum algorithms */
-	__u32 ocd_max_easize;    /* How big LOV EA can be on MDS */
-	__u32 ocd_instance;      /* also fix lustre_swab_connect */
-	__u64 ocd_maxbytes;      /* Maximum stripe size in bytes */
-};
-
 struct obd_connect_data {
 	__u64 ocd_connect_flags; /* OBD_CONNECT_* per above */
 	__u32 ocd_version;	 /* lustre release version number */
@@ -1354,8 +1223,10 @@ struct obd_connect_data {
 	 * any field after ocd_maxbytes on the receiver without a valid flag
 	 * may result in out-of-bound memory access and kernel oops.
 	 */
-	__u64 padding1;	  /* added 2.1.0. also fix lustre_swab_connect */
-	__u64 padding2;	  /* added 2.1.0. also fix lustre_swab_connect */
+	__u16 ocd_maxmodrpcs;	/* Maximum modify RPCs in parallel */
+	__u16 padding0;		/* added 2.1.0. also fix lustre_swab_connect */
+	__u32 padding1;		/* added 2.1.0. also fix lustre_swab_connect */
+	__u64 ocd_connect_flags2;
 	__u64 padding3;	  /* added 2.1.0. also fix lustre_swab_connect */
 	__u64 padding4;	  /* added 2.1.0. also fix lustre_swab_connect */
 	__u64 padding5;	  /* added 2.1.0. also fix lustre_swab_connect */
@@ -1380,8 +1251,6 @@ struct obd_connect_data {
  * reserve the flag for future use.
  */
 
-void lustre_swab_connect(struct obd_connect_data *ocd);
-
 /*
  * Supported checksum algorithms. Up to 32 checksum types are supported.
  * (32-bit mask stored in obd_connect_data::ocd_cksum_types)
@@ -1416,7 +1285,7 @@ enum ost_cmd {
 	OST_STATFS     = 13,
 	OST_SYNC       = 16,
 	OST_SET_INFO   = 17,
-	OST_QUOTACHECK = 18,
+	OST_QUOTACHECK = 18, /* not used since 2.4 */
 	OST_QUOTACTL   = 19,
 	OST_QUOTA_ADJUST_QUNIT = 20, /* not used since 2.4 */
 	OST_LAST_OPC
@@ -1580,8 +1449,6 @@ static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi,
 	dst_oi->oi.oi_seq = cpu_to_le64(src_oi->oi.oi_seq);
 }
 
-/* extern void lustre_swab_lov_mds_md(struct lov_mds_md *llm); */
-
 #define MAX_MD_SIZE							\
 	(sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data))
 #define MIN_MD_SIZE							\
@@ -1674,7 +1541,7 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic)
 #define OBD_MD_FLCKSUM     (0x00100000ULL) /* bulk data checksum */
 #define OBD_MD_FLQOS       (0x00200000ULL) /* quality of service stats */
 /*#define OBD_MD_FLOSCOPQ    (0x00400000ULL) osc opaque data, never used */
-#define OBD_MD_FLCOOKIE    (0x00800000ULL) /* log cancellation cookie */
+/*	OBD_MD_FLCOOKIE    (0x00800000ULL) obsolete in 2.8 */
 #define OBD_MD_FLGROUP     (0x01000000ULL) /* group */
 #define OBD_MD_FLFID       (0x02000000ULL) /* ->ost write inline fid */
 #define OBD_MD_FLEPOCH     (0x04000000ULL) /* ->ost write with ioepoch */
@@ -1713,7 +1580,9 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic)
 /*	OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) lfs rgetfacl, obsolete */
 
 #define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */
-#define OBD_MD_FLRELEASED    (0x0020000000000000ULL) /* file released */
+#define OBD_MD_CLOSE_INTENT_EXECED (0x0020000000000000ULL) /* close intent
+							    * executed
+							    */
 
 #define OBD_MD_DEFAULT_MEA   (0x0040000000000000ULL) /* default MEA */
 
@@ -1742,11 +1611,6 @@ struct hsm_state_set {
 	__u64	hss_clearmask;
 };
 
-void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
-void lustre_swab_hsm_state_set(struct hsm_state_set *hss);
-
-void lustre_swab_obd_statfs(struct obd_statfs *os);
-
 /* ost_body.data values for OST_BRW */
 
 #define OBD_BRW_READ		0x01
@@ -1786,14 +1650,16 @@ struct obd_ioobj {
 	__u32		ioo_bufcnt;	/* number of niobufs for this object */
 };
 
+/*
+ * NOTE: IOOBJ_MAX_BRW_BITS defines the _offset_ of the max_brw field in
+ * ioo_max_brw, NOT the maximum number of bits in PTLRPC_BULK_OPS_BITS.
+ * That said, ioo_max_brw is a 32-bit field so the limit is also 16 bits.
+ */
 #define IOOBJ_MAX_BRW_BITS	16
-#define IOOBJ_TYPE_MASK		((1U << IOOBJ_MAX_BRW_BITS) - 1)
 #define ioobj_max_brw_get(ioo)	(((ioo)->ioo_max_brw >> IOOBJ_MAX_BRW_BITS) + 1)
 #define ioobj_max_brw_set(ioo, num)					\
 do { (ioo)->ioo_max_brw = ((num) - 1) << IOOBJ_MAX_BRW_BITS; } while (0)
 
-void lustre_swab_obd_ioobj(struct obd_ioobj *ioo);
-
 /* multiple of 8 bytes => can array */
 struct niobuf_remote {
 	__u64	rnb_offset;
@@ -1801,8 +1667,6 @@ struct niobuf_remote {
 	__u32	rnb_flags;
 };
 
-void lustre_swab_niobuf_remote(struct niobuf_remote *nbr);
-
 /* lock value block communicated between the filter and llite */
 
 /* OST_LVB_ERR_INIT is needed because the return code in rc is
@@ -1824,8 +1688,6 @@ struct ost_lvb_v1 {
 	__u64		lvb_blocks;
 };
 
-void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb);
-
 struct ost_lvb {
 	__u64		lvb_size;
 	__s64		lvb_mtime;
@@ -1838,8 +1700,6 @@ struct ost_lvb {
 	__u32		lvb_padding;
 };
 
-void lustre_swab_ost_lvb(struct ost_lvb *lvb);
-
 /*
  *   lquota data structures
  */
@@ -1866,8 +1726,6 @@ struct obd_quotactl {
 	struct obd_dqblk	qc_dqblk;
 };
 
-void lustre_swab_obd_quotactl(struct obd_quotactl *q);
-
 #define Q_COPY(out, in, member) (out)->member = (in)->member
 
 #define QCTL_COPY(out, in)		\
@@ -1905,8 +1763,6 @@ struct lquota_lvb {
 	__u64	lvb_pad1;
 };
 
-void lustre_swab_lquota_lvb(struct lquota_lvb *lvb);
-
 /* op codes */
 enum quota_cmd {
 	QUOTA_DQACQ	= 601,
@@ -1933,9 +1789,9 @@ enum mds_cmd {
 	MDS_PIN			= 42, /* obsolete, never used in a release */
 	MDS_UNPIN		= 43, /* obsolete, never used in a release */
 	MDS_SYNC		= 44,
-	MDS_DONE_WRITING	= 45,
+	MDS_DONE_WRITING	= 45, /* obsolete since 2.8.0 */
 	MDS_SET_INFO		= 46,
-	MDS_QUOTACHECK		= 47,
+	MDS_QUOTACHECK		= 47, /* not used since 2.4 */
 	MDS_QUOTACTL		= 48,
 	MDS_GETXATTR		= 49,
 	MDS_SETXATTR		= 50, /* obsolete, now it's MDS_REINT op */
@@ -1972,8 +1828,6 @@ enum mdt_reint_cmd {
 	REINT_MAX
 };
 
-void lustre_swab_generic_32s(__u32 *val);
-
 /* the disposition of the intent outlines what was executed */
 #define DISP_IT_EXECD	0x00000001
 #define DISP_LOOKUP_EXECD    0x00000002
@@ -2031,36 +1885,19 @@ enum {
 #define MDS_STATUS_CONN 1
 #define MDS_STATUS_LOV 2
 
-/* mdt_thread_info.mti_flags. */
-enum md_op_flags {
-	/* The flag indicates Size-on-MDS attributes are changed. */
-	MF_SOM_CHANGE	   = (1 << 0),
-	/* Flags indicates an epoch opens or closes. */
-	MF_EPOCH_OPEN	   = (1 << 1),
-	MF_EPOCH_CLOSE	  = (1 << 2),
-	MF_MDC_CANCEL_FID1      = (1 << 3),
-	MF_MDC_CANCEL_FID2      = (1 << 4),
-	MF_MDC_CANCEL_FID3      = (1 << 5),
-	MF_MDC_CANCEL_FID4      = (1 << 6),
-	/* There is a pending attribute update. */
-	MF_SOM_AU	       = (1 << 7),
-	/* Cancel OST locks while getattr OST attributes. */
-	MF_GETATTR_LOCK	 = (1 << 8),
-	MF_GET_MDT_IDX	  = (1 << 9),
-};
-
-#define MF_SOM_LOCAL_FLAGS (MF_SOM_CHANGE | MF_EPOCH_OPEN | MF_EPOCH_CLOSE)
-
-#define LUSTRE_BFLAG_UNCOMMITTED_WRITES   0x1
-
 /* these should be identical to their EXT4_*_FL counterparts, they are
  * redefined here only to avoid dragging in fs/ext4/ext4.h
  */
 #define LUSTRE_SYNC_FL	 0x00000008 /* Synchronous updates */
 #define LUSTRE_IMMUTABLE_FL    0x00000010 /* Immutable file */
 #define LUSTRE_APPEND_FL       0x00000020 /* writes to file may only append */
+#define LUSTRE_NODUMP_FL	0x00000040 /* do not dump file */
 #define LUSTRE_NOATIME_FL      0x00000080 /* do not update atime */
+#define LUSTRE_INDEX_FL		0x00001000 /* hash-indexed directory */
 #define LUSTRE_DIRSYNC_FL      0x00010000 /* dirsync behaviour (dir only) */
+#define LUSTRE_TOPDIR_FL	0x00020000 /* Top of directory hierarchies*/
+#define LUSTRE_DIRECTIO_FL	0x00100000 /* Use direct i/o */
+#define LUSTRE_INLINE_DATA_FL	0x10000000 /* Inode has inline data. */
 
 /* Convert wire LUSTRE_*_FL to corresponding client local VFS S_* values
  * for the client inode i_flags.  The LUSTRE_*_FL are the Lustre wire
@@ -2113,7 +1950,7 @@ struct mdt_body {
 	__u32	mbo_mode;
 	__u32	mbo_uid;
 	__u32	mbo_gid;
-	__u32	mbo_flags;
+	__u32	mbo_flags;	/* LUSTRE_*_FL file attributes */
 	__u32	mbo_rdev;
 	__u32	mbo_nlink;	/* #bytes to read in the case of MDS_READPAGE */
 	__u32	mbo_unused2;	/* was "generation" until 2.4.0 */
@@ -2121,7 +1958,7 @@ struct mdt_body {
 	__u32	mbo_eadatasize;
 	__u32	mbo_aclsize;
 	__u32	mbo_max_mdsize;
-	__u32	mbo_max_cookiesize;
+	__u32	mbo_unused3;	/* was max_cookiesize until 2.8 */
 	__u32	mbo_uid_h;	/* high 32-bits of uid, for FUID */
 	__u32	mbo_gid_h;	/* high 32-bits of gid, for FUID */
 	__u32	mbo_padding_5;	/* also fix lustre_swab_mdt_body */
@@ -2132,17 +1969,13 @@ struct mdt_body {
 	__u64	mbo_padding_10;
 }; /* 216 */
 
-void lustre_swab_mdt_body(struct mdt_body *b);
-
 struct mdt_ioepoch {
-	struct lustre_handle handle;
-	__u64  ioepoch;
-	__u32  flags;
-	__u32  padding;
+	struct lustre_handle mio_handle;
+	__u64 mio_unused1; /* was ioepoch */
+	__u32 mio_unused2; /* was flags */
+	__u32 mio_padding;
 };
 
-void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b);
-
 /* permissions for md_perm.mp_perm */
 enum {
 	CFS_SETUID_PERM = 0x01,
@@ -2178,8 +2011,6 @@ struct mdt_rec_setattr {
 	__u32	   sa_padding_5;
 };
 
-void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa);
-
 /*
  * Attribute flags used in mdt_rec_setattr::sa_valid.
  * The kernel's #defines for ATTR_* should not be used over the network
@@ -2207,12 +2038,9 @@ void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa);
 
 #define MDS_FMODE_CLOSED	 00000000
 #define MDS_FMODE_EXEC	   00000004
-/* IO Epoch is opened on a closed file. */
-#define MDS_FMODE_EPOCH	  01000000
-/* IO Epoch is opened on a file truncate. */
-#define MDS_FMODE_TRUNC	  02000000
-/* Size-on-MDS Attribute Update is pending. */
-#define MDS_FMODE_SOM	    04000000
+/*	MDS_FMODE_EPOCH		01000000 obsolete since 2.8.0 */
+/*	MDS_FMODE_TRUNC		02000000 obsolete since 2.8.0 */
+/*	MDS_FMODE_SOM		04000000 obsolete since 2.8.0 */
 
 #define MDS_OPEN_CREATED	 00000010
 #define MDS_OPEN_CROSS	   00000020
@@ -2258,7 +2086,7 @@ enum mds_op_bias {
 	MDS_CROSS_REF		= 1 << 1,
 	MDS_VTX_BYPASS		= 1 << 2,
 	MDS_PERM_BYPASS		= 1 << 3,
-	MDS_SOM			= 1 << 4,
+/*	MDS_SOM			= 1 << 4, obsolete since 2.8.0 */
 	MDS_QUOTA_IGNORE	= 1 << 5,
 	MDS_CLOSE_CLEANUP	= 1 << 6,
 	MDS_KEEP_ORPHAN		= 1 << 7,
@@ -2268,6 +2096,7 @@ enum mds_op_bias {
 	MDS_OWNEROVERRIDE	= 1 << 11,
 	MDS_HSM_RELEASE		= 1 << 12,
 	MDS_RENAME_MIGRATE	= BIT(13),
+	MDS_CLOSE_LAYOUT_SWAP   = BIT(14),
 };
 
 /* instance of mdt_reint_rec */
@@ -2456,8 +2285,6 @@ struct mdt_rec_reint {
 	__u32	   rr_padding_4; /* also fix lustre_swab_mdt_rec_reint */
 };
 
-void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr);
-
 /* lmv structures */
 struct lmv_desc {
 	__u32 ld_tgt_count;		/* how many MDS's */
@@ -2547,8 +2374,6 @@ union lmv_mds_md {
 	struct lmv_user_md	lmv_user_md;
 };
 
-void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm);
-
 static inline ssize_t lmv_mds_md_size(int stripe_count, unsigned int lmm_magic)
 {
 	ssize_t len = -EINVAL;
@@ -2652,8 +2477,6 @@ struct lov_desc {
 
 #define ld_magic ld_active_tgt_count       /* for swabbing from llogs */
 
-void lustre_swab_lov_desc(struct lov_desc *ld);
-
 /*
  *   LDLM requests:
  */
@@ -2749,24 +2572,38 @@ struct ldlm_flock_wire {
  * on the resource type.
  */
 
-typedef union {
+union ldlm_wire_policy_data {
 	struct ldlm_extent l_extent;
 	struct ldlm_flock_wire l_flock;
 	struct ldlm_inodebits l_inodebits;
-} ldlm_wire_policy_data_t;
+};
 
 union ldlm_gl_desc {
 	struct ldlm_gl_lquota_desc	lquota_desc;
 };
 
-void lustre_swab_gl_desc(union ldlm_gl_desc *);
+enum ldlm_intent_flags {
+	IT_OPEN		= BIT(0),
+	IT_CREAT	= BIT(1),
+	IT_OPEN_CREAT	= BIT(1) | BIT(0),
+	IT_READDIR	= BIT(2),
+	IT_GETATTR	= BIT(3),
+	IT_LOOKUP	= BIT(4),
+	IT_UNLINK	= BIT(5),
+	IT_TRUNC	= BIT(6),
+	IT_GETXATTR	= BIT(7),
+	IT_EXEC		= BIT(8),
+	IT_PIN		= BIT(9),
+	IT_LAYOUT	= BIT(10),
+	IT_QUOTA_DQACQ	= BIT(11),
+	IT_QUOTA_CONN	= BIT(12),
+	IT_SETXATTR	= BIT(13),
+};
 
 struct ldlm_intent {
 	__u64 opc;
 };
 
-void lustre_swab_ldlm_intent(struct ldlm_intent *i);
-
 struct ldlm_resource_desc {
 	enum ldlm_type lr_type;
 	__u32 lr_padding;       /* also fix lustre_swab_ldlm_resource_desc */
@@ -2777,7 +2614,7 @@ struct ldlm_lock_desc {
 	struct ldlm_resource_desc l_resource;
 	enum ldlm_mode l_req_mode;
 	enum ldlm_mode l_granted_mode;
-	ldlm_wire_policy_data_t l_policy_data;
+	union ldlm_wire_policy_data l_policy_data;
 };
 
 #define LDLM_LOCKREQ_HANDLES 2
@@ -2790,8 +2627,6 @@ struct ldlm_request {
 	struct lustre_handle lock_handle[LDLM_LOCKREQ_HANDLES];
 };
 
-void lustre_swab_ldlm_request(struct ldlm_request *rq);
-
 /* If LDLM_ENQUEUE, 1 slot is already occupied, 1 is available.
  * Otherwise, 2 are available.
  */
@@ -2813,8 +2648,6 @@ struct ldlm_reply {
 	__u64  lock_policy_res2;
 };
 
-void lustre_swab_ldlm_reply(struct ldlm_reply *r);
-
 #define ldlm_flags_to_wire(flags)    ((__u32)(flags))
 #define ldlm_flags_from_wire(flags)  ((__u64)(flags))
 
@@ -2858,8 +2691,6 @@ struct mgs_target_info {
 	char	     mti_params[MTI_PARAM_MAXLEN];
 };
 
-void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo);
-
 struct mgs_nidtbl_entry {
 	__u64	   mne_version;    /* table version of this entry */
 	__u32	   mne_instance;   /* target instance # */
@@ -2874,8 +2705,6 @@ struct mgs_nidtbl_entry {
 	} u;
 };
 
-void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *oinfo);
-
 struct mgs_config_body {
 	char     mcb_name[MTI_NAME_MAXLEN]; /* logname */
 	__u64    mcb_offset;    /* next index of config log to request */
@@ -2885,15 +2714,11 @@ struct mgs_config_body {
 	__u32    mcb_units;     /* # of units for bulk transfer */
 };
 
-void lustre_swab_mgs_config_body(struct mgs_config_body *body);
-
 struct mgs_config_res {
 	__u64    mcr_offset;    /* index of last config log */
 	__u64    mcr_size;      /* size of the log */
 };
 
-void lustre_swab_mgs_config_res(struct mgs_config_res *body);
-
 /* Config marker flags (in config log) */
 #define CM_START       0x01
 #define CM_END	 0x02
@@ -2913,8 +2738,6 @@ struct cfg_marker {
 	char	      cm_comment[MTI_NAME_MAXLEN];
 };
 
-void lustre_swab_cfg_marker(struct cfg_marker *marker, int swab, int size);
-
 /*
  * Opcodes for multiple servers.
  */
@@ -2922,7 +2745,7 @@ void lustre_swab_cfg_marker(struct cfg_marker *marker, int swab, int size);
 enum obd_cmd {
 	OBD_PING = 400,
 	OBD_LOG_CANCEL,
-	OBD_QC_CALLBACK,
+	OBD_QC_CALLBACK, /* not used since 2.4 */
 	OBD_IDX_READ,
 	OBD_LAST_OPC
 };
@@ -3155,23 +2978,32 @@ struct llog_gen_rec {
 	struct llog_rec_tail	lgr_tail;
 };
 
-/* On-disk header structure of each log object, stored in little endian order */
-#define LLOG_CHUNK_SIZE	 8192
-#define LLOG_HEADER_SIZE	(96)
-#define LLOG_BITMAP_BYTES       (LLOG_CHUNK_SIZE - LLOG_HEADER_SIZE)
-
-#define LLOG_MIN_REC_SIZE       (24) /* round(llog_rec_hdr + llog_rec_tail) */
-
 /* flags for the logs */
 enum llog_flag {
 	LLOG_F_ZAP_WHEN_EMPTY	= 0x1,
 	LLOG_F_IS_CAT		= 0x2,
 	LLOG_F_IS_PLAIN		= 0x4,
 	LLOG_F_EXT_JOBID        = BIT(3),
+	LLOG_F_IS_FIXSIZE	= BIT(4),
 
+	/*
+	 * Note: Flags covered by LLOG_F_EXT_MASK will be inherited from
+	 * catlog to plain log, so do not add LLOG_F_IS_FIXSIZE here,
+	 * because the catlog record is usually fixed size, but its plain
+	 * log record can be variable
+	 */
 	LLOG_F_EXT_MASK = LLOG_F_EXT_JOBID,
 };
 
+/* On-disk header structure of each log object, stored in little endian order */
+#define LLOG_MIN_CHUNK_SIZE	8192
+#define LLOG_HEADER_SIZE	(96)	/* sizeof (llog_log_hdr) +
+					 * sizeof(llh_tail) - sizeof(llh_bitmap)
+					 */
+#define LLOG_BITMAP_BYTES	(LLOG_MIN_CHUNK_SIZE - LLOG_HEADER_SIZE)
+#define LLOG_MIN_REC_SIZE	(24)	/* round(llog_rec_hdr + llog_rec_tail) */
+
+/* flags for the logs */
 struct llog_log_hdr {
 	struct llog_rec_hdr     llh_hdr;
 	__s64		   llh_timestamp;
@@ -3183,13 +3015,30 @@ struct llog_log_hdr {
 	/* for a catalog the first plain slot is next to it */
 	struct obd_uuid	 llh_tgtuuid;
 	__u32		   llh_reserved[LLOG_HEADER_SIZE / sizeof(__u32) - 23];
+	/* These fields must always be at the end of the llog_log_hdr.
+	 * Note: llh_bitmap size is variable because llog chunk size could be
+	 * bigger than LLOG_MIN_CHUNK_SIZE, i.e. sizeof(llog_log_hdr) > 8192
+	 * bytes, and the real size is stored in llh_hdr.lrh_len, which means
+	 * llh_tail should only be referred by LLOG_HDR_TAIL().
+	 * But this structure is also used by client/server llog interface
+	 * (see llog_client.c), it will be kept in its original way to avoid
+	 * compatibility issue.
+	 */
 	__u32		   llh_bitmap[LLOG_BITMAP_BYTES / sizeof(__u32)];
 	struct llog_rec_tail    llh_tail;
 } __packed;
 
-#define LLOG_BITMAP_SIZE(llh)  (__u32)((llh->llh_hdr.lrh_len -		\
-					llh->llh_bitmap_offset -	\
-					sizeof(llh->llh_tail)) * 8)
+#undef LLOG_HEADER_SIZE
+#undef LLOG_BITMAP_BYTES
+
+#define LLOG_HDR_BITMAP_SIZE(llh) (__u32)((llh->llh_hdr.lrh_len -	\
+					   llh->llh_bitmap_offset -	\
+					   sizeof(llh->llh_tail)) * 8)
+#define LLOG_HDR_BITMAP(llh)	(__u32 *)((char *)(llh) +		\
+					  (llh)->llh_bitmap_offset)
+#define LLOG_HDR_TAIL(llh)	((struct llog_rec_tail *)((char *)llh + \
+							 llh->llh_hdr.lrh_len - \
+							 sizeof(llh->llh_tail)))
 
 /** log cookies are used to reference a specific log file and a record
  * therein
@@ -3259,7 +3108,8 @@ struct obdo {
 	__u32		   o_parent_ver;
 	struct lustre_handle    o_handle;  /* brw: lock handle to prolong locks
 					    */
-	struct llog_cookie      o_lcookie; /* destroy: unlink cookie from MDS
+	struct llog_cookie      o_lcookie; /* destroy: unlink cookie from MDS,
+					    * obsolete in 2.8, reused in OSP
 					    */
 	__u32			o_uid_h;
 	__u32			o_gid_h;
@@ -3333,30 +3183,11 @@ struct ost_body {
 
 /* Key for FIEMAP to be used in get_info calls */
 struct ll_fiemap_info_key {
-	char    name[8];
-	struct  obdo oa;
-	struct  ll_user_fiemap fiemap;
+	char		lfik_name[8];
+	struct obdo	lfik_oa;
+	struct fiemap	lfik_fiemap;
 };
 
-void lustre_swab_ost_body(struct ost_body *b);
-void lustre_swab_ost_last_id(__u64 *id);
-void lustre_swab_fiemap(struct ll_user_fiemap *fiemap);
-
-void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum);
-void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum);
-void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
-				     int stripe_count);
-void lustre_swab_lov_mds_md(struct lov_mds_md *lmm);
-
-/* llog_swab.c */
-void lustre_swab_llogd_body(struct llogd_body *d);
-void lustre_swab_llog_hdr(struct llog_log_hdr *h);
-void lustre_swab_llogd_conn_body(struct llogd_conn_body *d);
-void lustre_swab_llog_rec(struct llog_rec_hdr *rec);
-
-struct lustre_cfg;
-void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg);
-
 /* Functions for dumping PTLRPC fields */
 void dump_rniobuf(struct niobuf_remote *rnb);
 void dump_ioo(struct obd_ioobj *nb);
@@ -3394,8 +3225,6 @@ struct lustre_capa {
 	__u8	    lc_hmac[CAPA_HMAC_MAX_LEN];   /** HMAC */
 } __packed;
 
-void lustre_swab_lustre_capa(struct lustre_capa *c);
-
 /** lustre_capa::lc_opc */
 enum {
 	CAPA_OPC_BODY_WRITE   = 1 << 0,  /**< write object data */
@@ -3458,8 +3287,6 @@ struct getinfo_fid2path {
 	char	    gf_path[0];
 } __packed;
 
-void lustre_swab_fid2path(struct getinfo_fid2path *gf);
-
 /** path2parent request/reply structures */
 struct getparent {
 	struct lu_fid	gp_fid;		/**< parent FID */
@@ -3486,8 +3313,6 @@ struct layout_intent {
 	__u64 li_end;
 };
 
-void lustre_swab_layout_intent(struct layout_intent *li);
-
 /**
  * On the wire version of hsm_progress structure.
  *
@@ -3506,13 +3331,6 @@ struct hsm_progress_kernel {
 	__u64			hpk_padding2;
 } __packed;
 
-void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
-void lustre_swab_hsm_current_action(struct hsm_current_action *action);
-void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk);
-void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
-void lustre_swab_hsm_user_item(struct hsm_user_item *hui);
-void lustre_swab_hsm_request(struct hsm_request *hr);
-
 /** layout swap request structure
  * fid1 and fid2 are in mdt_body
  */
@@ -3520,8 +3338,6 @@ struct mdc_swap_layouts {
 	__u64	   msl_flags;
 } __packed;
 
-void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl);
-
 struct close_data {
 	struct lustre_handle	cd_handle;
 	struct lu_fid		cd_fid;
@@ -3529,7 +3345,5 @@ struct close_data {
 	__u64			cd_reserved[8];
 };
 
-void lustre_swab_close_data(struct close_data *data);
-
 #endif
 /** @} lustreidl */
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h
index f3d7c94c3b50..eb08df33b2db 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h
@@ -363,8 +363,8 @@ obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf, int max_len)
 /*	OBD_IOC_LOV_GETSTRIPE	155 LL_IOC_LOV_GETSTRIPE */
 /*	OBD_IOC_LOV_SETEA	156 LL_IOC_LOV_SETEA */
 /*	lustre/lustre_user.h	157-159 */
-#define	OBD_IOC_QUOTACHECK	_IOW('f', 160, int)
-#define	OBD_IOC_POLL_QUOTACHECK	_IOR('f', 161, struct if_quotacheck *)
+/*	OBD_IOC_QUOTACHECK	_IOW('f', 160, int) */
+/*	OBD_IOC_POLL_QUOTACHECK	_IOR('f', 161, struct if_quotacheck *) */
 #define OBD_IOC_QUOTACTL	_IOWR('f', 162, struct if_quotactl)
 /*	lustre/lustre_user.h	163-176 */
 #define OBD_IOC_CHANGELOG_REG	_IOW('f', 177, struct obd_ioctl_data)
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
index 6fc985571cba..3301ad652db1 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
@@ -63,9 +63,13 @@
 #if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64)
 typedef struct stat64   lstat_t;
 #define lstat_f  lstat64
+#define fstat_f		fstat64
+#define fstatat_f	fstatat64
 #else
 typedef struct stat     lstat_t;
 #define lstat_f  lstat
+#define fstat_f		fstat
+#define fstatat_f	fstatat
 #endif
 
 #define HAVE_LOV_USER_MDS_DATA
@@ -82,7 +86,6 @@ typedef struct stat     lstat_t;
 #define FSFILT_IOC_SETVERSION	     _IOW('f', 4, long)
 #define FSFILT_IOC_GETVERSION_OLD	 _IOR('v', 1, long)
 #define FSFILT_IOC_SETVERSION_OLD	 _IOW('v', 2, long)
-#define FSFILT_IOC_FIEMAP		 _IOWR('f', 11, struct ll_user_fiemap)
 #endif
 
 /* FIEMAP flags supported by Lustre */
@@ -235,7 +238,7 @@ struct ost_id {
 /* #define LL_IOC_POLL_QUOTACHECK	161 OBD_IOC_POLL_QUOTACHECK */
 /* #define LL_IOC_QUOTACTL		162 OBD_IOC_QUOTACTL */
 #define IOC_OBD_STATFS		  _IOWR('f', 164, struct obd_statfs *)
-#define IOC_LOV_GETINFO		 _IOWR('f', 165, struct lov_user_mds_data *)
+/*	IOC_LOV_GETINFO			165 obsolete */
 #define LL_IOC_FLUSHCTX		 _IOW('f', 166, long)
 /* LL_IOC_RMTACL			167 obsolete */
 #define LL_IOC_GETOBDCOUNT	      _IOR('f', 168, long)
@@ -343,6 +346,9 @@ enum ll_lease_type {
 #define LOV_ALL_STRIPES       0xffff /* only valid for directories */
 #define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */
 
+#define XATTR_LUSTRE_PREFIX	"lustre."
+#define XATTR_LUSTRE_LOV	"lustre.lov"
+
 #define lov_user_ost_data lov_user_ost_data_v1
 struct lov_user_ost_data_v1 {     /* per-stripe data structure */
 	struct ost_id l_ost_oi;	  /* OST object ID */
@@ -451,8 +457,6 @@ static inline int lmv_user_md_size(int stripes, int lmm_magic)
 		      stripes * sizeof(struct lmv_user_mds_data);
 }
 
-void lustre_swab_lmv_user_md(struct lmv_user_md *lum);
-
 struct ll_recreate_obj {
 	__u64 lrc_id;
 	__u32 lrc_ost_idx;
@@ -522,25 +526,20 @@ static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen)
 }
 
 /* printf display format
- * e.g. printf("file FID is "DFID"\n", PFID(fid));
+ * * usage: printf("file FID is "DFID"\n", PFID(fid));
  */
 #define FID_NOBRACE_LEN 40
 #define FID_LEN (FID_NOBRACE_LEN + 2)
 #define DFID_NOBRACE "%#llx:0x%x:0x%x"
 #define DFID "["DFID_NOBRACE"]"
-#define PFID(fid)     \
-	(fid)->f_seq, \
-	(fid)->f_oid, \
-	(fid)->f_ver
+#define PFID(fid) (unsigned long long)(fid)->f_seq, (fid)->f_oid, (fid)->f_ver
 
-/* scanf input parse format -- strip '[' first.
- * e.g. sscanf(fidstr, SFID, RFID(&fid));
+/* scanf input parse format for fids in DFID_NOBRACE format
+ * Need to strip '[' from DFID format first or use "["SFID"]" at caller.
+ * usage: sscanf(fidstr, SFID, RFID(&fid));
  */
 #define SFID "0x%llx:0x%x:0x%x"
-#define RFID(fid)     \
-	&((fid)->f_seq), \
-	&((fid)->f_oid), \
-	&((fid)->f_ver)
+#define RFID(fid) &((fid)->f_seq), &((fid)->f_oid), &((fid)->f_ver)
 
 /********* Quotas **********/
 
@@ -551,23 +550,18 @@ static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen)
 #define Q_FINVALIDATE  0x800104 /* deprecated as of 2.4 */
 
 /* these must be explicitly translated into linux Q_* in ll_dir_ioctl */
-#define LUSTRE_Q_QUOTAON    0x800002     /* turn quotas on */
-#define LUSTRE_Q_QUOTAOFF   0x800003     /* turn quotas off */
+#define LUSTRE_Q_QUOTAON    0x800002	/* deprecated as of 2.4 */
+#define LUSTRE_Q_QUOTAOFF   0x800003	/* deprecated as of 2.4 */
 #define LUSTRE_Q_GETINFO    0x800005     /* get information about quota files */
 #define LUSTRE_Q_SETINFO    0x800006     /* set information about quota files */
 #define LUSTRE_Q_GETQUOTA   0x800007     /* get user quota structure */
 #define LUSTRE_Q_SETQUOTA   0x800008     /* set user quota structure */
 /* lustre-specific control commands */
-#define LUSTRE_Q_INVALIDATE  0x80000b     /* invalidate quota data */
-#define LUSTRE_Q_FINVALIDATE 0x80000c     /* invalidate filter quota data */
+#define LUSTRE_Q_INVALIDATE  0x80000b	/* deprecated as of 2.4 */
+#define LUSTRE_Q_FINVALIDATE 0x80000c	/* deprecated as of 2.4 */
 
 #define UGQUOTA 2       /* set both USRQUOTA and GRPQUOTA */
 
-struct if_quotacheck {
-	char		    obd_type[16];
-	struct obd_uuid	 obd_uuid;
-};
-
 #define IDENTITY_DOWNCALL_MAGIC 0x6d6dd629
 
 /* permission */
@@ -649,6 +643,7 @@ struct if_quotactl {
 #define SWAP_LAYOUTS_CHECK_DV2		(1 << 1)
 #define SWAP_LAYOUTS_KEEP_MTIME		(1 << 2)
 #define SWAP_LAYOUTS_KEEP_ATIME		(1 << 3)
+#define SWAP_LAYOUTS_CLOSE		BIT(4)
 
 /* Swap XATTR_NAME_HSM as well, only on the MDT so far */
 #define SWAP_LAYOUTS_MDS_HSM		(1 << 31)
@@ -999,6 +994,7 @@ struct ioc_data_version {
  * See HSM_FLAGS below.
  */
 enum hsm_states {
+	HS_NONE		= 0x00000000,
 	HS_EXISTS	= 0x00000001,
 	HS_DIRTY	= 0x00000002,
 	HS_RELEASED	= 0x00000004,
diff --git a/drivers/staging/lustre/lustre/include/lustre_compat.h b/drivers/staging/lustre/lustre/include/lustre_compat.h
index 567c438e93cb..300e96fb032a 100644
--- a/drivers/staging/lustre/lustre/include/lustre_compat.h
+++ b/drivers/staging/lustre/lustre/include/lustre_compat.h
@@ -74,4 +74,6 @@
 # define ext2_find_next_zero_bit  find_next_zero_bit_le
 #endif
 
+#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
+
 #endif /* _LUSTRE_COMPAT_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h
index d03534432624..b7e61d082e55 100644
--- a/drivers/staging/lustre/lustre/include/lustre_dlm.h
+++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h
@@ -59,7 +59,7 @@ struct obd_device;
 #define OBD_LDLM_DEVICENAME  "ldlm"
 
 #define LDLM_DEFAULT_LRU_SIZE (100 * num_online_cpus())
-#define LDLM_DEFAULT_MAX_ALIVE (cfs_time_seconds(36000))
+#define LDLM_DEFAULT_MAX_ALIVE (cfs_time_seconds(3900)) /* 65 min */
 #define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024
 
 /**
@@ -86,10 +86,10 @@ enum ldlm_error {
  * decisions about lack of conflicts or do any autonomous lock granting without
  * first speaking to a server.
  */
-typedef enum {
+enum ldlm_side {
 	LDLM_NAMESPACE_SERVER = 1 << 0,
 	LDLM_NAMESPACE_CLIENT = 1 << 1
-} ldlm_side_t;
+};
 
 /**
  * The blocking callback is overloaded to perform two functions.  These flags
@@ -359,7 +359,7 @@ struct ldlm_namespace {
 	struct obd_device	*ns_obd;
 
 	/** Flag indicating if namespace is on client instead of server */
-	ldlm_side_t		ns_client;
+	enum ldlm_side		ns_client;
 
 	/** Resource hash table for namespace. */
 	struct cfs_hash		*ns_rs_hash;
@@ -550,20 +550,18 @@ struct ldlm_flock {
 	__u64 owner;
 	__u64 blocking_owner;
 	struct obd_export *blocking_export;
-	/* Protected by the hash lock */
-	__u32 blocking_refs;
 	__u32 pid;
 };
 
-typedef union {
+union ldlm_policy_data {
 	struct ldlm_extent l_extent;
 	struct ldlm_flock l_flock;
 	struct ldlm_inodebits l_inodebits;
-} ldlm_policy_data_t;
+};
 
 void ldlm_convert_policy_to_local(struct obd_export *exp, enum ldlm_type type,
-				  const ldlm_wire_policy_data_t *wpolicy,
-				  ldlm_policy_data_t *lpolicy);
+				  const union ldlm_wire_policy_data *wpolicy,
+				  union ldlm_policy_data *lpolicy);
 
 enum lvb_type {
 	LVB_T_NONE	= 0,
@@ -692,7 +690,7 @@ struct ldlm_lock {
 	 * Representation of private data specific for a lock type.
 	 * Examples are: extent range for extent lock or bitmask for ibits locks
 	 */
-	ldlm_policy_data_t	l_policy_data;
+	union ldlm_policy_data	l_policy_data;
 
 	/**
 	 * Lock state flags. Protected by lr_lock.
@@ -967,8 +965,8 @@ struct ldlm_ast_work {
  * Common ldlm_enqueue parameters
  */
 struct ldlm_enqueue_info {
-	__u32 ei_type;   /** Type of the lock being enqueued. */
-	__u32 ei_mode;   /** Mode of the lock being enqueued. */
+	enum ldlm_type	ei_type;  /** Type of the lock being enqueued. */
+	enum ldlm_mode	ei_mode;  /** Mode of the lock being enqueued. */
 	void *ei_cb_bl;  /** blocking lock callback */
 	void *ei_cb_cp;  /** lock completion callback */
 	void *ei_cb_gl;  /** lock glimpse callback */
@@ -979,7 +977,7 @@ struct ldlm_enqueue_info {
 extern struct obd_ops ldlm_obd_ops;
 
 extern char *ldlm_lockname[];
-char *ldlm_it2str(int it);
+const char *ldlm_it2str(enum ldlm_intent_flags it);
 
 /**
  * Just a fancy CDEBUG call with log level preset to LDLM_DEBUG.
@@ -1168,16 +1166,18 @@ do {					    \
 struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock);
 void ldlm_lock_put(struct ldlm_lock *lock);
 void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc);
-void ldlm_lock_addref(const struct lustre_handle *lockh, __u32 mode);
-int  ldlm_lock_addref_try(const struct lustre_handle *lockh, __u32 mode);
-void ldlm_lock_decref(const struct lustre_handle *lockh, __u32 mode);
-void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, __u32 mode);
+void ldlm_lock_addref(const struct lustre_handle *lockh, enum ldlm_mode mode);
+int  ldlm_lock_addref_try(const struct lustre_handle *lockh,
+			  enum ldlm_mode mode);
+void ldlm_lock_decref(const struct lustre_handle *lockh, enum ldlm_mode mode);
+void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh,
+				 enum ldlm_mode mode);
 void ldlm_lock_fail_match_locked(struct ldlm_lock *lock);
 void ldlm_lock_allow_match(struct ldlm_lock *lock);
 void ldlm_lock_allow_match_locked(struct ldlm_lock *lock);
 enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
 			       const struct ldlm_res_id *,
-			       enum ldlm_type type, ldlm_policy_data_t *,
+			       enum ldlm_type type, union ldlm_policy_data *,
 			       enum ldlm_mode mode, struct lustre_handle *,
 			       int unref);
 enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh,
@@ -1189,7 +1189,7 @@ void ldlm_unlink_lock_skiplist(struct ldlm_lock *req);
 /* resource.c */
 struct ldlm_namespace *
 ldlm_namespace_new(struct obd_device *obd, char *name,
-		   ldlm_side_t client, enum ldlm_appetite apt,
+		   enum ldlm_side client, enum ldlm_appetite apt,
 		   enum ldlm_ns_type ns_type);
 int ldlm_namespace_cleanup(struct ldlm_namespace *ns, __u64 flags);
 void ldlm_namespace_get(struct ldlm_namespace *ns);
@@ -1208,7 +1208,7 @@ void ldlm_resource_add_lock(struct ldlm_resource *res,
 			    struct ldlm_lock *lock);
 void ldlm_resource_unlink_lock(struct ldlm_lock *lock);
 void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc);
-void ldlm_dump_all_namespaces(ldlm_side_t client, int level);
+void ldlm_dump_all_namespaces(enum ldlm_side client, int level);
 void ldlm_namespace_dump(int level, struct ldlm_namespace *);
 void ldlm_resource_dump(int level, struct ldlm_resource *);
 int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *,
@@ -1241,7 +1241,7 @@ int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data);
 int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
 		     struct ldlm_enqueue_info *einfo,
 		     const struct ldlm_res_id *res_id,
-		     ldlm_policy_data_t const *policy, __u64 *flags,
+		     union ldlm_policy_data const *policy, __u64 *flags,
 		     void *lvb, __u32 lvb_len, enum lvb_type lvb_type,
 		     struct lustre_handle *lockh, int async);
 int ldlm_prep_enqueue_req(struct obd_export *exp,
@@ -1265,13 +1265,13 @@ int ldlm_cli_cancel_unused(struct ldlm_namespace *, const struct ldlm_res_id *,
 			   enum ldlm_cancel_flags flags, void *opaque);
 int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
 				    const struct ldlm_res_id *res_id,
-				    ldlm_policy_data_t *policy,
+				    union ldlm_policy_data *policy,
 				    enum ldlm_mode mode,
 				    enum ldlm_cancel_flags flags,
 				    void *opaque);
 int ldlm_cancel_resource_local(struct ldlm_resource *res,
 			       struct list_head *cancels,
-			       ldlm_policy_data_t *policy,
+			       union ldlm_policy_data *policy,
 			       enum ldlm_mode mode, __u64 lock_flags,
 			       enum ldlm_cancel_flags cancel_flags,
 			       void *opaque);
@@ -1333,7 +1333,7 @@ int ldlm_pools_init(void);
 void ldlm_pools_fini(void);
 
 int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns,
-		   int idx, ldlm_side_t client);
+		   int idx, enum ldlm_side client);
 void ldlm_pool_fini(struct ldlm_pool *pl);
 void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock);
 void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock);
diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h
index 316780693193..b5a1aadbcb93 100644
--- a/drivers/staging/lustre/lustre/include/lustre_fid.h
+++ b/drivers/staging/lustre/lustre/include/lustre_fid.h
@@ -150,6 +150,7 @@
 
 #include "../../include/linux/libcfs/libcfs.h"
 #include "lustre/lustre_idl.h"
+#include "seq_range.h"
 
 struct lu_env;
 struct lu_site;
diff --git a/drivers/staging/lustre/lustre/include/lustre_fld.h b/drivers/staging/lustre/lustre/include/lustre_fld.h
index 932410d3e3cc..6ef1b03cb986 100644
--- a/drivers/staging/lustre/lustre/include/lustre_fld.h
+++ b/drivers/staging/lustre/lustre/include/lustre_fld.h
@@ -103,8 +103,6 @@ struct lu_client_fld {
 
 	/** Client fld debugfs entry name. */
 	char			 lcf_name[LUSTRE_MDT_MAXNAMELEN];
-
-	int			 lcf_flags;
 };
 
 /* Client methods */
diff --git a/drivers/staging/lustre/lustre/include/lustre_ha.h b/drivers/staging/lustre/lustre/include/lustre_ha.h
index cde7ed702c86..dec1e99d594d 100644
--- a/drivers/staging/lustre/lustre/include/lustre_ha.h
+++ b/drivers/staging/lustre/lustre/include/lustre_ha.h
@@ -53,6 +53,7 @@ void ptlrpc_activate_import(struct obd_import *imp);
 void ptlrpc_deactivate_import(struct obd_import *imp);
 void ptlrpc_invalidate_import(struct obd_import *imp);
 void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt);
+void ptlrpc_pinger_force(struct obd_import *imp);
 
 /** @} ha */
 
diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h
index 5461ba33d90c..f0c931ce1a67 100644
--- a/drivers/staging/lustre/lustre/include/lustre_import.h
+++ b/drivers/staging/lustre/lustre/include/lustre_import.h
@@ -185,6 +185,11 @@ struct obd_import {
 	struct list_head	       *imp_replay_cursor;
 	/** @} */
 
+	/** List of not replied requests */
+	struct list_head	imp_unreplied_list;
+	/** Known maximal replied XID */
+	__u64			imp_known_replied_xid;
+
 	/** obd device for this import */
 	struct obd_device	*imp_obd;
 
@@ -294,7 +299,9 @@ struct obd_import {
 				   */
 				  imp_force_reconnect:1,
 				  /* import has tried to connect with server */
-				  imp_connect_tried:1;
+				  imp_connect_tried:1,
+				 /* connected but not FULL yet */
+				 imp_connected:1;
 	__u32		     imp_connect_op;
 	struct obd_connect_data   imp_connect_data;
 	__u64		     imp_connect_flags_orig;
diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h
index 6b231913ba2e..27f3148c4344 100644
--- a/drivers/staging/lustre/lustre/include/lustre_lib.h
+++ b/drivers/staging/lustre/lustre/include/lustre_lib.h
@@ -350,8 +350,6 @@ do {									   \
 	l_wait_event_exclusive_head(wq, condition, &lwi);       \
 })
 
-#define LIBLUSTRE_CLIENT (0)
-
 /** @} lib */
 
 #endif /* _LUSTRE_LIB_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_lmv.h b/drivers/staging/lustre/lustre/include/lustre_lmv.h
index d7f7afa8dfa7..5aa3645e64dc 100644
--- a/drivers/staging/lustre/lustre/include/lustre_lmv.h
+++ b/drivers/staging/lustre/lustre/include/lustre_lmv.h
@@ -76,18 +76,7 @@ lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
 
 union lmv_mds_md;
 
-int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp,
-		  const union lmv_mds_md *lmm, int stripe_count);
-
-static inline int lmv_alloc_memmd(struct lmv_stripe_md **lsmp, int stripe_count)
-{
-	return lmv_unpack_md(NULL, lsmp, NULL, stripe_count);
-}
-
-static inline void lmv_free_memmd(struct lmv_stripe_md *lsm)
-{
-	lmv_unpack_md(NULL, &lsm, NULL, 0);
-}
+void lmv_free_memmd(struct lmv_stripe_md *lsm);
 
 static inline void lmv1_le_to_cpu(struct lmv_mds_md_v1 *lmv_dst,
 				  const struct lmv_mds_md_v1 *lmv_src)
diff --git a/drivers/staging/lustre/lustre/include/lustre_log.h b/drivers/staging/lustre/lustre/include/lustre_log.h
index 995b266932e3..35e37eb1bc2c 100644
--- a/drivers/staging/lustre/lustre/include/lustre_log.h
+++ b/drivers/staging/lustre/lustre/include/lustre_log.h
@@ -214,6 +214,7 @@ struct llog_handle {
 	spinlock_t		 lgh_hdr_lock; /* protect lgh_hdr data */
 	struct llog_logid	 lgh_id; /* id of this log */
 	struct llog_log_hdr	*lgh_hdr;
+	size_t			 lgh_hdr_size;
 	int			 lgh_last_idx;
 	int			 lgh_cur_idx; /* used during llog_process */
 	__u64			 lgh_cur_offset; /* used during llog_process */
@@ -244,6 +245,11 @@ struct llog_ctxt {
 	struct mutex		 loc_mutex; /* protect loc_imp */
 	atomic_t	     loc_refcount;
 	long		     loc_flags; /* flags, see above defines */
+	/*
+	 * llog chunk size, and llog record size can not be bigger than
+	 * loc_chunk_size
+	 */
+	__u32			loc_chunk_size;
 };
 
 #define LLOG_PROC_BREAK 0x0001
diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h
index 8fc2d3f2dfd6..198ceb0c66f9 100644
--- a/drivers/staging/lustre/lustre/include/lustre_mdc.h
+++ b/drivers/staging/lustre/lustre/include/lustre_mdc.h
@@ -156,16 +156,39 @@ static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
 	mutex_unlock(&lck->rpcl_mutex);
 }
 
+static inline void mdc_get_mod_rpc_slot(struct ptlrpc_request *req,
+					struct lookup_intent *it)
+{
+	struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+	u32 opc;
+	u16 tag;
+
+	opc = lustre_msg_get_opc(req->rq_reqmsg);
+	tag = obd_get_mod_rpc_slot(cli, opc, it);
+	lustre_msg_set_tag(req->rq_reqmsg, tag);
+}
+
+static inline void mdc_put_mod_rpc_slot(struct ptlrpc_request *req,
+					struct lookup_intent *it)
+{
+	struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+	u32 opc;
+	u16 tag;
+
+	opc = lustre_msg_get_opc(req->rq_reqmsg);
+	tag = lustre_msg_get_tag(req->rq_reqmsg);
+	obd_put_mod_rpc_slot(cli, opc, it, tag);
+}
+
 /**
- * Update the maximum possible easize and cookiesize.
+ * Update the maximum possible easize.
  *
- * The values are learned from ptlrpc replies sent by the MDT.  The
- * default easize and cookiesize is initialized to the minimum value but
- * allowed to grow up to a single page in size if required to handle the
+ * This value is learned from ptlrpc replies sent by the MDT. The
+ * default easize is initialized to the minimum value but allowed
+ * to grow up to a single page in size if required to handle the
  * common case.
  *
- * \see client_obd::cl_default_mds_easize and
- * client_obd::cl_default_mds_cookiesize
+ * \see client_obd::cl_default_mds_easize
  *
  * \param[in] exp	export for MDC device
  * \param[in] body	body of ptlrpc reply from MDT
@@ -176,7 +199,7 @@ static inline void mdc_update_max_ea_from_body(struct obd_export *exp,
 {
 	if (body->mbo_valid & OBD_MD_FLMODEASIZE) {
 		struct client_obd *cli = &exp->exp_obd->u.cli;
-		u32 def_cookiesize, def_easize;
+		u32 def_easize;
 
 		if (cli->cl_max_mds_easize < body->mbo_max_mdsize)
 			cli->cl_max_mds_easize = body->mbo_max_mdsize;
@@ -184,13 +207,6 @@ static inline void mdc_update_max_ea_from_body(struct obd_export *exp,
 		def_easize = min_t(__u32, body->mbo_max_mdsize,
 				   OBD_MAX_DEFAULT_EA_SIZE);
 		cli->cl_default_mds_easize = def_easize;
-
-		if (cli->cl_max_mds_cookiesize < body->mbo_max_cookiesize)
-			cli->cl_max_mds_cookiesize = body->mbo_max_cookiesize;
-
-		def_cookiesize = min_t(__u32, body->mbo_max_cookiesize,
-				       OBD_MAX_DEFAULT_COOKIE_SIZE);
-		cli->cl_default_mds_cookiesize = def_cookiesize;
 	}
 }
 
diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h
index e9aba99ee52a..411eb0dc7f38 100644
--- a/drivers/staging/lustre/lustre/include/lustre_net.h
+++ b/drivers/staging/lustre/lustre/include/lustre_net.h
@@ -50,6 +50,7 @@
  * @{
  */
 
+#include <linux/uio.h>
 #include "../../include/linux/libcfs/libcfs.h"
 #include "../../include/linux/lnet/nidstr.h"
 #include "../../include/linux/lnet/api.h"
@@ -68,13 +69,17 @@
 #define PTLRPC_MD_OPTIONS  0
 
 /**
- * Max # of bulk operations in one request.
+ * log2 max # of bulk operations in one request: 2=4MB/RPC, 5=32MB/RPC, ...
  * In order for the client and server to properly negotiate the maximum
  * possible transfer size, PTLRPC_BULK_OPS_COUNT must be a power-of-two
  * value.  The client is free to limit the actual RPC size for any bulk
  * transfer via cl_max_pages_per_rpc to some non-power-of-two value.
+ * NOTE: This is limited to 16 (=64GB RPCs) by IOOBJ_MAX_BRW_BITS.
  */
-#define PTLRPC_BULK_OPS_BITS	2
+#define PTLRPC_BULK_OPS_BITS	4
+#if PTLRPC_BULK_OPS_BITS > 16
+#error "More than 65536 BRW RPCs not allowed by IOOBJ_MAX_BRW_BITS."
+#endif
 #define PTLRPC_BULK_OPS_COUNT	(1U << PTLRPC_BULK_OPS_BITS)
 /**
  * PTLRPC_BULK_OPS_MASK is for the convenience of the client only, and
@@ -437,6 +442,10 @@ struct ptlrpc_reply_state {
 	unsigned long	  rs_committed:1;/* the transaction was committed
 					  * and the rs was dispatched
 					  */
+	atomic_t		rs_refcount;	/* number of users */
+	/** Number of locks awaiting client ACK */
+	int			rs_nlocks;
+
 	/** Size of the state */
 	int		    rs_size;
 	/** opcode */
@@ -449,7 +458,6 @@ struct ptlrpc_reply_state {
 	struct ptlrpc_service_part *rs_svcpt;
 	/** Lnet metadata handle for the reply */
 	lnet_handle_md_t       rs_md_h;
-	atomic_t	   rs_refcount;
 
 	/** Context for the service thread */
 	struct ptlrpc_svc_ctx *rs_svc_ctx;
@@ -466,8 +474,6 @@ struct ptlrpc_reply_state {
 	 */
 	struct lustre_msg     *rs_msg;	  /* reply message */
 
-	/** Number of locks awaiting client ACK */
-	int		    rs_nlocks;
 	/** Handles of locks awaiting client reply ACK */
 	struct lustre_handle   rs_locks[RS_MAX_LOCKS];
 	/** Lock modes of locks in \a rs_locks */
@@ -515,717 +521,7 @@ struct lu_env;
 
 struct ldlm_lock;
 
-/**
- * \defgroup nrs Network Request Scheduler
- * @{
- */
-struct ptlrpc_nrs_policy;
-struct ptlrpc_nrs_resource;
-struct ptlrpc_nrs_request;
-
-/**
- * NRS control operations.
- *
- * These are common for all policies.
- */
-enum ptlrpc_nrs_ctl {
-	/**
-	 * Not a valid opcode.
-	 */
-	PTLRPC_NRS_CTL_INVALID,
-	/**
-	 * Activate the policy.
-	 */
-	PTLRPC_NRS_CTL_START,
-	/**
-	 * Reserved for multiple primary policies, which may be a possibility
-	 * in the future.
-	 */
-	PTLRPC_NRS_CTL_STOP,
-	/**
-	 * Policies can start using opcodes from this value and onwards for
-	 * their own purposes; the assigned value itself is arbitrary.
-	 */
-	PTLRPC_NRS_CTL_1ST_POL_SPEC = 0x20,
-};
-
-/**
- * ORR policy operations
- */
-enum nrs_ctl_orr {
-	NRS_CTL_ORR_RD_QUANTUM = PTLRPC_NRS_CTL_1ST_POL_SPEC,
-	NRS_CTL_ORR_WR_QUANTUM,
-	NRS_CTL_ORR_RD_OFF_TYPE,
-	NRS_CTL_ORR_WR_OFF_TYPE,
-	NRS_CTL_ORR_RD_SUPP_REQ,
-	NRS_CTL_ORR_WR_SUPP_REQ,
-};
-
-/**
- * NRS policy operations.
- *
- * These determine the behaviour of a policy, and are called in response to
- * NRS core events.
- */
-struct ptlrpc_nrs_pol_ops {
-	/**
-	 * Called during policy registration; this operation is optional.
-	 *
-	 * \param[in,out] policy The policy being initialized
-	 */
-	int	(*op_policy_init)(struct ptlrpc_nrs_policy *policy);
-	/**
-	 * Called during policy unregistration; this operation is optional.
-	 *
-	 * \param[in,out] policy The policy being unregistered/finalized
-	 */
-	void	(*op_policy_fini)(struct ptlrpc_nrs_policy *policy);
-	/**
-	 * Called when activating a policy via lprocfs; policies allocate and
-	 * initialize their resources here; this operation is optional.
-	 *
-	 * \param[in,out] policy The policy being started
-	 *
-	 * \see nrs_policy_start_locked()
-	 */
-	int	(*op_policy_start)(struct ptlrpc_nrs_policy *policy);
-	/**
-	 * Called when deactivating a policy via lprocfs; policies deallocate
-	 * their resources here; this operation is optional
-	 *
-	 * \param[in,out] policy The policy being stopped
-	 *
-	 * \see nrs_policy_stop0()
-	 */
-	void	(*op_policy_stop)(struct ptlrpc_nrs_policy *policy);
-	/**
-	 * Used for policy-specific operations; i.e. not generic ones like
-	 * \e PTLRPC_NRS_CTL_START and \e PTLRPC_NRS_CTL_GET_INFO; analogous
-	 * to an ioctl; this operation is optional.
-	 *
-	 * \param[in,out]	 policy The policy carrying out operation \a opc
-	 * \param[in]	  opc	 The command operation being carried out
-	 * \param[in,out] arg	 An generic buffer for communication between the
-	 *			 user and the control operation
-	 *
-	 * \retval -ve error
-	 * \retval   0 success
-	 *
-	 * \see ptlrpc_nrs_policy_control()
-	 */
-	int	(*op_policy_ctl)(struct ptlrpc_nrs_policy *policy,
-				 enum ptlrpc_nrs_ctl opc, void *arg);
-
-	/**
-	 * Called when obtaining references to the resources of the resource
-	 * hierarchy for a request that has arrived for handling at the PTLRPC
-	 * service. Policies should return -ve for requests they do not wish
-	 * to handle. This operation is mandatory.
-	 *
-	 * \param[in,out] policy  The policy we're getting resources for.
-	 * \param[in,out] nrq	  The request we are getting resources for.
-	 * \param[in]	  parent  The parent resource of the resource being
-	 *			  requested; set to NULL if none.
-	 * \param[out]	  resp	  The resource is to be returned here; the
-	 *			  fallback policy in an NRS head should
-	 *			  \e always return a non-NULL pointer value.
-	 * \param[in]  moving_req When set, signifies that this is an attempt
-	 *			  to obtain resources for a request being moved
-	 *			  to the high-priority NRS head by
-	 *			  ldlm_lock_reorder_req().
-	 *			  This implies two things:
-	 *			  1. We are under obd_export::exp_rpc_lock and
-	 *			  so should not sleep.
-	 *			  2. We should not perform non-idempotent or can
-	 *			  skip performing idempotent operations that
-	 *			  were carried out when resources were first
-	 *			  taken for the request when it was initialized
-	 *			  in ptlrpc_nrs_req_initialize().
-	 *
-	 * \retval 0, +ve The level of the returned resource in the resource
-	 *		  hierarchy; currently only 0 (for a non-leaf resource)
-	 *		  and 1 (for a leaf resource) are supported by the
-	 *		  framework.
-	 * \retval -ve	  error
-	 *
-	 * \see ptlrpc_nrs_req_initialize()
-	 * \see ptlrpc_nrs_hpreq_add_nolock()
-	 */
-	int	(*op_res_get)(struct ptlrpc_nrs_policy *policy,
-			      struct ptlrpc_nrs_request *nrq,
-			      const struct ptlrpc_nrs_resource *parent,
-			      struct ptlrpc_nrs_resource **resp,
-			      bool moving_req);
-	/**
-	 * Called when releasing references taken for resources in the resource
-	 * hierarchy for the request; this operation is optional.
-	 *
-	 * \param[in,out] policy The policy the resource belongs to
-	 * \param[in] res	 The resource to be freed
-	 *
-	 * \see ptlrpc_nrs_req_finalize()
-	 * \see ptlrpc_nrs_hpreq_add_nolock()
-	 */
-	void	(*op_res_put)(struct ptlrpc_nrs_policy *policy,
-			      const struct ptlrpc_nrs_resource *res);
-
-	/**
-	 * Obtains a request for handling from the policy, and optionally
-	 * removes the request from the policy; this operation is mandatory.
-	 *
-	 * \param[in,out] policy The policy to poll
-	 * \param[in]	  peek	 When set, signifies that we just want to
-	 *			 examine the request, and not handle it, so the
-	 *			 request is not removed from the policy.
-	 * \param[in]	  force	 When set, it will force a policy to return a
-	 *			 request if it has one queued.
-	 *
-	 * \retval NULL No request available for handling
-	 * \retval valid-pointer The request polled for handling
-	 *
-	 * \see ptlrpc_nrs_req_get_nolock()
-	 */
-	struct ptlrpc_nrs_request *
-		(*op_req_get)(struct ptlrpc_nrs_policy *policy, bool peek,
-			      bool force);
-	/**
-	 * Called when attempting to add a request to a policy for later
-	 * handling; this operation is mandatory.
-	 *
-	 * \param[in,out] policy  The policy on which to enqueue \a nrq
-	 * \param[in,out] nrq The request to enqueue
-	 *
-	 * \retval 0	success
-	 * \retval != 0	error
-	 *
-	 * \see ptlrpc_nrs_req_add_nolock()
-	 */
-	int	(*op_req_enqueue)(struct ptlrpc_nrs_policy *policy,
-				  struct ptlrpc_nrs_request *nrq);
-	/**
-	 * Removes a request from the policy's set of pending requests. Normally
-	 * called after a request has been polled successfully from the policy
-	 * for handling; this operation is mandatory.
-	 *
-	 * \param[in,out] policy The policy the request \a nrq belongs to
-	 * \param[in,out] nrq    The request to dequeue
-	 */
-	void	(*op_req_dequeue)(struct ptlrpc_nrs_policy *policy,
-				  struct ptlrpc_nrs_request *nrq);
-	/**
-	 * Called after the request being carried out. Could be used for
-	 * job/resource control; this operation is optional.
-	 *
-	 * \param[in,out] policy The policy which is stopping to handle request
-	 *			 \a nrq
-	 * \param[in,out] nrq	 The request
-	 *
-	 * \pre assert_spin_locked(&svcpt->scp_req_lock)
-	 *
-	 * \see ptlrpc_nrs_req_stop_nolock()
-	 */
-	void	(*op_req_stop)(struct ptlrpc_nrs_policy *policy,
-			       struct ptlrpc_nrs_request *nrq);
-	/**
-	 * Registers the policy's lprocfs interface with a PTLRPC service.
-	 *
-	 * \param[in] svc The service
-	 *
-	 * \retval 0	success
-	 * \retval != 0	error
-	 */
-	int	(*op_lprocfs_init)(struct ptlrpc_service *svc);
-	/**
-	 * Unegisters the policy's lprocfs interface with a PTLRPC service.
-	 *
-	 * In cases of failed policy registration in
-	 * \e ptlrpc_nrs_policy_register(), this function may be called for a
-	 * service which has not registered the policy successfully, so
-	 * implementations of this method should make sure their operations are
-	 * safe in such cases.
-	 *
-	 * \param[in] svc The service
-	 */
-	void	(*op_lprocfs_fini)(struct ptlrpc_service *svc);
-};
-
-/**
- * Policy flags
- */
-enum nrs_policy_flags {
-	/**
-	 * Fallback policy, use this flag only on a single supported policy per
-	 * service. The flag cannot be used on policies that use
-	 * \e PTLRPC_NRS_FL_REG_EXTERN
-	 */
-	PTLRPC_NRS_FL_FALLBACK		= (1 << 0),
-	/**
-	 * Start policy immediately after registering.
-	 */
-	PTLRPC_NRS_FL_REG_START		= (1 << 1),
-	/**
-	 * This is a policy registering from a module different to the one NRS
-	 * core ships in (currently ptlrpc).
-	 */
-	PTLRPC_NRS_FL_REG_EXTERN	= (1 << 2),
-};
-
-/**
- * NRS queue type.
- *
- * Denotes whether an NRS instance is for handling normal or high-priority
- * RPCs, or whether an operation pertains to one or both of the NRS instances
- * in a service.
- */
-enum ptlrpc_nrs_queue_type {
-	PTLRPC_NRS_QUEUE_REG	= (1 << 0),
-	PTLRPC_NRS_QUEUE_HP	= (1 << 1),
-	PTLRPC_NRS_QUEUE_BOTH	= (PTLRPC_NRS_QUEUE_REG | PTLRPC_NRS_QUEUE_HP)
-};
-
-/**
- * NRS head
- *
- * A PTLRPC service has at least one NRS head instance for handling normal
- * priority RPCs, and may optionally have a second NRS head instance for
- * handling high-priority RPCs. Each NRS head maintains a list of available
- * policies, of which one and only one policy is acting as the fallback policy,
- * and optionally a different policy may be acting as the primary policy. For
- * all RPCs handled by this NRS head instance, NRS core will first attempt to
- * enqueue the RPC using the primary policy (if any). The fallback policy is
- * used in the following cases:
- * - when there was no primary policy in the
- *   ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the request
- *   was initialized.
- * - when the primary policy that was at the
- *   ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the
- *   RPC was initialized, denoted it did not wish, or for some other reason was
- *   not able to handle the request, by returning a non-valid NRS resource
- *   reference.
- * - when the primary policy that was at the
- *   ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the
- *   RPC was initialized, fails later during the request enqueueing stage.
- *
- * \see nrs_resource_get_safe()
- * \see nrs_request_enqueue()
- */
-struct ptlrpc_nrs {
-	spinlock_t			nrs_lock;
-	/** XXX Possibly replace svcpt->scp_req_lock with another lock here. */
-	/**
-	 * List of registered policies
-	 */
-	struct list_head			nrs_policy_list;
-	/**
-	 * List of policies with queued requests. Policies that have any
-	 * outstanding requests are queued here, and this list is queried
-	 * in a round-robin manner from NRS core when obtaining a request
-	 * for handling. This ensures that requests from policies that at some
-	 * point transition away from the
-	 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state are drained.
-	 */
-	struct list_head			nrs_policy_queued;
-	/**
-	 * Service partition for this NRS head
-	 */
-	struct ptlrpc_service_part     *nrs_svcpt;
-	/**
-	 * Primary policy, which is the preferred policy for handling RPCs
-	 */
-	struct ptlrpc_nrs_policy       *nrs_policy_primary;
-	/**
-	 * Fallback policy, which is the backup policy for handling RPCs
-	 */
-	struct ptlrpc_nrs_policy       *nrs_policy_fallback;
-	/**
-	 * This NRS head handles either HP or regular requests
-	 */
-	enum ptlrpc_nrs_queue_type	nrs_queue_type;
-	/**
-	 * # queued requests from all policies in this NRS head
-	 */
-	unsigned long			nrs_req_queued;
-	/**
-	 * # scheduled requests from all policies in this NRS head
-	 */
-	unsigned long			nrs_req_started;
-	/**
-	 * # policies on this NRS
-	 */
-	unsigned			nrs_num_pols;
-	/**
-	 * This NRS head is in progress of starting a policy
-	 */
-	unsigned			nrs_policy_starting:1;
-	/**
-	 * In progress of shutting down the whole NRS head; used during
-	 * unregistration
-	 */
-	unsigned			nrs_stopping:1;
-};
-
-#define NRS_POL_NAME_MAX		16
-
-struct ptlrpc_nrs_pol_desc;
-
-/**
- * Service compatibility predicate; this determines whether a policy is adequate
- * for handling RPCs of a particular PTLRPC service.
- *
- * XXX:This should give the same result during policy registration and
- * unregistration, and for all partitions of a service; so the result should not
- * depend on temporal service or other properties, that may influence the
- * result.
- */
-typedef bool (*nrs_pol_desc_compat_t) (const struct ptlrpc_service *svc,
-				       const struct ptlrpc_nrs_pol_desc *desc);
-
-struct ptlrpc_nrs_pol_conf {
-	/**
-	 * Human-readable policy name
-	 */
-	char				   nc_name[NRS_POL_NAME_MAX];
-	/**
-	 * NRS operations for this policy
-	 */
-	const struct ptlrpc_nrs_pol_ops	  *nc_ops;
-	/**
-	 * Service compatibility predicate
-	 */
-	nrs_pol_desc_compat_t		   nc_compat;
-	/**
-	 * Set for policies that support a single ptlrpc service, i.e. ones that
-	 * have \a pd_compat set to nrs_policy_compat_one(). The variable value
-	 * depicts the name of the single service that such policies are
-	 * compatible with.
-	 */
-	const char			  *nc_compat_svc_name;
-	/**
-	 * Owner module for this policy descriptor; policies registering from a
-	 * different module to the one the NRS framework is held within
-	 * (currently ptlrpc), should set this field to THIS_MODULE.
-	 */
-	struct module			  *nc_owner;
-	/**
-	 * Policy registration flags; a bitmask of \e nrs_policy_flags
-	 */
-	unsigned			   nc_flags;
-};
-
-/**
- * NRS policy registering descriptor
- *
- * Is used to hold a description of a policy that can be passed to NRS core in
- * order to register the policy with NRS heads in different PTLRPC services.
- */
-struct ptlrpc_nrs_pol_desc {
-	/**
-	 * Human-readable policy name
-	 */
-	char					pd_name[NRS_POL_NAME_MAX];
-	/**
-	 * Link into nrs_core::nrs_policies
-	 */
-	struct list_head				pd_list;
-	/**
-	 * NRS operations for this policy
-	 */
-	const struct ptlrpc_nrs_pol_ops	       *pd_ops;
-	/**
-	 * Service compatibility predicate
-	 */
-	nrs_pol_desc_compat_t			pd_compat;
-	/**
-	 * Set for policies that are compatible with only one PTLRPC service.
-	 *
-	 * \see ptlrpc_nrs_pol_conf::nc_compat_svc_name
-	 */
-	const char			       *pd_compat_svc_name;
-	/**
-	 * Owner module for this policy descriptor.
-	 *
-	 * We need to hold a reference to the module whenever we might make use
-	 * of any of the module's contents, i.e.
-	 * - If one or more instances of the policy are at a state where they
-	 *   might be handling a request, i.e.
-	 *   ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED or
-	 *   ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING as we will have to
-	 *   call into the policy's ptlrpc_nrs_pol_ops() handlers. A reference
-	 *   is taken on the module when
-	 *   \e ptlrpc_nrs_pol_desc::pd_refs becomes 1, and released when it
-	 *   becomes 0, so that we hold only one reference to the module maximum
-	 *   at any time.
-	 *
-	 *   We do not need to hold a reference to the module, even though we
-	 *   might use code and data from the module, in the following cases:
-	 * - During external policy registration, because this should happen in
-	 *   the module's init() function, in which case the module is safe from
-	 *   removal because a reference is being held on the module by the
-	 *   kernel, and iirc kmod (and I guess module-init-tools also) will
-	 *   serialize any racing processes properly anyway.
-	 * - During external policy unregistration, because this should happen
-	 *   in a module's exit() function, and any attempts to start a policy
-	 *   instance would need to take a reference on the module, and this is
-	 *   not possible once we have reached the point where the exit()
-	 *   handler is called.
-	 * - During service registration and unregistration, as service setup
-	 *   and cleanup, and policy registration, unregistration and policy
-	 *   instance starting, are serialized by \e nrs_core::nrs_mutex, so
-	 *   as long as users adhere to the convention of registering policies
-	 *   in init() and unregistering them in module exit() functions, there
-	 *   should not be a race between these operations.
-	 * - During any policy-specific lprocfs operations, because a reference
-	 *   is held by the kernel on a proc entry that has been entered by a
-	 *   syscall, so as long as proc entries are removed during unregistration time,
-	 *   then unregistration and lprocfs operations will be properly
-	 *   serialized.
-	 */
-	struct module			       *pd_owner;
-	/**
-	 * Bitmask of \e nrs_policy_flags
-	 */
-	unsigned				pd_flags;
-	/**
-	 * # of references on this descriptor
-	 */
-	atomic_t				pd_refs;
-};
-
-/**
- * NRS policy state
- *
- * Policies transition from one state to the other during their lifetime
- */
-enum ptlrpc_nrs_pol_state {
-	/**
-	 * Not a valid policy state.
-	 */
-	NRS_POL_STATE_INVALID,
-	/**
-	 * Policies are at this state either at the start of their life, or
-	 * transition here when the user selects a different policy to act
-	 * as the primary one.
-	 */
-	NRS_POL_STATE_STOPPED,
-	/**
-	 * Policy is progress of stopping
-	 */
-	NRS_POL_STATE_STOPPING,
-	/**
-	 * Policy is in progress of starting
-	 */
-	NRS_POL_STATE_STARTING,
-	/**
-	 * A policy is in this state in two cases:
-	 * - it is the fallback policy, which is always in this state.
-	 * - it has been activated by the user; i.e. it is the primary policy,
-	 */
-	NRS_POL_STATE_STARTED,
-};
-
-/**
- * NRS policy information
- *
- * Used for obtaining information for the status of a policy via lprocfs
- */
-struct ptlrpc_nrs_pol_info {
-	/**
-	 * Policy name
-	 */
-	char				pi_name[NRS_POL_NAME_MAX];
-	/**
-	 * Current policy state
-	 */
-	enum ptlrpc_nrs_pol_state	pi_state;
-	/**
-	 * # RPCs enqueued for later dispatching by the policy
-	 */
-	long				pi_req_queued;
-	/**
-	 * # RPCs started for dispatch by the policy
-	 */
-	long				pi_req_started;
-	/**
-	 * Is this a fallback policy?
-	 */
-	unsigned			pi_fallback:1;
-};
-
-/**
- * NRS policy
- *
- * There is one instance of this for each policy in each NRS head of each
- * PTLRPC service partition.
- */
-struct ptlrpc_nrs_policy {
-	/**
-	 * Linkage into the NRS head's list of policies,
-	 * ptlrpc_nrs:nrs_policy_list
-	 */
-	struct list_head			pol_list;
-	/**
-	 * Linkage into the NRS head's list of policies with enqueued
-	 * requests ptlrpc_nrs:nrs_policy_queued
-	 */
-	struct list_head			pol_list_queued;
-	/**
-	 * Current state of this policy
-	 */
-	enum ptlrpc_nrs_pol_state	pol_state;
-	/**
-	 * Bitmask of nrs_policy_flags
-	 */
-	unsigned			pol_flags;
-	/**
-	 * # RPCs enqueued for later dispatching by the policy
-	 */
-	long				pol_req_queued;
-	/**
-	 * # RPCs started for dispatch by the policy
-	 */
-	long				pol_req_started;
-	/**
-	 * Usage Reference count taken on the policy instance
-	 */
-	long				pol_ref;
-	/**
-	 * The NRS head this policy has been created at
-	 */
-	struct ptlrpc_nrs	       *pol_nrs;
-	/**
-	 * Private policy data; varies by policy type
-	 */
-	void			       *pol_private;
-	/**
-	 * Policy descriptor for this policy instance.
-	 */
-	struct ptlrpc_nrs_pol_desc     *pol_desc;
-};
-
-/**
- * NRS resource
- *
- * Resources are embedded into two types of NRS entities:
- * - Inside NRS policies, in the policy's private data in
- *   ptlrpc_nrs_policy::pol_private
- * - In objects that act as prime-level scheduling entities in different NRS
- *   policies; e.g. on a policy that performs round robin or similar order
- *   scheduling across client NIDs, there would be one NRS resource per unique
- *   client NID. On a policy which performs round robin scheduling across
- *   backend filesystem objects, there would be one resource associated with
- *   each of the backend filesystem objects partaking in the scheduling
- *   performed by the policy.
- *
- * NRS resources share a parent-child relationship, in which resources embedded
- * in policy instances are the parent entities, with all scheduling entities
- * a policy schedules across being the children, thus forming a simple resource
- * hierarchy. This hierarchy may be extended with one or more levels in the
- * future if the ability to have more than one primary policy is added.
- *
- * Upon request initialization, references to the then active NRS policies are
- * taken and used to later handle the dispatching of the request with one of
- * these policies.
- *
- * \see nrs_resource_get_safe()
- * \see ptlrpc_nrs_req_add()
- */
-struct ptlrpc_nrs_resource {
-	/**
-	 * This NRS resource's parent; is NULL for resources embedded in NRS
-	 * policy instances; i.e. those are top-level ones.
-	 */
-	struct ptlrpc_nrs_resource     *res_parent;
-	/**
-	 * The policy associated with this resource.
-	 */
-	struct ptlrpc_nrs_policy       *res_policy;
-};
-
-enum {
-	NRS_RES_FALLBACK,
-	NRS_RES_PRIMARY,
-	NRS_RES_MAX
-};
-
-/* \name fifo
- *
- * FIFO policy
- *
- * This policy is a logical wrapper around previous, non-NRS functionality.
- * It dispatches RPCs in the same order as they arrive from the network. This
- * policy is currently used as the fallback policy, and the only enabled policy
- * on all NRS heads of all PTLRPC service partitions.
- * @{
- */
-
-/**
- * Private data structure for the FIFO policy
- */
-struct nrs_fifo_head {
-	/**
-	 * Resource object for policy instance.
-	 */
-	struct ptlrpc_nrs_resource	fh_res;
-	/**
-	 * List of queued requests.
-	 */
-	struct list_head			fh_list;
-	/**
-	 * For debugging purposes.
-	 */
-	__u64				fh_sequence;
-};
-
-struct nrs_fifo_req {
-	struct list_head		fr_list;
-	__u64			fr_sequence;
-};
-
-/** @} fifo */
-
-/**
- * NRS request
- *
- * Instances of this object exist embedded within ptlrpc_request; the main
- * purpose of this object is to hold references to the request's resources
- * for the lifetime of the request, and to hold properties that policies use
- * use for determining the request's scheduling priority.
- */
-struct ptlrpc_nrs_request {
-	/**
-	 * The request's resource hierarchy.
-	 */
-	struct ptlrpc_nrs_resource     *nr_res_ptrs[NRS_RES_MAX];
-	/**
-	 * Index into ptlrpc_nrs_request::nr_res_ptrs of the resource of the
-	 * policy that was used to enqueue the request.
-	 *
-	 * \see nrs_request_enqueue()
-	 */
-	unsigned			nr_res_idx;
-	unsigned			nr_initialized:1;
-	unsigned			nr_enqueued:1;
-	unsigned			nr_started:1;
-	unsigned			nr_finalized:1;
-
-	/**
-	 * Policy-specific fields, used for determining a request's scheduling
-	 * priority, and other supporting functionality.
-	 */
-	union {
-		/**
-		 * Fields for the FIFO policy
-		 */
-		struct nrs_fifo_req	fifo;
-	} nr_u;
-	/**
-	 * Externally-registering policies may want to use this to allocate
-	 * their own request properties.
-	 */
-	void			       *ext;
-};
-
-/** @} nrs */
+#include "lustre_nrs.h"
 
 /**
  * Basic request prioritization operations structure.
@@ -1304,6 +600,8 @@ struct ptlrpc_cli_req {
 	union ptlrpc_async_args		 cr_async_args;
 	/** Opaq data for replay and commit callbacks. */
 	void				*cr_cb_data;
+	/** Link to the imp->imp_unreplied_list */
+	struct list_head		 cr_unreplied_list;
 	/**
 	 * Commit callback, called when request is committed and about to be
 	 * freed.
@@ -1343,6 +641,7 @@ struct ptlrpc_cli_req {
 #define rq_interpret_reply	rq_cli.cr_reply_interp
 #define rq_async_args		rq_cli.cr_async_args
 #define rq_cb_data		rq_cli.cr_cb_data
+#define rq_unreplied_list	rq_cli.cr_unreplied_list
 #define rq_commit_cb		rq_cli.cr_commit_cb
 #define rq_replay_cb		rq_cli.cr_replay_cb
 
@@ -1505,6 +804,8 @@ struct ptlrpc_request {
 	__u64 rq_transno;
 	/** xid */
 	__u64 rq_xid;
+	/** bulk match bits */
+	u64				rq_mbits;
 	/**
 	 * List item to for replay list. Not yet committed requests get linked
 	 * there.
@@ -1793,10 +1094,93 @@ struct ptlrpc_bulk_page {
 	struct page     *bp_page;
 };
 
-#define BULK_GET_SOURCE   0
-#define BULK_PUT_SINK     1
-#define BULK_GET_SINK     2
-#define BULK_PUT_SOURCE   3
+enum ptlrpc_bulk_op_type {
+	PTLRPC_BULK_OP_ACTIVE	= 0x00000001,
+	PTLRPC_BULK_OP_PASSIVE	= 0x00000002,
+	PTLRPC_BULK_OP_PUT	= 0x00000004,
+	PTLRPC_BULK_OP_GET	= 0x00000008,
+	PTLRPC_BULK_BUF_KVEC	= 0x00000010,
+	PTLRPC_BULK_BUF_KIOV	= 0x00000020,
+	PTLRPC_BULK_GET_SOURCE	= PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_GET,
+	PTLRPC_BULK_PUT_SINK	= PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_PUT,
+	PTLRPC_BULK_GET_SINK	= PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_GET,
+	PTLRPC_BULK_PUT_SOURCE	= PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_PUT,
+};
+
+static inline bool ptlrpc_is_bulk_op_get(enum ptlrpc_bulk_op_type type)
+{
+	return (type & PTLRPC_BULK_OP_GET) == PTLRPC_BULK_OP_GET;
+}
+
+static inline bool ptlrpc_is_bulk_get_source(enum ptlrpc_bulk_op_type type)
+{
+	return (type & PTLRPC_BULK_GET_SOURCE) == PTLRPC_BULK_GET_SOURCE;
+}
+
+static inline bool ptlrpc_is_bulk_put_sink(enum ptlrpc_bulk_op_type type)
+{
+	return (type & PTLRPC_BULK_PUT_SINK) == PTLRPC_BULK_PUT_SINK;
+}
+
+static inline bool ptlrpc_is_bulk_get_sink(enum ptlrpc_bulk_op_type type)
+{
+	return (type & PTLRPC_BULK_GET_SINK) == PTLRPC_BULK_GET_SINK;
+}
+
+static inline bool ptlrpc_is_bulk_put_source(enum ptlrpc_bulk_op_type type)
+{
+	return (type & PTLRPC_BULK_PUT_SOURCE) == PTLRPC_BULK_PUT_SOURCE;
+}
+
+static inline bool ptlrpc_is_bulk_desc_kvec(enum ptlrpc_bulk_op_type type)
+{
+	return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV))
+		== PTLRPC_BULK_BUF_KVEC;
+}
+
+static inline bool ptlrpc_is_bulk_desc_kiov(enum ptlrpc_bulk_op_type type)
+{
+	return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV))
+		== PTLRPC_BULK_BUF_KIOV;
+}
+
+static inline bool ptlrpc_is_bulk_op_active(enum ptlrpc_bulk_op_type type)
+{
+	return ((type & PTLRPC_BULK_OP_ACTIVE) |
+		(type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_ACTIVE;
+}
+
+static inline bool ptlrpc_is_bulk_op_passive(enum ptlrpc_bulk_op_type type)
+{
+	return ((type & PTLRPC_BULK_OP_ACTIVE) |
+		(type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_PASSIVE;
+}
+
+struct ptlrpc_bulk_frag_ops {
+	/**
+	 * Add a page \a page to the bulk descriptor \a desc
+	 * Data to transfer in the page starts at offset \a pageoffset and
+	 * amount of data to transfer from the page is \a len
+	 */
+	void (*add_kiov_frag)(struct ptlrpc_bulk_desc *desc,
+			      struct page *page, int pageoffset, int len);
+
+	/*
+	 * Add a \a fragment to the bulk descriptor \a desc.
+	 * Data to transfer in the fragment is pointed to by \a frag
+	 * The size of the fragment is \a len
+	 */
+	int (*add_iov_frag)(struct ptlrpc_bulk_desc *desc, void *frag, int len);
+
+	/**
+	 * Uninitialize and free bulk descriptor \a desc.
+	 * Works on bulk descriptors both from server and client side.
+	 */
+	void (*release_frags)(struct ptlrpc_bulk_desc *desc);
+};
+
+extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_pin_ops;
+extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_nopin_ops;
 
 /**
  * Definition of bulk descriptor.
@@ -1811,14 +1195,14 @@ struct ptlrpc_bulk_page {
 struct ptlrpc_bulk_desc {
 	/** completed with failure */
 	unsigned long bd_failure:1;
-	/** {put,get}{source,sink} */
-	unsigned long bd_type:2;
 	/** client side */
 	unsigned long bd_registered:1;
 	/** For serialization with callback */
 	spinlock_t bd_lock;
 	/** Import generation when request for this bulk was sent */
 	int bd_import_generation;
+	/** {put,get}{source,sink}{kvec,kiov} */
+	enum ptlrpc_bulk_op_type bd_type;
 	/** LNet portal for this bulk */
 	__u32 bd_portal;
 	/** Server side - export this bulk created for */
@@ -1827,13 +1211,14 @@ struct ptlrpc_bulk_desc {
 	struct obd_import *bd_import;
 	/** Back pointer to the request */
 	struct ptlrpc_request *bd_req;
+	struct ptlrpc_bulk_frag_ops *bd_frag_ops;
 	wait_queue_head_t	    bd_waitq;	/* server side only WQ */
 	int		    bd_iov_count;    /* # entries in bd_iov */
 	int		    bd_max_iov;      /* allocated size of bd_iov */
 	int		    bd_nob;	  /* # bytes covered */
 	int		    bd_nob_transferred; /* # bytes GOT/PUT */
 
-	__u64		  bd_last_xid;
+	u64			bd_last_mbits;
 
 	struct ptlrpc_cb_id    bd_cbid;	 /* network callback info */
 	lnet_nid_t	     bd_sender;       /* stash event::sender */
@@ -1842,14 +1227,31 @@ struct ptlrpc_bulk_desc {
 	/** array of associated MDs */
 	lnet_handle_md_t	bd_mds[PTLRPC_BULK_OPS_COUNT];
 
-	/*
-	 * encrypt iov, size is either 0 or bd_iov_count.
-	 */
-	lnet_kiov_t	   *bd_enc_iov;
-
-	lnet_kiov_t	    bd_iov[0];
+	union {
+		struct {
+			/*
+			 * encrypt iov, size is either 0 or bd_iov_count.
+			 */
+			struct bio_vec *bd_enc_vec;
+			struct bio_vec *bd_vec;	/* Array of bio_vecs */
+		} bd_kiov;
+
+		struct {
+			struct kvec *bd_enc_kvec;
+			struct kvec *bd_kvec;	/* Array of kvecs */
+		} bd_kvec;
+	} bd_u;
 };
 
+#define GET_KIOV(desc)			((desc)->bd_u.bd_kiov.bd_vec)
+#define BD_GET_KIOV(desc, i)		((desc)->bd_u.bd_kiov.bd_vec[i])
+#define GET_ENC_KIOV(desc)		((desc)->bd_u.bd_kiov.bd_enc_vec)
+#define BD_GET_ENC_KIOV(desc, i)	((desc)->bd_u.bd_kiov.bd_enc_vec[i])
+#define GET_KVEC(desc)			((desc)->bd_u.bd_kvec.bd_kvec)
+#define BD_GET_KVEC(desc, i)		((desc)->bd_u.bd_kvec.bd_kvec[i])
+#define GET_ENC_KVEC(desc)		((desc)->bd_u.bd_kvec.bd_enc_kvec)
+#define BD_GET_ENC_KVEC(desc, i)	((desc)->bd_u.bd_kvec.bd_enc_kvec[i])
+
 enum {
 	SVC_STOPPED     = 1 << 0,
 	SVC_STOPPING    = 1 << 1,
@@ -2464,21 +1866,17 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
 void ptlrpc_req_finished(struct ptlrpc_request *request);
 struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req);
 struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req,
-					      unsigned npages, unsigned max_brw,
-					      unsigned type, unsigned portal);
-void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk, int pin);
-static inline void ptlrpc_free_bulk_pin(struct ptlrpc_bulk_desc *bulk)
-{
-	__ptlrpc_free_bulk(bulk, 1);
-}
-
-static inline void ptlrpc_free_bulk_nopin(struct ptlrpc_bulk_desc *bulk)
-{
-	__ptlrpc_free_bulk(bulk, 0);
-}
-
+					      unsigned int nfrags,
+					      unsigned int max_brw,
+					      unsigned int type,
+					      unsigned int portal,
+					      const struct ptlrpc_bulk_frag_ops *ops);
+
+int ptlrpc_prep_bulk_frag(struct ptlrpc_bulk_desc *desc,
+			  void *frag, int len);
 void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
-			     struct page *page, int pageoffset, int len, int);
+			     struct page *page, int pageoffset, int len,
+			     int pin);
 static inline void ptlrpc_prep_bulk_page_pin(struct ptlrpc_bulk_desc *desc,
 					     struct page *page, int pageoffset,
 					     int len)
@@ -2493,6 +1891,16 @@ static inline void ptlrpc_prep_bulk_page_nopin(struct ptlrpc_bulk_desc *desc,
 	__ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 0);
 }
 
+void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk);
+
+static inline void ptlrpc_release_bulk_page_pin(struct ptlrpc_bulk_desc *desc)
+{
+	int i;
+
+	for (i = 0; i < desc->bd_iov_count ; i++)
+		put_page(BD_GET_KIOV(desc, i).bv_page);
+}
+
 void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
 				      struct obd_import *imp);
 __u64 ptlrpc_next_xid(void);
@@ -2652,6 +2060,7 @@ struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg);
 __u32 lustre_msg_get_type(struct lustre_msg *msg);
 void lustre_msg_add_version(struct lustre_msg *msg, u32 version);
 __u32 lustre_msg_get_opc(struct lustre_msg *msg);
+__u16 lustre_msg_get_tag(struct lustre_msg *msg);
 __u64 lustre_msg_get_last_committed(struct lustre_msg *msg);
 __u64 *lustre_msg_get_versions(struct lustre_msg *msg);
 __u64 lustre_msg_get_transno(struct lustre_msg *msg);
@@ -2670,6 +2079,8 @@ void lustre_msg_set_handle(struct lustre_msg *msg,
 			   struct lustre_handle *handle);
 void lustre_msg_set_type(struct lustre_msg *msg, __u32 type);
 void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc);
+void lustre_msg_set_last_xid(struct lustre_msg *msg, u64 last_xid);
+void lustre_msg_set_tag(struct lustre_msg *msg, __u16 tag);
 void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions);
 void lustre_msg_set_transno(struct lustre_msg *msg, __u64 transno);
 void lustre_msg_set_status(struct lustre_msg *msg, __u32 status);
@@ -2679,6 +2090,7 @@ void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout);
 void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time);
 void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid);
 void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum);
+void lustre_msg_set_mbits(struct lustre_msg *msg, u64 mbits);
 
 static inline void
 lustre_shrink_reply(struct ptlrpc_request *req, int segment,
diff --git a/drivers/staging/lustre/lustre/include/lustre_nrs.h b/drivers/staging/lustre/lustre/include/lustre_nrs.h
new file mode 100644
index 000000000000..a5028aaa19cd
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_nrs.h
@@ -0,0 +1,717 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License version 2 for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * Copyright 2012 Xyratex Technology Limited
+ */
+/*
+ *
+ * Network Request Scheduler (NRS)
+ *
+ */
+
+#ifndef _LUSTRE_NRS_H
+#define _LUSTRE_NRS_H
+
+/**
+ * \defgroup nrs Network Request Scheduler
+ * @{
+ */
+struct ptlrpc_nrs_policy;
+struct ptlrpc_nrs_resource;
+struct ptlrpc_nrs_request;
+
+/**
+ * NRS control operations.
+ *
+ * These are common for all policies.
+ */
+enum ptlrpc_nrs_ctl {
+	/**
+	 * Not a valid opcode.
+	 */
+	PTLRPC_NRS_CTL_INVALID,
+	/**
+	 * Activate the policy.
+	 */
+	PTLRPC_NRS_CTL_START,
+	/**
+	 * Reserved for multiple primary policies, which may be a possibility
+	 * in the future.
+	 */
+	PTLRPC_NRS_CTL_STOP,
+	/**
+	 * Policies can start using opcodes from this value and onwards for
+	 * their own purposes; the assigned value itself is arbitrary.
+	 */
+	PTLRPC_NRS_CTL_1ST_POL_SPEC = 0x20,
+};
+
+/**
+ * NRS policy operations.
+ *
+ * These determine the behaviour of a policy, and are called in response to
+ * NRS core events.
+ */
+struct ptlrpc_nrs_pol_ops {
+	/**
+	 * Called during policy registration; this operation is optional.
+	 *
+	 * \param[in,out] policy The policy being initialized
+	 */
+	int	(*op_policy_init)(struct ptlrpc_nrs_policy *policy);
+	/**
+	 * Called during policy unregistration; this operation is optional.
+	 *
+	 * \param[in,out] policy The policy being unregistered/finalized
+	 */
+	void	(*op_policy_fini)(struct ptlrpc_nrs_policy *policy);
+	/**
+	 * Called when activating a policy via lprocfs; policies allocate and
+	 * initialize their resources here; this operation is optional.
+	 *
+	 * \param[in,out] policy The policy being started
+	 *
+	 * \see nrs_policy_start_locked()
+	 */
+	int	(*op_policy_start)(struct ptlrpc_nrs_policy *policy);
+	/**
+	 * Called when deactivating a policy via lprocfs; policies deallocate
+	 * their resources here; this operation is optional
+	 *
+	 * \param[in,out] policy The policy being stopped
+	 *
+	 * \see nrs_policy_stop0()
+	 */
+	void	(*op_policy_stop)(struct ptlrpc_nrs_policy *policy);
+	/**
+	 * Used for policy-specific operations; i.e. not generic ones like
+	 * \e PTLRPC_NRS_CTL_START and \e PTLRPC_NRS_CTL_GET_INFO; analogous
+	 * to an ioctl; this operation is optional.
+	 *
+	 * \param[in,out]	 policy The policy carrying out operation \a opc
+	 * \param[in]	  opc	 The command operation being carried out
+	 * \param[in,out] arg	 An generic buffer for communication between the
+	 *			 user and the control operation
+	 *
+	 * \retval -ve error
+	 * \retval   0 success
+	 *
+	 * \see ptlrpc_nrs_policy_control()
+	 */
+	int	(*op_policy_ctl)(struct ptlrpc_nrs_policy *policy,
+				 enum ptlrpc_nrs_ctl opc, void *arg);
+
+	/**
+	 * Called when obtaining references to the resources of the resource
+	 * hierarchy for a request that has arrived for handling at the PTLRPC
+	 * service. Policies should return -ve for requests they do not wish
+	 * to handle. This operation is mandatory.
+	 *
+	 * \param[in,out] policy  The policy we're getting resources for.
+	 * \param[in,out] nrq	  The request we are getting resources for.
+	 * \param[in]	  parent  The parent resource of the resource being
+	 *			  requested; set to NULL if none.
+	 * \param[out]	  resp	  The resource is to be returned here; the
+	 *			  fallback policy in an NRS head should
+	 *			  \e always return a non-NULL pointer value.
+	 * \param[in]  moving_req When set, signifies that this is an attempt
+	 *			  to obtain resources for a request being moved
+	 *			  to the high-priority NRS head by
+	 *			  ldlm_lock_reorder_req().
+	 *			  This implies two things:
+	 *			  1. We are under obd_export::exp_rpc_lock and
+	 *			  so should not sleep.
+	 *			  2. We should not perform non-idempotent or can
+	 *			  skip performing idempotent operations that
+	 *			  were carried out when resources were first
+	 *			  taken for the request when it was initialized
+	 *			  in ptlrpc_nrs_req_initialize().
+	 *
+	 * \retval 0, +ve The level of the returned resource in the resource
+	 *		  hierarchy; currently only 0 (for a non-leaf resource)
+	 *		  and 1 (for a leaf resource) are supported by the
+	 *		  framework.
+	 * \retval -ve	  error
+	 *
+	 * \see ptlrpc_nrs_req_initialize()
+	 * \see ptlrpc_nrs_hpreq_add_nolock()
+	 * \see ptlrpc_nrs_req_hp_move()
+	 */
+	int	(*op_res_get)(struct ptlrpc_nrs_policy *policy,
+			      struct ptlrpc_nrs_request *nrq,
+			      const struct ptlrpc_nrs_resource *parent,
+			      struct ptlrpc_nrs_resource **resp,
+			      bool moving_req);
+	/**
+	 * Called when releasing references taken for resources in the resource
+	 * hierarchy for the request; this operation is optional.
+	 *
+	 * \param[in,out] policy The policy the resource belongs to
+	 * \param[in] res	 The resource to be freed
+	 *
+	 * \see ptlrpc_nrs_req_finalize()
+	 * \see ptlrpc_nrs_hpreq_add_nolock()
+	 * \see ptlrpc_nrs_req_hp_move()
+	 */
+	void	(*op_res_put)(struct ptlrpc_nrs_policy *policy,
+			      const struct ptlrpc_nrs_resource *res);
+
+	/**
+	 * Obtains a request for handling from the policy, and optionally
+	 * removes the request from the policy; this operation is mandatory.
+	 *
+	 * \param[in,out] policy The policy to poll
+	 * \param[in]	  peek	 When set, signifies that we just want to
+	 *			 examine the request, and not handle it, so the
+	 *			 request is not removed from the policy.
+	 * \param[in]	  force  When set, it will force a policy to return a
+	 *			 request if it has one queued.
+	 *
+	 * \retval NULL No request available for handling
+	 * \retval valid-pointer The request polled for handling
+	 *
+	 * \see ptlrpc_nrs_req_get_nolock()
+	 */
+	struct ptlrpc_nrs_request *
+		(*op_req_get)(struct ptlrpc_nrs_policy *policy, bool peek,
+			      bool force);
+	/**
+	 * Called when attempting to add a request to a policy for later
+	 * handling; this operation is mandatory.
+	 *
+	 * \param[in,out] policy  The policy on which to enqueue \a nrq
+	 * \param[in,out] nrq The request to enqueue
+	 *
+	 * \retval 0	success
+	 * \retval != 0 error
+	 *
+	 * \see ptlrpc_nrs_req_add_nolock()
+	 */
+	int	(*op_req_enqueue)(struct ptlrpc_nrs_policy *policy,
+				  struct ptlrpc_nrs_request *nrq);
+	/**
+	 * Removes a request from the policy's set of pending requests. Normally
+	 * called after a request has been polled successfully from the policy
+	 * for handling; this operation is mandatory.
+	 *
+	 * \param[in,out] policy The policy the request \a nrq belongs to
+	 * \param[in,out] nrq	 The request to dequeue
+	 *
+	 * \see ptlrpc_nrs_req_del_nolock()
+	 */
+	void	(*op_req_dequeue)(struct ptlrpc_nrs_policy *policy,
+				  struct ptlrpc_nrs_request *nrq);
+	/**
+	 * Called after the request being carried out. Could be used for
+	 * job/resource control; this operation is optional.
+	 *
+	 * \param[in,out] policy The policy which is stopping to handle request
+	 *			 \a nrq
+	 * \param[in,out] nrq	 The request
+	 *
+	 * \pre assert_spin_locked(&svcpt->scp_req_lock)
+	 *
+	 * \see ptlrpc_nrs_req_stop_nolock()
+	 */
+	void	(*op_req_stop)(struct ptlrpc_nrs_policy *policy,
+			       struct ptlrpc_nrs_request *nrq);
+	/**
+	 * Registers the policy's lprocfs interface with a PTLRPC service.
+	 *
+	 * \param[in] svc The service
+	 *
+	 * \retval 0	success
+	 * \retval != 0 error
+	 */
+	int	(*op_lprocfs_init)(struct ptlrpc_service *svc);
+	/**
+	 * Unegisters the policy's lprocfs interface with a PTLRPC service.
+	 *
+	 * In cases of failed policy registration in
+	 * \e ptlrpc_nrs_policy_register(), this function may be called for a
+	 * service which has not registered the policy successfully, so
+	 * implementations of this method should make sure their operations are
+	 * safe in such cases.
+	 *
+	 * \param[in] svc The service
+	 */
+	void	(*op_lprocfs_fini)(struct ptlrpc_service *svc);
+};
+
+/**
+ * Policy flags
+ */
+enum nrs_policy_flags {
+	/**
+	 * Fallback policy, use this flag only on a single supported policy per
+	 * service. The flag cannot be used on policies that use
+	 * \e PTLRPC_NRS_FL_REG_EXTERN
+	 */
+	PTLRPC_NRS_FL_FALLBACK		= BIT(0),
+	/**
+	 * Start policy immediately after registering.
+	 */
+	PTLRPC_NRS_FL_REG_START		= BIT(1),
+	/**
+	 * This is a policy registering from a module different to the one NRS
+	 * core ships in (currently ptlrpc).
+	 */
+	PTLRPC_NRS_FL_REG_EXTERN	= BIT(2),
+};
+
+/**
+ * NRS queue type.
+ *
+ * Denotes whether an NRS instance is for handling normal or high-priority
+ * RPCs, or whether an operation pertains to one or both of the NRS instances
+ * in a service.
+ */
+enum ptlrpc_nrs_queue_type {
+	PTLRPC_NRS_QUEUE_REG	= BIT(0),
+	PTLRPC_NRS_QUEUE_HP	= BIT(1),
+	PTLRPC_NRS_QUEUE_BOTH	= (PTLRPC_NRS_QUEUE_REG | PTLRPC_NRS_QUEUE_HP)
+};
+
+/**
+ * NRS head
+ *
+ * A PTLRPC service has at least one NRS head instance for handling normal
+ * priority RPCs, and may optionally have a second NRS head instance for
+ * handling high-priority RPCs. Each NRS head maintains a list of available
+ * policies, of which one and only one policy is acting as the fallback policy,
+ * and optionally a different policy may be acting as the primary policy. For
+ * all RPCs handled by this NRS head instance, NRS core will first attempt to
+ * enqueue the RPC using the primary policy (if any). The fallback policy is
+ * used in the following cases:
+ * - when there was no primary policy in the
+ *   ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the request
+ *   was initialized.
+ * - when the primary policy that was at the
+ *   ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the
+ *   RPC was initialized, denoted it did not wish, or for some other reason was
+ *   not able to handle the request, by returning a non-valid NRS resource
+ *   reference.
+ * - when the primary policy that was at the
+ *   ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the
+ *   RPC was initialized, fails later during the request enqueueing stage.
+ *
+ * \see nrs_resource_get_safe()
+ * \see nrs_request_enqueue()
+ */
+struct ptlrpc_nrs {
+	spinlock_t			nrs_lock;
+	/** XXX Possibly replace svcpt->scp_req_lock with another lock here. */
+	/**
+	 * List of registered policies
+	 */
+	struct list_head		nrs_policy_list;
+	/**
+	 * List of policies with queued requests. Policies that have any
+	 * outstanding requests are queued here, and this list is queried
+	 * in a round-robin manner from NRS core when obtaining a request
+	 * for handling. This ensures that requests from policies that at some
+	 * point transition away from the
+	 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state are drained.
+	 */
+	struct list_head		nrs_policy_queued;
+	/**
+	 * Service partition for this NRS head
+	 */
+	struct ptlrpc_service_part     *nrs_svcpt;
+	/**
+	 * Primary policy, which is the preferred policy for handling RPCs
+	 */
+	struct ptlrpc_nrs_policy       *nrs_policy_primary;
+	/**
+	 * Fallback policy, which is the backup policy for handling RPCs
+	 */
+	struct ptlrpc_nrs_policy       *nrs_policy_fallback;
+	/**
+	 * This NRS head handles either HP or regular requests
+	 */
+	enum ptlrpc_nrs_queue_type	nrs_queue_type;
+	/**
+	 * # queued requests from all policies in this NRS head
+	 */
+	unsigned long			nrs_req_queued;
+	/**
+	 * # scheduled requests from all policies in this NRS head
+	 */
+	unsigned long			nrs_req_started;
+	/**
+	 * # policies on this NRS
+	 */
+	unsigned int			nrs_num_pols;
+	/**
+	 * This NRS head is in progress of starting a policy
+	 */
+	unsigned int			nrs_policy_starting:1;
+	/**
+	 * In progress of shutting down the whole NRS head; used during
+	 * unregistration
+	 */
+	unsigned int			nrs_stopping:1;
+	/**
+	 * NRS policy is throttling request
+	 */
+	unsigned int			nrs_throttling:1;
+};
+
+#define NRS_POL_NAME_MAX		16
+#define NRS_POL_ARG_MAX			16
+
+struct ptlrpc_nrs_pol_desc;
+
+/**
+ * Service compatibility predicate; this determines whether a policy is adequate
+ * for handling RPCs of a particular PTLRPC service.
+ *
+ * XXX:This should give the same result during policy registration and
+ * unregistration, and for all partitions of a service; so the result should not
+ * depend on temporal service or other properties, that may influence the
+ * result.
+ */
+typedef bool (*nrs_pol_desc_compat_t)(const struct ptlrpc_service *svc,
+				      const struct ptlrpc_nrs_pol_desc *desc);
+
+struct ptlrpc_nrs_pol_conf {
+	/**
+	 * Human-readable policy name
+	 */
+	char				   nc_name[NRS_POL_NAME_MAX];
+	/**
+	 * NRS operations for this policy
+	 */
+	const struct ptlrpc_nrs_pol_ops   *nc_ops;
+	/**
+	 * Service compatibility predicate
+	 */
+	nrs_pol_desc_compat_t		   nc_compat;
+	/**
+	 * Set for policies that support a single ptlrpc service, i.e. ones that
+	 * have \a pd_compat set to nrs_policy_compat_one(). The variable value
+	 * depicts the name of the single service that such policies are
+	 * compatible with.
+	 */
+	const char			  *nc_compat_svc_name;
+	/**
+	 * Owner module for this policy descriptor; policies registering from a
+	 * different module to the one the NRS framework is held within
+	 * (currently ptlrpc), should set this field to THIS_MODULE.
+	 */
+	struct module			  *nc_owner;
+	/**
+	 * Policy registration flags; a bitmask of \e nrs_policy_flags
+	 */
+	unsigned int			   nc_flags;
+};
+
+/**
+ * NRS policy registering descriptor
+ *
+ * Is used to hold a description of a policy that can be passed to NRS core in
+ * order to register the policy with NRS heads in different PTLRPC services.
+ */
+struct ptlrpc_nrs_pol_desc {
+	/**
+	 * Human-readable policy name
+	 */
+	char					pd_name[NRS_POL_NAME_MAX];
+	/**
+	 * Link into nrs_core::nrs_policies
+	 */
+	struct list_head			pd_list;
+	/**
+	 * NRS operations for this policy
+	 */
+	const struct ptlrpc_nrs_pol_ops        *pd_ops;
+	/**
+	 * Service compatibility predicate
+	 */
+	nrs_pol_desc_compat_t			pd_compat;
+	/**
+	 * Set for policies that are compatible with only one PTLRPC service.
+	 *
+	 * \see ptlrpc_nrs_pol_conf::nc_compat_svc_name
+	 */
+	const char			       *pd_compat_svc_name;
+	/**
+	 * Owner module for this policy descriptor.
+	 *
+	 * We need to hold a reference to the module whenever we might make use
+	 * of any of the module's contents, i.e.
+	 * - If one or more instances of the policy are at a state where they
+	 *   might be handling a request, i.e.
+	 *   ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED or
+	 *   ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING as we will have to
+	 *   call into the policy's ptlrpc_nrs_pol_ops() handlers. A reference
+	 *   is taken on the module when
+	 *   \e ptlrpc_nrs_pol_desc::pd_refs becomes 1, and released when it
+	 *   becomes 0, so that we hold only one reference to the module maximum
+	 *   at any time.
+	 *
+	 *   We do not need to hold a reference to the module, even though we
+	 *   might use code and data from the module, in the following cases:
+	 * - During external policy registration, because this should happen in
+	 *   the module's init() function, in which case the module is safe from
+	 *   removal because a reference is being held on the module by the
+	 *   kernel, and iirc kmod (and I guess module-init-tools also) will
+	 *   serialize any racing processes properly anyway.
+	 * - During external policy unregistration, because this should happen
+	 *   in a module's exit() function, and any attempts to start a policy
+	 *   instance would need to take a reference on the module, and this is
+	 *   not possible once we have reached the point where the exit()
+	 *   handler is called.
+	 * - During service registration and unregistration, as service setup
+	 *   and cleanup, and policy registration, unregistration and policy
+	 *   instance starting, are serialized by \e nrs_core::nrs_mutex, so
+	 *   as long as users adhere to the convention of registering policies
+	 *   in init() and unregistering them in module exit() functions, there
+	 *   should not be a race between these operations.
+	 * - During any policy-specific lprocfs operations, because a reference
+	 *   is held by the kernel on a proc entry that has been entered by a
+	 *   syscall, so as long as proc entries are removed during
+	 *   unregistration time, then unregistration and lprocfs operations
+	 *   will be properly serialized.
+	 */
+	struct module			       *pd_owner;
+	/**
+	 * Bitmask of \e nrs_policy_flags
+	 */
+	unsigned int				pd_flags;
+	/**
+	 * # of references on this descriptor
+	 */
+	atomic_t				pd_refs;
+};
+
+/**
+ * NRS policy state
+ *
+ * Policies transition from one state to the other during their lifetime
+ */
+enum ptlrpc_nrs_pol_state {
+	/**
+	 * Not a valid policy state.
+	 */
+	NRS_POL_STATE_INVALID,
+	/**
+	 * Policies are at this state either at the start of their life, or
+	 * transition here when the user selects a different policy to act
+	 * as the primary one.
+	 */
+	NRS_POL_STATE_STOPPED,
+	/**
+	 * Policy is progress of stopping
+	 */
+	NRS_POL_STATE_STOPPING,
+	/**
+	 * Policy is in progress of starting
+	 */
+	NRS_POL_STATE_STARTING,
+	/**
+	 * A policy is in this state in two cases:
+	 * - it is the fallback policy, which is always in this state.
+	 * - it has been activated by the user; i.e. it is the primary policy,
+	 */
+	NRS_POL_STATE_STARTED,
+};
+
+/**
+ * NRS policy information
+ *
+ * Used for obtaining information for the status of a policy via lprocfs
+ */
+struct ptlrpc_nrs_pol_info {
+	/**
+	 * Policy name
+	 */
+	char				pi_name[NRS_POL_NAME_MAX];
+	/**
+	 * Policy argument
+	 */
+	char				pi_arg[NRS_POL_ARG_MAX];
+	/**
+	 * Current policy state
+	 */
+	enum ptlrpc_nrs_pol_state	pi_state;
+	/**
+	 * # RPCs enqueued for later dispatching by the policy
+	 */
+	long				pi_req_queued;
+	/**
+	 * # RPCs started for dispatch by the policy
+	 */
+	long				pi_req_started;
+	/**
+	 * Is this a fallback policy?
+	 */
+	unsigned			pi_fallback:1;
+};
+
+/**
+ * NRS policy
+ *
+ * There is one instance of this for each policy in each NRS head of each
+ * PTLRPC service partition.
+ */
+struct ptlrpc_nrs_policy {
+	/**
+	 * Linkage into the NRS head's list of policies,
+	 * ptlrpc_nrs:nrs_policy_list
+	 */
+	struct list_head		pol_list;
+	/**
+	 * Linkage into the NRS head's list of policies with enqueued
+	 * requests ptlrpc_nrs:nrs_policy_queued
+	 */
+	struct list_head		pol_list_queued;
+	/**
+	 * Current state of this policy
+	 */
+	enum ptlrpc_nrs_pol_state	pol_state;
+	/**
+	 * Bitmask of nrs_policy_flags
+	 */
+	unsigned int			pol_flags;
+	/**
+	 * # RPCs enqueued for later dispatching by the policy
+	 */
+	long				pol_req_queued;
+	/**
+	 * # RPCs started for dispatch by the policy
+	 */
+	long				pol_req_started;
+	/**
+	 * Usage Reference count taken on the policy instance
+	 */
+	long				pol_ref;
+	/**
+	 * Human-readable policy argument
+	 */
+	char				pol_arg[NRS_POL_ARG_MAX];
+	/**
+	 * The NRS head this policy has been created at
+	 */
+	struct ptlrpc_nrs	       *pol_nrs;
+	/**
+	 * Private policy data; varies by policy type
+	 */
+	void			       *pol_private;
+	/**
+	 * Policy descriptor for this policy instance.
+	 */
+	struct ptlrpc_nrs_pol_desc     *pol_desc;
+};
+
+/**
+ * NRS resource
+ *
+ * Resources are embedded into two types of NRS entities:
+ * - Inside NRS policies, in the policy's private data in
+ *   ptlrpc_nrs_policy::pol_private
+ * - In objects that act as prime-level scheduling entities in different NRS
+ *   policies; e.g. on a policy that performs round robin or similar order
+ *   scheduling across client NIDs, there would be one NRS resource per unique
+ *   client NID. On a policy which performs round robin scheduling across
+ *   backend filesystem objects, there would be one resource associated with
+ *   each of the backend filesystem objects partaking in the scheduling
+ *   performed by the policy.
+ *
+ * NRS resources share a parent-child relationship, in which resources embedded
+ * in policy instances are the parent entities, with all scheduling entities
+ * a policy schedules across being the children, thus forming a simple resource
+ * hierarchy. This hierarchy may be extended with one or more levels in the
+ * future if the ability to have more than one primary policy is added.
+ *
+ * Upon request initialization, references to the then active NRS policies are
+ * taken and used to later handle the dispatching of the request with one of
+ * these policies.
+ *
+ * \see nrs_resource_get_safe()
+ * \see ptlrpc_nrs_req_add()
+ */
+struct ptlrpc_nrs_resource {
+	/**
+	 * This NRS resource's parent; is NULL for resources embedded in NRS
+	 * policy instances; i.e. those are top-level ones.
+	 */
+	struct ptlrpc_nrs_resource     *res_parent;
+	/**
+	 * The policy associated with this resource.
+	 */
+	struct ptlrpc_nrs_policy       *res_policy;
+};
+
+enum {
+	NRS_RES_FALLBACK,
+	NRS_RES_PRIMARY,
+	NRS_RES_MAX
+};
+
+#include "lustre_nrs_fifo.h"
+
+/**
+ * NRS request
+ *
+ * Instances of this object exist embedded within ptlrpc_request; the main
+ * purpose of this object is to hold references to the request's resources
+ * for the lifetime of the request, and to hold properties that policies use
+ * use for determining the request's scheduling priority.
+ **/
+struct ptlrpc_nrs_request {
+	/**
+	 * The request's resource hierarchy.
+	 */
+	struct ptlrpc_nrs_resource     *nr_res_ptrs[NRS_RES_MAX];
+	/**
+	 * Index into ptlrpc_nrs_request::nr_res_ptrs of the resource of the
+	 * policy that was used to enqueue the request.
+	 *
+	 * \see nrs_request_enqueue()
+	 */
+	unsigned int			nr_res_idx;
+	unsigned int			nr_initialized:1;
+	unsigned int			nr_enqueued:1;
+	unsigned int			nr_started:1;
+	unsigned int			nr_finalized:1;
+
+	/**
+	 * Policy-specific fields, used for determining a request's scheduling
+	 * priority, and other supporting functionality.
+	 */
+	union {
+		/**
+		 * Fields for the FIFO policy
+		 */
+		struct nrs_fifo_req	fifo;
+	} nr_u;
+	/**
+	 * Externally-registering policies may want to use this to allocate
+	 * their own request properties.
+	 */
+	void			       *ext;
+};
+
+/** @} nrs */
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h b/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h
new file mode 100644
index 000000000000..3b5418eac6c4
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h
@@ -0,0 +1,70 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License version 2 for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * Copyright 2012 Xyratex Technology Limited
+ */
+/*
+ *
+ * Network Request Scheduler (NRS) First-in First-out (FIFO) policy
+ *
+ */
+
+#ifndef _LUSTRE_NRS_FIFO_H
+#define _LUSTRE_NRS_FIFO_H
+
+/* \name fifo
+ *
+ * FIFO policy
+ *
+ * This policy is a logical wrapper around previous, non-NRS functionality.
+ * It dispatches RPCs in the same order as they arrive from the network. This
+ * policy is currently used as the fallback policy, and the only enabled policy
+ * on all NRS heads of all PTLRPC service partitions.
+ * @{
+ */
+
+/**
+ * Private data structure for the FIFO policy
+ */
+struct nrs_fifo_head {
+	/**
+	 * Resource object for policy instance.
+	 */
+	struct ptlrpc_nrs_resource	fh_res;
+	/**
+	 * List of queued requests.
+	 */
+	struct list_head		fh_list;
+	/**
+	 * For debugging purposes.
+	 */
+	__u64				fh_sequence;
+};
+
+struct nrs_fifo_req {
+	struct list_head	fr_list;
+	__u64			fr_sequence;
+};
+
+/** @} fifo */
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_req_layout.h b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
index a13558e53274..fbcd39572cd0 100644
--- a/drivers/staging/lustre/lustre/include/lustre_req_layout.h
+++ b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
@@ -148,13 +148,12 @@ extern struct req_format RQF_MDS_GETATTR;
  */
 extern struct req_format RQF_MDS_GETATTR_NAME;
 extern struct req_format RQF_MDS_CLOSE;
-extern struct req_format RQF_MDS_RELEASE_CLOSE;
+extern struct req_format RQF_MDS_INTENT_CLOSE;
 extern struct req_format RQF_MDS_CONNECT;
 extern struct req_format RQF_MDS_DISCONNECT;
 extern struct req_format RQF_MDS_GET_INFO;
 extern struct req_format RQF_MDS_READPAGE;
 extern struct req_format RQF_MDS_WRITEPAGE;
-extern struct req_format RQF_MDS_DONE_WRITING;
 extern struct req_format RQF_MDS_REINT;
 extern struct req_format RQF_MDS_REINT_CREATE;
 extern struct req_format RQF_MDS_REINT_CREATE_ACL;
@@ -166,10 +165,9 @@ extern struct req_format RQF_MDS_REINT_LINK;
 extern struct req_format RQF_MDS_REINT_RENAME;
 extern struct req_format RQF_MDS_REINT_SETATTR;
 extern struct req_format RQF_MDS_REINT_SETXATTR;
-extern struct req_format RQF_MDS_QUOTACHECK;
 extern struct req_format RQF_MDS_QUOTACTL;
-extern struct req_format RQF_QC_CALLBACK;
 extern struct req_format RQF_MDS_SWAP_LAYOUTS;
+extern struct req_format RQF_MDS_REINT_MIGRATE;
 /* MDS hsm formats */
 extern struct req_format RQF_MDS_HSM_STATE_GET;
 extern struct req_format RQF_MDS_HSM_STATE_SET;
@@ -181,7 +179,6 @@ extern struct req_format RQF_MDS_HSM_REQUEST;
 /* OST req_format */
 extern struct req_format RQF_OST_CONNECT;
 extern struct req_format RQF_OST_DISCONNECT;
-extern struct req_format RQF_OST_QUOTACHECK;
 extern struct req_format RQF_OST_QUOTACTL;
 extern struct req_format RQF_OST_GETATTR;
 extern struct req_format RQF_OST_SETATTR;
diff --git a/drivers/staging/lustre/lustre/include/lustre_sec.h b/drivers/staging/lustre/lustre/include/lustre_sec.h
index 90c183424802..03a970bcac55 100644
--- a/drivers/staging/lustre/lustre/include/lustre_sec.h
+++ b/drivers/staging/lustre/lustre/include/lustre_sec.h
@@ -50,6 +50,7 @@ struct brw_page;
 /* Linux specific */
 struct key;
 struct seq_file;
+struct lustre_cfg;
 
 /*
  * forward declaration
@@ -1029,6 +1030,8 @@ int  sptlrpc_target_export_check(struct obd_export *exp,
 
 /* bulk security api */
 void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc);
+int get_free_pages_in_pool(void);
+int pool_is_at_full_capacity(void);
 
 int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
 			  struct ptlrpc_bulk_desc *desc);
diff --git a/drivers/staging/lustre/lustre/include/lustre_swab.h b/drivers/staging/lustre/lustre/include/lustre_swab.h
new file mode 100644
index 000000000000..26d01c2d6633
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_swab.h
@@ -0,0 +1,102 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2014, Intel Corporation.
+ *
+ * Copyright 2015 Cray Inc, all rights reserved.
+ * Author: Ben Evans.
+ *
+ * We assume all nodes are either little-endian or big-endian, and we
+ * always send messages in the sender's native format.  The receiver
+ * detects the message format by checking the 'magic' field of the message
+ * (see lustre_msg_swabbed() below).
+ *
+ * Each wire type has corresponding 'lustre_swab_xxxtypexxx()' routines
+ * are implemented in ptlrpc/lustre_swab.c.  These 'swabbers' convert the
+ * type from "other" endian, in-place in the message buffer.
+ *
+ * A swabber takes a single pointer argument.  The caller must already have
+ * verified that the length of the message buffer >= sizeof (type).
+ *
+ * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine
+ * may be defined that swabs just the variable part, after the caller has
+ * verified that the message buffer is large enough.
+ */
+
+#ifndef _LUSTRE_SWAB_H_
+#define _LUSTRE_SWAB_H_
+
+#include "lustre/lustre_idl.h"
+
+void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
+void lustre_swab_connect(struct obd_connect_data *ocd);
+void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
+void lustre_swab_hsm_state_set(struct hsm_state_set *hss);
+void lustre_swab_obd_statfs(struct obd_statfs *os);
+void lustre_swab_obd_ioobj(struct obd_ioobj *ioo);
+void lustre_swab_niobuf_remote(struct niobuf_remote *nbr);
+void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb);
+void lustre_swab_ost_lvb(struct ost_lvb *lvb);
+void lustre_swab_obd_quotactl(struct obd_quotactl *q);
+void lustre_swab_lquota_lvb(struct lquota_lvb *lvb);
+void lustre_swab_generic_32s(__u32 *val);
+void lustre_swab_mdt_body(struct mdt_body *b);
+void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b);
+void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa);
+void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr);
+void lustre_swab_lmv_desc(struct lmv_desc *ld);
+void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm);
+void lustre_swab_lov_desc(struct lov_desc *ld);
+void lustre_swab_gl_desc(union ldlm_gl_desc *desc);
+void lustre_swab_ldlm_intent(struct ldlm_intent *i);
+void lustre_swab_ldlm_request(struct ldlm_request *rq);
+void lustre_swab_ldlm_reply(struct ldlm_reply *r);
+void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo);
+void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *oinfo);
+void lustre_swab_mgs_config_body(struct mgs_config_body *body);
+void lustre_swab_mgs_config_res(struct mgs_config_res *body);
+void lustre_swab_ost_body(struct ost_body *b);
+void lustre_swab_ost_last_id(__u64 *id);
+void lustre_swab_fiemap(struct fiemap *fiemap);
+void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum);
+void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum);
+void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
+				     int stripe_count);
+void lustre_swab_lov_mds_md(struct lov_mds_md *lmm);
+void lustre_swab_lustre_capa(struct lustre_capa *c);
+void lustre_swab_lustre_capa_key(struct lustre_capa_key *k);
+void lustre_swab_fid2path(struct getinfo_fid2path *gf);
+void lustre_swab_layout_intent(struct layout_intent *li);
+void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
+void lustre_swab_hsm_current_action(struct hsm_current_action *action);
+void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk);
+void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
+void lustre_swab_hsm_user_item(struct hsm_user_item *hui);
+void lustre_swab_hsm_request(struct hsm_request *hr);
+void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl);
+void lustre_swab_close_data(struct close_data *data);
+void lustre_swab_lmv_user_md(struct lmv_user_md *lum);
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h
index f6fc4dd05bd6..0f48e9c3d9e3 100644
--- a/drivers/staging/lustre/lustre/include/obd.h
+++ b/drivers/staging/lustre/lustre/include/obd.h
@@ -73,70 +73,17 @@ static inline void loi_init(struct lov_oinfo *loi)
 {
 }
 
-/*
- * If we are unable to get the maximum object size from the OST in
- * ocd_maxbytes using OBD_CONNECT_MAXBYTES, then we fall back to using
- * the old maximum object size from ext3.
- */
-#define LUSTRE_EXT3_STRIPE_MAXBYTES 0x1fffffff000ULL
-
-struct lov_stripe_md {
-	atomic_t     lsm_refc;
-	spinlock_t	lsm_lock;
-	pid_t	    lsm_lock_owner; /* debugging */
-
-	/* maximum possible file size, might change as OSTs status changes,
-	 * e.g. disconnected, deactivated
-	 */
-	__u64		lsm_maxbytes;
-	struct ost_id	lsm_oi;
-	__u32		lsm_magic;
-	__u32		lsm_stripe_size;
-	__u32		lsm_pattern;	/* striping pattern (RAID0, RAID1) */
-	__u16		lsm_stripe_count;
-	__u16		lsm_layout_gen;
-	char		lsm_pool_name[LOV_MAXPOOLNAME + 1];
-	struct lov_oinfo *lsm_oinfo[0];
-};
-
-static inline bool lsm_is_released(struct lov_stripe_md *lsm)
-{
-	return !!(lsm->lsm_pattern & LOV_PATTERN_F_RELEASED);
-}
-
-static inline bool lsm_has_objects(struct lov_stripe_md *lsm)
-{
-	if (!lsm)
-		return false;
-	if (lsm_is_released(lsm))
-		return false;
-	return true;
-}
-
-static inline int lov_stripe_md_size(unsigned int stripe_count)
-{
-	struct lov_stripe_md lsm;
-
-	return sizeof(lsm) + stripe_count * sizeof(lsm.lsm_oinfo[0]);
-}
-
+struct lov_stripe_md;
 struct obd_info;
 
 typedef int (*obd_enqueue_update_f)(void *cookie, int rc);
 
 /* obd info for a particular level (lov, osc). */
 struct obd_info {
-	/* Flags used for set request specific flags:
-	   - while lock handling, the flags obtained on the enqueue
-	   request are set here.
-	   - while stats, the flags used for control delay/resend.
-	   - while setattr, the flags used for distinguish punch operation
-	 */
+	/* OBD_STATFS_* flags */
 	__u64		   oi_flags;
 	/* lsm data specific for every OSC. */
 	struct lov_stripe_md   *oi_md;
-	/* obdo data specific for every OSC, if needed at all. */
-	struct obdo	    *oi_oa;
 	/* statfs data specific for every OSC, if needed at all. */
 	struct obd_statfs      *oi_osfs;
 	/* An update callback which is called to update some data on upper
@@ -204,7 +151,6 @@ enum obd_cl_sem_lock_class {
  * on the MDS.
  */
 #define OBD_MAX_DEFAULT_EA_SIZE		4096
-#define OBD_MAX_DEFAULT_COOKIE_SIZE	4096
 
 struct mdc_rpc_lock;
 struct obd_import;
@@ -214,7 +160,7 @@ struct client_obd {
 	struct obd_import       *cl_import; /* ptlrpc connection state */
 	size_t			 cl_conn_count;
 	/*
-	 * Cache maximum and default values for easize and cookiesize. This is
+	 * Cache maximum and default values for easize. This is
 	 * strictly a performance optimization to minimize calls to
 	 * obd_size_diskmd(). The default values are used to calculate the
 	 * initial size of a request buffer. The ptlrpc layer will resize the
@@ -235,18 +181,6 @@ struct client_obd {
 	 * run-time if a larger observed size is advertised by the MDT.
 	 */
 	u32			 cl_max_mds_easize;
-	/* Default cookie size for llog cookies (see struct llog_cookie). It is
-	 * initialized to zero at mount-time, then it tracks the largest
-	 * observed cookie size advertised by the MDT, up to a maximum value of
-	 * OBD_MAX_DEFAULT_COOKIE_SIZE. Note that llog_cookies are not
-	 * used by clients communicating with MDS versions 2.4.0 and later.
-	 */
-	u32			 cl_default_mds_cookiesize;
-	/* Maximum possible cookie size computed at mount-time based on
-	 * the number of OSTs in the filesystem. May be increased at
-	 * run-time if a larger observed size is advertised by the MDT.
-	 */
-	u32			 cl_max_mds_cookiesize;
 
 	enum lustre_sec_part     cl_sp_me;
 	enum lustre_sec_part     cl_sp_to;
@@ -313,15 +247,42 @@ struct client_obd {
 	struct obd_histogram     cl_read_offset_hist;
 	struct obd_histogram     cl_write_offset_hist;
 
-	/* lru for osc caching pages */
+	/* LRU for osc caching pages */
 	struct cl_client_cache	*cl_cache;
-	struct list_head	 cl_lru_osc; /* member of cl_cache->ccc_lru */
+	/** member of cl_cache->ccc_lru */
+	struct list_head	 cl_lru_osc;
+	/** # of available LRU slots left in the per-OSC cache.
+	 * Available LRU slots are shared by all OSCs of the same file system,
+	 * therefore this is a pointer to cl_client_cache::ccc_lru_left.
+	 */
 	atomic_long_t		*cl_lru_left;
+	/** # of busy LRU pages. A page is considered busy if it's in writeback
+	 * queue, or in transfer. Busy pages can't be discarded so they are not
+	 * in LRU cache.
+	 */
 	atomic_long_t		 cl_lru_busy;
+	/** # of LRU pages in the cache for this client_obd */
 	atomic_long_t		 cl_lru_in_list;
+	/** # of threads are shrinking LRU cache. To avoid contention, it's not
+	 * allowed to have multiple threads shrinking LRU cache.
+	 */
 	atomic_t		 cl_lru_shrinkers;
-	struct list_head	 cl_lru_list; /* lru page list */
-	spinlock_t		 cl_lru_list_lock; /* page list protector */
+	/** The time when this LRU cache was last used. */
+	time64_t		 cl_lru_last_used;
+	/** stats: how many reclaims have happened for this client_obd.
+	 * reclaim and shrink - shrink is async, voluntarily rebalancing;
+	 * reclaim is sync, initiated by IO thread when the LRU slots are
+	 * in shortage.
+	 */
+	u64			 cl_lru_reclaim;
+	/** List of LRU pages for this client_obd */
+	struct list_head	 cl_lru_list;
+	/** Lock for LRU page list */
+	spinlock_t		 cl_lru_list_lock;
+	/** # of unstable pages in this client_obd.
+	 * An unstable page is a page state that WRITE RPC has finished but
+	 * the transaction has NOT yet committed.
+	 */
 	atomic_long_t		 cl_unstable_count;
 
 	/* number of in flight destroy rpcs is limited to max_rpcs_in_flight */
@@ -329,7 +290,17 @@ struct client_obd {
 	wait_queue_head_t	      cl_destroy_waitq;
 
 	struct mdc_rpc_lock     *cl_rpc_lock;
-	struct mdc_rpc_lock     *cl_close_lock;
+
+	/* modify rpcs in flight
+	 * currently used for metadata only
+	 */
+	spinlock_t		 cl_mod_rpcs_lock;
+	u16			 cl_max_mod_rpcs_in_flight;
+	u16			 cl_mod_rpcs_in_flight;
+	u16			 cl_close_rpcs_in_flight;
+	wait_queue_head_t	 cl_mod_rpcs_waitq;
+	unsigned long		*cl_mod_tag_bitmap;
+	struct obd_histogram	 cl_mod_rpcs_hist;
 
 	/* mgc datastruct */
 	atomic_t	     cl_mgc_refcount;
@@ -345,13 +316,6 @@ struct client_obd {
 	/* also protected by the poorly named _loi_list_lock lock above */
 	struct osc_async_rc      cl_ar;
 
-	/* used by quotacheck when the servers are older than 2.4 */
-	int		      cl_qchk_stat; /* quotacheck stat of the peer */
-#define CL_NOT_QUOTACHECKED 1   /* client->cl_qchk_stat init value */
-#if OBD_OCD_VERSION(2, 7, 53, 0) < LUSTRE_VERSION_CODE
-#warning "please consider removing quotacheck compatibility code"
-#endif
-
 	/* sequence manager */
 	struct lu_client_seq    *cl_seq;
 
@@ -454,8 +418,6 @@ struct lmv_obd {
 	int			connected;
 	int			max_easize;
 	int			max_def_easize;
-	int			max_cookiesize;
-	int			max_def_cookiesize;
 
 	u32			tgts_size; /* size of tgts array */
 	struct lmv_tgt_desc	**tgts;
@@ -469,9 +431,9 @@ struct niobuf_local {
 	__u32		lnb_page_offset;
 	__u32		lnb_len;
 	__u32		lnb_flags;
+	int		lnb_rc;
 	struct page	*lnb_page;
 	void		*lnb_data;
-	int		lnb_rc;
 };
 
 #define LUSTRE_FLD_NAME	 "fld"
@@ -512,21 +474,6 @@ struct niobuf_local {
 /* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */
 #define N_LOCAL_TEMP_PAGE 0x10000000
 
-struct obd_trans_info {
-	__u64		    oti_xid;
-	/* Only used on the server side for tracking acks. */
-	struct oti_req_ack_lock {
-		struct lustre_handle lock;
-		__u32		mode;
-	}			oti_ack_locks[4];
-	void		    *oti_handle;
-	struct llog_cookie       oti_onecookie;
-	struct llog_cookie      *oti_logcookies;
-
-	/** VBR: versions */
-	__u64		    oti_pre_version;
-};
-
 /*
  * Events signalled through obd_notify() upcall-chain.
  */
@@ -587,15 +534,14 @@ struct lvfs_run_ctxt {
 
 struct obd_device {
 	struct obd_type	*obd_type;
-	__u32		   obd_magic;
+	u32			 obd_magic; /* OBD_DEVICE_MAGIC */
+	int			 obd_minor; /* device number: lctl dl */
+	struct lu_device	*obd_lu_dev;
 
 	/* common and UUID name of this device */
-	char		    obd_name[MAX_OBD_NAME];
-	struct obd_uuid	 obd_uuid;
-
-	struct lu_device       *obd_lu_dev;
+	struct obd_uuid		 obd_uuid;
+	char			 obd_name[MAX_OBD_NAME];
 
-	int		     obd_minor;
 	/* bitfield modification is protected by obd_dev_lock */
 	unsigned long obd_attached:1,      /* finished attach */
 		      obd_set_up:1,	/* finished setup */
@@ -619,22 +565,22 @@ struct obd_device {
 	unsigned long obd_recovery_expired:1;
 	/* uuid-export hash body */
 	struct cfs_hash	     *obd_uuid_hash;
-	atomic_t	    obd_refcount;
 	wait_queue_head_t	     obd_refcount_waitq;
 	struct list_head	      obd_exports;
 	struct list_head	      obd_unlinked_exports;
 	struct list_head	      obd_delayed_exports;
+	atomic_t			obd_refcount;
 	int		     obd_num_exports;
 	spinlock_t		obd_nid_lock;
 	struct ldlm_namespace  *obd_namespace;
 	struct ptlrpc_client	obd_ldlm_client; /* XXX OST/MDS only */
 	/* a spinlock is OK for what we do now, may need a semaphore later */
 	spinlock_t		obd_dev_lock; /* protect OBD bitfield above */
-	struct mutex		obd_dev_mutex;
-	__u64			obd_last_committed;
 	spinlock_t		obd_osfs_lock;
 	struct obd_statfs	obd_osfs;       /* locked by obd_osfs_lock */
 	__u64			obd_osfs_age;
+	u64			obd_last_committed;
+	struct mutex		obd_dev_mutex;
 	struct lvfs_run_ctxt	obd_lvfs_ctxt;
 	struct obd_llog_group	obd_olg;	/* default llog group */
 	struct obd_device	*obd_observer;
@@ -648,12 +594,13 @@ struct obd_device {
 		struct lov_obd lov;
 		struct lmv_obd lmv;
 	} u;
+
 	/* Fields used by LProcFS */
-	unsigned int	   obd_cntr_base;
-	struct lprocfs_stats  *obd_stats;
+	struct lprocfs_stats	*obd_stats;
+	unsigned int		 obd_cntr_base;
 
-	unsigned int	   md_cntr_base;
-	struct lprocfs_stats  *md_stats;
+	struct lprocfs_stats	*md_stats;
+	unsigned int		 md_cntr_base;
 
 	struct dentry		*obd_debugfs_entry;
 	struct dentry		*obd_svc_debugfs_entry;
@@ -665,9 +612,11 @@ struct obd_device {
 	/**
 	 * Ldlm pool part. Save last calculated SLV and Limit.
 	 */
-	rwlock_t		obd_pool_lock;
-	int		    obd_pool_limit;
-	__u64		  obd_pool_slv;
+	rwlock_t		 obd_pool_lock;
+	u64			 obd_pool_slv;
+	int			 obd_pool_limit;
+
+	int			 obd_conn_inprogress;
 
 	/**
 	 * A list of outstanding class_incref()'s against this obd. For
@@ -675,19 +624,10 @@ struct obd_device {
 	 */
 	struct lu_ref	  obd_reference;
 
-	int		       obd_conn_inprogress;
-
 	struct kobject		obd_kobj; /* sysfs object */
 	struct completion	obd_kobj_unregister;
 };
 
-enum obd_cleanup_stage {
-/* Special case hack for MDS LOVs */
-	OBD_CLEANUP_EARLY,
-/* can be directly mapped to .ldto_device_fini() */
-	OBD_CLEANUP_EXPORTS,
-};
-
 /* get/set_info keys */
 #define KEY_ASYNC	       "async"
 #define KEY_CHANGELOG_CLEAR     "changelog_clear"
@@ -704,7 +644,6 @@ enum obd_cleanup_stage {
 #define KEY_INTERMDS	    "inter_mds"
 #define KEY_LAST_ID	     "last_id"
 #define KEY_LAST_FID		"last_fid"
-#define KEY_LOVDESC	     "lovdesc"
 #define KEY_MAX_EASIZE		"max_easize"
 #define KEY_DEFAULT_EASIZE	"default_easize"
 #define KEY_MGSSEC	      "mgssec"
@@ -720,22 +659,6 @@ enum obd_cleanup_stage {
 
 struct lu_context;
 
-/* /!\ must be coherent with include/linux/namei.h on patched kernel */
-#define IT_OPEN     (1 << 0)
-#define IT_CREAT    (1 << 1)
-#define IT_READDIR  (1 << 2)
-#define IT_GETATTR  (1 << 3)
-#define IT_LOOKUP   (1 << 4)
-#define IT_UNLINK   (1 << 5)
-#define IT_TRUNC    (1 << 6)
-#define IT_GETXATTR (1 << 7)
-#define IT_EXEC     (1 << 8)
-#define IT_PIN      (1 << 9)
-#define IT_LAYOUT   (1 << 10)
-#define IT_QUOTA_DQACQ (1 << 11)
-#define IT_QUOTA_CONN  (1 << 12)
-#define IT_SETXATTR (1 << 13)
-
 static inline int it_to_lock_mode(struct lookup_intent *it)
 {
 	/* CREAT needs to be tested before open (both could be set) */
@@ -755,6 +678,14 @@ static inline int it_to_lock_mode(struct lookup_intent *it)
 	return -EINVAL;
 }
 
+enum md_op_flags {
+	MF_MDC_CANCEL_FID1	= BIT(0),
+	MF_MDC_CANCEL_FID2      = BIT(1),
+	MF_MDC_CANCEL_FID3      = BIT(2),
+	MF_MDC_CANCEL_FID4      = BIT(3),
+	MF_GET_MDT_IDX          = BIT(4),
+};
+
 enum md_cli_flags {
 	CLI_SET_MEA	= BIT(0),
 	CLI_RM_ENTRY	= BIT(1),
@@ -789,8 +720,6 @@ struct md_op_data {
 	__u64		   op_valid;
 	loff_t		  op_attr_blocks;
 
-	/* Size-on-MDS epoch and flags. */
-	__u64		   op_ioepoch;
 	__u32		   op_flags;
 
 	/* Various operation flags. */
@@ -839,15 +768,13 @@ struct obd_ops {
 	int (*iocontrol)(unsigned int cmd, struct obd_export *exp, int len,
 			 void *karg, void __user *uarg);
 	int (*get_info)(const struct lu_env *env, struct obd_export *,
-			__u32 keylen, void *key, __u32 *vallen, void *val,
-			struct lov_stripe_md *lsm);
+			__u32 keylen, void *key, __u32 *vallen, void *val);
 	int (*set_info_async)(const struct lu_env *, struct obd_export *,
 			      __u32 keylen, void *key,
 			      __u32 vallen, void *val,
 			      struct ptlrpc_request_set *set);
 	int (*setup)(struct obd_device *dev, struct lustre_cfg *cfg);
-	int (*precleanup)(struct obd_device *dev,
-			  enum obd_cleanup_stage cleanup_stage);
+	int (*precleanup)(struct obd_device *dev);
 	int (*cleanup)(struct obd_device *dev);
 	int (*process_config)(struct obd_device *dev, u32 len, void *data);
 	int (*postrecov)(struct obd_device *dev);
@@ -887,35 +814,23 @@ struct obd_ops {
 		      struct obd_statfs *osfs, __u64 max_age, __u32 flags);
 	int (*statfs_async)(struct obd_export *exp, struct obd_info *oinfo,
 			    __u64 max_age, struct ptlrpc_request_set *set);
-	int (*packmd)(struct obd_export *exp, struct lov_mds_md **disk_tgt,
-		      struct lov_stripe_md *mem_src);
-	int (*unpackmd)(struct obd_export *exp,
-			struct lov_stripe_md **mem_tgt,
-			struct lov_mds_md *disk_src, int disk_len);
 	int (*create)(const struct lu_env *env, struct obd_export *exp,
-		      struct obdo *oa, struct obd_trans_info *oti);
+		      struct obdo *oa);
 	int (*destroy)(const struct lu_env *env, struct obd_export *exp,
-		       struct obdo *oa, struct obd_trans_info *oti);
+		       struct obdo *oa);
 	int (*setattr)(const struct lu_env *, struct obd_export *exp,
-		       struct obd_info *oinfo, struct obd_trans_info *oti);
-	int (*setattr_async)(struct obd_export *exp, struct obd_info *oinfo,
-			     struct obd_trans_info *oti,
-			     struct ptlrpc_request_set *rqset);
+		       struct obdo *oa);
 	int (*getattr)(const struct lu_env *env, struct obd_export *exp,
-		       struct obd_info *oinfo);
-	int (*getattr_async)(struct obd_export *exp, struct obd_info *oinfo,
-			     struct ptlrpc_request_set *set);
+		       struct obdo *oa);
 	int (*preprw)(const struct lu_env *env, int cmd,
 		      struct obd_export *exp, struct obdo *oa, int objcount,
 		      struct obd_ioobj *obj, struct niobuf_remote *remote,
-		      int *nr_pages, struct niobuf_local *local,
-		      struct obd_trans_info *oti);
+		      int *nr_pages, struct niobuf_local *local);
 	int (*commitrw)(const struct lu_env *env, int cmd,
 			struct obd_export *exp, struct obdo *oa,
 			int objcount, struct obd_ioobj *obj,
 			struct niobuf_remote *remote, int pages,
-			struct niobuf_local *local,
-			struct obd_trans_info *oti, int rc);
+			struct niobuf_local *local, int rc);
 	int (*init_export)(struct obd_export *exp);
 	int (*destroy_export)(struct obd_export *exp);
 
@@ -930,8 +845,6 @@ struct obd_ops {
 	struct obd_uuid *(*get_uuid)(struct obd_export *exp);
 
 	/* quota methods */
-	int (*quotacheck)(struct obd_device *, struct obd_export *,
-			  struct obd_quotactl *);
 	int (*quotactl)(struct obd_device *, struct obd_export *,
 			struct obd_quotactl *);
 
@@ -954,7 +867,7 @@ struct obd_ops {
 /* lmv structures */
 struct lustre_md {
 	struct mdt_body	 *body;
-	struct lov_stripe_md    *lsm;
+	struct lu_buf		 layout;
 	struct lmv_stripe_md    *lmv;
 #ifdef CONFIG_FS_POSIX_ACL
 	struct posix_acl	*posix_acl;
@@ -992,10 +905,8 @@ struct md_ops {
 	int (*create)(struct obd_export *, struct md_op_data *,
 		      const void *, size_t, umode_t, uid_t, gid_t,
 		      cfs_cap_t, __u64, struct ptlrpc_request **);
-	int (*done_writing)(struct obd_export *, struct md_op_data  *,
-			    struct md_open_data *);
 	int (*enqueue)(struct obd_export *, struct ldlm_enqueue_info *,
-		       const ldlm_policy_data_t *,
+		       const union ldlm_policy_data *,
 		       struct lookup_intent *, struct md_op_data *,
 		       struct lustre_handle *, __u64);
 	int (*getattr)(struct obd_export *, struct md_op_data *,
@@ -1012,8 +923,7 @@ struct md_ops {
 		      const char *, size_t, const char *, size_t,
 		      struct ptlrpc_request **);
 	int (*setattr)(struct obd_export *, struct md_op_data *, void *,
-		       size_t, void *, size_t, struct ptlrpc_request **,
-			 struct md_open_data **mod);
+		       size_t, struct ptlrpc_request **);
 	int (*sync)(struct obd_export *, const struct lu_fid *,
 		    struct ptlrpc_request **);
 	int (*read_page)(struct obd_export *, struct md_op_data *,
@@ -1030,7 +940,7 @@ struct md_ops {
 			u64, const char *, const char *, int, int, int,
 			struct ptlrpc_request **);
 
-	int (*init_ea_size)(struct obd_export *, u32, u32, u32, u32);
+	int (*init_ea_size)(struct obd_export *, u32, u32);
 
 	int (*get_lustre_md)(struct obd_export *, struct ptlrpc_request *,
 			     struct obd_export *, struct obd_export *,
@@ -1052,11 +962,11 @@ struct md_ops {
 
 	enum ldlm_mode (*lock_match)(struct obd_export *, __u64,
 				     const struct lu_fid *, enum ldlm_type,
-				     ldlm_policy_data_t *, enum ldlm_mode,
+				     union ldlm_policy_data *, enum ldlm_mode,
 				     struct lustre_handle *);
 
 	int (*cancel_unused)(struct obd_export *, const struct lu_fid *,
-			     ldlm_policy_data_t *, enum ldlm_mode,
+			     union ldlm_policy_data *, enum ldlm_mode,
 			     enum ldlm_cancel_flags flags, void *opaque);
 
 	int (*get_fid_from_lsm)(struct obd_export *,
@@ -1071,6 +981,8 @@ struct md_ops {
 	int (*revalidate_lock)(struct obd_export *, struct lookup_intent *,
 			       struct lu_fid *, __u64 *bits);
 
+	int (*unpackmd)(struct obd_export *exp, struct lmv_stripe_md **plsm,
+			const union lmv_mds_md *lmv, size_t lmv_size);
 	/*
 	 * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to
 	 * lprocfs_alloc_md_stats() in obdclass/lprocfs_status.c. Also, add a
@@ -1078,33 +990,6 @@ struct md_ops {
 	 */
 };
 
-struct lsm_operations {
-	void (*lsm_free)(struct lov_stripe_md *);
-	void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, u64 *,
-				    u64 *);
-	void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, u64 *,
-				     u64 *);
-	int (*lsm_lmm_verify)(struct lov_mds_md *lmm, int lmm_bytes,
-			      __u16 *stripe_count);
-	int (*lsm_unpackmd)(struct lov_obd *lov, struct lov_stripe_md *lsm,
-			    struct lov_mds_md *lmm);
-};
-
-extern const struct lsm_operations lsm_v1_ops;
-extern const struct lsm_operations lsm_v3_ops;
-static inline const struct lsm_operations *lsm_op_find(int magic)
-{
-	switch (magic) {
-	case LOV_MAGIC_V1:
-	       return &lsm_v1_ops;
-	case LOV_MAGIC_V3:
-	       return &lsm_v3_ops;
-	default:
-	       CERROR("Cannot recognize lsm_magic %08x\n", magic);
-	       return NULL;
-	}
-}
-
 static inline struct md_open_data *obd_mod_alloc(void)
 {
 	struct md_open_data *mod;
diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h
index 16094dbec08b..7ec25202cd22 100644
--- a/drivers/staging/lustre/lustre/include/obd_class.h
+++ b/drivers/staging/lustre/lustre/include/obd_class.h
@@ -100,6 +100,13 @@ int obd_get_request_slot(struct client_obd *cli);
 void obd_put_request_slot(struct client_obd *cli);
 __u32 obd_get_max_rpcs_in_flight(struct client_obd *cli);
 int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max);
+int obd_set_max_mod_rpcs_in_flight(struct client_obd *cli, u16 max);
+int obd_mod_rpc_stats_seq_show(struct client_obd *cli, struct seq_file *seq);
+
+u16 obd_get_mod_rpc_slot(struct client_obd *cli, u32 opc,
+			 struct lookup_intent *it);
+void obd_put_mod_rpc_slot(struct client_obd *cli, u32 opc,
+			  struct lookup_intent *it, u16 tag);
 
 struct llog_handle;
 struct llog_rec_hdr;
@@ -175,10 +182,13 @@ struct lustre_profile {
 	char	    *lp_profile;
 	char	    *lp_dt;
 	char	    *lp_md;
+	int			lp_refs;
+	bool			lp_list_deleted;
 };
 
 struct lustre_profile *class_get_profile(const char *prof);
 void class_del_profile(const char *prof);
+void class_put_profile(struct lustre_profile *lprof);
 void class_del_profiles(void);
 
 #if LUSTRE_TRACKS_LOCK_EXP_REFS
@@ -269,10 +279,8 @@ static inline int lprocfs_climp_check(struct obd_device *obd)
 struct inode;
 struct lu_attr;
 struct obdo;
-void obdo_refresh_inode(struct inode *dst, const struct obdo *src, u32 valid);
 
 void obdo_to_ioobj(const struct obdo *oa, struct obd_ioobj *ioobj);
-void md_from_obdo(struct md_op_data *op_data, const struct obdo *oa, u32 valid);
 
 #define OBT(dev)	(dev)->obd_type
 #define OBP(dev, op)    (dev)->obd_type->typ_dt_ops->op
@@ -417,16 +425,14 @@ static inline int class_devno_max(void)
 
 static inline int obd_get_info(const struct lu_env *env,
 			       struct obd_export *exp, __u32 keylen,
-			       void *key, __u32 *vallen, void *val,
-			       struct lov_stripe_md *lsm)
+			       void *key, __u32 *vallen, void *val)
 {
 	int rc;
 
 	EXP_CHECK_DT_OP(exp, get_info);
 	EXP_COUNTER_INCREMENT(exp, get_info);
 
-	rc = OBP(exp->exp_obd, get_info)(env, exp, keylen, key, vallen, val,
-					 lsm);
+	rc = OBP(exp->exp_obd, get_info)(env, exp, keylen, key, vallen, val);
 	return rc;
 }
 
@@ -505,8 +511,7 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg)
 	return rc;
 }
 
-static inline int obd_precleanup(struct obd_device *obd,
-				 enum obd_cleanup_stage cleanup_stage)
+static inline int obd_precleanup(struct obd_device *obd)
 {
 	int rc;
 	DECLARE_LU_VARS(ldt, d);
@@ -517,20 +522,18 @@ static inline int obd_precleanup(struct obd_device *obd,
 	ldt = obd->obd_type->typ_lu;
 	d = obd->obd_lu_dev;
 	if (ldt && d) {
-		if (cleanup_stage == OBD_CLEANUP_EXPORTS) {
-			struct lu_env env;
+		struct lu_env env;
 
-			rc = lu_env_init(&env, ldt->ldt_ctx_tags);
-			if (rc == 0) {
-				ldt->ldt_ops->ldto_device_fini(&env, d);
-				lu_env_fini(&env);
-			}
+		rc = lu_env_init(&env, ldt->ldt_ctx_tags);
+		if (!rc) {
+			ldt->ldt_ops->ldto_device_fini(&env, d);
+			lu_env_fini(&env);
 		}
 	}
 	OBD_CHECK_DT_OP(obd, precleanup, 0);
 	OBD_COUNTER_INCREMENT(obd, precleanup);
 
-	rc = OBP(obd, precleanup)(obd, cleanup_stage);
+	rc = OBP(obd, precleanup)(obd);
 	return rc;
 }
 
@@ -612,181 +615,51 @@ obd_process_config(struct obd_device *obd, int datalen, void *data)
 	return rc;
 }
 
-/* Pack an in-memory MD struct for storage on disk.
- * Returns +ve size of packed MD (0 for free), or -ve error.
- *
- * If @disk_tgt == NULL, MD size is returned (max size if @mem_src == NULL).
- * If @*disk_tgt != NULL and @mem_src == NULL, @*disk_tgt will be freed.
- * If @*disk_tgt == NULL, it will be allocated
- */
-static inline int obd_packmd(struct obd_export *exp,
-			     struct lov_mds_md **disk_tgt,
-			     struct lov_stripe_md *mem_src)
-{
-	int rc;
-
-	EXP_CHECK_DT_OP(exp, packmd);
-	EXP_COUNTER_INCREMENT(exp, packmd);
-
-	rc = OBP(exp->exp_obd, packmd)(exp, disk_tgt, mem_src);
-	return rc;
-}
-
-static inline int obd_size_diskmd(struct obd_export *exp,
-				  struct lov_stripe_md *mem_src)
-{
-	return obd_packmd(exp, NULL, mem_src);
-}
-
-static inline int obd_free_diskmd(struct obd_export *exp,
-				  struct lov_mds_md **disk_tgt)
-{
-	LASSERT(disk_tgt);
-	LASSERT(*disk_tgt);
-	/*
-	 * LU-2590, for caller's convenience, *disk_tgt could be host
-	 * endianness, it needs swab to LE if necessary, while just
-	 * lov_mds_md header needs it for figuring out how much memory
-	 * needs to be freed.
-	 */
-	if ((cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) &&
-	    (((*disk_tgt)->lmm_magic == LOV_MAGIC_V1) ||
-	     ((*disk_tgt)->lmm_magic == LOV_MAGIC_V3)))
-		lustre_swab_lov_mds_md(*disk_tgt);
-	return obd_packmd(exp, disk_tgt, NULL);
-}
-
-/* Unpack an MD struct from disk to in-memory format.
- * Returns +ve size of unpacked MD (0 for free), or -ve error.
- *
- * If @mem_tgt == NULL, MD size is returned (max size if @disk_src == NULL).
- * If @*mem_tgt != NULL and @disk_src == NULL, @*mem_tgt will be freed.
- * If @*mem_tgt == NULL, it will be allocated
- */
-static inline int obd_unpackmd(struct obd_export *exp,
-			       struct lov_stripe_md **mem_tgt,
-			       struct lov_mds_md *disk_src,
-			       int disk_len)
-{
-	int rc;
-
-	EXP_CHECK_DT_OP(exp, unpackmd);
-	EXP_COUNTER_INCREMENT(exp, unpackmd);
-
-	rc = OBP(exp->exp_obd, unpackmd)(exp, mem_tgt, disk_src, disk_len);
-	return rc;
-}
-
-static inline int obd_free_memmd(struct obd_export *exp,
-				 struct lov_stripe_md **mem_tgt)
-{
-	int rc;
-
-	LASSERT(mem_tgt);
-	LASSERT(*mem_tgt);
-	rc = obd_unpackmd(exp, mem_tgt, NULL, 0);
-	*mem_tgt = NULL;
-	return rc;
-}
-
 static inline int obd_create(const struct lu_env *env, struct obd_export *exp,
-			     struct obdo *obdo, struct obd_trans_info *oti)
+			     struct obdo *obdo)
 {
 	int rc;
 
 	EXP_CHECK_DT_OP(exp, create);
 	EXP_COUNTER_INCREMENT(exp, create);
 
-	rc = OBP(exp->exp_obd, create)(env, exp, obdo, oti);
+	rc = OBP(exp->exp_obd, create)(env, exp, obdo);
 	return rc;
 }
 
 static inline int obd_destroy(const struct lu_env *env, struct obd_export *exp,
-			      struct obdo *obdo, struct obd_trans_info *oti)
+			      struct obdo *obdo)
 {
 	int rc;
 
 	EXP_CHECK_DT_OP(exp, destroy);
 	EXP_COUNTER_INCREMENT(exp, destroy);
 
-	rc = OBP(exp->exp_obd, destroy)(env, exp, obdo, oti);
+	rc = OBP(exp->exp_obd, destroy)(env, exp, obdo);
 	return rc;
 }
 
 static inline int obd_getattr(const struct lu_env *env, struct obd_export *exp,
-			      struct obd_info *oinfo)
+			      struct obdo *oa)
 {
 	int rc;
 
 	EXP_CHECK_DT_OP(exp, getattr);
 	EXP_COUNTER_INCREMENT(exp, getattr);
 
-	rc = OBP(exp->exp_obd, getattr)(env, exp, oinfo);
-	return rc;
-}
-
-static inline int obd_getattr_async(struct obd_export *exp,
-				    struct obd_info *oinfo,
-				    struct ptlrpc_request_set *set)
-{
-	int rc;
-
-	EXP_CHECK_DT_OP(exp, getattr_async);
-	EXP_COUNTER_INCREMENT(exp, getattr_async);
-
-	rc = OBP(exp->exp_obd, getattr_async)(exp, oinfo, set);
+	rc = OBP(exp->exp_obd, getattr)(env, exp, oa);
 	return rc;
 }
 
 static inline int obd_setattr(const struct lu_env *env, struct obd_export *exp,
-			      struct obd_info *oinfo,
-			      struct obd_trans_info *oti)
+			      struct obdo *oa)
 {
 	int rc;
 
 	EXP_CHECK_DT_OP(exp, setattr);
 	EXP_COUNTER_INCREMENT(exp, setattr);
 
-	rc = OBP(exp->exp_obd, setattr)(env, exp, oinfo, oti);
-	return rc;
-}
-
-/* This performs all the requests set init/wait/destroy actions. */
-static inline int obd_setattr_rqset(struct obd_export *exp,
-				    struct obd_info *oinfo,
-				    struct obd_trans_info *oti)
-{
-	struct ptlrpc_request_set *set = NULL;
-	int rc;
-
-	EXP_CHECK_DT_OP(exp, setattr_async);
-	EXP_COUNTER_INCREMENT(exp, setattr_async);
-
-	set =  ptlrpc_prep_set();
-	if (!set)
-		return -ENOMEM;
-
-	rc = OBP(exp->exp_obd, setattr_async)(exp, oinfo, oti, set);
-	if (rc == 0)
-		rc = ptlrpc_set_wait(set);
-	ptlrpc_set_destroy(set);
-	return rc;
-}
-
-/* This adds all the requests into @set if @set != NULL, otherwise
- * all requests are sent asynchronously without waiting for response.
- */
-static inline int obd_setattr_async(struct obd_export *exp,
-				    struct obd_info *oinfo,
-				    struct obd_trans_info *oti,
-				    struct ptlrpc_request_set *set)
-{
-	int rc;
-
-	EXP_CHECK_DT_OP(exp, setattr_async);
-	EXP_COUNTER_INCREMENT(exp, setattr_async);
-
-	rc = OBP(exp->exp_obd, setattr_async)(exp, oinfo, oti, set);
+	rc = OBP(exp->exp_obd, setattr)(env, exp, oa);
 	return rc;
 }
 
@@ -1053,15 +926,16 @@ static inline int obd_statfs_rqset(struct obd_export *exp,
 				   __u32 flags)
 {
 	struct ptlrpc_request_set *set = NULL;
-	struct obd_info oinfo = { };
+	struct obd_info oinfo = {
+		.oi_osfs = osfs,
+		.oi_flags = flags,
+	};
 	int rc = 0;
 
-	set =  ptlrpc_prep_set();
+	set = ptlrpc_prep_set();
 	if (!set)
 		return -ENOMEM;
 
-	oinfo.oi_osfs = osfs;
-	oinfo.oi_flags = flags;
 	rc = obd_statfs_async(exp, &oinfo, max_age, set);
 	if (rc == 0)
 		rc = ptlrpc_set_wait(set);
@@ -1112,8 +986,7 @@ static inline int obd_preprw(const struct lu_env *env, int cmd,
 			     struct obd_export *exp, struct obdo *oa,
 			     int objcount, struct obd_ioobj *obj,
 			     struct niobuf_remote *remote, int *pages,
-			     struct niobuf_local *local,
-			     struct obd_trans_info *oti)
+			     struct niobuf_local *local)
 {
 	int rc;
 
@@ -1121,7 +994,7 @@ static inline int obd_preprw(const struct lu_env *env, int cmd,
 	EXP_COUNTER_INCREMENT(exp, preprw);
 
 	rc = OBP(exp->exp_obd, preprw)(env, cmd, exp, oa, objcount, obj, remote,
-				       pages, local, oti);
+				       pages, local);
 	return rc;
 }
 
@@ -1129,14 +1002,13 @@ static inline int obd_commitrw(const struct lu_env *env, int cmd,
 			       struct obd_export *exp, struct obdo *oa,
 			       int objcount, struct obd_ioobj *obj,
 			       struct niobuf_remote *rnb, int pages,
-			       struct niobuf_local *local,
-			       struct obd_trans_info *oti, int rc)
+			       struct niobuf_local *local, int rc)
 {
 	EXP_CHECK_DT_OP(exp, commitrw);
 	EXP_COUNTER_INCREMENT(exp, commitrw);
 
 	rc = OBP(exp->exp_obd, commitrw)(env, cmd, exp, oa, objcount, obj,
-					 rnb, pages, local, oti, rc);
+					 rnb, pages, local, rc);
 	return rc;
 }
 
@@ -1219,18 +1091,6 @@ static inline int obd_notify_observer(struct obd_device *observer,
 	return rc1 ? rc1 : rc2;
 }
 
-static inline int obd_quotacheck(struct obd_export *exp,
-				 struct obd_quotactl *oqctl)
-{
-	int rc;
-
-	EXP_CHECK_DT_OP(exp, quotacheck);
-	EXP_COUNTER_INCREMENT(exp, quotacheck);
-
-	rc = OBP(exp->exp_obd, quotacheck)(exp->exp_obd, exp, oqctl);
-	return rc;
-}
-
 static inline int obd_quotactl(struct obd_export *exp,
 			       struct obd_quotactl *oqctl)
 {
@@ -1346,21 +1206,9 @@ static inline int md_create(struct obd_export *exp, struct md_op_data *op_data,
 	return rc;
 }
 
-static inline int md_done_writing(struct obd_export *exp,
-				  struct md_op_data *op_data,
-				  struct md_open_data *mod)
-{
-	int rc;
-
-	EXP_CHECK_MD_OP(exp, done_writing);
-	EXP_MD_COUNTER_INCREMENT(exp, done_writing);
-	rc = MDP(exp->exp_obd, done_writing)(exp, op_data, mod);
-	return rc;
-}
-
 static inline int md_enqueue(struct obd_export *exp,
 			     struct ldlm_enqueue_info *einfo,
-			     const ldlm_policy_data_t *policy,
+			     const union ldlm_policy_data *policy,
 			     struct lookup_intent *it,
 			     struct md_op_data *op_data,
 			     struct lustre_handle *lockh,
@@ -1428,16 +1276,14 @@ static inline int md_rename(struct obd_export *exp, struct md_op_data *op_data,
 }
 
 static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data,
-			     void *ea, size_t ealen, void *ea2, size_t ea2len,
-			     struct ptlrpc_request **request,
-			     struct md_open_data **mod)
+			     void *ea, size_t ealen,
+			     struct ptlrpc_request **request)
 {
 	int rc;
 
 	EXP_CHECK_MD_OP(exp, setattr);
 	EXP_MD_COUNTER_INCREMENT(exp, setattr);
-	rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen,
-					ea2, ea2len, request, mod);
+	rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen, request);
 	return rc;
 }
 
@@ -1561,7 +1407,7 @@ static inline int md_set_lock_data(struct obd_export *exp,
 
 static inline int md_cancel_unused(struct obd_export *exp,
 				   const struct lu_fid *fid,
-				   ldlm_policy_data_t *policy,
+				   union ldlm_policy_data *policy,
 				   enum ldlm_mode mode,
 				   enum ldlm_cancel_flags flags,
 				   void *opaque)
@@ -1579,7 +1425,7 @@ static inline int md_cancel_unused(struct obd_export *exp,
 static inline enum ldlm_mode md_lock_match(struct obd_export *exp, __u64 flags,
 					   const struct lu_fid *fid,
 					   enum ldlm_type type,
-					   ldlm_policy_data_t *policy,
+					   union ldlm_policy_data *policy,
 					   enum ldlm_mode mode,
 					   struct lustre_handle *lockh)
 {
@@ -1589,14 +1435,12 @@ static inline enum ldlm_mode md_lock_match(struct obd_export *exp, __u64 flags,
 					     policy, mode, lockh);
 }
 
-static inline int md_init_ea_size(struct obd_export *exp, int easize,
-				  int def_asize, int cookiesize,
-				  int def_cookiesize)
+static inline int md_init_ea_size(struct obd_export *exp, u32 easize,
+				  u32 def_asize)
 {
 	EXP_CHECK_MD_OP(exp, init_ea_size);
 	EXP_MD_COUNTER_INCREMENT(exp, init_ea_size);
-	return MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize,
-					       cookiesize, def_cookiesize);
+	return MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize);
 }
 
 static inline int md_intent_getattr_async(struct obd_export *exp,
@@ -1636,6 +1480,24 @@ static inline int md_get_fid_from_lsm(struct obd_export *exp,
 	return rc;
 }
 
+/* Unpack an MD struct from disk to in-memory format.
+ * Returns +ve size of unpacked MD (0 for free), or -ve error.
+ *
+ * If *plsm != NULL and lmm == NULL then *lsm will be freed.
+ * If *plsm == NULL then it will be allocated.
+ */
+static inline int md_unpackmd(struct obd_export *exp,
+			      struct lmv_stripe_md **plsm,
+			      const union lmv_mds_md *lmm, size_t lmm_size)
+{
+	int rc;
+
+	EXP_CHECK_MD_OP(exp, unpackmd);
+	EXP_MD_COUNTER_INCREMENT(exp, unpackmd);
+	rc = MDP(exp->exp_obd, unpackmd)(exp, plsm, lmm, lmm_size);
+	return rc;
+}
+
 /* OBD Metadata Support */
 
 int obd_init_caches(void);
diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h
index b346a7f10aa4..aaedec7d793c 100644
--- a/drivers/staging/lustre/lustre/include/obd_support.h
+++ b/drivers/staging/lustre/lustre/include/obd_support.h
@@ -172,14 +172,14 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_MDS_ALL_REQUEST_NET     0x123
 #define OBD_FAIL_MDS_SYNC_NET	    0x124
 #define OBD_FAIL_MDS_SYNC_PACK	   0x125
-#define OBD_FAIL_MDS_DONE_WRITING_NET    0x126
-#define OBD_FAIL_MDS_DONE_WRITING_PACK   0x127
+/*	OBD_FAIL_MDS_DONE_WRITING_NET	0x126 obsolete since 2.8.0 */
+/*	OBD_FAIL_MDS_DONE_WRITING_PACK	0x127 obsolete since 2.8.0 */
 #define OBD_FAIL_MDS_ALLOC_OBDO	  0x128
 #define OBD_FAIL_MDS_PAUSE_OPEN	  0x129
 #define OBD_FAIL_MDS_STATFS_LCW_SLEEP    0x12a
 #define OBD_FAIL_MDS_OPEN_CREATE	 0x12b
 #define OBD_FAIL_MDS_OST_SETATTR	 0x12c
-#define OBD_FAIL_MDS_QUOTACHECK_NET      0x12d
+/*	OBD_FAIL_MDS_QUOTACHECK_NET      0x12d obsolete since 2.4 */
 #define OBD_FAIL_MDS_QUOTACTL_NET	0x12e
 #define OBD_FAIL_MDS_CLIENT_ADD	  0x12f
 #define OBD_FAIL_MDS_GETXATTR_NET	0x130
@@ -264,7 +264,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_OST_ENOSPC	      0x215
 #define OBD_FAIL_OST_EROFS	       0x216
 #define OBD_FAIL_OST_ENOENT	      0x217
-#define OBD_FAIL_OST_QUOTACHECK_NET      0x218
+/*	OBD_FAIL_OST_QUOTACHECK_NET      0x218 obsolete since 2.4 */
 #define OBD_FAIL_OST_QUOTACTL_NET	0x219
 #define OBD_FAIL_OST_CHECKSUM_RECEIVE    0x21a
 #define OBD_FAIL_OST_CHECKSUM_SEND       0x21b
@@ -321,6 +321,8 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_LDLM_CP_CB_WAIT4	 0x322
 #define OBD_FAIL_LDLM_CP_CB_WAIT5	 0x323
 
+#define OBD_FAIL_LDLM_GRANT_CHECK        0x32a
+
 /* LOCKLESS IO */
 #define OBD_FAIL_LDLM_SET_CONTENTION     0x385
 
@@ -343,6 +345,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_OSC_CP_ENQ_RACE	 0x410
 #define OBD_FAIL_OSC_NO_GRANT	    0x411
 #define OBD_FAIL_OSC_DELAY_SETTIME	 0x412
+#define OBD_FAIL_OSC_DELAY_IO		 0x414
 
 #define OBD_FAIL_PTLRPC		  0x500
 #define OBD_FAIL_PTLRPC_ACK	      0x501
@@ -373,7 +376,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_OBD_PING_NET	    0x600
 #define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
 #define OBD_FAIL_OBD_LOGD_NET	    0x602
-#define OBD_FAIL_OBD_QC_CALLBACK_NET     0x603
+/*	OBD_FAIL_OBD_QC_CALLBACK_NET     0x603 obsolete since 2.4 */
 #define OBD_FAIL_OBD_DQACQ	       0x604
 #define OBD_FAIL_OBD_LLOG_SETUP	  0x605
 #define OBD_FAIL_OBD_LOG_CANCEL_REP      0x606
@@ -458,6 +461,8 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_LOV_INIT			    0x1403
 #define OBD_FAIL_GLIMPSE_DELAY			    0x1404
 #define OBD_FAIL_LLITE_XATTR_ENOMEM		    0x1405
+#define OBD_FAIL_MAKE_LOVEA_HOLE		    0x1406
+#define OBD_FAIL_LLITE_LOST_LAYOUT		    0x1407
 #define OBD_FAIL_GETATTR_DELAY			    0x1409
 
 #define OBD_FAIL_FID_INDIR	0x1501
diff --git a/drivers/staging/lustre/lustre/include/seq_range.h b/drivers/staging/lustre/lustre/include/seq_range.h
new file mode 100644
index 000000000000..30c4dd66d5c4
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/seq_range.h
@@ -0,0 +1,199 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2014, Intel Corporation.
+ *
+ * Copyright 2015 Cray Inc, all rights reserved.
+ * Author: Ben Evans.
+ *
+ * Define lu_seq_range  associated functions
+ */
+
+#ifndef _SEQ_RANGE_H_
+#define _SEQ_RANGE_H_
+
+#include "lustre/lustre_idl.h"
+
+/**
+ * computes the sequence range type \a range
+ */
+
+static inline unsigned int fld_range_type(const struct lu_seq_range *range)
+{
+	return range->lsr_flags & LU_SEQ_RANGE_MASK;
+}
+
+/**
+ *  Is this sequence range an OST? \a range
+ */
+
+static inline bool fld_range_is_ost(const struct lu_seq_range *range)
+{
+	return fld_range_type(range) == LU_SEQ_RANGE_OST;
+}
+
+/**
+ *  Is this sequence range an MDT? \a range
+ */
+
+static inline bool fld_range_is_mdt(const struct lu_seq_range *range)
+{
+	return fld_range_type(range) == LU_SEQ_RANGE_MDT;
+}
+
+/**
+ * ANY range is only used when the fld client sends a fld query request,
+ * but it does not know whether the seq is an MDT or OST, so it will send the
+ * request with ANY type, which means any seq type from the lookup can be
+ * expected. /a range
+ */
+static inline unsigned int fld_range_is_any(const struct lu_seq_range *range)
+{
+	return fld_range_type(range) == LU_SEQ_RANGE_ANY;
+}
+
+/**
+ * Apply flags to range \a range \a flags
+ */
+
+static inline void fld_range_set_type(struct lu_seq_range *range,
+				      unsigned int flags)
+{
+	range->lsr_flags |= flags;
+}
+
+/**
+ * Add MDT to range type \a range
+ */
+
+static inline void fld_range_set_mdt(struct lu_seq_range *range)
+{
+	fld_range_set_type(range, LU_SEQ_RANGE_MDT);
+}
+
+/**
+ * Add OST to range type \a range
+ */
+
+static inline void fld_range_set_ost(struct lu_seq_range *range)
+{
+	fld_range_set_type(range, LU_SEQ_RANGE_OST);
+}
+
+/**
+ * Add ANY to range type \a range
+ */
+
+static inline void fld_range_set_any(struct lu_seq_range *range)
+{
+	fld_range_set_type(range, LU_SEQ_RANGE_ANY);
+}
+
+/**
+ * computes width of given sequence range \a range
+ */
+
+static inline u64 lu_seq_range_space(const struct lu_seq_range *range)
+{
+	return range->lsr_end - range->lsr_start;
+}
+
+/**
+ * initialize range to zero \a range
+ */
+
+static inline void lu_seq_range_init(struct lu_seq_range *range)
+{
+	memset(range, 0, sizeof(*range));
+}
+
+/**
+ * check if given seq id \a s is within given range \a range
+ */
+
+static inline bool lu_seq_range_within(const struct lu_seq_range *range,
+				       u64 seq)
+{
+	return seq >= range->lsr_start && seq < range->lsr_end;
+}
+
+/**
+ * Is the range sane?  Is the end after the beginning? \a range
+ */
+
+static inline bool lu_seq_range_is_sane(const struct lu_seq_range *range)
+{
+	return range->lsr_end >= range->lsr_start;
+}
+
+/**
+ * Is the range 0? \a range
+ */
+
+static inline bool lu_seq_range_is_zero(const struct lu_seq_range *range)
+{
+	return range->lsr_start == 0 && range->lsr_end == 0;
+}
+
+/**
+ * Is the range out of space? \a range
+ */
+
+static inline bool lu_seq_range_is_exhausted(const struct lu_seq_range *range)
+{
+	return lu_seq_range_space(range) == 0;
+}
+
+/**
+ * return 0 if two ranges have the same location, nonzero if they are
+ * different \a r1 \a r2
+ */
+
+static inline int lu_seq_range_compare_loc(const struct lu_seq_range *r1,
+					   const struct lu_seq_range *r2)
+{
+	return r1->lsr_index != r2->lsr_index ||
+		r1->lsr_flags != r2->lsr_flags;
+}
+
+#if !defined(__REQ_LAYOUT_USER__)
+/**
+ * byte swap range structure \a range
+ */
+
+void lustre_swab_lu_seq_range(struct lu_seq_range *range);
+#endif
+/**
+ * printf string and argument list for sequence range
+ */
+#define DRANGE "[%#16.16llx-%#16.16llx]:%x:%s"
+
+#define PRANGE(range)		\
+	(range)->lsr_start,	\
+	(range)->lsr_end,	\
+	(range)->lsr_index,	\
+	fld_range_is_mdt(range) ? "mdt" : "ost"
+
+#endif